From 4559043ab1cb5d8c35435ca5cae0c44d4e5a8e7b Mon Sep 17 00:00:00 2001 From: hyunssong Date: Mon, 9 Oct 2023 20:32:45 -0700 Subject: [PATCH 1/7] Add zero shot learning experiments so far --- SESAR/zeroshot-learning/README.md | 24 ++++ SESAR/zeroshot-learning/evaluate.py | 134 +++++++++++++++++ SESAR/zeroshot-learning/finetune_ZTC.py | 136 ++++++++++++++++++ .../zeroshot-learning/unique_multi_labels.txt | 101 +++++++++++++ 4 files changed, 395 insertions(+) create mode 100644 SESAR/zeroshot-learning/README.md create mode 100644 SESAR/zeroshot-learning/evaluate.py create mode 100644 SESAR/zeroshot-learning/finetune_ZTC.py create mode 100644 SESAR/zeroshot-learning/unique_multi_labels.txt diff --git a/SESAR/zeroshot-learning/README.md b/SESAR/zeroshot-learning/README.md new file mode 100644 index 0000000..69b20df --- /dev/null +++ b/SESAR/zeroshot-learning/README.md @@ -0,0 +1,24 @@ +This directory contains the ongoing experiments on using Zeroshot Text Classification to predict the material type iSamples vocabulary of the SESAR dataset. + +## Datasets +Different datasets were created from the original SESAR dump and the annotated data `SESARTrainingiSamKeywords.csv` to find the best method to solve our problem. +The datasets that are used during this process will be uploaded [here](https://drive.google.com/drive/folders/1PCm8greoBSBXm-YNHeJwbvBLj7ytw6Gr?usp=sharing). + +- `SESAR_ZTC_test_multiclass_label_fully_unseen.csv` : Used for multiclass label-fully-unseen tasks. +- `SESAR_ZTC_test_multilabel_label_fully_unseen.csv` : Used for multilabel label-fully-unseen tasks. +- `SESAR_ZTC_partial_label_unseen_train.csv`, `SESAR_ZTC_partial_label_unseen_dev.csv`,`SESAR_ZTC_partial_label_unseen_test.csv` : Used for multiclass partially-label-unseen tasks. + + +## Code +-`finetune_ZTC.py` : Implementation of fine-tuning a textual entailment model on the SESAR dataset. 
Converts the dataset into a format that is applicable for textual entailment finetuning task and uses the given arguments to execute finetuning. The finetuned model will be stored in the output directory. + +-`evaluate.py` : Implementation of evaluating the model on SESAR dataset. The model that can be used could be either a finetuned model from finetune_ZTC.py or an out-of-box textual entailment model(completely zeroshot). Result of evaluation will be logged. Supports solving the task as a multilabel or multiclass. For multiclass approach, also contains implementation of using specified depth level of the entire hierarchical label space of iSamples vocabulary. + +## Results of Experiments +Results of the ongoing experiments will be updated [here](https://docs.google.com/spreadsheets/d/19Q95HsjRS7JGyHoY8o8hxirBO6NiJ1ufHYB_xg0X4Ks/edit?usp=sharing). +Approaches experimented so far: +1) Partially-label-unseen : Multiclass. Finetune the model with a partial label space (partial training data) and see how it evaluates on the entire label space. This approaches uses the iSamplesMaterialType that was contained in SESARTrainingiSamKeywords.csv directly as labels. +2) Fully-label-unseen : Multiclass. Use the pretrained model directly and apply it on the test dataset. This approaches uses the iSamplesMaterialType that was contained in SESARTrainingiSamKeywords.csv directly as labels. +3) Multilabel-fully-label-unseen : Multilabel. Use the pretrained model directly and apply it on the test dataset. This approach uses the label including the extension vocabulary and expects the model to predict the label and all of the parent labels of it. +4) Depth-fully-label-unseen: Multiclass. Use the iSamplesMaterialType field and the extMaterialType field in SESARTrainingiSamKeywords.csv to convert the label into specified depth level of the iSamples MaterialType hierarchy. Use this converted label as expected prediction space. +5) Multilabel-Finetune : Multilabel. 
Use the pretrained model and finetune on the entire SESAR dataset. (code TBC) diff --git a/SESAR/zeroshot-learning/evaluate.py b/SESAR/zeroshot-learning/evaluate.py new file mode 100644 index 0000000..b083cfb --- /dev/null +++ b/SESAR/zeroshot-learning/evaluate.py @@ -0,0 +1,134 @@ +from argparse import ArgumentParser +import pandas as pd +from transformers import (AutoTokenizer, pipeline) +from transformers.pipelines.pt_utils import KeyDataset +from datasets import Dataset +from sklearn.metrics import classification_report +import logging +import torch +import os +from tqdm.auto import tqdm +from sklearn.metrics import accuracy_score +from sklearn.preprocessing import MultiLabelBinarizer +logging.basicConfig() +logging.getLogger().setLevel(logging.INFO) +os.environ["WANDB_MODE"]="disabled" + + +THRESHOLD = 0.5 +"""## Model training""" +# If there's a GPU available... +if torch.cuda.is_available(): + + # Tell PyTorch to use the GPU. + device = torch.device("cuda") + print('There are %d GPU(s) available.' % torch.cuda.device_count()) + print('We will use the GPU:', torch.cuda.get_device_name(0)) + +# If not... +else: + print('No GPU available, using the CPU instead.') + device = torch.device("cpu") + +def get_multilabel_predictions(predictions, THRESHOLD): + """Get all predictions by conducting multilabel classification""" + predicted_labels = [] + for pred in predictions: + # get predictions that have probability larger than THRESHOLD + indices = [i for i, val in enumerate(pred['scores']) if val >= THRESHOLD] + prediction = [pred['labels'][i] for i in indices] + predicted_labels.append(prediction) + return predicted_labels + +def get_zero_shot_predictions(multilabel,output_dir, test_df, template_type, label_names, batch_size, max_length): + """ Get the zero shot predictions by applying the model to the full label space + + Args: + multilabel : whether to solve the problem as multilabel or not. 
+ output_dir : directory(label-partially-unseen) that stores the finetuned model or the pretrained model name(label-fully-unseen) we want to use for prediction + test_df : test dataset used for evaluation + template_type : hypothesis template type (A/B/C) + label_names : list of labels that the model will use as label space + batch_size : batch size that is used in prediction + max_length : max length of tokens that are used during tokenization of input text + """ + device = 0 if torch.cuda.is_available() else -1 + # load saved tokenizer and classifier + tokenizer = AutoTokenizer.from_pretrained(output_dir, use_fast=True, model_max_length=max_length) + classifier = pipeline("zero-shot-classification", model=output_dir, tokenizer=tokenizer, device=device) + # load test dataset + test_col = 'concatenated_text_' + template_type + if template_type=='C': + test_col = 'concatenated_text_B' # no column for C exists for the test set + test_text = test_df[test_col].values.tolist() + test_ds = Dataset.from_dict({'text': test_text }) + # get zero-shot predictions + preds_list = [] + for text, output in tqdm(zip(test_text, classifier(KeyDataset(test_ds, 'text'), batch_size=batch_size, candidate_labels=label_names, multi_label=multilabel)), + total=len(test_ds), desc="SESAR Zero Shot"): + preds_list.append(output) + if not multilabel: + # get a single predicted label + return [x['labels'][0] for x in preds_list] + else: + return get_multilabel_predictions(preds_list, THRESHOLD) + +def evaluate_classification_performance(multilabel, predicted_labels, gold_labels, gold_label_names): + target_names = None + if multilabel: + mlb = MultiLabelBinarizer() + # Fit the MultiLabelBinarizer on your labels and transform them into one-hot vectors + mlb.fit([gold_label_names]) + gold_labels = mlb.transform(gold_labels) + predicted_labels = mlb.transform(predicted_labels) + target_names = mlb.classes_ + accuracy = accuracy_score(gold_labels, predicted_labels) + report = 
classification_report(gold_labels, predicted_labels, target_names=target_names, output_dict=True) + logging.info(classification_report(gold_labels, predicted_labels, target_names = target_names)) + for key, score in report.items(): + if type(score)==dict: + logging.info(f"{key:<30} {score['precision']:.3f} {score['recall']:.3f} {score['f1-score']:.3f}") + logging.info(f"Accuracy : {accuracy}") + +if __name__ == '__main__': + parser = ArgumentParser() + parser.add_argument("--hypothesis_template_type", type=str, default='A') + parser.add_argument("--test_dataset_dir", type=str) + parser.add_argument("--eval_batch_size", type=int, default=32) + parser.add_argument("--max_length", type=int, default=256) + parser.add_argument("--multilabel", type=bool,default=True) + parser.add_argument("--depth_level", type=int,default=1) + parser.add_argument("--output_dir", type=str, default='roberta-large-mnli') + + args = parser.parse_args() + + # load dataset + test_df = pd.read_csv(args.test_dataset_dir) + test_df = test_df.fillna("") + test_df = test_df[(test_df['description_material'] != '')] + # get subset of data + #test_df = test_df.groupby('description_material').sample(n=500, random_state=42, replace=True) + logging.info("Test data size : ", test_df.shape) + + prefix = "mat:" + if args.multilabel: + label_col_name = "description_material" + # use the stored label space + gold_label_names = open('unique_multi_labels.txt').read().splitlines() + else: + # using specified depth level to restrict the label space + if args.depth_level == 1: + label_col_name = "description_material_depth_1" + elif args.depth_level == 2: + label_col_name = "description_material_depth_2" + else: + label_col_name = "description_material_depth_3" + gold_label_names = [x for x in list(set(test_df[label_col_name].values.tolist()))] # all possible gold labels + logging.info(f"Total {len(gold_label_names)} candidate labels to predict: {gold_label_names}") + # Evaluate performance + predicted_labels = 
get_zero_shot_predictions(args.multilabel, args.output_dir, test_df, template_type=args.hypothesis_template_type, label_names=gold_label_names, batch_size=args.eval_batch_size, max_length=args.max_length) + if args.multilabel: + test_gold_labels = [x.split("/") for x in test_df[label_col_name].values.tolist()] + else: + test_gold_labels = [x for x in test_df[label_col_name].values.tolist()] + evaluate_classification_performance(args.multilabel,predicted_labels, test_gold_labels, gold_label_names) diff --git a/SESAR/zeroshot-learning/finetune_ZTC.py b/SESAR/zeroshot-learning/finetune_ZTC.py new file mode 100644 index 0000000..c028f92 --- /dev/null +++ b/SESAR/zeroshot-learning/finetune_ZTC.py @@ -0,0 +1,136 @@ +from argparse import ArgumentParser +import pandas as pd +from transformers import (AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments) +from datasets import Dataset, DatasetDict +import logging +import torch +import os +logging.basicConfig() +logging.getLogger().setLevel(logging.INFO) +os.environ["WANDB_MODE"]="disabled" +"""## Model training""" +if torch.cuda.is_available(): + # GPU is available + # Tell PyTorch to use the GPU. + device = torch.device("cuda") + print('There are %d GPU(s) available.' % torch.cuda.device_count()) + print('We will use the GPU:', torch.cuda.get_device_name(0)) +else: + print('No GPU available, using the CPU instead.') + device = torch.device("cpu") + +def convert_dataframe_format(dataframe, template_type , config): + """ Convert dataframe into correct format for textual entailment task. + Each value in the negative_sample will be converted as a new row as result. + The labels will be also converted to a format that corrsponds to the pretrained model that is used. + Negative samples should be converted to NEUTRAL label. 
+ + Args: + dataframe : dataframe to convert format + template_type : hypothesis template type (A/B/C) + config : model config that is used to determine the labels + """ + # positive sample + pos_column = 'concatenated_text_' + template_type + pos_text = dataframe[pos_column].values.tolist() + text = pos_text + labels = [config.label2id['ENTAILMENT']] * len(pos_text) + + # add negative sample as each row + neg_column = 'negative_sample_' + template_type + neg_text = dataframe[neg_column].values.tolist() + text += neg_text + labels += [config.label2id['NEUTRAL']] * len(neg_text) + + # generate new dataframe + data = {'text': text, 'label': labels} + df = pd.DataFrame(data) + # shuffle data + return df.sample(frac=1, random_state=42).reset_index(drop=True) + + +def create_datasets(tokenizer, train_df, dev_df, max_length): + """ Generate dataset dict that is going to be used by the trainer during finetuning """ + def tokenize(batch): + return tokenizer(batch['text'], truncation='only_first', padding='max_length', max_length= max_length) + ds = DatasetDict() + ds['train'] = Dataset.from_pandas(train_df) + ds['validation'] = Dataset.from_pandas(dev_df) + + train_ds = ds['train'].map( + tokenize, + batched=True, + ) + dev_ds = ds['validation'].map( + tokenize, + batched=True, + ) + return train_ds, dev_ds + + +def finetune_ZTC_model(train_df, dev_df, model_name, template_type, num_epochs, lr_rate, weight_decay, train_batch_size, eval_batch_size, max_length): + # load pretrained model and tokenizer + tokenizer = AutoTokenizer.from_pretrained(model_name) + model = ( + AutoModelForSequenceClassification.from_pretrained(model_name) + ) + config = model.config + logging.info(f"Loaded pretrained model {model_name}") + # preprocess dataset for textual entailment finetuning task + train_df = convert_dataframe_format(train_df, template_type, config) + dev_df = convert_dataframe_format(dev_df, template_type, config) + + train_ds, dev_ds = create_datasets(tokenizer, train_df, 
dev_df, max_length) + logging.info(f"Dataset size : train - {len(train_ds)}, dev - {len(dev_ds)}") + #### CONDUCT TRAINING #### + output_dir = template_type + "_" + model_name + "_" + str(num_epochs) + "_" + str(lr_rate) + "_" + str(weight_decay) + "_" + str(train_batch_size) + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + training_args = TrainingArguments( + output_dir=output_dir, + log_level='error', + num_train_epochs=num_epochs, + per_device_train_batch_size=train_batch_size, + per_device_eval_batch_size=eval_batch_size, + evaluation_strategy='epoch', + save_strategy='epoch', + warmup_steps=500, + gradient_accumulation_steps=8, # batch size * accumulation_steps = total batch size + weight_decay=weight_decay + ) + + trainer = Trainer( + model=model, + args=training_args, + train_dataset=train_ds, + eval_dataset=dev_ds + ) + + trainer.train() + # store for future + trainer.save_model(output_dir) + tokenizer.save_pretrained(output_dir) + + return output_dir + +if __name__ == '__main__': + parser = ArgumentParser() + parser.add_argument("--hypothesis_template_type", type=str, default='A') + parser.add_argument("--model_name", type=str, default='roberta-large-mnli') + parser.add_argument("--train_data_dir", type=str, required=True) + parser.add_argument("--dev_data_dir", type=str, required=True) + parser.add_argument("--num_epochs", type=int, default=1) + parser.add_argument("--lr_rate", type=float, default=2e-5) + parser.add_argument("--train_batch_size", type=int, default=16) + parser.add_argument("--eval_batch_size", type=int, default=16) + parser.add_argument("--weight_decay", type=float, default=0.01) + parser.add_argument("--max_length", type=int, default=512) + args = parser.parse_args() + + # load dataset + dev_df = pd.read_csv(args.dev_data_dir) + train_df = pd.read_csv(args.train_data_dir) + # finetune the textual entailment model on the dataset + output_dir = finetune_ZTC_model(train_df, dev_df, model_name=args.model_name, 
template_type=args.hypothesis_template_type,num_epochs=args.num_epochs, lr_rate=args.lr_rate, train_batch_size=args.train_batch_size, eval_batch_size=args.eval_batch_size,weight_decay=args.weight_decay,max_length=args.max_length) + logging.info(f"Saved finetuned model in {output_dir}") diff --git a/SESAR/zeroshot-learning/unique_multi_labels.txt b/SESAR/zeroshot-learning/unique_multi_labels.txt new file mode 100644 index 0000000..6a7432d --- /dev/null +++ b/SESAR/zeroshot-learning/unique_multi_labels.txt @@ -0,0 +1,101 @@ +fragmental igneous rock +clastic sediment +metamorphic rock +tephritoid +particulate +intermediate composition igneous rock +anthropogenic material +hornblendite +mineral +fault related material +impact generated material +sediment +iron rich sedimentary rock +non clastic siliceous sediment +anorthositic rock +sedimentary rock +dioritoid +basic igneous rock +liquid water +diamicton +foiditoid +gabbroic rock +coal +fine grained igneous rock +dacite +foid syenitoid +non clastic siliceous sedimentary rock +trachytoid +clastic sedimentary rock +phonolitoid +peridotite +gabbroid +breccia gouge series +syenitoid +igneous rock +mineral-silicate or germanate +residual material +pyroclastic rock +porphyry +chemical sedimentary material +foid gabbroid +anthropogenic metal material +charcoal +carbonate sedimentary rock +granitoid +hybrid sedimentary rock +andesite +generic sandstone +gravel size sediment +natural solid material +metasomatic rock +ultramafic igneous rock +generic conglomerate +phaneritic igneous rock +alkali feldspar granite +tonalite +soil +massive sulphide +pyroxenite +cataclasite series +ceramic clay +quartz rich igneous rock +gaseous material +mineral-phosphate, arsenate, or vanadate +diamictite +mineral-oxide +sand size sediment +mineral-organic compound +mylonitic rock +tuffite +generic mudstone +rock +organic material +exotic composition igneous rock +glass +rhyolitoid +high magnesium fine grained igneous rock +glass rich igneous rock 
+aphanite +granite +mineral-carbonate or nitrate +carbonate sediment +acidic igneous rock +organic rich sedimentary rock +mineral-sulfate, selenate, or tellurate +basalt +fluid material +breccia +tephra +doleritic rock +biogenic non-organic material +mineral-halide +mineral-sulfide or sulfosalt +pegmatite +foidolite +biogenic sediment +any anthropogenic material +mineral-borate +mud size sediment +granodiorite +mineral-native element From 6b85e58d4b94a9932abaa215707ee903aa8236c0 Mon Sep 17 00:00:00 2001 From: hyunssong Date: Fri, 13 Oct 2023 14:11:23 -0700 Subject: [PATCH 2/7] Update for finetuning roberta-mnli on SESAR dataset as multilabel --- .../hyperparam_search_ZTC.py | 150 ++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 SESAR/zeroshot-learning/hyperparam_search_ZTC.py diff --git a/SESAR/zeroshot-learning/hyperparam_search_ZTC.py b/SESAR/zeroshot-learning/hyperparam_search_ZTC.py new file mode 100644 index 0000000..fcf1d07 --- /dev/null +++ b/SESAR/zeroshot-learning/hyperparam_search_ZTC.py @@ -0,0 +1,150 @@ +from argparse import ArgumentParser +import pandas as pd +from transformers import (AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments) +from datasets import Dataset, DatasetDict +import logging +import torch +import os +import numpy as np +from datasets import load_metric +logging.basicConfig() +logging.getLogger().setLevel(logging.INFO) +os.environ["WANDB_MODE"]="disabled" + +"""## Model training""" +if torch.cuda.is_available(): + # GPU is available + # Tell PyTorch to use the GPU. + device = torch.device("cuda") + print('There are %d GPU(s) available.' % torch.cuda.device_count()) + print('We will use the GPU:', torch.cuda.get_device_name(0)) +else: + print('No GPU available, using the CPU instead.') + device = torch.device("cpu") + +def convert_dataframe_format(dataframe, template_type , config): + """ Convert dataframe into correct format for textual entailment task. 
+ Each value in the negative_sample will be converted as a new row as result. + The labels will be also converted to a format that corrsponds to the pretrained model that is used. + Negative samples should be converted to NEUTRAL label. + + Args: + dataframe : dataframe to convert format + template_type : hypothesis template type (A/B/C) + config : model config that is used to determine the labels + """ + # positive sample + pos_column = 'concatenated_text_' + template_type + pos_text = dataframe[pos_column].values.tolist() + text = pos_text + labels = [config.label2id['ENTAILMENT']] * len(pos_text) + + # add negative sample as each row + neg_column = 'negative_sample_' + template_type + neg_text = dataframe[neg_column].values.tolist() + text += neg_text + labels += [config.label2id['NEUTRAL']] * len(neg_text) + + # generate new dataframe + data = {'text': text, 'label': labels} + df = pd.DataFrame(data) + # shuffle data + return df.sample(frac=1, random_state=42).reset_index(drop=True) + + +def create_datasets(tokenizer, train_df, dev_df, max_length): + """ Generate dataset dict that is going to be used by the trainer during finetuning """ + def tokenize(batch): + return tokenizer(batch['text'], truncation='only_first', padding='max_length', max_length= max_length) + ds = DatasetDict() + ds['train'] = Dataset.from_pandas(train_df) + ds['validation'] = Dataset.from_pandas(dev_df) + + train_ds = ds['train'].map( + tokenize, + batched=True, + ) + dev_ds = ds['validation'].map( + tokenize, + batched=True, + ) + return train_ds, dev_ds + +def compute_metrics(eval_pred): + metric = load_metric('glue', 'rte') # textual entailment task + predictions, labels = eval_pred + predictions = np.argmax(predictions, axis=-1) + return metric.compute(predictions=predictions, references=labels) + + +def finetune_ZTC_model(train_df, dev_df, model_name, template_type, num_epochs, lr_rate, weight_decay, train_batch_size, eval_batch_size, max_length): + # load pretrained model and 
tokenizer + tokenizer = AutoTokenizer.from_pretrained(model_name) + # model is finetuned on our domain specific NLI data + num_labels = 3 # entailment or neutral + model = ( + AutoModelForSequenceClassification.from_pretrained(model_name, num_labels = num_labels) + ) + config = model.config + logging.info(f"Loaded pretrained model {model_name}") + # preprocess dataset for textual entailment finetuning task + train_df = convert_dataframe_format(train_df, template_type, config) + dev_df = convert_dataframe_format(dev_df, template_type, config) + + train_ds, dev_ds = create_datasets(tokenizer, train_df, dev_df, max_length) + logging.info(f"Dataset size : train - {len(train_ds)}, dev - {len(dev_ds)}") + #### CONDUCT TRAINING #### + output_dir = template_type + "_" + model_name + "_" + str(num_epochs) + "_" + str(lr_rate) + "_" + str(weight_decay) + "_" + str(train_batch_size) + if not os.path.exists(output_dir): + os.makedirs(output_dir) + metric_name = "accuracy" + training_args = TrainingArguments( + output_dir=output_dir, + log_level='error', + num_train_epochs=num_epochs, + per_device_train_batch_size=train_batch_size, + per_device_eval_batch_size=eval_batch_size, + evaluation_strategy='epoch', + save_strategy='epoch', + warmup_steps=500, + gradient_accumulation_steps=8, # batch size * accumulation_steps = total batch size + weight_decay=weight_decay, + load_best_model_at_end=True, + metric_for_best_model=metric_name, + ) + + trainer = Trainer( + model=model, + args=training_args, + train_dataset=train_ds, + eval_dataset=dev_ds, + compute_metrics=compute_metrics + ) + + trainer.train() + # store for future + trainer.save_model(output_dir) + tokenizer.save_pretrained(output_dir) + + return output_dir + +if __name__ == '__main__': + parser = ArgumentParser() + parser.add_argument("--hypothesis_template_type", type=str, default='A') + parser.add_argument("--model_name", type=str, default='roberta-large-mnli') + parser.add_argument("--train_data_dir", type=str, 
required=True) + parser.add_argument("--dev_data_dir", type=str, required=True) + parser.add_argument("--num_epochs", type=int, default=1) + parser.add_argument("--lr_rate", type=float, default=2e-5) + parser.add_argument("--train_batch_size", type=int, default=16) + parser.add_argument("--eval_batch_size", type=int, default=16) + parser.add_argument("--weight_decay", type=float, default=0.01) + parser.add_argument("--max_length", type=int, default=512) + args = parser.parse_args() + + # load dataset + dev_df = pd.read_csv(args.dev_data_dir) + train_df = pd.read_csv(args.train_data_dir) + # finetune the textual entailment model on the dataset + output_dir = finetune_ZTC_model(train_df, dev_df, model_name=args.model_name, template_type=args.hypothesis_template_type,num_epochs=args.num_epochs, lr_rate=args.lr_rate, train_batch_size=args.train_batch_size, eval_batch_size=args.eval_batch_size,weight_decay=args.weight_decay,max_length=args.max_length) + logging.info(f"Saved finetuned model in {output_dir}") From 7c492d40d91da34ae4c3cad036904a6bbf3cf74c Mon Sep 17 00:00:00 2001 From: hyunssong Date: Fri, 13 Oct 2023 14:11:23 -0700 Subject: [PATCH 3/7] Update for finetuning roberta-mnli on SESAR dataset as multilabel - Add hyperparameter tuning script for finetune hyperparam --- .../hyperparam_search_ZTC.py | 150 ++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 SESAR/zeroshot-learning/hyperparam_search_ZTC.py diff --git a/SESAR/zeroshot-learning/hyperparam_search_ZTC.py b/SESAR/zeroshot-learning/hyperparam_search_ZTC.py new file mode 100644 index 0000000..fcf1d07 --- /dev/null +++ b/SESAR/zeroshot-learning/hyperparam_search_ZTC.py @@ -0,0 +1,150 @@ +from argparse import ArgumentParser +import pandas as pd +from transformers import (AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments) +from datasets import Dataset, DatasetDict +import logging +import torch +import os +import numpy as np +from datasets import load_metric 
+logging.basicConfig() +logging.getLogger().setLevel(logging.INFO) +os.environ["WANDB_MODE"]="disabled" + +"""## Model training""" +if torch.cuda.is_available(): + # GPU is available + # Tell PyTorch to use the GPU. + device = torch.device("cuda") + print('There are %d GPU(s) available.' % torch.cuda.device_count()) + print('We will use the GPU:', torch.cuda.get_device_name(0)) +else: + print('No GPU available, using the CPU instead.') + device = torch.device("cpu") + +def convert_dataframe_format(dataframe, template_type , config): + """ Convert dataframe into correct format for textual entailment task. + Each value in the negative_sample will be converted as a new row as result. + The labels will be also converted to a format that corrsponds to the pretrained model that is used. + Negative samples should be converted to NEUTRAL label. + + Args: + dataframe : dataframe to convert format + template_type : hypothesis template type (A/B/C) + config : model config that is used to determine the labels + """ + # positive sample + pos_column = 'concatenated_text_' + template_type + pos_text = dataframe[pos_column].values.tolist() + text = pos_text + labels = [config.label2id['ENTAILMENT']] * len(pos_text) + + # add negative sample as each row + neg_column = 'negative_sample_' + template_type + neg_text = dataframe[neg_column].values.tolist() + text += neg_text + labels += [config.label2id['NEUTRAL']] * len(neg_text) + + # generate new dataframe + data = {'text': text, 'label': labels} + df = pd.DataFrame(data) + # shuffle data + return df.sample(frac=1, random_state=42).reset_index(drop=True) + + +def create_datasets(tokenizer, train_df, dev_df, max_length): + """ Generate dataset dict that is going to be used by the trainer during finetuning """ + def tokenize(batch): + return tokenizer(batch['text'], truncation='only_first', padding='max_length', max_length= max_length) + ds = DatasetDict() + ds['train'] = Dataset.from_pandas(train_df) + ds['validation'] = 
Dataset.from_pandas(dev_df) + + train_ds = ds['train'].map( + tokenize, + batched=True, + ) + dev_ds = ds['validation'].map( + tokenize, + batched=True, + ) + return train_ds, dev_ds + +def compute_metrics(eval_pred): + metric = load_metric('glue', 'rte') # textual entailment task + predictions, labels = eval_pred + predictions = np.argmax(predictions, axis=-1) + return metric.compute(predictions=predictions, references=labels) + + +def finetune_ZTC_model(train_df, dev_df, model_name, template_type, num_epochs, lr_rate, weight_decay, train_batch_size, eval_batch_size, max_length): + # load pretrained model and tokenizer + tokenizer = AutoTokenizer.from_pretrained(model_name) + # model is finetuned on our domain specific NLI data + num_labels = 3 # entailment or neutral + model = ( + AutoModelForSequenceClassification.from_pretrained(model_name, num_labels = num_labels) + ) + config = model.config + logging.info(f"Loaded pretrained model {model_name}") + # preprocess dataset for textual entailment finetuning task + train_df = convert_dataframe_format(train_df, template_type, config) + dev_df = convert_dataframe_format(dev_df, template_type, config) + + train_ds, dev_ds = create_datasets(tokenizer, train_df, dev_df, max_length) + logging.info(f"Dataset size : train - {len(train_ds)}, dev - {len(dev_ds)}") + #### CONDUCT TRAINING #### + output_dir = template_type + "_" + model_name + "_" + str(num_epochs) + "_" + str(lr_rate) + "_" + str(weight_decay) + "_" + str(train_batch_size) + if not os.path.exists(output_dir): + os.makedirs(output_dir) + metric_name = "accuracy" + training_args = TrainingArguments( + output_dir=output_dir, + log_level='error', + num_train_epochs=num_epochs, + per_device_train_batch_size=train_batch_size, + per_device_eval_batch_size=eval_batch_size, + evaluation_strategy='epoch', + save_strategy='epoch', + warmup_steps=500, + gradient_accumulation_steps=8, # batch size * accumulation_steps = total batch size + weight_decay=weight_decay, + 
load_best_model_at_end=True, + metric_for_best_model=metric_name, + ) + + trainer = Trainer( + model=model, + args=training_args, + train_dataset=train_ds, + eval_dataset=dev_ds, + compute_metrics=compute_metrics + ) + + trainer.train() + # store for future + trainer.save_model(output_dir) + tokenizer.save_pretrained(output_dir) + + return output_dir + +if __name__ == '__main__': + parser = ArgumentParser() + parser.add_argument("--hypothesis_template_type", type=str, default='A') + parser.add_argument("--model_name", type=str, default='roberta-large-mnli') + parser.add_argument("--train_data_dir", type=str, required=True) + parser.add_argument("--dev_data_dir", type=str, required=True) + parser.add_argument("--num_epochs", type=int, default=1) + parser.add_argument("--lr_rate", type=float, default=2e-5) + parser.add_argument("--train_batch_size", type=int, default=16) + parser.add_argument("--eval_batch_size", type=int, default=16) + parser.add_argument("--weight_decay", type=float, default=0.01) + parser.add_argument("--max_length", type=int, default=512) + args = parser.parse_args() + + # load dataset + dev_df = pd.read_csv(args.dev_data_dir) + train_df = pd.read_csv(args.train_data_dir) + # finetune the textual entailment model on the dataset + output_dir = finetune_ZTC_model(train_df, dev_df, model_name=args.model_name, template_type=args.hypothesis_template_type,num_epochs=args.num_epochs, lr_rate=args.lr_rate, train_batch_size=args.train_batch_size, eval_batch_size=args.eval_batch_size,weight_decay=args.weight_decay,max_length=args.max_length) + logging.info(f"Saved finetuned model in {output_dir}") From 55d4ed748bb34f6e9aa744d6ff087974c07d3d4a Mon Sep 17 00:00:00 2001 From: hyunssong Date: Fri, 13 Oct 2023 14:15:10 -0700 Subject: [PATCH 4/7] Update configs to finetune for textual entailment task --- SESAR/zeroshot-learning/finetune_ZTC.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git 
a/SESAR/zeroshot-learning/finetune_ZTC.py b/SESAR/zeroshot-learning/finetune_ZTC.py index c028f92..fcf1d07 100644 --- a/SESAR/zeroshot-learning/finetune_ZTC.py +++ b/SESAR/zeroshot-learning/finetune_ZTC.py @@ -5,9 +5,12 @@ import logging import torch import os +import numpy as np +from datasets import load_metric logging.basicConfig() logging.getLogger().setLevel(logging.INFO) os.environ["WANDB_MODE"]="disabled" + """## Model training""" if torch.cuda.is_available(): # GPU is available @@ -67,12 +70,20 @@ def tokenize(batch): ) return train_ds, dev_ds +def compute_metrics(eval_pred): + metric = load_metric('glue', 'rte') # textual entailment task + predictions, labels = eval_pred + predictions = np.argmax(predictions, axis=-1) + return metric.compute(predictions=predictions, references=labels) + def finetune_ZTC_model(train_df, dev_df, model_name, template_type, num_epochs, lr_rate, weight_decay, train_batch_size, eval_batch_size, max_length): # load pretrained model and tokenizer tokenizer = AutoTokenizer.from_pretrained(model_name) + # model is finetuned on our domain specific NLI data + num_labels = 3 # entailment or neutral model = ( - AutoModelForSequenceClassification.from_pretrained(model_name) + AutoModelForSequenceClassification.from_pretrained(model_name, num_labels = num_labels) ) config = model.config logging.info(f"Loaded pretrained model {model_name}") @@ -86,7 +97,7 @@ def finetune_ZTC_model(train_df, dev_df, model_name, template_type, num_epochs, output_dir = template_type + "_" + model_name + "_" + str(num_epochs) + "_" + str(lr_rate) + "_" + str(weight_decay) + "_" + str(train_batch_size) if not os.path.exists(output_dir): os.makedirs(output_dir) - + metric_name = "accuracy" training_args = TrainingArguments( output_dir=output_dir, log_level='error', @@ -97,14 +108,17 @@ def finetune_ZTC_model(train_df, dev_df, model_name, template_type, num_epochs, save_strategy='epoch', warmup_steps=500, gradient_accumulation_steps=8, # batch size * 
accumulation_steps = total batch size - weight_decay=weight_decay + weight_decay=weight_decay, + load_best_model_at_end=True, + metric_for_best_model=metric_name, ) trainer = Trainer( model=model, args=training_args, train_dataset=train_ds, - eval_dataset=dev_ds + eval_dataset=dev_ds, + compute_metrics=compute_metrics ) trainer.train() From 8d37a133b7edacafc5d0e9b8bf1799848ae06b20 Mon Sep 17 00:00:00 2001 From: hyunssong Date: Tue, 17 Oct 2023 09:50:16 -0700 Subject: [PATCH 5/7] Multilabel classification work - Add jupyter notebook of data preprocessing to convert SESAR to multilabel training format - Update scripts of training and evaluation for multilabel - Update google drive of datasets --- .../Create-Multi-Label-Mapping.ipynb | 3207 ++++++ SESAR/zeroshot-learning/README.md | 8 +- ...ntailment Data Generation-MultiLabel.ipynb | 9821 +++++++++++++++++ SESAR/zeroshot-learning/evaluate.py | 18 +- SESAR/zeroshot-learning/finetune_ZTC.py | 4 +- .../total_unique_multi_labels.txt | 101 + .../total_unique_multi_labels_wo_leaf.txt | 27 + 7 files changed, 13179 insertions(+), 7 deletions(-) create mode 100644 SESAR/zeroshot-learning/Create-Multi-Label-Mapping.ipynb create mode 100644 SESAR/zeroshot-learning/Text Entailment Data Generation-MultiLabel.ipynb create mode 100644 SESAR/zeroshot-learning/total_unique_multi_labels.txt create mode 100644 SESAR/zeroshot-learning/total_unique_multi_labels_wo_leaf.txt diff --git a/SESAR/zeroshot-learning/Create-Multi-Label-Mapping.ipynb b/SESAR/zeroshot-learning/Create-Multi-Label-Mapping.ipynb new file mode 100644 index 0000000..483cdc3 --- /dev/null +++ b/SESAR/zeroshot-learning/Create-Multi-Label-Mapping.ipynb @@ -0,0 +1,3207 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "aa2ab7e4", + "metadata": {}, + "source": [ + "## Generate a mapping for multilabel training\n", + "### Given a label, convert it to a list of parent labels" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "4ec99895", + "metadata": 
{}, + "outputs": [], + "source": [ + "import json" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "9f8f7a7b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'https://w3id.org/isample/vocabulary/material/0.9/material': {'label': {'en': 'Material'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/material/0.9/anyanthropogenicmaterial': {'label': {'en': 'Any anthropogenic material'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/material/0.9/anthropogenicmetal': {'label': {'en': 'Anthropogenic metal material'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/opencontext/material/0.1/brass': {'label': {'en': 'brass'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/bronze': {'label': {'en': 'bronze'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/copper': {'label': {'en': 'Copper'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/gold': {'label': {'en': 'Gold'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/iron': {'label': {'en': 'iron'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/lead': {'label': {'en': 'lead'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/pewter': {'label': {'en': 'pewter'},\n", + " 'children': []}}]}},\n", + " {'https://w3id.org/isample/vocabulary/material/0.9/otheranthropogenicmaterial': {'label': {'en': 'Anthropogenic material'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/opencontext/material/0.1/anthropogenicorganicmaterial': {'label': {'en': 'Anthropogenic organic material'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/opencontext/material/0.1/plastic': {'label': {'en': 'Plastic (material)'},\n", + " 'children': []}}]}},\n", + " 
{'https://w3id.org/isample/vocabulary/opencontext/material/0.1/ceramicclay': {'label': {'en': 'Ceramic clay'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/opencontext/material/0.1/brickclay': {'label': {'en': 'Brick clay'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/bucchero': {'label': {'en': 'bucchero'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/faience': {'label': {'en': 'faience'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/porcelain': {'label': {'en': 'Porcelain'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/terracotta': {'label': {'en': 'Terracotta'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/terrasigilata': {'label': {'en': 'Terra sigilata'},\n", + " 'children': []}}]}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/fibermaterial': {'label': {'en': 'Fiber material'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/glass': {'label': {'en': 'Glass'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/paper': {'label': {'en': 'Paper'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/plaster': {'label': {'en': 'plaster'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/plasterormortar': {'label': {'en': 'Plaster or Mortar'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/rubber': {'label': {'en': 'Rubber'},\n", + " 'children': []}}]}}]}},\n", + " {'https://w3id.org/isample/vocabulary/material/0.9/anyice': {'label': {'en': 'Any ice'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/material/0.9/waterice': 
{'label': {'en': 'Frozen water'},\n", + " 'children': []}}]}},\n", + " {'https://w3id.org/isample/vocabulary/material/0.9/biogenicnonorganicmaterial': {'label': {'en': 'Biogenic non-organic material'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/opencontext/material/0.1/amber': {'label': {'en': 'amber'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/bone': {'label': {'en': 'bone'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/charcoal': {'label': {'en': 'charcoal'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/coal': {'label': {'en': 'coal'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/shell': {'label': {'en': 'Shell'},\n", + " 'children': []}}]}},\n", + " {'https://w3id.org/isample/vocabulary/material/0.9/dispersedmedia': {'label': {'en': 'Dispersed media'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/material/0.9/earthmaterial': {'label': {'en': 'Natural Solid Material'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/material/0.9/mineral': {'label': {'en': 'Mineral'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/opencontext/material/0.1/hematite': {'label': {'en': 'Hematite'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/kaolin': {'label': {'en': 'kaolin'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/mica': {'label': {'en': 'mica'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/quartz': {'label': {'en': 'quartz'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/mingroup/0.9/boratemineral': {'label': {'en': 'Mineral-Borate'},\n", + " 'children': []}},\n", + " 
{'https://w3id.org/isample/vocabulary/mingroup/0.9/carbonatenitratemineral': {'label': {'en': 'Mineral-Carbonate or Nitrate'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/mingroup/0.9/halidemineral': {'label': {'en': 'Mineral-Halide'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/mingroup/0.9/nativeelementmineral': {'label': {'en': 'Mineral-Native Element'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/mingroup/0.9/organicmineral': {'label': {'en': 'Mineral-Organic Compound'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/mingroup/0.9/oxidemineral': {'label': {'en': 'Mineral-Oxide'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/mingroup/0.9/phosphatearsenatevanadatemineral': {'label': {'en': 'Mineral-Phosphate, Arsenate, or Vanadate'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/mingroup/0.9/silicategermanatemineral': {'label': {'en': 'Mineral-Silicate or Germanate'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/mingroup/0.9/sulfateselenatetelluratemineral': {'label': {'en': 'Mineral-Sulfate, Selenate, or Tellurate'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/mingroup/0.9/sulfidesulfosaltmineral': {'label': {'en': 'Mineral-Sulfide or Sulfosalt'},\n", + " 'children': []}}]}},\n", + " {'https://w3id.org/isample/vocabulary/material/0.9/mixedsoilsedimentrock': {'label': {'en': 'Mixed soil sediment or rock'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/material/0.9/particulate': {'label': {'en': 'Particulate'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/opencontext/material/0.1/cinder': {'label': {'en': 'Cinder'},\n", + " 'children': []}}]}},\n", + " {'https://w3id.org/isample/vocabulary/material/0.9/rockorsediment': {'label': {'en': 'Rock or sediment'},\n", + " 'children': 
[{'https://w3id.org/isample/vocabulary/material/0.9/rock': {'label': {'en': 'Rock'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/opencontext/material/0.1/basalt': {'label': {'en': 'Basalt'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/chert': {'label': {'en': 'Chert'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/opencontext/material/0.1/flint': {'label': {'en': 'flint'},\n", + " 'children': []}}]}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/cinder': {'label': {'en': 'Cinder'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/coal': {'label': {'en': 'coal'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/dolomite': {'label': {'en': 'dolomite'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/gabbro': {'label': {'en': 'gabbro'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/greywacke': {'label': {'en': 'greywacke'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/limestone': {'label': {'en': 'limestone'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/marble': {'label': {'en': 'marble'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/obsidian': {'label': {'en': 'obsidian'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/pumice': {'label': {'en': 'pumice'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/slate': {'label': {'en': 'Slate'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/travertine': {'label': {'en': 'Travertine'},\n", + " 'children': []}},\n", + " 
{'https://w3id.org/isample/vocabulary/rocksediment/0.9/Aphanite': {'label': {'en': 'rksd:Aphanite'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Breccia': {'label': {'en': 'rksd:Breccia'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Fault_Related_Material': {'label': {'en': 'rksd:Fault_Related_Material'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/rocksediment/0.9/Cataclasite_Series': {'label': {'en': 'rksd:Cataclasite_Series'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Mylonitic_Rock': {'label': {'en': 'rksd:Mylonitic_Rock'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/breccia_gouge_series': {'label': {'en': 'rksd:breccia_gouge_series'},\n", + " 'children': []}}]}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Fragmental_Igneous_Rock': {'label': {'en': 'rksd:Fragmental_Igneous_Rock'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/rocksediment/0.9/Pyroclastic_Rock': {'label': {'en': 'rksd:Pyroclastic_Rock'},\n", + " 'children': []}}]}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Igneous_Rock': {'label': {'en': 'rksd:Igneous_Rock'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/rocksediment/0.9/Acidic_Igneous_Rock': {'label': {'en': 'rksd:Acidic_Igneous_Rock'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/rocksediment/0.9/Dacite': {'label': {'en': 'rksd:Dacite'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Granitoid': {'label': {'en': 'rksd:Granitoid'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/rocksediment/0.9/Alkali_Feldspar_Granite': {'label': {'en': 'rksd:Alkali_Feldspar_Granite'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Granite': {'label': {'en': 'rksd:Granite'},\n", + " 
'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Granodiorite': {'label': {'en': 'rksd:Granodiorite'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Tonalite': {'label': {'en': 'rksd:Tonalite'},\n", + " 'children': []}}]}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Quartz_Rich_Igneous_Rock': {'label': {'en': 'rksd:Quartz_Rich_Igneous_Rock'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Rhyolitoid': {'label': {'en': 'rksd:Rhyolitoid'},\n", + " 'children': []}}]}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Basic_Igneous_Rock': {'label': {'en': 'rksd:Basic_Igneous_Rock'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/rocksediment/0.9/Basalt': {'label': {'en': 'rksd:Basalt'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Gabbroic_Rock': {'label': {'en': 'rksd:Gabbroic_Rock'},\n", + " 'children': []}}]}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Doleritic_Rock': {'label': {'en': 'rksd:Doleritic_Rock'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Exotic_Composition_Igneous_Rock': {'label': {'en': 'rksd:Exotic_Composition_Igneous_Rock'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Fine_Grained_Igneous_Rock': {'label': {'en': 'rksd:Fine_Grained_Igneous_Rock'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/rocksediment/0.9/Andesite': {'label': {'en': 'rksd:Andesite'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Basalt': {'label': {'en': 'rksd:Basalt'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Dacite': {'label': {'en': 'rksd:Dacite'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Foiditoid': {'label': 
{'en': 'rksd:Foiditoid'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/High_Magnesium_Fine_Grained_Igneous_Rock': {'label': {'en': 'rksd:High_Magnesium_Fine_Grained_Igneous_Rock'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Phonolitoid': {'label': {'en': 'rksd:Phonolitoid'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Rhyolitoid': {'label': {'en': 'rksd:Rhyolitoid'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Tephritoid': {'label': {'en': 'rksd:Tephritoid'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Trachytoid': {'label': {'en': 'rksd:Trachytoid'},\n", + " 'children': []}}]}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Fragmental_Igneous_Rock': {'label': {'en': 'rksd:Fragmental_Igneous_Rock'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/rocksediment/0.9/Pyroclastic_Rock': {'label': {'en': 'rksd:Pyroclastic_Rock'},\n", + " 'children': []}}]}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Glass_Rich_Igneous_Rock': {'label': {'en': 'rksd:Glass_Rich_Igneous_Rock'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Hypabyssal_Intrusive_Rock': {'label': {'en': 'rksd:Hypabyssal_Intrusive_Rock'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Intermediate_Composition_Igneous_Rock': {'label': {'en': 'rksd:Intermediate_Composition_Igneous_Rock'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/rocksediment/0.9/Andesite': {'label': {'en': 'rksd:Andesite'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Dioritoid': {'label': {'en': 'rksd:Dioritoid'},\n", + " 'children': []}}]}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Phaneritic_Igneous_Rock': 
{'label': {'en': 'rksd:Phaneritic_Igneous_Rock'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/rocksediment/0.9/Anorthositic_Rock': {'label': {'en': 'rksd:Anorthositic_Rock'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Aplite': {'label': {'en': 'rksd:Aplite'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Dioritoid': {'label': {'en': 'rksd:Dioritoid'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Foid_Dioritoid': {'label': {'en': 'rksd:Foid_Dioritoid'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Foid_Gabbroid': {'label': {'en': 'rksd:Foid_Gabbroid'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Foid_Syenitoid': {'label': {'en': 'rksd:Foid_Syenitoid'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Foidolite': {'label': {'en': 'rksd:Foidolite'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Gabbroid': {'label': {'en': 'rksd:Gabbroid'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/rocksediment/0.9/Gabbroic_Rock': {'label': {'en': 'rksd:Gabbroic_Rock'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Monzogabbroic_Rock': {'label': {'en': 'rksd:Monzogabbroic_Rock'},\n", + " 'children': []}}]}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Granitoid': {'label': {'en': 'rksd:Granitoid'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/rocksediment/0.9/Alkali_Feldspar_Granite': {'label': {'en': 'rksd:Alkali_Feldspar_Granite'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Granite': {'label': {'en': 'rksd:Granite'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Granodiorite': {'label': 
{'en': 'rksd:Granodiorite'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Tonalite': {'label': {'en': 'rksd:Tonalite'},\n", + " 'children': []}}]}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Hornblendite': {'label': {'en': 'rksd:Hornblendite'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Pegmatite': {'label': {'en': 'rksd:Pegmatite'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Peridotite': {'label': {'en': 'rksd:Peridotite'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Pyroxenite': {'label': {'en': 'rksd:Pyroxenite'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Quartz_Rich_Igneous_Rock': {'label': {'en': 'rksd:Quartz_Rich_Igneous_Rock'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Syenitoid': {'label': {'en': 'rksd:Syenitoid'},\n", + " 'children': []}}]}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Plutonic_Igneous_Rock': {'label': {'en': 'rksd:Plutonic_Igneous_Rock'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Porphyry': {'label': {'en': 'rksd:Porphyry'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Ultrabasic_Igneous_Rock': {'label': {'en': 'rksd:Ultrabasic_Igneous_Rock'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Ultramafic_Igneous_Rock': {'label': {'en': 'rksd:Ultramafic_Igneous_Rock'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/rocksediment/0.9/Hornblendite': {'label': {'en': 'rksd:Hornblendite'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Peridotite': {'label': {'en': 'rksd:Peridotite'},\n", + " 'children': []}},\n", + " 
{'https://w3id.org/isample/vocabulary/rocksediment/0.9/Pyroxenite': {'label': {'en': 'rksd:Pyroxenite'},\n", + " 'children': []}}]}}]}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Impact_Generated_Material': {'label': {'en': 'rksd:Impact_Generated_Material'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Massive_Sulphide': {'label': {'en': 'rksd:Massive_Sulphide'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Metamorphic_Rock': {'label': {'en': 'rksd:Metamorphic_Rock'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Metasomatic_Rock': {'label': {'en': 'rksd:Metasomatic_Rock'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Sedimentary_Rock': {'label': {'en': 'rksd:Sedimentary_Rock'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/rocksediment/0.9/Carbonate_Sedimentary_Rock': {'label': {'en': 'rksd:Carbonate_Sedimentary_Rock'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Clastic_Sedimentary_Rock': {'label': {'en': 'rksd:Clastic_Sedimentary_Rock'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/rocksediment/0.9/Diamictite': {'label': {'en': 'rksd:Diamictite'},\n", + " 'children': []}}]}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Generic_Conglomerate': {'label': {'en': 'rksd:Generic_Conglomerate'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Generic_Mudstone': {'label': {'en': 'rksd:Generic_Mudstone'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Generic_Sandstone': {'label': {'en': 'rksd:Generic_Sandstone'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Hybrid_Sedimentary_Rock': {'label': {'en': 'rksd:Hybrid_Sedimentary_Rock'},\n", + " 'children': []}},\n", + " 
{'https://w3id.org/isample/vocabulary/rocksediment/0.9/Iron_Rich_Sedimentary_Rock': {'label': {'en': 'rksd:Iron_Rich_Sedimentary_Rock'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Non_Clastic_Siliceous_Sedimentary_Rock': {'label': {'en': 'rksd:Non_Clastic_Siliceous_Sedimentary_Rock'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Organic_Rich_Sedimentary_Rock': {'label': {'en': 'rksd:Organic_Rich_Sedimentary_Rock'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/rocksediment/0.9/Coal': {'label': {'en': 'rksd:Coal'},\n", + " 'children': []}}]}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Phosphorite': {'label': {'en': 'rksd:Phosphorite'},\n", + " 'children': []}}]}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Tuffite': {'label': {'en': 'rksd:Tuffite'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/residual_material': {'label': {'en': 'rksd:residual_material'},\n", + " 'children': []}}]}},\n", + " {'https://w3id.org/isample/vocabulary/material/0.9/sediment': {'label': {'en': 'Sediment'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/rocksediment/0.9/Biogenic_Sediment': {'label': {'en': 'rksd:Biogenic_Sediment'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Carbonate_Sediment': {'label': {'en': 'rksd:Carbonate_Sediment'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Chemical_Sedimentary_Material': {'label': {'en': 'rksd:Chemical_Sedimentary_Material'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Clastic_Sediment': {'label': {'en': 'rksd:Clastic_Sediment'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/rocksediment/0.9/Diamicton': {'label': {'en': 'rksd:Diamicton'},\n", + " 'children': []}}]}},\n", + " 
{'https://w3id.org/isample/vocabulary/rocksediment/0.9/Gravel_Size_Sediment': {'label': {'en': 'rksd:Gravel_Size_Sediment'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Hybrid_Sediment': {'label': {'en': 'rksd:Hybrid_Sediment'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Iron_Rich_Sediment': {'label': {'en': 'rksd:Iron_Rich_Sediment'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Mud_Size_Sediment': {'label': {'en': 'rksd:Mud_Size_Sediment'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Non_Clastic_Siliceous_Sediment': {'label': {'en': 'rksd:Non_Clastic_Siliceous_Sediment'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Phosphate_Rich_Sediment': {'label': {'en': 'rksd:Phosphate_Rich_Sediment'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Sand_Size_Sediment': {'label': {'en': 'rksd:Sand_Size_Sediment'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/rocksediment/0.9/Tephra': {'label': {'en': 'rksd:Tephra'},\n", + " 'children': []}}]}}]}},\n", + " {'https://w3id.org/isample/vocabulary/material/0.9/soil': {'label': {'en': 'Soil'},\n", + " 'children': []}}]}},\n", + " {'https://w3id.org/isample/vocabulary/material/0.9/fluid': {'label': {'en': 'Fluid material'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/material/0.9/gas': {'label': {'en': 'Gaseous material'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/material/0.9/liquidwater': {'label': {'en': 'Liquid water'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/material/0.9/nonaqueousliquid': {'label': {'en': 'Non-aqueous liquid material'},\n", + " 'children': []}}]}},\n", + " {'https://w3id.org/isample/vocabulary/material/0.9/organicmaterial': {'label': {'en': 
'Organic material'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/opencontext/material/0.1/anthropogenicorganicmaterial': {'label': {'en': 'Anthropogenic organic material'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/opencontext/material/0.1/plastic': {'label': {'en': 'Plastic (material)'},\n", + " 'children': []}}]}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/organicanimalmaterial': {'label': {'en': 'Organic animal material'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/organicanimalproduct': {'label': {'en': 'Organic animal product'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/opencontext/material/0.1/hair': {'label': {'en': 'Hair'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/leather': {'label': {'en': 'Leather'},\n", + " 'children': []}}]}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/organicplantmaterial': {'label': {'en': 'Organic plant material'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/opencontext/material/0.1/wood': {'label': {'en': 'Wood'},\n", + " 'children': []}}]}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/plantmaterial': {'label': {'en': 'Plant Material'},\n", + " 'children': [{'https://w3id.org/isample/vocabulary/opencontext/material/0.1/plantfiber': {'label': {'en': 'Plant fiber'},\n", + " 'children': []}},\n", + " {'https://w3id.org/isample/vocabulary/opencontext/material/0.1/wood': {'label': {'en': 'Wood'},\n", + " 'children': []}}]}}]}}]}}" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Read Hierarchy json and convert it into tree\n", + "with open('./datasets/material_hierarchy.json', 'r') as f:\n", + " data = json.load(f)\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "fae79ae9", + "metadata": 
{}, + "outputs": [], + "source": [ + "# space out multi-word labels or ones that are not mapped directly \n", + "spaced_map = {\n", + " \"carbonatenitratemineral\": \"Mineral-Carbonate or Nitrate\",\n", + " \"organicmaterial\": \"Organic material\",\n", + " \"otheranthropogenicmaterial\" : \"Anthropogenic material\",\n", + " \"boratemineral\": \"Mineral-Borate\",\n", + " \"liquidwater\": \"Liquid water\",\n", + " \"rockorsediment\":\"Rock/sediment\",\n", + " \"mixedsoilsedimentrock\" : \"Soil/sediment/rock\",\n", + " \"anthropogenicmetal\":\"Anthropogenic metal material\",\n", + " \"biogenicnonorganicmaterial\":\"Biogenic non-organic material\",\n", + " \"sulfateselenatetelluratemineral\": \"Mineral-Sulfate, Selenate, or Tellurate\",\n", + " \"ceramicclay\": \"Ceramic clay\",\n", + " \"oxidemineral\":\"Mineral-Oxide\",\n", + " \"phosphatearsenatevanadatemineral\":\"Mineral-Phosphate, Arsenate, or Vanadate\",\n", + " \"silicategermanatemineral\":\"Mineral-Silicate or Germanate\",\n", + " \"sulfidesulfosaltmineral\":\"Mineral-Sulfide or Sulfosalt\",\n", + " \"nativeelementmineral\":\"Mineral-Native Element\",\n", + " \"halidemineral\":\"Mineral-Halide\",\n", + " \"organicmineral\":\"Mineral-Organic Compound\",\n", + " \"gas\":\"gaseous material\"\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "ad9e2ef0", + "metadata": {}, + "outputs": [], + "source": [ + "parent = {} # store parent of key" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "89731d24", + "metadata": {}, + "outputs": [], + "source": [ + "prefixes = [\"rksd:\",\"mat:\",\"ming:\",\"ocmat:\"]\n", + "def process_label(value):\n", + " if value is None:\n", + " return value\n", + " value = value.lower().replace(\"_\",\" \")\n", + " for prefix in prefixes:\n", + " if value.startswith(prefix):\n", + " value = value[len(prefix):].lower()\n", + " if value in spaced_map:\n", + " value = spaced_map[value]\n", + " break\n", + " return value" + ] + }, + { + 
"cell_type": "code", + "execution_count": 6, + "id": "07d4883e", + "metadata": {}, + "outputs": [], + "source": [ + "leaf_labels = [] # list of labels that are leaf in the hierarchy\n", + "def updateParent(dic, key, label, parentLabel):\n", + " \n", + " # store the parent label as parent\n", + " parentLabel = process_label(parentLabel)\n", + " label = process_label(label)\n", + " \n", + " parent[label] = parentLabel\n", + " \n", + " # store if no children exists\n", + " if len(dic[key][\"children\"]) == 0:\n", + " leaf_labels.append(label)\n", + " \n", + " # recurse\n", + " for child in dic[key][\"children\"]:\n", + "\n", + " for key, value in child.items():\n", + " childKey = key\n", + " childLabel = value[\"label\"][\"en\"].lower()\n", + " updateParent(child,childKey, childLabel, label)\n", + "\n", + "for key in data:\n", + " updateParent(data, key, data[key][\"label\"][\"en\"], None)\n", + " break\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "eabaf50d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'material': None,\n", + " 'any anthropogenic material': 'material',\n", + " 'anthropogenic metal material': 'any anthropogenic material',\n", + " 'brass': 'anthropogenic metal material',\n", + " 'bronze': 'anthropogenic metal material',\n", + " 'copper': 'anthropogenic metal material',\n", + " 'gold': 'anthropogenic metal material',\n", + " 'iron': 'anthropogenic metal material',\n", + " 'lead': 'anthropogenic metal material',\n", + " 'pewter': 'anthropogenic metal material',\n", + " 'anthropogenic material': 'any anthropogenic material',\n", + " 'anthropogenic organic material': 'organic material',\n", + " 'plastic (material)': 'anthropogenic organic material',\n", + " 'ceramic clay': 'anthropogenic material',\n", + " 'brick clay': 'ceramic clay',\n", + " 'bucchero': 'ceramic clay',\n", + " 'faience': 'ceramic clay',\n", + " 'porcelain': 'ceramic clay',\n", + " 'terracotta': 'ceramic clay',\n", + " 'terra sigilata': 
'ceramic clay',\n", + " 'fiber material': 'anthropogenic material',\n", + " 'glass': 'anthropogenic material',\n", + " 'paper': 'anthropogenic material',\n", + " 'plaster': 'anthropogenic material',\n", + " 'plaster or mortar': 'anthropogenic material',\n", + " 'rubber': 'anthropogenic material',\n", + " 'any ice': 'material',\n", + " 'frozen water': 'any ice',\n", + " 'biogenic non-organic material': 'material',\n", + " 'amber': 'biogenic non-organic material',\n", + " 'bone': 'biogenic non-organic material',\n", + " 'charcoal': 'biogenic non-organic material',\n", + " 'coal': 'organic rich sedimentary rock',\n", + " 'shell': 'biogenic non-organic material',\n", + " 'dispersed media': 'material',\n", + " 'natural solid material': 'material',\n", + " 'mineral': 'natural solid material',\n", + " 'hematite': 'mineral',\n", + " 'kaolin': 'mineral',\n", + " 'mica': 'mineral',\n", + " 'quartz': 'mineral',\n", + " 'mineral-borate': 'mineral',\n", + " 'mineral-carbonate or nitrate': 'mineral',\n", + " 'mineral-halide': 'mineral',\n", + " 'mineral-native element': 'mineral',\n", + " 'mineral-organic compound': 'mineral',\n", + " 'mineral-oxide': 'mineral',\n", + " 'mineral-phosphate, arsenate, or vanadate': 'mineral',\n", + " 'mineral-silicate or germanate': 'mineral',\n", + " 'mineral-sulfate, selenate, or tellurate': 'mineral',\n", + " 'mineral-sulfide or sulfosalt': 'mineral',\n", + " 'mixed soil sediment or rock': 'natural solid material',\n", + " 'particulate': 'natural solid material',\n", + " 'cinder': 'rock',\n", + " 'rock or sediment': 'natural solid material',\n", + " 'rock': 'rock or sediment',\n", + " 'basalt': 'fine grained igneous rock',\n", + " 'chert': 'rock',\n", + " 'flint': 'chert',\n", + " 'dolomite': 'rock',\n", + " 'gabbro': 'rock',\n", + " 'greywacke': 'rock',\n", + " 'limestone': 'rock',\n", + " 'marble': 'rock',\n", + " 'obsidian': 'rock',\n", + " 'pumice': 'rock',\n", + " 'slate': 'rock',\n", + " 'travertine': 'rock',\n", + " 'aphanite': 
'rock',\n", + " 'breccia': 'rock',\n", + " 'fault related material': 'rock',\n", + " 'cataclasite series': 'fault related material',\n", + " 'mylonitic rock': 'fault related material',\n", + " 'breccia gouge series': 'fault related material',\n", + " 'fragmental igneous rock': 'igneous rock',\n", + " 'pyroclastic rock': 'fragmental igneous rock',\n", + " 'igneous rock': 'rock',\n", + " 'acidic igneous rock': 'igneous rock',\n", + " 'dacite': 'fine grained igneous rock',\n", + " 'granitoid': 'phaneritic igneous rock',\n", + " 'alkali feldspar granite': 'granitoid',\n", + " 'granite': 'granitoid',\n", + " 'granodiorite': 'granitoid',\n", + " 'tonalite': 'granitoid',\n", + " 'quartz rich igneous rock': 'phaneritic igneous rock',\n", + " 'rhyolitoid': 'fine grained igneous rock',\n", + " 'basic igneous rock': 'igneous rock',\n", + " 'gabbroic rock': 'gabbroid',\n", + " 'doleritic rock': 'igneous rock',\n", + " 'exotic composition igneous rock': 'igneous rock',\n", + " 'fine grained igneous rock': 'igneous rock',\n", + " 'andesite': 'intermediate composition igneous rock',\n", + " 'foiditoid': 'fine grained igneous rock',\n", + " 'high magnesium fine grained igneous rock': 'fine grained igneous rock',\n", + " 'phonolitoid': 'fine grained igneous rock',\n", + " 'tephritoid': 'fine grained igneous rock',\n", + " 'trachytoid': 'fine grained igneous rock',\n", + " 'glass rich igneous rock': 'igneous rock',\n", + " 'hypabyssal intrusive rock': 'igneous rock',\n", + " 'intermediate composition igneous rock': 'igneous rock',\n", + " 'dioritoid': 'phaneritic igneous rock',\n", + " 'phaneritic igneous rock': 'igneous rock',\n", + " 'anorthositic rock': 'phaneritic igneous rock',\n", + " 'aplite': 'phaneritic igneous rock',\n", + " 'foid dioritoid': 'phaneritic igneous rock',\n", + " 'foid gabbroid': 'phaneritic igneous rock',\n", + " 'foid syenitoid': 'phaneritic igneous rock',\n", + " 'foidolite': 'phaneritic igneous rock',\n", + " 'gabbroid': 'phaneritic igneous rock',\n", + " 
'monzogabbroic rock': 'gabbroid',\n", + " 'hornblendite': 'ultramafic igneous rock',\n", + " 'pegmatite': 'phaneritic igneous rock',\n", + " 'peridotite': 'ultramafic igneous rock',\n", + " 'pyroxenite': 'ultramafic igneous rock',\n", + " 'syenitoid': 'phaneritic igneous rock',\n", + " 'plutonic igneous rock': 'igneous rock',\n", + " 'porphyry': 'igneous rock',\n", + " 'ultrabasic igneous rock': 'igneous rock',\n", + " 'ultramafic igneous rock': 'igneous rock',\n", + " 'impact generated material': 'rock',\n", + " 'massive sulphide': 'rock',\n", + " 'metamorphic rock': 'rock',\n", + " 'metasomatic rock': 'rock',\n", + " 'sedimentary rock': 'rock',\n", + " 'carbonate sedimentary rock': 'sedimentary rock',\n", + " 'clastic sedimentary rock': 'sedimentary rock',\n", + " 'diamictite': 'clastic sedimentary rock',\n", + " 'generic conglomerate': 'sedimentary rock',\n", + " 'generic mudstone': 'sedimentary rock',\n", + " 'generic sandstone': 'sedimentary rock',\n", + " 'hybrid sedimentary rock': 'sedimentary rock',\n", + " 'iron rich sedimentary rock': 'sedimentary rock',\n", + " 'non clastic siliceous sedimentary rock': 'sedimentary rock',\n", + " 'organic rich sedimentary rock': 'sedimentary rock',\n", + " 'phosphorite': 'sedimentary rock',\n", + " 'tuffite': 'rock',\n", + " 'residual material': 'rock',\n", + " 'sediment': 'rock or sediment',\n", + " 'biogenic sediment': 'sediment',\n", + " 'carbonate sediment': 'sediment',\n", + " 'chemical sedimentary material': 'sediment',\n", + " 'clastic sediment': 'sediment',\n", + " 'diamicton': 'clastic sediment',\n", + " 'gravel size sediment': 'sediment',\n", + " 'hybrid sediment': 'sediment',\n", + " 'iron rich sediment': 'sediment',\n", + " 'mud size sediment': 'sediment',\n", + " 'non clastic siliceous sediment': 'sediment',\n", + " 'phosphate rich sediment': 'sediment',\n", + " 'sand size sediment': 'sediment',\n", + " 'tephra': 'sediment',\n", + " 'soil': 'natural solid material',\n", + " 'fluid material': 'material',\n", 
+ " 'gaseous material': 'fluid material',\n", + " 'liquid water': 'fluid material',\n", + " 'non-aqueous liquid material': 'fluid material',\n", + " 'organic material': 'material',\n", + " 'organic animal material': 'organic material',\n", + " 'organic animal product': 'organic material',\n", + " 'hair': 'organic animal product',\n", + " 'leather': 'organic animal product',\n", + " 'organic plant material': 'organic material',\n", + " 'wood': 'plant material',\n", + " 'plant material': 'organic material',\n", + " 'plant fiber': 'plant material'}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# stores direct parent \n", + "parent" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "2b5d0442", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['brass', 'bronze', 'copper', 'gold', 'iron', 'lead', 'pewter', 'plastic (material)', 'brick clay', 'bucchero', 'faience', 'porcelain', 'terracotta', 'terra sigilata', 'fiber material', 'glass', 'paper', 'plaster', 'plaster or mortar', 'rubber', 'frozen water', 'amber', 'bone', 'charcoal', 'coal', 'shell', 'dispersed media', 'hematite', 'kaolin', 'mica', 'quartz', 'mineral-borate', 'mineral-carbonate or nitrate', 'mineral-halide', 'mineral-native element', 'mineral-organic compound', 'mineral-oxide', 'mineral-phosphate, arsenate, or vanadate', 'mineral-silicate or germanate', 'mineral-sulfate, selenate, or tellurate', 'mineral-sulfide or sulfosalt', 'mixed soil sediment or rock', 'cinder', 'basalt', 'flint', 'cinder', 'coal', 'dolomite', 'gabbro', 'greywacke', 'limestone', 'marble', 'obsidian', 'pumice', 'slate', 'travertine', 'aphanite', 'breccia', 'cataclasite series', 'mylonitic rock', 'breccia gouge series', 'pyroclastic rock', 'dacite', 'alkali feldspar granite', 'granite', 'granodiorite', 'tonalite', 'quartz rich igneous rock', 'rhyolitoid', 'basalt', 'gabbroic rock', 'doleritic rock', 'exotic 
composition igneous rock', 'andesite', 'basalt', 'dacite', 'foiditoid', 'high magnesium fine grained igneous rock', 'phonolitoid', 'rhyolitoid', 'tephritoid', 'trachytoid', 'pyroclastic rock', 'glass rich igneous rock', 'hypabyssal intrusive rock', 'andesite', 'dioritoid', 'anorthositic rock', 'aplite', 'dioritoid', 'foid dioritoid', 'foid gabbroid', 'foid syenitoid', 'foidolite', 'gabbroic rock', 'monzogabbroic rock', 'alkali feldspar granite', 'granite', 'granodiorite', 'tonalite', 'hornblendite', 'pegmatite', 'peridotite', 'pyroxenite', 'quartz rich igneous rock', 'syenitoid', 'plutonic igneous rock', 'porphyry', 'ultrabasic igneous rock', 'hornblendite', 'peridotite', 'pyroxenite', 'impact generated material', 'massive sulphide', 'metamorphic rock', 'metasomatic rock', 'carbonate sedimentary rock', 'diamictite', 'generic conglomerate', 'generic mudstone', 'generic sandstone', 'hybrid sedimentary rock', 'iron rich sedimentary rock', 'non clastic siliceous sedimentary rock', 'coal', 'phosphorite', 'tuffite', 'residual material', 'biogenic sediment', 'carbonate sediment', 'chemical sedimentary material', 'diamicton', 'gravel size sediment', 'hybrid sediment', 'iron rich sediment', 'mud size sediment', 'non clastic siliceous sediment', 'phosphate rich sediment', 'sand size sediment', 'tephra', 'soil', 'gaseous material', 'liquid water', 'non-aqueous liquid material', 'plastic (material)', 'organic animal material', 'hair', 'leather', 'wood', 'plant fiber', 'wood']\n" + ] + } + ], + "source": [ + "print(leaf_labels)" + ] + }, + { + "cell_type": "markdown", + "id": "d52392c1", + "metadata": {}, + "source": [ + "## Get list of labels up to depth threshold" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "98e36188", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['any anthropogenic material', 'any ice', 'biogenic non-organic material', 'dispersed media', 'natural solid material', 'fluid material', 
'organic material']\n", + "7\n" + ] + } + ], + "source": [ + "# depth 1 threshold (right below material)\n", + "depth_level_1 = []\n", + "for child, direct_parent in parent.items():\n", + " if direct_parent == \"material\":\n", + " depth_level_1.append(child)\n", + "print(depth_level_1)\n", + "print(len(depth_level_1))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "eb53d971", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['anthropogenic metal material', 'anthropogenic material', 'anthropogenic organic material', 'frozen water', 'amber', 'bone', 'charcoal', 'shell', 'mineral', 'mixed soil sediment or rock', 'particulate', 'rock or sediment', 'soil', 'gaseous material', 'liquid water', 'non-aqueous liquid material', 'organic animal material', 'organic animal product', 'organic plant material', 'plant material']\n", + "20\n" + ] + } + ], + "source": [ + "# depth 2 threshold\n", + "depth_level_2 = [] \n", + "for child, direct_parent in parent.items():\n", + " if direct_parent in depth_level_1:\n", + " depth_level_2.append(child)\n", + "print(depth_level_2)\n", + "print(len(depth_level_2))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "8d470340", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['brass', 'bronze', 'copper', 'gold', 'iron', 'lead', 'pewter', 'plastic (material)', 'ceramic clay', 'fiber material', 'glass', 'paper', 'plaster', 'plaster or mortar', 'rubber', 'hematite', 'kaolin', 'mica', 'quartz', 'mineral-borate', 'mineral-carbonate or nitrate', 'mineral-halide', 'mineral-native element', 'mineral-organic compound', 'mineral-oxide', 'mineral-phosphate, arsenate, or vanadate', 'mineral-silicate or germanate', 'mineral-sulfate, selenate, or tellurate', 'mineral-sulfide or sulfosalt', 'rock', 'sediment', 'hair', 'leather', 'wood', 'plant fiber']\n", + "35\n" + ] + } + ], + "source": [ + "# depth 3 threshold\n", 
+ "depth_level_3 = [] \n", + "for child, direct_parent in parent.items():\n", + " if direct_parent in depth_level_2:\n", + " depth_level_3.append(child)\n", + "print(depth_level_3)\n", + "print(len(depth_level_3))" + ] + }, + { + "cell_type": "markdown", + "id": "8b298232", + "metadata": {}, + "source": [ + "## Get entire path of parents" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "f3f1d98a", + "metadata": {}, + "outputs": [], + "source": [ + "# start each label's parent path with its direct parent\n", + "parents = {}\n", + "for key, value in parent.items():\n", + " parents[key] = [value]" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "ebf7d5b8", + "metadata": {}, + "outputs": [], + "source": [ + "# extend each path with every remaining parent up to the root\n", + "\n", + "def expandParents(curr, child): # curr: node currently being visited; child: label whose parents are being collected\n", + " if curr not in parents[child] and curr != child:\n", + " parents[child.lower()].append(curr)\n", + " # continue with the parent of the current node\n", + " if parent[curr] is not None:\n", + " expandParents(parent[curr],child)\n", + "\n", + "for key in parent:\n", + " expandParents(key, key)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "bf25a359", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'material': [None], 'any anthropogenic material': ['material'], 'anthropogenic metal material': ['any anthropogenic material', 'material'], 'brass': ['anthropogenic metal material', 'any anthropogenic material', 'material'], 'bronze': ['anthropogenic metal material', 'any anthropogenic material', 'material'], 'copper': ['anthropogenic metal material', 'any anthropogenic material', 'material'], 'gold': ['anthropogenic metal material', 'any anthropogenic material', 'material'], 'iron': ['anthropogenic metal material', 'any anthropogenic material', 'material'], 'lead': ['anthropogenic metal material', 'any anthropogenic material', 'material'], 'pewter': ['anthropogenic metal material', 'any
anthropogenic material', 'material'], 'anthropogenic material': ['any anthropogenic material', 'material'], 'anthropogenic organic material': ['organic material', 'material'], 'plastic (material)': ['anthropogenic organic material', 'organic material', 'material'], 'ceramic clay': ['anthropogenic material', 'any anthropogenic material', 'material'], 'brick clay': ['ceramic clay', 'anthropogenic material', 'any anthropogenic material', 'material'], 'bucchero': ['ceramic clay', 'anthropogenic material', 'any anthropogenic material', 'material'], 'faience': ['ceramic clay', 'anthropogenic material', 'any anthropogenic material', 'material'], 'porcelain': ['ceramic clay', 'anthropogenic material', 'any anthropogenic material', 'material'], 'terracotta': ['ceramic clay', 'anthropogenic material', 'any anthropogenic material', 'material'], 'terra sigilata': ['ceramic clay', 'anthropogenic material', 'any anthropogenic material', 'material'], 'fiber material': ['anthropogenic material', 'any anthropogenic material', 'material'], 'glass': ['anthropogenic material', 'any anthropogenic material', 'material'], 'paper': ['anthropogenic material', 'any anthropogenic material', 'material'], 'plaster': ['anthropogenic material', 'any anthropogenic material', 'material'], 'plaster or mortar': ['anthropogenic material', 'any anthropogenic material', 'material'], 'rubber': ['anthropogenic material', 'any anthropogenic material', 'material'], 'any ice': ['material'], 'frozen water': ['any ice', 'material'], 'biogenic non-organic material': ['material'], 'amber': ['biogenic non-organic material', 'material'], 'bone': ['biogenic non-organic material', 'material'], 'charcoal': ['biogenic non-organic material', 'material'], 'coal': ['organic rich sedimentary rock', 'sedimentary rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'shell': ['biogenic non-organic material', 'material'], 'dispersed media': ['material'], 'natural solid material': ['material'], 'mineral': 
['natural solid material', 'material'], 'hematite': ['mineral', 'natural solid material', 'material'], 'kaolin': ['mineral', 'natural solid material', 'material'], 'mica': ['mineral', 'natural solid material', 'material'], 'quartz': ['mineral', 'natural solid material', 'material'], 'mineral-borate': ['mineral', 'natural solid material', 'material'], 'mineral-carbonate or nitrate': ['mineral', 'natural solid material', 'material'], 'mineral-halide': ['mineral', 'natural solid material', 'material'], 'mineral-native element': ['mineral', 'natural solid material', 'material'], 'mineral-organic compound': ['mineral', 'natural solid material', 'material'], 'mineral-oxide': ['mineral', 'natural solid material', 'material'], 'mineral-phosphate, arsenate, or vanadate': ['mineral', 'natural solid material', 'material'], 'mineral-silicate or germanate': ['mineral', 'natural solid material', 'material'], 'mineral-sulfate, selenate, or tellurate': ['mineral', 'natural solid material', 'material'], 'mineral-sulfide or sulfosalt': ['mineral', 'natural solid material', 'material'], 'mixed soil sediment or rock': ['natural solid material', 'material'], 'particulate': ['natural solid material', 'material'], 'cinder': ['rock', 'rock or sediment', 'natural solid material', 'material'], 'rock or sediment': ['natural solid material', 'material'], 'rock': ['rock or sediment', 'natural solid material', 'material'], 'basalt': ['fine grained igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'chert': ['rock', 'rock or sediment', 'natural solid material', 'material'], 'flint': ['chert', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'dolomite': ['rock', 'rock or sediment', 'natural solid material', 'material'], 'gabbro': ['rock', 'rock or sediment', 'natural solid material', 'material'], 'greywacke': ['rock', 'rock or sediment', 'natural solid material', 'material'], 'limestone': ['rock', 'rock or sediment', 'natural solid 
material', 'material'], 'marble': ['rock', 'rock or sediment', 'natural solid material', 'material'], 'obsidian': ['rock', 'rock or sediment', 'natural solid material', 'material'], 'pumice': ['rock', 'rock or sediment', 'natural solid material', 'material'], 'slate': ['rock', 'rock or sediment', 'natural solid material', 'material'], 'travertine': ['rock', 'rock or sediment', 'natural solid material', 'material'], 'aphanite': ['rock', 'rock or sediment', 'natural solid material', 'material'], 'breccia': ['rock', 'rock or sediment', 'natural solid material', 'material'], 'fault related material': ['rock', 'rock or sediment', 'natural solid material', 'material'], 'cataclasite series': ['fault related material', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'mylonitic rock': ['fault related material', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'breccia gouge series': ['fault related material', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'fragmental igneous rock': ['igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'pyroclastic rock': ['fragmental igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'igneous rock': ['rock', 'rock or sediment', 'natural solid material', 'material'], 'acidic igneous rock': ['igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'dacite': ['fine grained igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'granitoid': ['phaneritic igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'alkali feldspar granite': ['granitoid', 'phaneritic igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'granite': ['granitoid', 'phaneritic igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'granodiorite': 
['granitoid', 'phaneritic igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'tonalite': ['granitoid', 'phaneritic igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'quartz rich igneous rock': ['phaneritic igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'rhyolitoid': ['fine grained igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'basic igneous rock': ['igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'gabbroic rock': ['gabbroid', 'phaneritic igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'doleritic rock': ['igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'exotic composition igneous rock': ['igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'fine grained igneous rock': ['igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'andesite': ['intermediate composition igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'foiditoid': ['fine grained igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'high magnesium fine grained igneous rock': ['fine grained igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'phonolitoid': ['fine grained igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'tephritoid': ['fine grained igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'trachytoid': ['fine grained igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'glass rich igneous rock': ['igneous rock', 'rock', 'rock or sediment', 'natural solid 
material', 'material'], 'hypabyssal intrusive rock': ['igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'intermediate composition igneous rock': ['igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'dioritoid': ['phaneritic igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'phaneritic igneous rock': ['igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'anorthositic rock': ['phaneritic igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'aplite': ['phaneritic igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'foid dioritoid': ['phaneritic igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'foid gabbroid': ['phaneritic igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'foid syenitoid': ['phaneritic igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'foidolite': ['phaneritic igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'gabbroid': ['phaneritic igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'monzogabbroic rock': ['gabbroid', 'phaneritic igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'hornblendite': ['ultramafic igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'pegmatite': ['phaneritic igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'peridotite': ['ultramafic igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'pyroxenite': ['ultramafic igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural 
solid material', 'material'], 'syenitoid': ['phaneritic igneous rock', 'igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'plutonic igneous rock': ['igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'porphyry': ['igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'ultrabasic igneous rock': ['igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'ultramafic igneous rock': ['igneous rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'impact generated material': ['rock', 'rock or sediment', 'natural solid material', 'material'], 'massive sulphide': ['rock', 'rock or sediment', 'natural solid material', 'material'], 'metamorphic rock': ['rock', 'rock or sediment', 'natural solid material', 'material'], 'metasomatic rock': ['rock', 'rock or sediment', 'natural solid material', 'material'], 'sedimentary rock': ['rock', 'rock or sediment', 'natural solid material', 'material'], 'carbonate sedimentary rock': ['sedimentary rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'clastic sedimentary rock': ['sedimentary rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'diamictite': ['clastic sedimentary rock', 'sedimentary rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'generic conglomerate': ['sedimentary rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'generic mudstone': ['sedimentary rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'generic sandstone': ['sedimentary rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'hybrid sedimentary rock': ['sedimentary rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'iron rich sedimentary rock': ['sedimentary rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'non clastic siliceous sedimentary rock': 
['sedimentary rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'organic rich sedimentary rock': ['sedimentary rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'phosphorite': ['sedimentary rock', 'rock', 'rock or sediment', 'natural solid material', 'material'], 'tuffite': ['rock', 'rock or sediment', 'natural solid material', 'material'], 'residual material': ['rock', 'rock or sediment', 'natural solid material', 'material'], 'sediment': ['rock or sediment', 'natural solid material', 'material'], 'biogenic sediment': ['sediment', 'rock or sediment', 'natural solid material', 'material'], 'carbonate sediment': ['sediment', 'rock or sediment', 'natural solid material', 'material'], 'chemical sedimentary material': ['sediment', 'rock or sediment', 'natural solid material', 'material'], 'clastic sediment': ['sediment', 'rock or sediment', 'natural solid material', 'material'], 'diamicton': ['clastic sediment', 'sediment', 'rock or sediment', 'natural solid material', 'material'], 'gravel size sediment': ['sediment', 'rock or sediment', 'natural solid material', 'material'], 'hybrid sediment': ['sediment', 'rock or sediment', 'natural solid material', 'material'], 'iron rich sediment': ['sediment', 'rock or sediment', 'natural solid material', 'material'], 'mud size sediment': ['sediment', 'rock or sediment', 'natural solid material', 'material'], 'non clastic siliceous sediment': ['sediment', 'rock or sediment', 'natural solid material', 'material'], 'phosphate rich sediment': ['sediment', 'rock or sediment', 'natural solid material', 'material'], 'sand size sediment': ['sediment', 'rock or sediment', 'natural solid material', 'material'], 'tephra': ['sediment', 'rock or sediment', 'natural solid material', 'material'], 'soil': ['natural solid material', 'material'], 'fluid material': ['material'], 'gaseous material': ['fluid material', 'material'], 'liquid water': ['fluid material', 'material'], 'non-aqueous liquid 
material': ['fluid material', 'material'], 'organic material': ['material'], 'organic animal material': ['organic material', 'material'], 'organic animal product': ['organic material', 'material'], 'hair': ['organic animal product', 'organic material', 'material'], 'leather': ['organic animal product', 'organic material', 'material'], 'organic plant material': ['organic material', 'material'], 'wood': ['plant material', 'organic material', 'material'], 'plant material': ['organic material', 'material'], 'plant fiber': ['plant material', 'organic material', 'material']} 165\n" + ] + } + ], + "source": [ + "parents # all parents \n", + "print(parents, len(parents))" + ] + }, + { + "cell_type": "markdown", + "id": "c6abb131", + "metadata": {}, + "source": [ + "## Generate map to convert labels to specific depth threshold" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "1b423cb9", + "metadata": {}, + "outputs": [], + "source": [ + "# generate map to convert all labels to depth level 1 \n", + "map_to_depth_level_1 = {\"material\":\"material\"}\n", + "for label, parent_labels in parents.items():\n", + " if len(parent_labels) == 1 and parent_labels[0] == \"material\":\n", + " map_to_depth_level_1[label] = label\n", + " continue\n", + " \n", + " for parent_label in parent_labels:\n", + " if parent_label in depth_level_1:\n", + " map_to_depth_level_1[label] = parent_label\n", + " \n", + "map_to_depth_level_1\n", + "assert len(map_to_depth_level_1) == len(parents)\n", + "assert len(set(map_to_depth_level_1.values())) == len(depth_level_1) + 1" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "7b28415c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'material': 'material',\n", + " 'any anthropogenic material': 'any anthropogenic material',\n", + " 'anthropogenic metal material': 'anthropogenic metal material',\n", + " 'brass': 'anthropogenic metal material',\n", + " 'bronze': 'anthropogenic metal 
material',\n", + " 'copper': 'anthropogenic metal material',\n", + " 'gold': 'anthropogenic metal material',\n", + " 'iron': 'anthropogenic metal material',\n", + " 'lead': 'anthropogenic metal material',\n", + " 'pewter': 'anthropogenic metal material',\n", + " 'anthropogenic material': 'anthropogenic material',\n", + " 'anthropogenic organic material': 'anthropogenic organic material',\n", + " 'plastic (material)': 'anthropogenic organic material',\n", + " 'ceramic clay': 'anthropogenic material',\n", + " 'brick clay': 'anthropogenic material',\n", + " 'bucchero': 'anthropogenic material',\n", + " 'faience': 'anthropogenic material',\n", + " 'porcelain': 'anthropogenic material',\n", + " 'terracotta': 'anthropogenic material',\n", + " 'terra sigilata': 'anthropogenic material',\n", + " 'fiber material': 'anthropogenic material',\n", + " 'glass': 'anthropogenic material',\n", + " 'paper': 'anthropogenic material',\n", + " 'plaster': 'anthropogenic material',\n", + " 'plaster or mortar': 'anthropogenic material',\n", + " 'rubber': 'anthropogenic material',\n", + " 'any ice': 'any ice',\n", + " 'frozen water': 'frozen water',\n", + " 'biogenic non-organic material': 'biogenic non-organic material',\n", + " 'amber': 'amber',\n", + " 'bone': 'bone',\n", + " 'charcoal': 'charcoal',\n", + " 'coal': 'rock or sediment',\n", + " 'shell': 'shell',\n", + " 'dispersed media': 'dispersed media',\n", + " 'natural solid material': 'natural solid material',\n", + " 'mineral': 'mineral',\n", + " 'hematite': 'mineral',\n", + " 'kaolin': 'mineral',\n", + " 'mica': 'mineral',\n", + " 'quartz': 'mineral',\n", + " 'mineral-borate': 'mineral',\n", + " 'mineral-carbonate or nitrate': 'mineral',\n", + " 'mineral-halide': 'mineral',\n", + " 'mineral-native element': 'mineral',\n", + " 'mineral-organic compound': 'mineral',\n", + " 'mineral-oxide': 'mineral',\n", + " 'mineral-phosphate, arsenate, or vanadate': 'mineral',\n", + " 'mineral-silicate or germanate': 'mineral',\n", + " 
'mineral-sulfate, selenate, or tellurate': 'mineral',\n", + " 'mineral-sulfide or sulfosalt': 'mineral',\n", + " 'mixed soil sediment or rock': 'mixed soil sediment or rock',\n", + " 'particulate': 'particulate',\n", + " 'cinder': 'rock or sediment',\n", + " 'rock or sediment': 'rock or sediment',\n", + " 'rock': 'rock or sediment',\n", + " 'basalt': 'rock or sediment',\n", + " 'chert': 'rock or sediment',\n", + " 'flint': 'rock or sediment',\n", + " 'dolomite': 'rock or sediment',\n", + " 'gabbro': 'rock or sediment',\n", + " 'greywacke': 'rock or sediment',\n", + " 'limestone': 'rock or sediment',\n", + " 'marble': 'rock or sediment',\n", + " 'obsidian': 'rock or sediment',\n", + " 'pumice': 'rock or sediment',\n", + " 'slate': 'rock or sediment',\n", + " 'travertine': 'rock or sediment',\n", + " 'aphanite': 'rock or sediment',\n", + " 'breccia': 'rock or sediment',\n", + " 'fault related material': 'rock or sediment',\n", + " 'cataclasite series': 'rock or sediment',\n", + " 'mylonitic rock': 'rock or sediment',\n", + " 'breccia gouge series': 'rock or sediment',\n", + " 'fragmental igneous rock': 'rock or sediment',\n", + " 'pyroclastic rock': 'rock or sediment',\n", + " 'igneous rock': 'rock or sediment',\n", + " 'acidic igneous rock': 'rock or sediment',\n", + " 'dacite': 'rock or sediment',\n", + " 'granitoid': 'rock or sediment',\n", + " 'alkali feldspar granite': 'rock or sediment',\n", + " 'granite': 'rock or sediment',\n", + " 'granodiorite': 'rock or sediment',\n", + " 'tonalite': 'rock or sediment',\n", + " 'quartz rich igneous rock': 'rock or sediment',\n", + " 'rhyolitoid': 'rock or sediment',\n", + " 'basic igneous rock': 'rock or sediment',\n", + " 'gabbroic rock': 'rock or sediment',\n", + " 'doleritic rock': 'rock or sediment',\n", + " 'exotic composition igneous rock': 'rock or sediment',\n", + " 'fine grained igneous rock': 'rock or sediment',\n", + " 'andesite': 'rock or sediment',\n", + " 'foiditoid': 'rock or sediment',\n", + " 'high 
magnesium fine grained igneous rock': 'rock or sediment',\n", + " 'phonolitoid': 'rock or sediment',\n", + " 'tephritoid': 'rock or sediment',\n", + " 'trachytoid': 'rock or sediment',\n", + " 'glass rich igneous rock': 'rock or sediment',\n", + " 'hypabyssal intrusive rock': 'rock or sediment',\n", + " 'intermediate composition igneous rock': 'rock or sediment',\n", + " 'dioritoid': 'rock or sediment',\n", + " 'phaneritic igneous rock': 'rock or sediment',\n", + " 'anorthositic rock': 'rock or sediment',\n", + " 'aplite': 'rock or sediment',\n", + " 'foid dioritoid': 'rock or sediment',\n", + " 'foid gabbroid': 'rock or sediment',\n", + " 'foid syenitoid': 'rock or sediment',\n", + " 'foidolite': 'rock or sediment',\n", + " 'gabbroid': 'rock or sediment',\n", + " 'monzogabbroic rock': 'rock or sediment',\n", + " 'hornblendite': 'rock or sediment',\n", + " 'pegmatite': 'rock or sediment',\n", + " 'peridotite': 'rock or sediment',\n", + " 'pyroxenite': 'rock or sediment',\n", + " 'syenitoid': 'rock or sediment',\n", + " 'plutonic igneous rock': 'rock or sediment',\n", + " 'porphyry': 'rock or sediment',\n", + " 'ultrabasic igneous rock': 'rock or sediment',\n", + " 'ultramafic igneous rock': 'rock or sediment',\n", + " 'impact generated material': 'rock or sediment',\n", + " 'massive sulphide': 'rock or sediment',\n", + " 'metamorphic rock': 'rock or sediment',\n", + " 'metasomatic rock': 'rock or sediment',\n", + " 'sedimentary rock': 'rock or sediment',\n", + " 'carbonate sedimentary rock': 'rock or sediment',\n", + " 'clastic sedimentary rock': 'rock or sediment',\n", + " 'diamictite': 'rock or sediment',\n", + " 'generic conglomerate': 'rock or sediment',\n", + " 'generic mudstone': 'rock or sediment',\n", + " 'generic sandstone': 'rock or sediment',\n", + " 'hybrid sedimentary rock': 'rock or sediment',\n", + " 'iron rich sedimentary rock': 'rock or sediment',\n", + " 'non clastic siliceous sedimentary rock': 'rock or sediment',\n", + " 'organic rich 
sedimentary rock': 'rock or sediment',\n", + " 'phosphorite': 'rock or sediment',\n", + " 'tuffite': 'rock or sediment',\n", + " 'residual material': 'rock or sediment',\n", + " 'sediment': 'rock or sediment',\n", + " 'biogenic sediment': 'rock or sediment',\n", + " 'carbonate sediment': 'rock or sediment',\n", + " 'chemical sedimentary material': 'rock or sediment',\n", + " 'clastic sediment': 'rock or sediment',\n", + " 'diamicton': 'rock or sediment',\n", + " 'gravel size sediment': 'rock or sediment',\n", + " 'hybrid sediment': 'rock or sediment',\n", + " 'iron rich sediment': 'rock or sediment',\n", + " 'mud size sediment': 'rock or sediment',\n", + " 'non clastic siliceous sediment': 'rock or sediment',\n", + " 'phosphate rich sediment': 'rock or sediment',\n", + " 'sand size sediment': 'rock or sediment',\n", + " 'tephra': 'rock or sediment',\n", + " 'soil': 'soil',\n", + " 'fluid material': 'fluid material',\n", + " 'gaseous material': 'gaseous material',\n", + " 'liquid water': 'liquid water',\n", + " 'non-aqueous liquid material': 'non-aqueous liquid material',\n", + " 'organic material': 'organic material',\n", + " 'organic animal material': 'organic animal material',\n", + " 'organic animal product': 'organic animal product',\n", + " 'hair': 'organic animal product',\n", + " 'leather': 'organic animal product',\n", + " 'organic plant material': 'organic plant material',\n", + " 'wood': 'plant material',\n", + " 'plant material': 'plant material',\n", + " 'plant fiber': 'plant material'}" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "map_to_depth_level_2 = {\"material\":\"material\"}\n", + "\n", + "for label, parent_labels in parents.items():\n", + " if len(parent_labels) == 1 and parent_labels[0] == \"material\":\n", + " map_to_depth_level_2[label] = label # depth level 1 labels itself\n", + " continue\n", + " if label in depth_level_2:\n", + " map_to_depth_level_2[label] = label\n", + " 
continue\n", + " for parent_label in parent_labels:\n", + " if parent_label in depth_level_2:\n", + " map_to_depth_level_2[label] = parent_label # found the substituting label\n", + " continue\n", + "\n", + "assert len(map_to_depth_level_2) == len(parents)\n", + "assert len(set(map_to_depth_level_2.values())) == len(depth_level_1) + len(depth_level_2) + 1\n", + "map_to_depth_level_2" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "76fe8350", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "63 7 20 35\n" + ] + }, + { + "data": { + "text/plain": [ + "{'material': 'material',\n", + " 'any anthropogenic material': 'any anthropogenic material',\n", + " 'anthropogenic metal material': 'anthropogenic metal material',\n", + " 'brass': 'brass',\n", + " 'bronze': 'bronze',\n", + " 'copper': 'copper',\n", + " 'gold': 'gold',\n", + " 'iron': 'iron',\n", + " 'lead': 'lead',\n", + " 'pewter': 'pewter',\n", + " 'anthropogenic material': 'anthropogenic material',\n", + " 'anthropogenic organic material': 'anthropogenic organic material',\n", + " 'plastic (material)': 'plastic (material)',\n", + " 'ceramic clay': 'ceramic clay',\n", + " 'brick clay': 'ceramic clay',\n", + " 'bucchero': 'ceramic clay',\n", + " 'faience': 'ceramic clay',\n", + " 'porcelain': 'ceramic clay',\n", + " 'terracotta': 'ceramic clay',\n", + " 'terra sigilata': 'ceramic clay',\n", + " 'fiber material': 'fiber material',\n", + " 'glass': 'glass',\n", + " 'paper': 'paper',\n", + " 'plaster': 'plaster',\n", + " 'plaster or mortar': 'plaster or mortar',\n", + " 'rubber': 'rubber',\n", + " 'any ice': 'any ice',\n", + " 'frozen water': 'frozen water',\n", + " 'biogenic non-organic material': 'biogenic non-organic material',\n", + " 'amber': 'amber',\n", + " 'bone': 'bone',\n", + " 'charcoal': 'charcoal',\n", + " 'coal': 'rock',\n", + " 'shell': 'shell',\n", + " 'dispersed media': 'dispersed media',\n", + " 'natural solid material': 'natural 
solid material',\n", + " 'mineral': 'mineral',\n", + " 'hematite': 'hematite',\n", + " 'kaolin': 'kaolin',\n", + " 'mica': 'mica',\n", + " 'quartz': 'quartz',\n", + " 'mineral-borate': 'mineral-borate',\n", + " 'mineral-carbonate or nitrate': 'mineral-carbonate or nitrate',\n", + " 'mineral-halide': 'mineral-halide',\n", + " 'mineral-native element': 'mineral-native element',\n", + " 'mineral-organic compound': 'mineral-organic compound',\n", + " 'mineral-oxide': 'mineral-oxide',\n", + " 'mineral-phosphate, arsenate, or vanadate': 'mineral-phosphate, arsenate, or vanadate',\n", + " 'mineral-silicate or germanate': 'mineral-silicate or germanate',\n", + " 'mineral-sulfate, selenate, or tellurate': 'mineral-sulfate, selenate, or tellurate',\n", + " 'mineral-sulfide or sulfosalt': 'mineral-sulfide or sulfosalt',\n", + " 'mixed soil sediment or rock': 'mixed soil sediment or rock',\n", + " 'particulate': 'particulate',\n", + " 'cinder': 'rock',\n", + " 'rock or sediment': 'rock or sediment',\n", + " 'rock': 'rock',\n", + " 'basalt': 'rock',\n", + " 'chert': 'rock',\n", + " 'flint': 'rock',\n", + " 'dolomite': 'rock',\n", + " 'gabbro': 'rock',\n", + " 'greywacke': 'rock',\n", + " 'limestone': 'rock',\n", + " 'marble': 'rock',\n", + " 'obsidian': 'rock',\n", + " 'pumice': 'rock',\n", + " 'slate': 'rock',\n", + " 'travertine': 'rock',\n", + " 'aphanite': 'rock',\n", + " 'breccia': 'rock',\n", + " 'fault related material': 'rock',\n", + " 'cataclasite series': 'rock',\n", + " 'mylonitic rock': 'rock',\n", + " 'breccia gouge series': 'rock',\n", + " 'fragmental igneous rock': 'rock',\n", + " 'pyroclastic rock': 'rock',\n", + " 'igneous rock': 'rock',\n", + " 'acidic igneous rock': 'rock',\n", + " 'dacite': 'rock',\n", + " 'granitoid': 'rock',\n", + " 'alkali feldspar granite': 'rock',\n", + " 'granite': 'rock',\n", + " 'granodiorite': 'rock',\n", + " 'tonalite': 'rock',\n", + " 'quartz rich igneous rock': 'rock',\n", + " 'rhyolitoid': 'rock',\n", + " 'basic igneous rock': 
'rock',\n", + " 'gabbroic rock': 'rock',\n", + " 'doleritic rock': 'rock',\n", + " 'exotic composition igneous rock': 'rock',\n", + " 'fine grained igneous rock': 'rock',\n", + " 'andesite': 'rock',\n", + " 'foiditoid': 'rock',\n", + " 'high magnesium fine grained igneous rock': 'rock',\n", + " 'phonolitoid': 'rock',\n", + " 'tephritoid': 'rock',\n", + " 'trachytoid': 'rock',\n", + " 'glass rich igneous rock': 'rock',\n", + " 'hypabyssal intrusive rock': 'rock',\n", + " 'intermediate composition igneous rock': 'rock',\n", + " 'dioritoid': 'rock',\n", + " 'phaneritic igneous rock': 'rock',\n", + " 'anorthositic rock': 'rock',\n", + " 'aplite': 'rock',\n", + " 'foid dioritoid': 'rock',\n", + " 'foid gabbroid': 'rock',\n", + " 'foid syenitoid': 'rock',\n", + " 'foidolite': 'rock',\n", + " 'gabbroid': 'rock',\n", + " 'monzogabbroic rock': 'rock',\n", + " 'hornblendite': 'rock',\n", + " 'pegmatite': 'rock',\n", + " 'peridotite': 'rock',\n", + " 'pyroxenite': 'rock',\n", + " 'syenitoid': 'rock',\n", + " 'plutonic igneous rock': 'rock',\n", + " 'porphyry': 'rock',\n", + " 'ultrabasic igneous rock': 'rock',\n", + " 'ultramafic igneous rock': 'rock',\n", + " 'impact generated material': 'rock',\n", + " 'massive sulphide': 'rock',\n", + " 'metamorphic rock': 'rock',\n", + " 'metasomatic rock': 'rock',\n", + " 'sedimentary rock': 'rock',\n", + " 'carbonate sedimentary rock': 'rock',\n", + " 'clastic sedimentary rock': 'rock',\n", + " 'diamictite': 'rock',\n", + " 'generic conglomerate': 'rock',\n", + " 'generic mudstone': 'rock',\n", + " 'generic sandstone': 'rock',\n", + " 'hybrid sedimentary rock': 'rock',\n", + " 'iron rich sedimentary rock': 'rock',\n", + " 'non clastic siliceous sedimentary rock': 'rock',\n", + " 'organic rich sedimentary rock': 'rock',\n", + " 'phosphorite': 'rock',\n", + " 'tuffite': 'rock',\n", + " 'residual material': 'rock',\n", + " 'sediment': 'sediment',\n", + " 'biogenic sediment': 'sediment',\n", + " 'carbonate sediment': 'sediment',\n", + " 
'chemical sedimentary material': 'sediment',\n", + " 'clastic sediment': 'sediment',\n", + " 'diamicton': 'sediment',\n", + " 'gravel size sediment': 'sediment',\n", + " 'hybrid sediment': 'sediment',\n", + " 'iron rich sediment': 'sediment',\n", + " 'mud size sediment': 'sediment',\n", + " 'non clastic siliceous sediment': 'sediment',\n", + " 'phosphate rich sediment': 'sediment',\n", + " 'sand size sediment': 'sediment',\n", + " 'tephra': 'sediment',\n", + " 'soil': 'soil',\n", + " 'fluid material': 'fluid material',\n", + " 'gaseous material': 'gaseous material',\n", + " 'liquid water': 'liquid water',\n", + " 'non-aqueous liquid material': 'non-aqueous liquid material',\n", + " 'organic material': 'organic material',\n", + " 'organic animal material': 'organic animal material',\n", + " 'organic animal product': 'organic animal product',\n", + " 'hair': 'hair',\n", + " 'leather': 'leather',\n", + " 'organic plant material': 'organic plant material',\n", + " 'wood': 'wood',\n", + " 'plant material': 'plant material',\n", + " 'plant fiber': 'plant fiber'}" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "map_to_depth_level_3 = {\"material\":\"material\"}\n", + "\n", + "for label, parent_labels in parents.items():\n", + " if len(parent_labels) == 1 and parent_labels[0] == \"material\":\n", + " map_to_depth_level_3[label] = label # depth level 1 labels itself\n", + " continue\n", + " if label in depth_level_3:\n", + " map_to_depth_level_3[label] = label\n", + " continue\n", + " if label in depth_level_2:\n", + " map_to_depth_level_3[label] = label\n", + " continue\n", + " \n", + " for parent_label in parent_labels:\n", + " if parent_label in depth_level_3:\n", + " map_to_depth_level_3[label] = parent_label # found the substituting label\n", + " break # found it \n", + " elif parent_label in depth_level_2:\n", + " map_to_depth_level_3[label] = parent_label\n", + " break\n", + " elif parent_label in 
depth_level_1:\n", + " map_to_depth_level_3[label] = parent_label\n", + " break\n", + "\n", + "assert len(map_to_depth_level_3) == len(parents)\n", + "print(len(set(map_to_depth_level_3.values())), len(depth_level_1) , len(depth_level_2) , len(depth_level_3) )\n", + "map_to_depth_level_3" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "5212dabc", + "metadata": {}, + "outputs": [], + "source": [ + "# store the mapping\n", + "import json\n", + "\n", + "with open('depth_level_1_mapping.json', 'w') as f:\n", + " json.dump(map_to_depth_level_1, f)\n", + " \n", + "with open('depth_level_2_mapping.json', 'w') as f:\n", + " json.dump(map_to_depth_level_2, f)\n", + "\n", + "with open('depth_level_3_mapping.json', 'w') as f:\n", + " json.dump(map_to_depth_level_3, f)" + ] + }, + { + "cell_type": "markdown", + "id": "98526cfa", + "metadata": {}, + "source": [ + "## Read Extension type and add that info" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "825a25bf", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "1739c151", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/5_/2jhxkdyn5p74jn6q496vblp00000gn/T/ipykernel_6418/4051324437.py:1: DtypeWarning: Columns (1,12,24) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " gold = pd.read_csv(\"./datasets/SESAR_CV_labeled.csv\")\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0description_supplementMetadata_localitydescription_materialdescription_supplementMetadata_countrydescription_supplementMetadata_provincedescription_sampleTypedescription_supplementMetadata_platformTypedescription_supplementMetadata_geologicalAgedescription_supplementMetadata_locationDescriptiondescription_supplementMetadata_purpose...description_supplementMetadata_primaryLocationNamedescription_supplementMetadata_geologicalUnitdescription_supplementMetadata_localityDescriptiondescription_supplementMetadata_originalArchivedescription_supplementMetadata_platformDescrdescription_collectionMethoddescription_igsnPrefixdescription_supplementMetadata_cruiseFieldPrgrmdescription_supplementMetadata_publicationUrl_descriptionoriginal_high_label
00NaNming:sulfateselenatetelluratemineralUnited StatesNew Jerseya sample that is an individual unit, including...NaNNaNCoordinates for Sterling Hill Mine (MRDS ID: W...NaN...Sterling Hill MineNaNOgdensburgNaNNaNNaNNHBNaNSmithsonian collections record for NMNH C6294-...mat:mineral
11NaNming:sulfateselenatetelluratemineralUnited StatesNew Jerseya sample that is an individual unit, including...NaNNaNMatched to the GeoNames record for Franklin, S...NaN...Franklin MineNaNFranklinNaNNaNNaNNHBNaNSmithsonian collections record for NMNH C6295-...mat:mineral
22NaNming:sulfateselenatetelluratemineralUnited StatesNew Jerseya sample that is an individual unit, including...NaNNaNMatched to the GeoNames record for Franklin, S...NaN...Franklin MineNaNFranklinNaNNaNNaNNHBNaNSmithsonian collections record for NMNH C6295-...mat:mineral
33NaNming:silicategermanatemineralUnited StatesNew Jerseya sample that is an individual unit, including...NaNNaNCoordinates from GEOLocate for parse pattern \"...NaN...Franklin Mining DistrictNaNFranklinNaNNaNNaNNHBNaNSmithsonian collections record for NMNH C6296-...mat:mineral
44NaNming:silicategermanatemineralUnited StatesNew Jerseya sample that is an individual unit, including...NaNNaNCoordinates for Sterling Hill Mine (MRDS ID: W...NaN...Sterling Hill MineNaNOgdensburgNaNNaNNaNNHBNaNSmithsonian collections record for NMNH C6299-...mat:mineral
..................................................................
987548995644NaNrksd:Metamorphic RockNaNNaNa sample that is an individual unit, including...NaNNaNNaNNaN...Mitchell Peak, West Fosdick Mountains, AntarcticaNaNNaNDept of Geology, Colorado College, Colorado Sp...NaNManualMBLNaNNaNmat:rock
987549995645NaNrksd:Metamorphic RockNaNNaNa sample that is an individual unit, including...NaNNaNNaNNaN...Mitchell Peak, West Fosdick Mountains, AntarcticaNaNNaNDept of Geology, Colorado College, Colorado Sp...NaNManualMBLNaNNaNmat:rock
987550995646NaNrksd:Metamorphic RockNaNNaNa sample that is an individual unit, including...NaNNaNNaNNaN...Ben Burton ParkNaNNaNDepartment of Geology, Bryn Mawr College, Bryn...NaNManualUGANaNNaNmat:rock
987551995647NaNrksd:Metamorphic RockNaNNaNa sample that is an individual unit, including...NaNNaNNaNNaN...Salamander Range, north Victoria Land, AntarcticaNaNNaNDept of Geology, Colorado College, Colorado Sp...NaNManualLTRNaNNaNmat:rock
987552995648NaNrksd:Metamorphic RockNaNNaNa sample that is an individual unit, including...NaNNaNNaNNaN...Salamander Range, north Victoria Land, AntarcticaNaNNaNDept of Geology, Colorado College, Colorado Sp...NaNManualLTRNaNNaNmat:rock
\n", + "

987553 rows × 30 columns

\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 description_supplementMetadata_locality \\\n", + "0 0 NaN \n", + "1 1 NaN \n", + "2 2 NaN \n", + "3 3 NaN \n", + "4 4 NaN \n", + "... ... ... \n", + "987548 995644 NaN \n", + "987549 995645 NaN \n", + "987550 995646 NaN \n", + "987551 995647 NaN \n", + "987552 995648 NaN \n", + "\n", + " description_material \\\n", + "0 ming:sulfateselenatetelluratemineral \n", + "1 ming:sulfateselenatetelluratemineral \n", + "2 ming:sulfateselenatetelluratemineral \n", + "3 ming:silicategermanatemineral \n", + "4 ming:silicategermanatemineral \n", + "... ... \n", + "987548 rksd:Metamorphic Rock \n", + "987549 rksd:Metamorphic Rock \n", + "987550 rksd:Metamorphic Rock \n", + "987551 rksd:Metamorphic Rock \n", + "987552 rksd:Metamorphic Rock \n", + "\n", + " description_supplementMetadata_country \\\n", + "0 United States \n", + "1 United States \n", + "2 United States \n", + "3 United States \n", + "4 United States \n", + "... ... \n", + "987548 NaN \n", + "987549 NaN \n", + "987550 NaN \n", + "987551 NaN \n", + "987552 NaN \n", + "\n", + " description_supplementMetadata_province \\\n", + "0 New Jersey \n", + "1 New Jersey \n", + "2 New Jersey \n", + "3 New Jersey \n", + "4 New Jersey \n", + "... ... \n", + "987548 NaN \n", + "987549 NaN \n", + "987550 NaN \n", + "987551 NaN \n", + "987552 NaN \n", + "\n", + " description_sampleType \\\n", + "0 a sample that is an individual unit, including... \n", + "1 a sample that is an individual unit, including... \n", + "2 a sample that is an individual unit, including... \n", + "3 a sample that is an individual unit, including... \n", + "4 a sample that is an individual unit, including... \n", + "... ... \n", + "987548 a sample that is an individual unit, including... \n", + "987549 a sample that is an individual unit, including... \n", + "987550 a sample that is an individual unit, including... \n", + "987551 a sample that is an individual unit, including... 
\n", + "987552 a sample that is an individual unit, including... \n", + "\n", + " description_supplementMetadata_platformType \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "... ... \n", + "987548 NaN \n", + "987549 NaN \n", + "987550 NaN \n", + "987551 NaN \n", + "987552 NaN \n", + "\n", + " description_supplementMetadata_geologicalAge \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "... ... \n", + "987548 NaN \n", + "987549 NaN \n", + "987550 NaN \n", + "987551 NaN \n", + "987552 NaN \n", + "\n", + " description_supplementMetadata_locationDescription \\\n", + "0 Coordinates for Sterling Hill Mine (MRDS ID: W... \n", + "1 Matched to the GeoNames record for Franklin, S... \n", + "2 Matched to the GeoNames record for Franklin, S... \n", + "3 Coordinates from GEOLocate for parse pattern \"... \n", + "4 Coordinates for Sterling Hill Mine (MRDS ID: W... \n", + "... ... \n", + "987548 NaN \n", + "987549 NaN \n", + "987550 NaN \n", + "987551 NaN \n", + "987552 NaN \n", + "\n", + " description_supplementMetadata_purpose ... \\\n", + "0 NaN ... \n", + "1 NaN ... \n", + "2 NaN ... \n", + "3 NaN ... \n", + "4 NaN ... \n", + "... ... ... \n", + "987548 NaN ... \n", + "987549 NaN ... \n", + "987550 NaN ... \n", + "987551 NaN ... \n", + "987552 NaN ... \n", + "\n", + " description_supplementMetadata_primaryLocationName \\\n", + "0 Sterling Hill Mine \n", + "1 Franklin Mine \n", + "2 Franklin Mine \n", + "3 Franklin Mining District \n", + "4 Sterling Hill Mine \n", + "... ... \n", + "987548 Mitchell Peak, West Fosdick Mountains, Antarctica \n", + "987549 Mitchell Peak, West Fosdick Mountains, Antarctica \n", + "987550 Ben Burton Park \n", + "987551 Salamander Range, north Victoria Land, Antarctica \n", + "987552 Salamander Range, north Victoria Land, Antarctica \n", + "\n", + " description_supplementMetadata_geologicalUnit \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "... ... 
\n", + "987548 NaN \n", + "987549 NaN \n", + "987550 NaN \n", + "987551 NaN \n", + "987552 NaN \n", + "\n", + " description_supplementMetadata_localityDescription \\\n", + "0 Ogdensburg \n", + "1 Franklin \n", + "2 Franklin \n", + "3 Franklin \n", + "4 Ogdensburg \n", + "... ... \n", + "987548 NaN \n", + "987549 NaN \n", + "987550 NaN \n", + "987551 NaN \n", + "987552 NaN \n", + "\n", + " description_supplementMetadata_originalArchive \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "... ... \n", + "987548 Dept of Geology, Colorado College, Colorado Sp... \n", + "987549 Dept of Geology, Colorado College, Colorado Sp... \n", + "987550 Department of Geology, Bryn Mawr College, Bryn... \n", + "987551 Dept of Geology, Colorado College, Colorado Sp... \n", + "987552 Dept of Geology, Colorado College, Colorado Sp... \n", + "\n", + " description_supplementMetadata_platformDescr \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "... ... \n", + "987548 NaN \n", + "987549 NaN \n", + "987550 NaN \n", + "987551 NaN \n", + "987552 NaN \n", + "\n", + " description_collectionMethod description_igsnPrefix \\\n", + "0 NaN NHB \n", + "1 NaN NHB \n", + "2 NaN NHB \n", + "3 NaN NHB \n", + "4 NaN NHB \n", + "... ... ... \n", + "987548 Manual MBL \n", + "987549 Manual MBL \n", + "987550 Manual UGA \n", + "987551 Manual LTR \n", + "987552 Manual LTR \n", + "\n", + " description_supplementMetadata_cruiseFieldPrgrm \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "... ... \n", + "987548 NaN \n", + "987549 NaN \n", + "987550 NaN \n", + "987551 NaN \n", + "987552 NaN \n", + "\n", + " description_supplementMetadata_publicationUrl_description \\\n", + "0 Smithsonian collections record for NMNH C6294-... \n", + "1 Smithsonian collections record for NMNH C6295-... \n", + "2 Smithsonian collections record for NMNH C6295-... \n", + "3 Smithsonian collections record for NMNH C6296-... 
\n", + "4 Smithsonian collections record for NMNH C6299-... \n", + "... ... \n", + "987548 NaN \n", + "987549 NaN \n", + "987550 NaN \n", + "987551 NaN \n", + "987552 NaN \n", + "\n", + " original_high_label \n", + "0 mat:mineral \n", + "1 mat:mineral \n", + "2 mat:mineral \n", + "3 mat:mineral \n", + "4 mat:mineral \n", + "... ... \n", + "987548 mat:rock \n", + "987549 mat:rock \n", + "987550 mat:rock \n", + "987551 mat:rock \n", + "987552 mat:rock \n", + "\n", + "[987553 rows x 30 columns]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gold = pd.read_csv(\"./datasets/SESAR_CV_labeled.csv\")\n", + "gold" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "96f81340", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'rksd:Generic Mudstone/rksd:Coal', 'rksd:Generic Sandstone/rksd:Generic Mudstone', 'rksd:Tephritoid', 'mat:rockorsediment', 'mat:particulate', 'rksd:Carbonate Sedimentary Rock/rksd:Generic Mudstone', 'mat:mineral', 'rksd:Pyroclastic Rock/rksd:Basalt', 'rksd:Non Clastic Siliceous Sediment', 'rksd:Cataclasite Series', 'rksd:Phonolitoid', 'rksd:Generic Sandstone/rksd:Tuffite', 'rksd:Massive Sulphide', 'rksd:Rhyolitoid/rksd:Glass Rich Igneous Rock', 'rksd:Metasomatic Rock', 'rksd:Diamictite', 'mat:anthropogenicmetal', 'rksd:Fragmental Igneous Rock/rksd:Fine Grained Igneous Rock', 'rksd:Carbonate Sedimentary Rock', 'rksd:Generic Mudstone/rksd:Biogenic Sediment', 'rksd:Exotic Composition Igneous Rock', 'rksd:Basalt/rksd:Chemical Sedimentary Material', 'rksd:Anorthositic Rock', 'rksd:Tephritoid/rksd:Pyroclastic Rock', 'rksd:Generic Conglomerate', 'rksd:Basalt', 'mat:liquidwater', 'rksd:Impact Generated Material', 'rksd:High Magnesium Fine Grained Igneous Rock', 'rksd:Fragmental Igneous Rock', 'ocmat:ceramicclay', 'rksd:Syenitoid', 'rksd:Basalt/rksd:Glass Rich Igneous Rock', 'rksd:Tonalite', 'ming:carbonatenitratemineral', 
'mat:mixedsoilsedimentrock', 'rksd:Tephra', 'rksd:Generic Mudstone', 'rksd:Sand Size Sediment', 'mat:material', 'rksd:Sedimentary Rock', 'ming:sulfidesulfosaltmineral', 'ming:silicategermanatemineral', 'rksd:Andesite/rksd:Diamicton', 'rksd:Dioritoid', 'rksd:Biogenic Sediment', 'ming:oxidemineral', 'rksd:Igneous Rock', 'rksd:Trachytoid', 'rksd:Breccia', 'rksd:Phaneritic Igneous Rock/rksd:Pyroclastic Rock', 'rksd:Metamorphic Rock', 'rksd:Basic Igneous Rock', 'mat:organicmaterial', 'rksd:Diamicton/rksd:Dacite', 'rksd:Mud Size Sediment', 'ming:halidemineral', 'rksd:Tuffite', 'rksd:Metamorphic Rock/rksd:Basic Igneous Rock', 'rksd:Pegmatite', 'rksd:Chemical Sedimentary Material', 'rksd:Non Clastic Siliceous Sedimentary Rock', 'ming:sulfateselenatetelluratemineral', 'rksd:coal', 'rksd:Metamorphic Rock/rksd:Gabbroid', 'rksd:Quartz Rich Igneous Rock', 'rksd:Gravel Size Sediment', 'ocmat:glass', 'rksd:Alkali Feldspar Granite', 'rksd:Doleritic Rock', 'rksd:Gabbroic Rock', 'rksd:Porphyry', 'rksd:Peridotite', 'rksd:Metamorphic Rock/rksd:Fine Grained Igneous Rock', 'mat:otheranthropogenicmaterial', 'rksd:Fragmental Igneous Rock/rksd:Doleritic Rock', 'rksd:Foid Syenitoid', 'rksd:Clastic Sedimentary Rock', 'rksd:Pyroclastic Rock/rksd:Rhyolitoid', 'ming:phosphatearsenatevanadatemineral', 'rksd:Foidolite', 'rksd:Aphanite', 'rksd:Acidic Igneous Rock', 'rksd:Clastic Sedimentary Rock/rksd:Coal', 'rksd:Mylonitic Rock', 'rksd:Metamorphic Rock/rksd:Generic Mudstone', 'rksd:Iron Rich Sedimentary Rock', 'rksd:Pyroclastic Rock', 'rksd:Trachytoid/rksd:Pyroclastic Rock', 'rksd:Generic Sandstone', 'rksd:Carbonate Sedimentary Rock/rksd:Clastic Sedimentary Rock', 'mat:sediment', 'rksd:Granodiorite', 'rksd:Pyroxenite', 'rksd:Generic Sandstone/rksd:Organic Rich Sedimentary Rock', 'rksd:Carbonate Sediment', 'rksd:Generic Sandstone/rksd:Coal', 'mat:biogenicnonorganicmaterial', 'rksd:basalt', 'rksd:Hornblendite', 'rksd:Hybrid Sedimentary Rock', 'rksd:Diamicton', 'rksd:Metasomatic 
Rock/rksd:Peridotite', 'rksd:Clastic sediment', 'rksd:Fine Grained Igneous Rock', 'rksd:Metasomatic Rock/rksd:Ultramafic Igneous Rock', 'rksd:Metamorphic Rock/rksd:Dioritoid', 'rksd:anorthositic rock', 'rksd:Coal', 'ming:boratemineral', 'rksd:andesite', 'rksd:Phaneritic Igneous Rock', 'rksd:Granite', 'ocmat:charcoal', 'ming:nativeelementmineral', 'rksd:Dacite', 'mat:rock', 'mat:gas', 'rksd:Gabbroid', 'ming:organicmineral', 'rksd:Granitoid', 'rksd:Coal/ming:phosphatearsenatevanadatemineral', 'rksd:Foiditoid', 'rksd:residual material', 'rksd:Foid Gabbroid', 'mat:soil', 'rksd:Breccia Gouge Series', 'rksd:Rhyolitoid', 'rksd:Fault Related Material', 'rksd:Organic Rich Sedimentary Rock', 'rksd:Andesite', 'rksd:Glass Rich Igneous Rock', 'rksd:Metasomatic Rock/rksd:Generic Mudstone', 'rksd:Clastic Sediment', 'rksd:Ultramafic Igneous Rock'}\n" + ] + } + ], + "source": [ + "gold_labels = set(gold[\"description_material\"].values.tolist())\n", + "print(gold_labels) # extension type " + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "4dba5edb", + "metadata": {}, + "outputs": [], + "source": [ + "# store original column\n", + "gold = gold.assign(original_label=gold.description_material)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "024a32c4", + "metadata": {}, + "outputs": [], + "source": [ + "# prefixes to remove \n", + "prefixes = [\"rksd:\",\"mat:\",\"ming:\",\"ocmat:\"]\n", + "\n", + "def cleanup(value):\n", + " # remove prefix and space out \n", + " splitted = value.split(\"/\") # multiple labels can exist \n", + " cleanedup=[]\n", + " for split in splitted:\n", + " for prefix in prefixes:\n", + " if split.startswith(prefix):\n", + " transformed = split[len(prefix):].lower()\n", + " if transformed in spaced_map:\n", + " # map to spaced label\n", + " transformed = spaced_map[transformed]\n", + " cleanedup.append(transformed)\n", + " if len(cleanedup) == 0:\n", + " # no label starting with prefix \n", + " cleanedup = 
[split.lower() for split in splitted]\n", + " return \"/\".join(cleanedup) # add back the parents\n", + " \n", + " \n", + "# strip off prefix and space out \n", + "gold[\"description_material\"]=gold[\"description_material\"].apply(cleanup)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "c127fed2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'dioritoid', 'coal', 'hornblendite', 'generic sandstone', 'fragmental igneous rock', 'metasomatic rock/ultramafic igneous rock', 'metamorphic rock/fine grained igneous rock', 'acidic igneous rock', 'mineral', 'diamicton', 'pyroclastic rock/basalt', 'andesite/diamicton', 'anorthositic rock', 'tuffite', 'dacite', 'mud size sediment', 'fault related material', 'tonalite', 'basalt/chemical sedimentary material', 'fragmental igneous rock/doleritic rock', 'Rock/sediment', 'metamorphic rock/dioritoid', 'diamicton/dacite', 'residual material', 'Biogenic non-organic material', 'basalt', 'material', 'Ceramic clay', 'sediment', 'quartz rich igneous rock', 'pyroclastic rock/rhyolitoid', 'granite', 'Anthropogenic material', 'granodiorite', 'rhyolitoid/glass rich igneous rock', 'basic igneous rock', 'gabbroid', 'clastic sediment', 'generic conglomerate', 'igneous rock', 'ultramafic igneous rock', 'particulate', 'Mineral-Oxide', 'Mineral-Sulfide or Sulfosalt', 'generic mudstone/biogenic sediment', 'charcoal', 'tephritoid/pyroclastic rock', 'glass rich igneous rock', 'metamorphic rock/basic igneous rock', 'generic sandstone/tuffite', 'trachytoid/pyroclastic rock', 'Mineral-Phosphate, Arsenate, or Vanadate', 'porphyry', 'carbonate sediment', 'Mineral-Sulfate, Selenate, or Tellurate', 'soil', 'Anthropogenic metal material', 'foiditoid', 'alkali feldspar granite', 'doleritic rock', 'gravel size sediment', 'generic mudstone/coal', 'Mineral-Halide', 'clastic sedimentary rock', 'breccia', 'non clastic siliceous sedimentary rock', 'pegmatite', 'clastic sedimentary 
rock/coal', 'foid gabbroid', 'sand size sediment', 'breccia gouge series', 'metamorphic rock', 'hybrid sedimentary rock', 'pyroclastic rock', 'Organic material', 'Mineral-Borate', 'phonolitoid', 'pyroxenite', 'carbonate sedimentary rock', 'basalt/glass rich igneous rock', 'fine grained igneous rock', 'Mineral-Native Element', 'metamorphic rock/generic mudstone', 'gaseous material', 'generic sandstone/generic mudstone', 'fragmental igneous rock/fine grained igneous rock', 'mylonitic rock', 'foidolite', 'Mineral-Silicate or Germanate', 'organic rich sedimentary rock', 'metamorphic rock/gabbroid', 'Mineral-Organic Compound', 'peridotite', 'non clastic siliceous sediment', 'phaneritic igneous rock', 'massive sulphide', 'glass', 'foid syenitoid', 'iron rich sedimentary rock', 'generic sandstone/organic rich sedimentary rock', 'metasomatic rock/generic mudstone', 'coal/Mineral-Phosphate, Arsenate, or Vanadate', 'Soil/sediment/rock', 'gabbroic rock', 'tephritoid', 'phaneritic igneous rock/pyroclastic rock', 'generic mudstone', 'rhyolitoid', 'carbonate sedimentary rock/generic mudstone', 'metasomatic rock/peridotite', 'andesite', 'diamictite', 'chemical sedimentary material', 'high magnesium fine grained igneous rock', 'metasomatic rock', 'Mineral-Carbonate or Nitrate', 'generic sandstone/coal', 'Liquid water', 'syenitoid', 'tephra', 'cataclasite series', 'rock', 'granitoid', 'impact generated material', 'aphanite', 'biogenic sediment', 'exotic composition igneous rock', 'carbonate sedimentary rock/clastic sedimentary rock', 'trachytoid', 'sedimentary rock'}\n" + ] + } + ], + "source": [ + "gold_labels = set(gold[\"description_material\"].values.tolist())\n", + "print(gold_labels)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "9518bdd8", + "metadata": {}, + "outputs": [], + "source": [ + "# add parent labels for each possible label\n", + "def expand(value):\n", + " splitted = value.split(\"/\")\n", + " expanded = set(splitted)\n", + " \n", + " for 
split in splitted:\n", + " split = split.lower()\n", + " for p in parents[split]:\n", + " if p in spaced_map: # add spaced out version\n", + " expanded.add(spaced_map[p])\n", + " else:\n", + " expanded.add(p)\n", + " expanded = set([x for x in expanded if x is not None])\n", + " \n", + " return \"/\".join(list(expanded))\n", + " \n", + "\n", + "gold[\"description_material\"]=gold[\"description_material\"].apply(expand)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "00eb6d43", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'material/granitoid/phaneritic igneous rock/rock or sediment/igneous rock/natural solid material/rock/granodiorite', 'material/rock or sediment/igneous rock/natural solid material/glass rich igneous rock/rock', 'material/rock or sediment/igneous rock/natural solid material/rock/fine grained igneous rock/dacite', 'material/sediment/rock or sediment/generic mudstone/natural solid material/biogenic sediment/sedimentary rock/rock', 'material/fluid material/gaseous material', 'Anthropogenic metal material/material/any anthropogenic material', 'material/clastic sedimentary rock/rock or sediment/natural solid material/sedimentary rock/rock', 'Liquid water/material/fluid material', 'material/phaneritic igneous rock/rock or sediment/igneous rock/natural solid material/rock', 'material/rock or sediment/natural solid material/residual material/rock', 'material/rock or sediment/massive sulphide/natural solid material/rock', 'material/natural solid material/Mineral-Phosphate, Arsenate, or Vanadate/mineral', 'material/phaneritic igneous rock/rock or sediment/igneous rock/natural solid material/foid syenitoid/rock', 'material/rock or sediment/generic mudstone/natural solid material/sedimentary rock/rock', 'material/granitoid/phaneritic igneous rock/rock or sediment/igneous rock/natural solid material/alkali feldspar granite/rock', 'material/rock or sediment/generic mudstone/natural 
solid material/metasomatic rock/sedimentary rock/rock', 'biogenic non-organic material/material/charcoal', 'material/sediment/rock or sediment/natural solid material/biogenic sediment', 'material/phaneritic igneous rock/rock or sediment/igneous rock/natural solid material/anorthositic rock/rock', 'material/rock or sediment/natural solid material/generic sandstone/organic rich sedimentary rock/sedimentary rock/rock', 'material/sediment/rock or sediment/natural solid material/tephra', 'material/dioritoid/phaneritic igneous rock/rock or sediment/igneous rock/natural solid material/rock', 'material/generic conglomerate/rock or sediment/natural solid material/sedimentary rock/rock', 'material/phaneritic igneous rock/rock or sediment/igneous rock/natural solid material/rock/fragmental igneous rock/pyroclastic rock', 'material/sediment/rock or sediment/mud size sediment/natural solid material', 'material/rock or sediment/natural solid material/organic rich sedimentary rock/sedimentary rock/rock', 'glass/anthropogenic material/any anthropogenic material/material', 'material/granitoid/phaneritic igneous rock/rock or sediment/igneous rock/natural solid material/rock', 'material/rock or sediment/igneous rock/natural solid material/ultramafic igneous rock/metasomatic rock/peridotite/rock', 'material/rock or sediment/igneous rock/natural solid material/rock/trachytoid/fragmental igneous rock/fine grained igneous rock/pyroclastic rock', 'material/rock or sediment/igneous rock/natural solid material/phonolitoid/fine grained igneous rock/rock', 'material/rock or sediment/breccia/natural solid material/rock', 'material/tephritoid/rock or sediment/igneous rock/natural solid material/fine grained igneous rock/rock', 'basalt/material/rock or sediment/igneous rock/natural solid material/glass rich igneous rock/fine grained igneous rock/rock', 'material/dioritoid/phaneritic igneous rock/rock or sediment/igneous rock/natural solid material/metamorphic rock/rock', 'material/rock or 
sediment/igneous rock/natural solid material/metamorphic rock/fine grained igneous rock/rock', 'Mineral-Oxide/natural solid material/mineral/material', 'material/gabbroid/phaneritic igneous rock/rock or sediment/igneous rock/natural solid material/metamorphic rock/rock', 'material/natural solid material/Mineral-Borate/mineral', 'material', 'material/sediment/rock or sediment/natural solid material/non clastic siliceous sediment', 'material/rock or sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock', 'material/phaneritic igneous rock/rock or sediment/igneous rock/natural solid material/foid gabbroid/rock', 'material/rock or sediment/igneous rock/natural solid material/doleritic rock/rock', 'material/clastic sediment/sediment/rock or sediment/natural solid material/diamicton', 'material/phaneritic igneous rock/rock or sediment/foidolite/igneous rock/natural solid material/rock', 'material/rock or sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock', 'material/rock or sediment/generic mudstone/natural solid material/metamorphic rock/sedimentary rock/rock', 'material/clastic sediment/sediment/rock or sediment/natural solid material', 'material/granitoid/phaneritic igneous rock/rock or sediment/igneous rock/natural solid material/tonalite/rock', 'material/rock or sediment/coal/natural solid material/organic rich sedimentary rock/sedimentary rock/rock', 'material/rock or sediment/igneous rock/natural solid material/fragmental igneous rock/fine grained igneous rock/rock', 'material/rock or sediment/igneous rock/rhyolitoid/natural solid material/glass rich igneous rock/fine grained igneous rock/rock', 'gabbroic rock/gabbroid/material/phaneritic igneous rock/rock or sediment/igneous rock/natural solid material/rock', 'material/gabbroid/phaneritic igneous rock/rock or sediment/igneous rock/natural solid material/rock', 'material/natural solid material/mineral/Mineral-Carbonate or Nitrate', 'material/rock or 
sediment/igneous rock/natural solid material/exotic composition igneous rock/rock', 'material/phaneritic igneous rock/rock or sediment/igneous rock/natural solid material/pegmatite/rock', 'basalt/material/sediment/rock or sediment/igneous rock/natural solid material/chemical sedimentary material/fine grained igneous rock/rock', 'material/rock or sediment/igneous rock/natural solid material/ultramafic igneous rock/rock', 'material/rock or sediment/fault related material/natural solid material/rock', 'material/rock or sediment/igneous rock/natural solid material/fragmental igneous rock/doleritic rock/rock', 'material/rock or sediment/natural solid material/sedimentary rock/rock', 'material/rock or sediment/natural solid material/metasomatic rock/rock', 'material/rock or sediment/natural solid material/aphanite/rock', 'material/granitoid/phaneritic igneous rock/rock or sediment/igneous rock/natural solid material/granite/rock', 'material/sediment/rock or sediment/natural solid material/sand size sediment', 'Mineral-Halide/material/natural solid material/mineral', 'material/rock or sediment/generic mudstone/natural solid material/generic sandstone/sedimentary rock/rock', 'Organic material/material', 'material/rock or sediment/igneous rock/natural solid material/foiditoid/fine grained igneous rock/rock', 'material/clastic sedimentary rock/rock or sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock', 'andesite/material/rock or sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock', 'material/rock or sediment/igneous rock/natural solid material/fragmental igneous rock/rock', 'material/phaneritic igneous rock/rock or sediment/igneous rock/natural solid material/syenitoid/rock', 'material/rock or sediment/igneous rock/rhyolitoid/natural solid material/fine grained igneous rock/rock', 'material/rock or sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock', 'material/rock or 
sediment/natural solid material/sedimentary rock/hybrid sedimentary rock/rock', 'material/rock or sediment/igneous rock/natural solid material/rock/fragmental igneous rock/pyroclastic rock', 'material/rock or sediment/fault related material/natural solid material/cataclasite series/rock', 'material/sediment/rock or sediment/natural solid material/chemical sedimentary material', 'material/rock or sediment/igneous rock/natural solid material/fine grained igneous rock/rock', 'material/rock or sediment/natural solid material/generic sandstone/sedimentary rock/rock', 'material/tephritoid/rock or sediment/igneous rock/natural solid material/rock/fragmental igneous rock/fine grained igneous rock/pyroclastic rock', 'material/anthropogenic material/any anthropogenic material/Ceramic clay', 'diamictite/material/clastic sedimentary rock/rock or sediment/natural solid material/sedimentary rock/rock', 'material/any anthropogenic material/Anthropogenic material', 'material/rock or sediment/natural solid material/iron rich sedimentary rock/sedimentary rock/rock', 'material/phaneritic igneous rock/quartz rich igneous rock/rock or sediment/igneous rock/natural solid material/rock', 'material/natural solid material/Mineral-Sulfide or Sulfosalt/mineral', 'Mineral-Organic Compound/material/natural solid material/mineral', 'material/rock or sediment/coal/natural solid material/Mineral-Phosphate, Arsenate, or Vanadate/organic rich sedimentary rock/sedimentary rock/mineral/rock', 'material/sediment/rock or sediment/natural solid material/carbonate sediment', 'material/natural solid material/soil', 'material/clastic sedimentary rock/rock or sediment/coal/natural solid material/organic rich sedimentary rock/sedimentary rock/rock', 'material/natural solid material/rock/rock or sediment', 'material/rock or sediment/mylonitic rock/fault related material/natural solid material/rock', 'particulate/material/natural solid material', 'material/rock or sediment/igneous rock/acidic igneous 
rock/natural solid material/rock', 'material/impact generated material/rock or sediment/natural solid material/rock', 'material/sediment/rock or sediment/natural solid material/Soil/rock', 'material/natural solid material/mineral/Mineral-Silicate or Germanate', 'material/rock or sediment/fault related material/natural solid material/breccia gouge series/rock', 'basalt/material/rock or sediment/igneous rock/natural solid material/fine grained igneous rock/rock', 'material/rock or sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock', 'material/natural solid material/mineral', 'material/rock or sediment/igneous rock/rhyolitoid/natural solid material/rock/fragmental igneous rock/fine grained igneous rock/pyroclastic rock', 'Mineral-Sulfate, Selenate, or Tellurate/natural solid material/mineral/material', 'material/rock or sediment/igneous rock/natural solid material/rock', 'basic igneous rock/material/rock or sediment/igneous rock/natural solid material/metamorphic rock/rock', 'basic igneous rock/material/rock or sediment/igneous rock/natural solid material/rock', 'material/sediment/rock or sediment/natural solid material/Rock', 'Biogenic non-organic material/material', 'material/rock or sediment/coal/natural solid material/generic sandstone/organic rich sedimentary rock/sedimentary rock/rock', 'material/rock or sediment/igneous rock/natural solid material/porphyry/rock', 'material/clastic sediment/sediment/rock or sediment/igneous rock/dacite/natural solid material/fine grained igneous rock/diamicton/rock', 'material/sediment/rock or sediment/natural solid material/gravel size sediment', 'material/rock or sediment/generic mudstone/coal/natural solid material/organic rich sedimentary rock/sedimentary rock/rock', 'material/rock or sediment/igneous rock/natural solid material/high magnesium fine grained igneous rock/fine grained igneous rock/rock', 'material/rock or sediment/igneous rock/natural solid material/ultramafic igneous 
rock/metasomatic rock/rock', 'material/tuffite/rock or sediment/natural solid material/rock', 'material/rock or sediment/generic mudstone/natural solid material/carbonate sedimentary rock/sedimentary rock/rock', 'andesite/material/clastic sediment/sediment/rock or sediment/igneous rock/natural solid material/intermediate composition igneous rock/diamicton/rock', 'material/natural solid material/sediment/rock or sediment', 'material/rock or sediment/natural solid material/metamorphic rock/rock', 'material/rock or sediment/natural solid material/generic sandstone/rock/sedimentary rock/tuffite', 'material/natural solid material/mineral/Mineral-Native Element', 'material/rock or sediment/non clastic siliceous sedimentary rock/natural solid material/sedimentary rock/rock', 'material/rock or sediment/igneous rock/hornblendite/ultramafic igneous rock/natural solid material/rock', 'basalt/material/rock or sediment/igneous rock/natural solid material/rock/fragmental igneous rock/fine grained igneous rock/pyroclastic rock'}\n" + ] + } + ], + "source": [ + "gold_labels = set(gold[\"description_material\"].values.tolist())\n", + "print(gold_labels)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "eb217383", + "metadata": {}, + "outputs": [], + "source": [ + "# replace multilabels and remove duplicates for parent labels that are not handled\n", + "duplicates = {\n", + " \"rock or sediment\" : \"rock/sediment\",\n", + " \"mixed soil sediment or rock\": \"soil/sediment/rock\"\n", + "}\n", + "def remove_parent_duplicates(value):\n", + " splitted = value.split(\"/\")\n", + " unique = set([x.lower() for x in splitted])\n", + " \n", + " for label in splitted:\n", + " if label in duplicates:\n", + " unique = list(unique)\n", + " unique.remove(label)\n", + " unique.extend(duplicates[label].split(\"/\"))\n", + " unique = set(unique)\n", + " \n", + " unique = set([x for x in unique if x is not None and x not in duplicates])\n", + " \n", + " return 
\"/\".join(list(unique))\n", + " \n", + "\n", + "\n", + "gold[\"description_material\"]=gold[\"description_material\"].apply(remove_parent_duplicates)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "25b0c64c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'material/natural solid material/mineral/mineral-oxide', 'material/fluid material/gaseous material', 'material/sediment/non clastic siliceous sedimentary rock/natural solid material/sedimentary rock/rock', 'material/granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock', 'material/sediment/igneous rock/natural solid material/ultramafic igneous rock/rock', 'material/granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/alkali feldspar granite/rock', 'biogenic non-organic material/material', 'material/sediment/massive sulphide/natural solid material/rock', 'material/sediment/fault related material/natural solid material/cataclasite series/rock', 'material/clastic sedimentary rock/sediment/coal/natural solid material/organic rich sedimentary rock/sedimentary rock/rock', 'material/phaneritic igneous rock/sediment/igneous rock/natural solid material/foid syenitoid/rock', 'material/sediment/igneous rock/natural solid material/hornblendite/ultramafic igneous rock/rock', 'material/sediment/natural solid material/organic rich sedimentary rock/sedimentary rock/rock', 'material/sediment/natural solid material/metasomatic rock/rock', 'material/sediment/igneous rock/natural solid material/glass rich igneous rock/rock', 'material/dioritoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock', 'material/anthropogenic metal material/any anthropogenic material', 'material/sediment/igneous rock/rhyolitoid/natural solid material/fine grained igneous rock/rock', 'biogenic non-organic material/material/charcoal', 'material/sediment/igneous rock/natural solid 
material/foiditoid/fine grained igneous rock/rock', 'material/natural solid material/mineral-halide/mineral', 'material/sediment/natural solid material/aphanite/rock', 'material/granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/granite/rock', 'material/sediment/impact generated material/natural solid material/rock', 'glass/anthropogenic material/any anthropogenic material/material', 'material/sediment/igneous rock/rhyolitoid/natural solid material/glass rich igneous rock/fine grained igneous rock/rock', 'material/sediment/generic mudstone/natural solid material/biogenic sediment/sedimentary rock/rock', 'material/sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock', 'material/sediment/igneous rock/natural solid material/phonolitoid/fine grained igneous rock/rock', 'material/clastic sediment/sediment/natural solid material/rock', 'material/sediment/generic mudstone/natural solid material/carbonate sedimentary rock/sedimentary rock/rock', 'basic igneous rock/material/sediment/igneous rock/natural solid material/rock', 'material/tephritoid/sediment/igneous rock/natural solid material/fine grained igneous rock/rock', 'material/clastic sediment/sediment/natural solid material/diamicton/rock', 'material/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock', 'material/sediment/igneous rock/natural solid material/pyroclastic rock/trachytoid/fragmental igneous rock/fine grained igneous rock/rock', 'material/gabbroid/phaneritic igneous rock/sediment/igneous rock/natural solid material/metamorphic rock/rock', 'material/sediment/coal/natural solid material/mineral-phosphate, arsenate, or vanadate/organic rich sedimentary rock/sedimentary rock/mineral/rock', 'material/natural solid material/mineral/mineral-silicate or germanate', 'material/sediment/igneous rock/natural solid material/exotic composition igneous rock/rock', 'material/liquid water/fluid material', 'material/anthropogenic material/ceramic 
clay/any anthropogenic material', 'material', 'material/natural solid material/mineral-sulfate, selenate, or tellurate/mineral', 'material/sediment/breccia/natural solid material/rock', 'material/dioritoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/metamorphic rock/rock', 'material/sediment/natural solid material/carbonate sediment/rock', 'material/sediment/igneous rock/natural solid material/porphyry/rock', 'material/soil/sediment/natural solid material/rock', 'material/sediment/generic mudstone/natural solid material/generic sandstone/sedimentary rock/rock', 'material/sediment/natural solid material/biogenic sediment/rock', 'material/sediment/coal/natural solid material/organic rich sedimentary rock/sedimentary rock/rock', 'material/sediment/igneous rock/natural solid material/fragmental igneous rock/fine grained igneous rock/rock', 'material/phaneritic igneous rock/sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock', 'material/sediment/generic mudstone/natural solid material/sedimentary rock/rock', 'material/clastic sediment/sediment/igneous rock/natural solid material/rock/fine grained igneous rock/diamicton/dacite', 'material/sediment/igneous rock/natural solid material/fragmental igneous rock/rock', 'material/sediment/natural solid material/tephra/rock', 'material/sediment/generic mudstone/coal/natural solid material/organic rich sedimentary rock/sedimentary rock/rock', 'basalt/material/sediment/igneous rock/natural solid material/fine grained igneous rock/rock', 'mineral-organic compound/material/natural solid material/mineral', 'material/sediment/igneous rock/natural solid material/doleritic rock/rock', 'basic igneous rock/material/sediment/igneous rock/natural solid material/metamorphic rock/rock', 'material/clastic sedimentary rock/sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock', 'material/sediment/igneous rock/natural solid material/high magnesium fine 
grained igneous rock/fine grained igneous rock/rock', 'material/sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock', 'basalt/material/sediment/igneous rock/natural solid material/glass rich igneous rock/fine grained igneous rock/rock', 'material/tephritoid/sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/fine grained igneous rock/rock', 'material/sediment/natural solid material/non clastic siliceous sediment/rock', 'material/phaneritic igneous rock/sediment/igneous rock/natural solid material/foid gabbroid/rock', 'material/granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock', 'material/natural solid material/mineral/mineral-phosphate, arsenate, or vanadate', 'diamictite/material/clastic sedimentary rock/sediment/natural solid material/sedimentary rock/rock', 'basalt/material/sediment/igneous rock/natural solid material/chemical sedimentary material/fine grained igneous rock/rock', 'material/sediment/generic conglomerate/natural solid material/sedimentary rock/rock', 'material/sediment/mud size sediment/natural solid material/rock', 'material/natural solid material/sediment/rock', 'material/sediment/igneous rock/natural solid material/ultramafic igneous rock/metasomatic rock/rock', 'mineral-borate/material/natural solid material/mineral', 'material/sediment/tuffite/natural solid material/generic sandstone/sedimentary rock/rock', 'material/natural solid material/mineral/mineral-sulfide or sulfosalt', 'material/clastic sedimentary rock/sediment/natural solid material/sedimentary rock/rock', 'material/sediment/fault related material/natural solid material/rock', 'material/sediment/natural solid material/metamorphic rock/rock', 'material/phaneritic igneous rock/sediment/igneous rock/foidolite/natural solid material/rock', 'basalt/material/sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/fine grained igneous rock/rock', 
'material/gabbroid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock', 'material/sediment/igneous rock/natural solid material/rock', 'material/sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock', 'andesite/material/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock', 'material/sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock', 'material/sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock', 'material/sediment/natural solid material/generic sandstone/sedimentary rock/rock', 'material/sediment/natural solid material/sedimentary rock/rock', 'material/granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/tonalite/rock', 'organic material/material', 'material/natural solid material/soil', 'material/sediment/natural solid material/iron rich sedimentary rock/sedimentary rock/rock', 'material/anthropogenic material/any anthropogenic material', 'material/sediment/igneous rock/natural solid material/fragmental igneous rock/doleritic rock/rock', 'particulate/material/natural solid material', 'material/sediment/generic mudstone/natural solid material/metamorphic rock/sedimentary rock/rock', 'material/sediment/natural solid material/residual material/rock', 'material/sediment/natural solid material/gravel size sediment/rock', 'material/sediment/natural solid material/chemical sedimentary material/rock', 'material/sediment/igneous rock/acidic igneous rock/natural solid material/rock', 'material/sediment/dacite/igneous rock/natural solid material/fine grained igneous rock/rock', 'material/sediment/mylonitic rock/fault related material/natural solid material/rock', 'material/natural solid material/mineral', 'material/phaneritic igneous rock/sediment/igneous rock/natural solid material/syenitoid/rock', 'material/phaneritic igneous rock/quartz rich igneous rock/sediment/igneous rock/natural 
solid material/rock', 'material/phaneritic igneous rock/sediment/igneous rock/natural solid material/pegmatite/rock', 'material/sediment/natural solid material/generic sandstone/organic rich sedimentary rock/sedimentary rock/rock', 'material/sediment/tuffite/natural solid material/rock', 'material/sediment/igneous rock/natural solid material/fine grained igneous rock/rock', 'andesite/material/clastic sediment/sediment/igneous rock/natural solid material/intermediate composition igneous rock/diamicton/rock', 'material/sediment/igneous rock/natural solid material/ultramafic igneous rock/metasomatic rock/peridotite/rock', 'natural solid material/material/mineral-carbonate or nitrate/mineral', 'material/sediment/generic mudstone/natural solid material/metasomatic rock/sedimentary rock/rock', 'material/sediment/natural solid material/sedimentary rock/hybrid sedimentary rock/rock', 'material/sediment/fault related material/natural solid material/breccia gouge series/rock', 'material/sediment/igneous rock/natural solid material/metamorphic rock/fine grained igneous rock/rock', 'material/phaneritic igneous rock/sediment/igneous rock/natural solid material/anorthositic rock/rock', 'material/natural solid material/mineral/mineral-native element', 'gabbroic rock/gabbroid/material/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock', 'material/sediment/natural solid material/sand size sediment/rock', 'material/sediment/coal/natural solid material/generic sandstone/organic rich sedimentary rock/sedimentary rock/rock', 'material/sediment/igneous rock/rhyolitoid/natural solid material/pyroclastic rock/fragmental igneous rock/fine grained igneous rock/rock'}\n" + ] + } + ], + "source": [ + "gold_labels = set(gold[\"description_material\"].values.tolist())\n", + "print(gold_labels)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "7b443dc6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0description_supplementMetadata_localitydescription_materialdescription_supplementMetadata_countrydescription_supplementMetadata_provincedescription_sampleTypedescription_supplementMetadata_platformTypedescription_supplementMetadata_geologicalAgedescription_supplementMetadata_locationDescriptiondescription_supplementMetadata_purpose...description_supplementMetadata_geologicalUnitdescription_supplementMetadata_localityDescriptiondescription_supplementMetadata_originalArchivedescription_supplementMetadata_platformDescrdescription_collectionMethoddescription_igsnPrefixdescription_supplementMetadata_cruiseFieldPrgrmdescription_supplementMetadata_publicationUrl_descriptionoriginal_high_labeloriginal_label
00NaNmaterial/natural solid material/mineral-sulfat...United StatesNew Jerseya sample that is an individual unit, including...NaNNaNCoordinates for Sterling Hill Mine (MRDS ID: W...NaN...NaNOgdensburgNaNNaNNaNNHBNaNSmithsonian collections record for NMNH C6294-...mat:mineralming:sulfateselenatetelluratemineral
11NaNmaterial/natural solid material/mineral-sulfat...United StatesNew Jerseya sample that is an individual unit, including...NaNNaNMatched to the GeoNames record for Franklin, S...NaN...NaNFranklinNaNNaNNaNNHBNaNSmithsonian collections record for NMNH C6295-...mat:mineralming:sulfateselenatetelluratemineral
22NaNmaterial/natural solid material/mineral-sulfat...United StatesNew Jerseya sample that is an individual unit, including...NaNNaNMatched to the GeoNames record for Franklin, S...NaN...NaNFranklinNaNNaNNaNNHBNaNSmithsonian collections record for NMNH C6295-...mat:mineralming:sulfateselenatetelluratemineral
33NaNmaterial/natural solid material/mineral/minera...United StatesNew Jerseya sample that is an individual unit, including...NaNNaNCoordinates from GEOLocate for parse pattern \"...NaN...NaNFranklinNaNNaNNaNNHBNaNSmithsonian collections record for NMNH C6296-...mat:mineralming:silicategermanatemineral
44NaNmaterial/natural solid material/mineral/minera...United StatesNew Jerseya sample that is an individual unit, including...NaNNaNCoordinates for Sterling Hill Mine (MRDS ID: W...NaN...NaNOgdensburgNaNNaNNaNNHBNaNSmithsonian collections record for NMNH C6299-...mat:mineralming:silicategermanatemineral
..................................................................
987548995644NaNmaterial/sediment/natural solid material/metam...NaNNaNa sample that is an individual unit, including...NaNNaNNaNNaN...NaNNaNDept of Geology, Colorado College, Colorado Sp...NaNManualMBLNaNNaNmat:rockrksd:Metamorphic Rock
987549995645NaNmaterial/sediment/natural solid material/metam...NaNNaNa sample that is an individual unit, including...NaNNaNNaNNaN...NaNNaNDept of Geology, Colorado College, Colorado Sp...NaNManualMBLNaNNaNmat:rockrksd:Metamorphic Rock
987550995646NaNmaterial/sediment/natural solid material/metam...NaNNaNa sample that is an individual unit, including...NaNNaNNaNNaN...NaNNaNDepartment of Geology, Bryn Mawr College, Bryn...NaNManualUGANaNNaNmat:rockrksd:Metamorphic Rock
987551995647NaNmaterial/sediment/natural solid material/metam...NaNNaNa sample that is an individual unit, including...NaNNaNNaNNaN...NaNNaNDept of Geology, Colorado College, Colorado Sp...NaNManualLTRNaNNaNmat:rockrksd:Metamorphic Rock
987552995648NaNmaterial/sediment/natural solid material/metam...NaNNaNa sample that is an individual unit, including...NaNNaNNaNNaN...NaNNaNDept of Geology, Colorado College, Colorado Sp...NaNManualLTRNaNNaNmat:rockrksd:Metamorphic Rock
\n", + "

987553 rows × 31 columns

\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 description_supplementMetadata_locality \\\n", + "0 0 NaN \n", + "1 1 NaN \n", + "2 2 NaN \n", + "3 3 NaN \n", + "4 4 NaN \n", + "... ... ... \n", + "987548 995644 NaN \n", + "987549 995645 NaN \n", + "987550 995646 NaN \n", + "987551 995647 NaN \n", + "987552 995648 NaN \n", + "\n", + " description_material \\\n", + "0 material/natural solid material/mineral-sulfat... \n", + "1 material/natural solid material/mineral-sulfat... \n", + "2 material/natural solid material/mineral-sulfat... \n", + "3 material/natural solid material/mineral/minera... \n", + "4 material/natural solid material/mineral/minera... \n", + "... ... \n", + "987548 material/sediment/natural solid material/metam... \n", + "987549 material/sediment/natural solid material/metam... \n", + "987550 material/sediment/natural solid material/metam... \n", + "987551 material/sediment/natural solid material/metam... \n", + "987552 material/sediment/natural solid material/metam... \n", + "\n", + " description_supplementMetadata_country \\\n", + "0 United States \n", + "1 United States \n", + "2 United States \n", + "3 United States \n", + "4 United States \n", + "... ... \n", + "987548 NaN \n", + "987549 NaN \n", + "987550 NaN \n", + "987551 NaN \n", + "987552 NaN \n", + "\n", + " description_supplementMetadata_province \\\n", + "0 New Jersey \n", + "1 New Jersey \n", + "2 New Jersey \n", + "3 New Jersey \n", + "4 New Jersey \n", + "... ... \n", + "987548 NaN \n", + "987549 NaN \n", + "987550 NaN \n", + "987551 NaN \n", + "987552 NaN \n", + "\n", + " description_sampleType \\\n", + "0 a sample that is an individual unit, including... \n", + "1 a sample that is an individual unit, including... \n", + "2 a sample that is an individual unit, including... \n", + "3 a sample that is an individual unit, including... \n", + "4 a sample that is an individual unit, including... \n", + "... ... \n", + "987548 a sample that is an individual unit, including... 
\n", + "987549 a sample that is an individual unit, including... \n", + "987550 a sample that is an individual unit, including... \n", + "987551 a sample that is an individual unit, including... \n", + "987552 a sample that is an individual unit, including... \n", + "\n", + " description_supplementMetadata_platformType \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "... ... \n", + "987548 NaN \n", + "987549 NaN \n", + "987550 NaN \n", + "987551 NaN \n", + "987552 NaN \n", + "\n", + " description_supplementMetadata_geologicalAge \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "... ... \n", + "987548 NaN \n", + "987549 NaN \n", + "987550 NaN \n", + "987551 NaN \n", + "987552 NaN \n", + "\n", + " description_supplementMetadata_locationDescription \\\n", + "0 Coordinates for Sterling Hill Mine (MRDS ID: W... \n", + "1 Matched to the GeoNames record for Franklin, S... \n", + "2 Matched to the GeoNames record for Franklin, S... \n", + "3 Coordinates from GEOLocate for parse pattern \"... \n", + "4 Coordinates for Sterling Hill Mine (MRDS ID: W... \n", + "... ... \n", + "987548 NaN \n", + "987549 NaN \n", + "987550 NaN \n", + "987551 NaN \n", + "987552 NaN \n", + "\n", + " description_supplementMetadata_purpose ... \\\n", + "0 NaN ... \n", + "1 NaN ... \n", + "2 NaN ... \n", + "3 NaN ... \n", + "4 NaN ... \n", + "... ... ... \n", + "987548 NaN ... \n", + "987549 NaN ... \n", + "987550 NaN ... \n", + "987551 NaN ... \n", + "987552 NaN ... \n", + "\n", + " description_supplementMetadata_geologicalUnit \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "... ... \n", + "987548 NaN \n", + "987549 NaN \n", + "987550 NaN \n", + "987551 NaN \n", + "987552 NaN \n", + "\n", + " description_supplementMetadata_localityDescription \\\n", + "0 Ogdensburg \n", + "1 Franklin \n", + "2 Franklin \n", + "3 Franklin \n", + "4 Ogdensburg \n", + "... ... 
\n", + "987548 NaN \n", + "987549 NaN \n", + "987550 NaN \n", + "987551 NaN \n", + "987552 NaN \n", + "\n", + " description_supplementMetadata_originalArchive \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "... ... \n", + "987548 Dept of Geology, Colorado College, Colorado Sp... \n", + "987549 Dept of Geology, Colorado College, Colorado Sp... \n", + "987550 Department of Geology, Bryn Mawr College, Bryn... \n", + "987551 Dept of Geology, Colorado College, Colorado Sp... \n", + "987552 Dept of Geology, Colorado College, Colorado Sp... \n", + "\n", + " description_supplementMetadata_platformDescr \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "... ... \n", + "987548 NaN \n", + "987549 NaN \n", + "987550 NaN \n", + "987551 NaN \n", + "987552 NaN \n", + "\n", + " description_collectionMethod description_igsnPrefix \\\n", + "0 NaN NHB \n", + "1 NaN NHB \n", + "2 NaN NHB \n", + "3 NaN NHB \n", + "4 NaN NHB \n", + "... ... ... \n", + "987548 Manual MBL \n", + "987549 Manual MBL \n", + "987550 Manual UGA \n", + "987551 Manual LTR \n", + "987552 Manual LTR \n", + "\n", + " description_supplementMetadata_cruiseFieldPrgrm \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "... ... \n", + "987548 NaN \n", + "987549 NaN \n", + "987550 NaN \n", + "987551 NaN \n", + "987552 NaN \n", + "\n", + " description_supplementMetadata_publicationUrl_description \\\n", + "0 Smithsonian collections record for NMNH C6294-... \n", + "1 Smithsonian collections record for NMNH C6295-... \n", + "2 Smithsonian collections record for NMNH C6295-... \n", + "3 Smithsonian collections record for NMNH C6296-... \n", + "4 Smithsonian collections record for NMNH C6299-... \n", + "... ... 
\n", + "987548 NaN \n", + "987549 NaN \n", + "987550 NaN \n", + "987551 NaN \n", + "987552 NaN \n", + "\n", + " original_high_label original_label \n", + "0 mat:mineral ming:sulfateselenatetelluratemineral \n", + "1 mat:mineral ming:sulfateselenatetelluratemineral \n", + "2 mat:mineral ming:sulfateselenatetelluratemineral \n", + "3 mat:mineral ming:silicategermanatemineral \n", + "4 mat:mineral ming:silicategermanatemineral \n", + "... ... ... \n", + "987548 mat:rock rksd:Metamorphic Rock \n", + "987549 mat:rock rksd:Metamorphic Rock \n", + "987550 mat:rock rksd:Metamorphic Rock \n", + "987551 mat:rock rksd:Metamorphic Rock \n", + "987552 mat:rock rksd:Metamorphic Rock \n", + "\n", + "[987553 rows x 31 columns]" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gold" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "08e3ca50", + "metadata": {}, + "outputs": [], + "source": [ + "# store the map of multi-label mapping\n", + "mapping = {}\n", + "for _, row in gold.iterrows():\n", + " mapped = row['description_material']\n", + " original = row['original_label']\n", + " if original == \"material\":\n", + " mapped = \"material\"\n", + " else:\n", + " mapped = \"/\".join([x for x in mapped.split(\"/\") if x != \"material\"])\n", + " \n", + " mapping[original] = mapped" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "4cf47d39", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ming:sulfateselenatetelluratemineral': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n", + " 'ming:silicategermanatemineral': 'natural solid material/mineral/mineral-silicate or germanate',\n", + " 'ming:nativeelementmineral': 'natural solid material/mineral/mineral-native element',\n", + " 'ming:oxidemineral': 'natural solid material/mineral/mineral-oxide',\n", + " 'mat:soil': 'natural solid material/soil',\n", + " 'ming:carbonatenitratemineral': 
'natural solid material/mineral-carbonate or nitrate/mineral',\n", + " 'rksd:Sedimentary Rock': 'sediment/natural solid material/sedimentary rock/rock',\n", + " 'ming:halidemineral': 'natural solid material/mineral-halide/mineral',\n", + " 'ming:phosphatearsenatevanadatemineral': 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n", + " 'ming:sulfidesulfosaltmineral': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n", + " 'rksd:Generic Mudstone': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n", + " 'ming:boratemineral': 'mineral-borate/natural solid material/mineral',\n", + " 'mat:organicmaterial': 'organic material',\n", + " 'rksd:Generic Sandstone': 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n", + " 'mat:liquidwater': 'liquid water/fluid material',\n", + " 'mat:mineral': 'natural solid material/mineral',\n", + " 'mat:biogenicnonorganicmaterial': 'biogenic non-organic material',\n", + " 'rksd:Basalt': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n", + " 'ocmat:glass': 'glass/anthropogenic material/any anthropogenic material',\n", + " 'mat:sediment': 'natural solid material/sediment/rock',\n", + " 'rksd:Carbonate Sediment': 'sediment/natural solid material/carbonate sediment/rock',\n", + " 'rksd:Sand Size Sediment': 'sediment/natural solid material/sand size sediment/rock',\n", + " 'rksd:Mud Size Sediment': 'sediment/mud size sediment/natural solid material/rock',\n", + " 'mat:rock': 'natural solid material/sediment/rock',\n", + " 'rksd:Andesite': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n", + " 'rksd:Pyroclastic Rock': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n", + " 'mat:otheranthropogenicmaterial': 'anthropogenic material/any anthropogenic material',\n", + " 'rksd:Metasomatic Rock': 'sediment/natural solid 
material/metasomatic rock/rock',\n", + " 'rksd:Metamorphic Rock': 'sediment/natural solid material/metamorphic rock/rock',\n", + " 'rksd:Breccia': 'sediment/breccia/natural solid material/rock',\n", + " 'rksd:Fine Grained Igneous Rock': 'sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n", + " 'rksd:Glass Rich Igneous Rock': 'sediment/igneous rock/natural solid material/glass rich igneous rock/rock',\n", + " 'rksd:Gravel Size Sediment': 'sediment/natural solid material/gravel size sediment/rock',\n", + " 'mat:rockorsediment': 'natural solid material/sediment/rock',\n", + " 'rksd:Tuffite': 'sediment/tuffite/natural solid material/rock',\n", + " 'rksd:Chemical Sedimentary Material': 'sediment/natural solid material/chemical sedimentary material/rock',\n", + " 'rksd:Doleritic Rock': 'sediment/igneous rock/natural solid material/doleritic rock/rock',\n", + " 'rksd:Fragmental Igneous Rock': 'sediment/igneous rock/natural solid material/fragmental igneous rock/rock',\n", + " 'rksd:Exotic Composition Igneous Rock': 'sediment/igneous rock/natural solid material/exotic composition igneous rock/rock',\n", + " 'rksd:Trachytoid': 'sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock',\n", + " 'rksd:Dacite': 'sediment/dacite/igneous rock/natural solid material/fine grained igneous rock/rock',\n", + " 'rksd:Non Clastic Siliceous Sedimentary Rock': 'sediment/non clastic siliceous sedimentary rock/natural solid material/sedimentary rock/rock',\n", + " 'rksd:Generic Conglomerate': 'sediment/generic conglomerate/natural solid material/sedimentary rock/rock',\n", + " 'rksd:Tephra': 'sediment/natural solid material/tephra/rock',\n", + " 'rksd:Dioritoid': 'dioritoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Granodiorite': 'granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock',\n", + " 'rksd:Acidic Igneous Rock': 'sediment/igneous 
rock/acidic igneous rock/natural solid material/rock',\n", + " 'rksd:Granitoid': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Phonolitoid': 'sediment/igneous rock/natural solid material/phonolitoid/fine grained igneous rock/rock',\n", + " 'rksd:Tephritoid': 'tephritoid/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n", + " 'rksd:Foiditoid': 'sediment/igneous rock/natural solid material/foiditoid/fine grained igneous rock/rock',\n", + " 'rksd:Pyroxenite': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock',\n", + " 'rksd:Igneous Rock': 'sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:anorthositic rock': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/anorthositic rock/rock',\n", + " 'rksd:Gabbroic Rock': 'gabbroic rock/gabbroid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:andesite': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n", + " 'rksd:Phaneritic Igneous Rock': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Biogenic Sediment': 'sediment/natural solid material/biogenic sediment/rock',\n", + " 'rksd:Carbonate Sedimentary Rock': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n", + " 'rksd:Gabbroid': 'gabbroid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Tonalite': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/tonalite/rock',\n", + " 'rksd:residual material': 'sediment/natural solid material/residual material/rock',\n", + " 'rksd:Peridotite': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n", + " 'rksd:Ultramafic Igneous Rock': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/rock',\n", + " 
'rksd:Porphyry': 'sediment/igneous rock/natural solid material/porphyry/rock',\n", + " 'mat:material': '',\n", + " 'rksd:Rhyolitoid': 'sediment/igneous rock/rhyolitoid/natural solid material/fine grained igneous rock/rock',\n", + " 'rksd:Organic Rich Sedimentary Rock': 'sediment/natural solid material/organic rich sedimentary rock/sedimentary rock/rock',\n", + " 'rksd:Anorthositic Rock': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/anorthositic rock/rock',\n", + " 'rksd:Diamictite': 'diamictite/clastic sedimentary rock/sediment/natural solid material/sedimentary rock/rock',\n", + " 'rksd:Impact Generated Material': 'sediment/impact generated material/natural solid material/rock',\n", + " 'rksd:Clastic Sediment': 'clastic sediment/sediment/natural solid material/rock',\n", + " 'rksd:Hornblendite': 'sediment/igneous rock/natural solid material/hornblendite/ultramafic igneous rock/rock',\n", + " 'rksd:Basalt/rksd:Glass Rich Igneous Rock': 'basalt/sediment/igneous rock/natural solid material/glass rich igneous rock/fine grained igneous rock/rock',\n", + " 'rksd:Syenitoid': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/syenitoid/rock',\n", + " 'rksd:Foid Syenitoid': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/foid syenitoid/rock',\n", + " 'rksd:Quartz Rich Igneous Rock': 'phaneritic igneous rock/quartz rich igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Mylonitic Rock': 'sediment/mylonitic rock/fault related material/natural solid material/rock',\n", + " 'mat:mixedsoilsedimentrock': 'soil/sediment/natural solid material/rock',\n", + " 'rksd:Clastic Sedimentary Rock': 'clastic sedimentary rock/sediment/natural solid material/sedimentary rock/rock',\n", + " 'ming:organicmineral': 'mineral-organic compound/natural solid material/mineral',\n", + " 'mat:particulate': 'particulate/natural solid material',\n", + " 'rksd:Diamicton': 'clastic sediment/sediment/natural solid 
material/diamicton/rock',\n", + " 'rksd:Generic Mudstone/rksd:Biogenic Sediment': 'sediment/generic mudstone/natural solid material/biogenic sediment/sedimentary rock/rock',\n", + " 'rksd:Coal': 'sediment/coal/natural solid material/organic rich sedimentary rock/sedimentary rock/rock',\n", + " 'rksd:Metamorphic Rock/rksd:Generic Mudstone': 'sediment/generic mudstone/natural solid material/metamorphic rock/sedimentary rock/rock',\n", + " 'rksd:Iron Rich Sedimentary Rock': 'sediment/natural solid material/iron rich sedimentary rock/sedimentary rock/rock',\n", + " 'rksd:High Magnesium Fine Grained Igneous Rock': 'sediment/igneous rock/natural solid material/high magnesium fine grained igneous rock/fine grained igneous rock/rock',\n", + " 'rksd:Basic Igneous Rock': 'basic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Foid Gabbroid': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/foid gabbroid/rock',\n", + " 'rksd:Foidolite': 'phaneritic igneous rock/sediment/igneous rock/foidolite/natural solid material/rock',\n", + " 'rksd:Breccia Gouge Series': 'sediment/fault related material/natural solid material/breccia gouge series/rock',\n", + " 'rksd:coal': 'sediment/coal/natural solid material/organic rich sedimentary rock/sedimentary rock/rock',\n", + " 'rksd:Carbonate Sedimentary Rock/rksd:Generic Mudstone': 'sediment/generic mudstone/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n", + " 'rksd:Carbonate Sedimentary Rock/rksd:Clastic Sedimentary Rock': 'clastic sedimentary rock/sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n", + " 'rksd:Generic Sandstone/rksd:Generic Mudstone': 'sediment/generic mudstone/natural solid material/generic sandstone/sedimentary rock/rock',\n", + " 'rksd:Cataclasite Series': 'sediment/fault related material/natural solid material/cataclasite series/rock',\n", + " 'rksd:Fault Related Material': 'sediment/fault related 
material/natural solid material/rock',\n", + " 'rksd:Massive Sulphide': 'sediment/massive sulphide/natural solid material/rock',\n", + " 'rksd:Alkali Feldspar Granite': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/alkali feldspar granite/rock',\n", + " 'rksd:Pyroclastic Rock/rksd:Basalt': 'basalt/sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/fine grained igneous rock/rock',\n", + " 'ocmat:charcoal': 'biogenic non-organic material/charcoal',\n", + " 'rksd:Basalt/rksd:Chemical Sedimentary Material': 'basalt/sediment/igneous rock/natural solid material/chemical sedimentary material/fine grained igneous rock/rock',\n", + " 'rksd:Generic Sandstone/rksd:Coal': 'sediment/coal/natural solid material/generic sandstone/organic rich sedimentary rock/sedimentary rock/rock',\n", + " 'mat:gas': 'fluid material/gaseous material',\n", + " 'rksd:Non Clastic Siliceous Sediment': 'sediment/natural solid material/non clastic siliceous sediment/rock',\n", + " 'rksd:Aphanite': 'sediment/natural solid material/aphanite/rock',\n", + " 'rksd:Fragmental Igneous Rock/rksd:Fine Grained Igneous Rock': 'sediment/igneous rock/natural solid material/fragmental igneous rock/fine grained igneous rock/rock',\n", + " 'rksd:basalt': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n", + " 'rksd:Generic Mudstone/rksd:Coal': 'sediment/generic mudstone/coal/natural solid material/organic rich sedimentary rock/sedimentary rock/rock',\n", + " 'rksd:Clastic Sedimentary Rock/rksd:Coal': 'clastic sedimentary rock/sediment/coal/natural solid material/organic rich sedimentary rock/sedimentary rock/rock',\n", + " 'mat:anthropogenicmetal': 'anthropogenic metal material/any anthropogenic material',\n", + " 'rksd:Metamorphic Rock/rksd:Gabbroid': 'gabbroid/phaneritic igneous rock/sediment/igneous rock/natural solid material/metamorphic rock/rock',\n", + " 'rksd:Metamorphic Rock/rksd:Dioritoid': 
'dioritoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/metamorphic rock/rock',\n", + " 'ocmat:ceramicclay': 'anthropogenic material/ceramic clay/any anthropogenic material',\n", + " 'rksd:Generic Sandstone/rksd:Organic Rich Sedimentary Rock': 'sediment/natural solid material/generic sandstone/organic rich sedimentary rock/sedimentary rock/rock',\n", + " 'rksd:Metamorphic Rock/rksd:Fine Grained Igneous Rock': 'sediment/igneous rock/natural solid material/metamorphic rock/fine grained igneous rock/rock',\n", + " 'rksd:Tephritoid/rksd:Pyroclastic Rock': 'tephritoid/sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/fine grained igneous rock/rock',\n", + " 'rksd:Metamorphic Rock/rksd:Basic Igneous Rock': 'basic igneous rock/sediment/igneous rock/natural solid material/metamorphic rock/rock',\n", + " 'rksd:Diamicton/rksd:Dacite': 'clastic sediment/sediment/igneous rock/natural solid material/rock/fine grained igneous rock/diamicton/dacite',\n", + " 'rksd:Metasomatic Rock/rksd:Generic Mudstone': 'sediment/generic mudstone/natural solid material/metasomatic rock/sedimentary rock/rock',\n", + " 'rksd:Coal/ming:phosphatearsenatevanadatemineral': 'sediment/coal/natural solid material/mineral-phosphate, arsenate, or vanadate/organic rich sedimentary rock/sedimentary rock/mineral/rock',\n", + " 'rksd:Granite': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/granite/rock',\n", + " 'rksd:Clastic sediment': 'clastic sediment/sediment/natural solid material/rock',\n", + " 'rksd:Metasomatic Rock/rksd:Ultramafic Igneous Rock': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/metasomatic rock/rock',\n", + " 'rksd:Generic Sandstone/rksd:Tuffite': 'sediment/tuffite/natural solid material/generic sandstone/sedimentary rock/rock',\n", + " 'rksd:Metasomatic Rock/rksd:Peridotite': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/metasomatic 
rock/peridotite/rock',\n", + " 'rksd:Fragmental Igneous Rock/rksd:Doleritic Rock': 'sediment/igneous rock/natural solid material/fragmental igneous rock/doleritic rock/rock',\n", + " 'rksd:Trachytoid/rksd:Pyroclastic Rock': 'sediment/igneous rock/natural solid material/pyroclastic rock/trachytoid/fragmental igneous rock/fine grained igneous rock/rock',\n", + " 'rksd:Pyroclastic Rock/rksd:Rhyolitoid': 'sediment/igneous rock/rhyolitoid/natural solid material/pyroclastic rock/fragmental igneous rock/fine grained igneous rock/rock',\n", + " 'rksd:Andesite/rksd:Diamicton': 'andesite/clastic sediment/sediment/igneous rock/natural solid material/intermediate composition igneous rock/diamicton/rock',\n", + " 'rksd:Phaneritic Igneous Rock/rksd:Pyroclastic Rock': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n", + " 'rksd:Pegmatite': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/pegmatite/rock',\n", + " 'rksd:Rhyolitoid/rksd:Glass Rich Igneous Rock': 'sediment/igneous rock/rhyolitoid/natural solid material/glass rich igneous rock/fine grained igneous rock/rock',\n", + " 'rksd:Hybrid Sedimentary Rock': 'sediment/natural solid material/sedimentary rock/hybrid sedimentary rock/rock'}" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mapping" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "5e8682c9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "mat:material\n" + ] + } + ], + "source": [ + "for key, value in mapping.items():\n", + " if value == \"\":\n", + " print(key)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "d4e07b6d", + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "\n", + "with open('./datasets/multilabel_mapping.json', 'w') as f:\n", + " json.dump(mapping, f)" + ] + }, + { + "cell_type": 
"code", + "execution_count": 10, + "id": "57da6b98", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ming:sulfateselenatetelluratemineral': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n", + " 'ming:silicategermanatemineral': 'natural solid material/mineral/mineral-silicate or germanate',\n", + " 'ming:nativeelementmineral': 'natural solid material/mineral/mineral-native element',\n", + " 'ming:oxidemineral': 'natural solid material/mineral/mineral-oxide',\n", + " 'mat:soil': 'natural solid material/soil',\n", + " 'ming:carbonatenitratemineral': 'natural solid material/mineral-carbonate or nitrate/mineral',\n", + " 'rksd:Sedimentary Rock': 'sediment/natural solid material/sedimentary rock/rock',\n", + " 'ming:halidemineral': 'natural solid material/mineral-halide/mineral',\n", + " 'ming:phosphatearsenatevanadatemineral': 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n", + " 'ming:sulfidesulfosaltmineral': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n", + " 'rksd:Generic Mudstone': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n", + " 'ming:boratemineral': 'mineral-borate/natural solid material/mineral',\n", + " 'mat:organicmaterial': 'organic material',\n", + " 'rksd:Generic Sandstone': 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n", + " 'mat:liquidwater': 'liquid water/fluid material',\n", + " 'mat:mineral': 'natural solid material/mineral',\n", + " 'mat:biogenicnonorganicmaterial': 'biogenic non-organic material',\n", + " 'rksd:Basalt': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n", + " 'ocmat:glass': 'glass/anthropogenic material/any anthropogenic material',\n", + " 'mat:sediment': 'natural solid material/sediment/rock',\n", + " 'rksd:Carbonate Sediment': 'sediment/natural solid material/carbonate sediment/rock',\n", + " 'rksd:Sand Size Sediment': 
'sediment/natural solid material/sand size sediment/rock',\n", + " 'rksd:Mud Size Sediment': 'sediment/mud size sediment/natural solid material/rock',\n", + " 'mat:rock': 'natural solid material/sediment/rock',\n", + " 'rksd:Andesite': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n", + " 'rksd:Pyroclastic Rock': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n", + " 'mat:otheranthropogenicmaterial': 'anthropogenic material/any anthropogenic material',\n", + " 'rksd:Metasomatic Rock': 'sediment/natural solid material/metasomatic rock/rock',\n", + " 'rksd:Metamorphic Rock': 'sediment/natural solid material/metamorphic rock/rock',\n", + " 'rksd:Breccia': 'sediment/breccia/natural solid material/rock',\n", + " 'rksd:Fine Grained Igneous Rock': 'sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n", + " 'rksd:Glass Rich Igneous Rock': 'sediment/igneous rock/natural solid material/glass rich igneous rock/rock',\n", + " 'rksd:Gravel Size Sediment': 'sediment/natural solid material/gravel size sediment/rock',\n", + " 'mat:rockorsediment': 'natural solid material/sediment/rock',\n", + " 'rksd:Tuffite': 'sediment/tuffite/natural solid material/rock',\n", + " 'rksd:Chemical Sedimentary Material': 'sediment/natural solid material/chemical sedimentary material/rock',\n", + " 'rksd:Doleritic Rock': 'sediment/igneous rock/natural solid material/doleritic rock/rock',\n", + " 'rksd:Fragmental Igneous Rock': 'sediment/igneous rock/natural solid material/fragmental igneous rock/rock',\n", + " 'rksd:Exotic Composition Igneous Rock': 'sediment/igneous rock/natural solid material/exotic composition igneous rock/rock',\n", + " 'rksd:Trachytoid': 'sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock',\n", + " 'rksd:Dacite': 'sediment/dacite/igneous rock/natural solid material/fine grained igneous rock/rock',\n", + " 'rksd:Non 
Clastic Siliceous Sedimentary Rock': 'sediment/non clastic siliceous sedimentary rock/natural solid material/sedimentary rock/rock',\n", + " 'rksd:Generic Conglomerate': 'sediment/generic conglomerate/natural solid material/sedimentary rock/rock',\n", + " 'rksd:Tephra': 'sediment/natural solid material/tephra/rock',\n", + " 'rksd:Dioritoid': 'dioritoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Granodiorite': 'granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock',\n", + " 'rksd:Acidic Igneous Rock': 'sediment/igneous rock/acidic igneous rock/natural solid material/rock',\n", + " 'rksd:Granitoid': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Phonolitoid': 'sediment/igneous rock/natural solid material/phonolitoid/fine grained igneous rock/rock',\n", + " 'rksd:Tephritoid': 'tephritoid/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n", + " 'rksd:Foiditoid': 'sediment/igneous rock/natural solid material/foiditoid/fine grained igneous rock/rock',\n", + " 'rksd:Pyroxenite': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock',\n", + " 'rksd:Igneous Rock': 'sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:anorthositic rock': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/anorthositic rock/rock',\n", + " 'rksd:Gabbroic Rock': 'gabbroic rock/gabbroid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:andesite': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n", + " 'rksd:Phaneritic Igneous Rock': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Biogenic Sediment': 'sediment/natural solid material/biogenic sediment/rock',\n", + " 'rksd:Carbonate Sedimentary Rock': 'sediment/natural solid 
material/carbonate sedimentary rock/sedimentary rock/rock',\n", + " 'rksd:Gabbroid': 'gabbroid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Tonalite': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/tonalite/rock',\n", + " 'rksd:residual material': 'sediment/natural solid material/residual material/rock',\n", + " 'rksd:Peridotite': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n", + " 'rksd:Ultramafic Igneous Rock': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/rock',\n", + " 'rksd:Porphyry': 'sediment/igneous rock/natural solid material/porphyry/rock',\n", + " 'mat:material': '',\n", + " 'rksd:Rhyolitoid': 'sediment/igneous rock/rhyolitoid/natural solid material/fine grained igneous rock/rock',\n", + " 'rksd:Organic Rich Sedimentary Rock': 'sediment/natural solid material/organic rich sedimentary rock/sedimentary rock/rock',\n", + " 'rksd:Anorthositic Rock': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/anorthositic rock/rock',\n", + " 'rksd:Diamictite': 'diamictite/clastic sedimentary rock/sediment/natural solid material/sedimentary rock/rock',\n", + " 'rksd:Impact Generated Material': 'sediment/impact generated material/natural solid material/rock',\n", + " 'rksd:Clastic Sediment': 'clastic sediment/sediment/natural solid material/rock',\n", + " 'rksd:Hornblendite': 'sediment/igneous rock/natural solid material/hornblendite/ultramafic igneous rock/rock',\n", + " 'rksd:Basalt/rksd:Glass Rich Igneous Rock': 'basalt/sediment/igneous rock/natural solid material/glass rich igneous rock/fine grained igneous rock/rock',\n", + " 'rksd:Syenitoid': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/syenitoid/rock',\n", + " 'rksd:Foid Syenitoid': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/foid syenitoid/rock',\n", + " 'rksd:Quartz Rich Igneous Rock': 'phaneritic 
igneous rock/quartz rich igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Mylonitic Rock': 'sediment/mylonitic rock/fault related material/natural solid material/rock',\n", + " 'mat:mixedsoilsedimentrock': 'soil/sediment/natural solid material/rock',\n", + " 'rksd:Clastic Sedimentary Rock': 'clastic sedimentary rock/sediment/natural solid material/sedimentary rock/rock',\n", + " 'ming:organicmineral': 'mineral-organic compound/natural solid material/mineral',\n", + " 'mat:particulate': 'particulate/natural solid material',\n", + " 'rksd:Diamicton': 'clastic sediment/sediment/natural solid material/diamicton/rock',\n", + " 'rksd:Generic Mudstone/rksd:Biogenic Sediment': 'sediment/generic mudstone/natural solid material/biogenic sediment/sedimentary rock/rock',\n", + " 'rksd:Coal': 'sediment/coal/natural solid material/organic rich sedimentary rock/sedimentary rock/rock',\n", + " 'rksd:Metamorphic Rock/rksd:Generic Mudstone': 'sediment/generic mudstone/natural solid material/metamorphic rock/sedimentary rock/rock',\n", + " 'rksd:Iron Rich Sedimentary Rock': 'sediment/natural solid material/iron rich sedimentary rock/sedimentary rock/rock',\n", + " 'rksd:High Magnesium Fine Grained Igneous Rock': 'sediment/igneous rock/natural solid material/high magnesium fine grained igneous rock/fine grained igneous rock/rock',\n", + " 'rksd:Basic Igneous Rock': 'basic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Foid Gabbroid': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/foid gabbroid/rock',\n", + " 'rksd:Foidolite': 'phaneritic igneous rock/sediment/igneous rock/foidolite/natural solid material/rock',\n", + " 'rksd:Breccia Gouge Series': 'sediment/fault related material/natural solid material/breccia gouge series/rock',\n", + " 'rksd:coal': 'sediment/coal/natural solid material/organic rich sedimentary rock/sedimentary rock/rock',\n", + " 'rksd:Carbonate Sedimentary Rock/rksd:Generic 
Mudstone': 'sediment/generic mudstone/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n", + " 'rksd:Carbonate Sedimentary Rock/rksd:Clastic Sedimentary Rock': 'clastic sedimentary rock/sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n", + " 'rksd:Generic Sandstone/rksd:Generic Mudstone': 'sediment/generic mudstone/natural solid material/generic sandstone/sedimentary rock/rock',\n", + " 'rksd:Cataclasite Series': 'sediment/fault related material/natural solid material/cataclasite series/rock',\n", + " 'rksd:Fault Related Material': 'sediment/fault related material/natural solid material/rock',\n", + " 'rksd:Massive Sulphide': 'sediment/massive sulphide/natural solid material/rock',\n", + " 'rksd:Alkali Feldspar Granite': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/alkali feldspar granite/rock',\n", + " 'rksd:Pyroclastic Rock/rksd:Basalt': 'basalt/sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/fine grained igneous rock/rock',\n", + " 'ocmat:charcoal': 'biogenic non-organic material/charcoal',\n", + " 'rksd:Basalt/rksd:Chemical Sedimentary Material': 'basalt/sediment/igneous rock/natural solid material/chemical sedimentary material/fine grained igneous rock/rock',\n", + " 'rksd:Generic Sandstone/rksd:Coal': 'sediment/coal/natural solid material/generic sandstone/organic rich sedimentary rock/sedimentary rock/rock',\n", + " 'mat:gas': 'fluid material/gaseous material',\n", + " 'rksd:Non Clastic Siliceous Sediment': 'sediment/natural solid material/non clastic siliceous sediment/rock',\n", + " 'rksd:Aphanite': 'sediment/natural solid material/aphanite/rock',\n", + " 'rksd:Fragmental Igneous Rock/rksd:Fine Grained Igneous Rock': 'sediment/igneous rock/natural solid material/fragmental igneous rock/fine grained igneous rock/rock',\n", + " 'rksd:basalt': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous 
rock/rock',\n", + " 'rksd:Generic Mudstone/rksd:Coal': 'sediment/generic mudstone/coal/natural solid material/organic rich sedimentary rock/sedimentary rock/rock',\n", + " 'rksd:Clastic Sedimentary Rock/rksd:Coal': 'clastic sedimentary rock/sediment/coal/natural solid material/organic rich sedimentary rock/sedimentary rock/rock',\n", + " 'mat:anthropogenicmetal': 'anthropogenic metal material/any anthropogenic material',\n", + " 'rksd:Metamorphic Rock/rksd:Gabbroid': 'gabbroid/phaneritic igneous rock/sediment/igneous rock/natural solid material/metamorphic rock/rock',\n", + " 'rksd:Metamorphic Rock/rksd:Dioritoid': 'dioritoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/metamorphic rock/rock',\n", + " 'ocmat:ceramicclay': 'anthropogenic material/ceramic clay/any anthropogenic material',\n", + " 'rksd:Generic Sandstone/rksd:Organic Rich Sedimentary Rock': 'sediment/natural solid material/generic sandstone/organic rich sedimentary rock/sedimentary rock/rock',\n", + " 'rksd:Metamorphic Rock/rksd:Fine Grained Igneous Rock': 'sediment/igneous rock/natural solid material/metamorphic rock/fine grained igneous rock/rock',\n", + " 'rksd:Tephritoid/rksd:Pyroclastic Rock': 'tephritoid/sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/fine grained igneous rock/rock',\n", + " 'rksd:Metamorphic Rock/rksd:Basic Igneous Rock': 'basic igneous rock/sediment/igneous rock/natural solid material/metamorphic rock/rock',\n", + " 'rksd:Diamicton/rksd:Dacite': 'clastic sediment/sediment/igneous rock/natural solid material/rock/fine grained igneous rock/diamicton/dacite',\n", + " 'rksd:Metasomatic Rock/rksd:Generic Mudstone': 'sediment/generic mudstone/natural solid material/metasomatic rock/sedimentary rock/rock',\n", + " 'rksd:Coal/ming:phosphatearsenatevanadatemineral': 'sediment/coal/natural solid material/mineral-phosphate, arsenate, or vanadate/organic rich sedimentary rock/sedimentary rock/mineral/rock',\n", + " 
'rksd:Granite': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/granite/rock',\n", + " 'rksd:Clastic sediment': 'clastic sediment/sediment/natural solid material/rock',\n", + " 'rksd:Metasomatic Rock/rksd:Ultramafic Igneous Rock': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/metasomatic rock/rock',\n", + " 'rksd:Generic Sandstone/rksd:Tuffite': 'sediment/tuffite/natural solid material/generic sandstone/sedimentary rock/rock',\n", + " 'rksd:Metasomatic Rock/rksd:Peridotite': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/metasomatic rock/peridotite/rock',\n", + " 'rksd:Fragmental Igneous Rock/rksd:Doleritic Rock': 'sediment/igneous rock/natural solid material/fragmental igneous rock/doleritic rock/rock',\n", + " 'rksd:Trachytoid/rksd:Pyroclastic Rock': 'sediment/igneous rock/natural solid material/pyroclastic rock/trachytoid/fragmental igneous rock/fine grained igneous rock/rock',\n", + " 'rksd:Pyroclastic Rock/rksd:Rhyolitoid': 'sediment/igneous rock/rhyolitoid/natural solid material/pyroclastic rock/fragmental igneous rock/fine grained igneous rock/rock',\n", + " 'rksd:Andesite/rksd:Diamicton': 'andesite/clastic sediment/sediment/igneous rock/natural solid material/intermediate composition igneous rock/diamicton/rock',\n", + " 'rksd:Phaneritic Igneous Rock/rksd:Pyroclastic Rock': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n", + " 'rksd:Pegmatite': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/pegmatite/rock',\n", + " 'rksd:Rhyolitoid/rksd:Glass Rich Igneous Rock': 'sediment/igneous rock/rhyolitoid/natural solid material/glass rich igneous rock/fine grained igneous rock/rock',\n", + " 'rksd:Hybrid Sedimentary Rock': 'sediment/natural solid material/sedimentary rock/hybrid sedimentary rock/rock'}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], 
+ "source": [ + "import json\n", + "with open('./datasets/multilabel_mapping.json') as f:\n", + " multilabel_mapping = json.load(f)\n", + "multilabel_mapping" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "28bf6091", + "metadata": {}, + "outputs": [], + "source": [ + "# get the version that excludes the leaf labels\n", + "mapping_wo_leaf = {}\n", + "for key, value in multilabel_mapping.items():\n", + " splitted = value.split(\"/\")\n", + " \n", + " parents_wo_leaf = []\n", + " for split in splitted:\n", + " if split not in leaf_labels:\n", + " parents_wo_leaf.append(split)\n", + " mapping_wo_leaf[key] = \"/\".join(parents_wo_leaf)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "78de8603", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ming:sulfateselenatetelluratemineral': 'natural solid material/mineral',\n", + " 'ming:silicategermanatemineral': 'natural solid material/mineral',\n", + " 'ming:nativeelementmineral': 'natural solid material/mineral',\n", + " 'ming:oxidemineral': 'natural solid material/mineral',\n", + " 'mat:soil': 'natural solid material',\n", + " 'ming:carbonatenitratemineral': 'natural solid material/mineral',\n", + " 'rksd:Sedimentary Rock': 'sediment/natural solid material/sedimentary rock/rock',\n", + " 'ming:halidemineral': 'natural solid material/mineral',\n", + " 'ming:phosphatearsenatevanadatemineral': 'natural solid material/mineral',\n", + " 'ming:sulfidesulfosaltmineral': 'natural solid material/mineral',\n", + " 'rksd:Generic Mudstone': 'sediment/natural solid material/sedimentary rock/rock',\n", + " 'ming:boratemineral': 'natural solid material/mineral',\n", + " 'mat:organicmaterial': 'organic material',\n", + " 'rksd:Generic Sandstone': 'sediment/natural solid material/sedimentary rock/rock',\n", + " 'mat:liquidwater': 'fluid material',\n", + " 'mat:mineral': 'natural solid material/mineral',\n", + " 'mat:biogenicnonorganicmaterial': 'biogenic non-organic 
material',\n", + " 'rksd:Basalt': 'sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n", + " 'ocmat:glass': 'anthropogenic material/any anthropogenic material',\n", + " 'mat:sediment': 'natural solid material/sediment/rock',\n", + " 'rksd:Carbonate Sediment': 'sediment/natural solid material/rock',\n", + " 'rksd:Sand Size Sediment': 'sediment/natural solid material/rock',\n", + " 'rksd:Mud Size Sediment': 'sediment/natural solid material/rock',\n", + " 'mat:rock': 'natural solid material/sediment/rock',\n", + " 'rksd:Andesite': 'sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n", + " 'rksd:Pyroclastic Rock': 'sediment/igneous rock/natural solid material/fragmental igneous rock/rock',\n", + " 'mat:otheranthropogenicmaterial': 'anthropogenic material/any anthropogenic material',\n", + " 'rksd:Metasomatic Rock': 'sediment/natural solid material/rock',\n", + " 'rksd:Metamorphic Rock': 'sediment/natural solid material/rock',\n", + " 'rksd:Breccia': 'sediment/natural solid material/rock',\n", + " 'rksd:Fine Grained Igneous Rock': 'sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n", + " 'rksd:Glass Rich Igneous Rock': 'sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Gravel Size Sediment': 'sediment/natural solid material/rock',\n", + " 'mat:rockorsediment': 'natural solid material/sediment/rock',\n", + " 'rksd:Tuffite': 'sediment/natural solid material/rock',\n", + " 'rksd:Chemical Sedimentary Material': 'sediment/natural solid material/rock',\n", + " 'rksd:Doleritic Rock': 'sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Fragmental Igneous Rock': 'sediment/igneous rock/natural solid material/fragmental igneous rock/rock',\n", + " 'rksd:Exotic Composition Igneous Rock': 'sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Trachytoid': 'sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n", + " 
'rksd:Dacite': 'sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n", + " 'rksd:Non Clastic Siliceous Sedimentary Rock': 'sediment/natural solid material/sedimentary rock/rock',\n", + " 'rksd:Generic Conglomerate': 'sediment/natural solid material/sedimentary rock/rock',\n", + " 'rksd:Tephra': 'sediment/natural solid material/rock',\n", + " 'rksd:Dioritoid': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Granodiorite': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Acidic Igneous Rock': 'sediment/igneous rock/acidic igneous rock/natural solid material/rock',\n", + " 'rksd:Granitoid': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Phonolitoid': 'sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n", + " 'rksd:Tephritoid': 'sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n", + " 'rksd:Foiditoid': 'sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n", + " 'rksd:Pyroxenite': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/rock',\n", + " 'rksd:Igneous Rock': 'sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:anorthositic rock': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Gabbroic Rock': 'gabbroid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:andesite': 'sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n", + " 'rksd:Phaneritic Igneous Rock': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Biogenic Sediment': 'sediment/natural solid material/rock',\n", + " 'rksd:Carbonate Sedimentary Rock': 'sediment/natural solid material/sedimentary rock/rock',\n", + " 'rksd:Gabbroid': 'gabbroid/phaneritic igneous 
rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Tonalite': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:residual material': 'sediment/natural solid material/rock',\n", + " 'rksd:Peridotite': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/rock',\n", + " 'rksd:Ultramafic Igneous Rock': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/rock',\n", + " 'rksd:Porphyry': 'sediment/igneous rock/natural solid material/rock',\n", + " 'mat:material': '',\n", + " 'rksd:Rhyolitoid': 'sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n", + " 'rksd:Organic Rich Sedimentary Rock': 'sediment/natural solid material/organic rich sedimentary rock/sedimentary rock/rock',\n", + " 'rksd:Anorthositic Rock': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Diamictite': 'clastic sedimentary rock/sediment/natural solid material/sedimentary rock/rock',\n", + " 'rksd:Impact Generated Material': 'sediment/natural solid material/rock',\n", + " 'rksd:Clastic Sediment': 'clastic sediment/sediment/natural solid material/rock',\n", + " 'rksd:Hornblendite': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/rock',\n", + " 'rksd:Basalt/rksd:Glass Rich Igneous Rock': 'sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n", + " 'rksd:Syenitoid': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Foid Syenitoid': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Quartz Rich Igneous Rock': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Mylonitic Rock': 'sediment/fault related material/natural solid material/rock',\n", + " 'mat:mixedsoilsedimentrock': 'sediment/natural solid material/rock',\n", + " 'rksd:Clastic Sedimentary Rock': 'clastic sedimentary 
rock/sediment/natural solid material/sedimentary rock/rock',\n", + " 'ming:organicmineral': 'natural solid material/mineral',\n", + " 'mat:particulate': 'particulate/natural solid material',\n", + " 'rksd:Diamicton': 'clastic sediment/sediment/natural solid material/rock',\n", + " 'rksd:Generic Mudstone/rksd:Biogenic Sediment': 'sediment/natural solid material/sedimentary rock/rock',\n", + " 'rksd:Coal': 'sediment/natural solid material/organic rich sedimentary rock/sedimentary rock/rock',\n", + " 'rksd:Metamorphic Rock/rksd:Generic Mudstone': 'sediment/natural solid material/sedimentary rock/rock',\n", + " 'rksd:Iron Rich Sedimentary Rock': 'sediment/natural solid material/sedimentary rock/rock',\n", + " 'rksd:High Magnesium Fine Grained Igneous Rock': 'sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n", + " 'rksd:Basic Igneous Rock': 'basic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Foid Gabbroid': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Foidolite': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Breccia Gouge Series': 'sediment/fault related material/natural solid material/rock',\n", + " 'rksd:coal': 'sediment/natural solid material/organic rich sedimentary rock/sedimentary rock/rock',\n", + " 'rksd:Carbonate Sedimentary Rock/rksd:Generic Mudstone': 'sediment/natural solid material/sedimentary rock/rock',\n", + " 'rksd:Carbonate Sedimentary Rock/rksd:Clastic Sedimentary Rock': 'clastic sedimentary rock/sediment/natural solid material/sedimentary rock/rock',\n", + " 'rksd:Generic Sandstone/rksd:Generic Mudstone': 'sediment/natural solid material/sedimentary rock/rock',\n", + " 'rksd:Cataclasite Series': 'sediment/fault related material/natural solid material/rock',\n", + " 'rksd:Fault Related Material': 'sediment/fault related material/natural solid material/rock',\n", + " 'rksd:Massive Sulphide': 
'sediment/natural solid material/rock',\n", + " 'rksd:Alkali Feldspar Granite': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Pyroclastic Rock/rksd:Basalt': 'sediment/igneous rock/natural solid material/fragmental igneous rock/fine grained igneous rock/rock',\n", + " 'ocmat:charcoal': 'biogenic non-organic material',\n", + " 'rksd:Basalt/rksd:Chemical Sedimentary Material': 'sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n", + " 'rksd:Generic Sandstone/rksd:Coal': 'sediment/natural solid material/organic rich sedimentary rock/sedimentary rock/rock',\n", + " 'mat:gas': 'fluid material',\n", + " 'rksd:Non Clastic Siliceous Sediment': 'sediment/natural solid material/rock',\n", + " 'rksd:Aphanite': 'sediment/natural solid material/rock',\n", + " 'rksd:Fragmental Igneous Rock/rksd:Fine Grained Igneous Rock': 'sediment/igneous rock/natural solid material/fragmental igneous rock/fine grained igneous rock/rock',\n", + " 'rksd:basalt': 'sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n", + " 'rksd:Generic Mudstone/rksd:Coal': 'sediment/natural solid material/organic rich sedimentary rock/sedimentary rock/rock',\n", + " 'rksd:Clastic Sedimentary Rock/rksd:Coal': 'clastic sedimentary rock/sediment/natural solid material/organic rich sedimentary rock/sedimentary rock/rock',\n", + " 'mat:anthropogenicmetal': 'anthropogenic metal material/any anthropogenic material',\n", + " 'rksd:Metamorphic Rock/rksd:Gabbroid': 'gabbroid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Metamorphic Rock/rksd:Dioritoid': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'ocmat:ceramicclay': 'anthropogenic material/ceramic clay/any anthropogenic material',\n", + " 'rksd:Generic Sandstone/rksd:Organic Rich Sedimentary Rock': 'sediment/natural solid material/organic rich sedimentary rock/sedimentary 
rock/rock',\n", + " 'rksd:Metamorphic Rock/rksd:Fine Grained Igneous Rock': 'sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n", + " 'rksd:Tephritoid/rksd:Pyroclastic Rock': 'sediment/igneous rock/natural solid material/fragmental igneous rock/fine grained igneous rock/rock',\n", + " 'rksd:Metamorphic Rock/rksd:Basic Igneous Rock': 'basic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Diamicton/rksd:Dacite': 'clastic sediment/sediment/igneous rock/natural solid material/rock/fine grained igneous rock',\n", + " 'rksd:Metasomatic Rock/rksd:Generic Mudstone': 'sediment/natural solid material/sedimentary rock/rock',\n", + " 'rksd:Coal/ming:phosphatearsenatevanadatemineral': 'sediment/natural solid material/organic rich sedimentary rock/sedimentary rock/mineral/rock',\n", + " 'rksd:Granite': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Clastic sediment': 'clastic sediment/sediment/natural solid material/rock',\n", + " 'rksd:Metasomatic Rock/rksd:Ultramafic Igneous Rock': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/rock',\n", + " 'rksd:Generic Sandstone/rksd:Tuffite': 'sediment/natural solid material/sedimentary rock/rock',\n", + " 'rksd:Metasomatic Rock/rksd:Peridotite': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/rock',\n", + " 'rksd:Fragmental Igneous Rock/rksd:Doleritic Rock': 'sediment/igneous rock/natural solid material/fragmental igneous rock/rock',\n", + " 'rksd:Trachytoid/rksd:Pyroclastic Rock': 'sediment/igneous rock/natural solid material/fragmental igneous rock/fine grained igneous rock/rock',\n", + " 'rksd:Pyroclastic Rock/rksd:Rhyolitoid': 'sediment/igneous rock/natural solid material/fragmental igneous rock/fine grained igneous rock/rock',\n", + " 'rksd:Andesite/rksd:Diamicton': 'clastic sediment/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n", 
+ " 'rksd:Phaneritic Igneous Rock/rksd:Pyroclastic Rock': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/fragmental igneous rock/rock',\n", + " 'rksd:Pegmatite': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'rksd:Rhyolitoid/rksd:Glass Rich Igneous Rock': 'sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n", + " 'rksd:Hybrid Sedimentary Rock': 'sediment/natural solid material/sedimentary rock/rock'}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mapping_wo_leaf" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "5fcbe6ec", + "metadata": {}, + "outputs": [], + "source": [ + "with open('./datasets/multilabel_mapping_wo_leaf.json', 'w') as f:\n", + " json.dump(mapping_wo_leaf, f)" + ] + }, + { + "cell_type": "code", + "execution_count": 270, + "id": "0c749b55", + "metadata": {}, + "outputs": [], + "source": [ + "# store converted dataset " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "venv" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/SESAR/zeroshot-learning/README.md b/SESAR/zeroshot-learning/README.md index 69b20df..cf33406 100644 --- a/SESAR/zeroshot-learning/README.md +++ b/SESAR/zeroshot-learning/README.md @@ -2,14 +2,18 @@ This directory contains the ongoing experiments on using Zeroshot Text Classific ## Datasets Different datasets were created from the original SESAR dump and the annotated data `SESARTrainingiSamKeywords.csv` to find the best method to solve our problem. 
-The datasets that are used during this process will be uploaded [here](https://drive.google.com/drive/folders/1PCm8greoBSBXm-YNHeJwbvBLj7ytw6Gr?usp=sharing). +The datasets that are used during this process will be uploaded [here](https://drive.google.com/drive/folders/1o9vZ4CzTDi0N93KKPCGgvcvqNTrPx4jI?usp=sharing). +- `SESAR_ZTC_train_multi.csv`, `SESAR_ZTC_dev_multi.csv`, `SESAR_ZTC_test_multi.csv` : Used for multilabel finetuning (non zero-shot) - `SESAR_ZTC_test_multiclass_label_fully_unseen.csv` : Used for multiclass label-fully-unseen tasks. - `SESAR_ZTC_test_multilabel_label_fully_unseen.csv` : Used for multilabel label-fully-unseen tasks. - `SESAR_ZTC_partial_label_unseen_train.csv`, `SESAR_ZTC_partial_label_unseen_dev.csv`,`SESAR_ZTC_partial_label_unseen_test.csv` : Used for multiclass partially-label-unseen tasks. - +- `SESAR_labeled_original.csv` : Original dump of SESAR description_material labeled records. ## Code + +-`hyperparam_search_ZTC.py` : Code to find the optimal hyperparameters for finetuning. + -`finetune_ZTC.py` : Implementation of fine-tuning a textual entailment model on the SESAR dataset. Converts the dataset into a format that is applicable for textual entailment finetuning task and uses the given arguments to execute finetuning. The finetuned model will be stored in the output directory. -`evaluate.py` : Implementation of evaluating the model on SESAR dataset. The model that can be used could be either a finetuned model from finetune_ZTC.py or an out-of-box textual entailment model(completely zeroshot). Result of evaluation will be logged. Supports solving the task as a multilabel or multiclass. For multiclass approach, also contains implementation of using specified depth level of the entire hierarchical label space of iSamples vocabulary. 
diff --git a/SESAR/zeroshot-learning/Text Entailment Data Generation-MultiLabel.ipynb b/SESAR/zeroshot-learning/Text Entailment Data Generation-MultiLabel.ipynb new file mode 100644 index 0000000..27f725f --- /dev/null +++ b/SESAR/zeroshot-learning/Text Entailment Data Generation-MultiLabel.ipynb @@ -0,0 +1,9821 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "d61926d8", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "1fb3638f", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/5_/2jhxkdyn5p74jn6q496vblp00000gn/T/ipykernel_1893/1284511993.py:1: DtypeWarning: Columns (1,2,4,7,13,14,15,16,18,20,21,22,24,26,29,31,33,34,35,36,39,42,43,44,46,49,51,52,53,55,56,57,59,62,63,68,69,71,75,76,77,78,84,86,89,90,91,92,93,96,97,98,99,104,106,108,110,111,113,115,116) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " df = pd.read_csv(\"./datasets/SESAR_labeled_original.csv\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(995649, 117)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0description_supplementMetadata_zonedescription_supplementMetadata_localitydescription_contributors_contributor_familyNamedescription_supplementMetadata_externalSampleIddescription_supplementMetadata_sampleIddescription_contributors_contributor_@typedescription_supplementMetadata_publicationUrl_urldescription_contributorsdescription_material...description_igsnPrefixdescription_supplementMetadata_cruiseFieldPrgrmdescription_collectordescription_supplementMetadata_otherNamedescription_supplementMetadata_sizeUnitdescription_publisherdescription_supplementMetadata_publicationUrl_descriptiondescription_spatialCoverage_geo_longitudedescription_geoLocation_geo_MultiplePointsdescription_parentIdentifier
00.0NaNNaNMansurNMNH C6294-01 (MIN)3835675.0Personhttp://n2t.net/ark:/65665/368aae4fe-832b-4f20-...NaNMooreite>Mineral...NHBNaNNaNNaNNaNNaNSmithsonian collections record for NMNH C6294-...NaNNaNNHB002GWS
11.0NaNNaNMansurNMNH C6295-00 (MIN)3835676.0Personhttp://n2t.net/ark:/65665/3682f8d30-22dc-4825-...NaNBarite>Mineral...NHBNaNNaNNaNNaNNaNSmithsonian collections record for NMNH C6295-...NaNNaNNaN
22.0NaNNaNMansurNMNH C6295-01 (MIN)3835677.0Personhttp://n2t.net/ark:/65665/3391bc823-1b27-441f-...NaNBarite>Mineral...NHBNaNNaNNaNNaNNaNSmithsonian collections record for NMNH C6295-...NaNNaNNHB002GWU
33.0NaNNaNMansurNMNH C6296-00 (MIN)3835678.0Personhttp://n2t.net/ark:/65665/3b365a0d7-f60d-47b1-...NaNHardystonite>Mineral...NHBNaNNaNNaNNaNNaNSmithsonian collections record for NMNH C6296-...NaNNaNNaN
44.0NaNNaNMansurNMNH C6299-00 (MIN)3835681.0Personhttp://n2t.net/ark:/65665/3db9be6c2-3a80-4cff-...NaNChondrodite>Mineral...NHBNaNNaNNaNNaNNaNSmithsonian collections record for NMNH C6299-...NaNNaNNaN
..................................................................
9956444583983.0NaNNaNSiddowayMBL 318-M148923.0PersonNaNNaNMetamorphic>Gneiss>Rock...MBLNaNNaNNaNNaNNaNNaNNaNNaNNaN
9956454584986.0NaNNaNSiddowayMBL 318-M148923.0PersonNaNNaNMetamorphic>Gneiss>Rock...MBLNaNNaNNaNNaNNaNNaNNaNNaNNaN
9956464588773.0NaNNaNMiranteAthens Gneiss72550.0PersonNaNNaNMetamorphic>Gneiss>Rock...UGANaNNaNNaNNaNNaNNaN-83.433334NaNNaN
9956474588776.0NaNNaNSiddowayLTR 6125-1448924.0PersonNaNNaNMetamorphic>Calc-Silicate>Rock...LTRNaNNaNNaNNaNNaNNaNNaNNaNNaN
9956484588777.0NaNNaNSiddowayLTR 6125-1448924.0PersonNaNNaNMetamorphic>Calc-Silicate>Rock...LTRNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", + "

995649 rows × 117 columns

\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 description_supplementMetadata_zone \\\n", + "0 0.0 NaN \n", + "1 1.0 NaN \n", + "2 2.0 NaN \n", + "3 3.0 NaN \n", + "4 4.0 NaN \n", + "... ... ... \n", + "995644 4583983.0 NaN \n", + "995645 4584986.0 NaN \n", + "995646 4588773.0 NaN \n", + "995647 4588776.0 NaN \n", + "995648 4588777.0 NaN \n", + "\n", + " description_supplementMetadata_locality \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "... ... \n", + "995644 NaN \n", + "995645 NaN \n", + "995646 NaN \n", + "995647 NaN \n", + "995648 NaN \n", + "\n", + " description_contributors_contributor_familyName \\\n", + "0 Mansur \n", + "1 Mansur \n", + "2 Mansur \n", + "3 Mansur \n", + "4 Mansur \n", + "... ... \n", + "995644 Siddoway \n", + "995645 Siddoway \n", + "995646 Mirante \n", + "995647 Siddoway \n", + "995648 Siddoway \n", + "\n", + " description_supplementMetadata_externalSampleId \\\n", + "0 NMNH C6294-01 (MIN) \n", + "1 NMNH C6295-00 (MIN) \n", + "2 NMNH C6295-01 (MIN) \n", + "3 NMNH C6296-00 (MIN) \n", + "4 NMNH C6299-00 (MIN) \n", + "... ... \n", + "995644 MBL 318-M1 \n", + "995645 MBL 318-M1 \n", + "995646 Athens Gneiss \n", + "995647 LTR 6125-14 \n", + "995648 LTR 6125-14 \n", + "\n", + " description_supplementMetadata_sampleId \\\n", + "0 3835675.0 \n", + "1 3835676.0 \n", + "2 3835677.0 \n", + "3 3835678.0 \n", + "4 3835681.0 \n", + "... ... \n", + "995644 48923.0 \n", + "995645 48923.0 \n", + "995646 72550.0 \n", + "995647 48924.0 \n", + "995648 48924.0 \n", + "\n", + " description_contributors_contributor_@type \\\n", + "0 Person \n", + "1 Person \n", + "2 Person \n", + "3 Person \n", + "4 Person \n", + "... ... \n", + "995644 Person \n", + "995645 Person \n", + "995646 Person \n", + "995647 Person \n", + "995648 Person \n", + "\n", + " description_supplementMetadata_publicationUrl_url \\\n", + "0 http://n2t.net/ark:/65665/368aae4fe-832b-4f20-... \n", + "1 http://n2t.net/ark:/65665/3682f8d30-22dc-4825-... 
\n", + "2 http://n2t.net/ark:/65665/3391bc823-1b27-441f-... \n", + "3 http://n2t.net/ark:/65665/3b365a0d7-f60d-47b1-... \n", + "4 http://n2t.net/ark:/65665/3db9be6c2-3a80-4cff-... \n", + "... ... \n", + "995644 NaN \n", + "995645 NaN \n", + "995646 NaN \n", + "995647 NaN \n", + "995648 NaN \n", + "\n", + " description_contributors description_material ... \\\n", + "0 NaN Mooreite>Mineral ... \n", + "1 NaN Barite>Mineral ... \n", + "2 NaN Barite>Mineral ... \n", + "3 NaN Hardystonite>Mineral ... \n", + "4 NaN Chondrodite>Mineral ... \n", + "... ... ... ... \n", + "995644 NaN Metamorphic>Gneiss>Rock ... \n", + "995645 NaN Metamorphic>Gneiss>Rock ... \n", + "995646 NaN Metamorphic>Gneiss>Rock ... \n", + "995647 NaN Metamorphic>Calc-Silicate>Rock ... \n", + "995648 NaN Metamorphic>Calc-Silicate>Rock ... \n", + "\n", + " description_igsnPrefix description_supplementMetadata_cruiseFieldPrgrm \\\n", + "0 NHB NaN \n", + "1 NHB NaN \n", + "2 NHB NaN \n", + "3 NHB NaN \n", + "4 NHB NaN \n", + "... ... ... \n", + "995644 MBL NaN \n", + "995645 MBL NaN \n", + "995646 UGA NaN \n", + "995647 LTR NaN \n", + "995648 LTR NaN \n", + "\n", + " description_collector description_supplementMetadata_otherName \\\n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "... ... ... \n", + "995644 NaN NaN \n", + "995645 NaN NaN \n", + "995646 NaN NaN \n", + "995647 NaN NaN \n", + "995648 NaN NaN \n", + "\n", + " description_supplementMetadata_sizeUnit description_publisher \\\n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "... ... ... \n", + "995644 NaN NaN \n", + "995645 NaN NaN \n", + "995646 NaN NaN \n", + "995647 NaN NaN \n", + "995648 NaN NaN \n", + "\n", + " description_supplementMetadata_publicationUrl_description \\\n", + "0 Smithsonian collections record for NMNH C6294-... \n", + "1 Smithsonian collections record for NMNH C6295-... \n", + "2 Smithsonian collections record for NMNH C6295-... 
\n", + "3 Smithsonian collections record for NMNH C6296-... \n", + "4 Smithsonian collections record for NMNH C6299-... \n", + "... ... \n", + "995644 NaN \n", + "995645 NaN \n", + "995646 NaN \n", + "995647 NaN \n", + "995648 NaN \n", + "\n", + " description_spatialCoverage_geo_longitude \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "... ... \n", + "995644 NaN \n", + "995645 NaN \n", + "995646 -83.433334 \n", + "995647 NaN \n", + "995648 NaN \n", + "\n", + " description_geoLocation_geo_MultiplePoints \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "... ... \n", + "995644 NaN \n", + "995645 NaN \n", + "995646 NaN \n", + "995647 NaN \n", + "995648 NaN \n", + "\n", + " description_parentIdentifier \n", + "0 NHB002GWS \n", + "1 NaN \n", + "2 NHB002GWU \n", + "3 NaN \n", + "4 NaN \n", + "... ... \n", + "995644 NaN \n", + "995645 NaN \n", + "995646 NaN \n", + "995647 NaN \n", + "995648 NaN \n", + "\n", + "[995649 rows x 117 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(\"./datasets/SESAR_labeled_original.csv\")\n", + "print(df.shape)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cd822d34", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0description_supplementMetadata_zonedescription_supplementMetadata_localitydescription_contributors_contributor_familyNamedescription_supplementMetadata_externalSampleIddescription_supplementMetadata_sampleIddescription_contributors_contributor_@typedescription_supplementMetadata_publicationUrl_urldescription_contributorsdescription_material...description_igsnPrefixdescription_supplementMetadata_cruiseFieldPrgrmdescription_collectordescription_supplementMetadata_otherNamedescription_supplementMetadata_sizeUnitdescription_publisherdescription_supplementMetadata_publicationUrl_descriptiondescription_spatialCoverage_geo_longitudedescription_geoLocation_geo_MultiplePointsdescription_parentIdentifier
00.0MansurNMNH C6294-01 (MIN)3835675.0Personhttp://n2t.net/ark:/65665/368aae4fe-832b-4f20-...Mooreite>Mineral...NHBSmithsonian collections record for NMNH C6294-...NHB002GWS
11.0MansurNMNH C6295-00 (MIN)3835676.0Personhttp://n2t.net/ark:/65665/3682f8d30-22dc-4825-...Barite>Mineral...NHBSmithsonian collections record for NMNH C6295-...
22.0MansurNMNH C6295-01 (MIN)3835677.0Personhttp://n2t.net/ark:/65665/3391bc823-1b27-441f-...Barite>Mineral...NHBSmithsonian collections record for NMNH C6295-...NHB002GWU
33.0MansurNMNH C6296-00 (MIN)3835678.0Personhttp://n2t.net/ark:/65665/3b365a0d7-f60d-47b1-...Hardystonite>Mineral...NHBSmithsonian collections record for NMNH C6296-...
44.0MansurNMNH C6299-00 (MIN)3835681.0Personhttp://n2t.net/ark:/65665/3db9be6c2-3a80-4cff-...Chondrodite>Mineral...NHBSmithsonian collections record for NMNH C6299-...
..................................................................
9956444583983.0SiddowayMBL 318-M148923.0PersonMetamorphic>Gneiss>Rock...MBL
9956454584986.0SiddowayMBL 318-M148923.0PersonMetamorphic>Gneiss>Rock...MBL
9956464588773.0MiranteAthens Gneiss72550.0PersonMetamorphic>Gneiss>Rock...UGA-83.433334
9956474588776.0SiddowayLTR 6125-1448924.0PersonMetamorphic>Calc-Silicate>Rock...LTR
9956484588777.0SiddowayLTR 6125-1448924.0PersonMetamorphic>Calc-Silicate>Rock...LTR
\n", + "

995649 rows × 117 columns

\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 description_supplementMetadata_zone \\\n", + "0 0.0 \n", + "1 1.0 \n", + "2 2.0 \n", + "3 3.0 \n", + "4 4.0 \n", + "... ... ... \n", + "995644 4583983.0 \n", + "995645 4584986.0 \n", + "995646 4588773.0 \n", + "995647 4588776.0 \n", + "995648 4588777.0 \n", + "\n", + " description_supplementMetadata_locality \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_contributors_contributor_familyName \\\n", + "0 Mansur \n", + "1 Mansur \n", + "2 Mansur \n", + "3 Mansur \n", + "4 Mansur \n", + "... ... \n", + "995644 Siddoway \n", + "995645 Siddoway \n", + "995646 Mirante \n", + "995647 Siddoway \n", + "995648 Siddoway \n", + "\n", + " description_supplementMetadata_externalSampleId \\\n", + "0 NMNH C6294-01 (MIN) \n", + "1 NMNH C6295-00 (MIN) \n", + "2 NMNH C6295-01 (MIN) \n", + "3 NMNH C6296-00 (MIN) \n", + "4 NMNH C6299-00 (MIN) \n", + "... ... \n", + "995644 MBL 318-M1 \n", + "995645 MBL 318-M1 \n", + "995646 Athens Gneiss \n", + "995647 LTR 6125-14 \n", + "995648 LTR 6125-14 \n", + "\n", + " description_supplementMetadata_sampleId \\\n", + "0 3835675.0 \n", + "1 3835676.0 \n", + "2 3835677.0 \n", + "3 3835678.0 \n", + "4 3835681.0 \n", + "... ... \n", + "995644 48923.0 \n", + "995645 48923.0 \n", + "995646 72550.0 \n", + "995647 48924.0 \n", + "995648 48924.0 \n", + "\n", + " description_contributors_contributor_@type \\\n", + "0 Person \n", + "1 Person \n", + "2 Person \n", + "3 Person \n", + "4 Person \n", + "... ... \n", + "995644 Person \n", + "995645 Person \n", + "995646 Person \n", + "995647 Person \n", + "995648 Person \n", + "\n", + " description_supplementMetadata_publicationUrl_url \\\n", + "0 http://n2t.net/ark:/65665/368aae4fe-832b-4f20-... \n", + "1 http://n2t.net/ark:/65665/3682f8d30-22dc-4825-... \n", + "2 http://n2t.net/ark:/65665/3391bc823-1b27-441f-... 
\n", + "3 http://n2t.net/ark:/65665/3b365a0d7-f60d-47b1-... \n", + "4 http://n2t.net/ark:/65665/3db9be6c2-3a80-4cff-... \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_contributors description_material ... \\\n", + "0 Mooreite>Mineral ... \n", + "1 Barite>Mineral ... \n", + "2 Barite>Mineral ... \n", + "3 Hardystonite>Mineral ... \n", + "4 Chondrodite>Mineral ... \n", + "... ... ... ... \n", + "995644 Metamorphic>Gneiss>Rock ... \n", + "995645 Metamorphic>Gneiss>Rock ... \n", + "995646 Metamorphic>Gneiss>Rock ... \n", + "995647 Metamorphic>Calc-Silicate>Rock ... \n", + "995648 Metamorphic>Calc-Silicate>Rock ... \n", + "\n", + " description_igsnPrefix description_supplementMetadata_cruiseFieldPrgrm \\\n", + "0 NHB \n", + "1 NHB \n", + "2 NHB \n", + "3 NHB \n", + "4 NHB \n", + "... ... ... \n", + "995644 MBL \n", + "995645 MBL \n", + "995646 UGA \n", + "995647 LTR \n", + "995648 LTR \n", + "\n", + " description_collector description_supplementMetadata_otherName \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_sizeUnit description_publisher \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_publicationUrl_description \\\n", + "0 Smithsonian collections record for NMNH C6294-... \n", + "1 Smithsonian collections record for NMNH C6295-... \n", + "2 Smithsonian collections record for NMNH C6295-... \n", + "3 Smithsonian collections record for NMNH C6296-... \n", + "4 Smithsonian collections record for NMNH C6299-... \n", + "... ... 
\n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_spatialCoverage_geo_longitude \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 -83.433334 \n", + "995647 \n", + "995648 \n", + "\n", + " description_geoLocation_geo_MultiplePoints description_parentIdentifier \n", + "0 NHB002GWS \n", + "1 \n", + "2 NHB002GWU \n", + "3 \n", + "4 \n", + "... ... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + "[995649 rows x 117 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = df.fillna(\"\")\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "b18f982d", + "metadata": {}, + "outputs": [], + "source": [ + "pd.set_option('display.max_columns', None)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "b7104490", + "metadata": {}, + "outputs": [], + "source": [ + "toExclude = [\n", + " \"description_contributors_contributor_familyName\",\n", + " \"description_supplementMetadata_externalSampleId\",\n", + " \"description_supplementMetadata_sampleId\",\n", + " \"description_contributors_contributor_@type\",\n", + " \"description_supplementMetadata_publicationUrl_url\",\n", + " \"description_collector\",\n", + " \"description_publisher_url\",\n", + " \"description_publisher_contactPoint_url\",\n", + " \"description_contributors_contributor_name\",\n", + " \"description_supplementMetadata_publicationUrl\",\n", + " \"description_publisher_name\",\n", + " \"description_publisher_contactPoint_name\",\n", + " \"description_parentIdentifier\",\n", + " \"description_publisher_@id\",\n", + " \"description_contributors_contributor_givenName\",\n", + " \"description_supplementMetadata_publicationUrl_urlType\",\n", + " \"description_supplementMetadata_childIGSN\",\n", + " 
\"description_supplementMetadata_currentArchiveContact\",\n", + " \"description_publisher_contactPoint_email\",\n", + " \"description_parentIdentifier\",\n", + " \"description_supplementMetadata_siblingIGSN\",\n", + " \"description_supplementMetadata_zone\",\n", + " \"description_supplementMetadata_launchId\",\n", + " \"description_supplementMetadata_externalParentName\",\n", + " \"description_collectionStartDate\",\n", + " \"description_supplementMetadata_document_primaryImage\",\n", + " \"description_log_timestamp\",\n", + " \"description_supplementMetadata_document_urlToFile\",\n", + " \"description_supplementMetadata_document_fileName\",\n", + " \"description_spatialCoverage\",\n", + " \"description_supplementMetadata_verticalDatum\",\n", + " \"description_publisher_contactPoint\",\n", + " \"description_supplementMetadata_elevation\",\n", + " \"description_spatialCoverage_geo_longitude\",\n", + " \"description_geoLocation_geo_MultiplePoints\",\n", + " \"description_supplementMetadata_ageMax\",\n", + " \"description_supplementMetadata_ageMin\",\n", + " \"description_geoLocation_geo_latitude\",\n", + " \"description_supplementMetadata_easting\",\n", + " \"description_supplementMetadata_northing\",\n", + " \"description_spatialCoverage_geo_Line\",\n", + " \"description_geoLocation_geo\",\n", + " \"description_spatialCoverage_geo\",\n", + " \"description_supplementMetadata_depthMax\",\n", + " \"description_supplementMetadata_depthMin\",\n", + " \"description_supplementMetadata_elevationEnd\",\n", + " \"description_supplementMetadata_externalParentSampleTypeId\",\n", + " \"description_supplementMetadata_launchTypeName\",\n", + " \"description_collectorDetail\",\n", + " \"description_collectionEndDate\",\n", + " \"description_collectionDatePrecision\",\n", + " \"description_supplementMetadata_otherName\",\n", + " \"description_supplementMetadata_platformName\",\n", + " \"description_supplementMetadata_size\",\n", + " \"description_supplementMetadata_sizeUnit\",\n", + 
" \"description_supplementMetadata_elevationUnit\",\n", + " \"description_geoLocation_geo_longitude\",\n", + " \"description_supplementMetadata_ageUnit\",\n", + " \"description_supplementMetadata_depthScale\",\n", + " \"description_spatialCoverage_geo_latitude\",\n", + " \"description_spatialCoverage_geo_elevation\",\n", + " \"description_supplementMetadata_launchPlatformName\",\n", + " \"description_contributors_roleName\",\n", + " \"description_contributors\",\n", + " \"description\",\n", + " \"description_contributors_contributor\",\n", + " \"description_publisher\",\n", + " \"description_geoLocation\",\n", + " \"description_log\",\n", + " \"description_supplementMetadata_document\",\n", + " \"description_supplementMetadata\",\n", + " \"description_supplementMetadata_navigationType\",\n", + " \"description_publisher_contactPoint_contactType\",\n", + " \"description_log_type\",\n", + " \"description_igsnPrefix\",\n", + " \"description_sampleName\"\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "8ba13ada", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
description_supplementMetadata_localitydescription_materialdescription_supplementMetadata_countrydescription_supplementMetadata_provincedescription_sampleTypedescription_supplementMetadata_platformTypedescription_supplementMetadata_geologicalAgedescription_supplementMetadata_locationDescriptiondescription_supplementMetadata_purposedescription_supplementMetadata_countydescription_descriptiondescription_supplementMetadata_classificationCommentdescription_supplementMetadata_currentArchivedescription_supplementMetadata_citydescription_supplementMetadata_sampleCommentdescription_supplementMetadata_fieldNamedescription_supplementMetadata_primaryLocationTypedescription_collectionMethodDescrdescription_supplementMetadata_primaryLocationNameigsndescription_supplementMetadata_geologicalUnitdescription_supplementMetadata_localityDescriptiondescription_supplementMetadata_originalArchivedescription_supplementMetadata_platformDescrdescription_collectionMethoddescription_supplementMetadata_cruiseFieldPrgrmdescription_supplementMetadata_publicationUrl_description
0Mooreite>MineralUnited StatesNew JerseyIndividual SampleCoordinates for Sterling Hill Mine (MRDS ID: W...Sussex Co.National Mineral Collection, Smithsonian Insti...MooreiteMineSterling Hill MineNHB002GWTOgdensburgSmithsonian collections record for NMNH C6294-...
1Barite>MineralUnited StatesNew JerseyIndividual SampleMatched to the GeoNames record for Franklin, S...Sussex Co.National Mineral Collection, Smithsonian Insti...BaryteMineFranklin MineNHB002GWUFranklinSmithsonian collections record for NMNH C6295-...
2Barite>MineralUnited StatesNew JerseyIndividual SampleMatched to the GeoNames record for Franklin, S...Sussex Co.National Mineral Collection, Smithsonian Insti...BaryteMineFranklin MineNHB002GWVFranklinSmithsonian collections record for NMNH C6295-...
3Hardystonite>MineralUnited StatesNew JerseyIndividual SampleCoordinates from GEOLocate for parse pattern \"...Sussex Co.National Mineral Collection, Smithsonian Insti...Hardystonite with calcite and leucophoeniciteMining DistrictFranklin Mining DistrictNHB002GWWFranklinSmithsonian collections record for NMNH C6296-...
4Chondrodite>MineralUnited StatesNew JerseyIndividual SampleCoordinates for Sterling Hill Mine (MRDS ID: W...Sussex Co.National Mineral Collection, Smithsonian Insti...ChondroditeMineSterling Hill MineNHB002GWZOgdensburgSmithsonian collections record for NMNH C6299-...
....................................................................................
995644Metamorphic>Gneiss>RockIndividual SampleMigmatic gneiss with quartz, biotite, feldspar...Mitchell Peak, West Fosdick Mountains, AntarcticaMBL00001RDept of Geology, Colorado College, Colorado Sp...Manual
995645Metamorphic>Gneiss>RockIndividual SampleMigmatic gneiss with quartz, biotite, feldspar...Mitchell Peak, West Fosdick Mountains, AntarcticaMBL00001RDept of Geology, Colorado College, Colorado Sp...Manual
995646Metamorphic>Gneiss>RockIndividual Samplewhole-rock analysis availableFine grained biotite-quartz-feldspar gneissBen Burton ParkUGA000001Department of Geology, Bryn Mawr College, Bryn...Manual
995647Metamorphic>Calc-Silicate>RockIndividual SamplegneissSalamander Range, north Victoria Land, AntarcticaLTR000001Dept of Geology, Colorado College, Colorado Sp...Manual
995648Metamorphic>Calc-Silicate>RockIndividual SamplegneissSalamander Range, north Victoria Land, AntarcticaLTR000001Dept of Geology, Colorado College, Colorado Sp...Manual
\n", + "

995649 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " description_supplementMetadata_locality \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_material description_supplementMetadata_country \\\n", + "0 Mooreite>Mineral United States \n", + "1 Barite>Mineral United States \n", + "2 Barite>Mineral United States \n", + "3 Hardystonite>Mineral United States \n", + "4 Chondrodite>Mineral United States \n", + "... ... ... \n", + "995644 Metamorphic>Gneiss>Rock \n", + "995645 Metamorphic>Gneiss>Rock \n", + "995646 Metamorphic>Gneiss>Rock \n", + "995647 Metamorphic>Calc-Silicate>Rock \n", + "995648 Metamorphic>Calc-Silicate>Rock \n", + "\n", + " description_supplementMetadata_province description_sampleType \\\n", + "0 New Jersey Individual Sample \n", + "1 New Jersey Individual Sample \n", + "2 New Jersey Individual Sample \n", + "3 New Jersey Individual Sample \n", + "4 New Jersey Individual Sample \n", + "... ... ... \n", + "995644 Individual Sample \n", + "995645 Individual Sample \n", + "995646 Individual Sample \n", + "995647 Individual Sample \n", + "995648 Individual Sample \n", + "\n", + " description_supplementMetadata_platformType \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_geologicalAge \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_locationDescription \\\n", + "0 Coordinates for Sterling Hill Mine (MRDS ID: W... \n", + "1 Matched to the GeoNames record for Franklin, S... \n", + "2 Matched to the GeoNames record for Franklin, S... \n", + "3 Coordinates from GEOLocate for parse pattern \"... \n", + "4 Coordinates for Sterling Hill Mine (MRDS ID: W... \n", + "... ... 
\n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_purpose \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_county description_description \\\n", + "0 Sussex Co. \n", + "1 Sussex Co. \n", + "2 Sussex Co. \n", + "3 Sussex Co. \n", + "4 Sussex Co. \n", + "... ... ... \n", + "995644 \n", + "995645 \n", + "995646 whole-rock analysis available \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_classificationComment \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_currentArchive \\\n", + "0 National Mineral Collection, Smithsonian Insti... \n", + "1 National Mineral Collection, Smithsonian Insti... \n", + "2 National Mineral Collection, Smithsonian Insti... \n", + "3 National Mineral Collection, Smithsonian Insti... \n", + "4 National Mineral Collection, Smithsonian Insti... \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_city \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_sampleComment \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_fieldName \\\n", + "0 Mooreite \n", + "1 Baryte \n", + "2 Baryte \n", + "3 Hardystonite with calcite and leucophoenicite \n", + "4 Chondrodite \n", + "... ... \n", + "995644 Migmatic gneiss with quartz, biotite, feldspar... \n", + "995645 Migmatic gneiss with quartz, biotite, feldspar... 
\n", + "995646 Fine grained biotite-quartz-feldspar gneiss \n", + "995647 gneiss \n", + "995648 gneiss \n", + "\n", + " description_supplementMetadata_primaryLocationType \\\n", + "0 Mine \n", + "1 Mine \n", + "2 Mine \n", + "3 Mining District \n", + "4 Mine \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_collectionMethodDescr \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_primaryLocationName igsn \\\n", + "0 Sterling Hill Mine NHB002GWT \n", + "1 Franklin Mine NHB002GWU \n", + "2 Franklin Mine NHB002GWV \n", + "3 Franklin Mining District NHB002GWW \n", + "4 Sterling Hill Mine NHB002GWZ \n", + "... ... ... \n", + "995644 Mitchell Peak, West Fosdick Mountains, Antarctica MBL00001R \n", + "995645 Mitchell Peak, West Fosdick Mountains, Antarctica MBL00001R \n", + "995646 Ben Burton Park UGA000001 \n", + "995647 Salamander Range, north Victoria Land, Antarctica LTR000001 \n", + "995648 Salamander Range, north Victoria Land, Antarctica LTR000001 \n", + "\n", + " description_supplementMetadata_geologicalUnit \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_localityDescription \\\n", + "0 Ogdensburg \n", + "1 Franklin \n", + "2 Franklin \n", + "3 Franklin \n", + "4 Ogdensburg \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_originalArchive \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 Dept of Geology, Colorado College, Colorado Sp... \n", + "995645 Dept of Geology, Colorado College, Colorado Sp... \n", + "995646 Department of Geology, Bryn Mawr College, Bryn... 
\n", + "995647 Dept of Geology, Colorado College, Colorado Sp... \n", + "995648 Dept of Geology, Colorado College, Colorado Sp... \n", + "\n", + " description_supplementMetadata_platformDescr \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_collectionMethod \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 Manual \n", + "995645 Manual \n", + "995646 Manual \n", + "995647 Manual \n", + "995648 Manual \n", + "\n", + " description_supplementMetadata_cruiseFieldPrgrm \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_publicationUrl_description \n", + "0 Smithsonian collections record for NMNH C6294-... \n", + "1 Smithsonian collections record for NMNH C6295-... \n", + "2 Smithsonian collections record for NMNH C6295-... \n", + "3 Smithsonian collections record for NMNH C6296-... \n", + "4 Smithsonian collections record for NMNH C6299-... \n", + "... ... 
\n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + "[995649 rows x 27 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# select only necessary columns\n", + "columns_not_in_list = [col for col in df.columns if (col not in toExclude and col.startswith(\"description\") and col.lower().endswith(\"@type\") is False) or col == \"igsn\"]\n", + "\n", + "# Create a new DataFrame containing only the desired columns\n", + "df = df[columns_not_in_list]\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "a87c306d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "description_supplementMetadata_locality ['', 'Cleo Mine', 'Mangualde', 'Socavão', 'hp071', 'nw012', 'Sykes Rock', 'Great Vein', 'Martens Peak', 'Indiana Geological Survey Drill Hole 214', 'MIS-6', 'Ankaditany', 'Near Case Quarry', 'Rosinville', 'Rt. 140s Roadcut', 'Antamboletcibe', 'nw096', 'Waldheim', 'Palmer Shaft', 'Nevada Queen Mine']\n", + "-------\n", + "description_material ['Arsendescloizite>Mineral', 'Pseudoboleite>Mineral', 'Emeleusite>Mineral', '', 'Joesmithite>Mineral', 'Tundrite-(Ce)>Mineral', 'Tincalconite>Mineral', 'Cervelleite>Mineral', 'Baricite>Mineral', 'Wolframite>Mineral', 'Iriginite>Mineral', 'Tourmaline>Mineral', 'Gratonite>Mineral', 'Tremolite>Mineral', 'Bismuth>Mineral', 'Metahewettite>Mineral', 'Perovskite>Mineral', 'Thenardite>Mineral', 'Hyalotekite>Mineral', 'Moissanite>Mineral']\n", + "-------\n", + "description_supplementMetadata_country ['', 'Somalia', 'Peru', 'Botswana', 'St. 
Kitts And Nevis', 'Azerbaijan', 'Bulgaria', 'Costa Rica', 'Antarctica', 'Liberia', 'Saint Vincent And The Grenadines', 'Cameroon', 'New Caledonia', 'Cambodia', 'French Guiana', 'South Korea', 'Yemen', 'Dominican Republic', 'Nigeria', 'Jamaica']\n", + "-------\n", + "description_supplementMetadata_province ['', 'Obercassel on Rhine', 'Tirol Ca.', \"Valle D'Aosta\", 'Sardegna', 'Puebla', 'Basque Country', 'Mandalay', 'Landes', 'Osh Oblast', 'Darmaland', 'Jonkoping', 'Württemberg', 'Toamasina', 'Coromandel Peninsula', 'South Australia', 'Hunedoara', 'Gavleborg', 'Salamanca', 'Bohemia']\n", + "-------\n", + "description_sampleType ['Individual Sample>Cube', 'Trawl', 'Individual Sample>Liquid', 'Individual Sample>Mechanical Fraction', 'Individual Sample>Specimen', 'Individual Sample>Toothpick', 'Other', 'Core Section', 'Core Section Half', 'Terrestrial Section', 'Experimental Specimen', 'Individual Sample>Thin Section', 'Experimental Specimen>Other', 'Individual Sample>Slab', 'Individual Sample>Gas', 'Dredge', 'Rock Powder', 'Core Quarter Round', 'Core Sub-Piece', 'Individual Sample>Chemical Fraction']\n", + "-------\n", + "description_supplementMetadata_platformType ['', 'ship', 'Drill Rig ', 'coastal', 'drilling vessel', 'Submersible / HOV', 'DLDS', 'AUV', 'Drilling Vessel', 'Other', 'Type of platform for the cruise.', 'SHALDRIL', 'small craft', 'Shipp', 'NotApplicable', 'Gravity corer', 'Ship', 'Ice Island', 'ROV', 'SHIP']\n", + "-------\n", + "description_supplementMetadata_geologicalAge ['', 'Precambrian>Proterozoic>Paleoproterozoic>Rhyacian', 'Precambrian>Proterozoic>Paleoproterozoic>Siderian', 'middle Archean', 'Permo-Triassic ', 'Cenozoic>Paleogene>Paleocene>Late Paleocene>Thanetian', 'Early Permian', '34.4', 'Lower Pleistocene', 'Quartzite', 'Emsian (Late Devonian)', 'recent', 'Paleoproterozoic', 'Eocene/Oligocene or \\ncontamination', 'Quaternary > Holocene', 'late Cretaceous', 'Late Cretaceous-Eocene', 'Late Pliocene', 'Cenozoic>Neogene>Miocene>Early 
Miocene', 'Paleoarchaean']\n", + "-------\n", + "description_supplementMetadata_locationDescription ['', 'Coordinates for Calaveras Mine (Deposit ID: 10310598) are from the USGS Mineral Resources Data System dataset (downloaded 2015-08-13). Record matched uniquely on mine name, country, state, and county.', 'Matched to the GeoNames record for Mesaba, Saint Louis Co., Minnesota, United States (http://geonames.org/5037321) based on feature name, district/county, state/province, and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script.', 'Matched to the GeoNames record for Umatac, Guam (http://geonames.org/4038739) based on locality name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The error radius represents the center-to-corner distance of the bounding box.', 'Matched to Mindat locality record for Irene Quarry, Irene, Boone Co., Illinois, USA. Data manager assigned an arbitrary error radius of 1 km. URL: https://www.mindat.org/loc-133776.html.', 'Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py script, including Tusas Mountain/Tusas Mountains, Rio Arriba Co., New Mexico, United States (n=2) and San Antonio Mountain, New Mexico, United States (n=2). The coordinates and uncertainty given here describe a circle encompassing the combination of localities matching both place names with the smallest maximum distance between them (~25 km). Matching San Antonio Mt and Tusas Mts required using a wildcard search. 
Other place names mentioned in the EMu record (\"12 Km Nw Of\" and Taos Plateau) could not be matched and were ignored when determining the coordinates given here.', 'Matched to the GeoNames record for South Paris, Oxford Co., Maine, United States (http://geonames.org/4979220) based on locality name, district/county, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Mt. Marie) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script.', 'Matched to the GeoNames record for Tuscany, Italy (http://geonames.org/3165361) based on state/province name and country using the situate.py script. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The uncertainty radius represents the center-to-corner distance of the bounding box (165 km).', 'Matched to the GeoNames record for Cerro de Pasco, Provincia de Pasco, Pasco, Peru (http://geonames.org/3944797) based on municipality name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (600 Level and Cleopatra Vein) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. A minimum uncertainty of 10 km was assigned to all municipality records matched using the script.', 'Coordinates for Henry Clay Mine (Deposit ID: 10166769) are from the USGS Mineral Resources Data System dataset (downloaded 2015-08-13). Record matched uniquely on mine name, country, and state.', 'Matched to the GeoNames record for Aconchi, Sonora, Mexico (http://geonames.org/8583365) based on feature name and country using the situate.py script. 
The script determined that Aconchi (featureCode=ADM2) is located within Sonora (featureCode=ADM1), another feature mentioned in this record. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The error radius represents the center-to-corner distance of the bounding box.', 'Matched to the GeoNames record for Central Massive, Cantal, Auvergne-Rhône-Alpes, France (http://geonames.org/3027940) based on feature name and country using the situate.py script. Another place name mentioned in the EMu record (Limagne Maar (revisit)) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 10 km was assigned to all featureCode=UPLD records matched using the script.', 'Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py script, including Edinburgh, Edinburgh, Scotland, United Kingdom (n=1) and Corstorphine Hill, Edinburgh, Scotland, United Kingdom (n=1). The coordinates and error radius given here describe a circle encompassing the combination of sites matching both names with the smallest maximum distance between them (~5 km).', 'Matched to the GeoNames record for Monte Martica, Provincia di Varese, Lombardy, Italy (http://geonames.org/10630308) based on feature name, state/province, and country using the situate.py script. Matching Martica-Mt required using a wildcard search. Other place names mentioned in the EMu record (Lake Lugano Area and Martic A-Mt) could not be matched and were ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all featureCode=MT records matched using the script.', 'Matched to the GeoNames record for Levack, Ontario, Canada (http://geonames.org/6053216) based on municipality name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (Lake 2500 and Sudbury Irruptive) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 10 km was assigned to all municipality records matched using the script.', 'Matched to Mindat locality record for McDermitt Mine, Opalite District, Humboldt Co., Nevada, USA. Mindat lists this locality as an area where mercury is mined. McDermott may be a misspelling of McDermitt. Could not find any results for a McDermott in any other county in Nevada. Additionally could not find a result for a McDermitt in any other country in Nevada. Data manager assigned an arbitrary error radius of 1 km. URL: https://www.mindat.org/loc-4206.html.', \"Matched to the GeoNames record for Agay, Var, Provence-Alpes-Côte d'Azur, France (http://geonames.org/3038640) based on locality name, county, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (Cap Garonne and Esterel Mtns.) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script.\", 'Colorado Plateau, La Sal Mtns', 'Coordinates for Santo Nino Mine (MRDS ID: M000982, Deposit ID: 10026908) are from the USGS Mineral Resources Data System dataset (downloaded 2015-08-13). 
Record matched uniquely on mine name, country, and state.', 'Matched to the GeoNames record for Thorold, Ontario, Canada (http://geonames.org/6165719) based on municipality name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Walker Quarry) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 10 km was assigned to all municipality records matched using the script.']\n", + "-------\n", + "description_supplementMetadata_purpose ['', 'Changes in Hudson River Sediments Due to Invasion of Zebra Mussels in 1991/1928', 'Testing update batch process', \"Collected in the search for Tuhi's Spring, lost since 1886.\", 'Changes in Hudson River Sediments Due to Invasion of Zebra Mussels in 1991/1971', 'Changes in Hudson River Sediments Due to Invasion of Zebra Mussels in 1991/1744', 'Changes in Hudson River Sediments Due to Invasion of Zebra Mussels in 1991/1715', 'Microbiology; Biogeochemistry', 'Geochemistry analysis; Detrital zircon dating', 'Thin Section', 'Geochemical analyses', 'Changes in Hudson River Sediments Due to Invasion of Zebra Mussels in 1991/1959', 'Oil or gas exploration', 'Geochronology/geochemistry', 'Changes in Hudson River Sediments Due to Invasion of Zebra Mussels in 1991/1992', 'Changes in Hudson River Sediments Due to Invasion of Zebra Mussels in 1991/1851', 'XRF/ICP analysis', 'Morphologic Horizon', 'Surface water for WHONDRS S19S for SpC and anion analyses.', 'Thermal conductivity']\n", + "-------\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "description_supplementMetadata_county ['', 'Mangualde', 'Gowganda', 'Lanier', 'Fritzington', 'POWELL, MT', 'Dallas Co.', 'Huttenberg', 'Grand County', 'Mezquitic', 'Powell', 'Hardy Co.', 'UNION, PA', 'Waldheim', 'Columbus Co.', 'JEFFERSON', 'Beaconsfield', 'Nevada Co.', 'West 
Goshen', 'Toamasina']\n", + "-------\n", + "description_description ['', 'blocky coarsely porphyritic vesicular/ black andesite aa lava flow under ash.', '[aphanitic vesicular basalt]', 'first outcrop of basalt. ', 'This is a sample of type CUTTINGS from an oil or gas well, API number 422150018700.', 'sandstone lens, quartzitic', 'Number of pieces: 1; Quality: display; brown; Crystal size: coarse; Brown botryoidal crust with distinct ccrystal terminations', 'NW Ethiopian plateau; Maychew area; HT1 basalt (high-Ti type 1); Tsibet region (seq.5)', 'Veined gray quartzite. Erratic cobble [125g rock, 349g sieve; weathered pink, weathering salts]', '1929?', 'This is a sample of type CUTTINGS from an oil or gas well, API number 422010261800.', 'This is a sample of type CUTTINGS from an oil or gas well, API number 420390071500.', \"Experimental run product. IHPV at Universite d'Orleans. 400MPa, 1100degC, NNO+1\", 'Poorly sorted, rounded clasts, fine to medium size clasts, gray matrix. [polished slab]', 'Altered sediment or volcanic', '[Light brown, partly laminated, sandstone, Sirius Group?]', 'Under water near Left bank above where Site 83? was located. Set I above lowest Vs [varves].', 'This is a sample of type CUTTINGS from an oil or gas well, API number 423233045600.', 'Mineral Group: Amphibole; Number of pieces: 1; Quality: display; Crystal size: very coarse; ', 'Green pelite, crystal fragment. Black veins. Mn dendrites']\n", + "-------\n", + "description_supplementMetadata_classificationComment ['', 'Rock Classification', 'sediment', 'Rock classification', 'ROCK', 'Mineral Classification', 'Volcanic Glass', 'Rock', 'Mineral', 'rock']\n", + "-------\n", + "description_supplementMetadata_currentArchive ['', 'U. 
Minnesota', 'Institute of Earth Sciences Jaume Almera\\xa0(ICTJA-CSIC) Carrer de Lluís Solé i Sabarís, s/n, 08028 Barcelona, Spain +34 93 409 54 116', 'University of Nebraska-Lincoln (UNL)', 'Victoria University of Wellington', 'Australian National University', 'University of Texas NPL UT000004616 NPL1-007054', 'Institute of Oceanology Chinese Academy of Sciences, Qingdao, China', 'National Energy Technology Laboratory, 3610 Collins Ferry Road, P.O. Box 880, B17, Morgantown, WV, 26507', 'Institute of Earth Sciences Jaume Almera\\xa0(ICTJA-CSIC) Carrer de Lluís Solé i Sabarís, s/n, 08028 Barcelona, Spain +34 93 409 54 12', 'Instituto Andaluz de Ciencias de la Tierra', 'United States Forest Service; Athens, Georgia 30610', 'National Energy Technology Laboratory, 3610 Collins Ferry Road, B-17, Morgantown, WV 26540', 'Bowling Green State University', 'Okayama University', 'Institute of Earth Sciences Jaume Almera\\xa0(ICTJA-CSIC) Carrer de Lluís Solé i Sabarís, s/n, 08028 Barcelona, Spain +34 93 409 54 58', 'United States Forest Service; Athens, Georgia 30617', 'Institute of Earth Sciences Jaume Almera\\xa0(ICTJA-CSIC) Carrer de Lluís Solé i Sabarís, s/n, 08028 Barcelona, Spain +34 93 409 54 98', 'LSU PAST Laboratory Coral Collection', 'University of Kansas Dept. of Geology']\n", + "-------\n", + "description_supplementMetadata_city ['', 'Near Fortmill', 'Mangualde', 'Gowganda', 'Conception del Oro', 'Lanier', 'Fritzington', 'Stanton', 'Chachao', 'Huttenberg', 'Raiz', 'Elixir Springs', 'Com Of Sera', 'Meinohama, Chikuzen', 'Waldheim', 'Araguai', 'Berwick', 'Fetakgomo', 'Shelbyville', 'Yule Creek']\n", + "-------\n", + "description_supplementMetadata_sampleComment ['', 'Slabby grainstone', 'ghostbuster deployment at X3. Heading: 250.38; Altitude: 4.84; Virtual Van Event#: 5111', 'Red micrite in contact with gray cherty limestone. Small styolite. Decent-sized hand sample. Two billets.', 'Sample Treatment: Medium-grained, sparsely vesicular basalt. 
Moderately altered GM, moderately ol-phyric.', 'Yellow major sample taken at side of Maka sulfide structure where temperature was previously 315 C. Can see the fluid pouring out of the bottle. (Temp >200) Altitude: 7.1 Heading: 163 VirtualVan Event#: 4038', 'On-bottom wire out=2650; Off-bottom wire out=2552; Rock_content=small sample', 'Buchenstein formation. Angular chert clasts. Some tabular crystals (feldspar?). Clay matrix.', 'Location detail: E0404', 'Fresh olivine', 'Sect. 2A; 310.5 cmbsf', 'Decent-sized sample of yellow-brown chert. The sample is taped together.', 'NB # BARB4-363', 'Stored in Annex, Row 1, Box Unit 109', 'vertical whole core plug; vial in brown wide cardboard box #2', 'N-6; Sample residue returned', 'Sect. 1A; 640.5 cmbsf', 'Green shale with calcite cement.', 'Weatherford sample 7-2SRP Material Crushed Dried', 'on west side of central sulfide mound']\n", + "-------\n", + "description_supplementMetadata_fieldName ['', 'Culex coronator Dyar & Knab, 1906', 'Datolite with copper', 'Herderite with apatite and lazulite', 'Ore with chlorite', 'Arfvedsonite with eudialyte and albite', 'Fourmarierite with rutherfordine and goethite', 'Milarite with adularia and quartz', 'Azurite with olivenite and chalcocite', 'Margarite with chromite', 'Kentbrooksite with lamprophyllite', 'Humboldtine with fluorite and hematite', 'Norite with bronzitite', 'Carbonatite with melanite', 'Andrewsite with cuprite and copper', 'Monzogranite to granodiorite', 'Pumpellyite with albite', 'Plancheite with conichalcite', 'Anorthosite with plagioclase and chlorite', 'Yofortierite with aegirine']\n", + "-------\n", + "description_supplementMetadata_primaryLocationType ['', 'Midslope', 'Resurgent dome', 'Ebb Tide Distal', 'Stream channel', 'seafloor outcrop', 'Grand Falls ', 'Laguna Tebenquiche', 'oceanic crust', 'Fjord', 'Subduction zone, Aleutian islands', 'natural spring', 'volcanic seamount', 'Baffin Island', 'Old volcanic caldera', 'knoll', 'bottomland forest', 
'water-dependent area', 'fault', 'ridge']\n", + "-------\n", + "description_collectionMethodDescr ['', 'depth=-2059 m', 'please see http://www.mseel.org/ for details', 'depth=-1200 m', 'depth=-5463 m', 'depth=-31.8 m', 'depth=-2594 m', 'depth=-73.1 m', 'Jason 2 Dive: J2-494 on Cruise: KM1005; Filtered bag #23. ~10m N/NW of the old Mrk-105 position. Good flow here on the edge of this steep cliff face in a crack. Quite a few shrimp here. Tmax=40.6. Tavg=40.4. Vol=453. T2=16.', 'depth=-1958 m', 'depth=-5726 m', 'depth=-521.44 m', 'depth=-533.44 m', 'depth=-2138.7 m', 'Jason 2 Dive: J2-521 on Cruise: TN253; HFS-RNA flat 14, NOAA fluid sampler', 'depth=-2503 m', 'depth=-1495 m', 'depth=-521.26 m', 'depth=-79.4 m', 'Manual spring water sample collection']\n", + "-------\n", + "description_supplementMetadata_primaryLocationName ['', 'Commodore No.', 'Stanton', 'Sunrise Mine', 'Framway Mine', 'Fierro Mine', 'Palmer Shaft', 'Southern Victoria Land Ross Sea', 'NW’ of Capelinhos', 'San Tsioro Mine', 'Ellsworth Mountains Urban Point', 'Dallas Mine', 'Elba Island', 'Louiche Quarry', 'Weisser Hirsch Mine', 'Champion Spark Plug Company Mine', 'Oling House', 'Ellsworth Mountains Mt. 
Weems area', 'Mathiati Mine', 'Blue Gravel Lead Mine']\n", + "-------\n", + "igsn ['', 'NEON02J6Z', 'NEON02IS9', 'NEON036AS', 'IEAVO02UP', 'NEON00GZJ', 'NEON03L0N', 'NHB005G4Z', 'IERVTL1ZT', 'NHB006EAO', 'PRR017187', 'IEASH00EO', 'WHO0002CM', 'IEBEG1J3X', 'UGS0002CN', 'BSU0001RT', 'IECUR00HJ', 'NHB002JPC', 'NHB0050LW', 'NHB003U7J']\n", + "-------\n", + "description_supplementMetadata_geologicalUnit ['', 'Taconite River Fm', 'TRAVISPEAK 7352', 'Chimana Formation', 'Drayton (uncertain)', 'TRAVIS PEAK TOP 7042', 'TOP GEORGETOWN 4214', 'Powell', 'SPRABERRY/WOODFORD/FUSSELMAN/SILVAN', 'Vlaming Sandstone', 'Siamo-Negaunee', 'Ben Lomond Basalt', 'Peerless Pegmatite', 'Shublik purple', 'GLEN ROSE/RODESSA/JAMES LIME/PETTIT', 'H18', 'GLEN ROSE/PERMIAN', 'CLEAR FORK/TUBB/PENN/MISS/ELLENBURGER/PRE-CAMBRIAN', 'Reading', 'Jackfork']\n", + "-------\n", + "description_supplementMetadata_localityDescription ['', 'Mangualde', 'Near Bicycle Lake, Yermo', 'Buhowo', 'Timbucto', 'Stanton', 'Onverwacht; Borehole OV-14 207\\' 2\"', 'Christiansand', 'Grodno', 'Huttenberg', 'Locality Key: Solfatara; Near Pozzuole', 'Spoh (Near), Federated Malay States', 'Cliff near base of section; 1/2 mile SE BM 7918', 'Owings Mills (Near)', 'Sutro Tunnel, S. Branch, 1060 Ft. In', 'Goboboseb Mtns', '7 Mi Sse Of Roseburg', 'Callaway Co', 'Ambatomanoina', 'Sambrerete']\n", + "-------\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "description_supplementMetadata_originalArchive ['', 'Weatherford-Golden Sample 2-37G', 'Weatherford-Golden Sample 7-16G-A', 'Australian National University', 'NRM, Swedish Museum of Natural History', 'BDC Personal Archive', 'ECS Samples Repository, U.S. 
Geological Survey', 'CHRONO-ENVIRONNEMENT (ISNI: 0000 0001 2188 3779)', 'Lehigh University Core Laboratory', 'Mauritius Water Resources Unit', 'EDYTEM', 'Weatherford-Golden Sample 7-26G', 'Weatherford-Golden Sample 4-76G', 'Weatherford-Golden Sample 4-86G', 'University of Tennessee', 'Weatherford-Golden Sample 7-10G', 'Weatherford-Golden Sample 7-11G', 'Weatherford-Golden Sample 7-7G', 'Texas Christian University', 'Weatherford-Golden Sample 7-52G']\n", + "-------\n", + "description_supplementMetadata_platformDescr ['', 'truck-mounted Failing Model 2500 drill rig on barge', 'ice mass', 'Operated by the German FSB as an Ocean-class research ship', 'truck-mounted Failing Model 2500 drill rig', 'Oceanographic research vessel', 'ROV Jason 2 from R/V TG Thompson', 'Not Applicable', 'CARR', 'research vessel', 'truck mounted Failing Model 2500 drill rig', 'Description of the platform for the cruise.', 'Research Vessel']\n", + "-------\n", + "description_collectionMethod ['', 'DEEP LAKE DRILLING SYSTEM using Hydraulic Piston Corer (HPC), Extended shoe, non-rotating (EXN), Extended core bit, rotating (XCB)', 'Oil and Gas Exploration', 'Sieved', 'Coring>Multicorer', 'hand sample', 'Sampler:Biology:Suction', 'Coring>TriggerCorer', 'Coring> Push Core', 'Gravity Core', 'Kaiko dive', 'Sampler:Fluid:Ghostbuster', 'drill core', 'rotasonic ', 'Sampler:Gas:bottle', 'Coring>Jumbo Trigger Core', 'Lab made', 'Coring - Jumbo Trigger Core', 'Coring>PistonCorer>Jumbo', 'CTD']\n", + "-------\n", + "description_supplementMetadata_cruiseFieldPrgrm ['', 'Knorr cruise 90 Station C 600m', 'TRIESTE15-77', 'UF Jamaica 2015 Fieldwork', 'KK721108-6', 'KNORR 25 stn107 200m', 'Knorr cruise 73/6 station 10 200m', 'WCC6', 'CADISAR 2', 'BOEM-2015', 'VM12', 'SEGMENT TZ FIELD CAMPAIGN 2072', 'Gillis', 'Deep Freeze 80', 'MGL1111', 'Tim Matthews ugrad research', 'SEGMENT TZ FIELD CAMPAIGN 2234', 'RC0023', 'SEGMENT TZ FIELD CAMPAIGN 2013', '2017 summer field season']\n", + "-------\n", + 
"description_supplementMetadata_publicationUrl_description ['', 'Smithsonian collections record for NMNH 71830 (PET)', 'Smithsonian collections record for NMNH G6888-02 (MIN)', 'Smithsonian collections record for NMNH 113707-12 (PET)', 'Smithsonian collections record for NMNH 118329-746 (PET)', 'Smithsonian collections record for NMNH 96315-00 (MIN)', 'Smithsonian collections record for NMNH 126981-00 (MIN)', 'Smithsonian collections record for NMNH 111212-13 (PET)', 'Smithsonian collections record for NMNH 29605 (PET)', 'Smithsonian collections record for NMNH 118288-84 (PET)', 'Smithsonian collections record for NMNH R15397-00 (MIN)', 'Smithsonian collections record for NMNH 118251-00 (MIN)', 'Smithsonian collections record for NMNH 109426-15 (PET)', 'Smithsonian collections record for NMNH 118010-107 (PET)', 'Smithsonian collections record for NMNH 88211-517 (PET)', 'Smithsonian collections record for NMNH M16404-00 (MIN)', 'Smithsonian collections record for NMNH 116148-21 (PET)', 'Smithsonian collections record for NMNH B8001-00 (MIN)', 'Smithsonian collections record for NMNH 117693-392 (PET)', 'Smithsonian collections record for NMNH 113669-33 (PET)']\n", + "-------\n" + ] + } + ], + "source": [ + "for col in df.columns:\n", + " values = set(df[col].values.tolist())\n", + " print(col, list(values)[:20])\n", + " print(\"-------\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "1090f4b6", + "metadata": {}, + "outputs": [], + "source": [ + "# description mapping\n", + "description_to_text = {\n", + " \"description_supplementMetadata_locality\":\"The name of the specific place where the sample was collected\",\n", + " \"description_supplementMetadata_localityDescription\":\"The additional information about the specific place where the sample was collected\",\n", + " \"description_supplementMetadata_country\":\"The country where the sample was collected\",\n", + " \"description_supplementMetadata_province\":\"The province where the sample 
was collected\",\n", + " \"description_supplementMetadata_geologicalAge\":\"The age of a sample as described by the stratigraphic era, period, state, etc.\",\n", + " \"description_supplementMetadata_locationDescription\":\"The free text description of the location\",\n", + " \"description_supplementMetadata_county\":\"The county where the sample was collected\",\n", + " \"description_supplementMetadata_classificationComment\":\"The taxonomy formal categorization of sample\",\n", + " \"description_supplementMetadata_currentArchive\":\"The name of institution, museum, or repository where the sample is currently stored\",\n", + " \"description_supplementMetadata_sampleComment\":\"The free text to add any comments pertaining to the sample\",\n", + " \"description_supplementMetadata_city\":\"The city where the sample was collected\",\n", + " \"description_supplementMetadata_fieldName\":\"The taxonomy informal classification of sample\",\n", + " \"description_collectionMethodDescr\":\"The additional information about the method by which a sample was collected\",\n", + " \"description_supplementMetadata_launchPlatformName\":\"The name of the launch used to collect the sample\",\n", + " \"description_sampleName\":\"The sample name given by the collector\",\n", + " \"description_supplementMetadata_geologicalUnit\":\"A body of rock established as a distinct entity in the classification of the Earth’s rocks\",\n", + " \"description_supplementMetadata_originalArchive\": \"The name of institution, museum, or repository where the sample was originally stored\",\n", + " \"description_supplementMetadata_platformDescr\":\"The description of the platform for the cruise\",\n", + " \"description_collectionMethod\":\"The method by which a sample was collected\",\n", + " \"description_supplementMetadata_size\":\"Size of the registered object\",\n", + " \"description_supplementMetadata_cruiseFieldPrgrm\":\"The name or identifier of the field program (cruise or expedition), during which 
the sample was collected\",\n", + " \"description_supplementMetadata_publicationUrl_description\":\"The free text description of the related URL\",\n", + " \"description_supplementMetadata_purpose\":\"The free text to describe the collection purpose of the sample\",\n", + " \"description_description\":\"The detailed description of the sample\",\n", + " \"description_supplementMetadata_primaryLocationName\":\"The name of the primary location the sample was collected\",\n", + " \"description_igsnPrefix\":\"The prefix of unique identifier of the sample\",\n", + " \"description_sampleType\": \"The object type of sample indicates that this sample\",\n", + " \"description_supplementMetadata_platformType\":\"The type of platform for the cruise\",\n", + " \"description_supplementMetadata_primaryLocationType\":\"The type of the primary location\", \n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "e457d7ef", + "metadata": {}, + "outputs": [], + "source": [ + "sampleType_desc_map = {\n", + " \"Core\" : \"long cylindrical cores\",\n", + " \"Core Catcher\" : \"material recovered from the core catcher of a sedimentary core and which is treated as a separate section from the core\",\n", + " \"Core Half Round\":\"half-cylindrical products of along-axis split of a whole round\",\n", + " \"Core Piece\": \"material occurring between unambiguous [as curated] breaks in recovery\",\n", + " \"Core Quarter Round\" : \"quarter-cylindrical products of along-axis split of a half round\",\n", + " \"Core Section\" : \"arbitrarily cut segments of a core\",\n", + " \"Core Section Half\":\"half-cylindrical products of along-axis split of a section or its component fragments through a selected diameter\",\n", + " \"Core Slab\": \"rectangular prism of material taken from a core where one dimension is significantly shorter than the others\",\n", + " \"Core Sub-Piece\": \"unambiguously mated portion of a larger piece noted for curatorial management of the 
material\",\n", + " \"Core U-Channel\":\"long rectangular prism of material for continuous measurement\",\n", + " \"Core Whole Round\": \"cylindrical segments of core or core section material\",\n", + " \"CTD\": \"a CTD (Conductivity, Temperature, and Depth) cast sample\",\n", + " \"Cuttings\": \"loose, coarse, unconsolidated material suspended in drilling fluid\",\n", + " \"Dredge\":\"a group of rocks collected by dragging a dredge along the seafloor\",\n", + " \"Experimental Specimen\":\"a synthetic material used during an experiment\",\n", + " \"Grab\": \"a sample (sometimes mechanically collected) from a deposit or area, not intended to be representative of the deposit or area\",\n", + " \"Hole\": \"hole cavity and walls surrounding that cavity\",\n", + " \"Individual Sample\": \"a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid\",\n", + " \"Oriented Core\": \"core that can be positioned on the surface in the same way that it was arranged in the borehole before extraction\",\n", + " \"Other\": \"a sample that does not fit any of the existing type designations. 
It is expected that further detailed description of the particular sample will be provided\",\n", + " \"Rock Powder\": \"a sample created from pulverizing a rock to powder\",\n", + " \"Site\": \"a place where a sample is collected\",\n", + " \"Terrestrial Section\": \"a sample of a section of the near-surface Earth, generally in the critical zone\",\n", + "\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "d6d9f4e3", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/5_/2jhxkdyn5p74jn6q496vblp00000gn/T/ipykernel_1893/2050558754.py:1: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df['description_sampleType'] = df['description_sampleType'].map(lambda x: sampleType_desc_map.get(x.split(\">\")[0], x) + \". The more descriptive object type is \" + x.split(\">\")[1] if len(x.split(\">\")) > 1 else sampleType_desc_map.get(x.split(\">\")[0], x))\n" + ] + } + ], + "source": [ + "df['description_sampleType'] = df['description_sampleType'].map(lambda x: sampleType_desc_map.get(x.split(\">\")[0], x) + \". The more descriptive object type is \" + x.split(\">\")[1] if len(x.split(\">\")) > 1 else sampleType_desc_map.get(x.split(\">\")[0], x))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "9c73a584", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'Trawl', 'a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Toothpick', 'loose, coarse, unconsolidated material suspended in drilling fluid', 'a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Cube', 'a sample of a section of the near-surface Earth, generally in the critical zone', 'a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Cylinder', 'a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Smear', 'a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Powder', 'a sample (sometimes mechanically collected) from a deposit or area, not intended to be representative of the deposit or area', 'a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid', 'core that can be positioned on the surface in the same way that it was arranged in the borehole before extraction', 'a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Bead', 'arbitrarily cut segments of a core', 'quarter-cylindrical products of along-axis split of a half round', 'a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Mechanical Fraction', 'a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Specimen', 'a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Chemical Fraction', 'cylindrical segments of core or core section material', 'a CTD (Conductivity, Temperature, and Depth) cast sample', '-80.566667', 'long cylindrical cores', 'a sample created from pulverizing a rock to powder', 'a place where a sample is collected', 'a sample that does not fit any of the existing type designations. It is expected that further detailed description of the particular sample will be provided', 'a synthetic material used during an experiment', 'half-cylindrical products of along-axis split of a section or its component fragments through a selected diameter', 'a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Liquid', 'a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Gas', 'a synthetic material used during an experiment. The more descriptive object type is Other', 'a group of rocks collected by dragging a dredge along the seafloor', '-82.050000', 'a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Slab', 'material occurring between unambiguous [as curated] breaks in recovery', 'a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Thin Section', 'half-cylindrical products of along-axis split of a whole round', 'hole cavity and walls surrounding that cavity', 'a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Culture', 'unambiguously mated portion of a larger piece noted for curatorial management of the material'}\n" + ] + } + ], + "source": [ + "print(set(df['description_sampleType'].values.tolist()))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "62576e57", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "description_material\n", + "igsn\n" + ] + } + ], + "source": [ + "for col in df.columns:\n", + " if col not in description_to_text:\n", + " print(col)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "7121c87d", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/5_/2jhxkdyn5p74jn6q496vblp00000gn/T/ipykernel_1893/306383103.py:2: DtypeWarning: Columns (15) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " annotated_data = pd.read_csv(\"./datasets/SESARTrainingiSamKeywords.csv\")\n" + ] + } + ], + "source": [ + "# map the labels to CV\n", + "annotated_data = pd.read_csv(\"./datasets/SESARTrainingiSamKeywords.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "2a64abb1", + "metadata": {}, + "outputs": [], + "source": [ + "annotated_data = annotated_data.fillna(\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "ab5f5b96", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
doiprefixigsnisgnprefixtraintextestTokencountconcatenated_textiSampleMaterialiSample Material2extMaterialTypeextProtolithMaterialTypeiSamMaterialSampleTypeExtSampleTypeiSamSampledFeatureUnnamed: 13Unnamed: 14Unnamed: 15
0289762.0IEUHM001MIEUIEUHM | KK830116-3 STA9 RD5 | Rock>Igneous>Vol...653.76Dredging>ChainBagDredge. Igneous>Volcanic>Mafi...mat:rockrksd:Basaltspec:genericaggregationesmat:dredgesf:marinewaterbodybottom
1159035.0IEAVO02WUIEAIEAVO | 09KCNYE007 | | Hand sample; >20g. At n...590.08Rock. Individual Sample. Hand sample; >20g. At...mat:rockspec:othersolidobjectesmat:rockhandsamplesf:earthinterior
2152216.0IEUHM002SIEUIEUHM | MW8701 STA10 RD10 | Rock>Igneous>Volca...584.00Dredging>ChainBagDredge. Igneous>Volcanic>Mafi...mat:rockrksd:Basaltspec:genericaggregationesmat:dredgesf:marinewaterbodybottom
3682572.0DSR0000TIDSRDSR | VM15-10 | Dredge | Dredge contains 41 ro...542.40dredge. Rock. Dredge. Dredge contains 41 rock ...mat:rockspec:genericaggregationesmat:dredgesf:marinewaterbodybottom
4289175.0IEUHM001QIEUIEUHM | KK830116-3 STA17 RD14 | Rock>Igneous>V...531.20Dredging>ChainBagDredge. Igneous>Volcanic>Mafi...mat:rockrksd:Basaltspec:genericaggregationesmat:dredgesf:marinewaterbodybottom
...................................................
988421100359.0GEG0000UXGEGUT0606 | Core | Rock6.40Rock. Core. George Gehrelsmat:rockspec:othersolidobjectesmat:coresf:earthinterior
988422693685.0BSU000208BSUWB-1 | | tuff4.16Rock. Individual Sample. tuff. Mark Schmitz. S...mat:rockrksd:Pyroclastic_Rockspec:othersolidobjectesmat:rockhandsamplesf:earthinterior
988423617501.0BSU0005ELBSUZ-83 | | tuff4.16Rock. Individual Sample. tuff. Mark Schmitz. o...mat:rockrksd:Pyroclastic_Rockspec:othersolidobjectesmat:rockhandsamplesf:earthinterior
988424519881.0IEKBA0009IEK3 | | Basalt3.84Rock. Individual Sample. Basalt. Kimberly Aviadomat:rockrksd:Basaltspec:othersolidobjectesmat:rockhandsamplesf:earthinterior
988425130932.0BSU00011ABSUM-8 | | ash3.52Rock. Individual Sample. ash. Mark Schmitz. Bo...mat:rockorsedimentrksd:Tephraspec:genericaggregationesmat:naturalaggregatesf:earthinterior
\n", + "

988426 rows × 16 columns

\n", + "
" + ], + "text/plain": [ + " doiprefix igsn isgnprefix \\\n", + "0 289762.0 IEUHM001M IEU \n", + "1 159035.0 IEAVO02WU IEA \n", + "2 152216.0 IEUHM002S IEU \n", + "3 682572.0 DSR0000TI DSR \n", + "4 289175.0 IEUHM001Q IEU \n", + "... ... ... ... \n", + "988421 100359.0 GEG0000UX GEG \n", + "988422 693685.0 BSU000208 BSU \n", + "988423 617501.0 BSU0005EL BSU \n", + "988424 519881.0 IEKBA0009 IEK \n", + "988425 130932.0 BSU00011A BSU \n", + "\n", + " traintext estTokencount \\\n", + "0 IEUHM | KK830116-3 STA9 RD5 | Rock>Igneous>Vol... 653.76 \n", + "1 IEAVO | 09KCNYE007 | | Hand sample; >20g. At n... 590.08 \n", + "2 IEUHM | MW8701 STA10 RD10 | Rock>Igneous>Volca... 584.00 \n", + "3 DSR | VM15-10 | Dredge | Dredge contains 41 ro... 542.40 \n", + "4 IEUHM | KK830116-3 STA17 RD14 | Rock>Igneous>V... 531.20 \n", + "... ... ... \n", + "988421 UT0606 | Core | Rock 6.40 \n", + "988422 WB-1 | | tuff 4.16 \n", + "988423 Z-83 | | tuff 4.16 \n", + "988424 3 | | Basalt 3.84 \n", + "988425 M-8 | | ash 3.52 \n", + "\n", + " concatenated_text iSampleMaterial \\\n", + "0 Dredging>ChainBagDredge. Igneous>Volcanic>Mafi... mat:rock \n", + "1 Rock. Individual Sample. Hand sample; >20g. At... mat:rock \n", + "2 Dredging>ChainBagDredge. Igneous>Volcanic>Mafi... mat:rock \n", + "3 dredge. Rock. Dredge. Dredge contains 41 rock ... mat:rock \n", + "4 Dredging>ChainBagDredge. Igneous>Volcanic>Mafi... mat:rock \n", + "... ... ... \n", + "988421 Rock. Core. George Gehrels mat:rock \n", + "988422 Rock. Individual Sample. tuff. Mark Schmitz. S... mat:rock \n", + "988423 Rock. Individual Sample. tuff. Mark Schmitz. o... mat:rock \n", + "988424 Rock. Individual Sample. Basalt. Kimberly Aviado mat:rock \n", + "988425 Rock. Individual Sample. ash. Mark Schmitz. Bo... mat:rockorsediment \n", + "\n", + " iSample Material2 extMaterialType extProtolithMaterialType \\\n", + "0 rksd:Basalt \n", + "1 \n", + "2 rksd:Basalt \n", + "3 \n", + "4 rksd:Basalt \n", + "... ... ... ... 
\n", + "988421 \n", + "988422 rksd:Pyroclastic_Rock \n", + "988423 rksd:Pyroclastic_Rock \n", + "988424 rksd:Basalt \n", + "988425 rksd:Tephra \n", + "\n", + " iSamMaterialSampleType ExtSampleType \\\n", + "0 spec:genericaggregation esmat:dredge \n", + "1 spec:othersolidobject esmat:rockhandsample \n", + "2 spec:genericaggregation esmat:dredge \n", + "3 spec:genericaggregation esmat:dredge \n", + "4 spec:genericaggregation esmat:dredge \n", + "... ... ... \n", + "988421 spec:othersolidobject esmat:core \n", + "988422 spec:othersolidobject esmat:rockhandsample \n", + "988423 spec:othersolidobject esmat:rockhandsample \n", + "988424 spec:othersolidobject esmat:rockhandsample \n", + "988425 spec:genericaggregation esmat:naturalaggregate \n", + "\n", + " iSamSampledFeature Unnamed: 13 Unnamed: 14 Unnamed: 15 \n", + "0 sf:marinewaterbodybottom \n", + "1 sf:earthinterior \n", + "2 sf:marinewaterbodybottom \n", + "3 sf:marinewaterbodybottom \n", + "4 sf:marinewaterbodybottom \n", + "... ... ... ... ... 
\n", + "988421 sf:earthinterior \n", + "988422 sf:earthinterior \n", + "988423 sf:earthinterior \n", + "988424 sf:earthinterior \n", + "988425 sf:earthinterior \n", + "\n", + "[988426 rows x 16 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "annotated_data" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "d75e75a2", + "metadata": {}, + "outputs": [], + "source": [ + "label_map = {}\n", + "highlevel_label_map = {}\n", + "for idx, row in annotated_data.iterrows():\n", + " igsn = row['igsn']\n", + " if row['extMaterialType'] != \"\":\n", + " if \"|\" in row['extMaterialType']:\n", + " labels = row['extMaterialType'].split(\"|\")\n", + " else:\n", + " labels = [row['extMaterialType']]\n", + " else:\n", + " labels = [row['iSampleMaterial']]\n", + " \n", + " label_map[igsn] = [ x.strip().replace(\"_\",\" \") for x in labels ]\n", + " highlevel_label_map[igsn] = row['iSampleMaterial']\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "743decc7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'IEUHM001M': ['rksd:Basalt'],\n", + " 'IEAVO02WU': ['mat:rock'],\n", + " 'IEUHM002S': ['rksd:Basalt'],\n", + " 'DSR0000TI': ['mat:rock'],\n", + " 'IEUHM001Q': ['rksd:Basalt'],\n", + " 'DSR0000TP': ['mat:rock'],\n", + " 'IEUHM002Q': ['rksd:Basalt'],\n", + " 'IEUHM002M': ['mat:mixedsoilsedimentrock'],\n", + " 'IEUHM001P': ['rksd:Basalt'],\n", + " 'IEUHM00R8': ['rksd:Carbonate Sediment'],\n", + " 'IEAVO02ZC': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO02ZE': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO02ZG': ['rksd:Fine Grained Igneous Rock'],\n", + " 'DSR0000TL': ['mat:rock'],\n", + " 'IEUHM002U': ['rksd:Pyroclastic Rock'],\n", + " 'IEAVO02ZD': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO02ZF': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO02ZB': ['rksd:Fine Grained Igneous Rock'],\n", + " 'TOR0000GW': 
['rksd:Pyroclastic Rock'],\n", + " 'TOR0000I9': ['rksd:Basalt'],\n", + " 'URI0000ET': ['rksd:Generic Mudstone'],\n", + " 'IEUHM002V': ['rksd:Basalt'],\n", + " 'IEUHM001N': ['rksd:Basalt'],\n", + " 'IEAVO02X2': ['rksd:Pyroclastic Rock'],\n", + " 'IEAVO02X1': ['rksd:Pyroclastic Rock'],\n", + " 'IEUHM00PS': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'IEUHM0061': ['rksd:Basalt'],\n", + " 'URI0000FF': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'IEUHM00PE': ['rksd:Carbonate Sediment'],\n", + " 'URI0000GG': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEAVO04HS': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO04HR': ['rksd:Fine Grained Igneous Rock'],\n", + " 'URI0000FI': ['rksd:Sedimentary Rock'],\n", + " 'IEAVO03NB': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO03NJ': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO03NN': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEMRS004N': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEMRS004P': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEMRS004T': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEUHM0068': ['rksd:Basalt'],\n", + " 'IEMRS004U': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEMRS004O': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEMRS004S': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEMRS004Q': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEMRS004R': ['mat:biogenicnonorganicmaterial'],\n", + " 'URI0000EW': ['rksd:Sedimentary Rock'],\n", + " 'IEUHM00PU': ['mat:soil'],\n", + " 'URI0000FT': ['rksd:Generic Mudstone'],\n", + " 'KHR00000F': ['rksd:High Magnesium Fine Grained Igneous Rock'],\n", + " 'KHR000019': ['rksd:High Magnesium Fine Grained Igneous Rock'],\n", + " 'IEUHM006H': ['rksd:Basalt'],\n", + " 'URI0000F8': ['rksd:Generic Mudstone'],\n", + " 'URI0000FK': ['rksd:Sedimentary Rock'],\n", + " 'URI0000ER': ['rksd:Generic Mudstone'],\n", + " 'IEUHM002R': ['rksd:Basalt'],\n", + " 'IEUHM0060': ['rksd:Basalt'],\n", + " 'SRH000163': ['mat:rock'],\n", + " 'IEUHM002T': ['rksd:Basalt'],\n", + " 'IEUHM002O': 
['rksd:Basalt'],\n", + " 'IEUHM006M': ['rksd:Basalt', 'rksd:Glass Rich Igneous Rock'],\n", + " 'TOR0000IA': ['rksd:Basalt'],\n", + " 'URI0000EQ': ['rksd:Generic Mudstone'],\n", + " 'URI0000F6': ['rksd:Generic Mudstone'],\n", + " 'IEUHM007O': ['rksd:Basalt'],\n", + " 'TAP00003Y': ['rksd:Clastic Sediment'],\n", + " 'IEUHM00OB': ['rksd:Basalt'],\n", + " 'URI0000FM': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'URI0000ES': ['rksd:Generic Mudstone'],\n", + " 'IEAVO02XF': ['mat:rock'],\n", + " 'TAP00003W': ['rksd:Tephra'],\n", + " 'IEAVO03NK': ['rksd:Fine Grained Igneous Rock'],\n", + " 'TAP00007M': ['rksd:Tephra'],\n", + " 'IENTL002R': ['rksd:Generic Sandstone'],\n", + " 'URI0000GQ': ['rksd:Metasomatic Rock'],\n", + " 'BSU0005YV': ['rksd:Generic Sandstone'],\n", + " 'WDC0000BP': ['mat:sediment'],\n", + " 'IENTL000P': ['rksd:Generic Sandstone'],\n", + " 'IEAVO03I4': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEUHM001O': ['rksd:Basalt'],\n", + " 'IEUHM0067': ['rksd:Basalt'],\n", + " 'IEUHM0066': ['rksd:Pyroclastic Rock'],\n", + " 'URI0000FB': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'KHR00000D': ['rksd:High Magnesium Fine Grained Igneous Rock'],\n", + " 'BSU0005YT': ['rksd:Generic Sandstone'],\n", + " 'URI0000FO': ['rksd:Sedimentary Rock'],\n", + " 'BSU0005YU': ['rksd:Generic Sandstone'],\n", + " 'IEAVO02WY': ['rksd:Pyroclastic Rock'],\n", + " 'IEAVO02WZ': ['rksd:Pyroclastic Rock'],\n", + " 'BSU0005YR': ['rksd:Generic Sandstone'],\n", + " 'IEDUT100K': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEMRS004Z': ['mat:biogenicnonorganicmaterial'],\n", + " 'BSU0005YS': ['rksd:Generic Sandstone'],\n", + " 'IENTL002D': ['rksd:Generic Mudstone'],\n", + " 'URI0000G6': ['rksd:Metasomatic Rock'],\n", + " 'BSU0005U1': ['rksd:Generic Sandstone'],\n", + " 'URI0000G8': ['rksd:Sedimentary Rock'],\n", + " 'WDC0000A5': ['mat:sediment'],\n", + " 'IEMRS0050': ['mat:biogenicnonorganicmaterial'],\n", + " 'SSH00038T': ['mat:soil'],\n", + " 'URI0000EP': ['rksd:Generic Mudstone'],\n", + " 
'IEMRS0052': ['mat:organicmaterial'],\n", + " 'IEMRS0051': ['mat:organicmaterial'],\n", + " 'IEMRS0053': ['mat:organicmaterial'],\n", + " 'MGD0009UK': ['mat:rock'],\n", + " 'KHR000275': ['rksd:High Magnesium Fine Grained Igneous Rock'],\n", + " 'URI0000G0': ['rksd:Generic Mudstone'],\n", + " 'IEAVO05B1': ['rksd:Pyroclastic Rock'],\n", + " 'IEAVO05B5': ['rksd:Pyroclastic Rock'],\n", + " 'IENTL002J': ['rksd:Generic Sandstone'],\n", + " 'IEAVO03ND': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO02WO': ['mat:rock'],\n", + " 'IEAVO03NM': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IENTL010Q': ['rksd:Sedimentary Rock'],\n", + " 'IENTL010Y': ['rksd:Sedimentary Rock'],\n", + " 'IENTL0111': ['rksd:Sedimentary Rock'],\n", + " 'WDC0000BR': ['mat:sediment'],\n", + " 'IENTL010B': ['rksd:Sedimentary Rock'],\n", + " 'IENTL010H': ['rksd:Sedimentary Rock'],\n", + " 'IENTL010I': ['rksd:Sedimentary Rock'],\n", + " 'IENTL010M': ['rksd:Sedimentary Rock'],\n", + " 'IENTL010N': ['rksd:Sedimentary Rock'],\n", + " 'IENTL010P': ['rksd:Sedimentary Rock'],\n", + " 'IENTL010R': ['rksd:Sedimentary Rock'],\n", + " 'IENTL010T': ['rksd:Sedimentary Rock'],\n", + " 'IENTL010U': ['rksd:Sedimentary Rock'],\n", + " 'IENTL010V': ['rksd:Sedimentary Rock'],\n", + " 'IENTL010W': ['rksd:Sedimentary Rock'],\n", + " 'IENTL010X': ['rksd:Sedimentary Rock'],\n", + " 'IENTL010Z': ['rksd:Sedimentary Rock'],\n", + " 'IENTL0110': ['rksd:Sedimentary Rock'],\n", + " 'IENTL0112': ['rksd:Sedimentary Rock'],\n", + " 'IEAVO03NI': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IENTL010C': ['rksd:Sedimentary Rock'],\n", + " 'IENTL010D': ['rksd:Sedimentary Rock'],\n", + " 'IENTL010E': ['rksd:Sedimentary Rock'],\n", + " 'IENTL010F': ['rksd:Sedimentary Rock'],\n", + " 'IENTL010G': ['rksd:Sedimentary Rock'],\n", + " 'IENTL010J': ['rksd:Sedimentary Rock'],\n", + " 'IENTL010K': ['rksd:Sedimentary Rock'],\n", + " 'IENTL010L': ['rksd:Sedimentary Rock'],\n", + " 'IENTL010O': ['rksd:Sedimentary Rock'],\n", + " 'IENTL010S': 
['rksd:Sedimentary Rock'],\n", + " 'IENTL0113': ['rksd:Sedimentary Rock'],\n", + " 'IENTL0114': ['rksd:Sedimentary Rock'],\n", + " 'IENTL0115': ['rksd:Sedimentary Rock'],\n", + " 'IENTL0108': ['rksd:Sedimentary Rock'],\n", + " 'IENTL0103': ['rksd:Sedimentary Rock'],\n", + " 'IENTL0104': ['rksd:Sedimentary Rock'],\n", + " 'IENTL0105': ['rksd:Sedimentary Rock'],\n", + " 'IENTL0107': ['rksd:Sedimentary Rock'],\n", + " 'IENTL0109': ['rksd:Sedimentary Rock'],\n", + " 'IENTL010A': ['rksd:Sedimentary Rock'],\n", + " 'IENTL0116': ['rksd:Sedimentary Rock'],\n", + " 'IENTL0106': ['rksd:Sedimentary Rock'],\n", + " 'IENTL0102': ['rksd:Sedimentary Rock'],\n", + " 'IEUHM0062': ['rksd:Basalt'],\n", + " 'IEUHM006F': ['rksd:Basalt'],\n", + " 'BSU0005Z5': ['rksd:Pyroclastic Rock'],\n", + " 'IEAVO0590': ['rksd:Pyroclastic Rock'],\n", + " 'IENTL001M': ['rksd:Generic Mudstone'],\n", + " 'URI0000GP': ['rksd:Metasomatic Rock'],\n", + " 'IEUHM006C': ['rksd:Basalt'],\n", + " 'TOR0000IH': ['rksd:Basalt'],\n", + " 'BSU0005Z0': ['rksd:Pyroclastic Rock'],\n", + " 'IENTL011G': ['rksd:Sedimentary Rock'],\n", + " 'IENTL011H': ['rksd:Sedimentary Rock'],\n", + " 'IENTL011I': ['rksd:Sedimentary Rock'],\n", + " 'IENTL0117': ['rksd:Sedimentary Rock'],\n", + " 'IENTL0118': ['rksd:Sedimentary Rock'],\n", + " 'IENTL0119': ['rksd:Sedimentary Rock'],\n", + " 'IENTL011A': ['rksd:Sedimentary Rock'],\n", + " 'IENTL011B': ['rksd:Sedimentary Rock'],\n", + " 'IENTL011C': ['rksd:Sedimentary Rock'],\n", + " 'IENTL011D': ['rksd:Sedimentary Rock'],\n", + " 'IENTL011E': ['rksd:Sedimentary Rock'],\n", + " 'IENTL011F': ['rksd:Sedimentary Rock'],\n", + " 'IEUHM00SX': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'IEUHM00PI': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'BSU0005Z3': ['rksd:Pyroclastic Rock'],\n", + " 'BSU0005Z4': ['rksd:Pyroclastic Rock'],\n", + " 'WDC0000BQ': ['mat:sediment'],\n", + " 'IECUR0003': ['rksd:Metamorphic Rock'],\n", + " 'BSU0005YY': ['rksd:Pyroclastic Rock'],\n", + " 'BSU0005Z1': 
['rksd:Pyroclastic Rock'],\n", + " 'IEUHM00PT': ['mat:soil'],\n", + " 'BSU0005YZ': ['rksd:Pyroclastic Rock'],\n", + " 'BSU0005Z2': ['rksd:Pyroclastic Rock'],\n", + " 'BSU0005YW': ['rksd:Pyroclastic Rock'],\n", + " 'BSU0005U3': ['rksd:Pyroclastic Rock'],\n", + " 'BSU0005YX': ['rksd:Pyroclastic Rock'],\n", + " 'IEAVO03NQ': ['rksd:Fine Grained Igneous Rock'],\n", + " 'BSU0005U2': ['rksd:Pyroclastic Rock'],\n", + " 'TOR0000GX': ['rksd:Basalt'],\n", + " 'MGD0001BC': ['mat:sediment'],\n", + " 'IEUHM00RW': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'IEUHM006Q': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'URI0000FE': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'IEMRS0060': ['mat:organicmaterial'],\n", + " 'URI0000FH': ['rksd:Sedimentary Rock'],\n", + " 'IEUHM006P': ['rksd:Basalt'],\n", + " 'IEUHM00PL': ['mat:sediment'],\n", + " 'IEUHM00PJ': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'IEDUT100G': ['mat:biogenicnonorganicmaterial'],\n", + " 'IENTL000T': ['rksd:Generic Sandstone'],\n", + " 'IEMRS0049': ['mat:biogenicnonorganicmaterial'],\n", + " 'MGD00002J': ['mat:rock'],\n", + " 'IEUHM006G': ['rksd:Basalt'],\n", + " 'IECBJ000G': ['rksd:Mylonitic Rock'],\n", + " 'IEMRS004E': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEUHM007D': ['rksd:Basalt'],\n", + " 'NHB00389B': ['ming:oxidemineral'],\n", + " 'IEUHM00O6': ['rksd:Basalt'],\n", + " 'URI0000GX': ['rksd:Pyroclastic Rock'],\n", + " 'IENTL000C': ['rksd:Generic Mudstone'],\n", + " 'IECBJ000H': ['rksd:Mylonitic Rock'],\n", + " 'IEMRS004C': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEMRS004D': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEMRS004B': ['mat:biogenicnonorganicmaterial'],\n", + " 'IENSH0003': ['rksd:Pyroclastic Rock'],\n", + " 'KHR000017': ['rksd:High Magnesium Fine Grained Igneous Rock'],\n", + " 'IENSH0001': ['rksd:Granitoid'],\n", + " 'IENTL001X': ['rksd:Generic Sandstone'],\n", + " 'IEAVO02WQ': ['rksd:Gabbroic Rock'],\n", + " 'IEMRS004A': ['mat:biogenicnonorganicmaterial'],\n", + " 'IENTL0021': 
['rksd:Generic Mudstone'],\n", + " 'IEUHM00TO': ['mat:sediment'],\n", + " 'MGD00002H': ['mat:rock'],\n", + " 'IEUHM00OT': ['rksd:Basalt'],\n", + " 'IEUHM002N': ['rksd:Basalt'],\n", + " 'IEAVO02XA': ['mat:rock'],\n", + " 'IEUHM00U3': ['rksd:Basalt'],\n", + " 'IECBJ000P': ['rksd:Mylonitic Rock'],\n", + " 'IECBJ000R': ['rksd:Mylonitic Rock'],\n", + " 'IEUHM008N': ['rksd:Basalt'],\n", + " 'IEDUT101B': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEUHM00PQ': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'IEAVO03BW': ['rksd:Fine Grained Igneous Rock'],\n", + " 'KHR000007': ['rksd:High Magnesium Fine Grained Igneous Rock'],\n", + " 'IEAVO03BX': ['rksd:Fine Grained Igneous Rock'],\n", + " 'URI0000G7': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'URI0000FY': ['rksd:Generic Mudstone'],\n", + " 'IENTL001F': ['rksd:Generic Mudstone'],\n", + " 'MGD00002I': ['mat:rock'],\n", + " 'URI0000GT': ['rksd:Basalt'],\n", + " 'URI0000GR': ['rksd:Metasomatic Rock'],\n", + " 'IENTL0029': ['rksd:Generic Sandstone'],\n", + " 'IEEVB00KV': ['xxx'],\n", + " 'IEEVB00KW': ['xxx'],\n", + " 'IEEVB00M5': ['xxx'],\n", + " 'IEUHM00U1': ['mat:sediment'],\n", + " 'URI0000FG': ['rksd:Sedimentary Rock'],\n", + " 'IEAVO04KJ': ['rksd:Tephra'],\n", + " 'IEUHM0046': ['rksd:Basalt'],\n", + " 'MGD000BAZ': ['mat:organicmaterial'],\n", + " 'IECBJ000A': ['rksd:Mylonitic Rock'],\n", + " 'IEUHM008S': ['rksd:Basalt'],\n", + " 'IEDUT100C': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEUHM00OW': ['rksd:Basalt'],\n", + " 'IECBJ000D': ['rksd:Mylonitic Rock'],\n", + " 'IENTL002H': ['rksd:Generic Sandstone'],\n", + " 'IEUHM001Y': ['rksd:Basalt'],\n", + " 'IEUHM007A': ['rksd:Basalt'],\n", + " 'TOR0000ID': ['rksd:Basalt'],\n", + " 'IECBJ000O': ['rksd:Metamorphic Rock'],\n", + " 'IEUHM00R9': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'TOR0000I8': ['rksd:Basalt'],\n", + " 'IEDUT1010': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEUHM00PF': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'IEDUT100W': 
['mat:biogenicnonorganicmaterial'],\n", + " 'IEUHM00O5': ['rksd:Basalt'],\n", + " 'URI0000GL': ['rksd:Generic Mudstone'],\n", + " 'IEDUT101D': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEDUT101E': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEUHM00OO': ['rksd:Basalt'],\n", + " 'IEUHM00PG': ['mat:sediment'],\n", + " 'IEACB000X': ['rksd:Tephra'],\n", + " 'IEDUT100S': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEUHM009J': ['rksd:Basalt'],\n", + " 'IEAVO03NC': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO03NE': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO03NF': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO03NG': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO03NH': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO03NL': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IECBJ0008': ['rksd:Mylonitic Rock'],\n", + " 'IEUHM00SP': ['rksd:Generic Mudstone'],\n", + " 'IEUHM008D': ['rksd:Basalt'],\n", + " 'IEAVO02XL': ['mat:rock'],\n", + " 'IEDUT101G': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEDUT101H': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEUHM00PA': ['rksd:Basalt'],\n", + " 'IEAVO03J5': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEDUT1012': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEDUT1014': ['mat:biogenicnonorganicmaterial'],\n", + " 'URI0000F7': ['rksd:Generic Mudstone'],\n", + " 'IEDUT1011': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEDUT1013': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEDUT1015': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEDUT101F': ['mat:biogenicnonorganicmaterial'],\n", + " 'KHR00001C': ['rksd:High Magnesium Fine Grained Igneous Rock'],\n", + " 'IEDUT1019': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEDUT101A': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEUHM00VR': ['mat:mixedsoilsedimentrock'],\n", + " 'NHB004B5U': ['rksd:Metamorphic Rock'],\n", + " 'IEUHM0095': ['rksd:Basalt'],\n", + " 'MGD00003E': ['mat:liquidwater'],\n", + " 'IEAVO03MW': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO0594': 
['rksd:Pyroclastic Rock'],\n", + " 'URI0000GO': ['rksd:Metasomatic Rock'],\n", + " 'IEUHM00SQ': ['rksd:Carbonate Sedimentary Rock', 'rksd:Generic Mudstone'],\n", + " 'IEAVO05BR': ['rksd:Pyroclastic Rock'],\n", + " 'IECBJ000S': ['rksd:Breccia'],\n", + " 'IEAVO03NY': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IECBJ000C': ['rksd:Mylonitic Rock'],\n", + " 'IECBJ000T': ['rksd:Breccia'],\n", + " 'IEUHM00P2': ['rksd:Basalt'],\n", + " 'MGD00001Q': ['mat:rock'],\n", + " 'IECBJ000B': ['rksd:Mylonitic Rock'],\n", + " 'IEUHM00OK': ['rksd:Basalt'],\n", + " 'IEDUT1009': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEUHM007F': ['rksd:Basalt'],\n", + " 'TOR0000HW': ['rksd:Basalt'],\n", + " 'IEAVO03IH': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IECBJ000M': ['rksd:Mylonitic Rock'],\n", + " 'IEUHM001L': ['rksd:Basalt'],\n", + " 'TAP00005T': ['rksd:Andesite'],\n", + " 'IEUHM0087': ['rksd:Basalt'],\n", + " 'IEUHM00TD': ['mat:sediment'],\n", + " 'IECBJ0009': ['rksd:Mylonitic Rock'],\n", + " 'IECBJ000I': ['rksd:Mylonitic Rock'],\n", + " 'IEUHM00O7': ['rksd:Basalt'],\n", + " 'IEUHM006D': ['rksd:Basalt'],\n", + " 'IEAVO02X8': ['ming:silicategermanatemineral'],\n", + " 'IECBJ000J': ['rksd:Mylonitic Rock'],\n", + " 'TOR0000GN': ['rksd:Basalt'],\n", + " 'IEAVO04TS': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEUHM002X': ['rksd:Basalt'],\n", + " 'MGD00003D': ['mat:liquidwater'],\n", + " 'BSU0005H1': ['rksd:Pyroclastic Rock'],\n", + " 'IEUHM00RH': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'IECBJ000K': ['rksd:Mylonitic Rock'],\n", + " 'IEUHM007Q': ['rksd:Basalt'],\n", + " 'TOR0000GU': ['rksd:Basalt'],\n", + " 'IEUHM0069': ['rksd:Basalt'],\n", + " 'IEAVO02X9': ['ming:silicategermanatemineral'],\n", + " 'URI0000EN': ['rksd:Generic Mudstone'],\n", + " 'IEAVO04W4': ['mat:rock'],\n", + " 'IEGPL000A': ['mat:liquidwater'],\n", + " 'IEGPL000B': ['mat:liquidwater'],\n", + " 'IEGPL000C': ['mat:liquidwater'],\n", + " 'IEGPL000D': ['mat:liquidwater'],\n", + " 'IEGPL000E': ['mat:liquidwater'],\n", 
+ " 'IEGPL000F': ['mat:liquidwater'],\n", + " 'IEGPL000G': ['mat:liquidwater'],\n", + " 'IEGPL000H': ['mat:liquidwater'],\n", + " 'IEGPL000I': ['mat:liquidwater'],\n", + " 'IEGPL000J': ['mat:liquidwater'],\n", + " 'IEGPL000K': ['mat:liquidwater'],\n", + " 'IEGPL000L': ['mat:liquidwater'],\n", + " 'IEGPL000M': ['mat:liquidwater'],\n", + " 'IEGPL000N': ['mat:liquidwater'],\n", + " 'IEGPL000O': ['mat:liquidwater'],\n", + " 'IEGPL000P': ['mat:liquidwater'],\n", + " 'IEGPL000Q': ['mat:liquidwater'],\n", + " 'IEGPL000R': ['mat:liquidwater'],\n", + " 'IEGPL000S': ['mat:liquidwater'],\n", + " 'IEGPL000T': ['mat:liquidwater'],\n", + " 'IEGPL000U': ['mat:liquidwater'],\n", + " 'IEGPL000V': ['mat:liquidwater'],\n", + " 'IEGPL000W': ['mat:liquidwater'],\n", + " 'IEGPL000X': ['mat:liquidwater'],\n", + " 'IEGPL000Y': ['mat:liquidwater'],\n", + " 'IEGPL000Z': ['mat:liquidwater'],\n", + " 'IEGPL0010': ['mat:liquidwater'],\n", + " 'IEGPL0011': ['mat:liquidwater'],\n", + " 'IEGPL0012': ['mat:liquidwater'],\n", + " 'IEGPL0013': ['mat:liquidwater'],\n", + " 'IEGPL0014': ['mat:liquidwater'],\n", + " 'IEGPL0015': ['mat:liquidwater'],\n", + " 'IEGPL0016': ['mat:liquidwater'],\n", + " 'IEGPL0017': ['mat:liquidwater'],\n", + " 'IEGPL0018': ['mat:liquidwater'],\n", + " 'IEGPL0019': ['mat:liquidwater'],\n", + " 'IEGPL001A': ['mat:liquidwater'],\n", + " 'IEGPL001B': ['mat:liquidwater'],\n", + " 'IEGPL001C': ['mat:liquidwater'],\n", + " 'IEGPL001D': ['mat:liquidwater'],\n", + " 'IEGPL001E': ['mat:liquidwater'],\n", + " 'IEGPL0001': ['mat:liquidwater'],\n", + " 'IEGPL0002': ['mat:liquidwater'],\n", + " 'IEGPL0003': ['mat:liquidwater'],\n", + " 'IEGPL0004': ['mat:liquidwater'],\n", + " 'IEGPL0005': ['mat:liquidwater'],\n", + " 'IEGPL0006': ['mat:liquidwater'],\n", + " 'IEGPL0007': ['mat:liquidwater'],\n", + " 'IEGPL0008': ['mat:liquidwater'],\n", + " 'IEGPL0009': ['mat:liquidwater'],\n", + " 'IEUHM00SY': ['mat:sediment'],\n", + " 'IEAVO04TC': ['rksd:Fine Grained Igneous Rock'],\n", + " 
'IEUHM00OC': ['rksd:Basalt'],\n", + " 'KHR00001O': ['rksd:High Magnesium Fine Grained Igneous Rock'],\n", + " 'IEAVO03U3': ['mat:rock'],\n", + " 'IEDUT100U': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEUHM006J': ['rksd:Basalt'],\n", + " 'URI0000FS': ['rksd:Generic Mudstone'],\n", + " 'IEDUT101J': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEUHM00SW': ['mat:sediment'],\n", + " 'IEUHM00UH': ['mat:sediment'],\n", + " 'IEUHM002P': ['rksd:Basalt'],\n", + " 'MGD00002E': ['mat:rock'],\n", + " 'IEACB0007': ['rksd:Tephra'],\n", + " 'IEDUT101C': ['mat:biogenicnonorganicmaterial'],\n", + " 'TOR0000IF': ['rksd:Basalt'],\n", + " 'IEAVO02X5': ['rksd:anorthositic rock'],\n", + " 'IEUHM008R': ['rksd:Basalt'],\n", + " 'URI0000EO': ['rksd:Generic Mudstone'],\n", + " 'IEAVO02ZT': ['mat:rock'],\n", + " 'IEUHM0080': ['rksd:Basalt'],\n", + " 'IEDUT1008': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEUHM0091': ['rksd:Basalt'],\n", + " 'IEUHM009K': ['rksd:Basalt'],\n", + " 'IEDUT101I': ['mat:biogenicnonorganicmaterial'],\n", + " 'URI0000H4': ['rksd:Pyroclastic Rock'],\n", + " 'WDC00001Q': ['rksd:Mud Size Sediment'],\n", + " 'IEDUT100P': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEDUT100Q': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEDUT100R': ['mat:biogenicnonorganicmaterial'],\n", + " 'KHR00028H': ['rksd:High Magnesium Fine Grained Igneous Rock'],\n", + " 'URI0000F9': ['rksd:Generic Mudstone'],\n", + " 'IEDUT1016': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEAVO02AK': ['rksd:Tephra'],\n", + " 'IEDUT101K': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEDUT101L': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEDUT101M': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEAVO03BU': ['mat:rock'],\n", + " 'IEUHM00S0': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'URI0000GM': ['rksd:Metasomatic Rock'],\n", + " 'IEDUT1018': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEDUT101O': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEDUT101P': ['mat:biogenicnonorganicmaterial'],\n", + " 
'IEDUT101Q': ['mat:biogenicnonorganicmaterial'],\n", + " 'BSU0005S8': ['rksd:Dioritoid'],\n", + " 'IEUHM00PH': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'IEDUT100X': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEDUT100Y': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEDUT100Z': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEUHM008Z': ['rksd:Basalt'],\n", + " 'IEUHM0098': ['rksd:Basalt'],\n", + " 'IEUHM006I': ['rksd:Basalt'],\n", + " 'TOR0000I5': ['rksd:Basalt'],\n", + " 'URI0000GK': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'KHR00001Z': ['rksd:High Magnesium Fine Grained Igneous Rock'],\n", + " 'IEAVO02BL': ['rksd:Tephra'],\n", + " 'IEAVO058E': ['rksd:Pyroclastic Rock'],\n", + " 'IEUHM0090': ['rksd:Basalt'],\n", + " 'URI0000FN': ['rksd:Sedimentary Rock'],\n", + " 'TOR0000I7': ['rksd:Basalt'],\n", + " 'URI0000FJ': ['rksd:Sedimentary Rock'],\n", + " 'IECBJ000N': ['rksd:Dioritoid'],\n", + " 'IEUHM00OY': ['rksd:Basalt'],\n", + " 'IEUHM00P4': ['rksd:Basalt'],\n", + " 'TOR0000IC': ['rksd:Basalt'],\n", + " 'IEUHM00OZ': ['rksd:Basalt'],\n", + " 'IEDUT100A': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEDUT100B': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEDUT100H': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEDUT100I': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEDUT100J': ['mat:biogenicnonorganicmaterial'],\n", + " 'URI0000GB': ['rksd:Chemical Sedimentary Material'],\n", + " 'IECBJ000F': ['rksd:Dioritoid'],\n", + " 'IENTL0020': ['rksd:Generic Mudstone'],\n", + " 'IEUHM006B': ['rksd:Basalt'],\n", + " 'KHR000006': ['rksd:High Magnesium Fine Grained Igneous Rock'],\n", + " 'URI0000G9': ['rksd:Metasomatic Rock'],\n", + " 'IEMRS005H': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEUHM002A': ['rksd:Basalt'],\n", + " 'MGD00002G': ['mat:rock'],\n", + " 'NHB003WIE': ['rksd:Porphyry'],\n", + " 'IEDUT100L': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEDUT100M': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEDUT100N': ['mat:biogenicnonorganicmaterial'],\n", + " 
'KHR00024K': ['rksd:High Magnesium Fine Grained Igneous Rock'],\n", + " 'URI0000FW': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'IEAVO02BJ': ['rksd:Tephra'],\n", + " 'IEAVO02BK': ['rksd:Tephra'],\n", + " 'IEAVO02B3': ['rksd:Tephra'],\n", + " 'IEAVO02B4': ['rksd:Tephra'],\n", + " 'IEAVO02B5': ['rksd:Tephra'],\n", + " 'IEAVO02BI': ['rksd:Tephra'],\n", + " 'IECBJ000Q': ['rksd:Granitoid'],\n", + " 'URI0000P5': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO02B1': ['rksd:Tephra'],\n", + " 'IEAVO02B2': ['rksd:Tephra'],\n", + " 'IEAVO02B7': ['rksd:Tephra'],\n", + " 'IEAVO02B8': ['rksd:Tephra'],\n", + " 'IENTL001T': ['rksd:Generic Mudstone'],\n", + " 'TAP00007J': ['rksd:Tephra'],\n", + " 'URI0000FV': ['rksd:Sedimentary Rock'],\n", + " 'URI0000FL': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'URI0000NH': ['rksd:Fine Grained Igneous Rock'],\n", + " 'URI0000LP': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO02B0': ['rksd:Tephra'],\n", + " 'IECUR008F': ['rksd:Generic Sandstone'],\n", + " 'IEUHM0084': ['rksd:Basalt'],\n", + " 'SRH00015W': ['mat:rock'],\n", + " 'IEDUT100D': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEDUT100E': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEDUT100F': ['mat:biogenicnonorganicmaterial'],\n", + " 'URI0000IB': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO02B6': ['rksd:Tephra'],\n", + " 'IEAVO03V8': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEMRS005D': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEMRS005E': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEMRS005F': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEUHM00TM': ['mat:sediment'],\n", + " 'URI0000EM': ['rksd:Generic Mudstone'],\n", + " 'IEACB000K': ['rksd:Tephra'],\n", + " 'IEAVO03VE': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IECBJ000L': ['rksd:Granitoid'],\n", + " 'IEMRS005G': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEUHM009E': ['rksd:Basalt'],\n", + " 'IEUHM00SG': ['mat:sediment'],\n", + " 'IEDUT100T': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEDUT100V': 
['mat:biogenicnonorganicmaterial'],\n", + " 'URI0000F5': ['rksd:Generic Mudstone'],\n", + " 'IEUHM00OJ': ['rksd:Basalt'],\n", + " 'IEUHM00VP': ['mat:sediment'],\n", + " 'IEUHM0082': ['rksd:Basalt'],\n", + " 'IEUHM00OR': ['rksd:Basalt'],\n", + " 'IEUHM001R': ['rksd:Basalt'],\n", + " 'IEAVO02BY': ['rksd:Tephra'],\n", + " 'IEAVO043E': ['rksd:Andesite'],\n", + " 'IEUHM007U': ['rksd:Basalt'],\n", + " 'IEUHM00SF': ['mat:sediment'],\n", + " 'IEUHM00S9': ['rksd:Gravel Size Sediment'],\n", + " 'IEUHM00SU': ['mat:sediment'],\n", + " 'IEUHM008L': ['rksd:Basalt'],\n", + " 'IEUHM00OH': ['rksd:Basalt'],\n", + " 'NHB004HAW': ['rksd:Granitoid'],\n", + " 'TAP00003Z': ['rksd:Clastic Sediment'],\n", + " 'IEUHM00SD': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'IEUHM00RE': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'KHR000014': ['rksd:Basalt'],\n", + " 'URI0000FQ': ['rksd:Generic Mudstone'],\n", + " 'IEUHM00SV': ['mat:sediment'],\n", + " 'TOR0000GM': ['rksd:Basalt'],\n", + " 'IEAVO02BD': ['rksd:Tephra'],\n", + " 'IEAVO03EP': ['rksd:Fine Grained Igneous Rock'],\n", + " 'URI0000GA': ['rksd:Sedimentary Rock'],\n", + " 'URI0000GF': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'IEAVO03F9': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IECBJ0001': ['rksd:Mylonitic Rock'],\n", + " 'IEUHM0088': ['rksd:Basalt'],\n", + " 'IEUHM00U4': ['rksd:Basalt'],\n", + " 'TAP00007L': ['rksd:Fine Grained Igneous Rock'],\n", + " 'BSU0006RR': ['rksd:Metamorphic Rock'],\n", + " 'IEAVO03FD': ['rksd:Pyroclastic Rock'],\n", + " 'IEUHM00T3': ['mat:sediment'],\n", + " 'IEUHM00PM': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'URI0000FP': ['rksd:Metasomatic Rock'],\n", + " 'IEUHM0085': ['rksd:Basalt'],\n", + " 'URI0000FC': ['rksd:Generic Mudstone'],\n", + " 'URI0000GN': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'IEAVO02AD': ['rksd:Tephra'],\n", + " 'IEUHM007B': ['rksd:Basalt'],\n", + " 'BSU00065O': ['rksd:Metamorphic Rock'],\n", + " 'TAP00007K': ['rksd:Tephra'],\n", + " 'URI0000NY': ['rksd:Fine Grained Igneous 
Rock'],\n", + " 'IEAVO040A': ['rksd:Andesite'],\n", + " 'IEUHM007H': ['rksd:Basalt'],\n", + " 'IEUHM00TJ': ['mat:sediment'],\n", + " 'IEUHM00VJ': ['mat:rock'],\n", + " 'IEAVO042E': ['rksd:Andesite'],\n", + " 'IEUHM00SB': ['mat:rockorsediment'],\n", + " 'IEUHM00TX': ['rksd:Carbonate Sedimentary Rock', 'rksd:Generic Mudstone'],\n", + " 'MGD00001P': ['mat:rock'],\n", + " 'TOR0000HY': ['rksd:Basalt'],\n", + " 'IEAVO02WT': ['mat:rock'],\n", + " 'IEUHM00SS': ['rksd:Carbonate Sediment'],\n", + " 'IEACB000M': ['rksd:Tephra'],\n", + " 'IECUR001B': ['rksd:Generic Sandstone'],\n", + " 'IEUHM00O8': ['rksd:Basalt'],\n", + " 'IEUHM00U2': ['mat:sediment'],\n", + " 'URI0000EE': ['rksd:Metasomatic Rock'],\n", + " 'URI0000G5': ['rksd:Metasomatic Rock'],\n", + " 'IEAVO03BT': ['mat:rock'],\n", + " 'IEUHM00PO': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'IEUHM00TY': ['mat:sediment'],\n", + " 'URI0000GC': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'IEAVO03G7': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEUHM00SE': ['mat:rockorsediment'],\n", + " 'IEUHM002I': ['rksd:Basalt'],\n", + " 'IEAVO02AE': ['rksd:Tephra'],\n", + " 'URI0000MB': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEUHM0094': ['rksd:Basalt'],\n", + " 'IEUHM00T4': ['mat:sediment'],\n", + " 'UHM000EC1': ['mat:biogenicnonorganicmaterial'],\n", + " 'URI0000P8': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEUHM00S4': ['rksd:Metasomatic Rock'],\n", + " 'SRH00015V': ['mat:rock'],\n", + " 'IECUR00D0': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEUHM008E': ['rksd:Basalt'],\n", + " 'URI0000GH': ['rksd:Metasomatic Rock'],\n", + " 'PRR000901': ['rksd:Chemical Sedimentary Material'],\n", + " 'IEAVO0438': ['rksd:Andesite'],\n", + " 'IEUHM00P1': ['rksd:Basalt'],\n", + " 'IEUHM00NE': ['rksd:Basalt'],\n", + " 'IEUHM00TI': ['mat:rockorsediment'],\n", + " 'IEUHM00R3': ['rksd:Metasomatic Rock'],\n", + " 'TAP00003A': ['rksd:Tephra'],\n", + " 'IEUHM00OP': ['rksd:Basalt'],\n", + " 'IEUHM00SZ': ['mat:sediment'],\n", + " 'IEUHM00S5': 
['rksd:Carbonate Sedimentary Rock'],\n", + " 'IEUHM002J': ['rksd:Basalt'],\n", + " 'MGD000BB3': ['mat:organicmaterial'],\n", + " 'MGD000BB4': ['mat:organicmaterial'],\n", + " 'IEDUT101N': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEAVO02CH': ['rksd:Tephra'],\n", + " 'IEAVO02CI': ['rksd:Tephra'],\n", + " 'IEUHM00OM': ['rksd:Basalt'],\n", + " 'IEUHM00SN': ['mat:sediment'],\n", + " 'IEUHM00TZ': ['mat:sediment'],\n", + " 'BSU00065M': ['rksd:Metamorphic Rock'],\n", + " 'IEAVO02X0': ['mat:rock'],\n", + " 'IEUHM00T9': ['mat:sediment'],\n", + " 'TAP00004L': ['rksd:Tephra'],\n", + " 'IEAVO02CG': ['rksd:Tephra'],\n", + " 'BSU00065N': ['rksd:Metamorphic Rock'],\n", + " 'IEUHM008B': ['rksd:Basalt'],\n", + " 'BSU00065K': ['rksd:Metamorphic Rock'],\n", + " 'IEAVO042S': ['rksd:Andesite'],\n", + " 'IEDUT1001': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEUHM00RQ': ['rksd:Metasomatic Rock'],\n", + " 'TOR0000GS': ['rksd:Basalt'],\n", + " 'URI0000G4': ['rksd:Metasomatic Rock'],\n", + " 'URI0000PC': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO042R': ['rksd:Andesite'],\n", + " 'IEUHM00VS': ['rksd:Basalt'],\n", + " 'KHR000005': ['rksd:High Magnesium Fine Grained Igneous Rock'],\n", + " 'KHR00001W': ['rksd:High Magnesium Fine Grained Igneous Rock'],\n", + " 'TOR0000GT': ['rksd:Basalt'],\n", + " 'BSU00065L': ['rksd:Metamorphic Rock'],\n", + " 'IEAVO0404': ['rksd:Andesite'],\n", + " 'IEUHM007V': ['rksd:Basalt'],\n", + " 'URI0000GJ': ['rksd:Metasomatic Rock'],\n", + " 'NHB006G89': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'NHB006G8B': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'URI0000NM': ['rksd:Hybrid Sedimentary Rock'],\n", + " 'IEPAP0025': ['rksd:Generic Conglomerate'],\n", + " 'IEUHM00T2': ['mat:sediment'],\n", + " 'TOR0000I4': ['rksd:Basalt'],\n", + " 'URI0000P7': ['rksd:Fine Grained Igneous Rock'],\n", + " 'WDC0000BA': ['mat:sediment'],\n", + " 'IENTL000Q': ['rksd:Generic Sandstone'],\n", + " 'IEUHM002F': ['rksd:Basalt'],\n", + " 'KHR00028W': ['rksd:High Magnesium 
Fine Grained Igneous Rock'],\n", + " 'NHB0003GX': ['ming:nativeelementmineral'],\n", + " 'TOR0000HS': ['rksd:Basalt'],\n", + " 'IEUHM00OE': ['rksd:Basalt'],\n", + " 'IEUHM00TC': ['mat:sediment'],\n", + " 'IEUHM00U0': ['mat:sediment'],\n", + " 'URI0000NG': ['rksd:Hybrid Sedimentary Rock'],\n", + " 'IEREX0002': ['mat:organicmaterial'],\n", + " 'URI0000IC': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEMRS0059': ['mat:biogenicnonorganicmaterial'],\n", + " 'KHR00001R': ['rksd:High Magnesium Fine Grained Igneous Rock'],\n", + " 'TOR0000HO': ['rksd:Metasomatic Rock'],\n", + " 'IEUHM007T': ['rksd:Basalt'],\n", + " 'IEUHM00TW': ['mat:sediment'],\n", + " 'IEUHM00S2': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'TAP00007I': ['rksd:Fine Grained Igneous Rock'],\n", + " 'TOR0000FV': ['rksd:Metamorphic Rock'],\n", + " 'IEAVO058W': ['rksd:Pyroclastic Rock'],\n", + " 'IEUHM00RV': ['rksd:Pyroclastic Rock'],\n", + " 'TOR0000GQ': ['rksd:Basalt'],\n", + " 'URI0000FD': ['rksd:Generic Mudstone'],\n", + " 'URI0000LU': ['rksd:Sedimentary Rock'],\n", + " 'IEAVO02AF': ['rksd:Tephra'],\n", + " 'IENTL001J': ['rksd:Generic Mudstone'],\n", + " 'PRR042033': ['rksd:Acidic Igneous Rock'],\n", + " 'IEAVO02X3': ['mat:rock'],\n", + " 'IEAVO02X4': ['mat:rock'],\n", + " 'IEAVO03MY': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEUHM00SC': ['mat:sediment'],\n", + " 'IEUHM00RY': ['mat:rockorsediment'],\n", + " 'IEUHM0016': ['mat:rock'],\n", + " 'IEUHM00RA': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'URI0000N1': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO03NO': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEUHM00VO': ['mat:sediment'],\n", + " 'MGD00002A': ['mat:rock'],\n", + " 'BSU00001A': ['rksd:Rhyolitoid'],\n", + " 'BSU00001B': ['rksd:Rhyolitoid'],\n", + " 'BSU00001C': ['rksd:Rhyolitoid'],\n", + " 'BSU00001D': ['rksd:Rhyolitoid'],\n", + " 'BSU00001E': ['rksd:Rhyolitoid'],\n", + " 'BSU00001F': ['rksd:Rhyolitoid'],\n", + " 'BSU00001G': ['rksd:Rhyolitoid'],\n", + " 'BSU00001H': 
['rksd:Rhyolitoid'],\n", + " 'BSU00001I': ['rksd:Rhyolitoid'],\n", + " 'IEUHM00S6': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'IEAVO02U2': ['mat:rock'],\n", + " 'IEUHM008V': ['rksd:Basalt'],\n", + " 'BSU000019': ['rksd:Rhyolitoid'],\n", + " 'BSU000018': ['rksd:Rhyolitoid'],\n", + " 'IEAVO03TY': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO03ZP': ['rksd:Andesite'],\n", + " 'URI0000H1': ['rksd:Pyroclastic Rock'],\n", + " 'URI0000LJ': ['rksd:Fine Grained Igneous Rock'],\n", + " 'BSU0005KK': ['rksd:Metamorphic Rock'],\n", + " 'IEAVO03V7': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEUHM00SO': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'TOR0000GV': ['rksd:Andesite'],\n", + " 'IENTL0003': ['rksd:Generic Sandstone'],\n", + " 'IEUHM00TK': ['mat:sediment'],\n", + " 'IEDUT100O': ['mat:biogenicnonorganicmaterial'],\n", + " 'TOR0000G6': ['rksd:Basalt'],\n", + " 'TOR0000I6': ['rksd:Basalt'],\n", + " 'IEMRS0057': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEMRS0058': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEUHM0089': ['rksd:Basalt'],\n", + " 'IEUHM009N': ['rksd:Basalt'],\n", + " 'IEUHM00TR': ['mat:sediment'],\n", + " 'NHB0052UT': ['rksd:Metasomatic Rock'],\n", + " 'NHB004BHA': ['rksd:Metasomatic Rock'],\n", + " 'SRH00015Q': ['mat:rock'],\n", + " 'BSU00065J': ['rksd:Fragmental Igneous Rock'],\n", + " 'BSU0005KO': ['rksd:Metamorphic Rock'],\n", + " 'KHR000311': ['rksd:High Magnesium Fine Grained Igneous Rock'],\n", + " 'IEUHM0020': ['rksd:Basalt'],\n", + " 'IEDUT1000': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEUHM009M': ['rksd:Basalt'],\n", + " 'IEUHM00S3': ['rksd:Generic Mudstone'],\n", + " 'IEAVO02Z8': ['rksd:Igneous Rock'],\n", + " 'IENTL000H': ['rksd:Generic Sandstone'],\n", + " 'IEROY000A': ['ocmat:charcoal'],\n", + " 'IEROY000B': ['ocmat:charcoal'],\n", + " 'IEROY000C': ['ocmat:charcoal'],\n", + " 'IEROY000D': ['ocmat:charcoal'],\n", + " 'IEROY000E': ['ocmat:charcoal'],\n", + " 'IEROY000G': ['ocmat:charcoal'],\n", + " 'IEROY000H': 
['ocmat:charcoal'],\n", + " 'IEROY000I': ['ocmat:charcoal'],\n", + " 'IEROY000J': ['ocmat:charcoal'],\n", + " 'IEROY000K': ['ocmat:charcoal'],\n", + " 'IEROY000L': ['ocmat:charcoal'],\n", + " 'IEROY000M': ['ocmat:charcoal'],\n", + " 'IEROY000N': ['ocmat:charcoal'],\n", + " 'IEROY000O': ['ocmat:charcoal'],\n", + " 'IEROY000P': ['ocmat:charcoal'],\n", + " 'IEROY000Q': ['ocmat:charcoal'],\n", + " 'IEROY000R': ['ocmat:charcoal'],\n", + " 'IEROY000S': ['ocmat:charcoal'],\n", + " 'IEUHM00PB': ['rksd:Basalt'],\n", + " 'IEUHM00PW': ['rksd:Basalt'],\n", + " 'MGD00001S': ['mat:rock'],\n", + " 'IEAVO02C6': ['rksd:Tephra'],\n", + " 'IEAVO02C7': ['rksd:Tephra'],\n", + " 'IEAVO02C8': ['rksd:Tephra'],\n", + " 'IEAVO03IQ': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEROY0003': ['ocmat:charcoal'],\n", + " 'IEROY0005': ['ocmat:charcoal'],\n", + " 'IEROY0006': ['ocmat:charcoal'],\n", + " 'IEROY0007': ['ocmat:charcoal'],\n", + " 'IEROY0008': ['ocmat:charcoal'],\n", + " 'IEROY0009': ['ocmat:charcoal'],\n", + " 'IEUHM008I': ['rksd:Basalt'],\n", + " 'KHR00027X': ['rksd:High Magnesium Fine Grained Igneous Rock'],\n", + " 'MGD00003R': ['mat:liquidwater'],\n", + " 'IEAVO04IP': ['rksd:Tephra'],\n", + " 'IEROY0001': ['ocmat:charcoal'],\n", + " 'IEROY000F': ['ocmat:charcoal'],\n", + " 'IEROY0004': ['ocmat:charcoal'],\n", + " 'IEUHM008W': ['rksd:Basalt'],\n", + " 'IEUHM008U': ['rksd:Basalt'],\n", + " 'IEUHM00UA': ['mat:sediment'],\n", + " 'IEDUT1005': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEDUT1006': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEDUT1007': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEAVO02C5': ['rksd:Tephra'],\n", + " 'IEUHM007S': ['rksd:Basalt'],\n", + " 'IEUHM0096': ['rksd:Basalt'],\n", + " 'KHR00001L': ['rksd:High Magnesium Fine Grained Igneous Rock'],\n", + " 'TOR0000HN': ['rksd:Basalt'],\n", + " 'URI0000NZ': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEUHM007G': ['rksd:Basalt', 'rksd:Glass Rich Igneous Rock'],\n", + " 'URI0000GZ': ['rksd:Pyroclastic 
Rock'],\n", + " 'URI0000H0': ['rksd:Pyroclastic Rock'],\n", + " 'IEAVO03ZO': ['rksd:Pyroclastic Rock'],\n", + " 'IEDS1000F': ['rksd:Pyroclastic Rock'],\n", + " 'IEUHM00OQ': ['rksd:Basalt'],\n", + " 'URI0000EL': ['rksd:Generic Mudstone'],\n", + " 'IEUHM00OV': ['rksd:Basalt'],\n", + " 'IEUHM00OD': ['rksd:Basalt'],\n", + " 'IEUHM00P3': ['rksd:Basalt'],\n", + " 'IEUHM00V1': ['mat:sediment'],\n", + " 'PRR041908': ['rksd:Acidic Igneous Rock'],\n", + " 'TAP00007D': ['rksd:Tephra'],\n", + " 'URI0000H5': ['rksd:Pyroclastic Rock'],\n", + " 'IEAVO02AS': ['rksd:Tephra'],\n", + " 'IEAVO02AT': ['rksd:Tephra'],\n", + " 'IEUHM00PN': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'IEUHM006K': ['rksd:Basalt'],\n", + " 'TAP00002P': ['rksd:Tephra'],\n", + " 'WDC00001P': ['rksd:Mud Size Sediment'],\n", + " 'IEAVO03NP': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO03NR': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO03NS': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO03NT': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO03NU': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO03NV': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO03NW': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO03NX': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO04L4': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IECUR00FM': ['rksd:Generic Sandstone'],\n", + " 'IESAG0012': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'KHR00001Q': ['rksd:High Magnesium Fine Grained Igneous Rock'],\n", + " 'MGD00002D': ['mat:rock'],\n", + " 'NHB001NWZ': ['ming:oxidemineral'],\n", + " 'IEAVO02AR': ['rksd:Tephra'],\n", + " 'IESAG0011': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'IESAG0013': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'TOR0000IB': ['rksd:Basalt'],\n", + " 'BSU000017': ['rksd:Rhyolitoid'],\n", + " 'IESAG000V': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'IESAG000W': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'IESAG000X': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'IESAG000Y': 
['rksd:Carbonate Sedimentary Rock'],\n", + " 'IESAG000Z': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'IESAG0010': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'SRH00015U': ['mat:rock'],\n", + " 'BSU0005ZU': ['rksd:Pyroclastic Rock'],\n", + " 'IEAVO02CK': ['rksd:Tephra'],\n", + " 'IEAVO02CL': ['rksd:Tephra'],\n", + " 'IEMRS0065': ['mat:biogenicnonorganicmaterial'],\n", + " 'IESAG000U': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'IEUHM00SR': ['rksd:Carbonate Sedimentary Rock', 'rksd:Generic Mudstone'],\n", + " 'IEUHM00T8': ['mat:sediment'],\n", + " 'NHB0073XU': ['rksd:Non Clastic Siliceous Sedimentary Rock'],\n", + " 'IEDS1000H': ['rksd:Pyroclastic Rock'],\n", + " 'IEUHM007Y': ['rksd:Basalt'],\n", + " 'IEUHM00SM': ['rksd:Generic Mudstone'],\n", + " 'PRR055764': ['rksd:Clastic Sedimentary Rock'],\n", + " 'IEAVO02CJ': ['rksd:Tephra'],\n", + " 'IEAVO02XE': ['mat:rock'],\n", + " 'IECUR0002': ['rksd:Metamorphic Rock'],\n", + " 'IEUHM008F': ['rksd:Basalt'],\n", + " 'IEUHM00SA': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'IEUHM00TH': ['mat:sediment'],\n", + " 'IEUHM00TP': ['mat:sediment'],\n", + " 'WDC0000BO': ['mat:sediment'],\n", + " 'IEAVO03MF': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEUHM007J': ['rksd:Basalt'],\n", + " 'IEUHM00T1': ['mat:sediment'],\n", + " 'IEUHM00U7': ['rksd:Basalt'],\n", + " 'IEAVO03MK': ['rksd:Fine Grained Igneous Rock'],\n", + " 'URI0000FZ': ['rksd:Generic Mudstone'],\n", + " 'WDC000013': ['rksd:Mud Size Sediment'],\n", + " 'BSU00065H': ['rksd:Fragmental Igneous Rock'],\n", + " 'IEAVO03BV': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEUHM00PD': ['rksd:Basalt'],\n", + " 'NHB006NBC': ['rksd:Generic Mudstone'],\n", + " 'IEDUT1002': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEDUT1003': ['mat:biogenicnonorganicmaterial'],\n", + " 'TOR0000FK': ['rksd:Andesite'],\n", + " 'IEUHM008O': ['rksd:Basalt'],\n", + " 'URI0000PB': ['rksd:Chemical Sedimentary Material'],\n", + " 'WDC0000BB': ['mat:sediment'],\n", + " 'BSU00065I': ['rksd:Fragmental 
Igneous Rock'],\n", + " 'IEMRS0055': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEMRS0056': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEUHM00TA': ['mat:sediment'],\n", + " 'IEUHM00TN': ['mat:sediment'],\n", + " 'NHB003VJS': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'TOR0000HX': ['rksd:Basalt'],\n", + " 'IEAVO02AG': ['rksd:Tephra'],\n", + " 'IEAVO04T1': ['rksd:Pyroclastic Rock'],\n", + " 'IECBJ000E': ['rksd:Metamorphic Rock'],\n", + " 'IEUHM009L': ['rksd:Basalt'],\n", + " 'IEUHM00O9': ['rksd:Basalt'],\n", + " 'IEUHM00RZ': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'IEUHM00RM': ['rksd:Metasomatic Rock'],\n", + " 'KHR00026C': ['rksd:High Magnesium Fine Grained Igneous Rock'],\n", + " 'URI0000GV': ['rksd:Pyroclastic Rock'],\n", + " 'BSU00065F': ['rksd:Fragmental Igneous Rock'],\n", + " 'IEUHM007E': ['rksd:Basalt'],\n", + " 'IEUHM0099': ['rksd:Basalt'],\n", + " 'IEUHM009P': ['rksd:Basalt'],\n", + " 'IEUHM00RF': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'KHR00024R': ['rksd:High Magnesium Fine Grained Igneous Rock'],\n", + " 'URI0000FA': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'NHB006NCJ': ['rksd:Generic Mudstone'],\n", + " 'DSR0000TM': ['mat:organicmaterial'],\n", + " 'IEUHM00OU': ['rksd:Basalt'],\n", + " 'IEUHM00SL': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'TOR0000FU': ['rksd:Basalt'],\n", + " 'BSU00065G': ['rksd:Fragmental Igneous Rock'],\n", + " 'IEDS1000D': ['rksd:Pyroclastic Rock'],\n", + " 'IENTL0026': ['rksd:Generic Sandstone'],\n", + " 'IEUHM0083': ['rksd:Basalt', 'rksd:Glass Rich Igneous Rock'],\n", + " 'IEUHM00TB': ['mat:sediment'],\n", + " 'IEUHM00RG': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'IEUHM00RB': ['rksd:Carbonate Sediment'],\n", + " 'URI0000GW': ['rksd:Pyroclastic Rock'],\n", + " 'URI0000MN': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO02AV': ['rksd:Tephra'],\n", + " 'IEAVO02AW': ['rksd:Tephra'],\n", + " 'IEUHM009D': ['rksd:Basalt'],\n", + " 'IEUHM00P5': ['rksd:Basalt'],\n", + " 'IEUHM00ST': 
['mat:organicmaterial'],\n", + " 'IEUHM00TS': ['mat:sediment'],\n", + " 'IEUHM001S': ['rksd:Doleritic Rock'],\n", + " 'IEUHM00RL': ['rksd:Metasomatic Rock'],\n", + " 'PRR053135': ['rksd:Basalt'],\n", + " 'SRH000161': ['mat:rock'],\n", + " 'TOR0000HB': ['rksd:Andesite'],\n", + " 'IEAVO02AU': ['rksd:Tephra'],\n", + " 'IENTL001I': ['rksd:Generic Mudstone'],\n", + " 'IEMRS0068': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEPAP0035': ['rksd:Generic Mudstone'],\n", + " 'IEUHM007R': ['rksd:Basalt'],\n", + " 'IEAVO03GL': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEUHM00RU': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'IEUHM00S7': ['mat:rockorsediment'],\n", + " 'PRR021194': ['rksd:Doleritic Rock'],\n", + " 'PRR038014': ['rksd:Porphyry'],\n", + " 'PRR042034': ['rksd:Breccia'],\n", + " 'IEAVO03ZN': ['rksd:Andesite'],\n", + " 'IENTL011Z': ['rksd:Sedimentary Rock'],\n", + " 'IEUHM0081': ['rksd:Basalt'],\n", + " 'PRR036392': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'TOR0000IE': ['rksd:Basalt'],\n", + " 'IEAVO02AO': ['rksd:Tephra'],\n", + " 'IEAVO03ZT': ['rksd:Andesite'],\n", + " 'IEUHM00RC': ['mat:biogenicnonorganicmaterial'],\n", + " 'KHR00024J': ['rksd:High Magnesium Fine Grained Igneous Rock'],\n", + " 'TAP00002G': ['rksd:Pyroclastic Rock'],\n", + " 'UHM000EC2': ['mat:biogenicnonorganicmaterial'],\n", + " 'URI0000EG': ['rksd:Basalt'],\n", + " 'WDC0000BC': ['mat:sediment'],\n", + " 'WDC0000BJ': ['mat:sediment'],\n", + " 'BSU00000B': ['rksd:Rhyolitoid'],\n", + " 'IEAVO02BX': ['rksd:Tephra'],\n", + " 'IENTL011W': ['rksd:Sedimentary Rock'],\n", + " 'IENTL011J': ['rksd:Sedimentary Rock'],\n", + " 'IENTL011K': ['rksd:Sedimentary Rock'],\n", + " 'IEROY0002': ['ocmat:charcoal'],\n", + " 'IEUHM00SJ': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'IEUHM00TU': ['mat:sediment'],\n", + " 'KHR00001J': ['rksd:High Magnesium Fine Grained Igneous Rock'],\n", + " 'KHR000309': ['rksd:High Magnesium Fine Grained Igneous Rock'],\n", + " 'PRR050547': ['rksd:Metamorphic Rock'],\n", + " 
'BSU0005ZS': ['rksd:Pyroclastic Rock'],\n", + " 'IEAVO03M9': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEUHM00RT': ['rksd:Sand Size Sediment'],\n", + " 'IEUHM00SH': ['mat:sediment'],\n", + " 'IEUHM00T6': ['mat:sediment'],\n", + " 'KHR000259': ['rksd:High Magnesium Fine Grained Igneous Rock'],\n", + " 'KHR00027U': ['mat:sediment'],\n", + " 'KHR00028F': ['rksd:High Magnesium Fine Grained Igneous Rock'],\n", + " 'TAP00007C': ['rksd:Tephra'],\n", + " 'URI0000MR': ['rksd:Breccia'],\n", + " 'IEAVO02ZV': ['mat:rock'],\n", + " 'IEMRS0066': ['mat:biogenicnonorganicmaterial'],\n", + " 'IEUHM008Q': ['rksd:Basalt'],\n", + " 'IEUHM00T5': ['mat:organicmaterial'],\n", + " 'IEUHM00S1': ['mat:sediment'],\n", + " 'TOR0000GP': ['rksd:Basalt'],\n", + " 'BSU0005ZT': ['rksd:Pyroclastic Rock'],\n", + " 'IEUHM00S8': ['rksd:Carbonate Sedimentary Rock'],\n", + " 'PRR050548': ['rksd:Metamorphic Rock'],\n", + " 'IEUHM00VA': ['mat:sediment'],\n", + " 'NHB004Y2X': ['rksd:Metasomatic Rock'],\n", + " 'BSU0005ZQ': ['rksd:Pyroclastic Rock'],\n", + " 'EOI0000G8': ['rksd:Massive Sulphide'],\n", + " 'IESPB000P': ['mat:organicmaterial'],\n", + " 'IEUHM0037': ['rksd:Basalt'],\n", + " 'URI0000MP': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO0402': ['rksd:Andesite'],\n", + " 'BSU0005ZR': ['rksd:Pyroclastic Rock'],\n", + " 'IEAVO02BW': ['rksd:Tephra'],\n", + " 'NHB001B71': ['mat:material'],\n", + " 'NHB004Y2W': ['rksd:Metasomatic Rock'],\n", + " 'PRR037322': ['rksd:Granitoid'],\n", + " 'SRH00015T': ['mat:rock'],\n", + " 'IAC000010': ['rksd:Fine Grained Igneous Rock'],\n", + " 'IEAVO041W': ['rksd:Pyroclastic Rock'],\n", + " 'IEDDM000K': ['ming:silicategermanatemineral'],\n", + " 'IEMRS0067': ['mat:organicmaterial'],\n", + " 'IEUHM00UL': ['rksd:Carbonate Sediment'],\n", + " 'URI0000FX': ['rksd:Generic Mudstone'],\n", + " 'IEDDM000I': ['ming:silicategermanatemineral'],\n", + " 'IEDS1000G': ['rksd:Pyroclastic Rock'],\n", + " 'IENTL000A': ['rksd:Generic Mudstone'],\n", + " 'IEPAP0021': ['rksd:Generic 
Conglomerate'],\n", + " 'IEUHM00UE': ['mat:sediment'],\n", + " 'BSU000016': ['rksd:Rhyolitoid'],\n", + " 'BSU0005U8': ['rksd:Pyroclastic Rock'],\n", + " 'BSU0005KM': ['rksd:Fragmental Igneous Rock'],\n", + " 'GRD026SS0': ['rksd:Sedimentary Rock'],\n", + " ...}" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "label_map" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "0fa15c4e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'IEUHM001M': 'mat:rock',\n", + " 'IEAVO02WU': 'mat:rock',\n", + " 'IEUHM002S': 'mat:rock',\n", + " 'DSR0000TI': 'mat:rock',\n", + " 'IEUHM001Q': 'mat:rock',\n", + " 'DSR0000TP': 'mat:rock',\n", + " 'IEUHM002Q': 'mat:rock',\n", + " 'IEUHM002M': 'mat:mixedsoilsedimentrock',\n", + " 'IEUHM001P': 'mat:rock',\n", + " 'IEUHM00R8': 'mat:biogenicnonorganicmaterial',\n", + " 'IEAVO02ZC': 'mat:rock',\n", + " 'IEAVO02ZE': 'mat:rock',\n", + " 'IEAVO02ZG': 'mat:rock',\n", + " 'DSR0000TL': 'mat:rock',\n", + " 'IEUHM002U': 'mat:rock',\n", + " 'IEAVO02ZD': 'mat:rock',\n", + " 'IEAVO02ZF': 'mat:rock',\n", + " 'IEAVO02ZB': 'mat:rock',\n", + " 'TOR0000GW': 'mat:rock',\n", + " 'TOR0000I9': 'mat:rock',\n", + " 'URI0000ET': 'mat:rock',\n", + " 'IEUHM002V': 'mat:rock',\n", + " 'IEUHM001N': 'mat:rock',\n", + " 'IEAVO02X2': 'mat:rock',\n", + " 'IEAVO02X1': 'mat:rock',\n", + " 'IEUHM00PS': 'mat:rock',\n", + " 'IEUHM0061': 'mat:rock',\n", + " 'URI0000FF': 'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM00PE': 'mat:biogenicnonorganicmaterial',\n", + " 'URI0000GG': 'mat:biogenicnonorganicmaterial',\n", + " 'IEAVO04HS': 'mat:rock',\n", + " 'IEAVO04HR': 'mat:rock',\n", + " 'URI0000FI': 'mat:rock',\n", + " 'IEAVO03NB': 'mat:rock',\n", + " 'IEAVO03NJ': 'mat:rock',\n", + " 'IEAVO03NN': 'mat:rock',\n", + " 'IEMRS004N': 'mat:biogenicnonorganicmaterial',\n", + " 'IEMRS004P': 'mat:biogenicnonorganicmaterial',\n", + " 'IEMRS004T': 'mat:biogenicnonorganicmaterial',\n", + " 
'IEUHM0068': 'mat:rock',\n", + " 'IEMRS004U': 'mat:biogenicnonorganicmaterial',\n", + " 'IEMRS004O': 'mat:biogenicnonorganicmaterial',\n", + " 'IEMRS004S': 'mat:biogenicnonorganicmaterial',\n", + " 'IEMRS004Q': 'mat:biogenicnonorganicmaterial',\n", + " 'IEMRS004R': 'mat:biogenicnonorganicmaterial',\n", + " 'URI0000EW': 'mat:rock',\n", + " 'IEUHM00PU': 'mat:soil',\n", + " 'URI0000FT': 'mat:rock',\n", + " 'KHR00000F': 'mat:rock',\n", + " 'KHR000019': 'mat:rock',\n", + " 'IEUHM006H': 'mat:rock',\n", + " 'URI0000F8': 'mat:rock',\n", + " 'URI0000FK': 'mat:rock',\n", + " 'URI0000ER': 'mat:rock',\n", + " 'IEUHM002R': 'mat:rock',\n", + " 'IEUHM0060': 'mat:rock',\n", + " 'SRH000163': 'mat:rock',\n", + " 'IEUHM002T': 'mat:rock',\n", + " 'IEUHM002O': 'mat:rock',\n", + " 'IEUHM006M': 'mat:rock',\n", + " 'TOR0000IA': 'mat:rock',\n", + " 'URI0000EQ': 'mat:rock',\n", + " 'URI0000F6': 'mat:rock',\n", + " 'IEUHM007O': 'mat:rock',\n", + " 'TAP00003Y': 'mat:sediment',\n", + " 'IEUHM00OB': 'mat:rock',\n", + " 'URI0000FM': 'mat:rock',\n", + " 'URI0000ES': 'mat:rock',\n", + " 'IEAVO02XF': 'mat:rock',\n", + " 'TAP00003W': 'mat:rockorsediment',\n", + " 'IEAVO03NK': 'mat:rock',\n", + " 'TAP00007M': 'mat:rockorsediment',\n", + " 'IENTL002R': 'mat:rock',\n", + " 'URI0000GQ': 'mat:rock',\n", + " 'BSU0005YV': 'mat:rock',\n", + " 'WDC0000BP': 'mat:sediment',\n", + " 'IENTL000P': 'mat:rock',\n", + " 'IEAVO03I4': 'mat:rock',\n", + " 'IEUHM001O': 'mat:rock',\n", + " 'IEUHM0067': 'mat:rock',\n", + " 'IEUHM0066': 'mat:rock',\n", + " 'URI0000FB': 'mat:rock',\n", + " 'KHR00000D': 'mat:rock',\n", + " 'BSU0005YT': 'mat:rock',\n", + " 'URI0000FO': 'mat:rock',\n", + " 'BSU0005YU': 'mat:rock',\n", + " 'IEAVO02WY': 'mat:rock',\n", + " 'IEAVO02WZ': 'mat:rock',\n", + " 'BSU0005YR': 'mat:rock',\n", + " 'IEDUT100K': 'mat:biogenicnonorganicmaterial',\n", + " 'IEMRS004Z': 'mat:biogenicnonorganicmaterial',\n", + " 'BSU0005YS': 'mat:rock',\n", + " 'IENTL002D': 'mat:rock',\n", + " 'URI0000G6': 'mat:rock',\n", + " 
'BSU0005U1': 'mat:rock',\n", + " 'URI0000G8': 'mat:rock',\n", + " 'WDC0000A5': 'mat:sediment',\n", + " 'IEMRS0050': 'mat:biogenicnonorganicmaterial',\n", + " 'SSH00038T': 'mat:soil',\n", + " 'URI0000EP': 'mat:rock',\n", + " 'IEMRS0052': 'mat:organicmaterial',\n", + " 'IEMRS0051': 'mat:organicmaterial',\n", + " 'IEMRS0053': 'mat:organicmaterial',\n", + " 'MGD0009UK': 'mat:rock',\n", + " 'KHR000275': 'mat:rock',\n", + " 'URI0000G0': 'mat:rock',\n", + " 'IEAVO05B1': 'mat:rock',\n", + " 'IEAVO05B5': 'mat:rock',\n", + " 'IENTL002J': 'mat:rock',\n", + " 'IEAVO03ND': 'mat:rock',\n", + " 'IEAVO02WO': 'mat:rock',\n", + " 'IEAVO03NM': 'mat:rock',\n", + " 'IENTL010Q': 'mat:rock',\n", + " 'IENTL010Y': 'mat:rock',\n", + " 'IENTL0111': 'mat:rock',\n", + " 'WDC0000BR': 'mat:sediment',\n", + " 'IENTL010B': 'mat:rock',\n", + " 'IENTL010H': 'mat:rock',\n", + " 'IENTL010I': 'mat:rock',\n", + " 'IENTL010M': 'mat:rock',\n", + " 'IENTL010N': 'mat:rock',\n", + " 'IENTL010P': 'mat:rock',\n", + " 'IENTL010R': 'mat:rock',\n", + " 'IENTL010T': 'mat:rock',\n", + " 'IENTL010U': 'mat:rock',\n", + " 'IENTL010V': 'mat:rock',\n", + " 'IENTL010W': 'mat:rock',\n", + " 'IENTL010X': 'mat:rock',\n", + " 'IENTL010Z': 'mat:rock',\n", + " 'IENTL0110': 'mat:rock',\n", + " 'IENTL0112': 'mat:rock',\n", + " 'IEAVO03NI': 'mat:rock',\n", + " 'IENTL010C': 'mat:rock',\n", + " 'IENTL010D': 'mat:rock',\n", + " 'IENTL010E': 'mat:rock',\n", + " 'IENTL010F': 'mat:rock',\n", + " 'IENTL010G': 'mat:rock',\n", + " 'IENTL010J': 'mat:rock',\n", + " 'IENTL010K': 'mat:rock',\n", + " 'IENTL010L': 'mat:rock',\n", + " 'IENTL010O': 'mat:rock',\n", + " 'IENTL010S': 'mat:rock',\n", + " 'IENTL0113': 'mat:rock',\n", + " 'IENTL0114': 'mat:rock',\n", + " 'IENTL0115': 'mat:rock',\n", + " 'IENTL0108': 'mat:rock',\n", + " 'IENTL0103': 'mat:rock',\n", + " 'IENTL0104': 'mat:rock',\n", + " 'IENTL0105': 'mat:rock',\n", + " 'IENTL0107': 'mat:rock',\n", + " 'IENTL0109': 'mat:rock',\n", + " 'IENTL010A': 'mat:rock',\n", + " 'IENTL0116': 
'mat:rock',\n", + " 'IENTL0106': 'mat:rock',\n", + " 'IENTL0102': 'mat:rock',\n", + " 'IEUHM0062': 'mat:rock',\n", + " 'IEUHM006F': 'mat:rock',\n", + " 'BSU0005Z5': 'mat:rock',\n", + " 'IEAVO0590': 'mat:rock',\n", + " 'IENTL001M': 'mat:rock',\n", + " 'URI0000GP': 'mat:rock',\n", + " 'IEUHM006C': 'mat:rock',\n", + " 'TOR0000IH': 'mat:rock',\n", + " 'BSU0005Z0': 'mat:rock',\n", + " 'IENTL011G': 'mat:rock',\n", + " 'IENTL011H': 'mat:rock',\n", + " 'IENTL011I': 'mat:rock',\n", + " 'IENTL0117': 'mat:rock',\n", + " 'IENTL0118': 'mat:rock',\n", + " 'IENTL0119': 'mat:rock',\n", + " 'IENTL011A': 'mat:rock',\n", + " 'IENTL011B': 'mat:rock',\n", + " 'IENTL011C': 'mat:rock',\n", + " 'IENTL011D': 'mat:rock',\n", + " 'IENTL011E': 'mat:rock',\n", + " 'IENTL011F': 'mat:rock',\n", + " 'IEUHM00SX': 'mat:rock',\n", + " 'IEUHM00PI': 'mat:rock',\n", + " 'BSU0005Z3': 'mat:rock',\n", + " 'BSU0005Z4': 'mat:rock',\n", + " 'WDC0000BQ': 'mat:sediment',\n", + " 'IECUR0003': 'mat:otheranthropogenicmaterial',\n", + " 'BSU0005YY': 'mat:rock',\n", + " 'BSU0005Z1': 'mat:rock',\n", + " 'IEUHM00PT': 'mat:soil',\n", + " 'BSU0005YZ': 'mat:rock',\n", + " 'BSU0005Z2': 'mat:rock',\n", + " 'BSU0005YW': 'mat:rock',\n", + " 'BSU0005U3': 'mat:rock',\n", + " 'BSU0005YX': 'mat:rock',\n", + " 'IEAVO03NQ': 'mat:rock',\n", + " 'BSU0005U2': 'mat:rock',\n", + " 'TOR0000GX': 'mat:rock',\n", + " 'MGD0001BC': 'mat:sediment',\n", + " 'IEUHM00RW': 'mat:rock',\n", + " 'IEUHM006Q': 'mat:rock',\n", + " 'URI0000FE': 'mat:biogenicnonorganicmaterial',\n", + " 'IEMRS0060': 'mat:organicmaterial',\n", + " 'URI0000FH': 'mat:rock',\n", + " 'IEUHM006P': 'mat:rock',\n", + " 'IEUHM00PL': 'mat:sediment',\n", + " 'IEUHM00PJ': 'mat:rock',\n", + " 'IEDUT100G': 'mat:biogenicnonorganicmaterial',\n", + " 'IENTL000T': 'mat:rock',\n", + " 'IEMRS0049': 'mat:biogenicnonorganicmaterial',\n", + " 'MGD00002J': 'mat:rock',\n", + " 'IEUHM006G': 'mat:rock',\n", + " 'IECBJ000G': 'mat:rock',\n", + " 'IEMRS004E': 'mat:biogenicnonorganicmaterial',\n", + 
" 'IEUHM007D': 'mat:rock',\n", + " 'NHB00389B': 'mat:mineral',\n", + " 'IEUHM00O6': 'mat:rock',\n", + " 'URI0000GX': 'mat:rock',\n", + " 'IENTL000C': 'mat:rock',\n", + " 'IECBJ000H': 'mat:rock',\n", + " 'IEMRS004C': 'mat:biogenicnonorganicmaterial',\n", + " 'IEMRS004D': 'mat:biogenicnonorganicmaterial',\n", + " 'IEMRS004B': 'mat:biogenicnonorganicmaterial',\n", + " 'IENSH0003': 'mat:rock',\n", + " 'KHR000017': 'mat:rock',\n", + " 'IENSH0001': 'mat:rock',\n", + " 'IENTL001X': 'mat:rock',\n", + " 'IEAVO02WQ': 'mat:rock',\n", + " 'IEMRS004A': 'mat:biogenicnonorganicmaterial',\n", + " 'IENTL0021': 'mat:rock',\n", + " 'IEUHM00TO': 'mat:sediment',\n", + " 'MGD00002H': 'mat:rock',\n", + " 'IEUHM00OT': 'mat:rock',\n", + " 'IEUHM002N': 'mat:rock',\n", + " 'IEAVO02XA': 'mat:rock',\n", + " 'IEUHM00U3': 'mat:rock',\n", + " 'IECBJ000P': 'mat:rock',\n", + " 'IECBJ000R': 'mat:rock',\n", + " 'IEUHM008N': 'mat:rock',\n", + " 'IEDUT101B': 'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM00PQ': 'mat:rock',\n", + " 'IEAVO03BW': 'mat:rock',\n", + " 'KHR000007': 'mat:rock',\n", + " 'IEAVO03BX': 'mat:rock',\n", + " 'URI0000G7': 'mat:rock',\n", + " 'URI0000FY': 'mat:rock',\n", + " 'IENTL001F': 'mat:rock',\n", + " 'MGD00002I': 'mat:rock',\n", + " 'URI0000GT': 'mat:rock',\n", + " 'URI0000GR': 'mat:rock',\n", + " 'IENTL0029': 'mat:rock',\n", + " 'IEEVB00KV': 'xxx',\n", + " 'IEEVB00KW': 'xxx',\n", + " 'IEEVB00M5': 'xxx',\n", + " 'IEUHM00U1': 'mat:sediment',\n", + " 'URI0000FG': 'mat:rock',\n", + " 'IEAVO04KJ': 'mat:sediment',\n", + " 'IEUHM0046': 'mat:rock',\n", + " 'MGD000BAZ': 'mat:organicmaterial',\n", + " 'IECBJ000A': 'mat:rock',\n", + " 'IEUHM008S': 'mat:rock',\n", + " 'IEDUT100C': 'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM00OW': 'mat:rock',\n", + " 'IECBJ000D': 'mat:rock',\n", + " 'IENTL002H': 'mat:rock',\n", + " 'IEUHM001Y': 'mat:rock',\n", + " 'IEUHM007A': 'mat:rock',\n", + " 'TOR0000ID': 'mat:rock',\n", + " 'IECBJ000O': 'mat:rock',\n", + " 'IEUHM00R9': 'mat:rock',\n", + " 
'TOR0000I8': 'mat:rock',\n", + " 'IEDUT1010': 'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM00PF': 'mat:biogenicnonorganicmaterial',\n", + " 'IEDUT100W': 'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM00O5': 'mat:rock',\n", + " 'URI0000GL': 'mat:rock',\n", + " 'IEDUT101D': 'mat:biogenicnonorganicmaterial',\n", + " 'IEDUT101E': 'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM00OO': 'mat:rock',\n", + " 'IEUHM00PG': 'mat:sediment',\n", + " 'IEACB000X': 'mat:sediment',\n", + " 'IEDUT100S': 'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM009J': 'mat:rock',\n", + " 'IEAVO03NC': 'mat:rock',\n", + " 'IEAVO03NE': 'mat:rock',\n", + " 'IEAVO03NF': 'mat:rock',\n", + " 'IEAVO03NG': 'mat:rock',\n", + " 'IEAVO03NH': 'mat:rock',\n", + " 'IEAVO03NL': 'mat:rock',\n", + " 'IECBJ0008': 'mat:rock',\n", + " 'IEUHM00SP': 'mat:rockorsediment',\n", + " 'IEUHM008D': 'mat:rock',\n", + " 'IEAVO02XL': 'mat:rock',\n", + " 'IEDUT101G': 'mat:biogenicnonorganicmaterial',\n", + " 'IEDUT101H': 'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM00PA': 'mat:rock',\n", + " 'IEAVO03J5': 'mat:rock',\n", + " 'IEDUT1012': 'mat:biogenicnonorganicmaterial',\n", + " 'IEDUT1014': 'mat:biogenicnonorganicmaterial',\n", + " 'URI0000F7': 'mat:rock',\n", + " 'IEDUT1011': 'mat:biogenicnonorganicmaterial',\n", + " 'IEDUT1013': 'mat:biogenicnonorganicmaterial',\n", + " 'IEDUT1015': 'mat:biogenicnonorganicmaterial',\n", + " 'IEDUT101F': 'mat:biogenicnonorganicmaterial',\n", + " 'KHR00001C': 'mat:rock',\n", + " 'IEDUT1019': 'mat:biogenicnonorganicmaterial',\n", + " 'IEDUT101A': 'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM00VR': 'mat:mixedsoilsedimentrock',\n", + " 'NHB004B5U': 'mat:rock',\n", + " 'IEUHM0095': 'mat:rock',\n", + " 'MGD00003E': 'mat:liquidwater',\n", + " 'IEAVO03MW': 'mat:rock',\n", + " 'IEAVO0594': 'mat:rock',\n", + " 'URI0000GO': 'mat:rock',\n", + " 'IEUHM00SQ': 'mat:biogenicnonorganicmaterial',\n", + " 'IEAVO05BR': 'mat:rock',\n", + " 'IECBJ000S': 'mat:rock',\n", + " 'IEAVO03NY': 'mat:rock',\n", + " 
'IECBJ000C': 'mat:rock',\n", + " 'IECBJ000T': 'mat:rock',\n", + " 'IEUHM00P2': 'mat:rock',\n", + " 'MGD00001Q': 'mat:rock',\n", + " 'IECBJ000B': 'mat:rock',\n", + " 'IEUHM00OK': 'mat:rock',\n", + " 'IEDUT1009': 'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM007F': 'mat:rock',\n", + " 'TOR0000HW': 'mat:rock',\n", + " 'IEAVO03IH': 'mat:rock',\n", + " 'IECBJ000M': 'mat:rock',\n", + " 'IEUHM001L': 'mat:rock',\n", + " 'TAP00005T': 'mat:rock',\n", + " 'IEUHM0087': 'mat:rock',\n", + " 'IEUHM00TD': 'mat:sediment',\n", + " 'IECBJ0009': 'mat:rock',\n", + " 'IECBJ000I': 'mat:rock',\n", + " 'IEUHM00O7': 'mat:rock',\n", + " 'IEUHM006D': 'mat:rock',\n", + " 'IEAVO02X8': 'mat:mineral',\n", + " 'IECBJ000J': 'mat:rock',\n", + " 'TOR0000GN': 'mat:rock',\n", + " 'IEAVO04TS': 'mat:rock',\n", + " 'IEUHM002X': 'mat:mixedsoilsedimentrock',\n", + " 'MGD00003D': 'mat:liquidwater',\n", + " 'BSU0005H1': 'mat:rock',\n", + " 'IEUHM00RH': 'mat:rock',\n", + " 'IECBJ000K': 'mat:rock',\n", + " 'IEUHM007Q': 'mat:rock',\n", + " 'TOR0000GU': 'mat:rock',\n", + " 'IEUHM0069': 'mat:rock',\n", + " 'IEAVO02X9': 'mat:mineral',\n", + " 'URI0000EN': 'mat:rock',\n", + " 'IEAVO04W4': 'mat:rock',\n", + " 'IEGPL000A': 'mat:liquidwater',\n", + " 'IEGPL000B': 'mat:liquidwater',\n", + " 'IEGPL000C': 'mat:liquidwater',\n", + " 'IEGPL000D': 'mat:liquidwater',\n", + " 'IEGPL000E': 'mat:liquidwater',\n", + " 'IEGPL000F': 'mat:liquidwater',\n", + " 'IEGPL000G': 'mat:liquidwater',\n", + " 'IEGPL000H': 'mat:liquidwater',\n", + " 'IEGPL000I': 'mat:liquidwater',\n", + " 'IEGPL000J': 'mat:liquidwater',\n", + " 'IEGPL000K': 'mat:liquidwater',\n", + " 'IEGPL000L': 'mat:liquidwater',\n", + " 'IEGPL000M': 'mat:liquidwater',\n", + " 'IEGPL000N': 'mat:liquidwater',\n", + " 'IEGPL000O': 'mat:liquidwater',\n", + " 'IEGPL000P': 'mat:liquidwater',\n", + " 'IEGPL000Q': 'mat:liquidwater',\n", + " 'IEGPL000R': 'mat:liquidwater',\n", + " 'IEGPL000S': 'mat:liquidwater',\n", + " 'IEGPL000T': 'mat:liquidwater',\n", + " 'IEGPL000U': 
'mat:liquidwater',\n", + " 'IEGPL000V': 'mat:liquidwater',\n", + " 'IEGPL000W': 'mat:liquidwater',\n", + " 'IEGPL000X': 'mat:liquidwater',\n", + " 'IEGPL000Y': 'mat:liquidwater',\n", + " 'IEGPL000Z': 'mat:liquidwater',\n", + " 'IEGPL0010': 'mat:liquidwater',\n", + " 'IEGPL0011': 'mat:liquidwater',\n", + " 'IEGPL0012': 'mat:liquidwater',\n", + " 'IEGPL0013': 'mat:liquidwater',\n", + " 'IEGPL0014': 'mat:liquidwater',\n", + " 'IEGPL0015': 'mat:liquidwater',\n", + " 'IEGPL0016': 'mat:liquidwater',\n", + " 'IEGPL0017': 'mat:liquidwater',\n", + " 'IEGPL0018': 'mat:liquidwater',\n", + " 'IEGPL0019': 'mat:liquidwater',\n", + " 'IEGPL001A': 'mat:liquidwater',\n", + " 'IEGPL001B': 'mat:liquidwater',\n", + " 'IEGPL001C': 'mat:liquidwater',\n", + " 'IEGPL001D': 'mat:liquidwater',\n", + " 'IEGPL001E': 'mat:liquidwater',\n", + " 'IEGPL0001': 'mat:liquidwater',\n", + " 'IEGPL0002': 'mat:liquidwater',\n", + " 'IEGPL0003': 'mat:liquidwater',\n", + " 'IEGPL0004': 'mat:liquidwater',\n", + " 'IEGPL0005': 'mat:liquidwater',\n", + " 'IEGPL0006': 'mat:liquidwater',\n", + " 'IEGPL0007': 'mat:liquidwater',\n", + " 'IEGPL0008': 'mat:liquidwater',\n", + " 'IEGPL0009': 'mat:liquidwater',\n", + " 'IEUHM00SY': 'mat:sediment',\n", + " 'IEAVO04TC': 'mat:rock',\n", + " 'IEUHM00OC': 'mat:rock',\n", + " 'KHR00001O': 'mat:rock',\n", + " 'IEAVO03U3': 'mat:rock',\n", + " 'IEDUT100U': 'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM006J': 'mat:rock',\n", + " 'URI0000FS': 'mat:rock',\n", + " 'IEDUT101J': 'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM00SW': 'mat:sediment',\n", + " 'IEUHM00UH': 'mat:sediment',\n", + " 'IEUHM002P': 'mat:rock',\n", + " 'MGD00002E': 'mat:rock',\n", + " 'IEACB0007': 'mat:sediment',\n", + " 'IEDUT101C': 'mat:biogenicnonorganicmaterial',\n", + " 'TOR0000IF': 'mat:rock',\n", + " 'IEAVO02X5': 'mat:rock',\n", + " 'IEUHM008R': 'mat:rock',\n", + " 'URI0000EO': 'mat:rock',\n", + " 'IEAVO02ZT': 'mat:rock',\n", + " 'IEUHM0080': 'mat:rock',\n", + " 'IEDUT1008': 
'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM0091': 'mat:rock',\n", + " 'IEUHM009K': 'mat:rock',\n", + " 'IEDUT101I': 'mat:biogenicnonorganicmaterial',\n", + " 'URI0000H4': 'mat:rock',\n", + " 'WDC00001Q': 'mat:sediment',\n", + " 'IEDUT100P': 'mat:biogenicnonorganicmaterial',\n", + " 'IEDUT100Q': 'mat:biogenicnonorganicmaterial',\n", + " 'IEDUT100R': 'mat:biogenicnonorganicmaterial',\n", + " 'KHR00028H': 'mat:rock',\n", + " 'URI0000F9': 'mat:rock',\n", + " 'IEDUT1016': 'mat:biogenicnonorganicmaterial',\n", + " 'IEAVO02AK': 'mat:sediment',\n", + " 'IEDUT101K': 'mat:biogenicnonorganicmaterial',\n", + " 'IEDUT101L': 'mat:biogenicnonorganicmaterial',\n", + " 'IEDUT101M': 'mat:biogenicnonorganicmaterial',\n", + " 'IEAVO03BU': 'mat:rock',\n", + " 'IEUHM00S0': 'mat:rock',\n", + " 'URI0000GM': 'mat:rock',\n", + " 'IEDUT1018': 'mat:biogenicnonorganicmaterial',\n", + " 'IEDUT101O': 'mat:biogenicnonorganicmaterial',\n", + " 'IEDUT101P': 'mat:biogenicnonorganicmaterial',\n", + " 'IEDUT101Q': 'mat:biogenicnonorganicmaterial',\n", + " 'BSU0005S8': 'mat:rock',\n", + " 'IEUHM00PH': 'mat:rock',\n", + " 'IEDUT100X': 'mat:biogenicnonorganicmaterial',\n", + " 'IEDUT100Y': 'mat:biogenicnonorganicmaterial',\n", + " 'IEDUT100Z': 'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM008Z': 'mat:rock',\n", + " 'IEUHM0098': 'mat:rock',\n", + " 'IEUHM006I': 'mat:rock',\n", + " 'TOR0000I5': 'mat:rock',\n", + " 'URI0000GK': 'mat:rock',\n", + " 'KHR00001Z': 'mat:rock',\n", + " 'IEAVO02BL': 'mat:sediment',\n", + " 'IEAVO058E': 'mat:rock',\n", + " 'IEUHM0090': 'mat:rock',\n", + " 'URI0000FN': 'mat:biogenicnonorganicmaterial',\n", + " 'TOR0000I7': 'mat:rock',\n", + " 'URI0000FJ': 'mat:rock',\n", + " 'IECBJ000N': 'mat:rock',\n", + " 'IEUHM00OY': 'mat:rock',\n", + " 'IEUHM00P4': 'mat:rock',\n", + " 'TOR0000IC': 'mat:rock',\n", + " 'IEUHM00OZ': 'mat:rock',\n", + " 'IEDUT100A': 'mat:biogenicnonorganicmaterial',\n", + " 'IEDUT100B': 'mat:biogenicnonorganicmaterial',\n", + " 'IEDUT100H': 
'mat:biogenicnonorganicmaterial',\n", + " 'IEDUT100I': 'mat:biogenicnonorganicmaterial',\n", + " 'IEDUT100J': 'mat:biogenicnonorganicmaterial',\n", + " 'URI0000GB': 'mat:rock',\n", + " 'IECBJ000F': 'mat:rock',\n", + " 'IENTL0020': 'mat:rock',\n", + " 'IEUHM006B': 'mat:rock',\n", + " 'KHR000006': 'mat:rock',\n", + " 'URI0000G9': 'mat:rock',\n", + " 'IEMRS005H': 'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM002A': 'mat:rock',\n", + " 'MGD00002G': 'mat:rock',\n", + " 'NHB003WIE': 'mat:rock',\n", + " 'IEDUT100L': 'mat:biogenicnonorganicmaterial',\n", + " 'IEDUT100M': 'mat:biogenicnonorganicmaterial',\n", + " 'IEDUT100N': 'mat:biogenicnonorganicmaterial',\n", + " 'KHR00024K': 'mat:rock',\n", + " 'URI0000FW': 'mat:rock',\n", + " 'IEAVO02BJ': 'mat:sediment',\n", + " 'IEAVO02BK': 'mat:sediment',\n", + " 'IEAVO02B3': 'mat:sediment',\n", + " 'IEAVO02B4': 'mat:sediment',\n", + " 'IEAVO02B5': 'mat:sediment',\n", + " 'IEAVO02BI': 'mat:sediment',\n", + " 'IECBJ000Q': 'mat:rock',\n", + " 'URI0000P5': 'mat:rock',\n", + " 'IEAVO02B1': 'mat:sediment',\n", + " 'IEAVO02B2': 'mat:sediment',\n", + " 'IEAVO02B7': 'mat:sediment',\n", + " 'IEAVO02B8': 'mat:sediment',\n", + " 'IENTL001T': 'mat:rock',\n", + " 'TAP00007J': 'mat:rockorsediment',\n", + " 'URI0000FV': 'mat:rock',\n", + " 'URI0000FL': 'mat:rock',\n", + " 'URI0000NH': 'mat:rock',\n", + " 'URI0000LP': 'mat:rock',\n", + " 'IEAVO02B0': 'mat:sediment',\n", + " 'IECUR008F': 'mat:rock',\n", + " 'IEUHM0084': 'mat:rock',\n", + " 'SRH00015W': 'mat:rock',\n", + " 'IEDUT100D': 'mat:biogenicnonorganicmaterial',\n", + " 'IEDUT100E': 'mat:biogenicnonorganicmaterial',\n", + " 'IEDUT100F': 'mat:biogenicnonorganicmaterial',\n", + " 'URI0000IB': 'mat:rock',\n", + " 'IEAVO02B6': 'mat:sediment',\n", + " 'IEAVO03V8': 'mat:rock',\n", + " 'IEMRS005D': 'mat:biogenicnonorganicmaterial',\n", + " 'IEMRS005E': 'mat:biogenicnonorganicmaterial',\n", + " 'IEMRS005F': 'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM00TM': 'mat:sediment',\n", + " 'URI0000EM': 
'mat:rock',\n", + " 'IEACB000K': 'mat:sediment',\n", + " 'IEAVO03VE': 'mat:rock',\n", + " 'IECBJ000L': 'mat:rock',\n", + " 'IEMRS005G': 'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM009E': 'mat:rock',\n", + " 'IEUHM00SG': 'mat:sediment',\n", + " 'IEDUT100T': 'mat:biogenicnonorganicmaterial',\n", + " 'IEDUT100V': 'mat:biogenicnonorganicmaterial',\n", + " 'URI0000F5': 'mat:rock',\n", + " 'IEUHM00OJ': 'mat:rock',\n", + " 'IEUHM00VP': 'mat:sediment',\n", + " 'IEUHM0082': 'mat:rock',\n", + " 'IEUHM00OR': 'mat:rock',\n", + " 'IEUHM001R': 'mat:rock',\n", + " 'IEAVO02BY': 'mat:sediment',\n", + " 'IEAVO043E': 'mat:rock',\n", + " 'IEUHM007U': 'mat:rock',\n", + " 'IEUHM00SF': 'mat:sediment',\n", + " 'IEUHM00S9': 'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM00SU': 'mat:sediment',\n", + " 'IEUHM008L': 'mat:rock',\n", + " 'IEUHM00OH': 'mat:rock',\n", + " 'NHB004HAW': 'mat:rock',\n", + " 'TAP00003Z': 'mat:sediment',\n", + " 'IEUHM00SD': 'mat:rock',\n", + " 'IEUHM00RE': 'mat:rock',\n", + " 'KHR000014': 'mat:rock',\n", + " 'URI0000FQ': 'mat:rock',\n", + " 'IEUHM00SV': 'mat:sediment',\n", + " 'TOR0000GM': 'mat:rock',\n", + " 'IEAVO02BD': 'mat:sediment',\n", + " 'IEAVO03EP': 'mat:rock',\n", + " 'URI0000GA': 'mat:rock',\n", + " 'URI0000GF': 'mat:rock',\n", + " 'IEAVO03F9': 'mat:rock',\n", + " 'IECBJ0001': 'mat:rock',\n", + " 'IEUHM0088': 'mat:rock',\n", + " 'IEUHM00U4': 'mat:rock',\n", + " 'TAP00007L': 'mat:rock',\n", + " 'BSU0006RR': 'mat:rock',\n", + " 'IEAVO03FD': 'mat:rock',\n", + " 'IEUHM00T3': 'mat:sediment',\n", + " 'IEUHM00PM': 'mat:biogenicnonorganicmaterial',\n", + " 'URI0000FP': 'mat:rock',\n", + " 'IEUHM0085': 'mat:rock',\n", + " 'URI0000FC': 'mat:rock',\n", + " 'URI0000GN': 'mat:rock',\n", + " 'IEAVO02AD': 'mat:sediment',\n", + " 'IEUHM007B': 'mat:rock',\n", + " 'BSU00065O': 'mat:rock',\n", + " 'TAP00007K': 'mat:rockorsediment',\n", + " 'URI0000NY': 'mat:rock',\n", + " 'IEAVO040A': 'mat:rock',\n", + " 'IEUHM007H': 'mat:rock',\n", + " 'IEUHM00TJ': 'mat:sediment',\n", 
+ " 'IEUHM00VJ': 'mat:rock',\n", + " 'IEAVO042E': 'mat:rock',\n", + " 'IEUHM00SB': 'mat:rockorsediment',\n", + " 'IEUHM00TX': 'mat:biogenicnonorganicmaterial',\n", + " 'MGD00001P': 'mat:rock',\n", + " 'TOR0000HY': 'mat:rock',\n", + " 'IEAVO02WT': 'mat:rock',\n", + " 'IEUHM00SS': 'mat:rockorsediment',\n", + " 'IEACB000M': 'mat:sediment',\n", + " 'IECUR001B': 'mat:rock',\n", + " 'IEUHM00O8': 'mat:rock',\n", + " 'IEUHM00U2': 'mat:sediment',\n", + " 'URI0000EE': 'mat:rock',\n", + " 'URI0000G5': 'mat:rock',\n", + " 'IEAVO03BT': 'mat:rock',\n", + " 'IEUHM00PO': 'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM00TY': 'mat:sediment',\n", + " 'URI0000GC': 'mat:rock',\n", + " 'IEAVO03G7': 'mat:rock',\n", + " 'IEUHM00SE': 'mat:rockorsediment',\n", + " 'IEUHM002I': 'mat:rock',\n", + " 'IEAVO02AE': 'mat:sediment',\n", + " 'URI0000MB': 'mat:rock',\n", + " 'IEUHM0094': 'mat:rock',\n", + " 'IEUHM00T4': 'mat:sediment',\n", + " 'UHM000EC1': 'mat:biogenicnonorganicmaterial',\n", + " 'URI0000P8': 'mat:rock',\n", + " 'IEUHM00S4': 'mat:rock',\n", + " 'SRH00015V': 'mat:rock',\n", + " 'IECUR00D0': 'mat:rock',\n", + " 'IEUHM008E': 'mat:rock',\n", + " 'URI0000GH': 'mat:organicmaterial',\n", + " 'PRR000901': 'mat:rock',\n", + " 'IEAVO0438': 'mat:rock',\n", + " 'IEUHM00P1': 'mat:rock',\n", + " 'IEUHM00NE': 'mat:rock',\n", + " 'IEUHM00TI': 'mat:rockorsediment',\n", + " 'IEUHM00R3': 'mat:rock',\n", + " 'TAP00003A': 'mat:rockorsediment',\n", + " 'IEUHM00OP': 'mat:rock',\n", + " 'IEUHM00SZ': 'mat:sediment',\n", + " 'IEUHM00S5': 'mat:rock',\n", + " 'IEUHM002J': 'mat:rock',\n", + " 'MGD000BB3': 'mat:organicmaterial',\n", + " 'MGD000BB4': 'mat:organicmaterial',\n", + " 'IEDUT101N': 'mat:biogenicnonorganicmaterial',\n", + " 'IEAVO02CH': 'mat:sediment',\n", + " 'IEAVO02CI': 'mat:sediment',\n", + " 'IEUHM00OM': 'mat:rock',\n", + " 'IEUHM00SN': 'mat:sediment',\n", + " 'IEUHM00TZ': 'mat:sediment',\n", + " 'BSU00065M': 'mat:rock',\n", + " 'IEAVO02X0': 'mat:rock',\n", + " 'IEUHM00T9': 'mat:sediment',\n", + 
" 'TAP00004L': 'mat:rockorsediment',\n", + " 'IEAVO02CG': 'mat:sediment',\n", + " 'BSU00065N': 'mat:rock',\n", + " 'IEUHM008B': 'mat:rock',\n", + " 'BSU00065K': 'mat:rock',\n", + " 'IEAVO042S': 'mat:rock',\n", + " 'IEDUT1001': 'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM00RQ': 'mat:rock',\n", + " 'TOR0000GS': 'mat:rock',\n", + " 'URI0000G4': 'mat:rock',\n", + " 'URI0000PC': 'mat:rock',\n", + " 'IEAVO042R': 'mat:rock',\n", + " 'IEUHM00VS': 'mat:rock',\n", + " 'KHR000005': 'mat:rock',\n", + " 'KHR00001W': 'mat:rock',\n", + " 'TOR0000GT': 'mat:rock',\n", + " 'BSU00065L': 'mat:rock',\n", + " 'IEAVO0404': 'mat:rock',\n", + " 'IEUHM007V': 'mat:rock',\n", + " 'URI0000GJ': 'mat:rock',\n", + " 'NHB006G89': 'mat:rock',\n", + " 'NHB006G8B': 'mat:rock',\n", + " 'URI0000NM': 'mat:rock',\n", + " 'IEPAP0025': 'mat:rock',\n", + " 'IEUHM00T2': 'mat:sediment',\n", + " 'TOR0000I4': 'mat:rock',\n", + " 'URI0000P7': 'mat:rock',\n", + " 'WDC0000BA': 'mat:sediment',\n", + " 'IENTL000Q': 'mat:rock',\n", + " 'IEUHM002F': 'mat:rock',\n", + " 'KHR00028W': 'mat:rock',\n", + " 'NHB0003GX': 'mat:mineral',\n", + " 'TOR0000HS': 'mat:rock',\n", + " 'IEUHM00OE': 'mat:rock',\n", + " 'IEUHM00TC': 'mat:sediment',\n", + " 'IEUHM00U0': 'mat:sediment',\n", + " 'URI0000NG': 'mat:rock',\n", + " 'IEREX0002': 'mat:organicmaterial',\n", + " 'URI0000IC': 'mat:rock',\n", + " 'IEMRS0059': 'mat:biogenicnonorganicmaterial',\n", + " 'KHR00001R': 'mat:rock',\n", + " 'TOR0000HO': 'mat:rock',\n", + " 'IEUHM007T': 'mat:rock',\n", + " 'IEUHM00TW': 'mat:sediment',\n", + " 'IEUHM00S2': 'mat:rock',\n", + " 'TAP00007I': 'mat:rock',\n", + " 'TOR0000FV': 'mat:rock',\n", + " 'IEAVO058W': 'mat:rock',\n", + " 'IEUHM00RV': 'mat:rock',\n", + " 'TOR0000GQ': 'mat:rock',\n", + " 'URI0000FD': 'mat:rock',\n", + " 'URI0000LU': 'mat:rock',\n", + " 'IEAVO02AF': 'mat:sediment',\n", + " 'IENTL001J': 'mat:rock',\n", + " 'PRR042033': 'mat:rock',\n", + " 'IEAVO02X3': 'mat:rock',\n", + " 'IEAVO02X4': 'mat:rock',\n", + " 'IEAVO03MY': 
'mat:rock',\n", + " 'IEUHM00SC': 'mat:sediment',\n", + " 'IEUHM00RY': 'mat:rockorsediment',\n", + " 'IEUHM0016': 'mat:rock',\n", + " 'IEUHM00RA': 'mat:rock',\n", + " 'URI0000N1': 'mat:rock',\n", + " 'IEAVO03NO': 'mat:rock',\n", + " 'IEUHM00VO': 'mat:sediment',\n", + " 'MGD00002A': 'mat:rock',\n", + " 'BSU00001A': 'mat:rock',\n", + " 'BSU00001B': 'mat:rock',\n", + " 'BSU00001C': 'mat:rock',\n", + " 'BSU00001D': 'mat:rock',\n", + " 'BSU00001E': 'mat:rock',\n", + " 'BSU00001F': 'mat:rock',\n", + " 'BSU00001G': 'mat:rock',\n", + " 'BSU00001H': 'mat:rock',\n", + " 'BSU00001I': 'mat:rock',\n", + " 'IEUHM00S6': 'mat:rock',\n", + " 'IEAVO02U2': 'mat:rock',\n", + " 'IEUHM008V': 'mat:rock',\n", + " 'BSU000019': 'mat:rock',\n", + " 'BSU000018': 'mat:rock',\n", + " 'IEAVO03TY': 'mat:rock',\n", + " 'IEAVO03ZP': 'mat:rock',\n", + " 'URI0000H1': 'mat:rock',\n", + " 'URI0000LJ': 'mat:rock',\n", + " 'BSU0005KK': 'mat:rock',\n", + " 'IEAVO03V7': 'mat:rock',\n", + " 'IEUHM00SO': 'mat:rock',\n", + " 'TOR0000GV': 'mat:rock',\n", + " 'IENTL0003': 'mat:rock',\n", + " 'IEUHM00TK': 'mat:sediment',\n", + " 'IEDUT100O': 'mat:biogenicnonorganicmaterial',\n", + " 'TOR0000G6': 'mat:rock',\n", + " 'TOR0000I6': 'mat:rock',\n", + " 'IEMRS0057': 'mat:biogenicnonorganicmaterial',\n", + " 'IEMRS0058': 'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM0089': 'mat:rock',\n", + " 'IEUHM009N': 'mat:rock',\n", + " 'IEUHM00TR': 'mat:sediment',\n", + " 'NHB0052UT': 'mat:rock',\n", + " 'NHB004BHA': 'mat:rock',\n", + " 'SRH00015Q': 'mat:rock',\n", + " 'BSU00065J': 'mat:rock',\n", + " 'BSU0005KO': 'mat:rock',\n", + " 'KHR000311': 'mat:rock',\n", + " 'IEUHM0020': 'mat:rock',\n", + " 'IEDUT1000': 'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM009M': 'mat:rock',\n", + " 'IEUHM00S3': 'mat:biogenicnonorganicmaterial',\n", + " 'IEAVO02Z8': 'mat:rock',\n", + " 'IENTL000H': 'mat:rock',\n", + " 'IEROY000A': 'mat:biogenicnonorganicmaterial',\n", + " 'IEROY000B': 'mat:biogenicnonorganicmaterial',\n", + " 'IEROY000C': 
'mat:biogenicnonorganicmaterial',\n", + " 'IEROY000D': 'mat:biogenicnonorganicmaterial',\n", + " 'IEROY000E': 'mat:biogenicnonorganicmaterial',\n", + " 'IEROY000G': 'mat:biogenicnonorganicmaterial',\n", + " 'IEROY000H': 'mat:biogenicnonorganicmaterial',\n", + " 'IEROY000I': 'mat:biogenicnonorganicmaterial',\n", + " 'IEROY000J': 'mat:biogenicnonorganicmaterial',\n", + " 'IEROY000K': 'mat:biogenicnonorganicmaterial',\n", + " 'IEROY000L': 'mat:biogenicnonorganicmaterial',\n", + " 'IEROY000M': 'mat:biogenicnonorganicmaterial',\n", + " 'IEROY000N': 'mat:biogenicnonorganicmaterial',\n", + " 'IEROY000O': 'mat:biogenicnonorganicmaterial',\n", + " 'IEROY000P': 'mat:biogenicnonorganicmaterial',\n", + " 'IEROY000Q': 'mat:biogenicnonorganicmaterial',\n", + " 'IEROY000R': 'mat:biogenicnonorganicmaterial',\n", + " 'IEROY000S': 'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM00PB': 'mat:rock',\n", + " 'IEUHM00PW': 'mat:rock',\n", + " 'MGD00001S': 'mat:rock',\n", + " 'IEAVO02C6': 'mat:sediment',\n", + " 'IEAVO02C7': 'mat:sediment',\n", + " 'IEAVO02C8': 'mat:sediment',\n", + " 'IEAVO03IQ': 'mat:rock',\n", + " 'IEROY0003': 'mat:biogenicnonorganicmaterial',\n", + " 'IEROY0005': 'mat:biogenicnonorganicmaterial',\n", + " 'IEROY0006': 'mat:biogenicnonorganicmaterial',\n", + " 'IEROY0007': 'mat:biogenicnonorganicmaterial',\n", + " 'IEROY0008': 'mat:biogenicnonorganicmaterial',\n", + " 'IEROY0009': 'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM008I': 'mat:rock',\n", + " 'KHR00027X': 'mat:rock',\n", + " 'MGD00003R': 'mat:liquidwater',\n", + " 'IEAVO04IP': 'mat:sediment',\n", + " 'IEROY0001': 'mat:biogenicnonorganicmaterial',\n", + " 'IEROY000F': 'mat:biogenicnonorganicmaterial',\n", + " 'IEROY0004': 'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM008W': 'mat:rock',\n", + " 'IEUHM008U': 'mat:rock',\n", + " 'IEUHM00UA': 'mat:sediment',\n", + " 'IEDUT1005': 'mat:biogenicnonorganicmaterial',\n", + " 'IEDUT1006': 'mat:biogenicnonorganicmaterial',\n", + " 'IEDUT1007': 
'mat:biogenicnonorganicmaterial',\n", + " 'IEAVO02C5': 'mat:sediment',\n", + " 'IEUHM007S': 'mat:rock',\n", + " 'IEUHM0096': 'mat:rock',\n", + " 'KHR00001L': 'mat:rock',\n", + " 'TOR0000HN': 'mat:rock',\n", + " 'URI0000NZ': 'mat:rock',\n", + " 'IEUHM007G': 'mat:rock',\n", + " 'URI0000GZ': 'mat:rock',\n", + " 'URI0000H0': 'mat:rock',\n", + " 'IEAVO03ZO': 'mat:rock',\n", + " 'IEDS1000F': 'mat:rock',\n", + " 'IEUHM00OQ': 'mat:rock',\n", + " 'URI0000EL': 'mat:rock',\n", + " 'IEUHM00OV': 'mat:rock',\n", + " 'IEUHM00OD': 'mat:rock',\n", + " 'IEUHM00P3': 'mat:rock',\n", + " 'IEUHM00V1': 'mat:sediment',\n", + " 'PRR041908': 'mat:rock',\n", + " 'TAP00007D': 'mat:rockorsediment',\n", + " 'URI0000H5': 'mat:rock',\n", + " 'IEAVO02AS': 'mat:sediment',\n", + " 'IEAVO02AT': 'mat:sediment',\n", + " 'IEUHM00PN': 'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM006K': 'mat:rock',\n", + " 'TAP00002P': 'mat:rockorsediment',\n", + " 'WDC00001P': 'mat:sediment',\n", + " 'IEAVO03NP': 'mat:rock',\n", + " 'IEAVO03NR': 'mat:rock',\n", + " 'IEAVO03NS': 'mat:rock',\n", + " 'IEAVO03NT': 'mat:rock',\n", + " 'IEAVO03NU': 'mat:rock',\n", + " 'IEAVO03NV': 'mat:rock',\n", + " 'IEAVO03NW': 'mat:rock',\n", + " 'IEAVO03NX': 'mat:rock',\n", + " 'IEAVO04L4': 'mat:rock',\n", + " 'IECUR00FM': 'mat:rock',\n", + " 'IESAG0012': 'mat:rock',\n", + " 'KHR00001Q': 'mat:rock',\n", + " 'MGD00002D': 'mat:rock',\n", + " 'NHB001NWZ': 'mat:mineral',\n", + " 'IEAVO02AR': 'mat:sediment',\n", + " 'IESAG0011': 'mat:rock',\n", + " 'IESAG0013': 'mat:rock',\n", + " 'TOR0000IB': 'mat:rock',\n", + " 'BSU000017': 'mat:rock',\n", + " 'IESAG000V': 'mat:rock',\n", + " 'IESAG000W': 'mat:rock',\n", + " 'IESAG000X': 'mat:rock',\n", + " 'IESAG000Y': 'mat:rock',\n", + " 'IESAG000Z': 'mat:rock',\n", + " 'IESAG0010': 'mat:rock',\n", + " 'SRH00015U': 'mat:rock',\n", + " 'BSU0005ZU': 'mat:rock',\n", + " 'IEAVO02CK': 'mat:sediment',\n", + " 'IEAVO02CL': 'mat:sediment',\n", + " 'IEMRS0065': 'mat:biogenicnonorganicmaterial',\n", + " 
'IESAG000U': 'mat:rock',\n", + " 'IEUHM00SR': 'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM00T8': 'mat:sediment',\n", + " 'NHB0073XU': 'mat:rock',\n", + " 'IEDS1000H': 'mat:rock',\n", + " 'IEUHM007Y': 'mat:rock',\n", + " 'IEUHM00SM': 'mat:biogenicnonorganicmaterial',\n", + " 'PRR055764': 'mat:rock',\n", + " 'IEAVO02CJ': 'mat:sediment',\n", + " 'IEAVO02XE': 'mat:rock',\n", + " 'IECUR0002': 'mat:rock',\n", + " 'IEUHM008F': 'mat:rock',\n", + " 'IEUHM00SA': 'mat:rock',\n", + " 'IEUHM00TH': 'mat:sediment',\n", + " 'IEUHM00TP': 'mat:sediment',\n", + " 'WDC0000BO': 'mat:sediment',\n", + " 'IEAVO03MF': 'mat:rock',\n", + " 'IEUHM007J': 'mat:rock',\n", + " 'IEUHM00T1': 'mat:sediment',\n", + " 'IEUHM00U7': 'mat:rock',\n", + " 'IEAVO03MK': 'mat:rock',\n", + " 'URI0000FZ': 'mat:rock',\n", + " 'WDC000013': 'mat:sediment',\n", + " 'BSU00065H': 'mat:rock',\n", + " 'IEAVO03BV': 'mat:rock',\n", + " 'IEUHM00PD': 'mat:rock',\n", + " 'NHB006NBC': 'mat:rock',\n", + " 'IEDUT1002': 'mat:biogenicnonorganicmaterial',\n", + " 'IEDUT1003': 'mat:biogenicnonorganicmaterial',\n", + " 'TOR0000FK': 'mat:rock',\n", + " 'IEUHM008O': 'mat:rock',\n", + " 'URI0000PB': 'mat:rock',\n", + " 'WDC0000BB': 'mat:sediment',\n", + " 'BSU00065I': 'mat:rock',\n", + " 'IEMRS0055': 'mat:biogenicnonorganicmaterial',\n", + " 'IEMRS0056': 'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM00TA': 'mat:sediment',\n", + " 'IEUHM00TN': 'mat:sediment',\n", + " 'NHB003VJS': 'mat:rock',\n", + " 'TOR0000HX': 'mat:rock',\n", + " 'IEAVO02AG': 'mat:sediment',\n", + " 'IEAVO04T1': 'mat:rock',\n", + " 'IECBJ000E': 'mat:rock',\n", + " 'IEUHM009L': 'mat:rock',\n", + " 'IEUHM00O9': 'mat:rock',\n", + " 'IEUHM00RZ': 'mat:rock',\n", + " 'IEUHM00RM': 'mat:rock',\n", + " 'KHR00026C': 'mat:rock',\n", + " 'URI0000GV': 'mat:rock',\n", + " 'BSU00065F': 'mat:rock',\n", + " 'IEUHM007E': 'mat:rock',\n", + " 'IEUHM0099': 'mat:rock',\n", + " 'IEUHM009P': 'mat:rock',\n", + " 'IEUHM00RF': 'mat:rock',\n", + " 'KHR00024R': 'mat:rock',\n", + " 
'URI0000FA': 'mat:rock',\n", + " 'NHB006NCJ': 'mat:rock',\n", + " 'DSR0000TM': 'mat:organicmaterial',\n", + " 'IEUHM00OU': 'mat:rock',\n", + " 'IEUHM00SL': 'mat:rock',\n", + " 'TOR0000FU': 'mat:rock',\n", + " 'BSU00065G': 'mat:rock',\n", + " 'IEDS1000D': 'mat:rock',\n", + " 'IENTL0026': 'mat:rock',\n", + " 'IEUHM0083': 'mat:rock',\n", + " 'IEUHM00TB': 'mat:sediment',\n", + " 'IEUHM00RG': 'mat:rock',\n", + " 'IEUHM00RB': 'mat:sediment',\n", + " 'URI0000GW': 'mat:rock',\n", + " 'URI0000MN': 'mat:rock',\n", + " 'IEAVO02AV': 'mat:sediment',\n", + " 'IEAVO02AW': 'mat:sediment',\n", + " 'IEUHM009D': 'mat:rock',\n", + " 'IEUHM00P5': 'mat:rock',\n", + " 'IEUHM00ST': 'mat:organicmaterial',\n", + " 'IEUHM00TS': 'mat:sediment',\n", + " 'IEUHM001S': 'mat:rock',\n", + " 'IEUHM00RL': 'mat:rock',\n", + " 'PRR053135': 'mat:rock',\n", + " 'SRH000161': 'mat:rock',\n", + " 'TOR0000HB': 'mat:rock',\n", + " 'IEAVO02AU': 'mat:sediment',\n", + " 'IENTL001I': 'mat:rock',\n", + " 'IEMRS0068': 'mat:biogenicnonorganicmaterial',\n", + " 'IEPAP0035': 'mat:rock',\n", + " 'IEUHM007R': 'mat:rock',\n", + " 'IEAVO03GL': 'mat:rock',\n", + " 'IEUHM00RU': 'mat:rock',\n", + " 'IEUHM00S7': 'mat:rockorsediment',\n", + " 'PRR021194': 'mat:rock',\n", + " 'PRR038014': 'mat:rock',\n", + " 'PRR042034': 'mat:rock',\n", + " 'IEAVO03ZN': 'mat:rock',\n", + " 'IENTL011Z': 'mat:rock',\n", + " 'IEUHM0081': 'mat:rock',\n", + " 'PRR036392': 'mat:rock',\n", + " 'TOR0000IE': 'mat:rock',\n", + " 'IEAVO02AO': 'mat:sediment',\n", + " 'IEAVO03ZT': 'mat:rock',\n", + " 'IEUHM00RC': 'mat:biogenicnonorganicmaterial',\n", + " 'KHR00024J': 'mat:rock',\n", + " 'TAP00002G': 'mat:rock',\n", + " 'UHM000EC2': 'mat:biogenicnonorganicmaterial',\n", + " 'URI0000EG': 'mat:rock',\n", + " 'WDC0000BC': 'mat:sediment',\n", + " 'WDC0000BJ': 'mat:sediment',\n", + " 'BSU00000B': 'mat:rock',\n", + " 'IEAVO02BX': 'mat:sediment',\n", + " 'IENTL011W': 'mat:rock',\n", + " 'IENTL011J': 'mat:rock',\n", + " 'IENTL011K': 'mat:rock',\n", + " 'IEROY0002': 
'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM00SJ': 'mat:rock',\n", + " 'IEUHM00TU': 'mat:sediment',\n", + " 'KHR00001J': 'mat:rock',\n", + " 'KHR000309': 'mat:rock',\n", + " 'PRR050547': 'mat:rock',\n", + " 'BSU0005ZS': 'mat:rock',\n", + " 'IEAVO03M9': 'mat:rock',\n", + " 'IEUHM00RT': 'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM00SH': 'mat:sediment',\n", + " 'IEUHM00T6': 'mat:sediment',\n", + " 'KHR000259': 'mat:rock',\n", + " 'KHR00027U': 'mat:sediment',\n", + " 'KHR00028F': 'mat:rock',\n", + " 'TAP00007C': 'mat:rockorsediment',\n", + " 'URI0000MR': 'mat:rock',\n", + " 'IEAVO02ZV': 'mat:rock',\n", + " 'IEMRS0066': 'mat:biogenicnonorganicmaterial',\n", + " 'IEUHM008Q': 'mat:rock',\n", + " 'IEUHM00T5': 'mat:organicmaterial',\n", + " 'IEUHM00S1': 'mat:sediment',\n", + " 'TOR0000GP': 'mat:rock',\n", + " 'BSU0005ZT': 'mat:rock',\n", + " 'IEUHM00S8': 'mat:rock',\n", + " 'PRR050548': 'mat:rock',\n", + " 'IEUHM00VA': 'mat:sediment',\n", + " 'NHB004Y2X': 'mat:rock',\n", + " 'BSU0005ZQ': 'mat:rock',\n", + " 'EOI0000G8': 'mat:rock',\n", + " 'IESPB000P': 'mat:organicmaterial',\n", + " 'IEUHM0037': 'mat:rock',\n", + " 'URI0000MP': 'mat:rock',\n", + " 'IEAVO0402': 'mat:rock',\n", + " 'BSU0005ZR': 'mat:rock',\n", + " 'IEAVO02BW': 'mat:sediment',\n", + " 'NHB001B71': 'mat:material',\n", + " 'NHB004Y2W': 'mat:rock',\n", + " 'PRR037322': 'mat:rock',\n", + " 'SRH00015T': 'mat:rock',\n", + " 'IAC000010': 'mat:rock',\n", + " 'IEAVO041W': 'mat:rock',\n", + " 'IEDDM000K': 'mat:mineral',\n", + " 'IEMRS0067': 'mat:organicmaterial',\n", + " 'IEUHM00UL': 'mat:rockorsediment',\n", + " 'URI0000FX': 'mat:rock',\n", + " 'IEDDM000I': 'mat:mineral',\n", + " 'IEDS1000G': 'mat:rock',\n", + " 'IENTL000A': 'mat:rock',\n", + " 'IEPAP0021': 'mat:rock',\n", + " 'IEUHM00UE': 'mat:sediment',\n", + " 'BSU000016': 'mat:rock',\n", + " 'BSU0005U8': 'mat:rock',\n", + " 'BSU0005KM': 'mat:rock',\n", + " 'GRD026SS0': 'mat:rock',\n", + " ...}" + ] + }, + "execution_count": 19, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "highlevel_label_map" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "3b15a42a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
description_supplementMetadata_localitydescription_materialdescription_supplementMetadata_countrydescription_supplementMetadata_provincedescription_sampleTypedescription_supplementMetadata_platformTypedescription_supplementMetadata_geologicalAgedescription_supplementMetadata_locationDescriptiondescription_supplementMetadata_purposedescription_supplementMetadata_countydescription_descriptiondescription_supplementMetadata_classificationCommentdescription_supplementMetadata_currentArchivedescription_supplementMetadata_citydescription_supplementMetadata_sampleCommentdescription_supplementMetadata_fieldNamedescription_supplementMetadata_primaryLocationTypedescription_collectionMethodDescrdescription_supplementMetadata_primaryLocationNameigsndescription_supplementMetadata_geologicalUnitdescription_supplementMetadata_localityDescriptiondescription_supplementMetadata_originalArchivedescription_supplementMetadata_platformDescrdescription_collectionMethoddescription_supplementMetadata_cruiseFieldPrgrmdescription_supplementMetadata_publicationUrl_descriptionoriginal_high_label
0Mooreite>MineralUnited StatesNew Jerseya sample that is an individual unit, including...Coordinates for Sterling Hill Mine (MRDS ID: W...Sussex Co.National Mineral Collection, Smithsonian Insti...MooreiteMineSterling Hill MineNHB002GWTOgdensburgSmithsonian collections record for NMNH C6294-...mat:mineral
1Barite>MineralUnited StatesNew Jerseya sample that is an individual unit, including...Matched to the GeoNames record for Franklin, S...Sussex Co.National Mineral Collection, Smithsonian Insti...BaryteMineFranklin MineNHB002GWUFranklinSmithsonian collections record for NMNH C6295-...mat:mineral
2Barite>MineralUnited StatesNew Jerseya sample that is an individual unit, including...Matched to the GeoNames record for Franklin, S...Sussex Co.National Mineral Collection, Smithsonian Insti...BaryteMineFranklin MineNHB002GWVFranklinSmithsonian collections record for NMNH C6295-...mat:mineral
3Hardystonite>MineralUnited StatesNew Jerseya sample that is an individual unit, including...Coordinates from GEOLocate for parse pattern \"...Sussex Co.National Mineral Collection, Smithsonian Insti...Hardystonite with calcite and leucophoeniciteMining DistrictFranklin Mining DistrictNHB002GWWFranklinSmithsonian collections record for NMNH C6296-...mat:mineral
4Chondrodite>MineralUnited StatesNew Jerseya sample that is an individual unit, including...Coordinates for Sterling Hill Mine (MRDS ID: W...Sussex Co.National Mineral Collection, Smithsonian Insti...ChondroditeMineSterling Hill MineNHB002GWZOgdensburgSmithsonian collections record for NMNH C6299-...mat:mineral
.......................................................................................
995644Metamorphic>Gneiss>Rocka sample that is an individual unit, including...Migmatic gneiss with quartz, biotite, feldspar...Mitchell Peak, West Fosdick Mountains, AntarcticaMBL00001RDept of Geology, Colorado College, Colorado Sp...Manualmat:rock
995645Metamorphic>Gneiss>Rocka sample that is an individual unit, including...Migmatic gneiss with quartz, biotite, feldspar...Mitchell Peak, West Fosdick Mountains, AntarcticaMBL00001RDept of Geology, Colorado College, Colorado Sp...Manualmat:rock
995646Metamorphic>Gneiss>Rocka sample that is an individual unit, including...whole-rock analysis availableFine grained biotite-quartz-feldspar gneissBen Burton ParkUGA000001Department of Geology, Bryn Mawr College, Bryn...Manualmat:rock
995647Metamorphic>Calc-Silicate>Rocka sample that is an individual unit, including...gneissSalamander Range, north Victoria Land, AntarcticaLTR000001Dept of Geology, Colorado College, Colorado Sp...Manualmat:rock
995648Metamorphic>Calc-Silicate>Rocka sample that is an individual unit, including...gneissSalamander Range, north Victoria Land, AntarcticaLTR000001Dept of Geology, Colorado College, Colorado Sp...Manualmat:rock
\n", + "

995649 rows × 28 columns

\n", + "
" + ], + "text/plain": [ + " description_supplementMetadata_locality \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_material description_supplementMetadata_country \\\n", + "0 Mooreite>Mineral United States \n", + "1 Barite>Mineral United States \n", + "2 Barite>Mineral United States \n", + "3 Hardystonite>Mineral United States \n", + "4 Chondrodite>Mineral United States \n", + "... ... ... \n", + "995644 Metamorphic>Gneiss>Rock \n", + "995645 Metamorphic>Gneiss>Rock \n", + "995646 Metamorphic>Gneiss>Rock \n", + "995647 Metamorphic>Calc-Silicate>Rock \n", + "995648 Metamorphic>Calc-Silicate>Rock \n", + "\n", + " description_supplementMetadata_province \\\n", + "0 New Jersey \n", + "1 New Jersey \n", + "2 New Jersey \n", + "3 New Jersey \n", + "4 New Jersey \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_sampleType \\\n", + "0 a sample that is an individual unit, including... \n", + "1 a sample that is an individual unit, including... \n", + "2 a sample that is an individual unit, including... \n", + "3 a sample that is an individual unit, including... \n", + "4 a sample that is an individual unit, including... \n", + "... ... \n", + "995644 a sample that is an individual unit, including... \n", + "995645 a sample that is an individual unit, including... \n", + "995646 a sample that is an individual unit, including... \n", + "995647 a sample that is an individual unit, including... \n", + "995648 a sample that is an individual unit, including... \n", + "\n", + " description_supplementMetadata_platformType \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_geologicalAge \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... 
\n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_locationDescription \\\n", + "0 Coordinates for Sterling Hill Mine (MRDS ID: W... \n", + "1 Matched to the GeoNames record for Franklin, S... \n", + "2 Matched to the GeoNames record for Franklin, S... \n", + "3 Coordinates from GEOLocate for parse pattern \"... \n", + "4 Coordinates for Sterling Hill Mine (MRDS ID: W... \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_purpose \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_county description_description \\\n", + "0 Sussex Co. \n", + "1 Sussex Co. \n", + "2 Sussex Co. \n", + "3 Sussex Co. \n", + "4 Sussex Co. \n", + "... ... ... \n", + "995644 \n", + "995645 \n", + "995646 whole-rock analysis available \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_classificationComment \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_currentArchive \\\n", + "0 National Mineral Collection, Smithsonian Insti... \n", + "1 National Mineral Collection, Smithsonian Insti... \n", + "2 National Mineral Collection, Smithsonian Insti... \n", + "3 National Mineral Collection, Smithsonian Insti... \n", + "4 National Mineral Collection, Smithsonian Insti... \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_city \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... 
\n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_sampleComment \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_fieldName \\\n", + "0 Mooreite \n", + "1 Baryte \n", + "2 Baryte \n", + "3 Hardystonite with calcite and leucophoenicite \n", + "4 Chondrodite \n", + "... ... \n", + "995644 Migmatic gneiss with quartz, biotite, feldspar... \n", + "995645 Migmatic gneiss with quartz, biotite, feldspar... \n", + "995646 Fine grained biotite-quartz-feldspar gneiss \n", + "995647 gneiss \n", + "995648 gneiss \n", + "\n", + " description_supplementMetadata_primaryLocationType \\\n", + "0 Mine \n", + "1 Mine \n", + "2 Mine \n", + "3 Mining District \n", + "4 Mine \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_collectionMethodDescr \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_primaryLocationName igsn \\\n", + "0 Sterling Hill Mine NHB002GWT \n", + "1 Franklin Mine NHB002GWU \n", + "2 Franklin Mine NHB002GWV \n", + "3 Franklin Mining District NHB002GWW \n", + "4 Sterling Hill Mine NHB002GWZ \n", + "... ... ... \n", + "995644 Mitchell Peak, West Fosdick Mountains, Antarctica MBL00001R \n", + "995645 Mitchell Peak, West Fosdick Mountains, Antarctica MBL00001R \n", + "995646 Ben Burton Park UGA000001 \n", + "995647 Salamander Range, north Victoria Land, Antarctica LTR000001 \n", + "995648 Salamander Range, north Victoria Land, Antarctica LTR000001 \n", + "\n", + " description_supplementMetadata_geologicalUnit \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... 
\n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_localityDescription \\\n", + "0 Ogdensburg \n", + "1 Franklin \n", + "2 Franklin \n", + "3 Franklin \n", + "4 Ogdensburg \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_originalArchive \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 Dept of Geology, Colorado College, Colorado Sp... \n", + "995645 Dept of Geology, Colorado College, Colorado Sp... \n", + "995646 Department of Geology, Bryn Mawr College, Bryn... \n", + "995647 Dept of Geology, Colorado College, Colorado Sp... \n", + "995648 Dept of Geology, Colorado College, Colorado Sp... \n", + "\n", + " description_supplementMetadata_platformDescr \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_collectionMethod \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 Manual \n", + "995645 Manual \n", + "995646 Manual \n", + "995647 Manual \n", + "995648 Manual \n", + "\n", + " description_supplementMetadata_cruiseFieldPrgrm \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_publicationUrl_description \\\n", + "0 Smithsonian collections record for NMNH C6294-... \n", + "1 Smithsonian collections record for NMNH C6295-... \n", + "2 Smithsonian collections record for NMNH C6295-... \n", + "3 Smithsonian collections record for NMNH C6296-... \n", + "4 Smithsonian collections record for NMNH C6299-... \n", + "... ... 
\n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " original_high_label \n", + "0 mat:mineral \n", + "1 mat:mineral \n", + "2 mat:mineral \n", + "3 mat:mineral \n", + "4 mat:mineral \n", + "... ... \n", + "995644 mat:rock \n", + "995645 mat:rock \n", + "995646 mat:rock \n", + "995647 mat:rock \n", + "995648 mat:rock \n", + "\n", + "[995649 rows x 28 columns]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = df.assign(original_high_label=df.description_material)\n", + "df['original_high_label'] = df['igsn'].map(lambda x: highlevel_label_map.get(x,x))\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "2521ae84", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
description_supplementMetadata_localitydescription_materialdescription_supplementMetadata_countrydescription_supplementMetadata_provincedescription_sampleTypedescription_supplementMetadata_platformTypedescription_supplementMetadata_geologicalAgedescription_supplementMetadata_locationDescriptiondescription_supplementMetadata_purposedescription_supplementMetadata_countydescription_descriptiondescription_supplementMetadata_classificationCommentdescription_supplementMetadata_currentArchivedescription_supplementMetadata_citydescription_supplementMetadata_sampleCommentdescription_supplementMetadata_fieldNamedescription_supplementMetadata_primaryLocationTypedescription_collectionMethodDescrdescription_supplementMetadata_primaryLocationNameigsndescription_supplementMetadata_geologicalUnitdescription_supplementMetadata_localityDescriptiondescription_supplementMetadata_originalArchivedescription_supplementMetadata_platformDescrdescription_collectionMethoddescription_supplementMetadata_cruiseFieldPrgrmdescription_supplementMetadata_publicationUrl_descriptionoriginal_high_label
0ming:sulfateselenatetelluratemineralUnited StatesNew Jerseya sample that is an individual unit, including...Coordinates for Sterling Hill Mine (MRDS ID: W...Sussex Co.National Mineral Collection, Smithsonian Insti...MooreiteMineSterling Hill MineNHB002GWTOgdensburgSmithsonian collections record for NMNH C6294-...mat:mineral
1ming:sulfateselenatetelluratemineralUnited StatesNew Jerseya sample that is an individual unit, including...Matched to the GeoNames record for Franklin, S...Sussex Co.National Mineral Collection, Smithsonian Insti...BaryteMineFranklin MineNHB002GWUFranklinSmithsonian collections record for NMNH C6295-...mat:mineral
2ming:sulfateselenatetelluratemineralUnited StatesNew Jerseya sample that is an individual unit, including...Matched to the GeoNames record for Franklin, S...Sussex Co.National Mineral Collection, Smithsonian Insti...BaryteMineFranklin MineNHB002GWVFranklinSmithsonian collections record for NMNH C6295-...mat:mineral
3ming:silicategermanatemineralUnited StatesNew Jerseya sample that is an individual unit, including...Coordinates from GEOLocate for parse pattern \"...Sussex Co.National Mineral Collection, Smithsonian Insti...Hardystonite with calcite and leucophoeniciteMining DistrictFranklin Mining DistrictNHB002GWWFranklinSmithsonian collections record for NMNH C6296-...mat:mineral
4ming:silicategermanatemineralUnited StatesNew Jerseya sample that is an individual unit, including...Coordinates for Sterling Hill Mine (MRDS ID: W...Sussex Co.National Mineral Collection, Smithsonian Insti...ChondroditeMineSterling Hill MineNHB002GWZOgdensburgSmithsonian collections record for NMNH C6299-...mat:mineral
.......................................................................................
995644rksd:Metamorphic Rocka sample that is an individual unit, including...Migmatic gneiss with quartz, biotite, feldspar...Mitchell Peak, West Fosdick Mountains, AntarcticaMBL00001RDept of Geology, Colorado College, Colorado Sp...Manualmat:rock
995645rksd:Metamorphic Rocka sample that is an individual unit, including...Migmatic gneiss with quartz, biotite, feldspar...Mitchell Peak, West Fosdick Mountains, AntarcticaMBL00001RDept of Geology, Colorado College, Colorado Sp...Manualmat:rock
995646rksd:Metamorphic Rocka sample that is an individual unit, including...whole-rock analysis availableFine grained biotite-quartz-feldspar gneissBen Burton ParkUGA000001Department of Geology, Bryn Mawr College, Bryn...Manualmat:rock
995647rksd:Metamorphic Rocka sample that is an individual unit, including...gneissSalamander Range, north Victoria Land, AntarcticaLTR000001Dept of Geology, Colorado College, Colorado Sp...Manualmat:rock
995648rksd:Metamorphic Rocka sample that is an individual unit, including...gneissSalamander Range, north Victoria Land, AntarcticaLTR000001Dept of Geology, Colorado College, Colorado Sp...Manualmat:rock
\n", + "

995649 rows × 28 columns

\n", + "
" + ], + "text/plain": [ + " description_supplementMetadata_locality \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_material \\\n", + "0 ming:sulfateselenatetelluratemineral \n", + "1 ming:sulfateselenatetelluratemineral \n", + "2 ming:sulfateselenatetelluratemineral \n", + "3 ming:silicategermanatemineral \n", + "4 ming:silicategermanatemineral \n", + "... ... \n", + "995644 rksd:Metamorphic Rock \n", + "995645 rksd:Metamorphic Rock \n", + "995646 rksd:Metamorphic Rock \n", + "995647 rksd:Metamorphic Rock \n", + "995648 rksd:Metamorphic Rock \n", + "\n", + " description_supplementMetadata_country \\\n", + "0 United States \n", + "1 United States \n", + "2 United States \n", + "3 United States \n", + "4 United States \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_province \\\n", + "0 New Jersey \n", + "1 New Jersey \n", + "2 New Jersey \n", + "3 New Jersey \n", + "4 New Jersey \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_sampleType \\\n", + "0 a sample that is an individual unit, including... \n", + "1 a sample that is an individual unit, including... \n", + "2 a sample that is an individual unit, including... \n", + "3 a sample that is an individual unit, including... \n", + "4 a sample that is an individual unit, including... \n", + "... ... \n", + "995644 a sample that is an individual unit, including... \n", + "995645 a sample that is an individual unit, including... \n", + "995646 a sample that is an individual unit, including... \n", + "995647 a sample that is an individual unit, including... \n", + "995648 a sample that is an individual unit, including... 
\n", + "\n", + " description_supplementMetadata_platformType \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_geologicalAge \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_locationDescription \\\n", + "0 Coordinates for Sterling Hill Mine (MRDS ID: W... \n", + "1 Matched to the GeoNames record for Franklin, S... \n", + "2 Matched to the GeoNames record for Franklin, S... \n", + "3 Coordinates from GEOLocate for parse pattern \"... \n", + "4 Coordinates for Sterling Hill Mine (MRDS ID: W... \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_purpose \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_county description_description \\\n", + "0 Sussex Co. \n", + "1 Sussex Co. \n", + "2 Sussex Co. \n", + "3 Sussex Co. \n", + "4 Sussex Co. \n", + "... ... ... \n", + "995644 \n", + "995645 \n", + "995646 whole-rock analysis available \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_classificationComment \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_currentArchive \\\n", + "0 National Mineral Collection, Smithsonian Insti... \n", + "1 National Mineral Collection, Smithsonian Insti... \n", + "2 National Mineral Collection, Smithsonian Insti... \n", + "3 National Mineral Collection, Smithsonian Insti... \n", + "4 National Mineral Collection, Smithsonian Insti... \n", + "... ... 
\n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_city \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_sampleComment \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_fieldName \\\n", + "0 Mooreite \n", + "1 Baryte \n", + "2 Baryte \n", + "3 Hardystonite with calcite and leucophoenicite \n", + "4 Chondrodite \n", + "... ... \n", + "995644 Migmatic gneiss with quartz, biotite, feldspar... \n", + "995645 Migmatic gneiss with quartz, biotite, feldspar... \n", + "995646 Fine grained biotite-quartz-feldspar gneiss \n", + "995647 gneiss \n", + "995648 gneiss \n", + "\n", + " description_supplementMetadata_primaryLocationType \\\n", + "0 Mine \n", + "1 Mine \n", + "2 Mine \n", + "3 Mining District \n", + "4 Mine \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_collectionMethodDescr \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_primaryLocationName igsn \\\n", + "0 Sterling Hill Mine NHB002GWT \n", + "1 Franklin Mine NHB002GWU \n", + "2 Franklin Mine NHB002GWV \n", + "3 Franklin Mining District NHB002GWW \n", + "4 Sterling Hill Mine NHB002GWZ \n", + "... ... ... 
\n", + "995644 Mitchell Peak, West Fosdick Mountains, Antarctica MBL00001R \n", + "995645 Mitchell Peak, West Fosdick Mountains, Antarctica MBL00001R \n", + "995646 Ben Burton Park UGA000001 \n", + "995647 Salamander Range, north Victoria Land, Antarctica LTR000001 \n", + "995648 Salamander Range, north Victoria Land, Antarctica LTR000001 \n", + "\n", + " description_supplementMetadata_geologicalUnit \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_localityDescription \\\n", + "0 Ogdensburg \n", + "1 Franklin \n", + "2 Franklin \n", + "3 Franklin \n", + "4 Ogdensburg \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_originalArchive \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 Dept of Geology, Colorado College, Colorado Sp... \n", + "995645 Dept of Geology, Colorado College, Colorado Sp... \n", + "995646 Department of Geology, Bryn Mawr College, Bryn... \n", + "995647 Dept of Geology, Colorado College, Colorado Sp... \n", + "995648 Dept of Geology, Colorado College, Colorado Sp... \n", + "\n", + " description_supplementMetadata_platformDescr \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_collectionMethod \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... \n", + "995644 Manual \n", + "995645 Manual \n", + "995646 Manual \n", + "995647 Manual \n", + "995648 Manual \n", + "\n", + " description_supplementMetadata_cruiseFieldPrgrm \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "... ... 
\n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " description_supplementMetadata_publicationUrl_description \\\n", + "0 Smithsonian collections record for NMNH C6294-... \n", + "1 Smithsonian collections record for NMNH C6295-... \n", + "2 Smithsonian collections record for NMNH C6295-... \n", + "3 Smithsonian collections record for NMNH C6296-... \n", + "4 Smithsonian collections record for NMNH C6299-... \n", + "... ... \n", + "995644 \n", + "995645 \n", + "995646 \n", + "995647 \n", + "995648 \n", + "\n", + " original_high_label \n", + "0 mat:mineral \n", + "1 mat:mineral \n", + "2 mat:mineral \n", + "3 mat:mineral \n", + "4 mat:mineral \n", + "... ... \n", + "995644 mat:rock \n", + "995645 mat:rock \n", + "995646 mat:rock \n", + "995647 mat:rock \n", + "995648 mat:rock \n", + "\n", + "[995649 rows x 28 columns]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "for _, row in df.iterrows():\n", + " if row['igsn'] in label_map:\n", + " row['description_material'] = \"/\".join(label_map.get(row['igsn']))\n", + "df['description_material'] = df['igsn'].map(lambda x: \"/\".join(label_map.get(x, [])))\n", + "df # note : there are some rows from original dump that annotation does not exist " + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "55f28fdd", + "metadata": {}, + "outputs": [], + "source": [ + "df = df.drop('igsn', axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "89a065d0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'',\n", + " 'mat:anthropogenicmetal',\n", + " 'mat:biogenicnonorganicmaterial',\n", + " 'mat:gas',\n", + " 'mat:liquidwater',\n", + " 'mat:material',\n", + " 'mat:mineral',\n", + " 'mat:mixedsoilsedimentrock',\n", + " 'mat:organicmaterial',\n", + " 'mat:otheranthropogenicmaterial',\n", + " 'mat:particulate',\n", + " 'mat:rock',\n", + " 'mat:rockorsediment',\n", + " 
'mat:sediment',\n", + " 'mat:soil',\n", + " 'ming:boratemineral',\n", + " 'ming:carbonatenitratemineral',\n", + " 'ming:halidemineral',\n", + " 'ming:nativeelementmineral',\n", + " 'ming:organicmineral',\n", + " 'ming:oxidemineral',\n", + " 'ming:phosphatearsenatevanadatemineral',\n", + " 'ming:silicategermanatemineral',\n", + " 'ming:sulfateselenatetelluratemineral',\n", + " 'ming:sulfidesulfosaltmineral',\n", + " 'ocmat:ceramicclay',\n", + " 'ocmat:charcoal',\n", + " 'ocmat:glass',\n", + " 'rksd:Acidic Igneous Rock',\n", + " 'rksd:Alkali Feldspar Granite',\n", + " 'rksd:Andesite',\n", + " 'rksd:Andesite/rksd:Diamicton',\n", + " 'rksd:Anorthositic Rock',\n", + " 'rksd:Aphanite',\n", + " 'rksd:Basalt',\n", + " 'rksd:Basalt/rksd:Chemical Sedimentary Material',\n", + " 'rksd:Basalt/rksd:Glass Rich Igneous Rock',\n", + " 'rksd:Basic Igneous Rock',\n", + " 'rksd:Biogenic Sediment',\n", + " 'rksd:Breccia',\n", + " 'rksd:Breccia Gouge Series',\n", + " 'rksd:Carbonate Sediment',\n", + " 'rksd:Carbonate Sedimentary Rock',\n", + " 'rksd:Carbonate Sedimentary Rock/rksd:Clastic Sedimentary Rock',\n", + " 'rksd:Carbonate Sedimentary Rock/rksd:Generic Mudstone',\n", + " 'rksd:Cataclasite Series',\n", + " 'rksd:Chemical Sedimentary Material',\n", + " 'rksd:Clastic Sediment',\n", + " 'rksd:Clastic Sedimentary Rock',\n", + " 'rksd:Clastic Sedimentary Rock/rksd:Coal',\n", + " 'rksd:Clastic sediment',\n", + " 'rksd:Coal',\n", + " 'rksd:Coal/ming:phosphatearsenatevanadatemineral',\n", + " 'rksd:Dacite',\n", + " 'rksd:Diamictite',\n", + " 'rksd:Diamicton',\n", + " 'rksd:Diamicton/rksd:Dacite',\n", + " 'rksd:Dioritoid',\n", + " 'rksd:Doleritic Rock',\n", + " 'rksd:Exotic Composition Igneous Rock',\n", + " 'rksd:Fault Related Material',\n", + " 'rksd:Fine Grained Igneous Rock',\n", + " 'rksd:Foid Gabbroid',\n", + " 'rksd:Foid Syenitoid',\n", + " 'rksd:Foiditoid',\n", + " 'rksd:Foidolite',\n", + " 'rksd:Fragmental Igneous Rock',\n", + " 'rksd:Fragmental Igneous Rock/rksd:Doleritic 
Rock',\n", + " 'rksd:Fragmental Igneous Rock/rksd:Fine Grained Igneous Rock',\n", + " 'rksd:Gabbroic Rock',\n", + " 'rksd:Gabbroid',\n", + " 'rksd:Generic Conglomerate',\n", + " 'rksd:Generic Mudstone',\n", + " 'rksd:Generic Mudstone/rksd:Biogenic Sediment',\n", + " 'rksd:Generic Mudstone/rksd:Coal',\n", + " 'rksd:Generic Sandstone',\n", + " 'rksd:Generic Sandstone/rksd:Coal',\n", + " 'rksd:Generic Sandstone/rksd:Generic Mudstone',\n", + " 'rksd:Generic Sandstone/rksd:Organic Rich Sedimentary Rock',\n", + " 'rksd:Generic Sandstone/rksd:Tuffite',\n", + " 'rksd:Glass Rich Igneous Rock',\n", + " 'rksd:Granite',\n", + " 'rksd:Granitoid',\n", + " 'rksd:Granodiorite',\n", + " 'rksd:Gravel Size Sediment',\n", + " 'rksd:High Magnesium Fine Grained Igneous Rock',\n", + " 'rksd:Hornblendite',\n", + " 'rksd:Hybrid Sedimentary Rock',\n", + " 'rksd:Igneous Rock',\n", + " 'rksd:Impact Generated Material',\n", + " 'rksd:Iron Rich Sedimentary Rock',\n", + " 'rksd:Massive Sulphide',\n", + " 'rksd:Metamorphic Rock',\n", + " 'rksd:Metamorphic Rock/rksd:Basic Igneous Rock',\n", + " 'rksd:Metamorphic Rock/rksd:Dioritoid',\n", + " 'rksd:Metamorphic Rock/rksd:Fine Grained Igneous Rock',\n", + " 'rksd:Metamorphic Rock/rksd:Gabbroid',\n", + " 'rksd:Metamorphic Rock/rksd:Generic Mudstone',\n", + " 'rksd:Metasomatic Rock',\n", + " 'rksd:Metasomatic Rock/rksd:Generic Mudstone',\n", + " 'rksd:Metasomatic Rock/rksd:Peridotite',\n", + " 'rksd:Metasomatic Rock/rksd:Ultramafic Igneous Rock',\n", + " 'rksd:Mud Size Sediment',\n", + " 'rksd:Mylonitic Rock',\n", + " 'rksd:Non Clastic Siliceous Sediment',\n", + " 'rksd:Non Clastic Siliceous Sedimentary Rock',\n", + " 'rksd:Organic Rich Sedimentary Rock',\n", + " 'rksd:Pegmatite',\n", + " 'rksd:Peridotite',\n", + " 'rksd:Phaneritic Igneous Rock',\n", + " 'rksd:Phaneritic Igneous Rock/rksd:Pyroclastic Rock',\n", + " 'rksd:Phonolitoid',\n", + " 'rksd:Porphyry',\n", + " 'rksd:Pyroclastic Rock',\n", + " 'rksd:Pyroclastic Rock/rksd:Basalt',\n", + " 
'rksd:Pyroclastic Rock/rksd:Rhyolitoid',\n", + " 'rksd:Pyroxenite',\n", + " 'rksd:Quartz Rich Igneous Rock',\n", + " 'rksd:Rhyolitoid',\n", + " 'rksd:Rhyolitoid/rksd:Glass Rich Igneous Rock',\n", + " 'rksd:Sand Size Sediment',\n", + " 'rksd:Sedimentary Rock',\n", + " 'rksd:Syenitoid',\n", + " 'rksd:Tephra',\n", + " 'rksd:Tephritoid',\n", + " 'rksd:Tephritoid/rksd:Pyroclastic Rock',\n", + " 'rksd:Tonalite',\n", + " 'rksd:Trachytoid',\n", + " 'rksd:Trachytoid/rksd:Pyroclastic Rock',\n", + " 'rksd:Tuffite',\n", + " 'rksd:Ultramafic Igneous Rock',\n", + " 'rksd:andesite',\n", + " 'rksd:anorthositic rock',\n", + " 'rksd:basalt',\n", + " 'rksd:coal',\n", + " 'rksd:residual material',\n", + " 'xxx'}" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "set(df['description_material'].values.tolist())" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "04a88924", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(995649, 27)\n", + "(987553, 27)\n" + ] + }, + { + "data": { + "text/plain": [ + "{'mat:anthropogenicmetal',\n", + " 'mat:biogenicnonorganicmaterial',\n", + " 'mat:gas',\n", + " 'mat:liquidwater',\n", + " 'mat:material',\n", + " 'mat:mineral',\n", + " 'mat:mixedsoilsedimentrock',\n", + " 'mat:organicmaterial',\n", + " 'mat:otheranthropogenicmaterial',\n", + " 'mat:particulate',\n", + " 'mat:rock',\n", + " 'mat:rockorsediment',\n", + " 'mat:sediment',\n", + " 'mat:soil',\n", + " 'ming:boratemineral',\n", + " 'ming:carbonatenitratemineral',\n", + " 'ming:halidemineral',\n", + " 'ming:nativeelementmineral',\n", + " 'ming:organicmineral',\n", + " 'ming:oxidemineral',\n", + " 'ming:phosphatearsenatevanadatemineral',\n", + " 'ming:silicategermanatemineral',\n", + " 'ming:sulfateselenatetelluratemineral',\n", + " 'ming:sulfidesulfosaltmineral',\n", + " 'ocmat:ceramicclay',\n", + " 'ocmat:charcoal',\n", + " 'ocmat:glass',\n", + " 'rksd:Acidic 
Igneous Rock',\n", + " 'rksd:Alkali Feldspar Granite',\n", + " 'rksd:Andesite',\n", + " 'rksd:Andesite/rksd:Diamicton',\n", + " 'rksd:Anorthositic Rock',\n", + " 'rksd:Aphanite',\n", + " 'rksd:Basalt',\n", + " 'rksd:Basalt/rksd:Chemical Sedimentary Material',\n", + " 'rksd:Basalt/rksd:Glass Rich Igneous Rock',\n", + " 'rksd:Basic Igneous Rock',\n", + " 'rksd:Biogenic Sediment',\n", + " 'rksd:Breccia',\n", + " 'rksd:Breccia Gouge Series',\n", + " 'rksd:Carbonate Sediment',\n", + " 'rksd:Carbonate Sedimentary Rock',\n", + " 'rksd:Carbonate Sedimentary Rock/rksd:Clastic Sedimentary Rock',\n", + " 'rksd:Carbonate Sedimentary Rock/rksd:Generic Mudstone',\n", + " 'rksd:Cataclasite Series',\n", + " 'rksd:Chemical Sedimentary Material',\n", + " 'rksd:Clastic Sediment',\n", + " 'rksd:Clastic Sedimentary Rock',\n", + " 'rksd:Clastic Sedimentary Rock/rksd:Coal',\n", + " 'rksd:Clastic sediment',\n", + " 'rksd:Coal',\n", + " 'rksd:Coal/ming:phosphatearsenatevanadatemineral',\n", + " 'rksd:Dacite',\n", + " 'rksd:Diamictite',\n", + " 'rksd:Diamicton',\n", + " 'rksd:Diamicton/rksd:Dacite',\n", + " 'rksd:Dioritoid',\n", + " 'rksd:Doleritic Rock',\n", + " 'rksd:Exotic Composition Igneous Rock',\n", + " 'rksd:Fault Related Material',\n", + " 'rksd:Fine Grained Igneous Rock',\n", + " 'rksd:Foid Gabbroid',\n", + " 'rksd:Foid Syenitoid',\n", + " 'rksd:Foiditoid',\n", + " 'rksd:Foidolite',\n", + " 'rksd:Fragmental Igneous Rock',\n", + " 'rksd:Fragmental Igneous Rock/rksd:Doleritic Rock',\n", + " 'rksd:Fragmental Igneous Rock/rksd:Fine Grained Igneous Rock',\n", + " 'rksd:Gabbroic Rock',\n", + " 'rksd:Gabbroid',\n", + " 'rksd:Generic Conglomerate',\n", + " 'rksd:Generic Mudstone',\n", + " 'rksd:Generic Mudstone/rksd:Biogenic Sediment',\n", + " 'rksd:Generic Mudstone/rksd:Coal',\n", + " 'rksd:Generic Sandstone',\n", + " 'rksd:Generic Sandstone/rksd:Coal',\n", + " 'rksd:Generic Sandstone/rksd:Generic Mudstone',\n", + " 'rksd:Generic Sandstone/rksd:Organic Rich Sedimentary Rock',\n", + " 
'rksd:Generic Sandstone/rksd:Tuffite',\n", + " 'rksd:Glass Rich Igneous Rock',\n", + " 'rksd:Granite',\n", + " 'rksd:Granitoid',\n", + " 'rksd:Granodiorite',\n", + " 'rksd:Gravel Size Sediment',\n", + " 'rksd:High Magnesium Fine Grained Igneous Rock',\n", + " 'rksd:Hornblendite',\n", + " 'rksd:Hybrid Sedimentary Rock',\n", + " 'rksd:Igneous Rock',\n", + " 'rksd:Impact Generated Material',\n", + " 'rksd:Iron Rich Sedimentary Rock',\n", + " 'rksd:Massive Sulphide',\n", + " 'rksd:Metamorphic Rock',\n", + " 'rksd:Metamorphic Rock/rksd:Basic Igneous Rock',\n", + " 'rksd:Metamorphic Rock/rksd:Dioritoid',\n", + " 'rksd:Metamorphic Rock/rksd:Fine Grained Igneous Rock',\n", + " 'rksd:Metamorphic Rock/rksd:Gabbroid',\n", + " 'rksd:Metamorphic Rock/rksd:Generic Mudstone',\n", + " 'rksd:Metasomatic Rock',\n", + " 'rksd:Metasomatic Rock/rksd:Generic Mudstone',\n", + " 'rksd:Metasomatic Rock/rksd:Peridotite',\n", + " 'rksd:Metasomatic Rock/rksd:Ultramafic Igneous Rock',\n", + " 'rksd:Mud Size Sediment',\n", + " 'rksd:Mylonitic Rock',\n", + " 'rksd:Non Clastic Siliceous Sediment',\n", + " 'rksd:Non Clastic Siliceous Sedimentary Rock',\n", + " 'rksd:Organic Rich Sedimentary Rock',\n", + " 'rksd:Pegmatite',\n", + " 'rksd:Peridotite',\n", + " 'rksd:Phaneritic Igneous Rock',\n", + " 'rksd:Phaneritic Igneous Rock/rksd:Pyroclastic Rock',\n", + " 'rksd:Phonolitoid',\n", + " 'rksd:Porphyry',\n", + " 'rksd:Pyroclastic Rock',\n", + " 'rksd:Pyroclastic Rock/rksd:Basalt',\n", + " 'rksd:Pyroclastic Rock/rksd:Rhyolitoid',\n", + " 'rksd:Pyroxenite',\n", + " 'rksd:Quartz Rich Igneous Rock',\n", + " 'rksd:Rhyolitoid',\n", + " 'rksd:Rhyolitoid/rksd:Glass Rich Igneous Rock',\n", + " 'rksd:Sand Size Sediment',\n", + " 'rksd:Sedimentary Rock',\n", + " 'rksd:Syenitoid',\n", + " 'rksd:Tephra',\n", + " 'rksd:Tephritoid',\n", + " 'rksd:Tephritoid/rksd:Pyroclastic Rock',\n", + " 'rksd:Tonalite',\n", + " 'rksd:Trachytoid',\n", + " 'rksd:Trachytoid/rksd:Pyroclastic Rock',\n", + " 'rksd:Tuffite',\n", + " 
'rksd:Ultramafic Igneous Rock',\n", + " 'rksd:andesite',\n", + " 'rksd:anorthositic rock',\n", + " 'rksd:basalt',\n", + " 'rksd:coal',\n", + " 'rksd:residual material'}" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(df.shape)\n", + "df = df[df['description_material'].notna()]\n", + "df = df[(df['description_material'] != '') & (df['description_material'] != 'xxx')] # drop invalid annotations\n", + "print(df.shape)\n", + "set(df['description_material'].values.tolist())" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "17cd0182", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(987553, 27)\n" + ] + } + ], + "source": [ + "df = df[(df['original_high_label'] != '')]\n", + "print(df.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "7efeefc5", + "metadata": {}, + "outputs": [], + "source": [ + "# store the CV labeled version\n", + "\n", + "df.to_csv(\"./datasets/SESAR_CV_labeled.csv\")" + ] + }, + { + "cell_type": "markdown", + "id": "d451042e", + "metadata": {}, + "source": [ + "## Convert Material Type to Multi Label" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "c7736c4d", + "metadata": {}, + "outputs": [], + "source": [ + "# df = pd.read_csv(\"./datasets/SESAR_CV_labeled.csv\")\n", + "# print(df.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "886790b9", + "metadata": {}, + "outputs": [], + "source": [ + "# map description_material to parent multi labels\n", + "import json\n", + "with open('./datasets/multilabel_mapping.json') as f:\n", + "#with open('./datasets/multilabel_mapping_wo_leaf.json') as f:\n", + " multilabel_mapping = json.load(f)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "907b3dac", + "metadata": {}, + "outputs": [], + "source": [ + "df['description_material'] = df['description_material'].map(lambda x: 
multilabel_mapping.get(x,x))" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "2c0ba9e7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(972182, 27)\n" + ] + } + ], + "source": [ + "df = df[(df['description_material'] != '')]\n", + "print(df.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "a63b817f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['sediment/igneous rock/natural solid material/high magnesium fine grained igneous rock/fine grained igneous rock/rock',\n", + " 'sediment/coal/natural solid material/generic sandstone/organic rich sedimentary rock/sedimentary rock/rock',\n", + " 'anthropogenic metal material/any anthropogenic material',\n", + " 'clastic sedimentary rock/sediment/coal/natural solid material/organic rich sedimentary rock/sedimentary rock/rock',\n", + " 'phaneritic igneous rock/quartz rich igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'sediment/natural solid material/non clastic siliceous sediment/rock',\n", + " 'sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock',\n", + " 'sediment/dacite/igneous rock/natural solid material/fine grained igneous rock/rock',\n", + " 'anthropogenic material/ceramic clay/any anthropogenic material',\n", + " 'sediment/natural solid material/iron rich sedimentary rock/sedimentary rock/rock',\n", + " 'sediment/igneous rock/natural solid material/fragmental igneous rock/fine grained igneous rock/rock',\n", + " 'sediment/tuffite/natural solid material/rock',\n", + " 'sediment/breccia/natural solid material/rock',\n", + " 'phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'phaneritic igneous rock/sediment/igneous rock/natural solid material/anorthositic rock/rock',\n", + " 'clastic sediment/sediment/natural solid material/rock',\n", + " 'mineral-organic compound/natural solid material/mineral',\n", + 
" 'sediment/non clastic siliceous sedimentary rock/natural solid material/sedimentary rock/rock',\n", + " 'gabbroid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'gabbroic rock/gabbroid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'sediment/generic mudstone/natural solid material/metasomatic rock/sedimentary rock/rock',\n", + " 'sediment/mylonitic rock/fault related material/natural solid material/rock',\n", + " 'sediment/igneous rock/natural solid material/porphyry/rock',\n", + " 'sediment/impact generated material/natural solid material/rock',\n", + " 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'sediment/generic mudstone/natural solid material/biogenic sediment/sedimentary rock/rock',\n", + " 'natural solid material/mineral/mineral-oxide',\n", + " 'sediment/natural solid material/metasomatic rock/rock',\n", + " 'sediment/igneous rock/natural solid material/pyroclastic rock/trachytoid/fragmental igneous rock/fine grained igneous rock/rock',\n", + " 'tephritoid/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n", + " 'sediment/generic mudstone/natural solid material/generic sandstone/sedimentary rock/rock',\n", + " 'sediment/natural solid material/organic rich sedimentary rock/sedimentary rock/rock',\n", + " 'basic igneous rock/sediment/igneous rock/natural solid material/metamorphic rock/rock',\n", + " 'sediment/igneous rock/rhyolitoid/natural solid material/pyroclastic rock/fragmental igneous rock/fine grained igneous rock/rock',\n", + " 'sediment/igneous rock/natural solid material/exotic composition igneous rock/rock',\n", + " 'sediment/fault related material/natural solid material/breccia gouge series/rock',\n", + " 'fluid material/gaseous material',\n", + " 'glass/anthropogenic material/any anthropogenic material',\n", + " 'sediment/natural solid material/chemical sedimentary material/rock',\n", + " 
'sediment/tuffite/natural solid material/generic sandstone/sedimentary rock/rock',\n", + " 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n", + " 'soil/sediment/natural solid material/rock',\n", + " 'sediment/natural solid material/biogenic sediment/rock',\n", + " 'phaneritic igneous rock/sediment/igneous rock/natural solid material/pegmatite/rock',\n", + " 'clastic sedimentary rock/sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n", + " 'sediment/igneous rock/natural solid material/ultramafic igneous rock/metasomatic rock/peridotite/rock',\n", + " 'andesite/clastic sediment/sediment/igneous rock/natural solid material/intermediate composition igneous rock/diamicton/rock',\n", + " 'sediment/fault related material/natural solid material/cataclasite series/rock',\n", + " 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n", + " 'basalt/sediment/igneous rock/natural solid material/glass rich igneous rock/fine grained igneous rock/rock',\n", + " 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/alkali feldspar granite/rock',\n", + " 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n", + " 'sediment/natural solid material/sedimentary rock/hybrid sedimentary rock/rock',\n", + " 'sediment/generic mudstone/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n", + " 'sediment/igneous rock/natural solid material/doleritic rock/rock',\n", + " 'natural solid material/mineral/mineral-silicate or germanate',\n", + " 'sediment/igneous rock/natural solid material/rock',\n", + " 'liquid water/fluid material',\n", + " 'sediment/igneous rock/rhyolitoid/natural solid material/glass rich igneous rock/fine grained igneous rock/rock',\n", + " 'sediment/natural solid material/residual material/rock',\n", + " 'sediment/generic mudstone/coal/natural solid material/organic rich sedimentary rock/sedimentary rock/rock',\n", + " 
'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/granite/rock',\n", + " 'phaneritic igneous rock/sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n", + " 'sediment/natural solid material/generic sandstone/organic rich sedimentary rock/sedimentary rock/rock',\n", + " 'sediment/igneous rock/natural solid material/metamorphic rock/fine grained igneous rock/rock',\n", + " 'natural solid material/soil',\n", + " 'sediment/mud size sediment/natural solid material/rock',\n", + " 'sediment/coal/natural solid material/mineral-phosphate, arsenate, or vanadate/organic rich sedimentary rock/sedimentary rock/mineral/rock',\n", + " 'sediment/igneous rock/natural solid material/hornblendite/ultramafic igneous rock/rock',\n", + " 'natural solid material/mineral-carbonate or nitrate/mineral',\n", + " 'sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock',\n", + " 'sediment/igneous rock/natural solid material/foiditoid/fine grained igneous rock/rock',\n", + " 'sediment/fault related material/natural solid material/rock',\n", + " 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n", + " 'sediment/natural solid material/aphanite/rock',\n", + " 'tephritoid/sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/fine grained igneous rock/rock',\n", + " 'sediment/igneous rock/acidic igneous rock/natural solid material/rock',\n", + " 'phaneritic igneous rock/sediment/igneous rock/natural solid material/foid gabbroid/rock',\n", + " 'natural solid material/mineral-halide/mineral',\n", + " 'particulate/natural solid material',\n", + " 'gabbroid/phaneritic igneous rock/sediment/igneous rock/natural solid material/metamorphic rock/rock',\n", + " 'clastic sediment/sediment/igneous rock/natural solid material/rock/fine grained igneous rock/diamicton/dacite',\n", + " 'sediment/natural solid material/carbonate sediment/rock',\n", + " 
'sediment/generic conglomerate/natural solid material/sedimentary rock/rock',\n", + " 'natural solid material/mineral',\n", + " 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n", + " 'clastic sedimentary rock/sediment/natural solid material/sedimentary rock/rock',\n", + " 'sediment/natural solid material/sand size sediment/rock',\n", + " 'mineral-borate/natural solid material/mineral',\n", + " 'sediment/generic mudstone/natural solid material/metamorphic rock/sedimentary rock/rock',\n", + " 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n", + " 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n", + " 'granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock',\n", + " 'natural solid material/sediment/rock',\n", + " 'sediment/natural solid material/gravel size sediment/rock',\n", + " 'sediment/massive sulphide/natural solid material/rock',\n", + " 'basic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'diamictite/clastic sedimentary rock/sediment/natural solid material/sedimentary rock/rock',\n", + " 'sediment/natural solid material/metamorphic rock/rock',\n", + " 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/tonalite/rock',\n", + " 'sediment/igneous rock/natural solid material/glass rich igneous rock/rock',\n", + " 'basalt/sediment/igneous rock/natural solid material/chemical sedimentary material/fine grained igneous rock/rock',\n", + " 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n", + " 'anthropogenic material/any anthropogenic material',\n", + " 'biogenic non-organic material/charcoal',\n", + " 'sediment/igneous rock/natural solid material/ultramafic igneous rock/metasomatic rock/rock',\n", + " 'organic material',\n", + " 'basalt/sediment/igneous rock/natural solid material/pyroclastic rock/fragmental 
igneous rock/fine grained igneous rock/rock',\n", + " 'dioritoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/metamorphic rock/rock',\n", + " 'sediment/igneous rock/natural solid material/ultramafic igneous rock/rock',\n", + " 'phaneritic igneous rock/sediment/igneous rock/natural solid material/syenitoid/rock',\n", + " 'sediment/coal/natural solid material/organic rich sedimentary rock/sedimentary rock/rock',\n", + " 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n", + " 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n", + " 'sediment/natural solid material/tephra/rock',\n", + " 'sediment/igneous rock/rhyolitoid/natural solid material/fine grained igneous rock/rock',\n", + " 'sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n", + " 'dioritoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n", + " 'biogenic non-organic material',\n", + " 'clastic sediment/sediment/natural solid material/diamicton/rock',\n", + " 'sediment/igneous rock/natural solid material/fragmental igneous rock/doleritic rock/rock',\n", + " 'sediment/natural solid material/sedimentary rock/rock',\n", + " 'phaneritic igneous rock/sediment/igneous rock/natural solid material/foid syenitoid/rock',\n", + " 'sediment/igneous rock/natural solid material/phonolitoid/fine grained igneous rock/rock',\n", + " 'natural solid material/mineral/mineral-native element',\n", + " 'sediment/igneous rock/natural solid material/fragmental igneous rock/rock',\n", + " 'phaneritic igneous rock/sediment/igneous rock/foidolite/natural solid material/rock']" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gold_labels = set(df['description_material'].values.tolist())\n", + "gold_labels = list(gold_labels)\n", + "gold_labels" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "925e19db", + 
"metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "101, {'foid syenitoid', 'mineral-carbonate or nitrate', 'tuffite', 'sedimentary rock', 'gabbroid', 'carbonate sediment', 'mylonitic rock', 'sediment', 'peridotite', 'porphyry', 'glass rich igneous rock', 'carbonate sedimentary rock', 'mineral-borate', 'gaseous material', 'liquid water', 'mineral-halide', 'fluid material', 'mineral-silicate or germanate', 'mud size sediment', 'alkali feldspar granite', 'granitoid', 'pyroclastic rock', 'intermediate composition igneous rock', 'tonalite', 'chemical sedimentary material', 'any anthropogenic material', 'fine grained igneous rock', 'gravel size sediment', 'cataclasite series', 'biogenic sediment', 'gabbroic rock', 'residual material', 'phaneritic igneous rock', 'rhyolitoid', 'tephritoid', 'dioritoid', 'coal', 'ceramic clay', 'mineral-sulfate, selenate, or tellurate', 'mineral-phosphate, arsenate, or vanadate', 'basalt', 'syenitoid', 'fault related material', 'phonolitoid', 'impact generated material', 'andesite', 'diamictite', 'mineral-native element', 'mineral-organic compound', 'ultramafic igneous rock', 'hybrid sedimentary rock', 'natural solid material', 'granite', 'foiditoid', 'metamorphic rock', 'metasomatic rock', 'granodiorite', 'high magnesium fine grained igneous rock', 'particulate', 'anthropogenic metal material', 'breccia', 'massive sulphide', 'pyroxenite', 'rock', 'quartz rich igneous rock', 'clastic sediment', 'generic sandstone', 'iron rich sedimentary rock', 'exotic composition igneous rock', 'organic material', 'trachytoid', 'basic igneous rock', 'glass', 'breccia gouge series', 'non clastic siliceous sediment', 'pegmatite', 'generic mudstone', 'acidic igneous rock', 'mineral-oxide', 'hornblendite', 'mineral', 'anorthositic rock', 'mineral-sulfide or sulfosalt', 'foid gabbroid', 'igneous rock', 'tephra', 'diamicton', 'dacite', 'biogenic non-organic material', 'organic rich sedimentary rock', 'non clastic 
siliceous sedimentary rock', 'doleritic rock', 'fragmental igneous rock', 'soil', 'aphanite', 'anthropogenic material', 'generic conglomerate', 'foidolite', 'clastic sedimentary rock', 'charcoal', 'sand size sediment'}\n" + ] + } + ], + "source": [ + "# store entire label space as file\n", + "unique = set()\n", + "for label in gold_labels:\n", + " splitted = label.split(\"/\")\n", + " for split in splitted:\n", + " unique.add(split)\n", + "print(f\"{len(unique)}, {unique}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "65102c15", + "metadata": {}, + "outputs": [], + "source": [ + "with open('total_unique_multi_labels.txt', 'w') as f:\n", + "#with open('total_unique_multi_labels_wo_leaf.txt', 'w') as f:\n", + " for line in unique:\n", + " if line != '':\n", + " f.write(f\"{line}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "c16dce79", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(3204978, 29)\n" + ] + } + ], + "source": [ + "# Split the \"label\" column on \"/\"\n", + "df['label_list'] = df['description_material'].copy()\n", + "df['splitted'] = df['description_material'].str.split('/')\n", + "\n", + "# Create a list to store new rows\n", + "new_rows = []\n", + "\n", + "# Iterate through each row and split to create a new row\n", + "for _, row in df.iterrows():\n", + " categories = row['splitted']\n", + " for category in categories:\n", + " # Create a new row with the same values as the original row\n", + " new_row = row.copy()\n", + " # Set the \"label\" column to the current category\n", + " new_row['description_material'] = category\n", + " # Append the new row to the list\n", + " new_rows.append(new_row)\n", + "\n", + "# Create a new DataFrame from the list of new rows\n", + "new_df = pd.DataFrame(new_rows)\n", + "new_df\n", + "print(new_df.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "38e515bd", + "metadata": {}, + 
"outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
description_supplementMetadata_localitydescription_materialdescription_supplementMetadata_countrydescription_supplementMetadata_provincedescription_sampleTypedescription_supplementMetadata_platformTypedescription_supplementMetadata_geologicalAgedescription_supplementMetadata_locationDescriptiondescription_supplementMetadata_purposedescription_supplementMetadata_countydescription_descriptiondescription_supplementMetadata_classificationCommentdescription_supplementMetadata_currentArchivedescription_supplementMetadata_citydescription_supplementMetadata_sampleCommentdescription_supplementMetadata_fieldNamedescription_supplementMetadata_primaryLocationTypedescription_collectionMethodDescrdescription_supplementMetadata_primaryLocationNamedescription_supplementMetadata_geologicalUnitdescription_supplementMetadata_localityDescriptiondescription_supplementMetadata_originalArchivedescription_supplementMetadata_platformDescrdescription_collectionMethoddescription_supplementMetadata_cruiseFieldPrgrmdescription_supplementMetadata_publicationUrl_descriptionoriginal_high_labellabel_listsplitted
0natural solid materialUnited StatesNew Jerseya sample that is an individual unit, including...Coordinates for Sterling Hill Mine (MRDS ID: W...Sussex Co.National Mineral Collection, Smithsonian Insti...MooreiteMineSterling Hill MineOgdensburgSmithsonian collections record for NMNH C6294-...mat:mineralnatural solid material/mineral-sulfate, selena...[natural solid material, mineral-sulfate, sele...
0mineral-sulfate, selenate, or tellurateUnited StatesNew Jerseya sample that is an individual unit, including...Coordinates for Sterling Hill Mine (MRDS ID: W...Sussex Co.National Mineral Collection, Smithsonian Insti...MooreiteMineSterling Hill MineOgdensburgSmithsonian collections record for NMNH C6294-...mat:mineralnatural solid material/mineral-sulfate, selena...[natural solid material, mineral-sulfate, sele...
0mineralUnited StatesNew Jerseya sample that is an individual unit, including...Coordinates for Sterling Hill Mine (MRDS ID: W...Sussex Co.National Mineral Collection, Smithsonian Insti...MooreiteMineSterling Hill MineOgdensburgSmithsonian collections record for NMNH C6294-...mat:mineralnatural solid material/mineral-sulfate, selena...[natural solid material, mineral-sulfate, sele...
1natural solid materialUnited StatesNew Jerseya sample that is an individual unit, including...Matched to the GeoNames record for Franklin, S...Sussex Co.National Mineral Collection, Smithsonian Insti...BaryteMineFranklin MineFranklinSmithsonian collections record for NMNH C6295-...mat:mineralnatural solid material/mineral-sulfate, selena...[natural solid material, mineral-sulfate, sele...
1mineral-sulfate, selenate, or tellurateUnited StatesNew Jerseya sample that is an individual unit, including...Matched to the GeoNames record for Franklin, S...Sussex Co.National Mineral Collection, Smithsonian Insti...BaryteMineFranklin MineFranklinSmithsonian collections record for NMNH C6295-...mat:mineralnatural solid material/mineral-sulfate, selena...[natural solid material, mineral-sulfate, sele...
..........................................................................................
995647rocka sample that is an individual unit, including...gneissSalamander Range, north Victoria Land, AntarcticaDept of Geology, Colorado College, Colorado Sp...Manualmat:rocksediment/natural solid material/metamorphic ro...[sediment, natural solid material, metamorphic...
995648sedimenta sample that is an individual unit, including...gneissSalamander Range, north Victoria Land, AntarcticaDept of Geology, Colorado College, Colorado Sp...Manualmat:rocksediment/natural solid material/metamorphic ro...[sediment, natural solid material, metamorphic...
995648natural solid materiala sample that is an individual unit, including...gneissSalamander Range, north Victoria Land, AntarcticaDept of Geology, Colorado College, Colorado Sp...Manualmat:rocksediment/natural solid material/metamorphic ro...[sediment, natural solid material, metamorphic...
995648metamorphic rocka sample that is an individual unit, including...gneissSalamander Range, north Victoria Land, AntarcticaDept of Geology, Colorado College, Colorado Sp...Manualmat:rocksediment/natural solid material/metamorphic ro...[sediment, natural solid material, metamorphic...
995648rocka sample that is an individual unit, including...gneissSalamander Range, north Victoria Land, AntarcticaDept of Geology, Colorado College, Colorado Sp...Manualmat:rocksediment/natural solid material/metamorphic ro...[sediment, natural solid material, metamorphic...
\n", + "

3204978 rows × 29 columns

\n", + "
" + ], + "text/plain": [ + " description_supplementMetadata_locality \\\n", + "0 \n", + "0 \n", + "0 \n", + "1 \n", + "1 \n", + "... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_material \\\n", + "0 natural solid material \n", + "0 mineral-sulfate, selenate, or tellurate \n", + "0 mineral \n", + "1 natural solid material \n", + "1 mineral-sulfate, selenate, or tellurate \n", + "... ... \n", + "995647 rock \n", + "995648 sediment \n", + "995648 natural solid material \n", + "995648 metamorphic rock \n", + "995648 rock \n", + "\n", + " description_supplementMetadata_country \\\n", + "0 United States \n", + "0 United States \n", + "0 United States \n", + "1 United States \n", + "1 United States \n", + "... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_supplementMetadata_province \\\n", + "0 New Jersey \n", + "0 New Jersey \n", + "0 New Jersey \n", + "1 New Jersey \n", + "1 New Jersey \n", + "... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_sampleType \\\n", + "0 a sample that is an individual unit, including... \n", + "0 a sample that is an individual unit, including... \n", + "0 a sample that is an individual unit, including... \n", + "1 a sample that is an individual unit, including... \n", + "1 a sample that is an individual unit, including... \n", + "... ... \n", + "995647 a sample that is an individual unit, including... \n", + "995648 a sample that is an individual unit, including... \n", + "995648 a sample that is an individual unit, including... \n", + "995648 a sample that is an individual unit, including... \n", + "995648 a sample that is an individual unit, including... \n", + "\n", + " description_supplementMetadata_platformType \\\n", + "0 \n", + "0 \n", + "0 \n", + "1 \n", + "1 \n", + "... ... 
\n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_supplementMetadata_geologicalAge \\\n", + "0 \n", + "0 \n", + "0 \n", + "1 \n", + "1 \n", + "... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_supplementMetadata_locationDescription \\\n", + "0 Coordinates for Sterling Hill Mine (MRDS ID: W... \n", + "0 Coordinates for Sterling Hill Mine (MRDS ID: W... \n", + "0 Coordinates for Sterling Hill Mine (MRDS ID: W... \n", + "1 Matched to the GeoNames record for Franklin, S... \n", + "1 Matched to the GeoNames record for Franklin, S... \n", + "... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_supplementMetadata_purpose \\\n", + "0 \n", + "0 \n", + "0 \n", + "1 \n", + "1 \n", + "... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_supplementMetadata_county description_description \\\n", + "0 Sussex Co. \n", + "0 Sussex Co. \n", + "0 Sussex Co. \n", + "1 Sussex Co. \n", + "1 Sussex Co. \n", + "... ... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_supplementMetadata_classificationComment \\\n", + "0 \n", + "0 \n", + "0 \n", + "1 \n", + "1 \n", + "... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_supplementMetadata_currentArchive \\\n", + "0 National Mineral Collection, Smithsonian Insti... \n", + "0 National Mineral Collection, Smithsonian Insti... \n", + "0 National Mineral Collection, Smithsonian Insti... \n", + "1 National Mineral Collection, Smithsonian Insti... \n", + "1 National Mineral Collection, Smithsonian Insti... \n", + "... ... 
\n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_supplementMetadata_city \\\n", + "0 \n", + "0 \n", + "0 \n", + "1 \n", + "1 \n", + "... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_supplementMetadata_sampleComment \\\n", + "0 \n", + "0 \n", + "0 \n", + "1 \n", + "1 \n", + "... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_supplementMetadata_fieldName \\\n", + "0 Mooreite \n", + "0 Mooreite \n", + "0 Mooreite \n", + "1 Baryte \n", + "1 Baryte \n", + "... ... \n", + "995647 gneiss \n", + "995648 gneiss \n", + "995648 gneiss \n", + "995648 gneiss \n", + "995648 gneiss \n", + "\n", + " description_supplementMetadata_primaryLocationType \\\n", + "0 Mine \n", + "0 Mine \n", + "0 Mine \n", + "1 Mine \n", + "1 Mine \n", + "... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_collectionMethodDescr \\\n", + "0 \n", + "0 \n", + "0 \n", + "1 \n", + "1 \n", + "... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_supplementMetadata_primaryLocationName \\\n", + "0 Sterling Hill Mine \n", + "0 Sterling Hill Mine \n", + "0 Sterling Hill Mine \n", + "1 Franklin Mine \n", + "1 Franklin Mine \n", + "... ... \n", + "995647 Salamander Range, north Victoria Land, Antarctica \n", + "995648 Salamander Range, north Victoria Land, Antarctica \n", + "995648 Salamander Range, north Victoria Land, Antarctica \n", + "995648 Salamander Range, north Victoria Land, Antarctica \n", + "995648 Salamander Range, north Victoria Land, Antarctica \n", + "\n", + " description_supplementMetadata_geologicalUnit \\\n", + "0 \n", + "0 \n", + "0 \n", + "1 \n", + "1 \n", + "... ... 
\n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_supplementMetadata_localityDescription \\\n", + "0 Ogdensburg \n", + "0 Ogdensburg \n", + "0 Ogdensburg \n", + "1 Franklin \n", + "1 Franklin \n", + "... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_supplementMetadata_originalArchive \\\n", + "0 \n", + "0 \n", + "0 \n", + "1 \n", + "1 \n", + "... ... \n", + "995647 Dept of Geology, Colorado College, Colorado Sp... \n", + "995648 Dept of Geology, Colorado College, Colorado Sp... \n", + "995648 Dept of Geology, Colorado College, Colorado Sp... \n", + "995648 Dept of Geology, Colorado College, Colorado Sp... \n", + "995648 Dept of Geology, Colorado College, Colorado Sp... \n", + "\n", + " description_supplementMetadata_platformDescr \\\n", + "0 \n", + "0 \n", + "0 \n", + "1 \n", + "1 \n", + "... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_collectionMethod \\\n", + "0 \n", + "0 \n", + "0 \n", + "1 \n", + "1 \n", + "... ... \n", + "995647 Manual \n", + "995648 Manual \n", + "995648 Manual \n", + "995648 Manual \n", + "995648 Manual \n", + "\n", + " description_supplementMetadata_cruiseFieldPrgrm \\\n", + "0 \n", + "0 \n", + "0 \n", + "1 \n", + "1 \n", + "... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_supplementMetadata_publicationUrl_description \\\n", + "0 Smithsonian collections record for NMNH C6294-... \n", + "0 Smithsonian collections record for NMNH C6294-... \n", + "0 Smithsonian collections record for NMNH C6294-... \n", + "1 Smithsonian collections record for NMNH C6295-... \n", + "1 Smithsonian collections record for NMNH C6295-... \n", + "... ... 
\n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " original_high_label label_list \\\n", + "0 mat:mineral natural solid material/mineral-sulfate, selena... \n", + "0 mat:mineral natural solid material/mineral-sulfate, selena... \n", + "0 mat:mineral natural solid material/mineral-sulfate, selena... \n", + "1 mat:mineral natural solid material/mineral-sulfate, selena... \n", + "1 mat:mineral natural solid material/mineral-sulfate, selena... \n", + "... ... ... \n", + "995647 mat:rock sediment/natural solid material/metamorphic ro... \n", + "995648 mat:rock sediment/natural solid material/metamorphic ro... \n", + "995648 mat:rock sediment/natural solid material/metamorphic ro... \n", + "995648 mat:rock sediment/natural solid material/metamorphic ro... \n", + "995648 mat:rock sediment/natural solid material/metamorphic ro... \n", + "\n", + " splitted \n", + "0 [natural solid material, mineral-sulfate, sele... \n", + "0 [natural solid material, mineral-sulfate, sele... \n", + "0 [natural solid material, mineral-sulfate, sele... \n", + "1 [natural solid material, mineral-sulfate, sele... \n", + "1 [natural solid material, mineral-sulfate, sele... \n", + "... ... \n", + "995647 [sediment, natural solid material, metamorphic... \n", + "995648 [sediment, natural solid material, metamorphic... \n", + "995648 [sediment, natural solid material, metamorphic... \n", + "995648 [sediment, natural solid material, metamorphic... \n", + "995648 [sediment, natural solid material, metamorphic... 
\n", + "\n", + "[3204978 rows x 29 columns]" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_df" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "99767810", + "metadata": {}, + "outputs": [], + "source": [ + "df = new_df.drop('splitted', axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "5917f29a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
description_supplementMetadata_localitydescription_materialdescription_supplementMetadata_countrydescription_supplementMetadata_provincedescription_sampleTypedescription_supplementMetadata_platformTypedescription_supplementMetadata_geologicalAgedescription_supplementMetadata_locationDescriptiondescription_supplementMetadata_purposedescription_supplementMetadata_countydescription_descriptiondescription_supplementMetadata_classificationCommentdescription_supplementMetadata_currentArchivedescription_supplementMetadata_citydescription_supplementMetadata_sampleCommentdescription_supplementMetadata_fieldNamedescription_supplementMetadata_primaryLocationTypedescription_collectionMethodDescrdescription_supplementMetadata_primaryLocationNamedescription_supplementMetadata_geologicalUnitdescription_supplementMetadata_localityDescriptiondescription_supplementMetadata_originalArchivedescription_supplementMetadata_platformDescrdescription_collectionMethoddescription_supplementMetadata_cruiseFieldPrgrmdescription_supplementMetadata_publicationUrl_descriptionoriginal_high_labellabel_list
0natural solid materialUnited StatesNew Jerseya sample that is an individual unit, including...Coordinates for Sterling Hill Mine (MRDS ID: W...Sussex Co.National Mineral Collection, Smithsonian Insti...MooreiteMineSterling Hill MineOgdensburgSmithsonian collections record for NMNH C6294-...mat:mineralnatural solid material/mineral-sulfate, selena...
0mineral-sulfate, selenate, or tellurateUnited StatesNew Jerseya sample that is an individual unit, including...Coordinates for Sterling Hill Mine (MRDS ID: W...Sussex Co.National Mineral Collection, Smithsonian Insti...MooreiteMineSterling Hill MineOgdensburgSmithsonian collections record for NMNH C6294-...mat:mineralnatural solid material/mineral-sulfate, selena...
0mineralUnited StatesNew Jerseya sample that is an individual unit, including...Coordinates for Sterling Hill Mine (MRDS ID: W...Sussex Co.National Mineral Collection, Smithsonian Insti...MooreiteMineSterling Hill MineOgdensburgSmithsonian collections record for NMNH C6294-...mat:mineralnatural solid material/mineral-sulfate, selena...
1natural solid materialUnited StatesNew Jerseya sample that is an individual unit, including...Matched to the GeoNames record for Franklin, S...Sussex Co.National Mineral Collection, Smithsonian Insti...BaryteMineFranklin MineFranklinSmithsonian collections record for NMNH C6295-...mat:mineralnatural solid material/mineral-sulfate, selena...
1mineral-sulfate, selenate, or tellurateUnited StatesNew Jerseya sample that is an individual unit, including...Matched to the GeoNames record for Franklin, S...Sussex Co.National Mineral Collection, Smithsonian Insti...BaryteMineFranklin MineFranklinSmithsonian collections record for NMNH C6295-...mat:mineralnatural solid material/mineral-sulfate, selena...
.......................................................................................
995647rocka sample that is an individual unit, including...gneissSalamander Range, north Victoria Land, AntarcticaDept of Geology, Colorado College, Colorado Sp...Manualmat:rocksediment/natural solid material/metamorphic ro...
995648sedimenta sample that is an individual unit, including...gneissSalamander Range, north Victoria Land, AntarcticaDept of Geology, Colorado College, Colorado Sp...Manualmat:rocksediment/natural solid material/metamorphic ro...
995648natural solid materiala sample that is an individual unit, including...gneissSalamander Range, north Victoria Land, AntarcticaDept of Geology, Colorado College, Colorado Sp...Manualmat:rocksediment/natural solid material/metamorphic ro...
995648metamorphic rocka sample that is an individual unit, including...gneissSalamander Range, north Victoria Land, AntarcticaDept of Geology, Colorado College, Colorado Sp...Manualmat:rocksediment/natural solid material/metamorphic ro...
995648rocka sample that is an individual unit, including...gneissSalamander Range, north Victoria Land, AntarcticaDept of Geology, Colorado College, Colorado Sp...Manualmat:rocksediment/natural solid material/metamorphic ro...
\n", + "

3204978 rows × 28 columns

\n", + "
" + ], + "text/plain": [ + " description_supplementMetadata_locality \\\n", + "0 \n", + "0 \n", + "0 \n", + "1 \n", + "1 \n", + "... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_material \\\n", + "0 natural solid material \n", + "0 mineral-sulfate, selenate, or tellurate \n", + "0 mineral \n", + "1 natural solid material \n", + "1 mineral-sulfate, selenate, or tellurate \n", + "... ... \n", + "995647 rock \n", + "995648 sediment \n", + "995648 natural solid material \n", + "995648 metamorphic rock \n", + "995648 rock \n", + "\n", + " description_supplementMetadata_country \\\n", + "0 United States \n", + "0 United States \n", + "0 United States \n", + "1 United States \n", + "1 United States \n", + "... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_supplementMetadata_province \\\n", + "0 New Jersey \n", + "0 New Jersey \n", + "0 New Jersey \n", + "1 New Jersey \n", + "1 New Jersey \n", + "... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_sampleType \\\n", + "0 a sample that is an individual unit, including... \n", + "0 a sample that is an individual unit, including... \n", + "0 a sample that is an individual unit, including... \n", + "1 a sample that is an individual unit, including... \n", + "1 a sample that is an individual unit, including... \n", + "... ... \n", + "995647 a sample that is an individual unit, including... \n", + "995648 a sample that is an individual unit, including... \n", + "995648 a sample that is an individual unit, including... \n", + "995648 a sample that is an individual unit, including... \n", + "995648 a sample that is an individual unit, including... \n", + "\n", + " description_supplementMetadata_platformType \\\n", + "0 \n", + "0 \n", + "0 \n", + "1 \n", + "1 \n", + "... ... 
\n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_supplementMetadata_geologicalAge \\\n", + "0 \n", + "0 \n", + "0 \n", + "1 \n", + "1 \n", + "... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_supplementMetadata_locationDescription \\\n", + "0 Coordinates for Sterling Hill Mine (MRDS ID: W... \n", + "0 Coordinates for Sterling Hill Mine (MRDS ID: W... \n", + "0 Coordinates for Sterling Hill Mine (MRDS ID: W... \n", + "1 Matched to the GeoNames record for Franklin, S... \n", + "1 Matched to the GeoNames record for Franklin, S... \n", + "... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_supplementMetadata_purpose \\\n", + "0 \n", + "0 \n", + "0 \n", + "1 \n", + "1 \n", + "... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_supplementMetadata_county description_description \\\n", + "0 Sussex Co. \n", + "0 Sussex Co. \n", + "0 Sussex Co. \n", + "1 Sussex Co. \n", + "1 Sussex Co. \n", + "... ... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_supplementMetadata_classificationComment \\\n", + "0 \n", + "0 \n", + "0 \n", + "1 \n", + "1 \n", + "... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_supplementMetadata_currentArchive \\\n", + "0 National Mineral Collection, Smithsonian Insti... \n", + "0 National Mineral Collection, Smithsonian Insti... \n", + "0 National Mineral Collection, Smithsonian Insti... \n", + "1 National Mineral Collection, Smithsonian Insti... \n", + "1 National Mineral Collection, Smithsonian Insti... \n", + "... ... 
\n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_supplementMetadata_city \\\n", + "0 \n", + "0 \n", + "0 \n", + "1 \n", + "1 \n", + "... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_supplementMetadata_sampleComment \\\n", + "0 \n", + "0 \n", + "0 \n", + "1 \n", + "1 \n", + "... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_supplementMetadata_fieldName \\\n", + "0 Mooreite \n", + "0 Mooreite \n", + "0 Mooreite \n", + "1 Baryte \n", + "1 Baryte \n", + "... ... \n", + "995647 gneiss \n", + "995648 gneiss \n", + "995648 gneiss \n", + "995648 gneiss \n", + "995648 gneiss \n", + "\n", + " description_supplementMetadata_primaryLocationType \\\n", + "0 Mine \n", + "0 Mine \n", + "0 Mine \n", + "1 Mine \n", + "1 Mine \n", + "... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_collectionMethodDescr \\\n", + "0 \n", + "0 \n", + "0 \n", + "1 \n", + "1 \n", + "... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_supplementMetadata_primaryLocationName \\\n", + "0 Sterling Hill Mine \n", + "0 Sterling Hill Mine \n", + "0 Sterling Hill Mine \n", + "1 Franklin Mine \n", + "1 Franklin Mine \n", + "... ... \n", + "995647 Salamander Range, north Victoria Land, Antarctica \n", + "995648 Salamander Range, north Victoria Land, Antarctica \n", + "995648 Salamander Range, north Victoria Land, Antarctica \n", + "995648 Salamander Range, north Victoria Land, Antarctica \n", + "995648 Salamander Range, north Victoria Land, Antarctica \n", + "\n", + " description_supplementMetadata_geologicalUnit \\\n", + "0 \n", + "0 \n", + "0 \n", + "1 \n", + "1 \n", + "... ... 
\n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_supplementMetadata_localityDescription \\\n", + "0 Ogdensburg \n", + "0 Ogdensburg \n", + "0 Ogdensburg \n", + "1 Franklin \n", + "1 Franklin \n", + "... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_supplementMetadata_originalArchive \\\n", + "0 \n", + "0 \n", + "0 \n", + "1 \n", + "1 \n", + "... ... \n", + "995647 Dept of Geology, Colorado College, Colorado Sp... \n", + "995648 Dept of Geology, Colorado College, Colorado Sp... \n", + "995648 Dept of Geology, Colorado College, Colorado Sp... \n", + "995648 Dept of Geology, Colorado College, Colorado Sp... \n", + "995648 Dept of Geology, Colorado College, Colorado Sp... \n", + "\n", + " description_supplementMetadata_platformDescr \\\n", + "0 \n", + "0 \n", + "0 \n", + "1 \n", + "1 \n", + "... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_collectionMethod \\\n", + "0 \n", + "0 \n", + "0 \n", + "1 \n", + "1 \n", + "... ... \n", + "995647 Manual \n", + "995648 Manual \n", + "995648 Manual \n", + "995648 Manual \n", + "995648 Manual \n", + "\n", + " description_supplementMetadata_cruiseFieldPrgrm \\\n", + "0 \n", + "0 \n", + "0 \n", + "1 \n", + "1 \n", + "... ... \n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " description_supplementMetadata_publicationUrl_description \\\n", + "0 Smithsonian collections record for NMNH C6294-... \n", + "0 Smithsonian collections record for NMNH C6294-... \n", + "0 Smithsonian collections record for NMNH C6294-... \n", + "1 Smithsonian collections record for NMNH C6295-... \n", + "1 Smithsonian collections record for NMNH C6295-... \n", + "... ... 
\n", + "995647 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "995648 \n", + "\n", + " original_high_label label_list \n", + "0 mat:mineral natural solid material/mineral-sulfate, selena... \n", + "0 mat:mineral natural solid material/mineral-sulfate, selena... \n", + "0 mat:mineral natural solid material/mineral-sulfate, selena... \n", + "1 mat:mineral natural solid material/mineral-sulfate, selena... \n", + "1 mat:mineral natural solid material/mineral-sulfate, selena... \n", + "... ... ... \n", + "995647 mat:rock sediment/natural solid material/metamorphic ro... \n", + "995648 mat:rock sediment/natural solid material/metamorphic ro... \n", + "995648 mat:rock sediment/natural solid material/metamorphic ro... \n", + "995648 mat:rock sediment/natural solid material/metamorphic ro... \n", + "995648 mat:rock sediment/natural solid material/metamorphic ro... \n", + "\n", + "[3204978 rows x 28 columns]" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "4007b1a2", + "metadata": {}, + "outputs": [], + "source": [ + "#df.to_csv(\"./datasets/multilabel_SESAR.csv\")" + ] + }, + { + "cell_type": "markdown", + "id": "52988633", + "metadata": {}, + "source": [ + "## Split Dataset to train/dev/test" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "id": "7090e5a1", + "metadata": {}, + "outputs": [], + "source": [ + "#df = pd.read_csv(\"./datasets/multilabel_SESAR.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "id": "df910ff0", + "metadata": {}, + "outputs": [], + "source": [ + "df = df.fillna(\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "id": "554a1910", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(2618061, 28)\n", + "Counter({'natural solid material': 806033, 'sediment': 498308, 'rock': 498308, 'mineral': 245135, 
'igneous rock': 165681, 'organic material': 134793, 'fine grained igneous rock': 72866, 'sedimentary rock': 57513, 'phaneritic igneous rock': 36092, 'fluid material': 26769, 'granitoid': 20068, 'ultramafic igneous rock': 15883, 'fragmental igneous rock': 13437, 'intermediate composition igneous rock': 7039, 'gabbroid': 6514, 'biogenic non-organic material': 3534, 'clastic sedimentary rock': 3089, 'clastic sediment': 1643, 'organic rich sedimentary rock': 1222, 'any anthropogenic material': 1053, 'anthropogenic material': 997, 'fault related material': 771, 'particulate': 620, 'basic igneous rock': 412, 'acidic igneous rock': 178, 'anthropogenic metal material': 56, 'ceramic clay': 47})\n" + ] + } + ], + "source": [ + "import collections\n", + "print(df.shape)\n", + "print(collections.Counter(df['description_material'].values.tolist())) # check labels all exist \n" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "id": "9768c11d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(2618061, 28)\n", + "Counter({'natural solid material': 806033, 'sediment': 498308, 'rock': 498308, 'mineral': 245135, 'igneous rock': 165681, 'organic material': 134793, 'fine grained igneous rock': 72866, 'sedimentary rock': 57513, 'phaneritic igneous rock': 36092, 'fluid material': 26769, 'granitoid': 20068, 'ultramafic igneous rock': 15883, 'fragmental igneous rock': 13437, 'intermediate composition igneous rock': 7039, 'gabbroid': 6514, 'biogenic non-organic material': 3534, 'clastic sedimentary rock': 3089, 'clastic sediment': 1643, 'organic rich sedimentary rock': 1222, 'any anthropogenic material': 1053, 'anthropogenic material': 997, 'fault related material': 771, 'particulate': 620, 'basic igneous rock': 412, 'acidic igneous rock': 178, 'anthropogenic metal material': 56, 'ceramic clay': 47})\n" + ] + } + ], + "source": [ + "# drop rows that have count less than 10\n", + "\n", + "label_counts = 
df['description_material'].value_counts()\n", + "\n", + "# Get labels where the frequency is less than 10\n", + "labels_to_filter = label_counts[label_counts < 10].index\n", + "\n", + "# Filter out rows with labels that occur less than 10 times\n", + "df= df[~df['description_material'].isin(labels_to_filter)]\n", + "print(df.shape)\n", + "print(collections.Counter(df['description_material'].values.tolist()))" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "id": "9d30e977", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/5_/2jhxkdyn5p74jn6q496vblp00000gn/T/ipykernel_8231/3207015472.py:17: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", + " selected_rows = selected_rows.append(random_sample)\n", + "/var/folders/5_/2jhxkdyn5p74jn6q496vblp00000gn/T/ipykernel_8231/3207015472.py:17: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", + " selected_rows = selected_rows.append(random_sample)\n", + "/var/folders/5_/2jhxkdyn5p74jn6q496vblp00000gn/T/ipykernel_8231/3207015472.py:17: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", + " selected_rows = selected_rows.append(random_sample)\n", + "/var/folders/5_/2jhxkdyn5p74jn6q496vblp00000gn/T/ipykernel_8231/3207015472.py:17: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", + " selected_rows = selected_rows.append(random_sample)\n", + "/var/folders/5_/2jhxkdyn5p74jn6q496vblp00000gn/T/ipykernel_8231/3207015472.py:17: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. 
Use pandas.concat instead.\n", + " selected_rows = selected_rows.append(random_sample)\n", + "/var/folders/5_/2jhxkdyn5p74jn6q496vblp00000gn/T/ipykernel_8231/3207015472.py:17: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", + " selected_rows = selected_rows.append(random_sample)\n", + "/var/folders/5_/2jhxkdyn5p74jn6q496vblp00000gn/T/ipykernel_8231/3207015472.py:17: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", + " selected_rows = selected_rows.append(random_sample)\n", + "/var/folders/5_/2jhxkdyn5p74jn6q496vblp00000gn/T/ipykernel_8231/3207015472.py:17: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", + " selected_rows = selected_rows.append(random_sample)\n", + "/var/folders/5_/2jhxkdyn5p74jn6q496vblp00000gn/T/ipykernel_8231/3207015472.py:17: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", + " selected_rows = selected_rows.append(random_sample)\n", + "/var/folders/5_/2jhxkdyn5p74jn6q496vblp00000gn/T/ipykernel_8231/3207015472.py:17: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", + " selected_rows = selected_rows.append(random_sample)\n", + "/var/folders/5_/2jhxkdyn5p74jn6q496vblp00000gn/T/ipykernel_8231/3207015472.py:17: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", + " selected_rows = selected_rows.append(random_sample)\n", + "/var/folders/5_/2jhxkdyn5p74jn6q496vblp00000gn/T/ipykernel_8231/3207015472.py:17: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. 
import collections

from sklearn.model_selection import train_test_split

# Cap over-represented labels: keep at most `max_frequency` rows per label.
max_frequency = 15000

# Current frequency of every value in the 'description_material' column.
value_counts = df['description_material'].value_counts()

# Labels that occur more often than the cap.
values_to_limit = value_counts[value_counts > max_frequency].index

# Randomly keep exactly `max_frequency` rows for each over-represented label.
# The per-label frames are collected in a list and concatenated once;
# DataFrame.append is deprecated and was removed in pandas 2.0.
sampled_groups = [
    df[df['description_material'] == value].sample(max_frequency, random_state=42)
    for value in values_to_limit
]
selected_rows = pd.concat(sampled_groups) if sampled_groups else df.iloc[0:0]

# Rows whose label is at or below the cap are kept untouched (vectorised
# membership test instead of a per-row lambda).
remaining_rows = df[~df['description_material'].isin(values_to_limit)]

# Down-sampled frequent labels first, then everything else.
final_df = pd.concat([selected_rows, remaining_rows])

print(collections.Counter(final_df['description_material'].values.tolist()))  # check labels all exist

df = final_df

# 70 / 15 / 15 train / dev / test split, stratified on the label so every
# label appears in each split in the same proportion.
train_ratio = 0.7
train_df, dev_test_df = train_test_split(
    df,
    test_size=1 - train_ratio,
    random_state=42,
    stratify=df['description_material'],
)
dev_df, test_df = train_test_split(
    dev_test_df,
    test_size=0.5,
    stratify=dev_test_df['description_material'],
    random_state=42,
)

print(train_df.shape, dev_df.shape, test_df.shape)
print(collections.Counter(dev_df['description_material'].values.tolist()))  # check labels all exist
print(collections.Counter(test_df['description_material'].values.tolist()))
print(collections.Counter(train_df['description_material'].values.tolist()))
[], + "source": [ + "# label_frequencies = df['description_material'].value_counts()\n", + "# import random\n", + "# random_seed = 38\n", + "# random.seed(random_seed)\n", + "# # Sort labels by frequency in descending order\n", + "# sorted_labels = label_frequencies.index.tolist()\n", + "\n", + "# # Create two empty lists to store selected labels for each split\n", + "# split_a_labels = []\n", + "# split_b_labels = []\n", + "\n", + "# # Iterate through the sorted labels and assign them to splits\n", + "# total_labels = len(sorted_labels)\n", + "# split_size = total_labels // 2\n", + "# print(f\"split size: {split_size}\")\n", + "\n", + "# split_a_keys = random.sample(sorted_labels, split_size)\n", + "# split_b_keys = [x for x in sorted_labels if x not in split_a_keys ]\n", + "\n", + "# # # Randomly shuffle the labels within each split\n", + "# # random.shuffle(split_a_labels)\n", + "# # random.shuffle(split_b_labels)\n", + "\n", + "# # # Filter the dataset based on selected labels for each split\n", + "# split_a = df[df['description_material'].isin(split_a_keys)]\n", + "# split_b = df[df['description_material'].isin(split_b_keys)]\n", + "\n", + "# # shuffle\n", + "# split_a = split_a.sample(frac=1)\n", + "# split_b = split_b.sample(frac=1)\n", + "# # # Reset the index of the splits\n", + "# split_a.reset_index(drop=True, inplace=True)\n", + "# split_b.reset_index(drop=True, inplace=True)\n", + "# print(split_a.shape, split_b.shape)\n", + "# split_a_labels = set(split_a['description_material'].values.tolist())\n", + "# print(split_a_labels, len(split_a_labels))\n", + "# print(set(split_b['description_material'].values.tolist()))" + ] + }, + { + "cell_type": "code", + "execution_count": 123, + "id": "d2177276", + "metadata": {}, + "outputs": [], + "source": [ + "# separate training data into v0 and v1 \n", + "# train_df_v0 = split_a\n", + "# train_df_v1 = split_b" + ] + }, + { + "cell_type": "markdown", + "id": "b29fb6fe", + "metadata": {}, + "source": [ + "## 
# Generate dataset for Text Entailment Task

## 1. Use simple Name:Value
import re
import random
import collections

# Columns that are concatenated into the input text: every 'description*'
# column except the label column 'description_material'.
columns_to_concat = [x for x in df.columns if x != 'description_material' and x.startswith('description')]
url_pattern = r'https?://[\n\S]+\b'


def concatenate_cols(dataframe, verbose=False):
    """Build one text string per row from the columns in `columns_to_concat`.

    Each non-empty field contributes ``<column text>:<value>, `` (or
    ``<column text> is <value>, `` when `verbose` is anything other than
    ``False``). URLs matching `url_pattern` are then removed from the text.

    The column text is looked up in `description_to_text`, which is not
    defined in this cell (presumably a column-name -> phrase mapping from an
    earlier cell; confirm).

    Returns a list of strings in row order. Every non-empty string ends with
    a trailing ``", "``, which callers strip themselves.
    """
    separator = ':' if verbose is False else ' is '
    url_regex = re.compile(url_pattern)  # compiled once per call, not per row
    values = []
    for _, row in dataframe.iterrows():
        pieces = [
            description_to_text[column] + separator + str(row[column]) + ", "
            for column in columns_to_concat
            if str(row[column]) != ''  # skip fields that are empty
        ]
        concatenated_text = ''.join(pieces)
        # Append the URL-stripped text. The previous version computed it but
        # appended the uncleaned string, so URLs leaked into the dataset.
        values.append(url_regex.sub('', concatenated_text))
    return values

# 1.
# generate concatenated text that does not have any description (non-language like)
# [Col Name] : [Col Value] / Material : [Label]

def generate_text_A(dataframe, train=True):
    """Add the terse 'Name:Value ... Material:<label>.' text to `dataframe`.

    Writes column 'concatenated_text_A' (field text followed by the true
    label) and, when `train` is True, 'negative_sample_A' (same field text
    followed by a randomly chosen wrong label). The frame is modified in
    place and also returned.

    Negative labels come only from labels present in this frame, excluding the
    row's own label and anything found in row['label_list'].
    NOTE(review): the exclusion uses `in`, so if 'label_list' holds a string
    this is a substring test; confirm the column type.
    """
    dataframe['concatenated_text_A'] = concatenate_cols(dataframe, False)
    # labels that exist in this dataframe
    known_labels = list(set(dataframe['description_material'].values.tolist()))
    positives = []
    negatives = []  # generated negative examples
    for _, sample in dataframe.iterrows():
        # drop the trailing ", " left by concatenate_cols
        premise = sample['concatenated_text_A'][:-2]
        material = sample['description_material']
        positives.append('' + premise + 'Material:' + str(material) + '.')
        if train is not True:
            continue
        # randomly select a negative label from only seen labels
        candidates = [
            label
            for label in known_labels
            if label != material and label not in sample['label_list']
        ]
        wrong_label = random.choice(candidates)
        negatives.append('' + premise + 'Material:' + wrong_label + '.')
    dataframe['concatenated_text_A'] = positives
    if train is True:
        dataframe['negative_sample_A'] = negatives
    return dataframe

# 2.
# Use more verbose version : [Col Name] is [Col Value] / The material of this physical sample is [Label]

def generate_text_B(dataframe, train=True):
    """Add the verbose '... is ...' text with a sentence-style label to `dataframe`.

    Writes column 'concatenated_text_B' (field text followed by
    'The material of this physical sample is <label>.') and, when `train` is
    True, 'negative_sample_B' (same field text with a randomly chosen wrong
    label). The frame is modified in place and also returned, matching
    generate_text_A.

    Negative labels come only from labels present in this frame, excluding the
    row's own label and anything found in row['label_list'].
    NOTE(review): the exclusion uses `in`, so if 'label_list' holds a string
    this is a substring test; confirm the column type.
    """
    dataframe['concatenated_text_B'] = concatenate_cols(dataframe, True)
    values = []
    neg_values = []
    gold_labels = list(set(dataframe['description_material'].values.tolist()))  # labels that exist in this dataframe
    for _, row in dataframe.iterrows():
        premise = row['concatenated_text_B'][:-2]  # drop the trailing ", "
        # Add label as hypothesis
        values.append('' + premise + 'The material of this physical sample is ' + str(row['description_material']) + '.')
        # randomly select negative label
        if train is True:
            negative_label = random.choice([
                label
                for label in gold_labels
                if label != row['description_material'] and label not in row['label_list']
            ])
            # generate negative example
            neg_values.append('' + premise + 'The material of this physical sample is ' + negative_label + '.')
    dataframe['concatenated_text_B'] = values
    if train is True:
        dataframe['negative_sample_B'] = neg_values
    return dataframe

# 3.
# Give more description about the label field
# The kind of material that constitutes a physical sample is [LABEL]

def generate_text_C(dataframe, train=True):
    """Add the verbose '... is ...' text with a descriptive label sentence to `dataframe`.

    Writes column 'concatenated_text_C' (field text followed by
    'The kind of material that constitutes this physical sample is <label>.')
    and, when `train` is True, 'negative_sample_C' (same field text with a
    randomly chosen wrong label). The frame is modified in place and also
    returned, matching generate_text_A.

    Negative labels come only from labels present in this frame, excluding the
    row's own label and anything found in row['label_list'].
    NOTE(review): the exclusion uses `in`, so if 'label_list' holds a string
    this is a substring test; confirm the column type.
    """
    dataframe['concatenated_text_C'] = concatenate_cols(dataframe, True)
    values = []
    neg_values = []
    gold_labels = list(set(dataframe['description_material'].values.tolist()))  # labels that exist in this dataframe
    for _, row in dataframe.iterrows():
        premise = row['concatenated_text_C'][:-2]  # drop the trailing ", "
        # Add label as hypothesis
        values.append('' + premise + 'The kind of material that constitutes this physical sample is ' + str(row['description_material']) + '.')
        # randomly select negative label
        if train is True:
            negative_label = random.choice([
                label
                for label in gold_labels
                if label != row['description_material'] and label not in row['label_list']
            ])
            # generate negative example
            neg_values.append('' + premise + 'The kind of material that constitutes this physical sample is ' + negative_label + '.')
    dataframe['concatenated_text_C'] = values
    if train is True:
        dataframe['negative_sample_C'] = neg_values
    return dataframe


def process_text(values):
    """Replace the last two characters of each string (the trailing ", ") with '.'."""
    return ['' + value[:-2] + '.' for value in values]


# simply just need to concatenate column information
def generate_test_text_AB(dataframe):
    """Add label-free text columns 'concatenated_text_A' and '_B' to `dataframe` in place.

    No label sentence is appended and no negatives are generated. There is no
    'C' column because variant C builds its field text with the same
    concatenate_cols(dataframe, True) call as variant B.
    """
    values = concatenate_cols(dataframe, False)
    dataframe['concatenated_text_A'] = process_text(values)  # format it
    values = concatenate_cols(dataframe, True)
    dataframe['concatenated_text_B'] = process_text(values)
    # C does not exist


generate_test_text_AB(test_df)


# generate concatenated text (variants A, B and C) for a train or dev frame
def generate_text(dataframe, train):
    """Run generate_text_A/B/C on `dataframe` in place and return it."""
    generate_text_A(dataframe, train)
    generate_text_B(dataframe, train)
    generate_text_C(dataframe, train)
    return dataframe


generate_text(train_df, True)
#generate_text(train_df_v1, True)
generate_text(dev_df, True)  # dev also needs negative samples

# print(train_df_v1['concatenated_text_A'].values.tolist()[0])
# print(train_df_v1['concatenated_text_B'].values.tolist()[0])
# print(train_df_v1['concatenated_text_C'].values.tolist()[0])

# print(train_df_v0['negative_sample_A'].values.tolist()[0])
# print(train_df_v0['negative_sample_B'].values.tolist()[0])
# print(train_df_v0['negative_sample_C'].values.tolist()[0])
describe the collection purpose of the sample:UW: Wilson - petrology, geochemistry, The detailed description of the sample:chill, The name of institution, museum, or repository where the sample is currently stored:University of Johannesburg (UJ), The taxonomy informal classification of sample:chill, A body of rock established as a distinct entity in the classification of the Earth’s rocks:Komati Formation, The additional information about the specific place where the sample was collected:BARB1, The method by which a sample was collected:CoringMaterial:fine grained igneous rock.\n", + "The name of the specific place where the sample was collected is Komati Fm, The country where the sample was collected is South Africa, The province where the sample was collected is Univ Witwatersrand, The object type of sample indicates that this sample is long cylindrical cores, The age of a sample as described by the stratigraphic era, period, state, etc. is Paleoarchaean, The free text description of the location is Tjakastad, Barberton Greenstone Belt, The free text to describe the collection purpose of the sample is UW: Wilson - petrology, geochemistry, The detailed description of the sample is chill, The name of institution, museum, or repository where the sample is currently stored is University of Johannesburg (UJ), The taxonomy informal classification of sample is chill, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Komati Formation, The additional information about the specific place where the sample was collected is BARB1, The method by which a sample was collected is CoringThe material of this physical sample is fine grained igneous rock.\n", + "The name of the specific place where the sample was collected is Komati Fm, The country where the sample was collected is South Africa, The province where the sample was collected is Univ Witwatersrand, The object type of sample indicates that this sample is long cylindrical cores, 
# Show the first dev example for each of the three text variants.
for text_column in ('concatenated_text_A', 'concatenated_text_B', 'concatenated_text_C'):
    print(dev_df[text_column].values.tolist()[0])

# Show the first test example (field text only, no label sentence appended).
print(test_df['concatenated_text_A'].values.tolist()[0])

# store the CV labeled version
csv_targets = {
    "./datasets/SESAR_ZTC_train_multi.csv": train_df,
    # train_df_v1.to_csv("./datasets/SESAR_ZTC_train_v1_verbose_multi.csv")
    "./datasets/SESAR_ZTC_dev_multi.csv": dev_df,
    "./datasets/SESAR_ZTC_test_multi.csv": test_df,
}
for csv_path, frame in csv_targets.items():
    frame.to_csv(csv_path)

## TODO : synonym expansion/ replace label with description of CV
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0description_supplementMetadata_localitydescription_materialdescription_supplementMetadata_countrydescription_supplementMetadata_provincedescription_sampleTypedescription_supplementMetadata_platformTypedescription_supplementMetadata_geologicalAgedescription_supplementMetadata_locationDescriptiondescription_supplementMetadata_purposedescription_supplementMetadata_countydescription_descriptiondescription_supplementMetadata_classificationCommentdescription_supplementMetadata_currentArchivedescription_supplementMetadata_citydescription_supplementMetadata_sampleCommentdescription_supplementMetadata_fieldNamedescription_supplementMetadata_primaryLocationTypedescription_collectionMethodDescrdescription_supplementMetadata_primaryLocationNamedescription_supplementMetadata_geologicalUnitdescription_supplementMetadata_localityDescriptiondescription_supplementMetadata_originalArchivedescription_supplementMetadata_platformDescrdescription_collectionMethoddescription_supplementMetadata_cruiseFieldPrgrmdescription_supplementMetadata_publicationUrl_descriptionoriginal_high_labellabel_listconcatenated_text_Anegative_sample_Aconcatenated_text_Bnegative_sample_Bconcatenated_text_Cnegative_sample_C
0898380NaNfluid materialNot ApplicableNot Applicablea sample that is an individual unit, including...ShipNot ApplicableNaNNaNNot ApplicableNaNNaNNot ProvidedNot ApplicableNaNNaNIceShelfNaNAntarctica:LarsenIceShelfNot ApplicableNaNNaNNaNSampler:Fluid:BottleNBP1001NaNmat:liquidwaterfluid material<s>The country where the sample was collected:...<s>The country where the sample was collected:...<s>The country where the sample was collected ...<s>The country where the sample was collected ...<s>The country where the sample was collected ...<s>The country where the sample was collected ...
1277699NaNfine grained igneous rockUnited StatesHawaiilong cylindrical coresNaNNaNCoordinates from GeoNames for the Kilauea Iki ...NaNNaNNaNNaNNational Rock & Ore Collection, Smithsonian In...NaNNaNOlivine-basalt with olivineVolcanoNaNKilaueaNaNNaNNaNNaNCoringNaNSmithsonian collections record for NMNH 115478...mat:rocksediment/igneous rock/natural solid material/f...<s>The country where the sample was collected:...<s>The country where the sample was collected:...<s>The country where the sample was collected ...<s>The country where the sample was collected ...<s>The country where the sample was collected ...<s>The country where the sample was collected ...
2914187NaNfine grained igneous rockNaNNaNa sample that is an individual unit, including...ShipNaNNaNNaNNaNNaNNaNNational Rock & Ore Collection, Smithsonian In...NaNNaNBasaltOceanNaNAtlantic OceanNaNLocality Key: Mid-Atlantic Ridge, Romanche TrenchNaNNaNNaNNaNSmithsonian collections record for NMNH 110753...mat:rocksediment/igneous rock/natural solid material/f...<s>The object type of sample indicates that th...<s>The object type of sample indicates that th...<s>The object type of sample indicates that th...<s>The object type of sample indicates that th...<s>The object type of sample indicates that th...<s>The object type of sample indicates that th...
3901903NaNorganic materialNot ApplicableNot Applicablea sample that is an individual unit, including...ShipNot ApplicableDivaNaNNot ApplicableBacterial matNaNNaNNot ApplicableNaNNaNseamountJason 2 Dive: J2-523 on Cruise: TN253; orange,...JdF:AxialNot ApplicableNaNNaNNaNSyringe, 100 ccTN253NaNmat:organicmaterialorganic material<s>The country where the sample was collected:...<s>The country where the sample was collected:...<s>The country where the sample was collected ...<s>The country where the sample was collected ...<s>The country where the sample was collected ...<s>The country where the sample was collected ...
4696512NaNfine grained igneous rockNaNNaNa sample that is an individual unit, including...shipNaNNaNNaNNaNNaNNaNGEOMAR, Kiel, GermanyNaNNaNNaNvolcanoNaNVolcanologists MassifNaNNaNNaNNaNChainBagDredge, Sediment TrapSO249-2NaNmat:rocksediment/igneous rock/natural solid material/f...<s>The object type of sample indicates that th...<s>The object type of sample indicates that th...<s>The object type of sample indicates that th...<s>The object type of sample indicates that th...<s>The object type of sample indicates that th...<s>The object type of sample indicates that th...
............................................................................................................
154423219904NaNorganic materialUnited StatesNew Mexicoa sample that is an individual unit, including...NaNNaNNaNNaNDona AnaSubsample of adult non-Carabid or larval inver...NaNInvertebrate Bycatch Collection (Archive Pooli...NaNNaNInvertebrate BycatchNaNPreservedSpecimenNaNNaNNaNNaNNaNManualNaNSource Reference URLmat:organicmaterialorganic material<s>The country where the sample was collected:...<s>The country where the sample was collected:...<s>The country where the sample was collected ...<s>The country where the sample was collected ...<s>The country where the sample was collected ...<s>The country where the sample was collected ...
154424222537NaNorganic materialUnited StatesMichigana sample that is an individual unit, including...NaNNaNNaNNaNGogebicFecal sample collected from a small mammal and...NaNMammal Collection (Fecal Samples) (NEONMAMC-FE)NaNNaNMyodes gapperi VigorsNaNPreservedSpecimenNaNNaNNaNNaNNaNManualNaNSource Reference URLmat:organicmaterialorganic material<s>The country where the sample was collected:...<s>The country where the sample was collected:...<s>The country where the sample was collected ...<s>The country where the sample was collected ...<s>The country where the sample was collected ...<s>The country where the sample was collected ...
154425458617NaNfragmental igneous rockNaNNaNarbitrarily cut segments of a coreNaNNaNNaNArAr datingNaNNaNNaNLacCoreNaNNaNTuffBasinNaNOlduvai GorgeNaNNaNNaNNaNCoringOGCPNaNmat:rocksediment/igneous rock/natural solid material/f...<s>The object type of sample indicates that th...<s>The object type of sample indicates that th...<s>The object type of sample indicates that th...<s>The object type of sample indicates that th...<s>The object type of sample indicates that th...<s>The object type of sample indicates that th...
154426846400NaNgranitoidNaNNaNa sample that is an individual unit, including...NaNCambrianNaNNaNNaNNaNNaNPolar Rock Repository, Byrd Polar and Climate ...NaNNaNGraniteNaNNaNBull PassNaNNaNNaNNaNNaNNaNNonemat:rockgranitoid/phaneritic igneous rock/sediment/ign...<s>The object type of sample indicates that th...<s>The object type of sample indicates that th...<s>The object type of sample indicates that th...<s>The object type of sample indicates that th...<s>The object type of sample indicates that th...<s>The object type of sample indicates that th...
154427805058NaNfine grained igneous rockUnited StatesNew Mexicoa sample that is an individual unit, including...NaNNaNMatched to the GeoNames record for Cerro de Gu...NaNSandoval Co.NaNNaNNational Rock & Ore Collection, Smithsonian In...NaNNaNBasalt with lherzoliteNaNNaNCerro de GuadalupeNaNPuerco PlugsNaNNaNNaNNaNSmithsonian collections record for NMNH 118050...mat:rocksediment/igneous rock/natural solid material/f...<s>The country where the sample was collected:...<s>The country where the sample was collected:...<s>The country where the sample was collected ...<s>The country where the sample was collected ...<s>The country where the sample was collected ...<s>The country where the sample was collected ...
\n", + "

154428 rows × 35 columns

\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 description_supplementMetadata_locality \\\n", + "0 898380 NaN \n", + "1 277699 NaN \n", + "2 914187 NaN \n", + "3 901903 NaN \n", + "4 696512 NaN \n", + "... ... ... \n", + "154423 219904 NaN \n", + "154424 222537 NaN \n", + "154425 458617 NaN \n", + "154426 846400 NaN \n", + "154427 805058 NaN \n", + "\n", + " description_material description_supplementMetadata_country \\\n", + "0 fluid material Not Applicable \n", + "1 fine grained igneous rock United States \n", + "2 fine grained igneous rock NaN \n", + "3 organic material Not Applicable \n", + "4 fine grained igneous rock NaN \n", + "... ... ... \n", + "154423 organic material United States \n", + "154424 organic material United States \n", + "154425 fragmental igneous rock NaN \n", + "154426 granitoid NaN \n", + "154427 fine grained igneous rock United States \n", + "\n", + " description_supplementMetadata_province \\\n", + "0 Not Applicable \n", + "1 Hawaii \n", + "2 NaN \n", + "3 Not Applicable \n", + "4 NaN \n", + "... ... \n", + "154423 New Mexico \n", + "154424 Michigan \n", + "154425 NaN \n", + "154426 NaN \n", + "154427 New Mexico \n", + "\n", + " description_sampleType \\\n", + "0 a sample that is an individual unit, including... \n", + "1 long cylindrical cores \n", + "2 a sample that is an individual unit, including... \n", + "3 a sample that is an individual unit, including... \n", + "4 a sample that is an individual unit, including... \n", + "... ... \n", + "154423 a sample that is an individual unit, including... \n", + "154424 a sample that is an individual unit, including... \n", + "154425 arbitrarily cut segments of a core \n", + "154426 a sample that is an individual unit, including... \n", + "154427 a sample that is an individual unit, including... \n", + "\n", + " description_supplementMetadata_platformType \\\n", + "0 Ship \n", + "1 NaN \n", + "2 Ship \n", + "3 Ship \n", + "4 ship \n", + "... ... 
\n", + "154423 NaN \n", + "154424 NaN \n", + "154425 NaN \n", + "154426 NaN \n", + "154427 NaN \n", + "\n", + " description_supplementMetadata_geologicalAge \\\n", + "0 Not Applicable \n", + "1 NaN \n", + "2 NaN \n", + "3 Not Applicable \n", + "4 NaN \n", + "... ... \n", + "154423 NaN \n", + "154424 NaN \n", + "154425 NaN \n", + "154426 Cambrian \n", + "154427 NaN \n", + "\n", + " description_supplementMetadata_locationDescription \\\n", + "0 NaN \n", + "1 Coordinates from GeoNames for the Kilauea Iki ... \n", + "2 NaN \n", + "3 Diva \n", + "4 NaN \n", + "... ... \n", + "154423 NaN \n", + "154424 NaN \n", + "154425 NaN \n", + "154426 NaN \n", + "154427 Matched to the GeoNames record for Cerro de Gu... \n", + "\n", + " description_supplementMetadata_purpose \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "... ... \n", + "154423 NaN \n", + "154424 NaN \n", + "154425 ArAr dating \n", + "154426 NaN \n", + "154427 NaN \n", + "\n", + " description_supplementMetadata_county \\\n", + "0 Not Applicable \n", + "1 NaN \n", + "2 NaN \n", + "3 Not Applicable \n", + "4 NaN \n", + "... ... \n", + "154423 Dona Ana \n", + "154424 Gogebic \n", + "154425 NaN \n", + "154426 NaN \n", + "154427 Sandoval Co. \n", + "\n", + " description_description \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 Bacterial mat \n", + "4 NaN \n", + "... ... \n", + "154423 Subsample of adult non-Carabid or larval inver... \n", + "154424 Fecal sample collected from a small mammal and... \n", + "154425 NaN \n", + "154426 NaN \n", + "154427 NaN \n", + "\n", + " description_supplementMetadata_classificationComment \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "... ... \n", + "154423 NaN \n", + "154424 NaN \n", + "154425 NaN \n", + "154426 NaN \n", + "154427 NaN \n", + "\n", + " description_supplementMetadata_currentArchive \\\n", + "0 Not Provided \n", + "1 National Rock & Ore Collection, Smithsonian In... 
\n", + "2 National Rock & Ore Collection, Smithsonian In... \n", + "3 NaN \n", + "4 GEOMAR, Kiel, Germany \n", + "... ... \n", + "154423 Invertebrate Bycatch Collection (Archive Pooli... \n", + "154424 Mammal Collection (Fecal Samples) (NEONMAMC-FE) \n", + "154425 LacCore \n", + "154426 Polar Rock Repository, Byrd Polar and Climate ... \n", + "154427 National Rock & Ore Collection, Smithsonian In... \n", + "\n", + " description_supplementMetadata_city \\\n", + "0 Not Applicable \n", + "1 NaN \n", + "2 NaN \n", + "3 Not Applicable \n", + "4 NaN \n", + "... ... \n", + "154423 NaN \n", + "154424 NaN \n", + "154425 NaN \n", + "154426 NaN \n", + "154427 NaN \n", + "\n", + " description_supplementMetadata_sampleComment \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "... ... \n", + "154423 NaN \n", + "154424 NaN \n", + "154425 NaN \n", + "154426 NaN \n", + "154427 NaN \n", + "\n", + " description_supplementMetadata_fieldName \\\n", + "0 NaN \n", + "1 Olivine-basalt with olivine \n", + "2 Basalt \n", + "3 NaN \n", + "4 NaN \n", + "... ... \n", + "154423 Invertebrate Bycatch \n", + "154424 Myodes gapperi Vigors \n", + "154425 Tuff \n", + "154426 Granite \n", + "154427 Basalt with lherzolite \n", + "\n", + " description_supplementMetadata_primaryLocationType \\\n", + "0 IceShelf \n", + "1 Volcano \n", + "2 Ocean \n", + "3 seamount \n", + "4 volcano \n", + "... ... \n", + "154423 NaN \n", + "154424 NaN \n", + "154425 Basin \n", + "154426 NaN \n", + "154427 NaN \n", + "\n", + " description_collectionMethodDescr \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 Jason 2 Dive: J2-523 on Cruise: TN253; orange,... \n", + "4 NaN \n", + "... ... 
\n", + "154423 PreservedSpecimen \n", + "154424 PreservedSpecimen \n", + "154425 NaN \n", + "154426 NaN \n", + "154427 NaN \n", + "\n", + " description_supplementMetadata_primaryLocationName \\\n", + "0 Antarctica:LarsenIceShelf \n", + "1 Kilauea \n", + "2 Atlantic Ocean \n", + "3 JdF:Axial \n", + "4 Volcanologists Massif \n", + "... ... \n", + "154423 NaN \n", + "154424 NaN \n", + "154425 Olduvai Gorge \n", + "154426 Bull Pass \n", + "154427 Cerro de Guadalupe \n", + "\n", + " description_supplementMetadata_geologicalUnit \\\n", + "0 Not Applicable \n", + "1 NaN \n", + "2 NaN \n", + "3 Not Applicable \n", + "4 NaN \n", + "... ... \n", + "154423 NaN \n", + "154424 NaN \n", + "154425 NaN \n", + "154426 NaN \n", + "154427 NaN \n", + "\n", + " description_supplementMetadata_localityDescription \\\n", + "0 NaN \n", + "1 NaN \n", + "2 Locality Key: Mid-Atlantic Ridge, Romanche Trench \n", + "3 NaN \n", + "4 NaN \n", + "... ... \n", + "154423 NaN \n", + "154424 NaN \n", + "154425 NaN \n", + "154426 NaN \n", + "154427 Puerco Plugs \n", + "\n", + " description_supplementMetadata_originalArchive \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "... ... \n", + "154423 NaN \n", + "154424 NaN \n", + "154425 NaN \n", + "154426 NaN \n", + "154427 NaN \n", + "\n", + " description_supplementMetadata_platformDescr \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "... ... \n", + "154423 NaN \n", + "154424 NaN \n", + "154425 NaN \n", + "154426 NaN \n", + "154427 NaN \n", + "\n", + " description_collectionMethod \\\n", + "0 Sampler:Fluid:Bottle \n", + "1 Coring \n", + "2 NaN \n", + "3 Syringe, 100 cc \n", + "4 ChainBagDredge, Sediment Trap \n", + "... ... \n", + "154423 Manual \n", + "154424 Manual \n", + "154425 Coring \n", + "154426 NaN \n", + "154427 NaN \n", + "\n", + " description_supplementMetadata_cruiseFieldPrgrm \\\n", + "0 NBP1001 \n", + "1 NaN \n", + "2 NaN \n", + "3 TN253 \n", + "4 SO249-2 \n", + "... ... 
\n", + "154423 NaN \n", + "154424 NaN \n", + "154425 OGCP \n", + "154426 NaN \n", + "154427 NaN \n", + "\n", + " description_supplementMetadata_publicationUrl_description \\\n", + "0 NaN \n", + "1 Smithsonian collections record for NMNH 115478... \n", + "2 Smithsonian collections record for NMNH 110753... \n", + "3 NaN \n", + "4 NaN \n", + "... ... \n", + "154423 Source Reference URL \n", + "154424 Source Reference URL \n", + "154425 NaN \n", + "154426 None \n", + "154427 Smithsonian collections record for NMNH 118050... \n", + "\n", + " original_high_label \\\n", + "0 mat:liquidwater \n", + "1 mat:rock \n", + "2 mat:rock \n", + "3 mat:organicmaterial \n", + "4 mat:rock \n", + "... ... \n", + "154423 mat:organicmaterial \n", + "154424 mat:organicmaterial \n", + "154425 mat:rock \n", + "154426 mat:rock \n", + "154427 mat:rock \n", + "\n", + " label_list \\\n", + "0 fluid material \n", + "1 sediment/igneous rock/natural solid material/f... \n", + "2 sediment/igneous rock/natural solid material/f... \n", + "3 organic material \n", + "4 sediment/igneous rock/natural solid material/f... \n", + "... ... \n", + "154423 organic material \n", + "154424 organic material \n", + "154425 sediment/igneous rock/natural solid material/f... \n", + "154426 granitoid/phaneritic igneous rock/sediment/ign... \n", + "154427 sediment/igneous rock/natural solid material/f... \n", + "\n", + " concatenated_text_A \\\n", + "0 The country where the sample was collected:... \n", + "1 The country where the sample was collected:... \n", + "2 The object type of sample indicates that th... \n", + "3 The country where the sample was collected:... \n", + "4 The object type of sample indicates that th... \n", + "... ... \n", + "154423 The country where the sample was collected:... \n", + "154424 The country where the sample was collected:... \n", + "154425 The object type of sample indicates that th... \n", + "154426 The object type of sample indicates that th... 
\n", + "154427 The country where the sample was collected:... \n", + "\n", + " negative_sample_A \\\n", + "0 The country where the sample was collected:... \n", + "1 The country where the sample was collected:... \n", + "2 The object type of sample indicates that th... \n", + "3 The country where the sample was collected:... \n", + "4 The object type of sample indicates that th... \n", + "... ... \n", + "154423 The country where the sample was collected:... \n", + "154424 The country where the sample was collected:... \n", + "154425 The object type of sample indicates that th... \n", + "154426 The object type of sample indicates that th... \n", + "154427 The country where the sample was collected:... \n", + "\n", + " concatenated_text_B \\\n", + "0 The country where the sample was collected ... \n", + "1 The country where the sample was collected ... \n", + "2 The object type of sample indicates that th... \n", + "3 The country where the sample was collected ... \n", + "4 The object type of sample indicates that th... \n", + "... ... \n", + "154423 The country where the sample was collected ... \n", + "154424 The country where the sample was collected ... \n", + "154425 The object type of sample indicates that th... \n", + "154426 The object type of sample indicates that th... \n", + "154427 The country where the sample was collected ... \n", + "\n", + " negative_sample_B \\\n", + "0 The country where the sample was collected ... \n", + "1 The country where the sample was collected ... \n", + "2 The object type of sample indicates that th... \n", + "3 The country where the sample was collected ... \n", + "4 The object type of sample indicates that th... \n", + "... ... \n", + "154423 The country where the sample was collected ... \n", + "154424 The country where the sample was collected ... \n", + "154425 The object type of sample indicates that th... \n", + "154426 The object type of sample indicates that th... 
\n", + "154427 The country where the sample was collected ... \n", + "\n", + " concatenated_text_C \\\n", + "0 The country where the sample was collected ... \n", + "1 The country where the sample was collected ... \n", + "2 The object type of sample indicates that th... \n", + "3 The country where the sample was collected ... \n", + "4 The object type of sample indicates that th... \n", + "... ... \n", + "154423 The country where the sample was collected ... \n", + "154424 The country where the sample was collected ... \n", + "154425 The object type of sample indicates that th... \n", + "154426 The object type of sample indicates that th... \n", + "154427 The country where the sample was collected ... \n", + "\n", + " negative_sample_C \n", + "0 The country where the sample was collected ... \n", + "1 The country where the sample was collected ... \n", + "2 The object type of sample indicates that th... \n", + "3 The country where the sample was collected ... \n", + "4 The object type of sample indicates that th... \n", + "... ... \n", + "154423 The country where the sample was collected ... \n", + "154424 The country where the sample was collected ... \n", + "154425 The object type of sample indicates that th... \n", + "154426 The object type of sample indicates that th... \n", + "154427 The country where the sample was collected ... 
\n", + "\n", + "[154428 rows x 35 columns]" + ] + }, + "execution_count": 140, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_df = pd.read_csv(\"./datasets/SESAR_ZTC_train_multi.csv\") \n", + "train_df" + ] + }, + { + "cell_type": "code", + "execution_count": 141, + "id": "db74fe86", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The country where the sample was collected:Not Applicable, The province where the sample was collected:Not Applicable, The object type of sample indicates that this sample:a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise:Ship, The age of a sample as described by the stratigraphic era, period, state, etc.:Not Applicable, The county where the sample was collected:Not Applicable, The name of institution, museum, or repository where the sample is currently stored:Not Provided, The city where the sample was collected:Not Applicable, The type of the primary location:IceShelf, The name of the primary location the sample was collected:Antarctica:LarsenIceShelf, A body of rock established as a distinct entity in the classification of the Earth’s rocks:Not Applicable, The method by which a sample was collected:Sampler:Fluid:Bottle, The name or identifier of the field program (cruise or expedition), during which the sample was collected:NBP1001Material:fluid material.\n", + "\n", + "The country where the sample was collected:Not Applicable, The province where the sample was collected:Not Applicable, The object type of sample indicates that this sample:a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise:Ship, The age of a sample as described by the stratigraphic era, period, state, etc.:Not Applicable, The county where the sample was collected:Not Applicable, The name of 
institution, museum, or repository where the sample is currently stored:Not Provided, The city where the sample was collected:Not Applicable, The type of the primary location:IceShelf, The name of the primary location the sample was collected:Antarctica:LarsenIceShelf, A body of rock established as a distinct entity in the classification of the Earth’s rocks:Not Applicable, The method by which a sample was collected:Sampler:Fluid:Bottle, The name or identifier of the field program (cruise or expedition), during which the sample was collected:NBP1001Material:ceramic clay.\n", + "----------\n", + "The country where the sample was collected is Not Applicable, The province where the sample was collected is Not Applicable, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Not Applicable, The county where the sample was collected is Not Applicable, The name of institution, museum, or repository where the sample is currently stored is Not Provided, The city where the sample was collected is Not Applicable, The type of the primary location is IceShelf, The name of the primary location the sample was collected is Antarctica:LarsenIceShelf, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Not Applicable, The method by which a sample was collected is Sampler:Fluid:Bottle, The name or identifier of the field program (cruise or expedition), during which the sample was collected is NBP1001The material of this physical sample is fluid material.\n", + "\n", + "The country where the sample was collected is Not Applicable, The province where the sample was collected is Not Applicable, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Not Applicable, The county where the sample was collected is Not Applicable, The name of institution, museum, or repository where the sample is currently stored is Not Provided, The city where the sample was collected is Not Applicable, The type of the primary location is IceShelf, The name of the primary location the sample was collected is Antarctica:LarsenIceShelf, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Not Applicable, The method by which a sample was collected is Sampler:Fluid:Bottle, The name or identifier of the field program (cruise or expedition), during which the sample was collected is NBP1001The material of this physical sample is igneous rock.\n", + "----------\n", + "The country where the sample was collected is Not Applicable, The province where the sample was collected is Not Applicable, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Not Applicable, The county where the sample was collected is Not Applicable, The name of institution, museum, or repository where the sample is currently stored is Not Provided, The city where the sample was collected is Not Applicable, The type of the primary location is IceShelf, The name of the primary location the sample was collected is Antarctica:LarsenIceShelf, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Not Applicable, The method by which a sample was collected is Sampler:Fluid:Bottle, The name or identifier of the field program (cruise or expedition), during which the sample was collected is NBP1001The kind of material that constitutes this physical sample is fluid material.\n", + "\n", + "The country where the sample was collected is Not Applicable, The province where the sample was collected is Not Applicable, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Not Applicable, The county where the sample was collected is Not Applicable, The name of institution, museum, or repository where the sample is currently stored is Not Provided, The city where the sample was collected is Not Applicable, The type of the primary location is IceShelf, The name of the primary location the sample was collected is Antarctica:LarsenIceShelf, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Not Applicable, The method by which a sample was collected is Sampler:Fluid:Bottle, The name or identifier of the field program (cruise or expedition), during which the sample was collected is NBP1001The kind of material that constitutes this physical sample is gabbroid.\n" + ] + } + ], + "source": [ + "print(train_df['concatenated_text_A'].values.tolist()[0])\n", + "print()\n", + "print(train_df['negative_sample_A'][0])\n", + "print(\"----------\")\n", + "print(train_df['concatenated_text_B'].values.tolist()[0])\n", + "print()\n", + "print(train_df['negative_sample_B'][0])\n", + "print(\"----------\")\n", + "print(train_df['concatenated_text_C'].values.tolist()[0])\n", + "print()\n", + "print(train_df['negative_sample_C'][0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a4824e56", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0e35a85", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "venv" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/SESAR/zeroshot-learning/evaluate.py b/SESAR/zeroshot-learning/evaluate.py index b083cfb..f17f182 
100644 --- a/SESAR/zeroshot-learning/evaluate.py +++ b/SESAR/zeroshot-learning/evaluate.py @@ -60,11 +60,22 @@ def get_zero_shot_predictions(multilabel,output_dir, test_df, template_type, lab test_col = 'concatenated_text_' + template_type if template_type=='C': test_col = 'concatenated_text_B' # no column for C exists for the test set + # select hypothesis template + if template_type == "A": + hypothesis_template = "Material:{}." + elif template_type == "B": + hypothesis_template = "The material of this physical sample is {}." + else: + hypothesis_template = "The kind of material that constitutes this physical sample is {}." + prefix = "" + suffix = "" test_text = test_df[test_col].values.tolist() + # strip prefix + test_text = [text[len(prefix):][:-len(suffix)] for text in test_text] test_ds = Dataset.from_dict({'text': test_text }) # get zero-shot predictions preds_list = [] - for text, output in tqdm(zip(test_text, classifier(KeyDataset(test_ds, 'text'), batch_size=batch_size, candidate_labels=label_names, multi_label=multilabel)), + for text, output in tqdm(zip(test_text, classifier(KeyDataset(test_ds, 'text'), batch_size=batch_size, hypothesis_template = hypothesis_template, candidate_labels=label_names, multi_label=multilabel)), total=len(test_ds), desc="SESAR Zero Shot"): preds_list.append(output) if not multilabel: @@ -93,6 +104,7 @@ def evaluate_classification_performance(multilabel, predicted_labels, gold_label if __name__ == '__main__': parser = ArgumentParser() parser.add_argument("--hypothesis_template_type", type=str, default='A') + parser.add_argument("--label_file", type=str) parser.add_argument("--test_dataset_dir", type=str) parser.add_argument("--eval_batch_size", type=int, default=32) parser.add_argument("--max_length", type=int, default=256) @@ -112,9 +124,9 @@ def evaluate_classification_performance(multilabel, predicted_labels, gold_label prefix = "mat:" if args.multilabel: - label_col_name = "description_material" + label_col_name 
="label_list" # use the stored label space - gold_label_names = open('unique_multi_labels.txt').read().splitlines() + gold_label_names = open(args.label_file).read().splitlines() else: # using specified depth level to restrict the label space if args.depth_level == 1: diff --git a/SESAR/zeroshot-learning/finetune_ZTC.py b/SESAR/zeroshot-learning/finetune_ZTC.py index fcf1d07..4a72e98 100644 --- a/SESAR/zeroshot-learning/finetune_ZTC.py +++ b/SESAR/zeroshot-learning/finetune_ZTC.py @@ -106,8 +106,8 @@ def finetune_ZTC_model(train_df, dev_df, model_name, template_type, num_epochs, per_device_eval_batch_size=eval_batch_size, evaluation_strategy='epoch', save_strategy='epoch', - warmup_steps=500, - gradient_accumulation_steps=8, # batch size * accumulation_steps = total batch size + # warmup_steps=500, + gradient_accumulation_steps=1, # batch size * accumulation_steps = total batch size weight_decay=weight_decay, load_best_model_at_end=True, metric_for_best_model=metric_name, diff --git a/SESAR/zeroshot-learning/total_unique_multi_labels.txt b/SESAR/zeroshot-learning/total_unique_multi_labels.txt new file mode 100644 index 0000000..79b4bb4 --- /dev/null +++ b/SESAR/zeroshot-learning/total_unique_multi_labels.txt @@ -0,0 +1,101 @@ +foid syenitoid +mineral-carbonate or nitrate +tuffite +sedimentary rock +gabbroid +carbonate sediment +mylonitic rock +sediment +peridotite +porphyry +glass rich igneous rock +carbonate sedimentary rock +mineral-borate +gaseous material +liquid water +mineral-halide +fluid material +mineral-silicate or germanate +mud size sediment +alkali feldspar granite +granitoid +pyroclastic rock +intermediate composition igneous rock +tonalite +chemical sedimentary material +any anthropogenic material +fine grained igneous rock +gravel size sediment +cataclasite series +biogenic sediment +gabbroic rock +residual material +phaneritic igneous rock +rhyolitoid +tephritoid +dioritoid +coal +ceramic clay +mineral-sulfate, selenate, or tellurate 
+mineral-phosphate, arsenate, or vanadate +basalt +syenitoid +fault related material +phonolitoid +impact generated material +andesite +diamictite +mineral-native element +mineral-organic compound +ultramafic igneous rock +hybrid sedimentary rock +natural solid material +granite +foiditoid +metamorphic rock +metasomatic rock +granodiorite +high magnesium fine grained igneous rock +particulate +anthropogenic metal material +breccia +massive sulphide +pyroxenite +rock +quartz rich igneous rock +clastic sediment +generic sandstone +iron rich sedimentary rock +exotic composition igneous rock +organic material +trachytoid +basic igneous rock +glass +breccia gouge series +non clastic siliceous sediment +pegmatite +generic mudstone +acidic igneous rock +mineral-oxide +hornblendite +mineral +anorthositic rock +mineral-sulfide or sulfosalt +foid gabbroid +igneous rock +tephra +diamicton +dacite +biogenic non-organic material +organic rich sedimentary rock +non clastic siliceous sedimentary rock +doleritic rock +fragmental igneous rock +soil +aphanite +anthropogenic material +generic conglomerate +foidolite +clastic sedimentary rock +charcoal +sand size sediment diff --git a/SESAR/zeroshot-learning/total_unique_multi_labels_wo_leaf.txt b/SESAR/zeroshot-learning/total_unique_multi_labels_wo_leaf.txt new file mode 100644 index 0000000..c56dcd0 --- /dev/null +++ b/SESAR/zeroshot-learning/total_unique_multi_labels_wo_leaf.txt @@ -0,0 +1,27 @@ +ultramafic igneous rock +fluid material +biogenic non-organic material +clastic sediment +organic rich sedimentary rock +any anthropogenic material +sediment +anthropogenic metal material +organic material +anthropogenic material +sedimentary rock +igneous rock +natural solid material +fine grained igneous rock +gabbroid +acidic igneous rock +mineral +intermediate composition igneous rock +ceramic clay +fragmental igneous rock +fault related material +particulate +basic igneous rock +granitoid +phaneritic igneous rock +clastic sedimentary 
rock +rock From a1be95361d944e8ce0b631c32a8194a42caa3252 Mon Sep 17 00:00:00 2001 From: hyunssong Date: Tue, 21 Nov 2023 15:01:36 -0700 Subject: [PATCH 6/7] Updates on scripts and progress - Update on training scripts for finetuning on entire label space / non-leaves - Add README for new progress --- .../Create-Multi-Label-Mapping.ipynb | 190 +-------------- SESAR/zeroshot-learning/README.md | 10 +- SESAR/zeroshot-learning/evaluate.py | 148 ++++++++---- SESAR/zeroshot-learning/finetune_ZTC.py | 37 ++- ...nique_multi_labels.txt => leaf_labels.txt} | 226 +++++++++++------- .../total_unique_nonleaf_multi_labels.txt | 33 +++ 6 files changed, 303 insertions(+), 341 deletions(-) rename SESAR/zeroshot-learning/{unique_multi_labels.txt => leaf_labels.txt} (59%) create mode 100644 SESAR/zeroshot-learning/total_unique_nonleaf_multi_labels.txt diff --git a/SESAR/zeroshot-learning/Create-Multi-Label-Mapping.ipynb b/SESAR/zeroshot-learning/Create-Multi-Label-Mapping.ipynb index 483cdc3..33dea0f 100644 --- a/SESAR/zeroshot-learning/Create-Multi-Label-Mapping.ipynb +++ b/SESAR/zeroshot-learning/Create-Multi-Label-Mapping.ipynb @@ -477,192 +477,6 @@ " break\n" ] }, - { - "cell_type": "code", - "execution_count": 7, - "id": "eabaf50d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'material': None,\n", - " 'any anthropogenic material': 'material',\n", - " 'anthropogenic metal material': 'any anthropogenic material',\n", - " 'brass': 'anthropogenic metal material',\n", - " 'bronze': 'anthropogenic metal material',\n", - " 'copper': 'anthropogenic metal material',\n", - " 'gold': 'anthropogenic metal material',\n", - " 'iron': 'anthropogenic metal material',\n", - " 'lead': 'anthropogenic metal material',\n", - " 'pewter': 'anthropogenic metal material',\n", - " 'anthropogenic material': 'any anthropogenic material',\n", - " 'anthropogenic organic material': 'organic material',\n", - " 'plastic (material)': 'anthropogenic organic material',\n", - " 'ceramic 
clay': 'anthropogenic material',\n", - " 'brick clay': 'ceramic clay',\n", - " 'bucchero': 'ceramic clay',\n", - " 'faience': 'ceramic clay',\n", - " 'porcelain': 'ceramic clay',\n", - " 'terracotta': 'ceramic clay',\n", - " 'terra sigilata': 'ceramic clay',\n", - " 'fiber material': 'anthropogenic material',\n", - " 'glass': 'anthropogenic material',\n", - " 'paper': 'anthropogenic material',\n", - " 'plaster': 'anthropogenic material',\n", - " 'plaster or mortar': 'anthropogenic material',\n", - " 'rubber': 'anthropogenic material',\n", - " 'any ice': 'material',\n", - " 'frozen water': 'any ice',\n", - " 'biogenic non-organic material': 'material',\n", - " 'amber': 'biogenic non-organic material',\n", - " 'bone': 'biogenic non-organic material',\n", - " 'charcoal': 'biogenic non-organic material',\n", - " 'coal': 'organic rich sedimentary rock',\n", - " 'shell': 'biogenic non-organic material',\n", - " 'dispersed media': 'material',\n", - " 'natural solid material': 'material',\n", - " 'mineral': 'natural solid material',\n", - " 'hematite': 'mineral',\n", - " 'kaolin': 'mineral',\n", - " 'mica': 'mineral',\n", - " 'quartz': 'mineral',\n", - " 'mineral-borate': 'mineral',\n", - " 'mineral-carbonate or nitrate': 'mineral',\n", - " 'mineral-halide': 'mineral',\n", - " 'mineral-native element': 'mineral',\n", - " 'mineral-organic compound': 'mineral',\n", - " 'mineral-oxide': 'mineral',\n", - " 'mineral-phosphate, arsenate, or vanadate': 'mineral',\n", - " 'mineral-silicate or germanate': 'mineral',\n", - " 'mineral-sulfate, selenate, or tellurate': 'mineral',\n", - " 'mineral-sulfide or sulfosalt': 'mineral',\n", - " 'mixed soil sediment or rock': 'natural solid material',\n", - " 'particulate': 'natural solid material',\n", - " 'cinder': 'rock',\n", - " 'rock or sediment': 'natural solid material',\n", - " 'rock': 'rock or sediment',\n", - " 'basalt': 'fine grained igneous rock',\n", - " 'chert': 'rock',\n", - " 'flint': 'chert',\n", - " 'dolomite': 'rock',\n", - 
" 'gabbro': 'rock',\n", - " 'greywacke': 'rock',\n", - " 'limestone': 'rock',\n", - " 'marble': 'rock',\n", - " 'obsidian': 'rock',\n", - " 'pumice': 'rock',\n", - " 'slate': 'rock',\n", - " 'travertine': 'rock',\n", - " 'aphanite': 'rock',\n", - " 'breccia': 'rock',\n", - " 'fault related material': 'rock',\n", - " 'cataclasite series': 'fault related material',\n", - " 'mylonitic rock': 'fault related material',\n", - " 'breccia gouge series': 'fault related material',\n", - " 'fragmental igneous rock': 'igneous rock',\n", - " 'pyroclastic rock': 'fragmental igneous rock',\n", - " 'igneous rock': 'rock',\n", - " 'acidic igneous rock': 'igneous rock',\n", - " 'dacite': 'fine grained igneous rock',\n", - " 'granitoid': 'phaneritic igneous rock',\n", - " 'alkali feldspar granite': 'granitoid',\n", - " 'granite': 'granitoid',\n", - " 'granodiorite': 'granitoid',\n", - " 'tonalite': 'granitoid',\n", - " 'quartz rich igneous rock': 'phaneritic igneous rock',\n", - " 'rhyolitoid': 'fine grained igneous rock',\n", - " 'basic igneous rock': 'igneous rock',\n", - " 'gabbroic rock': 'gabbroid',\n", - " 'doleritic rock': 'igneous rock',\n", - " 'exotic composition igneous rock': 'igneous rock',\n", - " 'fine grained igneous rock': 'igneous rock',\n", - " 'andesite': 'intermediate composition igneous rock',\n", - " 'foiditoid': 'fine grained igneous rock',\n", - " 'high magnesium fine grained igneous rock': 'fine grained igneous rock',\n", - " 'phonolitoid': 'fine grained igneous rock',\n", - " 'tephritoid': 'fine grained igneous rock',\n", - " 'trachytoid': 'fine grained igneous rock',\n", - " 'glass rich igneous rock': 'igneous rock',\n", - " 'hypabyssal intrusive rock': 'igneous rock',\n", - " 'intermediate composition igneous rock': 'igneous rock',\n", - " 'dioritoid': 'phaneritic igneous rock',\n", - " 'phaneritic igneous rock': 'igneous rock',\n", - " 'anorthositic rock': 'phaneritic igneous rock',\n", - " 'aplite': 'phaneritic igneous rock',\n", - " 'foid dioritoid': 
'phaneritic igneous rock',\n", - " 'foid gabbroid': 'phaneritic igneous rock',\n", - " 'foid syenitoid': 'phaneritic igneous rock',\n", - " 'foidolite': 'phaneritic igneous rock',\n", - " 'gabbroid': 'phaneritic igneous rock',\n", - " 'monzogabbroic rock': 'gabbroid',\n", - " 'hornblendite': 'ultramafic igneous rock',\n", - " 'pegmatite': 'phaneritic igneous rock',\n", - " 'peridotite': 'ultramafic igneous rock',\n", - " 'pyroxenite': 'ultramafic igneous rock',\n", - " 'syenitoid': 'phaneritic igneous rock',\n", - " 'plutonic igneous rock': 'igneous rock',\n", - " 'porphyry': 'igneous rock',\n", - " 'ultrabasic igneous rock': 'igneous rock',\n", - " 'ultramafic igneous rock': 'igneous rock',\n", - " 'impact generated material': 'rock',\n", - " 'massive sulphide': 'rock',\n", - " 'metamorphic rock': 'rock',\n", - " 'metasomatic rock': 'rock',\n", - " 'sedimentary rock': 'rock',\n", - " 'carbonate sedimentary rock': 'sedimentary rock',\n", - " 'clastic sedimentary rock': 'sedimentary rock',\n", - " 'diamictite': 'clastic sedimentary rock',\n", - " 'generic conglomerate': 'sedimentary rock',\n", - " 'generic mudstone': 'sedimentary rock',\n", - " 'generic sandstone': 'sedimentary rock',\n", - " 'hybrid sedimentary rock': 'sedimentary rock',\n", - " 'iron rich sedimentary rock': 'sedimentary rock',\n", - " 'non clastic siliceous sedimentary rock': 'sedimentary rock',\n", - " 'organic rich sedimentary rock': 'sedimentary rock',\n", - " 'phosphorite': 'sedimentary rock',\n", - " 'tuffite': 'rock',\n", - " 'residual material': 'rock',\n", - " 'sediment': 'rock or sediment',\n", - " 'biogenic sediment': 'sediment',\n", - " 'carbonate sediment': 'sediment',\n", - " 'chemical sedimentary material': 'sediment',\n", - " 'clastic sediment': 'sediment',\n", - " 'diamicton': 'clastic sediment',\n", - " 'gravel size sediment': 'sediment',\n", - " 'hybrid sediment': 'sediment',\n", - " 'iron rich sediment': 'sediment',\n", - " 'mud size sediment': 'sediment',\n", - " 'non clastic 
siliceous sediment': 'sediment',\n", - " 'phosphate rich sediment': 'sediment',\n", - " 'sand size sediment': 'sediment',\n", - " 'tephra': 'sediment',\n", - " 'soil': 'natural solid material',\n", - " 'fluid material': 'material',\n", - " 'gaseous material': 'fluid material',\n", - " 'liquid water': 'fluid material',\n", - " 'non-aqueous liquid material': 'fluid material',\n", - " 'organic material': 'material',\n", - " 'organic animal material': 'organic material',\n", - " 'organic animal product': 'organic material',\n", - " 'hair': 'organic animal product',\n", - " 'leather': 'organic animal product',\n", - " 'organic plant material': 'organic material',\n", - " 'wood': 'plant material',\n", - " 'plant material': 'organic material',\n", - " 'plant fiber': 'plant material'}" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# stores direct parent \n", - "parent" - ] - }, { "cell_type": "code", "execution_count": 8, @@ -1053,7 +867,7 @@ "\n", "assert len(map_to_depth_level_2) == len(parents)\n", "assert len(set(map_to_depth_level_2.values())) == len(depth_level_1) + len(depth_level_2) + 1\n", - "map_to_depth_level_2" + "#map_to_depth_level_2" ] }, { @@ -1270,7 +1084,7 @@ " break\n", "\n", "assert len(map_to_depth_level_3) == len(parents)\n", - "print(len(set(map_to_depth_level_3.values())), len(depth_level_1) , len(depth_level_2) , len(depth_level_3) )\n", + "#print(len(set(map_to_depth_level_3.values())), len(depth_level_1) , len(depth_level_2) , len(depth_level_3) )\n", "map_to_depth_level_3" ] }, diff --git a/SESAR/zeroshot-learning/README.md b/SESAR/zeroshot-learning/README.md index cf33406..a63dd6d 100644 --- a/SESAR/zeroshot-learning/README.md +++ b/SESAR/zeroshot-learning/README.md @@ -4,7 +4,9 @@ This directory contains the ongoing experiments on using Zeroshot Text Classific Different datasets were created from the original SESAR dump and the annotated data `SESARTrainingiSamKeywords.csv` to 
find the best method to solve our problem. The datasets that are used during this process will be uploaded [here](https://drive.google.com/drive/folders/1o9vZ4CzTDi0N93KKPCGgvcvqNTrPx4jI?usp=sharing). -- `SESAR_ZTC_train_multi.csv`, `SESAR_ZTC_dev_multi.csv`, `SESAR_ZTC_test_multi.csv` : Used for multilabel finetuning (non zero-shot) +- `SESAR_ZTC_train_multi_nonleaves_replaced.csv`, `SESAR_ZTC_dev_multi_nonleaves_replaced.csv`, `SESAR_ZTC_test_multi_nonleaves_replaced.csv` : Used for multilabel finetuning on entire label space(with underperforming labels that are substituted by definitions) +- `SESAR_ZTC_train_multi_entire.csv`, `SESAR_ZTC_dev_multi_entire.csv`, `SESAR_ZTC_test_multi_entire.csv`: Used for multilabel finetuning (non zero-shot) on the entire label space +- `SESAR_ZTC_train_multi_wo_leaf.csv`, `SESAR_ZTC_dev_multi_wo_leaf.csv`, `SESAR_ZTC_test_multi_wo_leaf.csv`: Used for multilabel finetuning on the label space excluding leaf labels - `SESAR_ZTC_test_multiclass_label_fully_unseen.csv` : Used for multiclass label-fully-unseen tasks. - `SESAR_ZTC_test_multilabel_label_fully_unseen.csv` : Used for multilabel label-fully-unseen tasks. - `SESAR_ZTC_partial_label_unseen_train.csv`, `SESAR_ZTC_partial_label_unseen_dev.csv`,`SESAR_ZTC_partial_label_unseen_test.csv` : Used for multiclass partially-label-unseen tasks. @@ -15,8 +17,10 @@ The datasets that are used during this process will be uploaded [here](https://d -`hyperparam_search_ZTC.py` : Code to find the optimal hyperparameters for finetuning. -`finetune_ZTC.py` : Implementation of fine-tuning a textual entailment model on the SESAR dataset. Converts the dataset into a format that is applicable for textual entailment finetuning task and uses the given arguments to execute finetuning. The finetuned model will be stored in the output directory. 
+ ``python finetune_ZTC.py --hypothesis_template_type B --lr_rate 5e-05 --train_data_dir ./datasets/SESAR_ZTC_train_multi_wo_leaf.csv --dev_data_dir ./datasets/SESAR_ZTC_dev_multi_entire.csv --model_name roberta-large-mnli --num_epochs 3 --train_batch_size 4 --eval_batch_size 4`` --`evaluate.py` : Implementation of evaluating the model on SESAR dataset. The model that can be used could be either a finetuned model from finetune_ZTC.py or an out-of-box textual entailment model(completely zeroshot). Result of evaluation will be logged. Supports solving the task as a multilabel or multiclass. For multiclass approach, also contains implementation of using specified depth level of the entire hierarchical label space of iSamples vocabulary. +-`evaluate.py` : Implementation of evaluating the model on the SESAR dataset. The model to evaluate is a finetuned model produced by finetune_ZTC.py. The result of the evaluation will be logged. + ``python evaluate.py --label_file total_unique_nonleaf_multi_labels.txt --test_dataset_dir ./datasets/SESAR_ZTC_test_multi_nonleaves.csv --output_dir ./C_roberta-large-mnli_3_5e-05_0.01_8_2023_11_17_17_30_35/checkpoint-1400`` ## Results of Experiments Results of the ongoing experiments will be updated [here](https://docs.google.com/spreadsheets/d/19Q95HsjRS7JGyHoY8o8hxirBO6NiJ1ufHYB_xg0X4Ks/edit?usp=sharing). @@ -25,4 +29,4 @@ Approaches experimented so far: 2) Fully-label-unseen : Multiclass. Use the pretrained model directly and apply it on the test dataset. This approaches uses the iSamplesMaterialType that was contained in SESARTrainingiSamKeywords.csv directly as labels. 3) Multilabel-fully-label-unseen : Multilabel. Use the pretrained model directly and apply it on the test dataset. This approach uses the label including the extension vocabulary and expects the model to predict the label and all of the parent labels of it. 4) Depth-fully-label-unseen: Multiclass. 
Use the iSamplesMaterialType field and the extMaterialType field in SESARTrainingiSamKeywords.csv to convert the label into specified depth level of the iSamples MaterialType hierarchy. Use this converted label as expected prediction space. -5) Multilabel-Finetune : Multilabel. Use the pretrained model and finetune on the entire SESAR dataset. (code TBC) +5) Multilabel-Finetune-Leaves/Nonleaves: Multilabel. Use the pretrained model and finetune it on the SESAR datasets with each record's label up to the non-leaf labels (nonleaves) OR each record's label up to the leaf labels (leaves). Evaluation is done on test dataset records that have only non-leaf labels (`SESAR_ZTC_test_multi_nonleaves.csv`) and on test dataset records that have leaf labels, using only those leaf labels as the output space (`SESAR_ZTC_test_multi_leaves_only.csv`). After underperforming non-leaf labels are identified, experiment with replacing those labels with layman's-term definitions (using the definitions [here](https://docs.google.com/spreadsheets/d/1JD_F37bLxuqeuGVIPCuuWvKxzA6tGmMzYrogX_mJC_M/edit?usp=sharing)). 
diff --git a/SESAR/zeroshot-learning/evaluate.py b/SESAR/zeroshot-learning/evaluate.py index f17f182..52294e9 100644 --- a/SESAR/zeroshot-learning/evaluate.py +++ b/SESAR/zeroshot-learning/evaluate.py @@ -1,6 +1,6 @@ from argparse import ArgumentParser import pandas as pd -from transformers import (AutoTokenizer, pipeline) +from transformers import (AutoTokenizer, pipeline,AutoModelForSequenceClassification, Trainer, TrainingArguments) from transformers.pipelines.pt_utils import KeyDataset from datasets import Dataset from sklearn.metrics import classification_report @@ -9,6 +9,7 @@ import os from tqdm.auto import tqdm from sklearn.metrics import accuracy_score +import numpy as np from sklearn.preprocessing import MultiLabelBinarizer logging.basicConfig() logging.getLogger().setLevel(logging.INFO) @@ -40,7 +41,7 @@ def get_multilabel_predictions(predictions, THRESHOLD): predicted_labels.append(prediction) return predicted_labels -def get_zero_shot_predictions(multilabel,output_dir, test_df, template_type, label_names, batch_size, max_length): +def get_predictions(output_dir, test_df, template_type, label_names, batch_size, max_length): """ Get the zero shot predictions by applying the model to the full label space Args: @@ -54,8 +55,17 @@ def get_zero_shot_predictions(multilabel,output_dir, test_df, template_type, lab """ device = 0 if torch.cuda.is_available() else -1 # load saved tokenizer and classifier - tokenizer = AutoTokenizer.from_pretrained(output_dir, use_fast=True, model_max_length=max_length) - classifier = pipeline("zero-shot-classification", model=output_dir, tokenizer=tokenizer, device=device) + tokenizer = AutoTokenizer.from_pretrained('roberta-large-mnli',use_fast=True, model_max_length=max_length) + model = ( + AutoModelForSequenceClassification.from_pretrained(output_dir, num_labels = 3) + ) + test_args = TrainingArguments( + output_dir = "./results", + do_train = False, + do_predict = True, + per_device_eval_batch_size = 64, + ) + trainer = 
Trainer(model = model, args =test_args) # load test dataset test_col = 'concatenated_text_' + template_type if template_type=='C': @@ -67,32 +77,68 @@ def get_zero_shot_predictions(multilabel,output_dir, test_df, template_type, lab hypothesis_template = "The material of this physical sample is {}." else: hypothesis_template = "The kind of material that constitutes this physical sample is {}." - prefix = "" - suffix = "" + # load test dataset text test_text = test_df[test_col].values.tolist() - # strip prefix - test_text = [text[len(prefix):][:-len(suffix)] for text in test_text] - test_ds = Dataset.from_dict({'text': test_text }) - # get zero-shot predictions - preds_list = [] - for text, output in tqdm(zip(test_text, classifier(KeyDataset(test_ds, 'text'), batch_size=batch_size, hypothesis_template = hypothesis_template, candidate_labels=label_names, multi_label=multilabel)), - total=len(test_ds), desc="SESAR Zero Shot"): - preds_list.append(output) - if not multilabel: - # get a single predicted label - return [x['labels'][0] for x in preds_list] - else: - return get_multilabel_predictions(preds_list, THRESHOLD) + final_predictions = [] + idx = 0 + recall_k = [] # store for getting average of recall@k + for text in test_text: + test_batch = [] + # check entailment for all possible labels + for label in gold_label_names: + # select hypothesis template + if template_type == "A": + hypothesis_template = "Material:" + label + "." + elif template_type == "B": + hypothesis_template = "The material of this physical sample is " + label + "." + else: + hypothesis_template = "The kind of material that constitutes this physical sample is " + label + "." 
+ to_test = text + "" + hypothesis_template + test_batch.append(to_test) + # prediction for this instance + test_encodings = tokenizer(test_batch, truncation=True, padding=True) + test_dataset = Dataset.from_dict(test_encodings) + predictions = trainer.predict(test_dataset) + logits = predictions.predictions + # apply sigmoid + threshold + probabilities = torch.softmax(torch.tensor(logits), dim=1) + #turn predicted id's into actual label names + entailment =np.argmax(probabilities, axis=1).tolist() + pos_labels = [i for i, x in enumerate(entailment) if x == 2] # entailment + predicted_labels = [gold_label_names[i] for i in pos_labels] + final_predictions.append(predicted_labels) + #gold_labels = test_gold_labels[idx] + + ######### EXTRACT TOP K RECALL ######## + # extract entailment probabilities + # probabilities_matrix = [] + # for i, label in enumerate(gold_label_names): + # # Extract the probabilities of entailment for the current label + # entailment_probabilities = probabilities[i][2].item() + # probabilities_matrix.append(entailment_probabilities) + # probabilities_matrix = np.array(probabilities_matrix) + # top5_probabilities, top5_indices = torch.topk(torch.from_numpy(probabilities_matrix), k=5) + # top5_probabilities = top5_probabilities.tolist() + # top5_indices = top5_indices.tolist() + # top5_predictions = [gold_label_names[i] for i in top5_indices] + # # calculate recall @ 5 of this instance + # correct_at_5 = [x for x in top5_predictions if x in gold_labels] + # recall_at_5 = len(correct_at_5) / len(gold_labels) + # #print(f"Recall at 5 : {recall_at_5}") + # recall_k.append(recall_at_5) + idx += 1 + logging.info(f"{predicted_labels}") + logging.info(f"{idx}-th prediction is done") + #logging.info(f"Average recall@k : {sum(recall_k) / len(recall_k)}", ) + return final_predictions -def evaluate_classification_performance(multilabel, predicted_labels, gold_labels, gold_label_names): - target_names = None - if multilabel: - mlb = MultiLabelBinarizer() - 
# Fit the MultiLabelBinarizer on your labels and transform them into one-hot vectors - mlb.fit([gold_label_names]) - gold_labels = mlb.transform(gold_labels) - predicted_labels = mlb.transform(predicted_labels) - target_names = mlb.classes_ +def evaluate_classification_performance(predicted_labels, gold_labels, gold_label_names): + mlb = MultiLabelBinarizer() + # Fit the MultiLabelBinarizer on your labels and transform them into one-hot vectors + mlb.fit([gold_label_names]) + gold_labels = mlb.transform(gold_labels) + predicted_labels = mlb.transform(predicted_labels) + target_names = mlb.classes_ accuracy = accuracy_score(gold_labels, predicted_labels) report = classification_report(gold_labels, predicted_labels, target_names=target_names, output_dict=True) logging.info(classification_report(gold_labels, predicted_labels, target_names = target_names)) @@ -103,12 +149,11 @@ def evaluate_classification_performance(multilabel, predicted_labels, gold_label if __name__ == '__main__': parser = ArgumentParser() - parser.add_argument("--hypothesis_template_type", type=str, default='A') + parser.add_argument("--hypothesis_template_type", type=str, default='C') parser.add_argument("--label_file", type=str) parser.add_argument("--test_dataset_dir", type=str) - parser.add_argument("--eval_batch_size", type=int, default=32) - parser.add_argument("--max_length", type=int, default=256) - parser.add_argument("--multilabel", type=bool,default=True) + parser.add_argument("--eval_batch_size", type=int, default=64) + parser.add_argument("--max_length", type=int, default=512) parser.add_argument("--depth_level", type=int,default=1) parser.add_argument("--output_dir", type=str, default='roberta-large-mnli') @@ -122,25 +167,22 @@ def evaluate_classification_performance(multilabel, predicted_labels, gold_label #test_df = test_df.groupby('description_material').sample(n=500, random_state=42, replace=True) logging.info("Test data size : ", test_df.shape) - prefix = "mat:" - if 
args.multilabel: - label_col_name ="label_list" - # use the stored label space - gold_label_names = open(args.label_file).read().splitlines() - else: - # using specified depth level to restrict the label space - if args.depth_level == 1: - label_col_name = "description_material_depth_1" - elif args.depth_level == 2: - label_col_name = "description_material_depth_2" - else: - label_col_name = "description_material_depth_3" - gold_label_names = [x for x in list(set(test_df[label_col_name].values.tolist()))] # all possible gold labels + leaf_label_file = "leaf_labels_replaced.txt" + leaf_label_names = open(leaf_label_file).read().splitlines() + label_col_name ="label_list" + # use the stored label space + gold_label_names = open(args.label_file).read().splitlines() logging.info(f"Total {len(gold_label_names)} candidate labels to predict: {gold_label_names}") - # Evaluate performance - predicted_labels = get_zero_shot_predictions(args.multilabel, args.output_dir, test_df, template_type=args.hypothesis_template_type, label_names=gold_label_names, batch_size=args.eval_batch_size, max_length=args.max_length) - if args.multilabel: - test_gold_labels = [x.split("/") for x in test_df[label_col_name].values.tolist()] - else: - test_gold_labels = [x for x in test_df[label_col_name].values.tolist()] - evaluate_classification_performance(args.multilabel,predicted_labels, test_gold_labels, gold_label_names) + # select leaf label or non-leaf label + test_gold_labels = [x.split("/") for x in test_df[label_col_name].values.tolist()] + final = [] + for label_lst in test_gold_labels: + temp = [] + for label in label_lst: + if label not in leaf_label_names: # only include leaf labels + temp.append(label) + final.append(temp) + test_gold_labels = final + #print("Test gold labels", test_gold_labels) + predicted_labels = get_predictions(args.output_dir, test_df, template_type=args.hypothesis_template_type, label_names=gold_label_names, batch_size=args.eval_batch_size, 
max_length=args.max_length) + evaluate_classification_performance(predicted_labels, test_gold_labels, gold_label_names) diff --git a/SESAR/zeroshot-learning/finetune_ZTC.py b/SESAR/zeroshot-learning/finetune_ZTC.py index 4a72e98..b4606b4 100644 --- a/SESAR/zeroshot-learning/finetune_ZTC.py +++ b/SESAR/zeroshot-learning/finetune_ZTC.py @@ -5,11 +5,15 @@ import logging import torch import os +import datetime import numpy as np +from transformers import EarlyStoppingCallback from datasets import load_metric +import collections +from collections import Counter logging.basicConfig() logging.getLogger().setLevel(logging.INFO) -os.environ["WANDB_MODE"]="disabled" +os.environ["WANDB_API_KEY"]="7027708b12b5786cbe2d63abc8453bc4a1a84fa2" """## Model training""" if torch.cuda.is_available(): @@ -45,9 +49,15 @@ def convert_dataframe_format(dataframe, template_type , config): text += neg_text labels += [config.label2id['NEUTRAL']] * len(neg_text) + neg_column = 'negative_sample_' + template_type + '2' + neg_text = dataframe[neg_column].values.tolist() + text += neg_text + labels += [config.label2id['NEUTRAL']] * len(neg_text) + # generate new dataframe data = {'text': text, 'label': labels} df = pd.DataFrame(data) + print(collections.Counter(labels)) # shuffle data return df.sample(frac=1, random_state=42).reset_index(drop=True) @@ -94,7 +104,9 @@ def finetune_ZTC_model(train_df, dev_df, model_name, template_type, num_epochs, train_ds, dev_ds = create_datasets(tokenizer, train_df, dev_df, max_length) logging.info(f"Dataset size : train - {len(train_ds)}, dev - {len(dev_ds)}") #### CONDUCT TRAINING #### - output_dir = template_type + "_" + model_name + "_" + str(num_epochs) + "_" + str(lr_rate) + "_" + str(weight_decay) + "_" + str(train_batch_size) + current_time = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S") + + output_dir = template_type + "_" + model_name + "_" + str(num_epochs) + "_" + str(lr_rate) + "_" + str(weight_decay) + "_" + str(train_batch_size) + "_" + 
str(current_time) if not os.path.exists(output_dir): os.makedirs(output_dir) metric_name = "accuracy" @@ -104,11 +116,16 @@ def finetune_ZTC_model(train_df, dev_df, model_name, template_type, num_epochs, num_train_epochs=num_epochs, per_device_train_batch_size=train_batch_size, per_device_eval_batch_size=eval_batch_size, - evaluation_strategy='epoch', - save_strategy='epoch', - # warmup_steps=500, - gradient_accumulation_steps=1, # batch size * accumulation_steps = total batch size + evaluation_strategy='steps', + eval_steps=200, + save_steps=200, + save_total_limit=2, + save_strategy='steps', + #warmup_steps=500, + gradient_accumulation_steps=8, # batch size * accumulation_steps = total batch size weight_decay=weight_decay, + lr_scheduler_type="linear", + report_to='wandb', load_best_model_at_end=True, metric_for_best_model=metric_name, ) @@ -118,7 +135,8 @@ def finetune_ZTC_model(train_df, dev_df, model_name, template_type, num_epochs, args=training_args, train_dataset=train_ds, eval_dataset=dev_ds, - compute_metrics=compute_metrics + compute_metrics=compute_metrics, + callbacks = [EarlyStoppingCallback(early_stopping_patience=5)] ) trainer.train() @@ -136,8 +154,8 @@ def finetune_ZTC_model(train_df, dev_df, model_name, template_type, num_epochs, parser.add_argument("--dev_data_dir", type=str, required=True) parser.add_argument("--num_epochs", type=int, default=1) parser.add_argument("--lr_rate", type=float, default=2e-5) - parser.add_argument("--train_batch_size", type=int, default=16) - parser.add_argument("--eval_batch_size", type=int, default=16) + parser.add_argument("--train_batch_size", type=int, default=8) + parser.add_argument("--eval_batch_size", type=int, default=8) parser.add_argument("--weight_decay", type=float, default=0.01) parser.add_argument("--max_length", type=int, default=512) args = parser.parse_args() @@ -145,6 +163,7 @@ def finetune_ZTC_model(train_df, dev_df, model_name, template_type, num_epochs, # load dataset dev_df = 
pd.read_csv(args.dev_data_dir) train_df = pd.read_csv(args.train_data_dir) + print(Counter(train_df['description_material'].values.tolist())) # finetune the textual entailment model on the dataset output_dir = finetune_ZTC_model(train_df, dev_df, model_name=args.model_name, template_type=args.hypothesis_template_type,num_epochs=args.num_epochs, lr_rate=args.lr_rate, train_batch_size=args.train_batch_size, eval_batch_size=args.eval_batch_size,weight_decay=args.weight_decay,max_length=args.max_length) logging.info(f"Saved finetuned model in {output_dir}") diff --git a/SESAR/zeroshot-learning/unique_multi_labels.txt b/SESAR/zeroshot-learning/leaf_labels.txt similarity index 59% rename from SESAR/zeroshot-learning/unique_multi_labels.txt rename to SESAR/zeroshot-learning/leaf_labels.txt index 6a7432d..816cb2a 100644 --- a/SESAR/zeroshot-learning/unique_multi_labels.txt +++ b/SESAR/zeroshot-learning/leaf_labels.txt @@ -1,101 +1,151 @@ -fragmental igneous rock -clastic sediment -metamorphic rock -tephritoid -particulate -intermediate composition igneous rock -anthropogenic material -hornblendite -mineral -fault related material -impact generated material -sediment -iron rich sedimentary rock -non clastic siliceous sediment -anorthositic rock -sedimentary rock -dioritoid -basic igneous rock -liquid water -diamicton -foiditoid -gabbroic rock +brass +bronze +copper +gold +iron +lead +pewter +plastic (material) +brick clay +bucchero +faience +porcelain +terracotta +terra sigilata +fiber material +glass +paper +plaster +plaster or mortar +rubber +frozen water +amber +bone +charcoal coal -fine grained igneous rock -dacite -foid syenitoid -non clastic siliceous sedimentary rock -trachytoid -clastic sedimentary rock -phonolitoid -peridotite -gabbroid -breccia gouge series -syenitoid -igneous rock +shell +dispersed media +hematite +kaolin +mica +quartz +mineral-borate +mineral-carbonate or nitrate +mineral-halide +mineral-native element +mineral-organic compound +mineral-oxide 
+mineral-phosphate, arsenate, or vanadate mineral-silicate or germanate -residual material +mineral-sulfate, selenate, or tellurate +mineral-sulfide or sulfosalt +mixed soil sediment or rock +cinder +basalt +flint +cinder +coal +dolomite +gabbro +greywacke +limestone +marble +obsidian +pumice +slate +travertine +aphanite +breccia +cataclasite series +mylonitic rock +breccia gouge series pyroclastic rock -porphyry -chemical sedimentary material -foid gabbroid -anthropogenic metal material -charcoal -carbonate sedimentary rock -granitoid -hybrid sedimentary rock -andesite -generic sandstone -gravel size sediment -natural solid material -metasomatic rock -ultramafic igneous rock -generic conglomerate -phaneritic igneous rock +dacite alkali feldspar granite +granite +granodiorite tonalite -soil -massive sulphide -pyroxenite -cataclasite series -ceramic clay quartz rich igneous rock -gaseous material -mineral-phosphate, arsenate, or vanadate -diamictite -mineral-oxide -sand size sediment -mineral-organic compound -mylonitic rock -tuffite -generic mudstone -rock -organic material -exotic composition igneous rock -glass rhyolitoid +basalt +gabbroic rock +doleritic rock +exotic composition igneous rock +andesite +basalt +dacite +foiditoid high magnesium fine grained igneous rock +phonolitoid +rhyolitoid +tephritoid +trachytoid +pyroclastic rock glass rich igneous rock -aphanite +hypabyssal intrusive rock +andesite +dioritoid +anorthositic rock +aplite +dioritoid +foid dioritoid +foid gabbroid +foid syenitoid +foidolite +gabbroic rock +monzogabbroic rock +alkali feldspar granite granite -mineral-carbonate or nitrate -carbonate sediment -acidic igneous rock -organic rich sedimentary rock -mineral-sulfate, selenate, or tellurate -basalt -fluid material -breccia -tephra -doleritic rock -biogenic non-organic material -mineral-halide -mineral-sulfide or sulfosalt +granodiorite +tonalite +hornblendite pegmatite -foidolite +peridotite +pyroxenite +quartz rich igneous rock 
+syenitoid +plutonic igneous rock +porphyry +ultrabasic igneous rock +hornblendite +peridotite +pyroxenite +impact generated material +massive sulphide +metamorphic rock +metasomatic rock +carbonate sedimentary rock +diamictite +generic conglomerate +generic mudstone +generic sandstone +hybrid sedimentary rock +iron rich sedimentary rock +non clastic siliceous sedimentary rock +coal +phosphorite +tuffite +residual material biogenic sediment -any anthropogenic material -mineral-borate +carbonate sediment +chemical sedimentary material +diamicton +gravel size sediment +hybrid sediment +iron rich sediment mud size sediment -granodiorite -mineral-native element +non clastic siliceous sediment +phosphate rich sediment +sand size sediment +tephra +soil +gaseous material +liquid water +non-aqueous liquid material +plastic (material) +organic animal material +hair +leather +wood +plant fiber +wood diff --git a/SESAR/zeroshot-learning/total_unique_nonleaf_multi_labels.txt b/SESAR/zeroshot-learning/total_unique_nonleaf_multi_labels.txt new file mode 100644 index 0000000..42053a6 --- /dev/null +++ b/SESAR/zeroshot-learning/total_unique_nonleaf_multi_labels.txt @@ -0,0 +1,33 @@ +organic rich sedimentary rock +igneous rock +particulate +fluid material +organic plant material +chert +ultramafic igneous rock +basic igneous rock +anthropogenic material +fault related material +organic animal product +fine grained igneous rock +clastic sedimentary rock +anthropogenic metal material +anthropogenic organic material +fragmental igneous rock +biogenic non-organic material +any ice +sediment +any anthropogenic material +ceramic clay +acidic igneous rock +natural solid material +clastic sediment +plant material +intermediate composition igneous rock +sedimentary rock +mineral +granitoid +organic material +rock +gabbroid +phaneritic igneous rock From 5f5be9f7d07f129c0a9a6539297daa451290a397 Mon Sep 17 00:00:00 2001 From: "Hyunju(Sarah) Song" <63157822+hyunssong@users.noreply.github.com> 
Date: Tue, 14 May 2024 15:41:27 -0700 Subject: [PATCH 7/7] Add Zero shot and Few Shot LLM Classification Notebooks --- .../iSamples Few Shot LLM.ipynb | 1 + .../iSamples_Zero_Shot_LLM.ipynb | 7527 +++++++++++++++++ 2 files changed, 7528 insertions(+) create mode 100644 SESAR/zeroshot-learning/iSamples Few Shot LLM.ipynb create mode 100644 SESAR/zeroshot-learning/iSamples_Zero_Shot_LLM.ipynb diff --git a/SESAR/zeroshot-learning/iSamples Few Shot LLM.ipynb b/SESAR/zeroshot-learning/iSamples Few Shot LLM.ipynb new file mode 100644 index 0000000..d6ec002 --- /dev/null +++ b/SESAR/zeroshot-learning/iSamples Few Shot LLM.ipynb @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[{"file_id":"1SHOqNZMsCboXhMDtNv5Pelq_uSzVRQ5k","timestamp":1711564733485}],"gpuType":"T4","machine_shape":"hm","authorship_tag":"ABX9TyNcHFBe8CVsiPJXSBoA3cAB"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU","widgets":{"application/vnd.jupyter.widget-state+json":{"34fdc80e3eef48429ee4c02289c36bc9":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_cdc46b71922e4846b30a3c9fd4d9d7bc","IPY_MODEL_8b27e5cfea2b4111b67604bb59228679","IPY_MODEL_e9dbeb7c7fa94590a98d263123fa9d76"],"layout":"IPY_MODEL_3378845bd06149d8ac466a5e379b024d"}},"cdc46b71922e4846b30a3c9fd4d9d7bc":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_v
iew_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_ca71b1f29a214000965a77d856575a39","placeholder":"​","style":"IPY_MODEL_93a941f989b14e75bb34789e6979aed1","value":"Loading checkpoint shards: 100%"}},"8b27e5cfea2b4111b67604bb59228679":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_d8647be5e99048b5b76c83311ab01036","max":2,"min":0,"orientation":"horizontal","style":"IPY_MODEL_256b8667399f4215bb33bdd80b984e05","value":2}},"e9dbeb7c7fa94590a98d263123fa9d76":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e8ee2302a09540b6a3a61f03bc197863","placeholder":"​","style":"IPY_MODEL_8374dc9e041343a6870298f2ab7cc09d","value":" 2/2 [00:50<00:00, 
23.85s/it]"}},"3378845bd06149d8ac466a5e379b024d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ca71b1f29a214000965a77d856575a39":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,
"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"93a941f989b14e75bb34789e6979aed1":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d8647be5e99048b5b76c83311ab01036":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"256b8667399f4215bb33bdd80b984e05":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"e8ee2302a09540b6a3a61f03bc197863":{"model_
module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"8374dc9e041343a6870298f2ab7cc09d":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"cells":[{"cell_type":"code","source":["from google.colab import drive\n","drive.mount('/content/gdrive')\n","%cd /content/gdrive/MyDrive"],"metadata":{"id":"D3iTBzjkH-MU","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1711574342632,"user_tz":420,"elapsed":1082,"user":{"displayName":"Hyunju Song","userId":"03844618074789275015"}},"outputId":"4ae83cfd-6d98-4267-96fb-2145eb1558de"},"execution_count":28,"outputs":[{"output_type":"stream","name":"stdout","text":["Drive already mounted at /content/gdrive; to attempt to forcibly remount, call 
drive.mount(\"/content/gdrive\", force_remount=True).\n","/content/gdrive/MyDrive\n"]}]},{"cell_type":"code","source":["# set env variables\n","TEST_DATASET = \"SESAR_ZTC_test_multi_entire_filtered2.csv\"\n","TRAIN_DATASET = \"SESAR_ZTC_train_multi_entire_filtered2.csv\" # used for example selection\n","OUTPUT_FILE = \"OUTPUT.json\""],"metadata":{"id":"SONTODi_WbYa","executionInfo":{"status":"ok","timestamp":1711574342632,"user_tz":420,"elapsed":2,"user":{"displayName":"Hyunju Song","userId":"03844618074789275015"}}},"execution_count":29,"outputs":[]},{"cell_type":"markdown","source":["# Set Up"],"metadata":{"id":"kZpc8Uvt5XCL"}},{"cell_type":"code","source":["!pip install datasets sentencepiece tokenizers bitsandbytes accelerate xformers einops\n","!pip install git+https://github.com/huggingface/transformers"],"metadata":{"id":"QOZjIsKd5dJI","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1711574369169,"user_tz":420,"elapsed":26538,"user":{"displayName":"Hyunju Song","userId":"03844618074789275015"}},"outputId":"6613b13e-5df1-4b4a-9b3f-f5fa5858eb0f"},"execution_count":30,"outputs":[{"output_type":"stream","name":"stdout","text":["Requirement already satisfied: datasets in /usr/local/lib/python3.10/dist-packages (2.18.0)\n","Requirement already satisfied: sentencepiece in /usr/local/lib/python3.10/dist-packages (0.1.99)\n","Requirement already satisfied: tokenizers in /usr/local/lib/python3.10/dist-packages (0.15.2)\n","Requirement already satisfied: bitsandbytes in /usr/local/lib/python3.10/dist-packages (0.43.0)\n","Requirement already satisfied: accelerate in /usr/local/lib/python3.10/dist-packages (0.28.0)\n","Requirement already satisfied: xformers in /usr/local/lib/python3.10/dist-packages (0.0.25)\n","Requirement already satisfied: einops in /usr/local/lib/python3.10/dist-packages (0.7.0)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from datasets) (3.13.3)\n","Requirement 
already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (1.25.2)\n","Requirement already satisfied: pyarrow>=12.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (14.0.2)\n","Requirement already satisfied: pyarrow-hotfix in /usr/local/lib/python3.10/dist-packages (from datasets) (0.6)\n","Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.3.8)\n","Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (1.5.3)\n","Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2.31.0)\n","Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (4.66.2)\n","Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets) (3.4.1)\n","Requirement already satisfied: multiprocess in /usr/local/lib/python3.10/dist-packages (from datasets) (0.70.16)\n","Requirement already satisfied: fsspec[http]<=2024.2.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2023.6.0)\n","Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.9.3)\n","Requirement already satisfied: huggingface-hub>=0.19.4 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.20.3)\n","Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets) (24.0)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (6.0.1)\n","Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from bitsandbytes) (2.2.1+cu121)\n","Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate) (5.9.5)\n","Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from accelerate) (0.4.2)\n","Requirement 
already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes) (4.10.0)\n","Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes) (1.12)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes) (3.2.1)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes) (3.1.3)\n","Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes) (12.1.105)\n","Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes) (12.1.105)\n","Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes) (12.1.105)\n","Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes) (8.9.2.26)\n","Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes) (12.1.3.1)\n","Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes) (11.0.2.54)\n","Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes) (10.3.2.106)\n","Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes) (11.4.5.107)\n","Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes) (12.1.0.106)\n","Requirement already satisfied: nvidia-nccl-cu12==2.19.3 in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes) (2.19.3)\n","Requirement already satisfied: 
nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes) (12.1.105)\n","Requirement already satisfied: triton==2.2.0 in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes) (2.2.0)\n","Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.10/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch->bitsandbytes) (12.4.99)\n","Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n","Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (23.2.0)\n","Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.4.1)\n","Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.0.5)\n","Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.9.4)\n","Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.3.2)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.6)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2.0.7)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2024.2.2)\n","Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n","Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) 
(2023.4)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->datasets) (1.16.0)\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->bitsandbytes) (2.1.5)\n","Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->bitsandbytes) (1.3.0)\n","Collecting git+https://github.com/huggingface/transformers\n"," Cloning https://github.com/huggingface/transformers to /tmp/pip-req-build-rig2kqmg\n"," Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers /tmp/pip-req-build-rig2kqmg\n"," Resolved https://github.com/huggingface/transformers to commit a25037beb9f039270b30a94c34ead72ea80ae8a5\n"," Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n"," Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n"," Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (3.13.3)\n","Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (0.20.3)\n","Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (1.25.2)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (24.0)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (6.0.1)\n","Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (2023.12.25)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (2.31.0)\n","Requirement already 
satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (0.15.2)\n","Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (0.4.2)\n","Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (4.66.2)\n","Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers==4.40.0.dev0) (2023.6.0)\n","Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers==4.40.0.dev0) (4.10.0)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.40.0.dev0) (3.3.2)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.40.0.dev0) (3.6)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.40.0.dev0) (2.0.7)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.40.0.dev0) (2024.2.2)\n"]}]},{"cell_type":"markdown","source":["# Download"],"metadata":{"id":"qtujFQnW5akB"}},{"cell_type":"code","source":["import torch\n","from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig\n","\n","device = \"cuda\"\n","# This causes OOM\n","\n","# model = AutoModelForCausalLM.from_pretrained(\n","# \"Open-Orca/Mistral-7B-OpenOrca\").to(device)\n","# tokenizer = AutoTokenizer.from_pretrained(\n","# \"Open-Orca/Mistral-7B-OpenOrca\")"],"metadata":{"id":"et7GPgVK5gvj","executionInfo":{"status":"ok","timestamp":1711574369169,"user_tz":420,"elapsed":4,"user":{"displayName":"Hyunju 
Song","userId":"03844618074789275015"}}},"execution_count":31,"outputs":[]},{"cell_type":"code","source":["import transformers\n","model_id = \"Open-Orca/Mistral-7B-OpenOrca\"\n","bnb_config = transformers.BitsAndBytesConfig(\n"," load_in_4bit=True,\n"," bnb_4bit_use_double_quant=True,\n"," bnb_4bit_quant_type=\"nf4\",\n"," bnb_4bit_compute_dtype=torch.bfloat16\n",")\n","\n","model = transformers.AutoModelForCausalLM.from_pretrained(\n"," model_id,\n"," trust_remote_code=True,\n"," quantization_config=bnb_config,\n"," device_map='auto',\n",")\n","\n","tokenizer = transformers.AutoTokenizer.from_pretrained(\n"," model_id,\n",")\n","model.config.use_cache = True"],"metadata":{"id":"WKKLVdta-X6w","colab":{"base_uri":"https://localhost:8080/","height":84,"referenced_widgets":["34fdc80e3eef48429ee4c02289c36bc9","cdc46b71922e4846b30a3c9fd4d9d7bc","8b27e5cfea2b4111b67604bb59228679","e9dbeb7c7fa94590a98d263123fa9d76","3378845bd06149d8ac466a5e379b024d","ca71b1f29a214000965a77d856575a39","93a941f989b14e75bb34789e6979aed1","d8647be5e99048b5b76c83311ab01036","256b8667399f4215bb33bdd80b984e05","e8ee2302a09540b6a3a61f03bc197863","8374dc9e041343a6870298f2ab7cc09d"]},"executionInfo":{"status":"ok","timestamp":1711574422502,"user_tz":420,"elapsed":53336,"user":{"displayName":"Hyunju Song","userId":"03844618074789275015"}},"outputId":"43e52096-5e61-431c-da06-268a9b1268e9"},"execution_count":32,"outputs":[{"output_type":"display_data","data":{"text/plain":["Loading checkpoint shards: 0%| | 0/2 [00:00 Orcas were not known to be drawn to mistral energy, but they were seen recently in the waters off the coast of France.\n","\n","A pod of orcas, or killer whales, were seen swimming in the waters off the coast of France, seemingly attracted by the energy from a wind farm.\n","\n","The unusual sighting was captured on video by a local resident, who noticed the orcas swimming in the area where the wind farm is located.\n","\n","The wind farm, which is powered by the wind, generates 
electricity and is considered a form of renewable energy. It is believed that the orcas were attracted to the area due to the movement of the water caused by the wind turbines.\n","\n","The orcas, which are known for their intelligence and curiosity, are often seen interacting with humans and other marine life. However, this is the first time they have been seen in such close proximity to a wind farm.\n","\n","The video of the orcas swimming near the wind farm has sparked a debate among marine biologists and environmentalists about the potential impact of such structures on marine life. Some argue that wind farms can disrupt the natural habitat of marine animals, while others believe that they can provide a new source of food and shelter for certain species.\n","\n","In this case, the orcas appear to be unaffect\n"]}]},{"cell_type":"code","source":["sys_prompt = \"A chat.\"\n","prompt = \"Tell me a joke.\"\n","\n","prefix = \"<|im_start|>\"\n","suffix = \"<|im_end|>\\n\"\n","sys_format = prefix + \"system\\n\" + sys_prompt + suffix\n","user_format = prefix + \"user\\n\" + prompt + suffix\n","assistant_format = prefix + \"assistant\\n\"\n","input_text = sys_format + user_format + assistant_format\n","\n","generation_config = GenerationConfig(\n"," max_length=256, temperature=1.1, top_p=0.95, repetition_penalty=1.0,\n"," do_sample=True, use_cache=True,\n"," eos_token_id=tokenizer.eos_token_id, pad_token_id=tokenizer.eos_token_id,\n"," transformers_version=\"4.34.0.dev0\")\n","\n","inputs = tokenizer(input_text, return_tensors=\"pt\", return_attention_mask=True).to(device)\n","outputs = model.generate(**inputs, generation_config=generation_config)\n","\n","text = tokenizer.batch_decode(outputs)[0]\n","print(text)"],"metadata":{"id":"orHjdlw76EhP","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1711574450061,"user_tz":420,"elapsed":2798,"user":{"displayName":"Hyunju 
Song","userId":"03844618074789275015"}},"outputId":"2b80285a-d52b-461b-d967-b53f46ab6d31"},"execution_count":34,"outputs":[{"output_type":"stream","name":"stdout","text":["<|im_start|> system\n","A chat.<|im_end|><|im_start|> user\n","Tell me a joke.<|im_end|><|im_start|> assistant\n"," Why did the scarecrow win an award? \n","\n","Because the jury thought his speech was outstanding.<|im_end|>\n"]}]},{"cell_type":"code","source":["print(text.split(\"<|im_end|>\")[2][len(\"<|im_start|> assistant\"):].strip(\"\\n\"))"],"metadata":{"id":"zviNf9VrF7e8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1711574450061,"user_tz":420,"elapsed":8,"user":{"displayName":"Hyunju Song","userId":"03844618074789275015"}},"outputId":"0391e5e0-2a36-4044-c396-c9edb73defad"},"execution_count":35,"outputs":[{"output_type":"stream","name":"stdout","text":[" Why did the scarecrow win an award? \n","\n","Because the jury thought his speech was outstanding.\n"]}]},{"cell_type":"markdown","source":["# Few Shot"],"metadata":{"id":"dEwZGnMf6gwT"}},{"cell_type":"code","source":["# load test dataset\n","import pandas as pd\n","test_df = pd.read_csv(TEST_DATASET)"],"metadata":{"id":"-VpaRb0y65vA","executionInfo":{"status":"ok","timestamp":1711574450061,"user_tz":420,"elapsed":3,"user":{"displayName":"Hyunju Song","userId":"03844618074789275015"}}},"execution_count":36,"outputs":[]},{"cell_type":"markdown","source":["## Set Up - Read Taxonomy Files"],"metadata":{"id":"VFESw5xj7DUj"}},{"cell_type":"code","source":["import json\n","leaf_material_types = list(open(\"unique_leaf_labels.txt\").read().splitlines())\n","joined_leaf_material_types = \"\\n\".join(leaf_material_types)\n","\n","leaf_to_entire_path_mapping = {}\n","with open('leaf_to_parents_mapping.json') as f:\n"," leaf_to_entire_path_mapping = json.load(f)\n","\n","mapping = json.load(open(\"label_to_parent_mapping.json\"))\n","label_to_parent = {}\n","for k, v in mapping.items():\n"," if 
\"material\" in v:\n"," v.remove(\"material\")\n"," label_to_parent[k] = v\n","\n","child_to_parent = json.load(open(\"child_to_parent_mapping.json\"))\n","def get_parent_labels(curr_labels):\n"," # return the parent labels (upper level labels)\n"," parent_labels = []\n"," for label in curr_labels:\n"," if label in child_to_parent and child_to_parent[label]:\n"," parent_labels.append(child_to_parent[label])\n"," parent_labels = list(set(parent_labels))\n"," return parent_labels\n"],"metadata":{"id":"-Lg7CG_R7G60","executionInfo":{"status":"ok","timestamp":1711574450061,"user_tz":420,"elapsed":3,"user":{"displayName":"Hyunju Song","userId":"03844618074789275015"}}},"execution_count":37,"outputs":[]},{"cell_type":"code","source":["import json\n","\n","# mapping to description and individual fields that contain geology terms that need to be enriched\n","# this can be easily generated by using mapping of two columns in dataframe\n","desc_to_tax_map = json.load(open(\"description_to_taxonomy_train_all.json\"))\n","desc_to_cm_map = json.load(open(\"description_to_collectionMethod_train_all.json\"))\n","desc_to_desc_map = json.load(open(\"description_to_description_train_all.json\"))\n","\n","def trim_mapping(mapping):\n"," return {k.split(\"\")[0][len(\"\"):] : v for k,v in mapping.items()}\n","desc_to_tax_map = trim_mapping(desc_to_tax_map)\n","desc_to_cm_map = trim_mapping(desc_to_cm_map)\n","desc_to_desc_map = trim_mapping(desc_to_desc_map)"],"metadata":{"id":"1D_Sho9GCIHr","executionInfo":{"status":"ok","timestamp":1711574459279,"user_tz":420,"elapsed":9220,"user":{"displayName":"Hyunju Song","userId":"03844618074789275015"}}},"execution_count":38,"outputs":[]},{"cell_type":"markdown","source":["# Set Up - SentBERT"],"metadata":{"id":"b_55ABTve44F"}},{"cell_type":"code","source":["!pip install -U sentence-transformers\n","from sentence_transformers import SentenceTransformer, util\n","sentbert_model = 
SentenceTransformer(\"allenai-specter\")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ssbcE6vVe64n","executionInfo":{"status":"ok","timestamp":1711574471485,"user_tz":420,"elapsed":12225,"user":{"displayName":"Hyunju Song","userId":"03844618074789275015"}},"outputId":"28c0cda4-2d06-4f16-f021-d62a685702a1"},"execution_count":39,"outputs":[{"output_type":"stream","name":"stdout","text":["Requirement already satisfied: sentence-transformers in /usr/local/lib/python3.10/dist-packages (2.6.1)\n","Requirement already satisfied: transformers<5.0.0,>=4.32.0 in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (4.40.0.dev0)\n","Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (4.66.2)\n","Requirement already satisfied: torch>=1.11.0 in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (2.2.1+cu121)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (1.25.2)\n","Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (1.2.2)\n","Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (1.11.4)\n","Requirement already satisfied: huggingface-hub>=0.15.1 in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (0.20.3)\n","Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (9.4.0)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence-transformers) (3.13.3)\n","Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence-transformers) (2023.6.0)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from 
huggingface-hub>=0.15.1->sentence-transformers) (2.31.0)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence-transformers) (6.0.1)\n","Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence-transformers) (4.10.0)\n","Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence-transformers) (24.0)\n","Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (1.12)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (3.2.1)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (3.1.3)\n","Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (12.1.105)\n","Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (12.1.105)\n","Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (12.1.105)\n","Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (8.9.2.26)\n","Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (12.1.3.1)\n","Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (11.0.2.54)\n","Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in 
/usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (10.3.2.106)\n","Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (11.4.5.107)\n","Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (12.1.0.106)\n","Requirement already satisfied: nvidia-nccl-cu12==2.19.3 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (2.19.3)\n","Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (12.1.105)\n","Requirement already satisfied: triton==2.2.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (2.2.0)\n","Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.10/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch>=1.11.0->sentence-transformers) (12.4.99)\n","Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.32.0->sentence-transformers) (2023.12.25)\n","Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.32.0->sentence-transformers) (0.15.2)\n","Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.32.0->sentence-transformers) (0.4.2)\n","Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->sentence-transformers) (1.3.2)\n","Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->sentence-transformers) (3.4.0)\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from 
jinja2->torch>=1.11.0->sentence-transformers) (2.1.5)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.15.1->sentence-transformers) (3.3.2)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.15.1->sentence-transformers) (3.6)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.15.1->sentence-transformers) (2.0.7)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.15.1->sentence-transformers) (2024.2.2)\n","Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.11.0->sentence-transformers) (1.3.0)\n"]}]},{"cell_type":"code","source":["train_df = pd.read_csv(TRAIN_DATASET)\n","train_descriptions = list([x.split(\"\")[0][len(\"\"):] for x in train_df[\"concatenated_text_B\"].tolist()])\n","descriptions_to_label = dict(zip(train_df['concatenated_text_B'], train_df['label_list']))\n","final_desc_to_label = {}\n","for k, v in descriptions_to_label.items():\n"," final_desc_to_label[k.split(\"\")[0][len(\"\"):]] = v\n"," final_desc_to_label[k.split(\"\")[0][len(\"\"):]+\".\"] = v\n","final_desc_to_label"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"a2utQlHUhIit","executionInfo":{"status":"ok","timestamp":1711574514255,"user_tz":420,"elapsed":42773,"user":{"displayName":"Hyunju Song","userId":"03844618074789275015"}},"outputId":"82a24d20-6116-4a84-daf4-43272f3c0c51"},"execution_count":40,"outputs":[{"output_type":"execute_result","data":{"text/plain":["{'The name of the specific place where the sample was collected is Windscoop Bluff, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a 
bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Neogene, The detailed description of the sample is Trachyte flow forming small bluff. Cropping out SW of Camp III. Finely vesicular, aphyric phonolite, The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Phonolite, The method by which a sample was collected is Manual, The free text description of the related URL is None': 'sediment/igneous rock/natural solid material/phonolitoid/fine grained igneous rock/rock',\n"," 'The name of the specific place where the sample was collected is Windscoop Bluff, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Neogene, The detailed description of the sample is Trachyte flow forming small bluff. Cropping out SW of Camp III. 
Finely vesicular, aphyric phonolite, The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Phonolite, The method by which a sample was collected is Manual, The free text description of the related URL is None.': 'sediment/igneous rock/natural solid material/phonolitoid/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is South Africa, The province where the sample was collected is Bushmanland, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Mapped coordinates and uncertainty for the locality string \"S of Garies\" in Northern Cape, South Africa using the situate.py script based on coordinates given by GeoNames. Another place name mentioned in the EMu record (\"Plug on Farm Dikdoorn\") could not be matched and was ignored when determining the coordinates given here. 
Point coordinates were rounded to 2 decimal places from the calculated values., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Melilite nephelinite, The additional information about the specific place where the sample was collected is plug on farm Dikdoorn, South of Garies, The free text description of the related URL is Smithsonian collections record for NMNH 117251-121 (PET)': 'sediment/igneous rock/natural solid material/foiditoid/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is South Africa, The province where the sample was collected is Bushmanland, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Mapped coordinates and uncertainty for the locality string \"S of Garies\" in Northern Cape, South Africa using the situate.py script based on coordinates given by GeoNames. Another place name mentioned in the EMu record (\"Plug on Farm Dikdoorn\") could not be matched and was ignored when determining the coordinates given here. 
Point coordinates were rounded to 2 decimal places from the calculated values., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Melilite nephelinite, The additional information about the specific place where the sample was collected is plug on farm Dikdoorn, South of Garies, The free text description of the related URL is Smithsonian collections record for NMNH 117251-121 (PET).': 'sediment/igneous rock/natural solid material/foiditoid/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is Costa Rica, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Arenal Volcano, Alajuela, Costa Rica (http://geonames.org/3624862) based on volcano name and country using the situate.py script. Another place name mentioned in the EMu record (Valley of Rio Tabacon) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all volcano records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Andesite with anorthite and olivine, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is Valley Of Rio Tabacon, The free text description of the related URL is Smithsonian collections record for NMNH 116228-4 (PET)': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is Costa Rica, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Arenal Volcano, Alajuela, Costa Rica (http://geonames.org/3624862) based on volcano name and country using the situate.py script. Another place name mentioned in the EMu record (Valley of Rio Tabacon) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all volcano records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Andesite with anorthite and olivine, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is Valley Of Rio Tabacon, The free text description of the related URL is Smithsonian collections record for NMNH 116228-4 (PET).': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is Not Applicable, The province where the sample was collected is Not Applicable, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Not Applicable, The name of institution, museum, or repository where the sample is currently stored is Not Provided, The city where the sample was collected is Not Applicable, The type of the primary location is IceShelf, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Not Applicable, The method by which a sample was collected is Sampler:Fluid:Bottle, The name or identifier of the field program (cruise or expedition), during which the sample was collected is NBP1001': 'liquid water/fluid material',\n"," 'The country where the sample was collected is Not Applicable, The province where the sample was collected is Not Applicable, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The age of a sample as described by the stratigraphic era, period, state, etc. is Not Applicable, The name of institution, museum, or repository where the sample is currently stored is Not Provided, The city where the sample was collected is Not Applicable, The type of the primary location is IceShelf, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Not Applicable, The method by which a sample was collected is Sampler:Fluid:Bottle, The name or identifier of the field program (cruise or expedition), during which the sample was collected is NBP1001.': 'liquid water/fluid material',\n"," 'The country where the sample was collected is Germany, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Chalcocite, The additional information about the 
specific place where the sample was collected is Mansfeld, The free text description of the related URL is Smithsonian collections record for NMNH B2311-00 (MIN)': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The country where the sample was collected is Germany, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Chalcocite, The additional information about the specific place where the sample was collected is Mansfeld, The free text description of the related URL is Smithsonian collections record for NMNH B2311-00 (MIN).': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The country where the sample was collected is Italy, The province where the sample was collected is Campania, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Vomero, Naples, Campania, Italy (http://geonames.org/3163948) based on municipality name, state/province, and country using the situate.py script. The script determined that this locality is located within Naples (featureCode=PPLA), another feature mentioned in this record. Other place names mentioned in the EMu record (Campi Flegrei and St Stefano) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 1 km was assigned to all municipality records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Naples, St Stefano, Vomero, The taxonomy informal classification of sample is Trachytic obsidian, The type of the primary location is Volcano, The free text description of the related URL is Smithsonian collections record for NMNH 91983 (PET)': 'sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is Italy, The province where the sample was collected is Campania, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Vomero, Naples, Campania, Italy (http://geonames.org/3163948) based on municipality name, state/province, and country using the situate.py script. The script determined that this locality is located within Naples (featureCode=PPLA), another feature mentioned in this record. Other place names mentioned in the EMu record (Campi Flegrei and St Stefano) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 1 km was assigned to all municipality records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Naples, St Stefano, Vomero, The taxonomy informal classification of sample is Trachytic obsidian, The type of the primary location is Volcano, The free text description of the related URL is Smithsonian collections record for NMNH 91983 (PET).': 'sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to USGS MRDS result for Old Reliable Mine, Santa Fe Co., NM. Lucas Mine is listed as an alternate name for this locality. The Deposit ID for this locality is 10224207. 
URL: https://mrdata.usgs.gov/mrds/show-mrds.php?dep_id=10224207., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Shale, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Just Below The Old Reliable Mine, The free text description of the related URL is Smithsonian collections record for NMNH 87905-28 (PET)': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to USGS MRDS result for Old Reliable Mine, Santa Fe Co., NM. Lucas Mine is listed as an alternate name for this locality. The Deposit ID for this locality is 10224207. 
URL: https://mrdata.usgs.gov/mrds/show-mrds.php?dep_id=10224207., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Shale, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Just Below The Old Reliable Mine, The free text description of the related URL is Smithsonian collections record for NMNH 87905-28 (PET).': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is Germany, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Otting, The taxonomy informal classification of sample is Shocked granite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is W Side Of Quarry, The free text description of the related URL is Smithsonian collections record for NMNH 116515-156 (PET)': 'sediment/impact generated material/natural solid material/rock',\n"," 'The country where the sample was collected is Germany, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Otting, The taxonomy informal classification of sample is Shocked granite, The type of the primary location is Mine, The additional information about the specific 
place where the sample was collected is W Side Of Quarry, The free text description of the related URL is Smithsonian collections record for NMNH 116515-156 (PET).': 'sediment/impact generated material/natural solid material/rock',\n"," 'The name of the specific place where the sample was collected is New Madrid core, The country where the sample was collected is United States, The province where the sample was collected is Missouri, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Paleocene, The free text description of the location is 0.05 mi NW of intersection Rte 162 with unnumbered county rd, W of Linda, The name of institution, museum, or repository where the sample is currently stored is U.S. Geological Survey, Reston Nannofossil laboratory, The taxonomy informal classification of sample is Macrofossils': 'natural solid material/sediment/rock',\n"," 'The name of the specific place where the sample was collected is New Madrid core, The country where the sample was collected is United States, The province where the sample was collected is Missouri, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Paleocene, The free text description of the location is 0.05 mi NW of intersection Rte 162 with unnumbered county rd, W of Linda, The name of institution, museum, or repository where the sample is currently stored is U.S. 
Geological Survey, Reston Nannofossil laboratory, The taxonomy informal classification of sample is Macrofossils.': 'natural solid material/sediment/rock',\n"," 'The country where the sample was collected is Saudi Arabia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Pyroxenite, The type of the primary location is Volcano, The free text description of the related URL is Smithsonian collections record for NMNH 118255-542 (PET)': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock',\n"," 'The country where the sample was collected is Saudi Arabia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Pyroxenite, The type of the primary location is Volcano, The free text description of the related URL is Smithsonian collections record for NMNH 118255-542 (PET).': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is Woods Hole Oceanographic Institution (WHOI), The taxonomy informal classification of sample is Pl-Ol Pillow Basalt, The method by which a sample was collected is Dredging, The 
name or identifier of the field program (cruise or expedition), during which the sample was collected is KN210-05, The free text description of the related URL is Related EarthChem Library dataset, Dredge Records for Knorr Cruise 210, Leg 5': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is Woods Hole Oceanographic Institution (WHOI), The taxonomy informal classification of sample is Pl-Ol Pillow Basalt, The method by which a sample was collected is Dredging, The name or identifier of the field program (cruise or expedition), during which the sample was collected is KN210-05, The free text description of the related URL is Related EarthChem Library dataset, Dredge Records for Knorr Cruise 210, Leg 5.': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The name of the specific place where the sample was collected is Sandstone quarry Obersulzbach, The country where the sample was collected is Germany, The province where the sample was collected is Rhineland-Palatinate, The object type of sample indicates that this sample is a synthetic material used during an experiment, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Cisuralian, The city where the sample was collected is Obersulzbach, The taxonomy informal classification of sample is Lithic arenite, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Disibodenberg formation, The method by which a sample was collected is Coring>RockCorer': 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," 'The name of the specific place where the sample was collected is Sandstone quarry Obersulzbach, The country where the sample was collected is Germany, The province where the sample was collected is Rhineland-Palatinate, The object type of sample indicates that this sample is a synthetic material used during an experiment, The age of a sample as described by the stratigraphic era, period, state, etc. is Cisuralian, The city where the sample was collected is Obersulzbach, The taxonomy informal classification of sample is Lithic arenite, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Disibodenberg formation, The method by which a sample was collected is Coring>RockCorer.': 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text to describe the collection purpose of the sample is Biogeochemistry; 1000s of marine animals are available from many depths for trace element analysis, The name of institution, museum, or repository where the sample is currently stored is OSU Marine and Geology Repository, The taxonomy informal classification of sample is Ash sample': 'organic material',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text to describe the collection purpose of the sample is 
Biogeochemistry; 1000s of marine animals are available from many depths for trace element analysis, The name of institution, museum, or repository where the sample is currently stored is OSU Marine and Geology Repository, The taxonomy informal classification of sample is Ash sample.': 'organic material',\n"," 'The name of the specific place where the sample was collected is Azores Plateau , The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Specimen, The type of platform for the cruise is ship, The free text to describe the collection purpose of the sample is Petrology/Geochemistry, The name of institution, museum, or repository where the sample is currently stored is GeoZentrum Nordbayern, Friedrich-Alexander Universität Erlangen-Nürnberg, The type of the primary location is volcano, The method by which a sample was collected is Dredging, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Pos232': 'sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The name of the specific place where the sample was collected is Azores Plateau , The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Specimen, The type of platform for the cruise is ship, The free text to describe the collection purpose of the sample is Petrology/Geochemistry, The name of institution, museum, or repository where the sample is currently stored is GeoZentrum Nordbayern, Friedrich-Alexander Universität Erlangen-Nürnberg, The type of the primary location is volcano, The method by which a sample was collected is Dredging, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Pos232.': 'sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Puerto Rico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Specimen, The detailed description of the sample is Interchangeable soil subsample 5 for microbial sequencing analyses, The name of institution, museum, or repository where the sample is currently stored is Soil Microbe Collection (Bulk Subsamples) (NEONSOMC-BS), The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL': 'natural solid material/soil',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Puerto Rico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Specimen, The detailed description of the sample is Interchangeable soil subsample 5 for microbial sequencing analyses, The name of institution, museum, or repository where the sample is currently stored is Soil Microbe Collection (Bulk Subsamples) (NEONSOMC-BS), The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL.': 'natural solid material/soil',\n"," 'The name of the specific place where the sample was collected is Azores Plateau , The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Specimen, The type of platform for the cruise is ship, The free text to describe the collection purpose of the sample is Petrology/Geochemistry, The name of institution, museum, or repository where the sample is currently stored is GeoZentrum Nordbayern, Friedrich-Alexander Universität Erlangen-Nürnberg, The type of the primary location is volcano, The method by which a sample was collected is Grab>ROV, The name or identifier of the field program (cruise or expedition), during which the sample was collected is M128': 'sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The name of the specific place where the sample was collected is Azores Plateau , The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Specimen, The type of platform for the cruise is ship, The free text to describe the collection purpose of the sample is Petrology/Geochemistry, The name of institution, museum, or repository where the sample is currently stored is GeoZentrum Nordbayern, Friedrich-Alexander Universität Erlangen-Nürnberg, The type of the primary location is volcano, The method by which a sample was collected is Grab>ROV, The name or identifier of the field program (cruise or expedition), during which the sample was collected is M128.': 'sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Cube, The free text to describe the collection purpose of the sample is Undergraduate Research, The name of institution, museum, or repository where the sample is currently stored is Northern Arizona University, The taxonomy informal classification of sample is Basalt, The type of the primary location is Grand Falls , The method by which a sample was collected is Manual': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Cube, The free text to describe the collection purpose of the sample is Undergraduate Research, The name of institution, museum, or repository where the sample is currently stored is Northern Arizona University, The taxonomy informal classification of sample is Basalt, The type of the primary location is Grand Falls , The method by which a sample was collected is Manual.': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Maine, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Moosehead Lake, Piscataquis Co., Maine, United States (http://geonames.org/4972557) based on map name, state/province, and country using the situate.py script. The script interepreted Moosehead Lake Quad as a synonym for Moosehead Lake. Another place name mentioned in the EMu record (Moxie Pluton) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all map records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Hornblende diorite with hornblende, The free text description of the related URL is Smithsonian collections record for NMNH 115081-177 (PET)': 'dioritoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Maine, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Moosehead Lake, Piscataquis Co., Maine, United States (http://geonames.org/4972557) based on map name, state/province, and country using the situate.py script. The script interepreted Moosehead Lake Quad as a synonym for Moosehead Lake. Another place name mentioned in the EMu record (Moxie Pluton) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all map records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Hornblende diorite with hornblende, The free text description of the related URL is Smithsonian collections record for NMNH 115081-177 (PET).': 'dioritoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is Mexico, The province where the sample was collected is Colima-Jalisco border, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Andesite, The type of the primary location is Volcano, The free text description of the related URL is Smithsonian collections record for NMNH 117593-100 (PET)': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is Mexico, The province where the sample was collected is Colima-Jalisco border, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Andesite, The type of the primary location is Volcano, The free text description of the related URL is Smithsonian collections record for NMNH 117593-100 (PET).': 'andesite/sediment/igneous rock/natural solid material/intermediate 
composition igneous rock/rock',\n"," 'The country where the sample was collected is Canada, The province where the sample was collected is Ontario, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Ontario, Canada (http://geonames.org/7626259) based on state/province name and country using the situate.py script. Another place name mentioned in the EMu record (Sudbury) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. The uncertainty radius represents the center-to-corner distance of the bounding box (100 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Gabbro, The additional information about the specific place where the sample was collected is Sudbury, The free text description of the related URL is Smithsonian collections record for NMNH 112669-35 (PET)': 'gabbroid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is Canada, The province where the sample was collected is Ontario, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Ontario, Canada (http://geonames.org/7626259) based on state/province name and country using the situate.py script. Another place name mentioned in the EMu record (Sudbury) could not be matched and was ignored when determining the coordinates given here. 
Point coordinates were rounded to 2 decimal places from the values given by GeoNames. The uncertainty radius represents the center-to-corner distance of the bounding box (100 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Gabbro, The additional information about the specific place where the sample was collected is Sudbury, The free text description of the related URL is Smithsonian collections record for NMNH 112669-35 (PET).': 'gabbroid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The province where the sample was collected is Not Applicable, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The age of a sample as described by the stratigraphic era, period, state, etc. is Not Applicable, The name of institution, museum, or repository where the sample is currently stored is University of Victoria (UVic), The city where the sample was collected is Not Applicable, The type of the primary location is Volcano, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Not Applicable, The method by which a sample was collected is Sampler:Fluid:Bottle, The name or identifier of the field program (cruise or expedition), during which the sample was collected is TN232': 'liquid water/fluid material',\n"," 'The province where the sample was collected is Not Applicable, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Not Applicable, The name of institution, museum, or repository where the sample is currently stored is University of Victoria (UVic), The city where the sample was collected is Not Applicable, The type of the primary location is Volcano, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Not Applicable, The method by which a sample was collected is Sampler:Fluid:Bottle, The name or identifier of the field program (cruise or expedition), during which the sample was collected is TN232.': 'liquid water/fluid material',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Tennessee, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Williamson Co., Tennessee, United States (http://geonames.org/4668054) based on district/county name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (\"(S W Corner)\") could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The uncertainty radius represents the center-to-corner distance of the bounding box (34 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is (S W Corner), The taxonomy informal classification of sample is Phosphate concretion, The free text description of the related URL is Smithsonian collections record for NMNH 91246 (PET)': 'sediment/natural solid material/chemical sedimentary material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Tennessee, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Williamson Co., Tennessee, United States (http://geonames.org/4668054) based on district/county name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (\"(S W Corner)\") could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The uncertainty radius represents the center-to-corner distance of the bounding box (34 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is (S W Corner), The taxonomy informal classification of sample is Phosphate concretion, The free text description of the related URL is Smithsonian collections record for NMNH 91246 (PET).': 'sediment/natural solid material/chemical sedimentary material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Maine, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Greenwood, Oxford Co., Maine, United States (http://geonames.org/4965991) based on feature name, district/county, state/province, and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Fluorapatite, The additional information about the specific place where the sample was collected is Greenwood, The free text description of the related URL is Smithsonian collections record for NMNH 158566-00 (MIN)': 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Maine, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Greenwood, Oxford Co., Maine, United States (http://geonames.org/4965991) based on feature name, district/county, state/province, and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Fluorapatite, The additional information about the specific place where the sample was collected is Greenwood, The free text description of the related URL is Smithsonian collections record for NMNH 158566-00 (MIN).': 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," \"The country where the sample was collected is France, The province where the sample was collected is Provence-Alpes-Côte d'Azur, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record with a similar name, Cap Garonne Mine, Le Pradet, Var, Provence-Alpes-Côte d'Azur, France. Data manager assigned an arbitrary error radius of 1 km. 
URL: https://www.mindat.org/loc-1747.html., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Adamite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Hyeres (Near), Cap Garonne, The free text description of the related URL is Smithsonian collections record for NMNH C4178-00 (MIN)\": 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," \"The country where the sample was collected is France, The province where the sample was collected is Provence-Alpes-Côte d'Azur, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record with a similar name, Cap Garonne Mine, Le Pradet, Var, Provence-Alpes-Côte d'Azur, France. Data manager assigned an arbitrary error radius of 1 km. 
URL: https://www.mindat.org/loc-1747.html., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Adamite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Hyeres (Near), Cap Garonne, The free text description of the related URL is Smithsonian collections record for NMNH C4178-00 (MIN).\": 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," \"The country where the sample was collected is United States, The province where the sample was collected is Pennsylvania, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record with a similar name, General Trimble's Mine, East Whiteland Township, Chester Co., Pennsylvania, USA. Data manager assigned an arbitrary error radius of 1 km. 
URL: https://www.mindat.org/loc-11314.html., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Wavellite, The type of the primary location is Mine, The free text description of the related URL is Smithsonian collections record for NMNH M16630-00 (MIN)\": 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," \"The country where the sample was collected is United States, The province where the sample was collected is Pennsylvania, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record with a similar name, General Trimble's Mine, East Whiteland Township, Chester Co., Pennsylvania, USA. Data manager assigned an arbitrary error radius of 1 km. URL: https://www.mindat.org/loc-11314.html., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Wavellite, The type of the primary location is Mine, The free text description of the related URL is Smithsonian collections record for NMNH M16630-00 (MIN).\": 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," 'The country where the sample was collected is Canada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple records from GeoNames matched the locality Flathead Range, Canada, including http://geonames.org/5954270 and http://geonames.org/5954271. 
The situate.py script was unable to distinguish between these localities, and the coordinates and error radius given here describe a circle encompassing both localities. This was the most specific place name found in this record., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Tuff-breccia, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Purcell lava, The free text description of the related URL is Smithsonian collections record for NMNH 117768-9 (PET)': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is Canada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple records from GeoNames matched the locality Flathead Range, Canada, including http://geonames.org/5954270 and http://geonames.org/5954271. The situate.py script was unable to distinguish between these localities, and the coordinates and error radius given here describe a circle encompassing both localities. 
This was the most specific place name found in this record., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Tuff-breccia, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Purcell lava, The free text description of the related URL is Smithsonian collections record for NMNH 117768-9 (PET).': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is Philippines, The province where the sample was collected is Camarines Norte Prov, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Paracales, The taxonomy informal classification of sample is Gold ore, The type of the primary location is Mine, The free text description of the related URL is Smithsonian collections record for NMNH 76511-2 (PET)': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The country where the sample was collected is Philippines, The province where the sample was collected is Camarines Norte Prov, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Paracales, The taxonomy informal classification of sample is Gold ore, The type of the primary location is Mine, The free text description of the related URL is Smithsonian 
collections record for NMNH 76511-2 (PET).': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Cambrian, The detailed description of the sample is Silty fine to medium grained sandstone, nodular spalls [?] in some layers. Possible calcareous rock and chert nodules. Green, purple, brown. Bedding 2-20cm [thick]. Fractured extensively. Bedded but heavily altered so no dip measurable., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Sandstone, The free text description of the related URL is None': 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Cambrian, The detailed description of the sample is Silty fine to medium grained sandstone, nodular spalls [?] in some layers. Possible calcareous rock and chert nodules. Green, purple, brown. Bedding 2-20cm [thick]. Fractured extensively. 
Bedded but heavily altered so no dip measurable., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Sandstone, The free text description of the related URL is None.': 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," 'The country where the sample was collected is Iceland, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Holocene, The name of institution, museum, or repository where the sample is currently stored is SOEST, University of Hawaii at Manoa, Honolulu, HI, The taxonomy informal classification of sample is basalt, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Lambahraun': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is Iceland, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Holocene, The name of institution, museum, or repository where the sample is currently stored is SOEST, University of Hawaii at Manoa, Honolulu, HI, The taxonomy informal classification of sample is basalt, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Lambahraun.': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The name of the specific place where the sample was collected is Daviess 873 core site, The country where the sample was collected is United States, The province where the sample was collected is Indiana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Phanerozoic>Paleozoic>Devonian>Frasnian, The free text to describe the collection purpose of the sample is Collected from Daviess 873 core by Sarah De La Rue for palynology, The detailed description of the sample is shale, TOC=5.93%, The name of institution, museum, or repository where the sample is currently stored is Astrobiogeochemistry Laboratory, Jet Propulsion Lab, Pasasdena, California, The taxonomy informal classification of sample is shale, A body of rock established as a distinct entity in the classification of the Earth’s rocks is New Albany Shale>Selmier': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The name of the specific place where the sample was collected is Daviess 873 core site, The country where the sample was collected is United States, The province where the sample was collected is Indiana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Phanerozoic>Paleozoic>Devonian>Frasnian, The free text to describe the collection purpose of the sample is Collected from Daviess 873 core by Sarah De La Rue for palynology, The detailed description of the sample is shale, TOC=5.93%, The name of institution, museum, or repository where the sample is currently stored is Astrobiogeochemistry Laboratory, Jet Propulsion Lab, Pasasdena, California, The taxonomy informal classification of sample is shale, A body of rock established as a distinct entity in the classification of the Earth’s rocks is New Albany Shale>Selmier.': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Virginia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Amelia Co., Virginia, United States (http://geonames.org/4744308) based on feature name, district/county, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Rutherford Mine) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Cassiterite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Amelia, The free text description of the related URL is Smithsonian collections record for NMNH C5393-05 (MIN)': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Virginia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Amelia Co., Virginia, United States (http://geonames.org/4744308) based on feature name, district/county, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Rutherford Mine) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Cassiterite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Amelia, The free text description of the related URL is Smithsonian collections record for NMNH C5393-05 (MIN).': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Idaho, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Idaho, United States (http://geonames.org/5596512) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Tuff, The additional information about the specific place where the sample was collected is Locality Key: Boise Quad, The free text description of the related URL is Smithsonian collections record for NMNH 73756-104 (PET)': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Idaho, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Idaho, United States (http://geonames.org/5596512) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Tuff, The additional information about the specific place where the sample was collected is Locality Key: Boise Quad, The free text description of the related URL is Smithsonian collections record for NMNH 73756-104 (PET).': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Kentucky, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Silurian, The free text description of the location is Matched to the GeoNames record for Nelson Co., Kentucky, United States (http://geonames.org/4302277) based on district/county name, state/province, and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Limestone, The free text description of the related URL is Smithsonian collections record for NMNH 36904-1 (PET)': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Kentucky, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Silurian, The free text description of the location is Matched to the GeoNames record for Nelson Co., Kentucky, United States (http://geonames.org/4302277) based on district/county name, state/province, and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Limestone, The free text description of the related URL is Smithsonian collections record for NMNH 36904-1 (PET).': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The country where the sample was collected is Germany, The province where the sample was collected is North Rhine-Westphalia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record for Friedrich Mine, Müsen, Hilchenbach, Siegerland, North Rhine-Westphalia, Germany. URL: https://www.mindat.org/loc-157984.html., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Polydymite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Musen, The free text description of the related URL is Smithsonian collections record for NMNH M12082-00 (MIN)': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The country where the sample was collected is Germany, The province where the sample was collected is North Rhine-Westphalia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record for Friedrich Mine, Müsen, Hilchenbach, Siegerland, North Rhine-Westphalia, Germany. 
URL: https://www.mindat.org/loc-157984.html., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Polydymite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Musen, The free text description of the related URL is Smithsonian collections record for NMNH M12082-00 (MIN).': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The name of the specific place where the sample was collected is Gulf of California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is Monterey Bay Aquarium Research Institute, The taxonomy informal classification of sample is basaltic andesite, The type of the primary location is ridge, The method by which a sample was collected is Grab>ROV, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Gulf of California 2015': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The name of the specific place where the sample was collected is Gulf of California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is Monterey Bay Aquarium Research Institute, The taxonomy informal classification of sample is basaltic andesite, The type of the primary location is ridge, The method by which a sample was collected is Grab>ROV, The name or identifier of the field program (cruise or expedition), during which the sample was 
collected is Gulf of California 2015.': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is Canada, The province where the sample was collected is Quebec, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Coordinates for Orchan (Deposit ID: 10206365) are from the USGS Mineral Resources Data System dataset (downloaded 2015-08-13). Record matched uniquely on mine name, country, and state., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Ore, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is 2nd Level, 1-a Orebody, The free text description of the related URL is Smithsonian collections record for NMNH 117061-27 (PET)': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The country where the sample was collected is Canada, The province where the sample was collected is Quebec, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Coordinates for Orchan (Deposit ID: 10206365) are from the USGS Mineral Resources Data System dataset (downloaded 2015-08-13). 
Record matched uniquely on mine name, country, and state., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Ore, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is 2nd Level, 1-a Orebody, The free text description of the related URL is Smithsonian collections record for NMNH 117061-27 (PET).': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The name of the specific place where the sample was collected is Ahumada Mine, The country where the sample was collected is Mexico, The province where the sample was collected is Chihuahua, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The city where the sample was collected is Los Lamentos District, The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The name of the specific place where the sample was collected is Ahumada Mine, The country where the sample was collected is Mexico, The province where the sample was collected is Chihuahua, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is , The name of institution, museum, or repository where the sample is 
currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The city where the sample was collected is Los Lamentos District, The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual.': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Arizona, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is This record was georeferenced by an intern in the NMNH Information Technology Office. No estimate of precision was given. All Mineral Sciences records georeferenced as part of this program were assigned a minimum uncertainty of 10 km., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Cuprite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Rio Francisco, The free text description of the related URL is Smithsonian collections record for NMNH 8429-00 (MIN)': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Arizona, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is This record was georeferenced by an intern in the NMNH Information Technology Office. No estimate of precision was given. 
All Mineral Sciences records georeferenced as part of this program were assigned a minimum uncertainty of 10 km., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Cuprite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Rio Francisco, The free text description of the related URL is Smithsonian collections record for NMNH 8429-00 (MIN).': 'natural solid material/mineral/mineral-oxide',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Triassic, The detailed description of the sample is Black shaly mudstone., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Mudstone, The free text description of the related URL is None': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Triassic, The detailed description of the sample is Black shaly mudstone., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Mudstone, The free text description of the related URL is None.': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is Australia, The province where the sample was collected is Queensland, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Walloon Subgroup, The free text description of the location is Stratheden-60-18a, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is tuff, The type of the primary location is Surat Basin': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is Australia, The province where the sample was collected is Queensland, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Walloon Subgroup, The free text description of the location is Stratheden-60-18a, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is tuff, The type of the primary location is Surat Basin.': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Nevada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Gilbert, The taxonomy informal classification of sample is Obsidian, The additional information about the specific place where the sample was collected is from litter overlying mt; where Highway 120 cuts contact between flow and tuff beds; top of distal portion of flow, The free text description of the related URL is Smithsonian collections record for NMNH 117463-22 (PET)': 'sediment/igneous rock/natural solid material/glass rich igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Nevada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Gilbert, The taxonomy informal classification of sample is Obsidian, The additional information about the specific place where the sample was collected is from litter overlying mt; where Highway 
120 cuts contact between flow and tuff beds; top of distal portion of flow, The free text description of the related URL is Smithsonian collections record for NMNH 117463-22 (PET).': 'sediment/igneous rock/natural solid material/glass rich igneous rock/rock',\n"," 'The country where the sample was collected is Peru, The province where the sample was collected is Lima, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Oyon, Lima region, Peru (http://geonames.org/8349566) based on district/county name, state/province, and country using the situate.py script. The script determined that the two sites that best match the information in this record are related and kept the less specific one (featureCode=ADM2). Another place name mentioned in the EMu record (Uchucchacua mine) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The uncertainty radius represents the center-to-corner distance of the bounding box (46 km)., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Alabandite, The type of the primary location is Mine, The free text description of the related URL is Smithsonian collections record for NMNH C7750-00 (MIN)': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The country where the sample was collected is Peru, The province where the sample was collected is Lima, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Oyon, Lima region, Peru (http://geonames.org/8349566) based on district/county name, state/province, and country using the situate.py script. The script determined that the two sites that best match the information in this record are related and kept the less specific one (featureCode=ADM2). Another place name mentioned in the EMu record (Uchucchacua mine) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The uncertainty radius represents the center-to-corner distance of the bounding box (46 km)., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Alabandite, The type of the primary location is Mine, The free text description of the related URL is Smithsonian collections record for NMNH C7750-00 (MIN).': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The country where the sample was collected is Guam, The province where the sample was collected is Not Applicable, The object type of sample indicates that this sample is a group of rocks collected by dragging a dredge along the seafloor, The type of platform for the cruise is Ship, The free text description of the location is caldera 3 wall, The detailed description of the sample is dredge 1/2 full of rocks, pipe dredge full of rocks and sediment, The name of institution, museum, or repository where the sample is currently stored is University of Rhode Island Marine Geological Samples Laboratory (MGSL), The city where the sample was collected is Not Applicable, The taxonomy informal classification of sample is Lava/Sediment, The type of the primary location is submarine arc, The additional information about the method by which a sample was collected is Metal bag dragged across the seafloor, The method by which a sample was collected is Dredging, The name or identifier of the field program (cruise or expedition), during which the sample was collected is TN273, The free text description of the related URL is Link to IMLGS, Index to Marine & Lacustrine Geological Samples': 'clastic sediment/sediment/natural solid material/rock',\n"," 'The country where the sample was collected is Guam, The province where the sample was collected is Not Applicable, The object type of sample indicates that this sample is a group of rocks collected by dragging a dredge along the 
seafloor, The type of platform for the cruise is Ship, The free text description of the location is caldera 3 wall, The detailed description of the sample is dredge 1/2 full of rocks, pipe dredge full of rocks and sediment, The name of institution, museum, or repository where the sample is currently stored is University of Rhode Island Marine Geological Samples Laboratory (MGSL), The city where the sample was collected is Not Applicable, The taxonomy informal classification of sample is Lava/Sediment, The type of the primary location is submarine arc, The additional information about the method by which a sample was collected is Metal bag dragged across the seafloor, The method by which a sample was collected is Dredging, The name or identifier of the field program (cruise or expedition), during which the sample was collected is TN273, The free text description of the related URL is Link to IMLGS, Index to Marine & Lacustrine Geological Samples.': 'clastic sediment/sediment/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Tennessee, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Maynardsville, The taxonomy informal classification of sample is Kimberlite, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Norris Lake Kimberlite, The additional information about the specific place where the sample was collected is Clark Hollow, Just Sw Of Island F And Opposite Rabbit Island, 2 Miles From Hickory Star Landing; N Shore Of Norris Lake.; N Of Maynardsville, The free text description of the related URL is Smithsonian collections record 
for NMNH 117154-8 (PET)': 'sediment/igneous rock/natural solid material/exotic composition igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Tennessee, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Maynardsville, The taxonomy informal classification of sample is Kimberlite, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Norris Lake Kimberlite, The additional information about the specific place where the sample was collected is Clark Hollow, Just Sw Of Island F And Opposite Rabbit Island, 2 Miles From Hickory Star Landing; N Shore Of Norris Lake.; N Of Maynardsville, The free text description of the related URL is Smithsonian collections record for NMNH 117154-8 (PET).': 'sediment/igneous rock/natural solid material/exotic composition igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Triassic, The detailed description of the sample is Quartz grit, coarse grained, poorly sorted, angular to rounded, quartz and feldspar dominate, The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Grit, The method by which a sample was collected is Manual, The free text description of the related URL is None': 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Triassic, The detailed description of the sample is Quartz grit, coarse grained, poorly sorted, angular to rounded, quartz and feldspar dominate, The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Grit, The method by which a sample was collected is Manual, The free text description of the related URL is None.': 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Specimen, The detailed description of the sample is Individual Carabid identified and pinned, The name of institution, museum, or repository where the sample is currently stored is Carabid Collection (Pinned Vouchers) (NEONCARC-PV), The taxonomy informal classification of sample is Cratacanthus dubius (Palisot de Beauvois, 1811), The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL': 'organic material',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Specimen, The detailed description of the sample is Individual Carabid identified and pinned, The name of institution, museum, or repository where the sample is currently stored is Carabid Collection (Pinned Vouchers) (NEONCARC-PV), The taxonomy informal classification of sample is Cratacanthus dubius (Palisot de Beauvois, 1811), The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL.': 'organic material',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Virginia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record for New Goose Creek Quarry, Leesburg, Loudoun Co., Virginia, USA. 
Nearby Old Goose Creek Quarry, Leesburg, Loudoun Co., Virginia (URL: https://www.mindat.org/loc-104169.html), may also be a possibility. Data manager assigned an arbitrary error radius of 1 km., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Sphalerite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Leesburg, The free text description of the related URL is Smithsonian collections record for NMNH 168100-00 (MIN)': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Virginia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record for New Goose Creek Quarry, Leesburg, Loudoun Co., Virginia, USA. Nearby Old Goose Creek Quarry, Leesburg, Loudoun Co., Virginia (URL: https://www.mindat.org/loc-104169.html), may also be a possibility. 
Data manager assigned an arbitrary error radius of 1 km., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Sphalerite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Leesburg, The free text description of the related URL is Smithsonian collections record for NMNH 168100-00 (MIN).': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The country where the sample was collected is Germany, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Oberlahnstein, Rheinland-Pfalz, Germany (http://geonames.org/11977045) based on feature name and country using the situate.py script. The script determined that the two sites that best match the information in this record are related and kept the less specific one (featureCode=RSTN). Another place name mentioned in the EMu record (Friedrichssegen Mine) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 1 km was assigned to all featureCode=RSTN records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Cerussite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Oberlahnstein (Near), The free text description of the related URL is Smithsonian collections record for NMNH B10273-00 (MIN)': 'natural solid material/mineral-carbonate or nitrate/mineral',\n"," 'The country where the sample was collected is Germany, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Oberlahnstein, Rheinland-Pfalz, Germany (http://geonames.org/11977045) based on feature name and country using the situate.py script. The script determined that the two sites that best match the information in this record are related and kept the less specific one (featureCode=RSTN). Another place name mentioned in the EMu record (Friedrichssegen Mine) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 1 km was assigned to all featureCode=RSTN records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Cerussite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Oberlahnstein (Near), The free text description of the related URL is Smithsonian collections record for NMNH B10273-00 (MIN).': 'natural solid material/mineral-carbonate or nitrate/mineral',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Washington, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Specimen, The detailed description of the sample is Individual Carabid identified and pinned, The name of institution, museum, or repository where the sample is currently stored is Carabid Collection (Pinned Vouchers) (NEONCARC-PV), The taxonomy informal classification of sample is Omus dejeanii Reiche, 1838, The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL': 'organic material',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Washington, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Specimen, The detailed description of the sample is Individual Carabid identified and pinned, The name of institution, museum, or repository where the sample is currently stored is Carabid Collection (Pinned Vouchers) (NEONCARC-PV), The taxonomy informal classification of sample is Omus dejeanii Reiche, 1838, The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL.': 'organic material',\n"," 'The name of the specific place where the sample was collected is Minna Hook, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Neogene, The detailed description of the sample is North side of Inclusion Stock. Trachytic textured kaersutite (3%) trachyte (similar to AW82146)., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Trachyte, The free text description of the related URL is None': 'sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock',\n"," 'The name of the specific place where the sample was collected is Minna Hook, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Neogene, The detailed description of the sample is North side of Inclusion Stock. 
Trachytic textured kaersutite (3%) trachyte (similar to AW82146)., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Trachyte, The free text description of the related URL is None.': 'sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Idaho, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is north of Emmett, ID, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The city where the sample was collected is Emmett, The taxonomy informal classification of sample is tuff': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Idaho, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is north of Emmett, ID, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The city where the sample was collected is Emmett, The taxonomy informal classification of sample is tuff.': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, 
including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py script, including La Cueva/La Cueva (historical), New Mexico, United States (n=4) and Jemez Springs, Sandoval Co., New Mexico, United States (n=1). The coordinates and uncertainty given here describe a circle encompassing the combination of localities matching both place names with the smallest maximum distance between them (~12 km). Another place name mentioned in the EMu record (\"From Small Dome 3/4 Above La Cueva\") could not be matched and was ignored when determining the coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Rhyolite with domite and celadonite, The additional information about the specific place where the sample was collected is from small dome 3/4 above la Cueva, The free text description of the related URL is Smithsonian collections record for NMNH 117226-612 (PET)': 'sediment/igneous rock/rhyolitoid/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py script, including La Cueva/La Cueva (historical), New Mexico, United States (n=4) and Jemez Springs, Sandoval Co., New Mexico, United States (n=1). 
The coordinates and uncertainty given here describe a circle encompassing the combination of localities matching both place names with the smallest maximum distance between them (~12 km). Another place name mentioned in the EMu record (\"From Small Dome 3/4 Above La Cueva\") could not be matched and was ignored when determining the coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Rhyolite with domite and celadonite, The additional information about the specific place where the sample was collected is from small dome 3/4 above la Cueva, The free text description of the related URL is Smithsonian collections record for NMNH 117226-612 (PET).': 'sediment/igneous rock/rhyolitoid/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is Argentina, The province where the sample was collected is San Juan, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Miocene, The free text description of the location is Sierra Villicum, The free text to describe the collection purpose of the sample is Detrital Zircon Geochronology, The taxonomy informal classification of sample is VILE11': 'clastic sedimentary rock/sediment/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is Argentina, The province where the sample was collected is San Juan, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Miocene, The free text description of the location is Sierra Villicum, The free text to describe the collection purpose of the sample is Detrital Zircon Geochronology, The taxonomy informal classification of sample is VILE11.': 'clastic sedimentary rock/sediment/natural solid material/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Specimen, The free text to describe the collection purpose of the sample is Petrology/Geochemistry, The name of institution, museum, or repository where the sample is currently stored is GeoZentrum Nordbayern, Friedrich-Alexander Universität Erlangen-Nürnberg, Germany, The taxonomy informal classification of sample is andesite, The type of the primary location is volcano, The method by which a sample was collected is Manual': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Specimen, The free text to describe the collection purpose of the sample is Petrology/Geochemistry, The name of institution, museum, or repository where the sample is currently stored is GeoZentrum Nordbayern, Friedrich-Alexander Universität Erlangen-Nürnberg, Germany, The taxonomy informal classification of sample is andesite, The type of the primary location is volcano, The method by which a sample was collected is Manual.': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is Canada, The province where the sample was collected is Yukon, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is alkali syenite': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/syenitoid/rock',\n"," 'The country where the sample was collected is Canada, The province where the sample was collected is Yukon, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is alkali syenite.': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/syenitoid/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal 
classification of sample is Amber, The type of the primary location is Sea/Gulf, The free text description of the related URL is Smithsonian collections record for NMNH 115850-08 (MIN)': 'mineral-organic compound/natural solid material/mineral',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Amber, The type of the primary location is Sea/Gulf, The free text description of the related URL is Smithsonian collections record for NMNH 115850-08 (MIN).': 'mineral-organic compound/natural solid material/mineral',\n"," 'The country where the sample was collected is Sweden, The province where the sample was collected is Kristianstad, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py script, including Bromölla, Bromölla Municipality, Skåne, Sweden (n=1) and Västanå, Bromölla Municipality, Skåne, Sweden (n=1). The coordinates and uncertainty given here describe a circle encompassing the combination of localities matching both place names with the smallest maximum distance between them (~12 km). 
Another place name mentioned in the EMu record (Skane) appears to describe a larger, less specific locality and was ignored when determining coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Trolleite with chlorite and scheelite, The additional information about the specific place where the sample was collected is Skane, Bromolla, Vastana, The free text description of the related URL is Smithsonian collections record for NMNH R5612-00 (MIN)': 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," 'The country where the sample was collected is Sweden, The province where the sample was collected is Kristianstad, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py script, including Bromölla, Bromölla Municipality, Skåne, Sweden (n=1) and Västanå, Bromölla Municipality, Skåne, Sweden (n=1). The coordinates and uncertainty given here describe a circle encompassing the combination of localities matching both place names with the smallest maximum distance between them (~12 km). 
Another place name mentioned in the EMu record (Skane) appears to describe a larger, less specific locality and was ignored when determining coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Trolleite with chlorite and scheelite, The additional information about the specific place where the sample was collected is Skane, Bromolla, Vastana, The free text description of the related URL is Smithsonian collections record for NMNH R5612-00 (MIN).': 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Michigan, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record for Marquette iron range, Marquette Co., Michigan, USA. 
URL: https://www.mindat.org/loc-125421.html., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Cherty quartzite, The type of the primary location is Mining District, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Mesnard?, The additional information about the specific place where the sample was collected is Locality Key: Mesnard?, The free text description of the related URL is Smithsonian collections record for NMNH 113553-252 (PET)': 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Michigan, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record for Marquette iron range, Marquette Co., Michigan, USA. 
URL: https://www.mindat.org/loc-125421.html., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Cherty quartzite, The type of the primary location is Mining District, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Mesnard?, The additional information about the specific place where the sample was collected is Locality Key: Mesnard?, The free text description of the related URL is Smithsonian collections record for NMNH 113553-252 (PET).': 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Utah, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py script, including 22 mi SE of Thompson, Grand Co., Utah, United States (n=1) and 18 mi N of Moab, Grand Co., Utah, United States (n=1). The coordinates and uncertainty given here describe a circle encompassing the combination of localities matching both place names with the smallest maximum distance between them (~16 km). 
Other place names mentioned in the EMu record (Big Cottonwood, Grand, and Yellow Cat Mine) could not be matched and were ignored when determining the coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Vanadium ore with hewettite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Collected 22 Mi Se Of Thompson And 18 Mi N Of Moab, The free text description of the related URL is Smithsonian collections record for NMNH 91970-1 (PET)': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Utah, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py script, including 22 mi SE of Thompson, Grand Co., Utah, United States (n=1) and 18 mi N of Moab, Grand Co., Utah, United States (n=1). The coordinates and uncertainty given here describe a circle encompassing the combination of localities matching both place names with the smallest maximum distance between them (~16 km). 
Other place names mentioned in the EMu record (Big Cottonwood, Grand, and Yellow Cat Mine) could not be matched and were ignored when determining the coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Vanadium ore with hewettite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Collected 22 Mi Se Of Thompson And 18 Mi N Of Moab, The free text description of the related URL is Smithsonian collections record for NMNH 91970-1 (PET).': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is Till is also a mixture of colluvium from the nearby hills. Clasts are molded and angular mix, many large boulders., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Dolerite, The method by which a sample was collected is Manual, The free text description of the related URL is None': 'sediment/igneous rock/natural solid material/doleritic rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is Till is also a mixture of colluvium from the nearby hills. 
Clasts are molded and angular mix, many large boulders., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Dolerite, The method by which a sample was collected is Manual, The free text description of the related URL is None.': 'sediment/igneous rock/natural solid material/doleritic rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Montana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The city where the sample was collected is Butte, The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Montana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The city where the sample was collected is Butte, The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, 
Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual.': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for California, United States (http://geonames.org/5332921) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The city where the sample was collected is Pala, The taxonomy informal classification of sample is Colemanite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Stewart Hill, The free text description of the related URL is Smithsonian collections record for NMNH 126106-00 (MIN)': 'mineral-borate/natural solid material/mineral',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for California, United States (http://geonames.org/5332921) based on state/province name and country using the situate.py script. 
This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The city where the sample was collected is Pala, The taxonomy informal classification of sample is Colemanite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Stewart Hill, The free text description of the related URL is Smithsonian collections record for NMNH 126106-00 (MIN).': 'mineral-borate/natural solid material/mineral',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is Arizona State University, School of Earth and Space Exploration, The taxonomy informal classification of sample is scoria fall deposit Sunset Crater Scoria Cone, The type of the primary location is volcano, The method by which a sample was collected is Manual': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is Arizona State University, School of Earth and Space Exploration, The taxonomy informal classification of sample is scoria fall deposit Sunset Crater Scoria Cone, The type of the primary location is volcano, The method by which a sample was collected is Manual.': 'sediment/igneous rock/natural solid 
material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Montana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Coordinates given here are for the original USGS description of the Little Belt Mountains Quadrangle: \"The square degree forming the Little Belt Mountains quadrangle is limited by 110° and 111° of longitude and 46° and 47° of latitude.\"The radius is the center-to-corner distance of the bounding box. Information about this quadrangle is not available through the current USGS map services., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Granite, The additional information about the specific place where the sample was collected is Locality Key: Little Belt Mts Quad, The free text description of the related URL is Smithsonian collections record for NMNH 53643-63 (PET)': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Montana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Coordinates given here are for the original USGS description of the Little Belt Mountains Quadrangle: \"The square degree forming the Little Belt Mountains quadrangle is limited by 110° and 111° of longitude and 46° and 47° of latitude.\"The radius is the center-to-corner distance of the bounding box. 
Information about this quadrangle is not available through the current USGS map services., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Granite, The additional information about the specific place where the sample was collected is Locality Key: Little Belt Mts Quad, The free text description of the related URL is Smithsonian collections record for NMNH 53643-63 (PET).': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is French Polynesia, The province where the sample was collected is French Polynesia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Presqu’île de Taiarapu, French Polynesia (http://geonames.org/4033642) based on locality name and country using the situate.py script. Another place name mentioned in the EMu record (Beach At Tautira) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 50 km was assigned to all featureCode=PEN records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Phonolite, The type of the primary location is Island, The additional information about the specific place where the sample was collected is Beach At Tautira, Taiarapu Peninsula, The free text description of the related URL is Smithsonian collections record for NMNH 101078 (PET)': 'sediment/igneous rock/natural solid material/phonolitoid/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is French Polynesia, The province where the sample was collected is French Polynesia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Presqu’île de Taiarapu, French Polynesia (http://geonames.org/4033642) based on locality name and country using the situate.py script. Another place name mentioned in the EMu record (Beach At Tautira) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 50 km was assigned to all featureCode=PEN records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Phonolite, The type of the primary location is Island, The additional information about the specific place where the sample was collected is Beach At Tautira, Taiarapu Peninsula, The free text description of the related URL is Smithsonian collections record for NMNH 101078 (PET).': 'sediment/igneous rock/natural solid material/phonolitoid/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is Australia, The province where the sample was collected is Queensland, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Guluguba-2, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is tuff, The type of the primary location is Surat Basin': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is Australia, The province where the sample was collected is Queensland, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Guluguba-2, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is tuff, The type of the primary location is Surat Basin.': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The 
country where the sample was collected is Canada, The province where the sample was collected is Ontario, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Impact breccia, The additional information about the specific place where the sample was collected is Brent Crater, 1/2 miles North of crater, The free text description of the related URL is Smithsonian collections record for NMNH 118231-20 (PET)': 'sediment/impact generated material/natural solid material/rock',\n"," 'The country where the sample was collected is Canada, The province where the sample was collected is Ontario, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Impact breccia, The additional information about the specific place where the sample was collected is Brent Crater, 1/2 miles North of crater, The free text description of the related URL is Smithsonian collections record for NMNH 118231-20 (PET).': 'sediment/impact generated material/natural solid material/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Cambrian, The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Granodiorite, The free text description of the related URL is None': 'granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Cambrian, The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Granodiorite, The free text description of the related URL is None.': 'granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Montana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Independence Mountain, Lincoln Co., Montana, United States (http://geonames.org/5658658) based on volcano name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Absaroka Range-Northern) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all records matched using wildcards., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Granodiorite, The type of the primary location is Volcano, The free text description of the related URL is Smithsonian collections record for NMNH 112601-34 (PET)': 'granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Montana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Independence Mountain, Lincoln Co., Montana, United States (http://geonames.org/5658658) based on volcano name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Absaroka Range-Northern) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all records matched using wildcards., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Granodiorite, The type of the primary location is Volcano, The free text description of the related URL is Smithsonian collections record for NMNH 112601-34 (PET).': 'granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock',\n"," \"The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Quartzite with minor beds of argillite. Strike N30 W, dip variable. Folded, amplitude ~ 100-150'. Additional notes available at U.S. Polar Rock Repository., The detailed description of the sample is Era: Paleozoic; Period: Devonian; , The name of institution, museum, or repository where the sample is currently stored is U.S. Polar Rock Repository at Ohio State University, The taxonomy informal classification of sample is slate?, The name of institution, museum, or repository where the sample was originally stored is US Polar Rock Repository, Byrd Polar Research Center, Ohio State University, The method by which a sample was collected is Manual\": 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," \"The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Quartzite with minor beds of argillite. Strike N30 W, dip variable. Folded, amplitude ~ 100-150'. Additional notes available at U.S. 
Polar Rock Repository., The detailed description of the sample is Era: Paleozoic; Period: Devonian; , The name of institution, museum, or repository where the sample is currently stored is U.S. Polar Rock Repository at Ohio State University, The taxonomy informal classification of sample is slate?, The name of institution, museum, or repository where the sample was originally stored is US Polar Rock Repository, Byrd Polar Research Center, Ohio State University, The method by which a sample was collected is Manual.\": 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Michigan, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record for Marquette iron range, Marquette Co., Michigan, USA. 
URL: https://www.mindat.org/loc-125421.html., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Greenstone, The type of the primary location is Mining District, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Mona, The additional information about the specific place where the sample was collected is Locality Key: Mona, The free text description of the related URL is Smithsonian collections record for NMNH 113553-480 (PET)': 'sediment/natural solid material/metamorphic rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Michigan, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record for Marquette iron range, Marquette Co., Michigan, USA. 
URL: https://www.mindat.org/loc-125421.html., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Greenstone, The type of the primary location is Mining District, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Mona, The additional information about the specific place where the sample was collected is Locality Key: Mona, The free text description of the related URL is Smithsonian collections record for NMNH 113553-480 (PET).': 'sediment/natural solid material/metamorphic rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Kansas, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Specimen, The detailed description of the sample is Interchangeable soil subsample 4 for microbial sequencing analyses, The name of institution, museum, or repository where the sample is currently stored is Soil Microbe Collection (Bulk Subsamples) (NEONSOMC-BS), The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL': 'natural solid material/soil',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Kansas, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Specimen, The detailed description of the sample is Interchangeable soil subsample 4 for microbial sequencing analyses, The name of institution, museum, or repository where the sample is currently stored is Soil Microbe Collection (Bulk Subsamples) (NEONSOMC-BS), The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL.': 'natural solid material/soil',\n"," 'The name of the specific place where the sample was collected is Above Perry South CG, The country where the sample was collected is United States, The province where the sample was collected is Oregon, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Specimen, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Late Miocene, The detailed description of the sample is Ignimbrite deposit, The name of institution, museum, or repository where the sample is currently stored is Oregon State University: College of Earth, Ocean, and Atmospheric Sciences, The taxonomy informal classification of sample is Ignimbrite deposit: Bulk rock, The type of the primary location is Drainage Basin, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Deschutes Formation, The method by which a sample was collected is Manual': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The name of the specific place where the sample was collected is Above Perry South CG, The country where the sample was collected is United States, The province where the sample was collected is Oregon, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Specimen, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Late Miocene, The detailed description of the sample is Ignimbrite deposit, The name of institution, museum, or repository where the sample is currently stored is Oregon State University: College of Earth, Ocean, and Atmospheric Sciences, The taxonomy informal classification of sample is Ignimbrite deposit: Bulk rock, The type of the primary location is Drainage Basin, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Deschutes Formation, The method by which a sample was collected is Manual.': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Jurassic, The detailed description of the sample is Pebbly conglomerate, grit layer in sandstone with quartz and feldspar to 1.5cm., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Conglomerate, The method by which a sample was collected is Manual, The free text description of the related URL is None': 'sediment/generic conglomerate/natural solid material/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Jurassic, The detailed description of the sample is Pebbly conglomerate, grit layer in sandstone with quartz and feldspar to 1.5cm., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Conglomerate, The method by which a sample was collected is Manual, The free text description of the related URL is None.': 'sediment/generic conglomerate/natural solid material/sedimentary rock/rock',\n"," \"The country where the sample was collected is United States, The province where the sample was collected is Hawaii, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for O'ahu, Honolulu Co., Hawaii, United States (http://geonames.org/5851609) based on island name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (%Koolau%) appears to describe a larger, less specific feature and was ignored when determining coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The uncertainty radius represents the center-to-corner distance of the bounding box (41 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Olivine-garnet wehrlite, The type of the primary location is Volcano, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Honolulu Volcanic Series, The free text description of the related URL is Smithsonian collections record for NMNH 114357-6 (PET)\": 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," \"The country where the sample was collected is United States, The province where the sample was collected is Hawaii, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for O'ahu, Honolulu Co., Hawaii, United States (http://geonames.org/5851609) based on island name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (%Koolau%) appears to describe a larger, less specific feature and was ignored when determining coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The uncertainty radius represents the center-to-corner distance of the bounding box (41 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Olivine-garnet wehrlite, The type of the primary location is Volcano, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Honolulu Volcanic Series, The free text description of the related URL is Smithsonian collections record for NMNH 114357-6 (PET).\": 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Southeast Farallon, San Francisco County Co., California, United States (http://geonames.org/5397868) based on island name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (\"Salinian Block - Se Farallon Island\" and Salinian Block) could not be matched and were ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The uncertainty radius represents the center-to-corner distance of the bounding box (50 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Biotite granodiorite, The additional information about the specific place where the sample was collected is Locality Key: Salinian Block - Se Farallon Island, The free text description of the related URL is Smithsonian collections record for NMNH 115775-15 (PET)': 'granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Southeast Farallon, San Francisco County Co., California, United States (http://geonames.org/5397868) based on island name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (\"Salinian Block - Se Farallon Island\" and Salinian Block) could not be matched and were ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The uncertainty radius represents the center-to-corner distance of the bounding box (50 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Biotite granodiorite, The additional information about the specific place where the sample was collected is Locality Key: Salinian Block - Se Farallon Island, The free text description of the related URL is Smithsonian collections record for NMNH 115775-15 (PET).': 'granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock',\n"," \"The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is N-S ridge S of Mt. Leek on W side of Spear Glacier. Sandstone, shale, fossils (belemnites, pelecypods, wood, burrows). Bedding N60 E, 30 N. In bottoms of saddles are fairly thick (100-150') shale sequences which then begin to get sandy and are capped by, The detailed description of the sample is Era: Mesozoic; Period: Jurassic; , The name of institution, museum, or repository where the sample is currently stored is U.S. Polar Rock Repository at Ohio State University, The taxonomy informal classification of sample is sandstone, The name of institution, museum, or repository where the sample was originally stored is US Polar Rock Repository, Byrd Polar Research Center, Ohio State University, The method by which a sample was collected is Manual\": 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," \"The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is N-S ridge S of Mt. Leek on W side of Spear Glacier. 
Sandstone, shale, fossils (belemnites, pelecypods, wood, burrows). Bedding N60 E, 30 N. In bottoms of saddles are fairly thick (100-150') shale sequences which then begin to get sandy and are capped by, The detailed description of the sample is Era: Mesozoic; Period: Jurassic; , The name of institution, museum, or repository where the sample is currently stored is U.S. Polar Rock Repository at Ohio State University, The taxonomy informal classification of sample is sandstone, The name of institution, museum, or repository where the sample was originally stored is US Polar Rock Repository, Byrd Polar Research Center, Ohio State University, The method by which a sample was collected is Manual.\": 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," 'The name of the specific place where the sample was collected is Tsumeb Mine, The country where the sample was collected is Namibia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The city where the sample was collected is Tsumeb, The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual': 'natural solid material/mineral/mineral-native element',\n"," 'The name of the specific place where the sample was collected is Tsumeb Mine, The country where the sample was collected is Namibia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is , The name of 
institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The city where the sample was collected is Tsumeb, The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual.': 'natural solid material/mineral/mineral-native element',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Jersey, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Great Notch, Passaic Co., New Jersey, United States (http://geonames.org/5098580) based on feature name, district/county, state/province, and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Hydroxyapophyllite with prehnite and laumontite, The additional information about the specific place where the sample was collected is Great Notch, The free text description of the related URL is Smithsonian collections record for NMNH C6855-00 (MIN)': 'natural solid material/mineral/mineral-silicate or germanate',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Jersey, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Great Notch, Passaic Co., New Jersey, United States (http://geonames.org/5098580) based on feature name, district/county, state/province, and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Hydroxyapophyllite with prehnite and laumontite, The additional information about the specific place where the sample was collected is Great Notch, The free text description of the related URL is Smithsonian collections record for NMNH C6855-00 (MIN).': 'natural solid material/mineral/mineral-silicate or germanate',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Peridotite with brown hornblende, The type of the primary location is Ocean, The additional information about the specific place where the sample was collected is Nw Slope, Sited By Radar, The free text description of the related URL is Smithsonian collections record for NMNH 110755-240 (PET)': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Peridotite with brown hornblende, The type of the primary location is Ocean, The additional information about the specific place where the sample was collected is Nw Slope, 
Sited By Radar, The free text description of the related URL is Smithsonian collections record for NMNH 110755-240 (PET).': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The name of the specific place where the sample was collected is Oracle Ridge, The country where the sample was collected is United States, The province where the sample was collected is Arizona, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Liquid, The detailed description of the sample is Soil solution, The name of institution, museum, or repository where the sample is currently stored is Department of Soil, Water and Environmental Science, University of Arizona, The taxonomy informal classification of sample is Oracle Ridge Soil Pedon 1, The type of the primary location is Mountain, The additional information about the method by which a sample was collected is Prenart Super Quartz suction cups soil water sampler, The method by which a sample was collected is Prenart, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Catalina-Jemez Critical Zone Observatory (CZO)': 'liquid water/fluid material',\n"," 'The name of the specific place where the sample was collected is Oracle Ridge, The country where the sample was collected is United States, The province where the sample was collected is Arizona, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Liquid, The detailed description of the sample is Soil solution, The name of institution, museum, or repository where the sample is currently stored is Department of Soil, Water and Environmental Science, University of Arizona, The taxonomy informal classification of sample is Oracle Ridge Soil Pedon 1, The type of the primary location is Mountain, The additional information about the method by which a sample was collected is Prenart Super Quartz suction cups soil water sampler, The method by which a sample was collected is Prenart, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Catalina-Jemez Critical Zone Observatory (CZO).': 'liquid water/fluid material',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py script, including Leadville, Lake Co., Colorado, United States (n=1) and Empire Gulch, Lake Co., Colorado, United States (n=1).The coordinates and error radius given here describe a circle encompassing the combination of the instances of both names with the smallest maximum distance between them (~8 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Leadville, The taxonomy informal classification of sample is Granite, The free text description of the related URL is Smithsonian collections record for NMNH 91903-1 (PET)': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was 
collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py script, including Leadville, Lake Co., Colorado, United States (n=1) and Empire Gulch, Lake Co., Colorado, United States (n=1).The coordinates and error radius given here describe a circle encompassing the combination of the instances of both names with the smallest maximum distance between them (~8 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Leadville, The taxonomy informal classification of sample is Granite, The free text description of the related URL is Smithsonian collections record for NMNH 91903-1 (PET).': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The name of the specific place where the sample was collected is Great Wheal Vor, The country where the sample was collected is United Kingdom, The province where the sample was collected is Cornwall, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is Mineral Group: Rutile; Number of pieces: 1; Quality: display; Crystal size: medium; , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The city where the sample was collected is Breage, The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, 
Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual': 'natural solid material/mineral/mineral-oxide',\n"," 'The name of the specific place where the sample was collected is Great Wheal Vor, The country where the sample was collected is United Kingdom, The province where the sample was collected is Cornwall, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is Mineral Group: Rutile; Number of pieces: 1; Quality: display; Crystal size: medium; , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The city where the sample was collected is Breage, The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual.': 'natural solid material/mineral/mineral-oxide',\n"," 'The name of the specific place where the sample was collected is Marshall Gulch, The country where the sample was collected is United States, The province where the sample was collected is Arizona, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Liquid, The detailed description of the sample is Soil solution, The name of institution, museum, or repository where the sample is currently stored is Department of Soil, Water and Environmental Science, University of Arizona, The taxonomy informal classification of sample is Schist Convergent Lysimeter 1, The type of the primary location is Mountain, The additional information about the method by which a sample was collected is SoilMoisture suction cups lysimeter; Tesion Sampler (SoilMoisture Equipment Corp., Santa Barbara, CA), The method by which a sample was collected is Tension Lysimeter, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Catalina-Jemez Critical Zone Observatory (CZO)': 'liquid water/fluid material',\n"," 'The name of the specific place where the sample was collected is Marshall Gulch, The country where the sample was collected is United States, The province where the sample was collected is Arizona, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Liquid, The detailed description of the sample is Soil solution, The name of institution, museum, or repository where the sample is currently stored is Department of Soil, Water and Environmental Science, University of Arizona, The taxonomy informal classification of sample is Schist Convergent Lysimeter 1, The type of the primary location is Mountain, The additional information about the method by which a sample was collected is SoilMoisture suction cups lysimeter; Tesion Sampler (SoilMoisture Equipment Corp., Santa Barbara, CA), The method by which a sample was collected is Tension Lysimeter, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Catalina-Jemez Critical Zone Observatory (CZO).': 'liquid water/fluid material',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Nevada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record for April Fool Hill, Manhattan, Manhattan District, Toquima Range, Nye Co., Nevada, USA. Data manager assigned an arbitrary error radius of 1 km. 
URL: https://www.mindat.org/loc-213520.html., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Limestone, The type of the primary location is Mining District, The additional information about the specific place where the sample was collected is Cave In April Fool Hill, The free text description of the related URL is Smithsonian collections record for NMNH 91848-231 (PET)': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Nevada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record for April Fool Hill, Manhattan, Manhattan District, Toquima Range, Nye Co., Nevada, USA. Data manager assigned an arbitrary error radius of 1 km. 
URL: https://www.mindat.org/loc-213520.html., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Limestone, The type of the primary location is Mining District, The additional information about the specific place where the sample was collected is Cave In April Fool Hill, The free text description of the related URL is Smithsonian collections record for NMNH 91848-231 (PET).': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Silver Cliff, Custer Co., Colorado, United States (http://geonames.org/5438730) based on feature name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Rosita Hills) appears to describe a larger, less specific feature and was ignored when determining coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Andesite, The additional information about the specific place where the sample was collected is Locality Key: Silver Cliff, Rosita Hills, The free text description of the related URL is Smithsonian collections record for NMNH 73513-1074 (PET)': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Silver Cliff, Custer Co., Colorado, United States (http://geonames.org/5438730) based on feature name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Rosita Hills) appears to describe a larger, less specific feature and was ignored when determining coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Andesite, The additional information about the specific place where the sample was collected is Locality Key: Silver Cliff, Rosita Hills, The free text description of the related URL is Smithsonian collections record for NMNH 73513-1074 (PET).': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Nevada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Devonian > Upper/Late, The free text description of the location is Coordinates for the center of PLSS subdivision \"Nevada, Mount Diablo Meridian, T37N, R53E, S29, NE\" are from the TownshipGeocoder webservice on the BLM GeoCommunicator website., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Baryte with chert and quartzite, The free text description of the related URL is Smithsonian collections record for NMNH 117330-46 (PET)': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Nevada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Devonian > Upper/Late, The free text description of the location is Coordinates for the center of PLSS subdivision \"Nevada, Mount Diablo Meridian, T37N, R53E, S29, NE\" are from the TownshipGeocoder webservice on the BLM GeoCommunicator website., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Baryte with chert and quartzite, The free text description of the related URL is Smithsonian collections record for NMNH 117330-46 (PET).': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The country where the sample was collected is Canada, The province where the sample was collected is Yukon, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Mechanical Fraction, The name of institution, museum, or repository where the sample is currently stored is Dartmouth College, The taxonomy informal classification of sample is bentonite': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is Canada, The province where the sample was collected is Yukon, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Mechanical Fraction, The name of institution, museum, or repository where the sample is currently stored is Dartmouth College, The taxonomy informal classification of sample is bentonite.': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Utah, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Cambrian, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Shale, The type of the primary location is Mining District, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Ophir Shale, The additional information about the specific place where the sample was collected is Locality Key: Ophir Shale, The free text description of the related URL is Smithsonian collections record for NMNH 102566-35 (PET)': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Utah, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Cambrian, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Shale, The type of the primary location is Mining District, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Ophir Shale, The additional information about the specific place where the sample was collected is Locality Key: Ophir Shale, The free text description of the related URL is Smithsonian collections record for NMNH 102566-35 (PET).': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is Brazil, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Morro Branco, The taxonomy informal classification of sample is Alkali trachyte, The type of the primary location is Island, The free text description of the related URL is Smithsonian collections record for NMNH 111222-8 (PET)': 'sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is Brazil, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Morro Branco, The taxonomy informal classification of sample is Alkali trachyte, The type of the primary location is Island, The free text description of the related URL is 
Smithsonian collections record for NMNH 111222-8 (PET).': 'sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Silver City, Grant Co., New Mexico, United States (http://geonames.org/5491487) based on map name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (\"Chino (?) Ore Body\") could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. A minimum uncertainty of 10 km was assigned to all map records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Granodiorite porphyry, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Santa Rita Granodiorite Porphyry, The additional information about the specific place where the sample was collected is Chino (?) 
Ore Body, The free text description of the related URL is Smithsonian collections record for NMNH 107619-452 (PET)': 'granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Silver City, Grant Co., New Mexico, United States (http://geonames.org/5491487) based on map name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (\"Chino (?) Ore Body\") could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. A minimum uncertainty of 10 km was assigned to all map records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Granodiorite porphyry, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Santa Rita Granodiorite Porphyry, The additional information about the specific place where the sample was collected is Chino (?) 
Ore Body, The free text description of the related URL is Smithsonian collections record for NMNH 107619-452 (PET).': 'granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is Canada, The province where the sample was collected is Ontario, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record with a similar name, Frood-Stobie Mine, McKim and Blezard Townships, Sudbury District, Ontario, Canada. Data manager assigned an arbitrary error radius of 1 km. URL: https://www.mindat.org/loc-8182.html., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Sulfur ore with chalcopyrite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Locality Key: Sudbury Dist, The free text description of the related URL is Smithsonian collections record for NMNH 116024-31 (PET)': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The country where the sample was collected is Canada, The province where the sample was collected is Ontario, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record with a similar name, Frood-Stobie Mine, McKim and Blezard Townships, Sudbury District, Ontario, Canada. Data manager assigned an arbitrary error radius of 1 km. 
URL: https://www.mindat.org/loc-8182.html., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Sulfur ore with chalcopyrite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Locality Key: Sudbury Dist, The free text description of the related URL is Smithsonian collections record for NMNH 116024-31 (PET).': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The country where the sample was collected is Australia, The province where the sample was collected is Northern Territory, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Sandstone, The additional information about the specific place where the sample was collected is Near central peak, The free text description of the related URL is Smithsonian collections record for NMNH 118339-129 (PET)': 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," 'The country where the sample was collected is Australia, The province where the sample was collected is Northern Territory, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Sandstone, The additional information about the specific place where the sample was collected is Near central peak, The free text 
description of the related URL is Smithsonian collections record for NMNH 118339-129 (PET).': 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Gem Collection, Smithsonian Institution, The taxonomy informal classification of sample is DeYoung Red Diamond, The additional information about the specific place where the sample was collected is Unknown, The free text description of the related URL is Smithsonian collections record for NMNH G9871-00 (MIN)': 'natural solid material/mineral/mineral-native element',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Gem Collection, Smithsonian Institution, The taxonomy informal classification of sample is DeYoung Red Diamond, The additional information about the specific place where the sample was collected is Unknown, The free text description of the related URL is Smithsonian collections record for NMNH G9871-00 (MIN).': 'natural solid material/mineral/mineral-native element',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is Woods Hole Oceanographic Institution (WHOI), The free text to add any comments pertaining to the sample is Completely serpentinized. 
Unable to get details on original mineralogy, The taxonomy informal classification of sample is Peridotite, The method by which a sample was collected is Dredging, The name or identifier of the field program (cruise or expedition), during which the sample was collected is KN210-05, The free text description of the related URL is Related EarthChem Library dataset, Dredge Records for Knorr Cruise 210, Leg 5': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is Woods Hole Oceanographic Institution (WHOI), The free text to add any comments pertaining to the sample is Completely serpentinized. Unable to get details on original mineralogy, The taxonomy informal classification of sample is Peridotite, The method by which a sample was collected is Dredging, The name or identifier of the field program (cruise or expedition), during which the sample was collected is KN210-05, The free text description of the related URL is Related EarthChem Library dataset, Dredge Records for Knorr Cruise 210, Leg 5.': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Manganese nodule, The type of the primary location is Ocean, The free text description of the related URL is Smithsonian collections record for NMNH 
111519-15 (PET)': 'sediment/natural solid material/chemical sedimentary material/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Manganese nodule, The type of the primary location is Ocean, The free text description of the related URL is Smithsonian collections record for NMNH 111519-15 (PET).': 'sediment/natural solid material/chemical sedimentary material/rock',\n"," 'The name of the specific place where the sample was collected is Silver Bay, The country where the sample was collected is United States, The province where the sample was collected is Minnesota, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is anorthosite': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/anorthositic rock/rock',\n"," 'The name of the specific place where the sample was collected is Silver Bay, The country where the sample was collected is United States, The province where the sample was collected is Minnesota, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is anorthosite.': 'phaneritic igneous rock/sediment/igneous rock/natural solid 
material/anorthositic rock/rock',\n"," 'The name of the specific place where the sample was collected is Ross Sea, The object type of sample indicates that this sample is unambiguously mated portion of a larger piece noted for curatorial management of the material, The name of institution, museum, or repository where the sample is currently stored is Rice University (Rice), The name of institution, museum, or repository where the sample was originally stored is ARF; Antarctic Marine Geology Research Facility, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Deep Freeze 1980': 'natural solid material/sediment/rock',\n"," 'The name of the specific place where the sample was collected is Ross Sea, The object type of sample indicates that this sample is unambiguously mated portion of a larger piece noted for curatorial management of the material, The name of institution, museum, or repository where the sample is currently stored is Rice University (Rice), The name of institution, museum, or repository where the sample was originally stored is ARF; Antarctic Marine Geology Research Facility, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Deep Freeze 1980.': 'natural solid material/sediment/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Utah, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Cambrian, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Calcareous shale, The type of the primary location is Mining District, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Ophir Shale, The additional information about the specific place where the sample was collected is Locality Key: Ophir Shale, The free text description of the related URL is Smithsonian collections record for NMNH 102566-33 (PET)': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Utah, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Cambrian, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Calcareous shale, The type of the primary location is Mining District, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Ophir Shale, The additional information about the specific place where the sample was collected is Locality Key: Ophir Shale, The free text description of the related URL is Smithsonian collections record for NMNH 102566-33 (PET).': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Arizona, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Bisbee, Cochise Co., Arizona, United States (http://geonames.org/5284905) based on feature name, district/county, state/province, and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=PPLA2 records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Malachite, The additional information about the specific place where the sample was collected is Bisbee, The free text description of the related URL is Smithsonian collections record for NMNH 104891-00 (MIN)': 'natural solid material/mineral-carbonate or nitrate/mineral',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Arizona, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Bisbee, Cochise Co., Arizona, United States (http://geonames.org/5284905) based on feature name, district/county, state/province, and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=PPLA2 records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Malachite, The additional information about the specific place where the sample was collected is Bisbee, The free text description of the related URL is Smithsonian collections record for NMNH 104891-00 (MIN).': 'natural solid material/mineral-carbonate or nitrate/mineral',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Asbestos rock, The additional information about the specific place where the sample was collected is Unknown. American Rutile Company., The free text description of the related URL is Smithsonian collections record for NMNH 116579-11 (PET)': 'sediment/natural solid material/metamorphic rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Asbestos rock, The additional information about the specific place where the sample was collected is Unknown. 
American Rutile Company., The free text description of the related URL is Smithsonian collections record for NMNH 116579-11 (PET).': 'sediment/natural solid material/metamorphic rock/rock',\n"," 'The country where the sample was collected is Puerto Rico, The province where the sample was collected is Puerto Rico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Central La Plata, Guatemala Barrio, San Sebastián, Puerto Rico (http://geonames.org/4563429) based on map name and country using the situate.py script. Other place names mentioned in the EMu record (\"21.09 Km\", Mayaguez Rd, and Pr 108) could not be matched and were ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. The uncertainty radius represents the center-to-corner distance of the bounding box (10 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Tuff, The additional information about the specific place where the sample was collected is Pr 108, Ss Mayaguez Rd, 21.09 Km, The free text description of the related URL is Smithsonian collections record for NMNH 116678-162 (PET)': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is Puerto Rico, The province where the sample was collected is Puerto Rico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Central La Plata, Guatemala Barrio, San 
Sebastián, Puerto Rico (http://geonames.org/4563429) based on map name and country using the situate.py script. Other place names mentioned in the EMu record (\"21.09 Km\", Mayaguez Rd, and Pr 108) could not be matched and were ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. The uncertainty radius represents the center-to-corner distance of the bounding box (10 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Tuff, The additional information about the specific place where the sample was collected is Pr 108, Ss Mayaguez Rd, 21.09 Km, The free text description of the related URL is Smithsonian collections record for NMNH 116678-162 (PET).': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Montana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Gneiss with biotite and quartz, The additional information about the specific place where the sample was collected is Williams 1 diatreme, trench; 0.5 km N of Williams Ranch; NW SW NW 8, T24N, R24E, The free text description of the related URL is Smithsonian collections record for NMNH 117292-78 (PET)': 'sediment/natural solid material/metamorphic rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Montana, The object type of sample indicates that this 
sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Gneiss with biotite and quartz, The additional information about the specific place where the sample was collected is Williams 1 diatreme, trench; 0.5 km N of Williams Ranch; NW SW NW 8, T24N, R24E, The free text description of the related URL is Smithsonian collections record for NMNH 117292-78 (PET).': 'sediment/natural solid material/metamorphic rock/rock',\n"," 'The name of the specific place where the sample was collected is John Day, The country where the sample was collected is United States, The province where the sample was collected is Oregon, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is tuff': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The name of the specific place where the sample was collected is John Day, The country where the sample was collected is United States, The province where the sample was collected is Oregon, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is tuff.': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is United States, 
The province where the sample was collected is Alabama, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Specimen, The detailed description of the sample is Interchangeable soil subsample 1 for microbial sequencing analyses, The name of institution, museum, or repository where the sample is currently stored is Soil Microbe Collection (Bulk Subsamples) (NEONSOMC-BS), The taxonomy informal classification of sample is Soil Microbe, The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL': 'natural solid material/soil',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Alabama, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Specimen, The detailed description of the sample is Interchangeable soil subsample 1 for microbial sequencing analyses, The name of institution, museum, or repository where the sample is currently stored is Soil Microbe Collection (Bulk Subsamples) (NEONSOMC-BS), The taxonomy informal classification of sample is Soil Microbe, The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL.': 'natural solid material/soil',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Permian, The detailed description of the sample is Shale, silty, carbonaceous, bedding plan split about 1/32 to 1/4 inch thick, weathers medium gray., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Shale, The free text description of the related URL is None': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Permian, The detailed description of the sample is Shale, silty, carbonaceous, bedding plan split about 1/32 to 1/4 inch thick, weathers medium gray., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Shale, The free text description of the related URL is None.': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Dolerite, The type of the primary location is Ocean, The additional information about the specific place where the sample was collected is N Base Of The Central Part Of The Blanco Trough, The free text description of the related URL is Smithsonian collections record for NMNH 111236-2 (PET)': 'sediment/igneous rock/natural solid material/doleritic rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Dolerite, The type of the primary location is Ocean, The additional information about the specific place where the sample was collected is N Base Of The Central Part Of The Blanco Trough, The free text description of the related URL is Smithsonian collections record for NMNH 111236-2 (PET).': 
'sediment/igneous rock/natural solid material/doleritic rock/rock',\n"," 'The country where the sample was collected is Tonga, The object type of sample indicates that this sample is a sample (sometimes mechanically collected) from a deposit or area, not intended to be representative of the deposit or area, The type of platform for the cruise is Ship, The free text to describe the collection purpose of the sample is research, The detailed description of the sample is lava, The name of institution, museum, or repository where the sample is currently stored is University of Hawaii (Hawaii), The taxonomy informal classification of sample is dacite, The type of the primary location is volcano, The additional information about the method by which a sample was collected is Quest 4000 manipulator, The method by which a sample was collected is Grab>ROV, The name or identifier of the field program (cruise or expedition), during which the sample was collected is RR1211': 'sediment/dacite/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is Tonga, The object type of sample indicates that this sample is a sample (sometimes mechanically collected) from a deposit or area, not intended to be representative of the deposit or area, The type of platform for the cruise is Ship, The free text to describe the collection purpose of the sample is research, The detailed description of the sample is lava, The name of institution, museum, or repository where the sample is currently stored is University of Hawaii (Hawaii), The taxonomy informal classification of sample is dacite, The type of the primary location is volcano, The additional information about the method by which a sample was collected is Quest 4000 manipulator, The method by which a sample was collected is Grab>ROV, The name or identifier of the field program (cruise or expedition), during which the sample was collected is RR1211.': 'sediment/dacite/igneous 
rock/natural solid material/fine grained igneous rock/rock',\n"," 'The name of the specific place where the sample was collected is Terrace Ridge, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Permian, The detailed description of the sample is Coal and coaly shale, thinly banded with sparse bands, thin fusain bands present, The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Coal, The free text description of the related URL is None': 'sediment/coal/natural solid material/organic rich sedimentary rock/sedimentary rock/rock',\n"," 'The name of the specific place where the sample was collected is Terrace Ridge, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Permian, The detailed description of the sample is Coal and coaly shale, thinly banded with sparse bands, thin fusain bands present, The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Coal, The free text description of the related URL is None.': 'sediment/coal/natural solid material/organic rich sedimentary rock/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Utah, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Limestone with specularite, The type of the primary location is Mining District, The additional information about the specific place where the sample was collected is Ne Of Silver Dipper Mine, The free text description of the related URL is Smithsonian collections record for NMNH 102566-241 (PET)': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Utah, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Limestone with specularite, The type of the primary location is Mining District, The additional information about the specific place where 
the sample was collected is Ne Of Silver Dipper Mine, The free text description of the related URL is Smithsonian collections record for NMNH 102566-241 (PET).': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The country where the sample was collected is France, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Hypersthene andesite, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is Summit Sand, The free text description of the related URL is Smithsonian collections record for NMNH 115750 (PET)': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is France, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Hypersthene andesite, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is Summit Sand, The free text description of the related URL is Smithsonian collections record for NMNH 115750 (PET).': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The name of the specific place where the sample was collected is Steinbruch Böhl, The country where the sample was collected is Germany, The object type of sample 
indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Steinbruch, Werk Raumland, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is gray-black less altered \"platey\" bentonite': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The name of the specific place where the sample was collected is Steinbruch Böhl, The country where the sample was collected is Germany, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Steinbruch, Werk Raumland, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is gray-black less altered \"platey\" bentonite.': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Ohio, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Clay Center, Ottawa Co., Ohio, United States (http://geonames.org/5150460) based on feature name, district/county, state/province, and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Fluorite, The additional information about the specific place where the sample was collected is Clay Center, The free text description of the related URL is Smithsonian collections record for NMNH R19056-00 (MIN)': 'natural solid material/mineral-halide/mineral',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Ohio, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Clay Center, Ottawa Co., Ohio, United States (http://geonames.org/5150460) based on feature name, district/county, state/province, and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Fluorite, The additional information about the specific place where the sample was collected is Clay Center, The free text description of the related URL is Smithsonian collections record for NMNH R19056-00 (MIN).': 'natural solid material/mineral-halide/mineral',\n"," 'The country where the sample was collected is Chile, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Diorite, The additional information about the specific place where the sample was collected is Lago General Correra, The free text description of the related URL is Smithsonian collections record for NMNH 116511-18 (PET)': 'dioritoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is Chile, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Diorite, The additional information about the specific place where the sample was collected is Lago General Correra, The free text description of the related URL is Smithsonian collections record for NMNH 116511-18 (PET).': 'dioritoid/phaneritic igneous rock/sediment/igneous 
rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Puerto Rico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Specimen, The detailed description of the sample is Wet deposition subsample for chemical analysis, The name of institution, museum, or repository where the sample is currently stored is Wet Deposition Collection (NEONWDC), The taxonomy informal classification of sample is Wet Deposition, The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL': 'liquid water/fluid material',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Puerto Rico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Specimen, The detailed description of the sample is Wet deposition subsample for chemical analysis, The name of institution, museum, or repository where the sample is currently stored is Wet Deposition Collection (NEONWDC), The taxonomy informal classification of sample is Wet Deposition, The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL.': 'liquid water/fluid material',\n"," 'The country where the sample was collected is Canada, The province where the sample was collected is Ontario, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple records from GeoNames matched the locality Temagami/Temagami Island, Ontario, Canada, including http://geonames.org/6162646, http://geonames.org/6162647, and http://geonames.org/6162648. The situate.py script was unable to distinguish between these localities, and the coordinates and error radius given here describe a circle encompassing all 3 localities. 
This was the most specific place name found in this record., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Millerite with gersdorffite and chalcopyrite, The additional information about the specific place where the sample was collected is Temagami, The free text description of the related URL is Smithsonian collections record for NMNH 113209-00 (MIN)': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The country where the sample was collected is Canada, The province where the sample was collected is Ontario, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple records from GeoNames matched the locality Temagami/Temagami Island, Ontario, Canada, including http://geonames.org/6162646, http://geonames.org/6162647, and http://geonames.org/6162648. The situate.py script was unable to distinguish between these localities, and the coordinates and error radius given here describe a circle encompassing all 3 localities. 
This was the most specific place name found in this record., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Millerite with gersdorffite and chalcopyrite, The additional information about the specific place where the sample was collected is Temagami, The free text description of the related URL is Smithsonian collections record for NMNH 113209-00 (MIN).': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text to describe the collection purpose of the sample is geochemistry, The name of institution, museum, or repository where the sample is currently stored is USGS - Anchorage, AK, The taxonomy informal classification of sample is granodiorite, The method by which a sample was collected is outcrop': 'granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text to describe the collection purpose of the sample is geochemistry, The name of institution, museum, or repository where the sample is currently stored is USGS - Anchorage, AK, The taxonomy informal classification of sample is granodiorite, The method by which a sample was collected is outcrop.': 'granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or 
repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Andesite, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is Locality Key: Colima Volcanic Complex, The free text description of the related URL is Smithsonian collections record for NMNH 116598-30 (PET)': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Andesite, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is Locality Key: Colima Volcanic Complex, The free text description of the related URL is Smithsonian collections record for NMNH 116598-30 (PET).': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Arizona, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Arizona, United States (http://geonames.org/5551752) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. 
Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Vanadinite, The free text description of the related URL is Smithsonian collections record for NMNH M21891-00 (MIN)': 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Arizona, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Arizona, United States (http://geonames.org/5551752) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Vanadinite, The free text description of the related URL is Smithsonian collections record for NMNH M21891-00 (MIN).': 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Measured stratigraphic section. 
See dissertation for more information., The detailed description of the sample is Era: Paleozoic; Period: Permian; , The name of institution, museum, or repository where the sample is currently stored is U.S. Polar Rock Repository at Ohio State University, The taxonomy informal classification of sample is tillite, The name of institution, museum, or repository where the sample was originally stored is US Polar Rock Repository, Byrd Polar Research Center, Ohio State University, The method by which a sample was collected is Manual': 'diamictite/clastic sedimentary rock/sediment/natural solid material/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Measured stratigraphic section. See dissertation for more information., The detailed description of the sample is Era: Paleozoic; Period: Permian; , The name of institution, museum, or repository where the sample is currently stored is U.S. Polar Rock Repository at Ohio State University, The taxonomy informal classification of sample is tillite, The name of institution, museum, or repository where the sample was originally stored is US Polar Rock Repository, Byrd Polar Research Center, Ohio State University, The method by which a sample was collected is Manual.': 'diamictite/clastic sedimentary rock/sediment/natural solid material/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Triassic, The detailed description of the sample is C horizon. Flaggy siltstone. 
This rock has an irregular bedding., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Siltstone, The free text description of the related URL is None': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Triassic, The detailed description of the sample is C horizon. Flaggy siltstone. This rock has an irregular bedding., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Siltstone, The free text description of the related URL is None.': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Hawaii, The object type of sample indicates that this sample is long cylindrical cores, The free text description of the location is Coordinates from GeoNames for the Kilauea Iki Crater (https://www.geonames.org/5849363). Per Cathleen Brown, coordinates from the USGS NGDB for the Kilaeua Iki drill cores plot ~10 miles west of the crater., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Olivine-basalt with olivine, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is Kilauea Iki. Drill Hole No. 
75-1, 83.5 To 116(+-) Ft, The method by which a sample was collected is Coring, The free text description of the related URL is Smithsonian collections record for NMNH 115470-4 (PET)': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Hawaii, The object type of sample indicates that this sample is long cylindrical cores, The free text description of the location is Coordinates from GeoNames for the Kilauea Iki Crater (https://www.geonames.org/5849363). Per Cathleen Brown, coordinates from the USGS NGDB for the Kilaeua Iki drill cores plot ~10 miles west of the crater., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Olivine-basalt with olivine, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is Kilauea Iki. Drill Hole No. 75-1, 83.5 To 116(+-) Ft, The method by which a sample was collected is Coring, The free text description of the related URL is Smithsonian collections record for NMNH 115470-4 (PET).': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Jersey, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Coordinates from GEOLocate for parse pattern \"FRANKLIN\". 
Mindat gives a nearly identical location and provides the following description for this site: \"Franklin includes all of the localities within the incorporated limits of the borough of Franklin, including the Franklin mine. This locality is within the Franklin Mining District\" (\"Franklin, Franklin Mining District\", 2016)., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Zincite, The type of the primary location is Mining District, The additional information about the specific place where the sample was collected is Franklin, The free text description of the related URL is Smithsonian collections record for NMNH 118641-01 (MIN)': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Jersey, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Coordinates from GEOLocate for parse pattern \"FRANKLIN\". Mindat gives a nearly identical location and provides the following description for this site: \"Franklin includes all of the localities within the incorporated limits of the borough of Franklin, including the Franklin mine. 
This locality is within the Franklin Mining District\" (\"Franklin, Franklin Mining District\", 2016)., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Zincite, The type of the primary location is Mining District, The additional information about the specific place where the sample was collected is Franklin, The free text description of the related URL is Smithsonian collections record for NMNH 118641-01 (MIN).': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is Canada, The province where the sample was collected is Yukon, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is West Coal River area, The detailed description of the sample is Bt-granodiorite, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is Bt-granodiorite': 'granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is Canada, The province where the sample was collected is Yukon, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is West Coal River area, The detailed description of the sample is Bt-granodiorite, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is Bt-granodiorite.': 'granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock',\n"," 'The country 
where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Quartz-monzonite with quartz and aplitic dike-rock, The additional information about the specific place where the sample was collected is Locality Key: Anthracite-Creste D Butte, The free text description of the related URL is Smithsonian collections record for NMNH 73501-1498 (PET)': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Quartz-monzonite with quartz and aplitic dike-rock, The additional information about the specific place where the sample was collected is Locality Key: Anthracite-Creste D Butte, The free text description of the related URL is Smithsonian collections record for NMNH 73501-1498 (PET).': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Jersey, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The 
free text description of the location is Coordinates for Sterling Hill Mine (MRDS ID: W031395, Deposit ID: 10080281) are from the USGS Mineral Resources Data System dataset (downloaded 2015-08-13). Record matched uniquely on mine name, country, state, and county., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Hemimorphite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Ogdensburg, The free text description of the related URL is Smithsonian collections record for NMNH B17312-00 (MIN)': 'natural solid material/mineral/mineral-silicate or germanate',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Jersey, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Coordinates for Sterling Hill Mine (MRDS ID: W031395, Deposit ID: 10080281) are from the USGS Mineral Resources Data System dataset (downloaded 2015-08-13). 
Record matched uniquely on mine name, country, state, and county., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Hemimorphite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Ogdensburg, The free text description of the related URL is Smithsonian collections record for NMNH B17312-00 (MIN).': 'natural solid material/mineral/mineral-silicate or germanate',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Nevada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Comstock Lode Mining District, Storey Co., Nevada, United States (http://geonames.org/5502108) based on feature name, state/province, and country using the situate.py script. The script determined that this locality is located within Washoe (featureCode=ADM2), another feature mentioned in this record. Matching Comstock Lode required using a wildcard search. Another place name mentioned in the EMu record (\"Ridge Ne Of Ranch On Ani Flat Road\") could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The uncertainty radius represents the center-to-corner distance of the bounding box (100 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Hornblende andesite, The type of the primary location is Mining District, The additional information about the specific place where the sample was collected is Ridge Ne Of Ranch On Ani Flat Road, The free text description of the related URL is Smithsonian collections record for NMNH 72378 (PET)': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Nevada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Comstock Lode Mining District, Storey Co., Nevada, United States (http://geonames.org/5502108) based on feature name, state/province, and country using the situate.py script. The script determined that this locality is located within Washoe (featureCode=ADM2), another feature mentioned in this record. Matching Comstock Lode required using a wildcard search. Another place name mentioned in the EMu record (\"Ridge Ne Of Ranch On Ani Flat Road\") could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The uncertainty radius represents the center-to-corner distance of the bounding box (100 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Hornblende andesite, The type of the primary location is Mining District, The additional information about the specific place where the sample was collected is Ridge Ne Of Ranch On Ani Flat Road, The free text description of the related URL is Smithsonian collections record for NMNH 72378 (PET).': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text to describe the collection purpose of the sample is 2019 inundation tracer tests, The type of the primary location is floodplain, aquifer, The name or identifier of the field program (cruise or expedition), during which the sample was collected is SLAC-SFA': 'liquid water/fluid material',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text to describe the collection purpose of the sample is 2019 inundation tracer tests, The type of the primary location is floodplain, aquifer, The name or identifier of the field program (cruise or expedition), during which the sample was collected is SLAC-SFA.': 'liquid water/fluid material',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian 
Institution, The taxonomy informal classification of sample is Glassy basalt, The type of the primary location is Ocean, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Deep Sea Drilling Project, The free text description of the related URL is Smithsonian collections record for NMNH 117894-54 (PET)': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Glassy basalt, The type of the primary location is Ocean, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Deep Sea Drilling Project, The free text description of the related URL is Smithsonian collections record for NMNH 117894-54 (PET).': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is University of Florida, The taxonomy informal classification of sample is Basalt, The type of the primary location is Ridge, The method by which a sample was collected is WaxCorer, The name or identifier of the field program (cruise or expedition), during which the sample was collected is A125-24': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The object type of sample indicates that this sample is 
a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is University of Florida, The taxonomy informal classification of sample is Basalt, The type of the primary location is Ridge, The method by which a sample was collected is WaxCorer, The name or identifier of the field program (cruise or expedition), during which the sample was collected is A125-24.': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Cambrian, The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Sandstone, The free text description of the related URL is None': 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Cambrian, The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Sandstone, The free text description of the related URL is None.': 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," 'The name of the specific place where the sample was collected is Nassau Mtns., The country where the sample was collected is Suriname, The province where the sample was collected is Marowijne, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The name of the specific place where the sample was collected is Nassau Mtns., The country where the sample was collected is Suriname, The province where the sample was collected is Marowijne, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 
02138, USA, The method by which a sample was collected is Manual.': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Nevada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Limestone, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Hamburg, The free text description of the related URL is Smithsonian collections record for NMNH 87890-9981 (PET)': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Nevada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Limestone, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Hamburg, The free text description of the related URL is Smithsonian collections record for NMNH 87890-9981 (PET).': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Utah, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Specimen, The detailed description of the sample is Interchangeable soil subsample 2 for microbial sequencing analyses, The name of institution, museum, or repository where the sample is currently stored is Soil Microbe Collection (Bulk Subsamples) (NEONSOMC-BS), The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL': 'natural solid material/soil',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Utah, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Specimen, The detailed description of the sample is Interchangeable soil subsample 2 for microbial sequencing analyses, The name of institution, museum, or repository where the sample is currently stored is Soil Microbe Collection (Bulk Subsamples) (NEONSOMC-BS), The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL.': 'natural solid material/soil',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Mount Shasta, Siskiyou Co., California, United States (http://geonames.org/5571124) based on feature name, district/county, state/province, and country using the situate.py script. 
The script determined that this locality is located within Siskiyou (featureCode=ADM2), another feature mentioned in this record. Another place name mentioned in the EMu record (Mount) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. The uncertainty radius represents the center-to-corner distance of the bounding box (10 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Hypersthene andesite, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is Locality Key: Shasta, Mount, The free text description of the related URL is Smithsonian collections record for NMNH 36978 (PET)': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Mount Shasta, Siskiyou Co., California, United States (http://geonames.org/5571124) based on feature name, district/county, state/province, and country using the situate.py script. The script determined that this locality is located within Siskiyou (featureCode=ADM2), another feature mentioned in this record. Another place name mentioned in the EMu record (Mount) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The uncertainty radius represents the center-to-corner distance of the bounding box (10 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Hypersthene andesite, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is Locality Key: Shasta, Mount, The free text description of the related URL is Smithsonian collections record for NMNH 36978 (PET).': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Corral de Tierra, Monterey Co., California, United States (http://geonames.org/5339745) based on municipality name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (Salinian Block and Salinian Block - Santa Lucia Range) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all municipality records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Corral De Tierra, The taxonomy informal classification of sample is Hornblende-biotite-quartz diorite, The additional information about the specific place where the sample was collected is Locality Key: Salinian Block - Santa Lucia Range, The free text description of the related URL is Smithsonian collections record for NMNH 116116-22 (PET)': 'dioritoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Corral de Tierra, Monterey Co., California, United States (http://geonames.org/5339745) based on municipality name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (Salinian Block and Salinian Block - Santa Lucia Range) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all municipality records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Corral De Tierra, The taxonomy informal classification of sample is Hornblende-biotite-quartz diorite, The additional information about the specific place where the sample was collected is Locality Key: Salinian Block - Santa Lucia Range, The free text description of the related URL is Smithsonian collections record for NMNH 116116-22 (PET).': 'dioritoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Texas, The object type of sample indicates that this sample is loose, coarse, unconsolidated material suspended in drilling fluid, The free text to describe the collection purpose of the sample is Oil or gas exploration, The detailed description of the sample is This is a sample of type CUTTINGS from an oil or gas well., The name of institution, museum, or repository where the sample is currently stored is Bureau of Economic Geology Austin Core Research Center, The method by which a sample was collected is Coring': 'soil/sediment/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Texas, The object type of sample indicates that this sample is loose, coarse, unconsolidated material suspended in drilling fluid, The free text to describe the collection purpose of the sample is Oil or gas exploration, The detailed description of the sample is This is a sample of type CUTTINGS from an oil or gas well., The name of institution, museum, or repository where the sample is currently stored is Bureau of Economic Geology Austin Core Research Center, The method by which 
a sample was collected is Coring.': 'soil/sediment/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Connecticut, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Bristol, Hartford Co., Connecticut, United States (http://geonames.org/5282835) based on feature name, district/county, state/province, and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Chalcocite, The additional information about the specific place where the sample was collected is Bristol, The free text description of the related URL is Smithsonian collections record for NMNH 104509-00 (MIN)': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Connecticut, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Bristol, Hartford Co., Connecticut, United States (http://geonames.org/5282835) based on feature name, district/county, state/province, and country using the situate.py script. 
This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Chalcocite, The additional information about the specific place where the sample was collected is Bristol, The free text description of the related URL is Smithsonian collections record for NMNH 104509-00 (MIN).': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text to describe the collection purpose of the sample is 2018 inundation tracer tests, The free text to add any comments pertaining to the sample is aka 205; not water from NTC1-48in, The type of the primary location is floodplain, aquifer, The name or identifier of the field program (cruise or expedition), during which the sample was collected is SLAC-SFA': 'liquid water/fluid material',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text to describe the collection purpose of the sample is 2018 inundation tracer tests, The free text to add any comments pertaining to the sample is aka 205; not water from NTC1-48in, The type of the primary location is floodplain, aquifer, The name or identifier of the field program (cruise or expedition), during which the sample was collected is SLAC-SFA.': 'liquid water/fluid material',\n"," 'The country where the sample was collected is Mexico, The province where the sample was 
collected is Durango, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py script, including Sierra San Francisco, Durango, Mexico (n=1) and Arroyo Sapioris/Sapioris/Sapiorís, Durango, Mexico (n=6). The coordinates and uncertainty given here describe a circle encompassing the combination of localities matching both place names with the smallest maximum distance between them (~22 km). The script interepreted Sierra De San Francisco as a synonym for Sierra San Francisco., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Cassiterite, The additional information about the specific place where the sample was collected is Sierra De San Francisco, Sapioris, The free text description of the related URL is Smithsonian collections record for NMNH R8006-02 (MIN)': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is Mexico, The province where the sample was collected is Durango, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py script, including Sierra San Francisco, Durango, Mexico (n=1) and Arroyo Sapioris/Sapioris/Sapiorís, Durango, Mexico (n=6). The coordinates and uncertainty given here describe a circle encompassing the combination of localities matching both place names with the smallest maximum distance between them (~22 km). 
The script interepreted Sierra De San Francisco as a synonym for Sierra San Francisco., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Cassiterite, The additional information about the specific place where the sample was collected is Sierra De San Francisco, Sapioris, The free text description of the related URL is Smithsonian collections record for NMNH R8006-02 (MIN).': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Hawaii, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is From Table 2 of Jackson et al. (1982), The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Olivine-basalt with olivine, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is \"Count 65KAAP is from the Kaapuna branch of the 1950 flows and contains mainly olivine gabbro, troctolite, and olivine norite xenoliths.\" (Jackson et al., 1982, p. 
10), The free text description of the related URL is Smithsonian collections record for NMNH 114326-17 (PET)': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Hawaii, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is From Table 2 of Jackson et al. (1982), The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Olivine-basalt with olivine, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is \"Count 65KAAP is from the Kaapuna branch of the 1950 flows and contains mainly olivine gabbro, troctolite, and olivine norite xenoliths.\" (Jackson et al., 1982, p. 10), The free text description of the related URL is Smithsonian collections record for NMNH 114326-17 (PET).': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Nevada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Tonopah, Nye Co., Nevada, United States (http://geonames.org/5513795) based on municipality name, state/province, and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all municipality records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Tonopah, The taxonomy informal classification of sample is Andesite, The free text description of the related URL is Smithsonian collections record for NMNH 90688-77 (PET)': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Nevada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Tonopah, Nye Co., Nevada, United States (http://geonames.org/5513795) based on municipality name, state/province, and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all municipality records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Tonopah, The taxonomy informal classification of sample is Andesite, The free text description of the related URL is Smithsonian collections record for NMNH 90688-77 (PET).': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The name of the specific place where the sample was collected is Slate River, The country where the sample was collected is United States, The province where the sample was collected is CO, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Liquid, The free text to describe the collection purpose of the sample is metal and anion analysis, The detailed description of the sample is filtered at 0.6um, anaerobic, stored in glass, The name of institution, museum, or repository where the sample is currently stored is SLAC/SSRL, The city where the sample was collected is Crested Butte, The free text to add any comments pertaining to the sample is Sample name formatted [month.day.year site, depth of sample collection (cm), filter apperture (um)], The taxonomy informal classification of sample is Pore water, The type of the primary location is floodplain, The additional information about the method by which a sample was collected is Collected with Geotech peristaltic pump. 
First ~250mL purged/discarded, The method by which a sample was collected is Pumping, The name or identifier of the field program (cruise or expedition), during which the sample was collected is SLAC SFA': 'liquid water/fluid material',\n"," 'The name of the specific place where the sample was collected is Slate River, The country where the sample was collected is United States, The province where the sample was collected is CO, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Liquid, The free text to describe the collection purpose of the sample is metal and anion analysis, The detailed description of the sample is filtered at 0.6um, anaerobic, stored in glass, The name of institution, museum, or repository where the sample is currently stored is SLAC/SSRL, The city where the sample was collected is Crested Butte, The free text to add any comments pertaining to the sample is Sample name formatted [month.day.year site, depth of sample collection (cm), filter apperture (um)], The taxonomy informal classification of sample is Pore water, The type of the primary location is floodplain, The additional information about the method by which a sample was collected is Collected with Geotech peristaltic pump. 
First ~250mL purged/discarded, The method by which a sample was collected is Pumping, The name or identifier of the field program (cruise or expedition), during which the sample was collected is SLAC SFA.': 'liquid water/fluid material',\n"," 'The country where the sample was collected is Greenland, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Tunulliarfik, Qeqqata, Greenland (http://geonames.org/3419140) based on feature name and country using the situate.py script. This was the most specific match possible based on information available in this record. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 5 km was assigned to all featureCode=STM records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Erikite with monazite, The additional information about the specific place where the sample was collected is Tunugdliarfik, The free text description of the related URL is Smithsonian collections record for NMNH 95514-00 (MIN)': 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," 'The country where the sample was collected is Greenland, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Tunulliarfik, Qeqqata, Greenland (http://geonames.org/3419140) based on feature name and country using the situate.py script. This was the most specific match possible based on information available in this record. 
Point coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 5 km was assigned to all featureCode=STM records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Erikite with monazite, The additional information about the specific place where the sample was collected is Tunugdliarfik, The free text description of the related URL is Smithsonian collections record for NMNH 95514-00 (MIN).': 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," 'The country where the sample was collected is South Africa, The province where the sample was collected is Cape Province, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Kimberley, The taxonomy informal classification of sample is Harzburgite with clinopyroxene and garnet, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Bultfontein Kimberlite, The additional information about the specific place where the sample was collected is Boshof Road Dump, The free text description of the related URL is Smithsonian collections record for NMNH 117165-141 (PET)': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The country where the sample was collected is South Africa, The province where the sample was collected is Cape Province, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of 
institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Kimberley, The taxonomy informal classification of sample is Harzburgite with clinopyroxene and garnet, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Bultfontein Kimberlite, The additional information about the specific place where the sample was collected is Boshof Road Dump, The free text description of the related URL is Smithsonian collections record for NMNH 117165-141 (PET).': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The name of the specific place where the sample was collected is Vail Stone Co., The country where the sample was collected is United States, The province where the sample was collected is Indiana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Silurian, The free text description of the location is 4\\'5\" from base of C2, The free text to describe the collection purpose of the sample is Survey, Limestone Quarries of Southern Indiana, The name of institution, museum, or repository where the sample is currently stored is Indiana Geological and Water Survey, Indiana University; Bloomington, IN, The free text to add any comments pertaining to the sample is two pieces, The taxonomy informal classification of sample is Limestone, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Salamonie Dolomite; Laurel Member, The method by which a sample was collected is Manual': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The name of the specific place where the sample was collected is Vail Stone Co., The country where the sample was collected is United States, The province where the sample was collected is Indiana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Silurian, The free text description of the location is 4\\'5\" from base of C2, The free text to describe the collection purpose of the sample is Survey, Limestone Quarries of Southern Indiana, The name of institution, museum, or repository where the sample is currently stored is Indiana Geological and Water Survey, Indiana University; Bloomington, IN, The free text to add any comments pertaining to the sample is two pieces, The taxonomy informal classification of sample is Limestone, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Salamonie Dolomite; Laurel Member, The method by which a sample was collected is Manual.': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Utah, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Utah, United States (http://geonames.org/5549030) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Quartz-monzonite, The free text description of the related URL is Smithsonian collections record for NMNH 92601-19 (PET)': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Utah, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Utah, United States (http://geonames.org/5549030) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Quartz-monzonite, The free text description of the related URL is Smithsonian collections record for NMNH 92601-19 (PET).': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is Costa Rica, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Not Applicable, The taxonomy informal classification of sample is scoria, The type of the primary location is Volcano, The description of the platform for the cruise is Not Applicable, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Not Applicable': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is Costa Rica, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Not Applicable, The taxonomy informal classification of sample is scoria, The type of the primary location is Volcano, The description of the platform for the cruise is Not Applicable, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Not Applicable.': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is South Africa, The province where the sample was collected is Northern Cape Province, The object type of sample 
indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Gem Collection, Smithsonian Institution, The city where the sample was collected is Postmasburg, The taxonomy informal classification of sample is Quartz (var. carnelian), The additional information about the specific place where the sample was collected is Unknown, The free text description of the related URL is Smithsonian collections record for NMNH G7160-00 (MIN)': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is South Africa, The province where the sample was collected is Northern Cape Province, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Gem Collection, Smithsonian Institution, The city where the sample was collected is Postmasburg, The taxonomy informal classification of sample is Quartz (var. 
carnelian), The additional information about the specific place where the sample was collected is Unknown, The free text description of the related URL is Smithsonian collections record for NMNH G7160-00 (MIN).': 'natural solid material/mineral/mineral-oxide',\n"," 'The object type of sample indicates that this sample is a group of rocks collected by dragging a dredge along the seafloor, The name of institution, museum, or repository where the sample is currently stored is IPGP - Chambon la Foret, The taxonomy informal classification of sample is serpentinized dunite, The type of the primary location is mid-ocean ridge, The name or identifier of the field program (cruise or expedition), during which the sample was collected is EDUL': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The object type of sample indicates that this sample is a group of rocks collected by dragging a dredge along the seafloor, The name of institution, museum, or repository where the sample is currently stored is IPGP - Chambon la Foret, The taxonomy informal classification of sample is serpentinized dunite, The type of the primary location is mid-ocean ridge, The name or identifier of the field program (cruise or expedition), during which the sample was collected is EDUL.': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basalt glass, The type of the primary location is Sea/Gulf, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Deep Sea 
Drilling Project, The free text description of the related URL is Smithsonian collections record for NMNH 117984-96 (PET)': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basalt glass, The type of the primary location is Sea/Gulf, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Deep Sea Drilling Project, The free text description of the related URL is Smithsonian collections record for NMNH 117984-96 (PET).': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The name of the specific place where the sample was collected is UNALASKA ISLAND, ALEUTIANS, The country where the sample was collected is United States, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Specimen, The free text to describe the collection purpose of the sample is Geochemistry and Geochronology, The detailed description of the sample is felsic rock intruding volcaniclastic sediments/turbidite layers, exact relationship not clear, could also be basement of the sediment deposits, The name of institution, museum, or repository where the sample is currently stored is Lamont-Doherty Earth Observatory, The type of the primary location is Island arc': 'sediment/igneous rock/acidic igneous rock/natural solid material/rock',\n"," 'The name of the specific place where the sample was collected is UNALASKA ISLAND, ALEUTIANS, The country where the sample was collected is United States, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Specimen, The free text to describe the collection purpose of the sample is Geochemistry and Geochronology, The detailed description of the sample is felsic rock intruding volcaniclastic sediments/turbidite layers, exact relationship not clear, could also be basement of the sediment deposits, The name of institution, museum, or repository where the sample is currently stored is Lamont-Doherty Earth Observatory, The type of the primary location is Island arc.': 'sediment/igneous rock/acidic igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is French Polynesia, The province where the sample was collected is French Polynesia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Tahiti, Îles du Vent, French Polynesia (http://geonames.org/4033649) based on island name and country using the situate.py script. 
Another place name mentioned in the EMu record (Collected Above Bridge Punaruu River) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 100 km was assigned to all island records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basalt with olivine, The type of the primary location is Island, The additional information about the specific place where the sample was collected is Collected Above Bridge Punaruu River, The free text description of the related URL is Smithsonian collections record for NMNH 100890 (PET)': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is French Polynesia, The province where the sample was collected is French Polynesia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Tahiti, Îles du Vent, French Polynesia (http://geonames.org/4033649) based on island name and country using the situate.py script. Another place name mentioned in the EMu record (Collected Above Bridge Punaruu River) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 100 km was assigned to all island records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basalt with olivine, The type of the primary location is Island, The additional information about the specific place where the sample was collected is Collected Above Bridge Punaruu River, The free text description of the related URL is Smithsonian collections record for NMNH 100890 (PET).': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is China, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Thin Section, The taxonomy informal classification of sample is eclogite, The method by which a sample was collected is Manual': 'sediment/natural solid material/metamorphic rock/rock',\n"," 'The country where the sample was collected is China, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Thin Section, The taxonomy informal classification of sample is eclogite, The method by which a sample was collected is Manual.': 'sediment/natural solid material/metamorphic rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Jurassic, The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Basalt, The free text description of the related URL is None': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Jurassic, The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Basalt, The free text description of the related URL is None.': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Alaska, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is This record was georeferenced by an intern in the NMNH Information Technology Office. The precision of this georeference was estimated as Low (38). 
All Mineral Sciences records georeferenced as part of this program were assigned a minimum uncertainty of 10 km., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Limestone, The free text description of the related URL is Smithsonian collections record for NMNH 112459-19 (PET)': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Alaska, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is This record was georeferenced by an intern in the NMNH Information Technology Office. The precision of this georeference was estimated as Low (38). All Mineral Sciences records georeferenced as part of this program were assigned a minimum uncertainty of 10 km., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Limestone, The free text description of the related URL is Smithsonian collections record for NMNH 112459-19 (PET).': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Kern Co., California, United States (http://geonames.org/5362932) based on district/county name, state/province, and country 
using the situate.py script. Other place names mentioned in the EMu record (\"Lag Sample From Construction Site\" and San Joaquin Valley) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The uncertainty radius represents the center-to-corner distance of the bounding box (129 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Gravel, The additional information about the specific place where the sample was collected is lag sample from construction site, The free text description of the related URL is Smithsonian collections record for NMNH 118032-211 (PET)': 'sediment/natural solid material/gravel size sediment/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Kern Co., California, United States (http://geonames.org/5362932) based on district/county name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (\"Lag Sample From Construction Site\" and San Joaquin Valley) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The uncertainty radius represents the center-to-corner distance of the bounding box (129 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Gravel, The additional information about the specific place where the sample was collected is lag sample from construction site, The free text description of the related URL is Smithsonian collections record for NMNH 118032-211 (PET).': 'sediment/natural solid material/gravel size sediment/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py script, including Quemazon Canyon, Los Alamos Co., New Mexico, United States (n=1) and Valles Caldera, Sandoval Co., New Mexico, United States (n=1).The coordinates and error radius given here describe a circle encompassing the combination of the instances of both names with the smallest maximum distance between them (~16 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Tuff, The type of the primary location is Volcano, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Tewa > Bandelier Tuff, The additional information about the specific place where the sample was collected is Quemazon Saddle section in saddle, The free text description of the related URL is Smithsonian collections record for NMNH 117227-224 (PET)': 'sediment/igneous rock/natural solid 
material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py script, including Quemazon Canyon, Los Alamos Co., New Mexico, United States (n=1) and Valles Caldera, Sandoval Co., New Mexico, United States (n=1).The coordinates and error radius given here describe a circle encompassing the combination of the instances of both names with the smallest maximum distance between them (~16 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Tuff, The type of the primary location is Volcano, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Tewa > Bandelier Tuff, The additional information about the specific place where the sample was collected is Quemazon Saddle section in saddle, The free text description of the related URL is Smithsonian collections record for NMNH 117227-224 (PET).': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," \"The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Permian, The detailed description of the sample is Fine grained arkosic sandstone-siltstone - hornfels, above 0-5' of sill. Lower sill area. 
2' above contact., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Sandstone, The free text description of the related URL is None\": 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," \"The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Permian, The detailed description of the sample is Fine grained arkosic sandstone-siltstone - hornfels, above 0-5' of sill. Lower sill area. 2' above contact., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Sandstone, The free text description of the related URL is None.\": 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Specimen, The free text to describe the collection purpose of the sample is research, The detailed description of the sample is rhyolite pumice, Eruption Year: 871 AD, The name of institution, museum, or repository where the sample is currently stored is University of Hawaii, The free text to add any comments pertaining to the sample is GPS malfunction, navigation type: topographic map/Google Earth, The taxonomy informal classification of sample is RHYOLITE, The type of the primary location is volcanic crater, The method by which a sample was collected is Manual, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Bimodal volcanism 2001': 'sediment/igneous rock/rhyolitoid/natural solid material/fine grained igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Specimen, The free text to describe the collection purpose of the sample is research, The detailed description of the sample is rhyolite pumice, Eruption Year: 871 AD, The name of institution, museum, or repository where the sample is currently stored is University of Hawaii, The free text to add any comments pertaining to the sample is GPS malfunction, navigation type: topographic map/Google Earth, The taxonomy informal classification of sample is RHYOLITE, The type of the primary location is volcanic crater, The method by which a sample was collected is Manual, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Bimodal volcanism 2001.': 'sediment/igneous rock/rhyolitoid/natural solid material/fine grained igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is Woods Hole Oceanographic Institution (WHOI), The taxonomy informal classification of sample is Diabase, The method by which a sample was collected is Dredging, The name or identifier of the field program (cruise or expedition), during which the sample was collected is KN210-05, The free text description of the related URL is Related EarthChem Library dataset, Dredge Records for Knorr Cruise 210, Leg 5': 'sediment/igneous rock/natural solid material/doleritic rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is Woods Hole Oceanographic Institution (WHOI), The taxonomy informal classification 
of sample is Diabase, The method by which a sample was collected is Dredging, The name or identifier of the field program (cruise or expedition), during which the sample was collected is KN210-05, The free text description of the related URL is Related EarthChem Library dataset, Dredge Records for Knorr Cruise 210, Leg 5.': 'sediment/igneous rock/natural solid material/doleritic rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Pennsylvania, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Moors Mill, Cumberland Co., Pennsylvania, United States (http://geonames.org/5201875) based on feature name, district/county, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Mount Holly Springs (Near)) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Beraunite with cacoxenite, The additional information about the specific place where the sample was collected is Moors Mill, Mount Holly Springs (Near), The free text description of the related URL is Smithsonian collections record for NMNH M2176-00 (MIN)': 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Pennsylvania, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Moors Mill, Cumberland Co., Pennsylvania, United States (http://geonames.org/5201875) based on feature name, district/county, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Mount Holly Springs (Near)) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Beraunite with cacoxenite, The additional information about the specific place where the sample was collected is Moors Mill, Mount Holly Springs (Near), The free text description of the related URL is Smithsonian collections record for NMNH M2176-00 (MIN).': 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The age of a sample as described by the stratigraphic era, period, state, etc. is Neogene > Miocene, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Peridotite with serpentine, The type of the primary location is Ocean, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Deep Sea Drilling Project, The free text description of the related URL is Smithsonian collections record for NMNH 115252-53 (PET)': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Neogene > Miocene, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Peridotite with serpentine, The type of the primary location is Ocean, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Deep Sea Drilling Project, The free text description of the related URL is Smithsonian collections record for NMNH 115252-53 (PET).': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for San Juan Mountains, Mineral Co., Colorado, United States (http://geonames.org/5437674) based on feature name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Tridymite latite. New analyzed rock.) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 500 km was assigned to all featureCode=MTS records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Latite, The additional information about the specific place where the sample was collected is Tridymite latite. 
New analyzed rock, The free text description of the related URL is Smithsonian collections record for NMNH 117550-47 (PET)': 'sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for San Juan Mountains, Mineral Co., Colorado, United States (http://geonames.org/5437674) based on feature name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Tridymite latite. New analyzed rock.) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 500 km was assigned to all featureCode=MTS records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Latite, The additional information about the specific place where the sample was collected is Tridymite latite. 
New analyzed rock, The free text description of the related URL is Smithsonian collections record for NMNH 117550-47 (PET).': 'sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New York, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock Collection, Smithsonian Institution, The city where the sample was collected is Ticonderoga, The taxonomy informal classification of sample is Quartz, The type of the primary location is Mine, The free text description of the related URL is Smithsonian collections record for NMNH 88280-44 (PET)': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New York, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock Collection, Smithsonian Institution, The city where the sample was collected is Ticonderoga, The taxonomy informal classification of sample is Quartz, The type of the primary location is Mine, The free text description of the related URL is Smithsonian collections record for NMNH 88280-44 (PET).': 'natural solid material/mineral/mineral-oxide',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Jurassic, The detailed description of the sample is Baked claystone at contact., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Claystone, The free text description of the related URL is None': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Jurassic, The detailed description of the sample is Baked claystone at contact., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Claystone, The free text description of the related URL is None.': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," \"The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Pinned on collector's map, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Quartz-diorite, The additional information about the specific place where the sample was collected is Traverse up ridge S of Meno Meadows, The free text description of the related URL is Smithsonian collections record for NMNH 118309-238 (PET)\": 'dioritoid/phaneritic igneous 
rock/sediment/igneous rock/natural solid material/rock',\n"," \"The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Pinned on collector's map, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Quartz-diorite, The additional information about the specific place where the sample was collected is Traverse up ridge S of Meno Meadows, The free text description of the related URL is Smithsonian collections record for NMNH 118309-238 (PET).\": 'dioritoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for United States (http://geonames.org/6252001) using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Glassy rhyolite, The free text description of the related URL is Smithsonian collections record for NMNH 22488 (PET)': 'sediment/igneous rock/rhyolitoid/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for United States (http://geonames.org/6252001) using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Glassy rhyolite, The free text description of the related URL is Smithsonian collections record for NMNH 22488 (PET).': 'sediment/igneous rock/rhyolitoid/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Maine, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Eastport, Washington Co., Maine, United States (http://geonames.org/4963486) based on municipality name, state/province, and country using the situate.py script. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all records matched using wildcards., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Eastport Area, The taxonomy informal classification of sample is Tuff, The additional information about the specific place where the sample was collected is Locality Key: Eastport Quad, The free text description of the related URL is Smithsonian collections record for NMNH 87987-185 (PET)': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Maine, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Eastport, Washington Co., Maine, United States (http://geonames.org/4963486) based on municipality name, state/province, and country using the situate.py script. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all records matched using wildcards., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Eastport Area, The taxonomy informal classification of sample is Tuff, The additional information about the specific place where the sample was collected is Locality Key: Eastport Quad, The free text description of the related URL is Smithsonian collections record for NMNH 87987-185 (PET).': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is Norway, The province where the sample was collected is Akershus Co, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Eidsvoll, Eidsvoll, Akershus, Norway (http://geonames.org/3158608) based on municipality name, state/province, and country using the situate.py script. The script interepreted Eidsvold as a synonym for Eidsvoll. Another place name mentioned in the EMu record (Bunelo) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all municipality records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Eidsvold, Bunelo, The taxonomy informal classification of sample is Trachyte, The additional information about the specific place where the sample was collected is Locality Key: Eidsvold, Bunelo, The free text description of the related URL is Smithsonian collections record for NMNH 70562 (PET)': 'sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is Norway, The province where the sample was collected is Akershus Co, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Eidsvoll, Eidsvoll, Akershus, Norway (http://geonames.org/3158608) based on municipality name, state/province, and country using the situate.py script. The script interepreted Eidsvold as a synonym for Eidsvoll. Another place name mentioned in the EMu record (Bunelo) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all municipality records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Eidsvold, Bunelo, The taxonomy informal classification of sample is Trachyte, The additional information about the specific place where the sample was collected is Locality Key: Eidsvold, Bunelo, The free text description of the related URL is Smithsonian collections record for NMNH 70562 (PET).': 'sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Permian, The detailed description of the sample is siltstone, fossils, dark gray, thick-bedded, [?? See note] micaceous, weathers brownish red. Impressions 2 stems [?? See note] surface material., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Siltstone, The free text description of the related URL is None': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Permian, The detailed description of the sample is siltstone, fossils, dark gray, thick-bedded, [?? See note] micaceous, weathers brownish red. Impressions 2 stems [?? 
See note] surface material., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Siltstone, The free text description of the related URL is None.': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is Australia, The province where the sample was collected is New South Wales, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Sydney, Sydney, New South Wales, Australia (http://geonames.org/2147714) based on municipality name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Prospect Intrusion) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all municipality records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Sydney, The taxonomy informal classification of sample is Alkaline-olivine diabase with picrite, The free text description of the related URL is Smithsonian collections record for NMNH 112187 (PET)': 'sediment/igneous rock/natural solid material/doleritic rock/rock',\n"," 'The country where the sample was collected is Australia, The province where the sample was collected is New South Wales, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Sydney, Sydney, New South Wales, Australia (http://geonames.org/2147714) based on municipality name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Prospect Intrusion) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all municipality records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Sydney, The taxonomy informal classification of sample is Alkaline-olivine diabase with picrite, The free text description of the related URL is Smithsonian collections record for NMNH 112187 (PET).': 'sediment/igneous rock/natural solid material/doleritic rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Utah, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Coordinates for Mi Vida Mine (MRDS ID: DB00062, Deposit ID: 10015060) are from the USGS Mineral Resources Data System dataset (downloaded 2015-08-13). 
Record matched uniquely on mine name, country, state, and county., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Jet, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is 40 mi SE of Moab, The free text description of the related URL is Smithsonian collections record for NMNH 109360-04 (MIN)': 'sediment/coal/natural solid material/organic rich sedimentary rock/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Utah, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Coordinates for Mi Vida Mine (MRDS ID: DB00062, Deposit ID: 10015060) are from the USGS Mineral Resources Data System dataset (downloaded 2015-08-13). 
Record matched uniquely on mine name, country, state, and county., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Jet, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is 40 mi SE of Moab, The free text description of the related URL is Smithsonian collections record for NMNH 109360-04 (MIN).': 'sediment/coal/natural solid material/organic rich sedimentary rock/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Identified Potrillo Maar on a map provided by the New Mexico Museum of Natural History (http://www.nmnaturalhistory.org/volcanoes/potrillo-volcanic-field) and matched it to a feature on Google Maps. The error radius is based on the approximate size of the maar. 
Note that Potrillo Maar straddles the US-Mexican border., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Pyroxenite, The type of the primary location is Volcano, The free text description of the related URL is Smithsonian collections record for NMNH 118049-167 (PET)': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Identified Potrillo Maar on a map provided by the New Mexico Museum of Natural History (http://www.nmnaturalhistory.org/volcanoes/potrillo-volcanic-field) and matched it to a feature on Google Maps. The error radius is based on the approximate size of the maar. 
Note that Potrillo Maar straddles the US-Mexican border., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Pyroxenite, The type of the primary location is Volcano, The free text description of the related URL is Smithsonian collections record for NMNH 118049-167 (PET).': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Hawaii, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple records from GeoNames matched the locality Kilauea/Kīlauea Volcano, Hawaii Co., Hawaii, United States, including http://geonames.org/11611367 (featureCode=VLC) and http://geonames.org/5849380 (featureCode=MT). The situate.py script was unable to distinguish between the matched localities, and the coordinates and error radius given here describe a circle encompassing both localities. 
Another place name mentioned in the EMu record (\"From Roof of Tunnel\") could not be matched and was ignored when determining the coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Gypsum, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is From Roof Of Tunnel, The free text description of the related URL is Smithsonian collections record for NMNH 92134-1 (PET)': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Hawaii, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple records from GeoNames matched the locality Kilauea/Kīlauea Volcano, Hawaii Co., Hawaii, United States, including http://geonames.org/11611367 (featureCode=VLC) and http://geonames.org/5849380 (featureCode=MT). The situate.py script was unable to distinguish between the matched localities, and the coordinates and error radius given here describe a circle encompassing both localities. 
Another place name mentioned in the EMu record (\"From Roof of Tunnel\") could not be matched and was ignored when determining the coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Gypsum, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is From Roof Of Tunnel, The free text description of the related URL is Smithsonian collections record for NMNH 92134-1 (PET).': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The city where the sample was collected is Cripple Creek, The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department 
of Earth and Planetary Sciences (EPS), The city where the sample was collected is Cripple Creek, The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual.': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basalt glass, The type of the primary location is Ocean, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Deep Sea Drilling Project, The free text description of the related URL is Smithsonian collections record for NMNH 117896-26 (PET)': 'sediment/igneous rock/natural solid material/glass rich igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basalt glass, The type of the primary location is Ocean, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Deep Sea Drilling Project, The free text description of the related URL is Smithsonian collections record for NMNH 117896-26 (PET).': 'sediment/igneous rock/natural solid material/glass rich igneous rock/rock',\n"," 'The country 
where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Lake Co., Colorado, United States (http://geonames.org/5427799) based on district/county name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (\"California Gulch (S Of Oro City)\" and Nightingale Tunnel) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The uncertainty radius represents the center-to-corner distance of the bounding box (26 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Quartz-porphyry, The free text description of the related URL is Smithsonian collections record for NMNH 68937 (PET)': 'sediment/igneous rock/natural solid material/porphyry/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Lake Co., Colorado, United States (http://geonames.org/5427799) based on district/county name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (\"California Gulch (S Of Oro City)\" and Nightingale Tunnel) could not be matched and were ignored when determining the coordinates given here. 
Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The uncertainty radius represents the center-to-corner distance of the bounding box (26 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Quartz-porphyry, The free text description of the related URL is Smithsonian collections record for NMNH 68937 (PET).': 'sediment/igneous rock/natural solid material/porphyry/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Montana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual': 'natural solid material/mineral/mineral-native element',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Montana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, 
Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual.': 'natural solid material/mineral/mineral-native element',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Wyoming, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is Mineral Group: Smectite; Variety: bentonite; , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The city where the sample was collected is Clay Spur, The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual': 'natural solid material/mineral/mineral-silicate or germanate',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Wyoming, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is Mineral Group: Smectite; Variety: bentonite; , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The city where the sample was collected is Clay Spur, The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual.': 'natural solid material/mineral/mineral-silicate or germanate',\n"," 'The country where the sample was collected is 
United States, The province where the sample was collected is Michigan, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Ironwood, Gogebic Co., Michigan, United States (http://geonames.org/4997249) based on feature name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Aurora Mine) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Hematite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Ironwood, The free text description of the related URL is Smithsonian collections record for NMNH M8029-00 (MIN)': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Michigan, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Ironwood, Gogebic Co., Michigan, United States (http://geonames.org/4997249) based on feature name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Aurora Mine) could not be matched and was ignored when determining the coordinates given here. 
Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Hematite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Ironwood, The free text description of the related URL is Smithsonian collections record for NMNH M8029-00 (MIN).': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is Canada, The province where the sample was collected is Ontario, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple records from GeoNames matched the locality Niagara Falls, Ontario, Canada, including http://geonames.org/6087891 and http://geonames.org/6087892. The situate.py script was unable to distinguish between these localities, and the coordinates and error radius given here describe a circle encompassing both localities. 
This was the most specific place name found in this record., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Gypsum, The additional information about the specific place where the sample was collected is Niagara Falls, The free text description of the related URL is Smithsonian collections record for NMNH 118513-00 (MIN)': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The country where the sample was collected is Canada, The province where the sample was collected is Ontario, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple records from GeoNames matched the locality Niagara Falls, Ontario, Canada, including http://geonames.org/6087891 and http://geonames.org/6087892. The situate.py script was unable to distinguish between these localities, and the coordinates and error radius given here describe a circle encompassing both localities. 
This was the most specific place name found in this record., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Gypsum, The additional information about the specific place where the sample was collected is Niagara Falls, The free text description of the related URL is Smithsonian collections record for NMNH 118513-00 (MIN).': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The name of the specific place where the sample was collected is Augustine, The country where the sample was collected is United States, The province where the sample was collected is Alaska, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Augustine Island, The detailed description of the sample is juvenile pumice, on-island voluminous tephra fall >3kyr, AT-515, The name of institution, museum, or repository where the sample is currently stored is Geologic Materials Center, Anchorage AK, The taxonomy informal classification of sample is Tephra Fall, Pumice, The type of the primary location is volcano, The name of institution, museum, or repository where the sample was originally stored is Alaska Volcano Observatory': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The name of the specific place where the sample was collected is Augustine, The country where the sample was collected is United States, The province where the sample was collected is Alaska, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Augustine Island, The detailed description of the sample 
is juvenile pumice, on-island voluminous tephra fall >3kyr, AT-515, The name of institution, museum, or repository where the sample is currently stored is Geologic Materials Center, Anchorage AK, The taxonomy informal classification of sample is Tephra Fall, Pumice, The type of the primary location is volcano, The name of institution, museum, or repository where the sample was originally stored is Alaska Volcano Observatory.': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is France, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Campan Valley (S), The taxonomy informal classification of sample is Marble, The free text description of the related URL is Smithsonian collections record for NMNH 69611 (PET)': 'sediment/natural solid material/metamorphic rock/rock',\n"," 'The country where the sample was collected is France, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Campan Valley (S), The taxonomy informal classification of sample is Marble, The free text description of the related URL is Smithsonian collections record for NMNH 69611 (PET).': 'sediment/natural solid material/metamorphic rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Utah, The object type of sample indicates that this sample 
is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Washington Co., Utah, United States (http://geonames.org/5549225) based on district/county name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (\"From E Side of Leeds Anticline (W Ridge)\" and \"Silver Reef (Harrisburg)\") could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The uncertainty radius represents the center-to-corner distance of the bounding box (61 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Calcareous sandstone, The type of the primary location is Mining District, The additional information about the specific place where the sample was collected is From E Side Of Leeds Anticline (w Ridge), The free text description of the related URL is Smithsonian collections record for NMNH 109285-14 (PET)': 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Utah, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Washington Co., Utah, United States (http://geonames.org/5549225) based on district/county name, state/province, and country using the situate.py script. 
Other place names mentioned in the EMu record (\"From E Side of Leeds Anticline (W Ridge)\" and \"Silver Reef (Harrisburg)\") could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The uncertainty radius represents the center-to-corner distance of the bounding box (61 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Calcareous sandstone, The type of the primary location is Mining District, The additional information about the specific place where the sample was collected is From E Side Of Leeds Anticline (w Ridge), The free text description of the related URL is Smithsonian collections record for NMNH 109285-14 (PET).': 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," 'The country where the sample was collected is Kenya, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Mapped manually to Mt. Elgon, Kenya, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Volcanic ash, The free text description of the related URL is Smithsonian collections record for NMNH 109391-140 (PET)': 'sediment/natural solid material/tephra/rock',\n"," 'The country where the sample was collected is Kenya, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Mapped manually to Mt. 
Elgon, Kenya, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Volcanic ash, The free text description of the related URL is Smithsonian collections record for NMNH 109391-140 (PET).': 'sediment/natural solid material/tephra/rock',\n"," 'The country where the sample was collected is Switzerland, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Dolomite, The additional information about the specific place where the sample was collected is On The Rhone, Fiesch And Aernen (Between), The free text description of the related URL is Smithsonian collections record for NMNH B9665-00 (MIN)': 'natural solid material/mineral-carbonate or nitrate/mineral',\n"," 'The country where the sample was collected is Switzerland, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Dolomite, The additional information about the specific place where the sample was collected is On The Rhone, Fiesch And Aernen (Between), The free text description of the related URL is Smithsonian collections record for NMNH B9665-00 (MIN).': 'natural solid material/mineral-carbonate or nitrate/mineral',\n"," 'The country where the sample was collected is United Kingdom, The province where the sample was collected is Scotland, The object type of sample indicates 
that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Isle of Skye, Highland, Scotland, United Kingdom (http://geonames.org/2637681) based on island name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Near Meall Tuath) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 100 km was assigned to all island records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Diabase, The type of the primary location is Island, The additional information about the specific place where the sample was collected is near Meall Tuath, The free text description of the related URL is Smithsonian collections record for NMNH 118158-77 (PET)': 'sediment/igneous rock/natural solid material/doleritic rock/rock',\n"," 'The country where the sample was collected is United Kingdom, The province where the sample was collected is Scotland, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Isle of Skye, Highland, Scotland, United Kingdom (http://geonames.org/2637681) based on island name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Near Meall Tuath) could not be matched and was ignored when determining the coordinates given here. 
Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 100 km was assigned to all island records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Diabase, The type of the primary location is Island, The additional information about the specific place where the sample was collected is near Meall Tuath, The free text description of the related URL is Smithsonian collections record for NMNH 118158-77 (PET).': 'sediment/igneous rock/natural solid material/doleritic rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The age of a sample as described by the stratigraphic era, period, state, etc. is Neogene > Pliocene, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basalt, The type of the primary location is Ocean, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Deep Sea Drilling Project, The free text description of the related URL is Smithsonian collections record for NMNH 115248-55 (PET)': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Neogene > Pliocene, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basalt, The type of the primary location is Ocean, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Deep Sea Drilling Project, The free text description of the related URL is Smithsonian collections record for NMNH 115248-55 (PET).': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Olivine-basalt with olivine and basalt glass, The type of the primary location is Ocean, The additional information about the specific place where the sample was collected is Mid-Atlantic Ridge, Vema Fracture Zone, east wall, ridge valley, south of Vema fracture zone, The free text description of the related URL is Smithsonian collections record for NMNH 110756-18 (PET)': 'sediment/igneous rock/natural solid material/glass rich igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Olivine-basalt with olivine and basalt glass, The type of the primary location is Ocean, The additional information about the 
specific place where the sample was collected is Mid-Atlantic Ridge, Vema Fracture Zone, east wall, ridge valley, south of Vema fracture zone, The free text description of the related URL is Smithsonian collections record for NMNH 110756-18 (PET).': 'sediment/igneous rock/natural solid material/glass rich igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Mount Barcroft, Mono Co., California, United States (http://geonames.org/5326116) based on municipality name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (\"White Mts (Northern) - Mt Barcroft\" and \"White Mts (Northern)\") could not be matched and were ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. A minimum uncertainty of 10 km was assigned to all municipality records matched using the script. 
Matching Mt Barcroft required using before-and-after wildcards., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Mt Barcroft, The taxonomy informal classification of sample is Hornblende-biotite-quartz monzonite, The additional information about the specific place where the sample was collected is Locality Key: White Mts (Northern) - Mt Barcroft, The free text description of the related URL is Smithsonian collections record for NMNH 116147-5 (PET)': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/syenitoid/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Mount Barcroft, Mono Co., California, United States (http://geonames.org/5326116) based on municipality name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (\"White Mts (Northern) - Mt Barcroft\" and \"White Mts (Northern)\") could not be matched and were ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. A minimum uncertainty of 10 km was assigned to all municipality records matched using the script. 
Matching Mt Barcroft required using before-and-after wildcards., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Mt Barcroft, The taxonomy informal classification of sample is Hornblende-biotite-quartz monzonite, The additional information about the specific place where the sample was collected is Locality Key: White Mts (Northern) - Mt Barcroft, The free text description of the related URL is Smithsonian collections record for NMNH 116147-5 (PET).': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/syenitoid/rock',\n"," 'The country where the sample was collected is South Africa, The province where the sample was collected is Transvaal, The object type of sample indicates that this sample is long cylindrical cores, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Pyroxenite, The additional information about the specific place where the sample was collected is Winterveld adit, The method by which a sample was collected is Coring, The free text description of the related URL is Smithsonian collections record for NMNH 118321-747 (PET)': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock',\n"," 'The country where the sample was collected is South Africa, The province where the sample was collected is Transvaal, The object type of sample indicates that this sample is long cylindrical cores, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Pyroxenite, The additional information about the specific place where the sample was collected is Winterveld adit, The method by which a sample was collected 
is Coring, The free text description of the related URL is Smithsonian collections record for NMNH 118321-747 (PET).': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock',\n"," 'The country where the sample was collected is Mexico, The province where the sample was collected is Michoacán, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basaltic-andesite, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is Locality Key: Jorullo, The free text description of the related URL is Smithsonian collections record for NMNH 116595-11 (PET)': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is Mexico, The province where the sample was collected is Michoacán, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basaltic-andesite, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is Locality Key: Jorullo, The free text description of the related URL is Smithsonian collections record for NMNH 116595-11 (PET).': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The name of the specific place where the sample was collected is May 
Sand & Gravel Quarry, The country where the sample was collected is United States, The province where the sample was collected is Indiana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Mississippian, The free text description of the location is Unit 7 , The name of institution, museum, or repository where the sample is currently stored is Indiana Geological and Water Survey, Indiana University; Bloomington, IN, The taxonomy informal classification of sample is Limestone, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Salina Group; Wabash Formation; Kokomo Limestone Member, The method by which a sample was collected is Manual': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The name of the specific place where the sample was collected is May Sand & Gravel Quarry, The country where the sample was collected is United States, The province where the sample was collected is Indiana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Mississippian, The free text description of the location is Unit 7 , The name of institution, museum, or repository where the sample is currently stored is Indiana Geological and Water Survey, Indiana University; Bloomington, IN, The taxonomy informal classification of sample is Limestone, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Salina Group; Wabash Formation; Kokomo Limestone Member, The method by which a sample was collected is Manual.': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The country where the sample was collected is Mexico, The province where the sample was collected is Chihuahua, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Coordinates for Erupcion Mine (MRDS ID: MX02184, Deposit ID: 10049477) are from the USGS Mineral Resources Data System dataset (downloaded 2015-08-13). 
Record matched uniquely on the old name of a mine in the same country and state., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Anglesite with limonite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Los Lamentos, The free text description of the related URL is Smithsonian collections record for NMNH 95895-00 (MIN)': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The country where the sample was collected is Mexico, The province where the sample was collected is Chihuahua, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Coordinates for Erupcion Mine (MRDS ID: MX02184, Deposit ID: 10049477) are from the USGS Mineral Resources Data System dataset (downloaded 2015-08-13). 
Record matched uniquely on the old name of a mine in the same country and state., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Anglesite with limonite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Los Lamentos, The free text description of the related URL is Smithsonian collections record for NMNH 95895-00 (MIN).': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The name of the specific place where the sample was collected is Calais Wolds, The country where the sample was collected is United Kingdom, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text to describe the collection purpose of the sample is Sr-Pb-O isotope analysis of archaeological material., The detailed description of the sample is P1L. Adult male, The name of institution, museum, or repository where the sample is currently stored is British Geological Survey, The taxonomy informal classification of sample is Tooth': 'biogenic non-organic material',\n"," 'The name of the specific place where the sample was collected is Calais Wolds, The country where the sample was collected is United Kingdom, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text to describe the collection purpose of the sample is Sr-Pb-O isotope analysis of archaeological material., The detailed description of the sample is P1L. 
Adult male, The name of institution, museum, or repository where the sample is currently stored is British Geological Survey, The taxonomy informal classification of sample is Tooth.': 'biogenic non-organic material',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Carboniferous, The free text description of the location is Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py script, including Leadville, Lake Co., Colorado, United States (n=1) and Iron Hill, Lake Co., Colorado, United States (n=1).The coordinates and error radius given here describe a circle encompassing the combination of the instances of both names with the smallest maximum distance between them (~2.8 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Leadville, The taxonomy informal classification of sample is Dolomitic limestone with dolomite, The free text description of the related URL is Smithsonian collections record for NMNH 69446 (PET)': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Carboniferous, The free text description of the location is Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py script, including Leadville, Lake Co., Colorado, United States (n=1) and Iron Hill, Lake Co., Colorado, United States (n=1).The coordinates and error radius given here describe a circle encompassing the combination of the instances of both names with the smallest maximum distance between them (~2.8 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Leadville, The taxonomy informal classification of sample is Dolomitic limestone with dolomite, The free text description of the related URL is Smithsonian collections record for NMNH 69446 (PET).': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The country where the sample was collected is Norway, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple records from GeoNames matched the locality Fen, Norway, including http://geonames.org/3157737 (featureCode=PPL) and http://geonames.org/3157955 (featureCode=PPLL). The situate.py script was unable to distinguish between the matched localities, and the coordinates and error radius given here describe a circle encompassing both localities. 
Another place name mentioned in the EMu record (Stream Junction Hatvet Brook-Vibeto Brook) could not be matched and was ignored when determining the coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Fen Area, The taxonomy informal classification of sample is Biotite juvite with biotite, The additional information about the specific place where the sample was collected is Stream Junction Hatvet Brook-vibeto Brook, The free text description of the related URL is Smithsonian collections record for NMNH 109301 (PET)': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/foid syenitoid/rock',\n"," 'The country where the sample was collected is Norway, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple records from GeoNames matched the locality Fen, Norway, including http://geonames.org/3157737 (featureCode=PPL) and http://geonames.org/3157955 (featureCode=PPLL). The situate.py script was unable to distinguish between the matched localities, and the coordinates and error radius given here describe a circle encompassing both localities. 
Another place name mentioned in the EMu record (Stream Junction Hatvet Brook-Vibeto Brook) could not be matched and was ignored when determining the coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Fen Area, The taxonomy informal classification of sample is Biotite juvite with biotite, The additional information about the specific place where the sample was collected is Stream Junction Hatvet Brook-vibeto Brook, The free text description of the related URL is Smithsonian collections record for NMNH 109301 (PET).': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/foid syenitoid/rock',\n"," \"The country where the sample was collected is United States, The province where the sample was collected is Hawaii, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is The coordinates given by the cataloger for this locality (19°41'33''N, 156°8'34''W) are ~28 km from the coordinates given in the spreadsheet provided by HVO. 
I have updated the coordinates to match those from the spreadsheet for field number MH81-26, which is the only sample attached to this record., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Scoria, The type of the primary location is Volcano, The free text description of the related URL is Smithsonian collections record for NMNH 118326-40 (PET)\": 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," \"The country where the sample was collected is United States, The province where the sample was collected is Hawaii, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is The coordinates given by the cataloger for this locality (19°41'33''N, 156°8'34''W) are ~28 km from the coordinates given in the spreadsheet provided by HVO. 
I have updated the coordinates to match those from the spreadsheet for field number MH81-26, which is the only sample attached to this record., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Scoria, The type of the primary location is Volcano, The free text description of the related URL is Smithsonian collections record for NMNH 118326-40 (PET).\": 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is Bolivia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Granite, The additional information about the specific place where the sample was collected is Bolivia, The free text description of the related URL is Smithsonian collections record for NMNH 118362-75 (PET)': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is Bolivia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Granite, The additional information about the specific place where the sample was collected is Bolivia, The free text description of the related URL is Smithsonian collections record for NMNH 118362-75 (PET).': 'granitoid/phaneritic igneous rock/sediment/igneous 
rock/natural solid material/rock',\n"," 'The country where the sample was collected is Australia, The province where the sample was collected is Queensland, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Colin Branch, The taxonomy informal classification of sample is Welded tuff, The additional information about the specific place where the sample was collected is Queensland (north), The free text description of the related URL is Smithsonian collections record for NMNH 116417-3 (PET)': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is Australia, The province where the sample was collected is Queensland, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Colin Branch, The taxonomy informal classification of sample is Welded tuff, The additional information about the specific place where the sample was collected is Queensland (north), The free text description of the related URL is Smithsonian collections record for NMNH 116417-3 (PET).': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, 
a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Cañones, Rio Arriba Co., New Mexico, United States (http://geonames.org/5460228) based on locality name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (Jemez Mountains and 2.5 mile SE) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Canones, 2.5 mile SE, The taxonomy informal classification of sample is Basalt with olivine, The free text description of the related URL is Smithsonian collections record for NMNH 117211-27 (PET)': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Cañones, Rio Arriba Co., New Mexico, United States (http://geonames.org/5460228) based on locality name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (Jemez Mountains and 2.5 mile SE) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Canones, 2.5 mile SE, The taxonomy informal classification of sample is Basalt with olivine, The free text description of the related URL is Smithsonian collections record for NMNH 117211-27 (PET).': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Cambrian, The detailed description of the sample is Massive granite; leucogranite with biotite/garnet blebs., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Granite, The free text description of the related URL is None': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Cambrian, The detailed description of the sample is Massive granite; leucogranite with biotite/garnet blebs., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Granite, The free text description of the related URL is None.': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," \"The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Peridotite with serpentine, The type of the primary location is Ocean, The additional information about the specific place where the sample was collected is 215 Degree T - 6.09 Mi (sited By Radar Range And Bearing On St Paul's Lighthouse). Ne Slope, The free text description of the related URL is Smithsonian collections record for NMNH 110668-57 (PET)\": 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," \"The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Peridotite with serpentine, The type of the primary location is Ocean, The additional information about the specific place where the sample was collected is 215 Degree T - 6.09 Mi (sited By Radar Range And Bearing On St Paul's Lighthouse). 
Ne Slope, The free text description of the related URL is Smithsonian collections record for NMNH 110668-57 (PET).\": 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Permian, The detailed description of the sample is Silicified peat., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Peat, The free text description of the related URL is None': 'sediment/natural solid material/organic rich sedimentary rock/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Permian, The detailed description of the sample is Silicified peat., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Peat, The free text description of the related URL is None.': 'sediment/natural solid material/organic rich sedimentary rock/sedimentary rock/rock',\n"," 'The country where the sample was collected is Mexico, The province where the sample was collected is Sonora, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Wulfenite with mimetite, The free text description of the related URL is Smithsonian collections record for NMNH 155808-00 (MIN)': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The country where the sample was collected is Mexico, The province where the sample was collected is Sonora, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Wulfenite with mimetite, The free text description of the related URL is Smithsonian collections record for NMNH 155808-00 (MIN).': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Arizona, The object type of sample indicates that this sample is a sample that is an 
individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Specimen, The detailed description of the sample is Interchangeable soil subsample 1 for microbial sequencing analyses, The name of institution, museum, or repository where the sample is currently stored is Soil Microbe Collection (Bulk Subsamples) (NEONSOMC-BS), The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL': 'natural solid material/soil',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Arizona, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Specimen, The detailed description of the sample is Interchangeable soil subsample 1 for microbial sequencing analyses, The name of institution, museum, or repository where the sample is currently stored is Soil Microbe Collection (Bulk Subsamples) (NEONSOMC-BS), The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL.': 'natural solid material/soil',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Oregon, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Andesite, The type of the 
primary location is Volcano, The additional information about the specific place where the sample was collected is Found In Crater On Island In Crater Lake, The free text description of the related URL is Smithsonian collections record for NMNH 75434-85 (PET)': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Oregon, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Andesite, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is Found In Crater On Island In Crater Lake, The free text description of the related URL is Smithsonian collections record for NMNH 75434-85 (PET).': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is Chile, The province where the sample was collected is Tarapaca, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Tarapacá, Chile (http://geonames.org/3870116) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Salt ore with soda niter, The additional information about the specific place where the sample was collected is Locality Key: Tarapaca, The free text description of the related URL is Smithsonian collections record for NMNH 62111-2 (PET)': 'sediment/natural solid material/chemical sedimentary material/rock',\n"," 'The country where the sample was collected is Chile, The province where the sample was collected is Tarapaca, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Tarapacá, Chile (http://geonames.org/3870116) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Salt ore with soda niter, The additional information about the specific place where the sample was collected is Locality Key: Tarapaca, The free text description of the related URL is Smithsonian collections record for NMNH 62111-2 (PET).': 'sediment/natural solid material/chemical sedimentary material/rock',\n"," 'The country where the sample was collected is Switzerland, The province where the sample was collected is Valais, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Feld, Goms, Valais, Switzerland (http://geonames.org/2660810) based on feature name, state/province, and country using the situate.py script. The script determined that Feld (featureCode=PPL) is related to Binnatal (featureCode=VAL), another feature mentioned in this record. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Realgar with sphalerite and baryte, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Imfeld (Near), Binnental, The free text description of the related URL is Smithsonian collections record for NMNH B21449-00 (MIN)': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The country where the sample was collected is Switzerland, The province where the sample was collected is Valais, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Feld, Goms, Valais, Switzerland (http://geonames.org/2660810) based on feature name, state/province, and country using the situate.py script. The script determined that Feld (featureCode=PPL) is related to Binnatal (featureCode=VAL), another feature mentioned in this record. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Realgar with sphalerite and baryte, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Imfeld (Near), Binnental, The free text description of the related URL is Smithsonian collections record for NMNH B21449-00 (MIN).': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Cambrian, The detailed description of the sample is intraclastic bed [up arrow], The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Limestone, The method by which a sample was collected is Manual, The free text description of the related URL is None': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Cambrian, The detailed description of the sample is intraclastic bed [up arrow], The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Limestone, The method by which a sample was collected is Manual, The free text description of the related URL is None.': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The country where the sample was collected is Japan, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Gypsum, The additional information about the specific place where the sample was collected is Rikuchiu, Yuda, The free text description of the related URL is Smithsonian collections record for NMNH 61500-00 (MIN)': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The country where the sample was collected is Japan, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Gypsum, The additional information about the specific place where the sample was collected is Rikuchiu, Yuda, The free text description of the related URL is Smithsonian collections record for NMNH 61500-00 (MIN).': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The country where the sample was collected is United States, The 
province where the sample was collected is Alaska, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Silver Bay, Sitka City and Co., Alaska, United States (http://geonames.org/5557264) based on water/body name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (Near Silver Bay and Halway And Millstick Mine) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 5 km was assigned to all water/body records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Gold ore with arsenopyrite and quartz, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Near Silver Bay, The free text description of the related URL is Smithsonian collections record for NMNH 34413 (PET)': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Alaska, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Silver Bay, Sitka City and Co., Alaska, United States (http://geonames.org/5557264) based on water/body name, state/province, and country using the situate.py script. 
Other place names mentioned in the EMu record (Near Silver Bay and Halway And Millstick Mine) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 5 km was assigned to all water/body records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Gold ore with arsenopyrite and quartz, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Near Silver Bay, The free text description of the related URL is Smithsonian collections record for NMNH 34413 (PET).': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The country where the sample was collected is Dominican Republic, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Paleogene > Oligocene, The free text description of the location is Matched to the GeoNames record for Hispaniola, Dominican Republic (http://geonames.org/3504558) based on island name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 100 km was assigned to all island records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Conglomerate, The type of the primary location is Island, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Tabera > Inoa, The free text description of the related URL is Smithsonian collections record for NMNH 117763-49 (PET)': 'sediment/generic conglomerate/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is Dominican Republic, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Paleogene > Oligocene, The free text description of the location is Matched to the GeoNames record for Hispaniola, Dominican Republic (http://geonames.org/3504558) based on island name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 100 km was assigned to all island records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Conglomerate, The type of the primary location is Island, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Tabera > Inoa, The free text description of the related URL is Smithsonian collections record for NMNH 117763-49 (PET).': 'sediment/generic conglomerate/natural solid material/sedimentary rock/rock',\n"," 'The name of the specific place where the sample was collected is Magdalena River, The country where the sample was collected is Colombia, The province where the sample was collected is Bolívar, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is Filtered river water, The name of institution, museum, or repository where the sample is currently stored is WHOI - Clark - Basement Walk-in Fridge, The city where the sample was collected is Calamar, The free text to add any comments pertaining to the sample is No preservative, The additional information about the method by which a sample was collected is Syringe filtration by hand, The additional information about the specific place where the sample was collected is Intendencia Fluvial, The method by which a sample was collected is grab, 0.22 micron filtered': 'liquid water/fluid material',\n"," 'The name of the specific place where the sample was collected is Magdalena River, The country where the sample was collected is Colombia, The province where the sample was collected is Bolívar, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, 
or a bottle of fluid, The detailed description of the sample is Filtered river water, The name of institution, museum, or repository where the sample is currently stored is WHOI - Clark - Basement Walk-in Fridge, The city where the sample was collected is Calamar, The free text to add any comments pertaining to the sample is No preservative, The additional information about the method by which a sample was collected is Syringe filtration by hand, The additional information about the specific place where the sample was collected is Intendencia Fluvial, The method by which a sample was collected is grab, 0.22 micron filtered.': 'liquid water/fluid material',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Wyoming, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Norris Geyser Basin, Park Co., Wyoming, United States (http://geonames.org/5833498) based on feature name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Carnegie Hole 2) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 1 km was assigned to all featureCode=DPR records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Tuff, The additional information about the specific place where the sample was collected is Carnegie Hole 2; Yellowstone National Park, The free text description of the related URL is Smithsonian collections record for NMNH 117250-275 (PET)': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Wyoming, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Norris Geyser Basin, Park Co., Wyoming, United States (http://geonames.org/5833498) based on feature name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Carnegie Hole 2) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 1 km was assigned to all featureCode=DPR records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Tuff, The additional information about the specific place where the sample was collected is Carnegie Hole 2; Yellowstone National Park, The free text description of the related URL is Smithsonian collections record for NMNH 117250-275 (PET).': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is Philippines, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Mayon Volcano, Albay, Bicol, Philippines (http://geonames.org/1699962) based on volcano name and country using the situate.py script. Another place name mentioned in the EMu record (Luzon) appears to describe a larger, less specific feature and was ignored when determining coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all volcano records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Andesite, The type of the primary location is Volcano, The free text description of the related URL is Smithsonian collections record for NMNH 111159 (PET)': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is Philippines, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Mayon Volcano, Albay, Bicol, Philippines (http://geonames.org/1699962) based on volcano name and country using the situate.py script. Another place name mentioned in the EMu record (Luzon) appears to describe a larger, less specific feature and was ignored when determining coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all volcano records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Andesite, The type of the primary location is Volcano, The free text description of the related URL is Smithsonian collections record for NMNH 111159 (PET).': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is loose, coarse, unconsolidated material suspended in drilling fluid, The free text to describe the collection purpose of the sample is Oil or gas exploration, The detailed description of the sample is This is a sample of type CUTTINGS from an oil or gas well., The name of institution, museum, or repository where the sample is currently stored is Bureau of Economic Geology Midland Core Research Center, The method by which a sample was collected is Coring': 'natural solid material/sediment/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is loose, coarse, unconsolidated material suspended in drilling fluid, The free text to describe the collection purpose of the sample is Oil or gas exploration, The detailed description of the sample is This is a sample of type CUTTINGS from an oil or gas well., The name of institution, museum, or repository where the sample is currently stored is Bureau of Economic Geology Midland Core Research Center, The method by which a sample was collected is Coring.': 'natural solid material/sediment/rock',\n"," 'The name of the specific place where the sample was collected is 
50223200260000, The province where the sample was collected is AK, The object type of sample indicates that this sample is material occurring between unambiguous [as curated] breaks in recovery, The age of a sample as described by the stratigraphic era, period, state, etc. is Cretaceous, The free text to describe the collection purpose of the sample is Sub Slab, The detailed description of the sample is Weatherford Sub-Slab 1-4P, The name of institution, museum, or repository where the sample is currently stored is Biomarker Technologies, Inc., A body of rock established as a distinct entity in the classification of the Earth’s rocks is Hue Shale, The name of institution, museum, or repository where the sample was originally stored is Weatherford, The method by which a sample was collected is Coring': 'sediment/natural solid material/sedimentary rock/rock',\n"," 'The name of the specific place where the sample was collected is 50223200260000, The province where the sample was collected is AK, The object type of sample indicates that this sample is material occurring between unambiguous [as curated] breaks in recovery, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Cretaceous, The free text to describe the collection purpose of the sample is Sub Slab, The detailed description of the sample is Weatherford Sub-Slab 1-4P, The name of institution, museum, or repository where the sample is currently stored is Biomarker Technologies, Inc., A body of rock established as a distinct entity in the classification of the Earth’s rocks is Hue Shale, The name of institution, museum, or repository where the sample was originally stored is Weatherford, The method by which a sample was collected is Coring.': 'sediment/natural solid material/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is long cylindrical cores, The free text to describe the collection purpose of the sample is Tephrochronology, The name of institution, museum, or repository where the sample is currently stored is LSCE, The taxonomy informal classification of sample is Glass shards, The type of the primary location is continental margin, The method by which a sample was collected is Coring>PistonCorer>Giant, The name or identifier of the field program (cruise or expedition), during which the sample was collected is PACHIDERME': 'sediment/natural solid material/tephra/rock',\n"," 'The object type of sample indicates that this sample is long cylindrical cores, The free text to describe the collection purpose of the sample is Tephrochronology, The name of institution, museum, or repository where the sample is currently stored is LSCE, The taxonomy informal classification of sample is Glass shards, The type of the primary location is continental margin, The method by which a sample was collected is Coring>PistonCorer>Giant, The name or identifier of the field program (cruise or expedition), during which the sample was collected is PACHIDERME.': 'sediment/natural solid material/tephra/rock',\n"," 'The name of the specific place where the sample was collected is Willow, The country where the sample was collected is Canada, The province where 
the sample was collected is British Columbia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is Woods Hole Oceanographic Institution (WHOI), The additional information about the specific place where the sample was collected is Upper Fraser Valley Road bridge, The free text description of the related URL is Cited by': 'liquid water/fluid material',\n"," 'The name of the specific place where the sample was collected is Willow, The country where the sample was collected is Canada, The province where the sample was collected is British Columbia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is Woods Hole Oceanographic Institution (WHOI), The additional information about the specific place where the sample was collected is Upper Fraser Valley Road bridge, The free text description of the related URL is Cited by.': 'liquid water/fluid material',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is North Carolina, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Ultramafic-rock, The type of the primary location is Mine, The free text description of the related URL is Smithsonian collections record for NMNH 111290-34 (PET)': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/rock',\n"," 
'The country where the sample was collected is United States, The province where the sample was collected is North Carolina, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Ultramafic-rock, The type of the primary location is Mine, The free text description of the related URL is Smithsonian collections record for NMNH 111290-34 (PET).': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Paleogene, The detailed description of the sample is gray sandy siltstone, more consolidated horizon. Sparse fossils in situ in siltstone., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Siltstone, The free text description of the related URL is None': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Paleogene, The detailed description of the sample is gray sandy siltstone, more consolidated horizon. 
Sparse fossils in situ in siltstone., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Siltstone, The free text description of the related URL is None.': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Specimen, The detailed description of the sample is Interchangeable soil subsample 1 for microbial sequencing analyses, The name of institution, museum, or repository where the sample is currently stored is Soil Microbe Collection (Bulk Subsamples) (NEONSOMC-BS), The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL': 'natural solid material/soil',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Specimen, The detailed description of the sample is Interchangeable soil subsample 1 for microbial sequencing analyses, The name of institution, museum, or repository where the sample is currently stored is Soil Microbe Collection (Bulk Subsamples) (NEONSOMC-BS), The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL.': 'natural solid material/soil',\n"," 'The country where the sample was collected is Norway, The province where the sample was collected is Telemark, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Kragerø, Telemark, Norway (http://geonames.org/3149409) based on feature name, state/province, and country using the situate.py script. The script determined that the two sites that best match the information in this record are related and kept the less specific one (featureCode=ADM2). This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Rutile with hornblende, The additional information about the specific place where the sample was collected is Kragero, The free text description of the related URL is Smithsonian collections record for NMNH B5954-00 (MIN)': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is Norway, The province where the sample was collected is Telemark, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Kragerø, Telemark, Norway (http://geonames.org/3149409) based on feature name, state/province, and country using the situate.py script. The script determined that the two sites that best match the information in this record are related and kept the less specific one (featureCode=ADM2). This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Rutile with hornblende, The additional information about the specific place where the sample was collected is Kragero, The free text description of the related URL is Smithsonian collections record for NMNH B5954-00 (MIN).': 'natural solid material/mineral/mineral-oxide',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Triassic, The detailed description of the sample is Dark gray mudstone with plant fragments, The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Mudstone, The free text description of the related URL is None': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Triassic, The detailed description of the sample is Dark gray mudstone with plant fragments, The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Mudstone, The free text description of the related URL is None.': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Colorado, United States (http://geonames.org/5417618) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Granite, The free text description of the related URL is Smithsonian collections record for NMNH 29073 (PET)': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Colorado, United States (http://geonames.org/5417618) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Granite, The free text description of the related URL is Smithsonian collections record for NMNH 29073 (PET).': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is Ghana, The province where the sample was collected is Ashanti, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Kumasi, The taxonomy informal classification of sample is Suevite, The additional information about the specific place where the sample was collected is From Boni (buonim) Stream Bank, The free text description of the related URL is Smithsonian collections record for NMNH 116517-10 (PET)': 'sediment/impact generated material/natural solid material/rock',\n"," 'The country where the sample was collected is Ghana, The province where the sample was collected is Ashanti, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Kumasi, The taxonomy informal classification of sample is Suevite, The additional information about the specific place where the sample was collected is From Boni (buonim) Stream Bank, The free text description of the related URL 
is Smithsonian collections record for NMNH 116517-10 (PET).': 'sediment/impact generated material/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Gunnison Co., Colorado, United States (http://geonames.org/5424092) based on district/county name, state/province, and country using the situate.py script. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The uncertainty radius represents the center-to-corner distance of the bounding box (86 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Iron Hill, The taxonomy informal classification of sample is Pyroxenite, The additional information about the specific place where the sample was collected is S Beaver Creek W Side Ridge Between B And C, The free text description of the related URL is Smithsonian collections record for NMNH 108613-22 (PET)': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Gunnison Co., Colorado, United States (http://geonames.org/5424092) based on district/county name, state/province, and country using the situate.py script. 
Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The uncertainty radius represents the center-to-corner distance of the bounding box (86 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Iron Hill, The taxonomy informal classification of sample is Pyroxenite, The additional information about the specific place where the sample was collected is S Beaver Creek W Side Ridge Between B And C, The free text description of the related URL is Smithsonian collections record for NMNH 108613-22 (PET).': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock',\n"," 'The name of the specific place where the sample was collected is Usnea Plug, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is Limestone from float about 400m to east., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Limestone, The free text description of the related URL is None': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The name of the specific place where the sample was collected is Usnea Plug, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is Limestone from float about 400m to east., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy 
informal classification of sample is Limestone, The free text description of the related URL is None.': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The name of the specific place where the sample was collected is Celotex Plant & Quarry, The country where the sample was collected is United States, The province where the sample was collected is Indiana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Mississippian, The free text description of the location is Bottom of unit #1, The free text to describe the collection purpose of the sample is Survey, Limestone Quarries of Indiana, The name of institution, museum, or repository where the sample is currently stored is Indiana Geological and Water Survey, Indiana University; Bloomington, IN, The taxonomy informal classification of sample is Limestone, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Wabash Formation; Mississinewa Shale Member, The method by which a sample was collected is Manual': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The name of the specific place where the sample was collected is Celotex Plant & Quarry, The country where the sample was collected is United States, The province where the sample was collected is Indiana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Mississippian, The free text description of the location is Bottom of unit #1, The free text to describe the collection purpose of the sample is Survey, Limestone Quarries of Indiana, The name of institution, museum, or repository where the sample is currently stored is Indiana Geological and Water Survey, Indiana University; Bloomington, IN, The taxonomy informal classification of sample is Limestone, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Wabash Formation; Mississinewa Shale Member, The method by which a sample was collected is Manual.': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Liquid, The type of the primary location is Abyssal Hill Province, The additional information about the method by which a sample was collected is Sqeezer extracted sediment porewater, The method by which a sample was collected is Long Piston Core, The name or identifier of the field program (cruise or expedition), during which the sample was collected is KN223': 'liquid water/fluid material',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Liquid, The type of the primary location is Abyssal Hill Province, The additional information about the method by which a sample was collected is Sqeezer extracted sediment porewater, The method by which a sample was collected is Long Piston Core, The name or identifier of the field program (cruise or expedition), during which the sample was collected is KN223.': 'liquid water/fluid material',\n"," 'The name of the specific place where the sample was collected is Hawaiian Islands Humpback Whale National Marine Sanctuary, The object type of sample indicates that this sample is a sample (sometimes mechanically collected) from a deposit or area, not intended to be representative of the deposit or area, The type of platform for the cruise is Ship, The free text description of the location is middle finger, The free text to describe the collection purpose of the sample is research, The detailed description of the sample is calcaranite coralline algae on top, fingering porites on bottom, The name of institution, museum, or repository where the sample is currently stored is University of Hawaii, The taxonomy informal classification of sample is coral reef fragment, The type of the primary location is carbonate platform, The additional information about the method by which a sample was collected is ROV manipulator, The method by which a sample was collected is Grab>ROV, The name or identifier of the field program (cruise or expedition), during which the sample was collected is FK170825': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The name of the specific place where the sample was collected is Hawaiian Islands Humpback Whale National Marine Sanctuary, The object type of sample indicates that this sample is a sample (sometimes mechanically collected) from a deposit or area, not intended to be representative of the deposit or area, The type of platform for the cruise is Ship, The free text 
description of the location is middle finger, The free text to describe the collection purpose of the sample is research, The detailed description of the sample is calcaranite coralline algae on top, fingering porites on bottom, The name of institution, museum, or repository where the sample is currently stored is University of Hawaii, The taxonomy informal classification of sample is coral reef fragment, The type of the primary location is carbonate platform, The additional information about the method by which a sample was collected is ROV manipulator, The method by which a sample was collected is Grab>ROV, The name or identifier of the field program (cruise or expedition), during which the sample was collected is FK170825.': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for San Bernardino Co., California, United States (http://geonames.org/5391726) based on district/county name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (\"from SW side of main Dish Hill Cone\", Deadman Lake Volcanic Field, Dish Hill, and Mojave Desert Region) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The uncertainty radius represents the center-to-corner distance of the bounding box (199 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Pyroxenite with phlogopite and scoria, The additional information about the specific place where the sample was collected is from SW side of main Dish Hill Cone, The free text description of the related URL is Smithsonian collections record for NMNH 118018-200 (PET)': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for San Bernardino Co., California, United States (http://geonames.org/5391726) based on district/county name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (\"from SW side of main Dish Hill Cone\", Deadman Lake Volcanic Field, Dish Hill, and Mojave Desert Region) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The uncertainty radius represents the center-to-corner distance of the bounding box (199 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Pyroxenite with phlogopite and scoria, The additional information about the specific place where the sample was collected is from SW side of main Dish Hill Cone, The free text description of the related URL is Smithsonian collections record for NMNH 118018-200 (PET).': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Montana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Mapped coordinates and uncertainty for the locality string \"300 m E of Clear Creek\" in Blaine Co., Montana, United States using the situate.py script based on coordinates given by GeoNames. The given point was calculated based on 2 GeoNames records matching Clear Creek, Blaine Co., Montana, United States. Matching Clear Creek required using a wildcard search. Other place names mentioned in the EMu record (\"N Volcanic Field\" and \"On Sec. 26-35 Boundary, T30n, R17e\") could not be matched and were ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the calculated values., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Latite with augite and biotite, The additional information about the specific place where the sample was collected is On Sec. 
26-35 Boundary, T30n, R17e; 300m E Of Clear Creek, The free text description of the related URL is Smithsonian collections record for NMNH 117215-76 (PET)': 'sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Montana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Mapped coordinates and uncertainty for the locality string \"300 m E of Clear Creek\" in Blaine Co., Montana, United States using the situate.py script based on coordinates given by GeoNames. The given point was calculated based on 2 GeoNames records matching Clear Creek, Blaine Co., Montana, United States. Matching Clear Creek required using a wildcard search. Other place names mentioned in the EMu record (\"N Volcanic Field\" and \"On Sec. 26-35 Boundary, T30n, R17e\") could not be matched and were ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the calculated values., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Latite with augite and biotite, The additional information about the specific place where the sample was collected is On Sec. 
26-35 Boundary, T30n, R17e; 300m E Of Clear Creek, The free text description of the related URL is Smithsonian collections record for NMNH 117215-76 (PET).': 'sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample of a section of the near-surface Earth, generally in the critical zone, The detailed description of the sample is Whiskey Hills 1600 m Elevation N Aspect Sample Under-Plant 5-10 cm level, The name of institution, museum, or repository where the sample is currently stored is Idaho State University, The taxonomy informal classification of sample is Soil Sample, The type of the primary location is Subwatershed , The method by which a sample was collected is Manual, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Reynolds Critical Zone Observatory (CZO)': 'natural solid material/soil',\n"," 'The object type of sample indicates that this sample is a sample of a section of the near-surface Earth, generally in the critical zone, The detailed description of the sample is Whiskey Hills 1600 m Elevation N Aspect Sample Under-Plant 5-10 cm level, The name of institution, museum, or repository where the sample is currently stored is Idaho State University, The taxonomy informal classification of sample is Soil Sample, The type of the primary location is Subwatershed , The method by which a sample was collected is Manual, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Reynolds Critical Zone Observatory (CZO).': 'natural solid material/soil',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Maine, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text 
description of the location is Matched to the GeoNames record for Greenville, Piscataquis Co., Maine, United States (http://geonames.org/4965978) based on map name, state/province, and country using the situate.py script. The script determined that the two sites that best match the information in this record are related and kept the less specific one (featureCode=PPL). Another place name mentioned in the EMu record (Moxie Pluton) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. A minimum uncertainty of 10 km was assigned to all map records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Hornblende-gabbro with hornblende, The free text description of the related URL is Smithsonian collections record for NMNH 115081-12 (PET)': 'gabbroid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Maine, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Greenville, Piscataquis Co., Maine, United States (http://geonames.org/4965978) based on map name, state/province, and country using the situate.py script. The script determined that the two sites that best match the information in this record are related and kept the less specific one (featureCode=PPL). Another place name mentioned in the EMu record (Moxie Pluton) could not be matched and was ignored when determining the coordinates given here. 
Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. A minimum uncertainty of 10 km was assigned to all map records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Hornblende-gabbro with hornblende, The free text description of the related URL is Smithsonian collections record for NMNH 115081-12 (PET).': 'gabbroid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Identified Potrillo Maar on a map provided by the New Mexico Museum of Natural History (http://www.nmnaturalhistory.org/volcanoes/potrillo-volcanic-field) and matched it to a feature on Google Maps. The error radius is based on the approximate size of the maar. 
Note that Potrillo Maar straddles the US-Mexican border., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Feldspathic pyroxenite, The type of the primary location is Volcano, The free text description of the related URL is Smithsonian collections record for NMNH 118049-117 (PET)': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Identified Potrillo Maar on a map provided by the New Mexico Museum of Natural History (http://www.nmnaturalhistory.org/volcanoes/potrillo-volcanic-field) and matched it to a feature on Google Maps. The error radius is based on the approximate size of the maar. 
Note that Potrillo Maar straddles the US-Mexican border., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Feldspathic pyroxenite, The type of the primary location is Volcano, The free text description of the related URL is Smithsonian collections record for NMNH 118049-117 (PET).': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for White-Inyo Range, Mono Co., California, United States (http://geonames.org/7306249) based on feature name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Sierra Nevada Mts) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. A minimum uncertainty of 500 km was assigned to all featureCode=MTS records matched using the script. 
Matching Inyo Range required using before-and-after wildcards., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Shale, The additional information about the specific place where the sample was collected is Locality Key: Sierra Nevada Mts, Inyo Range, The free text description of the related URL is Smithsonian collections record for NMNH 90050-110 (PET)': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for White-Inyo Range, Mono Co., California, United States (http://geonames.org/7306249) based on feature name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Sierra Nevada Mts) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. A minimum uncertainty of 500 km was assigned to all featureCode=MTS records matched using the script. 
Matching Inyo Range required using before-and-after wildcards., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Shale, The additional information about the specific place where the sample was collected is Locality Key: Sierra Nevada Mts, Inyo Range, The free text description of the related URL is Smithsonian collections record for NMNH 90050-110 (PET).': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is Algeria, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Algeria (http://geonames.org/2589581) using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Pyrite, The free text description of the related URL is Smithsonian collections record for NMNH B2482-00 (MIN)': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The country where the sample was collected is Algeria, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Algeria (http://geonames.org/2589581) using the situate.py script. 
This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Pyrite, The free text description of the related URL is Smithsonian collections record for NMNH B2482-00 (MIN).': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Idaho, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record for Mackay District (Alder Creek District; White Knob District), Custer Co., Idaho, USA. 
URL: https://www.mindat.org/loc-3734.html., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Trachyte, The type of the primary location is Mining District, The additional information about the specific place where the sample was collected is Champion Group, Magnetite Tunnel, The free text description of the related URL is Smithsonian collections record for NMNH 98643-165 (PET)': 'sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Idaho, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record for Mackay District (Alder Creek District; White Knob District), Custer Co., Idaho, USA. 
URL: https://www.mindat.org/loc-3734.html., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Trachyte, The type of the primary location is Mining District, The additional information about the specific place where the sample was collected is Champion Group, Magnetite Tunnel, The free text description of the related URL is Smithsonian collections record for NMNH 98643-165 (PET).': 'sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock',\n"," \"The name of the specific place where the sample was collected is Indiana State Farm, The country where the sample was collected is United States, The province where the sample was collected is Indiana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Mississippian, The free text description of the location is 16' above base of unit #10, The free text to describe the collection purpose of the sample is Survey, Limestone Quarries of Indiana, The name of institution, museum, or repository where the sample is currently stored is Indiana Geological and Water Survey, Indiana University; Bloomington, IN, The free text to add any comments pertaining to the sample is two pieces, The taxonomy informal classification of sample is Limestone, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Blue River Group; Ste. 
Genevieve Limestone, The method by which a sample was collected is Manual\": 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," \"The name of the specific place where the sample was collected is Indiana State Farm, The country where the sample was collected is United States, The province where the sample was collected is Indiana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Mississippian, The free text description of the location is 16' above base of unit #10, The free text to describe the collection purpose of the sample is Survey, Limestone Quarries of Indiana, The name of institution, museum, or repository where the sample is currently stored is Indiana Geological and Water Survey, Indiana University; Bloomington, IN, The free text to add any comments pertaining to the sample is two pieces, The taxonomy informal classification of sample is Limestone, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Blue River Group; Ste. Genevieve Limestone, The method by which a sample was collected is Manual.\": 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The country where the sample was collected is Japan, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Coordinates for Akenobe Mine (Deposit ID: 10206690) are from the USGS Mineral Resources Data System dataset (downloaded 2015-08-13). 
Record matched uniquely on mine name, country, and state., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Scheelite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Hyogo, The free text description of the related URL is Smithsonian collections record for NMNH 108335-00 (MIN)': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The country where the sample was collected is Japan, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Coordinates for Akenobe Mine (Deposit ID: 10206690) are from the USGS Mineral Resources Data System dataset (downloaded 2015-08-13). Record matched uniquely on mine name, country, and state., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Scheelite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Hyogo, The free text description of the related URL is Smithsonian collections record for NMNH 108335-00 (MIN).': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Hawaii, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Honolulu, Honolulu Co., Hawaii, United States 
(http://geonames.org/5856195) based on municipality name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (\"Coll From Flow Underlying Salt Lake Tuff In Roadcut On S Side Of Moanalua Road Just E Of Road To Damon Residence\") could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The uncertainty radius represents the center-to-corner distance of the bounding box (12 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Honolulu, The taxonomy informal classification of sample is Tholeiite, The type of the primary location is Volcano, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Koolau Volcanic Series, The additional information about the specific place where the sample was collected is Coll From Flow Underlying Salt Lake Tuff In Roadcut On S Side Of Moanalua Road Just E Of Road To Damon Residence, The free text description of the related URL is Smithsonian collections record for NMNH 112521-39 (PET)': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Hawaii, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Honolulu, Honolulu Co., Hawaii, United States (http://geonames.org/5856195) based on municipality name, state/province, and country using the situate.py script. 
Another place name mentioned in the EMu record (\"Coll From Flow Underlying Salt Lake Tuff In Roadcut On S Side Of Moanalua Road Just E Of Road To Damon Residence\") could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The uncertainty radius represents the center-to-corner distance of the bounding box (12 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Honolulu, The taxonomy informal classification of sample is Tholeiite, The type of the primary location is Volcano, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Koolau Volcanic Series, The additional information about the specific place where the sample was collected is Coll From Flow Underlying Salt Lake Tuff In Roadcut On S Side Of Moanalua Road Just E Of Road To Damon Residence, The free text description of the related URL is Smithsonian collections record for NMNH 112521-39 (PET).': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for United States (http://geonames.org/6252001) using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Rhyolite, The free text description of the related URL is Smithsonian collections record for NMNH 21370 (PET)': 'sediment/igneous rock/rhyolitoid/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for United States (http://geonames.org/6252001) using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Rhyolite, The free text description of the related URL is Smithsonian collections record for NMNH 21370 (PET).': 'sediment/igneous rock/rhyolitoid/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is Mexico, The province where the sample was collected is Colima-Jalisco border, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Andesite, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is Rock-fall deposit below the front of the active block-lava flow, 3 October 2004, The free text description of the related URL is Smithsonian collections record for NMNH 117593-160 (PET)': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is Mexico, The province where the sample was collected is Colima-Jalisco border, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Andesite, The type of the primary location is Volcano, The additional information about the specific place 
where the sample was collected is Rock-fall deposit below the front of the active block-lava flow, 3 October 2004, The free text description of the related URL is Smithsonian collections record for NMNH 117593-160 (PET).': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Nevada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Goldfield, Esmeralda Co., Nevada, United States (http://geonames.org/5504812) based on municipality name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (Near Liverpool Shaft and Keelyn Shaft) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all municipality records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Goldfield, The taxonomy informal classification of sample is Andesite with hornblende and pyroxene, The additional information about the specific place where the sample was collected is Keelyn Shaft, Near Liverpool Shaft, The free text description of the related URL is Smithsonian collections record for NMNH 77404-69 (PET)': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Nevada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Goldfield, Esmeralda Co., Nevada, United States (http://geonames.org/5504812) based on municipality name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (Near Liverpool Shaft and Keelyn Shaft) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all municipality records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Goldfield, The taxonomy informal classification of sample is Andesite with hornblende and pyroxene, The additional information about the specific place where the sample was collected is Keelyn Shaft, Near Liverpool Shaft, The free text description of the related URL is Smithsonian collections record for NMNH 77404-69 (PET).': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Glassy basalt, The type of the primary location is Ocean, The additional information about the specific place where the sample was collected is East Pacific Rise, The free text description of the related URL is Smithsonian collections record for NMNH 117372-174 (PET)': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Glassy basalt, The type of the primary location is Ocean, The additional information about the 
specific place where the sample was collected is East Pacific Rise, The free text description of the related URL is Smithsonian collections record for NMNH 117372-174 (PET).': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," \"The name of the specific place where the sample was collected is Minna Hook, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Neogene, The detailed description of the sample is Platy flow at 1180' overlying AW84774. Platy, kaersutite (1%) phonolite with alteration and secondary white mineralization along platy surfaces., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Phonolite, The free text description of the related URL is None\": 'sediment/igneous rock/natural solid material/phonolitoid/fine grained igneous rock/rock',\n"," \"The name of the specific place where the sample was collected is Minna Hook, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Neogene, The detailed description of the sample is Platy flow at 1180' overlying AW84774. 
Platy, kaersutite (1%) phonolite with alteration and secondary white mineralization along platy surfaces., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Phonolite, The free text description of the related URL is None.\": 'sediment/igneous rock/natural solid material/phonolitoid/fine grained igneous rock/rock',\n"," 'The name of the specific place where the sample was collected is Tsumeb Mine, The country where the sample was collected is Namibia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is Mineral Group: Barite; , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The city where the sample was collected is Tsumeb, The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The name of the specific place where the sample was collected is Tsumeb Mine, The country where the sample was collected is Namibia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is Mineral Group: Barite; , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The city where the sample was collected is Tsumeb, The name of institution, 
museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual.': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Liquid, The type of the primary location is Abyssal Hill Province, The additional information about the method by which a sample was collected is RhizonTM extracted sediment porewater, The method by which a sample was collected is Long Piston Core, The name or identifier of the field program (cruise or expedition), during which the sample was collected is KN223': 'liquid water/fluid material',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Liquid, The type of the primary location is Abyssal Hill Province, The additional information about the method by which a sample was collected is RhizonTM extracted sediment porewater, The method by which a sample was collected is Long Piston Core, The name or identifier of the field program (cruise or expedition), during which the sample was collected is KN223.': 'liquid water/fluid material',\n"," \"The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Serpentinite with serpentine, The type of the primary location is Ocean, The additional information about the specific place where the sample was collected is Mid-Atlantic Ridge, St Paul's Rocks; Nw Slope, Sited By Radar, northwest slope of St. Paul's massif, The free text description of the related URL is Smithsonian collections record for NMNH 110728-745 (PET)\": 'sediment/natural solid material/metamorphic rock/rock',\n"," \"The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Serpentinite with serpentine, The type of the primary location is Ocean, The additional information about the specific place where the sample was collected is Mid-Atlantic Ridge, St Paul's Rocks; Nw Slope, Sited By Radar, northwest slope of St. 
Paul's massif, The free text description of the related URL is Smithsonian collections record for NMNH 110728-745 (PET).\": 'sediment/natural solid material/metamorphic rock/rock',\n"," 'The country where the sample was collected is Australia, The province where the sample was collected is South Australia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for South Australia, Australia (http://geonames.org/2061327) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Wavellite, The free text description of the related URL is Smithsonian collections record for NMNH M16603-00 (MIN)': 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," 'The country where the sample was collected is Australia, The province where the sample was collected is South Australia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for South Australia, Australia (http://geonames.org/2061327) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. 
Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Wavellite, The free text description of the related URL is Smithsonian collections record for NMNH M16603-00 (MIN).': 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," 'The name of the specific place where the sample was collected is Rosia Montana, The country where the sample was collected is Romania, The province where the sample was collected is Transylvania, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The city where the sample was collected is Baia de Aries, The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual': 'natural solid material/mineral/mineral-native element',\n"," 'The name of the specific place where the sample was collected is Rosia Montana, The country where the sample was collected is Romania, The province where the sample was collected is Transylvania, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is , The name of institution, museum, or repository where the sample is currently stored is Harvard 
University, Department of Earth and Planetary Sciences (EPS), The city where the sample was collected is Baia de Aries, The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual.': 'natural solid material/mineral/mineral-native element',\n"," 'The name of the specific place where the sample was collected is Borehole KRX068, The country where the sample was collected is Zambia, The object type of sample indicates that this sample is material occurring between unambiguous [as curated] breaks in recovery, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is granitoid': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The name of the specific place where the sample was collected is Borehole KRX068, The country where the sample was collected is Zambia, The object type of sample indicates that this sample is material occurring between unambiguous [as curated] breaks in recovery, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is granitoid.': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Cambrian, The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Limestone, The free text description of the related URL is None': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Cambrian, The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Limestone, The free text description of the related URL is None.': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The country where the sample was collected is Germany, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Wolkenburg, The taxonomy informal classification of sample is Hornblende andesite, The free text description of the related URL is Smithsonian collections record for NMNH 111123-314A (PET)': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is Germany, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or 
repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Wolkenburg, The taxonomy informal classification of sample is Hornblende andesite, The free text description of the related URL is Smithsonian collections record for NMNH 111123-314A (PET).': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Louisiana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Louisiana, Louisiana, United States (http://geonames.org/4331987) based on locality name, state/province, and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Sandstone, The additional information about the specific place where the sample was collected is Louisiana ?, The free text description of the related URL is Smithsonian collections record for NMNH 37579 (PET)': 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Louisiana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Louisiana, Louisiana, United States (http://geonames.org/4331987) based on locality name, state/province, and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Sandstone, The additional information about the specific place where the sample was collected is Louisiana ?, The free text description of the related URL is Smithsonian collections record for NMNH 37579 (PET).': 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py script, including Pikes Peak Quad, Colorado, United States (n=3) and Pikes Peak, El Paso Co., Colorado, United States (n=1). The coordinates and uncertainty given here describe a circle encompassing the combination of localities matching both place names with the smallest maximum distance between them (~5 km). Matching Pikes Peak Quad required using a wildcard search. 
Other place names mentioned in the EMu record (\"Robbins Ranch, 2 Mi Sse\", From, and Low Ridge) could not be matched and were ignored when determining the coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Granite, The additional information about the specific place where the sample was collected is Robbins Ranch, 2 Mi Sse Of; Low Ridge, E Of Main Gulch From S Pikes Peak Quad, The free text description of the related URL is Smithsonian collections record for NMNH 63682-2156 (PET)': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py script, including Pikes Peak Quad, Colorado, United States (n=3) and Pikes Peak, El Paso Co., Colorado, United States (n=1). The coordinates and uncertainty given here describe a circle encompassing the combination of localities matching both place names with the smallest maximum distance between them (~5 km). Matching Pikes Peak Quad required using a wildcard search. 
Other place names mentioned in the EMu record (\"Robbins Ranch, 2 Mi Sse\", From, and Low Ridge) could not be matched and were ignored when determining the coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Granite, The additional information about the specific place where the sample was collected is Robbins Ranch, 2 Mi Sse Of; Low Ridge, E Of Main Gulch From S Pikes Peak Quad, The free text description of the related URL is Smithsonian collections record for NMNH 63682-2156 (PET).': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Glassy basalt, The type of the primary location is Ocean, The free text description of the related URL is Smithsonian collections record for NMNH 117669-3 (PET)': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Glassy basalt, The type of the primary location is Ocean, The free text description of the related URL is Smithsonian collections record for NMNH 117669-3 (PET).': 
'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Vermont, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Mapped coordinates and uncertainty for the locality string \"1.0 mi SW of Westminster West\" in Vermont, United States using the situate.py script based on coordinates given by GeoNames. Another place name mentioned in the EMu record (\"On Improved Road Sw From Westminster\") appears to describe a larger, less specific locality and was ignored when determining coordinates given here. Point coordinates were rounded to 2 decimal places from the calculated values., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Westminster West, The taxonomy informal classification of sample is Limestone, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Waits River Formation, The additional information about the specific place where the sample was collected is On improved road S.W. from Westminster West 1.0 miles S.W. 
of Westminster West, The free text description of the related URL is Smithsonian collections record for NMNH 118288-46 (PET)': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Vermont, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Mapped coordinates and uncertainty for the locality string \"1.0 mi SW of Westminster West\" in Vermont, United States using the situate.py script based on coordinates given by GeoNames. Another place name mentioned in the EMu record (\"On Improved Road Sw From Westminster\") appears to describe a larger, less specific locality and was ignored when determining coordinates given here. Point coordinates were rounded to 2 decimal places from the calculated values., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Westminster West, The taxonomy informal classification of sample is Limestone, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Waits River Formation, The additional information about the specific place where the sample was collected is On improved road S.W. from Westminster West 1.0 miles S.W. 
of Westminster West, The free text description of the related URL is Smithsonian collections record for NMNH 118288-46 (PET).': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The country where the sample was collected is Lesotho, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Butha-Buthe, The taxonomy informal classification of sample is Xenolith with spinel-phlogopite harzburgite and clinopyroxene, The additional information about the specific place where the sample was collected is Sekameng kimberlite, 5 km SE of Butha-Buthe, The free text description of the related URL is Smithsonian collections record for NMNH 117327-241 (PET)': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The country where the sample was collected is Lesotho, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Butha-Buthe, The taxonomy informal classification of sample is Xenolith with spinel-phlogopite harzburgite and clinopyroxene, The additional information about the specific place where the sample was collected is Sekameng kimberlite, 5 km SE of Butha-Buthe, The free text description of the related URL is Smithsonian collections record for NMNH 117327-241 (PET).': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The country where the sample was collected is 
Australia, The object type of sample indicates that this sample is a sample created from pulverizing a rock to powder, The detailed description of the sample is Heavy separate.': 'sediment/natural solid material/metamorphic rock/rock',\n"," 'The country where the sample was collected is Australia, The object type of sample indicates that this sample is a sample created from pulverizing a rock to powder, The detailed description of the sample is Heavy separate..': 'sediment/natural solid material/metamorphic rock/rock',\n"," 'The country where the sample was collected is Australia, The province where the sample was collected is Victoria, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Noorat, The taxonomy informal classification of sample is Spinel lherzolite, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is Near Noorat, The free text description of the related URL is Smithsonian collections record for NMNH 117098-1 (PET)': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The country where the sample was collected is Australia, The province where the sample was collected is Victoria, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Noorat, The taxonomy informal classification of sample is Spinel lherzolite, The type of the primary location is 
Volcano, The additional information about the specific place where the sample was collected is Near Noorat, The free text description of the related URL is Smithsonian collections record for NMNH 117098-1 (PET).': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The country where the sample was collected is Mexico, The province where the sample was collected is Jalisco, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Jalisco, Mexico (http://geonames.org/4004156) based on state/province name and country using the situate.py script. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The uncertainty radius represents the center-to-corner distance of the bounding box (303 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Andesite, The free text description of the related URL is Smithsonian collections record for NMNH 117634-3 (PET)': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is Mexico, The province where the sample was collected is Jalisco, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Jalisco, Mexico (http://geonames.org/4004156) based on state/province name and country using the situate.py script. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The uncertainty radius represents the center-to-corner distance of the bounding box (303 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Andesite, The free text description of the related URL is Smithsonian collections record for NMNH 117634-3 (PET).': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is Madagascar, The province where the sample was collected is Mahajanga, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is Mineral Group: Columbite; Number of pieces: 1; Quality: display; , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The city where the sample was collected is Berere, The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is Madagascar, The province where the sample was collected is Mahajanga, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is Mineral Group: Columbite; Number of pieces: 1; Quality: display; , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The city where the sample was collected 
is Berere, The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual.': 'natural solid material/mineral/mineral-oxide',\n"," 'The name of the specific place where the sample was collected is Little Ambergris Cay, The country where the sample was collected is Turks And Caicos Islands, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The taxonomy informal classification of sample is ooid sand, The type of the primary location is carbonate platform, The additional information about the specific place where the sample was collected is channel lobe on north side of island, The method by which a sample was collected is Manual': 'sediment/natural solid material/carbonate sediment/rock',\n"," 'The name of the specific place where the sample was collected is Little Ambergris Cay, The country where the sample was collected is Turks And Caicos Islands, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The taxonomy informal classification of sample is ooid sand, The type of the primary location is carbonate platform, The additional information about the specific place where the sample was collected is channel lobe on north side of island, The method by which a sample was collected is Manual.': 'sediment/natural solid material/carbonate sediment/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description 
of the location is Matched to the GeoNames record for California, United States (http://geonames.org/5332921) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Tuff, The type of the primary location is Mining District, The additional information about the specific place where the sample was collected is Locality Key: Kramer Borate, The free text description of the related URL is Smithsonian collections record for NMNH 115613-125 (PET)': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for California, United States (http://geonames.org/5332921) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Tuff, The type of the primary location is Mining District, The additional information about the specific place where the sample was collected is Locality Key: Kramer Borate, The free text description of the related URL is Smithsonian collections record for NMNH 115613-125 (PET).': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Cambrian, The detailed description of the sample is [dark gray/tan, medium-coarse grained], The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Sandstone, The free text description of the related URL is None': 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Cambrian, The detailed description of the sample is [dark gray/tan, medium-coarse grained], The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Sandstone, The free text description of the related URL is None.': 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," 'The country where the sample was collected is Norway, The province where the sample was collected is Aust-Agder, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Arendal, Aust-Agder, Norway (http://geonames.org/6453399) based on feature name, state/province, and country using the situate.py script. The script determined that the two sites that best match the information in this record are related and kept the less specific one (featureCode=ADM2). This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Fergusonite, The additional information about the specific place where the sample was collected is Arendal, The free text description of the related URL is Smithsonian collections record for NMNH 130609-00 (MIN)': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The country where the sample was collected is Norway, The province where the sample was collected is Aust-Agder, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Arendal, Aust-Agder, Norway (http://geonames.org/6453399) based on feature name, state/province, and country using the situate.py script. The script determined that the two sites that best match the information in this record are related and kept the less specific one (featureCode=ADM2). This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Fergusonite, The additional information about the specific place where the sample was collected is Arendal, The free text description of the related URL is Smithsonian collections record for NMNH 130609-00 (MIN).': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Hawaii, The object type of sample indicates that this sample is long cylindrical cores, The free text description of the location is Coordinates from GeoNames for the Kilauea Iki Crater (https://www.geonames.org/5849363). Per Cathleen Brown, coordinates from the USGS NGDB for the Kilaeua Iki drill cores plot ~10 miles west of the crater., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Olivine-basalt with olivine, The type of the primary location is Volcano, The method by which a sample was collected is Coring, The free text description of the related URL is Smithsonian collections record for NMNH 115475-7 (PET)': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Hawaii, The object type of sample indicates that this sample is long cylindrical cores, The free text description of the location is Coordinates from GeoNames for the Kilauea Iki Crater (https://www.geonames.org/5849363). 
Per Cathleen Brown, coordinates from the USGS NGDB for the Kilaeua Iki drill cores plot ~10 miles west of the crater., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Olivine-basalt with olivine, The type of the primary location is Volcano, The method by which a sample was collected is Coring, The free text description of the related URL is Smithsonian collections record for NMNH 115475-7 (PET).': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Utah, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Richfield, Sevier Co., Utah, United States (http://geonames.org/5545710) based on feature name, state/province, and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=PPLA2 records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Campigliaite, The additional information about the specific place where the sample was collected is Richfield, The free text description of the related URL is Smithsonian collections record for NMNH 168740-00 (MIN)': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Utah, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Richfield, Sevier Co., Utah, United States (http://geonames.org/5545710) based on feature name, state/province, and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=PPLA2 records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Campigliaite, The additional information about the specific place where the sample was collected is Richfield, The free text description of the related URL is Smithsonian collections record for NMNH 168740-00 (MIN).': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The taxonomy informal classification of sample is tonalite/leucrogranite dike; contact, The name of institution, museum, or repository where the sample was originally stored is Dept of Geology, Colorado College, Colorado Springs, CO, 80903, United States, The method by which a sample was collected is Manual': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/tonalite/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The taxonomy informal classification of sample is tonalite/leucrogranite dike; contact, The name of institution, museum, or repository where the sample was originally stored is Dept of Geology, Colorado College, Colorado Springs, CO, 80903, United States, The method by which a sample was collected is Manual.': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/tonalite/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Alaska, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a 
biological specimen, or a bottle of fluid. The more descriptive object type is Specimen, The detailed description of the sample is Individual Carabid identified and pinned, The name of institution, museum, or repository where the sample is currently stored is Carabid Collection (Pinned Vouchers) (NEONCARC-PV), The taxonomy informal classification of sample is Pterostichus nivalis (R.F.Sahlberg, 1844), The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL': 'organic material',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Alaska, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Specimen, The detailed description of the sample is Individual Carabid identified and pinned, The name of institution, museum, or repository where the sample is currently stored is Carabid Collection (Pinned Vouchers) (NEONCARC-PV), The taxonomy informal classification of sample is Pterostichus nivalis (R.F.Sahlberg, 1844), The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL.': 'organic material',\n"," 'The country where the sample was collected is Nicaragua, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Not Applicable, The taxonomy informal classification of sample is lava, The type of the primary location is Volcano, The description of the platform for the cruise is Not Applicable, The 
name or identifier of the field program (cruise or expedition), during which the sample was collected is Not Applicable': 'sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is Nicaragua, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Not Applicable, The taxonomy informal classification of sample is lava, The type of the primary location is Volcano, The description of the platform for the cruise is Not Applicable, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Not Applicable.': 'sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Illinois, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text to describe the collection purpose of the sample is Chemical analysis, The name of institution, museum, or repository where the sample is currently stored is Northwestern University, The city where the sample was collected is Markham, The taxonomy informal classification of sample is Surface and near surface water, The type of the primary location is Pristine prairie, The additional information about the method by which a sample was collected is Plastic bailer, The method by which a sample was collected is Manual': 'liquid water/fluid material',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Illinois, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of 
fluid, The free text to describe the collection purpose of the sample is Chemical analysis, The name of institution, museum, or repository where the sample is currently stored is Northwestern University, The city where the sample was collected is Markham, The taxonomy informal classification of sample is Surface and near surface water, The type of the primary location is Pristine prairie, The additional information about the method by which a sample was collected is Plastic bailer, The method by which a sample was collected is Manual.': 'liquid water/fluid material',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Connecticut, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Augite, The additional information about the specific place where the sample was collected is Pound Ridge; Long Ridge Road, The free text description of the related URL is Smithsonian collections record for NMNH M1517-00 (MIN)': 'natural solid material/mineral/mineral-silicate or germanate',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Connecticut, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Augite, The additional information about the specific place where the sample was collected is Pound Ridge; Long Ridge Road, The free text description of the 
related URL is Smithsonian collections record for NMNH M1517-00 (MIN).': 'natural solid material/mineral/mineral-silicate or germanate',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Kansas, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Specimen, The detailed description of the sample is Individual Carabid identified and pinned, The name of institution, museum, or repository where the sample is currently stored is Carabid Collection (Pinned Vouchers) (NEONCARC-PV), The taxonomy informal classification of sample is Anisodactylus opaculus (LeConte, 1863), The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL': 'organic material',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Kansas, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Specimen, The detailed description of the sample is Individual Carabid identified and pinned, The name of institution, museum, or repository where the sample is currently stored is Carabid Collection (Pinned Vouchers) (NEONCARC-PV), The taxonomy informal classification of sample is Anisodactylus opaculus (LeConte, 1863), The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL.': 'organic material',\n"," 'The country where the sample was collected is United Kingdom, The province where the sample was collected is England, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is The locality \"Cornwall and Isles of Scilly\" is widely used but does not appear to correspond with a current or historical administrative division; the two localities are officially separate entities. Manually mapped the approximate extent of the two entities using GEOLocate., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Fluorite (var. 
chlorophane), The additional information about the specific place where the sample was collected is Cornwall and Isles Of Scilly, The free text description of the related URL is Smithsonian collections record for NMNH 103662-00 (MIN)': 'natural solid material/mineral-halide/mineral',\n"," 'The country where the sample was collected is United Kingdom, The province where the sample was collected is England, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is The locality \"Cornwall and Isles of Scilly\" is widely used but does not appear to correspond with a current or historical administrative division; the two localities are officially separate entities. Manually mapped the approximate extent of the two entities using GEOLocate., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Fluorite (var. chlorophane), The additional information about the specific place where the sample was collected is Cornwall and Isles Of Scilly, The free text description of the related URL is Smithsonian collections record for NMNH 103662-00 (MIN).': 'natural solid material/mineral-halide/mineral',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Mississippi, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Pontotoc Co., Mississippi, United States (http://geonames.org/4442070) based on district/county name, state/province, and country using the situate.py script. 
Another place name mentioned in the EMu record (\"Smokey Top School Tract\") could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The uncertainty radius represents the center-to-corner distance of the bounding box (25 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Smokey Top School Tract, The taxonomy informal classification of sample is Bauxite, The free text description of the related URL is Smithsonian collections record for NMNH 92393-21 (PET)': 'sediment/natural solid material/residual material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Mississippi, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Pontotoc Co., Mississippi, United States (http://geonames.org/4442070) based on district/county name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (\"Smokey Top School Tract\") could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The uncertainty radius represents the center-to-corner distance of the bounding box (25 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Smokey Top School Tract, The taxonomy informal classification of sample is Bauxite, The free text description of the related URL is Smithsonian collections record for NMNH 92393-21 (PET).': 'sediment/natural solid material/residual material/rock',\n"," 'The country where the sample was collected is Italy, The province where the sample was collected is Piedmont, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Lurisia, Provincia di Cuneo, Piedmont, Italy (http://geonames.org/3174432) based on feature name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Roccaforte Di Mondovi (Near)) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Meta-autunite with autunite, The additional information about the specific place where the sample was collected is Roccaforte Di Mondovi (Near), Lurisia, The free text description of the related URL is Smithsonian collections record for NMNH 95974-00 (MIN)': 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," 'The country where the sample was collected is Italy, The province where the sample was collected is Piedmont, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Lurisia, Provincia di Cuneo, Piedmont, Italy (http://geonames.org/3174432) based on feature name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Roccaforte Di Mondovi (Near)) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Meta-autunite with autunite, The additional information about the specific place where the sample was collected is Roccaforte Di Mondovi (Near), Lurisia, The free text description of the related URL is Smithsonian collections record for NMNH 95974-00 (MIN).': 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Arizona, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Granodiorite, The free text description of the related URL is Smithsonian collections record for NMNH 116650-179 (PET)': 'granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Arizona, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Granodiorite, The free text description of the related URL is Smithsonian collections record for NMNH 116650-179 (PET).': 'granitoid/phaneritic igneous 
rock/sediment/granodiorite/igneous rock/natural solid material/rock',\n"," 'The name of the specific place where the sample was collected is NE Lau Basin, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is ROV, The free text to describe the collection purpose of the sample is research, The detailed description of the sample is S102-sulfide-25. Most of the rest of the whole chimlet we pulled off earlier as S102-sulfide-22 on B3., The name of institution, museum, or repository where the sample is currently stored is Christian Timm; Cornel de Ronde, The taxonomy informal classification of sample is sulfide, The type of the primary location is seamount, The additional information about the method by which a sample was collected is ROV manipulator, The method by which a sample was collected is Grab, The name or identifier of the field program (cruise or expedition), during which the sample was collected is FK171110': 'sediment/massive sulphide/natural solid material/rock',\n"," 'The name of the specific place where the sample was collected is NE Lau Basin, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is ROV, The free text to describe the collection purpose of the sample is research, The detailed description of the sample is S102-sulfide-25. 
Most of the rest of the whole chimlet we pulled off earlier as S102-sulfide-22 on B3., The name of institution, museum, or repository where the sample is currently stored is Christian Timm; Cornel de Ronde, The taxonomy informal classification of sample is sulfide, The type of the primary location is seamount, The additional information about the method by which a sample was collected is ROV manipulator, The method by which a sample was collected is Grab, The name or identifier of the field program (cruise or expedition), during which the sample was collected is FK171110.': 'sediment/massive sulphide/natural solid material/rock',\n"," 'The country where the sample was collected is Russia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Kamchatka Peninsula, Kamchatka, Russia (http://geonames.org/2125073) based on feature name and country using the situate.py script. Another place name mentioned in the EMu record (River Marekanka) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 50 km was assigned to all featureCode=PEN records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Obsidian with marekanite, The free text description of the related URL is Smithsonian collections record for NMNH 117451-11 (PET)': 'sediment/igneous rock/natural solid material/glass rich igneous rock/rock',\n"," 'The country where the sample was collected is Russia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Kamchatka Peninsula, Kamchatka, Russia (http://geonames.org/2125073) based on feature name and country using the situate.py script. Another place name mentioned in the EMu record (River Marekanka) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 50 km was assigned to all featureCode=PEN records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Obsidian with marekanite, The free text description of the related URL is Smithsonian collections record for NMNH 117451-11 (PET).': 'sediment/igneous rock/natural solid material/glass rich igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Montana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Philipsburg, Granite Co., Montana, United States (http://geonames.org/5671240) based on locality name, state/province, and country using the situate.py script. The script determined that the two sites that best match the information in this record are related and kept the less specific one (featureCode=PPLA2). Other place names mentioned in the EMu record (\"1 Mi Sse of Peak\" and \"9814 Ft High\") could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all featureCode=PPLA2 records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Rhyolite tuff, The additional information about the specific place where the sample was collected is 1 Mi Sse Of Peak 9814 Ft High; Philipsburg Quad, Sw Part Of, The free text description of the related URL is Smithsonian collections record for NMNH 88211-292 (PET)': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Montana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Philipsburg, Granite Co., Montana, United States (http://geonames.org/5671240) based on locality name, state/province, and country using the situate.py script. The script determined that the two sites that best match the information in this record are related and kept the less specific one (featureCode=PPLA2). Other place names mentioned in the EMu record (\"1 Mi Sse of Peak\" and \"9814 Ft High\") could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all featureCode=PPLA2 records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Rhyolite tuff, The additional information about the specific place where the sample was collected is 1 Mi Sse Of Peak 9814 Ft High; Philipsburg Quad, Sw Part Of, The free text description of the related URL is Smithsonian collections record for NMNH 88211-292 (PET).': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is Colombia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Neogene > Miocene, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Serpentinite with carbonate, The additional information about the specific place where the sample was collected is Guajira Peninsula; Carpintero; Cabo de la Vela area, The free text description of the related URL is Smithsonian collections record for NMNH 117697-65 (PET)': 'sediment/natural solid material/metamorphic rock/rock',\n"," 'The country where the sample was collected is Colombia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Neogene > Miocene, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Serpentinite with carbonate, The additional information about the specific place where the sample was collected is Guajira Peninsula; Carpintero; Cabo de la Vela area, The free text description of the related URL is Smithsonian collections record for NMNH 117697-65 (PET).': 'sediment/natural solid material/metamorphic rock/rock',\n"," 'The country where the sample was collected is Guinea, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Fenaria, The taxonomy informal classification of sample is Kimberlite with xenolith and granulite, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Fenaria Kimberlite, The additional information about the specific place where the sample was collected is Se Guinea, The free text description of the related URL is Smithsonian collections record for NMNH 117132-9 (PET)': 'sediment/igneous rock/natural solid material/exotic composition igneous rock/rock',\n"," 'The country where the sample was collected is Guinea, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Fenaria, The taxonomy informal classification of sample is Kimberlite with xenolith and granulite, A body of rock 
established as a distinct entity in the classification of the Earth’s rocks is Fenaria Kimberlite, The additional information about the specific place where the sample was collected is Se Guinea, The free text description of the related URL is Smithsonian collections record for NMNH 117132-9 (PET).': 'sediment/igneous rock/natural solid material/exotic composition igneous rock/rock',\n"," 'The country where the sample was collected is Mexico, The province where the sample was collected is Colima-Jalisco border, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Andesite, The type of the primary location is Volcano, The free text description of the related URL is Smithsonian collections record for NMNH 117593-119 (PET)': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is Mexico, The province where the sample was collected is Colima-Jalisco border, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Andesite, The type of the primary location is Volcano, The free text description of the related URL is Smithsonian collections record for NMNH 117593-119 (PET).': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is Costa Rica, The object type of sample indicates that 
this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Arenal Volcano, Alajuela, Costa Rica (http://geonames.org/3624862) based on volcano name and country using the situate.py script. Another place name mentioned in the EMu record (Quarry) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 10 km was assigned to all volcano records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basaltic tephra with olivine and plagioclase, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is Quarry, The free text description of the related URL is Smithsonian collections record for NMNH 116233-17 (PET)': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is Costa Rica, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Arenal Volcano, Alajuela, Costa Rica (http://geonames.org/3624862) based on volcano name and country using the situate.py script. Another place name mentioned in the EMu record (Quarry) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all volcano records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basaltic tephra with olivine and plagioclase, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is Quarry, The free text description of the related URL is Smithsonian collections record for NMNH 116233-17 (PET).': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Hawaii, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Hualālai Mountain, Hawaii Co., Hawaii, United States (http://geonames.org/5856403) based on volcano name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Hawaii) appears to describe a larger, less specific feature and was ignored when determining coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The uncertainty radius represents the center-to-corner distance of the bounding box (10 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Ultramafic nodule, The type of the primary location is Volcano, The free text description of the related URL is Smithsonian collections record for NMNH 114384-26 (PET)': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Hawaii, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Hualālai Mountain, Hawaii Co., Hawaii, United States (http://geonames.org/5856403) based on volcano name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Hawaii) appears to describe a larger, less specific feature and was ignored when determining coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The uncertainty radius represents the center-to-corner distance of the bounding box (10 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Ultramafic nodule, The type of the primary location is Volcano, The free text description of the related URL is Smithsonian collections record for NMNH 114384-26 (PET).': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/rock',\n"," 'The country where the sample was collected is Colombia, The object type of sample indicates that this sample is a sample (sometimes mechanically collected) from a deposit or area, not intended to be representative of the deposit or area, The detailed description of the sample is sandstone, The name of institution, museum, or repository where the sample was originally stored is University of Texas - Austin, The method by which a sample was collected is Grab': 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," 'The country where the sample was collected is Colombia, The object type of sample indicates that this sample is a sample (sometimes mechanically collected) from a deposit or area, not intended to be representative of the deposit or area, The detailed description of the sample is sandstone, The name of institution, museum, or repository where the sample was originally stored is University of Texas - Austin, The method by which a sample was collected is Grab.': 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Michigan, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record for 
Marquette iron range, Marquette Co., Michigan, USA. URL: https://www.mindat.org/loc-125421.html., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Siltstone, The type of the primary location is Mining District, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Siamo, The additional information about the specific place where the sample was collected is Locality Key: Siamo, The free text description of the related URL is Smithsonian collections record for NMNH 113552-311 (PET)': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Michigan, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record for Marquette iron range, Marquette Co., Michigan, USA. 
URL: https://www.mindat.org/loc-125421.html., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Siltstone, The type of the primary location is Mining District, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Siamo, The additional information about the specific place where the sample was collected is Locality Key: Siamo, The free text description of the related URL is Smithsonian collections record for NMNH 113552-311 (PET).': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Montana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Warrick, The taxonomy informal classification of sample is Xenolith with spinel dunite and syenite, The additional information about the specific place where the sample was collected is Warrick Creek, 1 km SE of Warrick; SE 25, T27N, R16E, The free text description of the related URL is Smithsonian collections record for NMNH 117295-339 (PET)': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/syenitoid/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Montana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & 
Ore Collection, Smithsonian Institution, The city where the sample was collected is Warrick, The taxonomy informal classification of sample is Xenolith with spinel dunite and syenite, The additional information about the specific place where the sample was collected is Warrick Creek, 1 km SE of Warrick; SE 25, T27N, R16E, The free text description of the related URL is Smithsonian collections record for NMNH 117295-339 (PET).': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/syenitoid/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basalt glass, The type of the primary location is Ocean, The additional information about the specific place where the sample was collected is Mariana Trough back-arc basin, The free text description of the related URL is Smithsonian collections record for NMNH 117357-44 (PET)': 'sediment/igneous rock/natural solid material/glass rich igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basalt glass, The type of the primary location is Ocean, The additional information about the specific place where the sample was collected is Mariana Trough back-arc basin, The free text description of the related URL is Smithsonian collections record for NMNH 117357-44 (PET).': 'sediment/igneous 
rock/natural solid material/glass rich igneous rock/rock',\n"," 'The name of the specific place where the sample was collected is Glacier Creek Preserve, The country where the sample was collected is United States, The province where the sample was collected is Nebraska, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text to describe the collection purpose of the sample is geochemistry, The detailed description of the sample is streamwater, The city where the sample was collected is Bennington, The taxonomy informal classification of sample is Glacier Creek Preserve Stream, The type of the primary location is ridge, The additional information about the method by which a sample was collected is grab sample, The method by which a sample was collected is manual': 'liquid water/fluid material',\n"," 'The name of the specific place where the sample was collected is Glacier Creek Preserve, The country where the sample was collected is United States, The province where the sample was collected is Nebraska, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text to describe the collection purpose of the sample is geochemistry, The detailed description of the sample is streamwater, The city where the sample was collected is Bennington, The taxonomy informal classification of sample is Glacier Creek Preserve Stream, The type of the primary location is ridge, The additional information about the method by which a sample was collected is grab sample, The method by which a sample was collected is manual.': 'liquid water/fluid material',\n"," 'The country where the sample was collected is Romania, The province where the sample was collected is Transilvania Region, The object type of sample indicates that this sample is a sample 
that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Sylvanite, The additional information about the specific place where the sample was collected is Baia-De-Aries, The free text description of the related URL is Smithsonian collections record for NMNH C700-00 (MIN)': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The country where the sample was collected is Romania, The province where the sample was collected is Transilvania Region, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Sylvanite, The additional information about the specific place where the sample was collected is Baia-De-Aries, The free text description of the related URL is Smithsonian collections record for NMNH C700-00 (MIN).': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The country where the sample was collected is Czech Republic, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The city where the sample was collected is Moravia, The taxonomy informal classification of sample is tuff, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Brezina Formation': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous 
rock/rock',\n"," 'The country where the sample was collected is Czech Republic, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The city where the sample was collected is Moravia, The taxonomy informal classification of sample is tuff, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Brezina Formation.': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basalt glass, The free text description of the related URL is Smithsonian collections record for NMNH 113157-6 (PET)': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basalt glass, The free text description of the related URL is Smithsonian collections record for NMNH 113157-6 (PET).': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is Northern Mariana Islands, The object 
type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Paleogene > Eocene, The free text description of the location is Matched to the GeoNames record for Saipan, Saipan, Northern Mariana Islands (http://geonames.org/4041550) based on island name and country using the situate.py script. Other place names mentioned in the EMu record (Hagman, Ne-Central Saipan, and Near Summit of Water Tank Hill) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 100 km was assigned to all island records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Hypersthene andesite with augite and hypersthene, The type of the primary location is Island, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Hagman, The additional information about the specific place where the sample was collected is Locality Key: Hagman; Near Summit Of Water Tank Hill, Ne-central Saipan, The free text description of the related URL is Smithsonian collections record for NMNH 108982-37 (PET)': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is Northern Mariana Islands, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Paleogene > Eocene, The free text description of the location is Matched to the GeoNames record for Saipan, Saipan, Northern Mariana Islands (http://geonames.org/4041550) based on island name and country using the situate.py script. Other place names mentioned in the EMu record (Hagman, Ne-Central Saipan, and Near Summit of Water Tank Hill) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 100 km was assigned to all island records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Hypersthene andesite with augite and hypersthene, The type of the primary location is Island, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Hagman, The additional information about the specific place where the sample was collected is Locality Key: Hagman; Near Summit Of Water Tank Hill, Ne-central Saipan, The free text description of the related URL is Smithsonian collections record for NMNH 108982-37 (PET).': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is Switzerland, The province where the sample was collected is Valais, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Feld, Goms, Valais, Switzerland (http://geonames.org/2660810) based on feature name, state/province, and country using the situate.py script. 
The script determined that Feld (featureCode=PPL) is related to Binnatal (featureCode=VAL), another feature mentioned in this record. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Baryte, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Imfeld (Near), Binnental, The free text description of the related URL is Smithsonian collections record for NMNH B11399-00 (MIN)': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The country where the sample was collected is Switzerland, The province where the sample was collected is Valais, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Feld, Goms, Valais, Switzerland (http://geonames.org/2660810) based on feature name, state/province, and country using the situate.py script. The script determined that Feld (featureCode=PPL) is related to Binnatal (featureCode=VAL), another feature mentioned in this record. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Baryte, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Imfeld (Near), Binnental, The free text description of the related URL is Smithsonian collections record for NMNH B11399-00 (MIN).': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basalt with basalt glass, The type of the primary location is Ocean, The additional information about the specific place where the sample was collected is Lower S Side Of Central Part Of The Blanco Trough, The free text description of the related URL is Smithsonian collections record for NMNH 111232-159 (PET)': 'sediment/igneous rock/natural solid material/glass rich igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basalt with basalt glass, The type of the primary location is Ocean, The additional information about the specific place where the sample was 
collected is Lower S Side Of Central Part Of The Blanco Trough, The free text description of the related URL is Smithsonian collections record for NMNH 111232-159 (PET).': 'sediment/igneous rock/natural solid material/glass rich igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Jurassic, The detailed description of the sample is leucogabbro ~2 m above R6F; 3 pieces (1 is stained), The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Leucogabbro, The free text description of the related URL is None': 'gabbroid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Jurassic, The detailed description of the sample is leucogabbro ~2 m above R6F; 3 pieces (1 is stained), The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Leucogabbro, The free text description of the related URL is None.': 'gabbroid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Arkansas, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Coordinates for Big Rock Quarry (Deposit ID: 10162905) are from the USGS Mineral Resources Data System dataset (downloaded 2015-08-13). Record matched uniquely on mine name, country, state, and county., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Quartz, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is North Little Rock Quadrangle, The free text description of the related URL is Smithsonian collections record for NMNH 127016-00 (MIN)': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Arkansas, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Coordinates for Big Rock Quarry (Deposit ID: 10162905) are from the USGS Mineral Resources Data System dataset (downloaded 
2015-08-13). Record matched uniquely on mine name, country, state, and county., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Quartz, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is North Little Rock Quadrangle, The free text description of the related URL is Smithsonian collections record for NMNH 127016-00 (MIN).': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Oklahoma, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Wichita Mountains, Comanche Co., Oklahoma, United States (http://geonames.org/4555406) based on feature name, state/province, and country using the situate.py script. Matching Wichita Mts required using a wildcard search. Another place name mentioned in the EMu record (Soldiers Mt) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 500 km was assigned to all featureCode=MTS records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Hornblende syenite with hornblende, The additional information about the specific place where the sample was collected is Locality Key: Wichita Mts, Soldiers Mt, The free text description of the related URL is Smithsonian collections record for NMNH 2064 (PET)': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/syenitoid/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Oklahoma, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Wichita Mountains, Comanche Co., Oklahoma, United States (http://geonames.org/4555406) based on feature name, state/province, and country using the situate.py script. Matching Wichita Mts required using a wildcard search. Another place name mentioned in the EMu record (Soldiers Mt) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 500 km was assigned to all featureCode=MTS records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Hornblende syenite with hornblende, The additional information about the specific place where the sample was collected is Locality Key: Wichita Mts, Soldiers Mt, The free text description of the related URL is Smithsonian collections record for NMNH 2064 (PET).': 'phaneritic igneous rock/sediment/igneous rock/natural solid material/syenitoid/rock',\n"," 'The country where the sample was collected is Nicaragua, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Not Applicable, The taxonomy informal classification of sample is dike, The type of the primary location is Volcano, The description of the platform for the cruise is Not Applicable, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Not Applicable': 'sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is Nicaragua, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Not Applicable, The taxonomy informal classification of sample is dike, The type of the primary location is Volcano, The description of the platform for the cruise is Not Applicable, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Not Applicable.': 'sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample 
was collected is United States, The province where the sample was collected is Oregon, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Two results for a Tyrell Mine in Jackson Co., OR were found searching the USGS MRDS. Tyrell Mine (Deposit ID 10128740), and Tyrell Mine (Deposit ID 10032341). Tyrrell may be a misspelling of Tyrell. could not find any results for a Tyrrell. URL: https://mrdata.usgs.gov/mrds/show-mrds.php?dep_id=10128740., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basalt with manganese ore, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Locality Key: Lake Creek, The free text description of the related URL is Smithsonian collections record for NMNH 92068-31 (PET)': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Oregon, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Two results for a Tyrell Mine in Jackson Co., OR were found searching the USGS MRDS. Tyrell Mine (Deposit ID 10128740), and Tyrell Mine (Deposit ID 10032341). Tyrrell may be a misspelling of Tyrell. could not find any results for a Tyrrell. 
URL: https://mrdata.usgs.gov/mrds/show-mrds.php?dep_id=10128740., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basalt with manganese ore, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Locality Key: Lake Creek, The free text description of the related URL is Smithsonian collections record for NMNH 92068-31 (PET).': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple records from GeoNames matched the locality Jemez Mountains, New Mexico, United States, including http://geonames.org/5473293 (featureCode=MTS), http://geonames.org/5473294 (featureCode=HSTS), http://geonames.org/5473295 (featureCode=HSTS), http://geonames.org/5473296 (featureCode=LNDF), and http://geonames.org/9884974 (featureCode=MTS). The situate.py script was unable to distinguish between the matched localities, and the coordinates and error radius given here describe a circle encompassing all 5 localities. Matching Jemez Mountains required using a wildcard search. Other place names mentioned in the EMu record (\"1/4 mi. 
SE BM 8475\" and \"E of Boyd Ranch\") could not be matched and were ignored when determining the coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Andesite, The additional information about the specific place where the sample was collected is 1/4 mi. SE BM 8475, E of Boyd Ranch, The free text description of the related URL is Smithsonian collections record for NMNH 117226-408 (PET)': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple records from GeoNames matched the locality Jemez Mountains, New Mexico, United States, including http://geonames.org/5473293 (featureCode=MTS), http://geonames.org/5473294 (featureCode=HSTS), http://geonames.org/5473295 (featureCode=HSTS), http://geonames.org/5473296 (featureCode=LNDF), and http://geonames.org/9884974 (featureCode=MTS). The situate.py script was unable to distinguish between the matched localities, and the coordinates and error radius given here describe a circle encompassing all 5 localities. Matching Jemez Mountains required using a wildcard search. Other place names mentioned in the EMu record (\"1/4 mi. 
SE BM 8475\" and \"E of Boyd Ranch\") could not be matched and were ignored when determining the coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Andesite, The additional information about the specific place where the sample was collected is 1/4 mi. SE BM 8475, E of Boyd Ranch, The free text description of the related URL is Smithsonian collections record for NMNH 117226-408 (PET).': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is College of Charleston (CofC), The type of the primary location is Seamount, The name or identifier of the field program (cruise or expedition), during which the sample was collected is R.V. Thompson TT 080 (1973)': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is College of Charleston (CofC), The type of the primary location is Seamount, The name or identifier of the field program (cruise or expedition), during which the sample was collected is R.V. 
Thompson TT 080 (1973).': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Glassy basalt, The free text description of the related URL is Smithsonian collections record for NMNH 117609-22 (PET)': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Glassy basalt, The free text description of the related URL is Smithsonian collections record for NMNH 117609-22 (PET).': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The name of the specific place where the sample was collected is Slate River, The country where the sample was collected is United States, The province where the sample was collected is CO, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Liquid, The free text to describe the collection purpose of the sample is metal and anion analysis, The detailed description of the sample is filtered at 0.6um, anaerobic, stored in glass, The name of institution, museum, or repository where the sample is currently stored is SLAC/SSRL, The city where the sample was collected is Crested Butte, The free text to add any comments pertaining to the sample is Sample name formatted [month.day.year site, depth of sample collection (cm), filter apperture (um)], The taxonomy informal classification of sample is Pore water, The type of the primary location is floodplain, The additional information about the method by which a sample was collected is 60 mL of vacuum created with syringe and stopcock, attched to hose barb on well tip. First 5-10 mL purged/discarded, The method by which a sample was collected is Suction>60mLSyringe, The name or identifier of the field program (cruise or expedition), during which the sample was collected is SLAC SFA': 'liquid water/fluid material',\n"," 'The name of the specific place where the sample was collected is Slate River, The country where the sample was collected is United States, The province where the sample was collected is CO, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Liquid, The free text to describe the collection purpose of the sample is metal and anion analysis, The detailed description of the sample is filtered at 0.6um, anaerobic, stored in glass, The name of institution, museum, or repository where the sample is currently stored is SLAC/SSRL, The city where the sample was collected is Crested Butte, The free text to add any comments pertaining to the sample is Sample name formatted [month.day.year site, depth of sample collection (cm), filter apperture (um)], The taxonomy informal classification of sample is Pore water, The type of the primary location is floodplain, The additional information about the method by which a sample was collected is 60 mL of vacuum created with syringe and stopcock, attched to hose barb on well tip. First 5-10 mL purged/discarded, The method by which a sample was collected is Suction>60mLSyringe, The name or identifier of the field program (cruise or expedition), during which the sample was collected is SLAC SFA.': 'liquid water/fluid material',\n"," 'The country where the sample was collected is United States, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Tuff, The type of the primary location is Island, The name or identifier of the field program (cruise or expedition), during which the sample was collected is EX - Japanese Mandated Islands, The free text description of the related URL is Smithsonian collections record for NMNH 116180-49 (PET)': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is United States, The object type of sample indicates that 
this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Tuff, The type of the primary location is Island, The name or identifier of the field program (cruise or expedition), during which the sample was collected is EX - Japanese Mandated Islands, The free text description of the related URL is Smithsonian collections record for NMNH 116180-49 (PET).': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is United Kingdom, The province where the sample was collected is Scotland, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Island of Mull, Argyll and Bute, Scotland, United Kingdom (http://geonames.org/2641992) based on island name, state/province, and country using the situate.py script. This was the most specific match possible based on information available in this record. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 100 km was assigned to all island records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Mull I, The taxonomy informal classification of sample is Allivalite, The type of the primary location is Island, The free text description of the related URL is Smithsonian collections record for NMNH 92086-18 (PET)': 'gabbroid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United Kingdom, The province where the sample was collected is Scotland, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Island of Mull, Argyll and Bute, Scotland, United Kingdom (http://geonames.org/2641992) based on island name, state/province, and country using the situate.py script. This was the most specific match possible based on information available in this record. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 100 km was assigned to all island records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Mull I, The taxonomy informal classification of sample is Allivalite, The type of the primary location is Island, The free text description of the related URL is Smithsonian collections record for NMNH 92086-18 (PET).': 'gabbroid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple records from GeoNames matched the locality Kilbourne Hole, Doña Ana Co., New Mexico, United States, including http://geonames.org/5474409 and http://geonames.org/5474410. 
The situate.py script was unable to distinguish between these localities, and the coordinates and error radius given here describe a circle encompassing both localities., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basalt nodule, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is Kilbourne Hole, The free text description of the related URL is Smithsonian collections record for NMNH 114026-38 (PET)': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple records from GeoNames matched the locality Kilbourne Hole, Doña Ana Co., New Mexico, United States, including http://geonames.org/5474409 and http://geonames.org/5474410. 
The situate.py script was unable to distinguish between these localities, and the coordinates and error radius given here describe a circle encompassing both localities., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basalt nodule, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is Kilbourne Hole, The free text description of the related URL is Smithsonian collections record for NMNH 114026-38 (PET).': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Virginia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record with a similar name, Cold Springs Pit, Augusta Co., Virginia, USA. Entering the coordinates for this locality into Google Maps places this locality near the town of Greenville, VA. Data manager assigned an arbitrary error radius of 1 km. 
URL: https://www.mindat.org/loc-102973.html., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Greenville, The taxonomy informal classification of sample is Clay, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is bottom of mine in cut connecting N and S pits; E of Greenville, The free text description of the related URL is Smithsonian collections record for NMNH 116317-23 (PET)': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Virginia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record with a similar name, Cold Springs Pit, Augusta Co., Virginia, USA. Entering the coordinates for this locality into Google Maps places this locality near the town of Greenville, VA. Data manager assigned an arbitrary error radius of 1 km. 
URL: https://www.mindat.org/loc-102973.html., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Greenville, The taxonomy informal classification of sample is Clay, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is bottom of mine in cut connecting N and S pits; E of Greenville, The free text description of the related URL is Smithsonian collections record for NMNH 116317-23 (PET).': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," \"The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Pinned on collector's map, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Hornfels, The free text description of the related URL is Smithsonian collections record for NMNH 118308-475 (PET)\": 'sediment/natural solid material/metamorphic rock/rock',\n"," \"The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Pinned on collector's map, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Hornfels, The free text 
description of the related URL is Smithsonian collections record for NMNH 118308-475 (PET).\": 'sediment/natural solid material/metamorphic rock/rock',\n"," 'The country where the sample was collected is China, The province where the sample was collected is Inner Mongolia Autonomous Region, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Baotou City, The taxonomy informal classification of sample is Ore, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Bayan Obo ore deposit: Southern portion of East Ore Body, The free text description of the related URL is Smithsonian collections record for NMNH 118223-241 (PET)': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The country where the sample was collected is China, The province where the sample was collected is Inner Mongolia Autonomous Region, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Baotou City, The taxonomy informal classification of sample is Ore, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Bayan Obo ore deposit: Southern portion of East Ore Body, The free text description of the related URL is Smithsonian collections record for NMNH 118223-241 (PET).': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The country 
where the sample was collected is United States, The province where the sample was collected is Washington, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Washington, D.C., United States (http://geonames.org/4138106) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Shale, The additional information about the specific place where the sample was collected is Locality Key: Tacoma Quad, The free text description of the related URL is Smithsonian collections record for NMNH 88102-11 (PET)': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Washington, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Washington, D.C., United States (http://geonames.org/4138106) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Shale, The additional information about the specific place where the sample was collected is Locality Key: Tacoma Quad, The free text description of the related URL is Smithsonian collections record for NMNH 88102-11 (PET).': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Alaska, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Alaska, Co., Alaska, United States (http://geonames.org/5879092) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 100 km was assigned to all state/province records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Quartz with epidote, The free text description of the related URL is Smithsonian collections record for NMNH 155029-00 (MIN)': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Alaska, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Alaska, Co., Alaska, United States (http://geonames.org/5879092) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 100 km was assigned to all state/province records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Quartz with epidote, The free text description of the related URL is Smithsonian collections record for NMNH 155029-00 (MIN).': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for New Mexico, United States (http://geonames.org/5481136) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Quartz-monzonite, The type of the primary location is Mining District, The additional information about the specific place where the sample was collected is Locality Key: Tyrone, The free text description of the related URL is Smithsonian collections record for NMNH 107618-9 (PET)': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for New Mexico, United States (http://geonames.org/5481136) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Quartz-monzonite, The type of the primary location is Mining District, The additional information about the specific place where the sample was collected is Locality Key: Tyrone, The free text description of the related URL is Smithsonian collections record for NMNH 107618-9 (PET).': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basalt glass, The type of the primary location is Ocean, The name or identifier of the field program (cruise or expedition), during which the sample was collected is NOAA - Metallogenesis, The free text description of the related URL is Smithsonian collections record for NMNH 115291-1 (PET)': 'sediment/igneous rock/natural solid material/glass rich igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basalt glass, The type of the primary location is Ocean, The name or identifier of the field program (cruise or expedition), during which the sample was 
collected is NOAA - Metallogenesis, The free text description of the related URL is Smithsonian collections record for NMNH 115291-1 (PET).': 'sediment/igneous rock/natural solid material/glass rich igneous rock/rock',\n"," 'The country where the sample was collected is Norway, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Chromite with siderochromite, The additional information about the specific place where the sample was collected is Droutheim, The free text description of the related URL is Smithsonian collections record for NMNH R1995-00 (MIN)': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is Norway, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Chromite with siderochromite, The additional information about the specific place where the sample was collected is Droutheim, The free text description of the related URL is Smithsonian collections record for NMNH R1995-00 (MIN).': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Alaska, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Specimen, The detailed description of the sample is Interchangeable soil subsample 4 for microbial sequencing analyses, The name of institution, museum, or repository where the sample is currently stored is Soil Microbe Collection (Bulk Subsamples) (NEONSOMC-BS), The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL': 'natural solid material/soil',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Alaska, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Specimen, The detailed description of the sample is Interchangeable soil subsample 4 for microbial sequencing analyses, The name of institution, museum, or repository where the sample is currently stored is Soil Microbe Collection (Bulk Subsamples) (NEONSOMC-BS), The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL.': 'natural solid material/soil',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Tennessee, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Specimen, The detailed description of the sample is Subsample of Carabid adults collected and sent to archive facility, The name of institution, museum, or repository where the sample is currently stored is Carabid Collection (Archive Pooling) (NEONCARC-AP), The taxonomy informal classification of sample is Dicaelus dilatatus sinuatus Ball, The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL': 'organic material',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Tennessee, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Specimen, The detailed description of the sample is Subsample of Carabid adults collected and sent to archive facility, The name of institution, museum, or repository where the sample is currently stored is Carabid Collection (Archive Pooling) (NEONCARC-AP), The taxonomy informal classification of sample is Dicaelus dilatatus sinuatus Ball, The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL.': 'organic material',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Texas, The object type of sample indicates that this sample is long cylindrical cores, The free text to describe the collection purpose of the sample is Oil and gas exploration, The detailed description of the sample is This is a sample of type SLABBED CORE from an oil or gas well., The name of institution, museum, or repository where the sample 
is currently stored is Bureau of Economic Geology Midland Core Research Center, The type of the primary location is Oil or Gas Field, The method by which a sample was collected is Coring': 'natural solid material/sediment/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Texas, The object type of sample indicates that this sample is long cylindrical cores, The free text to describe the collection purpose of the sample is Oil and gas exploration, The detailed description of the sample is This is a sample of type SLABBED CORE from an oil or gas well., The name of institution, museum, or repository where the sample is currently stored is Bureau of Economic Geology Midland Core Research Center, The type of the primary location is Oil or Gas Field, The method by which a sample was collected is Coring.': 'natural solid material/sediment/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Utah, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Utah, United States (http://geonames.org/5549030) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Chlorargyrite, The additional information about the specific place where the sample was collected is Locality Key: Stockton-Fairfield Quad, The free text description of the related URL is Smithsonian collections record for NMNH 115503-76 (PET)': 'natural solid material/mineral-halide/mineral',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Utah, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Utah, United States (http://geonames.org/5549030) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Chlorargyrite, The additional information about the specific place where the sample was collected is Locality Key: Stockton-Fairfield Quad, The free text description of the related URL is Smithsonian collections record for NMNH 115503-76 (PET).': 'natural solid material/mineral-halide/mineral',\n"," 'The country where the sample was collected is Indonesia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basanite with olivine, The type of the primary location is Island, The additional information about the specific place where the sample was collected is Shore .5 Mi W Of Tandjoeng Anjer, The free text description of the related URL is Smithsonian collections record for NMNH 100511 (PET)': 'tephritoid/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is Indonesia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basanite with olivine, The type of the primary location is Island, The additional information about the specific place where the sample was collected is Shore .5 Mi W Of Tandjoeng Anjer, The free text 
description of the related URL is Smithsonian collections record for NMNH 100511 (PET).': 'tephritoid/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The name of the specific place where the sample was collected is Mine La Motte, The country where the sample was collected is United States, The province where the sample was collected is Missouri, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is Mineral Group: Linnaeite; , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The city where the sample was collected is Viburnum Trend, The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The name of the specific place where the sample was collected is Mine La Motte, The country where the sample was collected is United States, The province where the sample was collected is Missouri, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is Mineral Group: Linnaeite; , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The city where the sample was collected is Viburnum Trend, The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The 
method by which a sample was collected is Manual.': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The country where the sample was collected is Bolivia, The province where the sample was collected is Potosi, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Chorolque, Potosí, Bolivia (http://geonames.org/3920403) based on locality name, state/province, and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Bismuth with bismite, The additional information about the specific place where the sample was collected is Chorolque (Mtn.), The free text description of the related URL is Smithsonian collections record for NMNH 94992-00 (MIN)': 'natural solid material/mineral/mineral-native element',\n"," 'The country where the sample was collected is Bolivia, The province where the sample was collected is Potosi, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Chorolque, Potosí, Bolivia (http://geonames.org/3920403) based on locality name, state/province, and country using the situate.py script. This was the most specific match possible based on information available in this record. 
Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Bismuth with bismite, The additional information about the specific place where the sample was collected is Chorolque (Mtn.), The free text description of the related URL is Smithsonian collections record for NMNH 94992-00 (MIN).': 'natural solid material/mineral/mineral-native element',\n"," 'The name of the specific place where the sample was collected is southern Midland Basin, The country where the sample was collected is United States, The province where the sample was collected is TX, The object type of sample indicates that this sample is material occurring between unambiguous [as curated] breaks in recovery, The age of a sample as described by the stratigraphic era, period, state, etc. is early Permian, The free text description of the location is R. 
Ricker 1 well, The free text to describe the collection purpose of the sample is fossil ID, The detailed description of the sample is core sample, The name of institution, museum, or repository where the sample is currently stored is Nonvertebrate Paleontology Lab, UT at Austin, The free text to add any comments pertaining to the sample is Elevation of kelly bushing = 2683 ft, The taxonomy informal classification of sample is mudrock, The additional information about the method by which a sample was collected is broken end of core sawed off, A body of rock established as a distinct entity in the classification of the Earth’s rocks is lower Leonard, The name of institution, museum, or repository where the sample was originally stored is Core Research Center, BEG, UT at Austin, The method by which a sample was collected is manual': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The name of the specific place where the sample was collected is southern Midland Basin, The country where the sample was collected is United States, The province where the sample was collected is TX, The object type of sample indicates that this sample is material occurring between unambiguous [as curated] breaks in recovery, The age of a sample as described by the stratigraphic era, period, state, etc. is early Permian, The free text description of the location is R. 
Ricker 1 well, The free text to describe the collection purpose of the sample is fossil ID, The detailed description of the sample is core sample, The name of institution, museum, or repository where the sample is currently stored is Nonvertebrate Paleontology Lab, UT at Austin, The free text to add any comments pertaining to the sample is Elevation of kelly bushing = 2683 ft, The taxonomy informal classification of sample is mudrock, The additional information about the method by which a sample was collected is broken end of core sawed off, A body of rock established as a distinct entity in the classification of the Earth’s rocks is lower Leonard, The name of institution, museum, or repository where the sample was originally stored is Core Research Center, BEG, UT at Austin, The method by which a sample was collected is manual.': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record for Blue Jay Mine, Jamestown, Jamestown District, Boulder Co., Colorado, USA. Data manager assigned an arbitrary error radius of 1 km. 
URL: https://www.mindat.org/loc-8757.html., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Fluorine ore with fluorite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Locality Key: Jamestown, The free text description of the related URL is Smithsonian collections record for NMNH 98249-4 (PET)': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record for Blue Jay Mine, Jamestown, Jamestown District, Boulder Co., Colorado, USA. Data manager assigned an arbitrary error radius of 1 km. 
URL: https://www.mindat.org/loc-8757.html., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Fluorine ore with fluorite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Locality Key: Jamestown, The free text description of the related URL is Smithsonian collections record for NMNH 98249-4 (PET).': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The object type of sample indicates that this sample is a group of rocks collected by dragging a dredge along the seafloor, The detailed description of the sample is [rounded, equigranular, medium grained granite/granodiorite], The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Granite, The method by which a sample was collected is Otter Trawl, The free text description of the related URL is None': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The object type of sample indicates that this sample is a group of rocks collected by dragging a dredge along the seafloor, The detailed description of the sample is [rounded, equigranular, medium grained granite/granodiorite], The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Granite, The method by which a sample was collected is Otter Trawl, The free text description of the related URL is None.': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an 
individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Meissen, Saxony, Germany (http://geonames.org/2872155) based on municipality name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (Elbe and Elbe River) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. A minimum uncertainty of 10 km was assigned to all municipality records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Granite, The additional information about the specific place where the sample was collected is Meissen, East Germany, Saxony, East Germany. 2.5 Km Below Meissen On Left Bank Of Elbe., The free text description of the related URL is Smithsonian collections record for NMNH 114511 (PET)': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Meissen, Saxony, Germany (http://geonames.org/2872155) based on municipality name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (Elbe and Elbe River) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all municipality records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Granite, The additional information about the specific place where the sample was collected is Meissen, East Germany, Saxony, East Germany. 2.5 Km Below Meissen On Left Bank Of Elbe., The free text description of the related URL is Smithsonian collections record for NMNH 114511 (PET).': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The name of the specific place where the sample was collected is Western mound, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is ROV, The free text description of the location is Hydrothemal vent field, The free text to describe the collection purpose of the sample is Petrophysical analysis, The detailed description of the sample is Porous. 
Heterogeneous, The name of institution, museum, or repository where the sample is currently stored is NTNU, The taxonomy informal classification of sample is Hydrothermal, The type of the primary location is hydrothermal vent, The additional information about the method by which a sample was collected is 1 large boulder parted in smaller pieces, The method by which a sample was collected is Grab>ROV, The name or identifier of the field program (cruise or expedition), during which the sample was collected is MARMINE': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The name of the specific place where the sample was collected is Western mound, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is ROV, The free text description of the location is Hydrothemal vent field, The free text to describe the collection purpose of the sample is Petrophysical analysis, The detailed description of the sample is Porous. 
Heterogeneous, The name of institution, museum, or repository where the sample is currently stored is NTNU, The taxonomy informal classification of sample is Hydrothermal, The type of the primary location is hydrothermal vent, The additional information about the method by which a sample was collected is 1 large boulder parted in smaller pieces, The method by which a sample was collected is Grab>ROV, The name or identifier of the field program (cruise or expedition), during which the sample was collected is MARMINE.': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Arizona, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Vanadinite, The type of the primary location is Mine, The free text description of the related URL is Smithsonian collections record for NMNH M32824-00 (MIN)': 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Arizona, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Vanadinite, The type of the primary location is Mine, The free text description of the related URL is Smithsonian collections record for NMNH M32824-00 (MIN).': 'natural solid material/mineral/mineral-phosphate, arsenate, 
or vanadate',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is [sandstone clasts, light-brown and gray, rounded to sub rounded sand-sized grains, grain dominated. Slightly friable] Sample of first debris flow [above] fine grained hard sand., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Sandstone, The method by which a sample was collected is Manual, The free text description of the related URL is None': 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is [sandstone clasts, light-brown and gray, rounded to sub rounded sand-sized grains, grain dominated. 
Slightly friable] Sample of first debris flow [above] fine grained hard sand., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Sandstone, The method by which a sample was collected is Manual, The free text description of the related URL is None.': 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," 'The country where the sample was collected is Mexico, The province where the sample was collected is Guanajuato, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Cassiterite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Santa Catarina (Near), Cerro De Las Fajas, The free text description of the related URL is Smithsonian collections record for NMNH R8028-46 (MIN)': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is Mexico, The province where the sample was collected is Guanajuato, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Cassiterite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Santa Catarina (Near), Cerro De Las Fajas, The free text description 
of the related URL is Smithsonian collections record for NMNH R8028-46 (MIN).': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Leadville, Lake Co., Colorado, United States (http://geonames.org/5428184) based on municipality name, district/county, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Dome Incline) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 10 km was assigned to all municipality records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Leadville, The taxonomy informal classification of sample is Chert, The additional information about the specific place where the sample was collected is Dome Incline, The free text description of the related URL is Smithsonian collections record for NMNH 66013 (PET)': 'sediment/non clastic siliceous sedimentary rock/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Leadville, Lake Co., Colorado, United States 
(http://geonames.org/5428184) based on municipality name, district/county, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Dome Incline) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 10 km was assigned to all municipality records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Leadville, The taxonomy informal classification of sample is Chert, The additional information about the specific place where the sample was collected is Dome Incline, The free text description of the related URL is Smithsonian collections record for NMNH 66013 (PET).': 'sediment/non clastic siliceous sedimentary rock/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py script, including Rosita, Custer Co., Colorado, United States (n=2) and Game Ridge, Custer Co., Colorado, United States (n=1). The coordinates and error radius given here describe a circle encompassing the combination of sites matching both names with the smallest maximum distance between them (~1.3 km). 
Another place name mentioned in the EMu record (Silver Cliff Region) could not be matched and was ignored when determining the coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Rosita, The taxonomy informal classification of sample is Trachyte with feldspar and hornblende, The additional information about the specific place where the sample was collected is Silver Cliff Region; Near Rosita, The free text description of the related URL is Smithsonian collections record for NMNH 70605-2 (PET)': 'sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py script, including Rosita, Custer Co., Colorado, United States (n=2) and Game Ridge, Custer Co., Colorado, United States (n=1). The coordinates and error radius given here describe a circle encompassing the combination of sites matching both names with the smallest maximum distance between them (~1.3 km). 
Another place name mentioned in the EMu record (Silver Cliff Region) could not be matched and was ignored when determining the coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Rosita, The taxonomy informal classification of sample is Trachyte with feldspar and hornblende, The additional information about the specific place where the sample was collected is Silver Cliff Region; Near Rosita, The free text description of the related URL is Smithsonian collections record for NMNH 70605-2 (PET).': 'sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is Australia, The province where the sample was collected is Victoria, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Camperdown, Corangamite, Victoria, Australia (http://geonames.org/2172562) based on feature name, state/province, and country using the situate.py script. The script determined that the two sites that best match the information in this record are related and kept the less specific one (featureCode=PPL). Other place names mentioned in the EMu record (\"Stop 14-1 N Shoreline of Lake\", Lake Bulienmerri, Lake Bullenmerri/Lake Gnotuk Maar Complex, Locality 14, and Newer Volcanics Province) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Camperdown, The taxonomy informal classification of sample is Xenolith with kaersutite and lherzolite, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is Locality 14, Stop 14-1 N Shoreline Of Lake; Near Camperdown, The free text description of the related URL is Smithsonian collections record for NMNH 116605-16 (PET)': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The country where the sample was collected is Australia, The province where the sample was collected is Victoria, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Camperdown, Corangamite, Victoria, Australia (http://geonames.org/2172562) based on feature name, state/province, and country using the situate.py script. The script determined that the two sites that best match the information in this record are related and kept the less specific one (featureCode=PPL). Other place names mentioned in the EMu record (\"Stop 14-1 N Shoreline of Lake\", Lake Bulienmerri, Lake Bullenmerri/Lake Gnotuk Maar Complex, Locality 14, and Newer Volcanics Province) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Camperdown, The taxonomy informal classification of sample is Xenolith with kaersutite and lherzolite, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is Locality 14, Stop 14-1 N Shoreline Of Lake; Near Camperdown, The free text description of the related URL is Smithsonian collections record for NMNH 116605-16 (PET).': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Jurassic, The detailed description of the sample is Lapilli tuff, The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Tuff, The free text description of the related URL is None': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Jurassic, The detailed description of the sample is Lapilli tuff, The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Tuff, The free text description of the related URL is None.': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Washington, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Specimen, The detailed description of the sample is Interchangeable soil subsample 4 for microbial sequencing analyses, The name of institution, museum, or repository where the sample is currently stored is Soil Microbe Collection (Bulk Subsamples) (NEONSOMC-BS), The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL': 'natural solid material/soil',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Washington, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Specimen, The detailed description of the sample is Interchangeable soil subsample 4 for microbial sequencing analyses, The name of institution, museum, or repository where the sample is currently stored is Soil Microbe Collection (Bulk Subsamples) (NEONSOMC-BS), The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL.': 'natural solid material/soil',\n"," 'The country where the sample was collected is Canada, The province where the sample was collected is British Columbia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Coordinates for Sullivan Mine (MRDS ID: W018870, Deposit ID: 10073061) are from the USGS Mineral Resources Data System dataset (downloaded 2015-08-13). 
Record matched uniquely on mine name, country, and state., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Kimberley, The taxonomy informal classification of sample is Laminated chert with schorl, The type of the primary location is Mine, The free text description of the related URL is Smithsonian collections record for NMNH 108892-9 (PET)': 'sediment/non clastic siliceous sedimentary rock/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is Canada, The province where the sample was collected is British Columbia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Coordinates for Sullivan Mine (MRDS ID: W018870, Deposit ID: 10073061) are from the USGS Mineral Resources Data System dataset (downloaded 2015-08-13). 
Record matched uniquely on mine name, country, and state., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Kimberley, The taxonomy informal classification of sample is Laminated chert with schorl, The type of the primary location is Mine, The free text description of the related URL is Smithsonian collections record for NMNH 108892-9 (PET).': 'sediment/non clastic siliceous sedimentary rock/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is Germany, The province where the sample was collected is Saxony, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Altenberg, Saxony, Germany (http://geonames.org/6550825) based on feature name, state/province, and country using the situate.py script. The script determined that the two sites that best match the information in this record are related and kept the less specific one (featureCode=ADM4). This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 15 km was assigned to all featureCode=ADM4 records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Prosopite with hematite, The additional information about the specific place where the sample was collected is Altenberg, The free text description of the related URL is Smithsonian collections record for NMNH B8553-00 (MIN)': 'natural solid material/mineral-halide/mineral',\n"," 'The country where the sample was collected is Germany, The province where the sample was collected is Saxony, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Altenberg, Saxony, Germany (http://geonames.org/6550825) based on feature name, state/province, and country using the situate.py script. The script determined that the two sites that best match the information in this record are related and kept the less specific one (featureCode=ADM4). This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 15 km was assigned to all featureCode=ADM4 records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Prosopite with hematite, The additional information about the specific place where the sample was collected is Altenberg, The free text description of the related URL is Smithsonian collections record for NMNH B8553-00 (MIN).': 'natural solid material/mineral-halide/mineral',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Peridotite with brown hornblende and serpentine, The type of the primary location is Ocean, The additional information about the specific place where the sample was collected is Nw Slope, Sited By Radar, The free text description of the related URL is Smithsonian collections record for NMNH 110755-229 (PET)': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Peridotite with brown hornblende and serpentine, The type of the primary location is Ocean, The additional information about the specific place where the sample was collected is Nw Slope, Sited By 
Radar, The free text description of the related URL is Smithsonian collections record for NMNH 110755-229 (PET).': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The name of the specific place where the sample was collected is Rabbit Mountain, The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is Soil solution, The name of institution, museum, or repository where the sample is currently stored is Department of Environmental Science, University of Arizona, The taxonomy informal classification of sample is 2011 Burned ZOB Pedon 5, The type of the primary location is Resurgent dome, The additional information about the method by which a sample was collected is Prenart Super Quartz suction cups soil water sampler., The method by which a sample was collected is Prenart , The name or identifier of the field program (cruise or expedition), during which the sample was collected is Catalina-Jemez Critical Zone Observatory (CZO)': 'liquid water/fluid material',\n"," 'The name of the specific place where the sample was collected is Rabbit Mountain, The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is Soil solution, The name of institution, museum, or repository where the sample is currently stored is Department of Environmental Science, University of Arizona, The taxonomy informal classification of sample is 2011 Burned ZOB Pedon 5, The type of the primary location is Resurgent dome, The 
additional information about the method by which a sample was collected is Prenart Super Quartz suction cups soil water sampler., The method by which a sample was collected is Prenart , The name or identifier of the field program (cruise or expedition), during which the sample was collected is Catalina-Jemez Critical Zone Observatory (CZO).': 'liquid water/fluid material',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Cambrian, The detailed description of the sample is Granitoid from low in cliff., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Granodiorite, The free text description of the related URL is None': 'granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Cambrian, The detailed description of the sample is Granitoid from low in cliff., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Granodiorite, The free text description of the related URL is None.': 'granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Pennsylvania, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is Mineral Group: Turquoise; Variety: coeruleolacitite; , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual': 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Pennsylvania, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is Mineral Group: Turquoise; Variety: coeruleolacitite; , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The name of institution, museum, or repository where the sample was originally stored is Department of 
Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual.': 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," 'The country where the sample was collected is Germany, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Braunschweig, Lower Saxony, Germany (http://geonames.org/6552297) based on feature name and country using the situate.py script. The script determined that the two sites that best match the information in this record are related and kept the less specific one (featureCode=ADM4). Another place name mentioned in the EMu record (Grasleben Quarry) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 15 km was assigned to all featureCode=ADM4 records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Halite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Braunschweig, The free text description of the related URL is Smithsonian collections record for NMNH 159575-00 (MIN)': 'natural solid material/mineral-halide/mineral',\n"," 'The country where the sample was collected is Germany, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Braunschweig, Lower Saxony, Germany (http://geonames.org/6552297) based on feature name and country using the situate.py script. The script determined that the two sites that best match the information in this record are related and kept the less specific one (featureCode=ADM4). Another place name mentioned in the EMu record (Grasleben Quarry) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 15 km was assigned to all featureCode=ADM4 records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Halite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Braunschweig, The free text description of the related URL is Smithsonian collections record for NMNH 159575-00 (MIN).': 'natural solid material/mineral-halide/mineral',\n"," 'The name of the specific place where the sample was collected is Komati Fm, The country where the sample was collected is South Africa, The province where the sample was collected is Univ Witwatersrand, The object type of sample indicates that this sample is long cylindrical cores, The age of a sample as described by the stratigraphic era, period, state, etc. is Paleoarchaean, The free text description of the location is Tjakastad, Barberton Greenstone Belt, The free text to describe the collection purpose of the sample is UW: Wilson - petrology, geochemistry, The detailed description of the sample is harrisite, The name of institution, museum, or repository where the sample is currently stored is University of Johannesburg (UJ), The free text to add any comments pertaining to the sample is BARB 1 ref 6, The taxonomy informal classification of sample is harrisite, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Komati Formation, The additional information about the specific place where the sample was collected is BARB1, The method by which a sample was collected is Coring': 'gabbroid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The name of the specific place where the sample was collected is Komati Fm, The country where the sample was collected is South Africa, The province where the 
sample was collected is Univ Witwatersrand, The object type of sample indicates that this sample is long cylindrical cores, The age of a sample as described by the stratigraphic era, period, state, etc. is Paleoarchaean, The free text description of the location is Tjakastad, Barberton Greenstone Belt, The free text to describe the collection purpose of the sample is UW: Wilson - petrology, geochemistry, The detailed description of the sample is harrisite, The name of institution, museum, or repository where the sample is currently stored is University of Johannesburg (UJ), The free text to add any comments pertaining to the sample is BARB 1 ref 6, The taxonomy informal classification of sample is harrisite, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Komati Formation, The additional information about the specific place where the sample was collected is BARB1, The method by which a sample was collected is Coring.': 'gabbroid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is SHIP, The free text description of the location is Across PTDZ wall in the nodal basin, The detailed description of the sample is protogranular peridotite, The name of institution, museum, or repository where the sample is currently stored is Litoteque de Institut Universitaire Européen de la Mer, 29280 Plouzané France, The taxonomy informal classification of sample is peridotite, The type of the primary location is Eastern RTI Romanche FZ, The additional information about the method by which a sample was collected is Nautile dive, The method by which a sample was collected is submersible, The name or identifier of the field program (cruise or expedition), during which the sample was collected is SMARTIES': 
'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is SHIP, The free text description of the location is Across PTDZ wall in the nodal basin, The detailed description of the sample is protogranular peridotite, The name of institution, museum, or repository where the sample is currently stored is Litoteque de Institut Universitaire Européen de la Mer, 29280 Plouzané France, The taxonomy informal classification of sample is peridotite, The type of the primary location is Eastern RTI Romanche FZ, The additional information about the method by which a sample was collected is Nautile dive, The method by which a sample was collected is submersible, The name or identifier of the field program (cruise or expedition), during which the sample was collected is SMARTIES.': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Washington, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple records from GeoNames matched the locality St Helens-Mt, WA, United States, including http://geonames.org/5804107 (featureCode=?), http://geonames.org/5804124 (featureCode=?), and http://geonames.org/5809218 (featureCode=MT). The situate.py script was unable to distinguish between the matched localities, and the coordinates and error radius given here describe a circle encompassing all 3 localities. Matching St Helens-Mt required using a wildcard search. 
Other place names mentioned in the EMu record (12 and Headwaters of Toutle River Creek and Avalanche) could not be matched and were ignored when determining the coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Dacite, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is E Side Of Pumice Pond. June 12 Headwaters Of Toutle River Creek And Avalanche, The free text description of the related URL is Smithsonian collections record for NMNH 115418-25 (PET)': 'sediment/dacite/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Washington, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple records from GeoNames matched the locality St Helens-Mt, WA, United States, including http://geonames.org/5804107 (featureCode=?), http://geonames.org/5804124 (featureCode=?), and http://geonames.org/5809218 (featureCode=MT). The situate.py script was unable to distinguish between the matched localities, and the coordinates and error radius given here describe a circle encompassing all 3 localities. Matching St Helens-Mt required using a wildcard search. 
Other place names mentioned in the EMu record (12 and Headwaters of Toutle River Creek and Avalanche) could not be matched and were ignored when determining the coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Dacite, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is E Side Of Pumice Pond. June 12 Headwaters Of Toutle River Creek And Avalanche, The free text description of the related URL is Smithsonian collections record for NMNH 115418-25 (PET).': 'sediment/dacite/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Montana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Coordinates are from the USGS USTopoAvailability web service for the Fort Benton Quadrangle, MT. 
Another quadrangle with a similar name is also listed (Fort Benton NW) The radius corresponds to the center-to-corner distance of the quadrangle., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Trachyandesite, The free text description of the related URL is Smithsonian collections record for NMNH 74721-72 (PET)': 'sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Montana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Coordinates are from the USGS USTopoAvailability web service for the Fort Benton Quadrangle, MT. Another quadrangle with a similar name is also listed (Fort Benton NW) The radius corresponds to the center-to-corner distance of the quadrangle., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Trachyandesite, The free text description of the related URL is Smithsonian collections record for NMNH 74721-72 (PET).': 'sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record for National Belle Mine, Red Mountain, Red Mountain District, Ouray Co., Colorado, USA. 
National Bell Mine is listed as an alternate name for this locality. Data manager assigned an arbitrary error radius of 1 km. URL: https://www.mindat.org/loc-16842.html., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Enargite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Red Mtn. Pass, The free text description of the related URL is Smithsonian collections record for NMNH 114761-00 (MIN)': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record for National Belle Mine, Red Mountain, Red Mountain District, Ouray Co., Colorado, USA. National Bell Mine is listed as an alternate name for this locality. Data manager assigned an arbitrary error radius of 1 km. URL: https://www.mindat.org/loc-16842.html., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Enargite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Red Mtn. 
Pass, The free text description of the related URL is Smithsonian collections record for NMNH 114761-00 (MIN).': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The country where the sample was collected is Canada, The province where the sample was collected is Yukon, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is polymicitic conglomerate': 'sediment/generic conglomerate/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is Canada, The province where the sample was collected is Yukon, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is polymicitic conglomerate.': 'sediment/generic conglomerate/natural solid material/sedimentary rock/rock',\n"," 'The name of the specific place where the sample was collected is Kamb Ice Stream, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is [red, gray, black, fine grained], The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Granite, The free text description of the related URL is None': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The name of the 
specific place where the sample was collected is Kamb Ice Stream, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is [red, gray, black, fine grained], The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Granite, The free text description of the related URL is None.': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Coordinates for Himalaya Mine (Deposit ID: 10262631) are from the USGS Mineral Resources Data System dataset (downloaded 2015-08-13). 
Record matched uniquely on mine name, country, and state., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Quartz with schorl, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Mesa Grande, The free text description of the related URL is Smithsonian collections record for NMNH M12922-00 (MIN)': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Coordinates for Himalaya Mine (Deposit ID: 10262631) are from the USGS Mineral Resources Data System dataset (downloaded 2015-08-13). 
Record matched uniquely on mine name, country, and state., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Quartz with schorl, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Mesa Grande, The free text description of the related URL is Smithsonian collections record for NMNH M12922-00 (MIN).': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is Namibia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Coordinates for Kombat Mine (Deposit ID: 10208004) are from the USGS Mineral Resources Data System dataset (downloaded 2015-08-13). Record matched uniquely on mine name and country when mine names were standardized to end with \"mine.\", The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Defernite with crednerite and hausmannite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Tsumeb (49 Km S Of), The free text description of the related URL is Smithsonian collections record for NMNH 163831-00 (MIN)': 'natural solid material/mineral-carbonate or nitrate/mineral',\n"," 'The country where the sample was collected is Namibia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Coordinates for Kombat Mine (Deposit ID: 10208004) are from the USGS Mineral Resources Data 
System dataset (downloaded 2015-08-13). Record matched uniquely on mine name and country when mine names were standardized to end with \"mine.\", The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Defernite with crednerite and hausmannite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Tsumeb (49 Km S Of), The free text description of the related URL is Smithsonian collections record for NMNH 163831-00 (MIN).': 'natural solid material/mineral-carbonate or nitrate/mineral',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Hawaii, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py script, including Rift Zone, Hawaii Co., Hawaii, United States (n=3); Rift Zone, Hawaii, United States (n=2); and Kauluoa Point, Hawaii Co., Hawaii, United States (n=1). The coordinates and uncertainty given here describe a circle encompassing the combination of localities matching all 3 place names with the smallest maximum distance between them (~35 km). Matching Rift Zone required using a wildcard search, and the script interepreted Mauna Loa as a synonym for Maunaloa. 
Other place names mentioned in the EMu record (\"at Belt Road Crossing\" and Flow) could not be matched and were ignored when determining the coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Olivine-basalt, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is Sw Rift Zone. Northern Flow At Belt Road Crossing, The free text description of the related URL is Smithsonian collections record for NMNH 116249-39 (PET)': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Hawaii, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py script, including Rift Zone, Hawaii Co., Hawaii, United States (n=3); Rift Zone, Hawaii, United States (n=2); and Kauluoa Point, Hawaii Co., Hawaii, United States (n=1). The coordinates and uncertainty given here describe a circle encompassing the combination of localities matching all 3 place names with the smallest maximum distance between them (~35 km). Matching Rift Zone required using a wildcard search, and the script interepreted Mauna Loa as a synonym for Maunaloa. 
Other place names mentioned in the EMu record (\"at Belt Road Crossing\" and Flow) could not be matched and were ignored when determining the coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Olivine-basalt, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is Sw Rift Zone. Northern Flow At Belt Road Crossing, The free text description of the related URL is Smithsonian collections record for NMNH 116249-39 (PET).': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is Mexico, The province where the sample was collected is Durango, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Poyarkovite, The additional information about the specific place where the sample was collected is Elena, The free text description of the related URL is Smithsonian collections record for NMNH 169926-00 (MIN)': 'natural solid material/mineral-halide/mineral',\n"," 'The country where the sample was collected is Mexico, The province where the sample was collected is Durango, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Poyarkovite, The additional 
information about the specific place where the sample was collected is Elena, The free text description of the related URL is Smithsonian collections record for NMNH 169926-00 (MIN).': 'natural solid material/mineral-halide/mineral',\n"," 'The country where the sample was collected is Brazil, The province where the sample was collected is Minas Gerais, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Minas Gerais, Brazil (http://geonames.org/3457153) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Anthophyllite with biotite, The free text description of the related URL is Smithsonian collections record for NMNH 166573-00 (MIN)': 'natural solid material/mineral/mineral-silicate or germanate',\n"," 'The country where the sample was collected is Brazil, The province where the sample was collected is Minas Gerais, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Minas Gerais, Brazil (http://geonames.org/3457153) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. 
Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Anthophyllite with biotite, The free text description of the related URL is Smithsonian collections record for NMNH 166573-00 (MIN).': 'natural solid material/mineral/mineral-silicate or germanate',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Granodiorite, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Mount Givens Granodiorite, The additional information about the specific place where the sample was collected is Sierra Demonstration Project Area, 60 miles NE of Fresno, CA, midway between Yosemite and Kings Canyon National Parks, The free text description of the related URL is Smithsonian collections record for NMNH 117504-35 (PET)': 'granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore 
Collection, Smithsonian Institution, The taxonomy informal classification of sample is Granodiorite, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Mount Givens Granodiorite, The additional information about the specific place where the sample was collected is Sierra Demonstration Project Area, 60 miles NE of Fresno, CA, midway between Yosemite and Kings Canyon National Parks, The free text description of the related URL is Smithsonian collections record for NMNH 117504-35 (PET).': 'granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is Norway, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Gibbsite, The additional information about the specific place where the sample was collected is Langesundfjord, The free text description of the related URL is Smithsonian collections record for NMNH B7607-00 (MIN)': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is Norway, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Gibbsite, The additional information about the specific place where the sample was collected is Langesundfjord, The free text description of the related URL is Smithsonian collections record for NMNH B7607-00 (MIN).': 'natural solid 
material/mineral/mineral-oxide',\n"," \"The name of the specific place where the sample was collected is Steinbruch Forest, The country where the sample was collected is Germany, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is near Top of 'Neuntel', Bad Laasphe, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is orange, plastic bentonite\": 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," \"The name of the specific place where the sample was collected is Steinbruch Forest, The country where the sample was collected is Germany, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is near Top of 'Neuntel', Bad Laasphe, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is orange, plastic bentonite.\": 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Nevada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Pancake Range, Nye Co., Nevada, United States (http://geonames.org/5509922) based on feature name, district/county, state/province, and country using the situate.py script. 
The script determined that this locality is located within Nye (featureCode=ADM2), another feature mentioned in this record. Another place name mentioned in the EMu record (Lunar Crater Volcanic Field) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. The uncertainty radius represents the center-to-corner distance of the bounding box (500 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Wehrlite, The free text description of the related URL is Smithsonian collections record for NMNH 118044-100 (PET)': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Nevada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Pancake Range, Nye Co., Nevada, United States (http://geonames.org/5509922) based on feature name, district/county, state/province, and country using the situate.py script. The script determined that this locality is located within Nye (featureCode=ADM2), another feature mentioned in this record. Another place name mentioned in the EMu record (Lunar Crater Volcanic Field) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The uncertainty radius represents the center-to-corner distance of the bounding box (500 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Wehrlite, The free text description of the related URL is Smithsonian collections record for NMNH 118044-100 (PET).': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Dolerite, The type of the primary location is Ocean, The free text description of the related URL is Smithsonian collections record for NMNH 115089-30 (PET)': 'sediment/igneous rock/natural solid material/doleritic rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Dolerite, The type of the primary location is Ocean, The free text description of the related URL is Smithsonian collections record for NMNH 115089-30 (PET).': 'sediment/igneous rock/natural solid material/doleritic rock/rock',\n"," 'The country where the sample was collected is Brazil, The province where the sample was collected is Bahia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock 
hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Brazil (http://geonames.org/3469034) using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The city where the sample was collected is Bom Jesus dos Meiras, The taxonomy informal classification of sample is Chrysoberyl, The free text description of the related URL is Smithsonian collections record for NMNH R15222-00 (MIN)': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is Brazil, The province where the sample was collected is Bahia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Brazil (http://geonames.org/3469034) using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The city where the sample was collected is Bom Jesus dos Meiras, The taxonomy informal classification of sample is Chrysoberyl, The free text description of the related URL is Smithsonian collections record for NMNH R15222-00 (MIN).': 'natural solid material/mineral/mineral-oxide',\n"," 'The name of the specific place where the sample was collected is Munmorah State Conservation Area, The country where the sample was collected is Australia, The province where the sample was collected is New South Wales, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is 131-136 cm above top of Vales Point coal seam, The detailed description of the sample is weathered tuff, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The city where the sample was collected is Moonee, The taxonomy informal classification of sample is tuff, The type of the primary location is Sydney Basin, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Dooralong Shale': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The name of the specific place where the sample was collected is Munmorah State Conservation Area, The country where the sample was collected is Australia, The province where the sample was collected is New South Wales, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is 131-136 cm above top of Vales Point coal 
seam, The detailed description of the sample is weathered tuff, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The city where the sample was collected is Moonee, The taxonomy informal classification of sample is tuff, The type of the primary location is Sydney Basin, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Dooralong Shale.': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Jurassic, The detailed description of the sample is weathered, The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Granite, The free text description of the related URL is None': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Jurassic, The detailed description of the sample is weathered, The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Granite, The free text description of the related URL is None.': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is New Zealand, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Monchiquite, The type of the primary location is Island, The additional information about the specific place where the sample was collected is Locality Key: West Otago; Specific Locality In Specimen List In Data File, The free text description of the related URL is Smithsonian collections record for NMNH 115696-5 (PET)': 'sediment/igneous rock/natural solid material/exotic composition igneous rock/rock',\n"," 'The country where the sample was collected is New Zealand, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Monchiquite, The type of the primary location is Island, The additional information about the specific place where the sample was collected is Locality Key: West Otago; Specific Locality In Specimen List In Data File, The free text description of the related URL is Smithsonian collections record 
for NMNH 115696-5 (PET).': 'sediment/igneous rock/natural solid material/exotic composition igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is UTAH, The object type of sample indicates that this sample is a sample (sometimes mechanically collected) from a deposit or area, not intended to be representative of the deposit or area, The taxonomy formal categorization of sample is Rock Classification, The method by which a sample was collected is Grab': 'clastic sedimentary rock/sediment/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is UTAH, The object type of sample indicates that this sample is a sample (sometimes mechanically collected) from a deposit or area, not intended to be representative of the deposit or area, The taxonomy formal categorization of sample is Rock Classification, The method by which a sample was collected is Grab.': 'clastic sedimentary rock/sediment/natural solid material/sedimentary rock/rock',\n"," \"The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Measured stratigraphic Section A - 165 m - same locality as John F. Lindsay's Section A. NW-facing slope 7 km N of Mt. Mackellar. Base elevation of 2600 m (map est). Pagoda Fm. crops out just above snow line. Outcrop also includes Mackellar Fm. which is, The detailed description of the sample is Era: Paleozoic; Period: Permo-Carboniferous; , The name of institution, museum, or repository where the sample is currently stored is U.S. 
Polar Rock Repository at Ohio State University, The taxonomy informal classification of sample is diamictite, The name of institution, museum, or repository where the sample was originally stored is US Polar Rock Repository, Byrd Polar Research Center, Ohio State University, The method by which a sample was collected is Manual\": 'diamictite/clastic sedimentary rock/sediment/natural solid material/sedimentary rock/rock',\n"," \"The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Measured stratigraphic Section A - 165 m - same locality as John F. Lindsay's Section A. NW-facing slope 7 km N of Mt. Mackellar. Base elevation of 2600 m (map est). Pagoda Fm. crops out just above snow line. Outcrop also includes Mackellar Fm. which is, The detailed description of the sample is Era: Paleozoic; Period: Permo-Carboniferous; , The name of institution, museum, or repository where the sample is currently stored is U.S. 
Polar Rock Repository at Ohio State University, The taxonomy informal classification of sample is diamictite, The name of institution, museum, or repository where the sample was originally stored is US Polar Rock Repository, Byrd Polar Research Center, Ohio State University, The method by which a sample was collected is Manual.\": 'diamictite/clastic sedimentary rock/sediment/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is South Africa, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basic granophyre, The free text description of the related URL is Smithsonian collections record for NMNH 116053-66 (PET)': 'basic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is South Africa, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basic granophyre, The free text description of the related URL is Smithsonian collections record for NMNH 116053-66 (PET).': 'basic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Arizona, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Specimen, The detailed description of the sample is Interchangeable soil subsample 5 for microbial sequencing analyses, The name of institution, museum, or repository where the sample is currently stored is Soil Microbe Collection (Bulk Subsamples) (NEONSOMC-BS), The taxonomy informal classification of sample is Soil Microbe, The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL': 'natural solid material/soil',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Arizona, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Specimen, The detailed description of the sample is Interchangeable soil subsample 5 for microbial sequencing analyses, The name of institution, museum, or repository where the sample is currently stored is Soil Microbe Collection (Bulk Subsamples) (NEONSOMC-BS), The taxonomy informal classification of sample is Soil Microbe, The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL.': 'natural solid material/soil',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Glassy basalt, The type of the primary location is Ocean, 
The additional information about the specific place where the sample was collected is Green Seamount, The free text description of the related URL is Smithsonian collections record for NMNH 117362-78 (PET)': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Glassy basalt, The type of the primary location is Ocean, The additional information about the specific place where the sample was collected is Green Seamount, The free text description of the related URL is Smithsonian collections record for NMNH 117362-78 (PET).': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is Colombia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Cretaceous, The free text description of the location is Matched to the GeoNames record for Serranía de Simarua, La Guajira, Colombia (http://geonames.org/3758610) based on feature name and country using the situate.py script. Matching Simarua required using a wildcard search. Another place name mentioned in the EMu record (Simarua and Carpintero Areas) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all featureCode=HLLS records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Conglomerate with quartz and chert, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Bahia Honda > Carpintero, The additional information about the specific place where the sample was collected is Guajira Peninsula; Simarua, The free text description of the related URL is Smithsonian collections record for NMNH 117697-37 (PET)': 'sediment/generic conglomerate/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is Colombia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Cretaceous, The free text description of the location is Matched to the GeoNames record for Serranía de Simarua, La Guajira, Colombia (http://geonames.org/3758610) based on feature name and country using the situate.py script. Matching Simarua required using a wildcard search. Another place name mentioned in the EMu record (Simarua and Carpintero Areas) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all featureCode=HLLS records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Conglomerate with quartz and chert, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Bahia Honda > Carpintero, The additional information about the specific place where the sample was collected is Guajira Peninsula; Simarua, The free text description of the related URL is Smithsonian collections record for NMNH 117697-37 (PET).': 'sediment/generic conglomerate/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Nevada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Mapped coordinates and uncertainty for the locality string \"1.5 mi SW of Willow Creek\" in Mineral Co., Nevada, United States using the situate.py script based on coordinates given by GeoNames. The script interepreted Willow Creek as a synonym for Platora. Other place names mentioned in the EMu record (Aurora and Mineral) appear to describe larger, less specific features and were ignored when determining coordinates given here. 
Point coordinates were rounded to 2 decimal places from the calculated values., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Granite, The type of the primary location is Mining District, The additional information about the specific place where the sample was collected is 1.5 Mi Sw Of Aurora In Willow Creek, The free text description of the related URL is Smithsonian collections record for NMNH 91483-457 (PET)': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Nevada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Mapped coordinates and uncertainty for the locality string \"1.5 mi SW of Willow Creek\" in Mineral Co., Nevada, United States using the situate.py script based on coordinates given by GeoNames. The script interepreted Willow Creek as a synonym for Platora. Other place names mentioned in the EMu record (Aurora and Mineral) appear to describe larger, less specific features and were ignored when determining coordinates given here. 
Point coordinates were rounded to 2 decimal places from the calculated values., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Granite, The type of the primary location is Mining District, The additional information about the specific place where the sample was collected is 1.5 Mi Sw Of Aurora In Willow Creek, The free text description of the related URL is Smithsonian collections record for NMNH 91483-457 (PET).': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is SHIP, The free text description of the location is South scarp oblique ridge south of OCC, The detailed description of the sample is protogranular peridotite, The name of institution, museum, or repository where the sample is currently stored is Litoteque de Institut Universitaire Européen de la Mer, 29280 Plouzané France, The taxonomy informal classification of sample is peridotite, The type of the primary location is Eastern RTI Romanche FZ, The additional information about the method by which a sample was collected is Nautile dive, The method by which a sample was collected is submersible, The name or identifier of the field program (cruise or expedition), during which the sample was collected is SMARTIES': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is SHIP, The free text description of the location is South scarp oblique ridge south of OCC, The detailed 
description of the sample is protogranular peridotite, The name of institution, museum, or repository where the sample is currently stored is Litoteque de Institut Universitaire Européen de la Mer, 29280 Plouzané France, The taxonomy informal classification of sample is peridotite, The type of the primary location is Eastern RTI Romanche FZ, The additional information about the method by which a sample was collected is Nautile dive, The method by which a sample was collected is submersible, The name or identifier of the field program (cruise or expedition), during which the sample was collected is SMARTIES.': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is University of Florida, The taxonomy informal classification of sample is basalt, The type of the primary location is mid-ocean ridge, The method by which a sample was collected is Grab>ROV, The name or identifier of the field program (cruise or expedition), during which the sample was collected is JdFCL02': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is University of Florida, The taxonomy informal classification of sample is basalt, The type of the primary location is mid-ocean ridge, The method by which a sample was collected is Grab>ROV, The name or identifier of the field program (cruise or expedition), during which the sample was 
collected is JdFCL02.': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is Canada, The province where the sample was collected is Yukon, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Hyland River area, The detailed description of the sample is undeformed cross-cutting dike of bt-plag porphyry, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is bt-plag porphyry': 'sediment/igneous rock/natural solid material/porphyry/rock',\n"," 'The country where the sample was collected is Canada, The province where the sample was collected is Yukon, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Hyland River area, The detailed description of the sample is undeformed cross-cutting dike of bt-plag porphyry, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is bt-plag porphyry.': 'sediment/igneous rock/natural solid material/porphyry/rock',\n"," 'The name of the specific place where the sample was collected is Thomas Hills, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is Erratic. 
[Feldspar porphyry], The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Rhyolite, The free text description of the related URL is None': 'sediment/igneous rock/rhyolitoid/natural solid material/fine grained igneous rock/rock',\n"," 'The name of the specific place where the sample was collected is Thomas Hills, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is Erratic. [Feldspar porphyry], The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Rhyolite, The free text description of the related URL is None.': 'sediment/igneous rock/rhyolitoid/natural solid material/fine grained igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample (sometimes mechanically collected) from a deposit or area, not intended to be representative of the deposit or area, The type of platform for the cruise is Ship, The detailed description of the sample is Uniform grey + some dark clasts and few orange clasts, unknown comp., angular aplilli, finer shape mixture, poor to moderate sorting, white pumice, dark grey lithics, The name of institution, museum, or repository where the sample is currently stored is Woods Hole Oceanographic Institution (WHOI), The taxonomy informal classification of sample is Ash, The type of the primary location is Volcano, The additional information about the method by which a sample was collected is Push core, The method by which a sample was collected is Coring>SubmersibleMountedCorer, The name or identifier of the field program (cruise or 
expedition), during which the sample was collected is RR1506': 'sediment/natural solid material/tephra/rock',\n"," 'The object type of sample indicates that this sample is a sample (sometimes mechanically collected) from a deposit or area, not intended to be representative of the deposit or area, The type of platform for the cruise is Ship, The detailed description of the sample is Uniform grey + some dark clasts and few orange clasts, unknown comp., angular aplilli, finer shape mixture, poor to moderate sorting, white pumice, dark grey lithics, The name of institution, museum, or repository where the sample is currently stored is Woods Hole Oceanographic Institution (WHOI), The taxonomy informal classification of sample is Ash, The type of the primary location is Volcano, The additional information about the method by which a sample was collected is Push core, The method by which a sample was collected is Coring>SubmersibleMountedCorer, The name or identifier of the field program (cruise or expedition), during which the sample was collected is RR1506.': 'sediment/natural solid material/tephra/rock',\n"," 'The object type of sample indicates that this sample is long cylindrical cores, The detailed description of the sample is [soft, light gray weatherd, dark gray fresh mudstone], The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Mudstone, The free text description of the related URL is None': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is long cylindrical cores, The detailed description of the sample is [soft, light gray weatherd, dark gray fresh mudstone], The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, 
Ohio State University, The taxonomy informal classification of sample is Mudstone, The free text description of the related URL is None.': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is Thailand, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Thailand (http://geonames.org/1605651) using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Diamond, The free text description of the related URL is Smithsonian collections record for NMNH 174901-00 (MIN)': 'natural solid material/mineral/mineral-native element',\n"," 'The country where the sample was collected is Thailand, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Thailand (http://geonames.org/1605651) using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Diamond, The free text description of the related URL is Smithsonian collections record for NMNH 174901-00 (MIN).': 'natural solid material/mineral/mineral-native element',\n"," 'The country where the sample was collected is Bolivia, The province where the sample was collected is Oruro, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record for Socavón Mine (La Salvadora mine), Oruro city, Cercado Province, Oruro Department, Bolivia. Could not find a result for a Socayon Mine. Socayon Mine may be a misspelling of Socavon Mine. Data manager assigned an arbitrary error radius of 1 km. URL: https://www.mindat.org/loc-23147.html., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Stannite, The type of the primary location is Mine, The free text description of the related URL is Smithsonian collections record for NMNH 94686-00 (MIN)': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The country where the sample was collected is Bolivia, The province where the sample was collected is Oruro, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record for Socavón Mine (La Salvadora mine), Oruro city, Cercado Province, Oruro Department, Bolivia. 
Could not find a result for a Socayon Mine. Socayon Mine may be a misspelling of Socavon Mine. Data manager assigned an arbitrary error radius of 1 km. URL: https://www.mindat.org/loc-23147.html., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Stannite, The type of the primary location is Mine, The free text description of the related URL is Smithsonian collections record for NMNH 94686-00 (MIN).': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Permian, The detailed description of the sample is Olive gray, muddy, very coarse feldspathic sandstone (streaky) [Up? Arrow], The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Sandstone, The free text description of the related URL is None': 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Permian, The detailed description of the sample is Olive gray, muddy, very coarse feldspathic sandstone (streaky) [Up? 
Arrow], The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Sandstone, The free text description of the related URL is None.': 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Arizona, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Tiger, Pinal Co., Arizona, United States (http://geonames.org/5317456) based on feature name, county, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Harquahala Mine) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Dioptase, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Tiger, The free text description of the related URL is Smithsonian collections record for NMNH M5662-00 (MIN)': 'natural solid material/mineral/mineral-silicate or germanate',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Arizona, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Tiger, Pinal Co., Arizona, United States (http://geonames.org/5317456) based on feature name, county, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Harquahala Mine) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Dioptase, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Tiger, The free text description of the related URL is Smithsonian collections record for NMNH M5662-00 (MIN).': 'natural solid material/mineral/mineral-silicate or germanate',\n"," \"The country where the sample was collected is Canada, The province where the sample was collected is Newfoundland, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Signal Hill, Newfoundland and Labrador, Canada (http://geonames.org/6147508) based on feature name, state/province, and country using the situate.py script. The script determined that this locality is located within St. John's (featureCode=PPLA), another feature mentioned in this record. Another place name mentioned in the EMu record (Signall Hill) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. A minimum uncertainty of 10 km was assigned to all featureCode=HLL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is St. John's, The taxonomy informal classification of sample is Feldspathic sandstone, The additional information about the specific place where the sample was collected is St. 
John's, Signall Hill, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Newfoundland Expedition, 1912, The free text description of the related URL is Smithsonian collections record for NMNH 117806-776 (PET)\": 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," \"The country where the sample was collected is Canada, The province where the sample was collected is Newfoundland, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Signal Hill, Newfoundland and Labrador, Canada (http://geonames.org/6147508) based on feature name, state/province, and country using the situate.py script. The script determined that this locality is located within St. John's (featureCode=PPLA), another feature mentioned in this record. Another place name mentioned in the EMu record (Signall Hill) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. A minimum uncertainty of 10 km was assigned to all featureCode=HLL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is St. John's, The taxonomy informal classification of sample is Feldspathic sandstone, The additional information about the specific place where the sample was collected is St. 
John's, Signall Hill, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Newfoundland Expedition, 1912, The free text description of the related URL is Smithsonian collections record for NMNH 117806-776 (PET).\": 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Hampshire, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Belknap Mountains, Belknap Co., New Hampshire, United States (http://geonames.org/5083270) based on feature name, state/province, and country using the situate.py script. The script determined that the two sites that best match the information in this record are related and kept the less specific one (featureCode=MTS). Matching Belknap-Mt required using a wildcard search. Another place name mentioned in the EMu record (Smiths Neck) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 500 km was assigned to all featureCode=MTS records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Smiths Neck, The taxonomy informal classification of sample is Camptonite, The additional information about the specific place where the sample was collected is Locality Key: Belknap-Mt, Smiths Neck, The free text description of the related URL is Smithsonian collections record for NMNH 111123-1366 (PET)': 'sediment/igneous rock/natural solid material/exotic composition igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Hampshire, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Belknap Mountains, Belknap Co., New Hampshire, United States (http://geonames.org/5083270) based on feature name, state/province, and country using the situate.py script. The script determined that the two sites that best match the information in this record are related and kept the less specific one (featureCode=MTS). Matching Belknap-Mt required using a wildcard search. Another place name mentioned in the EMu record (Smiths Neck) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 500 km was assigned to all featureCode=MTS records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Smiths Neck, The taxonomy informal classification of sample is Camptonite, The additional information about the specific place where the sample was collected is Locality Key: Belknap-Mt, Smiths Neck, The free text description of the related URL is Smithsonian collections record for NMNH 111123-1366 (PET).': 'sediment/igneous rock/natural solid material/exotic composition igneous rock/rock',\n"," \"The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Permian, The detailed description of the sample is [2 descriptions written on sample: Lowermost Mackellar Lindsay I and also: Lindsay A upper Mackellar. The original marker pen lists 'Lindsay I' while the painted sample number is Lindsay A. The PRR assumes that the collector wrote in sharpie pen in the field so location Lindsay I is listed] [greenish gray fissile siltstone to shale with brown weathering surface containing trace fossils], The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Siltstone, The free text description of the related URL is None\": 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," \"The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Permian, The detailed description of the sample is [2 descriptions written on sample: Lowermost Mackellar Lindsay I and also: Lindsay A upper Mackellar. The original marker pen lists 'Lindsay I' while the painted sample number is Lindsay A. The PRR assumes that the collector wrote in sharpie pen in the field so location Lindsay I is listed] [greenish gray fissile siltstone to shale with brown weathering surface containing trace fossils], The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Siltstone, The free text description of the related URL is None.\": 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Cambrian, The detailed description of the sample is Limestone: light orange-gray weathered; medium bedded 1- <1cm); limestone beds have recrystallized, micrite texture with trace arcuate recrystallized bio-clasts? And fenestrae texture filled with calcite spar; thin to medium beds (<1cm) of sheared, pinkish-orange, calcareous marl? 
or detrital material; highly sheared and fractured, possibly metamorphosed; crystalline shaley carbonate., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Limestone, The free text description of the related URL is None': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Cambrian, The detailed description of the sample is Limestone: light orange-gray weathered; medium bedded 1- <1cm); limestone beds have recrystallized, micrite texture with trace arcuate recrystallized bio-clasts? And fenestrae texture filled with calcite spar; thin to medium beds (<1cm) of sheared, pinkish-orange, calcareous marl? 
or detrital material; highly sheared and fractured, possibly metamorphosed; crystalline shaley carbonate., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Limestone, The free text description of the related URL is None.': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The name of the specific place where the sample was collected is Skutterud, The country where the sample was collected is Norway, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The city where the sample was collected is Modum, The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The name of the specific place where the sample was collected is Skutterud, The country where the sample was collected is Norway, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The city where the sample was collected is Modum, The name of institution, museum, or repository where the sample was originally stored is Department of 
Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual.': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The name of the specific place where the sample was collected is Cross Valley, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Paleogene, The detailed description of the sample is Medium brown/gray siltstone., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Siltstone, The free text description of the related URL is None': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The name of the specific place where the sample was collected is Cross Valley, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Paleogene, The detailed description of the sample is Medium brown/gray siltstone., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Siltstone, The free text description of the related URL is None.': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is South Africa, The province where the sample was collected is Northern Cape Province, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Gem Collection, Smithsonian Institution, The city where the sample was collected is Postmasburg, The taxonomy informal classification of sample is Quartz (var. rock crystal), The additional information about the specific place where the sample was collected is Unknown, The free text description of the related URL is Smithsonian collections record for NMNH G5994-00 (MIN)': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is South Africa, The province where the sample was collected is Northern Cape Province, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Gem Collection, Smithsonian Institution, The city where the sample was collected is Postmasburg, The taxonomy informal classification of sample is Quartz (var. 
rock crystal), The additional information about the specific place where the sample was collected is Unknown, The free text description of the related URL is Smithsonian collections record for NMNH G5994-00 (MIN).': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Horseshoe (historical), Park Co., Colorado, United States (http://geonames.org/5425547) based on municipality name, district/county, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Sheep Mt) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all municipality records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Horseshoe, The taxonomy informal classification of sample is Quartz-porphyry with quartz, The free text description of the related URL is Smithsonian collections record for NMNH 68873 (PET)': 'sediment/igneous rock/natural solid material/porphyry/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Horseshoe (historical), Park Co., Colorado, United States (http://geonames.org/5425547) based on municipality name, district/county, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Sheep Mt) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all municipality records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Horseshoe, The taxonomy informal classification of sample is Quartz-porphyry with quartz, The free text description of the related URL is Smithsonian collections record for NMNH 68873 (PET).': 'sediment/igneous rock/natural solid material/porphyry/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Coordinates for Graphic Mine (MRDS ID: D007899, Deposit ID: 10012332) are from the USGS Mineral Resources Data System dataset (downloaded 2015-08-13). Record matched uniquely on mine name, country, state, and county., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Magdalena, The taxonomy informal classification of sample is Lamprophyre, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Level 5, X-cut S Of Sta. 
30, The free text description of the related URL is Smithsonian collections record for NMNH 100280-66 (PET)': 'sediment/igneous rock/natural solid material/exotic composition igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Coordinates for Graphic Mine (MRDS ID: D007899, Deposit ID: 10012332) are from the USGS Mineral Resources Data System dataset (downloaded 2015-08-13). Record matched uniquely on mine name, country, state, and county., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Magdalena, The taxonomy informal classification of sample is Lamprophyre, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Level 5, X-cut S Of Sta. 
30, The free text description of the related URL is Smithsonian collections record for NMNH 100280-66 (PET).': 'sediment/igneous rock/natural solid material/exotic composition igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is North Carolina, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Ultramafic-rock, The type of the primary location is Mine, The free text description of the related URL is Smithsonian collections record for NMNH 111290-40 (PET)': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is North Carolina, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Ultramafic-rock, The type of the primary location is Mine, The free text description of the related URL is Smithsonian collections record for NMNH 111290-40 (PET).': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/rock',\n"," 'The country where the sample was collected is Saudi Arabia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & 
Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Peridotite, The free text description of the related URL is Smithsonian collections record for NMNH 116504-30 (PET)': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The country where the sample was collected is Saudi Arabia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Peridotite, The free text description of the related URL is Smithsonian collections record for NMNH 116504-30 (PET).': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The name of the specific place where the sample was collected is Madrone Forest, The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is arbitrarily cut segments of a core, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Eel Critical Zone Observatory (CZO)': 'natural solid material/soil',\n"," 'The name of the specific place where the sample was collected is Madrone Forest, The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is arbitrarily cut segments of a core, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Eel Critical Zone Observatory (CZO).': 'natural solid material/soil',\n"," 'The country where the sample was collected is El Salvador, The object type of sample indicates that this 
sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Not Applicable, The taxonomy informal classification of sample is lapillus, The type of the primary location is Caldera, The description of the platform for the cruise is Not Applicable, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Not Applicable': 'sediment/natural solid material/tephra/rock',\n"," 'The country where the sample was collected is El Salvador, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Not Applicable, The taxonomy informal classification of sample is lapillus, The type of the primary location is Caldera, The description of the platform for the cruise is Not Applicable, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Not Applicable.': 'sediment/natural solid material/tephra/rock',\n"," 'The country where the sample was collected is Romania, The province where the sample was collected is Maramures, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Maramureş, Romania (http://geonames.org/673887) based on state/province name and country using the situate.py script. Another place name mentioned in the EMu record (Herja) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The uncertainty radius represents the center-to-corner distance of the bounding box (87 km)., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Pyrrhotite, The additional information about the specific place where the sample was collected is Herja, The free text description of the related URL is Smithsonian collections record for NMNH B16045-00 (MIN)': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The country where the sample was collected is Romania, The province where the sample was collected is Maramures, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Maramureş, Romania (http://geonames.org/673887) based on state/province name and country using the situate.py script. Another place name mentioned in the EMu record (Herja) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The uncertainty radius represents the center-to-corner distance of the bounding box (87 km)., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Pyrrhotite, The additional information about the specific place where the sample was collected is Herja, The free text description of the related URL is Smithsonian collections record for NMNH B16045-00 (MIN).': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Hawaii, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Ultramafic nodule, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is Between Keonchehee And Keana Kakai, The free text description of the related URL is Smithsonian collections record for NMNH 114808-2 (PET)': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Hawaii, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Ultramafic nodule, The type of the primary location is Volcano, The additional information about the 
specific place where the sample was collected is Between Keonchehee And Keana Kakai, The free text description of the related URL is Smithsonian collections record for NMNH 114808-2 (PET).': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/rock',\n"," 'The country where the sample was collected is Canada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is Fine- to medium-grained equigranular biotite>hornblende granodiorite/tonalite': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/tonalite/rock',\n"," 'The country where the sample was collected is Canada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is Fine- to medium-grained equigranular biotite>hornblende granodiorite/tonalite.': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/tonalite/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text to describe the collection purpose of the sample is 2019 inundation tracer tests, The free text to add any comments pertaining to the sample is brown tint, fast flow, The type of the primary location is floodplain, aquifer, The name or identifier of the field program (cruise or expedition), during which the sample was collected is SLAC-SFA': 'liquid water/fluid material',\n"," 'The object type of sample 
indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text to describe the collection purpose of the sample is 2019 inundation tracer tests, The free text to add any comments pertaining to the sample is brown tint, fast flow, The type of the primary location is floodplain, aquifer, The name or identifier of the field program (cruise or expedition), during which the sample was collected is SLAC-SFA.': 'liquid water/fluid material',\n"," 'The name of the specific place where the sample was collected is Diamond Craters, The country where the sample was collected is United States, The province where the sample was collected is Oregon, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text to describe the collection purpose of the sample is NASA IceCrystal Grant #:80NSSC18K1518, The detailed description of the sample is red, brown, The taxonomy informal classification of sample is Basalt, The type of the primary location is Effusive Flow, The method by which a sample was collected is Rock Hammer, The name or identifier of the field program (cruise or expedition), during which the sample was collected is NASA IceCrystal Grant #:80NSSC18K1518': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The name of the specific place where the sample was collected is Diamond Craters, The country where the sample was collected is United States, The province where the sample was collected is Oregon, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text to describe the collection purpose of the sample is NASA IceCrystal Grant #:80NSSC18K1518, The detailed description of the sample is red, brown, The 
taxonomy informal classification of sample is Basalt, The type of the primary location is Effusive Flow, The method by which a sample was collected is Rock Hammer, The name or identifier of the field program (cruise or expedition), during which the sample was collected is NASA IceCrystal Grant #:80NSSC18K1518.': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Nevada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Libethenite, The additional information about the specific place where the sample was collected is Range W Of Frisco, The free text description of the related URL is Smithsonian collections record for NMNH M21096-00 (MIN)': 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Nevada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Libethenite, The additional information about the specific place where the sample was collected is Range W Of Frisco, The free text description of the related URL is Smithsonian collections record for NMNH M21096-00 (MIN).': 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," 'The country where the sample was 
collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record for Stewart Mine, Tourmaline Queen Mountain (Pala Mtn; Queen Mtn), Pala, Pala District, San Diego Co., California, USA. Data manager assigned an arbitrary error radius of 1 km. URL: https://www.mindat.org/loc-3562.html., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Jahnsite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Pala, The free text description of the related URL is Smithsonian collections record for NMNH 157803-00 (MIN)': 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record for Stewart Mine, Tourmaline Queen Mountain (Pala Mtn; Queen Mtn), Pala, Pala District, San Diego Co., California, USA. Data manager assigned an arbitrary error radius of 1 km. 
URL: https://www.mindat.org/loc-3562.html., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Jahnsite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Pala, The free text description of the related URL is Smithsonian collections record for NMNH 157803-00 (MIN).': 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," 'The country where the sample was collected is France, The province where the sample was collected is Midi-Pyrénées, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Luzenac, Ariège, Occitanie, France (http://geonames.org/2996974) based on municipality name, county, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Trimouns quarry) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all municipality records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The city where the sample was collected is Luzenac, The taxonomy informal classification of sample is Bastnäsite-(Ce), The type of the primary location is Mine, The free text description of the related URL is Smithsonian collections record for NMNH 176329-00 (MIN)': 'natural solid material/mineral-carbonate or nitrate/mineral',\n"," 'The country where the sample was collected is France, The province where the sample was collected is Midi-Pyrénées, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Luzenac, Ariège, Occitanie, France (http://geonames.org/2996974) based on municipality name, county, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Trimouns quarry) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all municipality records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The city where the sample was collected is Luzenac, The taxonomy informal classification of sample is Bastnäsite-(Ce), The type of the primary location is Mine, The free text description of the related URL is Smithsonian collections record for NMNH 176329-00 (MIN).': 'natural solid material/mineral-carbonate or nitrate/mineral',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is North Carolina, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record with a similar name, Foote Lithium Co. Mine, Kings Mountain District, Cleveland Co., North Carolina, USA. Data manager assigned an arbitrary error radius of 1 km. 
URL: https://www.mindat.org/loc-3280.html., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The city where the sample was collected is Kings Mountain, The taxonomy informal classification of sample is Tetrawickmanite, The type of the primary location is Mine, The free text description of the related URL is Smithsonian collections record for NMNH M29721-00 (MIN)': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is North Carolina, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record with a similar name, Foote Lithium Co. Mine, Kings Mountain District, Cleveland Co., North Carolina, USA. Data manager assigned an arbitrary error radius of 1 km. URL: https://www.mindat.org/loc-3280.html., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The city where the sample was collected is Kings Mountain, The taxonomy informal classification of sample is Tetrawickmanite, The type of the primary location is Mine, The free text description of the related URL is Smithsonian collections record for NMNH M29721-00 (MIN).': 'natural solid material/mineral/mineral-oxide',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Cambrian, The detailed description of the sample is quartz diorite; 1 piece, The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Diorite, The free text description of the related URL is None': 'dioritoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Cambrian, The detailed description of the sample is quartz diorite; 1 piece, The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Diorite, The free text description of the related URL is None.': 'dioritoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Arkansas, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple records from GeoNames matched the locality Hot Springs/Hot Springs Township, Garland Co., Arkansas, United States, including http://geonames.org/4115412 and http://geonames.org/4115430. The situate.py script was unable to distinguish between these localities, and the coordinates and error radius given here describe a circle encompassing both localities. 
This was the most specific place name found in this record., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Quartz, The additional information about the specific place where the sample was collected is Hot Springs, The free text description of the related URL is Smithsonian collections record for NMNH R17707-00 (MIN)': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Arkansas, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple records from GeoNames matched the locality Hot Springs/Hot Springs Township, Garland Co., Arkansas, United States, including http://geonames.org/4115412 and http://geonames.org/4115430. The situate.py script was unable to distinguish between these localities, and the coordinates and error radius given here describe a circle encompassing both localities. 
This was the most specific place name found in this record., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Quartz, The additional information about the specific place where the sample was collected is Hot Springs, The free text description of the related URL is Smithsonian collections record for NMNH R17707-00 (MIN).': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is North Carolina, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Coordinates for Colossus Mine (MRDS ID: K005239, Deposit ID: 10026532) are from the USGS Mineral Resources Data System dataset (downloaded 2015-08-13). 
Record matched uniquely on the old name of a mine in the same country and state when mine names were standardized to end with \"mine.\", The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Tuff with pyrite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Bull Face Workings, The free text description of the related URL is Smithsonian collections record for NMNH 76685-40 (PET)': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is North Carolina, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Coordinates for Colossus Mine (MRDS ID: K005239, Deposit ID: 10026532) are from the USGS Mineral Resources Data System dataset (downloaded 2015-08-13). 
Record matched uniquely on the old name of a mine in the same country and state when mine names were standardized to end with \"mine.\", The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Tuff with pyrite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Bull Face Workings, The free text description of the related URL is Smithsonian collections record for NMNH 76685-40 (PET).': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Alaska, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The free text description of the location is W flank Kanaga, 1994 flow. Looking for mafic inclusions, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Andesite, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is W flank Kanaga, 1994 flow. 
Looking for mafic inclusions, The name or identifier of the field program (cruise or expedition), during which the sample was collected is GeoPRISMS Western Aleutians 2015, The free text description of the related URL is Smithsonian collections record for NMNH 118212-15 (PET)': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Alaska, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The free text description of the location is W flank Kanaga, 1994 flow. Looking for mafic inclusions, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Andesite, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is W flank Kanaga, 1994 flow. Looking for mafic inclusions, The name or identifier of the field program (cruise or expedition), during which the sample was collected is GeoPRISMS Western Aleutians 2015, The free text description of the related URL is Smithsonian collections record for NMNH 118212-15 (PET).': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Jurassic, The detailed description of the sample is Sand sized grains, many minerals as well as rock fragments. Quartz abundant, dark volcanic rock fragments, ash matrix. 
Conspicuous purplish alteration on some volcanic rock fragments and on one surface., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Tuff, The free text description of the related URL is None': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Jurassic, The detailed description of the sample is Sand sized grains, many minerals as well as rock fragments. Quartz abundant, dark volcanic rock fragments, ash matrix. Conspicuous purplish alteration on some volcanic rock fragments and on one surface., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Tuff, The free text description of the related URL is None.': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Nevada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Comstock Lode Mining District, Storey Co., Nevada, United States (http://geonames.org/5502108) based on feature name, state/province, and country using the situate.py script. 
The script determined that this locality is located within Washoe (featureCode=ADM2), another feature mentioned in this record. Matching Comstock Lode required using a wildcard search. Another place name mentioned in the EMu record (\"Surface Coll. Summit Of Mt Kate\") could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. The uncertainty radius represents the center-to-corner distance of the bounding box (100 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Augite andesite, The type of the primary location is Mining District, The additional information about the specific place where the sample was collected is Surface Coll. Summit Of Mt Kate, The free text description of the related URL is Smithsonian collections record for NMNH 70752 (PET)': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Nevada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Comstock Lode Mining District, Storey Co., Nevada, United States (http://geonames.org/5502108) based on feature name, state/province, and country using the situate.py script. The script determined that this locality is located within Washoe (featureCode=ADM2), another feature mentioned in this record. Matching Comstock Lode required using a wildcard search. Another place name mentioned in the EMu record (\"Surface Coll. 
Summit Of Mt Kate\") could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. The uncertainty radius represents the center-to-corner distance of the bounding box (100 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Augite andesite, The type of the primary location is Mining District, The additional information about the specific place where the sample was collected is Surface Coll. Summit Of Mt Kate, The free text description of the related URL is Smithsonian collections record for NMNH 70752 (PET).': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is Iceland, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basalt with basalt glass, The free text description of the related URL is Smithsonian collections record for NMNH 113429-26 (PET)': 'sediment/igneous rock/natural solid material/glass rich igneous rock/rock',\n"," 'The country where the sample was collected is Iceland, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification 
of sample is Basalt with basalt glass, The free text description of the related URL is Smithsonian collections record for NMNH 113429-26 (PET).': 'sediment/igneous rock/natural solid material/glass rich igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Jersey, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is U.S. Geological Survey, Reston Nannofossil laboratory, The city where the sample was collected is Long Branch (uncertain), The taxonomy informal classification of sample is Macrofossils': 'natural solid material/sediment/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Jersey, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is U.S. Geological Survey, Reston Nannofossil laboratory, The city where the sample was collected is Long Branch (uncertain), The taxonomy informal classification of sample is Macrofossils.': 'natural solid material/sediment/rock',\n"," 'The country where the sample was collected is Sweden, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Sweden (http://geonames.org/2661886) using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Crookesite, The free text description of the related URL is Smithsonian collections record for NMNH 149876-00 (MIN)': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The country where the sample was collected is Sweden, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Sweden (http://geonames.org/2661886) using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Crookesite, The free text description of the related URL is Smithsonian collections record for NMNH 149876-00 (MIN).': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," \"The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Cambrian, The detailed description of the sample is Saddle +200', grades into purple porphyry, loses foliation., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Rhyodacite, The free text description of the related URL is None\": 'sediment/dacite/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," \"The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Cambrian, The detailed description of the sample is Saddle +200', grades into purple porphyry, loses foliation., The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Rhyodacite, The free text description of the related URL is None.\": 'sediment/dacite/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is Costa Rica, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Volcán Miravalles, Costa Rica (http://geonames.org/3622852) based on volcano name and country using the situate.py script. Matching Miravalles required using a wildcard search. Another place name mentioned in the EMu record (ICE Geothermal Well No 1) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all volcano records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Tephra with soil, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is ICE Geothermal well no. 1, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Costa Rican Volcanic Explosion Project, The free text description of the related URL is Smithsonian collections record for NMNH 116066-181 (PET)': 'sediment/natural solid material/tephra/rock',\n"," 'The country where the sample was collected is Costa Rica, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Volcán Miravalles, Costa Rica (http://geonames.org/3622852) based on volcano name and country using the situate.py script. Matching Miravalles required using a wildcard search. Another place name mentioned in the EMu record (ICE Geothermal Well No 1) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. A minimum uncertainty of 10 km was assigned to all volcano records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Tephra with soil, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is ICE Geothermal well no. 
1, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Costa Rican Volcanic Explosion Project, The free text description of the related URL is Smithsonian collections record for NMNH 116066-181 (PET).': 'sediment/natural solid material/tephra/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for California, United States (http://geonames.org/5332921) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The city where the sample was collected is Pala, The taxonomy informal classification of sample is Brucite with pyroaurite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Stewart Hill, The free text description of the related URL is Smithsonian collections record for NMNH 124116-00 (MIN)': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record 
for California, United States (http://geonames.org/5332921) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The city where the sample was collected is Pala, The taxonomy informal classification of sample is Brucite with pyroaurite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Stewart Hill, The free text description of the related URL is Smithsonian collections record for NMNH 124116-00 (MIN).': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Ruiz Peak, Sandoval Co., New Mexico, United States (http://geonames.org/5488627) based on feature name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (\"9100+ feet N of peak\" and \"E slope of knob\") could not be matched and were ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all records matched using wildcards., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Andesite, The additional information about the specific place where the sample was collected is E slope of knob; 9100+ feet N of peak, The free text description of the related URL is Smithsonian collections record for NMNH 117230-212 (PET)': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Ruiz Peak, Sandoval Co., New Mexico, United States (http://geonames.org/5488627) based on feature name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (\"9100+ feet N of peak\" and \"E slope of knob\") could not be matched and were ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all records matched using wildcards., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Andesite, The additional information about the specific place where the sample was collected is E slope of knob; 9100+ feet N of peak, The free text description of the related URL is Smithsonian collections record for NMNH 117230-212 (PET).': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is Canada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is Granodiorite': 'granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is Canada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is Granodiorite.': 'granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Utah, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is Mineral Group: Calcite; Quality: display; 
drusy; nodule; , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The city where the sample was collected is Kanab, The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual': 'natural solid material/mineral-carbonate or nitrate/mineral',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Utah, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is Mineral Group: Calcite; Quality: display; drusy; nodule; , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The city where the sample was collected is Kanab, The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual.': 'natural solid material/mineral-carbonate or nitrate/mineral',\n"," 'The country where the sample was collected is Canada, The province where the sample was collected is British Columbia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is granite': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The 
country where the sample was collected is Canada, The province where the sample was collected is British Columbia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is granite.': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is Switzerland, The province where the sample was collected is Valais, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Binnatal, Valais, Switzerland (http://geonames.org/2661500) based on feature name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Lercheltini Alp) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=VAL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Anatase with adularia and quartz, The additional information about the specific place where the sample was collected is Lercheltini Alp; Binnental, The free text description of the related URL is Smithsonian collections record for NMNH B6040-00 (MIN)': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is Switzerland, The province where the sample was collected is Valais, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Binnatal, Valais, Switzerland (http://geonames.org/2661500) based on feature name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Lercheltini Alp) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=VAL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Anatase with adularia and quartz, The additional information about the specific place where the sample was collected is Lercheltini Alp; Binnental, The free text description of the related URL is Smithsonian collections record for NMNH B6040-00 (MIN).': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Montana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock Collection, Smithsonian Institution, The taxonomy informal classification of sample is Lead ore, The free text description of the related URL is Smithsonian collections record for NMNH 53643-383 (PET)': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Montana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock Collection, Smithsonian Institution, The taxonomy informal classification of sample is Lead ore, The free text description of the related URL is Smithsonian collections record for NMNH 53643-383 (PET).': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample 
was collected is Washington, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Walla Walla Co., WA, United States (http://geonames.org/5814921) based on district/county name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (Columbia Plateau - Columbia River Basalt, Levey Quad, and Railroad Cut Opposite Levey Park) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The uncertainty radius represents the center-to-corner distance of the bounding box (52 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basalt, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Wanapum, The additional information about the specific place where the sample was collected is Railroad Cut Opposite Levey Park, The free text description of the related URL is Smithsonian collections record for NMNH 116861-271 (PET)': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Washington, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Walla Walla Co., WA, United States (http://geonames.org/5814921) based on district/county name, state/province, and country using the situate.py script. 
Other place names mentioned in the EMu record (Columbia Plateau - Columbia River Basalt, Levey Quad, and Railroad Cut Opposite Levey Park) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The uncertainty radius represents the center-to-corner distance of the bounding box (52 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Basalt, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Wanapum, The additional information about the specific place where the sample was collected is Railroad Cut Opposite Levey Park, The free text description of the related URL is Smithsonian collections record for NMNH 116861-271 (PET).': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is Yaakov Weiss, The type of the primary location is kimberlite-lamproite, The method by which a sample was collected is purchasing on-site': 'natural solid material/mineral/mineral-native element',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is Yaakov Weiss, The type of the primary location is kimberlite-lamproite, The method by which a sample was collected is purchasing on-site.': 'natural solid material/mineral/mineral-native element',\n"," 'The country where the sample was collected is United 
States, The province where the sample was collected is Michigan, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record for Globe-Cornell Mine, Menominee iron range, Dickinson Co., Michigan, USA. Closest match to Cornell found on either the USGS MRDS or Mindat. Data manager assigned an arbitrary error radius of 1 km. URL: https://www.mindat.org/loc-123862.html., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Calcite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Menomonee, The free text description of the related URL is Smithsonian collections record for NMNH C1868-00 (MIN)': 'natural solid material/mineral-carbonate or nitrate/mineral',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Michigan, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record for Globe-Cornell Mine, Menominee iron range, Dickinson Co., Michigan, USA. Closest match to Cornell found on either the USGS MRDS or Mindat. Data manager assigned an arbitrary error radius of 1 km. 
URL: https://www.mindat.org/loc-123862.html., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Calcite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Menomonee, The free text description of the related URL is Smithsonian collections record for NMNH C1868-00 (MIN).': 'natural solid material/mineral-carbonate or nitrate/mineral',\n"," 'The name of the specific place where the sample was collected is Plai River Valley, The country where the sample was collected is Romania, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Specimen, The detailed description of the sample is gneissic texture; near contact with amphibole schist, The city where the sample was collected is Rasinari, The taxonomy informal classification of sample is K-feldspar, Quartz, Muscovite, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Sebes Lotru?': 'sediment/natural solid material/metamorphic rock/rock',\n"," 'The name of the specific place where the sample was collected is Plai River Valley, The country where the sample was collected is Romania, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Specimen, The detailed description of the sample is gneissic texture; near contact with amphibole schist, The city where the sample was collected is Rasinari, The taxonomy informal classification of sample is K-feldspar, Quartz, Muscovite, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Sebes Lotru?.': 'sediment/natural solid material/metamorphic rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Entered locality string, \"1.5 Km Nw Of Big Creek,\" into GeoLocate using default settings. Chose the locality that was snapped to the town of Big Creek in Fresno County, CA., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Big Creek, The taxonomy informal classification of sample is Gabbro with alkalic basalt, The additional information about the specific place where the sample was collected is Volcanic Pipe 1.5 Km Nw Of Town, The free text description of the related URL is Smithsonian collections record for NMNH 116478-238 (PET)': 'gabbroid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Entered locality string, \"1.5 Km Nw Of Big Creek,\" into GeoLocate using default settings. 
Chose the locality that was snapped to the town of Big Creek in Fresno County, CA., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Big Creek, The taxonomy informal classification of sample is Gabbro with alkalic basalt, The additional information about the specific place where the sample was collected is Volcanic Pipe 1.5 Km Nw Of Town, The free text description of the related URL is Smithsonian collections record for NMNH 116478-238 (PET).': 'gabbroid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is Spain, The province where the sample was collected is Andalusia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record for Rio Tinto Mines (Riotinto Mines), Minas de Riotinto, Huelva, Andalusia, Spain. 
URL: https://www.mindat.org/loc-3123.html., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Huelva Dist, The taxonomy informal classification of sample is Sulfide ore with pyrite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Atalaya Pit, The free text description of the related URL is Smithsonian collections record for NMNH 111175-10 (PET)': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The country where the sample was collected is Spain, The province where the sample was collected is Andalusia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record for Rio Tinto Mines (Riotinto Mines), Minas de Riotinto, Huelva, Andalusia, Spain. 
URL: https://www.mindat.org/loc-3123.html., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Huelva Dist, The taxonomy informal classification of sample is Sulfide ore with pyrite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Atalaya Pit, The free text description of the related URL is Smithsonian collections record for NMNH 111175-10 (PET).': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Big Trees, Calaveras Co., California, United States (http://geonames.org/5328578) based on map name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (\"Clover Valley (5 Mi Sw Of)\") appears to describe a larger, less specific locality and was ignored when determining coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all map records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Augite latite with augite, The additional information about the specific place where the sample was collected is Locality Key: Sierra Nevada Mts, The free text description of the related URL is Smithsonian collections record for NMNH 53796-1956 (PET)': 'sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Big Trees, Calaveras Co., California, United States (http://geonames.org/5328578) based on map name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (\"Clover Valley (5 Mi Sw Of)\") appears to describe a larger, less specific locality and was ignored when determining coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all map records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Augite latite with augite, The additional information about the specific place where the sample was collected is Locality Key: Sierra Nevada Mts, The free text description of the related URL is Smithsonian collections record for NMNH 53796-1956 (PET).': 'sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Jemez Springs, Sandoval Co., New Mexico, United States (http://geonames.org/5473318) based on map name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (\"Forms Ridge Above Fire Station, 1.4 Mi E\", Boyd Ranch, CP 2802, and Ross) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all map records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Andesite, The additional information about the specific place where the sample was collected is Ross; forms ridge above fire station, 1.4 mi E Boyd Ranch, CP 2802, The free text description of the related URL is Smithsonian collections record for NMNH 117226-593 (PET)': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Jemez Springs, Sandoval Co., New Mexico, United States (http://geonames.org/5473318) based on map name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (\"Forms Ridge Above Fire Station, 1.4 Mi E\", Boyd Ranch, CP 2802, and Ross) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all map records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Andesite, The additional information about the specific place where the sample was collected is Ross; forms ridge above fire station, 1.4 mi E Boyd Ranch, CP 2802, The free text description of the related URL is Smithsonian collections record for NMNH 117226-593 (PET).': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Florida, The object type of sample indicates that this sample is long cylindrical cores, The free text to describe the collection purpose of the sample is Oil and gas exploration, The detailed description of the sample is This is a sample of type 4\"CORE SECTIONS from an oil or gas well., The name of institution, museum, or repository where the sample is currently stored is Bureau of Economic Geology Austin Core Research Center, The type of the primary location is Oil or Gas Field, The method by which a sample was collected is Coring': 'natural solid material/sediment/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Florida, The object type of sample indicates that this sample is long cylindrical cores, The free text to describe the collection purpose of the sample is Oil and gas exploration, The detailed description of the sample is This is a sample of type 4\"CORE SECTIONS from an oil or gas well., The name of institution, museum, or repository where the sample is currently stored is Bureau of Economic Geology Austin Core Research Center, The type of the primary location is Oil or Gas Field, The method by which a sample was collected is 
Coring.': 'natural solid material/sediment/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Sierra Co., California, United States (http://geonames.org/5395582) based on district/county name, state/province, and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Hornblende-pyroxene andesite, The free text description of the related URL is Smithsonian collections record for NMNH 111123-736 (PET)': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Sierra Co., California, United States (http://geonames.org/5395582) based on district/county name, state/province, and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Hornblende-pyroxene andesite, The free text description of the related URL is Smithsonian collections record for NMNH 111123-736 (PET).': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is Russia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text to describe the collection purpose of the sample is Paleoenvironmental reconstruction, The name of institution, museum, or repository where the sample is currently stored is University of Cologne, Institute of Geography, The method by which a sample was collected is Coring': 'natural solid material/sediment/rock',\n"," 'The country where the sample was collected is Russia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text to describe the collection purpose of the sample is Paleoenvironmental reconstruction, The name of institution, museum, or repository where the sample is currently stored is University of Cologne, Institute of Geography, The method by which a sample was collected is Coring.': 'natural solid material/sediment/rock',\n"," 'The country where the sample was collected is Egypt, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, 
The taxonomy informal classification of sample is Manganese ore, The additional information about the specific place where the sample was collected is From Block Of The Dendur Temple, The free text description of the related URL is Smithsonian collections record for NMNH 107692-13 (PET)': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The country where the sample was collected is Egypt, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Manganese ore, The additional information about the specific place where the sample was collected is From Block Of The Dendur Temple, The free text description of the related URL is Smithsonian collections record for NMNH 107692-13 (PET).': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The country where the sample was collected is Costa Rica, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock Collection, Smithsonian Institution, The taxonomy informal classification of sample is Volcanic ash, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is Station C, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Earth Watch 3, The free text description of the related URL is Smithsonian collections record for NMNH 116872-65 (PET)': 'sediment/natural solid material/tephra/rock',\n"," 'The country where the sample was collected is Costa Rica, The object type of 
sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock Collection, Smithsonian Institution, The taxonomy informal classification of sample is Volcanic ash, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is Station C, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Earth Watch 3, The free text description of the related URL is Smithsonian collections record for NMNH 116872-65 (PET).': 'sediment/natural solid material/tephra/rock',\n"," 'The country where the sample was collected is Australia, The province where the sample was collected is Western Australia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Diamond, The type of the primary location is Mine, The free text description of the related URL is Smithsonian collections record for NMNH 176664 (MIN)': 'natural solid material/mineral/mineral-native element',\n"," 'The country where the sample was collected is Australia, The province where the sample was collected is Western Australia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Diamond, The type of the primary location is 
Mine, The free text description of the related URL is Smithsonian collections record for NMNH 176664 (MIN).': 'natural solid material/mineral/mineral-native element',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Precambrian, The free text description of the location is About 1 mile southeast of Saratoga Springs, Ibex Mountains, San Bernardino County, The detailed description of the sample is Sedimentary>Chemical>Carbonate>Limestone>Pisolitic Limestone, The free text to add any comments pertaining to the sample is graded bedding; Pisolites becoming finer upward. Small grey sample. Original label: \"Pisolitic dolomite, Beck Spring formation, Pahrump Group, float in wash, April 1964.\"': 'sediment/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Precambrian, The free text description of the location is About 1 mile southeast of Saratoga Springs, Ibex Mountains, San Bernardino County, The detailed description of the sample is Sedimentary>Chemical>Carbonate>Limestone>Pisolitic Limestone, The free text to add any comments pertaining to the sample is graded bedding; Pisolites becoming finer upward. Small grey sample. 
Original label: \"Pisolitic dolomite, Beck Spring formation, Pahrump Group, float in wash, April 1964.\".': 'sediment/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Nevada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py script, including Black Butte, Nevada, United States (n=13) and Goldfield, Esmeralda Co., Nevada, United States (n=1). The coordinates and uncertainty given here describe a circle encompassing the combination of localities matching both place names with the smallest maximum distance between them (~6 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Hornblende-biotite andesite, The type of the primary location is Mining District, The additional information about the specific place where the sample was collected is Locality Key: Goldfield, The free text description of the related URL is Smithsonian collections record for NMNH 111123-1685 (PET)': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Nevada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py script, including Black Butte, Nevada, United States 
(n=13) and Goldfield, Esmeralda Co., Nevada, United States (n=1). The coordinates and uncertainty given here describe a circle encompassing the combination of localities matching both place names with the smallest maximum distance between them (~6 km)., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Hornblende-biotite andesite, The type of the primary location is Mining District, The additional information about the specific place where the sample was collected is Locality Key: Goldfield, The free text description of the related URL is Smithsonian collections record for NMNH 111123-1685 (PET).': 'andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock',\n"," 'The country where the sample was collected is Canada, The province where the sample was collected is Ontario, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Levack, Ontario, Canada (http://geonames.org/6053216) based on municipality name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (1400 ft and Sudbury Irruptive) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all municipality records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Levack, The taxonomy informal classification of sample is Granophyre, The additional information about the specific place where the sample was collected is 1400 ft, The free text description of the related URL is Smithsonian collections record for NMNH 117738-43 (PET)': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is Canada, The province where the sample was collected is Ontario, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Levack, Ontario, Canada (http://geonames.org/6053216) based on municipality name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (1400 ft and Sudbury Irruptive) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all municipality records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Levack, The taxonomy informal classification of sample is Granophyre, The additional information about the specific place where the sample was collected is 1400 ft, The free text description of the related URL is Smithsonian collections record for NMNH 117738-43 (PET).': 'granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is Mexico, The province where the sample was collected is Baja California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Dunite, The type of the primary location is Volcano, The free text description of the related URL is Smithsonian collections record for NMNH 118070-21 (PET)': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The country where the sample was collected is Mexico, The province where the sample was collected is Baja California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Dunite, The type of the primary location is Volcano, The free text description of the related URL is Smithsonian collections record 
for NMNH 118070-21 (PET).': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Pennsylvania, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Phoenixville, Chester Co., Pennsylvania, United States (http://geonames.org/5205849) based on feature name, district/county, state/province, and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Pyromorphite with galena, The additional information about the specific place where the sample was collected is Phoenixville, The free text description of the related URL is Smithsonian collections record for NMNH M12783-00 (MIN)': 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Pennsylvania, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Phoenixville, Chester Co., Pennsylvania, United States (http://geonames.org/5205849) based on feature name, district/county, state/province, and country using the 
situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Pyromorphite with galena, The additional information about the specific place where the sample was collected is Phoenixville, The free text description of the related URL is Smithsonian collections record for NMNH M12783-00 (MIN).': 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Pennsylvania, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Phoenixville, Chester Co., Pennsylvania, United States (http://geonames.org/5205849) based on feature name, district/county, state/province, and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Wulfenite with pyromorphite, The additional information about the specific place where the sample was collected is Phoenixville, The free text description of the related URL is Smithsonian collections record for NMNH M17021-00 (MIN)': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Pennsylvania, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Phoenixville, Chester Co., Pennsylvania, United States (http://geonames.org/5205849) based on feature name, district/county, state/province, and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Wulfenite with pyromorphite, The additional information about the specific place where the sample was collected is Phoenixville, The free text description of the related URL is Smithsonian collections record for NMNH M17021-00 (MIN).': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is from E flank of \"Siberia Crater,\" SW side of main Dish Hill cone, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Olivine-websterite, The additional information about the specific place where the sample was collected is from E flank of \"Siberia Crater,\" SW side of main Dish Hill cone, The free text description of the related URL is Smithsonian collections record for NMNH 118018-24 (PET)': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is from E flank of \"Siberia Crater,\" SW side of main Dish Hill cone, The name of 
institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Olivine-websterite, The additional information about the specific place where the sample was collected is from E flank of \"Siberia Crater,\" SW side of main Dish Hill cone, The free text description of the related URL is Smithsonian collections record for NMNH 118018-24 (PET).': 'sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Oklahoma, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Oklahoma, United States (http://geonames.org/4544379) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Baryte, The free text description of the related URL is Smithsonian collections record for NMNH 118874-00 (MIN)': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Oklahoma, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Oklahoma, United States (http://geonames.org/4544379) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Baryte, The free text description of the related URL is Smithsonian collections record for NMNH 118874-00 (MIN).': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Utah, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Utah, United States (http://geonames.org/5549030) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Quartz, The additional information about the specific place where the sample was collected is Braser Canyon Crawford Mt, The free text description of the related URL is Smithsonian collections record for NMNH 122819-00 (MIN)': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Utah, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Utah, United States (http://geonames.org/5549030) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Quartz, The additional information about the specific place where the sample was collected is Braser Canyon Crawford Mt, The free text description of the related URL is Smithsonian collections record for NMNH 122819-00 (MIN).': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is North Carolina, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record with a similar name, Foote Lithium Co. Mine, Kings Mountain District, Cleveland Co., North Carolina, USA. Spodumene is listed by Mindat as being found at this locality. Data manager assigned an arbitrary error radius of 1 km. 
URL: https://www.mindat.org/loc-3280.html., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Fairfieldite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Kings Mtn, The free text description of the related URL is Smithsonian collections record for NMNH 120988-00 (MIN)': 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is North Carolina, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to Mindat locality record with a similar name, Foote Lithium Co. Mine, Kings Mountain District, Cleveland Co., North Carolina, USA. Spodumene is listed by Mindat as being found at this locality. Data manager assigned an arbitrary error radius of 1 km. 
URL: https://www.mindat.org/loc-3280.html., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Fairfieldite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Kings Mtn, The free text description of the related URL is Smithsonian collections record for NMNH 120988-00 (MIN).': 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Nevada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Lyon Co., Nevada, United States (http://geonames.org/5507669) based on district/county name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Churchill Buttes) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The uncertainty radius represents the center-to-corner distance of the bounding box (84 km)., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Paratacamite, The additional information about the specific place where the sample was collected is Churchill Buttes, The free text description of the related URL is Smithsonian collections record for NMNH 106939-00 (MIN)': 'natural solid material/mineral-halide/mineral',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Nevada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Lyon Co., Nevada, United States (http://geonames.org/5507669) based on district/county name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Churchill Buttes) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The uncertainty radius represents the center-to-corner distance of the bounding box (84 km)., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Paratacamite, The additional information about the specific place where the sample was collected is Churchill Buttes, The free text description of the related URL is Smithsonian collections record for NMNH 106939-00 (MIN).': 'natural solid material/mineral-halide/mineral',\n"," 'The country where the sample was collected is Canada, The province where the sample was collected is Yukon, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is undeformed bt-granodiorite': 'granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is Canada, The province where the sample was collected is Yukon, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is undeformed bt-granodiorite.': 'granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Specimen, The detailed description of the sample is Soil subsample for archive, The name of institution, museum, or repository where the sample is currently stored is Soil Collection (NEONSOIC), The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL': 'natural solid material/soil',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Specimen, The detailed description of the sample is Soil subsample for archive, The name of institution, museum, or repository where the sample is currently stored is Soil Collection (NEONSOIC), The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL.': 'natural solid material/soil',\n"," 'The country where the sample was collected is Germany, The province where the sample was collected is Saxony, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The city where the sample was collected is Freiberg, The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method 
by which a sample was collected is Manual': 'natural solid material/mineral-halide/mineral',\n"," 'The country where the sample was collected is Germany, The province where the sample was collected is Saxony, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The city where the sample was collected is Freiberg, The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual.': 'natural solid material/mineral-halide/mineral',\n"," 'The country where the sample was collected is Germany, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py script, including Grund, Germany (n=18) and Iberg, Germany (n=15). The coordinates and error radius given here describe a circle encompassing the combination of sites matching both names with the smallest maximum distance between them (~19 km). 
Another place name mentioned in the EMu record (Harz Mountains) appears to describe a larger, less specific feature and was ignored when determining coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Baryte, The additional information about the specific place where the sample was collected is Harz Mtns.; Grund (Near), Iberg, The free text description of the related URL is Smithsonian collections record for NMNH B11427-00 (MIN)': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The country where the sample was collected is Germany, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py script, including Grund, Germany (n=18) and Iberg, Germany (n=15). The coordinates and error radius given here describe a circle encompassing the combination of sites matching both names with the smallest maximum distance between them (~19 km). 
Another place name mentioned in the EMu record (Harz Mountains) appears to describe a larger, less specific feature and was ignored when determining coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Baryte, The additional information about the specific place where the sample was collected is Harz Mtns.; Grund (Near), Iberg, The free text description of the related URL is Smithsonian collections record for NMNH B11427-00 (MIN).': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The name of the specific place where the sample was collected is Marshall Gulch, The country where the sample was collected is United States, The province where the sample was collected is Arizona, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is Soil solution, The name of institution, museum, or repository where the sample is currently stored is Department of Environmental Science, University of Arizona, The taxonomy informal classification of sample is Schist Convergent Lysimeter 3, The type of the primary location is Mountain, The additional information about the method by which a sample was collected is SoilMoisture suction cups lysimeter; Tesion Sampler (SoilMoisture Equipment Corp., Santa Barbara, CA), The method by which a sample was collected is TensionLys1, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Catalina-Jemez Critical Zone Observatory (CZO)': 'liquid water/fluid material',\n"," 'The name of the specific place where the sample was collected is Marshall Gulch, The country where the sample was collected is United States, The province where the sample was collected is 
Arizona, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is Soil solution, The name of institution, museum, or repository where the sample is currently stored is Department of Environmental Science, University of Arizona, The taxonomy informal classification of sample is Schist Convergent Lysimeter 3, The type of the primary location is Mountain, The additional information about the method by which a sample was collected is SoilMoisture suction cups lysimeter; Tesion Sampler (SoilMoisture Equipment Corp., Santa Barbara, CA), The method by which a sample was collected is TensionLys1, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Catalina-Jemez Critical Zone Observatory (CZO).': 'liquid water/fluid material',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Powder, The taxonomy informal classification of sample is Basalt, The type of the primary location is Flood basalt, The method by which a sample was collected is Hammer': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Powder, The taxonomy informal classification of sample is Basalt, The type of the primary location is Flood basalt, The method by which a sample was collected is Hammer.': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is Morocco, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Mibladen, Béni Mellal-Khénifra, Morocco (http://geonames.org/6546859) based on feature name and country using the situate.py script. This was the most specific match possible based on information available in this record. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 25 km was assigned to all featureCode=ADM3 records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Vanadinite, The additional information about the specific place where the sample was collected is Mibladen, The free text description of the related URL is Smithsonian collections record for NMNH B16020-00 (MIN)': 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," 'The country where the sample was collected is Morocco, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Mibladen, Béni Mellal-Khénifra, Morocco (http://geonames.org/6546859) based on feature name and country using the situate.py script. 
This was the most specific match possible based on information available in this record. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. An arbitrary error radius of 25 km was assigned to all featureCode=ADM3 records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Vanadinite, The additional information about the specific place where the sample was collected is Mibladen, The free text description of the related URL is Smithsonian collections record for NMNH B16020-00 (MIN).': 'natural solid material/mineral/mineral-phosphate, arsenate, or vanadate',\n"," 'The name of the specific place where the sample was collected is Mount Bigelow, The country where the sample was collected is United States, The province where the sample was collected is Arizona, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is Soil solution, The name of institution, museum, or repository where the sample is currently stored is Department of Environmental Science, University of Arizona, The taxonomy informal classification of sample is Mt Bigelow ZOB Pedon 2, The type of the primary location is Mountain, The additional information about the method by which a sample was collected is Prenart Super Quartz suction cups soil water sampler, The method by which a sample was collected is Prenart, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Catalina-Jemez Critical Zone Observatory (CZO)': 'liquid water/fluid material',\n"," 'The name of the specific place where the sample was collected is Mount Bigelow, The country where the sample was collected is United States, The province where 
the sample was collected is Arizona, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is Soil solution, The name of institution, museum, or repository where the sample is currently stored is Department of Environmental Science, University of Arizona, The taxonomy informal classification of sample is Mt Bigelow ZOB Pedon 2, The type of the primary location is Mountain, The additional information about the method by which a sample was collected is Prenart Super Quartz suction cups soil water sampler, The method by which a sample was collected is Prenart, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Catalina-Jemez Critical Zone Observatory (CZO).': 'liquid water/fluid material',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Virginia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Beverley Mill, Fauquier Co., Virginia, United States (http://geonames.org/4747208) based on feature name, state/province, and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Tin ore, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Grinnell, The additional information about the specific place where the sample was collected is Beverly Mill, The free text description of the related URL is Smithsonian collections record for NMNH 117768-4 (PET)': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Virginia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Beverley Mill, Fauquier Co., Virginia, United States (http://geonames.org/4747208) based on feature name, state/province, and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Tin ore, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Grinnell, The additional information about the specific place where the sample was collected is Beverly Mill, The free text description of the related URL is Smithsonian collections record for NMNH 117768-4 (PET).': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Utah, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple records from GeoNames matched the locality Tintic, Juab Co., Utah, United States, including http://geonames.org/5548522 (featureCode=?) and http://geonames.org/5548527 (featureCode=?). The situate.py script was unable to distinguish between the matched localities, and the coordinates and error radius given here describe a circle encompassing both localities. Matching Tintic required using a wildcard search. Other place names mentioned in the EMu record (\"100 Ft S Of Open Cut of Black Jack\" and \"N55e From Robinson Triangulation Point\") could not be matched and were ignored when determining the coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Lime, The type of the primary location is Mining District, The additional information about the specific place where the sample was collected is 100 Ft. S. 
Of Open Cut Of Black Jack Iron Mine, N55e From Robinson Triangulation Point, The free text description of the related URL is Smithsonian collections record for NMNH 53480-42 (PET)': 'natural solid material/mineral/mineral-oxide',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Utah, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple records from GeoNames matched the locality Tintic, Juab Co., Utah, United States, including http://geonames.org/5548522 (featureCode=?) and http://geonames.org/5548527 (featureCode=?). The situate.py script was unable to distinguish between the matched localities, and the coordinates and error radius given here describe a circle encompassing both localities. Matching Tintic required using a wildcard search. Other place names mentioned in the EMu record (\"100 Ft S Of Open Cut of Black Jack\" and \"N55e From Robinson Triangulation Point\") could not be matched and were ignored when determining the coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Lime, The type of the primary location is Mining District, The additional information about the specific place where the sample was collected is 100 Ft. S. 
Of Open Cut Of Black Jack Iron Mine, N55e From Robinson Triangulation Point, The free text description of the related URL is Smithsonian collections record for NMNH 53480-42 (PET).': 'natural solid material/mineral/mineral-oxide',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The age of a sample as described by the stratigraphic era, period, state, etc. is Neogene > Pliocene, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Carbonate rock with palagonite and glassy basalt, The type of the primary location is Ocean, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Deep Sea Drilling Project, The free text description of the related URL is Smithsonian collections record for NMNH 115251-8 (PET)': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Neogene > Pliocene, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Carbonate rock with palagonite and glassy basalt, The type of the primary location is Ocean, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Deep Sea Drilling Project, The free text description of the related URL is Smithsonian collections record for NMNH 115251-8 (PET).': 'sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Wyoming, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Rhyolite, The additional information about the specific place where the sample was collected is E. 
side of canon of Gibbon River, back of hot springs, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Yellowstone Park Survey, The free text description of the related URL is Smithsonian collections record for NMNH 91322-1852 (PET)': 'sediment/igneous rock/rhyolitoid/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Wyoming, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Rhyolite, The additional information about the specific place where the sample was collected is E. side of canon of Gibbon River, back of hot springs, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Yellowstone Park Survey, The free text description of the related URL is Smithsonian collections record for NMNH 91322-1852 (PET).': 'sediment/igneous rock/rhyolitoid/natural solid material/fine grained igneous rock/rock',\n"," 'The name of the specific place where the sample was collected is nw120, The object type of sample indicates that this sample is core that can be positioned on the surface in the same way that it was arranged in the borehole before extraction, The free text description of the location is N side of NW spur on Neuburg Peak, The taxonomy formal categorization of sample is rock, The taxonomy informal classification of sample is Gabbronorite': 'gabbroid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The name of the specific place where the sample was collected is nw120, The object type of sample indicates 
that this sample is core that can be positioned on the surface in the same way that it was arranged in the borehole before extraction, The free text description of the location is N side of NW spur on Neuburg Peak, The taxonomy formal categorization of sample is rock, The taxonomy informal classification of sample is Gabbronorite.': 'gabbroid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Specimen, The detailed description of the sample is Interchangeable soil subsample 3 for microbial sequencing analyses, The name of institution, museum, or repository where the sample is currently stored is Soil Microbe Collection (Bulk Subsamples) (NEONSOMC-BS), The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL': 'natural solid material/soil',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. 
The more descriptive object type is Specimen, The detailed description of the sample is Interchangeable soil subsample 3 for microbial sequencing analyses, The name of institution, museum, or repository where the sample is currently stored is Soil Microbe Collection (Bulk Subsamples) (NEONSOMC-BS), The additional information about the method by which a sample was collected is PreservedSpecimen, The method by which a sample was collected is Manual, The free text description of the related URL is Source Reference URL.': 'natural solid material/soil',\n"," 'The country where the sample was collected is Mexico, The province where the sample was collected is Michoacán, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Uruapan, Uruapan, Michoacán, Mexico (http://geonames.org/3980760) based on municipality name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (Michoacan-Guanajuato and Paricutin Volcanic Field) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all municipality records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Uruapan, The taxonomy informal classification of sample is Basalt, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is Near Uruapan, The free text description of the related URL is Smithsonian collections record for NMNH 109354 (PET)': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is Mexico, The province where the sample was collected is Michoacán, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Uruapan, Uruapan, Michoacán, Mexico (http://geonames.org/3980760) based on municipality name, state/province, and country using the situate.py script. Other place names mentioned in the EMu record (Michoacan-Guanajuato and Paricutin Volcanic Field) could not be matched and were ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all municipality records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Uruapan, The taxonomy informal classification of sample is Basalt, The type of the primary location is Volcano, The additional information about the specific place where the sample was collected is Near Uruapan, The free text description of the related URL is Smithsonian collections record for NMNH 109354 (PET).': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Bragdon (historical), Trinity Co., California, United States (http://geonames.org/5560287) based on municipality name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (\"2 Mi W Of Hirz On John\\'s Road\") could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all municipality records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Bragdon, The taxonomy informal classification of sample is Conglomerate, The additional information about the specific place where the sample was collected is 2 Mi W Of Hirz On John\\'s Road, The free text description of the related URL is Smithsonian collections record for NMNH 77082-29 (PET)': 'sediment/generic conglomerate/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Bragdon (historical), Trinity Co., California, United States (http://geonames.org/5560287) based on municipality name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (\"2 Mi W Of Hirz On John\\'s Road\") could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all municipality records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Bragdon, The taxonomy informal classification of sample is Conglomerate, The additional information about the specific place where the sample was collected is 2 Mi W Of Hirz On John\\'s Road, The free text description of the related URL is Smithsonian collections record for NMNH 77082-29 (PET).': 'sediment/generic conglomerate/natural solid material/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Mesozoic>Jurassic>Late Jurassic, The free text description of the location is Mariana Trench. Original: 11 29.7N, 142 11.6E to 11 30.1N, 142 11.2E. Depth: -7800 to -7680., The detailed description of the sample is Igneous>Plutonic>Mafic Intrusive>Gabbro, The free text to add any comments pertaining to the sample is Two small cut samples measuring ~6x3.5x3 cm and ~5x3x2 from the cut faces. Black crystalline interior. Billet pieces in a bag. Original label: \"28 November, 1980.\"': 'sediment/igneous rock/natural solid material/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Mesozoic>Jurassic>Late Jurassic, The free text description of the location is Mariana Trench. Original: 11 29.7N, 142 11.6E to 11 30.1N, 142 11.2E. 
Depth: -7800 to -7680., The detailed description of the sample is Igneous>Plutonic>Mafic Intrusive>Gabbro, The free text to add any comments pertaining to the sample is Two small cut samples measuring ~6x3.5x3 cm and ~5x3x2 from the cut faces. Black crystalline interior. Billet pieces in a bag. Original label: \"28 November, 1980.\".': 'sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Michigan, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Negaunee, Marquette Co., Michigan, United States (http://geonames.org/5003294) based on municipality name, county, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Marquette Iron Range) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all municipality records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Negaunee, The taxonomy informal classification of sample is Iron-formation with carbonate and magnetite, The type of the primary location is Mining District, The free text description of the related URL is Smithsonian collections record for NMNH 113552-1777 (PET)': 'sediment/natural solid material/iron rich sedimentary rock/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Michigan, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Negaunee, Marquette Co., Michigan, United States (http://geonames.org/5003294) based on municipality name, county, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Marquette Iron Range) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all municipality records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Negaunee, The taxonomy informal classification of sample is Iron-formation with carbonate and magnetite, The type of the primary location is Mining District, The free text description of the related URL is Smithsonian collections record for NMNH 113552-1777 (PET).': 'sediment/natural solid material/iron rich sedimentary rock/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is Penn State Earth and Mineral Sciences Museum, The taxonomy informal classification of sample is fragments of Accendi di Pipa conglomerate matrix, The name or identifier of the field program (cruise or expedition), during which the sample was collected is E-FIRE': 'clastic sedimentary rock/sediment/natural solid material/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is Penn State Earth and Mineral Sciences Museum, The taxonomy informal classification of sample is fragments of Accendi di Pipa conglomerate matrix, The name or identifier of the field program (cruise or expedition), during which the sample was collected is E-FIRE.': 'clastic sedimentary rock/sediment/natural solid material/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological 
specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Glassy basalt, The type of the primary location is Ocean, The free text description of the related URL is Smithsonian collections record for NMNH 115193-99 (PET)': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The type of platform for the cruise is Ship, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Glassy basalt, The type of the primary location is Ocean, The free text description of the related URL is Smithsonian collections record for NMNH 115193-99 (PET).': 'basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The name of the specific place where the sample was collected is Cape Lloyd, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Cretaceous, The detailed description of the sample is Green Phyllite, The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Phyllite, The free text description of the related URL is None': 'sediment/natural solid material/metamorphic rock/rock',\n"," 'The name of the specific place where the sample was collected is Cape Lloyd, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Cretaceous, The detailed description of the sample is Green Phyllite, The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Phyllite, The free text description of the related URL is None.': 'sediment/natural solid material/metamorphic rock/rock',\n"," 'The name of the specific place where the sample was collected is TG Well PA-7A, The country where the sample was collected is United States, The province where the sample was collected is Utah, The object type of sample indicates that this sample is loose, coarse, unconsolidated material suspended in drilling fluid, The type of platform for the cruise is truck-mounted drill rig, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Plio-Quaternary, The free text description of the location is western Black Rock Desert, The free text to describe the collection purpose of the sample is Thermal conductivity, The name of institution, museum, or repository where the sample is currently stored is Utah Core Research Center, The free text to add any comments pertaining to the sample is Thermal-gradient borehole, The taxonomy informal classification of sample is clay, pea gravel, The type of the primary location is Basin & Range desert basin, The additional information about the method by which a sample was collected is Cutting sample collected from drill rig shaker table, The additional information about the specific place where the sample was collected is Utah School Trust Land, The method by which a sample was collected is Shaker table': 'clastic sediment/sediment/natural solid material/rock',\n"," 'The name of the specific place where the sample was collected is TG Well PA-7A, The country where the sample was collected is United States, The province where the sample was collected is Utah, The object type of sample indicates that this sample is loose, coarse, unconsolidated material suspended in drilling fluid, The type of platform for the cruise is truck-mounted drill rig, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Plio-Quaternary, The free text description of the location is western Black Rock Desert, The free text to describe the collection purpose of the sample is Thermal conductivity, The name of institution, museum, or repository where the sample is currently stored is Utah Core Research Center, The free text to add any comments pertaining to the sample is Thermal-gradient borehole, The taxonomy informal classification of sample is clay, pea gravel, The type of the primary location is Basin & Range desert basin, The additional information about the method by which a sample was collected is Cutting sample collected from drill rig shaker table, The additional information about the specific place where the sample was collected is Utah School Trust Land, The method by which a sample was collected is Shaker table.': 'clastic sediment/sediment/natural solid material/rock',\n"," 'The country where the sample was collected is Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Aguascalientes, The taxonomy informal classification of sample is Copper ore, The free text description of the related URL is Smithsonian collections record for NMNH 57099-3 (PET)': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The country where the sample was collected is Mexico, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Aguascalientes, The taxonomy informal classification of 
sample is Copper ore, The free text description of the related URL is Smithsonian collections record for NMNH 57099-3 (PET).': 'sediment/natural solid material/metasomatic rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Nevada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py script, including Eureka/Eureka (historical), Nevada, United States (n=3) and Hoosac Mountain, Eureka Co., Nevada, United States (n=1). The coordinates and uncertainty given here describe a circle encompassing the combination of localities matching both place names with the smallest maximum distance between them (~8 km). Another place name mentioned in the EMu record (\"(W Slope Of 2nd Ridge Se Of)\") could not be matched and was ignored when determining the coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Eureka, The taxonomy informal classification of sample is Rhyolite, The free text description of the related URL is Smithsonian collections record for NMNH 23217 (PET)': 'sediment/igneous rock/rhyolitoid/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Nevada, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py 
script, including Eureka/Eureka (historical), Nevada, United States (n=3) and Hoosac Mountain, Eureka Co., Nevada, United States (n=1). The coordinates and uncertainty given here describe a circle encompassing the combination of localities matching both place names with the smallest maximum distance between them (~8 km). Another place name mentioned in the EMu record (\"(W Slope Of 2nd Ridge Se Of)\") could not be matched and was ignored when determining the coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Eureka, The taxonomy informal classification of sample is Rhyolite, The free text description of the related URL is Smithsonian collections record for NMNH 23217 (PET).': 'sediment/igneous rock/rhyolitoid/natural solid material/fine grained igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Jersey, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Coordinates for Sterling Hill Mine (MRDS ID: W031395, Deposit ID: 10080281) are from the USGS Mineral Resources Data System dataset (downloaded 2015-08-13). 
Record matched uniquely on mine name, country, state, and county., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Gypsum, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Ogdensburg, The free text description of the related URL is Smithsonian collections record for NMNH 164123-00 (MIN)': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is New Jersey, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Coordinates for Sterling Hill Mine (MRDS ID: W031395, Deposit ID: 10080281) are from the USGS Mineral Resources Data System dataset (downloaded 2015-08-13). 
Record matched uniquely on mine name, country, state, and county., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Gypsum, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is Ogdensburg, The free text description of the related URL is Smithsonian collections record for NMNH 164123-00 (MIN).': 'natural solid material/mineral-sulfate, selenate, or tellurate/mineral',\n"," 'The country where the sample was collected is Kazakhstan, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Dzhezkazgan, Karaganda, Kazakhstan (http://geonames.org/1516589) based on feature name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Tennantite with calcite, The additional information about the specific place where the sample was collected is Dzhezkazgan, The free text description of the related URL is Smithsonian collections record for NMNH R19776-00 (MIN)': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The country where the sample was collected is Kazakhstan, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Dzhezkazgan, Karaganda, Kazakhstan (http://geonames.org/1516589) based on feature name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=PPL records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Tennantite with calcite, The additional information about the specific place where the sample was collected is Dzhezkazgan, The free text description of the related URL is Smithsonian collections record for NMNH R19776-00 (MIN).': 'natural solid material/mineral/mineral-sulfide or sulfosalt',\n"," 'The country where the sample was collected is Ecuador, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Cerro Brujo, Cantón San Cristóbal, Galápagos, Ecuador (http://geonames.org/3660081) based on feature name and country using the situate.py script. Another place name mentioned in the EMu record (San Cristobal) appears to describe a larger, less specific feature and was ignored when determining coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=MT records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Guano, The type of the primary location is Island, The free text description of the related URL is Smithsonian collections record for NMNH 117387-21 (PET)': 'sediment/natural solid material/biogenic sediment/rock',\n"," 'The country where the sample was collected is Ecuador, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Cerro Brujo, Cantón San Cristóbal, Galápagos, Ecuador (http://geonames.org/3660081) based on feature name and country using the situate.py script. Another place name mentioned in the EMu record (San Cristobal) appears to describe a larger, less specific feature and was ignored when determining coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 10 km was assigned to all featureCode=MT records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Guano, The type of the primary location is Island, The free text description of the related URL is Smithsonian collections record for NMNH 117387-21 (PET).': 'sediment/natural solid material/biogenic sediment/rock',\n"," 'The name of the specific place where the sample was collected is TG Well Rozel-1, The country where the sample was collected is United States, The province where the sample was collected is Utah, The object type of sample indicates that this sample is loose, coarse, unconsolidated material suspended in drilling fluid, The type of platform for the cruise is truck-mounted drill rig, The age of a sample as described by the stratigraphic era, period, state, etc. is Plio-Quaternary, The free text description of the location is Rozel Flats, The free text to describe the collection purpose of the sample is Thermal conductivity, The name of institution, museum, or repository where the sample is currently stored is Utah Core Research Center, The free text to add any comments pertaining to the sample is Thermal-gradient borehole, The taxonomy informal classification of sample is clay, silt, sand, gravel, The type of the primary location is Basin & Range desert basin, The additional information about the method by which a sample was collected is Cutting sample collected from drill rig shaker table, The additional information about the specific place where the sample was collected is Utah School Trust Land, The method by which a sample was collected is Shaker table': 'clastic sediment/sediment/natural solid material/rock',\n"," 'The name of the specific place where the sample was collected is TG Well Rozel-1, The country where the sample was collected is United 
States, The province where the sample was collected is Utah, The object type of sample indicates that this sample is loose, coarse, unconsolidated material suspended in drilling fluid, The type of platform for the cruise is truck-mounted drill rig, The age of a sample as described by the stratigraphic era, period, state, etc. is Plio-Quaternary, The free text description of the location is Rozel Flats, The free text to describe the collection purpose of the sample is Thermal conductivity, The name of institution, museum, or repository where the sample is currently stored is Utah Core Research Center, The free text to add any comments pertaining to the sample is Thermal-gradient borehole, The taxonomy informal classification of sample is clay, silt, sand, gravel, The type of the primary location is Basin & Range desert basin, The additional information about the method by which a sample was collected is Cutting sample collected from drill rig shaker table, The additional information about the specific place where the sample was collected is Utah School Trust Land, The method by which a sample was collected is Shaker table.': 'clastic sediment/sediment/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Minnesota, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Mesabi Range, Saint Louis Co., Minnesota, United States (http://geonames.org/5037332) based on feature name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Biwabik Iron Formation) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 500 km was assigned to all featureCode=MTS records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Slaty taconite with fayalite and magnetite, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Biwabik Iron Formation, The additional information about the specific place where the sample was collected is Locality Key: Biwabik Iron Formation, The free text description of the related URL is Smithsonian collections record for NMNH 112588-33B (PET)': 'sediment/natural solid material/iron rich sedimentary rock/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Minnesota, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Mesabi Range, Saint Louis Co., Minnesota, United States (http://geonames.org/5037332) based on feature name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Biwabik Iron Formation) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 500 km was assigned to all featureCode=MTS records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Slaty taconite with fayalite and magnetite, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Biwabik Iron Formation, The additional information about the specific place where the sample was collected is Locality Key: Biwabik Iron Formation, The free text description of the related URL is Smithsonian collections record for NMNH 112588-33B (PET).': 'sediment/natural solid material/iron rich sedimentary rock/sedimentary rock/rock',\n"," 'The country where the sample was collected is Germany, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Otting, The taxonomy informal classification of sample is Glass suevite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is W Side Of Quarry, The free text description of the related URL is Smithsonian collections record for NMNH 116515-111 (PET)': 'sediment/impact generated material/natural solid material/rock',\n"," 'The country where the sample was collected is Germany, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected 
is Otting, The taxonomy informal classification of sample is Glass suevite, The type of the primary location is Mine, The additional information about the specific place where the sample was collected is W Side Of Quarry, The free text description of the related URL is Smithsonian collections record for NMNH 116515-111 (PET).': 'sediment/impact generated material/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Montana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Mississippian to Pennsylvanian, The free text description of the location is Matched to the GeoNames record for Horseshoe Hills, Gallatin Co., Montana, United States (http://geonames.org/5658117) based on feature name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Horseshoe Hills Area) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 500 km was assigned to all featureCode=MTS records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Quartzite with sandstone, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Quadrant, The additional information about the specific place where the sample was collected is Horseshoe Hills Area, The free text description of the related URL is Smithsonian collections record for NMNH 117787-15 (PET)': 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Montana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Mississippian to Pennsylvanian, The free text description of the location is Matched to the GeoNames record for Horseshoe Hills, Gallatin Co., Montana, United States (http://geonames.org/5658117) based on feature name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Horseshoe Hills Area) could not be matched and was ignored when determining the coordinates given here. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 500 km was assigned to all featureCode=MTS records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Quartzite with sandstone, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Quadrant, The additional information about the specific place where the sample was collected is Horseshoe Hills Area, The free text description of the related URL is Smithsonian collections record for NMNH 117787-15 (PET).': 'sediment/natural solid material/generic sandstone/sedimentary rock/rock',\n"," 'The country where the sample was collected is United Kingdom, The province where the sample was collected is Scotland, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Isle of Skye, Highland, Scotland, United Kingdom (http://geonames.org/2637681) based on island name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Near Meall Tuath) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 100 km was assigned to all island records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Diabase, The type of the primary location is Island, The additional information about the specific place where the sample was collected is near Meall Tuath, The free text description of the related URL is Smithsonian collections record for NMNH 118158-56 (PET)': 'sediment/igneous rock/natural solid material/doleritic rock/rock',\n"," 'The country where the sample was collected is United Kingdom, The province where the sample was collected is Scotland, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Isle of Skye, Highland, Scotland, United Kingdom (http://geonames.org/2637681) based on island name, state/province, and country using the situate.py script. Another place name mentioned in the EMu record (Near Meall Tuath) could not be matched and was ignored when determining the coordinates given here. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
An arbitrary error radius of 100 km was assigned to all island records matched using the script., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Diabase, The type of the primary location is Island, The additional information about the specific place where the sample was collected is near Meall Tuath, The free text description of the related URL is Smithsonian collections record for NMNH 118158-56 (PET).': 'sediment/igneous rock/natural solid material/doleritic rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Hawaii, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Volcanic glass, The additional information about the specific place where the sample was collected is \"A169-11. 
Spatter erupted from vent C on February 22.\" (Swanson et al., 1976), The free text description of the related URL is Smithsonian collections record for NMNH 118325-11 (PET)': 'sediment/igneous rock/natural solid material/glass rich igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Hawaii, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Volcanic glass, The additional information about the specific place where the sample was collected is \"A169-11. Spatter erupted from vent C on February 22.\" (Swanson et al., 1976), The free text description of the related URL is Smithsonian collections record for NMNH 118325-11 (PET).': 'sediment/igneous rock/natural solid material/glass rich igneous rock/rock',\n"," 'The country where the sample was collected is Germany, The province where the sample was collected is Baden-Württemberg, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple records from GeoNames matched the locality Sasbach, Baden-Württemberg, Germany, including http://geonames.org/2841377, http://geonames.org/2841378, http://geonames.org/2841379, http://geonames.org/2841380, http://geonames.org/2841381, and http://geonames.org/6555799. 
The situate.py script was unable to distinguish between these localities, and the coordinates and error radius given here describe a circle encompassing all 6 localities., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Aragonite with calcite, The additional information about the specific place where the sample was collected is Kaiserstuhl, Sasbach, The free text description of the related URL is Smithsonian collections record for NMNH B7970-00 (MIN)': 'natural solid material/mineral-carbonate or nitrate/mineral',\n"," 'The country where the sample was collected is Germany, The province where the sample was collected is Baden-Württemberg, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple records from GeoNames matched the locality Sasbach, Baden-Württemberg, Germany, including http://geonames.org/2841377, http://geonames.org/2841378, http://geonames.org/2841379, http://geonames.org/2841380, http://geonames.org/2841381, and http://geonames.org/6555799. 
The situate.py script was unable to distinguish between these localities, and the coordinates and error radius given here describe a circle encompassing all 6 localities., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Aragonite with calcite, The additional information about the specific place where the sample was collected is Kaiserstuhl, Sasbach, The free text description of the related URL is Smithsonian collections record for NMNH B7970-00 (MIN).': 'natural solid material/mineral-carbonate or nitrate/mineral',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Pierre Lakes, Pitkin Co., Colorado, United States (http://geonames.org/5434514) based on municipality/feature name, state/province, and country using the situate.py script. Matching Pierre required using a wildcard search. This was the most specific match possible based on information available in this record. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all records matched using wildcards., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Pierre, The taxonomy informal classification of sample is Shale, The free text description of the related URL is Smithsonian collections record for NMNH 76689-48 (PET)': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Colorado, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Pierre Lakes, Pitkin Co., Colorado, United States (http://geonames.org/5434514) based on municipality/feature name, state/province, and country using the situate.py script. Matching Pierre required using a wildcard search. This was the most specific match possible based on information available in this record. Point coordinates were rounded to 2 decimal places from the values given by GeoNames. 
A minimum uncertainty of 10 km was assigned to all records matched using wildcards., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Pierre, The taxonomy informal classification of sample is Shale, The free text description of the related URL is Smithsonian collections record for NMNH 76689-48 (PET).': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Entered locality string, \"9 Mi E Of Porterville,\" into GeoLocate using default settings., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Porterville, The taxonomy informal classification of sample is Magnesite, The additional information about the specific place where the sample was collected is Locality Key: Tulare Co - Porterville; 9 Mi E Of Porterville, The free text description of the related URL is Smithsonian collections record for NMNH 90424-6054 (PET)': 'natural solid material/mineral-carbonate or nitrate/mineral',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is California, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Entered locality string, \"9 Mi E Of Porterville,\" into GeoLocate using default settings., The name of institution, museum, or 
repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Porterville, The taxonomy informal classification of sample is Magnesite, The additional information about the specific place where the sample was collected is Locality Key: Tulare Co - Porterville; 9 Mi E Of Porterville, The free text description of the related URL is Smithsonian collections record for NMNH 90424-6054 (PET).': 'natural solid material/mineral-carbonate or nitrate/mineral',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. is Cambrian, The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Siltstone, The free text description of the related URL is None': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The age of a sample as described by the stratigraphic era, period, state, etc. 
is Cambrian, The name of institution, museum, or repository where the sample is currently stored is Polar Rock Repository, Byrd Polar and Climate Research Center, Ohio State University, The taxonomy informal classification of sample is Siltstone, The free text description of the related URL is None.': 'sediment/generic mudstone/natural solid material/sedimentary rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Arkansas, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The city where the sample was collected is Murfreesboro, The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual': 'natural solid material/mineral/mineral-native element',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Arkansas, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The city where the sample was collected is Murfreesboro, The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was 
collected is Manual.': 'natural solid material/mineral/mineral-native element',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text to describe the collection purpose of the sample is geochemistry, The name of institution, museum, or repository where the sample is currently stored is USGS - Anchorage, AK, The taxonomy informal classification of sample is volcanic, The method by which a sample was collected is outcrop': 'sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text to describe the collection purpose of the sample is geochemistry, The name of institution, museum, or repository where the sample is currently stored is USGS - Anchorage, AK, The taxonomy informal classification of sample is volcanic, The method by which a sample was collected is outcrop.': 'sediment/igneous rock/natural solid material/fine grained igneous rock/rock',\n"," 'The name of the specific place where the sample was collected is Mount Bigelow, The country where the sample was collected is United States, The province where the sample was collected is Arizona, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is Soil solution, The name of institution, museum, or repository where the sample is currently stored is Department of Environmental Science, University of Arizona, The taxonomy informal classification of sample is Mt Bigelow ZOB Pedon 3, The type of the primary location is Mountain, The additional information about the method by which a sample was collected is Prenart Super Quartz suction 
cups soil water sampler, The method by which a sample was collected is Prenart, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Catalina-Jemez Critical Zone Observatory (CZO)': 'liquid water/fluid material',\n"," 'The name of the specific place where the sample was collected is Mount Bigelow, The country where the sample was collected is United States, The province where the sample was collected is Arizona, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is Soil solution, The name of institution, museum, or repository where the sample is currently stored is Department of Environmental Science, University of Arizona, The taxonomy informal classification of sample is Mt Bigelow ZOB Pedon 3, The type of the primary location is Mountain, The additional information about the method by which a sample was collected is Prenart Super Quartz suction cups soil water sampler, The method by which a sample was collected is Prenart, The name or identifier of the field program (cruise or expedition), during which the sample was collected is Catalina-Jemez Critical Zone Observatory (CZO).': 'liquid water/fluid material',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Idaho, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple records from GeoNames matched the locality Spring Mt, Lemhi Co., Idaho, United States, including http://geonames.org/5606987 (featureCode=MT) and http://geonames.org/5608538 (featureCode=MT). 
The situate.py script was unable to distinguish between the matched localities, and the coordinates and error radius given here describe a circle encompassing both localities. Matching Spring Mt required using a wildcard search. Another place name mentioned in the EMu record (Quartzite Gulch) could not be matched and was ignored when determining the coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Quartz-diorite with quartz, The additional information about the specific place where the sample was collected is Locality Key: Spring Mt, The free text description of the related URL is Smithsonian collections record for NMNH 113655-140 (PET)': 'dioritoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Idaho, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple records from GeoNames matched the locality Spring Mt, Lemhi Co., Idaho, United States, including http://geonames.org/5606987 (featureCode=MT) and http://geonames.org/5608538 (featureCode=MT). The situate.py script was unable to distinguish between the matched localities, and the coordinates and error radius given here describe a circle encompassing both localities. Matching Spring Mt required using a wildcard search. 
Another place name mentioned in the EMu record (Quartzite Gulch) could not be matched and was ignored when determining the coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The taxonomy informal classification of sample is Quartz-diorite with quartz, The additional information about the specific place where the sample was collected is Locality Key: Spring Mt, The free text description of the related URL is Smithsonian collections record for NMNH 113655-140 (PET).': 'dioritoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United Kingdom, The province where the sample was collected is England, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple records from GeoNames matched the locality Egremont, Cumbria, England, United Kingdom, including http://geonames.org/2650174 and http://geonames.org/7300135. The situate.py script was unable to distinguish between these localities, and the coordinates and error radius given here describe a circle encompassing both localities. 
This was the most specific place name found in this record., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Calcite, The additional information about the specific place where the sample was collected is Egremont, The free text description of the related URL is Smithsonian collections record for NMNH B9455-00 (MIN)': 'natural solid material/mineral-carbonate or nitrate/mineral',\n"," 'The country where the sample was collected is United Kingdom, The province where the sample was collected is England, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple records from GeoNames matched the locality Egremont, Cumbria, England, United Kingdom, including http://geonames.org/2650174 and http://geonames.org/7300135. The situate.py script was unable to distinguish between these localities, and the coordinates and error radius given here describe a circle encompassing both localities. 
This was the most specific place name found in this record., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Calcite, The additional information about the specific place where the sample was collected is Egremont, The free text description of the related URL is Smithsonian collections record for NMNH B9455-00 (MIN).': 'natural solid material/mineral-carbonate or nitrate/mineral',\n"," 'The name of the specific place where the sample was collected is Eromanga Basin, The country where the sample was collected is Australia, The object type of sample indicates that this sample is long cylindrical cores, The free text description of the location is GSQ Longreach 1B, 1251.87-1252.37, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is tuff, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Jochmus Formation/Edie Tuff, The additional information about the specific place where the sample was collected is GSQ Longreach 1B': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The name of the specific place where the sample was collected is Eromanga Basin, The country where the sample was collected is Australia, The object type of sample indicates that this sample is long cylindrical cores, The free text description of the location is GSQ Longreach 1B, 1251.87-1252.37, The name of institution, museum, or repository where the sample is currently stored is Boise State University, The taxonomy informal classification of sample is tuff, A body of rock established as a distinct entity in the classification of the Earth’s rocks is Jochmus Formation/Edie Tuff, The additional information about the specific place where the sample was collected is 
GSQ Longreach 1B.': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The name of the specific place where the sample was collected is Manjaka, The country where the sample was collected is Madagascar, The province where the sample was collected is Antananarivo, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The city where the sample was collected is Sahatany Valley, The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual': 'mineral-borate/natural solid material/mineral',\n"," 'The name of the specific place where the sample was collected is Manjaka, The country where the sample was collected is Madagascar, The province where the sample was collected is Antananarivo, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The detailed description of the sample is , The name of institution, museum, or repository where the sample is currently stored is Harvard University, Department of Earth and Planetary Sciences (EPS), The city where the sample was collected is Sahatany Valley, The name of institution, museum, or repository where the sample was originally stored is Department of Earth & Planetary Sciences, Harvard University, Cambridge, Massachusetts, 02138, USA, The method by which a sample was collected is Manual.': 'mineral-borate/natural solid material/mineral',\n"," 'The country where the sample was 
collected is United States, The province where the sample was collected is Montana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py script, including Lincoln, Lewis and Clark Co., Montana, United States (n=1) and Crater Mountain, Lewis and Clark Co., Montana, United States (n=1). The coordinates and error radius given here describe a circle encompassing the combination of sites matching both names with the smallest maximum distance between them (~13 km). Another place name mentioned in the EMu record (South Side of Crater Mountain) could not be matched and was ignored when determining the coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Lincoln, The taxonomy informal classification of sample is Welded tuff, The additional information about the specific place where the sample was collected is South side of Crater Mountain, The free text description of the related URL is Smithsonian collections record for NMNH 117752-19 (PET)': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Montana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Multiple features of similar apparent specificity were matched to GeoNames records using the situate.py script, including Lincoln, Lewis and Clark Co., Montana, United States (n=1) and Crater 
Mountain, Lewis and Clark Co., Montana, United States (n=1). The coordinates and error radius given here describe a circle encompassing the combination of sites matching both names with the smallest maximum distance between them (~13 km). Another place name mentioned in the EMu record (South Side of Crater Mountain) could not be matched and was ignored when determining the coordinates given here., The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Lincoln, The taxonomy informal classification of sample is Welded tuff, The additional information about the specific place where the sample was collected is South side of Crater Mountain, The free text description of the related URL is Smithsonian collections record for NMNH 117752-19 (PET).': 'sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock',\n"," 'The name of the specific place where the sample was collected is Indiana Geological Survey Drill Hole 276, The country where the sample was collected is United States, The province where the sample was collected is Indiana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Powder, The free text description of the location is T15N R5W Sec. 
9, The free text to describe the collection purpose of the sample is Geochemical analysis, The name of institution, museum, or repository where the sample is currently stored is Indiana Geological and Water Survey, Indiana University; Bloomington, IN': 'natural solid material/sediment/rock',\n"," 'The name of the specific place where the sample was collected is Indiana Geological Survey Drill Hole 276, The country where the sample was collected is United States, The province where the sample was collected is Indiana, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid. The more descriptive object type is Powder, The free text description of the location is T15N R5W Sec. 9, The free text to describe the collection purpose of the sample is Geochemical analysis, The name of institution, museum, or repository where the sample is currently stored is Indiana Geological and Water Survey, Indiana University; Bloomington, IN.': 'natural solid material/sediment/rock',\n"," 'The country where the sample was collected is South Africa, The province where the sample was collected is Northern Cape Province, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The city where the sample was collected is Postmasburg, The taxonomy informal classification of sample is Tincalconite, The additional information about the specific place where the sample was collected is Unknown, The free text description of the related URL is Smithsonian collections record for NMNH S505-00 (MIN)': 'mineral-borate/natural solid material/mineral',\n"," 'The country where the sample was collected is South Africa, The province where the sample was 
collected is Northern Cape Province, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The city where the sample was collected is Postmasburg, The taxonomy informal classification of sample is Tincalconite, The additional information about the specific place where the sample was collected is Unknown, The free text description of the related URL is Smithsonian collections record for NMNH S505-00 (MIN).': 'mineral-borate/natural solid material/mineral',\n"," \"The country where the sample was collected is South Africa, The province where the sample was collected is Limpopo, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, Smithsonian Institution, The city where the sample was collected is Greater Tubatse, The taxonomy informal classification of sample is Norite, The additional information about the specific place where the sample was collected is Winterveld; 0-26.5' below Steelpoort seam, The free text description of the related URL is Smithsonian collections record for NMNH 118329-251 (PET)\": 'gabbroid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," \"The country where the sample was collected is South Africa, The province where the sample was collected is Limpopo, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The name of institution, museum, or repository where the sample is currently stored is National Rock & Ore Collection, 
Smithsonian Institution, The city where the sample was collected is Greater Tubatse, The taxonomy informal classification of sample is Norite, The additional information about the specific place where the sample was collected is Winterveld; 0-26.5' below Steelpoort seam, The free text description of the related URL is Smithsonian collections record for NMNH 118329-251 (PET).\": 'gabbroid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Georgia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Georgia, United States (http://geonames.org/4197000) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Bauxite, The free text description of the related URL is Smithsonian collections record for NMNH R2195-00 (MIN)': 'sediment/natural solid material/residual material/rock',\n"," 'The country where the sample was collected is United States, The province where the sample was collected is Georgia, The object type of sample indicates that this sample is a sample that is an individual unit, including rock hand samples, a biological specimen, or a bottle of fluid, The free text description of the location is Matched to the GeoNames record for Georgia, United States (http://geonames.org/4197000) based on state/province name and country using the situate.py script. This was the most specific match possible based on information available in this record. Bounding box coordinates were rounded to 2 decimal places from the values given by GeoNames. 
The error radius represents the center-to-corner distance of the bounding box., The name of institution, museum, or repository where the sample is currently stored is National Mineral Collection, Smithsonian Institution, The taxonomy informal classification of sample is Bauxite, The free text description of the related URL is Smithsonian collections record for NMNH R2195-00 (MIN).': 'sediment/natural solid material/residual material/rock',\n"," ...}"]},"metadata":{},"execution_count":40}]},{"cell_type":"code","source":["import pickle\n","# corpus_embeddings = sentbert_model.encode(train_descriptions, convert_to_tensor=True)\n","# with open('corpus_sentbert_embeddings_all.pickle', 'wb') as f:\n","# pickle.dump(corpus_embeddings, f)\n","corpus_embeddings = pickle.load(open('corpus_sentbert_embeddings_all.pickle', 'rb'))"],"metadata":{"id":"j1mFuS2ye_jZ","executionInfo":{"status":"ok","timestamp":1711574517578,"user_tz":420,"elapsed":3325,"user":{"displayName":"Hyunju Song","userId":"03844618074789275015"}}},"execution_count":41,"outputs":[]},{"cell_type":"markdown","source":["# Set Up - Prompt"],"metadata":{"id":"Xw1aiO9g8OsU"}},{"cell_type":"code","source":["sys_prompt = \"A chat.\"\n","prompt = \"Tell me a joke.\"\n","\n","prefix = \"<|im_start|>\"\n","suffix = \"<|im_end|>\\n\"\n","sys_format = prefix + \"system\\n\" + sys_prompt + suffix\n","user_format = prefix + \"user\\n\" + prompt + suffix\n","assistant_format = prefix + \"assistant\\n\"\n","input_text = sys_format + user_format + assistant_format\n","\n","generation_config = GenerationConfig(\n"," max_length=256, temperature=1.1, top_p=0.95, repetition_penalty=1.0,\n"," do_sample=True, use_cache=True,\n"," eos_token_id=tokenizer.eos_token_id, pad_token_id=tokenizer.eos_token_id,\n"," transformers_version=\"4.34.0.dev0\")\n"],"metadata":{"id":"c_b6fRu78RlA","executionInfo":{"status":"ok","timestamp":1711574517578,"user_tz":420,"elapsed":6,"user":{"displayName":"Hyunju 
Song","userId":"03844618074789275015"}}},"execution_count":42,"outputs":[]},{"cell_type":"code","source":["def generate_answer(system_prompt, prompt):\n"," sys_format = prefix + \"system\\n\" + sys_prompt + suffix\n"," user_format = prefix + \"user\\n\" + prompt + suffix\n"," assistant_format = prefix + \"assistant\\n\"\n"," input_text = sys_format + user_format + assistant_format\n","\n"," generation_config = GenerationConfig(\n"," max_new_tokens=100, temperature=0.0001, top_p=0.95, repetition_penalty=1.0,\n"," do_sample=True, use_cache=True,\n"," eos_token_id=tokenizer.eos_token_id, pad_token_id=tokenizer.eos_token_id,\n"," transformers_version=\"4.34.0.dev0\")\n","\n"," inputs = tokenizer(input_text, return_tensors=\"pt\", return_attention_mask=True).to(device)\n"," outputs = model.generate(**inputs, generation_config=generation_config)\n","\n"," text = tokenizer.batch_decode(outputs)[0]\n"," text = text.split(\"<|im_end|>\")[2][len(\"<|im_start|> assistant\"):].strip(\"\\n\")\n"," print(text)\n","\n"," return text"],"metadata":{"id":"nq7k-c7hDsQF","executionInfo":{"status":"ok","timestamp":1711574517578,"user_tz":420,"elapsed":4,"user":{"displayName":"Hyunju Song","userId":"03844618074789275015"}}},"execution_count":43,"outputs":[]},{"cell_type":"code","source":["# Summarize And Explain\n","def add_taxonomy_description(description, description_to_taxonomy):\n"," # add taxonomy description if exists and see if that helps\n"," if description in description_to_taxonomy and type(description_to_taxonomy[description]) == str:\n"," term = description_to_taxonomy[description]\n"," system_prompt = \"You are a scientist. User will give you a geology term. You must generate a short description of the term.\"\n"," instruction = f\"\"\"You are a scientist. 
Your task is to give a brief one sentence description of material the geology term it consists.\n"," ###\n"," <<<\n"," Term:{term}\n"," >>>\n"," \"\"\"\n"," return generate_answer(system_prompt, instruction).strip(\"\\n\")\n"," else:\n"," return None\n","\n","def add_collectionMethod_description(description, description_to_cm):\n"," # add collectionMethod description if exists and see if that helps\n"," if description in description_to_cm and type( description_to_cm[description]) == str:\n"," term = description_to_cm[description]\n"," system_prompt = \"You are a scientist. User will give you a term that indicates how it collected a sample from nature. You must generate a short description of the term.\"\n"," instruction = f\"\"\"You are a scientist. Your task is to give a brief one sentence description of the given collection method of the sample.\n"," ###\n"," <<<\n"," Term:{term}\n"," >>>\n"," \"\"\"\n"," return generate_answer(system_prompt, instruction).strip(\"\\n\")\n"," else:\n"," return None\n","def add_description_description(description, description_to_desc):\n"," # add collectionMethod description if exists and see if that helps\n"," if description in description_to_desc and type( description_to_desc[description]) == str:\n"," term = description_to_desc[description]\n"," system_prompt = \"You are a scientist. User will give you a term that indicates a description of the sample. You must generate a short explanation of that description.\"\n"," instruction = f\"\"\"You are a scientist. Your task is to give a one sentence explanation of the description of the sample.\n"," ###\n"," <<<\n"," Description:{term}\n"," >>>\n"," \"\"\"\n"," else:\n"," return None\n","\n","\n","def generate_summary(sample_description):\n"," system_prompt = \"You are a scientist. User will give you a description of a material sample it sampled from the nature. You must generate a summarized description of the sample.\"\n"," prompt = f\"\"\"You are a scientist. 
Your task is to give a brief one sentence summary of the given description, focusing on the parts that is helpful in determining the type of material that constitutes it.\n"," Include important fields in the summary such as the taxonomy informal classification, collection method, and values that determine the material type.\n"," ###\n"," <<<\n"," Description: {sample_description}\n"," >>>\n"," \"\"\"\n","\n"," return generate_answer(system_prompt, prompt)\n","\n","def generate_summary_and_explanation(sample_description):\n"," summary = generate_summary(sample_description)\n","\n"," taxonomy_rich_description = add_taxonomy_description(sample_description, desc_to_tax_map)\n"," if taxonomy_rich_description:\n"," summary += taxonomy_rich_description.strip(\"\\n\")\n"," collectionMethodDesc = add_collectionMethod_description(sample_description, desc_to_cm_map)\n"," if collectionMethodDesc:\n"," summary += collectionMethodDesc.strip(\"\\n\")\n"," description_rich_description = add_description_description(sample_description, desc_to_desc_map)\n"," if description_rich_description:\n"," summary += description_rich_description.strip(\"\\n\")\n"," print(\"Enriched summary : \", summary)\n"," return summary"],"metadata":{"id":"G3LpcBwxAelY","executionInfo":{"status":"ok","timestamp":1711574517578,"user_tz":420,"elapsed":4,"user":{"displayName":"Hyunju Song","userId":"03844618074789275015"}}},"execution_count":44,"outputs":[]},{"cell_type":"code","source":["def generate_prediction(material_types, examples, sample_description):\n"," system_prompt = \"You are a scientist. User will give you a task. You must generate an answer to the task.\"\n"," instruction = f\"\"\"\n"," You are a scientist. Your task is to analyze the description of a material sample and determine the kind of material that constitutes it after <<<>>> into one of the predefined material types: \\n\n"," {material_types} \\n\n"," You will only respond with the material type. 
Do not include the word \"Material type\". Do not provide explanations or notes.\n"," ###\\n\n"," Here are some examples:\n"," {examples}\n"," ### \\n\n"," <<<\n"," Description: {sample_description}\n"," Material type:\n"," >>>\n"," \"\"\"\n"," return generate_answer(system_prompt, instruction).strip(\"\\n\")"],"metadata":{"id":"KBpm1jkvFDoZ","executionInfo":{"status":"ok","timestamp":1711574517578,"user_tz":420,"elapsed":4,"user":{"displayName":"Hyunju Song","userId":"03844618074789275015"}}},"execution_count":45,"outputs":[]},{"cell_type":"code","source":["import nltk\n","nltk.download('punkt')\n","from nltk.tokenize import word_tokenize\n","from nltk.util import ngrams\n","from nltk.metrics import jaccard_distance\n","\n","def jaccard_similarity(set1, set2):\n"," \"\"\"\n"," Calculate Jaccard similarity between two sets.\n"," \"\"\"\n"," intersection = len(set1.intersection(set2))\n"," union = len(set1.union(set2))\n"," return intersection / union\n","\n","def tokenize_text(text):\n"," \"\"\"\n"," Tokenize input text.\n"," \"\"\"\n"," return set(word_tokenize(text.lower()))\n","\n","def jaccard_highest_score(sample_text, set_of_texts):\n"," \"\"\"\n"," Find the text with the highest Jaccard similarity score compared to the sample text.\n"," \"\"\"\n"," sample_tokens = tokenize_text(sample_text)\n"," highest_score = 0\n"," most_similar_text = None\n"," print(set_of_texts)\n"," for text in set_of_texts:\n"," text_tokens = tokenize_text(text)\n"," similarity_score = jaccard_similarity(sample_tokens, text_tokens)\n","\n"," if similarity_score > highest_score:\n"," highest_score = similarity_score\n"," most_similar_text = text\n","\n"," return highest_score, most_similar_text # most similar material type\n","\n","def extract_prediction(candidate_material_types, response):\n"," # do greedy text match\n"," result = []\n"," response = response.lower()\n"," if response.startswith(\"The material type is: \"):\n"," response = response[len(\"The material type is: \")]\n"," 
for candidate_material_type in candidate_material_types:\n"," if candidate_material_type in response:\n"," result.append(candidate_material_type)\n"," # convert multi label type labels\n"," elif candidate_material_type == \"rock or sediment\":\n"," if \"rock\" in response.lower() or \"sediment\" in response.lower():\n"," result.append(candidate_material_type)\n"," elif candidate_material_type == \"mixed soil sediment or rock\" and \"soil\" in response.lower():\n"," result.append(candidate_material_type)\n"," if len(result) == 0:\n"," # use jaccard score\n"," higest_score, most_similar_material_types = jaccard_highest_score(response, candidate_material_types)\n"," #print(f\"Jaccard score : {higest_score} : {most_similar_material_types}\")\n"," if higest_score >= 0.5:\n"," result.append(most_similar_material_types)\n","\n"," return result"],"metadata":{"id":"UnBiXJz6G1O_","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1711574517757,"user_tz":420,"elapsed":182,"user":{"displayName":"Hyunju Song","userId":"03844618074789275015"}},"outputId":"78d9864e-4142-4747-e4b7-88351122917a"},"execution_count":46,"outputs":[{"output_type":"stream","name":"stderr","text":["[nltk_data] Downloading package punkt to /root/nltk_data...\n","[nltk_data] Package punkt is already up-to-date!\n"]}]},{"cell_type":"code","source":["# function to generate few shot\n","import pickle\n","INPUT_LIMIT = 2048\n","def count_words(input_string):\n"," words = input_string.split(\" \")\n"," return len(words)\n","def generate_few_shot_examples_sentbert(test_description, k):\n"," # use sentence transformers to find k closest few shot examples\n"," query_embedding = sentbert_model.encode(test_description, convert_to_tensor=True)\n","\n"," search_hits = util.semantic_search(query_embedding, corpus_embeddings)\n"," search_hits = search_hits[0] # Get the hits for the first query\n","\n"," examples = \"\"\n"," for i in range(k-1, -1, -1): # from least similar\n"," 
sample_id = search_hits[i]['corpus_id']\n"," sample_description = train_descriptions[sample_id]\n"," sample_label = \",\".join(final_desc_to_label[sample_description].split(\"/\"))\n","\n"," examples += \"\\nDescription: \" + sample_description + \"\\nMaterial type: \" + sample_label\n","\n"," if count_words(examples) >= INPUT_LIMIT:\n"," break\n","\n"," return examples\n"],"metadata":{"id":"vYYSP44thcJr","executionInfo":{"status":"ok","timestamp":1711574517757,"user_tz":420,"elapsed":2,"user":{"displayName":"Hyunju Song","userId":"03844618074789275015"}}},"execution_count":47,"outputs":[]},{"cell_type":"code","source":["def few_shot_prediction(test_df, num_shot, summarize_and_explain = False, OUTPUT_FILE=\"output.json\"):\n"," \"\"\"\n"," num_shot : number of examples to use in the prompt\n"," summarize_and_explain : whether to summarize and explain the input text and use that instead of original sample_description\n"," \"\"\"\n"," test_prediction = []\n"," for idx, row in test_df.iterrows():\n"," sample_description = row['concatenated_text_B'].split(\"\")[0][len(\"\"):]\n"," input = sample_description\n","\n"," # SummarizeExplain\n"," if summarize_and_explain:\n"," summary = generate_summary_and_explanation(sample_description)\n"," input = summary\n","\n"," examples = generate_few_shot_examples_sentbert(sample_description, num_shot)\n"," output = generate_prediction(joined_leaf_material_types,examples,input)\n","\n"," prediction = extract_prediction(leaf_material_types, output)\n","\n"," final_prediction = []\n"," if len(prediction) > 0:\n"," for pred in prediction:\n"," final_prediction.extend(leaf_to_entire_path_mapping[pred]) # get entire parents as well and add it to the prediction\n","\n"," # TODO\n"," else:\n"," # level-up traversal recursively\n"," curr_labels = leaf_material_types\n"," parent_labels = get_parent_labels(curr_labels)\n","\n"," while len(prediction) == 0 and len(parent_labels)>0:\n"," curr_labels = parent_labels\n"," joined_curr_labels = 
\"\\n\".join(curr_labels)\n"," output = generate_prediction(joined_curr_labels,examples,input)\n","\n"," prediction = extract_prediction(curr_labels, output)\n"," print(\"Extracted labels: \", prediction)\n"," final_prediction = []\n"," for pred in prediction:\n"," final_prediction.append(pred)\n"," final_prediction.extend(label_to_parent[pred])\n","\n"," # recurse up one level\n"," parent_labels = get_parent_labels(curr_labels)\n","\n","\n"," final_prediction = list(set(final_prediction))\n"," final_prediction = [x for x in final_prediction if x!= None and x!='material']\n"," print(\"Prediction : \", final_prediction)\n","\n"," test_prediction.append(final_prediction)\n"," print(\"Gold: \",row[\"label_list\"])\n"," print(f\"{idx}-th prediction done\\n\\n\")\n"," if idx % 100 == 0:\n"," with open(OUTPUT_FILE, 'w') as file:\n"," json.dump(test_prediction, file)\n","\n"," with open(OUTPUT_FILE, 'w') as file:\n"," json.dump(test_prediction, file)\n","\n"," return test_prediction\n","\n","test_prediction = few_shot_prediction(test_df, 5, False, OUTPUT_FILE)"],"metadata":{"id":"UUXX1PN58R15","colab":{"base_uri":"https://localhost:8080/","height":1000},"executionInfo":{"status":"error","timestamp":1711574823868,"user_tz":420,"elapsed":306113,"user":{"displayName":"Hyunju Song","userId":"03844618074789275015"}},"outputId":"6e7dee64-6b2a-4d5c-dac5-0b2160e2e3b1"},"execution_count":48,"outputs":[{"output_type":"stream","name":"stdout","text":[" sediment,igneous rock,natural solid material,foiditoid,fine grained igneous rock,rock\n","Prediction : ['fine grained igneous rock', 'foiditoid', 'igneous rock', 'rock or sediment', 'rock', 'natural solid material']\n","Gold: sediment/igneous rock/natural solid material/foiditoid/fine grained igneous rock/rock\n","0-th prediction done\n","\n","\n"," diamictite,clastic sedimentary rock,sediment,natural solid material,sedimentary rock,rock\n","Prediction : ['sedimentary rock', 'rock or sediment', 'diamictite', 'rock', 'clastic 
sedimentary rock', 'natural solid material']\n","Gold: diamictite/clastic sedimentary rock/sediment/natural solid material/sedimentary rock/rock\n","1-th prediction done\n","\n","\n"," sediment,igneous rock,natural solid material,ultramafic igneous rock,peridotite,rock\n","Prediction : ['ultramafic igneous rock', 'peridotite', 'igneous rock', 'rock or sediment', 'rock', 'natural solid material']\n","Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n","2-th prediction done\n","\n","\n"," andesite,sediment,igneous rock,natural solid material,intermediate composition igneous rock,rock\n","Prediction : ['igneous rock', 'rock or sediment', 'andesite', 'rock', 'intermediate composition igneous rock', 'natural solid material']\n","Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n","3-th prediction done\n","\n","\n"," andesite,sediment,igneous rock,natural solid material,intermediate composition igneous rock,rock\n","Prediction : ['igneous rock', 'rock or sediment', 'andesite', 'rock', 'intermediate composition igneous rock', 'natural solid material']\n","Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n","4-th prediction done\n","\n","\n"," sediment,igneous rock,natural solid material,ultramafic igneous rock,pyroxenite,rock\n","Prediction : ['ultramafic igneous rock', 'pyroxenite', 'igneous rock', 'rock or sediment', 'rock', 'natural solid material']\n","Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock\n","5-th prediction done\n","\n","\n"," sediment,natural solid material,generic sandstone,sedimentary rock,rock\n","Prediction : ['sedimentary rock', 'natural solid material', 'rock or sediment', 'generic sandstone', 'rock']\n","Gold: sediment/natural solid material/generic sandstone/sedimentary rock/rock\n","6-th prediction done\n","\n","\n"," liquid water,fluid 
material\n","Prediction : ['fluid material', 'liquid water']\n","Gold: liquid water/fluid material\n","7-th prediction done\n","\n","\n"," liquid water,fluid material\n","Prediction : ['fluid material', 'liquid water']\n","Gold: liquid water/fluid material\n","8-th prediction done\n","\n","\n"," natural solid material,mineral,mineral-silicate or germanate\n","Prediction : ['mineral', 'mineral-silicate or germanate', 'natural solid material']\n","Gold: natural solid material/mineral/mineral-silicate or germanate\n","9-th prediction done\n","\n","\n"," andesite,sediment,igneous rock,natural solid material,intermediate composition igneous rock,rock\n","Prediction : ['igneous rock', 'rock or sediment', 'andesite', 'rock', 'intermediate composition igneous rock', 'natural solid material']\n","Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n","10-th prediction done\n","\n","\n"," sediment,igneous rock,natural solid material,ultramafic igneous rock,peridotite,rock\n","Prediction : ['ultramafic igneous rock', 'peridotite', 'igneous rock', 'rock or sediment', 'rock', 'natural solid material']\n","Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n","11-th prediction done\n","\n","\n"," liquid water,fluid material\n","Prediction : ['fluid material', 'liquid water']\n","Gold: liquid water/fluid material\n","12-th prediction done\n","\n","\n"," andesite,sediment,igneous rock,natural solid material,intermediate composition igneous rock,rock\n","Prediction : ['igneous rock', 'rock or sediment', 'andesite', 'rock', 'intermediate composition igneous rock', 'natural solid material']\n","Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n","13-th prediction done\n","\n","\n"," andesite,sediment,igneous rock,natural solid material,intermediate composition igneous rock,rock\n","Prediction : ['igneous rock', 'rock or sediment', 
'andesite', 'rock', 'intermediate composition igneous rock', 'natural solid material']\n","Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n","14-th prediction done\n","\n","\n"," sediment,impact generated material,natural solid material,rock\n","Prediction : ['impact generated material', 'rock', 'rock or sediment', 'natural solid material']\n","Gold: sediment/impact generated material/natural solid material/rock\n","15-th prediction done\n","\n","\n"," andesite,sediment,igneous rock,natural solid material,intermediate composition igneous rock,rock\n","Prediction : ['igneous rock', 'rock or sediment', 'andesite', 'rock', 'intermediate composition igneous rock', 'natural solid material']\n","Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n","16-th prediction done\n","\n","\n"," diamictite,clastic sedimentary rock,sediment,natural solid material,sedimentary rock,rock\n","Prediction : ['sedimentary rock', 'rock or sediment', 'diamictite', 'rock', 'clastic sedimentary rock', 'natural solid material']\n","Gold: diamictite/clastic sedimentary rock/sediment/natural solid material/sedimentary rock/rock\n","17-th prediction done\n","\n","\n"," sediment,natural solid material,carbonate sedimentary rock,sedimentary rock,rock\n","Prediction : ['sedimentary rock', 'carbonate sedimentary rock', 'sediment', 'rock or sediment', 'rock', 'carbonate sediment', 'natural solid material']\n","Gold: sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock\n","18-th prediction done\n","\n","\n"]},{"output_type":"error","ename":"KeyboardInterrupt","evalue":"","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)","\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 60\u001b[0m 
\u001b[0mjson\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdump\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtest_prediction\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 62\u001b[0;31m \u001b[0mfew_shot_prediction\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtest_df\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mOUTPUT_FILE\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m","\u001b[0;32m\u001b[0m in \u001b[0;36mfew_shot_prediction\u001b[0;34m(test_df, num_shot, summarize_and_explain, OUTPUT_FILE)\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0mexamples\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgenerate_few_shot_examples_sentbert\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msample_description\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum_shot\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 17\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgenerate_prediction\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjoined_leaf_material_types\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mexamples\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 18\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0mprediction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mextract_prediction\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mleaf_material_types\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m\u001b[0m in \u001b[0;36mgenerate_prediction\u001b[0;34m(material_types, examples, sample_description)\u001b[0m\n\u001b[1;32m 
14\u001b[0m \u001b[0;34m>>\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \"\"\"\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mgenerate_answer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msystem_prompt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minstruction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstrip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"\\n\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m","\u001b[0;32m\u001b[0m in \u001b[0;36mgenerate_answer\u001b[0;34m(system_prompt, prompt)\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0minputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtokenizer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput_text\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreturn_tensors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"pt\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreturn_attention_mask\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdevice\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 14\u001b[0;31m \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgenerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgeneration_config\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mgeneration_config\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 15\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0mtext\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mtokenizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbatch_decode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py\u001b[0m in \u001b[0;36mdecorate_context\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 113\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mdecorate_context\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 114\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mctx_factory\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 115\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 116\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 117\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdecorate_context\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py\u001b[0m in \u001b[0;36mgenerate\u001b[0;34m(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)\u001b[0m\n\u001b[1;32m 1575\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1576\u001b[0m \u001b[0;31m# 13. 
run sample\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1577\u001b[0;31m result = self._sample(\n\u001b[0m\u001b[1;32m 1578\u001b[0m \u001b[0minput_ids\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1579\u001b[0m \u001b[0mlogits_processor\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mprepared_logits_processor\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py\u001b[0m in \u001b[0;36m_sample\u001b[0;34m(self, input_ids, logits_processor, stopping_criteria, logits_warper, max_length, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, output_logits, return_dict_in_generate, synced_gpus, streamer, **model_kwargs)\u001b[0m\n\u001b[1;32m 2731\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2732\u001b[0m \u001b[0;31m# forward pass to get next token\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2733\u001b[0;31m outputs = self(\n\u001b[0m\u001b[1;32m 2734\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mmodel_inputs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2735\u001b[0m \u001b[0mreturn_dict\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1509\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compiled_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# type: 
ignore[misc]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1510\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1511\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1512\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1513\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1518\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_pre_hooks\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_hooks\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1519\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1521\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1522\u001b[0m 
\u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/accelerate/hooks.py\u001b[0m in \u001b[0;36mnew_forward\u001b[0;34m(module, *args, **kwargs)\u001b[0m\n\u001b[1;32m 164\u001b[0m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_old_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 166\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_old_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 167\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_hf_hook\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpost_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodule\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 168\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/models/mistral/modeling_mistral.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict)\u001b[0m\n\u001b[1;32m 1155\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1156\u001b[0m \u001b[0;31m# decoder outputs consists of (dec_features, layer_state, 
dec_hidden, dec_attn)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1157\u001b[0;31m outputs = self.model(\n\u001b[0m\u001b[1;32m 1158\u001b[0m \u001b[0minput_ids\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minput_ids\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1159\u001b[0m \u001b[0mattention_mask\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mattention_mask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1509\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compiled_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# type: ignore[misc]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1510\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1511\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1512\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1513\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1518\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_pre_hooks\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_hooks\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1519\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1521\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1522\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/accelerate/hooks.py\u001b[0m in \u001b[0;36mnew_forward\u001b[0;34m(module, *args, **kwargs)\u001b[0m\n\u001b[1;32m 164\u001b[0m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_old_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 166\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_old_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 167\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_hf_hook\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpost_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodule\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 168\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/models/mistral/modeling_mistral.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict)\u001b[0m\n\u001b[1;32m 1040\u001b[0m )\n\u001b[1;32m 1041\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1042\u001b[0;31m layer_outputs = decoder_layer(\n\u001b[0m\u001b[1;32m 1043\u001b[0m \u001b[0mhidden_states\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1044\u001b[0m \u001b[0mattention_mask\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mattention_mask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1509\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compiled_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# type: ignore[misc]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1510\u001b[0m 
\u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1511\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1512\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1513\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1518\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_pre_hooks\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_hooks\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1519\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1521\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1522\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/accelerate/hooks.py\u001b[0m in \u001b[0;36mnew_forward\u001b[0;34m(module, *args, **kwargs)\u001b[0m\n\u001b[1;32m 
164\u001b[0m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_old_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 166\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_old_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 167\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_hf_hook\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpost_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodule\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 168\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/models/mistral/modeling_mistral.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, hidden_states, attention_mask, position_ids, past_key_value, output_attentions, use_cache, **kwargs)\u001b[0m\n\u001b[1;32m 755\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 756\u001b[0m \u001b[0;31m# Self Attention\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 757\u001b[0;31m hidden_states, self_attn_weights, present_key_value = self.self_attn(\n\u001b[0m\u001b[1;32m 758\u001b[0m 
\u001b[0mhidden_states\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mhidden_states\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 759\u001b[0m \u001b[0mattention_mask\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mattention_mask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1509\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compiled_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# type: ignore[misc]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1510\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1511\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1512\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1513\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1518\u001b[0m 
\u001b[0;32mor\u001b[0m \u001b[0m_global_backward_pre_hooks\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_hooks\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1519\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1521\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1522\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/accelerate/hooks.py\u001b[0m in \u001b[0;36mnew_forward\u001b[0;34m(module, *args, **kwargs)\u001b[0m\n\u001b[1;32m 164\u001b[0m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_old_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 166\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_old_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 167\u001b[0m \u001b[0;32mreturn\u001b[0m 
\u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_hf_hook\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpost_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodule\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 168\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/models/mistral/modeling_mistral.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, hidden_states, attention_mask, position_ids, past_key_value, output_attentions, use_cache)\u001b[0m\n\u001b[1;32m 651\u001b[0m \u001b[0mbsz\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mq_len\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhidden_states\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 652\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 653\u001b[0;31m \u001b[0mquery_states\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mq_proj\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhidden_states\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 654\u001b[0m \u001b[0mkey_states\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mk_proj\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhidden_states\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 655\u001b[0m \u001b[0mvalue_states\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mv_proj\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhidden_states\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_wrapped_call_impl\u001b[0;34m(self, *args, 
**kwargs)\u001b[0m\n\u001b[1;32m 1509\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compiled_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# type: ignore[misc]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1510\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1511\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1512\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1513\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1518\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_pre_hooks\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_hooks\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1519\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1521\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1522\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/accelerate/hooks.py\u001b[0m in \u001b[0;36mnew_forward\u001b[0;34m(module, *args, **kwargs)\u001b[0m\n\u001b[1;32m 164\u001b[0m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_old_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 166\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_old_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 167\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_hf_hook\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpost_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodule\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 168\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/bitsandbytes/nn/modules.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 427\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 428\u001b[0m \u001b[0mbias\u001b[0m 
\u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbias\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbias\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompute_dtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 429\u001b[0;31m \u001b[0mout\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbnb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmatmul_4bit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mweight\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mt\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbias\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbias\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mquant_state\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mweight\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mquant_state\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 430\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 431\u001b[0m \u001b[0mout\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minp_dtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/_functions.py\u001b[0m in \u001b[0;36mmatmul_4bit\u001b[0;34m(A, B, quant_state, out, bias)\u001b[0m\n\u001b[1;32m 575\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 576\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 
577\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mMatMul4Bit\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mA\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mB\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbias\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mquant_state\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/autograd/function.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[1;32m 551\u001b[0m \u001b[0;31m# See NOTE: [functorch vjp and autograd interaction]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 552\u001b[0m \u001b[0margs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_functorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munwrap_dead_wrappers\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 553\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# type: ignore[misc]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 554\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 555\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mis_setup_ctx_defined\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/_functions.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(ctx, A, B, out, bias, quant_state)\u001b[0m\n\u001b[1;32m 514\u001b[0m \u001b[0;31m# 1. 
Dequantize\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 515\u001b[0m \u001b[0;31m# 2. MatmulnN\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 516\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfunctional\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlinear\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mA\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdequantize_4bit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mB\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mquant_state\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mA\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mt\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbias\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 517\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 518\u001b[0m \u001b[0;31m# 3. 
Save state\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/bitsandbytes/functional.py\u001b[0m in \u001b[0;36mdequantize_4bit\u001b[0;34m(A, quant_state, absmax, out, blocksize, quant_type)\u001b[0m\n\u001b[1;32m 1091\u001b[0m \u001b[0mlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcdequantize_blockwise_fp16_fp4\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mget_ptr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mget_ptr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mA\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mget_ptr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mabsmax\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mget_ptr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mct\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mc_int\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mquant_state\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mblocksize\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mct\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mc_int\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1092\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1093\u001b[0;31m \u001b[0mlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcdequantize_blockwise_fp16_nf4\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mget_ptr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mget_ptr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mA\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mget_ptr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mabsmax\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mget_ptr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mct\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mc_int\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mquant_state\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mblocksize\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mct\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mc_int\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1094\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbfloat16\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1095\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mquant_state\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mquant_type\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'fp4'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mKeyboardInterrupt\u001b[0m: "]}]},{"cell_type":"markdown","source":["# Evaluate"],"metadata":{"id":"3a9ND_XmY1Hu"}},{"cell_type":"code","source":["multi_to_label = {\n"," \"rock or sediment\": [\"rock\", \"sediment\"],\n"," \"mixed soil sediment rock\" : [\"soil\", \"sediment\", \"rock\"]\n","}\n","\n","final_predicted_labels = test_prediction\n","for idx, labels in enumerate(test_prediction):\n"," for label in labels:\n"," if label in multi_to_label:\n"," labels.remove(label)\n"," labels.extend(multi_to_label[label])\n"," labels = list(set(labels))\n"," final_predicted_labels[idx] = labels # update\n","\n","# assert\n","for idx, labels in enumerate(test_prediction):\n"," for label in labels:\n"," if label in multi_to_label:\n"," print(\"invalid\")\n"," 
break\n","final_predicted_labels[-1]"],"metadata":{"id":"IlLXQYJS5aME","executionInfo":{"status":"aborted","timestamp":1711572302723,"user_tz":420,"elapsed":4,"user":{"displayName":"Hyunju Song","userId":"03844618074789275015"}}},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["label_file=\"total_labels.txt\" # file that stores all labels of iSamples taxonomy\n","gold_label_names = open(label_file).read().splitlines()"],"metadata":{"id":"PzAzISU6ZGNl","executionInfo":{"status":"aborted","timestamp":1711572302723,"user_tz":420,"elapsed":3,"user":{"displayName":"Hyunju Song","userId":"03844618074789275015"}}},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["true_labels = [x.split(\"/\") for x in test_df['label_list'].tolist()]\n","true_labels"],"metadata":{"id":"lguC6p6xZIcE","executionInfo":{"status":"aborted","timestamp":1711572302723,"user_tz":420,"elapsed":3,"user":{"displayName":"Hyunju Song","userId":"03844618074789275015"}}},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["## Multi label evaluation\n","from sklearn.metrics import classification_report\n","from sklearn.preprocessing import MultiLabelBinarizer\n","\n","mlb = MultiLabelBinarizer()\n","mlb.fit([gold_label_names])\n","\n","true_labels_bin = mlb.transform(true_labels)\n","predicted_labels_bin = mlb.transform(final_predicted_labels)\n","\n","print(classification_report(true_labels_bin, predicted_labels_bin, target_names=mlb.classes_))\n","report = classification_report(true_labels_bin, predicted_labels_bin, target_names=mlb.classes_, output_dict=True)\n","\n","# Print the classification report\n","print(report)"],"metadata":{"id":"FM_92gqZZKwP","executionInfo":{"status":"aborted","timestamp":1711572302723,"user_tz":420,"elapsed":865884,"user":{"displayName":"Hyunju Song","userId":"03844618074789275015"}}},"execution_count":null,"outputs":[]}]} \ No newline at end of file diff --git a/SESAR/zeroshot-learning/iSamples_Zero_Shot_LLM.ipynb 
b/SESAR/zeroshot-learning/iSamples_Zero_Shot_LLM.ipynb new file mode 100644 index 0000000..a14079d --- /dev/null +++ b/SESAR/zeroshot-learning/iSamples_Zero_Shot_LLM.ipynb @@ -0,0 +1,7527 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "D3iTBzjkH-MU", + "outputId": "94cc23be-39ba-403e-a2eb-93f083b13763" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Mounted at /content/gdrive\n", + "/content/gdrive/MyDrive\n" + ] + } + ], + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/gdrive')\n", + "%cd /content/gdrive/MyDrive" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SONTODi_WbYa" + }, + "outputs": [], + "source": [ + "# set env variables\n", + "TEST_DATASET = \"SESAR_ZTC_test_multi_entire_filtered2.csv\"\n", + "OUTPUT_FILE = \"OUTPUT.json\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kZpc8Uvt5XCL" + }, + "source": [ + "# Set Up" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QOZjIsKd5dJI", + "outputId": "2c7d5674-3d7a-40a4-ae60-80ac4bff8ccb" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting datasets\n", + " Downloading datasets-2.18.0-py3-none-any.whl (510 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m510.5/510.5 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: sentencepiece in /usr/local/lib/python3.10/dist-packages (0.1.99)\n", + "Requirement already satisfied: tokenizers in /usr/local/lib/python3.10/dist-packages (0.15.2)\n", + "Collecting bitsandbytes\n", + " Downloading bitsandbytes-0.43.0-py3-none-manylinux_2_24_x86_64.whl (102.2 MB)\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m102.2/102.2 MB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting accelerate\n", + " Downloading accelerate-0.28.0-py3-none-any.whl (290 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m290.1/290.1 kB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting xformers\n", + " Downloading xformers-0.0.25-cp310-cp310-manylinux2014_x86_64.whl (222.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m222.5/222.5 MB\u001b[0m \u001b[31m1.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting einops\n", + " Downloading einops-0.7.0-py3-none-any.whl (44 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.6/44.6 kB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from datasets) (3.13.3)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (1.25.2)\n", + "Requirement already satisfied: pyarrow>=12.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (14.0.2)\n", + "Requirement already satisfied: pyarrow-hotfix in /usr/local/lib/python3.10/dist-packages (from datasets) (0.6)\n", + "Collecting dill<0.3.9,>=0.3.0 (from datasets)\n", + " Downloading dill-0.3.8-py3-none-any.whl (116 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (1.5.3)\n", + "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2.31.0)\n", + 
"Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (4.66.2)\n", + "Collecting xxhash (from datasets)\n", + " Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting multiprocess (from datasets)\n", + " Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: fsspec[http]<=2024.2.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2023.6.0)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.9.3)\n", + "Requirement already satisfied: huggingface-hub>=0.19.4 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.20.3)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets) (24.0)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (6.0.1)\n", + "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from bitsandbytes) (2.2.1+cu121)\n", + "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate) (5.9.5)\n", + "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from accelerate) (0.4.2)\n", + "Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes) (4.10.0)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes) (1.12)\n", + "Requirement already 
satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes) (3.2.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes) (3.1.3)\n", + "Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->bitsandbytes)\n", + " Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m23.7/23.7 MB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->bitsandbytes)\n", + " Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m823.6/823.6 kB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->bitsandbytes)\n", + " Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.1/14.1 MB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nvidia-cudnn-cu12==8.9.2.26 (from torch->bitsandbytes)\n", + " Downloading nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m731.7/731.7 MB\u001b[0m \u001b[31m813.4 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nvidia-cublas-cu12==12.1.3.1 (from torch->bitsandbytes)\n", + " Downloading nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m410.6/410.6 MB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting 
nvidia-cufft-cu12==11.0.2.54 (from torch->bitsandbytes)\n", + " Downloading nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.6/121.6 MB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nvidia-curand-cu12==10.3.2.106 (from torch->bitsandbytes)\n", + " Downloading nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.5/56.5 MB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nvidia-cusolver-cu12==11.4.5.107 (from torch->bitsandbytes)\n", + " Downloading nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m124.2/124.2 MB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nvidia-cusparse-cu12==12.1.0.106 (from torch->bitsandbytes)\n", + " Downloading nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m196.0/196.0 MB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nvidia-nccl-cu12==2.19.3 (from torch->bitsandbytes)\n", + " Downloading nvidia_nccl_cu12-2.19.3-py3-none-manylinux1_x86_64.whl (166.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m166.0/166.0 MB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nvidia-nvtx-cu12==12.1.105 (from torch->bitsandbytes)\n", + " Downloading nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m99.1/99.1 kB\u001b[0m \u001b[31m1.8 
MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: triton==2.2.0 in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes) (2.2.0)\n", + "Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch->bitsandbytes)\n", + " Downloading nvidia_nvjitlink_cu12-12.4.99-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (23.2.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.4.1)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.0.5)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.9.4)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2024.2.2)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in 
/usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2023.4)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->datasets) (1.16.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->bitsandbytes) (2.1.5)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->bitsandbytes) (1.3.0)\n", + "Installing collected packages: xxhash, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, einops, dill, nvidia-cusparse-cu12, nvidia-cudnn-cu12, multiprocess, nvidia-cusolver-cu12, datasets, xformers, bitsandbytes, accelerate\n", + "Successfully installed accelerate-0.28.0 bitsandbytes-0.43.0 datasets-2.18.0 dill-0.3.8 einops-0.7.0 multiprocess-0.70.16 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.19.3 nvidia-nvjitlink-cu12-12.4.99 nvidia-nvtx-cu12-12.1.105 xformers-0.0.25 xxhash-3.4.1\n", + "Collecting git+https://github.com/huggingface/transformers\n", + " Cloning https://github.com/huggingface/transformers to /tmp/pip-req-build-_allehbl\n", + " Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers /tmp/pip-req-build-_allehbl\n", + " Resolved https://github.com/huggingface/transformers to commit a25037beb9f039270b30a94c34ead72ea80ae8a5\n", + " Installing build dependencies ... 
\u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (3.13.3)\n", + "Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (0.20.3)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (1.25.2)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (24.0)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (6.0.1)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (2023.12.25)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (2.31.0)\n", + "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (0.15.2)\n", + "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (0.4.2)\n", + "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (4.66.2)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers==4.40.0.dev0) (2023.6.0)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers==4.40.0.dev0) (4.10.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in 
/usr/local/lib/python3.10/dist-packages (from requests->transformers==4.40.0.dev0) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.40.0.dev0) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.40.0.dev0) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.40.0.dev0) (2024.2.2)\n", + "Building wheels for collected packages: transformers\n", + " Building wheel for transformers (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for transformers: filename=transformers-4.40.0.dev0-py3-none-any.whl size=8793346 sha256=0759d5e697cfcf2d53b21751741a87e18f8d4e77db4ca68977bccb289679f404\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-7sg7kdwp/wheels/c0/14/d6/6c9a5582d2ac191ec0a483be151a4495fe1eb2a6706ca49f1b\n", + "Successfully built transformers\n", + "Installing collected packages: transformers\n", + " Attempting uninstall: transformers\n", + " Found existing installation: transformers 4.38.2\n", + " Uninstalling transformers-4.38.2:\n", + " Successfully uninstalled transformers-4.38.2\n", + "Successfully installed transformers-4.40.0.dev0\n" + ] + } + ], + "source": [ + "!pip install datasets sentencepiece tokenizers bitsandbytes accelerate xformers einops\n", + "!pip install git+https://github.com/huggingface/transformers" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qtujFQnW5akB" + }, + "source": [ + "# Download" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "et7GPgVK5gvj" + }, + "outputs": [], + "source": [ + "import torch\n", + "from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig\n", + "\n", + "device = \"cuda\"\n", + "# This causes OOM\n", + "\n", + "# model = 
AutoModelForCausalLM.from_pretrained(\n", + "# \"Open-Orca/Mistral-7B-OpenOrca\").to(device)\n", + "# tokenizer = AutoTokenizer.from_pretrained(\n", + "# \"Open-Orca/Mistral-7B-OpenOrca\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 540, + "referenced_widgets": [ + "a679085184e04852a5a66fa38ba91d88", + "1d9a8fb55c2e4199b83d826a14d8c5c8", + "bd9df7d514474d3285b05692a4b6e74e", + "b5485d1b489e4a15840ba32af8675b40", + "f373d8ecb2f44fbf9d26c06c74fe12ec", + "e9c92a0ed6994d4cab13dcc4c13a8472", + "b6015ae8961d4954bec0b56578a998d8", + "5aa1dcd678784e7d9f44d29b51c0afd6", + "8e2c3da60a7d4d7d927e3db45d6341fb", + "31c8549a556a408b99dc760cc23dfd22", + "f5773f126e2c4105af4b926dcbfb2aaf", + "09184a59b6554c019a5afddb4d46f7d4", + "3e46ac6ae7dc4f149079add9ecfe9b57", + "dabc885c909b4df1ab6d5f3d27a39804", + "41719d701fa444ba9199aa6fb155bb52", + "5e88a32b4d2640fbb4f53b9c0d01fb93", + "0f4b4142b9154ad4a58134e532d4a74e", + "20e20a69adc24c17b947ed1adf718cc5", + "2ba731dd172e45188f231c134f32240e", + "9f255ddfb0af4603927fa4fb8f15c7a0", + "998dec17d6014ba9a8ad1ccc7fda02a9", + "74ac0a6329c44866aab268580d54a54b", + "9006594029d54ea2b9dd22c10cdcb6b7", + "9b8aa1b4700f49e3b1406154fada90f0", + "4afb39daeffa40f18b1c552086b02d48", + "643975a54b434eddbcd0c9c25e722a36", + "177fddeae1df4d5fbfdfe5c2760440b4", + "1af94dbcbb7b41cc83d7156129f1656d", + "ee9633bc075d4c718ef751b27b340909", + "53d47acdae3a4a63af751a7b3bc34ecd", + "ea1c3a9d09f74d0a8bd4a011e573999e", + "193d711e29344e9a97b6f62ce586d192", + "56ee80bf9d684f7e91108dda63fede6a", + "d3a09b2a00d544368bfad9a40b6521d8", + "840328d985b046078cc8431e0ba471bc", + "6e42b7ca0c074994a4e55d4031bb095a", + "a51d3c26c81d44cb92b31a4d81645352", + "44da9a148280492eac8d9462ea2005ae", + "d4d823d872fe43f4b41d571c63810378", + "3b30bbcd7a224d1986d6c4f1f8a0537f", + "0a0fdb146b084f6cb9022658774d8d44", + "430c77a18ce14e57affa07b007f6b29e", + "8f7e56e7c530488ca080f17d09912927", 
+ "262ed12e20f14a7da025941df7a8a4ec", + "26825528380e4248b907ec8fb2e3db98", + "3e258a7c24014a2985fc642836e1d659", + "9611c61276154567bf26c44b25e3f33c", + "91fb2a0fda9b4a55b25b71526a54f27e", + "7b178bf90246410cb76e8dc33cbb843c", + "534efbc9c6a04af6a67fc7240f6c3df6", + "5f01eb75fd0a4e1ca25584959b8b0f1d", + "d2e08c31b2914c4bbbe87eb82bbc0725", + "733656a77b48464fae03e7eae5f6f960", + "a1469f01440842e39052ffdaaa4b244d", + "708417fcc5ac4ddb866a863c40880ad3", + "4d6eae82b5bb467d89580d346034aa05", + "6589bf23342a4ca88a59b46d36fd7a51", + "e70780cadaaf4a5da5d2bdcce45ad6b7", + "4f0f87cea7734d8486b8b359505bf5c4", + "4a7a99ea869f46b3a4545b41991bd0ab", + "e77c1764577c44529fbb7ac08f0cfe7c", + "fc0f7af7ff57481aaec43c6a7d046759", + "5ea503a14a5c4aab9672c8724ec623ce", + "3b82e2f3bc86473db9f438f40f8b63fe", + "e0e9637f840947d6a7e83e1f41f6d0a2", + "54febdc979294498bc3834573a7aa0af", + "ea23ba351cec462fb93bd7a0238b096c", + "641147aed7ca48f7ad5c393ad8c29b66", + "80dcbaab52b549899ef0ebd71184bc03", + "8fbfd91171c44e3dbb728f8195f427e4", + "40cd217dbf3e42d79ab2ec57bd5b4796", + "e459814232c04920be36eb1785e80de4", + "d9f7e83019844328883fa4efce9c6554", + "60108ed6ff4b4923ba1e048f34f70fe1", + "859cf9a371164d97bef17523f257b776", + "2d290718498547a6b77ea89e94ef1ec8", + "66ee7b37aa22458e8bb7d0134bdc4370", + "84e3384be50f41beaa4fc5c9e339cd6e", + "dc1f4a7e147a4ea9ba439ac1e3784bdd", + "d1f57132236a4dc4997a13853cf5387b", + "1901d112bf484ebabc1a20174cc80e84", + "f52a8c9e0d4b407381f25e001a0d923d", + "ca0f6266390a4d11bb3f9e3e4efad740", + "1d90860ca416478f8aaed6eebf597f9b", + "1c8064dbc9ff4ad89bf77919980d229d", + "49f7f0962f994ae6a27bddab4626f545", + "98ee44eda25646bc983b44337247ab88", + "e1f1d5f66d4245b097e105b817102acf", + "daaebeb135604a7f92ca60ab9c548304", + "ed904233fca94f8dbeb2a9a74d2f5ae1", + "d8dc5a7eea1845ab8c4237b1b389e704", + "0d2aee31176a4dd1afae89bef077814f", + "c71ecf1c92c54d869f502f48597e5bf3", + "64d97f672bb04e0183883bbacf982a76", + "d4b411f78c7f4c1992a3d29511eb51a8", + 
"372f4d99d6354a67bfb3099957799266", + "75575c72fc35496c9d83cbde50892e54", + "6a683c2c5afd4ea1bee2dd6b87659277", + "5ccc3c0beabc4cd69f63a45db3b80303", + "fba0299de9e54d28b8dcc9fcf5e7b633", + "598795295536484c8fffd8551a2edec0", + "ee99716a1ce9485e9ea890a12bf6f9ef", + "aba9dbb872dc469cb74ffc0dc0efb930", + "e32ed5960e21445fa0f21aa882d2f0d8", + "46ecae9135194a2ca8008cc03b93402b", + "08e87d7167f04a898698f0b8fc4be7d6", + "a7ab89aa5bfd47149b722931bc73f328", + "474fee224b3147f98cf311f05946adbe", + "1f902a8c097a4358823e298d60011546", + "649985b1c88e49969b534ca7a96a5502", + "544fa6940306465d8aa59bac070bc4d9", + "ca9746eab1c44052a46c8e9bc396a97e", + "9f3986b389f146d888595473f9840e18", + "805650c210c64a8a87aac287b8a1eb23", + "c498e759f28c45b8ae11e93987c04778", + "65ceffed910546a5811ea3661966eb43", + "a577cd345e814a798715bf8589b0cb99", + "ceed07113d194cda963efcf12ce9f5e7", + "ac5c897c2352459fb486ac7ddce0e758", + "f0d3af130f70445da4f6b41c96829239", + "33bc886a105c4ad4a1e006325998f978", + "caf1af2a102f498788cc38eece442233", + "368f3674869e40bdbc9cf1824114c269", + "065cf325a5664abbadca315210554285", + "07cd49d1105146279d10d1016b91c49c", + "71420ecd2f514efbbce32dd3de546f6b", + "c66df63f43ac486c9dab4efb266780f6", + "390029fe0ebd471982f24eb2d04340c4", + "8a762994d3f94fcd84ae37871a2cc46d", + "fca999b97c394c478591921d0c5817b9", + "48d6a0eb9fdc413b97dbe9822f120c7c", + "a8173e520d7f404eab9d939270542faf" + ] + }, + "id": "WKKLVdta-X6w", + "outputId": "783a721d-85cf-4669-a228-63cf14d8c299" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n", + "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", + "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", + "You will be able to reuse this secret in all of your notebooks.\n", 
+ "Please note that authentication is recommended but still optional to access public models or datasets.\n", + " warnings.warn(\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "config.json: 0%| | 0.00/623 [00:00 Orcas were not known to be drawn to mistral energy, but they were seen recently 100 miles off the coast of Brazil.\n", + "\n", + "The Brazilian Navy reported that a group of orcas were seen swimming around a platform in the Campos Basin, an area known for its oil production.\n", + "\n", + "The orcas were seen swimming around the platform, which is 100 miles off the coast of Brazil, in the Campos Basin, an area known for its oil production.\n", + "\n", + "The navy said the orcas were swimming around the platform, which is 100 miles off the coast of Brazil, in the Campos Basin, an area known for its oil production.\n", + "\n", + "The navy said the orcas were swimming around the platform, which is 100 miles off the coast of Brazil, in the Campos Basin, an area known for its oil production.\n", + "\n", + "The navy said the orcas were swimming around the platform, which is 100 miles off the coast of Brazil, in the Campos Basin, an area known for its oil production.\n", + "\n", + "The navy said the orcas were swimming around the platform, which is 100 miles off the coast of Brazil, in the Campos Basin, an area known for its oil production.\n", + "\n", + "The navy\n" + ] + } + ], + "source": [ + "inputs = tokenizer(\n", + " \"Orcas were not known to be drawn to mistral energy, but they were seen recently \",\n", + " return_tensors=\"pt\").to(device)\n", + "outputs = model.generate(\n", + " **inputs, max_new_tokens=256, use_cache=True, do_sample=True,\n", + " temperature=0.2, top_p=0.95)\n", + "text = tokenizer.batch_decode(outputs)[0]\n", + "print(text)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "orHjdlw76EhP", + "outputId": 
"753672af-69f2-4ccf-8aad-42c856b0941b" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "<|im_start|> system\n", + "A chat.<|im_end|><|im_start|> user\n", + "Tell me a joke.<|im_end|><|im_start|> assistant\n", + " Two mice were talking, one had a tiny, tiny car. He said to his friend, \"Hey, how does your new car run?\" His friend replied, \"Well, it's so tiny, it can run under any door!\" The first mouse retorted, \"Wow, that's really impressive, but mine runs in your pocket!\"\n", + "<|im_end|>\n" + ] + } + ], + "source": [ + "sys_prompt = \"A chat.\"\n", + "prompt = \"Tell me a joke.\"\n", + "\n", + "prefix = \"<|im_start|>\"\n", + "suffix = \"<|im_end|>\\n\"\n", + "sys_format = prefix + \"system\\n\" + sys_prompt + suffix\n", + "user_format = prefix + \"user\\n\" + prompt + suffix\n", + "assistant_format = prefix + \"assistant\\n\"\n", + "input_text = sys_format + user_format + assistant_format\n", + "\n", + "generation_config = GenerationConfig(\n", + " max_length=256, temperature=1.1, top_p=0.95, repetition_penalty=1.0,\n", + " do_sample=True, use_cache=True,\n", + " eos_token_id=tokenizer.eos_token_id, pad_token_id=tokenizer.eos_token_id,\n", + " transformers_version=\"4.34.0.dev0\")\n", + "\n", + "inputs = tokenizer(input_text, return_tensors=\"pt\", return_attention_mask=True).to(device)\n", + "outputs = model.generate(**inputs, generation_config=generation_config)\n", + "\n", + "text = tokenizer.batch_decode(outputs)[0]\n", + "print(text)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zviNf9VrF7e8", + "outputId": "3a78d1b4-0d56-4187-fad9-d80ebcbdedf3" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Two mice were talking, one had a tiny, tiny car. 
He said to his friend, \"Hey, how does your new car run?\" His friend replied, \"Well, it's so tiny, it can run under any door!\" The first mouse retorted, \"Wow, that's really impressive, but mine runs in your pocket!\"\n" + ] + } + ], + "source": [ + "print(text.split(\"<|im_end|>\")[2][len(\"<|im_start|> assistant\"):].strip(\"\\n\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dEwZGnMf6gwT" + }, + "source": [ + "# Zero Shot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-VpaRb0y65vA" + }, + "outputs": [], + "source": [ + "# load test dataset\n", + "import pandas as pd\n", + "test_df = pd.read_csv(TEST_DATASET)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VFESw5xj7DUj" + }, + "source": [ + "## Set Up - Read Taxonomy Files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-Lg7CG_R7G60" + }, + "outputs": [], + "source": [ + "import json\n", + "leaf_material_types = list(open(\"unique_leaf_labels.txt\").read().splitlines())\n", + "joined_leaf_material_types = \"\\n\".join(leaf_material_types)\n", + "\n", + "leaf_to_entire_path_mapping = {}\n", + "with open('leaf_to_parents_mapping.json') as f:\n", + " leaf_to_entire_path_mapping = json.load(f)\n", + "\n", + "mapping = json.load(open(\"label_to_parent_mapping.json\"))\n", + "label_to_parent = {}\n", + "for k, v in mapping.items():\n", + " if \"material\" in v:\n", + " v.remove(\"material\")\n", + " label_to_parent[k] = v\n", + "\n", + "child_to_parent = json.load(open(\"child_to_parent_mapping.json\"))\n", + "def get_parent_labels(curr_labels):\n", + " # return the parent labels (upper level labels)\n", + " parent_labels = []\n", + " for label in curr_labels:\n", + " if label in child_to_parent and child_to_parent[label]:\n", + " parent_labels.append(child_to_parent[label])\n", + " parent_labels = list(set(parent_labels))\n", + " return parent_labels\n" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "id": "1D_Sho9GCIHr" + }, + "outputs": [], + "source": [ + "import json\n", + "\n", + "# mapping to description and individual fields that contain geology terms that need to be enriched\n", + "# this can be easily generated by using mapping of two columns in dataframe\n", + "desc_to_tax_map = json.load(open(\"description_to_taxonomy_train_all.json\"))\n", + "desc_to_cm_map = json.load(open(\"description_to_collectionMethod_train_all.json\"))\n", + "desc_to_desc_map = json.load(open(\"description_to_description_train_all.json\"))\n", + "\n", + "def trim_mapping(mapping):\n", + " return {k.split(\"\")[0][len(\"\"):] : v for k,v in mapping.items()}\n", + "desc_to_tax_map = trim_mapping(desc_to_tax_map)\n", + "desc_to_cm_map = trim_mapping(desc_to_cm_map)\n", + "desc_to_desc_map = trim_mapping(desc_to_desc_map)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Xw1aiO9g8OsU" + }, + "source": [ + "# Set Up - Prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "c_b6fRu78RlA" + }, + "outputs": [], + "source": [ + "sys_prompt = \"A chat.\"\n", + "prompt = \"Tell me a joke.\"\n", + "\n", + "prefix = \"<|im_start|>\"\n", + "suffix = \"<|im_end|>\\n\"\n", + "sys_format = prefix + \"system\\n\" + sys_prompt + suffix\n", + "user_format = prefix + \"user\\n\" + prompt + suffix\n", + "assistant_format = prefix + \"assistant\\n\"\n", + "input_text = sys_format + user_format + assistant_format\n", + "\n", + "generation_config = GenerationConfig(\n", + " max_length=256, temperature=1.1, top_p=0.95, repetition_penalty=1.0,\n", + " do_sample=True, use_cache=True,\n", + " eos_token_id=tokenizer.eos_token_id, pad_token_id=tokenizer.eos_token_id,\n", + " transformers_version=\"4.34.0.dev0\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nq7k-c7hDsQF" + }, + "outputs": [], + "source": [ + "def generate_answer(system_prompt, prompt):\n", + " 
sys_format = prefix + \"system\\n\" + sys_prompt + suffix\n", + " user_format = prefix + \"user\\n\" + prompt + suffix\n", + " assistant_format = prefix + \"assistant\\n\"\n", + " input_text = sys_format + user_format + assistant_format\n", + "\n", + " generation_config = GenerationConfig(\n", + " max_new_tokens=100, temperature=0.0001, top_p=0.95, repetition_penalty=1.0,\n", + " do_sample=True, use_cache=True,\n", + " eos_token_id=tokenizer.eos_token_id, pad_token_id=tokenizer.eos_token_id,\n", + " transformers_version=\"4.34.0.dev0\")\n", + "\n", + " inputs = tokenizer(input_text, return_tensors=\"pt\", return_attention_mask=True).to(device)\n", + " outputs = model.generate(**inputs, generation_config=generation_config)\n", + "\n", + " text = tokenizer.batch_decode(outputs)[0]\n", + " text = text.split(\"<|im_end|>\")[2][len(\"<|im_start|> assistant\"):].strip(\"\\n\")\n", + " print(text)\n", + "\n", + " return text" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "G3LpcBwxAelY" + }, + "outputs": [], + "source": [ + "# Summarize And Explain\n", + "def add_taxonomy_description(description, description_to_taxonomy):\n", + " # add taxonomy description if exists and see if that helps\n", + " if description in description_to_taxonomy and type(description_to_taxonomy[description]) == str:\n", + " term = description_to_taxonomy[description]\n", + " system_prompt = \"You are a scientist. User will give you a geology term. You must generate a short description of the term.\"\n", + " instruction = f\"\"\"You are a scientist. 
Your task is to give a brief one sentence description of material the geology term it consists.\n", + " ###\n", + " <<<\n", + " Term:{term}\n", + " >>>\n", + " \"\"\"\n", + " return generate_answer(system_prompt, instruction).strip(\"\\n\")\n", + " else:\n", + " return None\n", + "\n", + "def add_collectionMethod_description(description, description_to_cm):\n", + " # add collectionMethod description if exists and see if that helps\n", + " if description in description_to_cm and type( description_to_cm[description]) == str:\n", + " term = description_to_cm[description]\n", + " system_prompt = \"You are a scientist. User will give you a term that indicates how it collected a sample from nature. You must generate a short description of the term.\"\n", + " instruction = f\"\"\"You are a scientist. Your task is to give a brief one sentence description of the given collection method of the sample.\n", + " ###\n", + " <<<\n", + " Term:{term}\n", + " >>>\n", + " \"\"\"\n", + " return generate_answer(system_prompt, instruction).strip(\"\\n\")\n", + " else:\n", + " return None\n", + "def add_description_description(description, description_to_desc):\n", + " # add collectionMethod description if exists and see if that helps\n", + " if description in description_to_desc and type( description_to_desc[description]) == str:\n", + " term = description_to_desc[description]\n", + " system_prompt = \"You are a scientist. User will give you a term that indicates a description of the sample. You must generate a short explanation of that description.\"\n", + " instruction = f\"\"\"You are a scientist. Your task is to give a one sentence explanation of the description of the sample.\n", + " ###\n", + " <<<\n", + " Description:{term}\n", + " >>>\n", + " \"\"\"\n", + " else:\n", + " return None\n", + "\n", + "\n", + "def generate_summary(sample_description):\n", + " system_prompt = \"You are a scientist. User will give you a description of a material sample it sampled from the nature. 
You must generate a summarized description of the sample.\"\n", + " prompt = f\"\"\"You are a scientist. Your task is to give a brief one sentence summary of the given description, focusing on the parts that is helpful in determining the type of material that constitutes it.\n", + " Include important fields in the summary such as the taxonomy informal classification, collection method, and values that determine the material type.\n", + " ###\n", + " <<<\n", + " Description: {sample_description}\n", + " >>>\n", + " \"\"\"\n", + "\n", + " return generate_answer(system_prompt, prompt)\n", + "\n", + "def generate_summary_and_explanation(sample_description):\n", + " summary = generate_summary(sample_description)\n", + "\n", + " taxonomy_rich_description = add_taxonomy_description(sample_description, desc_to_tax_map)\n", + " if taxonomy_rich_description:\n", + " #summary += enrich_field(\"fieldName\", sample_description, taxonomy_rich_description.strip(\"\\n\"))\n", + " summary += taxonomy_rich_description.strip(\"\\n\")\n", + " collectionMethodDesc = add_collectionMethod_description(sample_description, desc_to_cm_map)\n", + " if collectionMethodDesc:\n", + " #summary += enrich_field(\"collectionMethod\", sample_description, collectionMethodDesc.strip(\"\\n\"))\n", + " summary += collectionMethodDesc.strip(\"\\n\")\n", + " description_rich_description = add_description_description(sample_description, desc_to_desc_map)\n", + " if description_rich_description:\n", + " #summary += enrich_field(\"description\", sample_description, description_rich_description.strip(\"\\n\"))\n", + " summary += description_rich_description.strip(\"\\n\")\n", + " print(\"Enriched summary : \", summary)\n", + " return summary" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "egDdDDyKEfdv" + }, + "outputs": [], + "source": [ + "#CoT\n", + "\n", + "def generate_reasoning(material_types, sample_description):\n", + " system_prompt = \"You are a scientist. 
User will give you a task. You must generate an answer to the task.\"\n", + " instruction = f\"\"\"You are a scientist. Your task is to analyze the description of a material sample and determine the kind of material that constitutes it after <<<>>> into one of the predefined material types: \\n\n", + " {material_types} \\n\n", + " Let's think step by step.\\n\n", + " ###\\n\n", + " <<<\n", + " Description: {sample_description}\n", + " >>>\n", + " \"\"\"\n", + "\n", + " return generate_answer(system_prompt, instruction).strip(\"\\n\")\n", + "\n", + "def generate_prediction_with_reasoning(material_types, sample_description, reasoning):\n", + " system_prompt = \"You are a scientist. User will give you a task. You must generate an answer to the task\"\n", + " instruction = f\"\"\"You are a scientist. Your task is to analyze the description of a material sample and determine the kind of material that constitutes it after <<<>>> into one of the predefined material types: \\n\n", + " {material_types} \\n\n", + " Let's think step by step.\\n\n", + " ###\\n\n", + " <<<\n", + " Description: {sample_description}\n", + " >>>\n", + " {reasoning}\n", + " Therefore, the answer(kind of material) is\n", + " \"\"\"\n", + " return generate_answer(system_prompt, instruction).strip(\"\\n\")\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KBpm1jkvFDoZ" + }, + "outputs": [], + "source": [ + "def generate_prediction(material_types, sample_description):\n", + " system_prompt = \"You are a scientist. User will give you a task. You must generate an answer to the task.\"\n", + " instruction = f\"\"\"\n", + " You are a scientist. Your task is to analyze the description of a material sample and determine the kind of material that constitutes it after <<<>>> into one of the predefined material types: \\n\n", + " {material_types} \\n\n", + " You will only respond with the material type. Do not include the word \"Material type\". 
Do not provide explanations or notes.\n", + " <<<\n", + " Description: {sample_description}\n", + " Material type:\n", + " >>>\n", + " \"\"\"\n", + " return generate_answer(system_prompt, instruction).strip(\"\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "UnBiXJz6G1O_", + "outputId": "72ee445c-7677-4aed-a4e6-c6e1afbbab37" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[nltk_data] Downloading package punkt to /root/nltk_data...\n", + "[nltk_data] Unzipping tokenizers/punkt.zip.\n" + ] + } + ], + "source": [ + "import nltk\n", + "nltk.download('punkt')\n", + "from nltk.tokenize import word_tokenize\n", + "from nltk.util import ngrams\n", + "from nltk.metrics import jaccard_distance\n", + "\n", + "def jaccard_similarity(set1, set2):\n", + " \"\"\"\n", + " Calculate Jaccard similarity between two sets.\n", + " \"\"\"\n", + " intersection = len(set1.intersection(set2))\n", + " union = len(set1.union(set2))\n", + " return intersection / union\n", + "\n", + "def tokenize_text(text):\n", + " \"\"\"\n", + " Tokenize input text.\n", + " \"\"\"\n", + " return set(word_tokenize(text.lower()))\n", + "\n", + "def jaccard_highest_score(sample_text, set_of_texts):\n", + " \"\"\"\n", + " Find the text with the highest Jaccard similarity score compared to the sample text.\n", + " \"\"\"\n", + " sample_tokens = tokenize_text(sample_text)\n", + " highest_score = 0\n", + " most_similar_text = None\n", + " print(set_of_texts)\n", + " for text in set_of_texts:\n", + " text_tokens = tokenize_text(text)\n", + " similarity_score = jaccard_similarity(sample_tokens, text_tokens)\n", + "\n", + " if similarity_score > highest_score:\n", + " highest_score = similarity_score\n", + " most_similar_text = text\n", + "\n", + " return highest_score, most_similar_text # most similar material type\n", + "\n", + "def extract_prediction(candidate_material_types, 
response):\n", + " # do greedy text match\n", + " result = []\n", + " response = response.lower()\n", + " if response.startswith(\"The material type is: \"):\n", + " response = response[len(\"The material type is: \")]\n", + " for candidate_material_type in candidate_material_types:\n", + " if candidate_material_type in response:\n", + " result.append(candidate_material_type)\n", + " # convert multi label type labels\n", + " elif candidate_material_type == \"rock or sediment\":\n", + " if \"rock\" in response.lower() or \"sediment\" in response.lower():\n", + " result.append(candidate_material_type)\n", + " elif candidate_material_type == \"mixed soil sediment or rock\" and \"soil\" in response.lower():\n", + " result.append(candidate_material_type)\n", + " if len(result) == 0:\n", + " # use jaccard score\n", + " higest_score, most_similar_material_types = jaccard_highest_score(response, candidate_material_types)\n", + " #print(f\"Jaccard score : {higest_score} : {most_similar_material_types}\")\n", + " if higest_score >= 0.5:\n", + " result.append(most_similar_material_types)\n", + "\n", + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "UUXX1PN58R15", + "outputId": "1d9b8090-9dad-46b0-9946-85bf18ea3dcd" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Nephelinite\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary 
material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " Nephelinite\n", + "['biogenic non-organic material', 'anthropogenic material', 'anthropogenic metal material', 'fragmental igneous rock', 'granitoid', 'organic material', 'clastic sediment', 'plant material', 'any 
ice', 'rock', 'sedimentary rock', 'fluid material', 'sediment', 'igneous rock', 'mineral', 'clastic sedimentary rock', 'phaneritic igneous rock', 'intermediate composition igneous rock', 'gabbroid', 'ultramafic igneous rock', 'organic rich sedimentary rock', 'chert', 'ceramic clay', 'fine grained igneous rock', 'anthropogenic organic material', 'organic animal product', 'natural solid material', 'material', 'fault related material']\n", + "Extracted labels: []\n", + " phaneritic igneous rock\n", + "Extracted labels: ['rock', 'igneous rock', 'phaneritic igneous rock', 'rock or sediment']\n", + "Prediction : ['rock', 'igneous rock', 'phaneritic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/foiditoid/fine grained igneous rock/rock\n", + "0-th prediction done\n", + "\n", + "\n", + " diamictite\n", + "Prediction : ['clastic sedimentary rock', 'rock', 'sedimentary rock', 'diamictite', 'natural solid material', 'rock or sediment']\n", + "Gold: diamictite/clastic sedimentary rock/sediment/natural solid material/sedimentary rock/rock\n", + "1-th prediction done\n", + "\n", + "\n", + " ultrabasic igneous rock\n", + "Prediction : ['ultrabasic igneous rock', 'rock', 'igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n", + "2-th prediction done\n", + "\n", + "\n", + " hornblende andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "3-th prediction done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid 
material/intermediate composition igneous rock/rock\n", + "4-th prediction done\n", + "\n", + "\n", + " pyroxenite\n", + "Prediction : ['rock', 'pyroxenite', 'igneous rock', 'ultramafic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock\n", + "5-th prediction done\n", + "\n", + "\n", + " sandstone\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 
'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + "Prediction : ['generic sandstone', 'rock', 'sedimentary rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/natural solid material/generic sandstone/sedimentary rock/rock\n", + "6-th prediction done\n", + "\n", + "\n", + " glass rich igneous rock\n", + "Prediction : ['rock', 'anthropogenic material', 'any anthropogenic material', 'igneous rock', 'glass rich igneous rock', 'glass', 'natural solid material', 'rock or sediment']\n", + "Gold: liquid water/fluid material\n", + "7-th prediction done\n", + "\n", + "\n", + " mineral-rich sedimentary rock\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 
'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + "Prediction : ['hybrid sedimentary rock', 'rock', 'sedimentary rock', 'natural solid material', 'rock or sediment']\n", + "Gold: liquid water/fluid material\n", + "8-th prediction done\n", + "\n", + "\n", + " mineral-oxide\n", + "Prediction : ['mineral-oxide', 'mineral', 'natural solid material']\n", + "Gold: natural solid material/mineral/mineral-silicate or 
germanate\n", + "9-th prediction done\n", + "\n", + "\n", + " hornblende andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "10-th prediction done\n", + "\n", + "\n", + " ultrabasic igneous rock\n", + "Prediction : ['ultrabasic igneous rock', 'rock', 'igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n", + "11-th prediction done\n", + "\n", + "\n", + " liquid water\n", + "Prediction : ['fluid material', 'liquid water']\n", + "Gold: liquid water/fluid material\n", + "12-th prediction done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "13-th prediction done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "14-th prediction done\n", + "\n", + "\n", + " suevite\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 
'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " suevite\n", + "['biogenic non-organic material', 'anthropogenic material', 'anthropogenic metal material', 'fragmental igneous rock', 'granitoid', 'organic material', 'clastic sediment', 
'plant material', 'any ice', 'rock', 'sedimentary rock', 'fluid material', 'sediment', 'igneous rock', 'mineral', 'clastic sedimentary rock', 'phaneritic igneous rock', 'intermediate composition igneous rock', 'gabbroid', 'ultramafic igneous rock', 'organic rich sedimentary rock', 'chert', 'ceramic clay', 'fine grained igneous rock', 'anthropogenic organic material', 'organic animal product', 'natural solid material', 'material', 'fault related material']\n", + "Extracted labels: []\n", + " phaneritic igneous rock\n", + "Extracted labels: ['rock', 'igneous rock', 'phaneritic igneous rock', 'rock or sediment']\n", + "Prediction : ['rock', 'igneous rock', 'phaneritic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/impact generated material/natural solid material/rock\n", + "15-th prediction done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "16-th prediction done\n", + "\n", + "\n", + " gale mudstone\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 
'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " clastic sedimentary rock\n", + "Extracted labels: ['clastic sediment', 'rock', 'sedimentary rock', 'sediment', 'clastic sedimentary rock']\n", + "Prediction : ['clastic sedimentary rock', 'rock', 'sedimentary rock', 'sediment', 'clastic sediment', 'natural solid material', 'rock or sediment']\n", + "Gold: diamictite/clastic sedimentary rock/sediment/natural solid material/sedimentary rock/rock\n", + "17-th prediction done\n", + "\n", + "\n", + " limestone\n", + 
"Prediction : ['natural solid material', 'rock or sediment', 'limestone', 'rock']\n", + "Gold: sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock\n", + "18-th prediction done\n", + "\n", + "\n", + " basalt\n", + "Prediction : ['rock', 'fine grained igneous rock', 'igneous rock', 'basalt', 'natural solid material', 'rock or sediment']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "19-th prediction done\n", + "\n", + "\n", + " quartz rich igneous rock\n", + "Prediction : ['rock', 'quartz', 'igneous rock', 'phaneritic igneous rock', 'quartz rich igneous rock', 'mineral', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/natural solid material/generic sandstone/sedimentary rock/rock\n", + "20-th prediction done\n", + "\n", + "\n", + " augite andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "21-th prediction done\n", + "\n", + "\n", + " gaseous material\n", + "Prediction : ['fluid material', 'gaseous material']\n", + "Gold: liquid water/fluid material\n", + "22-th prediction done\n", + "\n", + "\n", + " ultrabasic igneous rock\n", + "Prediction : ['ultrabasic igneous rock', 'rock', 'igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock\n", + "23-th prediction done\n", + "\n", + "\n", + " streamwater\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 
'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " fluid 
material\n", + "Extracted labels: ['fluid material', 'material']\n", + "Prediction : ['fluid material']\n", + "Gold: liquid water/fluid material\n", + "24-th prediction done\n", + "\n", + "\n", + " coal\n", + "Prediction : ['rock', 'sedimentary rock', 'coal', 'organic rich sedimentary rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/coal/natural solid material/organic rich sedimentary rock/sedimentary rock/rock\n", + "25-th prediction done\n", + "\n", + "\n", + " basalt\n", + "Prediction : ['rock', 'fine grained igneous rock', 'igneous rock', 'basalt', 'natural solid material', 'rock or sediment']\n", + "Gold: basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock\n", + "26-th prediction done\n", + "\n", + "\n", + " Spinel lherzolite with copper and diopside\n", + "Prediction : ['anthropogenic metal material', 'copper', 'any anthropogenic material']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n", + "27-th prediction done\n", + "\n", + "\n", + " groundwater sample\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 
'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " fluid material\n", + "Extracted labels: ['fluid material', 'material']\n", + "Prediction : ['fluid material']\n", + "Gold: liquid water/fluid material\n", + "28-th prediction done\n", + "\n", + "\n", + " alkaline-feldspar granite\n", + "Prediction : ['rock', 'granitoid', 'igneous rock', 'phaneritic igneous rock', 'granite', 'natural solid material', 'rock or sediment']\n", + "Gold: granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/alkali feldspar granite/rock\n", + "29-th prediction done\n", + "\n", + "\n", + " gaseous material\n", + "Prediction : 
['fluid material', 'gaseous material']\n", + "Gold: liquid water/fluid material\n", + "30-th prediction done\n", + "\n", + "\n", + " pumice\n", + "Prediction : ['pumice', 'natural solid material', 'rock or sediment', 'rock']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "31-th prediction done\n", + "\n", + "\n", + " bentonite rich sedimentary rock\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber 
material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + "Prediction : ['rock', 'iron rich sedimentary rock', 'sedimentary rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/generic mudstone/natural solid material/sedimentary rock/rock\n", + "32-th prediction done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "33-th prediction done\n", + "\n", + "\n", + " quartz rich igneous rock\n", + "Prediction : ['rock', 'quartz', 'igneous rock', 'phaneritic igneous rock', 'quartz rich igneous rock', 'mineral', 'natural solid material', 'rock or sediment']\n", + "Gold: natural solid material/mineral/mineral-oxide\n", + "34-th prediction done\n", + "\n", + "\n", + " iron rich sedimentary rock\n", + "Prediction : ['iron rich sediment', 'rock', 'iron rich sedimentary rock', 'sedimentary rock', 'anthropogenic metal material', 'any anthropogenic material', 'sediment', 'iron', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/natural solid material/iron rich sedimentary rock/sedimentary rock/rock\n", + "35-th prediction done\n", + 
"\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "36-th prediction done\n", + "\n", + "\n", + " quartz rich igneous rock\n", + "Prediction : ['rock', 'quartz', 'igneous rock', 'phaneritic igneous rock', 'quartz rich igneous rock', 'mineral', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/natural solid material/generic sandstone/sedimentary rock/rock\n", + "37-th prediction done\n", + "\n", + "\n", + " quartz rich igneous rock\n", + "Prediction : ['rock', 'quartz', 'igneous rock', 'phaneritic igneous rock', 'quartz rich igneous rock', 'mineral', 'natural solid material', 'rock or sediment']\n", + "Gold: natural solid material/mineral/mineral-oxide\n", + "38-th prediction done\n", + "\n", + "\n", + " volcanic rock\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 
'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " volcanic rock\n", + "Extracted labels: ['rock']\n", + "Prediction : ['natural solid material', 'rock or sediment', 'rock']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "39-th prediction done\n", + "\n", + "\n", + " mineral-sulfate, selenate, or tellurate\n", + "Prediction : ['mineral', 'mineral-sulfate, selenate, or tellurate', 'natural solid material']\n", + "Gold: natural solid material/mineral/mineral-phosphate, arsenate, or vanadate\n", + "40-th prediction done\n", + "\n", + "\n", + " mylonite\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 
'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 
'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " Mylonite\n", + "['biogenic non-organic material', 'anthropogenic material', 'anthropogenic metal material', 'fragmental igneous rock', 'granitoid', 'organic material', 'clastic sediment', 'plant material', 'any ice', 'rock', 'sedimentary rock', 'fluid material', 'sediment', 'igneous rock', 'mineral', 'clastic sedimentary rock', 'phaneritic igneous rock', 'intermediate composition igneous rock', 'gabbroid', 'ultramafic igneous rock', 'organic rich sedimentary rock', 'chert', 'ceramic clay', 'fine grained igneous rock', 'anthropogenic organic material', 'organic animal product', 'natural solid material', 'material', 'fault related material']\n", + "Extracted labels: []\n", + " phaneritic igneous rock\n", + "Extracted labels: ['rock', 'igneous rock', 'phaneritic igneous rock', 'rock or sediment']\n", + "Prediction : ['rock', 'igneous rock', 'phaneritic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/mylonitic rock/fault related material/natural solid material/rock\n", + "41-th prediction done\n", + "\n", + "\n", + " olivine shonkinite\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant 
fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " ultramafic igneous rock\n", + "Extracted labels: ['rock', 'igneous rock', 'ultramafic igneous rock']\n", + "Prediction : ['rock', 'igneous rock', 'ultramafic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: phaneritic igneous rock/sediment/igneous rock/natural solid material/foid syenitoid/rock\n", + "42-th prediction done\n", + "\n", + "\n", + " 
tuff\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 
'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " clastic sedimentary rock\n", + "Extracted labels: ['clastic sediment', 'rock', 'sedimentary rock', 'sediment', 'clastic sedimentary rock']\n", + "Prediction : ['clastic sedimentary rock', 'rock', 'sedimentary rock', 'sediment', 'clastic sediment', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "43-th prediction done\n", + "\n", + "\n", + " limestone\n", + "Prediction : ['natural solid material', 'rock or sediment', 'limestone', 'rock']\n", + "Gold: sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock\n", + "44-th prediction done\n", + "\n", + "\n", + " quartz rich igneous rock\n", + "Prediction : ['rock', 'quartz', 'igneous rock', 'phaneritic igneous rock', 'quartz rich igneous rock', 'mineral', 'natural solid material', 'rock or sediment']\n", + "Gold: liquid water/fluid material\n", + "45-th prediction done\n", + "\n", + "\n", + " basaltic andesite\n", + "Prediction : ['rock or sediment', 'rock', 'fine grained igneous rock', 'igneous rock', 'intermediate composition igneous rock', 'basalt', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "46-th prediction done\n", + "\n", + "\n", + " quartz rich igneous rock\n", + "Prediction : ['rock', 'quartz', 'igneous rock', 'phaneritic igneous rock', 'quartz rich igneous rock', 'mineral', 'natural solid material', 'rock or sediment']\n", + "Gold: liquid water/fluid material\n", + "47-th prediction done\n", + "\n", + "\n", + " hornblendite\n", + "Prediction : 
['rock', 'hornblendite', 'igneous rock', 'ultramafic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/hornblendite/ultramafic igneous rock/rock\n", + "48-th prediction done\n", + "\n", + "\n", + " tuff\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite 
series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " volcanic ash\n", + "['biogenic non-organic material', 'anthropogenic material', 'anthropogenic metal material', 'fragmental igneous rock', 'granitoid', 'organic material', 'clastic sediment', 'plant material', 'any ice', 'rock', 'sedimentary rock', 'fluid material', 'sediment', 'igneous rock', 'mineral', 'clastic sedimentary rock', 'phaneritic igneous rock', 'intermediate composition igneous rock', 'gabbroid', 'ultramafic igneous rock', 'organic rich sedimentary rock', 'chert', 'ceramic clay', 'fine grained igneous rock', 'anthropogenic organic material', 'organic animal product', 'natural solid material', 'material', 'fault related material']\n", + "Extracted labels: []\n", + " phaneritic igneous rock\n", + "Extracted labels: ['rock', 'igneous rock', 'phaneritic igneous rock', 'rock or sediment']\n", + "Prediction : ['rock', 'igneous rock', 'phaneritic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "49-th prediction done\n", + "\n", + "\n", + " mineral-sulfide or sulfosalt\n", + "Prediction : ['mineral', 'natural solid material', 'mineral-sulfide or sulfosalt']\n", + "Gold: natural solid material/mineral/mineral-sulfide or sulfosalt\n", + "50-th prediction done\n", + "\n", + "\n", + " peridotite\n", + "Prediction : ['rock', 'igneous rock', 'ultramafic igneous 
rock', 'peridotite', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n", + "51-th prediction done\n", + "\n", + "\n", + " gaseous material\n", + "Prediction : ['fluid material', 'gaseous material']\n", + "Gold: fluid material/gaseous material\n", + "52-th prediction done\n", + "\n", + "\n", + " domite\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 
'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " Domite\n", + "['biogenic non-organic material', 'anthropogenic material', 'anthropogenic metal material', 'fragmental igneous rock', 'granitoid', 'organic material', 'clastic sediment', 'plant material', 'any ice', 'rock', 'sedimentary rock', 'fluid material', 'sediment', 'igneous rock', 'mineral', 'clastic sedimentary rock', 'phaneritic igneous rock', 'intermediate composition igneous rock', 'gabbroid', 'ultramafic igneous rock', 'organic rich sedimentary rock', 'chert', 'ceramic clay', 'fine grained igneous rock', 'anthropogenic organic material', 'organic animal product', 'natural solid material', 'material', 'fault related material']\n", + "Extracted labels: []\n", + " phaneritic igneous rock\n", + "Extracted labels: ['rock', 'igneous rock', 'phaneritic igneous rock', 'rock or sediment']\n", + "Prediction : ['rock', 'igneous rock', 'phaneritic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock\n", + "53-th prediction done\n", + "\n", + "\n", + " coal\n", + "Prediction : ['rock', 'sedimentary rock', 'coal', 'organic rich sedimentary rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/coal/natural solid 
material/organic rich sedimentary rock/sedimentary rock/rock\n", + "54-th prediction done\n", + "\n", + "\n", + " metamorphic rock\n", + "Prediction : ['natural solid material', 'rock or sediment', 'metamorphic rock', 'rock']\n", + "Gold: sediment/natural solid material/metamorphic rock/rock\n", + "55-th prediction done\n", + "\n", + "\n", + " quartz rich igneous rock\n", + "Prediction : ['rock', 'quartz', 'igneous rock', 'phaneritic igneous rock', 'quartz rich igneous rock', 'mineral', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/natural solid material/generic sandstone/sedimentary rock/rock\n", + "56-th prediction done\n", + "\n", + "\n", + " monzonite porphyry\n", + "Prediction : ['porphyry', 'rock', 'igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: phaneritic igneous rock/sediment/igneous rock/natural solid material/syenitoid/rock\n", + "57-th prediction done\n", + "\n", + "\n", + " diamictite\n", + "Prediction : ['clastic sedimentary rock', 'rock', 'sedimentary rock', 'diamictite', 'natural solid material', 'rock or sediment']\n", + "Gold: diamictite/clastic sedimentary rock/sediment/natural solid material/sedimentary rock/rock\n", + "58-th prediction done\n", + "\n", + "\n", + " glass rich igneous rock\n", + "Prediction : ['rock', 'anthropogenic material', 'any anthropogenic material', 'igneous rock', 'glass rich igneous rock', 'glass', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/glass rich igneous rock/rock\n", + "59-th prediction done\n", + "\n", + "\n", + " sandstone\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 
'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + "Prediction : ['generic sandstone', 'rock', 'sedimentary rock', 
'natural solid material', 'rock or sediment']\n", + "Gold: sediment/natural solid material/generic sandstone/sedimentary rock/rock\n", + "60-th prediction done\n", + "\n", + "\n", + " tonalite\n", + "Prediction : ['tonalite', 'rock', 'granitoid', 'igneous rock', 'phaneritic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/tonalite/rock\n", + "61-th prediction done\n", + "\n", + "\n", + " dunite\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high 
magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " ultramafic igneous rock\n", + "Extracted labels: ['rock', 'igneous rock', 'ultramafic igneous rock']\n", + "Prediction : ['rock', 'igneous rock', 'ultramafic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n", + "62-th prediction done\n", + "\n", + "\n", + " limestone\n", + "Prediction : ['natural solid material', 'rock or sediment', 'limestone', 'rock']\n", + "Gold: sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock\n", + "63-th prediction done\n", + "\n", + "\n", + " mineral-silicate or germanate\n", + "Prediction : ['mineral', 'natural solid material', 'mineral-silicate or germanate']\n", + "Gold: natural solid material/mineral/mineral-silicate or germanate\n", + "64-th prediction done\n", + "\n", + "\n", + " ultrabasic igneous rock\n", + "Prediction : ['ultrabasic igneous rock', 'rock', 'igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n", + "65-th prediction done\n", + "\n", + "\n", + " basalt\n", 
+ "Prediction : ['rock', 'fine grained igneous rock', 'igneous rock', 'basalt', 'natural solid material', 'rock or sediment']\n", + "Gold: basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock\n", + "66-th prediction done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "67-th prediction done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "68-th prediction done\n", + "\n", + "\n", + " quartz diorite\n", + "Prediction : ['quartz', 'natural solid material', 'mineral']\n", + "Gold: dioritoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock\n", + "69-th prediction done\n", + "\n", + "\n", + " ultrabasic igneous rock\n", + "Prediction : ['ultrabasic igneous rock', 'rock', 'igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n", + "70-th prediction done\n", + "\n", + "\n", + " granite\n", + "Prediction : ['rock', 'granitoid', 'igneous rock', 'phaneritic igneous rock', 'granite', 'natural solid material', 'rock or sediment']\n", + "Gold: phaneritic igneous rock/sediment/igneous rock/natural solid material/syenitoid/rock\n", + "71-th prediction done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", 
+ "72-th prediction done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "73-th prediction done\n", + "\n", + "\n", + " ultrabasic igneous rock\n", + "Prediction : ['ultrabasic igneous rock', 'rock', 'igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n", + "74-th prediction done\n", + "\n", + "\n", + " plagioclase peridotite\n", + "Prediction : ['rock', 'igneous rock', 'ultramafic igneous rock', 'peridotite', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n", + "75-th prediction done\n", + "\n", + "\n", + " non clastic siliceous sediment\n", + "Prediction : ['non clastic siliceous sediment', 'natural solid material', 'rock or sediment', 'sediment']\n", + "Gold: liquid water/fluid material\n", + "76-th prediction done\n", + "\n", + "\n", + " quartz rich igneous rock\n", + "Prediction : ['rock', 'quartz', 'igneous rock', 'phaneritic igneous rock', 'quartz rich igneous rock', 'mineral', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "77-th prediction done\n", + "\n", + "\n", + " peridotite\n", + "Prediction : ['rock', 'igneous rock', 'ultramafic igneous rock', 'peridotite', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n", + "78-th prediction done\n", + "\n", + "\n", + " hornblende andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 
'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "79-th prediction done\n", + "\n", + "\n", + " pyroxenite\n", + "Prediction : ['rock', 'pyroxenite', 'igneous rock', 'ultramafic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock\n", + "80-th prediction done\n", + "\n", + "\n", + " pebbly sandstone\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained 
igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " clastic sedimentary rock\n", + "Extracted labels: ['clastic sediment', 'rock', 'sedimentary rock', 'sediment', 'clastic sedimentary rock']\n", + "Prediction : ['clastic sedimentary rock', 'rock', 'sedimentary rock', 'sediment', 'clastic sediment', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/natural solid material/generic sandstone/sedimentary rock/rock\n", + "81-th prediction done\n", + "\n", + "\n", + " peridotite crosscut by pyroxenite\n", + "Prediction : ['rock', 'pyroxenite', 'igneous rock', 'ultramafic igneous rock', 'peridotite', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock\n", + "82-th prediction done\n", + "\n", + "\n", + " ultrabasic igneous rock\n", + "Prediction : ['ultrabasic igneous rock', 'rock', 'igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n", + "83-th prediction done\n", + "\n", + "\n", + " ultrabasic igneous rock\n", + "Prediction : ['ultrabasic igneous rock', 'rock', 'igneous rock', 'natural solid material', 'rock or 
sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n", + "84-th prediction done\n", + "\n", + "\n", + " glass rich igneous rock\n", + "Prediction : ['rock', 'anthropogenic material', 'any anthropogenic material', 'igneous rock', 'glass rich igneous rock', 'glass', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/glass rich igneous rock/rock\n", + "85-th prediction done\n", + "\n", + "\n", + " pyroxenite\n", + "Prediction : ['rock', 'pyroxenite', 'igneous rock', 'ultramafic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock\n", + "86-th prediction done\n", + "\n", + "\n", + " metamorphic rock\n", + "Prediction : ['natural solid material', 'rock or sediment', 'metamorphic rock', 'rock']\n", + "Gold: sediment/natural solid material/metamorphic rock/rock\n", + "87-th prediction done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "88-th prediction done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "89-th prediction done\n", + "\n", + "\n", + " basalt\n", + "Prediction : ['rock', 'fine grained igneous rock', 'igneous rock', 'basalt', 'natural solid material', 'rock or sediment']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "90-th prediction done\n", + "\n", + "\n", + " generic mudstone\n", + 
"Prediction : ['rock', 'generic mudstone', 'sedimentary rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/generic mudstone/natural solid material/sedimentary rock/rock\n", + "91-th prediction done\n", + "\n", + "\n", + " chert\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 
'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " chert\n", + "Extracted labels: ['chert']\n", + "Prediction : ['chert', 'natural solid material', 'rock or sediment', 'rock']\n", + "Gold: sediment/non clastic siliceous sedimentary rock/natural solid material/sedimentary rock/rock\n", + "92-th prediction done\n", + "\n", + "\n", + " limestone\n", + "Prediction : ['natural solid material', 'rock or sediment', 'limestone', 'rock']\n", + "Gold: sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock\n", + "93-th prediction done\n", + "\n", + "\n", + " nepheline syenite\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 
'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " Nepheline syenite\n", + "['biogenic non-organic material', 'anthropogenic material', 'anthropogenic metal material', 'fragmental igneous rock', 'granitoid', 'organic material', 'clastic sediment', 'plant material', 'any ice', 'rock', 'sedimentary rock', 'fluid material', 'sediment', 'igneous rock', 'mineral', 'clastic sedimentary rock', 'phaneritic igneous rock', 'intermediate composition igneous rock', 'gabbroid', 'ultramafic igneous rock', 'organic rich sedimentary rock', 'chert', 'ceramic clay', 'fine grained igneous rock', 'anthropogenic 
organic material', 'organic animal product', 'natural solid material', 'material', 'fault related material']\n", + "Extracted labels: []\n", + " phaneritic igneous rock\n", + "Extracted labels: ['rock', 'igneous rock', 'phaneritic igneous rock', 'rock or sediment']\n", + "Prediction : ['rock', 'igneous rock', 'phaneritic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: phaneritic igneous rock/sediment/igneous rock/natural solid material/syenitoid/rock\n", + "94-th prediction done\n", + "\n", + "\n", + " mineral-native element\n", + "Prediction : ['mineral-native element', 'natural solid material', 'mineral']\n", + "Gold: liquid water/fluid material\n", + "95-th prediction done\n", + "\n", + "\n", + " essexitic andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "96-th prediction done\n", + "\n", + "\n", + " limestone\n", + "Prediction : ['natural solid material', 'rock or sediment', 'limestone', 'rock']\n", + "Gold: sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock\n", + "97-th prediction done\n", + "\n", + "\n", + " ultrabasic igneous rock\n", + "Prediction : ['ultrabasic igneous rock', 'rock', 'igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock\n", + "98-th prediction done\n", + "\n", + "\n", + " granodiorite\n", + "Prediction : ['rock', 'granitoid', 'igneous rock', 'phaneritic igneous rock', 'granodiorite', 'natural solid material', 'rock or sediment']\n", + "Gold: granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock\n", + "99-th prediction done\n", + "\n", + "\n", + " Uvaldite\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 
'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary 
rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " ultramafic igneous rock\n", + "Extracted labels: ['rock', 'igneous rock', 'ultramafic igneous rock']\n", + "Prediction : ['rock', 'igneous rock', 'ultramafic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: natural solid material/mineral/mineral-silicate or germanate\n", + "100-th prediction done\n", + "\n", + "\n", + " anorthositic rock\n", + "Prediction : ['anorthositic rock', 'rock', 'igneous rock', 'phaneritic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: phaneritic igneous rock/sediment/igneous rock/natural solid material/anorthositic rock/rock\n", + "101-th prediction done\n", + "\n", + "\n", + " tuff with basalt\n", + "Prediction : ['rock', 'fine grained igneous rock', 'igneous rock', 'basalt', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "102-th prediction done\n", + "\n", + "\n", + " Xenolith with spinel harzburgite\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition 
igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " Xenolith with spinel harzburgite\n", + "['biogenic non-organic material', 'anthropogenic material', 'anthropogenic metal material', 'fragmental igneous rock', 'granitoid', 'organic material', 'clastic sediment', 'plant material', 'any ice', 'rock', 'sedimentary rock', 'fluid material', 'sediment', 'igneous rock', 'mineral', 'clastic sedimentary rock', 'phaneritic igneous rock', 'intermediate composition igneous 
rock', 'gabbroid', 'ultramafic igneous rock', 'organic rich sedimentary rock', 'chert', 'ceramic clay', 'fine grained igneous rock', 'anthropogenic organic material', 'organic animal product', 'natural solid material', 'material', 'fault related material']\n", + "Extracted labels: []\n", + " phaneritic igneous rock\n", + "Extracted labels: ['rock', 'igneous rock', 'phaneritic igneous rock', 'rock or sediment']\n", + "Prediction : ['rock', 'igneous rock', 'phaneritic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n", + "103-th prediction done\n", + "\n", + "\n", + " ultrabasic igneous rock\n", + "Prediction : ['ultrabasic igneous rock', 'rock', 'igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n", + "104-th prediction done\n", + "\n", + "\n", + " dacite\n", + "Prediction : ['rock', 'fine grained igneous rock', 'igneous rock', 'dacite', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/dacite/igneous rock/natural solid material/fine grained igneous rock/rock\n", + "105-th prediction done\n", + "\n", + "\n", + " dioritoid\n", + "Prediction : ['dioritoid', 'rock', 'igneous rock', 'phaneritic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: dioritoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock\n", + "106-th prediction done\n", + "\n", + "\n", + " ultrabasic igneous rock\n", + "Prediction : ['ultrabasic igneous rock', 'rock', 'igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n", + "107-th prediction done\n", + "\n", + "\n", + " mineral-organic compound\n", + "Prediction : ['natural solid material', 'mineral', 'mineral-organic compound']\n", + "Gold: liquid water/fluid 
material\n", + "108-th prediction done\n", + "\n", + "\n", + " ultrabasic igneous rock\n", + "Prediction : ['ultrabasic igneous rock', 'rock', 'igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n", + "109-th prediction done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "110-th prediction done\n", + "\n", + "\n", + " augite basalt\n", + "Prediction : ['rock', 'fine grained igneous rock', 'igneous rock', 'basalt', 'natural solid material', 'rock or sediment']\n", + "Gold: basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock\n", + "111-th prediction done\n", + "\n", + "\n", + " mineral-organic compound\n", + "Prediction : ['natural solid material', 'mineral', 'mineral-organic compound']\n", + "Gold: liquid water/fluid material\n", + "112-th prediction done\n", + "\n", + "\n", + " Keratophyre\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid 
dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " keratophyre\n", + "['biogenic non-organic material', 'anthropogenic material', 'anthropogenic metal material', 'fragmental igneous rock', 'granitoid', 'organic material', 'clastic sediment', 'plant material', 'any ice', 'rock', 'sedimentary rock', 'fluid material', 'sediment', 'igneous rock', 'mineral', 'clastic sedimentary rock', 'phaneritic igneous rock', 'intermediate composition igneous rock', 'gabbroid', 'ultramafic igneous rock', 'organic rich sedimentary rock', 'chert', 'ceramic 
clay', 'fine grained igneous rock', 'anthropogenic organic material', 'organic animal product', 'natural solid material', 'material', 'fault related material']\n", + "Extracted labels: []\n", + " phaneritic igneous rock\n", + "Extracted labels: ['rock', 'igneous rock', 'phaneritic igneous rock', 'rock or sediment']\n", + "Prediction : ['rock', 'igneous rock', 'phaneritic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock\n", + "113-th prediction done\n", + "\n", + "\n", + " volcanic rock\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 
'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " volcanic rock\n", + "Extracted labels: ['rock']\n", + "Prediction : ['natural solid material', 'rock or sediment', 'rock']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "114-th prediction done\n", + "\n", + "\n", + " soil\n", + "Prediction : ['mixed soil sediment or rock', 'natural solid material', 'soil']\n", + "Gold: liquid water/fluid material\n", + "115-th prediction done\n", + "\n", + "\n", + " volcanic rock\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 
'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " volcanic rock\n", + "Extracted labels: ['rock']\n", + "Prediction : ['natural solid material', 'rock or sediment', 'rock']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "116-th prediction 
done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "117-th prediction done\n", + "\n", + "\n", + " mineral-silicate or germanate\n", + "Prediction : ['mineral', 'natural solid material', 'mineral-silicate or germanate']\n", + "Gold: sediment/mud size sediment/natural solid material/rock\n", + "118-th prediction done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "119-th prediction done\n", + "\n", + "\n", + " pyroxenite\n", + "Prediction : ['rock', 'pyroxenite', 'igneous rock', 'ultramafic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock\n", + "120-th prediction done\n", + "\n", + "\n", + " ultrabasic igneous rock\n", + "Prediction : ['ultrabasic igneous rock', 'rock', 'igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock\n", + "121-th prediction done\n", + "\n", + "\n", + " generic mudstone\n", + "Prediction : ['rock', 'generic mudstone', 'sedimentary rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/generic mudstone/natural solid material/sedimentary rock/rock\n", + "122-th prediction done\n", + "\n", + "\n", + " granodiorite\n", + "Prediction : ['rock', 'granitoid', 'igneous rock', 'phaneritic igneous rock', 'granodiorite', 'natural solid material', 'rock or sediment']\n", + "Gold: granitoid/phaneritic igneous 
rock/sediment/granodiorite/igneous rock/natural solid material/rock\n", + "123-th prediction done\n", + "\n", + "\n", + " gaseous material\n", + "Prediction : ['fluid material', 'gaseous material']\n", + "Gold: liquid water/fluid material\n", + "124-th prediction done\n", + "\n", + "\n", + " welded tuff\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 
'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " clastic sedimentary rock\n", + "Extracted labels: ['clastic sediment', 'rock', 'sedimentary rock', 'sediment', 'clastic sedimentary rock']\n", + "Prediction : ['clastic sedimentary rock', 'rock', 'sedimentary rock', 'sediment', 'clastic sediment', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "125-th prediction done\n", + "\n", + "\n", + " trachytoid\n", + "Prediction : ['trachytoid', 'rock', 'fine grained igneous rock', 'igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock\n", + "126-th prediction done\n", + "\n", + "\n", + " sand size sediment\n", + "Prediction : ['sand size sediment', 'natural solid material', 'rock or sediment', 'sediment']\n", + "Gold: sediment/natural solid material/generic sandstone/sedimentary rock/rock\n", + "127-th prediction done\n", + "\n", + "\n", + " basalt\n", + "Prediction : ['rock', 'fine grained igneous rock', 'igneous rock', 'basalt', 'natural solid material', 'rock or sediment']\n", + "Gold: basalt/sediment/igneous rock/natural solid material/glass rich igneous rock/fine grained igneous rock/rock\n", + "128-th prediction done\n", + "\n", + "\n", + " rhyodacite\n", + "Prediction : ['rock', 'fine 
grained igneous rock', 'igneous rock', 'dacite', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/dacite/igneous rock/natural solid material/fine grained igneous rock/rock\n", + "129-th prediction done\n", + "\n", + "\n", + " pyroxenite\n", + "Prediction : ['rock', 'pyroxenite', 'igneous rock', 'ultramafic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock\n", + "130-th prediction done\n", + "\n", + "\n", + " granodiorite\n", + "Prediction : ['rock', 'granitoid', 'igneous rock', 'phaneritic igneous rock', 'granodiorite', 'natural solid material', 'rock or sediment']\n", + "Gold: granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock\n", + "131-th prediction done\n", + "\n", + "\n", + " camptonite\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 
'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " clastic sedimentary rock\n", + "Extracted labels: ['clastic sediment', 'rock', 'sedimentary rock', 'sediment', 'clastic sedimentary rock']\n", + "Prediction : ['clastic sedimentary rock', 'rock', 'sedimentary rock', 'sediment', 'clastic sediment', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/exotic composition igneous rock/rock\n", + "132-th prediction done\n", + "\n", + "\n", + " lithic tuff\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous 
material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " 
clastic sedimentary rock\n", + "Extracted labels: ['clastic sediment', 'rock', 'sedimentary rock', 'sediment', 'clastic sedimentary rock']\n", + "Prediction : ['clastic sedimentary rock', 'rock', 'sedimentary rock', 'sediment', 'clastic sediment', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "133-th prediction done\n", + "\n", + "\n", + " iron rich sedimentary rock\n", + "Prediction : ['iron rich sediment', 'rock', 'iron rich sedimentary rock', 'sedimentary rock', 'anthropogenic metal material', 'any anthropogenic material', 'sediment', 'iron', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/natural solid material/metasomatic rock/rock\n", + "134-th prediction done\n", + "\n", + "\n", + " pyroxene andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "135-th prediction done\n", + "\n", + "\n", + " pyroxenite\n", + "Prediction : ['rock', 'pyroxenite', 'igneous rock', 'ultramafic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock\n", + "136-th prediction done\n", + "\n", + "\n", + " plagioclase peridotite\n", + "Prediction : ['rock', 'igneous rock', 'ultramafic igneous rock', 'peridotite', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n", + "137-th prediction done\n", + "\n", + "\n", + " granodiorite\n", + "Prediction : ['rock', 'granitoid', 'igneous rock', 'phaneritic igneous rock', 'granodiorite', 'natural solid material', 'rock or sediment']\n", + "Gold: granitoid/phaneritic igneous rock/sediment/granodiorite/igneous 
rock/natural solid material/rock\n", + "138-th prediction done\n", + "\n", + "\n", + " peridotite\n", + "Prediction : ['rock', 'igneous rock', 'ultramafic igneous rock', 'peridotite', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n", + "139-th prediction done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "140-th prediction done\n", + "\n", + "\n", + " pyroclastic flow\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic 
rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " fragmental igneous rock\n", + "Extracted labels: ['fragmental igneous rock', 'rock', 'igneous rock']\n", + "Prediction : ['rock', 'fragmental igneous rock', 'igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "141-th prediction done\n", + "\n", + "\n", + " granodiorite\n", + "Prediction : ['rock', 'granitoid', 'igneous rock', 'phaneritic igneous rock', 'granodiorite', 'natural solid material', 'rock or sediment']\n", + "Gold: granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock\n", + "142-th prediction done\n", + "\n", + "\n", + " peridotite\n", + "Prediction : ['rock', 'igneous rock', 'ultramafic igneous rock', 'peridotite', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n", + "143-th 
prediction done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "144-th prediction done\n", + "\n", + "\n", + " pyroxenite\n", + "Prediction : ['rock', 'pyroxenite', 'igneous rock', 'ultramafic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock\n", + "145-th prediction done\n", + "\n", + "\n", + " anorthositic rock\n", + "Prediction : ['anorthositic rock', 'rock', 'igneous rock', 'phaneritic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: phaneritic igneous rock/sediment/igneous rock/natural solid material/anorthositic rock/rock\n", + "146-th prediction done\n", + "\n", + "\n", + " soil\n", + "Prediction : ['mixed soil sediment or rock', 'natural solid material', 'soil']\n", + "Gold: liquid water/fluid material\n", + "147-th prediction done\n", + "\n", + "\n", + " limestone\n", + "Prediction : ['natural solid material', 'rock or sediment', 'limestone', 'rock']\n", + "Gold: sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock\n", + "148-th prediction done\n", + "\n", + "\n", + " basalt\n", + "Prediction : ['rock', 'fine grained igneous rock', 'igneous rock', 'basalt', 'natural solid material', 'rock or sediment']\n", + "Gold: basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock\n", + "149-th prediction done\n", + "\n", + "\n", + " non clastic siliceous sediment\n", + "Prediction : ['non clastic siliceous sediment', 'natural solid material', 'rock or sediment', 'sediment']\n", + "Gold: liquid water/fluid material\n", + "150-th prediction done\n", + "\n", + "\n", + " garnet websterite\n", + "['quartz rich igneous rock', 'generic mudstone', 
'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous 
sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " garnet websterite\n", + "['biogenic non-organic material', 'anthropogenic material', 'anthropogenic metal material', 'fragmental igneous rock', 'granitoid', 'organic material', 'clastic sediment', 'plant material', 'any ice', 'rock', 'sedimentary rock', 'fluid material', 'sediment', 'igneous rock', 'mineral', 'clastic sedimentary rock', 'phaneritic igneous rock', 'intermediate composition igneous rock', 'gabbroid', 'ultramafic igneous rock', 'organic rich sedimentary rock', 'chert', 'ceramic clay', 'fine grained igneous rock', 'anthropogenic organic material', 'organic animal product', 'natural solid material', 'material', 'fault related material']\n", + "Extracted labels: []\n", + " phaneritic igneous rock\n", + "Extracted labels: ['rock', 'igneous rock', 'phaneritic igneous rock', 'rock or sediment']\n", + "Prediction : ['rock', 'igneous rock', 'phaneritic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n", + "151-th prediction done\n", + "\n", + "\n", + " quartz rich igneous rock\n", + "Prediction : ['rock', 'quartz', 'igneous rock', 'phaneritic igneous rock', 'quartz rich igneous rock', 'mineral', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/natural solid material/generic sandstone/sedimentary rock/rock\n", + "152-th prediction done\n", + "\n", + "\n", + " quartz rich igneous rock\n", + "Prediction : ['rock', 'quartz', 'igneous rock', 'phaneritic igneous rock', 'quartz rich igneous rock', 'mineral', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/natural solid material/generic sandstone/sedimentary rock/rock\n", + "153-th prediction done\n", + 
"\n", + "\n", + " volcanic rock\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size 
sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " volcanic rock\n", + "Extracted labels: ['rock']\n", + "Prediction : ['natural solid material', 'rock or sediment', 'rock']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "154-th prediction done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "155-th prediction done\n", + "\n", + "\n", + " doleritic rock\n", + "Prediction : ['rock', 'igneous rock', 'doleritic rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/doleritic rock/rock\n", + "156-th prediction done\n", + "\n", + "\n", + " mineral-organic compound\n", + "Prediction : ['natural solid material', 'mineral', 'mineral-organic compound']\n", + "Gold: liquid water/fluid material\n", + "157-th prediction done\n", + "\n", + "\n", + " claystone\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid 
material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " clastic sedimentary rock\n", + "Extracted labels: ['clastic sediment', 'rock', 'sedimentary rock', 'sediment', 'clastic sedimentary rock']\n", + "Prediction : ['clastic sedimentary rock', 'rock', 'sedimentary rock', 'sediment', 'clastic 
sediment', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/generic mudstone/natural solid material/sedimentary rock/rock\n", + "158-th prediction done\n", + "\n", + "\n", + " copper rich sedimentary rock\n", + "Prediction : ['anthropogenic metal material', 'copper', 'any anthropogenic material']\n", + "Gold: sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock\n", + "159-th prediction done\n", + "\n", + "\n", + " tuff\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 
'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " clastic sedimentary rock\n", + "Extracted labels: ['clastic sediment', 'rock', 'sedimentary rock', 'sediment', 'clastic sedimentary rock']\n", + "Prediction : ['clastic sedimentary rock', 'rock', 'sedimentary rock', 'sediment', 'clastic sediment', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "160-th prediction done\n", + "\n", + "\n", + " ultrabasic igneous rock\n", + "Prediction : ['ultrabasic igneous rock', 'rock', 'igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/foiditoid/fine grained igneous rock/rock\n", + "161-th prediction done\n", + "\n", + "\n", + " quartz arenite\n", + "Prediction : ['quartz', 'natural solid material', 'mineral']\n", + "Gold: sediment/natural solid material/generic sandstone/sedimentary rock/rock\n", + "162-th prediction done\n", + "\n", + "\n", + " metamorphic rock\n", + "Prediction : ['natural solid material', 'rock or sediment', 'metamorphic rock', 'rock']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "163-th 
prediction done\n", + "\n", + "\n", + " quartz rich igneous rock\n", + "Prediction : ['rock', 'quartz', 'igneous rock', 'phaneritic igneous rock', 'quartz rich igneous rock', 'mineral', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/generic mudstone/natural solid material/sedimentary rock/rock\n", + "164-th prediction done\n", + "\n", + "\n", + " ultrabasic igneous rock\n", + "Prediction : ['ultrabasic igneous rock', 'rock', 'igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n", + "165-th prediction done\n", + "\n", + "\n", + " pulaskite\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous 
sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " pulaskite\n", + "['biogenic non-organic material', 'anthropogenic material', 'anthropogenic metal material', 'fragmental igneous rock', 'granitoid', 'organic material', 'clastic sediment', 'plant material', 'any ice', 'rock', 'sedimentary rock', 'fluid material', 'sediment', 'igneous rock', 'mineral', 'clastic sedimentary rock', 'phaneritic igneous rock', 'intermediate composition igneous rock', 'gabbroid', 'ultramafic igneous rock', 'organic rich sedimentary rock', 'chert', 'ceramic clay', 'fine grained igneous rock', 'anthropogenic organic material', 'organic animal product', 'natural solid material', 'material', 'fault related material']\n", + "Extracted labels: []\n", + " phaneritic igneous rock\n", + "Extracted labels: ['rock', 'igneous rock', 'phaneritic igneous rock', 'rock or sediment']\n", + "Prediction : ['rock', 'igneous rock', 'phaneritic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: phaneritic igneous rock/sediment/igneous rock/natural solid 
material/syenitoid/rock\n", + "166-th prediction done\n", + "\n", + "\n", + " foidolite\n", + "Prediction : ['rock', 'igneous rock', 'phaneritic igneous rock', 'foidolite', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock\n", + "167-th prediction done\n", + "\n", + "\n", + " muscovite-biotite schist\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 
'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " clastic sedimentary rock\n", + "Extracted labels: ['clastic sediment', 'rock', 'sedimentary rock', 'sediment', 'clastic sedimentary rock']\n", + "Prediction : ['clastic sedimentary rock', 'rock', 'sedimentary rock', 'sediment', 'clastic sediment', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/natural solid material/metamorphic rock/rock\n", + "168-th prediction done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "169-th prediction done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "170-th prediction done\n", + "\n", + "\n", + " Volcanic ash with basaltic-andesite\n", + "Prediction : ['rock or sediment', 'rock', 'fine grained igneous rock', 'igneous rock', 'intermediate composition igneous rock', 'basalt', 'natural solid material', 
'andesite']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "171-th prediction done\n", + "\n", + "\n", + " volcanic rock\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or 
rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " volcanic rock\n", + "Extracted labels: ['rock']\n", + "Prediction : ['natural solid material', 'rock or sediment', 'rock']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "172-th prediction done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "173-th prediction done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "174-th prediction done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "175-th prediction done\n", + "\n", + "\n", + " gray bedded tuff\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 
'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 
'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " clastic sedimentary rock\n", + "Extracted labels: ['clastic sediment', 'rock', 'sedimentary rock', 'sediment', 'clastic sedimentary rock']\n", + "Prediction : ['clastic sedimentary rock', 'rock', 'sedimentary rock', 'sediment', 'clastic sediment', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "176-th prediction done\n", + "\n", + "\n", + " Microdiorite\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali 
feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " Microdiorite\n", + "['biogenic non-organic material', 'anthropogenic material', 'anthropogenic metal material', 'fragmental igneous rock', 'granitoid', 'organic material', 'clastic sediment', 'plant material', 'any ice', 'rock', 'sedimentary rock', 'fluid material', 'sediment', 'igneous rock', 'mineral', 'clastic sedimentary rock', 'phaneritic igneous rock', 'intermediate composition igneous rock', 'gabbroid', 'ultramafic igneous rock', 'organic rich sedimentary rock', 'chert', 'ceramic clay', 'fine grained igneous rock', 'anthropogenic organic material', 'organic animal product', 'natural solid material', 'material', 'fault related material']\n", + "Extracted labels: []\n", + " phaneritic igneous rock\n", + "Extracted labels: ['rock', 'igneous rock', 'phaneritic igneous rock', 'rock or sediment']\n", + "Prediction : ['rock', 'igneous rock', 'phaneritic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: dioritoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock\n", + "177-th prediction done\n", + "\n", + "\n", + " mineral-oxide\n", + "Prediction : 
['mineral-oxide', 'mineral', 'natural solid material']\n", + "Gold: liquid water/fluid material\n", + "178-th prediction done\n", + "\n", + "\n", + " ultrabasic igneous rock\n", + "Prediction : ['ultrabasic igneous rock', 'rock', 'igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n", + "179-th prediction done\n", + "\n", + "\n", + " ash bed\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 
'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " clastic sedimentary rock\n", + "Extracted labels: ['clastic sediment', 'rock', 'sedimentary rock', 'sediment', 'clastic sedimentary rock']\n", + "Prediction : ['clastic sedimentary rock', 'rock', 'sedimentary rock', 'sediment', 'clastic sediment', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/natural solid material/tephra/rock\n", + "180-th prediction done\n", + "\n", + "\n", + " basalt\n", + "Prediction : ['rock', 'fine grained igneous rock', 'igneous rock', 'basalt', 'natural solid material', 'rock or sediment']\n", + "Gold: basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock\n", + "181-th prediction done\n", + "\n", + "\n", + " Kimberlite\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 
'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " Kimberlite\n", + "['biogenic non-organic material', 'anthropogenic material', 'anthropogenic metal material', 'fragmental igneous rock', 'granitoid', 'organic material', 'clastic sediment', 
'plant material', 'any ice', 'rock', 'sedimentary rock', 'fluid material', 'sediment', 'igneous rock', 'mineral', 'clastic sedimentary rock', 'phaneritic igneous rock', 'intermediate composition igneous rock', 'gabbroid', 'ultramafic igneous rock', 'organic rich sedimentary rock', 'chert', 'ceramic clay', 'fine grained igneous rock', 'anthropogenic organic material', 'organic animal product', 'natural solid material', 'material', 'fault related material']\n", + "Extracted labels: []\n", + " phaneritic igneous rock\n", + "Extracted labels: ['rock', 'igneous rock', 'phaneritic igneous rock', 'rock or sediment']\n", + "Prediction : ['rock', 'igneous rock', 'phaneritic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/exotic composition igneous rock/rock\n", + "182-th prediction done\n", + "\n", + "\n", + " liquid water\n", + "Prediction : ['fluid material', 'liquid water']\n", + "Gold: liquid water/fluid material\n", + "183-th prediction done\n", + "\n", + "\n", + " quartz rich igneous rock\n", + "Prediction : ['rock', 'quartz', 'igneous rock', 'phaneritic igneous rock', 'quartz rich igneous rock', 'mineral', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/natural solid material/metamorphic rock/rock\n", + "184-th prediction done\n", + "\n", + "\n", + " hornfels grade metavolcaniclastic derived regolith\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 
'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " clastic sedimentary rock\n", + "Extracted labels: ['clastic sediment', 'rock', 'sedimentary rock', 'sediment', 'clastic sedimentary rock']\n", + "Prediction : ['clastic sedimentary rock', 'rock', 'sedimentary rock', 'sediment', 'clastic sediment', 'natural 
solid material', 'rock or sediment']\n", + "Gold: sediment/natural solid material/residual material/rock\n", + "185-th prediction done\n", + "\n", + "\n", + " rhyolite\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 
'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " igneous rock\n", + "Extracted labels: ['rock', 'igneous rock']\n", + "Prediction : ['natural solid material', 'rock or sediment', 'igneous rock', 'rock']\n", + "Gold: sediment/igneous rock/rhyolitoid/natural solid material/fine grained igneous rock/rock\n", + "186-th prediction done\n", + "\n", + "\n", + " quartz rich igneous rock\n", + "Prediction : ['rock', 'quartz', 'igneous rock', 'phaneritic igneous rock', 'quartz rich igneous rock', 'mineral', 'natural solid material', 'rock or sediment']\n", + "Gold: natural solid material/mineral/mineral-oxide\n", + "187-th prediction done\n", + "\n", + "\n", + " vent fluid\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 
'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " fluid material\n", + "Extracted labels: ['fluid material', 'material']\n", + "Prediction : ['fluid material']\n", + "Gold: liquid water/fluid material\n", + "188-th prediction done\n", + "\n", + "\n", + " trachytoid\n", + "Prediction : ['trachytoid', 'rock', 'fine grained igneous rock', 'igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock\n", + "189-th prediction done\n", + "\n", + "\n", + " lapilli tuff\n", + "['quartz rich igneous rock', 'generic 
mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic 
siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " igneous rock\n", + "Extracted labels: ['rock', 'igneous rock']\n", + "Prediction : ['natural solid material', 'rock or sediment', 'igneous rock', 'rock']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "190-th prediction done\n", + "\n", + "\n", + " copper rich sedimentary rock\n", + "Prediction : ['anthropogenic metal material', 'copper', 'any anthropogenic material']\n", + "Gold: natural solid material/mineral/mineral-native element\n", + "191-th prediction done\n", + "\n", + "\n", + " granodiorite\n", + "Prediction : ['rock', 'granitoid', 'igneous rock', 'phaneritic igneous rock', 'granodiorite', 'natural solid material', 'rock or sediment']\n", + "Gold: granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock\n", + "192-th prediction done\n", + "\n", + "\n", + " ultrabasic igneous rock\n", + "Prediction : ['ultrabasic igneous rock', 'rock', 'igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n", + "193-th prediction done\n", + "\n", + "\n", + " hornblende andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "194-th prediction done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid 
material/intermediate composition igneous rock/rock\n", + "195-th prediction done\n", + "\n", + "\n", + " sandstone\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed 
media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + "Prediction : ['generic sandstone', 'rock', 'sedimentary rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/natural solid material/generic sandstone/sedimentary rock/rock\n", + "196-th prediction done\n", + "\n", + "\n", + " gaseous material\n", + "Prediction : ['fluid material', 'gaseous material']\n", + "Gold: sediment/mud size sediment/natural solid material/rock\n", + "197-th prediction done\n", + "\n", + "\n", + " peridotite\n", + "Prediction : ['rock', 'igneous rock', 'ultramafic igneous rock', 'peridotite', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n", + "198-th prediction done\n", + "\n", + "\n", + " tuff-sandstone\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 
'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " clastic sedimentary rock\n", + "Extracted labels: ['clastic sediment', 'rock', 'sedimentary rock', 'sediment', 'clastic sedimentary rock']\n", + "Prediction : ['clastic sedimentary rock', 'rock', 'sedimentary rock', 'sediment', 'clastic sediment', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "199-th prediction done\n", + "\n", + "\n", + " rhyolite\n", + "['quartz 
rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, 
or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " clastic sedimentary rock\n", + "Extracted labels: ['clastic sediment', 'rock', 'sedimentary rock', 'sediment', 'clastic sedimentary rock']\n", + "Prediction : ['clastic sedimentary rock', 'rock', 'sedimentary rock', 'sediment', 'clastic sediment', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/rhyolitoid/natural solid material/fine grained igneous rock/rock\n", + "200-th prediction done\n", + "\n", + "\n", + " hornblende monzonite\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic 
sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " granitoid\n", + "Extracted labels: ['granitoid']\n", + "Prediction : ['rock', 'granitoid', 'igneous rock', 'phaneritic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: phaneritic igneous rock/sediment/igneous rock/natural solid material/syenitoid/rock\n", + "201-th prediction done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "202-th prediction done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition 
igneous rock/rock\n", + "203-th prediction done\n", + "\n", + "\n", + " tonalite\n", + "Prediction : ['tonalite', 'rock', 'granitoid', 'igneous rock', 'phaneritic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/tonalite/rock\n", + "204-th prediction done\n", + "\n", + "\n", + " peridotite\n", + "Prediction : ['rock', 'igneous rock', 'ultramafic igneous rock', 'peridotite', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n", + "205-th prediction done\n", + "\n", + "\n", + " mylonitic rock\n", + "Prediction : ['rock', 'mylonitic rock', 'natural solid material', 'rock or sediment', 'fault related material']\n", + "Gold: sediment/mylonitic rock/fault related material/natural solid material/rock\n", + "206-th prediction done\n", + "\n", + "\n", + " mineral-phosphate, arsenate, or vanadate\n", + "Prediction : ['natural solid material', 'mineral', 'mineral-phosphate, arsenate, or vanadate']\n", + "Gold: natural solid material/mineral/mineral-phosphate, arsenate, or vanadate\n", + "207-th prediction done\n", + "\n", + "\n", + " garnet lherzolite\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous 
rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " ultramafic igneous rock\n", + "Extracted labels: ['rock', 'igneous rock', 'ultramafic igneous rock']\n", + "Prediction : ['rock', 'igneous rock', 'ultramafic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n", + "208-th prediction done\n", + "\n", + "\n", + " granodiorite\n", + "Prediction : ['rock', 
'granitoid', 'igneous rock', 'phaneritic igneous rock', 'granodiorite', 'natural solid material', 'rock or sediment']\n", + "Gold: granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock\n", + "209-th prediction done\n", + "\n", + "\n", + " tephra\n", + "Prediction : ['sediment', 'natural solid material', 'rock or sediment', 'tephra']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "210-th prediction done\n", + "\n", + "\n", + " diorite\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 
'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " diorite\n", + "['biogenic non-organic material', 'anthropogenic material', 'anthropogenic metal material', 'fragmental igneous rock', 'granitoid', 'organic material', 'clastic sediment', 'plant material', 'any ice', 'rock', 'sedimentary rock', 'fluid material', 'sediment', 'igneous rock', 'mineral', 'clastic sedimentary rock', 'phaneritic igneous rock', 'intermediate composition igneous rock', 'gabbroid', 'ultramafic igneous rock', 'organic rich sedimentary rock', 'chert', 'ceramic clay', 'fine grained igneous rock', 'anthropogenic organic material', 'organic animal product', 'natural solid material', 'material', 'fault related material']\n", + "Extracted labels: []\n", + " phaneritic igneous rock\n", + "Extracted labels: ['rock', 'igneous rock', 'phaneritic igneous rock', 'rock or sediment']\n", + "Prediction : ['rock', 'igneous rock', 'phaneritic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: dioritoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock\n", + "211-th prediction done\n", + "\n", + "\n", + " generic mudstone\n", + "Prediction : ['rock', 'generic 
mudstone', 'sedimentary rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/generic mudstone/natural solid material/sedimentary rock/rock\n", + "212-th prediction done\n", + "\n", + "\n", + " apatite-feldspar pyroxenite\n", + "Prediction : ['rock', 'pyroxenite', 'igneous rock', 'ultramafic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock\n", + "213-th prediction done\n", + "\n", + "\n", + " Green Claystone (Ash)\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar 
granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " clastic sedimentary rock\n", + "Extracted labels: ['clastic sediment', 'rock', 'sedimentary rock', 'sediment', 'clastic sedimentary rock']\n", + "Prediction : ['clastic sedimentary rock', 'rock', 'sedimentary rock', 'sediment', 'clastic sediment', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/generic mudstone/natural solid material/sedimentary rock/rock\n", + "214-th prediction done\n", + "\n", + "\n", + " tonalite\n", + "Prediction : ['tonalite', 'rock', 'granitoid', 'igneous rock', 'phaneritic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: granitoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/tonalite/rock\n", + "215-th prediction done\n", + "\n", + "\n", + " volcanic rock\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 
'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " volcanic rock\n", + "Extracted labels: 
['rock']\n", + "Prediction : ['natural solid material', 'rock or sediment', 'rock']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "216-th prediction done\n", + "\n", + "\n", + " lapilli tuff\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 
'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " clastic sedimentary rock\n", + "Extracted labels: ['clastic sediment', 'rock', 'sedimentary rock', 'sediment', 'clastic sedimentary rock']\n", + "Prediction : ['clastic sedimentary rock', 'rock', 'sedimentary rock', 'sediment', 'clastic sediment', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "217-th prediction done\n", + "\n", + "\n", + " mineral-silicate or germanate\n", + "Prediction : ['mineral', 'natural solid material', 'mineral-silicate or germanate']\n", + "Gold: natural solid material/mineral-sulfate, selenate, or tellurate/mineral\n", + "218-th prediction done\n", + "\n", + "\n", + " granodiorite\n", + "Prediction : ['rock', 'granitoid', 'igneous rock', 'phaneritic igneous rock', 'granodiorite', 'natural solid material', 'rock or sediment']\n", + "Gold: granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock\n", + "219-th prediction done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "220-th prediction done\n", + "\n", + "\n", + " tuff with geode\n", + "['quartz rich igneous rock', 'generic 
mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic 
siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " clastic sedimentary rock\n", + "Extracted labels: ['clastic sediment', 'rock', 'sedimentary rock', 'sediment', 'clastic sedimentary rock']\n", + "Prediction : ['clastic sedimentary rock', 'rock', 'sedimentary rock', 'sediment', 'clastic sediment', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "221-th prediction done\n", + "\n", + "\n", + " quartz rich igneous rock\n", + "Prediction : ['rock', 'quartz', 'igneous rock', 'phaneritic igneous rock', 'quartz rich igneous rock', 'mineral', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/natural solid material/generic sandstone/sedimentary rock/rock\n", + "222-th prediction done\n", + "\n", + "\n", + " exotic composition igneous rock\n", + "Prediction : ['rock', 'exotic composition igneous rock', 'igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: liquid water/fluid material\n", + "223-th prediction done\n", + "\n", + "\n", + " volcanic rock\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 
'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " volcanic rock\n", + "Extracted labels: ['rock']\n", + "Prediction : ['natural solid material', 'rock or sediment', 'rock']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "224-th prediction done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous 
rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "225-th prediction done\n", + "\n", + "\n", + " limestone\n", + "Prediction : ['natural solid material', 'rock or sediment', 'limestone', 'rock']\n", + "Gold: sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock\n", + "226-th prediction done\n", + "\n", + "\n", + " pegmatite\n", + "Prediction : ['pegmatite', 'rock', 'igneous rock', 'phaneritic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: natural solid material/mineral/mineral-silicate or germanate\n", + "227-th prediction done\n", + "\n", + "\n", + " coal\n", + "Prediction : ['rock', 'sedimentary rock', 'coal', 'organic rich sedimentary rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/coal/natural solid material/organic rich sedimentary rock/sedimentary rock/rock\n", + "228-th prediction done\n", + "\n", + "\n", + " Sand\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 
'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " clastic sediment\n", + "Extracted labels: ['clastic sediment', 'sediment']\n", + "Prediction : ['natural solid material', 'clastic sediment', 'rock or sediment', 'sediment']\n", + "Gold: sediment/natural solid material/generic sandstone/sedimentary rock/rock\n", + "229-th prediction done\n", + "\n", + "\n", + " limestone\n", + "Prediction : ['natural solid material', 'rock or sediment', 'limestone', 'rock']\n", + "Gold: sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock\n", + "230-th prediction done\n", + "\n", + "\n", + " tuff\n", + "['quartz rich igneous rock', 'generic 
mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic 
siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " clastic sedimentary rock\n", + "Extracted labels: ['clastic sediment', 'rock', 'sedimentary rock', 'sediment', 'clastic sedimentary rock']\n", + "Prediction : ['clastic sedimentary rock', 'rock', 'sedimentary rock', 'sediment', 'clastic sediment', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "231-th prediction done\n", + "\n", + "\n", + " quartz rich igneous rock\n", + "Prediction : ['rock', 'quartz', 'igneous rock', 'phaneritic igneous rock', 'quartz rich igneous rock', 'mineral', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/natural solid material/generic sandstone/sedimentary rock/rock\n", + "232-th prediction done\n", + "\n", + "\n", + " non clastic siliceous sediment\n", + "Prediction : ['non clastic siliceous sediment', 'natural solid material', 'rock or sediment', 'sediment']\n", + "Gold: liquid water/fluid material\n", + "233-th prediction done\n", + "\n", + "\n", + " sand size sediment\n", + "Prediction : ['sand size sediment', 'natural solid material', 'rock or sediment', 'sediment']\n", + "Gold: sediment/natural solid material/generic sandstone/sedimentary rock/rock\n", + "234-th prediction done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "235-th prediction done\n", + "\n", + "\n", + " shale\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 
'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 
'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " clastic sedimentary rock\n", + "Extracted labels: ['clastic sediment', 'rock', 'sedimentary rock', 'sediment', 'clastic sedimentary rock']\n", + "Prediction : ['clastic sedimentary rock', 'rock', 'sedimentary rock', 'sediment', 'clastic sediment', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/generic mudstone/natural solid material/sedimentary rock/rock\n", + "236-th prediction done\n", + "\n", + "\n", + " pumice rich sediment\n", + "Prediction : ['pumice', 'natural solid material', 'rock or sediment', 'rock']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "237-th prediction done\n", + "\n", + "\n", + " coal\n", + "Prediction : ['rock', 'sedimentary rock', 'coal', 'organic rich sedimentary rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/coal/natural solid material/organic rich sedimentary rock/sedimentary rock/rock\n", + "238-th prediction done\n", + "\n", + "\n", + " mineral-native element\n", + "Prediction : ['mineral-native element', 'natural solid material', 'mineral']\n", + "Gold: natural solid material/mineral/mineral-phosphate, arsenate, or vanadate\n", + "239-th prediction done\n", + "\n", + "\n", + " tuff\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary 
material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " clastic sedimentary rock\n", + "Extracted labels: ['clastic sediment', 'rock', 'sedimentary rock', 'sediment', 'clastic sedimentary rock']\n", + "Prediction : ['clastic sedimentary rock', 'rock', 'sedimentary 
rock', 'sediment', 'clastic sediment', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "240-th prediction done\n", + "\n", + "\n", + " pyroxenite\n", + "Prediction : ['rock', 'pyroxenite', 'igneous rock', 'ultramafic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock\n", + "241-th prediction done\n", + "\n", + "\n", + " ground water\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar 
granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " fluid material\n", + "Extracted labels: ['fluid material', 'material']\n", + "Prediction : ['fluid material']\n", + "Gold: liquid water/fluid material\n", + "242-th prediction done\n", + "\n", + "\n", + " volcanic ash with basaltic-andesite\n", + "Prediction : ['rock or sediment', 'rock', 'fine grained igneous rock', 'igneous rock', 'intermediate composition igneous rock', 'basalt', 'natural solid material', 'andesite']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "243-th prediction done\n", + "\n", + "\n", + " quartz rich igneous rock\n", + "Prediction : ['rock', 'quartz', 'igneous rock', 'phaneritic igneous rock', 'quartz rich igneous rock', 'mineral', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/generic mudstone/natural solid material/sedimentary rock/rock\n", + "244-th prediction done\n", + "\n", + "\n", + " coal\n", + "Prediction : ['rock', 'sedimentary rock', 'coal', 'organic rich sedimentary rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/coal/natural solid material/organic rich 
sedimentary rock/sedimentary rock/rock\n", + "245-th prediction done\n", + "\n", + "\n", + " granodiorite\n", + "Prediction : ['rock', 'granitoid', 'igneous rock', 'phaneritic igneous rock', 'granodiorite', 'natural solid material', 'rock or sediment']\n", + "Gold: granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock\n", + "246-th prediction done\n", + "\n", + "\n", + " Basal, fine grained bed. 1.5m of bedded, lapilli tuffs in units 8-16cm thick. Some, but not all the units grade up into ash.\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 
'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " fine grained igneous rock\n", + "Extracted labels: ['rock', 'igneous rock', 'fine grained igneous rock']\n", + "Prediction : ['rock', 'fine grained igneous rock', 'igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "247-th prediction done\n", + "\n", + "\n", + " ultrabasic igneous rock\n", + "Prediction : ['ultrabasic igneous rock', 'rock', 'igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/porphyry/rock\n", + "248-th prediction done\n", + "\n", + "\n", + " peridotite\n", + "Prediction : ['rock', 'igneous rock', 'ultramafic igneous rock', 'peridotite', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n", + "249-th prediction done\n", + "\n", + "\n", + " mineral-silicate or germanate\n", + "Prediction : ['mineral', 'natural solid material', 'mineral-silicate or germanate']\n", + "Gold: natural solid material/mineral/mineral-phosphate, 
arsenate, or vanadate\n", + "250-th prediction done\n", + "\n", + "\n", + " clinopyroxenite\n", + "Prediction : ['rock', 'pyroxenite', 'igneous rock', 'ultramafic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock\n", + "251-th prediction done\n", + "\n", + "\n", + " bronzitite\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 
'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " mineral\n", + "Extracted labels: ['mineral']\n", + "Prediction : ['mineral', 'natural solid material']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock\n", + "252-th prediction done\n", + "\n", + "\n", + " dacite\n", + "Prediction : ['rock', 'fine grained igneous rock', 'igneous rock', 'dacite', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "253-th prediction done\n", + "\n", + "\n", + " coal\n", + "Prediction : ['rock', 'sedimentary rock', 'coal', 'organic rich sedimentary rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/coal/natural solid material/organic rich sedimentary rock/sedimentary rock/rock\n", + "254-th prediction done\n", + "\n", + "\n", + " augite andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "255-th prediction done\n", + "\n", + "\n", + " granodiorite\n", + "Prediction : ['rock', 'granitoid', 'igneous rock', 'phaneritic igneous rock', 'granodiorite', 
'natural solid material', 'rock or sediment']\n", + "Gold: granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock\n", + "256-th prediction done\n", + "\n", + "\n", + " granodiorite\n", + "Prediction : ['rock', 'granitoid', 'igneous rock', 'phaneritic igneous rock', 'granodiorite', 'natural solid material', 'rock or sediment']\n", + "Gold: granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock\n", + "257-th prediction done\n", + "\n", + "\n", + " schist\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 
'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " clastic sedimentary rock\n", + "Extracted labels: ['clastic sediment', 'rock', 'sedimentary rock', 'sediment', 'clastic sedimentary rock']\n", + "Prediction : ['clastic sedimentary rock', 'rock', 'sedimentary rock', 'sediment', 'clastic sediment', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/natural solid material/metamorphic rock/rock\n", + "258-th prediction done\n", + "\n", + "\n", + " basalt\n", + "Prediction : ['rock', 'fine grained igneous rock', 'igneous rock', 'basalt', 'natural solid material', 'rock or sediment']\n", + "Gold: basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock\n", + "259-th prediction done\n", + "\n", + "\n", + " glass rich igneous rock\n", + "Prediction : ['rock', 'anthropogenic material', 'any anthropogenic material', 'igneous rock', 'glass rich igneous rock', 'glass', 'natural solid material', 'rock or sediment']\n", + "Gold: glass/anthropogenic material/any anthropogenic material\n", + "260-th prediction done\n", + "\n", + "\n", + " rhyodacite\n", + "Prediction : ['rock', 'fine grained igneous rock', 'igneous rock', 'dacite', 
'natural solid material', 'rock or sediment']\n", + "Gold: sediment/dacite/igneous rock/natural solid material/fine grained igneous rock/rock\n", + "261-th prediction done\n", + "\n", + "\n", + " quartz rich igneous rock\n", + "Prediction : ['rock', 'quartz', 'igneous rock', 'phaneritic igneous rock', 'quartz rich igneous rock', 'mineral', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/natural solid material/metamorphic rock/rock\n", + "262-th prediction done\n", + "\n", + "\n", + " limestone\n", + "Prediction : ['natural solid material', 'rock or sediment', 'limestone', 'rock']\n", + "Gold: sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock\n", + "263-th prediction done\n", + "\n", + "\n", + " limestone\n", + "Prediction : ['natural solid material', 'rock or sediment', 'limestone', 'rock']\n", + "Gold: sediment/natural solid material/carbonate sedimentary rock/sedimentary rock/rock\n", + "264-th prediction done\n", + "\n", + "\n", + " basaltic scoriaceous lapilli\n", + "Prediction : ['rock', 'fine grained igneous rock', 'igneous rock', 'basalt', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "265-th prediction done\n", + "\n", + "\n", + " ultrabasic igneous rock\n", + "Prediction : ['ultrabasic igneous rock', 'rock', 'igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/porphyry/rock\n", + "266-th prediction done\n", + "\n", + "\n", + " diabase with dike-rock\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 
'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " diabase with dike-rock\n", + "Extracted labels: ['rock']\n", + 
"Prediction : ['natural solid material', 'rock or sediment', 'rock']\n", + "Gold: sediment/igneous rock/natural solid material/doleritic rock/rock\n", + "267-th prediction done\n", + "\n", + "\n", + " quartz rich igneous rock\n", + "Prediction : ['rock', 'quartz', 'igneous rock', 'phaneritic igneous rock', 'quartz rich igneous rock', 'mineral', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/natural solid material/generic sandstone/sedimentary rock/rock\n", + "268-th prediction done\n", + "\n", + "\n", + " green-grey clay rich tuff\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali 
feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " clastic sedimentary rock\n", + "Extracted labels: ['clastic sediment', 'rock', 'sedimentary rock', 'sediment', 'clastic sedimentary rock']\n", + "Prediction : ['clastic sedimentary rock', 'rock', 'sedimentary rock', 'sediment', 'clastic sediment', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "269-th prediction done\n", + "\n", + "\n", + " mineral-oxide\n", + "Prediction : ['mineral-oxide', 'mineral', 'natural solid material']\n", + "Gold: liquid water/fluid material\n", + "270-th prediction done\n", + "\n", + "\n", + " soil solution\n", + "Prediction : ['mixed soil sediment or rock', 'natural solid material', 'soil']\n", + "Gold: liquid water/fluid material\n", + "271-th prediction done\n", + "\n", + "\n", + " quartz-diorite\n", + "Prediction : ['quartz', 'natural solid material', 'mineral']\n", + "Gold: dioritoid/phaneritic igneous rock/sediment/igneous rock/natural solid material/rock\n", + "272-th prediction done\n", + "\n", + "\n", + " bentonite\n", + "['quartz rich igneous rock', 
'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non 
clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " clastic sedimentary rock\n", + "Extracted labels: ['clastic sediment', 'rock', 'sedimentary rock', 'sediment', 'clastic sedimentary rock']\n", + "Prediction : ['clastic sedimentary rock', 'rock', 'sedimentary rock', 'sediment', 'clastic sediment', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/generic mudstone/natural solid material/sedimentary rock/rock\n", + "273-th prediction done\n", + "\n", + "\n", + " basalt\n", + "Prediction : ['rock', 'fine grained igneous rock', 'igneous rock', 'basalt', 'natural solid material', 'rock or sediment']\n", + "Gold: basalt/sediment/igneous rock/natural solid material/fine grained igneous rock/rock\n", + "274-th prediction done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "275-th prediction done\n", + "\n", + "\n", + " granodiorite\n", + "Prediction : ['rock', 'granitoid', 'igneous rock', 'phaneritic igneous rock', 'granodiorite', 'natural solid material', 'rock or sediment']\n", + "Gold: granitoid/phaneritic igneous rock/sediment/granodiorite/igneous rock/natural solid material/rock\n", + "276-th prediction done\n", + "\n", + "\n", + " basaltic-andesite\n", + "Prediction : ['rock or sediment', 'rock', 'fine grained igneous rock', 'igneous rock', 'intermediate composition igneous rock', 'basalt', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "277-th prediction done\n", + "\n", 
+ "\n", + " phyllite\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 
'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " phyllite\n", + "['biogenic non-organic material', 'anthropogenic material', 'anthropogenic metal material', 'fragmental igneous rock', 'granitoid', 'organic material', 'clastic sediment', 'plant material', 'any ice', 'rock', 'sedimentary rock', 'fluid material', 'sediment', 'igneous rock', 'mineral', 'clastic sedimentary rock', 'phaneritic igneous rock', 'intermediate composition igneous rock', 'gabbroid', 'ultramafic igneous rock', 'organic rich sedimentary rock', 'chert', 'ceramic clay', 'fine grained igneous rock', 'anthropogenic organic material', 'organic animal product', 'natural solid material', 'material', 'fault related material']\n", + "Extracted labels: []\n", + " phaneritic igneous rock\n", + "Extracted labels: ['rock', 'igneous rock', 'phaneritic igneous rock', 'rock or sediment']\n", + "Prediction : ['rock', 'igneous rock', 'phaneritic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/natural solid material/metamorphic rock/rock\n", + "278-th prediction done\n", + "\n", + "\n", + " tuff\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 
'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " volcanic rock\n", + "Extracted labels: ['rock']\n", + "Prediction : ['natural solid material', 'rock or sediment', 'rock']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "279-th prediction done\n", + "\n", + "\n", 
+ " vitric tuff\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 
'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " vitric tuff\n", + "['biogenic non-organic material', 'anthropogenic material', 'anthropogenic metal material', 'fragmental igneous rock', 'granitoid', 'organic material', 'clastic sediment', 'plant material', 'any ice', 'rock', 'sedimentary rock', 'fluid material', 'sediment', 'igneous rock', 'mineral', 'clastic sedimentary rock', 'phaneritic igneous rock', 'intermediate composition igneous rock', 'gabbroid', 'ultramafic igneous rock', 'organic rich sedimentary rock', 'chert', 'ceramic clay', 'fine grained igneous rock', 'anthropogenic organic material', 'organic animal product', 'natural solid material', 'material', 'fault related material']\n", + "Extracted labels: []\n", + " phaneritic igneous rock\n", + "Extracted labels: ['rock', 'igneous rock', 'phaneritic igneous rock', 'rock or sediment']\n", + "Prediction : ['rock', 'igneous rock', 'phaneritic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "280-th prediction done\n", + "\n", + "\n", + " volcanic rock\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 
'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " volcanic rock\n", + "Extracted labels: ['rock']\n", + "Prediction : ['natural solid material', 'rock or sediment', 'rock']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous 
rock/rock\n", + "281-th prediction done\n", + "\n", + "\n", + " mineral-silicate or germanate\n", + "Prediction : ['mineral', 'natural solid material', 'mineral-silicate or germanate']\n", + "Gold: natural solid material/mineral/mineral-silicate or germanate\n", + "282-th prediction done\n", + "\n", + "\n", + " Kimberlite\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 
'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " Kimberlite\n", + "['biogenic non-organic material', 'anthropogenic material', 'anthropogenic metal material', 'fragmental igneous rock', 'granitoid', 'organic material', 'clastic sediment', 'plant material', 'any ice', 'rock', 'sedimentary rock', 'fluid material', 'sediment', 'igneous rock', 'mineral', 'clastic sedimentary rock', 'phaneritic igneous rock', 'intermediate composition igneous rock', 'gabbroid', 'ultramafic igneous rock', 'organic rich sedimentary rock', 'chert', 'ceramic clay', 'fine grained igneous rock', 'anthropogenic organic material', 'organic animal product', 'natural solid material', 'material', 'fault related material']\n", + "Extracted labels: []\n", + " phaneritic igneous rock\n", + "Extracted labels: ['rock', 'igneous rock', 'phaneritic igneous rock', 'rock or sediment']\n", + "Prediction : ['rock', 'igneous rock', 'phaneritic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/exotic composition igneous rock/rock\n", + "283-th prediction done\n", + "\n", + "\n", + " gaseous material\n", + "Prediction : ['fluid material', 'gaseous material']\n", + "Gold: liquid water/fluid material\n", + "284-th prediction done\n", + "\n", + "\n", + " monzonite\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid 
sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid 
water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " Monzonite\n", + "['biogenic non-organic material', 'anthropogenic material', 'anthropogenic metal material', 'fragmental igneous rock', 'granitoid', 'organic material', 'clastic sediment', 'plant material', 'any ice', 'rock', 'sedimentary rock', 'fluid material', 'sediment', 'igneous rock', 'mineral', 'clastic sedimentary rock', 'phaneritic igneous rock', 'intermediate composition igneous rock', 'gabbroid', 'ultramafic igneous rock', 'organic rich sedimentary rock', 'chert', 'ceramic clay', 'fine grained igneous rock', 'anthropogenic organic material', 'organic animal product', 'natural solid material', 'material', 'fault related material']\n", + "Extracted labels: []\n", + " phaneritic igneous rock\n", + "Extracted labels: ['rock', 'igneous rock', 'phaneritic igneous rock', 'rock or sediment']\n", + "Prediction : ['rock', 'igneous rock', 'phaneritic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: phaneritic igneous rock/sediment/igneous rock/natural solid material/syenitoid/rock\n", + "285-th prediction done\n", + "\n", + "\n", + " ultrabasic igneous rock\n", + "Prediction : ['ultrabasic igneous rock', 'rock', 'igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n", + "286-th prediction done\n", + "\n", + "\n", + " Mylonite\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 
'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " Mylonite\n", + "['biogenic non-organic material', 'anthropogenic 
material', 'anthropogenic metal material', 'fragmental igneous rock', 'granitoid', 'organic material', 'clastic sediment', 'plant material', 'any ice', 'rock', 'sedimentary rock', 'fluid material', 'sediment', 'igneous rock', 'mineral', 'clastic sedimentary rock', 'phaneritic igneous rock', 'intermediate composition igneous rock', 'gabbroid', 'ultramafic igneous rock', 'organic rich sedimentary rock', 'chert', 'ceramic clay', 'fine grained igneous rock', 'anthropogenic organic material', 'organic animal product', 'natural solid material', 'material', 'fault related material']\n", + "Extracted labels: []\n", + " phaneritic igneous rock\n", + "Extracted labels: ['rock', 'igneous rock', 'phaneritic igneous rock', 'rock or sediment']\n", + "Prediction : ['rock', 'igneous rock', 'phaneritic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/mylonitic rock/fault related material/natural solid material/rock\n", + "287-th prediction done\n", + "\n", + "\n", + " tuff\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 
'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " clastic sedimentary rock\n", + "Extracted labels: ['clastic sediment', 'rock', 'sedimentary rock', 'sediment', 'clastic sedimentary rock']\n", + "Prediction : ['clastic sedimentary rock', 'rock', 'sedimentary rock', 'sediment', 'clastic sediment', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "288-th prediction done\n", + "\n", + "\n", + " Devitrified rhyolite\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich 
sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 
'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " fragmental igneous rock\n", + "Extracted labels: ['fragmental igneous rock', 'rock', 'igneous rock']\n", + "Prediction : ['rock', 'fragmental igneous rock', 'igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/rhyolitoid/natural solid material/fine grained igneous rock/rock\n", + "289-th prediction done\n", + "\n", + "\n", + " Kimberlite\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 
'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " Kimberlite\n", + "['biogenic non-organic material', 'anthropogenic material', 'anthropogenic metal material', 'fragmental igneous rock', 'granitoid', 'organic material', 'clastic sediment', 'plant material', 'any ice', 'rock', 'sedimentary rock', 'fluid material', 'sediment', 'igneous rock', 'mineral', 'clastic sedimentary rock', 'phaneritic igneous rock', 'intermediate composition igneous rock', 'gabbroid', 'ultramafic igneous rock', 'organic rich sedimentary rock', 'chert', 'ceramic clay', 'fine grained igneous rock', 'anthropogenic organic material', 'organic animal product', 'natural solid material', 'material', 'fault related material']\n", + "Extracted labels: []\n", + " phaneritic igneous rock\n", + "Extracted labels: ['rock', 'igneous rock', 'phaneritic igneous rock', 'rock or sediment']\n", + "Prediction : ['rock', 'igneous rock', 'phaneritic igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/exotic composition igneous rock/rock\n", + "290-th prediction done\n", + "\n", + "\n", + " ultrabasic igneous rock\n", + "Prediction : ['ultrabasic igneous rock', 'rock', 
'igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/peridotite/rock\n", + "291-th prediction done\n", + "\n", + "\n", + " gaseous material\n", + "Prediction : ['fluid material', 'gaseous material']\n", + "Gold: liquid water/fluid material\n", + "292-th prediction done\n", + "\n", + "\n", + " ultrabasic igneous rock\n", + "Prediction : ['ultrabasic igneous rock', 'rock', 'igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/ultramafic igneous rock/pyroxenite/rock\n", + "293-th prediction done\n", + "\n", + "\n", + " hawaiite\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non 
clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " intermediate composition igneous rock\n", + "Extracted labels: ['rock', 'igneous rock', 'intermediate composition igneous rock']\n", + "Prediction : ['rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/trachytoid/fine grained igneous rock/rock\n", + "294-th prediction done\n", + "\n", + "\n", + " tuffaceous greywacke\n", + "Prediction : ['greywacke', 'natural solid material', 'rock or sediment', 'rock']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "295-th prediction done\n", + "\n", + "\n", + " tuff\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 
'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic 
igneous rock']\n", + " clastic sedimentary rock\n", + "Extracted labels: ['clastic sediment', 'rock', 'sedimentary rock', 'sediment', 'clastic sedimentary rock']\n", + "Prediction : ['clastic sedimentary rock', 'rock', 'sedimentary rock', 'sediment', 'clastic sediment', 'natural solid material', 'rock or sediment']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "296-th prediction done\n", + "\n", + "\n", + " andesite\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition igneous rock/rock\n", + "297-th prediction done\n", + "\n", + "\n", + " latite tuff\n", + "['quartz rich igneous rock', 'generic mudstone', 'quartz', 'slate', 'copper', 'hybrid sediment', 'plaster', 'mineral-phosphate, arsenate, or vanadate', 'glass rich igneous rock', 'iron rich sedimentary rock', 'brick clay', 'gold', 'pegmatite', 'limestone', 'trachytoid', 'massive sulphide', 'mud size sediment', 'gaseous material', 'mineral-organic compound', 'residual material', 'charcoal', 'greywacke', 'glass', 'paper', 'leather', 'doleritic rock', 'mineral-halide', 'carbonate sediment', 'tephritoid', 'chemical sedimentary material', 'non-aqueous liquid material', 'pyroclastic rock', 'plutonic igneous rock', 'iron rich sediment', 'foid gabbroid', 'mica', 'aphanite', 'bone', 'plant fiber', 'andesite', 'exotic composition igneous rock', 'hybrid sedimentary rock', 'shell', 'cinder', 'amber', 'foid dioritoid', 'frozen water', 'sand size sediment', 'dolomite', 'marble', 'porcelain', 'aplite', 'iron', 'travertine', 'mineral-borate', 'pumice', 'breccia gouge series', 'bucchero', 'syenitoid', 'mineral-sulfide or sulfosalt', 'biogenic sediment', 'foiditoid', 'organic animal material', 'flint', 'mineral-carbonate or nitrate', 'tonalite', 'gabbro', 'carbonate sedimentary rock', 
'phosphorite', 'generic sandstone', 'lead', 'tephra', 'phonolitoid', 'pewter', 'gabbroic rock', 'peridotite', 'non clastic siliceous sediment', 'hypabyssal intrusive rock', 'anorthositic rock', 'granodiorite', 'mineral-native element', 'alkali feldspar granite', 'hornblendite', 'tuffite', 'high magnesium fine grained igneous rock', 'foid syenitoid', 'bronze', 'breccia', 'soil', 'fiber material', 'diamictite', 'phosphate rich sediment', 'terra sigilata', 'dacite', 'hair', 'monzogabbroic rock', 'plastic (material)', 'cataclasite series', 'brass', 'metasomatic rock', 'obsidian', 'rhyolitoid', 'mixed soil sediment or rock', 'granite', 'coal', 'plaster or mortar', 'rubber', 'dispersed media', 'mineral-oxide', 'foidolite', 'impact generated material', 'gravel size sediment', 'diamicton', 'mineral-sulfate, selenate, or tellurate', 'non clastic siliceous sedimentary rock', 'terracotta', 'liquid water', 'mineral-silicate or germanate', 'kaolin', 'basalt', 'porphyry', 'generic conglomerate', 'wood', 'metamorphic rock', 'hematite', 'dioritoid', 'faience', 'pyroxenite', 'mylonitic rock', 'ultrabasic igneous rock']\n", + " igneous rock\n", + "Extracted labels: ['rock', 'igneous rock']\n", + "Prediction : ['natural solid material', 'rock or sediment', 'igneous rock', 'rock']\n", + "Gold: sediment/igneous rock/natural solid material/pyroclastic rock/fragmental igneous rock/rock\n", + "298-th prediction done\n", + "\n", + "\n", + " diamictite\n", + "Prediction : ['clastic sedimentary rock', 'rock', 'sedimentary rock', 'diamictite', 'natural solid material', 'rock or sediment']\n", + "Gold: diamictite/clastic sedimentary rock/sediment/natural solid material/sedimentary rock/rock\n", + "299-th prediction done\n", + "\n", + "\n", + " andesite dike\n", + "Prediction : ['rock or sediment', 'rock', 'igneous rock', 'intermediate composition igneous rock', 'natural solid material', 'andesite']\n", + "Gold: andesite/sediment/igneous rock/natural solid material/intermediate composition 
igneous rock/rock\n", + "300-th prediction done\n", + "\n", + "\n" + ] + }, + { + "output_type": "error", + "ename": "KeyboardInterrupt", + "evalue": "", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdump\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtest_prediction\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 66\u001b[0;31m \u001b[0mzero_shot_prediction\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtest_df\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mOUTPUT_FILE\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m\u001b[0m in \u001b[0;36mzero_shot_prediction\u001b[0;34m(test_df, summarize_and_explain, cot, OUTPUT_FILE)\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgenerate_prediction_with_reasoning\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjoined_leaf_material_types\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreasoning\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 17\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mgenerate_prediction\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjoined_leaf_material_types\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 18\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0mprediction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mextract_prediction\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mleaf_material_types\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m\u001b[0m in \u001b[0;36mgenerate_prediction\u001b[0;34m(material_types, sample_description)\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;34m>>\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \"\"\"\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mgenerate_answer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msystem_prompt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minstruction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstrip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"\\n\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m\u001b[0m in \u001b[0;36mgenerate_answer\u001b[0;34m(system_prompt, prompt)\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0minputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtokenizer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput_text\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreturn_tensors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"pt\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreturn_attention_mask\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdevice\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 14\u001b[0;31m 
\u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgenerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgeneration_config\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mgeneration_config\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 15\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0mtext\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtokenizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbatch_decode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py\u001b[0m in \u001b[0;36mdecorate_context\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 113\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mdecorate_context\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 114\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mctx_factory\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 115\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 116\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 117\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdecorate_context\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + 
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py\u001b[0m in \u001b[0;36mgenerate\u001b[0;34m(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)\u001b[0m\n\u001b[1;32m 1575\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1576\u001b[0m \u001b[0;31m# 13. run sample\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1577\u001b[0;31m result = self._sample(\n\u001b[0m\u001b[1;32m 1578\u001b[0m \u001b[0minput_ids\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1579\u001b[0m \u001b[0mlogits_processor\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mprepared_logits_processor\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py\u001b[0m in \u001b[0;36m_sample\u001b[0;34m(self, input_ids, logits_processor, stopping_criteria, logits_warper, max_length, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, output_logits, return_dict_in_generate, synced_gpus, streamer, **model_kwargs)\u001b[0m\n\u001b[1;32m 2731\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2732\u001b[0m \u001b[0;31m# forward pass to get next token\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2733\u001b[0;31m outputs = self(\n\u001b[0m\u001b[1;32m 2734\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mmodel_inputs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2735\u001b[0m \u001b[0mreturn_dict\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in 
\u001b[0;36m_wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1509\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compiled_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# type: ignore[misc]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1510\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1511\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1512\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1513\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1518\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_pre_hooks\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_hooks\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1519\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0;31m \u001b[0;32mreturn\u001b[0m 
\u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1521\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1522\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/accelerate/hooks.py\u001b[0m in \u001b[0;36mnew_forward\u001b[0;34m(module, *args, **kwargs)\u001b[0m\n\u001b[1;32m 164\u001b[0m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_old_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 166\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_old_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 167\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_hf_hook\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpost_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodule\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 168\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/models/mistral/modeling_mistral.py\u001b[0m in 
\u001b[0;36mforward\u001b[0;34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict)\u001b[0m\n\u001b[1;32m 1155\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1156\u001b[0m \u001b[0;31m# decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1157\u001b[0;31m outputs = self.model(\n\u001b[0m\u001b[1;32m 1158\u001b[0m \u001b[0minput_ids\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minput_ids\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1159\u001b[0m \u001b[0mattention_mask\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mattention_mask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1509\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compiled_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# type: ignore[misc]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1510\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1511\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1512\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1513\u001b[0m 
\u001b[0;32mdef\u001b[0m \u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1518\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_pre_hooks\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_hooks\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1519\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1521\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1522\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/accelerate/hooks.py\u001b[0m in \u001b[0;36mnew_forward\u001b[0;34m(module, *args, **kwargs)\u001b[0m\n\u001b[1;32m 164\u001b[0m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_old_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 166\u001b[0;31m \u001b[0moutput\u001b[0m 
\u001b[0;34m=\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_old_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 167\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_hf_hook\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpost_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodule\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 168\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/models/mistral/modeling_mistral.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict)\u001b[0m\n\u001b[1;32m 1040\u001b[0m )\n\u001b[1;32m 1041\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1042\u001b[0;31m layer_outputs = decoder_layer(\n\u001b[0m\u001b[1;32m 1043\u001b[0m \u001b[0mhidden_states\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1044\u001b[0m \u001b[0mattention_mask\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mattention_mask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1509\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compiled_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# type: ignore[misc]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1510\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1511\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1512\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1513\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1518\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_pre_hooks\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_hooks\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1519\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1521\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1522\u001b[0m 
\u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/accelerate/hooks.py\u001b[0m in \u001b[0;36mnew_forward\u001b[0;34m(module, *args, **kwargs)\u001b[0m\n\u001b[1;32m 164\u001b[0m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_old_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 166\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_old_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 167\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_hf_hook\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpost_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodule\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 168\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/models/mistral/modeling_mistral.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, hidden_states, attention_mask, position_ids, past_key_value, output_attentions, use_cache, **kwargs)\u001b[0m\n\u001b[1;32m 752\u001b[0m \u001b[0mresidual\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhidden_states\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 753\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 754\u001b[0;31m \u001b[0mhidden_states\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minput_layernorm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhidden_states\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 755\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 756\u001b[0m \u001b[0;31m# Self Attention\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1509\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compiled_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# type: ignore[misc]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1510\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1511\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1512\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1513\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + 
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1518\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_pre_hooks\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_hooks\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1519\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1521\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1522\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/accelerate/hooks.py\u001b[0m in \u001b[0;36mnew_forward\u001b[0;34m(module, *args, **kwargs)\u001b[0m\n\u001b[1;32m 164\u001b[0m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_old_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 166\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_old_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 
167\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_hf_hook\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpost_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodule\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 168\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/models/mistral/modeling_mistral.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, hidden_states)\u001b[0m\n\u001b[1;32m 85\u001b[0m \u001b[0mhidden_states\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhidden_states\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloat32\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[0mvariance\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhidden_states\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkeepdim\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 87\u001b[0;31m \u001b[0mhidden_states\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhidden_states\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrsqrt\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvariance\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvariance_epsilon\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 88\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mweight\u001b[0m \u001b[0;34m*\u001b[0m 
\u001b[0mhidden_states\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput_dtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 89\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+ ]
+ }
+ ],
+ "source": [
+ "def _extract_sample_description(text, start_marker=\"\", end_marker=\"\"):\n",
+ "    \"\"\"Return the sample description contained in a concatenated text field.\n",
+ "\n",
+ "    The text is cut at the first occurrence of end_marker and a leading\n",
+ "    start_marker is dropped. An empty marker disables the corresponding step.\n",
+ "    \"\"\"\n",
+ "    # str.split raises ValueError on an empty separator, so only split when a marker is set\n",
+ "    if end_marker:\n",
+ "        text = text.split(end_marker)[0]\n",
+ "    if start_marker and text.startswith(start_marker):\n",
+ "        text = text[len(start_marker):]\n",
+ "    return text\n",
+ "\n",
+ "\n",
+ "def _classify_description(joined_labels, labels, text, cot):\n",
+ "    \"\"\"Prompt the model once over the given label space and return the extracted labels.\n",
+ "\n",
+ "    joined_labels is the label space rendered for the prompt, labels is the same\n",
+ "    label space as a collection (used to extract labels from the model output).\n",
+ "    \"\"\"\n",
+ "    if cot:\n",
+ "        # ZTC-CoT: generate the reasoning first, then predict conditioned on it\n",
+ "        reasoning = generate_reasoning(joined_labels, text)\n",
+ "        output = generate_prediction_with_reasoning(joined_labels, text, reasoning)\n",
+ "    else:\n",
+ "        output = generate_prediction(joined_labels, text)\n",
+ "    return extract_prediction(labels, output)\n",
+ "\n",
+ "\n",
+ "def zero_shot_prediction(test_df, summarize_and_explain = False, cot = False, OUTPUT_FILE=\"output.json\", start_marker=\"\", end_marker=\"\"):\n",
+ "    \"\"\"Predict material-type labels for every row of test_df.\n",
+ "\n",
+ "    Each description is first classified against the leaf labels. When no leaf\n",
+ "    label can be extracted from the model output, the label space is moved up\n",
+ "    one level at a time (get_parent_labels) until a label is extracted or no\n",
+ "    parent labels remain.\n",
+ "\n",
+ "    Args:\n",
+ "        test_df: DataFrame with a 'concatenated_text_B' column (model input) and\n",
+ "            a 'label_list' column (gold labels, only printed here).\n",
+ "        summarize_and_explain: if True, classify the output of\n",
+ "            generate_summary_and_explanation instead of the raw description.\n",
+ "        cot: if True, generate reasoning first and pass it to the prediction prompt.\n",
+ "        OUTPUT_FILE: path of the JSON file that receives the predictions; it is\n",
+ "            rewritten every 100 rows and once more after the last row.\n",
+ "        start_marker: prefix removed from the description; empty keeps the text as is.\n",
+ "        end_marker: delimiter at which the description is cut; empty keeps the text as is.\n",
+ "\n",
+ "    Returns:\n",
+ "        A list with one list of predicted labels per row, in row order.\n",
+ "    \"\"\"\n",
+ "    # NOTE(review): the two delimiter literals used to slice 'concatenated_text_B'\n",
+ "    # are empty strings in this revision (presumably special-token markers were\n",
+ "    # lost); pass the real markers via start_marker / end_marker - TODO confirm.\n",
+ "    test_prediction = []\n",
+ "    for position, (idx, row) in enumerate(test_df.iterrows()):\n",
+ "        model_input = _extract_sample_description(row['concatenated_text_B'], start_marker, end_marker)\n",
+ "\n",
+ "        # SummarizeExplain\n",
+ "        if summarize_and_explain:\n",
+ "            model_input = generate_summary_and_explanation(model_input)\n",
+ "\n",
+ "        prediction = _classify_description(joined_leaf_material_types, leaf_material_types, model_input, cot)\n",
+ "\n",
+ "        final_prediction = []\n",
+ "        if len(prediction) > 0:\n",
+ "            for pred in prediction:\n",
+ "                final_prediction.extend(leaf_to_entire_path_mapping[pred]) # get entire parents as well and add it to the prediction\n",
+ "        else:\n",
+ "            # no leaf label was extracted: retry on the parent label space, level by level\n",
+ "            curr_labels = leaf_material_types\n",
+ "            parent_labels = get_parent_labels(curr_labels)\n",
+ "\n",
+ "            while len(prediction) == 0 and len(parent_labels) > 0:\n",
+ "                curr_labels = parent_labels\n",
+ "                prediction = _classify_description(\"\\n\".join(curr_labels), curr_labels, model_input, cot)\n",
+ "                print(\"Extracted labels: \", prediction)\n",
+ "                final_prediction = []\n",
+ "                for pred in prediction:\n",
+ "                    final_prediction.append(pred)\n",
+ "                    final_prediction.extend(label_to_parent[pred])\n",
+ "\n",
+ "                # move up one more level for the next attempt\n",
+ "                parent_labels = get_parent_labels(curr_labels)\n",
+ "\n",
+ "        # deduplicate, drop missing parents and the root label; sorted keeps the output reproducible\n",
+ "        final_prediction = sorted({x for x in final_prediction if x is not None and x != 'material'})\n",
+ "        print(\"Prediction : \", final_prediction)\n",
+ "\n",
+ "        test_prediction.append(final_prediction)\n",
+ "        print(\"Gold: \",row[\"label_list\"])\n",
+ "        print(f\"{idx}-th prediction done\\n\\n\")\n",
+ "        # checkpoint on the row position: the DataFrame index label need not be a running integer\n",
+ "        if position % 100 == 0:\n",
+ "            with open(OUTPUT_FILE, 'w') as file:\n",
+ "                json.dump(test_prediction, file)\n",
+ "\n",
+ "    with open(OUTPUT_FILE, 'w') as file:\n",
+ "        json.dump(test_prediction, file)\n",
+ "\n",
+ "    # the evaluation cells consume this value, so it has to be returned\n",
+ "    return test_prediction\n",
+ "\n",
+ "test_prediction = zero_shot_prediction(test_df, False, False, OUTPUT_FILE)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "3a9ND_XmY1Hu"
+ },
+ "source": [
+ "# Evaluate"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "IlLXQYJS5aME"
+ },
+ "outputs": [],
+ "source": [
+ "multi_to_label = {\n",
+ "    \"rock or sediment\": [\"rock\", \"sediment\"],\n",
+ "    \"mixed soil sediment rock\" : [\"soil\", \"sediment\", \"rock\"]\n",
+ "}\n",
+ "\n",
+ "\n",
+ "def expand_composite_labels(labels, mapping):\n",
+ "    \"\"\"Replace each composite label by its component labels and deduplicate the result.\"\"\"\n",
+ "    expanded = set()\n",
+ "    for label in labels:\n",
+ "        # labels without a mapping entry are kept unchanged\n",
+ "        expanded.update(mapping.get(label, [label]))\n",
+ "    return sorted(expanded)\n",
+ "\n",
+ "\n",
+ "# Build new lists instead of editing test_prediction in place: removing items from a\n",
+ "# list while iterating over it skips the element that follows each removal.\n",
+ "final_predicted_labels = [expand_composite_labels(labels, multi_to_label) for labels in test_prediction]\n",
+ "\n",
+ "# sanity check: no composite label may survive the expansion\n",
+ "for labels in final_predicted_labels:\n",
+ "    if any(label in multi_to_label for label in labels):\n",
+ "        print(\"invalid\")\n",
+ "        break\n",
+ "final_predicted_labels[-1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "PzAzISU6ZGNl"
+ },
+ "outputs": [],
+ "source": [
+ "label_file=\"total_labels.txt\" # file that stores all labels of iSamples taxonomy\n",
+ "# read the label names (one per line) and close the file afterwards\n",
+ "with open(label_file) as f:\n",
+ "    gold_label_names = f.read().splitlines()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "lguC6p6xZIcE"
+ },
+ "outputs": [],
+ "source": [
+ "# gold labels are stored as one \"/\"-separated string per row\n",
+ "true_labels = [x.split(\"/\") for x in test_df['label_list'].tolist()]\n",
+ "true_labels"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "FM_92gqZZKwP"
+ },
+ "outputs": [],
+ "source": [
+ "## Multi label evaluation\n",
+ "from sklearn.metrics import classification_report\n",
+ "from sklearn.preprocessing import MultiLabelBinarizer\n",
+ "\n",
+ "# fit the binarizer on the label names read from label_file\n",
+ "mlb = MultiLabelBinarizer()\n",
+ "mlb.fit([gold_label_names])\n",
+ "\n",
+ "true_labels_bin = mlb.transform(true_labels)\n",
+ "predicted_labels_bin = mlb.transform(final_predicted_labels)\n",
+ "\n",
+ "print(classification_report(true_labels_bin, predicted_labels_bin, target_names=mlb.classes_))\n",
+ "report = classification_report(true_labels_bin, predicted_labels_bin, target_names=mlb.classes_, output_dict=True)\n",
+ "\n",
+ "# Print the classification report\n",
+ "print(report)"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "a679085184e04852a5a66fa38ba91d88": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HBoxModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+
"_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_1d9a8fb55c2e4199b83d826a14d8c5c8", + "IPY_MODEL_bd9df7d514474d3285b05692a4b6e74e", + "IPY_MODEL_b5485d1b489e4a15840ba32af8675b40" + ], + "layout": "IPY_MODEL_f373d8ecb2f44fbf9d26c06c74fe12ec" + } + }, + "1d9a8fb55c2e4199b83d826a14d8c5c8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e9c92a0ed6994d4cab13dcc4c13a8472", + "placeholder": "​", + "style": "IPY_MODEL_b6015ae8961d4954bec0b56578a998d8", + "value": "config.json: 100%" + } + }, + "bd9df7d514474d3285b05692a4b6e74e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5aa1dcd678784e7d9f44d29b51c0afd6", + "max": 623, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_8e2c3da60a7d4d7d927e3db45d6341fb", + "value": 623 + } + }, + "b5485d1b489e4a15840ba32af8675b40": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_31c8549a556a408b99dc760cc23dfd22", + "placeholder": "​", + "style": "IPY_MODEL_f5773f126e2c4105af4b926dcbfb2aaf", + "value": " 623/623 [00:00<00:00, 40.4kB/s]" + } + }, + "f373d8ecb2f44fbf9d26c06c74fe12ec": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e9c92a0ed6994d4cab13dcc4c13a8472": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b6015ae8961d4954bec0b56578a998d8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "5aa1dcd678784e7d9f44d29b51c0afd6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + 
"grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8e2c3da60a7d4d7d927e3db45d6341fb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "31c8549a556a408b99dc760cc23dfd22": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + 
"max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f5773f126e2c4105af4b926dcbfb2aaf": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "09184a59b6554c019a5afddb4d46f7d4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_3e46ac6ae7dc4f149079add9ecfe9b57", + "IPY_MODEL_dabc885c909b4df1ab6d5f3d27a39804", + "IPY_MODEL_41719d701fa444ba9199aa6fb155bb52" + ], + "layout": "IPY_MODEL_5e88a32b4d2640fbb4f53b9c0d01fb93" + } + }, + "3e46ac6ae7dc4f149079add9ecfe9b57": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_0f4b4142b9154ad4a58134e532d4a74e", + "placeholder": "​", + "style": "IPY_MODEL_20e20a69adc24c17b947ed1adf718cc5", + "value": "pytorch_model.bin.index.json: 100%" + } + }, + "dabc885c909b4df1ab6d5f3d27a39804": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2ba731dd172e45188f231c134f32240e", + "max": 23950, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_9f255ddfb0af4603927fa4fb8f15c7a0", + "value": 23950 + } + }, + "41719d701fa444ba9199aa6fb155bb52": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_998dec17d6014ba9a8ad1ccc7fda02a9", + "placeholder": "​", + "style": "IPY_MODEL_74ac0a6329c44866aab268580d54a54b", + "value": " 23.9k/23.9k [00:00<00:00, 519kB/s]" + } + }, + "5e88a32b4d2640fbb4f53b9c0d01fb93": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", 
+ "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0f4b4142b9154ad4a58134e532d4a74e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": 
null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "20e20a69adc24c17b947ed1adf718cc5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2ba731dd172e45188f231c134f32240e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9f255ddfb0af4603927fa4fb8f15c7a0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "998dec17d6014ba9a8ad1ccc7fda02a9": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "74ac0a6329c44866aab268580d54a54b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + 
}, + "9006594029d54ea2b9dd22c10cdcb6b7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_9b8aa1b4700f49e3b1406154fada90f0", + "IPY_MODEL_4afb39daeffa40f18b1c552086b02d48", + "IPY_MODEL_643975a54b434eddbcd0c9c25e722a36" + ], + "layout": "IPY_MODEL_177fddeae1df4d5fbfdfe5c2760440b4" + } + }, + "9b8aa1b4700f49e3b1406154fada90f0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1af94dbcbb7b41cc83d7156129f1656d", + "placeholder": "​", + "style": "IPY_MODEL_ee9633bc075d4c718ef751b27b340909", + "value": "Downloading shards: 100%" + } + }, + "4afb39daeffa40f18b1c552086b02d48": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_53d47acdae3a4a63af751a7b3bc34ecd", + "max": 2, + "min": 0, + "orientation": "horizontal", 
+ "style": "IPY_MODEL_ea1c3a9d09f74d0a8bd4a011e573999e", + "value": 2 + } + }, + "643975a54b434eddbcd0c9c25e722a36": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_193d711e29344e9a97b6f62ce586d192", + "placeholder": "​", + "style": "IPY_MODEL_56ee80bf9d684f7e91108dda63fede6a", + "value": " 2/2 [02:21<00:00, 66.37s/it]" + } + }, + "177fddeae1df4d5fbfdfe5c2760440b4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"1af94dbcbb7b41cc83d7156129f1656d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ee9633bc075d4c718ef751b27b340909": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "53d47acdae3a4a63af751a7b3bc34ecd": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": 
null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ea1c3a9d09f74d0a8bd4a011e573999e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "193d711e29344e9a97b6f62ce586d192": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": 
null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "56ee80bf9d684f7e91108dda63fede6a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d3a09b2a00d544368bfad9a40b6521d8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_840328d985b046078cc8431e0ba471bc", + "IPY_MODEL_6e42b7ca0c074994a4e55d4031bb095a", + "IPY_MODEL_a51d3c26c81d44cb92b31a4d81645352" + ], + "layout": "IPY_MODEL_44da9a148280492eac8d9462ea2005ae" + } + }, + "840328d985b046078cc8431e0ba471bc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + 
"_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d4d823d872fe43f4b41d571c63810378", + "placeholder": "​", + "style": "IPY_MODEL_3b30bbcd7a224d1986d6c4f1f8a0537f", + "value": "pytorch_model-00001-of-00002.bin: 100%" + } + }, + "6e42b7ca0c074994a4e55d4031bb095a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0a0fdb146b084f6cb9022658774d8d44", + "max": 9943044428, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_430c77a18ce14e57affa07b007f6b29e", + "value": 9943044428 + } + }, + "a51d3c26c81d44cb92b31a4d81645352": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8f7e56e7c530488ca080f17d09912927", + "placeholder": "​", + "style": "IPY_MODEL_262ed12e20f14a7da025941df7a8a4ec", + "value": " 9.94G/9.94G [01:35<00:00, 182MB/s]" + } + }, + "44da9a148280492eac8d9462ea2005ae": { + "model_module": 
"@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d4d823d872fe43f4b41d571c63810378": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + 
"grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3b30bbcd7a224d1986d6c4f1f8a0537f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "0a0fdb146b084f6cb9022658774d8d44": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + 
"overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "430c77a18ce14e57affa07b007f6b29e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "8f7e56e7c530488ca080f17d09912927": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "262ed12e20f14a7da025941df7a8a4ec": { + "model_module": "@jupyter-widgets/controls", + "model_name": 
"DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "26825528380e4248b907ec8fb2e3db98": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_3e258a7c24014a2985fc642836e1d659", + "IPY_MODEL_9611c61276154567bf26c44b25e3f33c", + "IPY_MODEL_91fb2a0fda9b4a55b25b71526a54f27e" + ], + "layout": "IPY_MODEL_7b178bf90246410cb76e8dc33cbb843c" + } + }, + "3e258a7c24014a2985fc642836e1d659": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_534efbc9c6a04af6a67fc7240f6c3df6", + "placeholder": "​", + "style": "IPY_MODEL_5f01eb75fd0a4e1ca25584959b8b0f1d", + "value": "model.safetensors.index.json: 100%" + } + }, + "9611c61276154567bf26c44b25e3f33c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": 
"1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d2e08c31b2914c4bbbe87eb82bbc0725", + "max": 25125, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_733656a77b48464fae03e7eae5f6f960", + "value": 25125 + } + }, + "91fb2a0fda9b4a55b25b71526a54f27e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a1469f01440842e39052ffdaaa4b244d", + "placeholder": "​", + "style": "IPY_MODEL_708417fcc5ac4ddb866a863c40880ad3", + "value": " 25.1k/25.1k [00:00<00:00, 386kB/s]" + } + }, + "7b178bf90246410cb76e8dc33cbb843c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + 
"justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "534efbc9c6a04af6a67fc7240f6c3df6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5f01eb75fd0a4e1ca25584959b8b0f1d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d2e08c31b2914c4bbbe87eb82bbc0725": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "733656a77b48464fae03e7eae5f6f960": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "a1469f01440842e39052ffdaaa4b244d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", 
+ "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "708417fcc5ac4ddb866a863c40880ad3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "4d6eae82b5bb467d89580d346034aa05": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + 
"box_style": "", + "children": [ + "IPY_MODEL_6589bf23342a4ca88a59b46d36fd7a51", + "IPY_MODEL_e70780cadaaf4a5da5d2bdcce45ad6b7", + "IPY_MODEL_4f0f87cea7734d8486b8b359505bf5c4" + ], + "layout": "IPY_MODEL_4a7a99ea869f46b3a4545b41991bd0ab" + } + }, + "6589bf23342a4ca88a59b46d36fd7a51": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e77c1764577c44529fbb7ac08f0cfe7c", + "placeholder": "​", + "style": "IPY_MODEL_fc0f7af7ff57481aaec43c6a7d046759", + "value": "pytorch_model-00002-of-00002.bin: 100%" + } + }, + "e70780cadaaf4a5da5d2bdcce45ad6b7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5ea503a14a5c4aab9672c8724ec623ce", + "max": 4540552031, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_3b82e2f3bc86473db9f438f40f8b63fe", + "value": 4540552031 + } + }, + "4f0f87cea7734d8486b8b359505bf5c4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e0e9637f840947d6a7e83e1f41f6d0a2", + "placeholder": "​", + "style": "IPY_MODEL_54febdc979294498bc3834573a7aa0af", + "value": " 4.54G/4.54G [00:45<00:00, 173MB/s]" + } + }, + "4a7a99ea869f46b3a4545b41991bd0ab": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e77c1764577c44529fbb7ac08f0cfe7c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fc0f7af7ff57481aaec43c6a7d046759": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "5ea503a14a5c4aab9672c8724ec623ce": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + 
"grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3b82e2f3bc86473db9f438f40f8b63fe": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "e0e9637f840947d6a7e83e1f41f6d0a2": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + 
"max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "54febdc979294498bc3834573a7aa0af": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ea23ba351cec462fb93bd7a0238b096c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_641147aed7ca48f7ad5c393ad8c29b66", + "IPY_MODEL_80dcbaab52b549899ef0ebd71184bc03", + "IPY_MODEL_8fbfd91171c44e3dbb728f8195f427e4" + ], + "layout": "IPY_MODEL_40cd217dbf3e42d79ab2ec57bd5b4796" + } + }, + "641147aed7ca48f7ad5c393ad8c29b66": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_e459814232c04920be36eb1785e80de4", + "placeholder": "​", + "style": "IPY_MODEL_d9f7e83019844328883fa4efce9c6554", + "value": "Loading checkpoint shards: 100%" + } + }, + "80dcbaab52b549899ef0ebd71184bc03": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_60108ed6ff4b4923ba1e048f34f70fe1", + "max": 2, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_859cf9a371164d97bef17523f257b776", + "value": 2 + } + }, + "8fbfd91171c44e3dbb728f8195f427e4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2d290718498547a6b77ea89e94ef1ec8", + "placeholder": "​", + "style": "IPY_MODEL_66ee7b37aa22458e8bb7d0134bdc4370", + "value": " 2/2 [01:09<00:00, 32.27s/it]" + } + }, + "40cd217dbf3e42d79ab2ec57bd5b4796": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + 
"align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e459814232c04920be36eb1785e80de4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": 
null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d9f7e83019844328883fa4efce9c6554": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "60108ed6ff4b4923ba1e048f34f70fe1": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "859cf9a371164d97bef17523f257b776": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "2d290718498547a6b77ea89e94ef1ec8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "66ee7b37aa22458e8bb7d0134bdc4370": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + 
}, + "84e3384be50f41beaa4fc5c9e339cd6e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_dc1f4a7e147a4ea9ba439ac1e3784bdd", + "IPY_MODEL_d1f57132236a4dc4997a13853cf5387b", + "IPY_MODEL_1901d112bf484ebabc1a20174cc80e84" + ], + "layout": "IPY_MODEL_f52a8c9e0d4b407381f25e001a0d923d" + } + }, + "dc1f4a7e147a4ea9ba439ac1e3784bdd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ca0f6266390a4d11bb3f9e3e4efad740", + "placeholder": "​", + "style": "IPY_MODEL_1d90860ca416478f8aaed6eebf597f9b", + "value": "generation_config.json: 100%" + } + }, + "d1f57132236a4dc4997a13853cf5387b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1c8064dbc9ff4ad89bf77919980d229d", + "max": 120, + "min": 0, + "orientation": 
"horizontal", + "style": "IPY_MODEL_49f7f0962f994ae6a27bddab4626f545", + "value": 120 + } + }, + "1901d112bf484ebabc1a20174cc80e84": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_98ee44eda25646bc983b44337247ab88", + "placeholder": "​", + "style": "IPY_MODEL_e1f1d5f66d4245b097e105b817102acf", + "value": " 120/120 [00:00<00:00, 7.38kB/s]" + } + }, + "f52a8c9e0d4b407381f25e001a0d923d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + 
"width": null + } + }, + "ca0f6266390a4d11bb3f9e3e4efad740": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1d90860ca416478f8aaed6eebf597f9b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "1c8064dbc9ff4ad89bf77919980d229d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": 
"LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "49f7f0962f994ae6a27bddab4626f545": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "98ee44eda25646bc983b44337247ab88": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + 
"flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e1f1d5f66d4245b097e105b817102acf": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "daaebeb135604a7f92ca60ab9c548304": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ed904233fca94f8dbeb2a9a74d2f5ae1", + "IPY_MODEL_d8dc5a7eea1845ab8c4237b1b389e704", + "IPY_MODEL_0d2aee31176a4dd1afae89bef077814f" + ], + "layout": "IPY_MODEL_c71ecf1c92c54d869f502f48597e5bf3" + } + }, + "ed904233fca94f8dbeb2a9a74d2f5ae1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + 
"model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_64d97f672bb04e0183883bbacf982a76", + "placeholder": "​", + "style": "IPY_MODEL_d4b411f78c7f4c1992a3d29511eb51a8", + "value": "tokenizer_config.json: 100%" + } + }, + "d8dc5a7eea1845ab8c4237b1b389e704": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_372f4d99d6354a67bfb3099957799266", + "max": 1687, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_75575c72fc35496c9d83cbde50892e54", + "value": 1687 + } + }, + "0d2aee31176a4dd1afae89bef077814f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6a683c2c5afd4ea1bee2dd6b87659277", + "placeholder": "​", + "style": "IPY_MODEL_5ccc3c0beabc4cd69f63a45db3b80303", + "value": " 1.69k/1.69k [00:00<00:00, 84.9kB/s]" + } + }, + 
"c71ecf1c92c54d869f502f48597e5bf3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "64d97f672bb04e0183883bbacf982a76": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": 
null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d4b411f78c7f4c1992a3d29511eb51a8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "372f4d99d6354a67bfb3099957799266": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + 
"object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "75575c72fc35496c9d83cbde50892e54": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "6a683c2c5afd4ea1bee2dd6b87659277": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5ccc3c0beabc4cd69f63a45db3b80303": { + "model_module": 
"@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "fba0299de9e54d28b8dcc9fcf5e7b633": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_598795295536484c8fffd8551a2edec0", + "IPY_MODEL_ee99716a1ce9485e9ea890a12bf6f9ef", + "IPY_MODEL_aba9dbb872dc469cb74ffc0dc0efb930" + ], + "layout": "IPY_MODEL_e32ed5960e21445fa0f21aa882d2f0d8" + } + }, + "598795295536484c8fffd8551a2edec0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_46ecae9135194a2ca8008cc03b93402b", + "placeholder": "​", + "style": "IPY_MODEL_08e87d7167f04a898698f0b8fc4be7d6", + "value": "tokenizer.model: 100%" + } + }, + "ee99716a1ce9485e9ea890a12bf6f9ef": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", 
+ "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a7ab89aa5bfd47149b722931bc73f328", + "max": 493443, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_474fee224b3147f98cf311f05946adbe", + "value": 493443 + } + }, + "aba9dbb872dc469cb74ffc0dc0efb930": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1f902a8c097a4358823e298d60011546", + "placeholder": "​", + "style": "IPY_MODEL_649985b1c88e49969b534ca7a96a5502", + "value": " 493k/493k [00:00<00:00, 27.7MB/s]" + } + }, + "e32ed5960e21445fa0f21aa882d2f0d8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + 
"height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "46ecae9135194a2ca8008cc03b93402b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "08e87d7167f04a898698f0b8fc4be7d6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a7ab89aa5bfd47149b722931bc73f328": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "474fee224b3147f98cf311f05946adbe": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "1f902a8c097a4358823e298d60011546": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + 
"model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "649985b1c88e49969b534ca7a96a5502": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "544fa6940306465d8aa59bac070bc4d9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", 
+ "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ca9746eab1c44052a46c8e9bc396a97e", + "IPY_MODEL_9f3986b389f146d888595473f9840e18", + "IPY_MODEL_805650c210c64a8a87aac287b8a1eb23" + ], + "layout": "IPY_MODEL_c498e759f28c45b8ae11e93987c04778" + } + }, + "ca9746eab1c44052a46c8e9bc396a97e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_65ceffed910546a5811ea3661966eb43", + "placeholder": "​", + "style": "IPY_MODEL_a577cd345e814a798715bf8589b0cb99", + "value": "added_tokens.json: 100%" + } + }, + "9f3986b389f146d888595473f9840e18": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ceed07113d194cda963efcf12ce9f5e7", + "max": 90, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_ac5c897c2352459fb486ac7ddce0e758", + "value": 90 + } + }, + "805650c210c64a8a87aac287b8a1eb23": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f0d3af130f70445da4f6b41c96829239", + "placeholder": "​", + "style": "IPY_MODEL_33bc886a105c4ad4a1e006325998f978", + "value": " 90.0/90.0 [00:00<00:00, 4.20kB/s]" + } + }, + "c498e759f28c45b8ae11e93987c04778": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "65ceffed910546a5811ea3661966eb43": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a577cd345e814a798715bf8589b0cb99": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ceed07113d194cda963efcf12ce9f5e7": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + 
"grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ac5c897c2352459fb486ac7ddce0e758": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "f0d3af130f70445da4f6b41c96829239": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + 
"max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "33bc886a105c4ad4a1e006325998f978": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "caf1af2a102f498788cc38eece442233": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_368f3674869e40bdbc9cf1824114c269", + "IPY_MODEL_065cf325a5664abbadca315210554285", + "IPY_MODEL_07cd49d1105146279d10d1016b91c49c" + ], + "layout": "IPY_MODEL_71420ecd2f514efbbce32dd3de546f6b" + } + }, + "368f3674869e40bdbc9cf1824114c269": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_c66df63f43ac486c9dab4efb266780f6", + "placeholder": "​", + "style": "IPY_MODEL_390029fe0ebd471982f24eb2d04340c4", + "value": "special_tokens_map.json: 100%" + } + }, + "065cf325a5664abbadca315210554285": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8a762994d3f94fcd84ae37871a2cc46d", + "max": 101, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_fca999b97c394c478591921d0c5817b9", + "value": 101 + } + }, + "07cd49d1105146279d10d1016b91c49c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_48d6a0eb9fdc413b97dbe9822f120c7c", + "placeholder": "​", + "style": "IPY_MODEL_a8173e520d7f404eab9d939270542faf", + "value": " 101/101 [00:00<00:00, 5.92kB/s]" + } + }, + "71420ecd2f514efbbce32dd3de546f6b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + 
"align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c66df63f43ac486c9dab4efb266780f6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": 
null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "390029fe0ebd471982f24eb2d04340c4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "8a762994d3f94fcd84ae37871a2cc46d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fca999b97c394c478591921d0c5817b9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "48d6a0eb9fdc413b97dbe9822f120c7c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a8173e520d7f404eab9d939270542faf": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } 
+ } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file