Hi,
I am trying to use your code to reproduce the results, but I have hit a brick wall. I managed to deploy the application on a CPU-only system, where I get around 2 to 3 it/s.
Now I am trying to use a GPU (Quadro P4000), but I do not get any speedup at all: I remain at 1 it/s while the GPU is running at full power. After a while I also get an out-of-memory error (8 GB).
Is this something you also encountered and fixed?
As a caveat: I am using nvidia-docker:
docker run --runtime=nvidia --rm reinoldus/ontoemma:latest bash /ontoemma/run_emma.sh cuda
The docker-repo is here: https://github.com/reinoldus/ontoemma
{
"dataset_reader": {
"type": "ontology_matcher",
"name_token_indexer": {
"tokens": {
"type": "single_id",
"lowercase_tokens": true
},
"token_characters": {
"type": "characters"
}
},
"token_only_indexer": {
"tokens": {
"type": "single_id",
"lowercase_tokens": true
}
}
},
"train_data_path": "/ontoemma/data/ontoemma.context.train",
"validation_data_path": "/ontoemma/data/ontoemma.context.dev",
"model": {
"type": "ontoemmaNN",
"name_embedder": {
"tokens": {
"type": "embedding",
"pretrained_file": "/ontoemma/data/weights100.txt.gz",
"embedding_dim": 100,
"trainable": false
},
"token_characters": {
"type": "character_encoding",
"embedding": {
"embedding_dim": 100
},
"encoder": {
"type": "cnn",
"embedding_dim": 100,
"num_filters": 50,
"ngram_filter_sizes": [4, 5]
},
"dropout": 0.2
}
},
"definition_embedder": {
"tokens": {
"type": "embedding",
"pretrained_file": "/ontoemma/data/weights100.txt.gz",
"embedding_dim": 100,
"trainable": false
}
},
"name_encoder": {
"type": "lstm",
"input_size": 200,
"hidden_size": 100,
"num_layers": 2,
"dropout": 0.2,
"bidirectional": true
},
"definition_encoder": {
"type": "lstm",
"input_size": 100,
"hidden_size": 100,
"num_layers": 2,
"dropout": 0.2,
"bidirectional": true
},
"siamese_feedforward": {
"input_dim": 600,
"num_layers": 2,
"hidden_dims": 100,
"activations": "relu",
"dropout": 0.2
},
"decision_feedforward": {
"input_dim": 232,
"num_layers": 2,
"hidden_dims": [232, 1],
"activations": ["relu", "linear"],
"dropout": [0.2, 0.0]
},
"initializer": [
[".*linear_layers.*weight", {"type": "xavier_normal"}],
[".*encoder.*module.*weight.*", {"type": "orthogonal"}]
]
},
"iterator": {
"type": "bucket",
"sorting_keys": [
["s_ent_def", "num_tokens"],
["t_ent_def", "num_tokens"],
["s_ent_alias", "list_num_tokens"],
["t_ent_alias", "list_num_tokens"],
["s_ent_name", "num_tokens"],
["t_ent_name", "num_tokens"]
],
"batch_size": 32
},
"trainer": {
"optimizer": "adam",
"num_epochs": 50,
"patience": 10,
"validation_metric": "+f1_score",
"cuda_device": 0
}
}
Hi,
I am trying to use your code to reproduce the results, but I have hit a brick wall. I managed to deploy the application on a CPU-only system, where I get around 2 to 3 it/s.
Now I am trying to use a GPU (Quadro P4000), but I do not get any speedup at all: I remain at 1 it/s while the GPU is running at full power. After a while I also get an out-of-memory error (8 GB).
Is this something you also encountered and fixed?
As a caveat: I am using nvidia-docker:
docker run --runtime=nvidia --rm reinoldus/ontoemma:latest bash /ontoemma/run_emma.sh cuda
The docker-repo is here: https://github.com/reinoldus/ontoemma
{ "dataset_reader": { "type": "ontology_matcher", "name_token_indexer": { "tokens": { "type": "single_id", "lowercase_tokens": true }, "token_characters": { "type": "characters" } }, "token_only_indexer": { "tokens": { "type": "single_id", "lowercase_tokens": true } } }, "train_data_path": "/ontoemma/data/ontoemma.context.train", "validation_data_path": "/ontoemma/data/ontoemma.context.dev", "model": { "type": "ontoemmaNN", "name_embedder": { "tokens": { "type": "embedding", "pretrained_file": "/ontoemma/data/weights100.txt.gz", "embedding_dim": 100, "trainable": false }, "token_characters": { "type": "character_encoding", "embedding": { "embedding_dim": 100 }, "encoder": { "type": "cnn", "embedding_dim": 100, "num_filters": 50, "ngram_filter_sizes": [4, 5] }, "dropout": 0.2 } }, "definition_embedder": { "tokens": { "type": "embedding", "pretrained_file": "/ontoemma/data/weights100.txt.gz", "embedding_dim": 100, "trainable": false } }, "name_encoder": { "type": "lstm", "input_size": 200, "hidden_size": 100, "num_layers": 2, "dropout": 0.2, "bidirectional": true }, "definition_encoder": { "type": "lstm", "input_size": 100, "hidden_size": 100, "num_layers": 2, "dropout": 0.2, "bidirectional": true }, "siamese_feedforward": { "input_dim": 600, "num_layers": 2, "hidden_dims": 100, "activations": "relu", "dropout": 0.2 }, "decision_feedforward": { "input_dim": 232, "num_layers": 2, "hidden_dims": [232, 1], "activations": ["relu", "linear"], "dropout": [0.2, 0.0] }, "initializer": [ [".*linear_layers.*weight", {"type": "xavier_normal"}], [".*encoder.*module.*weight.*", {"type": "orthogonal"}] ] }, "iterator": { "type": "bucket", "sorting_keys": [ ["s_ent_def", "num_tokens"], ["t_ent_def", "num_tokens"], ["s_ent_alias", "list_num_tokens"], ["t_ent_alias", "list_num_tokens"], ["s_ent_name", "num_tokens"], ["t_ent_name", "num_tokens"] ], "batch_size": 32 }, "trainer": { "optimizer": "adam", "num_epochs": 50, "patience": 10,
"validation_metric": "+f1_score", "cuda_device": 0 } }