From e048d01dafdedc6af308afb30c0ca92413dade93 Mon Sep 17 00:00:00 2001
From: shokoufeh-monjezi <68440803+shokoufeh-monjezi@users.noreply.github.com>
Date: Fri, 19 Feb 2021 13:04:44 -0800
Subject: [PATCH 1/3] Update inference.py

---
 inference.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/inference.py b/inference.py
index 791b09a..5c8df2c 100644
--- a/inference.py
+++ b/inference.py
@@ -51,9 +51,9 @@ def infer(flowtron_path, waveglow_path, output_dir, text, speaker_id, n_frames,
     # load flowtron
     model = Flowtron(**model_config).cuda()
-    state_dict = torch.load(flowtron_path, map_location='cpu')['state_dict']
-    model.load_state_dict(state_dict)
+    model = torch.load(flowtron_path, map_location='cpu')['model']
     model.eval()
+    model.cuda()
     print("Loaded checkpoint '{}')" .format(flowtron_path))

     ignore_keys = ['training_files', 'validation_files']
@@ -73,8 +73,8 @@ def infer(flowtron_path, waveglow_path, output_dir, text, speaker_id, n_frames,
     for k in range(len(attentions)):
         attention = torch.cat(attentions[k]).cpu().numpy()
         fig, axes = plt.subplots(1, 2, figsize=(16, 4))
-        axes[0].imshow(mels[0].cpu().numpy(), origin='bottom', aspect='auto')
-        axes[1].imshow(attention[:, 0].transpose(), origin='bottom', aspect='auto')
+        axes[0].imshow(mels[0].cpu().numpy(), origin='lower', aspect='auto')
+        axes[1].imshow(attention[:, 0].transpose(), origin='lower', aspect='auto')
         fig.savefig(os.path.join(output_dir, 'sid{}_sigma{}_attnlayer{}.png'.format(speaker_id, sigma, k)))
         plt.close("all")

From b2f8480a58629cfe00a07b783ce3e68976cb9de9 Mon Sep 17 00:00:00 2001
From: shokoufeh-monjezi <68440803+shokoufeh-monjezi@users.noreply.github.com>
Date: Thu, 26 Aug 2021 16:07:28 -0700
Subject: [PATCH 2/3] Created using Colaboratory

---
 GNMT_Tensorflow.ipynb | 2835 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 2835 insertions(+)
 create mode 100644 GNMT_Tensorflow.ipynb

diff --git a/GNMT_Tensorflow.ipynb b/GNMT_Tensorflow.ipynb
new file mode 100644
index 0000000..e4a6f6c
--- /dev/null
+++ b/GNMT_Tensorflow.ipynb
@@ -0,0 +1,2835 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.6.9"
+    },
+    "colab": {
+      "name": "GNMT-Tensorflow.ipynb",
+      "provenance": [],
+      "include_colab_link": true
+    },
+    "accelerator": "GPU"
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "\"Open"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "SkmdV0cgfmEl"
+      },
+      "source": [
+        "!nvidia-smi\n"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "SXAgXZr8fvZp"
+      },
+      "source": [
+        "The code below checks whether a Tensor Core GPU is present.\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "E1tQXGOAfp1y"
+      },
+      "source": [
+        "\n",
+        "from tensorflow.python.client import device_lib\n",
+        "\n",
+        "def check_tensor_core_gpu_present():\n",
+        "    local_device_protos = device_lib.list_local_devices()\n",
+        "    for line in local_device_protos:\n",
+        "        if \"compute capability\" in str(line):\n",
+        "            compute_capability = float(line.physical_device_desc.split(\"compute capability: \")[-1])\n",
+        "            if compute_capability >= 7.0:\n",
+        "                return True\n",
+        "    return False\n",
+        "\n",
+        "print(\"Tensor Core GPU Present:\", check_tensor_core_gpu_present())\n",
+        "tensor_core_gpu = check_tensor_core_gpu_present()"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "2wISKppw4AI5",
+        "outputId": "a8f66ee1-664a-4712-e2c7-f24fc6f60ecc"
+      },
+      "source": [
+        "!git clone https://github.com/NVIDIA/DeepLearningExamples"
+      ],
+      "execution_count": 1,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "Cloning into 'DeepLearningExamples'...\n",
+            "remote: Enumerating objects: 26402, done.\u001b[K\n",
+            "remote: Counting objects: 100% (2053/2053), done.\u001b[K\n",
+            "remote: Compressing objects: 100% (1144/1144), done.\u001b[K\n",
+            "remote: Total 26402 (delta 891), reused 1904 (delta 870), pack-reused 24349\u001b[K\n",
+            "Receiving objects: 100% (26402/26402), 77.00 MiB | 21.83 MiB/s, done.\n",
+            "Resolving deltas: 100% (19399/19399), done.\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "jAaJOkB84UZN",
+        "outputId": "b0826569-70af-49e2-84aa-f0ac5e824e9a"
+      },
+      "source": [
+        "import os\n",
+        "\n",
+        "WORKSPACE_DIR='/content/DeepLearningExamples/TensorFlow/Translation/GNMT'\n",
+        "os.chdir(WORKSPACE_DIR)\n",
+        "print(os.getcwd())"
+      ],
+      "execution_count": 2,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "/content/DeepLearningExamples/TensorFlow/Translation/GNMT\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "g5hUDmYi5Air",
+        "outputId": "29e38306-6b21-4b61-a5c4-d05486c3a7e9"
+      },
+      "source": [
+        "!ls\n"
+      ],
+      "execution_count": 3,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "attention_wrapper.py\testimator.py\t model.py  requirements.txt\n",
+            "beam_search_decoder.py\texamples\t nmt.py    scripts\n",
+            "benchmark_hooks.py\tgnmt_model.py\t NOTICE    utils\n",
+            "block_lstm.py\t\timg\t\t qa\t   variable_mgr\n",
+            "Dockerfile\t\tmodel_helper.py  README.md\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "BddCwhGY3--7"
+      },
+      "source": [
+        "### Download and preprocess the dataset.\n",
+        "\n",
+        "The GNMT v2 model was trained on the WMT16 English-German dataset, and newstest2014 is used as the test set. We use the scripts/wmt16_en_de.sh script, which automatically downloads and preprocesses the training and test datasets. By default, data is downloaded to the data directory (on the host). The data directory is mounted to the /workspace/gnmt/data location in the Docker container. "
+      ]
+    },
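+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "As the download script's own log message notes, it writes to data/wmt16_de_en by default and reads the OUTPUT_DIR environment variable to change that. The optional cell below is a minimal sketch of that override (the path shown is only an example); skip it to keep the default location, which the rest of this notebook assumes.\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {},
+      "source": [
+        "# Optional (a sketch): scripts/wmt16_en_de.sh reads OUTPUT_DIR, per its own\n",
+        "# log message. The path below is only an example; skip this cell to keep the\n",
+        "# default data/wmt16_de_en that the rest of the notebook assumes.\n",
+        "import os\n",
+        "os.environ['OUTPUT_DIR'] = '/content/wmt16_de_en'  # example path, not required\n"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },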
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "cmqq94Dv3--9",
+        "outputId": "e7fd21cb-c2fa-408c-9290-3ab8d28fc3b7"
+      },
+      "source": [
+        "pip install git+https://github.com/NVIDIA/dllogger.git"
+      ],
+      "execution_count": 4,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "Collecting git+https://github.com/NVIDIA/dllogger.git\n",
+            "  Cloning https://github.com/NVIDIA/dllogger.git to /tmp/pip-req-build-mkyvmrgx\n",
+            "  Running command git clone -q https://github.com/NVIDIA/dllogger.git /tmp/pip-req-build-mkyvmrgx\n",
+            "Building wheels for collected packages: DLLogger\n",
+            "  Building wheel for DLLogger (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+            "  Created wheel for DLLogger: filename=DLLogger-0.1.0-py3-none-any.whl size=5656 sha256=a46ecff807d65f3e475c7ffd49b569bd3ccd85c9999b0f3be65302bcbd60eefa\n",
+            "  Stored in directory: /tmp/pip-ephem-wheel-cache-jod60qin/wheels/db/ba/1b/87515aba93adffc7caccc21c0e93f80b70a857188790ce0436\n",
+            "Successfully built DLLogger\n",
+            "Installing collected packages: DLLogger\n",
+            "Successfully installed DLLogger-0.1.0\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "iebyj_up3---",
+        "outputId": "8cab5626-4063-4b76-d550-cb31beeb7a96"
+      },
+      "source": [
+        "!bash scripts/wmt16_en_de.sh"
+      ],
+      "execution_count": 5,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "Writing to data/wmt16_de_en. To change this, set the OUTPUT_DIR environment variable.\n",
+            "Downloading Europarl v7. This may take a while...\n",
+            "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
+            "                                 Dload  Upload   Total   Spent    Left  Speed\n",
+            "100  188M  100  188M    0     0  4299k      0  0:00:44  0:00:44 --:--:-- 4502k\n",
+            "Downloading Common Crawl corpus. This may take a while...\n",
+            "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
+            "                                 Dload  Upload   Total   Spent    Left  Speed\n",
+            "100  875M  100  875M    0     0  4005k      0  0:03:43  0:03:43 --:--:-- 4423k\n",
+            "Downloading News Commentary v11. This may take a while...\n",
+            "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
+            "                                 Dload  Upload   Total   Spent    Left  Speed\n",
+            "100 71.6M  100 71.6M    0     0  7876k      0  0:00:09  0:00:09 --:--:-- 10.8M\n",
+            "Downloading dev/test sets\n",
+            "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
+            "                                 Dload  Upload   Total   Spent    Left  Speed\n",
+            "100 21.7M  100 21.7M    0     0  5082k      0  0:00:04  0:00:04 --:--:-- 5394k\n",
+            "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
+            "                                 Dload  Upload   Total   Spent    Left  Speed\n",
+            "100 3696k  100 3696k    0     0  1205k      0  0:00:03  0:00:03 --:--:-- 1205k\n",
+            "Extracting all files...\n",
+            "europarl-v7.de-en.de\n",
+            "europarl-v7.de-en.en\n",
+            "commoncrawl.cs-en.annotation\n",
+            "commoncrawl.cs-en.cs\n",
+            "commoncrawl.cs-en.en\n",
+            "commoncrawl.de-en.annotation\n",
+            "commoncrawl.de-en.de\n",
+            "commoncrawl.de-en.en\n",
+            "commoncrawl.es-en.annotation\n",
+            "commoncrawl.es-en.en\n",
+            "commoncrawl.es-en.es\n",
+            "commoncrawl.fr-en.annotation\n",
+            "commoncrawl.fr-en.en\n",
+            "commoncrawl.fr-en.fr\n",
+            "commoncrawl.ru-en.annotation\n",
+            "commoncrawl.ru-en.en\n",
+            "commoncrawl.ru-en.ru\n",
+            "training-parallel-nc-v11/\n",
+            "training-parallel-nc-v11/news-commentary-v11.ru-en.ru\n",
+            "training-parallel-nc-v11/news-commentary-v11.cs-en.en\n",
+            "training-parallel-nc-v11/news-commentary-v11.de-en.de\n",
+            "training-parallel-nc-v11/news-commentary-v11.ru-en.en\n",
+            "training-parallel-nc-v11/news-commentary-v11.cs-en.cs\n",
+            "training-parallel-nc-v11/news-commentary-v11.de-en.en\n",
+            "dev/\n",
+            "dev/newstest2009-ref.fr.sgm\n",
+            "dev/newstest2013.es\n",
+            "dev/newstest2014-deen-src.de.sgm\n",
+            "dev/newstest2015-ruen-src.ru.sgm\n",
+            "dev/newstest2010-ref.de.sgm\n",
+            "dev/newstest2012-src.fr.sgm\n",
+            "dev/newstest2014-ruen-ref.ru.sgm\n",
+            "dev/news-test2008.en\n",
+            "dev/news-test2008.es\n",
+            "dev/newstest2009-ref.hu.sgm\n",
+            "dev/newstest2014-csen-ref.en.sgm\n",
+            "dev/newsdiscussdev2015-enfr-src.en.sgm\n",
+            "dev/newstest2010.cs\n",
+            "dev/news-test2008-src.hu.sgm\n",
+            "dev/.newsdev2014-ref.en.sgm.swp\n",
+
"dev/newstest2011-ref.cs.sgm\n", + "dev/newstest2011-ref.fr.sgm\n", + "dev/newsdev2016-enro-ref.ro.sgm\n", + "dev/newstest2011.cs\n", + "dev/newstest2009.es\n", + "dev/newstest2011.en\n", + "dev/newsdev2015-enfi-src.en.sgm\n", + "dev/newstest2013.cs\n", + "dev/newstest2012-ref.es.sgm\n", + "dev/newstest2014-csen-ref.cs.sgm\n", + "dev/newsdev2014-src.hi.sgm\n", + "dev/newstest2015-encs-src.en.sgm\n", + "dev/newsdev2014-src.en.sgm\n", + "dev/newsdev2015-enfi-ref.fi.sgm\n", + "dev/newstest2011-ref.es.sgm\n", + "dev/newstest2013-src.ru.sgm\n", + "dev/newstest2012-src.de.sgm\n", + "dev/newsdev2016-tren-ref.en.sgm\n", + "dev/newstest2011-src.fr.sgm\n", + "dev/newssyscomb2009-src.de.sgm\n", + "dev/newstest2012-src.es.sgm\n", + "dev/newstest2010-ref.cs.sgm\n", + "dev/newstest2014-hien-ref.hi.sgm\n", + "dev/newssyscomb2009.de\n", + "dev/newstest2011-ref.en.sgm\n", + "dev/news-test2008.cs\n", + "dev/newstest2010.en\n", + "dev/newssyscomb2009.fr\n", + "dev/newstest2012-ref.en.sgm\n", + "dev/news-test2008.de\n", + "dev/newstest2011.de\n", + "dev/newstest2012.es\n", + "dev/newsdev2016-entr-ref.tr.sgm\n", + "dev/newstest2011-ref.de.sgm\n", + "dev/newsdev2014-ref.hi.sgm\n", + "dev/newstest2013-src.de.sgm\n", + "dev/newstest2012-ref.fr.sgm\n", + "dev/newstest2009.de\n", + "dev/newstest2012.en\n", + "dev/news-test2008-ref.cs.sgm\n", + "dev/newstest2013-ref.fr.sgm\n", + "dev/newsdev2014.hi\n", + "dev/newstest2011-src.cs.sgm\n", + "dev/newssyscomb2009-src.fr.sgm\n", + "dev/newstest2012.ru\n", + "dev/newstest2010-ref.es.sgm\n", + "dev/newstest2010-src.es.sgm\n", + "dev/news-test2008.fr\n", + "dev/newstest2009.en\n", + "dev/newstest2014-ruen-src.ru.sgm\n", + "dev/newssyscomb2009-ref.cs.sgm\n", + "dev/newstest2010-src.fr.sgm\n", + "dev/newssyscomb2009-src.en.sgm\n", + "dev/newstest2015-enru-ref.ru.sgm\n", + "dev/newstest2015-ende-ref.de.sgm\n", + "dev/newstest2013-ref.ru.sgm\n", + "dev/newssyscomb2009-src.it.sgm\n", + "dev/newsdiscusstest2015-enfr-src.en.sgm\n", + "dev/newstest2015-fien-ref.en.sgm\n", + "dev/newstest2010-src.en.sgm\n", + "dev/newstest2009.fr\n", + "dev/newstest2015-ruen-ref.en.sgm\n", + "dev/newstest2013-src.es.sgm\n", + "dev/newstest2014-hien-ref.en.sgm\n", + "dev/news-test2008-src.en.sgm\n", + "dev/newstest2012-ref.cs.sgm\n", + "dev/news-test2008-ref.es.sgm\n", + "dev/news-test2008-ref.fr.sgm\n", + "dev/newstest2014-ruen-ref.en.sgm\n", + "dev/news-test2008-src.es.sgm\n", + "dev/newstest2014-fren-src.en.sgm\n", + "dev/newstest2012-ref.de.sgm\n", + "dev/newstest2014-csen-src.cs.sgm\n", + "dev/newstest2014-csen-src.en.sgm\n", + "dev/newstest2011-src.de.sgm\n", + "dev/newssyscomb2009-src.cs.sgm\n", + "dev/newstest2015-enfi-ref.fi.sgm\n", + "dev/newstest2009-src.it.sgm\n", + "dev/newstest2010-src.de.sgm\n", + "dev/newstest2009-ref.cs.sgm\n", + "dev/newssyscomb2009-ref.es.sgm\n", + "dev/newstest2014-deen-src.en.sgm\n", + "dev/newsdiscusstest2015-fren-ref.en.sgm\n", + "dev/newstest2012.fr\n", + "dev/newsdiscusstest2015-enfr-ref.fr.sgm\n", + "dev/newsdev2016-enro-src.en.sgm\n", + "dev/newstest2009-src.es.sgm\n", + "dev/newstest2013-src.fr.sgm\n", + "dev/newstest2015-deen-src.de.sgm\n", + "dev/newsdev2015-fien-src.fi.sgm\n", + "dev/newsdiscusstest2015-fren-src.fr.sgm\n", + "dev/newstest2014-ruen-src.en.sgm\n", + "dev/newstest2012-src.en.sgm\n", + "dev/newstest2013.fr\n", + "dev/newstest2015-enru-src.en.sgm\n", + "dev/newstest2009-ref.es.sgm\n", + "dev/newstest2011.fr\n", + "dev/newstest2009-ref.en.sgm\n", + "dev/newstest2015-enfi-src.en.sgm\n", + "dev/newstest2009-src.xx.sgm\n", + 
"dev/newstest2015-encs-ref.cs.sgm\n", + "dev/newstest2013.ru\n", + "dev/newstest2009.cs\n", + "dev/newsdev2014.en\n", + "dev/newstest2014-fren-ref.fr.sgm\n", + "dev/news-test2008-ref.en.sgm\n", + "dev/newssyscomb2009.es\n", + "dev/news-test2008-src.cs.sgm\n", + "dev/newsdev2016-roen-src.ro.sgm\n", + "dev/.newstest2013-ref.en.sgm.swp\n", + "dev/newssyscomb2009-ref.hu.sgm\n", + "dev/newstest2010.de\n", + "dev/newstest2013-ref.cs.sgm\n", + "dev/newstest2013-ref.de.sgm\n", + "dev/newstest2009-src.cs.sgm\n", + "dev/newssyscomb2009.en\n", + "dev/newssyscomb2009-ref.it.sgm\n", + "dev/newstest2009-ref.it.sgm\n", + "dev/newstest2010-ref.fr.sgm\n", + "dev/newstest2015-csen-src.cs.sgm\n", + "dev/newsdev2016-entr-src.en.sgm\n", + "dev/newstest2010.es\n", + "dev/news-test2008-src.de.sgm\n", + "dev/newstest2013.en\n", + "dev/newsdev2016-roen-ref.en.sgm\n", + "dev/newstest2009-src.de.sgm\n", + "dev/newstest2010-ref.en.sgm\n", + "dev/newstest2011-src.es.sgm\n", + "dev/newssyscomb2009-ref.en.sgm\n", + "dev/newstest2014-fren-ref.en.sgm\n", + "dev/newstest2012.cs\n", + "dev/newstest2009-src.hu.sgm\n", + "dev/newstest2009-src.fr.sgm\n", + "dev/newstest2015-ende-src.en.sgm\n", + "dev/newstest2013-src.cs.sgm\n", + "dev/newstest2014-hien-src.hi.sgm\n", + "dev/news-test2008-ref.hu.sgm\n", + "dev/newstest2015-csen-ref.en.sgm\n", + "dev/newstest2013-ref.es.sgm\n", + "dev/newstest2013-ref.en.sgm\n", + "dev/newstest2010-src.cs.sgm\n", + "dev/newstest2010.fr\n", + "dev/newstest2015-deen-ref.en.sgm\n", + "dev/newstest2011.es\n", + "dev/newsdev2016-tren-src.tr.sgm\n", + "dev/newstest2013.de\n", + "dev/newstest2014-fren-src.fr.sgm\n", + "dev/newsdiscussdev2015-fren-ref.en.sgm\n", + "dev/newsdiscussdev2015-fren-src.fr.sgm\n", + "dev/newstest2014-deen-ref.de.sgm\n", + "dev/newstest2013-src.en.sgm\n", + "dev/newssyscomb2009-ref.fr.sgm\n", + "dev/newssyscomb2009-ref.de.sgm\n", + "dev/newstest2009-src.en.sgm\n", + "dev/newstest2009-ref.de.sgm\n", + "dev/newsdiscussdev2015-enfr-ref.fr.sgm\n", + "dev/newssyscomb2009.cs\n", + "dev/newstest2012-ref.ru.sgm\n", + "dev/newstest2014-hien-src.en.sgm\n", + "dev/news-test2008-src.fr.sgm\n", + "dev/newsdev2015-fien-ref.en.sgm\n", + "dev/newsdev2014-ref.en.sgm\n", + "dev/newstest2015-fien-src.fi.sgm\n", + "dev/news-test2008-ref.de.sgm\n", + "dev/newstest2012-src.ru.sgm\n", + "dev/newssyscomb2009-src.es.sgm\n", + "dev/newssyscomb2009-src.hu.sgm\n", + "dev/newstest2014-deen-ref.en.sgm\n", + "dev/newstest2012.de\n", + "dev/newstest2011-src.en.sgm\n", + "dev/newstest2012-src.cs.sgm\n", + "test/newstest2016-csen-ref.en.sgm\n", + "test/newstest2016-csen-src.cs.sgm\n", + "test/newstest2016-deen-ref.en.sgm\n", + "test/newstest2016-deen-src.de.sgm\n", + "test/newstest2016-encs-ref.cs.sgm\n", + "test/newstest2016-encs-src.en.sgm\n", + "test/newstest2016-ende-ref.de.sgm\n", + "test/newstest2016-ende-src.en.sgm\n", + "test/newstest2016-enfi-ref.fi.sgm\n", + "test/newstest2016-enfi-src.en.sgm\n", + "test/newstest2016-enro-ref.ro.sgm\n", + "test/newstest2016-enro-src.en.sgm\n", + "test/newstest2016-enru-ref.ru.sgm\n", + "test/newstest2016-enru-src.en.sgm\n", + "test/newstest2016-entr-ref.tr.sgm\n", + "test/newstest2016-entr-src.en.sgm\n", + "test/newstest2016-fien-ref.en.sgm\n", + "test/newstest2016-fien-src.fi.sgm\n", + "test/newstest2016-roen-ref.en.sgm\n", + "test/newstest2016-roen-src.ro.sgm\n", + "test/newstest2016-ruen-ref.en.sgm\n", + "test/newstest2016-ruen-src.ru.sgm\n", + "test/newstest2016-tren-ref.en.sgm\n", + "test/newstest2016-tren-src.tr.sgm\n", + "test/newstestB2016-enfi-ref.fi.sgm\n", 
+ "test/newstestB2016-enfi-src.en.sgm\n", + "4562102 data/wmt16_de_en/train.en\n", + "4562102 data/wmt16_de_en/train.de\n", + "Cloning moses for data processing\n", + "Cloning into 'data/wmt16_de_en/mosesdecoder'...\n", + "remote: Enumerating objects: 148070, done.\u001b[K\n", + "remote: Counting objects: 100% (498/498), done.\u001b[K\n", + "remote: Compressing objects: 100% (206/206), done.\u001b[K\n", + "remote: Total 148070 (delta 315), reused 433 (delta 289), pack-reused 147572\u001b[K\n", + "Receiving objects: 100% (148070/148070), 129.86 MiB | 17.29 MiB/s, done.\n", + "Resolving deltas: 100% (114341/114341), done.\n", + "HEAD is now at 8c5eaa1a1 Merge branch 'RELEASE-4.0' of github.com:jowagner/mosesdecoder\n", + "/content/DeepLearningExamples/TensorFlow/Translation/GNMT\n", + "Tokenizing data/wmt16_de_en/newstest2009.de...\n", + "Tokenizing data/wmt16_de_en/newstest2010.de...\n", + "Tokenizing data/wmt16_de_en/newstest2011.de...\n", + "Tokenizing data/wmt16_de_en/newstest2012.de...\n", + "Tokenizing data/wmt16_de_en/newstest2013.de...\n", + "Tokenizing data/wmt16_de_en/newstest2014.de...\n", + "Tokenizing data/wmt16_de_en/newstest2015.de...\n", + "Tokenizing data/wmt16_de_en/newstest2016.de...\n", + "Tokenizing data/wmt16_de_en/train.de...\n", + "Tokenizing data/wmt16_de_en/newstest2009.en...\n", + "Tokenizing data/wmt16_de_en/newstest2010.en...\n", + "Tokenizing data/wmt16_de_en/newstest2011.en...\n", + "Tokenizing data/wmt16_de_en/newstest2012.en...\n", + "Tokenizing data/wmt16_de_en/newstest2013.en...\n", + "Tokenizing data/wmt16_de_en/newstest2014.en...\n", + "Tokenizing data/wmt16_de_en/newstest2015.en...\n", + "Tokenizing data/wmt16_de_en/newstest2016.en...\n", + "Tokenizing data/wmt16_de_en/train.en...\n", + "Cleaning data/wmt16_de_en/newstest2009...\n", + "clean-corpus.perl: processing data/wmt16_de_en/newstest2009.de & .en to data/wmt16_de_en/newstest2009.clean, cutoff 1-80, ratio 9\n", + "\n", + "Input sentences: 2525 Output sentences: 2517\n", + "Cleaning data/wmt16_de_en/newstest2009.tok...\n", + "clean-corpus.perl: processing data/wmt16_de_en/newstest2009.tok.de & .en to data/wmt16_de_en/newstest2009.tok.clean, cutoff 1-80, ratio 9\n", + "\n", + "Input sentences: 2525 Output sentences: 2509\n", + "Cleaning data/wmt16_de_en/newstest2010...\n", + "clean-corpus.perl: processing data/wmt16_de_en/newstest2010.de & .en to data/wmt16_de_en/newstest2010.clean, cutoff 1-80, ratio 9\n", + "\n", + "Input sentences: 2489 Output sentences: 2488\n", + "Cleaning data/wmt16_de_en/newstest2010.tok...\n", + "clean-corpus.perl: processing data/wmt16_de_en/newstest2010.tok.de & .en to data/wmt16_de_en/newstest2010.tok.clean, cutoff 1-80, ratio 9\n", + "\n", + "Input sentences: 2489 Output sentences: 2485\n", + "Cleaning data/wmt16_de_en/newstest2011...\n", + "clean-corpus.perl: processing data/wmt16_de_en/newstest2011.de & .en to data/wmt16_de_en/newstest2011.clean, cutoff 1-80, ratio 9\n", + "\n", + "Input sentences: 3003 Output sentences: 3002\n", + "Cleaning data/wmt16_de_en/newstest2011.tok...\n", + "clean-corpus.perl: processing data/wmt16_de_en/newstest2011.tok.de & .en to data/wmt16_de_en/newstest2011.tok.clean, cutoff 1-80, ratio 9\n", + "\n", + "Input sentences: 3003 Output sentences: 2994\n", + "Cleaning data/wmt16_de_en/newstest2012...\n", + "clean-corpus.perl: processing data/wmt16_de_en/newstest2012.de & .en to data/wmt16_de_en/newstest2012.clean, cutoff 1-80, ratio 9\n", + "\n", + "Input sentences: 3003 Output sentences: 3000\n", + "Cleaning 
data/wmt16_de_en/newstest2012.tok...\n", + "clean-corpus.perl: processing data/wmt16_de_en/newstest2012.tok.de & .en to data/wmt16_de_en/newstest2012.tok.clean, cutoff 1-80, ratio 9\n", + "\n", + "Input sentences: 3003 Output sentences: 2996\n", + "Cleaning data/wmt16_de_en/newstest2013...\n", + "clean-corpus.perl: processing data/wmt16_de_en/newstest2013.de & .en to data/wmt16_de_en/newstest2013.clean, cutoff 1-80, ratio 9\n", + "\n", + "Input sentences: 3000 Output sentences: 2997\n", + "Cleaning data/wmt16_de_en/newstest2013.tok...\n", + "clean-corpus.perl: processing data/wmt16_de_en/newstest2013.tok.de & .en to data/wmt16_de_en/newstest2013.tok.clean, cutoff 1-80, ratio 9\n", + "\n", + "Input sentences: 3000 Output sentences: 2990\n", + "Cleaning data/wmt16_de_en/newstest2014...\n", + "clean-corpus.perl: processing data/wmt16_de_en/newstest2014.de & .en to data/wmt16_de_en/newstest2014.clean, cutoff 1-80, ratio 9\n", + "\n", + "Input sentences: 3003 Output sentences: 3003\n", + "Cleaning data/wmt16_de_en/newstest2014.tok...\n", + "clean-corpus.perl: processing data/wmt16_de_en/newstest2014.tok.de & .en to data/wmt16_de_en/newstest2014.tok.clean, cutoff 1-80, ratio 9\n", + "\n", + "Input sentences: 3003 Output sentences: 3002\n", + "Cleaning data/wmt16_de_en/newstest2015...\n", + "clean-corpus.perl: processing data/wmt16_de_en/newstest2015.de & .en to data/wmt16_de_en/newstest2015.clean, cutoff 1-80, ratio 9\n", + "\n", + "Input sentences: 2169 Output sentences: 2169\n", + "Cleaning data/wmt16_de_en/newstest2015.tok...\n", + "clean-corpus.perl: processing data/wmt16_de_en/newstest2015.tok.de & .en to data/wmt16_de_en/newstest2015.tok.clean, cutoff 1-80, ratio 9\n", + "\n", + "Input sentences: 2169 Output sentences: 2167\n", + "Cleaning data/wmt16_de_en/newstest2016...\n", + "clean-corpus.perl: processing data/wmt16_de_en/newstest2016.de & .en to data/wmt16_de_en/newstest2016.clean, cutoff 1-80, ratio 9\n", + "\n", + "Input sentences: 2999 Output sentences: 2998\n", + "Cleaning data/wmt16_de_en/newstest2016.tok...\n", + "clean-corpus.perl: processing data/wmt16_de_en/newstest2016.tok.de & .en to data/wmt16_de_en/newstest2016.tok.clean, cutoff 1-80, ratio 9\n", + "\n", + "Input sentences: 2999 Output sentences: 2997\n", + "Cleaning data/wmt16_de_en/train...\n", + "clean-corpus.perl: processing data/wmt16_de_en/train.de & .en to data/wmt16_de_en/train.clean, cutoff 1-80, ratio 9\n", + "..........(100000)..........(200000)..........(300000)..........(400000)..........(500000)..........(600000)..........(700000)..........(800000)..........(900000)..........(1000000)..........(1100000)..........(1200000)..........(1300000)..........(1400000)..........(1500000)..........(1600000)..........(1700000)..........(1800000)..........(1900000)..........(2000000)..........(2100000)..........(2200000)..........(2300000)..........(2400000)..........(2500000)..........(2600000)..........(2700000)..........(2800000)..........(2900000)..........(3000000)..........(3100000)..........(3200000)..........(3300000)..........(3400000)..........(3500000)..........(3600000)..........(3700000)..........(3800000)..........(3900000)..........(4000000)..........(4100000)..........(4200000)..........(4300000)..........(4400000)..........(4500000)......\n", + "Input sentences: 4562102 Output sentences: 4524868\n", + "Cleaning data/wmt16_de_en/train.tok...\n", + "clean-corpus.perl: processing data/wmt16_de_en/train.tok.de & .en to data/wmt16_de_en/train.tok.clean, cutoff 1-80, ratio 9\n", + 
"..........(100000)..........(200000)..........(300000)..........(400000)..........(500000)..........(600000)..........(700000)..........(800000)..........(900000)..........(1000000)..........(1100000)..........(1200000)..........(1300000)..........(1400000)..........(1500000)..........(1600000)..........(1700000)..........(1800000)..........(1900000)..........(2000000)..........(2100000)..........(2200000)..........(2300000)..........(2400000)..........(2500000)..........(2600000)..........(2700000)..........(2800000)..........(2900000)..........(3000000)..........(3100000)..........(3200000)..........(3300000)..........(3400000)..........(3500000)..........(3600000)..........(3700000)..........(3800000)..........(3900000)..........(4000000)..........(4100000)..........(4200000)..........(4300000)..........(4400000)..........(4500000)......\n", + "Input sentences: 4562102 Output sentences: 4500966\n", + "Processed 1 lines\n", + "Processed 100001 lines\n", + "Processed 200001 lines\n", + "Processed 300001 lines\n", + "Processed 400001 lines\n", + "Processed 500001 lines\n", + "Processed 600001 lines\n", + "Processed 700001 lines\n", + "Processed 800001 lines\n", + "Processed 900001 lines\n", + "Processed 1000001 lines\n", + "Processed 1100001 lines\n", + "Processed 1200001 lines\n", + "Processed 1300001 lines\n", + "Processed 1400001 lines\n", + "Processed 1500001 lines\n", + "Processed 1600001 lines\n", + "Processed 1700001 lines\n", + "Processed 1800001 lines\n", + "Processed 1900001 lines\n", + "Processed 2000001 lines\n", + "Processed 2100001 lines\n", + "Processed 2200001 lines\n", + "Processed 2300001 lines\n", + "Processed 2400001 lines\n", + "Processed 2500001 lines\n", + "Processed 2600001 lines\n", + "Processed 2700001 lines\n", + "Processed 2800001 lines\n", + "Processed 2900001 lines\n", + "Processed 3000001 lines\n", + "Processed 3100001 lines\n", + "Processed 3200001 lines\n", + "Processed 3300001 lines\n", + "Processed 3400001 lines\n", + "Processed 3500001 lines\n", + "Processed 3600001 lines\n", + "Processed 3700001 lines\n", + "Processed 3800001 lines\n", + "Processed 3900001 lines\n", + "Processed 4000001 lines\n", + "Processed 4100001 lines\n", + "Processed 4200001 lines\n", + "Processed 4300001 lines\n", + "Processed 4400001 lines\n", + "Processed 4500001 lines\n", + "Skipped: 432775, Valid: 4068191, Valid ratio 0.9038484183173123\n", + "Character frequency: Counter({' ': 95933432, 'e': 52612907, 't': 39531367, 'o': 34443857, 'a': 34315161, 'i': 32288996, 'n': 31316898, 's': 27807191, 'r': 27547036, 'h': 18575168, 'l': 17430932, 'd': 14356421, 'c': 13770269, 'u': 12722046, 'm': 10694282, 'p': 9849316, 'f': 9222652, 'g': 7472957, 'y': 6739113, 'w': 6298874, 'b': 5759405, 'v': 4603232, ',': 4566506, '.': 4522010, '\\n': 4068191, 'k': 2413992, 'T': 1556871, 'I': 1475221, 'C': 1106541, 'S': 1067920, 'A': 1017606, 'E': 997734, '-': 964780, 'M': 909863, 'x': 904216, 'P': 853835, ';': 846421, '0': 823086, '&': 762925, 'q': 705787, '1': 572547, 'W': 556727, 'B': 528579, '2': 483945, 'j': 473533, 'F': 463531, 'R': 456707, 'U': 443074, 'D': 439002, 'H': 429447, 'G': 409219, 'O': 406986, 'L': 386464, 'z': 384202, 'N': 365973, '9': 322935, '(': 321500, ')': 317979, '3': 251733, ':': 232153, '5': 219534, '4': 196479, 'V': 190902, 'K': 155351, '8': 150936, '6': 143849, 'J': 136334, '7': 135778, '?': 128658, '/': 128202, 'Y': 124120, '#': 91426, '!': 89656, '%': 53827, 'X': 37529, 'Z': 37248, 'é': 36739, 'Q': 36501, 'ü': 24350, 'ö': 18333, 'ä': 15833, 'á': 14049, '_': 12835, '+': 
12704, '*': 12417, 'ó': 10862, '®': 9919, 'í': 9626, '$': 9368, 'è': 7354, '{': 7175, '}': 6843, 'à': 6610, '=': 6534, '´': 5665, '»': 5040, '@': 4481, 'ñ': 4450, 'ç': 3939, '«': 3757, 'ß': 3713, '°': 3655, '²': 3414, '`': 3308, 'ô': 3008, 'ú': 2761, 'ã': 2587, 'å': 2511, 'ê': 2055, '\\\\': 1984, 'â': 1731, 'Ö': 1673, '·': 1533, '\\xad': 1489, '©': 1483, '\\x92': 1455, 'ø': 1342, '§': 1166, 'ò': 1142, 'Ü': 1121, 'î': 1048, '£': 1036, 'ï': 1005, 'ë': 842, 'æ': 784, '\\x96': 706, 'É': 698, '\\x93': 677, 'º': 638, '\\x94': 628, '~': 587, 'ð': 563, 'Á': 518, '³': 489, 'õ': 487, 'ù': 486, 'Ã': 457, 'Å': 430, '¿': 411, 'ì': 407, 'ý': 399, '½': 374, 'Ø': 367, 'Ä': 362, 'û': 347, 'µ': 295, 'Â': 278, 'Ó': 258, '\\x97': 256, '×': 249, '^': 234, '\\x80': 217, '±': 205, '\\x99': 195, 'Í': 181, '\\x95': 176, '¡': 173, '¥': 154, 'Ñ': 143, 'ÿ': 138, 'Ç': 125, '÷': 116, 'Ú': 113, 'À': 110, '\\x91': 110, '¼': 106, '¹': 102, 'Ê': 98, 'Ý': 86, '¨': 77, 'È': 71, 'þ': 66, 'Î': 66, '\\x84': 61, '¬': 59, 'ª': 58, '¶': 58, 'Ð': 57, 'Ô': 55, 'Þ': 46, 'Õ': 44, 'Ì': 42, 'Ï': 37, '¤': 37, '¾': 34, '\\x82': 33, '¢': 26, 'Ò': 24, 'Æ': 23, 'Û': 19, '\\x9b': 17, '\\x8b': 17, '\\x8a': 13, 'Ë': 12, '¸': 12, '¦': 11, '\\x9a': 11, '¯': 9, 'Ù': 7, '\\x9c': 7, '\\x9f': 6, '\\x8c': 6, '\\x9d': 4, '\\x7f': 3, '\\x8d': 3, '\\x9e': 2, '\\x81': 2, '\\x88': 1, '\\x83': 1})\n", + "Processed 1 lines\n", + "Skipped: 64, Valid: 5100, Valid ratio 0.9876065065840434\n", + "Character frequency: Counter({' ': 104338, 'e': 55556, 't': 40764, 'a': 37614, 'o': 36131, 'n': 32380, 'i': 31842, 's': 30404, 'r': 28677, 'h': 21952, 'l': 17895, 'd': 16517, 'u': 13567, 'c': 13291, 'm': 10677, 'p': 9851, 'g': 9399, 'f': 9179, 'w': 7721, 'y': 7501, 'b': 6281, '\\n': 5100, '.': 5093, ',': 4769, 'v': 4442, 'k': 3167, ';': 3153, '&': 3087, 'q': 2141, 'T': 1863, 'A': 1350, 'I': 1345, 'S': 1335, '0': 1225, '-': 1202, 'M': 1022, 'B': 1007, 'C': 973, 'x': 897, '1': 800, 'H': 783, 'P': 721, 'W': 653, '2': 614, 'R': 612, 'F': 578, 'G': 577, 'D': 553, 'L': 513, 'E': 506, 'j': 504, 'z': 462, 'N': 406, 'U': 371, '5': 353, 'O': 350, '3': 317, ':': 303, '4': 292, '9': 285, 'K': 283, 'J': 266, '6': 202, 'V': 188, '8': 186, '7': 182, '(': 172, ')': 172, 'Y': 119, '?': 93, '$': 88, 'ü': 75, 'ö': 64, '/': 50, 'Q': 45, 'Z': 40, 'X': 38, '£': 30, '%': 30, '!': 27, 'ä': 21, '#': 17, 'é': 10, 'ß': 10, '+': 4, 'ç': 4, 'à': 3, '@': 3, '²': 3, '`': 2, '*': 2, 'á': 2, 'Ö': 1, 'ë': 1, 'í': 1})\n", + "Cloning into 'data/wmt16_de_en/subword-nmt'...\n", + "remote: Enumerating objects: 580, done.\u001b[K\n", + "remote: Counting objects: 100% (4/4), done.\u001b[K\n", + "remote: Compressing objects: 100% (4/4), done.\u001b[K\n", + "remote: Total 580 (delta 0), reused 1 (delta 0), pack-reused 576\u001b[K\n", + "Receiving objects: 100% (580/580), 237.41 KiB | 1.67 MiB/s, done.\n", + "Resolving deltas: 100% (349/349), done.\n", + "HEAD is now at 48ba99e fix typo in previous commit\n", + "/content/DeepLearningExamples/TensorFlow/Translation/GNMT\n", + "Learning BPE with merge_ops=32000. This may take a while...\n", + "data/wmt16_de_en/subword-nmt/learn_bpe.py:267: DeprecationWarning: this script's location has moved to /content/DeepLearningExamples/TensorFlow/Translation/GNMT/data/wmt16_de_en/subword-nmt/subword_nmt. This symbolic link will be removed in a future version. 
Please point to the new location, or install the package and use the command 'subword-nmt'\n",
+            "  DeprecationWarning\n",
+            "Apply BPE with merge_ops=32000 to tokenized files...\n",
+            "data/wmt16_de_en/subword-nmt/apply_bpe.py:331: DeprecationWarning: this script's location has moved to /content/DeepLearningExamples/TensorFlow/Translation/GNMT/data/wmt16_de_en/subword-nmt/subword_nmt. This symbolic link will be removed in a future version. Please point to the new location, or install the package and use the command 'subword-nmt'\n",
+            "  DeprecationWarning\n",
+            "data/wmt16_de_en/subword-nmt/apply_bpe.py:348: ResourceWarning: unclosed file <_io.TextIOWrapper name='data/wmt16_de_en/bpe.32000' mode='r' encoding='UTF-8'>\n",
+            "  args.codes = codecs.open(args.codes.name, encoding='utf-8')\n",
+            "ResourceWarning: Enable tracemalloc to get the object allocation traceback\n",
+            "data/wmt16_de_en/newstest2009.tok.bpe.32000.en\n",
+            "data/wmt16_de_en/newstest2010.tok.bpe.32000.en\n",
+            "data/wmt16_de_en/newstest2011.tok.bpe.32000.en\n",
+            "data/wmt16_de_en/newstest2012.tok.bpe.32000.en\n",
+            "data/wmt16_de_en/newstest2013.tok.bpe.32000.en\n",
+            "data/wmt16_de_en/newstest2014.tok.bpe.32000.en\n",
+            "data/wmt16_de_en/newstest2015.tok.bpe.32000.en\n",
+            "data/wmt16_de_en/newstest2016.tok.bpe.32000.en\n",
+            "data/wmt16_de_en/train.tok.bpe.32000.en\n",
+            "data/wmt16_de_en/newstest2009.tok.clean.bpe.32000.en\n",
+            "data/wmt16_de_en/newstest2010.tok.clean.bpe.32000.en\n",
+            "data/wmt16_de_en/newstest2011.tok.clean.bpe.32000.en\n",
+            "data/wmt16_de_en/newstest2012.tok.clean.bpe.32000.en\n",
+            "data/wmt16_de_en/newstest2013.tok.clean.bpe.32000.en\n",
+            "data/wmt16_de_en/newstest2014.tok.clean.bpe.32000.en\n",
+            "data/wmt16_de_en/newstest2015.tok.clean.bpe.32000.en\n",
+            "data/wmt16_de_en/newstest2016.tok.clean.bpe.32000.en\n",
+            "data/wmt16_de_en/newstest_dev.tok.clean.bpe.32000.en\n",
+            "data/wmt16_de_en/train.tok.clean.bpe.32000.en\n",
+            "data/wmt16_de_en/newstest2009.tok.bpe.32000.de\n",
+            "data/wmt16_de_en/newstest2010.tok.bpe.32000.de\n",
+            "data/wmt16_de_en/newstest2011.tok.bpe.32000.de\n",
+            "data/wmt16_de_en/newstest2012.tok.bpe.32000.de\n",
+            "data/wmt16_de_en/newstest2013.tok.bpe.32000.de\n",
+            "data/wmt16_de_en/newstest2014.tok.bpe.32000.de\n",
+            "data/wmt16_de_en/newstest2015.tok.bpe.32000.de\n",
+            "data/wmt16_de_en/newstest2016.tok.bpe.32000.de\n",
+            "data/wmt16_de_en/train.tok.bpe.32000.de\n",
+            "data/wmt16_de_en/newstest2009.tok.clean.bpe.32000.de\n",
+            "data/wmt16_de_en/newstest2010.tok.clean.bpe.32000.de\n",
+            "data/wmt16_de_en/newstest2011.tok.clean.bpe.32000.de\n",
+            "data/wmt16_de_en/newstest2012.tok.clean.bpe.32000.de\n",
+            "data/wmt16_de_en/newstest2013.tok.clean.bpe.32000.de\n",
+            "data/wmt16_de_en/newstest2014.tok.clean.bpe.32000.de\n",
+            "data/wmt16_de_en/newstest2015.tok.clean.bpe.32000.de\n",
+            "data/wmt16_de_en/newstest2016.tok.clean.bpe.32000.de\n",
+            "data/wmt16_de_en/newstest_dev.tok.clean.bpe.32000.de\n",
+            "data/wmt16_de_en/train.tok.clean.bpe.32000.de\n",
+            "data/wmt16_de_en/subword-nmt/get_vocab.py:60: DeprecationWarning: this script's location has moved to /content/DeepLearningExamples/TensorFlow/Translation/GNMT/data/wmt16_de_en/subword-nmt/subword_nmt. This symbolic link will be removed in a future version. Please point to the new location, or install the package and use the command 'subword-nmt'\n",
+            "  DeprecationWarning\n",
+            "All done.\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
You can modify the results directory using the --output_dir argument.\n", + "\n", + "To launch mixed precision training on 1 GPU, run:\n", + "```\n", + "python nmt.py --output_dir=results --batch_size=128 --learning_rate=5e-4 --amp\n", + "```\n", + "To launch mixed precision training on 8 GPUs, run:\n", + "```\n", + "python nmt.py --output_dir=results --batch_size=1024 --num_gpus=8 --learning_rate=2e-3 --amp\n", + "```\n", + "To launch FP32 (TF32 on NVIDIA Ampere GPUs) training on 1 GPU, run:\n", + "```\n", + "python nmt.py --output_dir=results --batch_size=128 --learning_rate=5e-4\n", + "```\n", + "To launch FP32 (TF32 on NVIDIA Ampere GPUs) training on 8 GPUs, run:\n", + "```\n", + "python nmt.py --output_dir=results --batch_size=1024 --num_gpus=8 --learning_rate=2e-3\n", + "```" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "4NXf8TF1Rbn_", + "outputId": "ded7483f-3b89-47cc-ca73-5135a6752e6c", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "!pip uninstall tensorflow" + ], + "execution_count": 8, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Found existing installation: tensorflow 2.6.0\n", + "Uninstalling tensorflow-2.6.0:\n", + " Would remove:\n", + " /usr/local/bin/estimator_ckpt_converter\n", + " /usr/local/bin/import_pb_to_tensorboard\n", + " /usr/local/bin/saved_model_cli\n", + " /usr/local/bin/tensorboard\n", + " /usr/local/bin/tf_upgrade_v2\n", + " /usr/local/bin/tflite_convert\n", + " /usr/local/bin/toco\n", + " /usr/local/bin/toco_from_protos\n", + " /usr/local/lib/python3.7/dist-packages/tensorflow-2.6.0.dist-info/*\n", + " /usr/local/lib/python3.7/dist-packages/tensorflow/*\n", + "Proceed (y/n)? y\n", + " Successfully uninstalled tensorflow-2.6.0\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "NbC0L7NsSIAC", + "outputId": "f0687819-7df7-42b2-aa8e-357e706fe725", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "!pip install tensorflow==1.13.2" + ], + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Collecting tensorflow==1.13.2\n", + " Downloading tensorflow-1.13.2-cp37-cp37m-manylinux1_x86_64.whl (92.7 MB)\n", + "\u001b[K |████████████████████████████████| 92.7 MB 11 kB/s \n", + "\u001b[?25hRequirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow==1.13.2) (1.1.0)\n", + "Requirement already satisfied: protobuf>=3.6.1 in /usr/local/lib/python3.7/dist-packages (from tensorflow==1.13.2) (3.17.3)\n", + "Requirement already satisfied: absl-py>=0.1.6 in /usr/local/lib/python3.7/dist-packages (from tensorflow==1.13.2) (0.12.0)\n", + "Requirement already satisfied: astor>=0.6.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow==1.13.2) (0.8.1)\n", + "Requirement already satisfied: numpy<2.0,>=1.16.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow==1.13.2) (1.19.5)\n", + "Collecting tensorboard<1.14.0,>=1.13.0\n", + " Downloading tensorboard-1.13.1-py3-none-any.whl (3.2 MB)\n", + "\u001b[K |████████████████████████████████| 3.2 MB 54.6 MB/s \n", + "\u001b[?25hCollecting tensorflow-estimator<1.14.0rc0,>=1.13.0\n", + " Downloading tensorflow_estimator-1.13.0-py2.py3-none-any.whl (367 kB)\n", + "\u001b[K |████████████████████████████████| 367 kB 49.1 MB/s \n", + "\u001b[?25hRequirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow==1.13.2) (1.15.0)\n", + "Requirement already satisfied: keras-preprocessing>=1.0.5 
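The four commands above keep 128 sentences per GPU (so the global batch scales linearly with GPU count) and pair each batch size with a documented learning rate (5e-4 at batch 128, 2e-3 at batch 1024). A minimal sketch of a launcher that reproduces exactly these four configurations — `launch_gnmt` and its defaults are hypothetical helpers for illustration; only the `nmt.py` flags come from the commands above:

```python
import subprocess

def launch_gnmt(num_gpus=1, amp=True, output_dir="results"):
    """Compose and run one of the four nmt.py commands shown above (sketch)."""
    # Only the 1-GPU and 8-GPU settings are documented above; other counts
    # would need their own learning-rate choice.
    assert num_gpus in (1, 8), "only the documented configurations"
    batch_size = 128 * num_gpus                     # 128 sentences per GPU
    learning_rate = "5e-4" if num_gpus == 1 else "2e-3"
    cmd = ["python", "nmt.py",
           f"--output_dir={output_dir}",
           f"--batch_size={batch_size}",
           f"--learning_rate={learning_rate}"]
    if num_gpus > 1:
        cmd.append(f"--num_gpus={num_gpus}")
    if amp:
        cmd.append("--amp")                         # omit for FP32/TF32 runs
    subprocess.run(cmd, check=True)

# e.g. the first command above: launch_gnmt(num_gpus=1, amp=True)
```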
in /usr/local/lib/python3.7/dist-packages (from tensorflow==1.13.2) (1.1.2)\n", + "Collecting keras-applications>=1.0.6\n", + " Downloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)\n", + "\u001b[K |████████████████████████████████| 50 kB 6.5 MB/s \n", + "\u001b[?25hRequirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.7/dist-packages (from tensorflow==1.13.2) (0.37.0)\n", + "Requirement already satisfied: grpcio>=1.8.6 in /usr/local/lib/python3.7/dist-packages (from tensorflow==1.13.2) (1.39.0)\n", + "Requirement already satisfied: gast>=0.2.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow==1.13.2) (0.4.0)\n", + "Requirement already satisfied: h5py in /usr/local/lib/python3.7/dist-packages (from keras-applications>=1.0.6->tensorflow==1.13.2) (3.1.0)\n", + "Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.7/dist-packages (from tensorboard<1.14.0,>=1.13.0->tensorflow==1.13.2) (1.0.1)\n", + "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.7/dist-packages (from tensorboard<1.14.0,>=1.13.0->tensorflow==1.13.2) (3.3.4)\n", + "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from markdown>=2.6.8->tensorboard<1.14.0,>=1.13.0->tensorflow==1.13.2) (4.6.4)\n", + "Collecting mock>=2.0.0\n", + " Downloading mock-4.0.3-py3-none-any.whl (28 kB)\n", + "Requirement already satisfied: cached-property in /usr/local/lib/python3.7/dist-packages (from h5py->keras-applications>=1.0.6->tensorflow==1.13.2) (1.5.2)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->markdown>=2.6.8->tensorboard<1.14.0,>=1.13.0->tensorflow==1.13.2) (3.5.0)\n", + "Requirement already satisfied: typing-extensions>=3.6.4 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->markdown>=2.6.8->tensorboard<1.14.0,>=1.13.0->tensorflow==1.13.2) (3.7.4.3)\n", + "Installing collected packages: mock, tensorflow-estimator, tensorboard, keras-applications, tensorflow\n", + " Attempting uninstall: tensorflow-estimator\n", + " Found existing installation: tensorflow-estimator 2.6.0\n", + " Uninstalling tensorflow-estimator-2.6.0:\n", + " Successfully uninstalled tensorflow-estimator-2.6.0\n", + " Attempting uninstall: tensorboard\n", + " Found existing installation: tensorboard 2.6.0\n", + " Uninstalling tensorboard-2.6.0:\n", + " Successfully uninstalled tensorboard-2.6.0\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "kapre 0.3.5 requires tensorflow>=2.0.0, but you have tensorflow 1.13.2 which is incompatible.\u001b[0m\n", + "Successfully installed keras-applications-1.0.8 mock-4.0.3 tensorboard-1.13.1 tensorflow-1.13.2 tensorflow-estimator-1.13.0\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "vK1Cx58G3--_", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "743d00c9-d850-4075-d108-08e395d0ef1c" + }, + "source": [ + "!python nmt.py --output_dir=results --batch_size=128 --learning_rate=5e-4 --amp\n" + ], + "execution_count": 10, + "outputs": [ + { + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:526: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n", + "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:527: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n", + "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:528: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n", + "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:529: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n", + "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:530: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n", + "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:535: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n", + "Enabling TF-AMP\n", + "use_defun is false for attention\n", + "Running train and eval mode.\n", + "# Set random seed to 1\n", + "# Creating output directory results ...\n", + "DLL 2021-08-26 21:07:25.420635 - PARAMETER num_units : 1024 num_layers : 4 num_encoder_layers : None num_decoder_layers : None encoder_type : gnmt residual : True time_major : True num_embeddings_partitions : 0 attention : normed_bahdanau attention_architecture : gnmt_v2 output_attention : True pass_hidden_state : True optimizer : adam learning_rate : 0.0005 warmup_steps : 200 warmup_scheme : t2t decay_scheme : luong234 max_train_epochs : 6 target_bleu : None colocate_gradients_with_ops : True label_smoothing : 0.1 init_op : uniform init_weight : 0.1 src : en tgt : de data_dir : data/wmt16_de_en train_prefix : train.tok.clean.bpe.32000 test_prefix : newstest2014.tok.bpe.32000 translate_file : None output_dir : results vocab_prefix : vocab.bpe.32000 embed_prefix : None sos : eos : 
share_vocab : True check_special_token : True src_max_len : 50 tgt_max_len : 50 src_max_len_infer : None tgt_max_len_infer : 80 unit_type : lstm forget_bias : 0.0 dropout : 0.2 max_gradient_norm : 5.0 batch_size : 128 num_buckets : 5 subword_option : bpe use_char_encode : False save_checkpoints_steps : 2000 log_step_count_steps : 10 num_gpus : 1 hparams_path : None random_seed : 1 language_model : False ckpt : None infer_batch_size : 128 detokenizer_file : None tokenizer_file : None infer_mode : beam_search beam_width : 5 length_penalty_weight : 0.6 coverage_penalty_weight : 0.1 num_workers : 1 amp : True use_fastmath : False use_fp16 : False fp16_loss_scale : 128 enable_auto_loss_scale : True fp16_inc_loss_scale_every_n : 128 check_tower_loss_numerics : False use_fp32_batch_matmul : False force_inputs_padding : False use_xla : False xla_compile : False use_autojit_xla : False use_pintohost_optimizer : False use_cudnn_lstm : False use_loose_bidi_cudnn_lstm : False use_fused_lstm : True use_fused_lstm_dec : False gpu_indices : parallel_iterations : 10 use_dist_strategy : False hierarchical_copy : False network_topology : dgx1 use_block_lstm : False use_defun : False gradient_repacking : 0 compact_gradient_transfer : True all_reduce_spec : nccl agg_small_grads_max_bytes : 0 agg_small_grads_max_group : 10 allreduce_merge_scope : 1 local_parameter_device : gpu use_resource_vars : False debug : False debug_num_train_steps : None show_metrics : True clip_grads : True profile : False profile_save_steps : 10 use_dynamic_rnn : True use_synthetic_data : False mode : train_and_eval \n", + "# Vocab file data/wmt16_de_en/vocab.bpe.32000.en exists\n", + "The first 3 vocab words [,, ., the] are not [, , ]\n", + " using source vocab for target\n", + " agg_small_grads_max_bytes=0\n", + " agg_small_grads_max_group=10\n", + " all_reduce_spec=nccl\n", + " allreduce_merge_scope=1\n", + " amp=True\n", + " attention=normed_bahdanau\n", + " attention_architecture=gnmt_v2\n", + " batch_size=128\n", + " beam_width=5\n", + " best_bleu=0\n", + " best_bleu_dir=results/best_bleu\n", + " check_special_token=True\n", + " check_tower_loss_numerics=False\n", + " ckpt=None\n", + " clip_grads=True\n", + " colocate_gradients_with_ops=True\n", + " compact_gradient_transfer=True\n", + " coverage_penalty_weight=0.1\n", + " debug=False\n", + " debug_num_train_steps=None\n", + " decay_scheme=luong234\n", + " detokenizer_file=data/wmt16_de_en/mosesdecoder/scripts/tokenizer/detokenizer.perl\n", + " dropout=0.2\n", + " embed_prefix=None\n", + " enable_auto_loss_scale=True\n", + " encoder_type=gnmt\n", + " eos=\n", + " epoch_step=0\n", + " force_inputs_padding=False\n", + " forget_bias=0.0\n", + " fp16_inc_loss_scale_every_n=128\n", + " fp16_loss_scale=128\n", + " gpu_indices=\n", + " gradient_repacking=0\n", + " hierarchical_copy=False\n", + " infer_batch_size=128\n", + " infer_mode=beam_search\n", + " init_op=uniform\n", + " init_weight=0.1\n", + " label_smoothing=0.1\n", + " language_model=False\n", + " learning_rate=0.0005\n", + " length_penalty_weight=0.6\n", + " local_parameter_device=gpu\n", + " log_step_count_steps=10\n", + " max_gradient_norm=5.0\n", + " max_train_epochs=6\n", + " mode=train_and_eval\n", + " network_topology=NetworkTopology.DGX1\n", + " num_buckets=5\n", + " num_dec_emb_partitions=0\n", + " num_decoder_layers=4\n", + " num_decoder_residual_layers=2\n", + " num_embeddings_partitions=0\n", + " num_enc_emb_partitions=0\n", + " num_encoder_layers=4\n", + " num_encoder_residual_layers=2\n", + " num_gpus=1\n", + " 
num_units=1024\n", + " optimizer=adam\n", + " output_attention=True\n", + " output_dir=results\n", + " parallel_iterations=10\n", + " pass_hidden_state=True\n", + " profile=False\n", + " profile_save_steps=10\n", + " random_seed=1\n", + " residual=True\n", + " save_checkpoints_steps=2000\n", + " share_vocab=True\n", + " show_metrics=True\n", + " sos=\n", + " src=en\n", + " src_embed_file=\n", + " src_max_len=50\n", + " src_max_len_infer=None\n", + " src_vocab_file=results/vocab.bpe.32000.en\n", + " src_vocab_size=32320\n", + " subword_option=bpe\n", + " target_bleu=None\n", + " test_prefix=data/wmt16_de_en/newstest2014.tok.bpe.32000\n", + " tgt=de\n", + " tgt_embed_file=\n", + " tgt_max_len=50\n", + " tgt_max_len_infer=80\n", + " tgt_vocab_file=results/vocab.bpe.32000.en\n", + " tgt_vocab_size=32320\n", + " time_major=True\n", + " tokenizer_file=data/wmt16_de_en/mosesdecoder/scripts/tokenizer/tokenizer.perl\n", + " train_prefix=data/wmt16_de_en/train.tok.clean.bpe.32000\n", + " translate_file=None\n", + " unit_type=lstm\n", + " use_autojit_xla=False\n", + " use_block_lstm=False\n", + " use_char_encode=False\n", + " use_cudnn_lstm=False\n", + " use_dist_strategy=False\n", + " use_dynamic_rnn=True\n", + " use_fastmath=False\n", + " use_fp16=False\n", + " use_fp32_batch_matmul=False\n", + " use_fused_lstm=True\n", + " use_fused_lstm_dec=False\n", + " use_loose_bidi_cudnn_lstm=False\n", + " use_pintohost_optimizer=False\n", + " use_resource_vars=False\n", + " use_synthetic_data=False\n", + " use_xla=False\n", + " vocab_prefix=data/wmt16_de_en/vocab.bpe.32000\n", + " warmup_scheme=t2t\n", + " warmup_steps=200\n", + " xla_compile=False\n", + "training hparams:\n", + " agg_small_grads_max_bytes=0\n", + " agg_small_grads_max_group=10\n", + " all_reduce_spec=nccl\n", + " allreduce_merge_scope=1\n", + " amp=True\n", + " attention=normed_bahdanau\n", + " attention_architecture=gnmt_v2\n", + " batch_size=128\n", + " beam_width=5\n", + " best_bleu=0\n", + " best_bleu_dir=results/best_bleu\n", + " check_special_token=True\n", + " check_tower_loss_numerics=False\n", + " ckpt=None\n", + " clip_grads=True\n", + " colocate_gradients_with_ops=True\n", + " compact_gradient_transfer=True\n", + " coverage_penalty_weight=0.1\n", + " debug=False\n", + " debug_num_train_steps=None\n", + " decay_scheme=luong234\n", + " detokenizer_file=data/wmt16_de_en/mosesdecoder/scripts/tokenizer/detokenizer.perl\n", + " dropout=0.2\n", + " embed_prefix=None\n", + " enable_auto_loss_scale=True\n", + " encoder_type=gnmt\n", + " eos=\n", + " epoch_step=0\n", + " force_inputs_padding=False\n", + " forget_bias=0.0\n", + " fp16_inc_loss_scale_every_n=128\n", + " fp16_loss_scale=128\n", + " gpu_indices=\n", + " gradient_repacking=0\n", + " hierarchical_copy=False\n", + " infer_batch_size=128\n", + " infer_mode=beam_search\n", + " init_op=uniform\n", + " init_weight=0.1\n", + " label_smoothing=0.1\n", + " language_model=False\n", + " learning_rate=0.0005\n", + " length_penalty_weight=0.6\n", + " local_parameter_device=gpu\n", + " log_step_count_steps=10\n", + " max_gradient_norm=5.0\n", + " max_train_epochs=6\n", + " mode=train_and_eval\n", + " network_topology=NetworkTopology.DGX1\n", + " num_buckets=5\n", + " num_dec_emb_partitions=0\n", + " num_decoder_layers=4\n", + " num_decoder_residual_layers=2\n", + " num_embeddings_partitions=0\n", + " num_enc_emb_partitions=0\n", + " num_encoder_layers=4\n", + " num_encoder_residual_layers=2\n", + " num_gpus=1\n", + " num_units=1024\n", + " optimizer=adam\n", + " output_attention=True\n", + " 
output_dir=results\n", + " parallel_iterations=10\n", + " pass_hidden_state=True\n", + " profile=False\n", + " profile_save_steps=10\n", + " random_seed=1\n", + " residual=True\n", + " save_checkpoints_steps=2000\n", + " share_vocab=True\n", + " show_metrics=True\n", + " sos=\n", + " src=en\n", + " src_embed_file=\n", + " src_max_len=50\n", + " src_max_len_infer=None\n", + " src_vocab_file=results/vocab.bpe.32000.en\n", + " src_vocab_size=32320\n", + " subword_option=bpe\n", + " target_bleu=None\n", + " test_prefix=data/wmt16_de_en/newstest2014.tok.bpe.32000\n", + " tgt=de\n", + " tgt_embed_file=\n", + " tgt_max_len=50\n", + " tgt_max_len_infer=80\n", + " tgt_vocab_file=results/vocab.bpe.32000.en\n", + " tgt_vocab_size=32320\n", + " time_major=True\n", + " tokenizer_file=data/wmt16_de_en/mosesdecoder/scripts/tokenizer/tokenizer.perl\n", + " train_prefix=data/wmt16_de_en/train.tok.clean.bpe.32000\n", + " translate_file=None\n", + " unit_type=lstm\n", + " use_autojit_xla=False\n", + " use_block_lstm=False\n", + " use_char_encode=False\n", + " use_cudnn_lstm=False\n", + " use_dist_strategy=False\n", + " use_dynamic_rnn=True\n", + " use_fastmath=False\n", + " use_fp16=False\n", + " use_fp32_batch_matmul=False\n", + " use_fused_lstm=True\n", + " use_fused_lstm_dec=False\n", + " use_loose_bidi_cudnn_lstm=False\n", + " use_pintohost_optimizer=False\n", + " use_resource_vars=False\n", + " use_synthetic_data=False\n", + " use_xla=False\n", + " vocab_prefix=data/wmt16_de_en/vocab.bpe.32000\n", + " warmup_scheme=t2t\n", + " warmup_steps=200\n", + " xla_compile=False\n", + "infer_hparams:\n", + " agg_small_grads_max_bytes=0\n", + " agg_small_grads_max_group=10\n", + " all_reduce_spec=nccl\n", + " allreduce_merge_scope=1\n", + " amp=True\n", + " attention=normed_bahdanau\n", + " attention_architecture=gnmt_v2\n", + " batch_size=128\n", + " beam_width=5\n", + " best_bleu=0\n", + " best_bleu_dir=results/best_bleu\n", + " check_special_token=True\n", + " check_tower_loss_numerics=False\n", + " ckpt=None\n", + " clip_grads=True\n", + " colocate_gradients_with_ops=True\n", + " compact_gradient_transfer=True\n", + " coverage_penalty_weight=0.1\n", + " debug=False\n", + " debug_num_train_steps=None\n", + " decay_scheme=luong234\n", + " detokenizer_file=data/wmt16_de_en/mosesdecoder/scripts/tokenizer/detokenizer.perl\n", + " dropout=0.2\n", + " embed_prefix=None\n", + " enable_auto_loss_scale=True\n", + " encoder_type=gnmt\n", + " eos=\n", + " epoch_step=0\n", + " force_inputs_padding=False\n", + " forget_bias=0.0\n", + " fp16_inc_loss_scale_every_n=128\n", + " fp16_loss_scale=128\n", + " gpu_indices=\n", + " gradient_repacking=0\n", + " hierarchical_copy=False\n", + " infer_batch_size=128\n", + " infer_mode=beam_search\n", + " init_op=uniform\n", + " init_weight=0.1\n", + " label_smoothing=0.1\n", + " language_model=False\n", + " learning_rate=0.0005\n", + " length_penalty_weight=0.6\n", + " local_parameter_device=gpu\n", + " log_step_count_steps=10\n", + " max_gradient_norm=5.0\n", + " max_train_epochs=6\n", + " mode=train_and_eval\n", + " network_topology=NetworkTopology.DGX1\n", + " num_buckets=1\n", + " num_dec_emb_partitions=0\n", + " num_decoder_layers=4\n", + " num_decoder_residual_layers=2\n", + " num_embeddings_partitions=0\n", + " num_enc_emb_partitions=0\n", + " num_encoder_layers=4\n", + " num_encoder_residual_layers=2\n", + " num_gpus=1\n", + " num_units=1024\n", + " optimizer=adam\n", + " output_attention=True\n", + " output_dir=results\n", + " parallel_iterations=10\n", + " 
pass_hidden_state=True\n", + " profile=False\n", + " profile_save_steps=10\n", + " random_seed=1\n", + " residual=True\n", + " save_checkpoints_steps=2000\n", + " share_vocab=True\n", + " show_metrics=True\n", + " sos=\n", + " src=en\n", + " src_embed_file=\n", + " src_max_len=50\n", + " src_max_len_infer=None\n", + " src_vocab_file=results/vocab.bpe.32000.en\n", + " src_vocab_size=32320\n", + " subword_option=bpe\n", + " target_bleu=None\n", + " test_prefix=data/wmt16_de_en/newstest2014.tok.bpe.32000\n", + " tgt=de\n", + " tgt_embed_file=\n", + " tgt_max_len=50\n", + " tgt_max_len_infer=80\n", + " tgt_vocab_file=results/vocab.bpe.32000.en\n", + " tgt_vocab_size=32320\n", + " time_major=True\n", + " tokenizer_file=data/wmt16_de_en/mosesdecoder/scripts/tokenizer/tokenizer.perl\n", + " train_prefix=data/wmt16_de_en/train.tok.clean.bpe.32000\n", + " translate_file=None\n", + " unit_type=lstm\n", + " use_autojit_xla=False\n", + " use_block_lstm=False\n", + " use_char_encode=False\n", + " use_cudnn_lstm=False\n", + " use_dist_strategy=False\n", + " use_dynamic_rnn=True\n", + " use_fastmath=False\n", + " use_fp16=False\n", + " use_fp32_batch_matmul=False\n", + " use_fused_lstm=True\n", + " use_fused_lstm_dec=False\n", + " use_loose_bidi_cudnn_lstm=False\n", + " use_pintohost_optimizer=False\n", + " use_resource_vars=False\n", + " use_synthetic_data=False\n", + " use_xla=False\n", + " vocab_prefix=data/wmt16_de_en/vocab.bpe.32000\n", + " warmup_scheme=t2t\n", + " warmup_steps=200\n", + " xla_compile=False\n", + "Starting epoch 0\n", + "sess master is \n", + "INFO:tensorflow:Using config: {'_model_dir': 'results', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 2000, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true\n", + "graph_options {\n", + " rewrite_options {\n", + " pin_to_host_optimization: OFF\n", + " }\n", + "}\n", + ", '_keep_checkpoint_max': None, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 10, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': , '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}\n", + "WARNING:tensorflow:Estimator's model_fn (.fn at 0x7f955cf8eb90>) includes params argument, but params are not passed to Estimator.\n", + "WARNING:tensorflow:From /usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Colocations handled automatically by placer.\n", + "WARNING:tensorflow:From /content/DeepLearningExamples/TensorFlow/Translation/GNMT/utils/iterator_utils.py:112: DatasetV1.shard (from tensorflow.python.data.ops.dataset_ops) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Use `dataset.apply(tf.data.experimental.filter_for_shard(...))`.\n", + "WARNING:tensorflow:From /content/DeepLearningExamples/TensorFlow/Translation/GNMT/utils/iterator_utils.py:233: group_by_window (from tensorflow.contrib.data.python.ops.grouping) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Use `tf.data.experimental.group_by_window(...)`.\n", + "WARNING:tensorflow:From 
/content/DeepLearningExamples/TensorFlow/Translation/GNMT/utils/iterator_utils.py:226: to_int64 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Use tf.cast instead.\n", + "INFO:tensorflow:Calling model_fn.\n", + "Running fast mode_fn\n", + "global_step already created!\n", + "model.global_step.name: global_step:0\n", + "# Use the same embedding for source and target\n", + "# Creating train graph ...\n", + "# Build a GNMT encoder\n", + " num_bi_layers = 1\n", + " num_uni_layers = 3\n", + "source.shape: (128, ?)\n", + "WARNING:tensorflow:From /content/DeepLearningExamples/TensorFlow/Translation/GNMT/gnmt_model.py:131: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.\n", + " cell 0 LSTM, forget_bias=0WARNING:tensorflow:From /content/DeepLearningExamples/TensorFlow/Translation/GNMT/model_helper.py:266: LSTMCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.\n", + " DropoutWrapper, dropout=0.2 \n", + " cell 1 LSTM, forget_bias=0 DropoutWrapper, dropout=0.2 \n", + " cell 2 LSTM, forget_bias=0 DropoutWrapper, dropout=0.2 ResidualWrapper\n", + " cell 3 LSTM, forget_bias=0 DropoutWrapper, dropout=0.2 ResidualWrapper\n", + "WARNING:tensorflow:From /content/DeepLearningExamples/TensorFlow/Translation/GNMT/gnmt_model.py:662: MultiRNNCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.\n", + "WARNING:tensorflow:From /content/DeepLearningExamples/TensorFlow/Translation/GNMT/model.py:452: dynamic_rnn (from tensorflow.python.ops.rnn) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Please use `keras.layers.RNN(cell)`, which is equivalent to this API\n", + "WARNING:tensorflow:From /usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/rnn.py:626: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Use tf.cast instead.\n", + "WARNING:tensorflow:From /content/DeepLearningExamples/TensorFlow/Translation/GNMT/model.py:595: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Use tf.cast instead.\n", + " learning_rate=0.0005, warmup_steps=200, warmup_scheme=t2t\n", + " decay_scheme=luong234, start_decay_step=110468, decay_steps 13808, decay_factor 0.5\n", + "WARNING:tensorflow:From /usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/math_grad.py:102: div (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Deprecated in favor of operator or tf.math.divide.\n", + "# Trainable variables for tower: 0\n", + "Format: , , , <(soft) device placement>\n", + " v0/embeddings/embedding_share:0, (32320, 1024), float32_ref, /device:GPU:0\n", + " v0/dynamic_seq2seq/encoder/lstm_cell/kernel:0, (2048, 4096), float32_ref, 
/device:GPU:0\n", + " v0/dynamic_seq2seq/encoder/lstm_cell/bias:0, (4096,), float32_ref, /device:GPU:0\n", + " v0/dynamic_seq2seq/encoder/lstm_cell_1/kernel:0, (2048, 4096), float32_ref, /device:GPU:0\n", + " v0/dynamic_seq2seq/encoder/lstm_cell_1/bias:0, (4096,), float32_ref, /device:GPU:0\n", + " v0/dynamic_seq2seq/encoder/lstm_cell_2/kernel:0, (3072, 4096), float32_ref, /device:GPU:0\n", + " v0/dynamic_seq2seq/encoder/lstm_cell_2/bias:0, (4096,), float32_ref, /device:GPU:0\n", + " v0/dynamic_seq2seq/encoder/lstm_cell_3/kernel:0, (2048, 4096), float32_ref, /device:GPU:0\n", + " v0/dynamic_seq2seq/encoder/lstm_cell_3/bias:0, (4096,), float32_ref, /device:GPU:0\n", + " v0/dynamic_seq2seq/encoder/lstm_cell_4/kernel:0, (2048, 4096), float32_ref, /device:GPU:0\n", + " v0/dynamic_seq2seq/encoder/lstm_cell_4/bias:0, (4096,), float32_ref, /device:GPU:0\n", + " v0/dynamic_seq2seq/decoder/memory_layer/kernel:0, (1024, 1024), float32_ref, /device:GPU:0\n", + " v0/dynamic_seq2seq/decoder/attention_v:0, (1024,), float32_ref, /device:GPU:0\n", + " v0/dynamic_seq2seq/decoder/attention_g:0, (), float32_ref, /device:GPU:0\n", + " v0/dynamic_seq2seq/decoder/attention_b:0, (1024,), float32_ref, /device:GPU:0\n", + " v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/kernel:0, (3072, 4096), float32, /device:GPU:0\n", + " v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/bias:0, (4096,), float32, /device:GPU:0\n", + " v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/bahdanau_attention/query_layer/kernel:0, (1024, 1024), float32_ref, /device:GPU:0\n", + " v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/kernel:0, (3072, 4096), float32, /device:GPU:0\n", + " v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/bias:0, (4096,), float32, /device:GPU:0\n", + " v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/kernel:0, (3072, 4096), float32, /device:GPU:0\n", + " v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/bias:0, (4096,), float32, /device:GPU:0\n", + " v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/kernel:0, (3072, 4096), float32, /device:GPU:0\n", + " v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/bias:0, (4096,), float32, /device:GPU:0\n", + " v0/dynamic_seq2seq/decoder/output_projection/kernel:0, (1024, 32320), float32_ref, /device:GPU:0\n", + "Total params size: 0.61 GB\n", + "Finish building fprop and per-tower bprop.\n", + "Finish building grad aggregation.\n", + "All copy-from vars(0): \n", + "All skippped vars(78): \n", + "global_step:0\n", + "v0/embeddings/embedding_share:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell/kernel:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell/bias:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_1/kernel:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_1/bias:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_2/kernel:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_2/bias:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_3/kernel:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_3/bias:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_4/kernel:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_4/bias:0\n", + "v0/dynamic_seq2seq/decoder/memory_layer/kernel:0\n", + "v0/dynamic_seq2seq/decoder/attention_v:0\n", + "v0/dynamic_seq2seq/decoder/attention_g:0\n", + "v0/dynamic_seq2seq/decoder/attention_b:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/kernel:0\n", + 
"v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/bias:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/bahdanau_attention/query_layer/kernel:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/kernel:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/bias:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/kernel:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/bias:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/kernel:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/bias:0\n", + "v0/dynamic_seq2seq/decoder/output_projection/kernel:0\n", + "tower_0/append_gradient_ops/beta1_power:0\n", + "tower_0/append_gradient_ops/beta2_power:0\n", + "v0/embeddings/embedding_share/Adam:0\n", + "v0/embeddings/embedding_share/Adam_1:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell/kernel/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell/bias/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell/bias/Adam_1:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_1/kernel/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_1/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_1/bias/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_1/bias/Adam_1:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_2/kernel/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_2/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_2/bias/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_2/bias/Adam_1:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_3/kernel/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_3/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_3/bias/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_3/bias/Adam_1:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_4/kernel/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_4/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_4/bias/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_4/bias/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/memory_layer/kernel/Adam:0\n", + "v0/dynamic_seq2seq/decoder/memory_layer/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/attention_v/Adam:0\n", + "v0/dynamic_seq2seq/decoder/attention_v/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/attention_g/Adam:0\n", + "v0/dynamic_seq2seq/decoder/attention_g/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/attention_b/Adam:0\n", + "v0/dynamic_seq2seq/decoder/attention_b/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/kernel/Adam:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/bias/Adam:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/bias/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/bahdanau_attention/query_layer/kernel/Adam:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/bahdanau_attention/query_layer/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/Adam:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/bias/Adam:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/bias/Adam_1:0\n", + 
"v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/kernel/Adam:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/bias/Adam:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/bias/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/kernel/Adam:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/bias/Adam:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/bias/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/output_projection/kernel/Adam:0\n", + "v0/dynamic_seq2seq/decoder/output_projection/kernel/Adam_1:0\n", + "Saveable vars(78): \n", + "global_step:0\n", + "v0/embeddings/embedding_share:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell/kernel:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell/bias:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_1/kernel:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_1/bias:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_2/kernel:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_2/bias:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_3/kernel:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_3/bias:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_4/kernel:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_4/bias:0\n", + "v0/dynamic_seq2seq/decoder/memory_layer/kernel:0\n", + "v0/dynamic_seq2seq/decoder/attention_v:0\n", + "v0/dynamic_seq2seq/decoder/attention_g:0\n", + "v0/dynamic_seq2seq/decoder/attention_b:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/kernel:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/bias:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/bahdanau_attention/query_layer/kernel:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/kernel:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/bias:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/kernel:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/bias:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/kernel:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/bias:0\n", + "v0/dynamic_seq2seq/decoder/output_projection/kernel:0\n", + "tower_0/append_gradient_ops/beta1_power:0\n", + "tower_0/append_gradient_ops/beta2_power:0\n", + "v0/embeddings/embedding_share/Adam:0\n", + "v0/embeddings/embedding_share/Adam_1:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell/kernel/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell/bias/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell/bias/Adam_1:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_1/kernel/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_1/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_1/bias/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_1/bias/Adam_1:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_2/kernel/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_2/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_2/bias/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_2/bias/Adam_1:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_3/kernel/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_3/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_3/bias/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_3/bias/Adam_1:0\n", + 
"v0/dynamic_seq2seq/encoder/lstm_cell_4/kernel/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_4/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_4/bias/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_4/bias/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/memory_layer/kernel/Adam:0\n", + "v0/dynamic_seq2seq/decoder/memory_layer/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/attention_v/Adam:0\n", + "v0/dynamic_seq2seq/decoder/attention_v/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/attention_g/Adam:0\n", + "v0/dynamic_seq2seq/decoder/attention_g/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/attention_b/Adam:0\n", + "v0/dynamic_seq2seq/decoder/attention_b/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/kernel/Adam:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/bias/Adam:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/bias/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/bahdanau_attention/query_layer/kernel/Adam:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/bahdanau_attention/query_layer/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/Adam:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/bias/Adam:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/bias/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/kernel/Adam:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/bias/Adam:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/bias/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/kernel/Adam:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/bias/Adam:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/bias/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/output_projection/kernel/Adam:0\n", + "v0/dynamic_seq2seq/decoder/output_projection/kernel/Adam_1:0\n", + "All global trainable vars(25): \n", + "v0/embeddings/embedding_share:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell/kernel:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell/bias:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_1/kernel:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_1/bias:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_2/kernel:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_2/bias:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_3/kernel:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_3/bias:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_4/kernel:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_4/bias:0\n", + "v0/dynamic_seq2seq/decoder/memory_layer/kernel:0\n", + "v0/dynamic_seq2seq/decoder/attention_v:0\n", + "v0/dynamic_seq2seq/decoder/attention_g:0\n", + "v0/dynamic_seq2seq/decoder/attention_b:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/kernel:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/bias:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/bahdanau_attention/query_layer/kernel:0\n", + 
"v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/kernel:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/bias:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/kernel:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/bias:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/kernel:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/bias:0\n", + "v0/dynamic_seq2seq/decoder/output_projection/kernel:0\n", + "All global vars(78): \n", + "global_step:0\n", + "v0/embeddings/embedding_share:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell/kernel:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell/bias:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_1/kernel:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_1/bias:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_2/kernel:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_2/bias:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_3/kernel:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_3/bias:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_4/kernel:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_4/bias:0\n", + "v0/dynamic_seq2seq/decoder/memory_layer/kernel:0\n", + "v0/dynamic_seq2seq/decoder/attention_v:0\n", + "v0/dynamic_seq2seq/decoder/attention_g:0\n", + "v0/dynamic_seq2seq/decoder/attention_b:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/kernel:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/bias:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/bahdanau_attention/query_layer/kernel:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/kernel:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/bias:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/kernel:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/bias:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/kernel:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/bias:0\n", + "v0/dynamic_seq2seq/decoder/output_projection/kernel:0\n", + "tower_0/append_gradient_ops/beta1_power:0\n", + "tower_0/append_gradient_ops/beta2_power:0\n", + "v0/embeddings/embedding_share/Adam:0\n", + "v0/embeddings/embedding_share/Adam_1:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell/kernel/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell/bias/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell/bias/Adam_1:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_1/kernel/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_1/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_1/bias/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_1/bias/Adam_1:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_2/kernel/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_2/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_2/bias/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_2/bias/Adam_1:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_3/kernel/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_3/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_3/bias/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_3/bias/Adam_1:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_4/kernel/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_4/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_4/bias/Adam:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_4/bias/Adam_1:0\n", + 
"v0/dynamic_seq2seq/decoder/memory_layer/kernel/Adam:0\n", + "v0/dynamic_seq2seq/decoder/memory_layer/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/attention_v/Adam:0\n", + "v0/dynamic_seq2seq/decoder/attention_v/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/attention_g/Adam:0\n", + "v0/dynamic_seq2seq/decoder/attention_g/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/attention_b/Adam:0\n", + "v0/dynamic_seq2seq/decoder/attention_b/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/kernel/Adam:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/bias/Adam:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/bias/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/bahdanau_attention/query_layer/kernel/Adam:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/bahdanau_attention/query_layer/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/Adam:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/bias/Adam:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/bias/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/kernel/Adam:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/bias/Adam:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/bias/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/kernel/Adam:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/kernel/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/bias/Adam:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/bias/Adam_1:0\n", + "v0/dynamic_seq2seq/decoder/output_projection/kernel/Adam:0\n", + "v0/dynamic_seq2seq/decoder/output_projection/kernel/Adam_1:0\n", + "master backproped params(25): \n", + "v0/embeddings/embedding_share:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell/kernel:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell/bias:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_1/kernel:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_1/bias:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_2/kernel:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_2/bias:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_3/kernel:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_3/bias:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_4/kernel:0\n", + "v0/dynamic_seq2seq/encoder/lstm_cell_4/bias:0\n", + "v0/dynamic_seq2seq/decoder/memory_layer/kernel:0\n", + "v0/dynamic_seq2seq/decoder/attention_v:0\n", + "v0/dynamic_seq2seq/decoder/attention_g:0\n", + "v0/dynamic_seq2seq/decoder/attention_b:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/kernel:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/bias:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/bahdanau_attention/query_layer/kernel:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/kernel:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/bias:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/kernel:0\n", + 
"v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/bias:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/kernel:0\n", + "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/bias:0\n", + "v0/dynamic_seq2seq/decoder/output_projection/kernel:0\n", + "Finish building model_fn\n", + "INFO:tensorflow:Done calling model_fn.\n", + "INFO:tensorflow:Create CheckpointSaverHook.\n", + "INFO:tensorflow:Graph was finalized.\n", + "2021-08-26 21:08:52.078538: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA\n", + "2021-08-26 21:08:52.083180: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2200000000 Hz\n", + "2021-08-26 21:08:52.083636: I tensorflow/compiler/xla/service/service.cc:150] XLA service 0x55f631b9cd60 executing computations on platform Host. Devices:\n", + "2021-08-26 21:08:52.083673: I tensorflow/compiler/xla/service/service.cc:158] StreamExecutor device (0): , \n", + "Traceback (most recent call last):\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/client/session.py\", line 1334, in _do_call\n", + " return fn(*args)\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/client/session.py\", line 1317, in _run_fn\n", + " self._extend_graph()\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/client/session.py\", line 1352, in _extend_graph\n", + " tf_session.ExtendSession(self._session)\n", + "tensorflow.python.framework.errors_impl.InvalidArgumentError: No OpKernel was registered to support Op 'NcclAllReduce' used by {{node allreduce/allreduce/NcclAllReduce}}with these attrs: [reduction=\"sum\", shared_name=\"c0\", T=DT_FLOAT, num_devices=1]\n", + "Registered devices: [CPU, XLA_CPU]\n", + "Registered kernels:\n", + " \n", + "\n", + "\t [[{{node allreduce/allreduce/NcclAllReduce}}]]\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"nmt.py\", line 1116, in \n", + " tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/platform/app.py\", line 125, in run\n", + " _sys.exit(main(argv))\n", + " File \"nmt.py\", line 1060, in main\n", + " train_speed, _ = estimator.train_fn(hparams)\n", + " File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/estimator.py\", line 831, in train_fn\n", + " hooks=train_hooks,\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 358, in train\n", + " loss = self._train_model(input_fn, hooks, saving_listeners)\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 1124, in _train_model\n", + " return self._train_model_default(input_fn, hooks, saving_listeners)\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 1158, in _train_model_default\n", + " saving_listeners)\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 1403, in _train_with_estimator_spec\n", + " log_step_count_steps=log_step_count_steps) as mon_sess:\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/training/monitored_session.py\", line 508, in MonitoredTrainingSession\n", + " stop_grace_period_secs=stop_grace_period_secs)\n", + " File 
\"/usr/local/lib/python3.7/dist-packages/tensorflow/python/training/monitored_session.py\", line 934, in __init__\n", + " stop_grace_period_secs=stop_grace_period_secs)\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/training/monitored_session.py\", line 648, in __init__\n", + " self._sess = _RecoverableSession(self._coordinated_creator)\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/training/monitored_session.py\", line 1122, in __init__\n", + " _WrappedSession.__init__(self, self._create_session())\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/training/monitored_session.py\", line 1127, in _create_session\n", + " return self._sess_creator.create_session()\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/training/monitored_session.py\", line 805, in create_session\n", + " self.tf_sess = self._session_creator.create_session()\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/training/monitored_session.py\", line 571, in create_session\n", + " init_fn=self._scaffold.init_fn)\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/training/session_manager.py\", line 287, in prepare_session\n", + " sess.run(init_op, feed_dict=init_feed_dict)\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/client/session.py\", line 929, in run\n", + " run_metadata_ptr)\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/client/session.py\", line 1152, in _run\n", + " feed_dict_tensor, options, run_metadata)\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/client/session.py\", line 1328, in _do_run\n", + " run_metadata)\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/client/session.py\", line 1348, in _do_call\n", + " raise type(e)(node_def, op, message)\n", + "tensorflow.python.framework.errors_impl.InvalidArgumentError: No OpKernel was registered to support Op 'NcclAllReduce' used by node allreduce/allreduce/NcclAllReduce (defined at /content/DeepLearningExamples/TensorFlow/Translation/GNMT/variable_mgr/allreduce.py:336) with these attrs: [reduction=\"sum\", shared_name=\"c0\", T=DT_FLOAT, num_devices=1]\n", + "Registered devices: [CPU, XLA_CPU]\n", + "Registered kernels:\n", + " \n", + "\n", + "\t [[node allreduce/allreduce/NcclAllReduce (defined at /content/DeepLearningExamples/TensorFlow/Translation/GNMT/variable_mgr/allreduce.py:336) ]]\n", + "\n", + "Caused by op 'allreduce/allreduce/NcclAllReduce', defined at:\n", + " File \"nmt.py\", line 1116, in \n", + " tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/platform/app.py\", line 125, in run\n", + " _sys.exit(main(argv))\n", + " File \"nmt.py\", line 1060, in main\n", + " train_speed, _ = estimator.train_fn(hparams)\n", + " File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/estimator.py\", line 831, in train_fn\n", + " hooks=train_hooks,\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 358, in train\n", + " loss = self._train_model(input_fn, hooks, saving_listeners)\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 1124, in _train_model\n", + " return self._train_model_default(input_fn, hooks, saving_listeners)\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", 
line 1154, in _train_model_default\n", + " features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 1112, in _call_model_fn\n", + " model_fn_results = self._model_fn(features=features, **kwargs)\n", + " File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/estimator.py\", line 559, in fn\n", + " features, labels, mode, params)\n", + " File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/estimator.py\", line 434, in build_graph\n", + " tower_gradvars)\n", + " File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/variable_mgr/variable_mgr.py\", line 189, in preprocess_device_grads\n", + " compact_grads, defer_grads)\n", + " File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/variable_mgr/batch_allreduce.py\", line 141, in batch_all_reduce\n", + " all_device_tensors = self._do_batch_all_reduce(all_device_tensors)\n", + " File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/variable_mgr/batch_allreduce.py\", line 329, in _do_batch_all_reduce\n", + " agg_small_grads_max_group=self._agg_small_grads_max_group)\n", + " File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/variable_mgr/allreduce.py\", line 454, in sum_gradients_all_reduce\n", + " num_shards))\n", + " File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/variable_mgr/allreduce.py\", line 336, in sum_grad_and_var_all_reduce\n", + " summed_grads = all_reduce.build_nccl_all_reduce(scaled_grads, tf.add)\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/all_reduce.py\", line 696, in build_nccl_all_reduce\n", + " output_tensors = nccl_ops.all_sum(input_tensors)\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/nccl_ops.py\", line 45, in all_sum\n", + " return _apply_all_reduce('sum', tensors)\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/nccl_ops.py\", line 224, in _apply_all_reduce\n", + " shared_name=shared_name))\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/gen_nccl_ops.py\", line 84, in nccl_all_reduce\n", + " name=name)\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/op_def_library.py\", line 788, in _apply_op_helper\n", + " op_def=op_def)\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/deprecation.py\", line 507, in new_func\n", + " return func(*args, **kwargs)\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py\", line 3300, in create_op\n", + " op_def=op_def)\n", + " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py\", line 1801, in __init__\n", + " self._traceback = tf_stack.extract_stack()\n", + "\n", + "InvalidArgumentError (see above for traceback): No OpKernel was registered to support Op 'NcclAllReduce' used by node allreduce/allreduce/NcclAllReduce (defined at /content/DeepLearningExamples/TensorFlow/Translation/GNMT/variable_mgr/allreduce.py:336) with these attrs: [reduction=\"sum\", shared_name=\"c0\", T=DT_FLOAT, num_devices=1]\n", + "Registered devices: [CPU, XLA_CPU]\n", + "Registered kernels:\n", + " \n", + "\n", + "\t [[node allreduce/allreduce/NcclAllReduce (defined at /content/DeepLearningExamples/TensorFlow/Translation/GNMT/variable_mgr/allreduce.py:336) ]]\n", + "\n", + "ERROR:tensorflow:==================================\n", + "Object was never used (type ):\n", + "\n", + 
"If you want to mark it as used call its \"mark_used()\" method.\n", + "It was originally created here:\n", + " File \"nmt.py\", line 1116, in \n", + " tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/platform/app.py\", line 125, in run\n", + " _sys.exit(main(argv)) File \"nmt.py\", line 1062, in main\n", + " utils.print_out(\"training hits OutOfRangeError\", f=sys.stderr) File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/estimator.py\", line 831, in train_fn\n", + " hooks=train_hooks, File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 360, in train\n", + " return self File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 1124, in _train_model\n", + " return self._train_model_default(input_fn, hooks, saving_listeners) File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 1158, in _train_model_default\n", + " saving_listeners) File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 1118, in _call_model_fn\n", + " return model_fn_results File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/estimator.py\", line 565, in fn\n", + " train_op=train_op) File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/estimator.py\", line 532, in build_graph\n", + " return loss, master_params, master_grads, None, train_op, scaffold File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/tf_should_use.py\", line 193, in wrapped\n", + " return _add_should_use_warning(fn(*args, **kwargs))\n", + "==================================\n", + "ERROR:tensorflow:==================================\n", + "Object was never used (type ):\n", + "\n", + "If you want to mark it as used call its \"mark_used()\" method.\n", + "It was originally created here:\n", + " File \"nmt.py\", line 1116, in \n", + " tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/platform/app.py\", line 125, in run\n", + " _sys.exit(main(argv)) File \"nmt.py\", line 1062, in main\n", + " utils.print_out(\"training hits OutOfRangeError\", f=sys.stderr) File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/estimator.py\", line 831, in train_fn\n", + " hooks=train_hooks, File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 360, in train\n", + " return self File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 1124, in _train_model\n", + " return self._train_model_default(input_fn, hooks, saving_listeners) File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 1158, in _train_model_default\n", + " saving_listeners) File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 1118, in _call_model_fn\n", + " return model_fn_results File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/estimator.py\", line 565, in fn\n", + " train_op=train_op) File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/estimator.py\", line 532, in build_graph\n", + " return loss, master_params, master_grads, None, train_op, scaffold File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/tf_should_use.py\", line 193, in wrapped\n", + " return 
_add_should_use_warning(fn(*args, **kwargs))\n", + "==================================\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "H1bpOS4x3-_A" + }, + "source": [ + "## Start evaluation.\n", + "\n", + "The training process automatically runs evaluation and outputs the BLEU score after each training epoch. Additionally, after the training is done, you can manually run inference on the test dataset with the checkpoint saved during training.\n", + "\n", + "To launch mixed precision inference on 1 GPU, run:\n", + "```\n", + "python nmt.py --output_dir=results --infer_batch_size=128 --mode=infer --amp\n", + "```\n", + "To launch FP32 (TF32 on NVIDIA Ampere GPUs) inference on 1 GPU, run:\n", + "```\n", + "python nmt.py --output_dir=results --infer_batch_size=128 --mode=infer\n", + "```" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "J7GfPXAK3-_A", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "d5b48c06-67c6-4e1d-d560-d52a08b562d0" + }, + "source": [ + "!python nmt.py --output_dir=results --infer_batch_size=128 --mode=infer --amp\n" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:526: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n", + "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:527: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n", + "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:528: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n", + "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:529: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n", + "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:530: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n", + "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:535: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n", + "Enabling TF-AMP\n", + "use_defun is false for attention\n", + "Running inference mode.\n", + "# Set random seed to 1\n", + "DLL 2021-08-26 21:11:24.601318 - PARAMETER num_units : 1024 num_layers : 4 num_encoder_layers : None num_decoder_layers : None encoder_type : gnmt residual : True time_major : True num_embeddings_partitions : 0 attention : normed_bahdanau attention_architecture : gnmt_v2 output_attention : True pass_hidden_state : True optimizer : adam 
learning_rate : 0.0005 warmup_steps : 200 warmup_scheme : t2t decay_scheme : luong234 max_train_epochs : 6 target_bleu : None colocate_gradients_with_ops : True label_smoothing : 0.1 init_op : uniform init_weight : 0.1 src : en tgt : de data_dir : data/wmt16_de_en train_prefix : train.tok.clean.bpe.32000 test_prefix : newstest2014.tok.bpe.32000 translate_file : None output_dir : results vocab_prefix : vocab.bpe.32000 embed_prefix : None sos : eos : share_vocab : True check_special_token : True src_max_len : 50 tgt_max_len : 50 src_max_len_infer : None tgt_max_len_infer : 80 unit_type : lstm forget_bias : 0.0 dropout : 0.2 max_gradient_norm : 5.0 batch_size : 128 num_buckets : 5 subword_option : bpe use_char_encode : False save_checkpoints_steps : 2000 log_step_count_steps : 10 num_gpus : 1 hparams_path : None random_seed : 1 language_model : False ckpt : None infer_batch_size : 128 detokenizer_file : None tokenizer_file : None infer_mode : beam_search beam_width : 5 length_penalty_weight : 0.6 coverage_penalty_weight : 0.1 num_workers : 1 amp : True use_fastmath : False use_fp16 : False fp16_loss_scale : 128 enable_auto_loss_scale : True fp16_inc_loss_scale_every_n : 128 check_tower_loss_numerics : False use_fp32_batch_matmul : False force_inputs_padding : False use_xla : False xla_compile : False use_autojit_xla : False use_pintohost_optimizer : False use_cudnn_lstm : False use_loose_bidi_cudnn_lstm : False use_fused_lstm : True use_fused_lstm_dec : False gpu_indices : parallel_iterations : 10 use_dist_strategy : False hierarchical_copy : False network_topology : dgx1 use_block_lstm : False use_defun : False gradient_repacking : 0 compact_gradient_transfer : True all_reduce_spec : nccl agg_small_grads_max_bytes : 0 agg_small_grads_max_group : 10 allreduce_merge_scope : 1 local_parameter_device : gpu use_resource_vars : False debug : False debug_num_train_steps : None show_metrics : True clip_grads : True profile : False profile_save_steps : 10 use_dynamic_rnn : True use_synthetic_data : False mode : infer \n", + "# Vocab file data/wmt16_de_en/vocab.bpe.32000.en exists\n", + "The first 3 vocab words [,, ., the] are not [, , ]\n", + " using source vocab for target\n", + " agg_small_grads_max_bytes=0\n", + " agg_small_grads_max_group=10\n", + " all_reduce_spec=nccl\n", + " allreduce_merge_scope=1\n", + " amp=True\n", + " attention=normed_bahdanau\n", + " attention_architecture=gnmt_v2\n", + " batch_size=128\n", + " beam_width=5\n", + " best_bleu=0\n", + " best_bleu_dir=results/best_bleu\n", + " check_special_token=True\n", + " check_tower_loss_numerics=False\n", + " ckpt=None\n", + " clip_grads=True\n", + " colocate_gradients_with_ops=True\n", + " compact_gradient_transfer=True\n", + " coverage_penalty_weight=0.1\n", + " debug=False\n", + " debug_num_train_steps=None\n", + " decay_scheme=luong234\n", + " detokenizer_file=data/wmt16_de_en/mosesdecoder/scripts/tokenizer/detokenizer.perl\n", + " dropout=0.2\n", + " embed_prefix=None\n", + " enable_auto_loss_scale=True\n", + " encoder_type=gnmt\n", + " eos=\n", + " epoch_step=0\n", + " force_inputs_padding=False\n", + " forget_bias=0.0\n", + " fp16_inc_loss_scale_every_n=128\n", + " fp16_loss_scale=128\n", + " gpu_indices=\n", + " gradient_repacking=0\n", + " hierarchical_copy=False\n", + " infer_batch_size=128\n", + " infer_mode=beam_search\n", + " init_op=uniform\n", + " init_weight=0.1\n", + " label_smoothing=0.1\n", + " language_model=False\n", + " learning_rate=0.0005\n", + " length_penalty_weight=0.6\n", + " local_parameter_device=gpu\n", 
+ " log_step_count_steps=10\n", + " max_gradient_norm=5.0\n", + " max_train_epochs=6\n", + " mode=infer\n", + " network_topology=NetworkTopology.DGX1\n", + " num_buckets=1\n", + " num_dec_emb_partitions=0\n", + " num_decoder_layers=4\n", + " num_decoder_residual_layers=2\n", + " num_embeddings_partitions=0\n", + " num_enc_emb_partitions=0\n", + " num_encoder_layers=4\n", + " num_encoder_residual_layers=2\n", + " num_gpus=1\n", + " num_units=1024\n", + " optimizer=adam\n", + " output_attention=True\n", + " output_dir=results\n", + " parallel_iterations=10\n", + " pass_hidden_state=True\n", + " profile=False\n", + " profile_save_steps=10\n", + " random_seed=1\n", + " residual=True\n", + " save_checkpoints_steps=2000\n", + " share_vocab=True\n", + " show_metrics=True\n", + " sos=\n", + " src=en\n", + " src_embed_file=\n", + " src_max_len=50\n", + " src_max_len_infer=None\n", + " src_vocab_file=results/vocab.bpe.32000.en\n", + " src_vocab_size=32320\n", + " subword_option=bpe\n", + " target_bleu=None\n", + " test_prefix=data/wmt16_de_en/newstest2014.tok.bpe.32000\n", + " tgt=de\n", + " tgt_embed_file=\n", + " tgt_max_len=50\n", + " tgt_max_len_infer=80\n", + " tgt_vocab_file=results/vocab.bpe.32000.en\n", + " tgt_vocab_size=32320\n", + " time_major=True\n", + " tokenizer_file=data/wmt16_de_en/mosesdecoder/scripts/tokenizer/tokenizer.perl\n", + " train_prefix=data/wmt16_de_en/train.tok.clean.bpe.32000\n", + " translate_file=None\n", + " unit_type=lstm\n", + " use_autojit_xla=False\n", + " use_block_lstm=False\n", + " use_char_encode=False\n", + " use_cudnn_lstm=False\n", + " use_dist_strategy=False\n", + " use_dynamic_rnn=True\n", + " use_fastmath=False\n", + " use_fp16=False\n", + " use_fp32_batch_matmul=False\n", + " use_fused_lstm=True\n", + " use_fused_lstm_dec=False\n", + " use_loose_bidi_cudnn_lstm=False\n", + " use_pintohost_optimizer=False\n", + " use_resource_vars=False\n", + " use_synthetic_data=False\n", + " use_xla=False\n", + " vocab_prefix=data/wmt16_de_en/vocab.bpe.32000\n", + " warmup_scheme=t2t\n", + " warmup_steps=200\n", + " xla_compile=False\n", + "infer_hparams:\n", + " agg_small_grads_max_bytes=0\n", + " agg_small_grads_max_group=10\n", + " all_reduce_spec=nccl\n", + " allreduce_merge_scope=1\n", + " amp=True\n", + " attention=normed_bahdanau\n", + " attention_architecture=gnmt_v2\n", + " batch_size=128\n", + " beam_width=5\n", + " best_bleu=0\n", + " best_bleu_dir=results/best_bleu\n", + " check_special_token=True\n", + " check_tower_loss_numerics=False\n", + " ckpt=None\n", + " clip_grads=True\n", + " colocate_gradients_with_ops=True\n", + " compact_gradient_transfer=True\n", + " coverage_penalty_weight=0.1\n", + " debug=False\n", + " debug_num_train_steps=None\n", + " decay_scheme=luong234\n", + " detokenizer_file=data/wmt16_de_en/mosesdecoder/scripts/tokenizer/detokenizer.perl\n", + " dropout=0.2\n", + " embed_prefix=None\n", + " enable_auto_loss_scale=True\n", + " encoder_type=gnmt\n", + " eos=\n", + " epoch_step=0\n", + " force_inputs_padding=False\n", + " forget_bias=0.0\n", + " fp16_inc_loss_scale_every_n=128\n", + " fp16_loss_scale=128\n", + " gpu_indices=\n", + " gradient_repacking=0\n", + " hierarchical_copy=False\n", + " infer_batch_size=128\n", + " infer_mode=beam_search\n", + " init_op=uniform\n", + " init_weight=0.1\n", + " label_smoothing=0.1\n", + " language_model=False\n", + " learning_rate=0.0005\n", + " length_penalty_weight=0.6\n", + " local_parameter_device=gpu\n", + " log_step_count_steps=10\n", + " max_gradient_norm=5.0\n", + " 
max_train_epochs=6\n", + " mode=infer\n", + " network_topology=NetworkTopology.DGX1\n", + " num_buckets=1\n", + " num_dec_emb_partitions=0\n", + " num_decoder_layers=4\n", + " num_decoder_residual_layers=2\n", + " num_embeddings_partitions=0\n", + " num_enc_emb_partitions=0\n", + " num_encoder_layers=4\n", + " num_encoder_residual_layers=2\n", + " num_gpus=1\n", + " num_units=1024\n", + " optimizer=adam\n", + " output_attention=True\n", + " output_dir=results\n", + " parallel_iterations=10\n", + " pass_hidden_state=True\n", + " profile=False\n", + " profile_save_steps=10\n", + " random_seed=1\n", + " residual=True\n", + " save_checkpoints_steps=2000\n", + " share_vocab=True\n", + " show_metrics=True\n", + " sos=\n", + " src=en\n", + " src_embed_file=\n", + " src_max_len=50\n", + " src_max_len_infer=None\n", + " src_vocab_file=results/vocab.bpe.32000.en\n", + " src_vocab_size=32320\n", + " subword_option=bpe\n", + " target_bleu=None\n", + " test_prefix=data/wmt16_de_en/newstest2014.tok.bpe.32000\n", + " tgt=de\n", + " tgt_embed_file=\n", + " tgt_max_len=50\n", + " tgt_max_len_infer=80\n", + " tgt_vocab_file=results/vocab.bpe.32000.en\n", + " tgt_vocab_size=32320\n", + " time_major=True\n", + " tokenizer_file=data/wmt16_de_en/mosesdecoder/scripts/tokenizer/tokenizer.perl\n", + " train_prefix=data/wmt16_de_en/train.tok.clean.bpe.32000\n", + " translate_file=None\n", + " unit_type=lstm\n", + " use_autojit_xla=False\n", + " use_block_lstm=False\n", + " use_char_encode=False\n", + " use_cudnn_lstm=False\n", + " use_dist_strategy=False\n", + " use_dynamic_rnn=True\n", + " use_fastmath=False\n", + " use_fp16=False\n", + " use_fp32_batch_matmul=False\n", + " use_fused_lstm=True\n", + " use_fused_lstm_dec=False\n", + " use_loose_bidi_cudnn_lstm=False\n", + " use_pintohost_optimizer=False\n", + " use_resource_vars=False\n", + " use_synthetic_data=False\n", + " use_xla=False\n", + " vocab_prefix=data/wmt16_de_en/vocab.bpe.32000\n", + " warmup_scheme=t2t\n", + " warmup_steps=200\n", + " xla_compile=False\n", + "INFO:tensorflow:Starting to evaluate...\n", + "INFO:tensorflow:Using default config.\n", + "INFO:tensorflow:Using config: {'_model_dir': 'results', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true\n", + "graph_options {\n", + " rewrite_options {\n", + " meta_optimizer_iterations: ONE\n", + " }\n", + "}\n", + ", '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': , '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}\n", + "WARNING:tensorflow:Estimator's model_fn (.fn at 0x7fd558836ef0>) includes params argument, but params are not passed to Estimator.\n", + "INFO:tensorflow:Could not find trained model in model_dir: results, running initialization to predict.\n", + "WARNING:tensorflow:From /usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Colocations handled automatically by placer.\n", + "INFO:tensorflow:Calling model_fn.\n", + "Running fast mode_fn\n", + "inference 
hparmas:\n", + " agg_small_grads_max_bytes=0\n", + " agg_small_grads_max_group=10\n", + " all_reduce_spec=nccl\n", + " allreduce_merge_scope=1\n", + " amp=True\n", + " attention=normed_bahdanau\n", + " attention_architecture=gnmt_v2\n", + " batch_size=128\n", + " beam_width=5\n", + " best_bleu=0\n", + " best_bleu_dir=results/best_bleu\n", + " check_special_token=True\n", + " check_tower_loss_numerics=False\n", + " ckpt=None\n", + " clip_grads=True\n", + " colocate_gradients_with_ops=True\n", + " compact_gradient_transfer=True\n", + " coverage_penalty_weight=0.1\n", + " debug=False\n", + " debug_num_train_steps=None\n", + " decay_scheme=luong234\n", + " detokenizer_file=data/wmt16_de_en/mosesdecoder/scripts/tokenizer/detokenizer.perl\n", + " dropout=0.2\n", + " embed_prefix=None\n", + " enable_auto_loss_scale=True\n", + " encoder_type=gnmt\n", + " eos=\n", + " epoch_step=0\n", + " force_inputs_padding=False\n", + " forget_bias=0.0\n", + " fp16_inc_loss_scale_every_n=128\n", + " fp16_loss_scale=128\n", + " gpu_indices=\n", + " gradient_repacking=0\n", + " hierarchical_copy=False\n", + " infer_batch_size=128\n", + " infer_mode=beam_search\n", + " init_op=uniform\n", + " init_weight=0.1\n", + " label_smoothing=0.1\n", + " language_model=False\n", + " learning_rate=0.0005\n", + " length_penalty_weight=0.6\n", + " local_parameter_device=gpu\n", + " log_step_count_steps=10\n", + " max_gradient_norm=5.0\n", + " max_train_epochs=6\n", + " mode=infer\n", + " network_topology=NetworkTopology.DGX1\n", + " num_buckets=1\n", + " num_dec_emb_partitions=0\n", + " num_decoder_layers=4\n", + " num_decoder_residual_layers=2\n", + " num_embeddings_partitions=0\n", + " num_enc_emb_partitions=0\n", + " num_encoder_layers=4\n", + " num_encoder_residual_layers=2\n", + " num_gpus=1\n", + " num_units=1024\n", + " optimizer=adam\n", + " output_attention=True\n", + " output_dir=results\n", + " parallel_iterations=10\n", + " pass_hidden_state=True\n", + " profile=False\n", + " profile_save_steps=10\n", + " random_seed=1\n", + " residual=True\n", + " save_checkpoints_steps=2000\n", + " share_vocab=True\n", + " show_metrics=True\n", + " sos=\n", + " src=en\n", + " src_embed_file=\n", + " src_max_len=50\n", + " src_max_len_infer=None\n", + " src_vocab_file=results/vocab.bpe.32000.en\n", + " src_vocab_size=32320\n", + " subword_option=bpe\n", + " target_bleu=None\n", + " test_prefix=data/wmt16_de_en/newstest2014.tok.bpe.32000\n", + " tgt=de\n", + " tgt_embed_file=\n", + " tgt_max_len=50\n", + " tgt_max_len_infer=80\n", + " tgt_vocab_file=results/vocab.bpe.32000.en\n", + " tgt_vocab_size=32320\n", + " time_major=True\n", + " tokenizer_file=data/wmt16_de_en/mosesdecoder/scripts/tokenizer/tokenizer.perl\n", + " train_prefix=data/wmt16_de_en/train.tok.clean.bpe.32000\n", + " translate_file=None\n", + " unit_type=lstm\n", + " use_autojit_xla=False\n", + " use_block_lstm=False\n", + " use_char_encode=False\n", + " use_cudnn_lstm=False\n", + " use_dist_strategy=False\n", + " use_dynamic_rnn=True\n", + " use_fastmath=False\n", + " use_fp16=False\n", + " use_fp32_batch_matmul=False\n", + " use_fused_lstm=True\n", + " use_fused_lstm_dec=False\n", + " use_loose_bidi_cudnn_lstm=False\n", + " use_pintohost_optimizer=False\n", + " use_resource_vars=False\n", + " use_synthetic_data=False\n", + " use_xla=False\n", + " vocab_prefix=data/wmt16_de_en/vocab.bpe.32000\n", + " warmup_scheme=t2t\n", + " warmup_steps=200\n", + " xla_compile=False\n", + "global_step already created!\n", + "model.global_step.name: global_step:0\n", + "# Use the 
same embedding for source and target\n", + "# Creating infer graph ...\n", + "# Build a GNMT encoder\n", + " num_bi_layers = 1\n", + " num_uni_layers = 3\n", + "source.shape: (?, ?)\n", + "WARNING:tensorflow:From /content/DeepLearningExamples/TensorFlow/Translation/GNMT/block_lstm.py:296: to_int64 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Use tf.cast instead.\n", + " cell 0 LSTM, forget_bias=0WARNING:tensorflow:From /content/DeepLearningExamples/TensorFlow/Translation/GNMT/model_helper.py:266: LSTMCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.\n", + "\n", + " cell 1 LSTM, forget_bias=0\n", + " cell 2 LSTM, forget_bias=0 ResidualWrapper\n", + " cell 3 LSTM, forget_bias=0 ResidualWrapper\n", + "WARNING:tensorflow:From /content/DeepLearningExamples/TensorFlow/Translation/GNMT/gnmt_model.py:662: MultiRNNCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.\n", + " decoding maximum_iterations 80\n", + "WARNING:tensorflow:From /content/DeepLearningExamples/TensorFlow/Translation/GNMT/beam_search_decoder.py:733: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Use tf.cast instead.\n", + "WARNING:tensorflow:From /content/DeepLearningExamples/TensorFlow/Translation/GNMT/beam_search_decoder.py:1001: div (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Deprecated in favor of operator or tf.math.divide.\n", + "WARNING:tensorflow:From /content/DeepLearningExamples/TensorFlow/Translation/GNMT/beam_search_decoder.py:781: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Use tf.cast instead.\n", + " learning_rate=0.0005, warmup_steps=200, warmup_scheme=t2t\n", + " decay_scheme=luong234, start_decay_step=110468, decay_steps 13808, decay_factor 0.5\n", + "INFO:tensorflow:Done calling model_fn.\n", + "INFO:tensorflow:Graph was finalized.\n", + "2021-08-26 21:12:08.807161: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA\n", + "2021-08-26 21:12:08.811073: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2200000000 Hz\n", + "2021-08-26 21:12:08.811304: I tensorflow/compiler/xla/service/service.cc:150] XLA service 0x55f246af0aa0 executing computations on platform Host. 
Devices:\n", + "2021-08-26 21:12:08.811341: I tensorflow/compiler/xla/service/service.cc:158] StreamExecutor device (0): , \n", + "2021-08-26 21:12:08.884223: W tensorflow/core/framework/allocator.cc:124] Allocation of 50331648 exceeds 10% of system memory.\n", + "2021-08-26 21:12:08.884575: W tensorflow/core/framework/allocator.cc:124] Allocation of 132382720 exceeds 10% of system memory.\n", + "2021-08-26 21:12:08.991523: W tensorflow/core/framework/allocator.cc:124] Allocation of 50331648 exceeds 10% of system memory.\n", + "2021-08-26 21:12:09.188681: W tensorflow/core/framework/allocator.cc:124] Allocation of 50331648 exceeds 10% of system memory.\n", + "2021-08-26 21:12:09.390475: W tensorflow/core/framework/allocator.cc:124] Allocation of 50331648 exceeds 10% of system memory.\n", + "INFO:tensorflow:Running local_init_op.\n", + "2021-08-26 21:12:10.193341: I tensorflow/core/kernels/lookup_util.cc:376] Table trying to initialize from file results/vocab.bpe.32000.en is already initialized.\n", + "INFO:tensorflow:Done running local_init_op.\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZeVIs8kI3-_A" + }, + "source": [ + "## Start translation.\n", + "\n", + "After the training is done, you can translate custom sentences with the checkpoint saved during training.\n", + "```\n", + "echo \"The quick brown fox jumps over the lazy dog\" >file.txt\n", + "python nmt.py --output_dir=results --mode=translate --translate_file=file.txt\n", + "cat file.txt.trans\n", + "```" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "q3viIchE3-_B" + }, + "source": [ + "!echo \"The quick brown fox jumps over the lazy dog\" >file.txt\n", + "!python nmt.py --output_dir=results --mode=translate --translate_file=file.txt\n", + "!cat file.txt.trans" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ds6BgVBd3-_B" + }, + "source": [ + "## Other command options\n", + "To see the full list of available options and their descriptions, use the -h or --help command line option, for example:\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "VvdEteL43-_B", + "outputId": "61b1de4f-2149-49d2-9b45-7ee09ffc61df" + }, + "source": [ + "!python nmt.py --help\n" + ], + "execution_count": null, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2021-06-11 04:01:29.489201: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.11.0\n", + "WARNING:tensorflow:Deprecation warnings have been disabled. Set TF_ENABLE_DEPRECATION_WARNINGS=1 to re-enable them.\n", + "WARNING:tensorflow:\n", + "The TensorFlow contrib module will not be included in TensorFlow 2.0.\n", + "For more information, please see:\n", + " * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md\n", + " * https://github.com/tensorflow/addons\n", + " * https://github.com/tensorflow/io (for I/O related ops)\n", + "If you depend on functionality not listed there, please file an issue.\n", + "\n", + "WARNING:tensorflow:From /workspace/gnmt/attention_wrapper.py:554: The name tf.nn.rnn_cell.RNNCell is deprecated. Please use tf.compat.v1.nn.rnn_cell.RNNCell instead.\n", + "\n", + "WARNING:tensorflow:From /workspace/gnmt/benchmark_hooks.py:24: The name tf.train.SessionRunHook is deprecated. 
Please use tf.estimator.SessionRunHook instead.\n", + "\n", + "usage: nmt.py [-h] [--num_units NUM_UNITS] [--num_layers NUM_LAYERS]\n", + " [--num_encoder_layers NUM_ENCODER_LAYERS]\n", + " [--num_decoder_layers NUM_DECODER_LAYERS]\n", + " [--encoder_type ENCODER_TYPE] [--residual [RESIDUAL]]\n", + " [--time_major [TIME_MAJOR]]\n", + " [--num_embeddings_partitions NUM_EMBEDDINGS_PARTITIONS]\n", + " [--attention ATTENTION]\n", + " [--attention_architecture ATTENTION_ARCHITECTURE]\n", + " [--output_attention [OUTPUT_ATTENTION]]\n", + " [--pass_hidden_state [PASS_HIDDEN_STATE]]\n", + " [--optimizer OPTIMIZER] [--learning_rate LEARNING_RATE]\n", + " [--warmup_steps WARMUP_STEPS] [--warmup_scheme WARMUP_SCHEME]\n", + " [--decay_scheme DECAY_SCHEME]\n", + " [--max_train_epochs MAX_TRAIN_EPOCHS]\n", + " [--target_bleu TARGET_BLEU]\n", + " [--colocate_gradients_with_ops [COLOCATE_GRADIENTS_WITH_OPS]]\n", + " [--label_smoothing LABEL_SMOOTHING] [--init_op INIT_OP]\n", + " [--init_weight INIT_WEIGHT] [--src SRC] [--tgt TGT]\n", + " [--data_dir DATA_DIR] [--train_prefix TRAIN_PREFIX]\n", + " [--test_prefix TEST_PREFIX] [--translate_file TRANSLATE_FILE]\n", + " [--output_dir OUTPUT_DIR] [--vocab_prefix VOCAB_PREFIX]\n", + " [--embed_prefix EMBED_PREFIX] [--sos SOS] [--eos EOS]\n", + " [--share_vocab [SHARE_VOCAB]]\n", + " [--check_special_token CHECK_SPECIAL_TOKEN]\n", + " [--src_max_len SRC_MAX_LEN] [--tgt_max_len TGT_MAX_LEN]\n", + " [--src_max_len_infer SRC_MAX_LEN_INFER]\n", + " [--tgt_max_len_infer TGT_MAX_LEN_INFER] [--unit_type UNIT_TYPE]\n", + " [--forget_bias FORGET_BIAS] [--dropout DROPOUT]\n", + " [--max_gradient_norm MAX_GRADIENT_NORM]\n", + " [--batch_size BATCH_SIZE] [--num_buckets NUM_BUCKETS]\n", + " [--subword_option {,bpe,spm}]\n", + " [--use_char_encode USE_CHAR_ENCODE]\n", + " [--save_checkpoints_steps SAVE_CHECKPOINTS_STEPS]\n", + " [--log_step_count_steps LOG_STEP_COUNT_STEPS]\n", + " [--num_gpus NUM_GPUS] [--hparams_path HPARAMS_PATH]\n", + " [--random_seed RANDOM_SEED] [--language_model [LANGUAGE_MODEL]]\n", + " [--ckpt CKPT] [--infer_batch_size INFER_BATCH_SIZE]\n", + " [--detokenizer_file DETOKENIZER_FILE]\n", + " [--tokenizer_file TOKENIZER_FILE]\n", + " [--infer_mode {greedy,beam_search}] [--beam_width BEAM_WIDTH]\n", + " [--length_penalty_weight LENGTH_PENALTY_WEIGHT]\n", + " [--coverage_penalty_weight COVERAGE_PENALTY_WEIGHT]\n", + " [--num_workers NUM_WORKERS] [--amp]\n", + " [--use_fastmath USE_FASTMATH] [--use_fp16 USE_FP16]\n", + " [--fp16_loss_scale FP16_LOSS_SCALE]\n", + " [--enable_auto_loss_scale ENABLE_AUTO_LOSS_SCALE]\n", + " [--fp16_inc_loss_scale_every_n FP16_INC_LOSS_SCALE_EVERY_N]\n", + " [--check_tower_loss_numerics CHECK_TOWER_LOSS_NUMERICS]\n", + " [--use_fp32_batch_matmul USE_FP32_BATCH_MATMUL]\n", + " [--force_inputs_padding FORCE_INPUTS_PADDING]\n", + " [--use_xla USE_XLA] [--xla_compile XLA_COMPILE]\n", + " [--use_autojit_xla USE_AUTOJIT_XLA]\n", + " [--use_pintohost_optimizer USE_PINTOHOST_OPTIMIZER]\n", + " [--use_cudnn_lstm USE_CUDNN_LSTM]\n", + " [--use_loose_bidi_cudnn_lstm USE_LOOSE_BIDI_CUDNN_LSTM]\n", + " [--use_fused_lstm USE_FUSED_LSTM]\n", + " [--use_fused_lstm_dec USE_FUSED_LSTM_DEC]\n", + " [--gpu_indices GPU_INDICES]\n", + " [--parallel_iterations PARALLEL_ITERATIONS]\n", + " [--use_dist_strategy USE_DIST_STRATEGY]\n", + " [--hierarchical_copy HIERARCHICAL_COPY]\n", + " [--network_topology {NetworkTopology.DGX1,NetworkTopology.GCP_V100}]\n", + " [--use_block_lstm USE_BLOCK_LSTM] [--use_defun USE_DEFUN]\n", + " [--gradient_repacking 
GRADIENT_REPACKING]\n", + " [--compact_gradient_transfer COMPACT_GRADIENT_TRANSFER]\n", + " [--all_reduce_spec ALL_REDUCE_SPEC]\n", + " [--agg_small_grads_max_bytes AGG_SMALL_GRADS_MAX_BYTES]\n", + " [--agg_small_grads_max_group AGG_SMALL_GRADS_MAX_GROUP]\n", + " [--allreduce_merge_scope ALLREDUCE_MERGE_SCOPE]\n", + " [--local_parameter_device LOCAL_PARAMETER_DEVICE]\n", + " [--use_resource_vars USE_RESOURCE_VARS] [--debug DEBUG]\n", + " [--debug_num_train_steps DEBUG_NUM_TRAIN_STEPS]\n", + " [--show_metrics SHOW_METRICS] [--clip_grads CLIP_GRADS]\n", + " [--profile PROFILE] [--profile_save_steps PROFILE_SAVE_STEPS]\n", + " [--use_dynamic_rnn USE_DYNAMIC_RNN]\n", + " [--use_synthetic_data USE_SYNTHETIC_DATA]\n", + " [--mode {train_and_eval,infer,translate}]\n", + "\n", + "optional arguments:\n", + " -h, --help show this help message and exit\n", + " --num_units NUM_UNITS\n", + " Network size.\n", + " --num_layers NUM_LAYERS\n", + " Network depth.\n", + " --num_encoder_layers NUM_ENCODER_LAYERS\n", + " Encoder depth, equal to num_layers if None.\n", + " --num_decoder_layers NUM_DECODER_LAYERS\n", + " Decoder depth, equal to num_layers if None.\n", + " --encoder_type ENCODER_TYPE\n", + " uni | bi | gnmt. For bi, we build num_encoder_layers/2\n", + " bi-directional layers. For gnmt, we build 1 bi-\n", + " directional layer, and (num_encoder_layers - 1) uni-\n", + " directional layers.\n", + " --residual [RESIDUAL]\n", + " Whether to add residual connections.\n", + " --time_major [TIME_MAJOR]\n", + " Whether to use time-major mode for dynamic RNN.\n", + " --num_embeddings_partitions NUM_EMBEDDINGS_PARTITIONS\n", + " Number of partitions for embedding vars.\n", + " --attention ATTENTION\n", + " luong | scaled_luong | bahdanau | normed_bahdanau or\n", + " set to \"\" for no attention\n", + " --attention_architecture ATTENTION_ARCHITECTURE\n", + " standard | gnmt | gnmt_v2. standard: use top layer to\n", + " compute attention. gnmt: GNMT style of computing\n", + " attention, use previous bottom layer to compute\n", + " attention. gnmt_v2: similar to gnmt, but use current\n", + " bottom layer to compute attention.\n", + " --output_attention [OUTPUT_ATTENTION]\n", + " Only used in standard attention_architecture. Whether\n", + " use attention as the cell output at each timestep. .\n", + " --pass_hidden_state [PASS_HIDDEN_STATE]\n", + " Whether to pass encoder's hidden state to decoder when\n", + " using an attention based model.\n", + " --optimizer OPTIMIZER\n", + " sgd | adam\n", + " --learning_rate LEARNING_RATE\n", + " Learning rate. Adam: 0.001 | 0.0001\n", + " --warmup_steps WARMUP_STEPS\n", + " How many steps we inverse-decay learning.\n", + " --warmup_scheme WARMUP_SCHEME\n", + " How to warmup learning rates. Options include: t2t:\n", + " Tensor2Tensor's way, start with lr 100 times smaller,\n", + " then exponentiate until the specified lr.\n", + " --decay_scheme DECAY_SCHEME\n", + " How we decay learning rate. Options include: luong234:\n", + " after 2/3 num train steps, we start halving the\n", + " learning rate for 4 times before finishing. luong5:\n", + " after 1/2 num train steps, we start halving the\n", + " learning rate for 5 times before finishing. 
luong10:\n", + " after 1/2 num train steps, we start halving the\n", + " learning rate for 10 times before finishing.\n", + " --max_train_epochs MAX_TRAIN_EPOCHS\n", + " Max number of epochs.\n", + " --target_bleu TARGET_BLEU\n", + " Target bleu.\n", + " --colocate_gradients_with_ops [COLOCATE_GRADIENTS_WITH_OPS]\n", + " Whether try colocating gradients with corresponding op\n", + " --label_smoothing LABEL_SMOOTHING\n", + " If nonzero, smooth the labels towards 1/num_classes.\n", + " --init_op INIT_OP uniform | glorot_normal | glorot_uniform\n", + " --init_weight INIT_WEIGHT\n", + " for uniform init_op, initialize weights between\n", + " [-this, this].\n", + " --src SRC Source suffix, e.g., en.\n", + " --tgt TGT Target suffix, e.g., de.\n", + " --data_dir DATA_DIR Training/eval data directory.\n", + " --train_prefix TRAIN_PREFIX\n", + " Train prefix, expect files with src/tgt suffixes.\n", + " --test_prefix TEST_PREFIX\n", + " Test prefix, expect files with src/tgt suffixes.\n", + " --translate_file TRANSLATE_FILE\n", + " File to translate, works only with translate mode\n", + " --output_dir OUTPUT_DIR\n", + " Store log/model files.\n", + " --vocab_prefix VOCAB_PREFIX\n", + " Vocab prefix, expect files with src/tgt suffixes.\n", + " --embed_prefix EMBED_PREFIX\n", + " Pretrained embedding prefix, expect files with src/tgt\n", + " suffixes. The embedding files should be Glove\n", + " formatted txt files.\n", + " --sos SOS Start-of-sentence symbol.\n", + " --eos EOS End-of-sentence symbol.\n", + " --share_vocab [SHARE_VOCAB]\n", + " Whether to use the source vocab and embeddings for\n", + " both source and target.\n", + " --check_special_token CHECK_SPECIAL_TOKEN\n", + " Whether check special sos, eos, unk tokens exist in\n", + " the vocab files.\n", + " --src_max_len SRC_MAX_LEN\n", + " Max length of src sequences during training (including\n", + " EOS).\n", + " --tgt_max_len TGT_MAX_LEN\n", + " Max length of tgt sequences during training (including\n", + " BOS).\n", + " --src_max_len_infer SRC_MAX_LEN_INFER\n", + " Max length of src sequences during inference\n", + " (including EOS).\n", + " --tgt_max_len_infer TGT_MAX_LEN_INFER\n", + " Max length of tgt sequences during inference\n", + " (including BOS). 
Also use to restrict the maximum\n", + " decoding length.\n", + " --unit_type UNIT_TYPE\n", + " lstm | gru | layer_norm_lstm | nas\n", + " --forget_bias FORGET_BIAS\n", + " Forget bias for BasicLSTMCell.\n", + " --dropout DROPOUT Dropout rate (not keep_prob)\n", + " --max_gradient_norm MAX_GRADIENT_NORM\n", + " Clip gradients to this norm.\n", + " --batch_size BATCH_SIZE\n", + " Total batch size.\n", + " --num_buckets NUM_BUCKETS\n", + " Put data into similar-length buckets (only for\n", + " training).\n", + " --subword_option {,bpe,spm}\n", + " Set to bpe or spm to activate subword desegmentation.\n", + " --use_char_encode USE_CHAR_ENCODE\n", + " Whether to split each word or bpe into character, and\n", + " then generate the word-level representation from the\n", + " character reprentation.\n", + " --save_checkpoints_steps SAVE_CHECKPOINTS_STEPS\n", + " save_checkpoints_steps\n", + " --log_step_count_steps LOG_STEP_COUNT_STEPS\n", + " The frequency, in number of global steps, that the\n", + " global step and the loss will be logged during\n", + " training\n", + " --num_gpus NUM_GPUS Number of gpus in each worker.\n", + " --hparams_path HPARAMS_PATH\n", + " Path to standard hparams json file that\n", + " overrideshparams values from FLAGS.\n", + " --random_seed RANDOM_SEED\n", + " Random seed (>0, set a specific seed).\n", + " --language_model [LANGUAGE_MODEL]\n", + " True to train a language model, ignoring encoder\n", + " --ckpt CKPT Checkpoint file to load a model for inference.\n", + " (defaults to newest checkpoint)\n", + " --infer_batch_size INFER_BATCH_SIZE\n", + " Batch size for inference mode.\n", + " --detokenizer_file DETOKENIZER_FILE\n", + " Detokenizer script file. Default: DATA_DIR/mosesdecode\n", + " r/scripts/tokenizer/detokenizer.perl\n", + " --tokenizer_file TOKENIZER_FILE\n", + " Tokenizer script file. Default:\n", + " DATA_DIR/mosesdecoder/scripts/tokenizer/tokenizer.perl\n", + " --infer_mode {greedy,beam_search}\n", + " Which type of decoder to use during inference.\n", + " --beam_width BEAM_WIDTH\n", + " beam width when using beam search decoder. If 0, use\n", + " standard decoder with greedy helper.\n", + " --length_penalty_weight LENGTH_PENALTY_WEIGHT\n", + " Length penalty for beam search.\n", + " --coverage_penalty_weight COVERAGE_PENALTY_WEIGHT\n", + " Coverage penalty for beam search.\n", + " --num_workers NUM_WORKERS\n", + " Number of workers (inference only).\n", + " --amp use amp for training and inference\n", + " --use_fastmath USE_FASTMATH\n", + " use_fastmath for training and inference\n", + " --use_fp16 USE_FP16 use_fp16 for training and inference\n", + " --fp16_loss_scale FP16_LOSS_SCALE\n", + " If fp16 is enabled, the loss is multiplied by this\n", + " amount right before gradients are computed, then each\n", + " gradient is divided by this amount. 
Mathematically,\n", + " this has no effect, but it helps avoid fp16 underflow.\n", + " Set to 1 to effectively disable.\n", + " --enable_auto_loss_scale ENABLE_AUTO_LOSS_SCALE\n", + " If True and use_fp16 is True, automatically adjust the\n", + " loss scale during training.\n", + " --fp16_inc_loss_scale_every_n FP16_INC_LOSS_SCALE_EVERY_N\n", + " If fp16 is enabled and enable_auto_loss_scale is True,\n", + " increase the loss scale every n steps.\n", + " --check_tower_loss_numerics CHECK_TOWER_LOSS_NUMERICS\n", + " whether to check tower loss numerics\n", + " --use_fp32_batch_matmul USE_FP32_BATCH_MATMUL\n", + " Whether to use fp32 batch matmul\n", + " --force_inputs_padding FORCE_INPUTS_PADDING\n", + " Force padding input batch to src_max_len and\n", + " tgt_max_len\n", + " --use_xla USE_XLA Use xla to compile a few selected locations, mostly\n", + " Defuns.\n", + " --xla_compile XLA_COMPILE\n", + " Use xla.compile() for each tower's fwd and bak pass.\n", + " --use_autojit_xla USE_AUTOJIT_XLA\n", + " Use auto jit xla.\n", + " --use_pintohost_optimizer USE_PINTOHOST_OPTIMIZER\n", + " whether to use PinToHost optimizer\n", + " --use_cudnn_lstm USE_CUDNN_LSTM\n", + " whether to use cudnn_lstm for encoder, non residual\n", + " layers\n", + " --use_loose_bidi_cudnn_lstm USE_LOOSE_BIDI_CUDNN_LSTM\n", + " whether to use loose bidi cudnn_lstm\n", + " --use_fused_lstm USE_FUSED_LSTM\n", + " whether to use fused lstm and variant. If enabled,\n", + " training will use LSTMBlockFusedCell, infer will use\n", + " LSTMBlockCell when appropriate.\n", + " --use_fused_lstm_dec USE_FUSED_LSTM_DEC\n", + " whether to use fused lstm for decoder (training only).\n", + " --gpu_indices GPU_INDICES\n", + " Indices of worker GPUs in ring order\n", + " --parallel_iterations PARALLEL_ITERATIONS\n", + " number of parallel iterations in dynamic_rnn\n", + " --use_dist_strategy USE_DIST_STRATEGY\n", + " whether to use distribution strategy\n", + " --hierarchical_copy HIERARCHICAL_COPY\n", + " Use hierarchical copies. Currently only optimized for\n", + " use on a DGX-1 with 8 GPUs and may perform poorly on\n", + " other hardware. Requires --num_gpus > 1, and only\n", + " recommended when --num_gpus=8\n", + " --network_topology {NetworkTopology.DGX1,NetworkTopology.GCP_V100}\n", + " --use_block_lstm USE_BLOCK_LSTM\n", + " whether to use block lstm\n", + " --use_defun USE_DEFUN\n", + " whether to use Defun\n", + " --gradient_repacking GRADIENT_REPACKING\n", + " Use gradient repacking. Itcurrently only works with\n", + " replicated mode. At the end ofof each step, it repacks\n", + " the gradients for more efficientcross-device\n", + " transportation. A non-zero value specifiesthe number\n", + " of split packs that will be formed.\n", + " --compact_gradient_transfer COMPACT_GRADIENT_TRANSFER\n", + " Compact gradient as much as possible for cross-device\n", + " transfer and aggregation.\n", + " --all_reduce_spec ALL_REDUCE_SPEC\n", + " A specification of the all_reduce algorithm to be used\n", + " for reducing gradients. For more details, see\n", + " parse_all_reduce_spec in variable_mgr.py. An\n", + " all_reduce_spec has BNF form: int ::= positive whole\n", + " number g_int ::= int[KkMGT]? alg_spec ::= alg |\n", + " alg#int range_spec ::= alg_spec | alg_spec/alg_spec\n", + " spec ::= range_spec | range_spec:g_int:range_spec\n", + " NOTE: not all syntactically correct constructs are\n", + " supported. 
Examples: \"xring\" == use one global ring\n", + " reduction for all tensors \"pscpu\" == use CPU at worker\n", + " 0 to reduce all tensors \"nccl\" == use NCCL to locally\n", + " reduce all tensors. Limited to 1 worker. \"nccl/xring\"\n", + " == locally (to one worker) reduce values using NCCL\n", + " then ring reduce across workers. \"pscpu:32k:xring\" ==\n", + " use pscpu algorithm for tensors of size up to 32kB,\n", + " then xring for larger tensors.\n", + " --agg_small_grads_max_bytes AGG_SMALL_GRADS_MAX_BYTES\n", + " If > 0, try to aggregate tensors of less than this\n", + " number of bytes prior to all-reduce.\n", + " --agg_small_grads_max_group AGG_SMALL_GRADS_MAX_GROUP\n", + " When aggregating small tensors for all-reduce do not\n", + " aggregate more than this many into one new tensor.\n", + " --allreduce_merge_scope ALLREDUCE_MERGE_SCOPE\n", + " Establish a name scope around this many gradients\n", + " prior to creating the all-reduce operations. It may\n", + " affect the ability of the backend to merge parallel\n", + " ops.\n", + " --local_parameter_device LOCAL_PARAMETER_DEVICE\n", + " Device to use as parameter server: cpu or gpu. For\n", + " distributed training, it can affect where caching of\n", + " variables happens.\n", + " --use_resource_vars USE_RESOURCE_VARS\n", + " Use resource variables instead of normal variables.\n", + " Resource variables are slower, but this option is\n", + " useful for debugging their performance.\n", + " --debug DEBUG Debug train and eval\n", + " --debug_num_train_steps DEBUG_NUM_TRAIN_STEPS\n", + " Num steps to train.\n", + " --show_metrics SHOW_METRICS\n", + " whether to show detailed metrics\n", + " --clip_grads CLIP_GRADS\n", + " whether to clip gradients\n", + " --profile PROFILE If generate profile\n", + " --profile_save_steps PROFILE_SAVE_STEPS\n", + " Save timeline every N steps.\n", + " --use_dynamic_rnn USE_DYNAMIC_RNN\n", + " --use_synthetic_data USE_SYNTHETIC_DATA\n", + " --mode {train_and_eval,infer,translate}\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Mz84-D1A3-_C" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file From 9f90ae61d3b8441738cc5d1049189597b21ff49a Mon Sep 17 00:00:00 2001 From: shokoufeh-monjezi <68440803+shokoufeh-monjezi@users.noreply.github.com> Date: Thu, 26 Aug 2021 17:20:56 -0700 Subject: [PATCH 3/3] Delete GNMT_Tensorflow.ipynb --- GNMT_Tensorflow.ipynb | 2835 ----------------------------------------- 1 file changed, 2835 deletions(-) delete mode 100644 GNMT_Tensorflow.ipynb diff --git a/GNMT_Tensorflow.ipynb b/GNMT_Tensorflow.ipynb deleted file mode 100644 index e4a6f6c..0000000 --- a/GNMT_Tensorflow.ipynb +++ /dev/null @@ -1,2835 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - }, - "colab": { - "name": "GNMT-Tensorflow.ipynb", - "provenance": [], - "include_colab_link": true - }, - "accelerator": "GPU" - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "SkmdV0cgfmEl" - }, - "source": [ - "!nvidia-smi\n" - ], - 
"execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SXAgXZr8fvZp" - }, - "source": [ - "The below code check whether a Tensor core GPU is present.\n", - "\n" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "E1tQXGOAfp1y" - }, - "source": [ - "\n", - "from tensorflow.python.client import device_lib\n", - "\n", - "def check_tensor_core_gpu_present():\n", - " local_device_protos = device_lib.list_local_devices()\n", - " for line in local_device_protos:\n", - " if \"compute capability\" in str(line):\n", - " compute_capability = float(line.physical_device_desc.split(\"compute capability: \")[-1])\n", - " if compute_capability>=7.0:\n", - " return True\n", - " \n", - "print(\"Tensor Core GPU Present:\", check_tensor_core_gpu_present())\n", - "tensor_core_gpu = check_tensor_core_gpu_present()" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "2wISKppw4AI5", - "outputId": "a8f66ee1-664a-4712-e2c7-f24fc6f60ecc" - }, - "source": [ - "!git clone https://github.com/NVIDIA/DeepLearningExamples" - ], - "execution_count": 1, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Cloning into 'DeepLearningExamples'...\n", - "remote: Enumerating objects: 26402, done.\u001b[K\n", - "remote: Counting objects: 100% (2053/2053), done.\u001b[K\n", - "remote: Compressing objects: 100% (1144/1144), done.\u001b[K\n", - "remote: Total 26402 (delta 891), reused 1904 (delta 870), pack-reused 24349\u001b[K\n", - "Receiving objects: 100% (26402/26402), 77.00 MiB | 21.83 MiB/s, done.\n", - "Resolving deltas: 100% (19399/19399), done.\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "jAaJOkB84UZN", - "outputId": "b0826569-70af-49e2-84aa-f0ac5e824e9a" - }, - "source": [ - "import os\n", - "\n", - "WORKSPACE_DIR='/content/DeepLearningExamples/TensorFlow/Translation/GNMT'\n", - "os.chdir(WORKSPACE_DIR)\n", - "print (os.getcwd())" - ], - "execution_count": 2, - "outputs": [ - { - "output_type": "stream", - "text": [ - "/content/DeepLearningExamples/TensorFlow/Translation/GNMT\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "g5hUDmYi5Air", - "outputId": "29e38306-6b21-4b61-a5c4-d05486c3a7e9" - }, - "source": [ - "!ls\n" - ], - "execution_count": 3, - "outputs": [ - { - "output_type": "stream", - "text": [ - "attention_wrapper.py\testimator.py\t model.py requirements.txt\n", - "beam_search_decoder.py\texamples\t nmt.py scripts\n", - "benchmark_hooks.py\tgnmt_model.py\t NOTICE utils\n", - "block_lstm.py\t\timg\t\t qa\t variable_mgr\n", - "Dockerfile\t\tmodel_helper.py README.md\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BddCwhGY3--7" - }, - "source": [ - "### Download and preprocess the dataset.\n", - "\n", - "The GNMT v2 model was trained on the WMT16 English-German dataset and newstest2014 is used as a testing dataset. We use scripts/wmt16_en_de.sh download script which automatically downloads and preprocesses the training and test datasets. By default, data is downloaded to the data directory.Data will be downloaded to the data directory (on the host). The data directory is mounted to the /workspace/gnmt/data location in the Docker container. 
" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "cmqq94Dv3--9", - "outputId": "e7fd21cb-c2fa-408c-9290-3ab8d28fc3b7" - }, - "source": [ - "pip install git+https://github.com/NVIDIA/dllogger.git" - ], - "execution_count": 4, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Collecting git+https://github.com/NVIDIA/dllogger.git\n", - " Cloning https://github.com/NVIDIA/dllogger.git to /tmp/pip-req-build-mkyvmrgx\n", - " Running command git clone -q https://github.com/NVIDIA/dllogger.git /tmp/pip-req-build-mkyvmrgx\n", - "Building wheels for collected packages: DLLogger\n", - " Building wheel for DLLogger (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for DLLogger: filename=DLLogger-0.1.0-py3-none-any.whl size=5656 sha256=a46ecff807d65f3e475c7ffd49b569bd3ccd85c9999b0f3be65302bcbd60eefa\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-jod60qin/wheels/db/ba/1b/87515aba93adffc7caccc21c0e93f80b70a857188790ce0436\n", - "Successfully built DLLogger\n", - "Installing collected packages: DLLogger\n", - "Successfully installed DLLogger-0.1.0\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "iebyj_up3---", - "outputId": "8cab5626-4063-4b76-d550-cb31beeb7a96" - }, - "source": [ - "!bash scripts/wmt16_en_de.sh" - ], - "execution_count": 5, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Writing to data/wmt16_de_en. To change this, set the OUTPUT_DIR environment variable.\n", - "Downloading Europarl v7. This may take a while...\n", - " % Total % Received % Xferd Average Speed Time Time Time Current\n", - " Dload Upload Total Spent Left Speed\n", - "100 188M 100 188M 0 0 4299k 0 0:00:44 0:00:44 --:--:-- 4502k\n", - "Downloading Common Crawl corpus. This may take a while...\n", - " % Total % Received % Xferd Average Speed Time Time Time Current\n", - " Dload Upload Total Spent Left Speed\n", - "100 875M 100 875M 0 0 4005k 0 0:03:43 0:03:43 --:--:-- 4423k\n", - "Downloading News Commentary v11. 
This may take a while...\n", - " % Total % Received % Xferd Average Speed Time Time Time Current\n", - " Dload Upload Total Spent Left Speed\n", - "100 71.6M 100 71.6M 0 0 7876k 0 0:00:09 0:00:09 --:--:-- 10.8M\n", - "Downloading dev/test sets\n", - " % Total % Received % Xferd Average Speed Time Time Time Current\n", - " Dload Upload Total Spent Left Speed\n", - "100 21.7M 100 21.7M 0 0 5082k 0 0:00:04 0:00:04 --:--:-- 5394k\n", - " % Total % Received % Xferd Average Speed Time Time Time Current\n", - " Dload Upload Total Spent Left Speed\n", - "100 3696k 100 3696k 0 0 1205k 0 0:00:03 0:00:03 --:--:-- 1205k\n", - "Extracting all files...\n", - "europarl-v7.de-en.de\n", - "europarl-v7.de-en.en\n", - "commoncrawl.cs-en.annotation\n", - "commoncrawl.cs-en.cs\n", - "commoncrawl.cs-en.en\n", - "commoncrawl.de-en.annotation\n", - "commoncrawl.de-en.de\n", - "commoncrawl.de-en.en\n", - "commoncrawl.es-en.annotation\n", - "commoncrawl.es-en.en\n", - "commoncrawl.es-en.es\n", - "commoncrawl.fr-en.annotation\n", - "commoncrawl.fr-en.en\n", - "commoncrawl.fr-en.fr\n", - "commoncrawl.ru-en.annotation\n", - "commoncrawl.ru-en.en\n", - "commoncrawl.ru-en.ru\n", - "training-parallel-nc-v11/\n", - "training-parallel-nc-v11/news-commentary-v11.ru-en.ru\n", - "training-parallel-nc-v11/news-commentary-v11.cs-en.en\n", - "training-parallel-nc-v11/news-commentary-v11.de-en.de\n", - "training-parallel-nc-v11/news-commentary-v11.ru-en.en\n", - "training-parallel-nc-v11/news-commentary-v11.cs-en.cs\n", - "training-parallel-nc-v11/news-commentary-v11.de-en.en\n", - "dev/\n", - "dev/newstest2009-ref.fr.sgm\n", - "dev/newstest2013.es\n", - "dev/newstest2014-deen-src.de.sgm\n", - "dev/newstest2015-ruen-src.ru.sgm\n", - "dev/newstest2010-ref.de.sgm\n", - "dev/newstest2012-src.fr.sgm\n", - "dev/newstest2014-ruen-ref.ru.sgm\n", - "dev/news-test2008.en\n", - "dev/news-test2008.es\n", - "dev/newstest2009-ref.hu.sgm\n", - "dev/newstest2014-csen-ref.en.sgm\n", - "dev/newsdiscussdev2015-enfr-src.en.sgm\n", - "dev/newstest2010.cs\n", - "dev/news-test2008-src.hu.sgm\n", - "dev/.newsdev2014-ref.en.sgm.swp\n", - "dev/newstest2011-ref.cs.sgm\n", - "dev/newstest2011-ref.fr.sgm\n", - "dev/newsdev2016-enro-ref.ro.sgm\n", - "dev/newstest2011.cs\n", - "dev/newstest2009.es\n", - "dev/newstest2011.en\n", - "dev/newsdev2015-enfi-src.en.sgm\n", - "dev/newstest2013.cs\n", - "dev/newstest2012-ref.es.sgm\n", - "dev/newstest2014-csen-ref.cs.sgm\n", - "dev/newsdev2014-src.hi.sgm\n", - "dev/newstest2015-encs-src.en.sgm\n", - "dev/newsdev2014-src.en.sgm\n", - "dev/newsdev2015-enfi-ref.fi.sgm\n", - "dev/newstest2011-ref.es.sgm\n", - "dev/newstest2013-src.ru.sgm\n", - "dev/newstest2012-src.de.sgm\n", - "dev/newsdev2016-tren-ref.en.sgm\n", - "dev/newstest2011-src.fr.sgm\n", - "dev/newssyscomb2009-src.de.sgm\n", - "dev/newstest2012-src.es.sgm\n", - "dev/newstest2010-ref.cs.sgm\n", - "dev/newstest2014-hien-ref.hi.sgm\n", - "dev/newssyscomb2009.de\n", - "dev/newstest2011-ref.en.sgm\n", - "dev/news-test2008.cs\n", - "dev/newstest2010.en\n", - "dev/newssyscomb2009.fr\n", - "dev/newstest2012-ref.en.sgm\n", - "dev/news-test2008.de\n", - "dev/newstest2011.de\n", - "dev/newstest2012.es\n", - "dev/newsdev2016-entr-ref.tr.sgm\n", - "dev/newstest2011-ref.de.sgm\n", - "dev/newsdev2014-ref.hi.sgm\n", - "dev/newstest2013-src.de.sgm\n", - "dev/newstest2012-ref.fr.sgm\n", - "dev/newstest2009.de\n", - "dev/newstest2012.en\n", - "dev/news-test2008-ref.cs.sgm\n", - "dev/newstest2013-ref.fr.sgm\n", - "dev/newsdev2014.hi\n", - "dev/newstest2011-src.cs.sgm\n", - 
"dev/newssyscomb2009-src.fr.sgm\n", - "dev/newstest2012.ru\n", - "dev/newstest2010-ref.es.sgm\n", - "dev/newstest2010-src.es.sgm\n", - "dev/news-test2008.fr\n", - "dev/newstest2009.en\n", - "dev/newstest2014-ruen-src.ru.sgm\n", - "dev/newssyscomb2009-ref.cs.sgm\n", - "dev/newstest2010-src.fr.sgm\n", - "dev/newssyscomb2009-src.en.sgm\n", - "dev/newstest2015-enru-ref.ru.sgm\n", - "dev/newstest2015-ende-ref.de.sgm\n", - "dev/newstest2013-ref.ru.sgm\n", - "dev/newssyscomb2009-src.it.sgm\n", - "dev/newsdiscusstest2015-enfr-src.en.sgm\n", - "dev/newstest2015-fien-ref.en.sgm\n", - "dev/newstest2010-src.en.sgm\n", - "dev/newstest2009.fr\n", - "dev/newstest2015-ruen-ref.en.sgm\n", - "dev/newstest2013-src.es.sgm\n", - "dev/newstest2014-hien-ref.en.sgm\n", - "dev/news-test2008-src.en.sgm\n", - "dev/newstest2012-ref.cs.sgm\n", - "dev/news-test2008-ref.es.sgm\n", - "dev/news-test2008-ref.fr.sgm\n", - "dev/newstest2014-ruen-ref.en.sgm\n", - "dev/news-test2008-src.es.sgm\n", - "dev/newstest2014-fren-src.en.sgm\n", - "dev/newstest2012-ref.de.sgm\n", - "dev/newstest2014-csen-src.cs.sgm\n", - "dev/newstest2014-csen-src.en.sgm\n", - "dev/newstest2011-src.de.sgm\n", - "dev/newssyscomb2009-src.cs.sgm\n", - "dev/newstest2015-enfi-ref.fi.sgm\n", - "dev/newstest2009-src.it.sgm\n", - "dev/newstest2010-src.de.sgm\n", - "dev/newstest2009-ref.cs.sgm\n", - "dev/newssyscomb2009-ref.es.sgm\n", - "dev/newstest2014-deen-src.en.sgm\n", - "dev/newsdiscusstest2015-fren-ref.en.sgm\n", - "dev/newstest2012.fr\n", - "dev/newsdiscusstest2015-enfr-ref.fr.sgm\n", - "dev/newsdev2016-enro-src.en.sgm\n", - "dev/newstest2009-src.es.sgm\n", - "dev/newstest2013-src.fr.sgm\n", - "dev/newstest2015-deen-src.de.sgm\n", - "dev/newsdev2015-fien-src.fi.sgm\n", - "dev/newsdiscusstest2015-fren-src.fr.sgm\n", - "dev/newstest2014-ruen-src.en.sgm\n", - "dev/newstest2012-src.en.sgm\n", - "dev/newstest2013.fr\n", - "dev/newstest2015-enru-src.en.sgm\n", - "dev/newstest2009-ref.es.sgm\n", - "dev/newstest2011.fr\n", - "dev/newstest2009-ref.en.sgm\n", - "dev/newstest2015-enfi-src.en.sgm\n", - "dev/newstest2009-src.xx.sgm\n", - "dev/newstest2015-encs-ref.cs.sgm\n", - "dev/newstest2013.ru\n", - "dev/newstest2009.cs\n", - "dev/newsdev2014.en\n", - "dev/newstest2014-fren-ref.fr.sgm\n", - "dev/news-test2008-ref.en.sgm\n", - "dev/newssyscomb2009.es\n", - "dev/news-test2008-src.cs.sgm\n", - "dev/newsdev2016-roen-src.ro.sgm\n", - "dev/.newstest2013-ref.en.sgm.swp\n", - "dev/newssyscomb2009-ref.hu.sgm\n", - "dev/newstest2010.de\n", - "dev/newstest2013-ref.cs.sgm\n", - "dev/newstest2013-ref.de.sgm\n", - "dev/newstest2009-src.cs.sgm\n", - "dev/newssyscomb2009.en\n", - "dev/newssyscomb2009-ref.it.sgm\n", - "dev/newstest2009-ref.it.sgm\n", - "dev/newstest2010-ref.fr.sgm\n", - "dev/newstest2015-csen-src.cs.sgm\n", - "dev/newsdev2016-entr-src.en.sgm\n", - "dev/newstest2010.es\n", - "dev/news-test2008-src.de.sgm\n", - "dev/newstest2013.en\n", - "dev/newsdev2016-roen-ref.en.sgm\n", - "dev/newstest2009-src.de.sgm\n", - "dev/newstest2010-ref.en.sgm\n", - "dev/newstest2011-src.es.sgm\n", - "dev/newssyscomb2009-ref.en.sgm\n", - "dev/newstest2014-fren-ref.en.sgm\n", - "dev/newstest2012.cs\n", - "dev/newstest2009-src.hu.sgm\n", - "dev/newstest2009-src.fr.sgm\n", - "dev/newstest2015-ende-src.en.sgm\n", - "dev/newstest2013-src.cs.sgm\n", - "dev/newstest2014-hien-src.hi.sgm\n", - "dev/news-test2008-ref.hu.sgm\n", - "dev/newstest2015-csen-ref.en.sgm\n", - "dev/newstest2013-ref.es.sgm\n", - "dev/newstest2013-ref.en.sgm\n", - "dev/newstest2010-src.cs.sgm\n", - 
"dev/newstest2010.fr\n", - "dev/newstest2015-deen-ref.en.sgm\n", - "dev/newstest2011.es\n", - "dev/newsdev2016-tren-src.tr.sgm\n", - "dev/newstest2013.de\n", - "dev/newstest2014-fren-src.fr.sgm\n", - "dev/newsdiscussdev2015-fren-ref.en.sgm\n", - "dev/newsdiscussdev2015-fren-src.fr.sgm\n", - "dev/newstest2014-deen-ref.de.sgm\n", - "dev/newstest2013-src.en.sgm\n", - "dev/newssyscomb2009-ref.fr.sgm\n", - "dev/newssyscomb2009-ref.de.sgm\n", - "dev/newstest2009-src.en.sgm\n", - "dev/newstest2009-ref.de.sgm\n", - "dev/newsdiscussdev2015-enfr-ref.fr.sgm\n", - "dev/newssyscomb2009.cs\n", - "dev/newstest2012-ref.ru.sgm\n", - "dev/newstest2014-hien-src.en.sgm\n", - "dev/news-test2008-src.fr.sgm\n", - "dev/newsdev2015-fien-ref.en.sgm\n", - "dev/newsdev2014-ref.en.sgm\n", - "dev/newstest2015-fien-src.fi.sgm\n", - "dev/news-test2008-ref.de.sgm\n", - "dev/newstest2012-src.ru.sgm\n", - "dev/newssyscomb2009-src.es.sgm\n", - "dev/newssyscomb2009-src.hu.sgm\n", - "dev/newstest2014-deen-ref.en.sgm\n", - "dev/newstest2012.de\n", - "dev/newstest2011-src.en.sgm\n", - "dev/newstest2012-src.cs.sgm\n", - "test/newstest2016-csen-ref.en.sgm\n", - "test/newstest2016-csen-src.cs.sgm\n", - "test/newstest2016-deen-ref.en.sgm\n", - "test/newstest2016-deen-src.de.sgm\n", - "test/newstest2016-encs-ref.cs.sgm\n", - "test/newstest2016-encs-src.en.sgm\n", - "test/newstest2016-ende-ref.de.sgm\n", - "test/newstest2016-ende-src.en.sgm\n", - "test/newstest2016-enfi-ref.fi.sgm\n", - "test/newstest2016-enfi-src.en.sgm\n", - "test/newstest2016-enro-ref.ro.sgm\n", - "test/newstest2016-enro-src.en.sgm\n", - "test/newstest2016-enru-ref.ru.sgm\n", - "test/newstest2016-enru-src.en.sgm\n", - "test/newstest2016-entr-ref.tr.sgm\n", - "test/newstest2016-entr-src.en.sgm\n", - "test/newstest2016-fien-ref.en.sgm\n", - "test/newstest2016-fien-src.fi.sgm\n", - "test/newstest2016-roen-ref.en.sgm\n", - "test/newstest2016-roen-src.ro.sgm\n", - "test/newstest2016-ruen-ref.en.sgm\n", - "test/newstest2016-ruen-src.ru.sgm\n", - "test/newstest2016-tren-ref.en.sgm\n", - "test/newstest2016-tren-src.tr.sgm\n", - "test/newstestB2016-enfi-ref.fi.sgm\n", - "test/newstestB2016-enfi-src.en.sgm\n", - "4562102 data/wmt16_de_en/train.en\n", - "4562102 data/wmt16_de_en/train.de\n", - "Cloning moses for data processing\n", - "Cloning into 'data/wmt16_de_en/mosesdecoder'...\n", - "remote: Enumerating objects: 148070, done.\u001b[K\n", - "remote: Counting objects: 100% (498/498), done.\u001b[K\n", - "remote: Compressing objects: 100% (206/206), done.\u001b[K\n", - "remote: Total 148070 (delta 315), reused 433 (delta 289), pack-reused 147572\u001b[K\n", - "Receiving objects: 100% (148070/148070), 129.86 MiB | 17.29 MiB/s, done.\n", - "Resolving deltas: 100% (114341/114341), done.\n", - "HEAD is now at 8c5eaa1a1 Merge branch 'RELEASE-4.0' of github.com:jowagner/mosesdecoder\n", - "/content/DeepLearningExamples/TensorFlow/Translation/GNMT\n", - "Tokenizing data/wmt16_de_en/newstest2009.de...\n", - "Tokenizing data/wmt16_de_en/newstest2010.de...\n", - "Tokenizing data/wmt16_de_en/newstest2011.de...\n", - "Tokenizing data/wmt16_de_en/newstest2012.de...\n", - "Tokenizing data/wmt16_de_en/newstest2013.de...\n", - "Tokenizing data/wmt16_de_en/newstest2014.de...\n", - "Tokenizing data/wmt16_de_en/newstest2015.de...\n", - "Tokenizing data/wmt16_de_en/newstest2016.de...\n", - "Tokenizing data/wmt16_de_en/train.de...\n", - "Tokenizing data/wmt16_de_en/newstest2009.en...\n", - "Tokenizing data/wmt16_de_en/newstest2010.en...\n", - "Tokenizing data/wmt16_de_en/newstest2011.en...\n", 
- "Tokenizing data/wmt16_de_en/newstest2012.en...\n", - "Tokenizing data/wmt16_de_en/newstest2013.en...\n", - "Tokenizing data/wmt16_de_en/newstest2014.en...\n", - "Tokenizing data/wmt16_de_en/newstest2015.en...\n", - "Tokenizing data/wmt16_de_en/newstest2016.en...\n", - "Tokenizing data/wmt16_de_en/train.en...\n", - "Cleaning data/wmt16_de_en/newstest2009...\n", - "clean-corpus.perl: processing data/wmt16_de_en/newstest2009.de & .en to data/wmt16_de_en/newstest2009.clean, cutoff 1-80, ratio 9\n", - "\n", - "Input sentences: 2525 Output sentences: 2517\n", - "Cleaning data/wmt16_de_en/newstest2009.tok...\n", - "clean-corpus.perl: processing data/wmt16_de_en/newstest2009.tok.de & .en to data/wmt16_de_en/newstest2009.tok.clean, cutoff 1-80, ratio 9\n", - "\n", - "Input sentences: 2525 Output sentences: 2509\n", - "Cleaning data/wmt16_de_en/newstest2010...\n", - "clean-corpus.perl: processing data/wmt16_de_en/newstest2010.de & .en to data/wmt16_de_en/newstest2010.clean, cutoff 1-80, ratio 9\n", - "\n", - "Input sentences: 2489 Output sentences: 2488\n", - "Cleaning data/wmt16_de_en/newstest2010.tok...\n", - "clean-corpus.perl: processing data/wmt16_de_en/newstest2010.tok.de & .en to data/wmt16_de_en/newstest2010.tok.clean, cutoff 1-80, ratio 9\n", - "\n", - "Input sentences: 2489 Output sentences: 2485\n", - "Cleaning data/wmt16_de_en/newstest2011...\n", - "clean-corpus.perl: processing data/wmt16_de_en/newstest2011.de & .en to data/wmt16_de_en/newstest2011.clean, cutoff 1-80, ratio 9\n", - "\n", - "Input sentences: 3003 Output sentences: 3002\n", - "Cleaning data/wmt16_de_en/newstest2011.tok...\n", - "clean-corpus.perl: processing data/wmt16_de_en/newstest2011.tok.de & .en to data/wmt16_de_en/newstest2011.tok.clean, cutoff 1-80, ratio 9\n", - "\n", - "Input sentences: 3003 Output sentences: 2994\n", - "Cleaning data/wmt16_de_en/newstest2012...\n", - "clean-corpus.perl: processing data/wmt16_de_en/newstest2012.de & .en to data/wmt16_de_en/newstest2012.clean, cutoff 1-80, ratio 9\n", - "\n", - "Input sentences: 3003 Output sentences: 3000\n", - "Cleaning data/wmt16_de_en/newstest2012.tok...\n", - "clean-corpus.perl: processing data/wmt16_de_en/newstest2012.tok.de & .en to data/wmt16_de_en/newstest2012.tok.clean, cutoff 1-80, ratio 9\n", - "\n", - "Input sentences: 3003 Output sentences: 2996\n", - "Cleaning data/wmt16_de_en/newstest2013...\n", - "clean-corpus.perl: processing data/wmt16_de_en/newstest2013.de & .en to data/wmt16_de_en/newstest2013.clean, cutoff 1-80, ratio 9\n", - "\n", - "Input sentences: 3000 Output sentences: 2997\n", - "Cleaning data/wmt16_de_en/newstest2013.tok...\n", - "clean-corpus.perl: processing data/wmt16_de_en/newstest2013.tok.de & .en to data/wmt16_de_en/newstest2013.tok.clean, cutoff 1-80, ratio 9\n", - "\n", - "Input sentences: 3000 Output sentences: 2990\n", - "Cleaning data/wmt16_de_en/newstest2014...\n", - "clean-corpus.perl: processing data/wmt16_de_en/newstest2014.de & .en to data/wmt16_de_en/newstest2014.clean, cutoff 1-80, ratio 9\n", - "\n", - "Input sentences: 3003 Output sentences: 3003\n", - "Cleaning data/wmt16_de_en/newstest2014.tok...\n", - "clean-corpus.perl: processing data/wmt16_de_en/newstest2014.tok.de & .en to data/wmt16_de_en/newstest2014.tok.clean, cutoff 1-80, ratio 9\n", - "\n", - "Input sentences: 3003 Output sentences: 3002\n", - "Cleaning data/wmt16_de_en/newstest2015...\n", - "clean-corpus.perl: processing data/wmt16_de_en/newstest2015.de & .en to data/wmt16_de_en/newstest2015.clean, cutoff 1-80, ratio 9\n", - "\n", - "Input sentences: 
2169 Output sentences: 2169\n", - "Cleaning data/wmt16_de_en/newstest2015.tok...\n", - "clean-corpus.perl: processing data/wmt16_de_en/newstest2015.tok.de & .en to data/wmt16_de_en/newstest2015.tok.clean, cutoff 1-80, ratio 9\n", - "\n", - "Input sentences: 2169 Output sentences: 2167\n", - "Cleaning data/wmt16_de_en/newstest2016...\n", - "clean-corpus.perl: processing data/wmt16_de_en/newstest2016.de & .en to data/wmt16_de_en/newstest2016.clean, cutoff 1-80, ratio 9\n", - "\n", - "Input sentences: 2999 Output sentences: 2998\n", - "Cleaning data/wmt16_de_en/newstest2016.tok...\n", - "clean-corpus.perl: processing data/wmt16_de_en/newstest2016.tok.de & .en to data/wmt16_de_en/newstest2016.tok.clean, cutoff 1-80, ratio 9\n", - "\n", - "Input sentences: 2999 Output sentences: 2997\n", - "Cleaning data/wmt16_de_en/train...\n", - "clean-corpus.perl: processing data/wmt16_de_en/train.de & .en to data/wmt16_de_en/train.clean, cutoff 1-80, ratio 9\n", - "..........(100000)..........(200000)..........(300000)..........(400000)..........(500000)..........(600000)..........(700000)..........(800000)..........(900000)..........(1000000)..........(1100000)..........(1200000)..........(1300000)..........(1400000)..........(1500000)..........(1600000)..........(1700000)..........(1800000)..........(1900000)..........(2000000)..........(2100000)..........(2200000)..........(2300000)..........(2400000)..........(2500000)..........(2600000)..........(2700000)..........(2800000)..........(2900000)..........(3000000)..........(3100000)..........(3200000)..........(3300000)..........(3400000)..........(3500000)..........(3600000)..........(3700000)..........(3800000)..........(3900000)..........(4000000)..........(4100000)..........(4200000)..........(4300000)..........(4400000)..........(4500000)......\n", - "Input sentences: 4562102 Output sentences: 4524868\n", - "Cleaning data/wmt16_de_en/train.tok...\n", - "clean-corpus.perl: processing data/wmt16_de_en/train.tok.de & .en to data/wmt16_de_en/train.tok.clean, cutoff 1-80, ratio 9\n", - "..........(100000)..........(200000)..........(300000)..........(400000)..........(500000)..........(600000)..........(700000)..........(800000)..........(900000)..........(1000000)..........(1100000)..........(1200000)..........(1300000)..........(1400000)..........(1500000)..........(1600000)..........(1700000)..........(1800000)..........(1900000)..........(2000000)..........(2100000)..........(2200000)..........(2300000)..........(2400000)..........(2500000)..........(2600000)..........(2700000)..........(2800000)..........(2900000)..........(3000000)..........(3100000)..........(3200000)..........(3300000)..........(3400000)..........(3500000)..........(3600000)..........(3700000)..........(3800000)..........(3900000)..........(4000000)..........(4100000)..........(4200000)..........(4300000)..........(4400000)..........(4500000)......\n", - "Input sentences: 4562102 Output sentences: 4500966\n", - "Processed 1 lines\n", - "Processed 100001 lines\n", - "Processed 200001 lines\n", - "Processed 300001 lines\n", - "Processed 400001 lines\n", - "Processed 500001 lines\n", - "Processed 600001 lines\n", - "Processed 700001 lines\n", - "Processed 800001 lines\n", - "Processed 900001 lines\n", - "Processed 1000001 lines\n", - "Processed 1100001 lines\n", - "Processed 1200001 lines\n", - "Processed 1300001 lines\n", - "Processed 1400001 lines\n", - "Processed 1500001 lines\n", - "Processed 1600001 lines\n", - "Processed 1700001 lines\n", - "Processed 1800001 lines\n", - 
"Processed 1900001 lines\n", - "Processed 2000001 lines\n", - "Processed 2100001 lines\n", - "Processed 2200001 lines\n", - "Processed 2300001 lines\n", - "Processed 2400001 lines\n", - "Processed 2500001 lines\n", - "Processed 2600001 lines\n", - "Processed 2700001 lines\n", - "Processed 2800001 lines\n", - "Processed 2900001 lines\n", - "Processed 3000001 lines\n", - "Processed 3100001 lines\n", - "Processed 3200001 lines\n", - "Processed 3300001 lines\n", - "Processed 3400001 lines\n", - "Processed 3500001 lines\n", - "Processed 3600001 lines\n", - "Processed 3700001 lines\n", - "Processed 3800001 lines\n", - "Processed 3900001 lines\n", - "Processed 4000001 lines\n", - "Processed 4100001 lines\n", - "Processed 4200001 lines\n", - "Processed 4300001 lines\n", - "Processed 4400001 lines\n", - "Processed 4500001 lines\n", - "Skipped: 432775, Valid: 4068191, Valid ratio 0.9038484183173123\n", - "Character frequency: Counter({' ': 95933432, 'e': 52612907, 't': 39531367, 'o': 34443857, 'a': 34315161, 'i': 32288996, 'n': 31316898, 's': 27807191, 'r': 27547036, 'h': 18575168, 'l': 17430932, 'd': 14356421, 'c': 13770269, 'u': 12722046, 'm': 10694282, 'p': 9849316, 'f': 9222652, 'g': 7472957, 'y': 6739113, 'w': 6298874, 'b': 5759405, 'v': 4603232, ',': 4566506, '.': 4522010, '\\n': 4068191, 'k': 2413992, 'T': 1556871, 'I': 1475221, 'C': 1106541, 'S': 1067920, 'A': 1017606, 'E': 997734, '-': 964780, 'M': 909863, 'x': 904216, 'P': 853835, ';': 846421, '0': 823086, '&': 762925, 'q': 705787, '1': 572547, 'W': 556727, 'B': 528579, '2': 483945, 'j': 473533, 'F': 463531, 'R': 456707, 'U': 443074, 'D': 439002, 'H': 429447, 'G': 409219, 'O': 406986, 'L': 386464, 'z': 384202, 'N': 365973, '9': 322935, '(': 321500, ')': 317979, '3': 251733, ':': 232153, '5': 219534, '4': 196479, 'V': 190902, 'K': 155351, '8': 150936, '6': 143849, 'J': 136334, '7': 135778, '?': 128658, '/': 128202, 'Y': 124120, '#': 91426, '!': 89656, '%': 53827, 'X': 37529, 'Z': 37248, 'é': 36739, 'Q': 36501, 'ü': 24350, 'ö': 18333, 'ä': 15833, 'á': 14049, '_': 12835, '+': 12704, '*': 12417, 'ó': 10862, '®': 9919, 'í': 9626, '$': 9368, 'è': 7354, '{': 7175, '}': 6843, 'à': 6610, '=': 6534, '´': 5665, '»': 5040, '@': 4481, 'ñ': 4450, 'ç': 3939, '«': 3757, 'ß': 3713, '°': 3655, '²': 3414, '`': 3308, 'ô': 3008, 'ú': 2761, 'ã': 2587, 'å': 2511, 'ê': 2055, '\\\\': 1984, 'â': 1731, 'Ö': 1673, '·': 1533, '\\xad': 1489, '©': 1483, '\\x92': 1455, 'ø': 1342, '§': 1166, 'ò': 1142, 'Ü': 1121, 'î': 1048, '£': 1036, 'ï': 1005, 'ë': 842, 'æ': 784, '\\x96': 706, 'É': 698, '\\x93': 677, 'º': 638, '\\x94': 628, '~': 587, 'ð': 563, 'Á': 518, '³': 489, 'õ': 487, 'ù': 486, 'Ã': 457, 'Å': 430, '¿': 411, 'ì': 407, 'ý': 399, '½': 374, 'Ø': 367, 'Ä': 362, 'û': 347, 'µ': 295, 'Â': 278, 'Ó': 258, '\\x97': 256, '×': 249, '^': 234, '\\x80': 217, '±': 205, '\\x99': 195, 'Í': 181, '\\x95': 176, '¡': 173, '¥': 154, 'Ñ': 143, 'ÿ': 138, 'Ç': 125, '÷': 116, 'Ú': 113, 'À': 110, '\\x91': 110, '¼': 106, '¹': 102, 'Ê': 98, 'Ý': 86, '¨': 77, 'È': 71, 'þ': 66, 'Î': 66, '\\x84': 61, '¬': 59, 'ª': 58, '¶': 58, 'Ð': 57, 'Ô': 55, 'Þ': 46, 'Õ': 44, 'Ì': 42, 'Ï': 37, '¤': 37, '¾': 34, '\\x82': 33, '¢': 26, 'Ò': 24, 'Æ': 23, 'Û': 19, '\\x9b': 17, '\\x8b': 17, '\\x8a': 13, 'Ë': 12, '¸': 12, '¦': 11, '\\x9a': 11, '¯': 9, 'Ù': 7, '\\x9c': 7, '\\x9f': 6, '\\x8c': 6, '\\x9d': 4, '\\x7f': 3, '\\x8d': 3, '\\x9e': 2, '\\x81': 2, '\\x88': 1, '\\x83': 1})\n", - "Processed 1 lines\n", - "Skipped: 64, Valid: 5100, Valid ratio 0.9876065065840434\n", - "Character frequency: Counter({' ': 104338, 'e': 
55556, 't': 40764, 'a': 37614, 'o': 36131, 'n': 32380, 'i': 31842, 's': 30404, 'r': 28677, 'h': 21952, 'l': 17895, 'd': 16517, 'u': 13567, 'c': 13291, 'm': 10677, 'p': 9851, 'g': 9399, 'f': 9179, 'w': 7721, 'y': 7501, 'b': 6281, '\\n': 5100, '.': 5093, ',': 4769, 'v': 4442, 'k': 3167, ';': 3153, '&': 3087, 'q': 2141, 'T': 1863, 'A': 1350, 'I': 1345, 'S': 1335, '0': 1225, '-': 1202, 'M': 1022, 'B': 1007, 'C': 973, 'x': 897, '1': 800, 'H': 783, 'P': 721, 'W': 653, '2': 614, 'R': 612, 'F': 578, 'G': 577, 'D': 553, 'L': 513, 'E': 506, 'j': 504, 'z': 462, 'N': 406, 'U': 371, '5': 353, 'O': 350, '3': 317, ':': 303, '4': 292, '9': 285, 'K': 283, 'J': 266, '6': 202, 'V': 188, '8': 186, '7': 182, '(': 172, ')': 172, 'Y': 119, '?': 93, '$': 88, 'ü': 75, 'ö': 64, '/': 50, 'Q': 45, 'Z': 40, 'X': 38, '£': 30, '%': 30, '!': 27, 'ä': 21, '#': 17, 'é': 10, 'ß': 10, '+': 4, 'ç': 4, 'à': 3, '@': 3, '²': 3, '`': 2, '*': 2, 'á': 2, 'Ö': 1, 'ë': 1, 'í': 1})\n", - "Cloning into 'data/wmt16_de_en/subword-nmt'...\n", - "remote: Enumerating objects: 580, done.\u001b[K\n", - "remote: Counting objects: 100% (4/4), done.\u001b[K\n", - "remote: Compressing objects: 100% (4/4), done.\u001b[K\n", - "remote: Total 580 (delta 0), reused 1 (delta 0), pack-reused 576\u001b[K\n", - "Receiving objects: 100% (580/580), 237.41 KiB | 1.67 MiB/s, done.\n", - "Resolving deltas: 100% (349/349), done.\n", - "HEAD is now at 48ba99e fix typo in previous commit\n", - "/content/DeepLearningExamples/TensorFlow/Translation/GNMT\n", - "Learning BPE with merge_ops=32000. This may take a while...\n", - "data/wmt16_de_en/subword-nmt/learn_bpe.py:267: DeprecationWarning: this script's location has moved to /content/DeepLearningExamples/TensorFlow/Translation/GNMT/data/wmt16_de_en/subword-nmt/subword_nmt. This symbolic link will be removed in a future version. Please point to the new location, or install the package and use the command 'subword-nmt'\n", - " DeprecationWarning\n", - "Apply BPE with merge_ops=32000 to tokenized files...\n", - "data/wmt16_de_en/subword-nmt/apply_bpe.py:331: DeprecationWarning: this script's location has moved to /content/DeepLearningExamples/TensorFlow/Translation/GNMT/data/wmt16_de_en/subword-nmt/subword_nmt. This symbolic link will be removed in a future version. Please point to the new location, or install the package and use the command 'subword-nmt'\n", - " DeprecationWarning\n", - "data/wmt16_de_en/subword-nmt/apply_bpe.py:348: ResourceWarning: unclosed file <_io.TextIOWrapper name='data/wmt16_de_en/bpe.32000' mode='r' encoding='UTF-8'>\n", - " args.codes = codecs.open(args.codes.name, encoding='utf-8')\n", - "ResourceWarning: Enable tracemalloc to get the object allocation traceback\n", - "data/wmt16_de_en/newstest2009.tok.bpe.32000.en\n", - "data/wmt16_de_en/subword-nmt/apply_bpe.py:331: DeprecationWarning: this script's location has moved to /content/DeepLearningExamples/TensorFlow/Translation/GNMT/data/wmt16_de_en/subword-nmt/subword_nmt. This symbolic link will be removed in a future version. 
Please point to the new location, or install the package and use the command 'subword-nmt'\n",
- "  DeprecationWarning\n",
- "data/wmt16_de_en/subword-nmt/apply_bpe.py:348: ResourceWarning: unclosed file <_io.TextIOWrapper name='data/wmt16_de_en/bpe.32000' mode='r' encoding='UTF-8'>\n",
- "  args.codes = codecs.open(args.codes.name, encoding='utf-8')\n",
- "ResourceWarning: Enable tracemalloc to get the object allocation traceback\n",
- "data/wmt16_de_en/newstest2010.tok.bpe.32000.en\n",
- "data/wmt16_de_en/newstest2011.tok.bpe.32000.en\n",
- "data/wmt16_de_en/newstest2012.tok.bpe.32000.en\n",
- "data/wmt16_de_en/newstest2013.tok.bpe.32000.en\n",
- "data/wmt16_de_en/newstest2014.tok.bpe.32000.en\n",
- "data/wmt16_de_en/newstest2015.tok.bpe.32000.en\n",
- "data/wmt16_de_en/newstest2016.tok.bpe.32000.en\n",
- "data/wmt16_de_en/train.tok.bpe.32000.en\n",
- "data/wmt16_de_en/newstest2009.tok.clean.bpe.32000.en\n",
- "data/wmt16_de_en/newstest2010.tok.clean.bpe.32000.en\n",
- "data/wmt16_de_en/newstest2011.tok.clean.bpe.32000.en\n",
- "data/wmt16_de_en/newstest2012.tok.clean.bpe.32000.en\n",
- "data/wmt16_de_en/newstest2013.tok.clean.bpe.32000.en\n",
- "data/wmt16_de_en/newstest2014.tok.clean.bpe.32000.en\n",
- "data/wmt16_de_en/newstest2015.tok.clean.bpe.32000.en\n",
- "data/wmt16_de_en/newstest2016.tok.clean.bpe.32000.en\n",
- "data/wmt16_de_en/newstest_dev.tok.clean.bpe.32000.en\n",
- "data/wmt16_de_en/train.tok.clean.bpe.32000.en\n",
- "data/wmt16_de_en/newstest2009.tok.bpe.32000.de\n",
- "data/wmt16_de_en/newstest2010.tok.bpe.32000.de\n",
- "data/wmt16_de_en/newstest2011.tok.bpe.32000.de\n",
- "data/wmt16_de_en/newstest2012.tok.bpe.32000.de\n",
- "data/wmt16_de_en/newstest2013.tok.bpe.32000.de\n",
- "data/wmt16_de_en/newstest2014.tok.bpe.32000.de\n",
- "data/wmt16_de_en/newstest2015.tok.bpe.32000.de\n",
- "data/wmt16_de_en/newstest2016.tok.bpe.32000.de\n",
- "data/wmt16_de_en/train.tok.bpe.32000.de\n",
- "data/wmt16_de_en/newstest2009.tok.clean.bpe.32000.de\n",
- "data/wmt16_de_en/newstest2010.tok.clean.bpe.32000.de\n",
- "data/wmt16_de_en/newstest2011.tok.clean.bpe.32000.de\n",
- "data/wmt16_de_en/newstest2012.tok.clean.bpe.32000.de\n",
- "data/wmt16_de_en/newstest2013.tok.clean.bpe.32000.de\n",
- "data/wmt16_de_en/newstest2014.tok.clean.bpe.32000.de\n",
- "data/wmt16_de_en/newstest2015.tok.clean.bpe.32000.de\n",
- "data/wmt16_de_en/newstest2016.tok.clean.bpe.32000.de\n",
- "data/wmt16_de_en/newstest_dev.tok.clean.bpe.32000.de\n",
- "data/wmt16_de_en/subword-nmt/apply_bpe.py:331: DeprecationWarning: this script's location has moved to /content/DeepLearningExamples/TensorFlow/Translation/GNMT/data/wmt16_de_en/subword-nmt/subword_nmt. This symbolic link will be removed in a future version. 
Please point to the new location, or install the package and use the command 'subword-nmt'\n",
- "  DeprecationWarning\n",
- "data/wmt16_de_en/subword-nmt/apply_bpe.py:348: ResourceWarning: unclosed file <_io.TextIOWrapper name='data/wmt16_de_en/bpe.32000' mode='r' encoding='UTF-8'>\n",
- "  args.codes = codecs.open(args.codes.name, encoding='utf-8')\n",
- "ResourceWarning: Enable tracemalloc to get the object allocation traceback\n",
- "data/wmt16_de_en/train.tok.clean.bpe.32000.de\n",
- "data/wmt16_de_en/subword-nmt/get_vocab.py:60: DeprecationWarning: this script's location has moved to /content/DeepLearningExamples/TensorFlow/Translation/GNMT/data/wmt16_de_en/subword-nmt/subword_nmt. This symbolic link will be removed in a future version. Please point to the new location, or install the package and use the command 'subword-nmt'\n",
- "  DeprecationWarning\n",
- "All done.\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "LgEi29Hq3--_"
- },
- "source": [
- "## Start training.\n",
- "\n",
- "All results and logs are saved to the results directory (on the host) or to the /workspace/gnmt/results directory (in the container). The training script saves a checkpoint after every training epoch and after every 2000 training steps within each epoch. You can modify the results directory using the --output_dir argument.\n",
- "\n",
- "To launch mixed precision training on 1 GPU, run:\n",
- "```\n",
- "python nmt.py --output_dir=results --batch_size=128 --learning_rate=5e-4 --amp\n",
- "```\n",
- "To launch mixed precision training on 8 GPUs, run:\n",
- "```\n",
- "python nmt.py --output_dir=results --batch_size=1024 --num_gpus=8 --learning_rate=2e-3 --amp\n",
- "```\n",
- "To launch FP32 (TF32 on NVIDIA Ampere GPUs) training on 1 GPU, run:\n",
- "```\n",
- "python nmt.py --output_dir=results --batch_size=128 --learning_rate=5e-4\n",
- "```\n",
- "To launch FP32 (TF32 on NVIDIA Ampere GPUs) training on 8 GPUs, run:\n",
- "```\n",
- "python nmt.py --output_dir=results --batch_size=1024 --num_gpus=8 --learning_rate=2e-3\n",
- "```"
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "4NXf8TF1Rbn_",
- "outputId": "ded7483f-3b89-47cc-ca73-5135a6752e6c",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "source": [
- "!pip uninstall tensorflow"
- ],
- "execution_count": 8,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "Found existing installation: tensorflow 2.6.0\n",
- "Uninstalling tensorflow-2.6.0:\n",
- "  Would remove:\n",
- "    /usr/local/bin/estimator_ckpt_converter\n",
- "    /usr/local/bin/import_pb_to_tensorboard\n",
- "    /usr/local/bin/saved_model_cli\n",
- "    /usr/local/bin/tensorboard\n",
- "    /usr/local/bin/tf_upgrade_v2\n",
- "    /usr/local/bin/tflite_convert\n",
- "    /usr/local/bin/toco\n",
- "    /usr/local/bin/toco_from_protos\n",
- "    /usr/local/lib/python3.7/dist-packages/tensorflow-2.6.0.dist-info/*\n",
- "    /usr/local/lib/python3.7/dist-packages/tensorflow/*\n",
- "Proceed (y/n)? 
y\n", - " Successfully uninstalled tensorflow-2.6.0\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "NbC0L7NsSIAC", - "outputId": "f0687819-7df7-42b2-aa8e-357e706fe725", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "source": [ - "!pip install tensorflow==1.13.2" - ], - "execution_count": 9, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Collecting tensorflow==1.13.2\n", - " Downloading tensorflow-1.13.2-cp37-cp37m-manylinux1_x86_64.whl (92.7 MB)\n", - "\u001b[K |████████████████████████████████| 92.7 MB 11 kB/s \n", - "\u001b[?25hRequirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow==1.13.2) (1.1.0)\n", - "Requirement already satisfied: protobuf>=3.6.1 in /usr/local/lib/python3.7/dist-packages (from tensorflow==1.13.2) (3.17.3)\n", - "Requirement already satisfied: absl-py>=0.1.6 in /usr/local/lib/python3.7/dist-packages (from tensorflow==1.13.2) (0.12.0)\n", - "Requirement already satisfied: astor>=0.6.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow==1.13.2) (0.8.1)\n", - "Requirement already satisfied: numpy<2.0,>=1.16.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow==1.13.2) (1.19.5)\n", - "Collecting tensorboard<1.14.0,>=1.13.0\n", - " Downloading tensorboard-1.13.1-py3-none-any.whl (3.2 MB)\n", - "\u001b[K |████████████████████████████████| 3.2 MB 54.6 MB/s \n", - "\u001b[?25hCollecting tensorflow-estimator<1.14.0rc0,>=1.13.0\n", - " Downloading tensorflow_estimator-1.13.0-py2.py3-none-any.whl (367 kB)\n", - "\u001b[K |████████████████████████████████| 367 kB 49.1 MB/s \n", - "\u001b[?25hRequirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow==1.13.2) (1.15.0)\n", - "Requirement already satisfied: keras-preprocessing>=1.0.5 in /usr/local/lib/python3.7/dist-packages (from tensorflow==1.13.2) (1.1.2)\n", - "Collecting keras-applications>=1.0.6\n", - " Downloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)\n", - "\u001b[K |████████████████████████████████| 50 kB 6.5 MB/s \n", - "\u001b[?25hRequirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.7/dist-packages (from tensorflow==1.13.2) (0.37.0)\n", - "Requirement already satisfied: grpcio>=1.8.6 in /usr/local/lib/python3.7/dist-packages (from tensorflow==1.13.2) (1.39.0)\n", - "Requirement already satisfied: gast>=0.2.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow==1.13.2) (0.4.0)\n", - "Requirement already satisfied: h5py in /usr/local/lib/python3.7/dist-packages (from keras-applications>=1.0.6->tensorflow==1.13.2) (3.1.0)\n", - "Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.7/dist-packages (from tensorboard<1.14.0,>=1.13.0->tensorflow==1.13.2) (1.0.1)\n", - "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.7/dist-packages (from tensorboard<1.14.0,>=1.13.0->tensorflow==1.13.2) (3.3.4)\n", - "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from markdown>=2.6.8->tensorboard<1.14.0,>=1.13.0->tensorflow==1.13.2) (4.6.4)\n", - "Collecting mock>=2.0.0\n", - " Downloading mock-4.0.3-py3-none-any.whl (28 kB)\n", - "Requirement already satisfied: cached-property in /usr/local/lib/python3.7/dist-packages (from h5py->keras-applications>=1.0.6->tensorflow==1.13.2) (1.5.2)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from 
importlib-metadata->markdown>=2.6.8->tensorboard<1.14.0,>=1.13.0->tensorflow==1.13.2) (3.5.0)\n", - "Requirement already satisfied: typing-extensions>=3.6.4 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->markdown>=2.6.8->tensorboard<1.14.0,>=1.13.0->tensorflow==1.13.2) (3.7.4.3)\n", - "Installing collected packages: mock, tensorflow-estimator, tensorboard, keras-applications, tensorflow\n", - " Attempting uninstall: tensorflow-estimator\n", - " Found existing installation: tensorflow-estimator 2.6.0\n", - " Uninstalling tensorflow-estimator-2.6.0:\n", - " Successfully uninstalled tensorflow-estimator-2.6.0\n", - " Attempting uninstall: tensorboard\n", - " Found existing installation: tensorboard 2.6.0\n", - " Uninstalling tensorboard-2.6.0:\n", - " Successfully uninstalled tensorboard-2.6.0\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", - "kapre 0.3.5 requires tensorflow>=2.0.0, but you have tensorflow 1.13.2 which is incompatible.\u001b[0m\n", - "Successfully installed keras-applications-1.0.8 mock-4.0.3 tensorboard-1.13.1 tensorflow-1.13.2 tensorflow-estimator-1.13.0\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "vK1Cx58G3--_", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "743d00c9-d850-4075-d108-08e395d0ef1c" - }, - "source": [ - "!python nmt.py --output_dir=results --batch_size=128 --learning_rate=5e-4 --amp\n" - ], - "execution_count": 10, - "outputs": [ - { - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:526: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n", - "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:527: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n", - "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:528: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n", - "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:529: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n", - "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:530: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n", - "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:535: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n", - "Enabling TF-AMP\n", - 
"use_defun is false for attention\n", - "Running train and eval mode.\n", - "# Set random seed to 1\n", - "# Creating output directory results ...\n", - "DLL 2021-08-26 21:07:25.420635 - PARAMETER num_units : 1024 num_layers : 4 num_encoder_layers : None num_decoder_layers : None encoder_type : gnmt residual : True time_major : True num_embeddings_partitions : 0 attention : normed_bahdanau attention_architecture : gnmt_v2 output_attention : True pass_hidden_state : True optimizer : adam learning_rate : 0.0005 warmup_steps : 200 warmup_scheme : t2t decay_scheme : luong234 max_train_epochs : 6 target_bleu : None colocate_gradients_with_ops : True label_smoothing : 0.1 init_op : uniform init_weight : 0.1 src : en tgt : de data_dir : data/wmt16_de_en train_prefix : train.tok.clean.bpe.32000 test_prefix : newstest2014.tok.bpe.32000 translate_file : None output_dir : results vocab_prefix : vocab.bpe.32000 embed_prefix : None sos : eos : share_vocab : True check_special_token : True src_max_len : 50 tgt_max_len : 50 src_max_len_infer : None tgt_max_len_infer : 80 unit_type : lstm forget_bias : 0.0 dropout : 0.2 max_gradient_norm : 5.0 batch_size : 128 num_buckets : 5 subword_option : bpe use_char_encode : False save_checkpoints_steps : 2000 log_step_count_steps : 10 num_gpus : 1 hparams_path : None random_seed : 1 language_model : False ckpt : None infer_batch_size : 128 detokenizer_file : None tokenizer_file : None infer_mode : beam_search beam_width : 5 length_penalty_weight : 0.6 coverage_penalty_weight : 0.1 num_workers : 1 amp : True use_fastmath : False use_fp16 : False fp16_loss_scale : 128 enable_auto_loss_scale : True fp16_inc_loss_scale_every_n : 128 check_tower_loss_numerics : False use_fp32_batch_matmul : False force_inputs_padding : False use_xla : False xla_compile : False use_autojit_xla : False use_pintohost_optimizer : False use_cudnn_lstm : False use_loose_bidi_cudnn_lstm : False use_fused_lstm : True use_fused_lstm_dec : False gpu_indices : parallel_iterations : 10 use_dist_strategy : False hierarchical_copy : False network_topology : dgx1 use_block_lstm : False use_defun : False gradient_repacking : 0 compact_gradient_transfer : True all_reduce_spec : nccl agg_small_grads_max_bytes : 0 agg_small_grads_max_group : 10 allreduce_merge_scope : 1 local_parameter_device : gpu use_resource_vars : False debug : False debug_num_train_steps : None show_metrics : True clip_grads : True profile : False profile_save_steps : 10 use_dynamic_rnn : True use_synthetic_data : False mode : train_and_eval \n", - "# Vocab file data/wmt16_de_en/vocab.bpe.32000.en exists\n", - "The first 3 vocab words [,, ., the] are not [, , ]\n", - " using source vocab for target\n", - " agg_small_grads_max_bytes=0\n", - " agg_small_grads_max_group=10\n", - " all_reduce_spec=nccl\n", - " allreduce_merge_scope=1\n", - " amp=True\n", - " attention=normed_bahdanau\n", - " attention_architecture=gnmt_v2\n", - " batch_size=128\n", - " beam_width=5\n", - " best_bleu=0\n", - " best_bleu_dir=results/best_bleu\n", - " check_special_token=True\n", - " check_tower_loss_numerics=False\n", - " ckpt=None\n", - " clip_grads=True\n", - " colocate_gradients_with_ops=True\n", - " compact_gradient_transfer=True\n", - " coverage_penalty_weight=0.1\n", - " debug=False\n", - " debug_num_train_steps=None\n", - " decay_scheme=luong234\n", - " detokenizer_file=data/wmt16_de_en/mosesdecoder/scripts/tokenizer/detokenizer.perl\n", - " dropout=0.2\n", - " embed_prefix=None\n", - " enable_auto_loss_scale=True\n", - " encoder_type=gnmt\n", - " 
eos=\n", - " epoch_step=0\n", - " force_inputs_padding=False\n", - " forget_bias=0.0\n", - " fp16_inc_loss_scale_every_n=128\n", - " fp16_loss_scale=128\n", - " gpu_indices=\n", - " gradient_repacking=0\n", - " hierarchical_copy=False\n", - " infer_batch_size=128\n", - " infer_mode=beam_search\n", - " init_op=uniform\n", - " init_weight=0.1\n", - " label_smoothing=0.1\n", - " language_model=False\n", - " learning_rate=0.0005\n", - " length_penalty_weight=0.6\n", - " local_parameter_device=gpu\n", - " log_step_count_steps=10\n", - " max_gradient_norm=5.0\n", - " max_train_epochs=6\n", - " mode=train_and_eval\n", - " network_topology=NetworkTopology.DGX1\n", - " num_buckets=5\n", - " num_dec_emb_partitions=0\n", - " num_decoder_layers=4\n", - " num_decoder_residual_layers=2\n", - " num_embeddings_partitions=0\n", - " num_enc_emb_partitions=0\n", - " num_encoder_layers=4\n", - " num_encoder_residual_layers=2\n", - " num_gpus=1\n", - " num_units=1024\n", - " optimizer=adam\n", - " output_attention=True\n", - " output_dir=results\n", - " parallel_iterations=10\n", - " pass_hidden_state=True\n", - " profile=False\n", - " profile_save_steps=10\n", - " random_seed=1\n", - " residual=True\n", - " save_checkpoints_steps=2000\n", - " share_vocab=True\n", - " show_metrics=True\n", - " sos=\n", - " src=en\n", - " src_embed_file=\n", - " src_max_len=50\n", - " src_max_len_infer=None\n", - " src_vocab_file=results/vocab.bpe.32000.en\n", - " src_vocab_size=32320\n", - " subword_option=bpe\n", - " target_bleu=None\n", - " test_prefix=data/wmt16_de_en/newstest2014.tok.bpe.32000\n", - " tgt=de\n", - " tgt_embed_file=\n", - " tgt_max_len=50\n", - " tgt_max_len_infer=80\n", - " tgt_vocab_file=results/vocab.bpe.32000.en\n", - " tgt_vocab_size=32320\n", - " time_major=True\n", - " tokenizer_file=data/wmt16_de_en/mosesdecoder/scripts/tokenizer/tokenizer.perl\n", - " train_prefix=data/wmt16_de_en/train.tok.clean.bpe.32000\n", - " translate_file=None\n", - " unit_type=lstm\n", - " use_autojit_xla=False\n", - " use_block_lstm=False\n", - " use_char_encode=False\n", - " use_cudnn_lstm=False\n", - " use_dist_strategy=False\n", - " use_dynamic_rnn=True\n", - " use_fastmath=False\n", - " use_fp16=False\n", - " use_fp32_batch_matmul=False\n", - " use_fused_lstm=True\n", - " use_fused_lstm_dec=False\n", - " use_loose_bidi_cudnn_lstm=False\n", - " use_pintohost_optimizer=False\n", - " use_resource_vars=False\n", - " use_synthetic_data=False\n", - " use_xla=False\n", - " vocab_prefix=data/wmt16_de_en/vocab.bpe.32000\n", - " warmup_scheme=t2t\n", - " warmup_steps=200\n", - " xla_compile=False\n", - "training hparams:\n", - " agg_small_grads_max_bytes=0\n", - " agg_small_grads_max_group=10\n", - " all_reduce_spec=nccl\n", - " allreduce_merge_scope=1\n", - " amp=True\n", - " attention=normed_bahdanau\n", - " attention_architecture=gnmt_v2\n", - " batch_size=128\n", - " beam_width=5\n", - " best_bleu=0\n", - " best_bleu_dir=results/best_bleu\n", - " check_special_token=True\n", - " check_tower_loss_numerics=False\n", - " ckpt=None\n", - " clip_grads=True\n", - " colocate_gradients_with_ops=True\n", - " compact_gradient_transfer=True\n", - " coverage_penalty_weight=0.1\n", - " debug=False\n", - " debug_num_train_steps=None\n", - " decay_scheme=luong234\n", - " detokenizer_file=data/wmt16_de_en/mosesdecoder/scripts/tokenizer/detokenizer.perl\n", - " dropout=0.2\n", - " embed_prefix=None\n", - " enable_auto_loss_scale=True\n", - " encoder_type=gnmt\n", - " eos=\n", - " epoch_step=0\n", - " force_inputs_padding=False\n", - " 
forget_bias=0.0\n", - " fp16_inc_loss_scale_every_n=128\n", - " fp16_loss_scale=128\n", - " gpu_indices=\n", - " gradient_repacking=0\n", - " hierarchical_copy=False\n", - " infer_batch_size=128\n", - " infer_mode=beam_search\n", - " init_op=uniform\n", - " init_weight=0.1\n", - " label_smoothing=0.1\n", - " language_model=False\n", - " learning_rate=0.0005\n", - " length_penalty_weight=0.6\n", - " local_parameter_device=gpu\n", - " log_step_count_steps=10\n", - " max_gradient_norm=5.0\n", - " max_train_epochs=6\n", - " mode=train_and_eval\n", - " network_topology=NetworkTopology.DGX1\n", - " num_buckets=5\n", - " num_dec_emb_partitions=0\n", - " num_decoder_layers=4\n", - " num_decoder_residual_layers=2\n", - " num_embeddings_partitions=0\n", - " num_enc_emb_partitions=0\n", - " num_encoder_layers=4\n", - " num_encoder_residual_layers=2\n", - " num_gpus=1\n", - " num_units=1024\n", - " optimizer=adam\n", - " output_attention=True\n", - " output_dir=results\n", - " parallel_iterations=10\n", - " pass_hidden_state=True\n", - " profile=False\n", - " profile_save_steps=10\n", - " random_seed=1\n", - " residual=True\n", - " save_checkpoints_steps=2000\n", - " share_vocab=True\n", - " show_metrics=True\n", - " sos=\n", - " src=en\n", - " src_embed_file=\n", - " src_max_len=50\n", - " src_max_len_infer=None\n", - " src_vocab_file=results/vocab.bpe.32000.en\n", - " src_vocab_size=32320\n", - " subword_option=bpe\n", - " target_bleu=None\n", - " test_prefix=data/wmt16_de_en/newstest2014.tok.bpe.32000\n", - " tgt=de\n", - " tgt_embed_file=\n", - " tgt_max_len=50\n", - " tgt_max_len_infer=80\n", - " tgt_vocab_file=results/vocab.bpe.32000.en\n", - " tgt_vocab_size=32320\n", - " time_major=True\n", - " tokenizer_file=data/wmt16_de_en/mosesdecoder/scripts/tokenizer/tokenizer.perl\n", - " train_prefix=data/wmt16_de_en/train.tok.clean.bpe.32000\n", - " translate_file=None\n", - " unit_type=lstm\n", - " use_autojit_xla=False\n", - " use_block_lstm=False\n", - " use_char_encode=False\n", - " use_cudnn_lstm=False\n", - " use_dist_strategy=False\n", - " use_dynamic_rnn=True\n", - " use_fastmath=False\n", - " use_fp16=False\n", - " use_fp32_batch_matmul=False\n", - " use_fused_lstm=True\n", - " use_fused_lstm_dec=False\n", - " use_loose_bidi_cudnn_lstm=False\n", - " use_pintohost_optimizer=False\n", - " use_resource_vars=False\n", - " use_synthetic_data=False\n", - " use_xla=False\n", - " vocab_prefix=data/wmt16_de_en/vocab.bpe.32000\n", - " warmup_scheme=t2t\n", - " warmup_steps=200\n", - " xla_compile=False\n", - "infer_hparams:\n", - " agg_small_grads_max_bytes=0\n", - " agg_small_grads_max_group=10\n", - " all_reduce_spec=nccl\n", - " allreduce_merge_scope=1\n", - " amp=True\n", - " attention=normed_bahdanau\n", - " attention_architecture=gnmt_v2\n", - " batch_size=128\n", - " beam_width=5\n", - " best_bleu=0\n", - " best_bleu_dir=results/best_bleu\n", - " check_special_token=True\n", - " check_tower_loss_numerics=False\n", - " ckpt=None\n", - " clip_grads=True\n", - " colocate_gradients_with_ops=True\n", - " compact_gradient_transfer=True\n", - " coverage_penalty_weight=0.1\n", - " debug=False\n", - " debug_num_train_steps=None\n", - " decay_scheme=luong234\n", - " detokenizer_file=data/wmt16_de_en/mosesdecoder/scripts/tokenizer/detokenizer.perl\n", - " dropout=0.2\n", - " embed_prefix=None\n", - " enable_auto_loss_scale=True\n", - " encoder_type=gnmt\n", - " eos=\n", - " epoch_step=0\n", - " force_inputs_padding=False\n", - " forget_bias=0.0\n", - " fp16_inc_loss_scale_every_n=128\n", - " 
fp16_loss_scale=128\n", - " gpu_indices=\n", - " gradient_repacking=0\n", - " hierarchical_copy=False\n", - " infer_batch_size=128\n", - " infer_mode=beam_search\n", - " init_op=uniform\n", - " init_weight=0.1\n", - " label_smoothing=0.1\n", - " language_model=False\n", - " learning_rate=0.0005\n", - " length_penalty_weight=0.6\n", - " local_parameter_device=gpu\n", - " log_step_count_steps=10\n", - " max_gradient_norm=5.0\n", - " max_train_epochs=6\n", - " mode=train_and_eval\n", - " network_topology=NetworkTopology.DGX1\n", - " num_buckets=1\n", - " num_dec_emb_partitions=0\n", - " num_decoder_layers=4\n", - " num_decoder_residual_layers=2\n", - " num_embeddings_partitions=0\n", - " num_enc_emb_partitions=0\n", - " num_encoder_layers=4\n", - " num_encoder_residual_layers=2\n", - " num_gpus=1\n", - " num_units=1024\n", - " optimizer=adam\n", - " output_attention=True\n", - " output_dir=results\n", - " parallel_iterations=10\n", - " pass_hidden_state=True\n", - " profile=False\n", - " profile_save_steps=10\n", - " random_seed=1\n", - " residual=True\n", - " save_checkpoints_steps=2000\n", - " share_vocab=True\n", - " show_metrics=True\n", - " sos=\n", - " src=en\n", - " src_embed_file=\n", - " src_max_len=50\n", - " src_max_len_infer=None\n", - " src_vocab_file=results/vocab.bpe.32000.en\n", - " src_vocab_size=32320\n", - " subword_option=bpe\n", - " target_bleu=None\n", - " test_prefix=data/wmt16_de_en/newstest2014.tok.bpe.32000\n", - " tgt=de\n", - " tgt_embed_file=\n", - " tgt_max_len=50\n", - " tgt_max_len_infer=80\n", - " tgt_vocab_file=results/vocab.bpe.32000.en\n", - " tgt_vocab_size=32320\n", - " time_major=True\n", - " tokenizer_file=data/wmt16_de_en/mosesdecoder/scripts/tokenizer/tokenizer.perl\n", - " train_prefix=data/wmt16_de_en/train.tok.clean.bpe.32000\n", - " translate_file=None\n", - " unit_type=lstm\n", - " use_autojit_xla=False\n", - " use_block_lstm=False\n", - " use_char_encode=False\n", - " use_cudnn_lstm=False\n", - " use_dist_strategy=False\n", - " use_dynamic_rnn=True\n", - " use_fastmath=False\n", - " use_fp16=False\n", - " use_fp32_batch_matmul=False\n", - " use_fused_lstm=True\n", - " use_fused_lstm_dec=False\n", - " use_loose_bidi_cudnn_lstm=False\n", - " use_pintohost_optimizer=False\n", - " use_resource_vars=False\n", - " use_synthetic_data=False\n", - " use_xla=False\n", - " vocab_prefix=data/wmt16_de_en/vocab.bpe.32000\n", - " warmup_scheme=t2t\n", - " warmup_steps=200\n", - " xla_compile=False\n", - "Starting epoch 0\n", - "sess master is \n", - "INFO:tensorflow:Using config: {'_model_dir': 'results', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 2000, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true\n", - "graph_options {\n", - " rewrite_options {\n", - " pin_to_host_optimization: OFF\n", - " }\n", - "}\n", - ", '_keep_checkpoint_max': None, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 10, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': , '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}\n", - "WARNING:tensorflow:Estimator's model_fn (.fn at 0x7f955cf8eb90>) includes params argument, but params are not passed to Estimator.\n", - "WARNING:tensorflow:From 
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "Colocations handled automatically by placer.\n", - "WARNING:tensorflow:From /content/DeepLearningExamples/TensorFlow/Translation/GNMT/utils/iterator_utils.py:112: DatasetV1.shard (from tensorflow.python.data.ops.dataset_ops) is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "Use `dataset.apply(tf.data.experimental.filter_for_shard(...))`.\n", - "WARNING:tensorflow:From /content/DeepLearningExamples/TensorFlow/Translation/GNMT/utils/iterator_utils.py:233: group_by_window (from tensorflow.contrib.data.python.ops.grouping) is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "Use `tf.data.experimental.group_by_window(...)`.\n", - "WARNING:tensorflow:From /content/DeepLearningExamples/TensorFlow/Translation/GNMT/utils/iterator_utils.py:226: to_int64 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "Use tf.cast instead.\n", - "INFO:tensorflow:Calling model_fn.\n", - "Running fast mode_fn\n", - "global_step already created!\n", - "model.global_step.name: global_step:0\n", - "# Use the same embedding for source and target\n", - "# Creating train graph ...\n", - "# Build a GNMT encoder\n", - " num_bi_layers = 1\n", - " num_uni_layers = 3\n", - "source.shape: (128, ?)\n", - "WARNING:tensorflow:From /content/DeepLearningExamples/TensorFlow/Translation/GNMT/gnmt_model.py:131: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "Please use `rate` instead of `keep_prob`. 
Rate should be set to `rate = 1 - keep_prob`.\n", - " cell 0 LSTM, forget_bias=0WARNING:tensorflow:From /content/DeepLearningExamples/TensorFlow/Translation/GNMT/model_helper.py:266: LSTMCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.\n", - " DropoutWrapper, dropout=0.2 \n", - " cell 1 LSTM, forget_bias=0 DropoutWrapper, dropout=0.2 \n", - " cell 2 LSTM, forget_bias=0 DropoutWrapper, dropout=0.2 ResidualWrapper\n", - " cell 3 LSTM, forget_bias=0 DropoutWrapper, dropout=0.2 ResidualWrapper\n", - "WARNING:tensorflow:From /content/DeepLearningExamples/TensorFlow/Translation/GNMT/gnmt_model.py:662: MultiRNNCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.\n", - "WARNING:tensorflow:From /content/DeepLearningExamples/TensorFlow/Translation/GNMT/model.py:452: dynamic_rnn (from tensorflow.python.ops.rnn) is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "Please use `keras.layers.RNN(cell)`, which is equivalent to this API\n", - "WARNING:tensorflow:From /usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/rnn.py:626: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "Use tf.cast instead.\n", - "WARNING:tensorflow:From /content/DeepLearningExamples/TensorFlow/Translation/GNMT/model.py:595: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "Use tf.cast instead.\n", - " learning_rate=0.0005, warmup_steps=200, warmup_scheme=t2t\n", - " decay_scheme=luong234, start_decay_step=110468, decay_steps 13808, decay_factor 0.5\n", - "WARNING:tensorflow:From /usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/math_grad.py:102: div (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "Deprecated in favor of operator or tf.math.divide.\n", - "# Trainable variables for tower: 0\n", - "Format: , , , <(soft) device placement>\n", - " v0/embeddings/embedding_share:0, (32320, 1024), float32_ref, /device:GPU:0\n", - " v0/dynamic_seq2seq/encoder/lstm_cell/kernel:0, (2048, 4096), float32_ref, /device:GPU:0\n", - " v0/dynamic_seq2seq/encoder/lstm_cell/bias:0, (4096,), float32_ref, /device:GPU:0\n", - " v0/dynamic_seq2seq/encoder/lstm_cell_1/kernel:0, (2048, 4096), float32_ref, /device:GPU:0\n", - " v0/dynamic_seq2seq/encoder/lstm_cell_1/bias:0, (4096,), float32_ref, /device:GPU:0\n", - " v0/dynamic_seq2seq/encoder/lstm_cell_2/kernel:0, (3072, 4096), float32_ref, /device:GPU:0\n", - " v0/dynamic_seq2seq/encoder/lstm_cell_2/bias:0, (4096,), float32_ref, /device:GPU:0\n", - " v0/dynamic_seq2seq/encoder/lstm_cell_3/kernel:0, (2048, 4096), float32_ref, /device:GPU:0\n", - " v0/dynamic_seq2seq/encoder/lstm_cell_3/bias:0, (4096,), float32_ref, /device:GPU:0\n", - " v0/dynamic_seq2seq/encoder/lstm_cell_4/kernel:0, (2048, 4096), float32_ref, /device:GPU:0\n", - " v0/dynamic_seq2seq/encoder/lstm_cell_4/bias:0, (4096,), float32_ref, /device:GPU:0\n", - " v0/dynamic_seq2seq/decoder/memory_layer/kernel:0, 
(1024, 1024), float32_ref, /device:GPU:0\n", - " v0/dynamic_seq2seq/decoder/attention_v:0, (1024,), float32_ref, /device:GPU:0\n", - " v0/dynamic_seq2seq/decoder/attention_g:0, (), float32_ref, /device:GPU:0\n", - " v0/dynamic_seq2seq/decoder/attention_b:0, (1024,), float32_ref, /device:GPU:0\n", - " v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/kernel:0, (3072, 4096), float32, /device:GPU:0\n", - " v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/bias:0, (4096,), float32, /device:GPU:0\n", - " v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/bahdanau_attention/query_layer/kernel:0, (1024, 1024), float32_ref, /device:GPU:0\n", - " v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/kernel:0, (3072, 4096), float32, /device:GPU:0\n", - " v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/bias:0, (4096,), float32, /device:GPU:0\n", - " v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/kernel:0, (3072, 4096), float32, /device:GPU:0\n", - " v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/bias:0, (4096,), float32, /device:GPU:0\n", - " v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/kernel:0, (3072, 4096), float32, /device:GPU:0\n", - " v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/bias:0, (4096,), float32, /device:GPU:0\n", - " v0/dynamic_seq2seq/decoder/output_projection/kernel:0, (1024, 32320), float32_ref, /device:GPU:0\n", - "Total params size: 0.61 GB\n", - "Finish building fprop and per-tower bprop.\n", - "Finish building grad aggregation.\n", - "All copy-from vars(0): \n", - "All skippped vars(78): \n", - "global_step:0\n", - "v0/embeddings/embedding_share:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell/kernel:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell/bias:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_1/kernel:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_1/bias:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_2/kernel:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_2/bias:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_3/kernel:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_3/bias:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_4/kernel:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_4/bias:0\n", - "v0/dynamic_seq2seq/decoder/memory_layer/kernel:0\n", - "v0/dynamic_seq2seq/decoder/attention_v:0\n", - "v0/dynamic_seq2seq/decoder/attention_g:0\n", - "v0/dynamic_seq2seq/decoder/attention_b:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/kernel:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/bias:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/bahdanau_attention/query_layer/kernel:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/kernel:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/bias:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/kernel:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/bias:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/kernel:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/bias:0\n", - "v0/dynamic_seq2seq/decoder/output_projection/kernel:0\n", - "tower_0/append_gradient_ops/beta1_power:0\n", - "tower_0/append_gradient_ops/beta2_power:0\n", - "v0/embeddings/embedding_share/Adam:0\n", - "v0/embeddings/embedding_share/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell/kernel/Adam:0\n", - 
"v0/dynamic_seq2seq/encoder/lstm_cell/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell/bias/Adam:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell/bias/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_1/kernel/Adam:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_1/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_1/bias/Adam:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_1/bias/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_2/kernel/Adam:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_2/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_2/bias/Adam:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_2/bias/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_3/kernel/Adam:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_3/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_3/bias/Adam:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_3/bias/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_4/kernel/Adam:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_4/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_4/bias/Adam:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_4/bias/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/memory_layer/kernel/Adam:0\n", - "v0/dynamic_seq2seq/decoder/memory_layer/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/attention_v/Adam:0\n", - "v0/dynamic_seq2seq/decoder/attention_v/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/attention_g/Adam:0\n", - "v0/dynamic_seq2seq/decoder/attention_g/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/attention_b/Adam:0\n", - "v0/dynamic_seq2seq/decoder/attention_b/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/kernel/Adam:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/bias/Adam:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/bias/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/bahdanau_attention/query_layer/kernel/Adam:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/bahdanau_attention/query_layer/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/Adam:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/bias/Adam:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/bias/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/kernel/Adam:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/bias/Adam:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/bias/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/kernel/Adam:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/bias/Adam:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/bias/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/output_projection/kernel/Adam:0\n", - "v0/dynamic_seq2seq/decoder/output_projection/kernel/Adam_1:0\n", - "Saveable vars(78): \n", - "global_step:0\n", - "v0/embeddings/embedding_share:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell/kernel:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell/bias:0\n", - 
"v0/dynamic_seq2seq/encoder/lstm_cell_1/kernel:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_1/bias:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_2/kernel:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_2/bias:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_3/kernel:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_3/bias:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_4/kernel:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_4/bias:0\n", - "v0/dynamic_seq2seq/decoder/memory_layer/kernel:0\n", - "v0/dynamic_seq2seq/decoder/attention_v:0\n", - "v0/dynamic_seq2seq/decoder/attention_g:0\n", - "v0/dynamic_seq2seq/decoder/attention_b:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/kernel:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/bias:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/bahdanau_attention/query_layer/kernel:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/kernel:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/bias:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/kernel:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/bias:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/kernel:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/bias:0\n", - "v0/dynamic_seq2seq/decoder/output_projection/kernel:0\n", - "tower_0/append_gradient_ops/beta1_power:0\n", - "tower_0/append_gradient_ops/beta2_power:0\n", - "v0/embeddings/embedding_share/Adam:0\n", - "v0/embeddings/embedding_share/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell/kernel/Adam:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell/bias/Adam:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell/bias/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_1/kernel/Adam:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_1/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_1/bias/Adam:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_1/bias/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_2/kernel/Adam:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_2/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_2/bias/Adam:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_2/bias/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_3/kernel/Adam:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_3/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_3/bias/Adam:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_3/bias/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_4/kernel/Adam:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_4/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_4/bias/Adam:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_4/bias/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/memory_layer/kernel/Adam:0\n", - "v0/dynamic_seq2seq/decoder/memory_layer/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/attention_v/Adam:0\n", - "v0/dynamic_seq2seq/decoder/attention_v/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/attention_g/Adam:0\n", - "v0/dynamic_seq2seq/decoder/attention_g/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/attention_b/Adam:0\n", - "v0/dynamic_seq2seq/decoder/attention_b/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/kernel/Adam:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/kernel/Adam_1:0\n", - 
"v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/bias/Adam:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/bias/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/bahdanau_attention/query_layer/kernel/Adam:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/bahdanau_attention/query_layer/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/Adam:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/bias/Adam:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/bias/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/kernel/Adam:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/bias/Adam:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/bias/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/kernel/Adam:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/bias/Adam:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/bias/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/output_projection/kernel/Adam:0\n", - "v0/dynamic_seq2seq/decoder/output_projection/kernel/Adam_1:0\n", - "All global trainable vars(25): \n", - "v0/embeddings/embedding_share:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell/kernel:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell/bias:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_1/kernel:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_1/bias:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_2/kernel:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_2/bias:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_3/kernel:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_3/bias:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_4/kernel:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_4/bias:0\n", - "v0/dynamic_seq2seq/decoder/memory_layer/kernel:0\n", - "v0/dynamic_seq2seq/decoder/attention_v:0\n", - "v0/dynamic_seq2seq/decoder/attention_g:0\n", - "v0/dynamic_seq2seq/decoder/attention_b:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/kernel:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/bias:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/bahdanau_attention/query_layer/kernel:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/kernel:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/bias:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/kernel:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/bias:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/kernel:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/bias:0\n", - "v0/dynamic_seq2seq/decoder/output_projection/kernel:0\n", - "All global vars(78): \n", - "global_step:0\n", - "v0/embeddings/embedding_share:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell/kernel:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell/bias:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_1/kernel:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_1/bias:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_2/kernel:0\n", - 
"v0/dynamic_seq2seq/encoder/lstm_cell_2/bias:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_3/kernel:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_3/bias:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_4/kernel:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_4/bias:0\n", - "v0/dynamic_seq2seq/decoder/memory_layer/kernel:0\n", - "v0/dynamic_seq2seq/decoder/attention_v:0\n", - "v0/dynamic_seq2seq/decoder/attention_g:0\n", - "v0/dynamic_seq2seq/decoder/attention_b:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/kernel:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/bias:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/bahdanau_attention/query_layer/kernel:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/kernel:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/bias:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/kernel:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/bias:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/kernel:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/bias:0\n", - "v0/dynamic_seq2seq/decoder/output_projection/kernel:0\n", - "tower_0/append_gradient_ops/beta1_power:0\n", - "tower_0/append_gradient_ops/beta2_power:0\n", - "v0/embeddings/embedding_share/Adam:0\n", - "v0/embeddings/embedding_share/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell/kernel/Adam:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell/bias/Adam:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell/bias/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_1/kernel/Adam:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_1/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_1/bias/Adam:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_1/bias/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_2/kernel/Adam:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_2/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_2/bias/Adam:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_2/bias/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_3/kernel/Adam:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_3/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_3/bias/Adam:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_3/bias/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_4/kernel/Adam:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_4/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_4/bias/Adam:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_4/bias/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/memory_layer/kernel/Adam:0\n", - "v0/dynamic_seq2seq/decoder/memory_layer/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/attention_v/Adam:0\n", - "v0/dynamic_seq2seq/decoder/attention_v/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/attention_g/Adam:0\n", - "v0/dynamic_seq2seq/decoder/attention_g/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/attention_b/Adam:0\n", - "v0/dynamic_seq2seq/decoder/attention_b/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/kernel/Adam:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/bias/Adam:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/bias/Adam_1:0\n", - 
"v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/bahdanau_attention/query_layer/kernel/Adam:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/bahdanau_attention/query_layer/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/Adam:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/bias/Adam:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/bias/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/kernel/Adam:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/bias/Adam:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/bias/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/kernel/Adam:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/kernel/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/bias/Adam:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/bias/Adam_1:0\n", - "v0/dynamic_seq2seq/decoder/output_projection/kernel/Adam:0\n", - "v0/dynamic_seq2seq/decoder/output_projection/kernel/Adam_1:0\n", - "master backproped params(25): \n", - "v0/embeddings/embedding_share:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell/kernel:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell/bias:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_1/kernel:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_1/bias:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_2/kernel:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_2/bias:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_3/kernel:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_3/bias:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_4/kernel:0\n", - "v0/dynamic_seq2seq/encoder/lstm_cell_4/bias:0\n", - "v0/dynamic_seq2seq/decoder/memory_layer/kernel:0\n", - "v0/dynamic_seq2seq/decoder/attention_v:0\n", - "v0/dynamic_seq2seq/decoder/attention_g:0\n", - "v0/dynamic_seq2seq/decoder/attention_b:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/kernel:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/lstm_cell/bias:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_0_attention/attention/bahdanau_attention/query_layer/kernel:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/kernel:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_1/lstm_cell/bias:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/kernel:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_2/lstm_cell/bias:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/kernel:0\n", - "v0/dynamic_seq2seq/decoder/multi_rnn_cell/cell_3/lstm_cell/bias:0\n", - "v0/dynamic_seq2seq/decoder/output_projection/kernel:0\n", - "Finish building model_fn\n", - "INFO:tensorflow:Done calling model_fn.\n", - "INFO:tensorflow:Create CheckpointSaverHook.\n", - "INFO:tensorflow:Graph was finalized.\n", - "2021-08-26 21:08:52.078538: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA\n", - "2021-08-26 21:08:52.083180: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2200000000 Hz\n", - "2021-08-26 21:08:52.083636: I tensorflow/compiler/xla/service/service.cc:150] XLA service 0x55f631b9cd60 executing 
computations on platform Host. Devices:\n", - "2021-08-26 21:08:52.083673: I tensorflow/compiler/xla/service/service.cc:158] StreamExecutor device (0): , \n", - "Traceback (most recent call last):\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/client/session.py\", line 1334, in _do_call\n", - " return fn(*args)\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/client/session.py\", line 1317, in _run_fn\n", - " self._extend_graph()\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/client/session.py\", line 1352, in _extend_graph\n", - " tf_session.ExtendSession(self._session)\n", - "tensorflow.python.framework.errors_impl.InvalidArgumentError: No OpKernel was registered to support Op 'NcclAllReduce' used by {{node allreduce/allreduce/NcclAllReduce}}with these attrs: [reduction=\"sum\", shared_name=\"c0\", T=DT_FLOAT, num_devices=1]\n", - "Registered devices: [CPU, XLA_CPU]\n", - "Registered kernels:\n", - " \n", - "\n", - "\t [[{{node allreduce/allreduce/NcclAllReduce}}]]\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"nmt.py\", line 1116, in \n", - " tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/platform/app.py\", line 125, in run\n", - " _sys.exit(main(argv))\n", - " File \"nmt.py\", line 1060, in main\n", - " train_speed, _ = estimator.train_fn(hparams)\n", - " File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/estimator.py\", line 831, in train_fn\n", - " hooks=train_hooks,\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 358, in train\n", - " loss = self._train_model(input_fn, hooks, saving_listeners)\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 1124, in _train_model\n", - " return self._train_model_default(input_fn, hooks, saving_listeners)\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 1158, in _train_model_default\n", - " saving_listeners)\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 1403, in _train_with_estimator_spec\n", - " log_step_count_steps=log_step_count_steps) as mon_sess:\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/training/monitored_session.py\", line 508, in MonitoredTrainingSession\n", - " stop_grace_period_secs=stop_grace_period_secs)\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/training/monitored_session.py\", line 934, in __init__\n", - " stop_grace_period_secs=stop_grace_period_secs)\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/training/monitored_session.py\", line 648, in __init__\n", - " self._sess = _RecoverableSession(self._coordinated_creator)\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/training/monitored_session.py\", line 1122, in __init__\n", - " _WrappedSession.__init__(self, self._create_session())\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/training/monitored_session.py\", line 1127, in _create_session\n", - " return self._sess_creator.create_session()\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/training/monitored_session.py\", line 805, in create_session\n", - " 
self.tf_sess = self._session_creator.create_session()\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/training/monitored_session.py\", line 571, in create_session\n", - " init_fn=self._scaffold.init_fn)\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/training/session_manager.py\", line 287, in prepare_session\n", - " sess.run(init_op, feed_dict=init_feed_dict)\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/client/session.py\", line 929, in run\n", - " run_metadata_ptr)\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/client/session.py\", line 1152, in _run\n", - " feed_dict_tensor, options, run_metadata)\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/client/session.py\", line 1328, in _do_run\n", - " run_metadata)\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/client/session.py\", line 1348, in _do_call\n", - " raise type(e)(node_def, op, message)\n", - "tensorflow.python.framework.errors_impl.InvalidArgumentError: No OpKernel was registered to support Op 'NcclAllReduce' used by node allreduce/allreduce/NcclAllReduce (defined at /content/DeepLearningExamples/TensorFlow/Translation/GNMT/variable_mgr/allreduce.py:336) with these attrs: [reduction=\"sum\", shared_name=\"c0\", T=DT_FLOAT, num_devices=1]\n", - "Registered devices: [CPU, XLA_CPU]\n", - "Registered kernels:\n", - " \n", - "\n", - "\t [[node allreduce/allreduce/NcclAllReduce (defined at /content/DeepLearningExamples/TensorFlow/Translation/GNMT/variable_mgr/allreduce.py:336) ]]\n", - "\n", - "Caused by op 'allreduce/allreduce/NcclAllReduce', defined at:\n", - " File \"nmt.py\", line 1116, in \n", - " tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/platform/app.py\", line 125, in run\n", - " _sys.exit(main(argv))\n", - " File \"nmt.py\", line 1060, in main\n", - " train_speed, _ = estimator.train_fn(hparams)\n", - " File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/estimator.py\", line 831, in train_fn\n", - " hooks=train_hooks,\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 358, in train\n", - " loss = self._train_model(input_fn, hooks, saving_listeners)\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 1124, in _train_model\n", - " return self._train_model_default(input_fn, hooks, saving_listeners)\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 1154, in _train_model_default\n", - " features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 1112, in _call_model_fn\n", - " model_fn_results = self._model_fn(features=features, **kwargs)\n", - " File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/estimator.py\", line 559, in fn\n", - " features, labels, mode, params)\n", - " File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/estimator.py\", line 434, in build_graph\n", - " tower_gradvars)\n", - " File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/variable_mgr/variable_mgr.py\", line 189, in preprocess_device_grads\n", - " compact_grads, defer_grads)\n", - " File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/variable_mgr/batch_allreduce.py\", 
line 141, in batch_all_reduce\n", - " all_device_tensors = self._do_batch_all_reduce(all_device_tensors)\n", - " File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/variable_mgr/batch_allreduce.py\", line 329, in _do_batch_all_reduce\n", - " agg_small_grads_max_group=self._agg_small_grads_max_group)\n", - " File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/variable_mgr/allreduce.py\", line 454, in sum_gradients_all_reduce\n", - " num_shards))\n", - " File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/variable_mgr/allreduce.py\", line 336, in sum_grad_and_var_all_reduce\n", - " summed_grads = all_reduce.build_nccl_all_reduce(scaled_grads, tf.add)\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/all_reduce.py\", line 696, in build_nccl_all_reduce\n", - " output_tensors = nccl_ops.all_sum(input_tensors)\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/nccl_ops.py\", line 45, in all_sum\n", - " return _apply_all_reduce('sum', tensors)\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/nccl_ops.py\", line 224, in _apply_all_reduce\n", - " shared_name=shared_name))\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/gen_nccl_ops.py\", line 84, in nccl_all_reduce\n", - " name=name)\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/op_def_library.py\", line 788, in _apply_op_helper\n", - " op_def=op_def)\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/deprecation.py\", line 507, in new_func\n", - " return func(*args, **kwargs)\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py\", line 3300, in create_op\n", - " op_def=op_def)\n", - " File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py\", line 1801, in __init__\n", - " self._traceback = tf_stack.extract_stack()\n", - "\n", - "InvalidArgumentError (see above for traceback): No OpKernel was registered to support Op 'NcclAllReduce' used by node allreduce/allreduce/NcclAllReduce (defined at /content/DeepLearningExamples/TensorFlow/Translation/GNMT/variable_mgr/allreduce.py:336) with these attrs: [reduction=\"sum\", shared_name=\"c0\", T=DT_FLOAT, num_devices=1]\n", - "Registered devices: [CPU, XLA_CPU]\n", - "Registered kernels:\n", - " \n", - "\n", - "\t [[node allreduce/allreduce/NcclAllReduce (defined at /content/DeepLearningExamples/TensorFlow/Translation/GNMT/variable_mgr/allreduce.py:336) ]]\n", - "\n", - "ERROR:tensorflow:==================================\n", - "Object was never used (type ):\n", - "\n", - "If you want to mark it as used call its \"mark_used()\" method.\n", - "It was originally created here:\n", - " File \"nmt.py\", line 1116, in \n", - " tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/platform/app.py\", line 125, in run\n", - " _sys.exit(main(argv)) File \"nmt.py\", line 1062, in main\n", - " utils.print_out(\"training hits OutOfRangeError\", f=sys.stderr) File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/estimator.py\", line 831, in train_fn\n", - " hooks=train_hooks, File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 360, in train\n", - " return self File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 1124, in _train_model\n", - " return 
self._train_model_default(input_fn, hooks, saving_listeners) File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 1158, in _train_model_default\n", - " saving_listeners) File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 1118, in _call_model_fn\n", - " return model_fn_results File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/estimator.py\", line 565, in fn\n", - " train_op=train_op) File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/estimator.py\", line 532, in build_graph\n", - " return loss, master_params, master_grads, None, train_op, scaffold File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/tf_should_use.py\", line 193, in wrapped\n", - " return _add_should_use_warning(fn(*args, **kwargs))\n", - "==================================\n", - "ERROR:tensorflow:==================================\n", - "Object was never used (type ):\n", - "\n", - "If you want to mark it as used call its \"mark_used()\" method.\n", - "It was originally created here:\n", - " File \"nmt.py\", line 1116, in \n", - " tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/platform/app.py\", line 125, in run\n", - " _sys.exit(main(argv)) File \"nmt.py\", line 1062, in main\n", - " utils.print_out(\"training hits OutOfRangeError\", f=sys.stderr) File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/estimator.py\", line 831, in train_fn\n", - " hooks=train_hooks, File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 360, in train\n", - " return self File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 1124, in _train_model\n", - " return self._train_model_default(input_fn, hooks, saving_listeners) File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 1158, in _train_model_default\n", - " saving_listeners) File \"/usr/local/lib/python3.7/dist-packages/tensorflow_estimator/python/estimator/estimator.py\", line 1118, in _call_model_fn\n", - " return model_fn_results File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/estimator.py\", line 565, in fn\n", - " train_op=train_op) File \"/content/DeepLearningExamples/TensorFlow/Translation/GNMT/estimator.py\", line 532, in build_graph\n", - " return loss, master_params, master_grads, None, train_op, scaffold File \"/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/tf_should_use.py\", line 193, in wrapped\n", - " return _add_should_use_warning(fn(*args, **kwargs))\n", - "==================================\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "H1bpOS4x3-_A" - }, - "source": [ - "## Start evaluation.\n", - "\n", - "The training process automatically runs evaluation and outputs the BLEU score after each training epoch. 
Additionally, after the training is done, you can manually run inference on the test dataset with the checkpoint saved during training.\n", - "\n", - "To launch mixed precision inference on 1 GPU, run:\n", - "```\n", - "python nmt.py --output_dir=results --infer_batch_size=128 --mode=infer --amp\n", - "```\n", - "To launch FP32 (TF32 on NVIDIA Ampere GPUs) inference on 1 GPU, run:\n", - "```\n", - "python nmt.py --output_dir=results --infer_batch_size=128 --mode=infer\n", - "```" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "J7GfPXAK3-_A", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "d5b48c06-67c6-4e1d-d560-d52a08b562d0" - }, - "source": [ - "!python nmt.py --output_dir=results --infer_batch_size=128 --mode=infer --amp\n" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:526: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n", - "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:527: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n", - "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:528: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n", - "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:529: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n", - "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:530: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n", - "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:535: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n", - "Enabling TF-AMP\n", - "use_defun is false for attention\n", - "Running inference mode.\n", - "# Set random seed to 1\n", - "DLL 2021-08-26 21:11:24.601318 - PARAMETER num_units : 1024 num_layers : 4 num_encoder_layers : None num_decoder_layers : None encoder_type : gnmt residual : True time_major : True num_embeddings_partitions : 0 attention : normed_bahdanau attention_architecture : gnmt_v2 output_attention : True pass_hidden_state : True optimizer : adam learning_rate : 0.0005 warmup_steps : 200 warmup_scheme : t2t decay_scheme : luong234 max_train_epochs : 6 target_bleu : None colocate_gradients_with_ops : True label_smoothing : 0.1 init_op : uniform init_weight : 0.1 src : en tgt : de data_dir : data/wmt16_de_en train_prefix : train.tok.clean.bpe.32000 test_prefix : newstest2014.tok.bpe.32000 translate_file 
- { - "cell_type": "code", - "metadata": { - "id": "J7GfPXAK3-_A", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "d5b48c06-67c6-4e1d-d560-d52a08b562d0" - }, - "source": [ - "!python nmt.py --output_dir=results --infer_batch_size=128 --mode=infer --amp\n" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:526: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n", - "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:527: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n", - "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:528: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n", - "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:529: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n", - "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:530: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n", - "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/dtypes.py:535: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n", - "Enabling TF-AMP\n", - "use_defun is false for attention\n", - "Running inference mode.\n", - "# Set random seed to 1\n", - "DLL 2021-08-26 21:11:24.601318 - PARAMETER num_units : 1024 num_layers : 4 num_encoder_layers : None num_decoder_layers : None encoder_type : gnmt residual : True time_major : True num_embeddings_partitions : 0 attention : normed_bahdanau attention_architecture : gnmt_v2 output_attention : True pass_hidden_state : True optimizer : adam learning_rate : 0.0005 warmup_steps : 200 warmup_scheme : t2t decay_scheme : luong234 max_train_epochs : 6 target_bleu : None colocate_gradients_with_ops : True label_smoothing : 0.1 init_op : uniform init_weight : 0.1 src : en tgt : de data_dir : data/wmt16_de_en train_prefix : train.tok.clean.bpe.32000 test_prefix : newstest2014.tok.bpe.32000 translate_file : None output_dir : results vocab_prefix : vocab.bpe.32000 embed_prefix : None sos : <s> eos : </s> share_vocab : True check_special_token : True src_max_len : 50 tgt_max_len : 50 src_max_len_infer : None tgt_max_len_infer : 80 unit_type : lstm forget_bias : 0.0 dropout : 0.2 max_gradient_norm : 5.0 batch_size : 128 num_buckets : 5 subword_option : bpe use_char_encode : False save_checkpoints_steps : 2000 log_step_count_steps : 10 num_gpus : 1 hparams_path : None random_seed : 1 language_model : False ckpt : None infer_batch_size : 128 detokenizer_file : None tokenizer_file : None infer_mode : beam_search beam_width : 5 length_penalty_weight : 0.6 coverage_penalty_weight : 0.1 num_workers : 1 amp : True use_fastmath : False use_fp16 : False fp16_loss_scale : 128 enable_auto_loss_scale : True fp16_inc_loss_scale_every_n : 128 check_tower_loss_numerics : False use_fp32_batch_matmul : False force_inputs_padding : False use_xla : False xla_compile : False use_autojit_xla : False use_pintohost_optimizer : False use_cudnn_lstm : False use_loose_bidi_cudnn_lstm : False use_fused_lstm : True use_fused_lstm_dec : False gpu_indices : parallel_iterations : 10 use_dist_strategy : False hierarchical_copy : False network_topology : dgx1 use_block_lstm : False use_defun : False gradient_repacking : 0 compact_gradient_transfer : True all_reduce_spec : nccl agg_small_grads_max_bytes : 0 agg_small_grads_max_group : 10 allreduce_merge_scope : 1 local_parameter_device : gpu use_resource_vars : False debug : False debug_num_train_steps : None show_metrics : True clip_grads : True profile : False profile_save_steps : 10 use_dynamic_rnn : True use_synthetic_data : False mode : infer \n", - "# Vocab file data/wmt16_de_en/vocab.bpe.32000.en exists\n", - "The first 3 vocab words [,, ., the] are not [<unk>, <s>, </s>]\n", - " using source vocab for target\n", - " agg_small_grads_max_bytes=0\n", - " agg_small_grads_max_group=10\n", - " all_reduce_spec=nccl\n", - " allreduce_merge_scope=1\n", - " amp=True\n", - " attention=normed_bahdanau\n", - " attention_architecture=gnmt_v2\n", - " batch_size=128\n", - " beam_width=5\n", - " best_bleu=0\n", - " best_bleu_dir=results/best_bleu\n", - " check_special_token=True\n", - " check_tower_loss_numerics=False\n", - " ckpt=None\n", - " clip_grads=True\n", - " colocate_gradients_with_ops=True\n", - " compact_gradient_transfer=True\n", - " coverage_penalty_weight=0.1\n", - " debug=False\n", - " debug_num_train_steps=None\n", - " decay_scheme=luong234\n", - " detokenizer_file=data/wmt16_de_en/mosesdecoder/scripts/tokenizer/detokenizer.perl\n", - " dropout=0.2\n", - " embed_prefix=None\n", - " enable_auto_loss_scale=True\n", - " encoder_type=gnmt\n", - " eos=</s>\n", - " epoch_step=0\n", - " force_inputs_padding=False\n", - " forget_bias=0.0\n", - " fp16_inc_loss_scale_every_n=128\n", - " fp16_loss_scale=128\n", - " gpu_indices=\n", - " gradient_repacking=0\n", - " hierarchical_copy=False\n", - " infer_batch_size=128\n", - " infer_mode=beam_search\n", - " init_op=uniform\n", - " init_weight=0.1\n", - " label_smoothing=0.1\n", - " language_model=False\n", - " learning_rate=0.0005\n", - " length_penalty_weight=0.6\n", - " local_parameter_device=gpu\n", - " log_step_count_steps=10\n", - " max_gradient_norm=5.0\n", - " max_train_epochs=6\n", - " mode=infer\n", - " network_topology=NetworkTopology.DGX1\n", - " num_buckets=1\n", - " num_dec_emb_partitions=0\n", - " num_decoder_layers=4\n", - " num_decoder_residual_layers=2\n", - " num_embeddings_partitions=0\n", - " num_enc_emb_partitions=0\n",
- " num_encoder_layers=4\n", - " num_encoder_residual_layers=2\n", - " num_gpus=1\n", - " num_units=1024\n", - " optimizer=adam\n", - " output_attention=True\n", - " output_dir=results\n", - " parallel_iterations=10\n", - " pass_hidden_state=True\n", - " profile=False\n", - " profile_save_steps=10\n", - " random_seed=1\n", - " residual=True\n", - " save_checkpoints_steps=2000\n", - " share_vocab=True\n", - " show_metrics=True\n", - " sos=<s>\n", - " src=en\n", - " src_embed_file=\n", - " src_max_len=50\n", - " src_max_len_infer=None\n", - " src_vocab_file=results/vocab.bpe.32000.en\n", - " src_vocab_size=32320\n", - " subword_option=bpe\n", - " target_bleu=None\n", - " test_prefix=data/wmt16_de_en/newstest2014.tok.bpe.32000\n", - " tgt=de\n", - " tgt_embed_file=\n", - " tgt_max_len=50\n", - " tgt_max_len_infer=80\n", - " tgt_vocab_file=results/vocab.bpe.32000.en\n", - " tgt_vocab_size=32320\n", - " time_major=True\n", - " tokenizer_file=data/wmt16_de_en/mosesdecoder/scripts/tokenizer/tokenizer.perl\n", - " train_prefix=data/wmt16_de_en/train.tok.clean.bpe.32000\n", - " translate_file=None\n", - " unit_type=lstm\n", - " use_autojit_xla=False\n", - " use_block_lstm=False\n", - " use_char_encode=False\n", - " use_cudnn_lstm=False\n", - " use_dist_strategy=False\n", - " use_dynamic_rnn=True\n", - " use_fastmath=False\n", - " use_fp16=False\n", - " use_fp32_batch_matmul=False\n", - " use_fused_lstm=True\n", - " use_fused_lstm_dec=False\n", - " use_loose_bidi_cudnn_lstm=False\n", - " use_pintohost_optimizer=False\n", - " use_resource_vars=False\n", - " use_synthetic_data=False\n", - " use_xla=False\n", - " vocab_prefix=data/wmt16_de_en/vocab.bpe.32000\n", - " warmup_scheme=t2t\n", - " warmup_steps=200\n", - " xla_compile=False\n", - "INFO:tensorflow:Starting to evaluate...\n", - "INFO:tensorflow:Using default config.\n", - "INFO:tensorflow:Using config: {'_model_dir': 'results', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true\n", - "graph_options {\n", - " rewrite_options {\n", - " meta_optimizer_iterations: ONE\n", - " }\n", - "}\n", - ", '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': , '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}\n", - "WARNING:tensorflow:Estimator's model_fn (.fn at 0x7fd558836ef0>) includes params argument, but params are not passed to Estimator.\n", - "INFO:tensorflow:Could not find trained model in model_dir: results, running initialization to predict.\n", - "WARNING:tensorflow:From /usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "Colocations handled automatically by placer.\n", - "INFO:tensorflow:Calling model_fn.\n", - "Running fast mode_fn\n", - "global_step already created!\n", - "model.global_step.name: global_step:0\n", - "# Use the same embedding for source and target\n", - "# Creating infer graph ...\n", - "# Build a GNMT encoder\n", - " num_bi_layers = 1\n", - " num_uni_layers = 3\n", - "source.shape: (?, ?)\n", - "WARNING:tensorflow:From /content/DeepLearningExamples/TensorFlow/Translation/GNMT/block_lstm.py:296: to_int64 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n",
- "Instructions for updating:\n", - "Use tf.cast instead.\n", - " cell 0 LSTM, forget_bias=0WARNING:tensorflow:From /content/DeepLearningExamples/TensorFlow/Translation/GNMT/model_helper.py:266: LSTMCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.\n", - "\n", - " cell 1 LSTM, forget_bias=0\n", - " cell 2 LSTM, forget_bias=0 ResidualWrapper\n", - " cell 3 LSTM, forget_bias=0 ResidualWrapper\n", - "WARNING:tensorflow:From /content/DeepLearningExamples/TensorFlow/Translation/GNMT/gnmt_model.py:662: MultiRNNCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.\n", - " decoding maximum_iterations 80\n", - "WARNING:tensorflow:From /content/DeepLearningExamples/TensorFlow/Translation/GNMT/beam_search_decoder.py:733: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "Use tf.cast instead.\n", - "WARNING:tensorflow:From /content/DeepLearningExamples/TensorFlow/Translation/GNMT/beam_search_decoder.py:1001: div (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "Deprecated in favor of operator or tf.math.divide.\n", - "WARNING:tensorflow:From /content/DeepLearningExamples/TensorFlow/Translation/GNMT/beam_search_decoder.py:781: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "Use tf.cast instead.\n", - " learning_rate=0.0005, warmup_steps=200, warmup_scheme=t2t\n", - " decay_scheme=luong234, start_decay_step=110468, decay_steps 13808, decay_factor 0.5\n", - "INFO:tensorflow:Done calling model_fn.\n", - "INFO:tensorflow:Graph was finalized.\n", - "2021-08-26 21:12:08.807161: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA\n", - "2021-08-26 21:12:08.811073: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2200000000 Hz\n", - "2021-08-26 21:12:08.811304: I tensorflow/compiler/xla/service/service.cc:150] XLA service 0x55f246af0aa0 executing computations on platform Host. Devices:\n", - "2021-08-26 21:12:08.811341: I tensorflow/compiler/xla/service/service.cc:158] StreamExecutor device (0): <undefined>, <undefined>\n", - "2021-08-26 21:12:08.884223: W tensorflow/core/framework/allocator.cc:124] Allocation of 50331648 exceeds 10% of system memory.\n", - "2021-08-26 21:12:08.884575: W tensorflow/core/framework/allocator.cc:124] Allocation of 132382720 exceeds 10% of system memory.\n", - "2021-08-26 21:12:08.991523: W tensorflow/core/framework/allocator.cc:124] Allocation of 50331648 exceeds 10% of system memory.\n", - "2021-08-26 21:12:09.188681: W tensorflow/core/framework/allocator.cc:124] Allocation of 50331648 exceeds 10% of system memory.\n", - "2021-08-26 21:12:09.390475: W tensorflow/core/framework/allocator.cc:124] Allocation of 50331648 exceeds 10% of system memory.\n", - "INFO:tensorflow:Running local_init_op.\n", - "2021-08-26 21:12:10.193341: I tensorflow/core/kernels/lookup_util.cc:376] Table trying to initialize from file results/vocab.bpe.32000.en is already initialized.\n", - "INFO:tensorflow:Done running local_init_op.\n" - ], - "name": "stdout" - } - ] - },
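- { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The inference run above reports BLEU through nmt.py itself. If you also want to score a detokenized hypothesis file independently, sacrebleu can do it. The cell below is only a sketch: both paths are assumptions, so inspect the `results` directory to see which output file (if any) your run actually wrote, and point `hyp_path` at it." - ] - },
- { - "cell_type": "code", - "metadata": {}, - "source": [ - "# Only a sketch: score a detokenized hypothesis file against the newstest2014\n", - "# references with sacrebleu. Both paths below are assumptions.\n", - "!pip install -q sacrebleu\n", - "import os\n", - "import sacrebleu\n", - "\n", - "hyp_path = 'results/newstest2014_out.detok.de'  # assumed inference output file\n", - "ref_path = 'data/wmt16_de_en/newstest2014.de'   # reference from the download script\n", - "\n", - "if os.path.exists(hyp_path) and os.path.exists(ref_path):\n", - "    hyps = [line.strip() for line in open(hyp_path, encoding='utf-8')]\n", - "    refs = [[line.strip() for line in open(ref_path, encoding='utf-8')]]\n", - "    print('BLEU:', sacrebleu.corpus_bleu(hyps, refs).score)\n", - "else:\n", - "    print('Adjust hyp_path/ref_path to the files present on disk.')" - ], - "execution_count": null, - "outputs": [] - },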
- { - "cell_type": "markdown", - "metadata": { - "id": "ZeVIs8kI3-_A" - }, - "source": [ - "## Start translation.\n", - "\n", - "After the training is done, you can translate custom sentences with the checkpoint saved during training. Note that these are three separate shell commands and that the flag is `--translate_file` (with an underscore):\n", - "```\n", - "echo \"The quick brown fox jumps over the lazy dog\" > file.txt\n", - "python nmt.py --output_dir=results --mode=translate --translate_file=file.txt\n", - "cat file.txt.trans\n", - "```" - ] - },
- { - "cell_type": "code", - "metadata": { - "id": "q3viIchE3-_B" - }, - "source": [ - "!echo \"The quick brown fox jumps over the lazy dog\" > file.txt\n", - "!python nmt.py --output_dir=results --mode=translate --translate_file=file.txt\n", - "!cat file.txt.trans" - ], - "execution_count": null, - "outputs": [] - },
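- { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Translate mode should handle one sentence per line, so several sentences can be batched into a single call. A minimal sketch is below; the sentences themselves are just placeholders." - ] - },
- { - "cell_type": "code", - "metadata": {}, - "source": [ - "# Only a sketch: write several sentences, one per line, then translate them\n", - "# in a single call. file.txt.trans follows the pattern shown above.\n", - "sentences = [\n", - "    'The quick brown fox jumps over the lazy dog',\n", - "    'This notebook translates English sentences into German',\n", - "]\n", - "with open('file.txt', 'w') as f:\n", - "    f.write('\\n'.join(sentences) + '\\n')\n", - "\n", - "!python nmt.py --output_dir=results --mode=translate --translate_file=file.txt\n", - "!cat file.txt.trans" - ], - "execution_count": null, - "outputs": [] - },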
- { - "cell_type": "markdown", - "metadata": { - "id": "ds6BgVBd3-_B" - }, - "source": [ - "## Other command options\n", - "To see the full list of available options and their descriptions, use the `-h` or `--help` command line option, for example:\n", - "\n" - ] - },
- { - "cell_type": "code", - "metadata": { - "id": "VvdEteL43-_B", - "outputId": "61b1de4f-2149-49d2-9b45-7ee09ffc61df" - }, - "source": [ - "!python nmt.py --help\n" - ], - "execution_count": null, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2021-06-11 04:01:29.489201: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.11.0\n", - "WARNING:tensorflow:Deprecation warnings have been disabled. Set TF_ENABLE_DEPRECATION_WARNINGS=1 to re-enable them.\n", - "WARNING:tensorflow:\n", - "The TensorFlow contrib module will not be included in TensorFlow 2.0.\n", - "For more information, please see:\n", - " * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md\n", - " * https://github.com/tensorflow/addons\n", - " * https://github.com/tensorflow/io (for I/O related ops)\n", - "If you depend on functionality not listed there, please file an issue.\n", - "\n", - "WARNING:tensorflow:From /workspace/gnmt/attention_wrapper.py:554: The name tf.nn.rnn_cell.RNNCell is deprecated. Please use tf.compat.v1.nn.rnn_cell.RNNCell instead.\n", - "\n", - "WARNING:tensorflow:From /workspace/gnmt/benchmark_hooks.py:24: The name tf.train.SessionRunHook is deprecated. Please use tf.estimator.SessionRunHook instead.\n", - "\n", - "usage: nmt.py [-h] [--num_units NUM_UNITS] [--num_layers NUM_LAYERS]\n", - " [--num_encoder_layers NUM_ENCODER_LAYERS]\n", - " [--num_decoder_layers NUM_DECODER_LAYERS]\n", - " [--encoder_type ENCODER_TYPE] [--residual [RESIDUAL]]\n", - " [--time_major [TIME_MAJOR]]\n", - " [--num_embeddings_partitions NUM_EMBEDDINGS_PARTITIONS]\n", - " [--attention ATTENTION]\n", - " [--attention_architecture ATTENTION_ARCHITECTURE]\n", - " [--output_attention [OUTPUT_ATTENTION]]\n", - " [--pass_hidden_state [PASS_HIDDEN_STATE]]\n", - " [--optimizer OPTIMIZER] [--learning_rate LEARNING_RATE]\n", - " [--warmup_steps WARMUP_STEPS] [--warmup_scheme WARMUP_SCHEME]\n", - " [--decay_scheme DECAY_SCHEME]\n", - " [--max_train_epochs MAX_TRAIN_EPOCHS]\n", - " [--target_bleu TARGET_BLEU]\n", - " [--colocate_gradients_with_ops [COLOCATE_GRADIENTS_WITH_OPS]]\n", - " [--label_smoothing LABEL_SMOOTHING] [--init_op INIT_OP]\n", - " [--init_weight INIT_WEIGHT] [--src SRC] [--tgt TGT]\n", - " [--data_dir DATA_DIR] [--train_prefix TRAIN_PREFIX]\n", - " [--test_prefix TEST_PREFIX] [--translate_file TRANSLATE_FILE]\n", - " [--output_dir OUTPUT_DIR] [--vocab_prefix VOCAB_PREFIX]\n", - " [--embed_prefix EMBED_PREFIX] [--sos SOS] [--eos EOS]\n", - " [--share_vocab [SHARE_VOCAB]]\n", - " [--check_special_token CHECK_SPECIAL_TOKEN]\n", - " [--src_max_len SRC_MAX_LEN] [--tgt_max_len TGT_MAX_LEN]\n", - " [--src_max_len_infer SRC_MAX_LEN_INFER]\n", - " [--tgt_max_len_infer TGT_MAX_LEN_INFER] [--unit_type UNIT_TYPE]\n", - " [--forget_bias FORGET_BIAS] [--dropout DROPOUT]\n", - " [--max_gradient_norm MAX_GRADIENT_NORM]\n", - " [--batch_size BATCH_SIZE] [--num_buckets NUM_BUCKETS]\n", - " [--subword_option {,bpe,spm}]\n", - " [--use_char_encode USE_CHAR_ENCODE]\n", - " [--save_checkpoints_steps SAVE_CHECKPOINTS_STEPS]\n", - " [--log_step_count_steps LOG_STEP_COUNT_STEPS]\n", - " [--num_gpus NUM_GPUS] [--hparams_path HPARAMS_PATH]\n", - " [--random_seed RANDOM_SEED] [--language_model [LANGUAGE_MODEL]]\n", - " [--ckpt CKPT] [--infer_batch_size INFER_BATCH_SIZE]\n", - " [--detokenizer_file DETOKENIZER_FILE]\n", - " [--tokenizer_file TOKENIZER_FILE]\n", - " [--infer_mode {greedy,beam_search}] [--beam_width BEAM_WIDTH]\n", - " [--length_penalty_weight LENGTH_PENALTY_WEIGHT]\n", - " [--coverage_penalty_weight COVERAGE_PENALTY_WEIGHT]\n", - " [--num_workers NUM_WORKERS] [--amp]\n", - " [--use_fastmath USE_FASTMATH] [--use_fp16 USE_FP16]\n", - " [--fp16_loss_scale FP16_LOSS_SCALE]\n", - " [--enable_auto_loss_scale ENABLE_AUTO_LOSS_SCALE]\n", - " [--fp16_inc_loss_scale_every_n FP16_INC_LOSS_SCALE_EVERY_N]\n", - " [--check_tower_loss_numerics CHECK_TOWER_LOSS_NUMERICS]\n", - " [--use_fp32_batch_matmul USE_FP32_BATCH_MATMUL]\n", - " [--force_inputs_padding FORCE_INPUTS_PADDING]\n", - " [--use_xla USE_XLA] [--xla_compile XLA_COMPILE]\n", - " [--use_autojit_xla USE_AUTOJIT_XLA]\n", - " [--use_pintohost_optimizer USE_PINTOHOST_OPTIMIZER]\n", - " [--use_cudnn_lstm USE_CUDNN_LSTM]\n", - " [--use_loose_bidi_cudnn_lstm USE_LOOSE_BIDI_CUDNN_LSTM]\n", - " [--use_fused_lstm USE_FUSED_LSTM]\n", - " [--use_fused_lstm_dec USE_FUSED_LSTM_DEC]\n", - " [--gpu_indices GPU_INDICES]\n", - " [--parallel_iterations PARALLEL_ITERATIONS]\n", - " [--use_dist_strategy USE_DIST_STRATEGY]\n", - " [--hierarchical_copy HIERARCHICAL_COPY]\n", - " [--network_topology {NetworkTopology.DGX1,NetworkTopology.GCP_V100}]\n", - " [--use_block_lstm USE_BLOCK_LSTM] [--use_defun USE_DEFUN]\n", - " [--gradient_repacking 
GRADIENT_REPACKING]\n", - " [--compact_gradient_transfer COMPACT_GRADIENT_TRANSFER]\n", - " [--all_reduce_spec ALL_REDUCE_SPEC]\n", - " [--agg_small_grads_max_bytes AGG_SMALL_GRADS_MAX_BYTES]\n", - " [--agg_small_grads_max_group AGG_SMALL_GRADS_MAX_GROUP]\n", - " [--allreduce_merge_scope ALLREDUCE_MERGE_SCOPE]\n", - " [--local_parameter_device LOCAL_PARAMETER_DEVICE]\n", - " [--use_resource_vars USE_RESOURCE_VARS] [--debug DEBUG]\n", - " [--debug_num_train_steps DEBUG_NUM_TRAIN_STEPS]\n", - " [--show_metrics SHOW_METRICS] [--clip_grads CLIP_GRADS]\n", - " [--profile PROFILE] [--profile_save_steps PROFILE_SAVE_STEPS]\n", - " [--use_dynamic_rnn USE_DYNAMIC_RNN]\n", - " [--use_synthetic_data USE_SYNTHETIC_DATA]\n", - " [--mode {train_and_eval,infer,translate}]\n", - "\n", - "optional arguments:\n", - " -h, --help show this help message and exit\n", - " --num_units NUM_UNITS\n", - " Network size.\n", - " --num_layers NUM_LAYERS\n", - " Network depth.\n", - " --num_encoder_layers NUM_ENCODER_LAYERS\n", - " Encoder depth, equal to num_layers if None.\n", - " --num_decoder_layers NUM_DECODER_LAYERS\n", - " Decoder depth, equal to num_layers if None.\n", - " --encoder_type ENCODER_TYPE\n", - " uni | bi | gnmt. For bi, we build num_encoder_layers/2\n", - " bi-directional layers. For gnmt, we build 1 bi-\n", - " directional layer, and (num_encoder_layers - 1) uni-\n", - " directional layers.\n", - " --residual [RESIDUAL]\n", - " Whether to add residual connections.\n", - " --time_major [TIME_MAJOR]\n", - " Whether to use time-major mode for dynamic RNN.\n", - " --num_embeddings_partitions NUM_EMBEDDINGS_PARTITIONS\n", - " Number of partitions for embedding vars.\n", - " --attention ATTENTION\n", - " luong | scaled_luong | bahdanau | normed_bahdanau or\n", - " set to \"\" for no attention\n", - " --attention_architecture ATTENTION_ARCHITECTURE\n", - " standard | gnmt | gnmt_v2. standard: use top layer to\n", - " compute attention. gnmt: GNMT style of computing\n", - " attention, use previous bottom layer to compute\n", - " attention. gnmt_v2: similar to gnmt, but use current\n", - " bottom layer to compute attention.\n", - " --output_attention [OUTPUT_ATTENTION]\n", - " Only used in standard attention_architecture. Whether\n", - " use attention as the cell output at each timestep. .\n", - " --pass_hidden_state [PASS_HIDDEN_STATE]\n", - " Whether to pass encoder's hidden state to decoder when\n", - " using an attention based model.\n", - " --optimizer OPTIMIZER\n", - " sgd | adam\n", - " --learning_rate LEARNING_RATE\n", - " Learning rate. Adam: 0.001 | 0.0001\n", - " --warmup_steps WARMUP_STEPS\n", - " How many steps we inverse-decay learning.\n", - " --warmup_scheme WARMUP_SCHEME\n", - " How to warmup learning rates. Options include: t2t:\n", - " Tensor2Tensor's way, start with lr 100 times smaller,\n", - " then exponentiate until the specified lr.\n", - " --decay_scheme DECAY_SCHEME\n", - " How we decay learning rate. Options include: luong234:\n", - " after 2/3 num train steps, we start halving the\n", - " learning rate for 4 times before finishing. luong5:\n", - " after 1/2 num train steps, we start halving the\n", - " learning rate for 5 times before finishing. 
luong10:\n", - " after 1/2 num train steps, we start halving the\n", - " learning rate for 10 times before finishing.\n", - " --max_train_epochs MAX_TRAIN_EPOCHS\n", - " Max number of epochs.\n", - " --target_bleu TARGET_BLEU\n", - " Target bleu.\n", - " --colocate_gradients_with_ops [COLOCATE_GRADIENTS_WITH_OPS]\n", - " Whether try colocating gradients with corresponding op\n", - " --label_smoothing LABEL_SMOOTHING\n", - " If nonzero, smooth the labels towards 1/num_classes.\n", - " --init_op INIT_OP uniform | glorot_normal | glorot_uniform\n", - " --init_weight INIT_WEIGHT\n", - " for uniform init_op, initialize weights between\n", - " [-this, this].\n", - " --src SRC Source suffix, e.g., en.\n", - " --tgt TGT Target suffix, e.g., de.\n", - " --data_dir DATA_DIR Training/eval data directory.\n", - " --train_prefix TRAIN_PREFIX\n", - " Train prefix, expect files with src/tgt suffixes.\n", - " --test_prefix TEST_PREFIX\n", - " Test prefix, expect files with src/tgt suffixes.\n", - " --translate_file TRANSLATE_FILE\n", - " File to translate, works only with translate mode\n", - " --output_dir OUTPUT_DIR\n", - " Store log/model files.\n", - " --vocab_prefix VOCAB_PREFIX\n", - " Vocab prefix, expect files with src/tgt suffixes.\n", - " --embed_prefix EMBED_PREFIX\n", - " Pretrained embedding prefix, expect files with src/tgt\n", - " suffixes. The embedding files should be Glove\n", - " formatted txt files.\n", - " --sos SOS Start-of-sentence symbol.\n", - " --eos EOS End-of-sentence symbol.\n", - " --share_vocab [SHARE_VOCAB]\n", - " Whether to use the source vocab and embeddings for\n", - " both source and target.\n", - " --check_special_token CHECK_SPECIAL_TOKEN\n", - " Whether check special sos, eos, unk tokens exist in\n", - " the vocab files.\n", - " --src_max_len SRC_MAX_LEN\n", - " Max length of src sequences during training (including\n", - " EOS).\n", - " --tgt_max_len TGT_MAX_LEN\n", - " Max length of tgt sequences during training (including\n", - " BOS).\n", - " --src_max_len_infer SRC_MAX_LEN_INFER\n", - " Max length of src sequences during inference\n", - " (including EOS).\n", - " --tgt_max_len_infer TGT_MAX_LEN_INFER\n", - " Max length of tgt sequences during inference\n", - " (including BOS). 
Also use to restrict the maximum\n", - " decoding length.\n", - " --unit_type UNIT_TYPE\n", - " lstm | gru | layer_norm_lstm | nas\n", - " --forget_bias FORGET_BIAS\n", - " Forget bias for BasicLSTMCell.\n", - " --dropout DROPOUT Dropout rate (not keep_prob)\n", - " --max_gradient_norm MAX_GRADIENT_NORM\n", - " Clip gradients to this norm.\n", - " --batch_size BATCH_SIZE\n", - " Total batch size.\n", - " --num_buckets NUM_BUCKETS\n", - " Put data into similar-length buckets (only for\n", - " training).\n", - " --subword_option {,bpe,spm}\n", - " Set to bpe or spm to activate subword desegmentation.\n", - " --use_char_encode USE_CHAR_ENCODE\n", - " Whether to split each word or bpe into character, and\n", - " then generate the word-level representation from the\n", - " character reprentation.\n", - " --save_checkpoints_steps SAVE_CHECKPOINTS_STEPS\n", - " save_checkpoints_steps\n", - " --log_step_count_steps LOG_STEP_COUNT_STEPS\n", - " The frequency, in number of global steps, that the\n", - " global step and the loss will be logged during\n", - " training\n", - " --num_gpus NUM_GPUS Number of gpus in each worker.\n", - " --hparams_path HPARAMS_PATH\n", - " Path to standard hparams json file that\n", - " overrideshparams values from FLAGS.\n", - " --random_seed RANDOM_SEED\n", - " Random seed (>0, set a specific seed).\n", - " --language_model [LANGUAGE_MODEL]\n", - " True to train a language model, ignoring encoder\n", - " --ckpt CKPT Checkpoint file to load a model for inference.\n", - " (defaults to newest checkpoint)\n", - " --infer_batch_size INFER_BATCH_SIZE\n", - " Batch size for inference mode.\n", - " --detokenizer_file DETOKENIZER_FILE\n", - " Detokenizer script file. Default: DATA_DIR/mosesdecode\n", - " r/scripts/tokenizer/detokenizer.perl\n", - " --tokenizer_file TOKENIZER_FILE\n", - " Tokenizer script file. Default:\n", - " DATA_DIR/mosesdecoder/scripts/tokenizer/tokenizer.perl\n", - " --infer_mode {greedy,beam_search}\n", - " Which type of decoder to use during inference.\n", - " --beam_width BEAM_WIDTH\n", - " beam width when using beam search decoder. If 0, use\n", - " standard decoder with greedy helper.\n", - " --length_penalty_weight LENGTH_PENALTY_WEIGHT\n", - " Length penalty for beam search.\n", - " --coverage_penalty_weight COVERAGE_PENALTY_WEIGHT\n", - " Coverage penalty for beam search.\n", - " --num_workers NUM_WORKERS\n", - " Number of workers (inference only).\n", - " --amp use amp for training and inference\n", - " --use_fastmath USE_FASTMATH\n", - " use_fastmath for training and inference\n", - " --use_fp16 USE_FP16 use_fp16 for training and inference\n", - " --fp16_loss_scale FP16_LOSS_SCALE\n", - " If fp16 is enabled, the loss is multiplied by this\n", - " amount right before gradients are computed, then each\n", - " gradient is divided by this amount. 
Mathematically,\n", - " this has no effect, but it helps avoid fp16 underflow.\n", - " Set to 1 to effectively disable.\n", - " --enable_auto_loss_scale ENABLE_AUTO_LOSS_SCALE\n", - " If True and use_fp16 is True, automatically adjust the\n", - " loss scale during training.\n", - " --fp16_inc_loss_scale_every_n FP16_INC_LOSS_SCALE_EVERY_N\n", - " If fp16 is enabled and enable_auto_loss_scale is True,\n", - " increase the loss scale every n steps.\n", - " --check_tower_loss_numerics CHECK_TOWER_LOSS_NUMERICS\n", - " whether to check tower loss numerics\n", - " --use_fp32_batch_matmul USE_FP32_BATCH_MATMUL\n", - " Whether to use fp32 batch matmul\n", - " --force_inputs_padding FORCE_INPUTS_PADDING\n", - " Force padding input batch to src_max_len and\n", - " tgt_max_len\n", - " --use_xla USE_XLA Use xla to compile a few selected locations, mostly\n", - " Defuns.\n", - " --xla_compile XLA_COMPILE\n", - " Use xla.compile() for each tower's fwd and bak pass.\n", - " --use_autojit_xla USE_AUTOJIT_XLA\n", - " Use auto jit xla.\n", - " --use_pintohost_optimizer USE_PINTOHOST_OPTIMIZER\n", - " whether to use PinToHost optimizer\n", - " --use_cudnn_lstm USE_CUDNN_LSTM\n", - " whether to use cudnn_lstm for encoder, non residual\n", - " layers\n", - " --use_loose_bidi_cudnn_lstm USE_LOOSE_BIDI_CUDNN_LSTM\n", - " whether to use loose bidi cudnn_lstm\n", - " --use_fused_lstm USE_FUSED_LSTM\n", - " whether to use fused lstm and variant. If enabled,\n", - " training will use LSTMBlockFusedCell, infer will use\n", - " LSTMBlockCell when appropriate.\n", - " --use_fused_lstm_dec USE_FUSED_LSTM_DEC\n", - " whether to use fused lstm for decoder (training only).\n", - " --gpu_indices GPU_INDICES\n", - " Indices of worker GPUs in ring order\n", - " --parallel_iterations PARALLEL_ITERATIONS\n", - " number of parallel iterations in dynamic_rnn\n", - " --use_dist_strategy USE_DIST_STRATEGY\n", - " whether to use distribution strategy\n", - " --hierarchical_copy HIERARCHICAL_COPY\n", - " Use hierarchical copies. Currently only optimized for\n", - " use on a DGX-1 with 8 GPUs and may perform poorly on\n", - " other hardware. Requires --num_gpus > 1, and only\n", - " recommended when --num_gpus=8\n", - " --network_topology {NetworkTopology.DGX1,NetworkTopology.GCP_V100}\n", - " --use_block_lstm USE_BLOCK_LSTM\n", - " whether to use block lstm\n", - " --use_defun USE_DEFUN\n", - " whether to use Defun\n", - " --gradient_repacking GRADIENT_REPACKING\n", - " Use gradient repacking. Itcurrently only works with\n", - " replicated mode. At the end ofof each step, it repacks\n", - " the gradients for more efficientcross-device\n", - " transportation. A non-zero value specifiesthe number\n", - " of split packs that will be formed.\n", - " --compact_gradient_transfer COMPACT_GRADIENT_TRANSFER\n", - " Compact gradient as much as possible for cross-device\n", - " transfer and aggregation.\n", - " --all_reduce_spec ALL_REDUCE_SPEC\n", - " A specification of the all_reduce algorithm to be used\n", - " for reducing gradients. For more details, see\n", - " parse_all_reduce_spec in variable_mgr.py. An\n", - " all_reduce_spec has BNF form: int ::= positive whole\n", - " number g_int ::= int[KkMGT]? alg_spec ::= alg |\n", - " alg#int range_spec ::= alg_spec | alg_spec/alg_spec\n", - " spec ::= range_spec | range_spec:g_int:range_spec\n", - " NOTE: not all syntactically correct constructs are\n", - " supported. 
Examples: \"xring\" == use one global ring\n", - " reduction for all tensors \"pscpu\" == use CPU at worker\n", - " 0 to reduce all tensors \"nccl\" == use NCCL to locally\n", - " reduce all tensors. Limited to 1 worker. \"nccl/xring\"\n", - " == locally (to one worker) reduce values using NCCL\n", - " then ring reduce across workers. \"pscpu:32k:xring\" ==\n", - " use pscpu algorithm for tensors of size up to 32kB,\n", - " then xring for larger tensors.\n", - " --agg_small_grads_max_bytes AGG_SMALL_GRADS_MAX_BYTES\n", - " If > 0, try to aggregate tensors of less than this\n", - " number of bytes prior to all-reduce.\n", - " --agg_small_grads_max_group AGG_SMALL_GRADS_MAX_GROUP\n", - " When aggregating small tensors for all-reduce do not\n", - " aggregate more than this many into one new tensor.\n", - " --allreduce_merge_scope ALLREDUCE_MERGE_SCOPE\n", - " Establish a name scope around this many gradients\n", - " prior to creating the all-reduce operations. It may\n", - " affect the ability of the backend to merge parallel\n", - " ops.\n", - " --local_parameter_device LOCAL_PARAMETER_DEVICE\n", - " Device to use as parameter server: cpu or gpu. For\n", - " distributed training, it can affect where caching of\n", - " variables happens.\n", - " --use_resource_vars USE_RESOURCE_VARS\n", - " Use resource variables instead of normal variables.\n", - " Resource variables are slower, but this option is\n", - " useful for debugging their performance.\n", - " --debug DEBUG Debug train and eval\n", - " --debug_num_train_steps DEBUG_NUM_TRAIN_STEPS\n", - " Num steps to train.\n", - " --show_metrics SHOW_METRICS\n", - " whether to show detailed metrics\n", - " --clip_grads CLIP_GRADS\n", - " whether to clip gradients\n", - " --profile PROFILE If generate profile\n", - " --profile_save_steps PROFILE_SAVE_STEPS\n", - " Save timeline every N steps.\n", - " --use_dynamic_rnn USE_DYNAMIC_RNN\n", - " --use_synthetic_data USE_SYNTHETIC_DATA\n", - " --mode {train_and_eval,infer,translate}\n" - ] - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Mz84-D1A3-_C" - }, - "source": [ - "" - ], - "execution_count": null, - "outputs": [] - } - ] -} \ No newline at end of file