From 56aece7648cc8cc3cd42a9cc02b087eac8e7087c Mon Sep 17 00:00:00 2001
From: topikuning <yellowhat89@gmail.com>
Date: Sat, 19 Dec 2020 22:51:36 +0700
Subject: [PATCH 1/7] Use tf.compat.v1.train.Optimizer for TF2 compatibility

Change the base class of AdaFactorOptimizer from tf.train.Optimizer to tf.compat.v1.train.Optimizer.
---
 lm/optimization_adafactor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lm/optimization_adafactor.py b/lm/optimization_adafactor.py
index b8d03ed1..954e112f 100644
--- a/lm/optimization_adafactor.py
+++ b/lm/optimization_adafactor.py
@@ -85,7 +85,7 @@ def create_optimizer(loss, init_lr, num_train_steps, num_warmup_steps, use_tpu):
     return train_op, train_metrics
 
 
-class AdaFactorOptimizer(tf.train.Optimizer):
+class AdaFactorOptimizer(tf.compat.v1.train.Optimizer):
     """here's the optimizer we'll use"""
 
     def __init__(self,

From 03769dca3f01f7f014dc1fb285c9834a499f04d3 Mon Sep 17 00:00:00 2001
From: topikuning <yellowhat89@gmail.com>
Date: Sat, 19 Dec 2020 22:59:23 +0700
Subject: [PATCH 2/7] Use tf.io.gfile instead of tf.gfile for TF2 compatibility

Replace tf.gfile.GFile with tf.io.gfile.GFile in from_json_file.
---
 lm/modeling.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lm/modeling.py b/lm/modeling.py
index 7e9e5330..9a72789f 100644
--- a/lm/modeling.py
+++ b/lm/modeling.py
@@ -84,7 +84,7 @@ def from_dict(cls, json_object):
     @classmethod
     def from_json_file(cls, json_file):
         """Constructs a `NewsConfig` from a json file of parameters."""
-        with tf.gfile.GFile(json_file, "r") as reader:
+        with tf.io.gfile.GFile(json_file, "r") as reader:
             text = reader.read()
         return cls.from_dict(json.loads(text))
 

From 4c69885c42425eb8d44139f00402401725fe43ac Mon Sep 17 00:00:00 2001
From: topikuning <yellowhat89@gmail.com>
Date: Sat, 19 Dec 2020 23:06:55 +0700
Subject: [PATCH 3/7] Use tf.compat.v1.AUTO_REUSE in sample_step

---
 lm/modeling.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lm/modeling.py b/lm/modeling.py
index 9a72789f..0a3e9f04 100644
--- a/lm/modeling.py
+++ b/lm/modeling.py
@@ -722,7 +722,7 @@ def sample_step(tokens, ignore_ids, news_config, batch_size=1, p_for_topp=0.95,
         config=news_config,
         is_training=False,
         input_ids=tokens,
-        reuse=tf.AUTO_REUSE,
+        reuse=tf.compat.v1.AUTO_REUSE,
         scope='newslm',
         chop_off_last_token=False,
         do_cache=True,

From 412fa561d430e8eea302f1bde1b72444f7dc21a6 Mon Sep 17 00:00:00 2001
From: topikuning <yellowhat89@gmail.com>
Date: Sat, 19 Dec 2020 23:08:03 +0700
Subject: [PATCH 4/7] Use tf.compat.v1.variable_scope in _top_p_sample

---
 lm/modeling.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lm/modeling.py b/lm/modeling.py
index 0a3e9f04..ed091db7 100644
--- a/lm/modeling.py
+++ b/lm/modeling.py
@@ -354,7 +354,7 @@ def _top_p_sample(logits, ignore_ids=None, num_samples=1, p=0.9):
 
     # TODO FIGURE OUT HOW TO DO THIS ON TPUS. IT'S HELLA SLOW RIGHT NOW, DUE TO ARGSORT I THINK
     """
-    with tf.variable_scope('top_p_sample'):
+    with tf.compat.v1.variable_scope('top_p_sample'):
         batch_size, vocab_size = get_shape_list(logits, expected_rank=2)
 
         probs = tf.nn.softmax(logits if ignore_ids is None else logits - tf.cast(ignore_ids[None], tf.float32) * 1e10,

From bc095854eefc02ae52321ae0c26220e36ec403c5 Mon Sep 17 00:00:00 2001
From: topikuning <yellowhat89@gmail.com>
Date: Sat, 19 Dec 2020 23:09:57 +0700
Subject: [PATCH 5/7] Switch more variable scopes in modeling.py to tf.compat.v1

---
 lm/modeling.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/lm/modeling.py b/lm/modeling.py
index ed091db7..8e886569 100644
--- a/lm/modeling.py
+++ b/lm/modeling.py
@@ -408,7 +408,7 @@ def _top_k_sample(logits, ignore_ids=None, num_samples=1, k=10):
 
     # TODO FIGURE OUT HOW TO DO THIS ON TPUS. IT'S HELLA SLOW RIGHT NOW, DUE TO ARGSORT I THINK
     """
-    with tf.variable_scope('top_p_sample'):
+    with tf.compat.v1.variable_scope('top_p_sample'):
         batch_size, vocab_size = get_shape_list(logits, expected_rank=2)
 
         probs = tf.nn.softmax(logits if ignore_ids is None else logits - tf.cast(ignore_ids[None], tf.float32) * 1e10,
@@ -487,8 +487,8 @@ def __init__(self,
             assert features_ == (config.hidden_size // config.num_attention_heads)
             caches = tf.unstack(cache, axis=1)
 
-        with tf.variable_scope(scope, default_name='newslm', reuse=reuse):
-            with tf.variable_scope("embeddings"):
+        with tf.compat.v1.variable_scope(scope, default_name='newslm', reuse=reuse):
+            with tf.compat.v1.variable_scope("embeddings"):
                 embeddings, self.embedding_table = embed(self.input_ids, config.vocab_size,
                                                          config.hidden_size,
                                                          position_offset=self.cache_length,
@@ -505,7 +505,7 @@ def __init__(self,
             hidden_state = tf.reshape(embeddings, [self.batch_size * self.seq_length, self.config.hidden_size])
             new_kvs = []
             for layer_idx, layer_cache in enumerate(caches):
-                with tf.variable_scope('layer{:02d}'.format(layer_idx)):
+                with tf.compat.v1.variable_scope('layer{:02d}'.format(layer_idx)):
                     # [batch_size * seq_length, hidden_size]
                     attention_output, new_kv = attention_layer(
                         hidden_state,
@@ -845,7 +845,7 @@ def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
             chop_off_last_token=False,
         )
 
-        with tf.variable_scope('classification'):
+        with tf.compat.v1.variable_scope('classification'):
             hidden_state = model.pooled_output(pool_token_id)
             if is_training:
                 hidden_state = dropout(hidden_state, dropout_prob=0.1)

From e9ebab9048048536ac1d38588505e3ed87e45572 Mon Sep 17 00:00:00 2001
From: topikuning <yellowhat89@gmail.com>
Date: Sat, 19 Dec 2020 23:13:41 +0700
Subject: [PATCH 6/7] Import tensorflow.compat.v1 as tf in modeling.py; drop tf.compat.v1 prefixes

---
 lm/modeling.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/lm/modeling.py b/lm/modeling.py
index 8e886569..ccf69504 100644
--- a/lm/modeling.py
+++ b/lm/modeling.py
@@ -18,7 +18,9 @@
 import math
 
 import six
-import tensorflow as tf
+#import tensorflow as tf
+import tensorflow.compat.v1 as tf
+tf.disable_v2_behavior()
 
 from lm import optimization_adafactor
 from lm.utils import get_assignment_map_from_checkpoint, get_shape_list, get_attention_mask, gelu, layer_norm, dropout, \
@@ -354,7 +356,7 @@ def _top_p_sample(logits, ignore_ids=None, num_samples=1, p=0.9):
 
     # TODO FIGURE OUT HOW TO DO THIS ON TPUS. IT'S HELLA SLOW RIGHT NOW, DUE TO ARGSORT I THINK
     """
-    with tf.compat.v1.variable_scope('top_p_sample'):
+    with tf.variable_scope('top_p_sample'):
         batch_size, vocab_size = get_shape_list(logits, expected_rank=2)
 
         probs = tf.nn.softmax(logits if ignore_ids is None else logits - tf.cast(ignore_ids[None], tf.float32) * 1e10,
@@ -408,7 +410,7 @@ def _top_k_sample(logits, ignore_ids=None, num_samples=1, k=10):
 
     # TODO FIGURE OUT HOW TO DO THIS ON TPUS. IT'S HELLA SLOW RIGHT NOW, DUE TO ARGSORT I THINK
     """
-    with tf.compat.v1.variable_scope('top_p_sample'):
+    with tf.variable_scope('top_p_sample'):
         batch_size, vocab_size = get_shape_list(logits, expected_rank=2)
 
         probs = tf.nn.softmax(logits if ignore_ids is None else logits - tf.cast(ignore_ids[None], tf.float32) * 1e10,
@@ -487,8 +489,8 @@ def __init__(self,
             assert features_ == (config.hidden_size // config.num_attention_heads)
             caches = tf.unstack(cache, axis=1)
 
-        with tf.compat.v1.variable_scope(scope, default_name='newslm', reuse=reuse):
-            with tf.compat.v1.variable_scope("embeddings"):
+        with tf.variable_scope(scope, default_name='newslm', reuse=reuse):
+            with tf.variable_scope("embeddings"):
                 embeddings, self.embedding_table = embed(self.input_ids, config.vocab_size,
                                                          config.hidden_size,
                                                          position_offset=self.cache_length,
@@ -505,7 +507,7 @@ def __init__(self,
             hidden_state = tf.reshape(embeddings, [self.batch_size * self.seq_length, self.config.hidden_size])
             new_kvs = []
             for layer_idx, layer_cache in enumerate(caches):
-                with tf.compat.v1.variable_scope('layer{:02d}'.format(layer_idx)):
+                with tf.variable_scope('layer{:02d}'.format(layer_idx)):
                     # [batch_size * seq_length, hidden_size]
                     attention_output, new_kv = attention_layer(
                         hidden_state,
@@ -722,7 +724,7 @@ def sample_step(tokens, ignore_ids, news_config, batch_size=1, p_for_topp=0.95,
         config=news_config,
         is_training=False,
         input_ids=tokens,
-        reuse=tf.compat.v1.AUTO_REUSE,
+        reuse=tf.AUTO_REUSE,
         scope='newslm',
         chop_off_last_token=False,
         do_cache=True,
@@ -845,7 +847,7 @@ def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
             chop_off_last_token=False,
         )
 
-        with tf.compat.v1.variable_scope('classification'):
+        with tf.variable_scope('classification'):
             hidden_state = model.pooled_output(pool_token_id)
             if is_training:
                 hidden_state = dropout(hidden_state, dropout_prob=0.1)

From 84623fad07f3fb5cdf3b09f019a99c61868e3d1a Mon Sep 17 00:00:00 2001
From: topikuning <yellowhat89@gmail.com>
Date: Sat, 19 Dec 2020 23:14:53 +0700
Subject: [PATCH 7/7] Import tensorflow.compat.v1 as tf in utils.py

---
 lm/utils.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lm/utils.py b/lm/utils.py
index aa75c71b..bc971825 100644
--- a/lm/utils.py
+++ b/lm/utils.py
@@ -17,7 +17,9 @@
 import re
 
 import six
-import tensorflow as tf
+#import tensorflow as tf
+import tensorflow.compat.v1 as tf
+tf.disable_v2_behavior()
 import numpy as np
 from tensorflow.python.lib.io import file_io