From db76f8527299c33e0edf90d0d88a75f8ddb47466 Mon Sep 17 00:00:00 2001
From: Joseph Mellor <the.landfill.coding@gmail.com>
Date: Sun, 5 Sep 2021 17:09:21 -0500
Subject: [PATCH 1/6] Added typos for all letters and some punctuation. Also
 updated prompts.

---
 bot/prompt.py |  67 ++++++++++---------
 bot/typos.py  | 180 +++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 206 insertions(+), 41 deletions(-)

diff --git a/bot/prompt.py b/bot/prompt.py
index e0bddf8..6e07f69 100644
--- a/bot/prompt.py
+++ b/bot/prompt.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 import random
 from collections import Counter
-from .typos import add_typos
+from typos import add_typos
 
 def random_select_weighted_list(ls):
     return random.choices([k[1] for k in ls], weights = [k[0] for k in ls], k = 1)[0]
@@ -126,13 +126,13 @@ def random_select_weighted_list(ls):
   (8.0, 'violated'),
   (8.0, 'disregarded'),
   (8.0, 'disobeyed'),
-  (8.0, 'assisted someone in violating'),
-  (8.0, 'assisted someone in breaking'),
-  (8.0, 'assisted someone in disobeying'),
-  (2.0, 'helped someone violate'),
-  (2.0, 'helped someone disobey'),
-  (2.0, 'helped someone break'),
-  (2.0, 'helped violate'),
+  (4.0, 'assisted someone in violating'),
+  (4.0, 'assisted someone in breaking'),
+  (4.0, 'assisted someone in disobeying'),
+  (3.0, 'helped someone violate'),
+  (3.0, 'helped someone disobey'),
+  (3.0, 'helped someone break'),
+  (3.0, 'helped violate'),
   (0.4, 'helped someone get an abortion in violation of'),
   (0.4, 'helped someone have an abortion in violation of'),
   (0.4, 'helped someone get an abortion, violating'),
@@ -209,11 +209,12 @@ def random_select_weighted_list(ls):
 future_time_frames.extend(['next ' + k for k in days_of_the_week])
 future_time_frames.extend(['on ' + k for k in days_of_the_week])
 abortion_ban_words = [
-  'abortion ban', 'ban on abortion', 'new abortion law', 'law on abortion', 'recent abortion law', 'abortion restrictions',
-  'restrictions on abortion', 'law'
+  'abortion ban', 'ban on abortion', 'law on abortion', 'recent abortion law', 'abortion restrictions', 'restrictions on abortion',
 ]
-abortion_ban_words = [*["Texas's " + k for k in abortion_ban_words], *["the " + k for k in abortion_ban_words]]
-abortion_ban_words.extend(['Texas law', 'the new law'])
+abortion_ban_words = [ *["recently passed " + k for k in abortion_ban_words], *[k for k in abortion_ban_words] ]
+abortion_ban_words = [ *["Texas's " + k for k in abortion_ban_words], *["the " + k for k in abortion_ban_words] ]
+abortion_ban_words.extend(['Texas law', 'the new law', 'Texas law on abortion', 'the Texas law on abortion', 'the Texas abortion law',
+  'the new Texas abortion law', 'the recently passed Texas abortion law', 'new abortion law'])
 
 def gen_abortion_prompt_I(accused):
   abortion_prompt = 'I '
@@ -249,7 +250,10 @@ def gen_abortion_prompt_My(accused):
   abortion_prompt += '.'
   return abortion_prompt
 
+counter = 0
+
 def gen_abortion_prompt():
+  global counter
   accused_family_person = random_select_weighted_list(my_family_possessive_adj)
   accused_family_person += random_select_weighted_list(my_family_words)
   accused_nonfamily_person = random_select_weighted_list(my_nonfamily_possessive_adj)
@@ -262,11 +266,14 @@ def gen_abortion_prompt():
     (0.5, accused_teacher)
   ])
   abortion_prompts = [
-    (5.2, gen_abortion_prompt_I(accused)),
-    (2.6, gen_abortion_prompt_My(accused))
+    (1.0, gen_abortion_prompt_I(accused)),
+    (1.0, gen_abortion_prompt_My(accused))
   ]
-  return random_select_weighted_list(abortion_prompts)
-  #return add_typos(random_select_weighted_list(abortion_prompts))
+  #return random_select_weighted_list(abortion_prompts)
+  counter += 1
+  if random.random() < 0.001:
+    print('\r\x1b[K' + str(counter), end='')
+  return add_typos(random_select_weighted_list(abortion_prompts))
 
 bigram_counter = Counter()
 trigram_counter = Counter()
@@ -285,21 +292,21 @@ def check_ngram_frequency(prompt):
     quadgram_counter[cur_quadgram] += 1
   return prompt
 
+def write_ngram_to_file(counter, filename, total):
+    with open(filename, 'w') as writer:
+        for k, v in counter.most_common():
+            writer.write( "{} {}\n".format(k, float(v) / total) )
+            if float(v) / total < 0.001:
+                break
+
 if __name__ == "__main__":
   total_number = 2000000
-  sample_abortion_prompts = { check_ngram_frequency(gen_abortion_prompt()) for k in range(total_number) }
+  sample_abortion_prompts = [ check_ngram_frequency(gen_abortion_prompt()) for k in range(total_number) ]
   for k in sorted(list(sample_abortion_prompts)[:200], key = lambda o: random.random()):
     print(k)
-  print('Duplicates: ' + str(total_number - len(sample_abortion_prompts)))
-  print('Unique:     ' + str(len(sample_abortion_prompts)))
-  print('I [think]:  ' + str(len([k for k in sample_abortion_prompts if 'I' in k])))
-  print('Other:      ' + str(len(sample_abortion_prompts) - len([k for k in sample_abortion_prompts if 'I' in k])))
-  with open('bigram_freq.txt', 'w') as writer:
-    for k,v in  bigram_counter.most_common():
-        writer.write( "{} {}\n".format(k,v) )
-  with open('trigram_freq.txt', 'w') as writer:
-    for k,v in  trigram_counter.most_common():
-        writer.write( "{} {}\n".format(k,v) )
-  with open('quadgram_freq.txt', 'w') as writer:
-    for k,v in  quadgram_counter.most_common():
-        writer.write( "{} {}\n".format(k,v) )
+  unique = len(set(sample_abortion_prompts))
+  print('Duplicates: ' + str(total_number - unique))
+  print('Unique:     ' + str(unique))
+  write_ngram_to_file(bigram_counter, 'bigram_freq.txt', len(sample_abortion_prompts))
+  write_ngram_to_file(trigram_counter, 'trigram_freq.txt', len(sample_abortion_prompts))
+  write_ngram_to_file(quadgram_counter, 'quadgram_freq.txt', len(sample_abortion_prompts))
diff --git a/bot/typos.py b/bot/typos.py
index 2c9c1a5..145071f 100644
--- a/bot/typos.py
+++ b/bot/typos.py
@@ -1,14 +1,19 @@
 #!/usr/bin/env python3
 import random
+import re
 
 def random_select_weighted_list(ls):
     return random.choices([k[1] for k in ls], weights = [k[0] for k in ls], k = 1)[0]
 
 def gen_typo_odds():
-    return 1.0 - (random.betavariate(16, 2) * 0.3 + 0.6995)
+    return 1.0 - (random.betavariate(0.5, 0.15) * 0.3 + 0.699995)
 
 def add_typos(in_str):
     typo_odds = gen_typo_odds()
+    # Outside of English transliterations, the letter 'q' is always followed by
+    # a 'u'. By combining them into one letter, typos are easier to introduce.
+    in_str = re.sub('qu', 'q', in_str)
+    in_str = re.sub("'s", "'", in_str)
     sub_dict = {
         'a': [
             # First element is always 1 - typo_odds and it's always the correct
@@ -16,17 +21,170 @@ def add_typos(in_str):
             # They can also be multiple letters or zero letters.
             (1 - typo_odds, 'a'),
             # Make sure that these add up to 1.0.
-            (0.1 * typo_odds, 's'),
-            (0.2 * typo_odds, 'q'),
-            (0.1 * typo_odds, 'w'),
-            (0.5 * typo_odds, 'e'),
+            (0.2 * typo_odds, 'e'),
             (0.1 * typo_odds, 'i')
         ],
+        'b': [
+            (1 - typo_odds, 'b'),
+            (0.2 * typo_odds, 'p'),
+            (0.1 * typo_odds, 'h'),
+            (0.1 * typo_odds, 'n'),
+        ],
+        'c': [
+            (1 - typo_odds, 'c'),
+            (0.1 * typo_odds, 'ts'),
+            (0.3 * typo_odds, 's'),
+            (0.3 * typo_odds, 'k'),
+        ],
+        'd': [
+            (1 - typo_odds, 'd'),
+            (0.5 * typo_odds, 't'),
+        ],
+        'e': [
+            (1 - typo_odds, 'e'),
+            (0.1 * typo_odds, 'a'),
+            (0.5 * typo_odds, 'i'),
+        ],
+        'f': [
+            (1 - typo_odds, 'f'),
+            (0.5 * typo_odds, 'v'),
+        ],
+        'g': [
+            (1 - typo_odds, 'g'),
+            (0.1 * typo_odds, 'k'),
+        ],
+        'h': [
+            (1 - typo_odds, 'h'),
+            (0.2 * typo_odds, ''),
+        ],
+        'i': [
+            (1 - typo_odds, 'i'),
+            (0.3 * typo_odds, 'e'),
+            (0.1 * typo_odds, 'a'),
+            (0.01 * typo_odds, 'k'),
+        ],
+        'j': [
+            (1 - typo_odds, 'j'),
+            (0.01 * typo_odds, 'ch'),
+        ],
+        'k': [
+            (1 - typo_odds, 'k'),
+            (0.1 * typo_odds, 'g'),
+            (0.4 * typo_odds, 'c')
+        ],
+        'l': [
+            (1 - typo_odds, 'l'),
+            (0.03 * typo_odds, 'r'),
+            (0.03 * typo_odds, 'w'),
+        ],
+        'm': [
+            (1 - typo_odds, 'm'),
+            (0.5 * typo_odds, 'n'),
+        ],
+        'n': [
+            (1 - typo_odds, 'n'),
+            (0.5 * typo_odds, 'n'),
+            (0.2 * typo_odds, 'b')
+        ],
+        'o': [
+            (1 - typo_odds, 'o'),
+            (0.2 * typo_odds, 'u'),
+            (0.1 * typo_odds, 'p')
+        ],
+        'p': [
+            (1 - typo_odds, 'p'),
+            (0.5 * typo_odds, 'b'),
+            (0.2 * typo_odds, 'o'),
+        ],
+        'q': [
+            (1 - typo_odds, 'qu'),
+            (0.8 * typo_odds, 'kw'),
+        ],
+        'r': [
+            (1 - typo_odds, 'r'),
+            (0.3 * typo_odds, 'l'),
+            (0.1 * typo_odds, 't')
+        ],
+        's': [
+            (1 - typo_odds, 's'),
+            (0.1 * typo_odds, 'sh'),
+            (0.2 * typo_odds, 'c'),
+            (0.2 * typo_odds, 'z')
+        ],
+        't': [
+            (1 - typo_odds, 't'),
+            (0.5 * typo_odds, 'd'),
+            (0.3 * typo_odds, 'th'),
+            (0.1 * typo_odds, 'r')
+        ],
+        'u': [
+            (1 - typo_odds, 'u'),
+            (0.01 * typo_odds, 'yu'),
+            (0.2 * typo_odds, 'o'),
+            (0.1 * typo_odds, ''),
+        ],
+        'v': [
+            (1 - typo_odds, 'v'),
+            (typo_odds, 'f')
+        ],
+        'w': [
+            (1 - typo_odds, 'w'),
+            (0.5 * typo_odds, ''),
+        ],
+        'x': [
+            (1 - typo_odds, 'x'),
+            (0.6 * typo_odds, 'ks'),
+            (0.1 * typo_odds, 'z'),
+        ],
+        'y': [
+            (1 - typo_odds, 'y'),
+            (0.5 * typo_odds, 'u'),
+            (0.2 * typo_odds, 'h'),
+            (0.1 * typo_odds, 'j')
+        ],
+        'z': [
+            (1 - typo_odds, 'z'),
+            (0.9 * typo_odds, 's'),
+            (0.05 * typo_odds, 'x')
+        ],
+        ',': [
+            (1 - typo_odds ** 0.5, ','),
+            (0.9 * typo_odds ** 0.5, ''),
+            (0.1 * typo_odds ** 0.5, '.'),
+        ],
+        '.': [
+            (1 - typo_odds ** 0.75, '.'),
+            (0.3 * typo_odds ** 0.75, ''),
+            (0.3 * typo_odds ** 0.75, ','),
+        ],
+        "'": [
+            (1 - typo_odds ** 0.5, "'s"),
+            (0.7 * typo_odds ** 0.5, "s"),
+            (0.3 * typo_odds ** 0.5, "s'")
+        ]
     }
     out_str = ''
-    for k in in_str:
-        if k in sub_dict:
-            out_str += random_select_weighted_list(sub_dict[k])
-        else:
-            out_str += k
-    return out_str
+    words = in_str.split()
+    out_words = []
+    misspelled_words = {}
+    for word in words:
+        prev_char_in = ''
+        prev_char_out = ''
+        out_word = ''
+        if word in misspelled_words and random.random() < 0.5:
+            out_words.append(misspelled_words[word])
+            continue
+        for k in word:
+            if k in sub_dict:
+                if k == prev_char_in:
+                    out_word += prev_char_out
+                else:
+                    prev_char_out = random_select_weighted_list(sub_dict[k])
+                    out_word += prev_char_out
+            else:
+                out_word += k
+            prev_char_in = k
+        if out_word != word:
+            misspelled_words[word] = out_word
+        out_words.append(out_word)
+    return ' '.join(out_words)

From fbb54137ec236d628840692662c32116f2e2fa4b Mon Sep 17 00:00:00 2001
From: Joseph Mellor <the.landfill.coding@gmail.com>
Date: Sun, 5 Sep 2021 18:28:30 -0500
Subject: [PATCH 2/6] Added more variation in the kinds of typos and used
 better probability distributions

---
 bot/typos.py | 40 ++++++++++++++++++++++++++++------------
 1 file changed, 28 insertions(+), 12 deletions(-)

diff --git a/bot/typos.py b/bot/typos.py
index 145071f..949149e 100644
--- a/bot/typos.py
+++ b/bot/typos.py
@@ -10,6 +10,9 @@ def gen_typo_odds():
 
 def add_typos(in_str):
     typo_odds = gen_typo_odds()
+    space_typo_odds = gen_typo_odds() / 1.5
+    lowercase_odds = 0.5 * (gen_typo_odds() ** 0.5)
+    punct_typo_odds = gen_typo_odds() + typo_odds ** 0.5
     # Outside of English transliterations, the letter 'q' is always followed by
     # a 'u'. By combining them into one letter, typos are easier to introduce.
     in_str = re.sub('qu', 'q', in_str)
@@ -148,22 +151,26 @@ def add_typos(in_str):
             (0.05 * typo_odds, 'x')
         ],
         ',': [
-            (1 - typo_odds ** 0.5, ','),
-            (0.9 * typo_odds ** 0.5, ''),
-            (0.1 * typo_odds ** 0.5, '.'),
+            (1 - punct_typo_odds, ','),
+            (0.9 * punct_typo_odds, ''),
+            (0.1 * punct_typo_odds, '.'),
         ],
         '.': [
-            (1 - typo_odds ** 0.75, '.'),
-            (0.3 * typo_odds ** 0.75, ''),
-            (0.3 * typo_odds ** 0.75, ','),
+            (1 - punct_typo_odds, '.'),
+            (0.3 * punct_typo_odds, ''),
+            (0.3 * punct_typo_odds, ','),
         ],
         "'": [
-            (1 - typo_odds ** 0.5, "'s"),
-            (0.7 * typo_odds ** 0.5, "s"),
-            (0.3 * typo_odds ** 0.5, "s'")
-        ]
+            (1 - punct_typo_odds, "'s"),
+            (0.7 * punct_typo_odds, "s"),
+            (0.3 * punct_typo_odds, "s'")
+        ],
     }
-    out_str = ''
+    space_subs = [
+        (1 - space_typo_odds, ' '),
+        (0.9 * space_typo_odds, ''),
+        (0.1 * space_typo_odds, '  ')
+    ]
     words = in_str.split()
     out_words = []
     misspelled_words = {}
@@ -187,4 +194,13 @@ def add_typos(in_str):
         if out_word != word:
             misspelled_words[word] = out_word
         out_words.append(out_word)
-    return ' '.join(out_words)
+    intermediate_str = ' '.join(out_words)
+    if lowercase_odds < 0.1:
+        intermediate_str = intermediate_str.lower()
+    out_str = ''
+    for k in intermediate_str:
+        if k == ' ':
+            out_str += random_select_weighted_list(space_subs)
+        else:
+            out_str += k
+    return out_str

From a5fc892eea7faf2d4c97f3396aa5932bc3323784 Mon Sep 17 00:00:00 2001
From: Joseph Mellor <the.landfill.coding@gmail.com>
Date: Sun, 5 Sep 2021 18:29:13 -0500
Subject: [PATCH 3/6] Fixed local import

---
 bot/prompt.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bot/prompt.py b/bot/prompt.py
index 6e07f69..615531c 100644
--- a/bot/prompt.py
+++ b/bot/prompt.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 import random
 from collections import Counter
-from typos import add_typos
+from .typos import add_typos
 
 def random_select_weighted_list(ls):
     return random.choices([k[1] for k in ls], weights = [k[0] for k in ls], k = 1)[0]

From 0028c5645dc4ef4db3db70a7fab44c33db7e7adf Mon Sep 17 00:00:00 2001
From: Joseph Mellor <the.landfill.coding@gmail.com>
Date: Sun, 5 Sep 2021 19:47:45 -0500
Subject: [PATCH 4/6] Added arguments for controlling typo frequencies

---
 bot/typos.py | 390 ++++++++++++++++++++++++++-------------------------
 1 file changed, 197 insertions(+), 193 deletions(-)

diff --git a/bot/typos.py b/bot/typos.py
index 949149e..9e5641e 100644
--- a/bot/typos.py
+++ b/bot/typos.py
@@ -3,204 +3,208 @@
 import re
 
 def random_select_weighted_list(ls):
-    return random.choices([k[1] for k in ls], weights = [k[0] for k in ls], k = 1)[0]
+  return random.choices([k[1] for k in ls], weights = [k[0] for k in ls], k = 1)[0]
 
 def gen_typo_odds():
-    return 1.0 - (random.betavariate(0.5, 0.15) * 0.3 + 0.699995)
+  return 1.0 - (random.betavariate(0.5, 0.15) * 0.3 + 0.699995)
 
-def add_typos(in_str):
+def add_typos(in_str, typo_odds = -1, space_typo_odds = -1, lowercase_odds = -1, punct_typo_odds = -1):
+  if typo_odds < 0:
     typo_odds = gen_typo_odds()
+  if space_typo_odds < 0:
     space_typo_odds = gen_typo_odds() / 1.5
+  if lowercase_odds < 0:
     lowercase_odds = 0.5 * (gen_typo_odds() ** 0.5)
+  if punct_typo_odds < 0:
     punct_typo_odds = gen_typo_odds() + typo_odds ** 0.5
-    # Outside of English transliterations, the letter 'q' is always followed by
-    # a 'u'. By combining them into one letter, typos are easier to introduce.
-    in_str = re.sub('qu', 'q', in_str)
-    in_str = re.sub("'s", "'", in_str)
-    sub_dict = {
-        'a': [
-            # First element is always 1 - typo_odds and it's always the correct
-            # letter. The rest of the elements are incorrect substitutions.
-            # They can also be multiple letters or zero letters.
-            (1 - typo_odds, 'a'),
-            # Make sure that these add up to 1.0.
-            (0.2 * typo_odds, 'e'),
-            (0.1 * typo_odds, 'i')
-        ],
-        'b': [
-            (1 - typo_odds, 'b'),
-            (0.2 * typo_odds, 'p'),
-            (0.1 * typo_odds, 'h'),
-            (0.1 * typo_odds, 'n'),
-        ],
-        'c': [
-            (1 - typo_odds, 'c'),
-            (0.1 * typo_odds, 'ts'),
-            (0.3 * typo_odds, 's'),
-            (0.3 * typo_odds, 'k'),
-        ],
-        'd': [
-            (1 - typo_odds, 'd'),
-            (0.5 * typo_odds, 't'),
-        ],
-        'e': [
-            (1 - typo_odds, 'e'),
-            (0.1 * typo_odds, 'a'),
-            (0.5 * typo_odds, 'i'),
-        ],
-        'f': [
-            (1 - typo_odds, 'f'),
-            (0.5 * typo_odds, 'v'),
-        ],
-        'g': [
-            (1 - typo_odds, 'g'),
-            (0.1 * typo_odds, 'k'),
-        ],
-        'h': [
-            (1 - typo_odds, 'h'),
-            (0.2 * typo_odds, ''),
-        ],
-        'i': [
-            (1 - typo_odds, 'i'),
-            (0.3 * typo_odds, 'e'),
-            (0.1 * typo_odds, 'a'),
-            (0.01 * typo_odds, 'k'),
-        ],
-        'j': [
-            (1 - typo_odds, 'j'),
-            (0.01 * typo_odds, 'ch'),
-        ],
-        'k': [
-            (1 - typo_odds, 'k'),
-            (0.1 * typo_odds, 'g'),
-            (0.4 * typo_odds, 'c')
-        ],
-        'l': [
-            (1 - typo_odds, 'l'),
-            (0.03 * typo_odds, 'r'),
-            (0.03 * typo_odds, 'w'),
-        ],
-        'm': [
-            (1 - typo_odds, 'm'),
-            (0.5 * typo_odds, 'n'),
-        ],
-        'n': [
-            (1 - typo_odds, 'n'),
-            (0.5 * typo_odds, 'n'),
-            (0.2 * typo_odds, 'b')
-        ],
-        'o': [
-            (1 - typo_odds, 'o'),
-            (0.2 * typo_odds, 'u'),
-            (0.1 * typo_odds, 'p')
-        ],
-        'p': [
-            (1 - typo_odds, 'p'),
-            (0.5 * typo_odds, 'b'),
-            (0.2 * typo_odds, 'o'),
-        ],
-        'q': [
-            (1 - typo_odds, 'qu'),
-            (0.8 * typo_odds, 'kw'),
-        ],
-        'r': [
-            (1 - typo_odds, 'r'),
-            (0.3 * typo_odds, 'l'),
-            (0.1 * typo_odds, 't')
-        ],
-        's': [
-            (1 - typo_odds, 's'),
-            (0.1 * typo_odds, 'sh'),
-            (0.2 * typo_odds, 'c'),
-            (0.2 * typo_odds, 'z')
-        ],
-        't': [
-            (1 - typo_odds, 't'),
-            (0.5 * typo_odds, 'd'),
-            (0.3 * typo_odds, 'th'),
-            (0.1 * typo_odds, 'r')
-        ],
-        'u': [
-            (1 - typo_odds, 'u'),
-            (0.01 * typo_odds, 'yu'),
-            (0.2 * typo_odds, 'o'),
-            (0.1 * typo_odds, ''),
-        ],
-        'v': [
-            (1 - typo_odds, 'v'),
-            (typo_odds, 'f')
-        ],
-        'w': [
-            (1 - typo_odds, 'w'),
-            (0.5 * typo_odds, ''),
-        ],
-        'x': [
-            (1 - typo_odds, 'x'),
-            (0.6 * typo_odds, 'ks'),
-            (0.1 * typo_odds, 'z'),
-        ],
-        'y': [
-            (1 - typo_odds, 'y'),
-            (0.5 * typo_odds, 'u'),
-            (0.2 * typo_odds, 'h'),
-            (0.1 * typo_odds, 'j')
-        ],
-        'z': [
-            (1 - typo_odds, 'z'),
-            (0.9 * typo_odds, 's'),
-            (0.05 * typo_odds, 'x')
-        ],
-        ',': [
-            (1 - punct_typo_odds, ','),
-            (0.9 * punct_typo_odds, ''),
-            (0.1 * punct_typo_odds, '.'),
-        ],
-        '.': [
-            (1 - punct_typo_odds, '.'),
-            (0.3 * punct_typo_odds, ''),
-            (0.3 * punct_typo_odds, ','),
-        ],
-        "'": [
-            (1 - punct_typo_odds, "'s"),
-            (0.7 * punct_typo_odds, "s"),
-            (0.3 * punct_typo_odds, "s'")
-        ],
-    }
-    space_subs = [
-        (1 - space_typo_odds, ' '),
-        (0.9 * space_typo_odds, ''),
-        (0.1 * space_typo_odds, '  ')
-    ]
-    words = in_str.split()
-    out_words = []
-    misspelled_words = {}
-    for word in words:
-        prev_char_in = ''
-        prev_char_out = ''
-        out_word = ''
-        if word in misspelled_words and random.random() < 0.5:
-            out_words.append(misspelled_words[word])
-            continue
-        for k in word:
-            if k in sub_dict:
-                if k == prev_char_in:
-                    out_word += prev_char_out
-                else:
-                    prev_char_out = random_select_weighted_list(sub_dict[k])
-                    out_word += prev_char_out
-            else:
-                out_word += k
-            prev_char_in = k
-        if out_word != word:
-            misspelled_words[word] = out_word
-        out_words.append(out_word)
-    intermediate_str = ' '.join(out_words)
-    if lowercase_odds < 0.1:
-        intermediate_str = intermediate_str.lower()
-    out_str = ''
-    for k in intermediate_str:
-        if k == ' ':
-            out_str += random_select_weighted_list(space_subs)
+  # Outside of English transliterations, the letter 'q' is always followed by
+  # a 'u'. By combining them into one letter, typos are easier to introduce.
+  in_str = re.sub('qu', 'q', in_str)
+  in_str = re.sub("'s", "'", in_str)
+  sub_dict = {
+    'a': [
+      # First element is always 1 - typo_odds and it's always the correct
+      # letter. The rest of the elements are incorrect substitutions.
+      # They can also be multiple letters or zero letters.
+      (1 - typo_odds, 'a'),
+      # Make sure that these add up to 1.0.
+      (0.2 * typo_odds, 'e'),
+      (0.1 * typo_odds, 'i')
+    ],
+    'b': [
+      (1 - typo_odds, 'b'),
+      (0.2 * typo_odds, 'p'),
+      (0.1 * typo_odds, 'h'),
+      (0.1 * typo_odds, 'n'),
+    ],
+    'c': [
+      (1 - typo_odds, 'c'),
+      (0.1 * typo_odds, 'ts'),
+      (0.3 * typo_odds, 's'),
+      (0.3 * typo_odds, 'k'),
+    ],
+    'd': [
+      (1 - typo_odds, 'd'),
+      (0.5 * typo_odds, 't'),
+    ],
+    'e': [
+      (1 - typo_odds, 'e'),
+      (0.1 * typo_odds, 'a'),
+      (0.5 * typo_odds, 'i'),
+    ],
+    'f': [
+      (1 - typo_odds, 'f'),
+      (0.5 * typo_odds, 'v'),
+    ],
+    'g': [
+      (1 - typo_odds, 'g'),
+      (0.1 * typo_odds, 'k'),
+    ],
+    'h': [
+      (1 - typo_odds, 'h'),
+      (0.2 * typo_odds, ''),
+    ],
+    'i': [
+      (1 - typo_odds, 'i'),
+      (0.3 * typo_odds, 'e'),
+      (0.1 * typo_odds, 'a'),
+      (0.01 * typo_odds, 'k'),
+    ],
+    'j': [
+      (1 - typo_odds, 'j'),
+      (0.01 * typo_odds, 'ch'),
+    ],
+    'k': [
+      (1 - typo_odds, 'k'),
+      (0.1 * typo_odds, 'g'),
+      (0.4 * typo_odds, 'c')
+    ],
+    'l': [
+      (1 - typo_odds, 'l'),
+      (0.03 * typo_odds, 'r'),
+      (0.03 * typo_odds, 'w'),
+    ],
+    'm': [
+      (1 - typo_odds, 'm'),
+      (0.5 * typo_odds, 'n'),
+    ],
+    'n': [
+      (1 - typo_odds, 'n'),
+      (0.5 * typo_odds, 'n'),
+      (0.2 * typo_odds, 'b')
+    ],
+    'o': [
+      (1 - typo_odds, 'o'),
+      (0.2 * typo_odds, 'u'),
+      (0.1 * typo_odds, 'p')
+    ],
+    'p': [
+      (1 - typo_odds, 'p'),
+      (0.5 * typo_odds, 'b'),
+      (0.2 * typo_odds, 'o'),
+    ],
+    'q': [
+      (1 - typo_odds, 'qu'),
+      (0.8 * typo_odds, 'kw'),
+    ],
+    'r': [
+      (1 - typo_odds, 'r'),
+      (0.3 * typo_odds, 'l'),
+      (0.1 * typo_odds, 't')
+    ],
+    's': [
+      (1 - typo_odds, 's'),
+      (0.1 * typo_odds, 'sh'),
+      (0.2 * typo_odds, 'c'),
+      (0.2 * typo_odds, 'z')
+    ],
+    't': [
+      (1 - typo_odds, 't'),
+      (0.5 * typo_odds, 'd'),
+      (0.3 * typo_odds, 'th'),
+      (0.1 * typo_odds, 'r')
+    ],
+    'u': [
+      (1 - typo_odds, 'u'),
+      (0.01 * typo_odds, 'yu'),
+      (0.2 * typo_odds, 'o'),
+      (0.1 * typo_odds, ''),
+    ],
+    'v': [
+      (1 - typo_odds, 'v'),
+      (typo_odds, 'f')
+    ],
+    'w': [
+      (1 - typo_odds, 'w'),
+      (0.5 * typo_odds, ''),
+    ],
+    'x': [
+      (1 - typo_odds, 'x'),
+      (0.6 * typo_odds, 'ks'),
+      (0.1 * typo_odds, 'z'),
+    ],
+    'y': [
+      (1 - typo_odds, 'y'),
+      (0.5 * typo_odds, 'u'),
+      (0.2 * typo_odds, 'h'),
+      (0.1 * typo_odds, 'j')
+    ],
+    'z': [
+      (1 - typo_odds, 'z'),
+      (0.9 * typo_odds, 's'),
+      (0.05 * typo_odds, 'x')
+    ],
+    ',': [
+      (1 - punct_typo_odds, ','),
+      (0.9 * punct_typo_odds, ''),
+      (0.1 * punct_typo_odds, '.'),
+    ],
+    '.': [
+      (1 - punct_typo_odds, '.'),
+      (0.3 * punct_typo_odds, ''),
+      (0.3 * punct_typo_odds, ','),
+    ],
+    "'": [
+      (1 - punct_typo_odds, "'s"),
+      (0.7 * punct_typo_odds, "s"),
+      (0.3 * punct_typo_odds, "s'")
+    ],
+  }
+  space_subs = [
+    (1 - space_typo_odds, ' '),
+    (0.9 * space_typo_odds, ''),
+    (0.1 * space_typo_odds, '  ')
+  ]
+  words = in_str.split()
+  out_words = []
+  misspelled_words = {}
+  for word in words:
+    prev_char_in = ''
+    prev_char_out = ''
+    out_word = ''
+    if word in misspelled_words and random.random() < 0.5:
+      out_words.append(misspelled_words[word])
+      continue
+    for k in word:
+      if k in sub_dict:
+        if k == prev_char_in:
+          out_word += prev_char_out
         else:
-            out_str += k
-    return out_str
+          prev_char_out = random_select_weighted_list(sub_dict[k])
+          out_word += prev_char_out
+      else:
+        out_word += k
+      prev_char_in = k
+    if out_word != word:
+      misspelled_words[word] = out_word
+    out_words.append(out_word)
+  intermediate_str = ' '.join(out_words)
+  if lowercase_odds < 0.1:
+    intermediate_str = intermediate_str.lower()
+  out_str = ''
+  for k in intermediate_str:
+    if k == ' ':
+      out_str += random_select_weighted_list(space_subs)
+    else:
+      out_str += k
+  return out_str

From 4f62d91fc45cd3b79794a413441c84a59b55f5dc Mon Sep 17 00:00:00 2001
From: Joseph Mellor <the.landfill.coding@gmail.com>
Date: Tue, 7 Sep 2021 16:59:28 -0500
Subject: [PATCH 5/6] Updated typo generation: added ie/ei and plural-suffix
 swaps, an all-caps mode, and retuned typo odds

---
 bot/prompt.py |  1 -
 bot/typos.py  | 85 +++++++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 72 insertions(+), 14 deletions(-)

diff --git a/bot/prompt.py b/bot/prompt.py
index 51ef28b..2f43b69 100644
--- a/bot/prompt.py
+++ b/bot/prompt.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python3
 import random
 from collections import Counter
-from typos import add_typos
 
 def random_select_weighted_list(ls):
     return random.choices([k[1] for k in ls], weights = [k[0] for k in ls], k = 1)[0]
diff --git a/bot/typos.py b/bot/typos.py
index 9e5641e..107c273 100644
--- a/bot/typos.py
+++ b/bot/typos.py
@@ -1,6 +1,8 @@
 #!/usr/bin/env python3
 import random
 import re
+#import seaborn as sb
+#import matplotlib.pyplot as plt
 
 def random_select_weighted_list(ls):
   return random.choices([k[1] for k in ls], weights = [k[0] for k in ls], k = 1)[0]
@@ -8,19 +10,38 @@ def random_select_weighted_list(ls):
 def gen_typo_odds():
   return 1.0 - (random.betavariate(0.5, 0.15) * 0.3 + 0.699995)
 
+#typo_data = []
+#space_data = []
+#punct_data = []
+
 def add_typos(in_str, typo_odds = -1, space_typo_odds = -1, lowercase_odds = -1, punct_typo_odds = -1):
+  #global typo_data
+  #global space_data
+  #global punct_data
   if typo_odds < 0:
-    typo_odds = gen_typo_odds()
+    if random.random() < 0.1:
+      typo_odds = 0.0
+    else:
+      typo_odds = gen_typo_odds()
+  #  typo_data.append(typo_odds)
   if space_typo_odds < 0:
-    space_typo_odds = gen_typo_odds() / 1.5
+    space_typo_odds = gen_typo_odds() / 2.0
+  #  space_data.append(space_typo_odds)
   if lowercase_odds < 0:
-    lowercase_odds = 0.5 * (gen_typo_odds() ** 0.5)
+    lowercase_odds = random.random()
+  uppercase_odds = random.random()
   if punct_typo_odds < 0:
-    punct_typo_odds = gen_typo_odds() + typo_odds ** 0.5
+    punct_typo_odds = (gen_typo_odds() + typo_odds ** 0.5) / 2.0
+  #  punct_data.append(punct_typo_odds)
+  in_str = re.sub('ies ', '\u2605 ', in_str)
+  in_str = re.sub('es ', '\u2604 ', in_str)
+  in_str = re.sub('ie ', '\u2606 ', in_str)
+  in_str = re.sub('ie', '\u2603', in_str)
+  in_str = re.sub('ei', '\u2602', in_str)
   # Outside of English transliterations, the letter 'q' is always followed by
   # a 'u'. By combining them into one letter, typos are easier to introduce.
-  in_str = re.sub('qu', 'q', in_str)
-  in_str = re.sub("'s", "'", in_str)
+  in_str = re.sub('qu', '\u2601', in_str)
+  in_str = re.sub("'s", "\u2600", in_str)
   sub_dict = {
     'a': [
       # First element is always 1 - typo_odds and it's always the correct
@@ -103,9 +124,10 @@ def add_typos(in_str, typo_odds = -1, space_typo_odds = -1, lowercase_odds = -1,
       (0.5 * typo_odds, 'b'),
       (0.2 * typo_odds, 'o'),
     ],
-    'q': [
+    '\u2601': [
       (1 - typo_odds, 'qu'),
-      (0.8 * typo_odds, 'kw'),
+      (0.9 * typo_odds, 'kw'),
+      (0.1 * typo_odds, 'q'),
     ],
     'r': [
       (1 - typo_odds, 'r'),
@@ -114,8 +136,6 @@ def add_typos(in_str, typo_odds = -1, space_typo_odds = -1, lowercase_odds = -1,
     ],
     's': [
       (1 - typo_odds, 's'),
-      (0.1 * typo_odds, 'sh'),
-      (0.2 * typo_odds, 'c'),
       (0.2 * typo_odds, 'z')
     ],
     't': [
@@ -154,6 +174,26 @@ def add_typos(in_str, typo_odds = -1, space_typo_odds = -1, lowercase_odds = -1,
       (0.9 * typo_odds, 's'),
       (0.05 * typo_odds, 'x')
     ],
+    "\u2602": [
+      (1 - typo_odds, 'ei'),
+      (1.0 * typo_odds, 'ie')
+    ],
+    "\u2603": [
+      (1 - typo_odds, 'ie'),
+      (1.0 * typo_odds, 'ei')
+    ],
+    "\u2604": [
+      (1 - typo_odds, 'es'),
+      (1.0 * typo_odds, 's')
+    ],
+    "\u2605": [
+      (1 - typo_odds, 'ies'),
+      (1.0 * typo_odds, 'ys')
+    ],
+    "\u2606": [
+      (1 - typo_odds, 'ie'),
+      (1.0 * typo_odds, 'y')
+    ],
     ',': [
       (1 - punct_typo_odds, ','),
       (0.9 * punct_typo_odds, ''),
@@ -162,13 +202,20 @@ def add_typos(in_str, typo_odds = -1, space_typo_odds = -1, lowercase_odds = -1,
     '.': [
       (1 - punct_typo_odds, '.'),
       (0.3 * punct_typo_odds, ''),
+      (0.1 * punct_typo_odds, ' .'),
+      (0.01 * punct_typo_odds, '>'),
       (0.3 * punct_typo_odds, ','),
     ],
-    "'": [
+    "\u2600": [
       (1 - punct_typo_odds, "'s"),
       (0.7 * punct_typo_odds, "s"),
-      (0.3 * punct_typo_odds, "s'")
+      (0.1 * punct_typo_odds, "s'"),
+      (0.2 * punct_typo_odds, "'")
     ],
+    "'": [
+      (1 - punct_typo_odds, "'"),
+      (1.0 * punct_typo_odds, "")
+    ]
   }
   space_subs = [
     (1 - space_typo_odds, ' '),
@@ -187,7 +234,7 @@ def add_typos(in_str, typo_odds = -1, space_typo_odds = -1, lowercase_odds = -1,
       continue
     for k in word:
       if k in sub_dict:
-        if k == prev_char_in:
+        if k == prev_char_in and random.random() > typo_odds:
           out_word += prev_char_out
         else:
           prev_char_out = random_select_weighted_list(sub_dict[k])
@@ -201,6 +248,8 @@ def add_typos(in_str, typo_odds = -1, space_typo_odds = -1, lowercase_odds = -1,
   intermediate_str = ' '.join(out_words)
   if lowercase_odds < 0.1:
     intermediate_str = intermediate_str.lower()
+  elif uppercase_odds < 0.1:
+    intermediate_str = intermediate_str.upper()
   out_str = ''
   for k in intermediate_str:
     if k == ' ':
@@ -208,3 +257,13 @@ def add_typos(in_str, typo_odds = -1, space_typo_odds = -1, lowercase_odds = -1,
     else:
       out_str += k
   return out_str
+
+#def test_plot():
+#  global typo_data
+#  global space_data
+#  global punct_data
+#  sb.kdeplot(typo_data, label="Typos")
+#  sb.kdeplot(space_data, label="Spaces")
+#  sb.kdeplot(punct_data, label="Punctuation")
+#  plt.legend()
+#  plt.show()

From 9f795609451a19c7747be80dcd8af91adbb2f448 Mon Sep 17 00:00:00 2001
From: Joseph Mellor <the.landfill.coding@gmail.com>
Date: Wed, 8 Sep 2021 19:30:30 -0500
Subject: [PATCH 6/6] Reindented bot/prompt.py from two spaces to four spaces

---
 bot/prompt.py | 588 +++++++++++++++++++++++++-------------------------
 1 file changed, 294 insertions(+), 294 deletions(-)

diff --git a/bot/prompt.py b/bot/prompt.py
index 2f43b69..c134030 100644
--- a/bot/prompt.py
+++ b/bot/prompt.py
@@ -6,367 +6,367 @@ def random_select_weighted_list(ls):
     return random.choices([k[1] for k in ls], weights = [k[0] for k in ls], k = 1)[0]
 
 suspect_words = [
-  (1.0, 'suspect'),
-  (0.3, 'have reason to suspect'),
-  (1.0, 'believe'),
-  (0.3, 'have reason to believe'),
-  (1.0, 'think'),
-  (0.5, 'have evidence'),
-  (0.2, 'have strong evidence'),
-  (1.0, 'am convinced'),
-  (1.0, 'am certain'),
-  (1.0, 'can prove'),
-  (1.0, 'have proof'),
+    (1.0, 'suspect'),
+    (0.3, 'have reason to suspect'),
+    (1.0, 'believe'),
+    (0.3, 'have reason to believe'),
+    (1.0, 'think'),
+    (0.5, 'have evidence'),
+    (0.2, 'have strong evidence'),
+    (1.0, 'am convinced'),
+    (1.0, 'am certain'),
+    (1.0, 'can prove'),
+    (1.0, 'have proof'),
 ]
 my_family_words = [
-  (0.5, 'father'),
-  (0.5, 'mother'),
-  (1.0, 'brother'),
-  (1.0, 'sister'),
-  (0.5, 'older brother'),
-  (0.5, 'older sister'),
-  (0.5, 'younger brother'),
-  (0.5, 'younger sister'),
-  (1.0, 'cousin'),
-  (2.0, 'aunt'),
-  (0.5, 'uncle'),
-  (0.6, 'daughter'),
-  (0.6, 'son'),
-  (0.2, 'step-son'),
-  (0.1, 'step son'),
-  (0.2, 'step-daughter'),
-  (0.1, 'step daughter'),
-  (0.7, 'nephew'),
-  (0.7, 'niece'),
-  (0.5, 'grandmother'),
-  (0.2, 'grandma'),
-  (0.5, 'grandfather'),
-  (0.2, 'grandpa'),
-  (0.5, 'granddad'),
-  (1.0, 'grandson'),
-  (1.0, 'granddaughter'),
-  (1.0, 'son-in-law'),
-  (1.0, 'daughter-in-law'),
-  (1.0, 'mother-in-law'),
-  (1.0, 'father-in-law'),
-  (0.1, 'half-brother'),
-  (0.1, 'half-sister'),
+    (0.5, 'father'),
+    (0.5, 'mother'),
+    (1.0, 'brother'),
+    (1.0, 'sister'),
+    (0.5, 'older brother'),
+    (0.5, 'older sister'),
+    (0.5, 'younger brother'),
+    (0.5, 'younger sister'),
+    (1.0, 'cousin'),
+    (2.0, 'aunt'),
+    (0.5, 'uncle'),
+    (0.6, 'daughter'),
+    (0.6, 'son'),
+    (0.2, 'step-son'),
+    (0.1, 'step son'),
+    (0.2, 'step-daughter'),
+    (0.1, 'step daughter'),
+    (0.7, 'nephew'),
+    (0.7, 'niece'),
+    (0.5, 'grandmother'),
+    (0.2, 'grandma'),
+    (0.5, 'grandfather'),
+    (0.2, 'grandpa'),
+    (0.5, 'granddad'),
+    (1.0, 'grandson'),
+    (1.0, 'granddaughter'),
+    (1.0, 'son-in-law'),
+    (1.0, 'daughter-in-law'),
+    (1.0, 'mother-in-law'),
+    (1.0, 'father-in-law'),
+    (0.1, 'half-brother'),
+    (0.1, 'half-sister'),
 ]
 subjects = [
-  'science',
-  'math',
-  'history',
-  'social studies',
-  'chemistry',
-  'algebra',
-  'Spanish',
-  'calculus',
-  'art',
-  'music',
-  'gym',
-  'English',
-  'language arts',
-  'composition',
-  'geometry',
-  'statistics',
-  'physics',
-  'earth science',
-  'economics',
-  'geography',
-  'government',
-  'French',
-  'business',
+    'science',
+    'math',
+    'history',
+    'social studies',
+    'chemistry',
+    'algebra',
+    'Spanish',
+    'calculus',
+    'art',
+    'music',
+    'gym',
+    'English',
+    'language arts',
+    'composition',
+    'geometry',
+    'statistics',
+    'physics',
+    'earth science',
+    'economics',
+    'geography',
+    'government',
+    'French',
+    'business',
 ]
 my_teacher_words = [
-  (1.0, 'teacher'),
-  *[(0.2, k + ' teacher') for k in subjects],
-  (0.5, 'tutor'),
-  *[(0.1, k + ' tutor') for k in subjects],
-  (1.0, 'babysitter'),
-  (1.0, 'instructor'),
-  *[(0.2, k + ' instructor') for k in subjects],
-  (0.5, 'professor'),
-  *[(0.1, k + ' professor') for k in subjects],
+    (1.0, 'teacher'),
+    *[(0.2, k + ' teacher') for k in subjects],
+    (0.5, 'tutor'),
+    *[(0.1, k + ' tutor') for k in subjects],
+    (1.0, 'babysitter'),
+    (1.0, 'instructor'),
+    *[(0.2, k + ' instructor') for k in subjects],
+    (0.5, 'professor'),
+    *[(0.1, k + ' professor') for k in subjects],
 ]
 my_nonfamily_words = [
-  (2.0, 'neighbor'),
-  (0.6, 'next-door neighbor'),
-  (1.5, 'boss'),
-  (0.7, 'landlord'),
-  (1.5, 'doctor'),
-  (0.7, 'employee'),
-  (0.5, 'roommate'),
-  (1.0, 'friend'),
-  (1.0, 'girlfriend'),
-  (1.0, 'boyfriend'),
-  (0.3, 'maid'),
-  (0.2, 'live-in maid'),
-  (0.1, 'live in maid'),
-  (0.2, 'housekeeper'),
-  (0.1, 'cleaning lady'),
-  (2.0, 'ex'),
-  (0.3, 'therapist'),
-  (0.5, 'supervisor'),
-  (1.0, 'employer'),
-  (0.2, 'lawyer'),
-  (0.4, 'dentist'),
-  (0.2, 'plumber'),
-  (1.5, 'pastor'),
-  (0.5, 'deacon'),
-  (0.8, 'priest'),
-  (0.2, 'accountant'),
+    (2.0, 'neighbor'),
+    (0.6, 'next-door neighbor'),
+    (1.5, 'boss'),
+    (0.7, 'landlord'),
+    (1.5, 'doctor'),
+    (0.7, 'employee'),
+    (0.5, 'roommate'),
+    (1.0, 'friend'),
+    (1.0, 'girlfriend'),
+    (1.0, 'boyfriend'),
+    (0.3, 'maid'),
+    (0.2, 'live-in maid'),
+    (0.1, 'live in maid'),
+    (0.2, 'housekeeper'),
+    (0.1, 'cleaning lady'),
+    (2.0, 'ex'),
+    (0.3, 'therapist'),
+    (0.5, 'supervisor'),
+    (1.0, 'employer'),
+    (0.2, 'lawyer'),
+    (0.4, 'dentist'),
+    (0.2, 'plumber'),
+    (1.5, 'pastor'),
+    (0.5, 'deacon'),
+    (0.8, 'priest'),
+    (0.2, 'accountant'),
 ]
 my_family_possessive_adj = [(k[0], k[1] + "'s ") for k in my_nonfamily_words]
 my_family_possessive_adj.append((20.0, ''))
 my_nonfamily_possessive_adj = [(k[0], k[1] + "'s ") for k in my_family_words]
 my_nonfamily_possessive_adj.append((20.0, ''))
 my_teacher_possessive_adj = [
-  (0.2, 'younger brother'),
-  (0.1, 'older brother'),
-  (0.2, 'younger sister'),
-  (0.1, 'older sister'),
-  (0.8, 'brother'),
-  (0.4, 'step-brother'),
-  (0.8, 'sister'),
-  (0.4, 'step-sister'),
-  (1.0, 'cousin'),
-  (2.0, 'daughter'),
-  (2.0, 'son'),
-  (0.4, 'step-son'),
-  (0.2, 'step son'),
-  (0.4, 'step-daughter'),
-  (0.2, 'step daughter'),
-  (0.7, 'nephew'),
-  (0.7, 'niece'),
+    (0.2, 'younger brother'),
+    (0.1, 'older brother'),
+    (0.2, 'younger sister'),
+    (0.1, 'older sister'),
+    (0.8, 'brother'),
+    (0.4, 'step-brother'),
+    (0.8, 'sister'),
+    (0.4, 'step-sister'),
+    (1.0, 'cousin'),
+    (2.0, 'daughter'),
+    (2.0, 'son'),
+    (0.4, 'step-son'),
+    (0.2, 'step son'),
+    (0.4, 'step-daughter'),
+    (0.2, 'step daughter'),
+    (0.7, 'nephew'),
+    (0.7, 'niece'),
 ]
 my_teacher_possessive_adj = [ (k[0], k[1] + "'s ") for k in my_teacher_possessive_adj ]
 violated_words = [
-  (8.0, 'violated'),
-  (8.0, 'disregarded'),
-  (8.0, 'disobeyed'),
-  (4.0, 'assisted someone in violating'),
-  (4.0, 'assisted someone in breaking'),
-  (4.0, 'assisted someone in disobeying'),
-  (3.0, 'helped someone violate'),
-  (3.0, 'helped someone disobey'),
-  (3.0, 'helped someone break'),
-  (3.0, 'helped violate'),
-  (3.0, 'helped break'),
-  (3.0, 'helped disobey'),
+    (8.0, 'violated'),
+    (8.0, 'disregarded'),
+    (8.0, 'disobeyed'),
+    (4.0, 'assisted someone in violating'),
+    (4.0, 'assisted someone in breaking'),
+    (4.0, 'assisted someone in disobeying'),
+    (3.0, 'helped someone violate'),
+    (3.0, 'helped someone disobey'),
+    (3.0, 'helped someone break'),
+    (3.0, 'helped violate'),
+    (3.0, 'helped break'),
+    (3.0, 'helped disobey'),
 
-  (0.4, 'helped someone have an abortion, violating'),
-  (0.4, 'helped someone have an abortion, breaking'),
-  (0.4, 'helped someone have an abortion in violation of'),
+    (0.4, 'helped someone have an abortion, violating'),
+    (0.4, 'helped someone have an abortion, breaking'),
+    (0.4, 'helped someone have an abortion in violation of'),
 
-  (0.4, 'helped someone get an abortion, violating'),
-  (0.4, 'helped someone get an abortion, breaking'),
-  (0.4, 'helped someone get an abortion in violation of'),
+    (0.4, 'helped someone get an abortion, violating'),
+    (0.4, 'helped someone get an abortion, breaking'),
+    (0.4, 'helped someone get an abortion in violation of'),
 
-  (0.4, 'helped someone to get an abortion, violating'),
-  (0.4, 'helped someone to get an abortion, breaking'),
-  (0.4, 'helped someone to get an abortion in violation of'),
+    (0.4, 'helped someone to get an abortion, violating'),
+    (0.4, 'helped someone to get an abortion, breaking'),
+    (0.4, 'helped someone to get an abortion in violation of'),
 
-  (0.1, 'helped someone kill her child and violate'),
-  (0.1, 'helped someone kill her baby and violate'),
-  (0.1, 'helped someone kill a child and violate'),
-  (0.1, 'helped someone kill a baby and violate'),
+    (0.1, 'helped someone kill her child and violate'),
+    (0.1, 'helped someone kill her baby and violate'),
+    (0.1, 'helped someone kill a child and violate'),
+    (0.1, 'helped someone kill a baby and violate'),
 
-  (0.1, 'helped someone kill a child and disobey'),
-  (0.1, 'helped someone kill a baby and disobey'),
-  (0.1, 'helped someone kill her child and disobey'),
-  (0.1, 'helped someone kill her baby and disobey'),
+    (0.1, 'helped someone kill a child and disobey'),
+    (0.1, 'helped someone kill a baby and disobey'),
+    (0.1, 'helped someone kill her child and disobey'),
+    (0.1, 'helped someone kill her baby and disobey'),
 
-  (0.4, 'helped someone abort her child and violate'),
-  (0.4, 'helped someone abort her baby and violate'),
-  (0.4, 'helped someone abort her child and disobey'),
-  (0.4, 'helped someone abort her baby and disobey'),
+    (0.4, 'helped someone abort her child and violate'),
+    (0.4, 'helped someone abort her baby and violate'),
+    (0.4, 'helped someone abort her child and disobey'),
+    (0.4, 'helped someone abort her baby and disobey'),
 
-  (0.1, 'helped someone murder her child and violate'),
-  (0.1, 'helped someone murder her baby and violate'),
+    (0.1, 'helped someone murder her child and violate'),
+    (0.1, 'helped someone murder her baby and violate'),
 
-  (0.1, 'helped someone murder a child and disobey'),
-  (0.1, 'helped someone murder a baby and disobey'),
-  (0.1, 'helped someone murder her child and disobey'),
-  (0.1, 'helped someone murder her baby and disobey'),
+    (0.1, 'helped someone murder a child and disobey'),
+    (0.1, 'helped someone murder a baby and disobey'),
+    (0.1, 'helped someone murder her child and disobey'),
+    (0.1, 'helped someone murder her baby and disobey'),
 
-  (0.1, 'helped someone murder her child in violation of'),
-  (0.1, 'helped someone murder her baby in violation of'),
-  (0.1, 'helped someone murder a child in violation of'),
-  (0.1, 'helped someone murder a baby in violation of'),
+    (0.1, 'helped someone murder her child in violation of'),
+    (0.1, 'helped someone murder her baby in violation of'),
+    (0.1, 'helped someone murder a child in violation of'),
+    (0.1, 'helped someone murder a baby in violation of'),
 
-  (0.1, 'helped someone kill her child in violation of'),
-  (0.1, 'helped someone kill her baby in violation of'),
-  (0.1, 'helped someone kill a child in violation of'),
-  (0.1, 'helped someone kill a baby in violation of'),
+    (0.1, 'helped someone kill her child in violation of'),
+    (0.1, 'helped someone kill her baby in violation of'),
+    (0.1, 'helped someone kill a child in violation of'),
+    (0.1, 'helped someone kill a baby in violation of'),
 
-  (0.1, 'helped someone kill her child, violating'),
-  (0.1, 'helped someone kill her baby, violating'),
-  (0.1, 'helped someone kill a child, violating'),
-  (0.1, 'helped someone kill a baby, violating'),
+    (0.1, 'helped someone kill her child, violating'),
+    (0.1, 'helped someone kill her baby, violating'),
+    (0.1, 'helped someone kill a child, violating'),
+    (0.1, 'helped someone kill a baby, violating'),
 
-  (0.1, 'helped someone murder a child, violating'),
-  (0.1, 'helped someone murder a baby, violating'),
-  (0.1, 'helped someone murder her child, violating'),
-  (0.1, 'helped someone murder her baby, violating'),
+    (0.1, 'helped someone murder a child, violating'),
+    (0.1, 'helped someone murder a baby, violating'),
+    (0.1, 'helped someone murder her child, violating'),
+    (0.1, 'helped someone murder her baby, violating'),
 
-  (0.1, 'aided in the killing of a child, violating'),
-  (0.1, 'aided in the killing of a baby, violating'),
-  (0.1, 'aided in the killing of her child, violating'),
-  (0.1, 'aided in the killing of her baby, violating'),
-  (0.1, 'aided her in killing her baby, violating'),
-  (0.1, 'aided her in killing her child, violating'),
+    (0.1, 'aided in the killing of a child, violating'),
+    (0.1, 'aided in the killing of a baby, violating'),
+    (0.1, 'aided in the killing of her child, violating'),
+    (0.1, 'aided in the killing of her baby, violating'),
+    (0.1, 'aided her in killing her baby, violating'),
+    (0.1, 'aided her in killing her child, violating'),
 
-  (0.1, 'aided in the killing of a child, disobeying'),
-  (0.1, 'aided in the killing of a baby, disobeying'),
-  (0.1, 'aided her in killing her baby, disobeying'),
-  (0.1, 'aided her in killing her child, disobeying'),
+    (0.1, 'aided in the killing of a child, disobeying'),
+    (0.1, 'aided in the killing of a baby, disobeying'),
+    (0.1, 'aided her in killing her baby, disobeying'),
+    (0.1, 'aided her in killing her child, disobeying'),
 
-  (0.1, 'aided in the killing of a child, breaking'),
-  (0.1, 'aided in the killing of a baby, breaking'),
-  (0.1, 'aided in the killing of her child, breaking'),
-  (0.1, 'aided in the killing of her baby, breaking'),
-  (0.1, 'aided in the killing of a child, breaking'),
-  (0.1, 'aided in the killing of a baby, breaking'),
+    (0.1, 'aided in the killing of a child, breaking'),
+    (0.1, 'aided in the killing of a baby, breaking'),
+    (0.1, 'aided in the killing of her child, breaking'),
+    (0.1, 'aided in the killing of her baby, breaking'),
+    (0.1, 'aided in the killing of a child, breaking'),
+    (0.1, 'aided in the killing of a baby, breaking'),
 
-  (0.1, 'aided her in killing her baby, breaking'),
-  (0.1, 'aided her in killing her child, breaking'),
-  (0.1, 'aided her in killing her baby, breaking'),
-  (0.1, 'aided her in killing her child, breaking'),
+    (0.1, 'aided her in killing her baby, breaking'),
+    (0.1, 'aided her in killing her child, breaking'),
+    (0.1, 'aided her in killing her baby, breaking'),
+    (0.1, 'aided her in killing her child, breaking'),
 
-  (0.1, 'aided in the killing of a child, in violation of'),
-  (0.1, 'aided in the killing of a baby, in violation of'),
-  (0.1, 'aided in the killing of her child, in violation of'),
-  (0.1, 'aided in the killing of her baby, in violation of'),
-  (0.1, 'aided in the killing of a child, in violation of'),
-  (0.1, 'aided in the killing of a baby, in violation of'),
+    (0.1, 'aided in the killing of a child, in violation of'),
+    (0.1, 'aided in the killing of a baby, in violation of'),
+    (0.1, 'aided in the killing of her child, in violation of'),
+    (0.1, 'aided in the killing of her baby, in violation of'),
+    (0.1, 'aided in the killing of a child, in violation of'),
+    (0.1, 'aided in the killing of a baby, in violation of'),
 
-  (0.1, 'aided her in killing her baby, in violation of'),
-  (0.1, 'aided her in killing her child, in violation of'),
-  (0.1, 'aided her in killing her baby, in violation of'),
-  (0.1, 'aided her in killing her child, in violation of'),
+    (0.1, 'aided her in killing her baby, in violation of'),
+    (0.1, 'aided her in killing her child, in violation of'),
+    (0.1, 'aided her in killing her baby, in violation of'),
+    (0.1, 'aided her in killing her child, in violation of'),
 
-  (0.1, 'aided in the killing of a child and violated'),
-  (0.1, 'aided in the killing of a baby and violated'),
-  (0.1, 'aided in the killing of her child and violated'),
-  (0.1, 'aided in the killing of her baby and violated'),
+    (0.1, 'aided in the killing of a child and violated'),
+    (0.1, 'aided in the killing of a baby and violated'),
+    (0.1, 'aided in the killing of her child and violated'),
+    (0.1, 'aided in the killing of her baby and violated'),
 
-  (0.1, 'aided her in killing her baby and violated'),
-  (0.1, 'aided her in killing her child and violated'),
+    (0.1, 'aided her in killing her baby and violated'),
+    (0.1, 'aided her in killing her child and violated'),
 ]
 days_of_the_week = [
-  'Sunday',
-  'Monday',
-  'Tuesday',
-  'Wednesday',
-  'Thursday',
-  'Friday',
-  'Saturday',
+    'Sunday',
+    'Monday',
+    'Tuesday',
+    'Wednesday',
+    'Thursday',
+    'Friday',
+    'Saturday',
 ]
 got_words = [
-  'got', 'had', 'helped someone get', 'assisted someone in getting', 'helped someone have'
+    'got', 'had', 'helped someone get', 'assisted someone in getting', 'helped someone have'
 ]
 past_time_frames = [
-  'last week', 'last month', 'this week', 'this month', 'yesterday', 'a week ago', 'two weeks ago', 'two days ago', 'on the weekend',
-  'this weekend', 'last weekend'
+    'last week', 'last month', 'this week', 'this month', 'yesterday', 'a week ago', 'two weeks ago', 'two days ago', 'on the weekend',
+    'this weekend', 'last weekend'
 ]
 past_time_frames.extend([ 'last ' + k for k in days_of_the_week ])
 past_time_frames.extend([ 'on ' + k for k in days_of_the_week ])
 will_get_words = [
-  'is getting', 'will get', 'plans on having', 'is trying to get', 'is trying to have', 'will try to get', 'is helping someone get', 'is planning to get', 'is planning on getting',
-  'plans to get'
+    'is getting', 'will get', 'plans on having', 'is trying to get', 'is trying to have', 'will try to get', 'is helping someone get', 'is planning to get', 'is planning on getting',
+    'plans to get'
 ]
 past_time_frames.extend(['last ' + k for k in days_of_the_week])
 past_time_frames.extend(['on ' + k for k in days_of_the_week])
 will_get_words = ['is getting', 'will get', 'plans on having', 'is trying to get', 'is trying to have', 'will try to get']
 future_time_frames = [
-  'next week', 'this week', 'tomorrow', 'two days from now', 'a week from now', 'after she leaves work', 'after work', 'on the weekend',
-  'this weekend', 'next weekend'
+    'next week', 'this week', 'tomorrow', 'two days from now', 'a week from now', 'after she leaves work', 'after work', 'on the weekend',
+    'this weekend', 'next weekend'
 ]
 future_time_frames.extend(['next ' + k for k in days_of_the_week])
 future_time_frames.extend(['on ' + k for k in days_of_the_week])
 abortion_ban_words = [
-  'abortion ban', 'ban on abortion', 'law on abortion', 'recent abortion law', 'abortion restrictions', 'restrictions on abortion',
+    'abortion ban', 'ban on abortion', 'law on abortion', 'recent abortion law', 'abortion restrictions', 'restrictions on abortion',
 ]
 abortion_ban_words = [ *["recently passed " + k for k in abortion_ban_words], *[k for k in abortion_ban_words] ]
 abortion_ban_words = [ *["Texas's " + k for k in abortion_ban_words], *["the " + k for k in abortion_ban_words] ]
 abortion_ban_words.extend(['Texas law', 'the new law', 'Texas law on abortion', 'the Texas law on abortion', 'the Texas abortion law',
-  'the new Texas abortion law', 'the recently passed Texas abortion law', 'new abortion law'])
+    'the new Texas abortion law', 'the recently passed Texas abortion law', 'new abortion law'])
 
 def gen_abortion_prompt_I(accused):
-  abortion_prompt = 'I '
-  abortion_prompt += random_select_weighted_list(suspect_words)
-  abortion_prompt += random.choices([' that', ''], weights=[0.75, 0.25], k=1)[0]
-  abortion_prompt += ' my '
-  abortion_prompt += accused
-  abortion_prompt += random.choice([' has ', ' '])
-  abortion_prompt += random_select_weighted_list(violated_words)
-  abortion_prompt += ' '
-  abortion_prompt += random.choice(abortion_ban_words)
-  abortion_prompt += '.'
-  return abortion_prompt
+    abortion_prompt = 'I '
+    abortion_prompt += random_select_weighted_list(suspect_words)
+    abortion_prompt += random.choices([' that', ''], weights=[0.75, 0.25], k=1)[0]
+    abortion_prompt += ' my '
+    abortion_prompt += accused
+    abortion_prompt += random.choice([' has ', ' '])
+    abortion_prompt += random_select_weighted_list(violated_words)
+    abortion_prompt += ' '
+    abortion_prompt += random.choice(abortion_ban_words)
+    abortion_prompt += '.'
+    return abortion_prompt
 
 def gen_abortion_prompt_My(accused):
-  abortion_prompt = 'My '
-  abortion_prompt += accused
-  abortion_prompt += ' '
-  past = random.random() > 0.5
-  if past:
-    abortion_prompt += random.choice(got_words)
-  else:
-    abortion_prompt += random.choice(will_get_words)
-  abortion_prompt += ' an'
-  abortion_prompt += random.choices(['', ' illegal', ' unlawful'], weights=[0.625, 0.375 / 2.0, 0.375 / 2.0], k=1)[0]
-  abortion_prompt += ' abortion'
-  if random.random() > 0.5:
+    abortion_prompt = 'My '
+    abortion_prompt += accused
     abortion_prompt += ' '
+    past = random.random() > 0.5
     if past:
-      abortion_prompt += random.choice(past_time_frames)
+        abortion_prompt += random.choice(got_words)
     else:
-      abortion_prompt += random.choice(future_time_frames)
-  abortion_prompt += '.'
-  return abortion_prompt
+        abortion_prompt += random.choice(will_get_words)
+    abortion_prompt += ' an'
+    abortion_prompt += random.choices(['', ' illegal', ' unlawful'], weights=[0.625, 0.375 / 2.0, 0.375 / 2.0], k=1)[0]
+    abortion_prompt += ' abortion'
+    if random.random() > 0.5:
+        abortion_prompt += ' '
+        if past:
+            abortion_prompt += random.choice(past_time_frames)
+        else:
+            abortion_prompt += random.choice(future_time_frames)
+    abortion_prompt += '.'
+    return abortion_prompt
 
 counter = 0
 
 def gen_abortion_prompt():
-  global counter
-  accused_family_person = random_select_weighted_list(my_family_possessive_adj)
-  accused_family_person += random_select_weighted_list(my_family_words)
-  accused_nonfamily_person = random_select_weighted_list(my_nonfamily_possessive_adj)
-  accused_nonfamily_person += random_select_weighted_list(my_nonfamily_words)
-  accused_teacher = random_select_weighted_list(my_teacher_possessive_adj)
-  accused_teacher += random_select_weighted_list(my_teacher_words)
-  accused = random_select_weighted_list([
-    (1.0, accused_family_person),
-    (1.5, accused_nonfamily_person),
-    (0.5, accused_teacher)
-  ])
-  abortion_prompts = [
-    (1.0, gen_abortion_prompt_I(accused)),
-    (1.0, gen_abortion_prompt_My(accused))
-  ]
-  counter += 1
-  if random.random() < 0.001:
-    print('\r\x1b[K' + str(counter), end='')
-  return random_select_weighted_list(abortion_prompts)
-  #return add_typos(random_select_weighted_list(abortion_prompts))
+    global counter
+    accused_family_person = random_select_weighted_list(my_family_possessive_adj)
+    accused_family_person += random_select_weighted_list(my_family_words)
+    accused_nonfamily_person = random_select_weighted_list(my_nonfamily_possessive_adj)
+    accused_nonfamily_person += random_select_weighted_list(my_nonfamily_words)
+    accused_teacher = random_select_weighted_list(my_teacher_possessive_adj)
+    accused_teacher += random_select_weighted_list(my_teacher_words)
+    accused = random_select_weighted_list([
+        (1.0, accused_family_person),
+        (1.5, accused_nonfamily_person),
+        (0.5, accused_teacher)
+    ])
+    abortion_prompts = [
+        (1.0, gen_abortion_prompt_I(accused)),
+        (1.0, gen_abortion_prompt_My(accused))
+    ]
+    counter += 1
+    if random.random() < 0.001:
+        print('\r\x1b[K' + str(counter), end='')
+    return random_select_weighted_list(abortion_prompts)
+    #return add_typos(random_select_weighted_list(abortion_prompts))
 
 bigram_counter = Counter()
 trigram_counter = Counter()
 quadgram_counter = Counter()
 
 def check_ngram_frequency(prompt):
-  words = prompt.split()
-  for i in range(len(words) - 1):
-    cur_bigram = ' '.join(words[i:i+2])
-    bigram_counter[cur_bigram] += 1
-  for i in range(len(words) - 2):
-    cur_trigram = ' '.join(words[i:i+3])
-    trigram_counter[cur_trigram] += 1
-  for i in range(len(words) - 3):
-    cur_quadgram = ' '.join(words[i:i+4])
-    quadgram_counter[cur_quadgram] += 1
-  return prompt
+    words = prompt.split()
+    for i in range(len(words) - 1):
+        cur_bigram = ' '.join(words[i:i+2])
+        bigram_counter[cur_bigram] += 1
+    for i in range(len(words) - 2):
+        cur_trigram = ' '.join(words[i:i+3])
+        trigram_counter[cur_trigram] += 1
+    for i in range(len(words) - 3):
+        cur_quadgram = ' '.join(words[i:i+4])
+        quadgram_counter[cur_quadgram] += 1
+    return prompt
 
 def write_ngram_to_file(counter, filename, total):
     with open(filename, 'w') as writer:
@@ -376,13 +376,13 @@ def write_ngram_to_file(counter, filename, total):
                 break
 
 if __name__ == "__main__":
-  total_number = 2000000
-  sample_abortion_prompts = [ check_ngram_frequency(gen_abortion_prompt()) for k in range(total_number) ]
-  for k in sorted(list(sample_abortion_prompts)[:200], key = lambda o: random.random()):
-    print(k)
-  unique = len(set(sample_abortion_prompts))
-  print('Duplicates: ' + str(total_number - unique))
-  print('Unique:     ' + str(unique))
-  write_ngram_to_file(bigram_counter, 'bigram_freq.txt', len(sample_abortion_prompts))
-  write_ngram_to_file(trigram_counter, 'trigram_freq.txt', len(sample_abortion_prompts))
-  write_ngram_to_file(quadgram_counter, 'quadgram_freq.txt', len(sample_abortion_prompts))
+    total_number = 2000000
+    sample_abortion_prompts = [ check_ngram_frequency(gen_abortion_prompt()) for k in range(total_number) ]
+    for k in sorted(list(sample_abortion_prompts)[:200], key = lambda o: random.random()):
+        print(k)
+    unique = len(set(sample_abortion_prompts))
+    print('Duplicates: ' + str(total_number - unique))
+    print('Unique:     ' + str(unique))
+    write_ngram_to_file(bigram_counter, 'bigram_freq.txt', len(sample_abortion_prompts))
+    write_ngram_to_file(trigram_counter, 'trigram_freq.txt', len(sample_abortion_prompts))
+    write_ngram_to_file(quadgram_counter, 'quadgram_freq.txt', len(sample_abortion_prompts))