diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d3716aa6..3710b631 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,5 +1,13 @@ name: main -on: [push] +on: + push: + branches: + - master + pull_request: + types: + - opened + - reopened + - synchronize jobs: build-and-test: strategy: @@ -18,9 +26,8 @@ jobs: - name: conda env run: | - wget -O Mambaforge.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh" - curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh" - bash Mambaforge.sh -b -p "${HOME}/conda" + wget -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" + bash Miniforge3.sh -b -p "${HOME}/conda" source "${HOME}/conda/etc/profile.d/conda.sh" source "${HOME}/conda/etc/profile.d/mamba.sh" which conda @@ -102,7 +109,7 @@ jobs: - name: push artifact if: ${{ (matrix.python-version == 3.9) }} - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: doc path: /tmp/docs diff --git a/.gitignore b/.gitignore index 29a95722..335fa300 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +env/ *.swo *gfffeature.so *.swp diff --git a/doc/source/api.rst b/doc/source/api.rst index 2f9adefe..309a689a 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -117,10 +117,10 @@ Integration with other tools :toctree: autodocs :nosignatures: - gffutils.biopython_integration.to_seqfeature - gffutils.biopython_integration.from_seqfeature - gffutils.pybedtools_integration.tsses - gffutils.pybedtools_integration.to_bedtool + biopython_integration.to_seqfeature + biopython_integration.from_seqfeature + pybedtools_integration.tsses + pybedtools_integration.to_bedtool @@ -131,10 +131,10 @@ Utilities :toctree: autodocs :nosignatures: - gffutils.helpers.asinterval - gffutils.helpers.merge_attributes - 
gffutils.helpers.sanitize_gff_db - gffutils.helpers.annotate_gff_db - gffutils.helpers.infer_dialect - gffutils.helpers.example_filename - gffutils.inspect.inspect + helpers.asinterval + helpers.merge_attributes + helpers.sanitize_gff_db + helpers.annotate_gff_db + helpers.infer_dialect + helpers.example_filename + inspect.inspect diff --git a/doc/source/changelog.rst b/doc/source/changelog.rst index f1f7545b..8ec34b22 100644 --- a/doc/source/changelog.rst +++ b/doc/source/changelog.rst @@ -3,6 +3,35 @@ Change log ========== + +v0.14 +----- + +- If a value contained a semicolon there would be unexpected behavior (reported + in `#212 `__). This is solved + by adding a new entry to the dialect, ``semicolon in quotes``, and running + the necessary regular expression only when needed -- thanks to @DevangThakkar for the + fix. +- Refactored the attributes parsing to make it clearer to follow along, and + added more tests. The refactoring fixed some subtle bugs on corner cases: + - Previously, for features with repeated keys, the ``order`` key of dialects + would list the repeated keys each time which could result in undetermined + behavior. The ``order`` key is now unique and only the first occurrence of + a repeated key will be added to the order. + - Previously, the ``ensembl_gtf.txt`` example file had a leading *space* in + front of the attributes. This looks to be an error in the creation of the + example file in the first place, but had previously parsed fine. Now the + parser (correctly) does not parse it. Since I'm unaware of any cases in the + wild that have a leading space, I actually consider the new parsing to be + more correct. + - Added tests to directly inspect the inferred dialects for the test cases. 
+- CI, testing, and docs infrastructure updates (miniforge instead of + mambaforge; GitHub Action version bumps; skip biopython test if it's not + installed; reduce build errors for docs) +- Fix `#224 `__, which was caused + by changes to the ``argh`` package used for the command-line tool. + + v0.13 ----- diff --git a/doc/source/conf.py b/doc/source/conf.py index c65c4a28..2b85647c 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -53,5 +53,3 @@ templates_path = ['_templates'] exclude_patterns = [] html_theme = 'sphinx_rtd_theme' -html_static_path = ['_static'] -html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] diff --git a/doc/source/dialect.rst b/doc/source/dialect.rst index b02d7c6a..aed018b9 100644 --- a/doc/source/dialect.rst +++ b/doc/source/dialect.rst @@ -38,7 +38,8 @@ A GTF dialect might look like this:: 'multival separator': ',', 'quoted GFF2 values': True, 'repeated keys': False, - 'trailing semicolon': True} + 'trailing semicolon': True, + 'semicolon in quotes': False} In contrast, a GFF dialect might look like this:: @@ -49,7 +50,9 @@ In contrast, a GFF dialect might look like this:: 'multival separator': ',', 'quoted GFF2 values': False, 'repeated keys': False, - 'trailing semicolon': False} + 'trailing semicolon': False, + 'semicolon in quotes': False} + As other real-world files are brought to the attention of the developers, it's likely that more entries will be added to the dialect. diff --git a/doc/source/examples.rst b/doc/source/examples.rst index 6b631236..54d8e45a 100644 --- a/doc/source/examples.rst +++ b/doc/source/examples.rst @@ -235,7 +235,7 @@ data upon import into the database: ... return x -Now we can supply this tranform function to :func:`create_db`: +Now we can supply this transform function to :func:`create_db`: >>> fn = gffutils.example_filename('ensembl_gtf.txt') >>> db = gffutils.create_db(fn, ":memory:", @@ -643,8 +643,8 @@ attributes to have the same format. 
To help with this, we can use the >>> dialect = helpers.infer_dialect( ... 'Transcript "B0019.1" ; WormPep "WP:CE40797" ; Note "amx-2" ; Prediction_status "Partially_confirmed" ; Gene "WBGene00000138" ; CDS "B0019.1" ; WormPep "WP:CE40797" ; Note "amx-2" ; Prediction_status "Partially_confirmed" ; Gene "WBGene00000138"', ... ) ->>> print(dialect) -{'leading semicolon': False, 'trailing semicolon': False, 'quoted GFF2 values': True, 'field separator': ' ; ', 'keyval separator': ' ', 'multival separator': ',', 'fmt': 'gtf', 'repeated keys': True, 'order': ['Transcript', 'WormPep', 'Note', 'Prediction_status', 'Gene', 'CDS', 'WormPep', 'Note', 'Prediction_status', 'Gene']} +>>> print({k: v for k, v in sorted(dialect.items())}) +{'field separator': ' ; ', 'fmt': 'gtf', 'keyval separator': ' ', 'leading semicolon': False, 'multival separator': ',', 'order': ['Transcript', 'WormPep', 'Note', 'Prediction_status', 'Gene', 'CDS'], 'quoted GFF2 values': True, 'repeated keys': True, 'semicolon in quotes': False, 'trailing semicolon': False} >>> db.dialect = dialect diff --git a/gffutils/constants.py b/gffutils/constants.py index 901e7146..2543e64d 100644 --- a/gffutils/constants.py +++ b/gffutils/constants.py @@ -127,6 +127,12 @@ # vs # ID=001; Name=gene1 "field separator": ";", + # Sometimes there are semicolons inside quotes that break things, e.g., + # + # note "Evidence 1a: Function1, Function2" + # vs + # note "Evidence 1a: Function; PubMedId: 123, 456" + "semicolon in quotes": False, # Usually "=" for GFF3; " " for GTF, e.g., # # gene_id "GENE1" diff --git a/gffutils/interface.py b/gffutils/interface.py index 9216cd39..ee9ec743 100644 --- a/gffutils/interface.py +++ b/gffutils/interface.py @@ -1285,7 +1285,7 @@ def create_introns( with open('tmp.gtf', 'w') as fout: for intron in db.create_introns(**intron_kwargs): - fout.write(str(intron) + "\n") + fout.write(str(intron) + "\\n") db.update(gffutils.DataIterator('tmp.gtf'), **create_kwargs) """ diff --git 
a/gffutils/parser.py b/gffutils/parser.py index 058423ad..f18a269a 100644 --- a/gffutils/parser.py +++ b/gffutils/parser.py @@ -1,9 +1,8 @@ # Portions copied over from BCBio.GFF.GFFParser import re -import copy import collections -import urllib +from urllib import parse from gffutils import constants from gffutils.exceptions import AttributeStringError @@ -16,8 +15,31 @@ ch.setFormatter(formatter) logger.addHandler(ch) +# Compile regexs up front gff3_kw_pat = re.compile(r"\w+=") +# Regex for each separator that will be tested +quoted_semicolon_patterns = dict() + +for sep in (" ; ", "; ", ";"): + quoted_semicolon_patterns[sep] = re.compile( + rf""" + {re.escape(sep)} # The separator we're considering (escaped for VERBOSE mode) + (?= # Positive lookahead: does remaining content match? + (?: # Start non-capturing group + [^"] # Either: match any character that is NOT a quote + | # OR + "[^"]*" # Match a complete quoted string, specifically: + # - opening quote ", followed by + # - zero or more non-quote characters [^"]* + # - followed by closing quote " + )* # Repeat the above pattern zero or more times + $ # Until we reach the end of the string + ) # End of lookahead + """, + re.VERBOSE, + ) + # Encoding/decoding notes # ----------------------- # From @@ -50,9 +72,9 @@ # # See also issue #98. # -# Note that spaces are NOT encoded. Some GFF files have spaces encoded; in -# these cases round-trip invariance will not hold since the %20 will be decoded -# but not re-encoded. +# Note that spaces are NOT supposed to be encoded. Yet some GFF files have +# spaces encoded anyway; in these cases round-trip invariance will not hold +# since the %20 will be decoded but not re-encoded. _to_quote = "\n\t\r%;=&," _to_quote += "".join([chr(i) for i in range(32)]) _to_quote += chr(127) @@ -74,6 +96,208 @@ def __missing__(self, b): quoter = Quoter() +def _split_keyvals(keyval_str, dialect=None): + """ + Dialect detection requires partially parsing the attributes. 
+ """ + from gffutils import feature + + quals = feature.dict_class() + + if not keyval_str: + return quals, dialect + + infer_dialect = False + if dialect is None: + infer_dialect = True + dialect = {} + + # No known cases yet of different multival separator + dialect["multival separator"] = "," + + # Detection for these dialect fields can work on the full attribute + # string. Other detection needs to wait until we've further parsed the + # attributes. + if infer_dialect: + dialect["trailing semicolon"] = keyval_str[-1] == ";" + dialect["leading semicolon"] = keyval_str[0] == ";" + semicolon_in_quotes = False + sep = None + for sep in (" ; ", "; ", ";"): + parts = keyval_str.split(sep) + if len(parts) > 1: + # If naive split differs from more expensive regex, we infer there was + # a semicolon within quoted value and we'll have to use the expensive + # method later + parts_regex = re.split(quoted_semicolon_patterns[sep], keyval_str) + if parts != parts_regex: + semicolon_in_quotes = True + break + dialect["semicolon in quotes"] = semicolon_in_quotes + dialect["field separator"] = sep + + if dialect["trailing semicolon"]: + keyval_str = keyval_str.rstrip(";") + + if dialect["leading semicolon"]: + keyval_str = keyval_str.lstrip(";") + + if dialect["semicolon in quotes"]: + parts = re.split( + quoted_semicolon_patterns[dialect["field separator"]], keyval_str + ) + else: + parts = keyval_str.split(dialect["field separator"]) + + # The next stage of dialect inference works on the 'parts' like: + # + # parts = ["ID=001", "Name=gene1"] + # + if infer_dialect: + dialect["fmt"] = "gff3" + if gff3_kw_pat.match(parts[0]): + dialect["fmt"] = "gff3" + dialect["keyval separator"] = "=" + else: + dialect["fmt"] = "gtf" + dialect["keyval separator"] = " " + + # Now we split + # + # parts = ["ID=001", "Name=gene1"] + # + # into + # + # key_val_tuples = [("ID", "001"), ("Name", "gene1")] + # + # in a dialect-dependent manner. 
+ kvsep = dialect["keyval separator"] + key_val_tuples = [p.split(kvsep) for p in parts] + + # With the split keys we can detect whether any are repeated + if infer_dialect: + keys = [i[0] for i in key_val_tuples] + dialect["repeated keys"] = len(keys) != len(set(keys)) + + # For dialect detection, this will help figure out if there is inconsistent + # quoting across values. + quoted_values = [] + + for i in key_val_tuples: + + if len(i) == 2: + # Easy, on-spec case + key, val = i + + elif len(i) == 1: + # By convention, no value becomes an empty string, e.g.: + # + # attributes = "ID=001;is_gene;" + key = i[0] + val = "" + + else: + # Multiple spaces within quoted values are joined back together without + # requiring a regex (like we need when there's *field* separator like + # a semicolon in the values) + # + # That is: + # + # attributes = 'gene_description "an important gene"; gene_id "g001"' + # + # becomes + # + # key_val_tuples = [("gene_description", "an", "important", "gene"), ("gene_id", "g001")] + # + # so here that first key/val pair will become: + # + # key = "gene_description" + # val = "an important gene" + # + # Another pathological case, this time for GFF3: + # + # Alias=SGN-M1347;ID=T0028;Note=marker name(s): T0028 SGN-M1347 |identity=99.58|escore=2e-126 + # ^ ^ + key = i[0] + val = kvsep.join(i[1:]) + + # By convention all values are lists, even if there's only one value + # (or even no values) + if key not in quals: + quals[key] = [] + + # This will run on every value. + if infer_dialect: + quoted = len(val) > 0 and val[0] == '"' and val[-1] == '"' + quoted_values.append(quoted) + dialect["quoted GFF2 values"] = quoted + + if dialect["quoted GFF2 values"] and val: + val = val.strip('"') + + if val: + # For repeated keys dialect, don't split on an internal comma. 
That is, + # + # attributes = 'db_xref="g01,g02"; db_xref="XYZ"' + # + # becomes: + # + # { + # "db_xref": ["g01,g02", "XYZ"] + # } + # + if dialect.get("repeated keys"): + quals[key].append(val) + + # Otherwise, split but only if it's a comma without a space. So: + # + # attributes = 'db_xref="g01,g02"' + # + # becomes + # { + # "db_xref": ["g01", "g02"] + # } + # but + # + # attributes = 'description="kinase, subunit 1"' + # ^ note the space here + # becomes + # { + # "description": ["kinase, subunit 1"] + # } + # + else: + # E.g. the "kinase, subunit 1" example above + if ", " in val: + quals[key].append(val) + else: + quals[key].extend(val.split(",")) + + # If there was inconsistent quoting, we fall back to "not quoted" so + # as to avoid incorrectly stripping off first and last quotes. + if infer_dialect and len(set(quoted_values)) > 1: + # Prior behavior was to use whatever the first value used + dialect["quoted GFF2 values"] = quoted_values[0] + + # Though there could be an argument for considering quotes in mixed + # cases to be part of the string, though technically they should be + # %-encoded if so. + # dialect["quoted GFF2 values"] = False + + # Handle unquoting of %-encoded values + if not constants.ignore_url_escape_characters and dialect["fmt"] == "gff3": + for key, vals in quals.items(): + unquoted = [parse.unquote(v) for v in vals] + quals[key] = unquoted + + # Now that we're not supporting old Python versions we can rely on dict + # insertion order + if infer_dialect: + dialect["order"] = list(quals.keys()) + + return quals, dialect + + def _reconstruct(keyvals, dialect, keep_order=False, sort_attribute_values=False): """ Reconstructs the original attributes string according to the dialect. @@ -156,6 +380,20 @@ def sort_key(x): part = key else: if dialect["fmt"] == "gtf": + # By convention, GTF attributes with no value are reconstructed + # with an empty string. 
E.g.: + # 'gene_id "gene1"; is_gene;' + # + # becomes + # + # { + # "gene_id": "gene1", + # "is_gene": "" + # } + # + # and is printed as: + # + # 'gene_id "gene1"; is_gene "";' part = dialect["keyval separator"].join([key, '""']) else: part = key @@ -169,207 +407,3 @@ def sort_key(x): parts_str += ";" return parts_str - - -# TODO: -# Cythonize -- profiling shows that the bulk of the time is spent on this -# function... -def _split_keyvals(keyval_str, dialect=None): - """ - Given the string attributes field of a GFF-like line, split it into an - attributes dictionary and a "dialect" dictionary which contains information - needed to reconstruct the original string. - - Lots of logic here to handle all the corner cases. - - If `dialect` is None, then do all the logic to infer a dialect from this - attribute string. - - Otherwise, use the provided dialect (and return it at the end). - """ - - def _unquote_quals(quals, dialect): - """ - Handles the unquoting (decoding) of percent-encoded characters. - - See notes on encoding/decoding above. - """ - if not constants.ignore_url_escape_characters and dialect["fmt"] == "gff3": - for key, vals in quals.items(): - unquoted = [urllib.parse.unquote(v) for v in vals] - quals[key] = unquoted - return quals - - infer_dialect = False - if dialect is None: - # Make a copy of default dialect so it can be modified as needed - dialect = copy.copy(constants.dialect) - infer_dialect = True - from gffutils import feature - - quals = feature.dict_class() - if not keyval_str: - return quals, dialect - - # If a dialect was provided, then use that directly. 
- if not infer_dialect: - if dialect["trailing semicolon"]: - keyval_str = keyval_str.rstrip(";") - - parts = keyval_str.split(dialect["field separator"]) - - kvsep = dialect["keyval separator"] - if dialect["leading semicolon"]: - pieces = [] - for p in parts: - if p and p[0] == ";": - p = p[1:] - pieces.append(p.strip().split(kvsep)) - key_vals = [(p[0], " ".join(p[1:])) for p in pieces] - - if dialect["fmt"] == "gff3": - key_vals = [p.split(kvsep) for p in parts] - else: - leadingsemicolon = dialect["leading semicolon"] - pieces = [] - for i, p in enumerate(parts): - if i == 0 and leadingsemicolon: - p = p[1:] - pieces.append(p.strip().split(kvsep)) - key_vals = [(p[0], " ".join(p[1:])) for p in pieces] - - quoted = dialect["quoted GFF2 values"] - for item in key_vals: - # Easy if it follows spec - if len(item) == 2: - key, val = item - - # Only key provided? - elif len(item) == 1: - key = item[0] - val = "" - - else: - key = item[0] - val = dialect["keyval separator"].join(item[1:]) - - try: - quals[key] - except KeyError: - quals[key] = [] - - if quoted: - if len(val) > 0 and val[0] == '"' and val[-1] == '"': - val = val[1:-1] - - if val: - # TODO: if there are extra commas for a value, just use empty - # strings - # quals[key].extend([v for v in val.split(',') if v]) - vals = val.split(",") - quals[key].extend(vals) - - quals = _unquote_quals(quals, dialect) - return quals, dialect - - # If we got here, then we need to infer the dialect.... - # - # Reset the order to an empty list so that it will only be populated with - # keys that are found in the file. - dialect["order"] = [] - - # ensembl GTF has trailing semicolon - if keyval_str[-1] == ";": - keyval_str = keyval_str[:-1] - dialect["trailing semicolon"] = True - - # GFF2/GTF has a semicolon with at least one space after it. - # Spaces can be on both sides (e.g. wormbase) - # GFF3 works with no spaces. - # So split on the first one we can recognize... 
- for sep in (" ; ", "; ", ";"): - parts = keyval_str.split(sep) - if len(parts) > 1: - dialect["field separator"] = sep - break - - # Is it GFF3? They have key-vals separated by "=" - if gff3_kw_pat.match(parts[0]): - key_vals = [p.split("=") for p in parts] - dialect["fmt"] = "gff3" - dialect["keyval separator"] = "=" - - # Otherwise, key-vals separated by space. Key is first item. - else: - dialect["keyval separator"] = " " - pieces = [] - for p in parts: - # Fix misplaced semicolons in keys in some GFF2 files - if p and p[0] == ";": - p = p[1:] - dialect["leading semicolon"] = True - pieces.append(p.strip().split(" ")) - key_vals = [(p[0], " ".join(p[1:])) for p in pieces] - - for item in key_vals: - - # Easy if it follows spec - if len(item) == 2: - key, val = item - - # Only key provided? - elif len(item) == 1: - key = item[0] - val = "" - - # Pathological cases where values of a key have within them the key-val - # separator, e.g., - # Alias=SGN-M1347;ID=T0028;Note=marker name(s): T0028 SGN-M1347 |identity=99.58|escore=2e-126 - # ^ ^ - else: - key = item[0] - val = dialect["keyval separator"].join(item[1:]) - - # Is the key already in there? - if key in quals: - dialect["repeated keys"] = True - else: - quals[key] = [] - - # Remove quotes in GFF2 - if len(val) > 0 and val[0] == '"' and val[-1] == '"': - val = val[1:-1] - dialect["quoted GFF2 values"] = True - if val: - - # TODO: if there are extra commas for a value, just use empty - # strings - # quals[key].extend([v for v in val.split(',') if v]) - - # See issue #198, where commas within a description can incorrectly - # cause the dialect inference to conclude that there are not - # repeated keys. - # - # More description in PR #208. 
- if dialect["repeated keys"]: - quals[key].append(val) - else: - vals = val.split(",") - - # If anything starts with a leading space, then we infer that - # it was part of a description or some other typographical - # interpretation, not a character to split multiple vals on -- - # and append the original val rather than the split vals. - if any([i[0] == " " for i in vals if i]): - quals[key].append(val) - else: - quals[key].extend(vals) - - # keep track of the order of keys - dialect["order"].append(key) - - if (dialect["keyval separator"] == " ") and (dialect["quoted GFF2 values"]): - dialect["fmt"] = "gtf" - - quals = _unquote_quals(quals, dialect) - return quals, dialect diff --git a/gffutils/pybedtools_integration.py b/gffutils/pybedtools_integration.py index 5c5c2b90..e01e4911 100644 --- a/gffutils/pybedtools_integration.py +++ b/gffutils/pybedtools_integration.py @@ -113,7 +113,7 @@ def tsses( if they overlap (as in the first two): - >>> print(tsses(db)) # doctest: +NORMALIZE_WHITESPACE + >>> print(gffutils.pybedtools_integration.tsses(db)) # doctest: +NORMALIZE_WHITESPACE chr2L gffutils_derived transcript_TSS 7529 7529 . + . gene_id "FBgn0031208"; transcript_id "FBtr0300689"; chr2L gffutils_derived transcript_TSS 7529 7529 . + . gene_id "FBgn0031208"; transcript_id "FBtr0300690"; chr2L gffutils_derived transcript_TSS 11000 11000 . - . gene_id "Fk_gene_1"; transcript_id "transcript_Fk_gene_1"; @@ -124,7 +124,7 @@ def tsses( Default merging, showing the first two TSSes merged and reported as a single unique TSS for the gene. Note the conversion to BED: - >>> x = tsses(db, merge_overlapping=True) + >>> x = gffutils.pybedtools_integration.tsses(db, merge_overlapping=True) >>> print(x) # doctest: +NORMALIZE_WHITESPACE chr2L 7528 7529 FBgn0031208 . + chr2L 10999 11000 Fk_gene_1 . - @@ -135,7 +135,7 @@ def tsses( be easier to parse than the original GTF or GFF file. With no merging specified, we must add `as_bed6=True` to see the names in BED format. 
- >>> x = tsses(db, attrs=['gene_id', 'transcript_id'], as_bed6=True) + >>> x = gffutils.pybedtools_integration.tsses(db, attrs=['gene_id', 'transcript_id'], as_bed6=True) >>> print(x) # doctest: +NORMALIZE_WHITESPACE chr2L 7528 7529 FBgn0031208:FBtr0300689 . + chr2L 7528 7529 FBgn0031208:FBtr0300690 . + @@ -145,7 +145,7 @@ def tsses( Use a 3kb merge distance so the last 2 features are merged together: - >>> x = tsses(db, merge_overlapping=True, merge_kwargs=dict(d=3000)) + >>> x = gffutils.pybedtools_integration.tsses(db, merge_overlapping=True, merge_kwargs=dict(d=3000)) >>> print(x) # doctest: +NORMALIZE_WHITESPACE chr2L 7528 7529 FBgn0031208 . + chr2L 10999 12500 Fk_gene_1,Fk_gene_2 . - @@ -154,7 +154,7 @@ def tsses( The set of unique TSSes for each gene, +1kb upstream and 500bp downstream: - >>> x = tsses(db, merge_overlapping=True) + >>> x = gffutils.pybedtools_integration.tsses(db, merge_overlapping=True) >>> x = x.slop(l=1000, r=500, s=True, genome='dm3') >>> print(x) # doctest: +NORMALIZE_WHITESPACE chr2L 6528 8029 FBgn0031208 . + diff --git a/gffutils/scripts/gffutils-cli b/gffutils/scripts/gffutils-cli index 051b76d5..70a882b5 100755 --- a/gffutils/scripts/gffutils-cli +++ b/gffutils/scripts/gffutils-cli @@ -76,7 +76,7 @@ def fetch(db, ids): (like grep -v)''') @arg('--exclude-self', help='''Use this to suppress reporting the IDs you've provided.''') -def children(db, ids, limit=None, exclude=None, exclude_self=False): +def children(db, ids, *, limit=None, exclude=None, exclude_self=False): """ Fetch children from the database according to ID. """ @@ -110,7 +110,7 @@ def children(db, ids, limit=None, exclude=None, exclude_self=False): (like grep -v)''') @arg('--exclude-self', help='''Use this to suppress reporting the IDs you've provided.''') -def parents(db, ids, limit=None, exclude=None, exclude_self=False): +def parents(db, ids, *, limit=None, exclude=None, exclude_self=False): """ Fetch parents from the database according to ID. 
""" @@ -167,7 +167,7 @@ def common(db): @arg('--disable-infer-transcripts', help='''Disable inferring of transcript extents for GTF files. Use this if your GTF file already has "transcript" featuretypes''') -def create(filename, output=None, force=False, quiet=False, merge="merge", +def create(filename, *, output=None, force=False, quiet=False, merge="merge", disable_infer_genes=False, disable_infer_transcripts=False): """ Create a database. @@ -198,7 +198,7 @@ def clean(filename): @arg('--in-place', help='''Sanitize file in-place: overwrites current file with sanitized version.''') -def sanitize(filename, +def sanitize(filename, *, in_memory=True, in_place=False): """ @@ -225,7 +225,7 @@ def sanitize(filename, @arg('filename', help='''GFF or GTF file to use.''') @arg('--in-place', help='''Remove duplicates in place (overwrite current file.)''') -def rmdups(filename, in_place=False): +def rmdups(filename, *, in_place=False): """ Remove duplicates from a GFF file. """ @@ -278,7 +278,7 @@ def convert(filename): @arg('--featuretype', help='''Restrict to a particular featuretype. This can be faster than doing a grep on the output, since it restricts the search space in the database''') -def search(db, text, featuretype=None): +def search(db, text, *, featuretype=None): """ Search the attributes. 
""" diff --git a/gffutils/test/attr_test_cases.py b/gffutils/test/attr_test_cases.py index b08afbe3..9ee1de96 100644 --- a/gffutils/test/attr_test_cases.py +++ b/gffutils/test/attr_test_cases.py @@ -15,36 +15,130 @@ """ + attrs = [ + dict( + str="ID=001;Name=gene1", + attrs={ + "ID": ["001"], + "Name": ["gene1"], + }, + ok=None, + dialect_mods={"order": ["ID", "Name"]}, + ), + dict( + str="ID=001;Name=gene1;", + attrs={ + "ID": ["001"], + "Name": ["gene1"], + }, + ok=None, + dialect_mods={"trailing semicolon": True, "order": ["ID", "Name"]}, + ), + dict( + str="ID=001; Name=gene1;", + attrs={ + "ID": ["001"], + "Name": ["gene1"], + }, + ok=None, + dialect_mods={ + "trailing semicolon": True, + "field separator": "; ", + "order": ["ID", "Name"], + }, + ), + dict( + str='ID="001"', + attrs={"ID": ["001"]}, + ok=None, + dialect_mods={ + "quoted GFF2 values": True, + "order": ["ID"], + }, + ), + dict( + str='ID="001"; Name="gene1"; types="a,b,c"', + attrs={"ID": ["001"], "Name": ["gene1"], "types": ["a", "b", "c"]}, + ok=None, + dialect_mods={ + "quoted GFF2 values": True, + "field separator": "; ", + "order": ["ID", "Name", "types"], + }, + ), + dict( + str='ID="001"; Name="gene1"; types="a"; types="b"; types="c"', + attrs={"ID": ["001"], "Name": ["gene1"], "types": ["a", "b", "c"]}, + ok=None, + dialect_mods={ + "quoted GFF2 values": True, + "field separator": "; ", + "repeated keys": True, + "order": ["ID", "Name", "types"], + }, + ), + dict( + str="Name=gene1;ID=001", + attrs={"Name": ["gene1"], "ID": ["001"]}, + ok=None, + dialect_mods={"order": ["Name", "ID"]}, + ), + dict( + str='gene_id "001";gene_name "gene1"', + attrs={"gene_id": ["001"], "gene_name": ["gene1"]}, + ok=None, + dialect_mods={ + "fmt": "gtf", + "quoted GFF2 values": True, + "keyval separator": " ", + "order": ["gene_id", "gene_name"], + }, + ), # c_elegans_WS199_shortened_gff.txt - ( - "count=1;gene=amx-2;sequence=SAGE:ggcagagtcttttggca;" "transcript=B0019.1", - { + dict( + 
str="count=1;gene=amx-2;sequence=SAGE:ggcagagtcttttggca;transcript=B0019.1", + attrs={ "count": ["1"], "gene": ["amx-2"], "sequence": ["SAGE:ggcagagtcttttggca"], "transcript": ["B0019.1"], }, - None, + ok=None, + dialect_mods={"order": ["count", "gene", "sequence", "transcript"]}, ), # ensembl_gtf.txt - ( - 'gene_id "Y74C9A.6"; transcript_id "Y74C9A.6"; exon_number "1"; ' - 'gene_name "Y74C9A.6"; transcript_name "NR_001477.2";', - { + dict( + str=( + 'gene_id "Y74C9A.6"; transcript_id "Y74C9A.6"; exon_number "1"; gene_name "Y74C9A.6"; transcript_name "NR_001477.2";' + ), + attrs={ "gene_id": ["Y74C9A.6"], "transcript_id": ["Y74C9A.6"], "exon_number": ["1"], "gene_name": ["Y74C9A.6"], "transcript_name": ["NR_001477.2"], }, - None, + ok=None, + dialect_mods={ + "trailing semicolon": True, + "fmt": "gtf", + "keyval separator": " ", + "field separator": "; ", + "quoted GFF2 values": True, + "order": [ + "gene_id", + "transcript_id", + "exon_number", + "gene_name", + "transcript_name", + ], + }, ), # F3-unique-3.v2.gff - ( - "g=A3233312322232122211;i=1;p=1.000;q=23,12,18,17,10,24,19,14,27,9,23" - ",9,16,20,11,7,8,4,4,14;u=0,0,0,1", - { + dict( + str="g=A3233312322232122211;i=1;p=1.000;q=23,12,18,17,10,24,19,14,27,9,23,9,16,20,11,7,8,4,4,14;u=0,0,0,1", + attrs={ "g": ["A3233312322232122211"], "i": ["1"], "p": ["1.000"], @@ -72,20 +166,27 @@ ], "u": ["0", "0", "0", "1"], }, - None, + ok=None, + dialect_mods={"order": ["g", "i", "p", "q", "u"]}, ), # glimmer_nokeyval.gff3 - ( - "ID=GL0000006;Name=GL0000006;Lack 3'-end;", - {"ID": ["GL0000006"], "Name": ["GL0000006"], "Lack 3'-end": []}, - None, + dict( + str="ID=GL0000006;Name=GL0000006;Lack 3'-end;", + attrs={"ID": ["GL0000006"], "Name": ["GL0000006"], "Lack 3'-end": []}, + ok=None, + dialect_mods={ + "order": ["ID", "Name", "Lack 3'-end"], + "trailing semicolon": True, + }, ), # hybrid1.gff3 - ( - "ID=A00469;Dbxref=AFFX-U133:205840_x_at,Locuslink:2688,Genbank-mRNA:" - 
"A00469,Swissprot:P01241,PFAM:PF00103,AFFX-U95:1332_f_at,Swissprot:" - "SOMA_HUMAN;Note=growth%20hormone%201;Alias=GH1", - { + dict( + str=( + "ID=A00469;Dbxref=AFFX-U133:205840_x_at,Locuslink:2688,Genbank-mRNA:" + "A00469,Swissprot:P01241,PFAM:PF00103,AFFX-U95:1332_f_at,Swissprot:" + "SOMA_HUMAN;Note=growth%20hormone%201;Alias=GH1" + ), + attrs={ "ID": ["A00469"], "Dbxref": [ "AFFX-U133:205840_x_at", @@ -99,9 +200,10 @@ "Note": ["growth hormone 1"], "Alias": ["GH1"], }, - "ID=A00469;Dbxref=AFFX-U133:205840_x_at,Locuslink:2688,Genbank-mRNA:" + ok="ID=A00469;Dbxref=AFFX-U133:205840_x_at,Locuslink:2688,Genbank-mRNA:" "A00469,Swissprot:P01241,PFAM:PF00103,AFFX-U95:1332_f_at,Swissprot:" "SOMA_HUMAN;Note=growth hormone 1;Alias=GH1", + dialect_mods={"order": ["ID", "Dbxref", "Note", "Alias"]}, ), # jgi_gff2.txt # @@ -109,19 +211,30 @@ # quoted but string values are. Only way to make this be invariant is to # keep track of the "flavor" of each attribute; not sure it's worth the # effort / processing time. - ( - 'name "fgenesh1_pg.C_chr_1000007"; transcriptId 873', - {"name": ["fgenesh1_pg.C_chr_1000007"], "transcriptId": ["873"]}, - 'name "fgenesh1_pg.C_chr_1000007"; transcriptId "873"', + dict( + str='name "fgenesh1_pg.C_chr_1000007"; transcriptId 873', + attrs={"name": ["fgenesh1_pg.C_chr_1000007"], "transcriptId": ["873"]}, + ok='name "fgenesh1_pg.C_chr_1000007"; transcriptId "873"', + dialect_mods={ + "order": ["name", "transcriptId"], + "quoted GFF2 values": True, + "keyval separator": " ", + "fmt": "gtf", + "field separator": "; ", + }, ), # mouse_extra_comma.gff3: extra comma line # # Note extra empty string in the dictionary's "Parent" field. 
# - ( - "Name=CDS:NC_000083.5:LOC100040603;Parent=XM_001475631.1,", - {"Name": ["CDS:NC_000083.5:LOC100040603"], "Parent": ["XM_001475631.1", ""]}, - None, + dict( + str="Name=CDS:NC_000083.5:LOC100040603;Parent=XM_001475631.1,", + attrs={ + "Name": ["CDS:NC_000083.5:LOC100040603"], + "Parent": ["XM_001475631.1", ""], + }, + ok=None, + dialect_mods={"order": ["Name", "Parent"]}, ), # mouse_extra_comma.gff3 # @@ -135,20 +248,23 @@ # # In both cases, the dictionary entry is simply an empty list; it's just in # the reconstruction where things get tricky. - ( - "ID=;Parent=XM_001475631.1", - {"ID": [], "Parent": ["XM_001475631.1"]}, - "ID;Parent=XM_001475631.1", + dict( + str="ID=;Parent=XM_001475631.1", + attrs={"ID": [], "Parent": ["XM_001475631.1"]}, + ok="ID;Parent=XM_001475631.1", + dialect_mods={"order": ["ID", "Parent"]}, ), # ncbi_gff3.txt - ( - "ID=NC_008596.1:speB:unknown_transcript_1;Parent=NC_008596.1:speB;" - "locus_tag=MSMEG_1072;EC_number=3.5.3.11;note=identified%20by%20mat" - "ch%20to%20protein%20family%20HMM%20PF00491%3B%20match%20to%20prote" - "in%20family%20HMM%20TIGR01230;transl_table=11;product=agmatinase;p" - "rotein_id=YP_885468.1;db_xref=GI:118469242;db_xref=GeneID:4535378;" - "exon_number=1", - { + dict( + str=( + "ID=NC_008596.1:speB:unknown_transcript_1;Parent=NC_008596.1:speB;" + "locus_tag=MSMEG_1072;EC_number=3.5.3.11;note=identified%20by%20mat" + "ch%20to%20protein%20family%20HMM%20PF00491%3B%20match%20to%20prote" + "in%20family%20HMM%20TIGR01230;transl_table=11;product=agmatinase;p" + "rotein_id=YP_885468.1;db_xref=GI:118469242;db_xref=GeneID:4535378;" + "exon_number=1" + ), + attrs={ "ID": ["NC_008596.1:speB:unknown_transcript_1"], "Parent": ["NC_008596.1:speB"], "locus_tag": ["MSMEG_1072"], @@ -164,18 +280,39 @@ "db_xref": ["GI:118469242", "GeneID:4535378"], "exon_number": ["1"], }, - "ID=NC_008596.1:speB:unknown_transcript_1;Parent=NC_008596.1:speB;" + ok="ID=NC_008596.1:speB:unknown_transcript_1;Parent=NC_008596.1:speB;" 
"locus_tag=MSMEG_1072;EC_number=3.5.3.11;note=identified by mat" "ch to protein family HMM PF00491%3B match to prote" "in family HMM TIGR01230;transl_table=11;product=agmatinase;p" "rotein_id=YP_885468.1;db_xref=GI:118469242;db_xref=GeneID:4535378;" "exon_number=1", + dialect_mods={ + "order": [ + "ID", + "Parent", + "locus_tag", + "EC_number", + "note", + "transl_table", + "product", + "protein_id", + "db_xref", + "exon_number", + ], + "repeated keys": True, + }, ), # wormbase_gff2_alt.txt # - ( - 'CDS "cr01.sctg102.wum.2.1"', - {"CDS": ["cr01.sctg102.wum.2.1"]}, - None, + dict( + str='CDS "cr01.sctg102.wum.2.1"', + attrs={"CDS": ["cr01.sctg102.wum.2.1"]}, + ok=None, + dialect_mods={ + "order": ["CDS"], + "quoted GFF2 values": True, + "keyval separator": " ", + "fmt": "gtf", + }, ), ] diff --git a/gffutils/test/data/ensembl_gtf.txt b/gffutils/test/data/ensembl_gtf.txt index f54f8fdd..88de6d51 100644 --- a/gffutils/test/data/ensembl_gtf.txt +++ b/gffutils/test/data/ensembl_gtf.txt @@ -1,33 +1,33 @@ -I snoRNA exon 3747 3909 . - . gene_id "Y74C9A.6"; transcript_id "Y74C9A.6"; exon_number "1"; gene_name "Y74C9A.6"; transcript_name "NR_001477.2"; -I protein_coding exon 12764812 12764949 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "1"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12764812 12764937 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "1"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding start_codon 12764935 12764937 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "1"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding exon 12764291 12764471 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "2"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12764291 12764471 . 
- 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "2"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding exon 12763979 12764102 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "3"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12763979 12764102 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "3"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding exon 12763729 12763882 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "4"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12763729 12763882 . - 1 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "4"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding exon 12763448 12763655 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "5"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12763448 12763655 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "5"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding exon 12763112 12763249 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "6"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12763112 12763249 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "6"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding exon 12762648 12762806 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "7"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12762648 12762806 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "7"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding exon 12762127 12762268 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "8"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12762127 12762268 . 
- 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "8"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding exon 12761799 12761953 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "9"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12761799 12761953 . - 1 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "9"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding exon 12761172 12761516 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "10"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12761172 12761516 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "10"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding exon 12760834 12760904 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "11"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12760834 12760904 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "11"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding exon 12760365 12760494 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "12"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12760365 12760494 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "12"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding exon 12760227 12760319 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "13"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12760227 12760319 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "13"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding exon 12759949 12760013 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "14"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12759949 12760013 . 
- 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "14"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding exon 12759579 12759828 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "15"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12759748 12759828 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "15"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding stop_codon 12759745 12759747 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "15"; gene_name "amx-2"; transcript_name "B0019.1"; +I snoRNA exon 3747 3909 . - . gene_id "Y74C9A.6"; transcript_id "Y74C9A.6"; exon_number "1"; gene_name "Y74C9A.6"; transcript_name "NR_001477.2"; +I protein_coding exon 12764812 12764949 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "1"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12764812 12764937 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "1"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding start_codon 12764935 12764937 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "1"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding exon 12764291 12764471 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "2"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12764291 12764471 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "2"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding exon 12763979 12764102 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "3"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12763979 12764102 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "3"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding exon 12763729 12763882 . - . 
gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "4"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12763729 12763882 . - 1 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "4"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding exon 12763448 12763655 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "5"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12763448 12763655 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "5"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding exon 12763112 12763249 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "6"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12763112 12763249 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "6"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding exon 12762648 12762806 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "7"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12762648 12762806 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "7"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding exon 12762127 12762268 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "8"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12762127 12762268 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "8"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding exon 12761799 12761953 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "9"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12761799 12761953 . - 1 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "9"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding exon 12761172 12761516 . - . 
gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "10"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12761172 12761516 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "10"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding exon 12760834 12760904 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "11"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12760834 12760904 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "11"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding exon 12760365 12760494 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "12"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12760365 12760494 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "12"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding exon 12760227 12760319 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "13"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12760227 12760319 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "13"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding exon 12759949 12760013 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "14"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12759949 12760013 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "14"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding exon 12759579 12759828 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "15"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12759748 12759828 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "15"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding stop_codon 12759745 12759747 . 
- 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "15"; gene_name "amx-2"; transcript_name "B0019.1"; diff --git a/gffutils/test/parser_test.py b/gffutils/test/parser_test.py index ed578a7e..4f510287 100644 --- a/gffutils/test/parser_test.py +++ b/gffutils/test/parser_test.py @@ -53,17 +53,32 @@ def test_attrs_OK(item): (see attr_test_cases.py for details); `acceptable_reconstruction` handles those. """ - attr_str, attr_dict, acceptable_reconstruction = item - result, dialect = parser._split_keyvals(attr_str) + attr_str = item["str"] + attr_dict = item["attrs"] + acceptable_reconstruction = item["ok"] + dialect_mods = item["dialect_mods"] + + result, inferred_dialect = parser._split_keyvals(attr_str) result = dict(result) assert result == attr_dict, result - reconstructed = parser._reconstruct(result, dialect, keep_order=True) + reconstructed = parser._reconstruct(result, inferred_dialect, keep_order=True) if acceptable_reconstruction: assert reconstructed == acceptable_reconstruction, reconstructed else: assert reconstructed == attr_str, reconstructed + # Get the default dialect for comparison, and update it with any diffs + # indicated by the test case + default_dialect = constants.dialect.copy() + default_dialect.update(dialect_mods) + + print(inferred_dialect) + print(dialect_mods) + print(attr_str) + assert default_dialect == inferred_dialect + + def parser_smoke_test(): """ @@ -75,7 +90,7 @@ def parser_smoke_test(): parser.logger.setLevel(logging.CRITICAL) for filename in TEST_FILENAMES: p = iterators._FileIterator(filename) - for i in p: + for _ in p: continue @@ -93,7 +108,8 @@ def test_empty_recontruct(): def test_empty_split_keyvals(): attrs, dialect = parser._split_keyvals(keyval_str=None) assert attrs == feature.dict_class() - assert dialect == constants.dialect + # assert dialect == constants.dialect + assert dialect is None def test_repeated_keys_conflict(): diff --git a/gffutils/test/test_1.py b/gffutils/test/test_1.py index 
2b88cc04..1780d996 100644 --- a/gffutils/test/test_1.py +++ b/gffutils/test/test_1.py @@ -14,10 +14,7 @@ import tempfile import http.server as SimpleHTTPServer -if sys.version_info.major == 3: - import socketserver as SocketServer -else: - import SocketServer +import socketserver as SocketServer import multiprocessing import json @@ -482,58 +479,51 @@ def test_sanitize_gff(): print("Sanitized GFF successfully.") -def test_region(): - +@pytest.mark.parametrize("kwargs,expected", [ + # previously failed, see issue #45 + (dict(seqid="chr2L", start=1, end=2e9, completely_within=True), 27), + (dict(region="chr2L", start=0), ValueError), + (dict(region="chr2L", end=0), ValueError), + (dict(region="chr2L", seqid=0), ValueError), + # these coords should catch everything + (dict(region="chr2L:7529-12500"), 27), + # stranded versions: + (dict(region="chr2L:7529-12500", strand="."), 0), + (dict(region="chr2L:7529-12500", strand="+"), 21), + (dict(region="chr2L:7529-12500", strand="-"), 6), + # different ways of selecting only that last exon in the last gene: + (dict(seqid="chr2L", start=11500, featuretype="exon"), 1), + (dict(seqid="chr2L", start=9500, featuretype="exon", strand="+"), 1), + # alternative method + (dict(seqid="chr2L", start=7529, end=12500), 27), + # since default completely_within=False, this catches anything that + # falls after 7680. So it only excludes the 5'UTR, which ends at 7679. 
+ (dict(seqid="chr2L", start=7680), 26), + # but completely_within=True will exclude the gene and mRNAs, first + # exon and the 5'UTR + (dict(seqid="chr2L", start=7680, completely_within=True), 22), + # similarly, this will *exclude* anything before 7680 + (dict(seqid="chr2L", end=7680), 5), + # and also similarly, this will only get us the 5'UTR which is the only + # feature falling completely before 7680 + (dict(seqid="chr2L", end=7680, completely_within=True), 1), + # and there's only features from chr2L in this file, so this catches + # everything too + (dict(region="chr2L"), 27), + # using seqid should work similarly to `region` with only chromosome + (dict(seqid="chr2L"), 27), + # nonexistent + (dict(region="nowhere"), 0), +]) +def test_region(kwargs, expected): db_fname = gffutils.example_filename("FBgn0031208.gff") db = gffutils.create_db(db_fname, ":memory:", keep_order=True) - def _check(item): - kwargs, expected = item - try: - obs = list(db.region(**kwargs)) - assert len(obs) == expected, "expected %s got %s" % (expected, len(obs)) - except expected: - pass - - regions = [ - # previously failed, see issue #45 - (dict(seqid="chr2L", start=1, end=2e9, completely_within=True), 27), - (dict(region="chr2L", start=0), ValueError), - (dict(region="chr2L", end=0), ValueError), - (dict(region="chr2L", seqid=0), ValueError), - # these coords should catch everything - (dict(region="chr2L:7529-12500"), 27), - # stranded versions: - (dict(region="chr2L:7529-12500", strand="."), 0), - (dict(region="chr2L:7529-12500", strand="+"), 21), - (dict(region="chr2L:7529-12500", strand="-"), 6), - # different ways of selecting only that last exon in the last gene: - (dict(seqid="chr2L", start=11500, featuretype="exon"), 1), - (dict(seqid="chr2L", start=9500, featuretype="exon", strand="+"), 1), - # alternative method - (dict(seqid="chr2L", start=7529, end=12500), 27), - # since default completely_within=False, this catches anything that - # falls after 7680. 
So it only excludes the 5'UTR, which ends at 7679. - (dict(seqid="chr2L", start=7680), 26), - # but completely_within=True will exclude the gene and mRNAs, first - # exon and the 5'UTR - (dict(seqid="chr2L", start=7680, completely_within=True), 22), - # similarly, this will *exclude* anything before 7680 - (dict(seqid="chr2L", end=7680), 5), - # and also similarly, this will only get us the 5'UTR which is the only - # feature falling completely before 7680 - (dict(seqid="chr2L", end=7680, completely_within=True), 1), - # and there's only features from chr2L in this file, so this catches - # everything too - (dict(region="chr2L"), 27), - # using seqid should work similarly to `region` with only chromosome - (dict(seqid="chr2L"), 27), - # nonexistent - (dict(region="nowhere"), 0), - ] - - for item in regions: - yield _check, item + try: + obs = list(db.region(**kwargs)) + assert len(obs) == expected, "expected %s got %s" % (expected, len(obs)) + except expected: + pass def test_nonascii(): diff --git a/gffutils/test/test_biopython_integration.py b/gffutils/test/test_biopython_integration.py index 58c5866a..e9f8e81d 100644 --- a/gffutils/test/test_biopython_integration.py +++ b/gffutils/test/test_biopython_integration.py @@ -1,6 +1,10 @@ from gffutils import example_filename import gffutils import gffutils.biopython_integration as bp +import pytest + +# Skip tests entirely if BioPython not available +pytest.importorskip('Bio') def test_roundtrip(): diff --git a/gffutils/test/test_issues.py b/gffutils/test/test_issues.py index 79996ba5..64deda16 100644 --- a/gffutils/test/test_issues.py +++ b/gffutils/test/test_issues.py @@ -200,7 +200,10 @@ def test_pr_144(): assert f.attributes["a"] == [""] assert str(f) == ". . . . . . . . 
a" g = gffutils.feature.feature_from_line(str(f)) - assert g == f + g.dialect["fmt"] = "gff3" + print(g.attributes) + print(g.dialect) + assert str(g) == str(f) def test_pr_172(): @@ -452,21 +455,43 @@ def test_issue_198(): assert f.attributes["description"] == ["WASP family homolog 7, pseudogene"] - # If we remove one of the db_xref keys, then the parser sees the comma and - # figures it's a multivalue key. + # If we remove one of the db_xref keys, then previously the parser saw the + # comma and figured it was a multivalue key, and split it. Now, it's + # correctly identified as a single-value key. + # + # Note that we still have gene_synonym as a repeated key. line = 'NC_000001.11 BestRefSeq gene 14362 29370 . - . gene_id "WASH7P"; transcript_id ""; db_xref "GeneID:653635"; description "WASP family homolog 7, pseudogene"; gbkey "Gene"; gene "WASH7P"; gene_biotype "transcribed_pseudogene"; gene_synonym "FAM39F"; gene_synonym "WASH5P"; pseudo "true";' f = feature.feature_from_line(line) + assert f.dialect["repeated keys"] # Previous result, note leading space --------------------------->| | # assert f.attributes['description'] == ['WASP family homolog 7', ' pseudogene'] + + # Current result: not split. assert f.attributes["description"] == ["WASP family homolog 7, pseudogene"] - # But removing that space before "pseudogene" means it's interpreted as - # a multivalue attribute + # Removing that space before "pseudogene" might mean it's a multivalue, but + # we decide on the convention that if keys are repeated at all, that wins. + # So we still don't split line = 'NC_000001.11 BestRefSeq gene 14362 29370 . - . 
gene_id "WASH7P"; transcript_id ""; db_xref "GeneID:653635"; description "WASP family homolog 7,pseudogene"; gbkey "Gene"; gene "WASH7P"; gene_biotype "transcribed_pseudogene"; gene_synonym "FAM39F"; gene_synonym "WASH5P"; pseudo "true";' f = feature.feature_from_line(line) + assert f.dialect["repeated keys"] + assert f.attributes["description"] == ["WASP family homolog 7,pseudogene"] + + # But if we get rid of all repeated keys, it's interpreted as multiple values + line = 'NC_000001.11 BestRefSeq gene 14362 29370 . - . gene_id "WASH7P"; transcript_id ""; db_xref "GeneID:653635"; description "WASP family homolog 7,pseudogene"; gbkey "Gene"; gene "WASH7P"; gene_biotype "transcribed_pseudogene"; gene_synonym "FAM39F"; pseudo "true";' + f = feature.feature_from_line(line) + assert not f.dialect["repeated keys"] assert f.attributes["description"] == ["WASP family homolog 7", "pseudogene"] + # ....but if there's a ", " (comma followed by space) instead of just + # comma, then it's not split. + line = 'NC_000001.11 BestRefSeq gene 14362 29370 . - . gene_id "WASH7P"; transcript_id ""; db_xref "GeneID:653635"; description "WASP family homolog 7, pseudogene"; gbkey "Gene"; gene "WASH7P"; gene_biotype "transcribed_pseudogene"; gene_synonym "FAM39F"; pseudo "true";' + f = feature.feature_from_line(line) + assert not f.dialect["repeated keys"] + assert f.attributes["description"] == ["WASP family homolog 7, pseudogene"] + + # Confirm behavior of corner cases like a trailing comma line = "chr17 RefSeq CDS 6806527 6806553 . + 0 Name=CDS:NC_000083.5:LOC100040603;Parent=XM_001475631.1," f = feature.feature_from_line(line) @@ -612,3 +637,18 @@ def test_issue_213(): db = gffutils.create_db(tmp, dbfn="issue_213.db", force=True) assert db.directives == ["gff-version 3"], db.directives assert len(db.directives) == 1 + +def test_issue_212(): + + + data = dedent( + """ + NC_000964.3 RefSeq CDS 410 1747 . 
+ 0 gene_id "BSU_00010"; transcript_id "unassigned_transcript_1"; db_xref "EnsemblGenomes-Gn:BSU00010"; db_xref "EnsemblGenomes-Tr:CAB11777"; db_xref "GOA:P05648"; db_xref "InterPro:IPR001957"; db_xref "InterPro:IPR003593"; db_xref "InterPro:IPR010921"; db_xref "InterPro:IPR013159"; db_xref "InterPro:IPR013317"; db_xref "InterPro:IPR018312"; db_xref "InterPro:IPR020591"; db_xref "InterPro:IPR024633"; db_xref "InterPro:IPR027417"; db_xref "PDB:4TPS"; db_xref "SubtiList:BG10065"; db_xref "UniProtKB/Swiss-Prot:P05648"; db_xref "GenBank:NP_387882.1"; db_xref "GeneID:939978"; experiment "publication(s) with functional evidences, PMID:2167836, 2846289, 12682299, 16120674, 1779750, 28166228"; gbkey "CDS"; gene "dnaA"; locus_tag "BSU_00010"; note "Evidence 1a: Function from experimental evidences in the studied strain; PubMedId: 2167836, 2846289, 12682299, 16120674, 1779750, 28166228; Product type f : factor"; product "chromosomal replication initiator informational ATPase"; protein_id "NP_387882.1"; transl_table "11"; exon_number "1"; + """ + ) + inferred_dialect = gffutils.helpers.infer_dialect(data.split('\t')[-1]) + assert inferred_dialect["semicolon in quotes"] + + f = next(iter(gffutils.DataIterator(data, from_string=True, dialect=inferred_dialect))) + assert f.dialect["semicolon in quotes"] + assert f.attributes["note"] == ["Evidence 1a: Function from experimental evidences in the studied strain; PubMedId: 2167836, 2846289, 12682299, 16120674, 1779750, 28166228; Product type f : factor"]