From 4acff2d749b6957fd183ab1ca7a7798fc18f59a0 Mon Sep 17 00:00:00 2001
From: Rafael Zubairov <zubrabubra@gmail.com>
Date: Tue, 20 Oct 2015 00:35:27 +0300
Subject: [PATCH] Extra parser class to allow instance reuse, memory buffers
 reuse.

---
 src/main/java/org/noggit/JSONParserExt.java | 1228 +++++++++++++++++++
 1 file changed, 1228 insertions(+)
 create mode 100644 src/main/java/org/noggit/JSONParserExt.java

diff --git a/src/main/java/org/noggit/JSONParserExt.java b/src/main/java/org/noggit/JSONParserExt.java
new file mode 100644
index 0000000..e0925b1
--- /dev/null
+++ b/src/main/java/org/noggit/JSONParserExt.java
@@ -0,0 +1,1228 @@
+package org.noggit;
+
+
+import java.io.IOException;
+
+/**
+ * Class complimentary to the JSONParser, but allowing one to reuse memory buffers, reinit paser, etc.
+ */
+public class JSONParserExt {
+
+    /**
+     * Event indicating a JSON string value, including member names of objects
+     */
+    public static final int STRING = 1;
+    /**
+     * Event indicating a JSON number value which fits into a signed 64 bit integer
+     */
+    public static final int LONG = 2;
+    /**
+     * Event indicating a JSON number value which has a fractional part or an exponent
+     * and with string length <= 23 chars not including sign.  This covers
+     * all representations of normal values for Double.toString().
+     */
+    public static final int NUMBER = 3;
+    /**
+     * Event indicating a JSON number value that was not produced by toString of any
+     * Java primitive numerics such as Double or Long.  It is either
+     * an integer outside the range of a 64 bit signed integer, or a floating
+     * point value with a string representation of more than 23 chars.
+     */
+    public static final int BIGNUMBER = 4;
+    /**
+     * Event indicating a JSON boolean
+     */
+    public static final int BOOLEAN = 5;
+    /**
+     * Event indicating a JSON null
+     */
+    public static final int NULL = 6;
+    /**
+     * Event indicating the start of a JSON object
+     */
+    public static final int OBJECT_START = 7;
+    /**
+     * Event indicating the end of a JSON object
+     */
+    public static final int OBJECT_END = 8;
+    /**
+     * Event indicating the start of a JSON array
+     */
+    public static final int ARRAY_START = 9;
+    /**
+     * Event indicating the end of a JSON array
+     */
+    public static final int ARRAY_END = 10;
+    /**
+     * Event indicating the end of input has been reached
+     */
+    public static final int EOF = 11;
+    /**
+     * Flags to control parsing behavior
+     */
+    public static final int ALLOW_COMMENTS = 1 << 0;
+    public static final int ALLOW_SINGLE_QUOTES = 1 << 1;
+    public static final int ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER = 1 << 2;
+    public static final int ALLOW_UNQUOTED_KEYS = 1 << 3;
+    public static final int ALLOW_UNQUOTED_STRING_VALUES = 1 << 4;
+    /**
+     * ALLOW_EXTRA_COMMAS causes any nunber of extra commas in arrays and objects to be ignored
+     * Note that a trailing comma in [] would be [,] (hence calling the feature "trailing" commas
+     * isn't really correct.  Since trailing commas is fundamentally incompatible with any future
+     * "fill-in-missing-values-with-null", it was decided to extend this feature to handle any
+     * number of extra commas.
+     */
+    public static final int ALLOW_EXTRA_COMMAS = 1 << 5;
+    public static final int FLAGS_STRICT = 0;
+    public static final int FLAGS_DEFAULT = ALLOW_COMMENTS | ALLOW_SINGLE_QUOTES | ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER | ALLOW_UNQUOTED_KEYS | ALLOW_UNQUOTED_STRING_VALUES | ALLOW_EXTRA_COMMAS;
+    // parser states stored in the stack
+    private static final byte DID_OBJSTART = 1;  // '{' just read
+    private static final byte DID_ARRSTART = 2;  // '[' just read
+    private static final byte DID_ARRELEM = 3;   // array element just read
+    private static final byte DID_MEMNAME = 4;   // object member name (map key) just read
+    private static final byte DID_MEMVAL = 5;    // object member value (map val) just read
+    private static final CharArr devNull = new NullCharArr();
+    private static final long WS_MASK = (1L << ' ') | (1L << '\t') | (1L << '\r') | (1L << '\n') | (1L << '#') | (1L << '/') | (0x01); // set 1 bit so 0xA0 will be flagged as whitespace
+    private static final int HAS_FRACTION = 0x01;  // nstate flag, '.' already read
+    private static final int HAS_EXPONENT = 0x02;  // nstate flag, '[eE][+-]?[0-9]' already read
+
+    char[] buf;  // input buffer with JSON text in it
+    // temporary output buffer
+    private final CharArr out;
+    // a dummy buffer we can use to point at other buffers
+    private final CharArr tmp;
+    int flags = FLAGS_DEFAULT;
+    int start;         // current position in the buffer
+    int end;           // end position in the buffer (one past last valid index)
+    boolean eof = false; // true if the end of the stream was reached.
+    long gpos;          // global position = gpos + start
+    int event;         // last event read
+    int stringTerm;    // The terminator for the last string we read: single quote, double quote, or 0 for unterminated.
+    // We need to keep some state in order to (at a minimum) know if
+    // we should skip ',' or ':'.
+    private byte[] stack;
+    private int ptr = 0;     // pointer into the stack of parser states
+
+    // idea - if someone passes us a CharArrayReader, we could
+    // directly use that buffer as it's protected.
+    private byte state = 0;  // current parser state
+    // info about value that was just read (or is in the middle of being read)
+    private int valstate;
+    private boolean bool; // bo-olean value read
+    private long lval;    // long value read
+    private int nstate;   // current state while reading a number
+
+    public JSONParserExt(char[] data, int start, int end, byte[] stack, CharArr localBuffer, CharArr tmp) {
+        this.stack = stack;
+        this.out = localBuffer;
+        this.tmp = tmp;
+
+        initBase(data, start, end);
+    }
+
+    public void init(char[] data, int start, int end) {
+        initBase(data, start, end);
+    }
+
+    private void initBase(char[] data, int start, int end) {
+        this.buf = data;
+        this.flags = FLAGS_DEFAULT;
+        this.start = start;
+        this.end = end;
+        this.eof = false;
+        this.gpos = 0;
+        this.event = 0;
+        this.stringTerm = 0;
+        this.ptr = 0;     // pointer into the stack of parser states
+
+        // idea - if someone passes us a CharArrayReader, we could
+        // directly use that buffer as it's protected.
+        this.state = 0;  // current parser state
+        // info about value that was just read (or is in the middle of being read)
+        this.valstate = 0;
+        this.bool = false;
+        this.lval = 0;
+        this.nstate = 0;
+    }
+
+    public static String getEventString(int e) {
+        switch (e) {
+            case STRING:
+                return "STRING";
+            case LONG:
+                return "LONG";
+            case NUMBER:
+                return "NUMBER";
+            case BIGNUMBER:
+                return "BIGNUMBER";
+            case BOOLEAN:
+                return "BOOLEAN";
+            case NULL:
+                return "NULL";
+            case OBJECT_START:
+                return "OBJECT_START";
+            case OBJECT_END:
+                return "OBJECT_END";
+            case ARRAY_START:
+                return "ARRAY_START";
+            case ARRAY_END:
+                return "ARRAY_END";
+            case EOF:
+                return "EOF";
+        }
+        return "Unknown: " + e;
+    }
+
+    /**
+     * Returns true if the given character is considered to be whitespace.
+     * One difference between Java's Character.isWhitespace() is that this method
+     * considers a hard space (non-breaking space, or nbsp) to be whitespace.
+     */
+    protected static final boolean isWhitespace(int ch) {
+        return (Character.isWhitespace(ch) || ch == 0x00a0);
+    }
+
+    private static boolean isUnquotedStringStart(int ch) {
+        return Character.isJavaIdentifierStart(ch);
+    }
+
+    // What characters are allowed to continue an unquoted string
+    // once we know we are in one.
+    private static boolean isUnquotedStringChar(int ch) {
+        return Character.isJavaIdentifierPart(ch)
+                || ch == '.'
+                || ch == '-'
+                || ch == '/';
+
+        // would checking for a-z first speed up the common case?
+
+        // possibly much more liberal unquoted string handling...
+        /***
+         switch (ch) {
+         case -1:
+         case ' ':
+         case '\t':
+         case '\r':
+         case '\n':
+         case '}':
+         case ']':
+         case ',':
+         case ':':
+         case '=':   // reserved for future use
+         case '\\':  // check for backslash should come after this function call
+         return false;
+         }
+         return true;
+         ***/
+    }
+
+    public int getFlags() {
+        return flags;
+    }
+
+    public int setFlags(int flags) {
+        int oldFlags = flags;
+        this.flags = flags;
+        return oldFlags;
+    }
+
+    // push current parser state (use at start of new container)
+    private final void push() {
+        if (ptr >= stack.length) {
+            // doubling here is probably overkill, but anything that needs to double more than
+            // once (32 levels deep) is very atypical anyway.
+            byte[] newstack = new byte[stack.length << 1];
+            System.arraycopy(stack, 0, newstack, 0, stack.length);
+            stack = newstack;
+        }
+        stack[ptr++] = state;
+    }
+
+    // pop  parser state (use at end of container)
+    private final void pop() {
+        if (--ptr < 0) {
+            throw err("Unbalanced container");
+        } else {
+            state = stack[ptr];
+        }
+    }
+
+    protected void fill() throws IOException {
+        if (start >= end) eof = true;
+    }
+
+    private void getMore() throws IOException {
+        fill();
+        if (start >= end) {
+            throw err(null);
+        }
+    }
+
+    protected int getChar() throws IOException {
+        if (start >= end) {
+            fill();
+            if (start >= end) return -1;
+        }
+        return buf[start++];
+    }
+
+    protected int getCharNWS() throws IOException {
+        for (; ; ) {
+            int ch = getChar();
+            // getCharNWS is normally called in the context of expecting certain JSON special characters
+            // such as ":}"],"
+            // all of these characters are below 64 (including comment chars '/' and '#', so we can make this the fast path
+            // even w/o checking the range first.  We'll only get some false-positives while using bare strings (chars "IJMc")
+            if (((WS_MASK >> ch) & 0x01) == 0) {
+                return ch;
+            } else if (ch <= ' ') {   // this will only be true if one of the whitespace bits was set
+                continue;
+            } else if (ch == '/') {
+                getSlashComment();
+            } else if (ch == '#') {
+                getNewlineComment();
+            } else if (!isWhitespace(ch)) { // we'll only reach here with certain bare strings, errors, or strange whitespace like 0xa0
+                return ch;
+            }
+
+            /***
+             // getCharNWS is normally called in the context of expecting certain JSON special characters
+             // such as ":}"],"
+             // all of these characters are below 64 (including comment chars '/' and '#', so we can make this the fast path
+             if (ch < 64) {
+             if (((WS_MASK >> ch) & 0x01) == 0) return ch;
+             if (ch <= ' ') continue;  // whitespace below a normal space
+             if (ch=='/') {
+             getSlashComment();
+             } else if (ch=='#') {
+             getNewlineComment();
+             }
+             } else if (!isWhitespace(ch)) {  // check for higher whitespace like 0xA0
+             return ch;
+             }
+             ***/
+
+            /** older code
+             switch (ch) {
+             case ' ' :
+             case '\t' :
+             case '\r' :
+             case '\n' :
+             continue outer;
+             case '#' :
+             getNewlineComment();
+             continue outer;
+             case '/' :
+             getSlashComment();
+             continue outer;
+             default:
+             return ch;
+             }
+             **/
+        }
+    }
+
+    protected int getCharNWS(int ch) throws IOException {
+        for (; ; ) {
+            // getCharNWS is normally called in the context of expecting certain JSON special characters
+            // such as ":}"],"
+            // all of these characters are below 64 (including comment chars '/' and '#', so we can make this the fast path
+            // even w/o checking the range first.  We'll only get some false-positives while using bare strings (chars "IJMc")
+            if (((WS_MASK >> ch) & 0x01) == 0) {
+                return ch;
+            } else if (ch <= ' ') {   // this will only be true if one of the whitespace bits was set
+                // whitespace... get new char at bottom of loop
+            } else if (ch == '/') {
+                getSlashComment();
+            } else if (ch == '#') {
+                getNewlineComment();
+            } else if (!isWhitespace(ch)) { // we'll only reach here with certain bare strings, errors, or strange whitespace like 0xa0
+                return ch;
+            }
+            ch = getChar();
+        }
+    }
+
+    private int getCharExpected(int expected) throws IOException {
+        for (; ; ) {
+            int ch = getChar();
+            if (ch == expected) return expected;
+            if (ch == ' ') continue;
+            return getCharNWS(ch);
+        }
+    }
+
+    protected void getNewlineComment() throws IOException {
+        // read a # or a //, so go until newline
+        for (; ; ) {
+            int ch = getChar();
+            // don't worry about DOS /r/n... we'll stop on the \r and let the rest of the whitespace
+            // eater consume the \n
+            if (ch == '\n' || ch == '\r' || ch == -1) {
+                return;
+            }
+        }
+    }
+
+    protected void getSlashComment() throws IOException {
+        int ch = getChar();
+        if (ch == '/') {
+            getNewlineComment();
+            return;
+        }
+
+        if (ch != '*') {
+            throw err("Invalid comment: expected //, /*, or #");
+        }
+
+        ch = getChar();
+        for (; ; ) {
+            if (ch == '*') {
+                ch = getChar();
+                if (ch == '/') {
+                    return;
+                } else if (ch == '*') {
+                    // handle cases of *******/
+                    continue;
+                }
+            }
+            if (ch == -1) {
+                return;
+            }
+            ch = getChar();
+        }
+    }
+
+    private boolean matchBareWord(char[] arr) throws IOException {
+        for (int i = 1; i < arr.length; i++) {
+            int ch = getChar();
+            if (ch != arr[i]) {
+                if ((flags & ALLOW_UNQUOTED_STRING_VALUES) == 0) {
+                    throw err("Expected " + new String(arr));
+                } else {
+                    stringTerm = 0;
+                    out.reset();
+                    out.write(arr, 0, i);
+                    if (!eof) {
+                        start--;
+                    }
+                    return false;
+                }
+            }
+        }
+
+        // if we don't allow bare strings, we don't need to check that the string actually terminates... just
+        // let things fail as the parser tries to continue
+        if ((flags & ALLOW_UNQUOTED_STRING_VALUES) == 0) {
+            return true;
+        }
+
+        // check that the string actually terminates... for example trueX should return false
+        int ch = getChar();
+        if (eof) {
+            return true;
+        } else if (!isUnquotedStringChar(ch)) {
+            start--;
+            return true;
+        }
+
+        // we encountered something like "trueX" when matching "true"
+        stringTerm = 0;
+        out.reset();
+        out.unsafeWrite(arr, 0, arr.length);
+        out.unsafeWrite(ch);
+        return false;
+    }
+
+    protected ParseException err(String msg) {
+        // We can't tell if EOF was hit by comparing start<=end
+        // because the illegal char could have been the last in the buffer
+        // or in the stream.  To deal with this, the "eof" var was introduced
+        if (!eof && start > 0) start--;  // backup one char
+        String chs = "char=" + ((start >= end) ? "(EOF)" : "" + buf[start]);
+        String pos = "position=" + (gpos + start);
+        String tot = chs + ',' + pos + getContext();
+        if (msg == null) {
+            if (start >= end) msg = "Unexpected EOF";
+            else msg = "JSON Parse Error";
+        }
+        return new ParseException(msg + ": " + tot);
+    }
+
+    private String getContext() {
+        String context = "";
+        if (start >= 0) {
+            context += " BEFORE='" + errEscape(Math.max(start - 60, 0), start + 1) + "'";
+        }
+        if (start < end) {
+            context += " AFTER='" + errEscape(start + 1, start + 40) + "'";
+        }
+        return context;
+    }
+
+    private String errEscape(int a, int b) {
+        b = Math.min(b, end);
+        if (a >= b) return "";
+        return new String(buf, a, b - a).replaceAll("\\s+", " ");
+    }
+
+    /**
+     * Returns the long read... only significant if valstate==LONG after
+     * this call.  firstChar should be the first numeric digit read.
+     */
+    private long readNumber(int firstChar, boolean isNeg) throws IOException {
+        out.unsafeWrite(firstChar);   // unsafe OK since we know output is big enough
+        // We build up the number in the negative plane since it's larger (by one) than
+        // the positive plane.
+        long v = '0' - firstChar;
+        // can't overflow a long in 18 decimal digits (i.e. 17 additional after the first).
+        // we also need 22 additional to handle double so we'll handle in 2 separate loops.
+        int i;
+        for (i = 0; i < 17; i++) {
+            int ch = getChar();
+            // TODO: is this switch faster as an if-then-else?
+            switch (ch) {
+                case '0':
+                case '1':
+                case '2':
+                case '3':
+                case '4':
+                case '5':
+                case '6':
+                case '7':
+                case '8':
+                case '9':
+                    v = v * 10 - (ch - '0');
+                    out.unsafeWrite(ch);
+                    continue;
+                case '.':
+                    out.unsafeWrite('.');
+                    valstate = readFrac(out, 22 - i);
+                    return 0;
+                case 'e':
+                case 'E':
+                    out.unsafeWrite(ch);
+                    nstate = 0;
+                    valstate = readExp(out, 22 - i);
+                    return 0;
+                default:
+                    // return the number, relying on nextEvent() to return an error
+                    // for invalid chars following the number.
+                    if (ch != -1) --start;   // push back last char if not EOF
+
+                    valstate = LONG;
+                    return isNeg ? v : -v;
+            }
+        }
+
+        // after this, we could overflow a long and need to do extra checking
+        boolean overflow = false;
+        long maxval = isNeg ? Long.MIN_VALUE : -Long.MAX_VALUE;
+
+        for (; i < 22; i++) {
+            int ch = getChar();
+            switch (ch) {
+                case '0':
+                case '1':
+                case '2':
+                case '3':
+                case '4':
+                case '5':
+                case '6':
+                case '7':
+                case '8':
+                case '9':
+                    if (v < (0x8000000000000000L / 10)) overflow = true;  // can't multiply by 10 w/o overflowing
+                    v *= 10;
+                    int digit = ch - '0';
+                    if (v < maxval + digit) overflow = true; // can't add digit w/o overflowing
+                    v -= digit;
+                    out.unsafeWrite(ch);
+                    continue;
+                case '.':
+                    out.unsafeWrite('.');
+                    valstate = readFrac(out, 22 - i);
+                    return 0;
+                case 'e':
+                case 'E':
+                    out.unsafeWrite(ch);
+                    nstate = 0;
+                    valstate = readExp(out, 22 - i);
+                    return 0;
+                default:
+                    // return the number, relying on nextEvent() to return an error
+                    // for invalid chars following the number.
+                    if (ch != -1) --start;   // push back last char if not EOF
+
+                    valstate = overflow ? BIGNUMBER : LONG;
+                    return isNeg ? v : -v;
+            }
+        }
+
+
+        nstate = 0;
+        valstate = BIGNUMBER;
+        return 0;
+    }
+
+    // read digits right of decimal point
+    private int readFrac(CharArr arr, int lim) throws IOException {
+        nstate = HAS_FRACTION;  // deliberate set instead of '|'
+        while (--lim >= 0) {
+            int ch = getChar();
+            if (ch >= '0' && ch <= '9') {
+                arr.write(ch);
+            } else if (ch == 'e' || ch == 'E') {
+                arr.write(ch);
+                return readExp(arr, lim);
+            } else {
+                if (ch != -1) start--; // back up
+                return NUMBER;
+            }
+        }
+        return BIGNUMBER;
+    }
+
+    // call after 'e' or 'E' has been seen to read the rest of the exponent
+    private int readExp(CharArr arr, int lim) throws IOException {
+        nstate |= HAS_EXPONENT;
+        int ch = getChar();
+        lim--;
+
+        if (ch == '+' || ch == '-') {
+            arr.write(ch);
+            ch = getChar();
+            lim--;
+        }
+
+        // make sure at least one digit is read.
+        if (ch < '0' || ch > '9') {
+            throw err("missing exponent number");
+        }
+        arr.write(ch);
+
+        return readExpDigits(arr, lim);
+    }
+
+    // continuation of readExpStart
+    private int readExpDigits(CharArr arr, int lim) throws IOException {
+        while (--lim >= 0) {
+            int ch = getChar();
+            if (ch >= '0' && ch <= '9') {
+                arr.write(ch);
+            } else {
+                if (ch != -1) start--; // back up
+                return NUMBER;
+            }
+        }
+        return BIGNUMBER;
+    }
+
+    private void continueNumber(CharArr arr) throws IOException {
+        if (arr != out) arr.write(out);
+
+        if ((nstate & HAS_EXPONENT) != 0) {
+            readExpDigits(arr, Integer.MAX_VALUE);
+            return;
+        }
+        if (nstate != 0) {
+            readFrac(arr, Integer.MAX_VALUE);
+            return;
+        }
+
+        for (; ; ) {
+            int ch = getChar();
+            if (ch >= '0' && ch <= '9') {
+                arr.write(ch);
+            } else if (ch == '.') {
+                arr.write(ch);
+                readFrac(arr, Integer.MAX_VALUE);
+                return;
+            } else if (ch == 'e' || ch == 'E') {
+                arr.write(ch);
+                readExp(arr, Integer.MAX_VALUE);
+                return;
+            } else {
+                if (ch != -1) start--;
+                return;
+            }
+        }
+    }
+
+    private int hexval(int hexdig) {
+        if (hexdig >= '0' && hexdig <= '9') {
+            return hexdig - '0';
+        } else if (hexdig >= 'A' && hexdig <= 'F') {
+            return hexdig + (10 - 'A');
+        } else if (hexdig >= 'a' && hexdig <= 'f') {
+            return hexdig + (10 - 'a');
+        }
+        throw err("invalid hex digit");
+    }
+
+    // backslash has already been read when this is called
+    private char readEscapedChar() throws IOException {
+        int ch = getChar();
+        switch (ch) {
+            case '"':
+                return '"';
+            case '\'':
+                return '\'';
+            case '\\':
+                return '\\';
+            case '/':
+                return '/';
+            case 'n':
+                return '\n';
+            case 'r':
+                return '\r';
+            case 't':
+                return '\t';
+            case 'f':
+                return '\f';
+            case 'b':
+                return '\b';
+            case 'u':
+                return (char) (
+                        (hexval(getChar()) << 12)
+                                | (hexval(getChar()) << 8)
+                                | (hexval(getChar()) << 4)
+                                | (hexval(getChar())));
+        }
+        if ((flags & ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER) != 0 && ch != EOF) {
+            return (char) ch;
+        }
+        throw err("Invalid character escape");
+    }
+
+    private CharArr readStringChars() throws IOException {
+        if (stringTerm == 0) {
+            // "out" will already contain the first part of the bare string, so don't reset it
+            readStringBare(out);
+            return out;
+        }
+
+        char terminator = (char) stringTerm;
+        int i;
+        for (i = start; i < end; i++) {
+            char c = buf[i];
+            if (c == terminator) {
+                tmp.set(buf, start, i);  // directly use input buffer
+                start = i + 1; // advance past last '"'
+                return tmp;
+            } else if (c == '\\') {
+                break;
+            }
+        }
+        out.reset();
+        readStringChars2(out, i);
+        return out;
+    }
+
+    // middle is the pointer to the middle of a buffer to start scanning for a non-string
+    // character ('"' or "/").  start<=middle<end
+    // this should be faster for strings with fewer escapes, but probably slower for many escapes.
+    private void readStringChars2(CharArr arr, int middle) throws IOException {
+        if (stringTerm == 0) {
+            readStringBare(arr);
+            return;
+        }
+
+        char terminator = (char) stringTerm;
+
+        for (; ; ) {
+            if (middle >= end) {
+                arr.write(buf, start, middle - start);
+                start = middle;
+                getMore();
+                middle = start;
+            }
+            int ch = buf[middle++];
+            if (ch == terminator) {
+                int len = middle - start - 1;
+                if (len > 0) arr.write(buf, start, len);
+                start = middle;
+                return;
+            } else if (ch == '\\') {
+                int len = middle - start - 1;
+                if (len > 0) arr.write(buf, start, len);
+                start = middle;
+                arr.write(readEscapedChar());
+                middle = start;
+            }
+        }
+    }
+
+    private void readStringBare(CharArr arr) throws IOException {
+        if (arr != out) {
+            arr.append(out);
+        }
+
+        for (; ; ) {
+            int ch = getChar();
+            if (!isUnquotedStringChar(ch)) {
+                if (ch == -1) break;
+                if (ch == '\\') {
+                    arr.write(readEscapedChar());
+                    continue;
+                }
+                start--;
+                break;
+            }
+
+            if (ch == '\\') {
+                arr.write(readEscapedChar());
+                continue;
+            }
+
+            arr.write(ch);
+        }
+    }
+
+    // isName==true if this is a field name (as opposed to a value)
+    private void handleNonDoubleQuoteString(int ch, boolean isName) throws IOException {
+        if (ch == '\'') {
+            stringTerm = ch;
+            if ((flags & ALLOW_SINGLE_QUOTES) == 0) {
+                throw err("Single quoted strings not allowed");
+            }
+        } else {
+            if (isName && (flags & ALLOW_UNQUOTED_KEYS) == 0
+                    || !isName && (flags & ALLOW_UNQUOTED_STRING_VALUES) == 0
+                    || eof) {
+                if (isName) {
+                    throw err("Expected quoted string");
+                } else {
+                    throw err(null);
+                }
+            }
+
+            if (!isUnquotedStringStart(ch)) {
+                throw err(null);
+            }
+
+            stringTerm = 0;  // signal for unquoted string
+            out.reset();
+            out.unsafeWrite(ch);
+        }
+    }
+
+    /**
+     * alternate implementation
+     * // middle is the pointer to the middle of a buffer to start scanning for a non-string
+     * // character ('"' or "/").  start<=middle<end
+     * private void readStringChars2a(CharArr arr, int middle) throws IOException {
+     * int ch=0;
+     * for(;;) {
+     * // find the next non-string char
+     * for (; middle<end; middle++) {
+     * ch = buf[middle];
+     * if (ch=='"' || ch=='\\') break;
+     * }
+     * <p>
+     * arr.write(buf,start,middle-start);
+     * if (middle>=end) {
+     * getMore();
+     * middle=start;
+     * } else {
+     * start = middle+1;   // set buffer pointer to correct spot
+     * if (ch=='"') {
+     * valstate=0;
+     * return;
+     * } else if (ch=='\\') {
+     * arr.write(readEscapedChar());
+     * if (start>=end) getMore();
+     * middle=start;
+     * }
+     * }
+     * }
+     * }
+     * *
+     */
+
+
+    // return the next event when parser is in a neutral state (no
+    // map separators or array element separators to read
+    private int next(int ch) throws IOException {
+        // TODO: try my own form of indirect jump... look up char class and index directly into handling implementation?
+        for (; ; ) {
+            switch (ch) {
+                case ' ': // this is not the exclusive list of whitespace chars... the rest are handled in default:
+                case '\t':
+                case '\r':
+                case '\n':
+                    ch = getCharNWS(); // calling getCharNWS here seems faster than letting the switch handle it
+                    break;
+                case '"':
+                    stringTerm = '"';
+                    valstate = STRING;
+                    return STRING;
+                case '\'':
+                    if ((flags & ALLOW_SINGLE_QUOTES) == 0) {
+                        throw err("Single quoted strings not allowed");
+                    }
+                    stringTerm = '\'';
+                    valstate = STRING;
+                    return STRING;
+                case '{':
+                    push();
+                    state = DID_OBJSTART;
+                    return OBJECT_START;
+                case '[':
+                    push();
+                    state = DID_ARRSTART;
+                    return ARRAY_START;
+                case '0':
+                    out.reset();
+                    //special case '0'?  If next char isn't '.' val=0
+                    ch = getChar();
+                    if (ch == '.') {
+                        start--;
+                        ch = '0';
+                        readNumber('0', false);
+                        return valstate;
+                    } else if (ch > '9' || ch < '0') {
+                        out.unsafeWrite('0');
+                        if (ch != -1) start--;
+                        lval = 0;
+                        valstate = LONG;
+                        return LONG;
+                    } else {
+                        throw err("Leading zeros not allowed");
+                    }
+                case '1':
+                case '2':
+                case '3':
+                case '4':
+                case '5':
+                case '6':
+                case '7':
+                case '8':
+                case '9':
+                    out.reset();
+                    lval = readNumber(ch, false);
+                    return valstate;
+                case '-':
+                    out.reset();
+                    out.unsafeWrite('-');
+                    ch = getChar();
+                    if (ch < '0' || ch > '9') throw err("expected digit after '-'");
+                    lval = readNumber(ch, true);
+                    return valstate;
+                case 't':
+                    // TODO: test performance of this non-branching inline version.
+                    // if ((('r'-getChar())|('u'-getChar())|('e'-getChar())) != 0) throw err("");
+                    if (matchBareWord(JSONUtil.TRUE_CHARS)) {
+                        bool = true;
+                        valstate = BOOLEAN;
+                        return valstate;
+                    } else {
+                        valstate = STRING;
+                        return STRING;
+                    }
+                case 'f':
+                    if (matchBareWord(JSONUtil.FALSE_CHARS)) {
+                        bool = false;
+                        valstate = BOOLEAN;
+                        return valstate;
+                    } else {
+                        valstate = STRING;
+                        return STRING;
+                    }
+                case 'n':
+                    if (matchBareWord(JSONUtil.NULL_CHARS)) {
+                        valstate = NULL;
+                        return valstate;
+                    } else {
+                        valstate = STRING;
+                        return STRING;
+                    }
+                case '/':
+                    getSlashComment();
+                    ch = getChar();
+                    break;
+                case '#':
+                    getNewlineComment();
+                    ch = getChar();
+                    break;
+                case ']':  // This only happens with a trailing comma (or an error)
+                    if (state != DID_ARRELEM || (flags & ALLOW_EXTRA_COMMAS) == 0) {
+                        throw err("Unexpected array closer ]");
+                    }
+                    pop();
+                    return event = ARRAY_END;
+                case '}':  // This only happens with a trailing comma (or an error)
+                    if (state != DID_MEMVAL || (flags & ALLOW_EXTRA_COMMAS) == 0) {
+                        throw err("Unexpected object closer }");
+                    }
+                    pop();
+                    return event = ARRAY_END;
+                case ',': // This only happens with input like [1,]
+                    if ((state != DID_ARRELEM && state != DID_MEMVAL) || (flags & ALLOW_EXTRA_COMMAS) == 0) {
+                        throw err("Unexpected comma");
+                    }
+                    ch = getChar();
+                    break;
+                case -1:
+                    if (getLevel() > 0) throw err("Premature EOF");
+                    return EOF;
+                default:
+                    // Handle unusual unicode whitespace like no-break space (0xA0)
+                    if (isWhitespace(ch)) {
+                        ch = getChar();  // getCharNWS() would also work
+                        break;
+                    }
+                    handleNonDoubleQuoteString(ch, false);
+                    valstate = STRING;
+                    return STRING;
+                // throw err(null);
+            }
+
+        }
+    }
+
+    @Override
+    public String toString() {
+        return "start=" + start + ",end=" + end + ",state=" + state + "valstate=" + valstate;
+    }
+
+    /**
+     * Returns the next event encountered in the JSON stream, one of
+     * <ul>
+     * <li>{@link #STRING}</li>
+     * <li>{@link #LONG}</li>
+     * <li>{@link #NUMBER}</li>
+     * <li>{@link #BIGNUMBER}</li>
+     * <li>{@link #BOOLEAN}</li>
+     * <li>{@link #NULL}</li>
+     * <li>{@link #OBJECT_START}</li>
+     * <li>{@link #OBJECT_END}</li>
+     * <li>{@link #OBJECT_END}</li>
+     * <li>{@link #ARRAY_START}</li>
+     * <li>{@link #ARRAY_END}</li>
+     * <li>{@link #EOF}</li>
+     * </ul>
+     */
+    public int nextEvent() throws IOException {
+        if (valstate != 0) {
+            if (valstate == STRING) {
+                readStringChars2(devNull, start);
+            } else if (valstate == BIGNUMBER) {
+                continueNumber(devNull);
+            }
+            valstate = 0;
+        }
+
+        int ch;
+        outer:
+        for (; ; ) {
+            switch (state) {
+                case 0:
+                    return event = next(getChar());
+                case DID_OBJSTART:
+                    ch = getCharExpected('"');
+                    if (ch == '}') {
+                        pop();
+                        return event = OBJECT_END;
+                    }
+                    if (ch == '"') {
+                        stringTerm = ch;
+                    } else if (ch == ',' && (flags & ALLOW_EXTRA_COMMAS) != 0) {
+                        continue outer;
+                    } else {
+                        handleNonDoubleQuoteString(ch, true);
+                    }
+                    state = DID_MEMNAME;
+                    valstate = STRING;
+                    return event = STRING;
+                case DID_MEMNAME:
+                    ch = getCharExpected(':');
+                    if (ch != ':') {
+                        throw err("Expected key,value separator ':'");
+                    }
+                    state = DID_MEMVAL;  // set state first because it might be pushed...
+                    return event = next(getChar());
+                case DID_MEMVAL:
+                    ch = getCharExpected(',');
+                    if (ch == '}') {
+                        pop();
+                        return event = OBJECT_END;
+                    } else if (ch != ',') {
+                        throw err("Expected ',' or '}'");
+                    }
+                    ch = getCharExpected('"');
+                    if (ch == '"') {
+                        stringTerm = ch;
+                    } else if ((ch == ',' || ch == '}') && (flags & ALLOW_EXTRA_COMMAS) != 0) {
+                        if (ch == ',') continue outer;
+                        pop();
+                        return event = OBJECT_END;
+                    } else {
+                        handleNonDoubleQuoteString(ch, true);
+                    }
+                    state = DID_MEMNAME;
+                    valstate = STRING;
+                    return event = STRING;
+                case DID_ARRSTART:
+                    ch = getCharNWS();
+                    if (ch == ']') {
+                        pop();
+                        return event = ARRAY_END;
+                    }
+                    state = DID_ARRELEM;  // set state first, might be pushed...
+                    return event = next(ch);
+                case DID_ARRELEM:
+                    ch = getCharExpected(',');
+                    if (ch == ',') {
+                        // state = DID_ARRELEM;  // redundant
+                        return event = next(getChar());
+                    } else if (ch == ']') {
+                        pop();
+                        return event = ARRAY_END;
+                    } else {
+                        throw err("Expected ',' or ']'");
+                    }
+            }
+        } // end for(;;)
+    }
+
+    public int lastEvent() {
+        return event;
+    }
+
+    public boolean wasKey() {
+        return state == DID_MEMNAME;
+    }
+
+    private void goTo(int what) throws IOException {
+        if (valstate == what) {
+            valstate = 0;
+            return;
+        }
+        if (valstate == 0) {
+      /*int ev = */
+            nextEvent();      // TODO
+            if (valstate != what) {
+                throw err("type mismatch");
+            }
+            valstate = 0;
+        } else {
+            throw err("type mismatch");
+        }
+    }
+
+    /**
+     * Returns the JSON string value, decoding any escaped characters.
+     */
+    public String getString() throws IOException {
+        return getStringChars().toString();
+    }
+
+    /**
+     * Returns the characters of a JSON string value, decoding any escaped characters.
+     * <p/>The underlying buffer of the returned <code>CharArr</code> should *not* be
+     * modified as it may be shared with the input buffer.
+     * <p/>The returned <code>CharArr</code> will only be valid up until
+     * the next JSONParser method is called.  Any required data should be
+     * read before that point.
+     */
+    public CharArr getStringChars() throws IOException {
+        goTo(STRING);
+        return readStringChars();
+    }
+
+    /**
+     * Reads a JSON string into the output, decoding any escaped characters.
+     */
+    public void getString(CharArr output) throws IOException {
+        goTo(STRING);
+        readStringChars2(output, start);
+    }
+
+    /**
+     * Reads a number from the input stream and parses it as a long, only if
+     * the value will in fact fit into a signed 64 bit integer.
+     */
+    public long getLong() throws IOException {
+        goTo(LONG);
+        return lval;
+    }
+
+    /**
+     * Reads a number from the input stream and parses it as a double
+     */
+    public double getDouble() throws IOException {
+        return Double.parseDouble(getNumberChars().toString());
+    }
+
+    /**
+     * Returns the characters of a JSON numeric value.
+     * <p/>The underlying buffer of the returned <code>CharArr</code> should *not* be
+     * modified as it may be shared with the input buffer.
+     * <p/>The returned <code>CharArr</code> will only be valid up until
+     * the next JSONParser method is called.  Any required data should be
+     * read before that point.
+     */
+    public CharArr getNumberChars() throws IOException {
+        int ev = 0;
+        if (valstate == 0) ev = nextEvent();
+
+        if (valstate == LONG || valstate == NUMBER) {
+            valstate = 0;
+            return out;
+        } else if (valstate == BIGNUMBER) {
+            continueNumber(out);
+            valstate = 0;
+            return out;
+        } else {
+            throw err("Unexpected " + ev);
+        }
+    }
+
+    /**
+     * Reads a JSON numeric value into the output.
+     */
+    public void getNumberChars(CharArr output) throws IOException {
+        int ev = 0;
+        if (valstate == 0) ev = nextEvent();
+        if (valstate == LONG || valstate == NUMBER) output.write(this.out);
+        else if (valstate == BIGNUMBER) {
+            continueNumber(output);
+        } else {
+            throw err("Unexpected " + ev);
+        }
+        valstate = 0;
+    }
+
+    /**
+     * Reads a boolean value
+     */
+    public boolean getBoolean() throws IOException {
+        goTo(BOOLEAN);
+        return bool;
+    }
+
+    /**
+     * Reads a null value
+     */
+    public void getNull() throws IOException {
+        goTo(NULL);
+    }
+
+    /**
+     * @return the current nesting level, the number of parent objects or arrays.
+     */
+    public int getLevel() {
+        return ptr;
+    }
+
+    public long getPosition() {
+        return gpos + start;
+    }
+
+    public static class ParseException extends RuntimeException {
+        public ParseException(String msg) {
+            super(msg);
+        }
+    }
+}