From 1cd5ae03de60b13933426d069800f7b3a4e73a51 Mon Sep 17 00:00:00 2001 From: Alexandre Dube Date: Thu, 3 Jan 2013 12:08:21 -0500 Subject: [PATCH 1/3] More compact JSON representation by eliminating whitespaces --- csv2json.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csv2json.py b/csv2json.py index f1073cd..270f3d9 100755 --- a/csv2json.py +++ b/csv2json.py @@ -25,7 +25,7 @@ def csv2json(csv_file, delimiter=',', quotechar='"', indent=None, callback=None, out.write('%s(' % callback); elif variable: out.write('var %s = ' % variable) - simplejson.dump(rows, out, indent=indent) + simplejson.dump(rows, out, indent=indent, separators=(',', ':')) if callback: out.write(');'); elif variable: From 59d6549009097cb706fcf837bda2988f7251a324 Mon Sep 17 00:00:00 2001 From: Alexandre Dube Date: Thu, 3 Jan 2013 16:04:37 -0500 Subject: [PATCH 2/3] Manually cast integer and float values to avoid quoting them in json.dump --- csv2json.py | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/csv2json.py b/csv2json.py index 270f3d9..4f3c279 100755 --- a/csv2json.py +++ b/csv2json.py @@ -3,9 +3,18 @@ A simple script for generating JSON/JavaScript from comma-separated (or otherwise delimited) values. +Python 2.7 or higher is recommended see : +"Floating Point Arithmetic issues and Limitations" at +http://docs.python.org/2/tutorial/floatingpoint.html + by Shawn Allen +modified by Alexandre Dube for lighter json """ -import csv, simplejson +import csv +try: + import json +except ImportError: + import simplejson as json from StringIO import StringIO # These are shorthands for delimiters that might be a pain to type or escape. 
@@ -13,11 +22,39 @@ 'sc': ';', 'bar': '|'} +def is_number(s): + try: + float(s) + return True + except ValueError: + return False + +def is_int(s): + try: + int(s) + return True + except ValueError: + return False + def csv2json(csv_file, delimiter=',', quotechar='"', indent=None, callback=None, variable=None, **csv_opts): if delimiter_map.has_key(delimiter): delimiter = delimiter_map.get(delimiter) reader = csv.DictReader(csv_file, delimiter=delimiter, quotechar=quotechar or None, **csv_opts) - rows = [row for row in reader] + + # manually cast to integer or float according to their values for a lighter json + # csv.DictReader has no means to return unquoted integer and float values, + # that's why it's manually done here. Not really efficient at script + # execution time, but json is much lighter that way + rows = [] + for row in reader: + for field in row: + if is_number(row[field]): + if is_int(row[field]): + row[field] = int(row[field]) + else: + row[field] = float(row[field]) + rows.append(row) + if hasattr(indent, 'isdigit') and indent.isdigit(): indent = ' ' * int(indent) out = StringIO() @@ -25,7 +62,7 @@ def csv2json(csv_file, delimiter=',', quotechar='"', indent=None, callback=None, out.write('%s(' % callback); elif variable: out.write('var %s = ' % variable) - simplejson.dump(rows, out, indent=indent, separators=(',', ':')) + json.dump(rows, out, indent=indent, separators=(',', ':')) if callback: out.write(');'); elif variable: From 828e0a6cc693afa21699f92eb12bd6a6a640f2b5 Mon Sep 17 00:00:00 2001 From: Alexandre Dube Date: Tue, 26 Nov 2013 13:46:20 -0500 Subject: [PATCH 3/3] Fix json.dumps, disable ensure_ascii --- csv2json.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/csv2json.py b/csv2json.py index 4f3c279..a833c88 100755 --- a/csv2json.py +++ b/csv2json.py @@ -57,17 +57,9 @@ def csv2json(csv_file, delimiter=',', quotechar='"', indent=None, callback=None, if hasattr(indent, 'isdigit') and indent.isdigit(): indent = ' ' * 
int(indent) - out = StringIO() - if callback: - out.write('%s(' % callback); - elif variable: - out.write('var %s = ' % variable) - json.dump(rows, out, indent=indent, separators=(',', ':')) - if callback: - out.write(');'); - elif variable: - out.write(';') - return out.getvalue() + + return json.dumps(rows, indent=indent, separators=(',', ':'), + ensure_ascii=False) if __name__ == '__main__': import sys