diff --git a/csv2json.py b/csv2json.py index f1073cd..a833c88 100755 --- a/csv2json.py +++ b/csv2json.py @@ -3,9 +3,18 @@ A simple script for generating JSON/JavaScript from comma-separated (or otherwise delimited) values. +Python 2.7 or higher is recommended; see +"Floating Point Arithmetic: Issues and Limitations" at +http://docs.python.org/2/tutorial/floatingpoint.html + by Shawn Allen +modified by Alexandre Dube for lighter json """ -import csv, simplejson +import csv +try: + import json +except ImportError: + import simplejson as json from StringIO import StringIO # These are shorthands for delimiters that might be a pain to type or escape. @@ -13,24 +22,44 @@ 'sc': ';', 'bar': '|'} +def is_number(s): + try: + float(s) + return True + except ValueError: + return False + +def is_int(s): + try: + int(s) + return True + except ValueError: + return False + def csv2json(csv_file, delimiter=',', quotechar='"', indent=None, callback=None, variable=None, **csv_opts): if delimiter_map.has_key(delimiter): delimiter = delimiter_map.get(delimiter) reader = csv.DictReader(csv_file, delimiter=delimiter, quotechar=quotechar or None, **csv_opts) - rows = [row for row in reader] + + # manually cast values to integer or float where possible, for a lighter json + # csv.DictReader has no means to return unquoted integer and float values, + # that's why it's manually done here. 
Not really efficient upon script + # execution, but json is much lighter that way + rows = [] + for row in reader: + for field in row: + if is_number(row[field]): + if is_int(row[field]): + row[field] = int(row[field]) + else: + row[field] = float(row[field]) + rows.append(row) + if hasattr(indent, 'isdigit') and indent.isdigit(): indent = ' ' * int(indent) - out = StringIO() - if callback: - out.write('%s(' % callback); - elif variable: - out.write('var %s = ' % variable) - simplejson.dump(rows, out, indent=indent) - if callback: - out.write(');'); - elif variable: - out.write(';') - return out.getvalue() + + return json.dumps(rows, indent=indent, separators=(',', ':'), + ensure_ascii=False) if __name__ == '__main__': import sys