diff --git a/README.md b/README.md index 26e1769..851bf4a 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,10 @@ In addition to the fields, the object contains two special keys: * @sequenceNumber `Number` indicates the order in which it was extracted * @deleted `Boolean` whether this record has been deleted or not +Each field is parsed using the `utf-8` encoding. If you want a different encoding, you should initialize the Parser with that encoding: + + new Parser('/path/to/my/dbase/file.dbf', 'win1252'); + This object may look like: { diff --git a/lib/parser.js b/lib/parser.js index 28edbbe..423a362 100644 --- a/lib/parser.js +++ b/lib/parser.js @@ -1,6 +1,6 @@ -// Generated by CoffeeScript 1.3.3 +// Generated by CoffeeScript 1.7.1 (function() { - var EventEmitter, Header, Parser, fs, + var EventEmitter, Header, Parser, fs, iconv, __bind = function(fn, me){ return function(){ return fn.apply(me, arguments); }; }, __hasProp = {}.hasOwnProperty, __extends = function(child, parent) { for (var key in parent) { if (__hasProp.call(parent, key)) child[key] = parent[key]; } function ctor() { this.constructor = child; } ctor.prototype = parent.prototype; child.prototype = new ctor(); child.__super__ = parent.prototype; return child; }; @@ -11,55 +11,56 @@ fs = require('fs'); - Parser = (function(_super) { + iconv = require('iconv-lite'); + Parser = (function(_super) { __extends(Parser, _super); - function Parser(filename) { + function Parser(filename, encoding) { this.filename = filename; + this.encoding = encoding; this.parseField = __bind(this.parseField, this); - this.parseRecord = __bind(this.parseRecord, this); - this.parse = __bind(this.parse, this); - } Parser.prototype.parse = function() { - var _this = this; this.emit('start', this); this.header = new Header(this.filename); - this.header.parse(function(err) { - var sequenceNumber; - _this.emit('header', _this.header); - sequenceNumber = 0; - return fs.readFile(_this.filename, function(err, buffer) { - var loc; - if (err) { - throw err; - } - loc = _this.header.start; - while (loc < (_this.header.start + _this.header.numberOfRecords * _this.header.recordLength) && loc < buffer.length) { - _this.emit('record', _this.parseRecord(++sequenceNumber, buffer.slice(loc, loc += _this.header.recordLength))); - } - return _this.emit('end', _this); - }); - }); + this.header.parse((function(_this) { + return function(err) { + var sequenceNumber; + _this.emit('header', _this.header); + sequenceNumber = 0; + return fs.readFile(_this.filename, function(err, buffer) { + var loc; + if (err) { + throw err; + } + loc = _this.header.start; + while (loc < (_this.header.start + _this.header.numberOfRecords * _this.header.recordLength) && loc < buffer.length) { + _this.emit('record', _this.parseRecord(++sequenceNumber, buffer.slice(loc, loc += _this.header.recordLength))); + } + return _this.emit('end', _this); + }); + }; + })(this)); return this; }; Parser.prototype.parseRecord = function(sequenceNumber, buffer) { - var field, loc, record, _fn, _i, _len, _ref, - _this = this; + var field, loc, record, _fn, _i, _len, _ref; record = { '@sequenceNumber': sequenceNumber, '@deleted': (buffer.slice(0, 1))[0] !== 32 }; loc = 1; _ref = this.header.fields; - _fn = function(field) { - return record[field.name] = _this.parseField(field, buffer.slice(loc, loc += field.length)); - }; + _fn = (function(_this) { + return function(field) { + return record[field.name] = _this.parseField(field, buffer.slice(loc, loc += field.length)); + }; + })(this); for (_i = 0, _len = _ref.length; _i < _len; _i++) { field = _ref[_i]; _fn(field); @@ -68,8 +69,12 @@ }; Parser.prototype.parseField = function(field, buffer) { - var value; - value = (buffer.toString('utf-8')).replace(/^\x20+|\x20+$/g, ''); + var encoding, value; + encoding = 'utf-8'; + if (this.encoding) { + encoding = this.encoding; + } + value = iconv.decode(buffer, encoding).replace(/^\x20+|\x20+$/g, ''); if (field.type === 'N') { value = parseInt(value, 10); } diff --git a/package.json b/package.json index eab1925..8e49466 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "node-dbf", - "version": "0.1.0", + "version": "0.1.1", "description": "An efficient dBase DBF file parser written in pure JavaScript", "main": "./lib/parser.js", "repository": { @@ -17,5 +17,8 @@ }, "scripts": { "prepublish": "cake build" + }, + "dependencies": { + "iconv-lite": "~0.2.11" } -} \ No newline at end of file +} diff --git a/src/parser.coffee b/src/parser.coffee index 61ec67d..13685a6 100644 --- a/src/parser.coffee +++ b/src/parser.coffee @@ -1,10 +1,12 @@ {EventEmitter} = require 'events' Header = require './header' fs = require 'fs' +iconv = require 'iconv-lite' + class Parser extends EventEmitter - constructor: (@filename) -> + constructor: (@filename, @encoding) -> parse: => @emit 'start', @ @@ -41,7 +43,10 @@ class Parser extends EventEmitter return record parseField: (field, buffer) => - value = (buffer.toString 'utf-8').replace /^\x20+|\x20+$/g, '' + encoding = 'utf-8' + encoding = @encoding if @encoding + + value = iconv.decode(buffer, encoding).replace /^\x20+|\x20+$/g, '' if field.type is 'N' then value = parseInt value, 10