diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e970233 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.project \ No newline at end of file diff --git a/README b/README index 3a1810d..e8290c9 100644 --- a/README +++ b/README @@ -50,3 +50,19 @@ http://code.google.com/p/stdf4j CONTACT tragicphantom@gmail.com + +--- + +Performance optimizations: +1. Make ByteArray a singleton. +2. Replace new Float/Double with Float/Double.valueOf. +3. Replace "new String(cbuf, 0, length).intern()" with new +String(data, offset, length, ASCII). +4. Replace Arrays.copyOfRange with System.arraycopy. + +Parsing one 1.2 GB STDF file with "record.getData()" called took 24 s; +after these optimizations it takes 9 s (test environment: Windows 7, Java 1.8). + +CONTACT + +lantianjialiang@126.com diff --git a/lib/.gitignore b/lib/.gitignore new file mode 100644 index 0000000..98fb7d1 --- /dev/null +++ b/lib/.gitignore @@ -0,0 +1,4 @@ +.classpath +.project +target +.settings \ No newline at end of file diff --git a/lib/src/main/java/com/tragicphantom/stdf/Record.java b/lib/src/main/java/com/tragicphantom/stdf/Record.java index c325b16..e79fed8 100644 --- a/lib/src/main/java/com/tragicphantom/stdf/Record.java +++ b/lib/src/main/java/com/tragicphantom/stdf/Record.java @@ -22,21 +22,30 @@ import java.text.ParseException; +import com.tragicphantom.stdf.util.ByteArray; + public class Record{ private RecordDescriptor desc; private int pos; private byte [] data; - private ByteOrder byteOrder; +// private ByteOrder byteOrder; private RecordData rd; - public Record(RecordDescriptor desc, int pos, - byte [] data, ByteOrder byteOrder){ - this.desc = desc; - this.pos = pos; - this.data = data; - this.byteOrder = byteOrder; - this.rd = null; - } +// public Record(RecordDescriptor desc, int pos, +// byte [] data, ByteOrder byteOrder){ +// this.desc = desc; +// this.pos = pos; +// this.data = data; +// this.byteOrder = byteOrder; +// this.rd = null; +// } + + public Record(RecordDescriptor desc, int pos, byte[] data) { + 
this.desc = desc; + this.pos = pos; + this.data = data; + this.rd = null; + } public Record(RecordDescriptor desc, RecordData rd){ this.desc = desc; @@ -54,7 +63,7 @@ public int getPosition(){ public RecordData getData() throws ParseException{ if(rd == null) - rd = desc.parse(pos, data, byteOrder); + rd = desc.parse(pos, data); return rd; } @@ -66,4 +75,8 @@ public String toString(){ return "(null)"; } } + + public int getDataLength() { + return data.length; + } } diff --git a/lib/src/main/java/com/tragicphantom/stdf/RecordDataParser.java b/lib/src/main/java/com/tragicphantom/stdf/RecordDataParser.java index 1d8a412..8fe4e4b 100644 --- a/lib/src/main/java/com/tragicphantom/stdf/RecordDataParser.java +++ b/lib/src/main/java/com/tragicphantom/stdf/RecordDataParser.java @@ -19,9 +19,9 @@ package com.tragicphantom.stdf; import java.util.Arrays; - +import java.io.UnsupportedEncodingException; import java.nio.ByteOrder; - +import java.nio.charset.Charset; import java.text.ParseException; import com.tragicphantom.stdf.util.ByteArray; @@ -33,13 +33,14 @@ public class RecordDataParser{ private ByteArray byteArray; private int available; private int offset; + + private static byte[] sharedBytes = new byte[10]; - public RecordDataParser(RecordDescriptor desc, int pos, byte [] data, - ByteOrder byteOrder){ + public RecordDataParser(RecordDescriptor desc, int pos, byte [] data){ this.desc = desc; this.pos = pos; this.data = data; - this.byteArray = new ByteArray(byteOrder); + this.byteArray = ByteArray.getInstance(); this.available = data.length; this.offset = 0; } @@ -50,7 +51,7 @@ public RecordData parse() throws ParseException{ for(Field field : desc.getFields()){ Object value = null; - if(available > 0 && validField(field, fieldList)){ + if(available > 0 /*&& validField(field, fieldList) */ ){ //System.err.println(field.getName() + " => " + field.getType() + " => " + field.getLength()); value = readField(field.getType(), field.getLength(), @@ -58,8 +59,8 @@ public 
RecordData parse() throws ParseException{ field.getArrayType(), field.getArraySizeFieldIndex(), fieldList); - } - else{ + + } else { char type = field.getType(); if(type == 'U' || type == 'I') value = Integer.valueOf(0); @@ -131,17 +132,18 @@ else if(type == 'S') else // type == 'C' return readString(readUnsignedInt(1)); case 'R': - if(length == 4) - return new Float(byteArray.toFloat(getBytes(4))); - else - return new Double(byteArray.toDouble(getBytes(8))); + if(length == 4) { + return Float.valueOf((byteArray.toFloat(getBytes(4)))); + } + + return Double.valueOf(byteArray.toDouble(getBytes(8))); case 'V': return readVariableTypeList(); case 'k': return readArray(arrayType, arraySizeFieldIndex, lengthFieldIndex, length, fields, false); } - + throw new ParseException("Invalid type code: " + type, pos); } @@ -248,6 +250,8 @@ protected long getFieldSize(Object field){ return size; } + private static final Charset ASCII = Charset.forName( "US-ASCII" ); + protected String readString(int length){ /** * The following is a trick from Trevor Pounds: @@ -260,15 +264,28 @@ protected String readString(int length){ * but from my testing this custom logic outperforms the internal charset * encoding algorithm (i.e. -agentlib:hprof=cpu=times). 
*/ - final byte[] bbuf = getBytes(length); - - length = bbuf.length; // adjust according to how much data actually read - - char [] cbuf = new char[length]; - for(int i = 0; i < length; i++) - cbuf[i] = (char) (0xFF & bbuf[i]); - - return new String(cbuf, 0, length).intern(); +// final byte[] bbuf = getBytes(length); +// +// length = bbuf.length; // adjust according to how much data actually read +// +// char [] cbuf = new char[length]; +// for(int i = 0; i < length; i++) +// cbuf[i] = (char) (0xFF & bbuf[i]); +// +// return new String(cbuf, 0, length).intern(); + +// final byte[] bbuf = getBytes(length); +// return new String(bbuf); + available -= length; + if (available < 0) { + length += available; + available = 0; + } + + String tmp = new String(data, offset, length, ASCII); + offset += length; + + return tmp; } protected int readSigned(int length){ @@ -304,9 +321,13 @@ protected byte[] getBytes(int numBytes){ available = 0; } - byte[] bytes = Arrays.copyOfRange(data, offset, offset + numBytes); - offset += numBytes; +// byte[] bytes = Arrays.copyOfRange(data, offset, offset + numBytes); +// offset += numBytes; - return bytes; + //TODO file sharedBytes to 0 + System.arraycopy(data, offset, sharedBytes, 0, numBytes); + offset += numBytes; + + return sharedBytes; } } diff --git a/lib/src/main/java/com/tragicphantom/stdf/RecordDescriptor.java b/lib/src/main/java/com/tragicphantom/stdf/RecordDescriptor.java index bc001e2..478bfc0 100644 --- a/lib/src/main/java/com/tragicphantom/stdf/RecordDescriptor.java +++ b/lib/src/main/java/com/tragicphantom/stdf/RecordDescriptor.java @@ -75,7 +75,8 @@ public boolean contains(String name){ return indexes.containsKey(name); } - public RecordData parse(int pos, byte [] bytes, ByteOrder byteOrder) throws ParseException{ - return new RecordDataParser(this, pos, bytes, byteOrder).parse(); + public RecordData parse(int pos, byte [] bytes) throws ParseException{ + RecordDataParser parser = new RecordDataParser(this, pos, bytes); + 
return parser.parse(); } } diff --git a/lib/src/main/java/com/tragicphantom/stdf/RecordVisitor.java b/lib/src/main/java/com/tragicphantom/stdf/RecordVisitor.java index 6968258..67eb38b 100644 --- a/lib/src/main/java/com/tragicphantom/stdf/RecordVisitor.java +++ b/lib/src/main/java/com/tragicphantom/stdf/RecordVisitor.java @@ -18,8 +18,10 @@ **/ package com.tragicphantom.stdf; +import java.text.ParseException; + public interface RecordVisitor{ public void beforeFile(); public void afterFile(); - public void handleRecord(Record record); + public void handleRecord(Record record) throws ParseException; } diff --git a/lib/src/main/java/com/tragicphantom/stdf/STDFReader.java b/lib/src/main/java/com/tragicphantom/stdf/STDFReader.java index 8919ead..68f6f2a 100644 --- a/lib/src/main/java/com/tragicphantom/stdf/STDFReader.java +++ b/lib/src/main/java/com/tragicphantom/stdf/STDFReader.java @@ -45,7 +45,7 @@ public class STDFReader{ private InputStream stream = null; private int available = 0; private int totalBytes = 0; - private ByteArray byteArray = new ByteArray(); + private ByteArray byteArray = ByteArray.getInstance(); private boolean errorOnUnknown = true; public STDFReader(String fileName) throws FileNotFoundException, IOException{ @@ -57,7 +57,7 @@ public STDFReader(File file) throws FileNotFoundException, IOException{ } public STDFReader(InputStream stream) throws IOException{ - InputStream bufis = new BufferedInputStream(stream); + InputStream bufis = new BufferedInputStream(stream, 8192 * 1); bufis.mark(2); int header = ((bufis.read() & 0xFF) << 8) + (bufis.read() & 0xFF); bufis.reset(); @@ -118,7 +118,7 @@ record = readRecord(header, records); visitor.handleRecord(record); } } - catch(IOException e){ + catch(IOException | ParseException e){ // Ignore //e.printStackTrace(); } @@ -151,8 +151,8 @@ protected Record readRecord(Header header, if(records.containsKey(header.getRecordType())){ record = new Record(records.get(header.getRecordType()), totalBytes, - 
getBytes(header.getLength()), - byteArray.getByteOrder()); + getBytes(header.getLength()) ); +// byteArray.getByteOrder()); } else{ // this may just be a user-defined record type not specified diff --git a/lib/src/main/java/com/tragicphantom/stdf/util/ByteArray.java b/lib/src/main/java/com/tragicphantom/stdf/util/ByteArray.java index 7c47579..483278a 100644 --- a/lib/src/main/java/com/tragicphantom/stdf/util/ByteArray.java +++ b/lib/src/main/java/com/tragicphantom/stdf/util/ByteArray.java @@ -31,9 +31,17 @@ */ public class ByteArray { - private ByteOrder byteOrder = ByteOrder.nativeOrder(); - - public ByteArray(){ + private static ByteOrder staticByteOrder = ByteOrder.nativeOrder(); + + private ByteOrder byteOrder = ByteOrder.nativeOrder(); + + private static ByteArray array = new ByteArray(staticByteOrder); + + public static ByteArray getInstance() { + return array; + } + + private ByteArray(){ } public ByteArray(ByteOrder byteOrder){ @@ -61,14 +69,16 @@ public final char toChar(final byte[] bytes){ return (char) ((bytes[0] & 0xFF) + ((bytes[1] & 0xFF) << 8)); } - public final double toDouble(final byte[] bytes) - { return Double.longBitsToDouble(toLong(bytes)); } + public final double toDouble(final byte[] bytes) { + //TODO below maybe wrong for shared bytes array + return Double.longBitsToDouble(toLong(bytes)); + } public final float toFloat(final byte[] bytes) - { return Float.intBitsToFloat(toInt(bytes)); } + { return Float.intBitsToFloat(toInt(bytes, 4)); } - public final int toInt(final byte[] bytes){ - if(bytes.length == 4){ + public final int toInt(final byte[] bytes, int length){ + if(length == 4){ if(byteOrder == ByteOrder.BIG_ENDIAN) return (int) (((bytes[0] & 0xFF) << 24) + ((bytes[1] & 0xFF) << 16) + ((bytes[2] & 0xFF) << 8) + (bytes[3] & 0xFF)); @@ -83,12 +93,12 @@ public final int toInt(final byte[] bytes){ public int toSigned(final byte[] bytes, int length){ int value = 0; - if(length == 1 && bytes.length >= 1) + if(length == 1) value = 
(int)bytes[0]; - else if(length == 2 && bytes.length >= 2) - value = (int)toShort(bytes); + else if(length == 2) + value = (int)toShort(bytes, length); else - value = toInt(bytes); + value = toInt(bytes, length); return value; } @@ -103,11 +113,11 @@ public long toUnsigned(final byte[] bytes, int length){ value = b < 0 ? ((long)(b & 0x7F) | 0x80) : b; } else if(length == 2 && bytes.length >= 2){ - short s = toShort(bytes); + short s = toShort(bytes, length); value = s < 0 ? ((long)(s & 0x7FFF) | 0x8000) : s; } else{ - int i = toInt(bytes); + int i = toInt(bytes, length); value = i < 0 ? ((long)(i & 0x7FFFFFFFL) | 0x80000000L) : i; } @@ -122,7 +132,7 @@ public int toUnsignedInt(final byte[] bytes, int length){ value = b < 0 ? ((b & 0x7F) | 0x80) : b; } else if(length == 2 && bytes.length >= 2){ - short s = toShort(bytes); + short s = toShort(bytes, length); value = s < 0 ? ((s & 0x7FFF) | 0x8000) : s; } @@ -146,8 +156,8 @@ public final long toLong(final byte[] bytes){ return 0; } - public final short toShort(final byte[] bytes){ - if(bytes.length == 2){ + public final short toShort(final byte[] bytes, int length){ + if(length == 2){ if(byteOrder == ByteOrder.BIG_ENDIAN) return (short) ((bytes[1] & 0xFF) + ((bytes[0] & 0xFF) << 8)); else diff --git a/lib/src/main/resources/stdf_v4_types.xml b/lib/src/main/resources/stdf_v4_types.xml index d0536c3..d73aa86 100644 --- a/lib/src/main/resources/stdf_v4_types.xml +++ b/lib/src/main/resources/stdf_v4_types.xml @@ -1,391 +1,382 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/.gitignore b/tools/.gitignore new file mode 100644 index 0000000..e970233 --- /dev/null +++ b/tools/.gitignore @@ -0,0 +1 @@ +.project \ No newline at end of file diff --git a/tools/dump/.gitignore b/tools/dump/.gitignore new file mode 100644 index 0000000..17a1acc --- /dev/null +++ b/tools/dump/.gitignore @@ -0,0 +1,4 @@ +/target/ +.classpath +.project +.settings \ No newline at end of file diff --git a/tools/extract/.gitignore b/tools/extract/.gitignore new file mode 100644 index 0000000..e848fff --- /dev/null +++ b/tools/extract/.gitignore @@ -0,0 +1,4 @@ +.classpath +.project +.settings +target \ No newline at end of file diff --git a/tools/stats/.gitignore b/tools/stats/.gitignore new file mode 100644 index 0000000..e848fff --- /dev/null +++ b/tools/stats/.gitignore @@ -0,0 +1,4 @@ +.classpath +.project +.settings +target \ No newline at end of file diff --git a/tools/stats/src/main/java/com/tragicphantom/stdf/tools/Stats.java 
b/tools/stats/src/main/java/com/tragicphantom/stdf/tools/Stats.java index fe8c284..192172e 100644 --- a/tools/stats/src/main/java/com/tragicphantom/stdf/tools/Stats.java +++ b/tools/stats/src/main/java/com/tragicphantom/stdf/tools/Stats.java @@ -19,15 +19,18 @@ package com.tragicphantom.stdf.tools; import java.util.Map; +import java.text.ParseException; import java.util.HashMap; import com.tragicphantom.stdf.STDFReader; import com.tragicphantom.stdf.Record; +import com.tragicphantom.stdf.RecordData; import com.tragicphantom.stdf.RecordVisitor; public class Stats implements RecordVisitor{ private HashMap counts = new HashMap(); private int total = 0; + private int dataLenSum = 0; public void beforeFile(){ } @@ -36,33 +39,60 @@ public void afterFile(){ } public void handleRecord(Record record){ - String key = record.getType().toUpperCase(); - if(counts.containsKey(key)) - counts.put(key, counts.get(key) + 1); - else - counts.put(key, 1); - total++; +// String key = record.getType().toUpperCase(); + try { + RecordData data = record.getData(); + } catch (ParseException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } +// if(counts.containsKey(key)) +// counts.put(key, counts.get(key) + 1); +// else +// counts.put(key, 1); + +// dataLenSum += record.getDataLength(); +// total++; } public void print(){ System.out.println("Total records: " + total); +// System.out.println("Avg length: " + dataLenSum/total); for(Map.Entry entry : counts.entrySet()) System.out.println(entry.getKey() + ": " + entry.getValue()); } public static void main(String [] args){ - for(String arg : args){ - try{ - Stats stats = new Stats(); - STDFReader reader = new STDFReader(arg); - reader.setErrorOnUnknown(false); - reader.parse(stats); - stats.print(); - } - catch(Exception e){ - e.printStackTrace(); - } - } + com.tragicphantom.stdf.v4.Types.getRecordDescriptors(); +// try { +// Thread.sleep(10000); +// } catch (InterruptedException e1) { +// // TODO Auto-generated catch block 
+// e1.printStackTrace(); +// } + + long start = System.currentTimeMillis(); + for (String arg : args) { + try { + Stats stats = new Stats(); + STDFReader reader = new STDFReader(arg); + reader.setErrorOnUnknown(false); + reader.parse(stats); + stats.print(); + } catch (Exception e) { + e.printStackTrace(); + } + } + long end = System.currentTimeMillis(); + System.out.println("time " + (end - start) / 1000.0); + +// try { +// Thread.sleep(10000); +// } catch (InterruptedException e1) { +// // TODO Auto-generated catch block +// e1.printStackTrace(); +// } + } } diff --git a/tools/viewer/.gitignore b/tools/viewer/.gitignore new file mode 100644 index 0000000..e848fff --- /dev/null +++ b/tools/viewer/.gitignore @@ -0,0 +1,4 @@ +.classpath +.project +.settings +target \ No newline at end of file