-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCSV_Parser.lss
More file actions
300 lines (257 loc) · 7.56 KB
/
CSV_Parser.lss
File metadata and controls
300 lines (257 loc) · 7.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
%REM
Library CSVParser
Created Nov 9, 2017 by Sam Sirry
Description:
* A parser library for CSV (Comma-Separated Values) text.
* Work adapted from http://www.boyet.com/articles/csvparser.html made by Julian M Bucknall.
* This library uses the BigStringWalker class, which can be found here: https://github.com/SamSirry/LotusScript-BigStringWalker
* Usage:
- Create a class derived from ICsvConsumer and override its methods.
- Create an instance of that consumer class.
- Create a CSVCharTokenizer from the CSV text, and create a CSVParser.
- Call the Parse method of the CSVParser, passing it the tokenizer and the consumer.
See sample code for an example.
%END REM
Option Declare
' Sentinel returned by CSVCharTokenizer.Peek/Read when no more characters are available,
' and compared against by CSVParser to detect the end of the input.
Private Const CsvEOF = "" 'Since 2022-08-04: An empty string indicates the end of file when reading from the source no longer produces bytes. This allows for nulls as part of the CSV data stream.
' Line feed (character code 10). The tokenizer reports every line ending as this character.
Private Const LF = Uchr$(10)
' Carriage return (character code 13). Mapped to LF by the tokenizer; skipped when followed by LF.
Private Const CR = Uchr$(13)
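%REM
Class CSVCharTokenizer
Created Nov 9, 2017 by Sam Sirry
Description: Reads a CSV text one character at a time, with one character of look-ahead (Peek) and one character of push-back (Unread). A lone CR and a CR+LF pair are both returned as a single LF. An empty string is returned at the end of the data.
%END REM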
Public Class CSVCharTokenizer
    ' Character reader over a CSV text held in a BigStringWalker.
    '  - Read() consumes and returns the next character; Peek() returns it without consuming.
    '  - Unread() pushes back exactly one character, which the next Peek()/Read() returns first.
    '  - A lone CR is returned as LF, and a CR+LF pair is returned as a single LF.
    '  - CsvEOF (an empty string) is returned once index reaches s.Size.

    Private s As BigStringWalker        ' the wrapped CSV text
    Private index As Long               ' zero-based position of the next character to read
    Private haveUnreadChar As Boolean   ' True while a pushed-back character is pending
    Private unreadChar As String        ' the pushed-back character (may be CsvEOF)

    ' Wraps the given CSV text and positions the reader at its first character.
    Public Sub New(strg As String)
        Set Me.s = New BigStringWalker(strg)
        index = 0
        haveUnreadChar = False
    End Sub

    ' If the current character is CR and the one after it is LF, advances onto the LF so
    ' that the pair is read as one line ending.
    Private Sub skipCrInCrLf()
        ' Look ahead only while index + 1 is still a valid zero-based position
        ' (valid positions are 0 .. s.Size - 1, the same bound Peek and Read use).
        ' The previous test (index + 1 <= s.Size) let a trailing CR trigger a read one
        ' position past the end of the data.
        If (index + 1 < s.Size) Then
            If (sAt(index) = CR) Then
                If (sAt(index + 1) = LF) Then
                    index = index + 1
                End If
            End If
        End If
    End Sub

    ' Returns LF for CR, and any other character unchanged.
    Private Function mapCrToLf(c As String) As String
        If (c = CR) Then
            mapCrToLf = LF
        Else
            mapCrToLf = c
        End If
    End Function

    ' Returns the character the next Read() would return, without consuming it.
    ' Returns CsvEOF when there is no pushed-back character and the data is exhausted.
    Public Function Peek() As String
        If (haveUnreadChar) Then
            Peek = unreadChar
        ElseIf (index < s.Size) Then
            ' CR and CR+LF both show up as LF here, matching what Read() will return.
            Peek = mapCrToLf(sAt(index))
        Else
            Peek = CsvEOF
        End If
    End Function

    ' Consumes and returns the next character: the pushed-back character if one is
    ' pending, otherwise the next character of the data, otherwise CsvEOF.
    Public Function Read() As String
        If (haveUnreadChar) Then
            haveUnreadChar = False
            Me.Read = unreadChar
        ElseIf (index < s.Size) Then
            Call skipCrInCrLf()
            Me.Read = mapCrToLf(sAt(index))
            index = index + 1
        Else
            Me.Read = CsvEOF
        End If
    End Function

    ' Pushes one character back so that the next Peek()/Read() returns it.
    ' Raises error 5 if a pushed-back character is already pending.
    Public Sub Unread(c As String)
        If (haveUnreadChar) Then
            Error 5, "Unread() cannot accept more than one pushed back character"
        End If
        haveUnreadChar = True
        unreadChar = c
    End Sub

    ' Returns the character at zero-based position n of the wrapped text.
    Private Function sAt(ByVal n As Long) As String
        sAt = s.CharAt(n)
    End Function

    %REM
        Property Get CurrentIndex
        Created Nov 10, 2017 by Sam Sirry
        Description: Zero-based index of the current reader's approx location.
    %END REM
    Public Property Get CurrentIndex As Long
        CurrentIndex = index
    End Property

    %REM
        Property Get DataSize
        Created Nov 10, 2017 by Sam Sirry
        Description: Returns the size of the wrapped text minus one.
        NOTE(review): this is one less than the bound Peek/Read use for end of data,
        so it is 0 for a one-character text - confirm whether that is intended.
    %END REM
    Public Property Get DataSize As Long
        DataSize = s.Size - 1
    End Property
End Class
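%REM
Class CSVParser
Created Nov 9, 2017 by Sam Sirry
Description: Parses CSV text read from a CSVCharTokenizer and reports each field, each end of record, the progress and the end of file to an ICsvConsumer.
%END REM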
Public Class CSVParser
    ' Parses CSV text supplied by a CSVCharTokenizer and reports what it finds to an
    ' ICsvConsumer. The "continue" flag is passed by reference down to the consumer
    ' callbacks; when a callback sets it to False, parsing stops.

    ' Parses everything available from reader, reporting to consumer.
    ' Raises error 32106 on malformed input (see parseCsvRecord and parseQuotedField).
    Public Sub Parse(reader As CSVCharTokenizer, consumer As ICsvConsumer)
        Call parseCsvFile(reader, consumer)
    End Sub

    ' Parses records until the reader reports CsvEOF, then signals progress 1 and
    ' end of file. If the consumer asks to stop, returns without those final signals.
    Private Sub parseCsvFile(reader As CSVCharTokenizer, consumer As ICsvConsumer)
        Dim continue As Boolean
        continue = True
        Do While (reader.Peek() <> CsvEOF)
            Call parseCsvRecord(reader, consumer, continue)
            If Not continue Then Exit Sub
        Loop
        Call consumer.SignalProgress(1)
        Call consumer.SignalEndOfFile()
    End Sub

    ' Parses one record: a list of fields followed by LF or the end of the data.
    ' Signals the end of the record and then the fraction of the data read so far.
    Private Sub parseCsvRecord(reader As CSVCharTokenizer, consumer As ICsvConsumer, continue As Boolean)
        Call parseCsvStringList(reader, consumer, continue)
        If Not continue Then Exit Sub

        Dim ch As String
        ch = reader.Read()
        If ch = CsvEOF Then
            ' The end of the data also terminates the last record. Push EOF back so the
            ' caller's Peek() sees it and leaves its loop.
            Call reader.Unread(ch)
            ch = LF
        End If
        If ch <> LF Then
            Error 32106, "End of record was expected but more data exists."
        End If
        Call consumer.SignalEndOfRecord(continue)

        ' Report progress. DataSize is 0 for a one-character input, which previously
        ' caused a division by zero; and CurrentIndex can exceed DataSize once all the
        ' data has been read, which previously reported a fraction above 1.
        Dim FractionDone As Double
        Dim total As Long
        total = reader.DataSize
        If total > 0 Then
            FractionDone = reader.CurrentIndex / total
            If FractionDone > 1 Then FractionDone = 1
        Else
            FractionDone = 1
        End If
        Call consumer.SignalProgress(FractionDone)
    End Sub

    ' Parses comma-separated fields. The first character that is not a comma is pushed
    ' back for the caller to examine.
    Private Sub parseCsvStringList(reader As CSVCharTokenizer, consumer As ICsvConsumer, continue As Boolean)
        Dim ch As String
        Do
            Call parseRawString(reader, consumer, continue)
            If Not continue Then Exit Sub
            ch = reader.Read()
        Loop While (ch = ",")
        Call reader.Unread(ch)
    End Sub

    ' True if c ends a field: a comma, LF, or CsvEOF.
    Private Function isFieldTerminator(ByVal c As String) As Boolean
        isFieldTerminator = ((c = ",") Or (c = LF) Or (c = CsvEOF))
    End Function

    ' True if c is a space or a tab.
    Private Function isSpace(ByVal c As String) As Boolean
        isSpace = ((c = " ") Or (c = UChr$(9)))
    End Function

    ' Consumes any run of spaces/tabs and pushes back the first other character.
    Private Sub parseOptionalSpaces(reader As CSVCharTokenizer)
        Dim ch As String
        Do
            ch = reader.Read()
        Loop While (isSpace(ch))
        Call reader.Unread(ch)
    End Sub

    ' Parses one field, skipping spaces/tabs before it and, when the field is not
    ' immediately followed by a terminator, after it as well.
    Private Sub parseRawString(reader As CSVCharTokenizer, consumer As ICsvConsumer, continue As Boolean)
        Call parseOptionalSpaces(reader)
        Call parseRawField(reader, consumer, continue)
        If Not continue Then Exit Sub
        If Not isFieldTerminator(reader.Peek()) Then
            Call parseOptionalSpaces(reader)
        End If
    End Sub

    ' Reads one field value (quoted or simple) and hands it to the consumer.
    ' When the next character is already a terminator, an empty value is reported.
    Private Sub parseRawField(reader As CSVCharTokenizer, consumer As ICsvConsumer, continue As Boolean)
        Dim fieldValue As String
        Dim ch As String
        ch = reader.Peek()
        If Not isFieldTerminator(ch) Then
            If ch = {"} Then
                fieldValue = parseQuotedField(reader)
            Else
                fieldValue = parseSimpleField(reader)
            End If
        End If
        Call consumer.ConsumeField(fieldValue, continue)
    End Sub

    ' Parses a field enclosed in double quotes and returns its content without the
    ' enclosing quotes. Raises error 32106 if the closing quote is missing.
    Private Function parseQuotedField(reader As CSVCharTokenizer) As String
        Call reader.Read() 'read and discard initial quote
        Dim Field As String
        Field = parseEscapedField(reader)
        Dim ch As String
        ch = reader.Read()
        If (ch <> {"}) Then
            Call reader.Unread(ch)
            Error 32106, "Quoted field has no terminating double quote at char pos " & reader.CurrentIndex
        End If
        parseQuotedField = Field
    End Function

    ' Returns the content of a quoted field up to, but not including, its closing quote.
    ' Each doubled quote ("") inside the field becomes one quote in the result.
    Private Function parseEscapedField(reader As CSVCharTokenizer) As String
        Dim sb As String
        Call parseSubField(reader, sb)
        Dim ch As String
        ch = reader.Read()
        Do While (processDoubleQuote(reader, ch))
            sb = sb & {"}
            Call parseSubField(reader, sb)
            ch = reader.Read()
        Loop
        ' ch is the closing quote (or CsvEOF); leave it for parseQuotedField to check.
        Call reader.Unread(ch)
        parseEscapedField = sb
    End Function

    ' Appends characters to sb until a double quote or CsvEOF, which is pushed back.
    Private Sub parseSubField(reader As CSVCharTokenizer, sb As String)
        Dim ch As String
        ch = reader.Read()
        Do While ((ch <> {"}) And (ch <> CsvEOF))
            sb = sb & ch
            ch = reader.Read()
        Loop
        Call reader.Unread(ch)
    End Sub

    ' True if c cannot be part of an unquoted field: a field terminator or a double quote.
    Private Function isBadSimpleFieldChar(c As String) As Boolean
        isBadSimpleFieldChar = isFieldTerminator(c) Or (c = {"}) ''Since 2020-07-04: Spc & Tab are not bad, and shouldn't be considered so. -Sam
    End Function

    ' Parses an unquoted field: every character up to the next terminator or double
    ' quote, which is pushed back. Returns an empty string if the field is empty.
    Private Function parseSimpleField(reader As CSVCharTokenizer) As String
        Dim ch As String
        ch = reader.Read()
        If (isBadSimpleFieldChar(ch)) Then
            Call reader.Unread(ch)
            parseSimpleField = ""
            Exit Function
        End If
        Dim sb As String
        sb = ch
        ch = reader.Read()
        Do While Not isBadSimpleFieldChar(ch)
            sb = sb & ch
            ch = reader.Read()
        Loop
        Call reader.Unread(ch)
        parseSimpleField = sb
    End Function

    ' Returns True, and consumes the second quote, when ch is a double quote that is
    ' immediately followed by another one (an escaped quote). Otherwise returns False
    ' and consumes nothing.
    Private Function processDoubleQuote(reader As CSVCharTokenizer, ByVal ch As String) As Boolean
        If ((ch = {"}) And (reader.Peek() = {"})) Then
            Call reader.Read() 'discard second quote of double
            processDoubleQuote = True
        End If
    End Function
End Class
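%REM
Class ICsvConsumer
Created Nov 9, 2017 by Sam Sirry
Description: Base class for receivers of the parser's output. Every method here is empty; derive a class from it and override the methods you need.
%END REM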
Public Class ICsvConsumer
    ' Base class whose methods CSVParser calls while parsing.
    ' All methods are intentionally empty; subclasses override them.

    ' Called once per parsed field with the field's text.
    ' Continue is passed by reference so an override can assign to it; the parser
    ' stops when it finds the flag False after the call.
    Public Sub ConsumeField(ByVal FieldVal As String, Continue As Boolean)
        ' Override in a derived class.
    End Sub

    ' Called after the last field of each record.
    ' Continue works as in ConsumeField.
    Public Sub SignalEndOfRecord(Continue As Boolean)
        ' Override in a derived class.
    End Sub

    ' Called once when the parser has reached the end of the data.
    Public Sub SignalEndOfFile()
        ' Override in a derived class.
    End Sub

    ' Called with the fraction of the data read so far, and with 1 just before
    ' SignalEndOfFile.
    Public Sub SignalProgress(FractionDone As Double)
        ' Override in a derived class.
    End Sub
End Class