forked from luoxn28/tinyhtml
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtinyhtm.h
More file actions
399 lines (314 loc) · 12.2 KB
/
tinyhtm.h
File metadata and controls
399 lines (314 loc) · 12.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
#ifndef TINYHTM_H
#define TINYHTM_H
#include <iostream>
#include <string>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>
// Forward declarations: these types are referenced by pointer in the class
// declarations below before (or without) being fully defined in this header.
class TiHtmDocument;
class TiHtmElement;
class TiHtmComment;
class TiHtmUnknown;
class TiHtmText;
class TiHtmDeclaration;
class TiHtmParsingData;
/// Row/column position record. Both fields hold -1 after construction
/// and after clear().
struct TiHtmCursor
{
    /// Start with both coordinates set to -1.
    TiHtmCursor() : row(-1), column(-1) {}

    /// Reset both coordinates back to -1.
    void clear()
    {
        column = -1;
        row = -1;
    }

    int row;
    int column;
};
/// Abstract base class of the whole tinyhtml hierarchy.
///
/// Declares the print/parse interface that every concrete class implements,
/// exposes the location stored for the object, and groups the static
/// character/string helpers used by the parser.
class TiHtmBase
{
    friend class TiHtmNode;
    friend class TiHtmElement;
    //friend class TiHtmDocumnet;

public:
    TiHtmBase() {}
    virtual ~TiHtmBase() {}

    /// Print this object to the FILE stream `cfile`.
    /// `depth` is passed through by callers (presumably the nesting level
    /// used for indentation -- confirm against the implementations).
    virtual void print(FILE *cfile, int depth) const = 0;

    /// Parse this object starting at `p`. Implemented by derived classes.
    virtual const char *parse(const char *p, TiHtmParsingData *data) = 0;

    /// Row stored in `location` (-1 while the cursor is cleared).
    int row() const { return location.row; }

    /// Column stored in `location` (-1 while the cursor is cleared).
    /// Fixed: this used to return `location.row`.
    int column() const { return location.column; }

    //static void encodeString(const char *str, char *out);	// not write code

protected:
    static bool isWhiteSpace(char c);

    /// int overload: values of 256 and above are never white space; anything
    /// smaller is narrowed to char and forwarded to the char overload.
    static bool isWhiteSpace(int c)
    {
        if (c < 256)
            return isWhiteSpace(static_cast<char>(c));
        return false;
    }

    static const char *skipWhiteSpace(const char *p);

    /** Read an html name into the string provided. Returns a pointer
        just past the last character of the name.
        Returns NULL on error.
    */
    static const char *readName(const char *p, std::string *name);

    /// Read the text inside an element until endTag is encountered.
    static const char *readText(const char *p, std::string *text, bool trimWhiteSpace, const char *endTag, bool ignoreCase);

    /// Convert an escaped entity back into its original character,
    /// e.g. "&lt;" becomes '<'.
    static const char *getEntity(const char *p, char *value);

    /// Get one character from the input, possibly converting an escaped entity.
    static const char *getChar(const char *p, char *value);

    /// If tag is a prefix of p, return true, otherwise return false.
    static bool stringEqual(const char *p, const char *tag, bool ignoreCase);

    // Location information for this object.
    TiHtmCursor location;

    static int isAlpha(unsigned char c);
    static int isAlnum(unsigned char c);
    static int toLower(int c) { return tolower(c); }

private:
    TiHtmBase(const TiHtmBase &);           // not allowed
    void operator=(const TiHtmBase &base);  // not allowed

    /// One row of the entity table: the escaped string, its length and
    /// the character it stands for.
    struct Entity
    {
        const char *str;
        unsigned int strLength;
        const char chr;
    };

    enum
    {
        NUM_ENTITY = 5,
        MAX_ENTITY_LENGTH = 6
    };

    static Entity entity[NUM_ENTITY]; // defined in file tinyhtmparser.cpp
};
/// A node of the html tree: stores a type, a string value, a parent
/// pointer, first/last child pointers and previous/next sibling pointers,
/// plus the navigation and editing interface built on top of them.
class TiHtmNode : public TiHtmBase
{
    friend class TiHtmElement;
    //friend class TiHtmDocument;

public:
    /// the type of html nodes
    enum NodeType
    {
        TINYHTM_DOCUMENT,
        TINYHTM_ELEMENT,
        TINYHTM_COMMENT,
        TINYHTM_UNKNOWN,
        TINYHTM_TEXT,
        TINYHTM_DECLARATION,
        TINYHTM_TYPTCOUNT
    };

    virtual ~TiHtmNode();

    /**
        The meaning of value depends on the node type:
        Document:	file name of the document
        Element:	name of the element
        Comment:	the comment text
        Unknown:	the tag contents
        Text:		the text string
    */
    const std::string getValue() const { return value; }
    const char *getValueStr() const { return value.c_str(); }
    void setValue(const char *_value) { value = _value; }
    void setValue(const std::string &_value) { value = _value; }

    /// Delete all the child nodes
    void clear();

    TiHtmNode *getParent() { return parent; }
    const TiHtmNode *getParent() const { return parent; }

    TiHtmNode *getFirstChild() { return firstChild; }
    const TiHtmNode *getFirstChild() const { return firstChild; }
    TiHtmNode *getLastChild() { return lastChild; }
    const TiHtmNode *getLastChild() const { return lastChild; }

    /// Child lookup by value. The non-const overloads forward to the const
    /// ones and cast the constness of the result away.
    const TiHtmNode *getFirstChild(const char *_value) const;
    TiHtmNode *getFirstChild(const char *_value)
    {
        return const_cast<TiHtmNode *>((const_cast<const TiHtmNode*>(this))->getFirstChild(_value));
    }
    const TiHtmNode *getFirstChild(const std::string &_value) const { return getFirstChild(_value.c_str()); }
    TiHtmNode *getFirstChild(const std::string &_value) { return getFirstChild(_value.c_str()); }

    const TiHtmNode *getLastChild(const char *_value) const;
    TiHtmNode *getLastChild(const char *_value)
    {
        return const_cast<TiHtmNode *>((const_cast<const TiHtmNode*>(this))->getLastChild(_value));
    }
    const TiHtmNode *getLastChild(const std::string &_value) const { return getLastChild(_value.c_str()); }
    /// Non-const std::string overload; mirrors getFirstChild(const std::string &).
    TiHtmNode *getLastChild(const std::string &_value) { return getLastChild(_value.c_str()); }
    /// Misspelled historical name of getLastChild(const std::string &),
    /// kept so existing callers keep compiling. Prefer getLastChild().
    TiHtmNode *geLastChild(const std::string &_value) { return getLastChild(_value.c_str()); }

    TiHtmNode *insertEndChild(const TiHtmNode &addNode);
    TiHtmNode *linkEndChild(TiHtmNode *pnode);
    TiHtmNode *insertBeforeChild(TiHtmNode *beforeNode, const TiHtmNode &addNode);
    TiHtmNode *insertAfterChild(TiHtmNode *afterNode, const TiHtmNode &addNode);
    TiHtmNode *replaceChild(TiHtmNode *replaceNode, const TiHtmNode &withNode);

    /// Delete a child of this node
    bool removeChild(TiHtmNode *removeNode);

    const TiHtmNode *previousSibling() const { return prev; }
    TiHtmNode *previousSibling() { return prev; }

    const TiHtmNode *previousSibling(const char *_value) const;
    TiHtmNode *previousSibling(const char *_value)
    {
        return const_cast<TiHtmNode *>((const_cast<const TiHtmNode *>(this))->previousSibling(_value));
    }

    /// Using std::string
    const TiHtmNode *previousSibling(const std::string &_value) const { return previousSibling(_value.c_str()); }
    TiHtmNode *previousSibling(const std::string &_value) { return previousSibling(_value.c_str()); }
    const TiHtmNode *nextSibling(const std::string &_value) const { return nextSibling(_value.c_str()); }
    TiHtmNode *nextSibling(const std::string &_value) { return nextSibling(_value.c_str()); }

    const TiHtmNode *nextSibling() const { return next; }
    TiHtmNode *nextSibling() { return next; }

    const TiHtmNode *nextSibling(const char *_value) const;
    TiHtmNode *nextSibling(const char *_value)
    {
        return const_cast<TiHtmNode *>((const_cast<const TiHtmNode *>(this))->nextSibling(_value));
    }

    /// Some convenience functions
    const TiHtmElement *nextSiblingElement() const;
    TiHtmElement *nextSiblingElement()
    {
        return const_cast<TiHtmElement *>((const_cast<const TiHtmNode *>(this))->nextSiblingElement());
    }

    const TiHtmElement *nextSiblingElement(const char *_value) const;
    TiHtmElement *nextSiblingElement(const char *_value)
    {
        return const_cast<TiHtmElement *>((const_cast<const TiHtmNode *>(this))->nextSiblingElement(_value));
    }

    /// Some convenience functions using std::string
    const TiHtmElement *nextSiblingElement(const std::string &_value) const { return nextSiblingElement(_value.c_str()); }
    TiHtmElement *nextSiblingElement(const std::string &_value) { return nextSiblingElement(_value.c_str()); }

    /// Convenience functions to get through elements.
    const TiHtmElement *firstChildElement() const;
    TiHtmElement *firstChildElement()
    {
        return const_cast<TiHtmElement *>((const_cast<const TiHtmNode *>(this))->firstChildElement());
    }

    const TiHtmElement *firstChildElement(const char *_value) const;
    TiHtmElement *firstChildElement(const char *_value)
    {
        return const_cast<TiHtmElement *>((const_cast<const TiHtmNode *>(this))->firstChildElement(_value));
    }

    /// Some convenience functions using std::string
    const TiHtmElement *firstChildElement(const std::string &_value) const { return firstChildElement(_value.c_str()); }
    TiHtmElement *firstChildElement(const std::string &_value) { return firstChildElement(_value.c_str()); }

    /** The type of this node. One of
        TINYHTM_DOCUMENT, TINYHTM_ELEMENT, TINYHTM_COMMENT,
        TINYHTM_UNKNOWN, TINYHTM_TEXT, TINYHTM_DECLARATION
    */
    int getType() const { return type; }

    /// Return true if this node has no children
    bool noChildren() const { return !firstChild; }

    /// Checked downcasts: the base versions return NULL; a derived class
    /// overrides the one matching its own type to return `this`.
    virtual const TiHtmDocument *toDocument() const { return NULL; }
    virtual const TiHtmElement *toElement() const { return NULL; }
    virtual const TiHtmComment *toComment() const { return NULL; }
    virtual const TiHtmUnknown *toUnknown() const { return NULL; }
    virtual const TiHtmText *toText() const { return NULL; }
    virtual const TiHtmDeclaration *toDeclaration() const { return NULL; }

    virtual TiHtmDocument *toDocument() { return NULL; }
    virtual TiHtmElement *toElement() { return NULL; }
    virtual TiHtmComment *toComment() { return NULL; }
    virtual TiHtmUnknown *toUnknown() { return NULL; }
    virtual TiHtmText *toText() { return NULL; }
    virtual TiHtmDeclaration *toDeclaration() { return NULL; }

    /// Create an exact duplicate of this node and return it. The memory must be deleted by the caller
    virtual TiHtmNode *clone() const = 0;

    /// Copy to allocated object
    void copyTo(TiHtmNode *target) const;

    /// Figure out what is at *p, and parse it. Returns null if it is not an html node.
    /// Determines what the characters in the input are, then allocates (new)
    /// a node of the corresponding type and returns it.
    TiHtmNode *identify(const char *p);

protected:
    TiHtmNode(NodeType _type);

    TiHtmNode *parent;
    NodeType type;

    TiHtmNode *firstChild;
    TiHtmNode *lastChild;

    std::string value;

    TiHtmNode *prev;
    TiHtmNode *next;

private:
    TiHtmNode(const TiHtmNode &);           // not implemented
    void operator=(const TiHtmNode &base);  // not implemented
};
/// Element node. Its value (set through the constructors) is the element
/// name, as described on TiHtmNode::getValue().
class TiHtmElement : public TiHtmNode
{
public:
    // Construction from a C string, from a std::string, or by copy.
    TiHtmElement(const char *_value);
    TiHtmElement(const std::string &_value);
    TiHtmElement(const TiHtmElement &);

    //TiHtmElement &operator=(const TiHtmElement &base);

    virtual ~TiHtmElement();

    /// If the first child of the element is not text, return null;
    /// otherwise return the string of that text.
    //const char *getText() const;

    /// Create a new element that is a copy of this one and return it.
    virtual TiHtmNode *clone() const;

    /// Write the element to a FILE stream.
    virtual void print(FILE *cfile, int depth) const;

    /// parse() as declared in class TiHtmBase.
    virtual const char *parse(const char *p, TiHtmParsingData *data);

    // Downcast overrides: an element returns itself.
    virtual const TiHtmElement *toElement() const
    {
        return this;
    }
    virtual TiHtmElement *toElement()
    {
        return this;
    }

protected:
    void copyTo(TiHtmElement *target) const;

    /// Like clear(), but initializes 'this' object as well.
    void clearThis();

    /* [internal use]
       Reads the "value" of the element -- another element, or text.
       This should terminate with the current end tag.
    */
    const char *readValue(const char *in, TiHtmParsingData *prevData);
};
/// Text node. Its value is the text string, as described on
/// TiHtmNode::getValue().
class TiHtmText : public TiHtmNode
{
    friend class TiHtmElement;

public:
    /// Build a text node whose value is `_value`.
    TiHtmText(const char *_value) : TiHtmNode(TiHtmNode::TINYHTM_TEXT)
    {
        setValue(_value);
    }

    /// std::string flavour of the constructor above.
    TiHtmText(const std::string &_value) : TiHtmNode(TiHtmNode::TINYHTM_TEXT)
    {
        setValue(_value);
    }

    virtual ~TiHtmText() {}

    /// Copy constructor: creates a text node and lets `copy` fill it via copyTo().
    /// Fixed: the parameter `&copy` had been mangled into the character '©',
    /// which left the name `copy` used in the body undeclared.
    TiHtmText(const TiHtmText &copy) : TiHtmNode(TiHtmNode::TINYHTM_TEXT) { copy.copyTo(this); }

    /// Assignment: `base` copies itself into this node via copyTo().
    TiHtmText &operator=(const TiHtmText &base) { base.copyTo(this); return *this; }

    /// Write this text to a FILE stream
    virtual void print(FILE *cfile, int depth) const;

    /// Parse the text
    virtual const char *parse(const char *p, TiHtmParsingData *data);

    // Downcast overrides: a text node returns itself.
    virtual const TiHtmText *toText() const { return this; }
    virtual TiHtmText *toText() { return this; }

protected:
    // Internal use: returns a new text node (covariant override of
    // TiHtmNode::clone(), whose contract is an exact duplicate).
    virtual TiHtmText *clone() const;

    /// Forwards to TiHtmNode::copyTo(); there is nothing text-specific to copy here.
    void copyTo(TiHtmText *target) const { TiHtmNode::copyTo(target); }

    bool blank() const;
};
/// Document node: the top of the tree. Adds file loading/saving, the parse
/// entry point and a tab-size setting on top of TiHtmNode.
class TiHtmDocument : public TiHtmNode
{
public:
    TiHtmDocument();
    TiHtmDocument(const char *filename);
    TiHtmDocument(std::string filename);

    // Fixed: in both declarations below the parameter `&copy` had been
    // mangled into the character '©', which is not valid C++.
    TiHtmDocument(const TiHtmDocument &copy);
    TiHtmDocument &operator=(const TiHtmDocument &copy);

    virtual ~TiHtmDocument() {}

    // Loading. NOTE(review): the no-argument form presumably uses the file
    // name stored as this node's value -- confirm in the implementation.
    bool loadFile();
    bool loadFile(const char *filename);
    bool loadFile(const std::string &filename);
    bool loadFile(FILE *file);

    // Saving; same overload set as loadFile().
    bool saveFile() const;
    bool saveFile(const char *filename) const;
    bool saveFile(const std::string &filename) const;
    bool saveFile(FILE *file) const;

    /// Start parsing the document; `data` may be omitted (defaults to NULL).
    virtual const char *parse(const char *p, TiHtmParsingData *data = NULL);

    /// Get the root element, i.e. the first child element of the document.
    const TiHtmElement *rootElement() const { return firstChildElement(); }
    TiHtmElement *rootElement() { return firstChildElement(); }

    /// Tab size accessors
    void setTabSize(int _tabsize) { tabsize = _tabsize; }
    int tabSize() const { return tabsize; }

    /// Print the document to stdout at depth 0.
    void print() const { print(stdout, 0); }

    /// Print the document to the FILE stream `cfile`.
    virtual void print(FILE *cfile, int depth = 0) const;

private:
    /// Internal use
    virtual TiHtmNode *clone() const;

    void copyTo(TiHtmDocument *target) const;

    int tabsize;
};
#endif