-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathModel.py
More file actions
executable file
·121 lines (85 loc) · 3.31 KB
/
Model.py
File metadata and controls
executable file
·121 lines (85 loc) · 3.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
from peewee import *
# SQLite database handles, one file per storage area. Every handle is created
# with threadlocals=True.

# Crawler storage: DBCrawl backs both CrawledTable and Seeds.
DBCrawl = SqliteDatabase("CrawlTable.db", threadlocals=True)
DBUnCrawl = SqliteDatabase("UnCrawlTable.db", threadlocals=True)
DBRobot = SqliteDatabase("RobotTable.db", threadlocals=True)
DBWebPage = SqliteDatabase("WebPageTable.db", threadlocals=True)

# Ranking, query-suggestion and phrase-search storage.
DBPageRank = SqliteDatabase("PageRankTable.db", threadlocals=True)
DBQuery = SqliteDatabase("QueryTable.db", threadlocals=True)
DBPhrase = SqliteDatabase("FullPagesTable.db", threadlocals=True)

# Indexer storage: the index itself plus the indexed-URL counter.
DBIndexer = SqliteDatabase("IndexerTable.db", threadlocals=True)
indexedCount = SqliteDatabase("indexedCount.db", threadlocals=True)
# --------------------------------------------- Crawler stuff ---------------------------------------------
class CrawledTable(Model):
    """Rows of URLs with a uniqueness constraint, stored in DBCrawl.

    Presumably the URLs the crawler has already visited (inferred from the
    name; confirm against the crawler code).
    """

    crawledURL = CharField(unique=True)

    class Meta:
        database = DBCrawl
class UncrawledTable(Model):
    """Rows of URLs with a uniqueness constraint, stored in DBUnCrawl.

    Presumably the crawler's pending-URL list (inferred from the name;
    confirm against the crawler code).
    """

    uncrawledURL = CharField(unique=True)

    class Meta:
        database = DBUnCrawl
class RobotTxts(Model):
    """Text content stored per network location, kept in DBRobot.

    `netLoc` is unique, so there is at most one row per location.
    Presumably `robotContent` is that host's robots.txt body (inferred from
    the names; confirm against the crawler code).
    """

    netLoc = CharField(unique=True)
    robotContent = TextField()

    class Meta:
        database = DBRobot
class WebPages(Model):
    """Page content keyed by a unique page URL, stored in DBWebPage."""

    pageURL = CharField(unique=True)
    pageContent = TextField()

    class Meta:
        database = DBWebPage
class Seeds(Model):
    """Seed pages with a crawl frequency and last-crawl timestamp.

    Lives in DBCrawl, the same database as CrawledTable. `pageURL` is
    unique. The unit of `crawlFrequency` is not visible in this file.
    """

    pageURL = CharField(unique=True)
    crawlFrequency = IntegerField()
    lastCrawl = DateTimeField()

    class Meta:
        database = DBCrawl
# --------------------------------------------- Page in-links stuff ---------------------------------------------
class PageRank(Model):
    """In-link counter per unique page URL, stored in DBPageRank.

    `pageInLinks` defaults to 1 for a newly created row.
    """

    pageURL = CharField(unique=True)
    pageInLinks = IntegerField(default=1)

    class Meta:
        database = DBPageRank
# --------------------------------------------- Search suggestions stuff ---------------------------------------------
class QuerySuggestion(Model):
    """Keyword, its stem and a counter, stored in DBQuery.

    `keyword` is unique; `count` defaults to 1 for a newly created row.
    """

    keyword = TextField(unique=True)
    stem = TextField()
    count = IntegerField(default=1)

    class Meta:
        database = DBQuery
# --------------------------------------------- Phrase search stuff ---------------------------------------------
class FullPages(Model):
    """Page content and title keyed by a unique page URL, stored in DBPhrase."""

    pageURL = CharField(unique=True)
    pageContent = TextField()
    pageTitle = TextField()

    class Meta:
        database = DBPhrase
# --------------------------------------------- Indexer stuff ---------------------------------------------
class IndexedCount(Model):
    """Single integer counter `indexedURLs` (default 0).

    Stored in the database handle named `indexedCount`.
    """

    indexedURLs = IntegerField(default=0)

    class Meta:
        database = indexedCount
class PositionsField(CharField):
    """Char-backed field holding a list of integers as comma-separated text.

    In Python the value is a list such as ``[3, 17, 42]``; in the database
    it is the string ``"3,17,42"``. An empty list is stored as ``""`` and is
    read back as ``[]``.
    """

    def db_value(self, value):
        """Convert a Python sequence into its stored text form.

        Args:
            value: iterable of items convertible with ``str`` (normally
                ints), or ``None``.

        Returns:
            The items joined with ``","``; ``""`` for an empty sequence;
            ``None`` unchanged so that NULL stays NULL.
        """
        if value is None:
            return None
        return ",".join(str(item) for item in value)

    def python_value(self, value):
        """Convert stored text back into a list of ints.

        Args:
            value: comma-separated integers as produced by ``db_value``,
                an empty string, or ``None``.

        Returns:
            A list of ints; ``[]`` for an empty string (``"".split(",")``
            yields ``[""]``, which ``int`` cannot parse, so it is handled
            explicitly); ``None`` unchanged.
        """
        if value is None:
            return None
        if not value:
            return []
        return [int(part) for part in value.split(",")]
class IndexerTable(Model):
    """Index entries keyed by the (keyword, url) pair, stored in DBIndexer.

    Each row holds the keyword, its stem, the URL, a list of integer
    positions and an importance code.
    """

    keyword = CharField()
    stem = CharField()
    url = CharField()
    # A callable default hands every new row its own list. A literal []
    # would be one list object shared by all rows that rely on the default.
    positions = PositionsField(default=list)
    importance = IntegerField()  # 0 -> title, 1 -> header, 2 -> plain text

    class Meta:
        database = DBIndexer
        primary_key = CompositeKey('keyword', 'url')