-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdataCleaning.py
More file actions
44 lines (35 loc) · 1.45 KB
/
dataCleaning.py
File metadata and controls
44 lines (35 loc) · 1.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
from catalogScraper import generateClassesPrereqs
def removeNonCourse(course, classesDict):
    """Keep only the prerequisites that belong to the given course.

    A prerequisite belongs to the course when its name starts with
    ``str(course) + '.'`` (for example ``'18.'`` for course ``18``).

    Args:
        course: Course identifier; converted with ``str`` to build the prefix.
        classesDict: Mapping from class name to an iterable of prerequisite
            names. Prerequisite names are assumed to be strings.

    Returns:
        A new dict with the same keys as ``classesDict``; each value is a new
        list holding, in their original order, the prerequisites that carry
        the course prefix. ``classesDict`` itself is not modified.
    """
    # The prefix does not depend on the class being examined, so build it once.
    coursePrefix = str(course) + '.'
    return {
        className: [
            prereq for prereq in prereqs if prereq.startswith(coursePrefix)
        ]
        for className, prereqs in classesDict.items()
    }
def removeNoConnections(classesDict):
    """Drop classes that take part in no prerequisite relationship.

    A class is kept when it lists at least one prerequisite or when it
    appears in another class's prerequisite list.

    Args:
        classesDict: Mapping from class name to an iterable of prerequisite
            class names.

    Returns:
        A new dict containing only the connected classes. Classes that are
        keys of ``classesDict`` keep their original prerequisite value and
        their original relative order. A class that is only ever referenced
        as a prerequisite (it has no entry of its own) is appended with an
        empty prerequisite list instead of raising ``KeyError``.
    """
    connected = {name: False for name in classesDict}
    for className, parents in classesDict.items():
        for parent in parents:
            connected[className] = True
            # May introduce a name that has no entry in classesDict.
            connected[parent] = True

    return {
        name: classesDict.get(name, [])
        for name, isConnected in connected.items()
        if isConnected
    }
def generateCleanData(course, courseExceptions=None):
    """Build the cleaned prerequisite data for one course.

    Calls ``generateClassesPrereqs`` and then filters its first result with
    ``removeNonCourse`` and ``removeNoConnections``.

    Args:
        course: Course identifier passed to ``generateClassesPrereqs`` and
            ``removeNonCourse``.
        courseExceptions: Passed through unchanged to
            ``generateClassesPrereqs``. Defaults to a fresh empty dict per
            call, so no dict object is shared between calls.

    Returns:
        A ``(classesDict, classMapping)`` tuple: the filtered class dict and
        the second value returned by ``generateClassesPrereqs``, untouched.
    """
    if courseExceptions is None:
        courseExceptions = {}

    classesDict, classMapping = generateClassesPrereqs(course, courseExceptions)
    classesDict = removeNonCourse(course, classesDict)
    classesDict = removeNoConnections(classesDict)

    # Hard-coded exclusion of the '18.0002' entry; the reason is not shown in
    # this file. NOTE(review): other classes' prerequisite lists may still
    # mention '18.0002' after this -- confirm that is intended.
    classesDict.pop('18.0002', None)
    return classesDict, classMapping
if __name__ == '__main__':
    # Manual run: build the cleaned data for course 24 and print the class dict.
    cleanedClasses, _classMap = generateCleanData(24)
    print(cleanedClasses)