"""analyzing_initial_dataset.py

Print summary statistics for two appraisal JSON files: the number of
appraisals and properties in each, and the distinct ``condition`` labels found
on the subjects, comps and properties of the extracted file.
"""
import json
# Path of the initial appraisal dataset; parsed once when the script starts.
INPUT_FILE = "appraisals_dataset.json"
# JSON text is UTF-8; pass the encoding explicitly so the read does not depend
# on the platform's locale default (which is not UTF-8 everywhere).
with open(INPUT_FILE, "r", encoding="utf-8") as f:
    data = json.load(f)

# Path of the extracted-features dataset; expected to expose the same
# top-level 'appraisals' list as the initial file.
EXTRACTED_DATA_FILE = "gpt_extracted_features_appraisals.json"
with open(EXTRACTED_DATA_FILE, "r", encoding="utf-8") as f:
    extracted_data = json.load(f)
# Distinct normalised condition labels collected so far, in first-seen order.
# clean_extracted_conditions() appends to these lists in place.
unique_subject_conditions = []
unique_comp_conditions = []
unique_property_conditions = []


def _normalize_condition(raw):
    """Return *raw* stripped and lower-cased, or None when it is not a string."""
    if not isinstance(raw, str):
        return None
    return raw.strip().lower()


def clean_extracted_conditions(appraisal):
    """Record the distinct condition labels found in one appraisal.

    Reads the ``condition`` value of ``appraisal['subject']`` and of every
    entry in ``appraisal['comps']`` and ``appraisal['properties']``, normalises
    it (strip + lower-case) and appends it to the matching module-level list
    if it is not already there.

    A missing, ``None`` or otherwise non-string condition is skipped instead of
    raising ``AttributeError``. Blank strings are still recorded (as ``''``)
    for the subject and comps, and are skipped for properties.

    Args:
        appraisal: Mapping with the keys ``'subject'`` (a mapping), ``'comps'``
            and ``'properties'`` (iterables of mappings).

    Raises:
        KeyError: If ``'subject'``, ``'comps'`` or ``'properties'`` is absent.
    """
    subject_cond = _normalize_condition(appraisal['subject'].get('condition'))
    if subject_cond is not None and subject_cond not in unique_subject_conditions:
        unique_subject_conditions.append(subject_cond)

    for comp in appraisal['comps']:
        comp_cond = _normalize_condition(comp.get('condition'))
        if comp_cond is not None and comp_cond not in unique_comp_conditions:
            unique_comp_conditions.append(comp_cond)

    for prop in appraisal['properties']:
        prop_cond = _normalize_condition(prop.get('condition'))
        # Truthiness check: properties skip blank labels as well as missing ones.
        if prop_cond and prop_cond not in unique_property_conditions:
            unique_property_conditions.append(prop_cond)
# --- Report: appraisal counts for both datasets ---
initial_appraisals = data['appraisals']
extracted_appraisals = extracted_data['appraisals']
total_appraisals = len(initial_appraisals)
total_extracted_appraisals = len(extracted_appraisals)

print(f"Total initial appraisals: {total_appraisals}")
print(f"Total extracted appraisals: {total_extracted_appraisals}")

# --- Report: property counts ---
total_properties = sum(len(entry['properties']) for entry in initial_appraisals)

# One pass over the extracted data both gathers the condition labels
# (a side effect of clean_extracted_conditions) and counts the properties.
total_extracted_properties = 0
for entry in extracted_appraisals:
    clean_extracted_conditions(entry)
    total_extracted_properties = total_extracted_properties + len(entry['properties'])

print(f"Total initial properties: {total_properties}")
print(f"Total extracted properties: {total_extracted_properties}")

# --- Report: distinct condition labels gathered above ---
for collected in (
    unique_subject_conditions,
    unique_comp_conditions,
    unique_property_conditions,
):
    print(collected)
# average_properties_per_appraisal = total_properties / total_appraisals
# print(f"Average properties per appraisal: {average_properties_per_appraisal}")
# for property in data['appraisals'][0]['properties'][:10]:
# print(property['public_remarks'] + "\n")