forked from prasaaaad98/Grid7.0
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconvert_csv_to_json.py
More file actions
119 lines (103 loc) · 4.66 KB
/
convert_csv_to_json.py
File metadata and controls
119 lines (103 loc) · 4.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import ast
import json
import math
import os
import random
import re

import pandas as pd
def clean_price(price_str):
    """Convert a raw price value into a finite number.

    Currency symbols (₹, $), commas and whitespace are stripped before the
    value is parsed with ``float()``.

    Args:
        price_str: Raw price cell. May be a string, a number, or a missing
            value (``None`` / NaN).

    Returns:
        The parsed price as a ``float``. If the value is missing, empty,
        unparseable or not finite, a random integer placeholder in the
        range ``[100, 50000]`` is returned instead, so the result is not
        deterministic for bad input.
    """
    if pd.isna(price_str) or price_str == '':
        return random.randint(100, 50000)

    # Convert to string and remove currency symbols, commas, and spaces.
    cleaned = re.sub(r'[₹$,\s]', '', str(price_str))
    try:
        value = float(cleaned)
    except ValueError:
        return random.randint(100, 50000)

    # float() also accepts "nan", "inf" and "infinity". Those are not usable
    # prices and would be serialized as invalid JSON, so treat them as missing.
    if not math.isfinite(value):
        return random.randint(100, 50000)
    return value
def _parse_list_literal(text):
    """Parse *text* as a list literal; return the list, or None on failure."""
    # NOTE(security): this data comes straight from the CSV. The previous
    # implementation used eval(), which would execute any expression embedded
    # in a cell. ast.literal_eval only accepts plain literals.
    try:
        # strip(): literal_eval rejects leading whitespace before Python 3.10.
        parsed = ast.literal_eval(text.strip())
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return None
    return parsed if isinstance(parsed, list) else None


def _parse_images(image_field, placeholder):
    """Return the image URLs from the 'image' cell, or [placeholder]."""
    if not isinstance(image_field, str) or not image_field:
        return [placeholder]

    if image_field.strip().startswith('['):
        parsed = _parse_list_literal(image_field)
        # An unparseable list literal is kept as a single raw entry.
        images = [image_field] if parsed is None else [str(url) for url in parsed]
    else:
        images = [image_field]

    if not images or images[0] == 'nan':
        return [placeholder]
    return images


def _parse_category(raw):
    """Return the most specific category from the category-tree cell."""
    if not isinstance(raw, str) or not raw:
        return 'General'

    if raw.strip().startswith('['):
        tree = _parse_list_literal(raw)
        if not tree or not isinstance(tree[-1], str):
            return 'General'
        # Entries look like "A >> B >> C"; the last segment of the last
        # entry is taken as the most specific category.
        category = tree[-1].split('>>')[-1].strip()
    else:
        category = raw

    if not category or category == 'nan':
        return 'General'
    return category


def _parse_rating(raw):
    """Return the rating as a positive finite float, or a random 3.5-5.0."""
    try:
        rating = float(raw)
    except (TypeError, ValueError):
        rating = float('nan')
    if not math.isfinite(rating) or rating <= 0:
        return round(random.uniform(3.5, 5.0), 1)
    return rating


def _text_or(value, fallback):
    """Return str(value), or *fallback* when the value is missing or blank."""
    if value is None:
        return fallback
    text = str(value)
    # A missing CSV cell arrives as float NaN, whose str() is 'nan'.
    if not text or text == 'nan':
        return fallback
    return text


def _is_valid_price(value):
    """Return True when *value* is a positive, finite number."""
    return bool(value) and math.isfinite(value) and value > 0


def _build_product(row, number):
    """Build one product dict from a CSV *row*; *number* is its 1-based id."""
    title = _text_or(row.get('product_name'), f'Product {number}')

    price = clean_price(row.get('discounted_price'))
    if not _is_valid_price(price):
        price = random.randint(100, 50000)

    retail_price = clean_price(row.get('retail_price'))
    if not _is_valid_price(retail_price):
        # Keep the placeholder retail price above the selling price.
        retail_price = price + random.randint(100, 1000)

    return {
        "id": number,
        "title": title,
        "brand": _text_or(row.get('brand'), 'Unknown'),
        "category": _parse_category(row.get('product_category_tree', '')),
        "price": price,
        "retail_price": retail_price,
        "images": _parse_images(
            row.get('image', ''),
            f'https://dummyimage.com/product{number}',
        ),
        "rating": _parse_rating(row.get('product_rating')),
        "description": _text_or(row.get('description'), f'High quality {title}'),
    }


def convert_csv_to_json():
    """Convert ``dataset.csv`` into ``backend/data/products.json``.

    Reads ``dataset.csv`` from the current working directory, builds one
    product dict per row (id, title, brand, category, price, retail_price,
    images, rating, description) and writes the list as indented UTF-8 JSON.
    The output directory is created if it does not exist.

    Missing or invalid prices and ratings are replaced with random
    placeholder values, so the output is not deterministic for such rows.
    Progress messages are printed to stdout.
    """
    # Read the CSV file
    print("Reading dataset.csv...")
    df = pd.read_csv('dataset.csv')
    print(f"Found {len(df)} products in the dataset")
    print("Columns available:", df.columns.tolist())

    # Number products by row position rather than by index label, so ids are
    # always consecutive integers starting at 1.
    products = [
        _build_product(row, number)
        for number, (_, row) in enumerate(df.iterrows(), start=1)
    ]

    # Save to JSON file
    output_file = 'backend/data/products.json'
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(products, f, indent=2, ensure_ascii=False)

    print(f"Successfully converted {len(products)} products to {output_file}")
    print(f"Sample product: {products[0] if products else 'No products found'}")
# Script entry point: run the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    convert_csv_to_json()