Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python Debugger: Current File",
"type": "debugpy",
"request": "launch",
"program": "main.py",
"console": "integratedTerminal"
}
]
}
43 changes: 43 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import pandas as pd
import json
from utils import Utils

# Files read and written by the transformation, relative to the working directory.
MAPPING_FILE = 'mapping.json'
INPUT_FILE = 'homework.csv'
OUTPUT_FILE = 'formatted.csv'

# Seat dimensions have no entry in the header mapping (they map to ''), so they
# are copied straight from these raw input columns.
SEAT_COLUMNS = {
    'attrib__seat_width': 'furniture seat width (inches)',
    'attrib__seat_depth': 'furniture seat depth (inches)',
}


def main():
    """Convert INPUT_FILE into OUTPUT_FILE using the header mapping.

    Steps:
      1. Load the output-field -> input-column mapping from MAPPING_FILE.
      2. Run every input row through ``Utils.process_row``.
      3. Fill the seat width/depth attributes from the raw input row.
      4. Drop helper columns (names starting with ``_``), normalise the
         country, UL-certification and EAN columns, and write the CSV.
    """
    # Load header mapping from JSON file
    with open(MAPPING_FILE, 'r', encoding='utf-8') as json_file:
        header_mapping = json.load(json_file)

    # Read the UPC column as text: numeric inference would strip leading zeros
    # (and append ".0" when the column has blanks) before it is written out.
    df = pd.read_csv(INPUT_FILE, dtype={'upc': str})

    # Process each row using the header mapping and Utils
    formatted_rows = []
    for _, row in df.iterrows():
        formatted_row = Utils.process_row(row, header_mapping)

        # The seat columns must be read from the *input* row: formatted_row is
        # keyed by output field names and never contains these source headers.
        for output_field, source_column in SEAT_COLUMNS.items():
            formatted_row[output_field] = row.get(source_column, None)

        formatted_rows.append(formatted_row)

    # Create a DataFrame from the list of formatted rows
    formatted_df = pd.DataFrame(formatted_rows)

    # Drop helper columns (names starting with '_'); they only feed calculations
    columns_to_drop = [col for col in formatted_df.columns if col.startswith('_')]
    df_filtered = formatted_df.drop(columns=columns_to_drop)

    df_filtered['product__country_of_origin__alpha_3'] = (
        df_filtered['product__country_of_origin__alpha_3']
        .apply(Utils.convert_country_name_to_iso2)
    )
    df_filtered['attrib__ul_certified'] = (
        df_filtered['attrib__ul_certified'].apply(Utils.convert_ul_certified)
    )
    df_filtered['ean13'] = df_filtered['ean13'].astype(str)

    df_filtered.to_csv(OUTPUT_FILE, index=False)

    print(f"Transformation completed. Transformed data written to '{OUTPUT_FILE}'.")


if __name__ == '__main__':
    main()
82 changes: 82 additions & 0 deletions mapping.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
{
"manufacturer_sku": "item number",
"ean13": "upc",
"weight": "item weight (pounds)",
"length": "_function_",
"width": "item width (inches)",
"height": "item height (inches)",

"_depth": "item depth (inches)",
"_diameter": "item diameter (inches)",

"prop_65": "certified damp/wet",
"cost_price": "msrp ($)",
"min_price": "map ($)",
"made_to_order": "",
"product__product_class__name": "item category",
"product__brand__name": "brand",
"product__title": "description",
"product__description": "long description",
"product__bullets__0": "selling point 1",
"product__bullets__1": "selling point 2",
"product__bullets__2": "selling point 3",
"product__bullets__3": "selling point 4",
"product__bullets__4": "selling point 5",
"product__bullets__5": "selling point 6",
"product__bullets__6": "selling point 7",
"product__configuration__codes": "",
"product__multipack_quantity": "min order qty",
"product__country_of_origin__alpha_3": "country of origin",
"product__parent_sku": "related items",
"attrib__arm_height": "furniture arm height (inches)",
"attrib__assembly_required": "url instruction sheet",
"attrib__back_material": "",
"attrib__blade_finish": "",
"attrib__bulb_included": "bulb 1 included",
"attrib__bulb_type": "bulb 1 type",
"attrib__color": "primary color family",
"attrib__cord_length": "cord length (inches)",
"attrib__design_id": "",
"attrib__designer": "licensed by",
"attrib__distressed_finish": "",
"attrib__fill": "",
"attrib__finish": "item finish",
"attrib__frame_color": "",
"attrib__hardwire": "",
"attrib__kit": "conversion kit option",
"attrib__leg_color": "",
"attrib__leg_finish": "",
"attrib__material": "item materials",
"attrib__number_bulbs": "bulb 1 count",
"attrib__orientation": "",
"attrib__outdoor_safe": "outdoor",
"attrib__pile_height": "",
"attrib__seat_depth": "",
"attrib__seat_width": "",
"attrib__seat_height": "furniture seat height (inches)",
"attrib__shade": "shade shape",
"attrib__size": "",
"attrib__switch_type": "switch type",
"attrib__ul_certified": "safety rating",
"attrib__warranty_years": "",
"attrib__wattage": "voltage",
"attrib__weave": "",
"attrib__weight_capacity": "furniture weight capacity (pounds)",
"boxes__0__weight": "carton 1 weight (pounds)",
"boxes__0__length": "carton 1 length (inches)",
"boxes__0__height": "carton 1 height (inches)",
"boxes__0__width": "carton 1 width (inches)",
"boxes__1__weight": "carton 2 weight (pounds)",
"boxes__1__length": "carton 2 length (inches)",
"boxes__1__height": "carton 2 height (inches)",
"boxes__1__width": "carton 2 width (inches)",
"boxes__2__weight": "carton 3 weight (pounds)",
"boxes__2__length": "carton 3 length (inches)",
"boxes__2__height": "carton 3 height (inches)",
"boxes__2__width": "carton 3 width (inches)",
"boxes__3__weight": "",
"boxes__3__length": "",
"boxes__3__height": "",
"boxes__3__width": "",
"product__styles": "item style"
}
37 changes: 37 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
asttokens==2.4.1
comm==0.2.2
country-converter==1.2
debugpy==1.8.1
decorator==5.1.1
et-xmlfile==1.1.0
exceptiongroup==1.2.1
executing==2.0.1
ipykernel==6.29.4
ipython==8.24.0
jedi==0.19.1
jupyter_client==8.6.1
jupyter_core==5.7.2
matplotlib-inline==0.1.7
nest-asyncio==1.6.0
numpy==1.26.4
openpyxl==3.1.2
packaging==24.0
pandas==2.2.2
parso==0.8.4
pexpect==4.9.0
platformdirs==4.2.1
prompt-toolkit==3.0.43
psutil==5.9.8
ptyprocess==0.7.0
pure-eval==0.2.2
Pygments==2.18.0
python-dateutil==2.9.0.post0
pytz==2024.1
pyzmq==26.0.3
six==1.16.0
stack-data==0.6.3
tornado==6.4
traitlets==5.14.3
typing_extensions==4.11.0
tzdata==2024.1
wcwidth==0.2.13
113 changes: 113 additions & 0 deletions utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
from datetime import datetime
from decimal import Decimal


class Utils:
    """Stateless helpers that convert one raw input row into output fields.

    All methods are static; the class only serves as a namespace.
    """

    # Multipliers that convert the named unit into inches / pounds.
    CONVERTER_CM = 0.393701
    CONVERTER_MM = 0.0393701
    CONVERTER_KG_TO_POUNDS = 2.20462

    # Output-field name fragments that mark a value as a linear dimension.
    _DIMENSION_MARKERS = ('width', 'height', 'depth')

    # Source column names whose values are money amounts.
    _CURRENCY_FIELDS = ('wholesale ($)', 'map ($)', 'msrp ($)')

    # Input columns considered when deriving the overall item length.
    _FURNITURE_DIMENSION_COLUMNS = (
        'item width (inches)',
        'item depth (inches)',
        'item height (inches)',
        'item diameter (inches)',
    )

    # Country name -> ISO 3166-1 alpha-3 code. The misspelled "Phillipines"
    # key is kept so input that already relies on it keeps working.
    _COUNTRY_ALPHA_3 = {
        "China": "CHN",
        "India": "IND",
        "Indonesia": "IDN",
        "Philippines": "PHL",
        "Phillipines": "PHL",
        "Thailand": "THA",
        "Vietnam": "VNM",
    }

    @staticmethod
    def process_row(row, header_mapping):
        """Build one output record from an input row.

        Args:
            row: Mapping-like object indexed by input column name (for
                example a ``dict`` or a pandas ``Series``).
            header_mapping: Dict of output field name -> input column name.
                An input column that is absent from ``row`` (including the
                empty string) yields ``''`` before any conversion.

        Returns:
            Dict of output field name -> converted value.
        """
        formatted_row = {}

        for output_field, input_field in header_mapping.items():
            try:
                value = row[input_field]
            except (LookupError, TypeError):
                # Unmapped or missing column: fall back to an empty value.
                value = ''

            if isinstance(value, str):
                value = value.strip()  # Strip whitespace if it's a string

            # The special-case names below ('upc', 'msrp ($)', ...) are source
            # column names, so they are matched against the input column as
            # well; matching only the output field left these branches dead.
            names = (output_field, input_field)

            is_dimension = any(
                marker in output_field for marker in Utils._DIMENSION_MARKERS
            )
            if is_dimension and 'inches' not in output_field:
                value = Utils.convert_to_inches(value, 'inches')
            elif output_field == 'weight':
                value = Utils.convert_to_pounds(value, 'pounds')
            elif 'upc' in names:
                value = str(value)
            elif 'system creation date' in names:
                value = Utils.format_date(value)
            elif any(name in Utils._CURRENCY_FIELDS for name in names):
                value = Utils.format_currency(value)
            elif output_field == 'length':
                # Length is derived from the item's dimensions, not read
                # from a single input column.
                value = Utils.calculate_furniture_length(row)

            formatted_row[output_field] = value

        return formatted_row

    @staticmethod
    def convert_to_inches(value, unit):
        """Return ``value`` as a float number of inches.

        ``unit`` may be ``'inches'``, ``'cm'`` or ``'mm'`` (case-insensitive);
        any other unit is assumed to already be inches. Returns ``None`` when
        ``value`` is not numeric or ``unit`` is not a string.
        """
        factors = {'cm': Utils.CONVERTER_CM, 'mm': Utils.CONVERTER_MM}
        try:
            factor = factors.get(unit.lower())
            number = float(value)
        except (AttributeError, TypeError, ValueError, OverflowError):
            return None
        return number if factor is None else number * factor

    @staticmethod
    def convert_to_pounds(value, unit):
        """Return ``value`` as a float number of pounds.

        ``unit`` may be ``'pounds'`` or ``'kg'`` (case-insensitive); any other
        unit is assumed to already be pounds. Returns ``None`` when ``value``
        is not numeric or ``unit`` is not a string, mirroring
        ``convert_to_inches`` instead of raising on blank cells.
        """
        try:
            is_kg = unit.lower() == 'kg'
            number = float(value)
        except (AttributeError, TypeError, ValueError, OverflowError):
            return None
        return number * Utils.CONVERTER_KG_TO_POUNDS if is_kg else number

    @staticmethod
    def format_currency(value):
        """Return ``value`` as a ``Decimal`` rounded to two places.

        Returns ``None`` when ``value`` cannot be parsed as a finite number.
        """
        try:
            if isinstance(value, (int, str, Decimal)):
                amount = Decimal(value)
            else:
                # Floats (and numpy scalars) go through str() so Decimal sees
                # the short decimal text rather than the binary expansion.
                amount = Decimal(str(value))
            if not amount.is_finite():
                return None
            return amount.quantize(Decimal('0.01'))
        except (ArithmeticError, TypeError, ValueError):
            # decimal.InvalidOperation is a subclass of ArithmeticError.
            return None

    @staticmethod
    def format_date(date_str):
        """Convert an ``MM/DD/YY`` string to ``YYYY-MM-DD``.

        Returns ``None`` when ``date_str`` is not a string in that format.
        """
        try:
            return datetime.strptime(date_str, '%m/%d/%y').strftime('%Y-%m-%d')
        except (TypeError, ValueError):
            return None

    @staticmethod
    def calculate_furniture_length(row):
        """Return the largest of the item's width, depth, height and diameter.

        Dimensions that are missing, non-numeric or NaN are ignored, so a NaN
        in one column can no longer mask the others. Returns ``None`` when no
        usable dimension is present.
        """
        dimensions = []
        for column in Utils._FURNITURE_DIMENSION_COLUMNS:
            try:
                number = float(row[column])
            except (LookupError, TypeError, ValueError):
                continue
            if number != number:  # NaN is the only value not equal to itself
                continue
            dimensions.append(number)
        return max(dimensions) if dimensions else None

    @staticmethod
    def convert_country_name_to_iso2(country_name):
        """Return the ISO 3166-1 alpha-3 code for ``country_name``.

        Despite the method name, the codes returned are three-letter
        (alpha-3) codes. Returns ``None`` for names not in the lookup table.
        """
        if isinstance(country_name, str):
            country_name = country_name.strip()
        try:
            return Utils._COUNTRY_ALPHA_3[country_name]
        except (KeyError, TypeError):
            return None

    @staticmethod
    def convert_ul_certified(ul):
        """Map a safety-rating value to a UL-certified flag.

        Returns the string ``'True'`` for ``"UL"`` and ``"UL/CUL"`` and the
        boolean ``False`` for anything else.
        """
        # NOTE(review): the mixed str/bool return types are preserved from the
        # original implementation; confirm which type consumers expect.
        ref = {
            "UL": 'True',
            "UL/CUL": 'True',
        }
        try:
            return ref[ul]
        except (KeyError, TypeError):
            return False