-
Notifications
You must be signed in to change notification settings - Fork 37
Open
Description
The idea was to convert the Freecam and Tool archive spreadsheet into a Markdown file.
The code snippet:
import re
import os
import argparse
import pandas as pd
# File extensions (without the leading dot) accepted for the input file.
input_formats_supported = ["csv", "xlsx"]
# File extensions (without the leading dot) accepted for the output file.
output_formats_supported = ["md", "html"]
def read_file(file_name):
    """
    Read a CSV or Excel file and return its contents as a pandas DataFrame.

    The loader is chosen from the file extension. The extension is compared
    case-insensitively, so names such as ``DATA.CSV`` are accepted, and
    ``os.PathLike`` objects are accepted as well as plain strings.

    Args:
        file_name: Path of the file to read (``.csv`` or ``.xlsx``).

    Returns:
        The DataFrame produced by ``pd.read_csv`` or ``pd.read_excel``.

    Raises:
        FileNotFoundError: If ``file_name`` does not exist.
        ValueError: If the extension is neither ``.csv`` nor ``.xlsx``.
    """
    path_text = os.fspath(file_name)
    if not os.path.exists(path_text):
        raise FileNotFoundError('Input File does not exist.')

    # Lower-case only the copy used for matching; the real path is passed on
    # untouched because file systems may be case-sensitive.
    lowered = path_text.lower()
    if lowered.endswith('.csv'):
        return pd.read_csv(path_text)
    if lowered.endswith('.xlsx'):
        return pd.read_excel(path_text)
    raise ValueError('Input File type not supported')
def process_df(df):
    """
    Clean a DataFrame so that it can be rendered as a single-line-per-row table.

    This function is written to parse and clean Nico's Freecam-Tools
    Spreadsheet. Change it to parse and clean your own data.

    Missing values are replaced with empty strings and new-line characters
    inside string cells are replaced with a single space. Non-string cells
    (numbers, booleans, ...) are left untouched.

    Args:
        df: The DataFrame to clean. It is modified in place.

    Returns:
        The same DataFrame object, after cleaning.
    """
    def _flatten(value):
        # Only real strings can contain line breaks; anything else is passed
        # through so numeric cells are not lost.
        return value.replace('\n', ' ') if isinstance(value, str) else value

    for col in df.columns:
        # Replace NaN values with empty strings
        column = df[col].fillna('')
        # The .str accessor fails on numeric columns and blanks out non-string
        # cells in mixed columns, so map over text-like columns cell by cell.
        if pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column):
            column = column.map(_flatten)
        df[col] = column
    return df
def process_markdown_string(string):
    """
    Compact a markdown table string.

    Every run of hyphens is collapsed to a single hyphen and every run of
    whitespace other than new-line / carriage-return is collapsed to a single
    space. Line breaks are preserved.

    Args:
        string: The markdown text to clean.

    Returns:
        The cleaned markdown text.
    """
    # Removing unnecessary hyphens used to create the headers
    string = re.sub(r"-+", "-", string)
    # Cleaning whitespaces except newline and carriage return.
    # Raw string: "\S" is not a valid Python string escape.
    string = re.sub(r"[^\S\r\n]+", " ", string)
    return string
def save_file(df, file_name):
    """
    Render the dataframe as markdown or HTML and write it to ``file_name``.

    The format is picked from the file name suffix: ``.md`` produces a
    markdown table (post-processed by ``process_markdown_string``) and
    ``.html`` produces a centre-justified HTML table. The rendered text is
    also printed to standard output before it is written.

    Raises:
        Exception: If ``file_name`` ends with neither ``.md`` nor ``.html``.
    """
    wants_markdown = file_name.endswith('.md')
    if not wants_markdown and not file_name.endswith('.html'):
        raise Exception('Output File type not supported')

    if wants_markdown:
        rendered = process_markdown_string(df.to_markdown(index=False))
    else:
        rendered = df.to_html(index=False, justify='center')

    print("The final String.... \n\n" + rendered)

    # Characters that cannot be encoded are written as XML character references.
    with open(file_name, "w", encoding="utf-8", errors="xmlcharrefreplace") as output_file:
        output_file.write(rendered)
def read_and_convert(input_path, output_path):
    """
    Run the whole conversion: load ``input_path``, clean the resulting
    dataframe and save it to ``output_path``.
    """
    cleaned = process_df(read_file(input_path))
    save_file(cleaned, output_path)
if __name__ == "__main__":
    # Command-line entry point: two positional arguments, input then output.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "input_file",
        help="Path of the input file to convert. Supported formats are: " + ", ".join(input_formats_supported),
    )
    cli.add_argument(
        "output_file",
        help="Path of the output file. Supported formats are: " + ", ".join(output_formats_supported),
    )
    parsed = cli.parse_args()
    input_path, output_path = parsed.input_file, parsed.output_file

    # The extension is whatever follows the last dot in each path.
    if input_path.rsplit(".", 1)[-1] not in input_formats_supported:
        raise Exception("Input file format not supported. Only the following formats are supported: " + ", ".join(input_formats_supported))
    if output_path.rsplit(".", 1)[-1] not in output_formats_supported:
        raise Exception("Output file format not supported. Only the following formats are supported: " + ", ".join(output_formats_supported))

    read_and_convert(input_path, output_path)
I didn't create a new PR since I wasn't sure where (or even if) the code should be placed in the repo.
Metadata
Metadata
Assignees
Labels
No labels