Python script to convert from csv/xlsx to md/html

The idea was to convert the Freecam and Tool archive spreadsheet into a markdown file.

The code snippet:
```python
import re
import os
import argparse
import pandas as pd

# File extensions (without the leading dot) accepted for the input file.
input_formats_supported = ["csv", "xlsx"]
# File extensions (without the leading dot) accepted for the output file.
output_formats_supported = ["md", "html"]

def read_file(file_name):
    """
    Read a CSV or XLSX file and return its contents as a pandas DataFrame.

    The format is selected from the file extension, which is compared
    case-insensitively so that names such as ``DATA.CSV`` are accepted.

    Args:
        file_name: Path (string) of the file to read.

    Returns:
        The DataFrame produced by ``pd.read_csv`` or ``pd.read_excel``.

    Raises:
        FileNotFoundError: If ``file_name`` does not exist.
        ValueError: If the extension is neither ``.csv`` nor ``.xlsx``.

    Both exception types derive from ``Exception``, so callers that catch
    ``Exception`` keep working.
    """
    if not os.path.exists(file_name):
        raise FileNotFoundError('Input File does not exist.')

    # Only the comparison is lower-cased; the original path is what gets opened.
    lowered_name = file_name.lower()
    if lowered_name.endswith('.csv'):
        return pd.read_csv(file_name)
    if lowered_name.endswith('.xlsx'):
        return pd.read_excel(file_name)

    raise ValueError('Input File type not supported')


def process_df(df):
    """
    This function is written to parse and clean Nico's Freecam-Tools Spreadsheet. Change it to parse and clean your own data.

    Missing values are replaced with empty strings and every new-line
    character inside a string cell is replaced with a single space, so that
    each record stays on one table row.

    Args:
        df: The DataFrame to clean. It is modified in place.

    Returns:
        The same DataFrame object, after cleaning.
    """

    def _flatten_newlines(value):
        # Only text can contain new-lines; numbers, dates, etc. pass through
        # untouched instead of being dropped or raising.
        if isinstance(value, str):
            return value.replace('\n', ' ')
        return value

    # Replace NaN values with empty strings
    df.fillna('', inplace=True)

    # Replace new-line characters in each string cell with whitespaces.
    # A per-cell map is used instead of the ``.str`` accessor because ``.str``
    # raises AttributeError on non-string columns (e.g. an all-integer column).
    for col in df.columns:
        df[col] = df[col].map(_flatten_newlines)

    return df


# A pipe-table delimiter row such as "|:-----|------:|": every cell holds only
# hyphens with optional alignment colons.
_DELIMITER_ROW = re.compile(r"[ \t]*\|(?:[ \t]*:?-+:?[ \t]*\|)+[ \t]*")
_HYPHEN_RUN = re.compile(r"-+")
# Any run of whitespace that is neither a newline nor a carriage return.
_INLINE_WHITESPACE = re.compile(r"[^\S\r\n]+")


def process_markdown_string(string):
    """
    This function cleans the markdown string.

    Two clean-ups are applied:

    * On table delimiter rows (the line under the header) each run of
      hyphens is shortened to a single hyphen. Hyphens on every other line
      are left alone so that cell contents such as ``x--y`` or URLs are not
      altered.
    * Every run of whitespace other than newline / carriage return is
      collapsed to a single space.

    Args:
        string: The markdown text to clean.

    Returns:
        The cleaned markdown text, with line endings preserved.
    """
    cleaned_lines = []
    # keepends=True so the text can be re-joined without touching line endings.
    for line in string.splitlines(keepends=True):
        if _DELIMITER_ROW.fullmatch(line.rstrip("\r\n")):
            # Removing unnecessary hyphens used to create the headers
            line = _HYPHEN_RUN.sub("-", line)
        cleaned_lines.append(line)

    # Cleaning whitespaces except newline and carriage return
    return _INLINE_WHITESPACE.sub(" ", "".join(cleaned_lines))


def save_file(df, file_name):
    """
    Render the dataframe as markdown or HTML, chosen by the extension of
    file_name, print the rendered text and write it to file_name.

    Raises an Exception when file_name ends with neither '.md' nor '.html'.
    """

    as_markdown = file_name.endswith('.md')

    # Reject unknown extensions up front, before anything is rendered.
    if not as_markdown and not file_name.endswith('.html'):
        raise Exception('Output File type not supported')

    if as_markdown:
        rendered = process_markdown_string(df.to_markdown(index=False))
    else:
        rendered = df.to_html(index=False, justify='center')

    print("The final String.... \n\n" + rendered)

    # Characters that cannot be encoded are written as XML character references.
    with open(file_name, "w", encoding="utf-8", errors="xmlcharrefreplace") as handle:
        handle.write(rendered)


def read_and_convert(input_path, output_path):
    """
    Run the whole conversion: load input_path, clean the loaded data and
    write the result to output_path.
    """

    cleaned = process_df(read_file(input_path))
    save_file(cleaned, output_path)


if __name__ == "__main__":
    # Command-line entry point: validate both paths, then run the conversion.
    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", help = "Path of the input file to convert. Supported formats are: " + ", ".join(input_formats_supported))
    parser.add_argument("output_file", help = "Path of the output file. Supported formats are: " + ", ".join(output_formats_supported))

    args = parser.parse_args()

    input_path = args.input_file
    output_path = args.output_file

    # Test for a real ".ext" suffix. Splitting on "." and taking the last part
    # would accept a dot-less path literally named "csv" or "md", which the
    # reader / writer then reject (the writer only after all work is done).
    input_suffixes = tuple("." + ext for ext in input_formats_supported)
    output_suffixes = tuple("." + ext for ext in output_formats_supported)

    # parser.error prints the usage line plus the message and exits with
    # status 2, which is friendlier than an uncaught-exception traceback.
    if not input_path.endswith(input_suffixes):
        parser.error("Input file format not supported. Only the following formats are supported: " + ", ".join(input_formats_supported))

    if not output_path.endswith(output_suffixes):
        parser.error("Output file format not supported. Only the following formats are supported: " + ", ".join(output_formats_supported))

    read_and_convert(input_path, output_path)
```

I didn't create a new PR since I wasn't sure where (or even if) the code should be placed in the repo.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Python script to convert from csv/xlsx to md/html #126

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Python script to convert from csv/xlsx to md/html #126

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions