Datafest_24/Extract_comments_from_response_csv.py at main · JapjotS/Datafest_24 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import pandas as pd
from tqdm import tqdm


# CHANGE ME: point `filepath` below at the responses.csv file on your machine.

# Location of the responses CSV. This is an absolute path on the original
# author's machine, so adjust it to your own directory layout before running.
filepath = 'C:\\Users\\Japjot\\Downloads\\OneDrive_1_3-22-2024\\full_03_04\\responses.csv'

# Read the entire CSV into a DataFrame. Passing low_memory=False stops pandas
# from inferring column types chunk by chunk, which is what triggers the
# mixed-type DtypeWarning.
df = pd.read_csv(
    filepath,
    low_memory=False,
)

# Define a function to safely extract comments
def extract_comments(x):
    """Collapse a '#'-separated response string into one space-joined string.

    The value is split on '#', each piece is stripped of surrounding
    whitespace, pieces that end up empty are dropped, and the rest are joined
    with a single space.

    Args:
        x: The cell value to process. Anything that is not a ``str`` (for
            example ``None`` or a float NaN) is treated as having no text.

    Returns:
        The joined text, or an empty string when ``x`` is not a string or
        contains no non-blank pieces.
    """
    # Guard clause: non-string values have no .split(), so bail out early.
    if not isinstance(x, str):
        return ''

    trimmed_pieces = (piece.strip() for piece in x.split('#'))
    # filter(None, ...) discards the empty strings left by blank segments.
    return ' '.join(filter(None, trimmed_pieces))

# Run extract_comments over every value in the 'response' column, producing a
# Series of cleaned-up strings (empty for non-string cells).
comments = df['response'].apply(extract_comments)

# Write every comment to comments.txt, numbered from 1 and separated by a
# blank line. Opening with mode 'w' replaces any existing file.
with open('comments.txt', 'w', encoding='utf-8') as file:
    # tqdm wraps the numbered iterator to display a progress bar; the total is
    # passed explicitly because enumerate() does not expose a length.
    file.writelines(
        f"Comment {i}: {comment}\n\n"
        for i, comment in tqdm(
            enumerate(comments, start=1),
            total=len(comments),
            desc="Writing comments",
        )
    )