Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions Blogpost_IRIS/FindFaultyLines.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import pandas as pd

# Location of the CSV file that is scanned for rows with an unexpected
# number of comma-separated values.
filename = 'C:/Users/SPECTRE/anaconda3/blog.datathinking.org/Blogpost_IRIS/final_file_PAT_Res.csv'

with open(filename, 'r') as file:
    # Pair every 1-based line number with the number of values obtained by
    # stripping the line and splitting it on commas.
    field_counts = (
        (position, len(text.strip().split(',')))
        for position, text in enumerate(file, start=1)
    )
    # Keep only the lines that do not have exactly 69 values
    # (adjust the expected number of columns accordingly).
    error_lines = [
        (position, count) for position, count in field_counts if count != 69
    ]

# Report the total first, then one line per offending row.
print(f"Found {len(error_lines)} lines with inconsistent column count:")
for position, count in error_lines:
    print(f"Line {position}: {count} values")



38 changes: 38 additions & 0 deletions Blogpost_IRIS/Fix_columnnames.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pandas as pd
from tabulate import tabulate

# Load the final file
df = pd.read_csv('final_file.csv')

# Add a new index column
#df['New Index'] = range(1, len(df) + 1)

# Specify the range of rows and columns you want to display
#subset = df.iloc[:10, :5]

# Convert the subset to a formatted table
#table = tabulate(subset, headers='keys', tablefmt='psql')

# Original column names: four descriptive columns followed by one column per
# year from 1960 through 2022.
column_names = ['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code',
                '1960', '1961', '1962', '1963', '1964', '1965', '1966', '1967', '1968',
                '1969', '1970', '1971', '1972', '1973', '1974', '1975', '1976', '1977',
                '1978', '1979', '1980', '1981', '1982', '1983', '1984', '1985', '1986',
                '1987', '1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995',
                '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004',
                '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013',
                '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022']

# Renamed column names: a copy of the list without its last entry.
renamed_column_names = column_names[:-1]  # Exclude the last column

# Shift the labels one position to the left: every slot except the last two
# takes the name that originally followed it. The previous version also
# contained an empty `df.rename(columns= )` call here, which is a syntax error
# and is not needed because the labels are assigned via `df.columns` below.
# NOTE(review): after this shift 'Country Name' is gone and '2021' appears
# twice (positions 64 and 65) - confirm this is the intended labelling.
column_names[:len(renamed_column_names) - 1] = renamed_column_names[1:]

# Apply the labels; pandas raises ValueError if the number of labels does not
# match the number of columns in the frame.
df.columns = column_names
print(column_names)

# Print the table
print(df.columns)
Empty file added Blogpost_IRIS/PAT_NRes.csv
Empty file.
267 changes: 267 additions & 0 deletions Blogpost_IRIS/PAT_Res.csv

Large diffs are not rendered by default.

17 changes: 17 additions & 0 deletions Blogpost_IRIS/Remove_extra_lines.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import csv

# Maximum number of fields kept per row; any further fields are dropped.
MAX_COLUMNS = 67


def truncate_rows(rows, max_columns=MAX_COLUMNS):
    """Return a list of rows, each cut down to at most *max_columns* fields.

    Args:
        rows: Iterable of sequences (for example the rows yielded by
            ``csv.reader``).
        max_columns: Number of leading fields to keep in every row.

    Returns:
        A new list; rows that are already short enough keep all their fields.
    """
    # Slicing is a no-op for rows with max_columns fields or fewer, so no
    # explicit length check is needed.
    return [row[:max_columns] for row in rows]


def main():
    """Read SUIC.csv, drop the extra fields and write modified_file_SUIC.csv."""
    # Read the CSV file. The csv module expects files opened with newline=''
    # so that line breaks inside quoted fields are parsed correctly.
    with open('C:/Users/SPECTRE/anaconda3/blog.datathinking.org/Blogpost_IRIS/SUIC.csv', 'r', newline='') as file:
        lines = truncate_rows(csv.reader(file))

    # Write the modified lines back to a new CSV file
    with open('modified_file_SUIC.csv', 'w', newline='') as file:
        csv.writer(file).writerows(lines)


if __name__ == "__main__":
    main()

33 changes: 33 additions & 0 deletions Blogpost_IRIS/Remove_quotation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Name of the file the cleaned data is written to.
output_file = 'final_file_PAT_Res.csv'


def clean_line(line):
    """Return one CSV line cleaned up, without a trailing line break.

    The line break is removed *before* splitting. Previously it stayed
    attached to the last field, so an empty last field was never replaced by
    '0' and every written row was followed by an extra blank line.

    Args:
        line: One raw line of the input file, with or without its line break.

    Returns:
        The line with quotation marks removed, the comma after
        "Patent applications" replaced by " -", and every empty field
        replaced by '0'. A blank input line yields an empty string.
    """
    # Remove the line break and the quotation marks
    cleaned_line = line.rstrip('\r\n').replace('"', '')

    # Replace commas after "Patent application" with "-"
    cleaned_line = cleaned_line.replace('Patent applications,', 'Patent applications -')

    # Keep blank lines blank instead of turning them into a lone '0' field.
    if not cleaned_line:
        return ''

    # Split the line on commas and replace empty values with zeros
    return ','.join(column if column != '' else '0'
                    for column in cleaned_line.split(','))


def main():
    """Clean modified_file_PAT_Res.csv line by line into the output file."""
    # Read the modified CSV file
    with open('modified_file_PAT_Res.csv', 'r') as file:
        lines = file.readlines()

    # Process the data and write to a new file, one output row per input row
    with open(output_file, 'w') as file:
        file.writelines(clean_line(line) + '\n' for line in lines)

    print(f"Modified data has been written to '{output_file}'.")


if __name__ == "__main__":
    main()






267 changes: 267 additions & 0 deletions Blogpost_IRIS/SUIC.csv

Large diffs are not rendered by default.

410 changes: 410 additions & 0 deletions Blogpost_IRIS/final_file_PAT_NRes.csv

Large diffs are not rendered by default.

314 changes: 314 additions & 0 deletions Blogpost_IRIS/final_file_PAT_Res.csv

Large diffs are not rendered by default.

18 changes: 18 additions & 0 deletions Blogpost_IRIS/first_trialFile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import pandas as pd

# Read the CSV file. quoting=3 is csv.QUOTE_NONE, so quotation marks are not
# interpreted by the parser and stay part of the values (and of the header).
df = pd.read_csv(
    'C:/Users/SPECTRE/anaconda3/blog.datathinking.org/Blogpost_IRIS/modified_file_PAT_Res.csv',
    delimiter=',',
    quotechar='"',
    quoting=3,
    skipinitialspace=True,
)

# Remove leading/trailing spaces from column names
# NOTE(review): with quoting disabled the names may still carry their
# quotation marks; only whitespace is stripped here - confirm that is intended.
df.columns = df.columns.str.strip()


def unquote(column):
    """Strip surrounding quotation marks from a text column.

    Columns that pandas parsed as non-text (for example all-numeric or
    all-empty columns) are returned unchanged, because the ``.str`` accessor
    raises AttributeError on them. Values that are empty after stripping
    (a quoted empty field such as ``""``) become missing values so that the
    later ``fillna(0)`` treats them like any other empty field.
    """
    if not pd.api.types.is_string_dtype(column.dtype):
        return column
    stripped = column.str.strip('"')
    return stripped.mask(stripped == '')


# Remove quotation marks from values
df = df.apply(unquote)

# Fill empty values with 0
df = df.fillna(0)

# Display a sample of the data
print(df.head())


205 changes: 205 additions & 0 deletions Blogpost_IRIS/modified_file_PAT_NRes.csv

Large diffs are not rendered by default.

157 changes: 157 additions & 0 deletions Blogpost_IRIS/modified_file_PAT_Res.csv

Large diffs are not rendered by default.

11 changes: 11 additions & 0 deletions Blogpost_IRIS/proov4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import pandas as pd
def strip_quotes_in_place(path):
    """Remove every double-quote character from the file at *path*.

    The file is read completely and closed before it is reopened for
    writing. Opening the same path for reading and for writing in a single
    ``with`` statement truncates it before anything is read, which leaves an
    empty file behind.

    Args:
        path: Path of the text file to rewrite.
    """
    with open(path, 'r') as infile:
        data = infile.read()
    with open(path, 'w') as outfile:
        outfile.write(data.replace("\"", ""))


def main():
    """Strip the quotes from PAT_Nres.csv and print rows 43-49 of the result."""
    strip_quotes_in_place(r'PAT_Nres.csv')
    # Read the modified file
    df = pd.read_csv('PAT_Nres.csv')
    # Print the table
    print(df[43:50])


if __name__ == "__main__":
    main()

1 change: 1 addition & 0 deletions modified_file_SUIC.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Country Name","Country Code""""","Indicator Name""""","Indicator Code""""","1960""""","1961""""","1962""""","1963""""","1964""""","1965""""","1966""""","1967""""","1968""""","1969""""","1970""""","1971""""","1972""""","1973""""","1974""""","1975""""","1976""""","1977""""","1978""""","1979""""","1980""""","1981""""","1982""""","1983""""","1984""""","1985""""","1986""""","1987""""","1988""""","1989""""","1990""""","1991""""","1992""""","1993""""","1994""""","1995""""","1996""""","1997""""","1998""""","1999""""","2000""""","2001""""","2002""""","2003""""","2004""""","2005""""","2006""""","2007""""","2008""""","2009""""","2010""""","2011""""","2012""""","2013""""","2014""""","2015""""","2016""""","2017""""","2018""""","2019""""","2020""""","2021""""","2022"""""