From ada4d367dcbf60315dd71c90e5af08dc95b3f505 Mon Sep 17 00:00:00 2001 From: wiidu Date: Tue, 1 Jul 2025 16:13:18 -0500 Subject: [PATCH 01/12] Created a parser for JSON that will load into a database --- json_parsing.py | 60 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 json_parsing.py diff --git a/json_parsing.py b/json_parsing.py new file mode 100644 index 0000000..6a55eea --- /dev/null +++ b/json_parsing.py @@ -0,0 +1,60 @@ +import json +import sqlalchemy as db +import pandas as pd +import os +import requests + +locations = '' +with open('sampleJSON.txt', 'r') as file: + locations = file.read() +response = json.loads(locations) + +class Parser: + def __init__(self, json): + if 'data' in json: + self.json = json['data'] + else: + self.json = json + self.df = pd.json_normalize(self.json) + self.key = os.getenv('TRIPADVISOR_API_KEY') + + def write_to_database(self, tb_name): + engine = db.create_engine('sqlite:///tripadv.db') + self.df.to_sql(tb_name, con=engine, if_exists='append', index=True) + + #removing duplicates, this should work + with engine.connect() as connection: + remove_dupes = f"""DELETE FROM {tb_name} + WHERE ROWID NOT IN ( + SELECT MAX(ROWID) + FROM {tb_name} + GROUP BY Name + );""" + connection.execute(db.text(remove_dupes)) + # query_result = + # connection.execute(db.text(f"SELECT * FROM {tb_name};")).fetchall() + # print(pd.DataFrame(query_result)) + self.get_ratings() + + def get_ratings(self): + if type(self.json) != type([]): + return + engine = db.create_engine('sqlite:///tripadv.db') + for location in self.json: + location_id = location['location_id'] + url = "https://api.content.tripadvisor.com/api/v1/location/search?language=en" + data = { + 'key': self.key, + 'locationId': location_id + } + r = requests.get(url, data=data).json() + dataFrame = pd.json_normalize(r) + dataFrame.to_sql("temp", con=engine, if_exists='append', index=True) + + + + + + +test = Parser(response) +test.get_ratings() \ No newline at end of file From 28cb07d4896094b751223d0d3eee4b5da42ecc54 Mon Sep 17 00:00:00 2001 From: wiidu Date: Tue, 1 Jul 2025 16:25:52 -0500 Subject: [PATCH 02/12] Adjusted to fit style guide --- json_parsing.py | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/json_parsing.py b/json_parsing.py index 6a55eea..da0eae2 100644 --- a/json_parsing.py +++ b/json_parsing.py @@ -17,13 +17,14 @@ def __init__(self, json): self.json = json self.df = pd.json_normalize(self.json) self.key = os.getenv('TRIPADVISOR_API_KEY') - + self.engine = db.create_engine('sqlite:///tripadv.db') + def write_to_database(self, tb_name): - engine = db.create_engine('sqlite:///tripadv.db') - self.df.to_sql(tb_name, con=engine, if_exists='append', index=True) + self.df.\ + to_sql(tb_name, con=self.engine, if_exists='append', index=True) - #removing duplicates, this should work - with engine.connect() as connection: + # removing duplicates, this should work + with self.engine.connect() as connection: remove_dupes = f"""DELETE FROM {tb_name} WHERE ROWID NOT IN ( SELECT MAX(ROWID) @@ -32,29 +33,31 @@ def write_to_database(self, tb_name): );""" connection.execute(db.text(remove_dupes)) # query_result = - # connection.execute(db.text(f"SELECT * FROM {tb_name};")).fetchall() + # connection.execute + # (db.text(f"SELECT * FROM {tb_name};")).fetchall() # print(pd.DataFrame(query_result)) self.get_ratings() def get_ratings(self): - if type(self.json) != type([]): + if not isinstance(self.json, type([])): return - engine = db.create_engine('sqlite:///tripadv.db') for location in self.json: location_id = location['location_id'] - url = "https://api.content.tripadvisor.com/api/v1/location/search?language=en" + url = """https://api.content.tripadvisor.com/ + api/v1/location/search?language=en""" data = { 'key': self.key, 'locationId': location_id } + r = requests.get(url, data=data).json() dataFrame = pd.json_normalize(r) - dataFrame.to_sql("temp", con=engine, if_exists='append', index=True) - - - + dataFrame.\ + to_sql("temp", con=self.engine, if_exists='append', index=True) - - -test = Parser(response) -test.get_ratings() \ No newline at end of file + join_command = """ CREATE TABLE recommendations AS + SELECT * FROM locations + JOIN temp ON locations.location_id = temp.location_id;""" + with self.engine.connect() as connection: + connection.execute(db.text(join_command)) + \ No newline at end of file From 62fd9ea3930d0a9d2258269ba8e7c1862b656820 Mon Sep 17 00:00:00 2001 From: wiidu Date: Tue, 1 Jul 2025 16:28:26 -0500 Subject: [PATCH 03/12] More fixes --- json_parsing.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/json_parsing.py b/json_parsing.py index da0eae2..b8e71f2 100644 --- a/json_parsing.py +++ b/json_parsing.py @@ -10,6 +10,7 @@ response = json.loads(locations) class Parser: + def __init__(self, json): if 'data' in json: self.json = json['data'] @@ -18,10 +19,10 @@ def __init__(self, json): self.df = pd.json_normalize(self.json) self.key = os.getenv('TRIPADVISOR_API_KEY') self.engine = db.create_engine('sqlite:///tripadv.db') - + def write_to_database(self, tb_name): self.df.\ - to_sql(tb_name, con=self.engine, if_exists='append', index=True) + to_sql(tb_name, con=self.engine, if_exists='append', index=True) # removing duplicates, this should work with self.engine.connect() as connection: @@ -32,7 +33,7 @@ def write_to_database(self, tb_name): GROUP BY Name );""" connection.execute(db.text(remove_dupes)) - # query_result = + # query_result = # connection.execute # (db.text(f"SELECT * FROM {tb_name};")).fetchall() # print(pd.DataFrame(query_result)) @@ -51,13 +52,12 @@ def get_ratings(self): } r = requests.get(url, data=data).json() - dataFrame = pd.json_normalize(r) - dataFrame.\ - to_sql("temp", con=self.engine, if_exists='append', index=True) + dFrame = pd.json_normalize(r) + dFrame.to_sql\ + ("temp", con=self.engine, if_exists='append', index=True) join_command = """ CREATE TABLE recommendations AS SELECT * FROM locations JOIN temp ON locations.location_id = temp.location_id;""" with self.engine.connect() as connection: connection.execute(db.text(join_command)) - \ No newline at end of file From b13b288a2cf1683ae4b95dd6c655a68fcc88de83 Mon Sep 17 00:00:00 2001 From: wiidu Date: Tue, 1 Jul 2025 16:31:24 -0500 Subject: [PATCH 04/12] I should just get a linter --- json_parsing.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/json_parsing.py b/json_parsing.py index b8e71f2..958e787 100644 --- a/json_parsing.py +++ b/json_parsing.py @@ -9,6 +9,7 @@ locations = file.read() response = json.loads(locations) + class Parser: def __init__(self, json): @@ -38,7 +39,7 @@ def write_to_database(self, tb_name): # (db.text(f"SELECT * FROM {tb_name};")).fetchall() # print(pd.DataFrame(query_result)) self.get_ratings() - + def get_ratings(self): if not isinstance(self.json, type([])): return @@ -55,7 +56,7 @@ def get_ratings(self): dFrame = pd.json_normalize(r) dFrame.to_sql\ ("temp", con=self.engine, if_exists='append', index=True) - + join_command = """ CREATE TABLE recommendations AS SELECT * FROM locations JOIN temp ON locations.location_id = temp.location_id;""" From c9e37486eecab471a88fadfc87066c5540bc0429 Mon Sep 17 00:00:00 2001 From: wiidu Date: Tue, 1 Jul 2025 16:33:07 -0500 Subject: [PATCH 05/12] Fixes --- json_parsing.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/json_parsing.py b/json_parsing.py index 958e787..1704320 100644 --- a/json_parsing.py +++ b/json_parsing.py @@ -54,11 +54,11 @@ def get_ratings(self): r = requests.get(url, data=data).json() dFrame = pd.json_normalize(r) - dFrame.to_sql\ - ("temp", con=self.engine, if_exists='append', index=True) + dFrame.to_sql("temp",\ + con=self.engine, if_exists='append', index=True) - join_command = """ CREATE TABLE recommendations AS - SELECT * FROM locations - JOIN temp ON locations.location_id = temp.location_id;""" + join_command = """CREATE TABLE recommendations AS + SELECT * FROM locations + JOIN temp ON locations.location_id = temp.location_id;""" with self.engine.connect() as connection: connection.execute(db.text(join_command)) From 54b0b845111676b0a1d64fc00f19726a3009ef0d Mon Sep 17 00:00:00 2001 From: wiidu Date: Tue, 1 Jul 2025 16:34:29 -0500 Subject: [PATCH 06/12] Fixing line break formatting --- json_parsing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/json_parsing.py b/json_parsing.py index 1704320..d1ec656 100644 --- a/json_parsing.py +++ b/json_parsing.py @@ -54,8 +54,8 @@ def get_ratings(self): r = requests.get(url, data=data).json() dFrame = pd.json_normalize(r) - dFrame.to_sql("temp",\ - con=self.engine, if_exists='append', index=True) + dFrame.\ + to_sql("temp",con=self.engine, if_exists='append', index=True) join_command = """CREATE TABLE recommendations AS SELECT * FROM locations From 225d0264ac31fe67b870cb41d7f02a21b10f9a07 Mon Sep 17 00:00:00 2001 From: wiidu Date: Tue, 1 Jul 2025 16:37:45 -0500 Subject: [PATCH 07/12] line breaks --- json_parsing.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/json_parsing.py b/json_parsing.py index d1ec656..c21cfc8 100644 --- a/json_parsing.py +++ b/json_parsing.py @@ -53,9 +53,8 @@ def get_ratings(self): } r = requests.get(url, data=data).json() - dFrame = pd.json_normalize(r) - dFrame.\ - to_sql("temp",con=self.engine, if_exists='append', index=True) + df = pd.json_normalize(r) + df.to_sql("temp", con=self.engine, if_exists='append', index=True) join_command = """CREATE TABLE recommendations AS SELECT * FROM locations From 1e564d29ba2d0790ae815dcb4b6939ce939da03a Mon Sep 17 00:00:00 2001 From: wiidu Date: Tue, 1 Jul 2025 16:45:34 -0500 Subject: [PATCH 08/12] Forgot to remove extraneous code --- json_parsing.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/json_parsing.py b/json_parsing.py index c21cfc8..6e67eb8 100644 --- a/json_parsing.py +++ b/json_parsing.py @@ -1,15 +1,8 @@ -import json import sqlalchemy as db import pandas as pd import os import requests -locations = '' -with open('sampleJSON.txt', 'r') as file: - locations = file.read() -response = json.loads(locations) - - class Parser: def __init__(self, json): From 898610dbd68e8943e586cf88f892c8ae8388077e Mon Sep 17 00:00:00 2001 From: wiidu Date: Tue, 1 Jul 2025 16:46:37 -0500 Subject: [PATCH 09/12] blank lines --- json_parsing.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/json_parsing.py b/json_parsing.py index 6e67eb8..408a12a 100644 --- a/json_parsing.py +++ b/json_parsing.py @@ -3,6 +3,8 @@ import os import requests + + class Parser: def __init__(self, json): From 17eb02ed98ab756192633b27b218eb3f72968973 Mon Sep 17 00:00:00 2001 From: wiidu Date: Tue, 1 Jul 2025 16:47:19 -0500 Subject: [PATCH 10/12] Somehow too many blank lines --- json_parsing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/json_parsing.py b/json_parsing.py index 408a12a..9503ff2 100644 --- a/json_parsing.py +++ b/json_parsing.py @@ -4,7 +4,6 @@ import requests - class Parser: def __init__(self, json): From 9944bba87e280d0b3ab6a1065619dbf3a0458d06 Mon Sep 17 00:00:00 2001 From: wiidu Date: Wed, 2 Jul 2025 11:48:24 -0500 Subject: [PATCH 11/12] Minor fixes, still working on this part --- combined.py | 2 ++ json_parsing.py | 34 +++++++++++++++++++++++++--------- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/combined.py b/combined.py index eea161f..9016b38 100644 --- a/combined.py +++ b/combined.py @@ -2,8 +2,10 @@ import requests from google import genai from google.genai import types +from dotenv import load_dotenv # Set environment variables +load_dotenv() TRIPADVISOR_API_KEY = os.getenv('TRIPADVISOR_API_KEY') GENAI_KEY = os.getenv('GENAI_KEY') diff --git a/json_parsing.py b/json_parsing.py index 9503ff2..863fc48 100644 --- a/json_parsing.py +++ b/json_parsing.py @@ -1,12 +1,15 @@ +import json import sqlalchemy as db import pandas as pd import os import requests +from dotenv import load_dotenv class Parser: def __init__(self, json): + load_dotenv() if 'data' in json: self.json = json['data'] else: @@ -17,7 +20,7 @@ def __init__(self, json): def write_to_database(self, tb_name): self.df.\ - to_sql(tb_name, con=self.engine, if_exists='append', index=True) + to_sql(tb_name, con=self.engine, if_exists='append', index=False) # removing duplicates, this should work with self.engine.connect() as connection: @@ -37,21 +40,34 @@ def write_to_database(self, tb_name): def get_ratings(self): if not isinstance(self.json, type([])): return + rows = [] for location in self.json: - location_id = location['location_id'] - url = """https://api.content.tripadvisor.com/ - api/v1/location/search?language=en""" + location_id = int(location['location_id']) + url = (f"https://api.content.tripadvisor.com/api/v1/location/" + f"{location_id}/details") + headers = {"accept": "application/json"} data = { - 'key': self.key, - 'locationId': location_id + 'key': self.key } - r = requests.get(url, data=data).json() - df = pd.json_normalize(r) - df.to_sql("temp", con=self.engine, if_exists='append', index=True) + r = requests.get(url, headers=headers, params=data) + if r.status_code == 200: + flattened = pd.json_normalize(r.json()) + print(flattened) + rows.append(flattened) + df = pd.concat(rows, ignore_index=True) + df = df.infer_objects() + print(df) + print(df.dtypes) + # df.to_sql("temp", con=self.engine, if_exists='append', index=False) join_command = """CREATE TABLE recommendations AS SELECT * FROM locations JOIN temp ON locations.location_id = temp.location_id;""" with self.engine.connect() as connection: connection.execute(db.text(join_command)) + +with open('sampleJSON.txt', 'r') as file: + location = file.read() +test = Parser(json.loads(location)) +test.write_to_database("locations") From 88ec3c259a9de6e9538beed62d7404ee6344cb1a Mon Sep 17 00:00:00 2001 From: wiidu Date: Wed, 2 Jul 2025 12:12:07 -0500 Subject: [PATCH 12/12] Replanning --- json_parsing.py | 66 ++++++++++++++++++++++++++----------------------- 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/json_parsing.py b/json_parsing.py index 863fc48..6d6ea3e 100644 --- a/json_parsing.py +++ b/json_parsing.py @@ -31,41 +31,45 @@ def write_to_database(self, tb_name): GROUP BY Name );""" connection.execute(db.text(remove_dupes)) - # query_result = - # connection.execute - # (db.text(f"SELECT * FROM {tb_name};")).fetchall() - # print(pd.DataFrame(query_result)) - self.get_ratings() + # self.get_ratings() - def get_ratings(self): - if not isinstance(self.json, type([])): - return - rows = [] - for location in self.json: - location_id = int(location['location_id']) - url = (f"https://api.content.tripadvisor.com/api/v1/location/" - f"{location_id}/details") - headers = {"accept": "application/json"} - data = { - 'key': self.key - } + # def get_ratings(self): + # if not isinstance(self.json, type([])): + # return + # rows = [] + # for location in self.json: + # location_id = int(location['location_id']) + # url = (f"https://api.content.tripadvisor.com/api/v1/location/" + # f"{location_id}/details") + # headers = {"accept": "application/json"} + # data = { + # 'key': self.key + # } - r = requests.get(url, headers=headers, params=data) - if r.status_code == 200: - flattened = pd.json_normalize(r.json()) - print(flattened) - rows.append(flattened) - df = pd.concat(rows, ignore_index=True) - df = df.infer_objects() - print(df) - print(df.dtypes) - # df.to_sql("temp", con=self.engine, if_exists='append', index=False) + # r = requests.get(url, headers=headers, params=data) + # if r.status_code == 200: + # flattened = pd.json_normalize(r.json()) + # rows.append(flattened) + # df = pd.concat(rows, ignore_index=True) + # for col in df.columns: + # if df[col].apply(lambda x: isinstance(x, (dict, list))).any(): + # df[col] = df[col].apply(json.dumps) + # df = df.infer_objects() + # print(df) + # print(df.dtypes) + # # df.to_sql("temp", con=self.engine, if_exists='append', index=False) - join_command = """CREATE TABLE recommendations AS - SELECT * FROM locations - JOIN temp ON locations.location_id = temp.location_id;""" + # join_command = """CREATE TABLE recommendations AS + # SELECT * FROM locations + # JOIN temp ON locations.location_id = temp.location_id;""" + # with self.engine.connect() as connection: + # connection.execute(db.text(join_command)) + # self.drop("temp") + + def drop(self, table_name): + command = f"DROP TABLE IF EXISTS {table_name}" with self.engine.connect() as connection: - connection.execute(db.text(join_command)) + connection.execute(db.text(command)) with open('sampleJSON.txt', 'r') as file: location = file.read()