From 3ed73bcf743305c0781bcd06e0958d01e22735e8 Mon Sep 17 00:00:00 2001 From: Eshaal Syeda Date: Wed, 2 Jul 2025 17:37:54 +0000 Subject: [PATCH 1/7] added this file --- json_parsing.py | 77 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 json_parsing.py diff --git a/json_parsing.py b/json_parsing.py new file mode 100644 index 0000000..4b8b062 --- /dev/null +++ b/json_parsing.py @@ -0,0 +1,77 @@ +import json +import sqlalchemy as db +import pandas as pd +import os +import requests +from dotenv import load_dotenv + + +class Parser: + + def __init__(self, json): + load_dotenv() + if 'data' in json: + self.json = json['data'] + else: + self.json = json + self.df = pd.json_normalize(self.json) + self.key = os.getenv('TRIPADVISOR_API_KEY') + self.engine = db.create_engine('sqlite:///tripadv.db') + + def write_to_database(self, tb_name): + self.df.\ + to_sql(tb_name, con=self.engine, if_exists='append', index=False) + + # removing duplicates, this should work + with self.engine.connect() as connection: + remove_dupes = f"""DELETE FROM {tb_name} + WHERE ROWID NOT IN ( + SELECT MAX(ROWID) + FROM {tb_name} + GROUP BY Name + );""" + connection.execute(db.text(remove_dupes)) + # self.get_ratings() + + # def get_ratings(self): + # if not isinstance(self.json, type([])): + # return + # rows = [] + # for location in self.json: + # location_id = int(location['location_id']) + # url = (f"https://api.content.tripadvisor.com/api/v1/location/" + # f"{location_id}/details") + # headers = {"accept": "application/json"} + # data = { + # 'key': self.key + # } + + # r = requests.get(url, headers=headers, params=data) + # if r.status_code == 200: + # flattened = pd.json_normalize(r.json()) + # rows.append(flattened) + # df = pd.concat(rows, ignore_index=True) + # for col in df.columns: + # if df[col].apply(lambda x: isinstance(x, (dict, list))).any(): + # df[col] = df[col].apply(json.dumps) + # df = df.infer_objects() + # print(df) + # print(df.dtypes) + # # df.to_sql("temp", con=self.engine, if_exists='append', index=False) + + # join_command = """CREATE TABLE recommendations AS + # SELECT * FROM locations + # JOIN temp ON locations.location_id = temp.location_id;""" + # with self.engine.connect() as connection: + # connection.execute(db.text(join_command)) + # self.drop("temp") + + def drop(self, table_name): + command = f"DROP TABLE IF EXISTS {table_name}" + with self.engine.connect() as connection: + connection.execute(db.text(command)) + +with open('sampleJSON.txt', 'r') as file: + location = file.read() +test = Parser(json.loads(location)) +test.write_to_database("locations") \ No newline at end of file From 43a8c950731d6d6bfdfdbc95eec8b1e7dd00cd68 Mon Sep 17 00:00:00 2001 From: Eshaal Syeda Date: Wed, 2 Jul 2025 17:42:19 +0000 Subject: [PATCH 2/7] updates --- combined.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/combined.py b/combined.py index 5725354..6ee1fff 100644 --- a/combined.py +++ b/combined.py @@ -55,6 +55,3 @@ ) print(ai_response.text) - -parser = Parser(data_of_trip) -parser.write_to_database('locations') From 41b68e9b17a267eda12bebca1972bfd787b8b9f2 Mon Sep 17 00:00:00 2001 From: Eshaal Syeda Date: Wed, 2 Jul 2025 18:29:43 +0000 Subject: [PATCH 3/7] working code --- json_parsing.py | 75 ++++++++++++++++++------------------------------- 1 file changed, 27 insertions(+), 48 deletions(-) diff --git a/json_parsing.py b/json_parsing.py index 4b8b062..6e67eb8 100644 --- a/json_parsing.py +++ b/json_parsing.py @@ -1,15 +1,11 @@ -import json import sqlalchemy as db import pandas as pd import os import requests -from dotenv import load_dotenv - class Parser: def __init__(self, json): - load_dotenv() if 'data' in json: self.json = json['data'] else: @@ -20,7 +16,7 @@ def __init__(self, json): def write_to_database(self, tb_name): self.df.\ - to_sql(tb_name, con=self.engine, if_exists='append', index=False) + to_sql(tb_name, con=self.engine, if_exists='append', index=True) # removing duplicates, this should work with self.engine.connect() as connection: @@ -31,47 +27,30 @@ def write_to_database(self, tb_name): GROUP BY Name );""" connection.execute(db.text(remove_dupes)) - # self.get_ratings() - - # def get_ratings(self): - # if not isinstance(self.json, type([])): - # return - # rows = [] - # for location in self.json: - # location_id = int(location['location_id']) - # url = (f"https://api.content.tripadvisor.com/api/v1/location/" - # f"{location_id}/details") - # headers = {"accept": "application/json"} - # data = { - # 'key': self.key - # } - - # r = requests.get(url, headers=headers, params=data) - # if r.status_code == 200: - # flattened = pd.json_normalize(r.json()) - # rows.append(flattened) - # df = pd.concat(rows, ignore_index=True) - # for col in df.columns: - # if df[col].apply(lambda x: isinstance(x, (dict, list))).any(): - # df[col] = df[col].apply(json.dumps) - # df = df.infer_objects() - # print(df) - # print(df.dtypes) - # # df.to_sql("temp", con=self.engine, if_exists='append', index=False) - - # join_command = """CREATE TABLE recommendations AS - # SELECT * FROM locations - # JOIN temp ON locations.location_id = temp.location_id;""" - # with self.engine.connect() as connection: - # connection.execute(db.text(join_command)) - # self.drop("temp") - - def drop(self, table_name): - command = f"DROP TABLE IF EXISTS {table_name}" + # query_result = + # connection.execute + # (db.text(f"SELECT * FROM {tb_name};")).fetchall() + # print(pd.DataFrame(query_result)) + self.get_ratings() + + def get_ratings(self): + if not isinstance(self.json, type([])): + return + for location in self.json: + location_id = location['location_id'] + url = """https://api.content.tripadvisor.com/ + api/v1/location/search?language=en""" + data = { + 'key': self.key, + 'locationId': location_id + } + + r = requests.get(url, data=data).json() + df = pd.json_normalize(r) + df.to_sql("temp", con=self.engine, if_exists='append', index=True) + + join_command = """CREATE TABLE recommendations AS + SELECT * FROM locations + JOIN temp ON locations.location_id = temp.location_id;""" with self.engine.connect() as connection: - connection.execute(db.text(command)) - -with open('sampleJSON.txt', 'r') as file: - location = file.read() -test = Parser(json.loads(location)) -test.write_to_database("locations") \ No newline at end of file + connection.execute(db.text(join_command)) From c2c8c840910948208eaf87ce5e763e706828fb55 Mon Sep 17 00:00:00 2001 From: Eshaal Syeda Date: Wed, 2 Jul 2025 18:31:24 +0000 Subject: [PATCH 4/7] working code --- combined.py | 5 ++++ json_parsing.py | 75 +++++++++++++++++++++++++++++++------------------ 2 files changed, 53 insertions(+), 27 deletions(-) diff --git a/combined.py b/combined.py index 6ee1fff..68d7ebf 100644 --- a/combined.py +++ b/combined.py @@ -3,8 +3,10 @@ from google import genai from google.genai import types from json_parsing import Parser +from dotenv import load_dotenv # Set environment variables +load_dotenv() TRIPADVISOR_API_KEY = os.getenv('TRIPADVISOR_API_KEY') GENAI_KEY = os.getenv('GENAI_KEY') @@ -55,3 +57,6 @@ ) print(ai_response.text) + +parser = Parser(data_of_trip) +parser.write_to_database('locations') diff --git a/json_parsing.py b/json_parsing.py index 6e67eb8..4b8b062 100644 --- a/json_parsing.py +++ b/json_parsing.py @@ -1,11 +1,15 @@ +import json import sqlalchemy as db import pandas as pd import os import requests +from dotenv import load_dotenv + class Parser: def __init__(self, json): + load_dotenv() if 'data' in json: self.json = json['data'] else: @@ -16,7 +20,7 @@ def __init__(self, json): def write_to_database(self, tb_name): self.df.\ - to_sql(tb_name, con=self.engine, if_exists='append', index=True) + to_sql(tb_name, con=self.engine, if_exists='append', index=False) # removing duplicates, this should work with self.engine.connect() as connection: @@ -27,30 +31,47 @@ def write_to_database(self, tb_name): GROUP BY Name );""" connection.execute(db.text(remove_dupes)) - # query_result = - # connection.execute - # (db.text(f"SELECT * FROM {tb_name};")).fetchall() - # print(pd.DataFrame(query_result)) - self.get_ratings() - - def get_ratings(self): - if not isinstance(self.json, type([])): - return - for location in self.json: - location_id = location['location_id'] - url = """https://api.content.tripadvisor.com/ - api/v1/location/search?language=en""" - data = { - 'key': self.key, - 'locationId': location_id - } - - r = requests.get(url, data=data).json() - df = pd.json_normalize(r) - df.to_sql("temp", con=self.engine, if_exists='append', index=True) - - join_command = """CREATE TABLE recommendations AS - SELECT * FROM locations - JOIN temp ON locations.location_id = temp.location_id;""" + # self.get_ratings() + + # def get_ratings(self): + # if not isinstance(self.json, type([])): + # return + # rows = [] + # for location in self.json: + # location_id = int(location['location_id']) + # url = (f"https://api.content.tripadvisor.com/api/v1/location/" + # f"{location_id}/details") + # headers = {"accept": "application/json"} + # data = { + # 'key': self.key + # } + + # r = requests.get(url, headers=headers, params=data) + # if r.status_code == 200: + # flattened = pd.json_normalize(r.json()) + # rows.append(flattened) + # df = pd.concat(rows, ignore_index=True) + # for col in df.columns: + # if df[col].apply(lambda x: isinstance(x, (dict, list))).any(): + # df[col] = df[col].apply(json.dumps) + # df = df.infer_objects() + # print(df) + # print(df.dtypes) + # # df.to_sql("temp", con=self.engine, if_exists='append', index=False) + + # join_command = """CREATE TABLE recommendations AS + # SELECT * FROM locations + # JOIN temp ON locations.location_id = temp.location_id;""" + # with self.engine.connect() as connection: + # connection.execute(db.text(join_command)) + # self.drop("temp") + + def drop(self, table_name): + command = f"DROP TABLE IF EXISTS {table_name}" with self.engine.connect() as connection: - connection.execute(db.text(join_command)) + connection.execute(db.text(command)) + +with open('sampleJSON.txt', 'r') as file: + location = file.read() +test = Parser(json.loads(location)) +test.write_to_database("locations") \ No newline at end of file From 3fa6adab83efb403c5d8b9544f57152184359829 Mon Sep 17 00:00:00 2001 From: Eshaal Syeda Date: Wed, 2 Jul 2025 18:33:35 +0000 Subject: [PATCH 5/7] working code --- combined.py | 4 ++-- json_parsing.py | 52 +++++++++++++------------------------------------ 2 files changed, 15 insertions(+), 41 deletions(-) diff --git a/combined.py b/combined.py index 68d7ebf..73e4ad3 100644 --- a/combined.py +++ b/combined.py @@ -40,7 +40,7 @@ prompt = ( f"recommend the top {cate} " - f"in {cy}: (combined)." + f"in {cy}: {combined}." "based on these, recommend the better one and why" "two to three sentences" ) @@ -59,4 +59,4 @@ print(ai_response.text) parser = Parser(data_of_trip) -parser.write_to_database('locations') +parser.write_to_database('locations') \ No newline at end of file diff --git a/json_parsing.py b/json_parsing.py index 4b8b062..8e25e45 100644 --- a/json_parsing.py +++ b/json_parsing.py @@ -7,7 +7,6 @@ class Parser: - def __init__(self, json): load_dotenv() if 'data' in json: @@ -31,47 +30,22 @@ def write_to_database(self, tb_name): GROUP BY Name );""" connection.execute(db.text(remove_dupes)) - # self.get_ratings() - - # def get_ratings(self): - # if not isinstance(self.json, type([])): - # return - # rows = [] - # for location in self.json: - # location_id = int(location['location_id']) - # url = (f"https://api.content.tripadvisor.com/api/v1/location/" - # f"{location_id}/details") - # headers = {"accept": "application/json"} - # data = { - # 'key': self.key - # } - - # r = requests.get(url, headers=headers, params=data) - # if r.status_code == 200: - # flattened = pd.json_normalize(r.json()) - # rows.append(flattened) - # df = pd.concat(rows, ignore_index=True) - # for col in df.columns: - # if df[col].apply(lambda x: isinstance(x, (dict, list))).any(): - # df[col] = df[col].apply(json.dumps) - # df = df.infer_objects() - # print(df) - # print(df.dtypes) - # # df.to_sql("temp", con=self.engine, if_exists='append', index=False) + + def pull_list(self, table_name, city): + query = (f"SELECT * FROM {table_name} " + f"WHERE \"address_obj.city\" = '{city}' LIMIT 10;") + with self.engine.connect() as connection: + result = connection.execute(db.text(query)).fetchall() + print(pd.DataFrame(result)) - # join_command = """CREATE TABLE recommendations AS - # SELECT * FROM locations - # JOIN temp ON locations.location_id = temp.location_id;""" - # with self.engine.connect() as connection: - # connection.execute(db.text(join_command)) - # self.drop("temp") - def drop(self, table_name): command = f"DROP TABLE IF EXISTS {table_name}" with self.engine.connect() as connection: connection.execute(db.text(command)) -with open('sampleJSON.txt', 'r') as file: - location = file.read() -test = Parser(json.loads(location)) -test.write_to_database("locations") \ No newline at end of file +with open('sample2.txt', 'r') as file: + jackson = json.loads(file.read()) +# print(jackson) +test = Parser(jackson) +test.write_to_database("test") +test.pull_list("test", "Plano") From d08859c13db501f5fee82ea2e6bceb103604df3a Mon Sep 17 00:00:00 2001 From: Eshaal Syeda Date: Wed, 2 Jul 2025 18:35:17 +0000 Subject: [PATCH 6/7] working code --- json_parsing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/json_parsing.py b/json_parsing.py index 8e25e45..9163a78 100644 --- a/json_parsing.py +++ b/json_parsing.py @@ -30,7 +30,7 @@ def write_to_database(self, tb_name): GROUP BY Name );""" connection.execute(db.text(remove_dupes)) - + def pull_list(self, table_name, city): query = (f"SELECT * FROM {table_name} " f"WHERE \"address_obj.city\" = '{city}' LIMIT 10;") From 3d409b8e5a154b1a597adb396a8ec47040a679ba Mon Sep 17 00:00:00 2001 From: Eshaal Syeda Date: Wed, 2 Jul 2025 18:36:54 +0000 Subject: [PATCH 7/7] working code --- combined.py | 2 +- json_parsing.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/combined.py b/combined.py index 73e4ad3..1c8a4c7 100644 --- a/combined.py +++ b/combined.py @@ -59,4 +59,4 @@ print(ai_response.text) parser = Parser(data_of_trip) -parser.write_to_database('locations') \ No newline at end of file +parser.write_to_database('locations') diff --git a/json_parsing.py b/json_parsing.py index 9163a78..a4cc188 100644 --- a/json_parsing.py +++ b/json_parsing.py @@ -43,6 +43,7 @@ def drop(self, table_name): with self.engine.connect() as connection: connection.execute(db.text(command)) + with open('sample2.txt', 'r') as file: jackson = json.loads(file.read()) # print(jackson)