From 935a742b737caeb3c000b94c922850d633f12b2d Mon Sep 17 00:00:00 2001 From: Charles983 <97625324+Charles983@users.noreply.github.com> Date: Thu, 21 Nov 2024 16:11:33 -0500 Subject: [PATCH 01/15] Made changes to config to pull images and reviews --- src/configs.py | 2 + src/configs_mt.py | 34 +++-- src/formattr.py | 6 +- src/frontend/account.py | 2 +- src/frontend/favourites.py | 2 +- src/frontend/firebase.py | 2 +- src/frontend/logger.txt | 34 +++++ ...dc-firebase-adminsdk-60nyc-05d8e88f22.json | 13 ++ src/frontend/slash_user_interface.py | 140 +++++++++++++++++- src/scraper.py | 13 +- src/scraper_mt.py | 14 +- 11 files changed, 238 insertions(+), 24 deletions(-) create mode 100644 src/frontend/shopsync-9ecdc-firebase-adminsdk-60nyc-05d8e88f22.json diff --git a/src/configs.py b/src/configs.py index 585928e..438b7d8 100644 --- a/src/configs.py +++ b/src/configs.py @@ -59,6 +59,8 @@ 'title_indicator': 'h4.sku-header a', 'price_indicator': 'div.priceView-customer-price span', 'link_indicator': 'a.image-link', + 'image_indicator': 'image.primary-image', + 'review_indicator': 'span.ugc-c-review-average' } diff --git a/src/configs_mt.py b/src/configs_mt.py index 6d620a4..e186269 100644 --- a/src/configs_mt.py +++ b/src/configs_mt.py @@ -24,20 +24,24 @@ }, 'title_indicator': 'span.lh-title', 'price_indicator': 'div.lh-copy', - 'link_indicator': 'a' + 'link_indicator': 'a', + 'image_indicator': 'absolute top-0 left-0', + 'review_indicator': 'span.stars-container' } -AMAZON = { - 'site': 'amazon', - 'url': 'https://www.amazon.com/s?k=', - 'item_component': 'div', - 'item_indicator': { - 'data-component-type': 's-search-result' - }, - 'title_indicator': 'h2 a span', - 'price_indicator': 'span.a-price span', - 'link_indicator': 'h2 a.a-link-normal' -} +# AMAZON = { +# 'site': 'amazon', +# 'url': 'https://www.amazon.com/s?k=', +# 'item_component': 'div', +# 'item_indicator': { +# 'data-component-type': 's-search-result' +# }, +# 'title_indicator': 'h2 a span', +# 'price_indicator': 'span.a-price span', +# 'link_indicator': 'h2 a.a-link-normal', +# 'image_indicator': 'img.s-image', +# 'review_indicator': 'span.a-declarative a i span' +# } COSTCO = { 'site': 'costco', @@ -49,6 +53,8 @@ 'title_indicator': 'span a', 'price_indicator': 'div.price', 'link_indicator': 'span.description a', + 'image_indicator': 'img.product-image', + 'review_indicator': 'div.reviews-container' } BESTBUY = { @@ -61,6 +67,8 @@ 'title_indicator': 'h4.sku-title a', 'price_indicator': 'div.priceView-customer-price span', 'link_indicator': 'a.image-link', + 'image_indicator': 'img.product-image', + 'review_indicator': 'div.c-ratings-reviews p' } @@ -194,4 +202,4 @@ def run(self): self.result = items -CONFIGS = [WALMART, AMAZON, COSTCO, BESTBUY] +CONFIGS = [WALMART, COSTCO, BESTBUY] diff --git a/src/formattr.py b/src/formattr.py index c165bd6..76de949 100644 --- a/src/formattr.py +++ b/src/formattr.py @@ -15,7 +15,7 @@ """ -def formatResult(website, titles, prices, links): +def formatResult(website, titles, prices, links, image_url=None, review=None): """ The formatResult function takes the scraped HTML as input, and extracts the necessary values from the HTML code. Ex. extracting a price '$19.99' from @@ -34,8 +34,10 @@ def formatResult(website, titles, prices, links): "price": price, "link": f'www.{website}.com{link}', "website": website, + "image_url": image_url or "https://via.placeholder.com/150", + "review": review or "No Reviews" } - print(product['title']) + #print(product['title']) if website=='walmart': if link[0:4]=='http': product['link']=f'{link}' diff --git a/src/frontend/account.py b/src/frontend/account.py index acb488b..855f0a0 100644 --- a/src/frontend/account.py +++ b/src/frontend/account.py @@ -28,7 +28,7 @@ def initialize_firebase(mock=False): firebase_admin.initialize_app() return True - json_path = os.path.join(os.path.dirname(__file__), 'shopsync-se-firebase-adminsdk-nkzuw-e871ea65d4.json') + json_path = os.path.join(os.path.dirname(__file__), 'shopsync-9ecdc-firebase-adminsdk-60nyc-05d8e88f22.json') try: # Path to Firebase service account key cred = credentials.Certificate(json_path) diff --git a/src/frontend/favourites.py b/src/frontend/favourites.py index 4559a87..b30f16f 100644 --- a/src/frontend/favourites.py +++ b/src/frontend/favourites.py @@ -14,7 +14,7 @@ def initialize_firebase(mock=False): firebase_admin.initialize_app() return True - json_path = os.path.join(os.path.dirname(__file__), 'shopsync-se-firebase-adminsdk-nkzuw-e871ea65d4.json') + json_path = os.path.join(os.path.dirname(__file__), 'shopsync-9ecdc-firebase-adminsdk-60nyc-05d8e88f22.json') try: # Path to Firebase service account key cred = credentials.Certificate(json_path) diff --git a/src/frontend/firebase.py b/src/frontend/firebase.py index fa3dc03..cee8038 100644 --- a/src/frontend/firebase.py +++ b/src/frontend/firebase.py @@ -4,7 +4,7 @@ def initialize_firebase(): # Check if Firebase has already been initialized if not firebase_admin._apps: - cred = credentials.Certificate('shopsync-se-firebase-adminsdk-nkzuw-e871ea65d4.json') + cred = credentials.Certificate('shopsync-9ecdc-firebase-adminsdk-60nyc-05d8e88f22.json') # cred = credentials.Certificate('shopsync-se-firebase-adminsdk-nkzuw-ca6838f54f.json') firebase_admin.initialize_app(cred) \ No newline at end of file diff --git a/src/frontend/logger.txt b/src/frontend/logger.txt index 2cef09f..39b97d5 100644 --- a/src/frontend/logger.txt +++ b/src/frontend/logger.txt @@ -49,3 +49,37 @@ amazon query:iphone amazon query:iphone amazon query:iphone amazon query:ginger +amazon query:Icecream +amazon query:Icecream +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Apple +amazon query:Apple +amazon query:Banana +amazon query:Milk +amazon query:Milk +amazon query:Apple +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:milk +amazon query:Milk +amazon query:Apple +amazon query:Apple +amazon query:Apple +amazon query:Airpod +amazon query:Airpod +amazon query:Milk +amazon query:Milk diff --git a/src/frontend/shopsync-9ecdc-firebase-adminsdk-60nyc-05d8e88f22.json b/src/frontend/shopsync-9ecdc-firebase-adminsdk-60nyc-05d8e88f22.json new file mode 100644 index 0000000..d563401 --- /dev/null +++ b/src/frontend/shopsync-9ecdc-firebase-adminsdk-60nyc-05d8e88f22.json @@ -0,0 +1,13 @@ +{ + "type": "service_account", + "project_id": "shopsync-9ecdc", + "private_key_id": "05d8e88f22ac83c91b58b295a781a55ed4c4990e", + "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQC5R1i2UbzZ77zD\nOhrxbWA1VMtX2sZuplbpyZGHevDVBw3zeZBtQsdc8xTy/ooF8Z7WIejWH1nVGKYe\nmEMvl9g2B8mYrd1UNdEEYL0X/GRl6BVsog5n0TGyV4wwiUkZmFM9RClr0R4hVxhs\nCuAyQUpcr2Srqmd3SNllsFkWGpwvOKsKTSeCG/n2Ott7FqxyXR4OnHTrEjCO2brb\nGU5/J7d92XFUc/m9GGWALSyxel6qQ5moAinPX/oeCkiV8FVsUQsStUDkl7HjzsWg\n7eneBYiqzSacILCmqCqxdCe32M5MHWdi/yzSNSUW2xY2Bv81XOETOxmUBP3RAQeO\ng8ELG1NxAgMBAAECggEAGLpCR2hL9soy5zCvZ96xIxeuC+D8RXLt0UXq+6nsz9b2\nhbLeaQwAj5HhueHpieGK0V61NIlP8/YriunHYxK2SH/BksmhlcZDnydKFWlvUip8\nsYLvaUECNksjlVa9QC0+7sOqGORP9nh+n0IpqeC3i/yHSGH1wnXL40Z7R/fv4F1K\nz1UwTCwAFRApSRKylAX7ZM9CbDu3r4ZQD5g98CwDhRgvC+FOVPT5L0ogC5HFs/ZO\nLkWqQpqw9Oo5RNhlkjqVEqPNSKytb7esYp1zsA7jfWyspktoLHGGvbQvjDeCSCtC\nF3Fo+bgB2B/WTmb9D5WGCjgpxC1uRcwLw8OeZPhmAQKBgQDr87Xq7qoHq2xu64zH\nIpTQjywjRklwMlqsv0g0lsNHU+2BSzLn+KNtlu5Ad92wCXwWCbZ+twoTfcHf6XqL\nuVzkOACDzqamJtHH1YZqW+vC7Y+SKP91LoQS1QyYfZOFebZJEoHaogPYDqCcFsgu\nRHh7tS4YYdFmr3GNL7uWSJluowKBgQDJBWuKRdEel8FcNl5Q0n9sFtqQ1soym55J\naK+cyJHBJV1Mo5/VfI4njV0RrYoRvP5l02mLoi5N0XBEVIdHM+LEcCTRozsDahtj\nbNVcxXWcK0jWFHR6JZf7AWRco+Xz6guoOcC+MbmMP3AQftsz3t2E6Bf4j842GZwT\ngewBnGB62wKBgQDRZIac5xCdndOs2/0i+910+JoC8+1YVFeD1n0Nrn2+Xwz3IPUc\nR9tA9iCZtcZW4xPrutLpwSaABap4O4s1VFrLbaeHUW0zJmAlJ6kR2mFvq8MtwpRy\nOcWbsNZsvYSdf1X1oyb6D625n8GIw+8CoGEL583wdV6P8kKjOSkQRX1kYQKBgQC6\nRgx95+489BDYWwUQzc4HojHMj0x0kuGdUqWQmgb+PJp1HxZIJJAxtHvumqnbgA8Y\n2kvueU2BDLeEifOFFl5m+ygTHrfblSJmAn6/5bXzDeUDg5bfbSClFogilDnMyS8e\nJs4lMDyo6kv07ShAq58Hvm4gBVnnpdmL9hN09qwsiwKBgAecKahDlhqyLyrVky9U\nJkcvoiFCHdfaq9nVsBvGKPrhcgpRHcsdJW5DQWvgVuHxldX5oVcsWTK928t0g7NT\nK2lJUKPzWFjloTtVjwitLSeRzZwiQyuXCdcyQue6nbYFdBj81itYrHyxFUsEdNtO\nVh2hCgwDliUj3b0sgwVqtj7B\n-----END PRIVATE KEY-----\n", + "client_email": "firebase-adminsdk-60nyc@shopsync-9ecdc.iam.gserviceaccount.com", + "client_id": "111221445997110214622", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", + "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/firebase-adminsdk-60nyc%40shopsync-9ecdc.iam.gserviceaccount.com", + "universe_domain": "googleapis.com" +} diff --git a/src/frontend/slash_user_interface.py b/src/frontend/slash_user_interface.py index df0c7ca..8e78964 100644 --- a/src/frontend/slash_user_interface.py +++ b/src/frontend/slash_user_interface.py @@ -16,6 +16,8 @@ from main_streamlit import search_items_API import streamlit as st from firebase_admin import firestore, auth +from bs4 import BeautifulSoup +import requests # sys.path.append('../') # st.set_page_config(layout= "wide") # st.title("ShopSync") @@ -32,8 +34,144 @@ def create_app(db_client=None): db_client = get_firestore_client() def search_product(website, product_name): - return search_items_API(website, product_name) + results = search_items_API(website, product_name) + print(results) + return results + + +def search_walmart_product(query): + api_key = "YOUR_WALMART_API_KEY" + url = f"https://api.walmartlabs.com/v1/search" + params = { + "query": query, + "format": "json", + "apiKey": api_key + } + response = requests.get(url, params=params) + if response.status_code == 200: + data = response.json() + return [{ + "title": item.get("name"), + "price": f"${item.get('salePrice', 'N/A')}", + "link": item.get("productUrl"), + "image_url": item.get("thumbnailImage", "https://via.placeholder.com/150") + } for item in data.get("items", [])] + return [] + +def search_amazon_product(query): + url = "https://amazon24.p.rapidapi.com/api/product" + headers = { + "X-RapidAPI-Key": "YOUR_RAPIDAPI_KEY", + "X-RapidAPI-Host": "amazon24.p.rapidapi.com" + } + params = {"country": "US", "keyword": query} + response = requests.get(url, headers=headers, params=params) + if response.status_code == 200: + data = response.json() + return [{ + "title": product.get("title"), + "price": product.get("price"), + "link": product.get("link"), + "image_url": product.get("thumbnail") + } for product in data.get("docs", [])] + return [] + +def search_ebay_product(query): + app_id = "YOUR_EBAY_APP_ID" + url = "https://svcs.ebay.com/services/search/FindingService/v1" + params = { + "OPERATION-NAME": "findItemsByKeywords", + "SERVICE-VERSION": "1.0.0", + "SECURITY-APPNAME": app_id, + "RESPONSE-DATA-FORMAT": "JSON", + "keywords": query + } + response = requests.get(url, params=params) + if response.status_code == 200: + data = response.json() + return [{ + "title": item.get("title")[0], + "price": item.get("sellingStatus", [{}])[0].get("currentPrice", [{}])[0].get("__value__"), + "link": item.get("viewItemURL", ["#"])[0], + "image_url": item.get("galleryURL", ["https://via.placeholder.com/150"])[0] + } for item in data.get("findItemsByKeywordsResponse", [{}])[0].get("searchResult", [{}])[0].get("item", [])] + return [] + +def search_bestbuy_product(query): + api_key = "YOUR_BESTBUY_API_KEY" + url = f"https://api.bestbuy.com/v1/products((search={query}))" + params = { + "format": "json", + "apiKey": api_key + } + response = requests.get(url, params=params) + if response.status_code == 200: + data = response.json() + return [{ + "title": product.get("name"), + "price": product.get("salePrice"), + "link": product.get("url"), + "image_url": product.get("image") + } for product in data.get("products", [])] + return [] + +def search_target_product(query): + url = "https://target1.p.rapidapi.com/products/v3/search" + headers = { + "X-RapidAPI-Key": "YOUR_RAPIDAPI_KEY", + "X-RapidAPI-Host": "target1.p.rapidapi.com" + } + params = {"keyword": query, "count": 10} + + response = requests.get(url, headers=headers, params=params) + if response.status_code == 200: + data = response.json() + return [{ + "title": item.get("title"), + "price": item.get("price", {}).get("formatted_current_price"), + "link": item.get("url"), + "image_url": item.get("images", [{}])[0].get("base_url") + item.get("images", [{}])[0].get("primary") + } for item in data.get("data", {}).get("search", {}).get("products", [])] + return [] + +def search_costco_product(query): + url = "https://costco4.p.rapidapi.com/search" + headers = { + "X-RapidAPI-Key": "YOUR_RAPIDAPI_KEY", + "X-RapidAPI-Host": "costco4.p.rapidapi.com" + } + params = {"q": query} + + response = requests.get(url, headers=headers, params=params) + if response.status_code == 200: + data = response.json() + return [{ + "title": item.get("name"), + "price": item.get("price"), + "link": item.get("url"), + "image_url": item.get("image") + } for item in data.get("products", [])] + return [] + + +def fetch_image_from_bing(product_name): + api_key = "YOUR_BING_API_KEY" + search_url = "https://api.bing.microsoft.com/v7.0/images/search" + headers = {"Ocp-Apim-Subscription-Key": api_key} + params = {"q": product_name, "count": 1} # Limit to 1 image for speed + + try: + response = requests.get(search_url, headers=headers, params=params) + if response.status_code == 200: + data = response.json() + if "value" in data and len(data["value"]) > 0: + return data["value"][0]["contentUrl"] # Return the first image URL + except Exception as e: + print(f"Error fetching image: {e}") + + return "https://via.placeholder.com/150" # Default image if no result + def check_product_input(product): """Check if the product input is valid based on multiple criteria.""" # Check for non-empty input diff --git a/src/scraper.py b/src/scraper.py index 0188e1c..754c957 100644 --- a/src/scraper.py +++ b/src/scraper.py @@ -61,12 +61,19 @@ def run(self): title = res.select(self.config['title_indicator']) price = res.select(self.config['price_indicator']) link = res.select(self.config['link_indicator']) - # print("Title =======> ", title) - # print("Price =======> ", price) - # print("Link =======> ", link) + image = res.select(self.config['image_indicator']) + review = res.select(self.config['review_indicator']) product = form.formatResult(self.config['site'], title, price, link) + + print("Raw Image Data:", image) # Debugging image + print("Raw Review Data:", review) # Debugging review + + product['image_url'] = image[0]['src'] if image else "https://via.placeholder.com/150" + product['review'] = review[0].text.strip() if review else "No Reviews" + if product['title'] != '' and product['price'] != '' and product['link'] != '': products.append(product) + self.result = products def httpsGet(self, URL): diff --git a/src/scraper_mt.py b/src/scraper_mt.py index d1e1dc8..08d8037 100644 --- a/src/scraper_mt.py +++ b/src/scraper_mt.py @@ -67,10 +67,20 @@ def run(self): title = res.select(self.config['title_indicator']) price = res.select(self.config['price_indicator']) link = res.select(self.config['link_indicator']) - product = form.formatResult( - self.config['site'], title, price, link) + image = res.select(self.config['image_indicator']) + review = res.select(self.config['review_indicator']) + product = form.formatResult(self.config['site'], title, price, link) + + #print("Raw Title Data:", title) # Debugging image + print("Raw Image Data:", image) # Debugging image + print("Raw Review Data:", review) # Debugging review + + product['image_url'] = image[0]['src'] if image else "https://via.placeholder.com/150" + product['review'] = review[0].text.strip() if review else "No Reviews" + if product['title'] != '' and product['price'] != '' and product['link'] != '': products.append(product) + self.result = products def httpsGet(self, URL): From 1ae34990e33a8ae4772ef17845c51aa754a57e90 Mon Sep 17 00:00:00 2001 From: Shawty 2084 Date: Fri, 22 Nov 2024 17:15:30 -0500 Subject: [PATCH 02/15] first run --- .DS_Store | Bin 6148 -> 10244 bytes .github/.DS_Store | Bin 0 -> 6148 bytes .gitignore | 4 +++- def solution(n):.py | 36 ++++++++++++++++++++++++++++++++++++ src/.DS_Store | Bin 6148 -> 6148 bytes src/frontend/.DS_Store | Bin 0 -> 6148 bytes tempCodeRunnerFile.python | 21 +++++++++++++++++++++ 7 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 .github/.DS_Store create mode 100644 def solution(n):.py create mode 100644 src/frontend/.DS_Store create mode 100644 tempCodeRunnerFile.python diff --git a/.DS_Store b/.DS_Store index 7246d7731ee418d64844767164bfb40905491063..8aa7d97e5eb85404def6bd7e3877e56a0645e8b3 100644 GIT binary patch literal 10244 zcmeHM&u<$=6n^VCjqLypP!u>++6z)asg*QM(jY)i>VQ%Tjp7o7C`xVb)^V}D#y^sV zG$prIRDuIs3J9qjs5n%G1HH7O912o7a^W8+SKx#M7x>=nX8mTfZpr}(wY!#g);r&u zdGC8aW@k-A#&p{nAu@?5fiJfM58xDXa6f;y95L_C0$Ky|L`ABQjdQtD+JcU5L<~d> zL<~d>L<~d>{2v&=n$1U|-)*CF#6ZMA#DLEL*9Q|{ZZlrH;kK%D;H0(yXydr84qRg& zpkz$1&3NsG+bU6v>Fyp3Pt@?X7=|m3ev{#7GhVylwyroCt~eRq%!ao^G1TmkGZmbS zjN3-%h=GWKtqkzqJxOWavr|Lv-$!yqr&@OfdcF!rRh?|MP;mkU$!Eo4))rIn|uVusX84^x}&5%#&;RkCk1_))PkRE%2O5ZS^}H^^LjU& z)a|$TeBDCFlp1b_m3n%$pY2_SrbTkdB3tqX?dI?eAf#^lKJazhg6ioe|v;!PB@v$M(3}CO^30qic*1d6W73!68vo$>Y zvk~iw_wMLRBvPr=gQ+)jOU_EJm2b71)A`05{A(`XS}J(gM;)i)U*`)8WqW0DVAnyX z*2>xZuWR}^_{iT>t`+~lgaQ_SK8_R_Rk;VN+U#bYP#AE_Dh!@e(9sl zRM5tdVb%?!xiigmNY`UI*QiyT8ozF2-3RC(lC^=rtHN<6&Cn^h&xY%paK1w|-I2y_ z$VjJeE3es2gKKv)r{v7*og#QsbfiwTEAwAnj(-so8 zN>+fI^8s#lLgom)LPu$yW@(yU#nmi+zZfe0XF&Q>9-%$Z<0*Ou zqijt|B%1N7NXistAzh@mFjfPjmHCCUu6K-MLHaa~nOP>7#8be1CdT%0x{n__2Atm~ zefjR3AJ?M4Q>4QBb>9qQXq0tkF$XrM=rBGZz923*QrAA+0oe?t_hxm$?~^kebrX3~QQ1 zq}#ND?BklmOL-tBRqRS`LnJiK>GtLDais;%-|)E(8BbbrXQXD4yRk}OIp=Cy49`Vg zRE>u;54iY~v`_aHM|d}+Zt?gzSlsK)i*cl-?|g2i>TtBCF*?WIYKP{iDP%@vxh zDyWAk8+Y>Psp2lL$QJ7OUK*!i{90A`31G*78KDUpg|tlvpvOsIL)UI?ZAb!DQS&T8 z2KAl-^+3g#)V>E8YTPfn)X zp|R`VTyXa)UauC6yMD)@^aNbOp;7mj@p-tYh7T`;FMv+|-&HvbNOnlzK}RK`Tu6J> zh#@6N1*kM%AT@jGuP>pQ=#@5~zWRBmy!*vY+8vJ2Y&TSS->odQU}s*H&dq?Us~{?& zm~wEH;L1gO3ka4mFahUNDd1QRc4-+idp{pNuJI`B?`Mi4+am_x|cKxUCY0oDxT? m#L>EM^Z&Cy1Jo3`GpdDaFY-N%{FXj2jD|vn*!k;1C391_A+YAmIu!bYtOn a=E?jjjv!k>#Bb5PgeiAR0qyCFc(?hSKgJP?%_9L~9XIG;%qjQQE7l?2M&}f5Bfs=kM~( z?h1E+U}cP%A+v99c4l|>-N!NjOzpl?0ZIT0RKZ-4%?y!o(J9G!%No(xIWBP5>sFfQ zanzskR>NOpK1Us-<(EbN={$EfM|sX&BZ|j)|1>7O4c&Dx=`aw4CBIS54>OKF>&a^ zVSMpn{L02xD8{bN`2*_?6FSsZ8BhkY3@o|JlAQmI@9Y0;kX|VR%D}&3z~t-AdW~E1 zXY17F 0: + unique_bytes += 1 + # Check if this index corresponds to the end of a segment + if i in segment_ends: + result.append(unique_bytes) + + return result + +# Example Input +segments = [[1, 9], [1, 3], [8, 15], [6, 9], [2, 5]] + +# Example Output +print(solution(segments)) # Expected: [9, 3, 15, 9, 5] diff --git a/src/.DS_Store b/src/.DS_Store index 403f52318f17caf325d37542fde3e6a54abc58c6..65296725ac7544d1bf46c550b6f30e2db202ca77 100644 GIT binary patch literal 6148 zcmeHK!AiqG5S^_RB1Nn>kNJiEK`ikk2wL!HHHq3o+6}gfB1o?K3F65Q5c~pfJ^KlM zhzH;7E^U^yf)^2)f!Vj&oyp9;4Z9g4Qr*j}L)0ds0Ls`{L$g45p0y?oH?j^?<{c@W zP)Y+jrGCv?2L2)gymu|^R-X#G!QRdHub-cHdZ`{3HtqX|<(5Mo$4RaeSOn8#zj(Z! zZhvQ;{lK~yRI_;JWCA*)3(_>A3BI%uW!X5*6RmT6g!M^p8S3b6kxg|0d3*Qp$u6K4 zXsG9@zl?6*+(K)1Uef9VA!(I*l!M+e4ZMstXwRRm=JGjtK6t${WLEsESuD2j;%`PA zUql!X284kM1AJUUf|f;Tg#lqe7??4@=R*i(j2xC0&C!9zlmNgchK-=lwFLWw4kL%9 zMa)2ybp={iCJzu~2NHo}wrt0|NsP3otO0Fr+f%F{GpvC+AEoRA*$IyoX6)^A~1e qmQ5@k%$wOc_&I>OHh*OP&ODi4M3)n!@cF(!F;EYKSh3&)9Eax*pAiER zLP7}HQu3U{c05m-BqkzWT(9RuGa{LUQ*VXlE+pS*%{+hxaMXzs*be6a8QQoOM4j`&H9_Gr9lI z@XL%A`9qf&1p~prKVyI=^|D^zqx^3D@Og6ACbV-j5wR Date: Sat, 23 Nov 2024 12:41:23 -0500 Subject: [PATCH 03/15] Fixed Walmart Display --- src/frontend/logger.txt | 7 +++++++ src/frontend/slash_user_interface.py | 5 +++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/frontend/logger.txt b/src/frontend/logger.txt index 39b97d5..8ad06f9 100644 --- a/src/frontend/logger.txt +++ b/src/frontend/logger.txt @@ -83,3 +83,10 @@ amazon query:Airpod amazon query:Airpod amazon query:Milk amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk diff --git a/src/frontend/slash_user_interface.py b/src/frontend/slash_user_interface.py index 8e78964..cd9d2e4 100644 --- a/src/frontend/slash_user_interface.py +++ b/src/frontend/slash_user_interface.py @@ -449,10 +449,11 @@ def get_random_value_from_list(lst): dataframe.insert(0, 'Product', product_column) dataframe['Price'] = dataframe['Price'].apply( - lambda x: float(f'{x:.2f}')) + lambda x: float(f'{float(x):.2f}') if pd.notnull(x) and str(x).replace('.', '', 1).isdigit() else None + ) # dataframe = dataframe.sort_values(by='Price', ascending=True) dataframe = dataframe.reset_index(drop=True) - dataframe['Price'] = [f'{x:.2f}' for x in dataframe['Price']] + dataframe['Price'] = dataframe['Price'].apply(lambda x: f'{x:.2f}' if x is not None else 'N/A') def add_http_if_not_present(url): if url.startswith('http://') or url.startswith('https://'): From 46b9b87157b6a5c74c4ac0ba7b5bb8afb68a0aa5 Mon Sep 17 00:00:00 2001 From: Charles983 <97625324+Charles983@users.noreply.github.com> Date: Sat, 23 Nov 2024 15:14:44 -0500 Subject: [PATCH 04/15] Configured Best Buy, Walmart, and made progress on Target and Costco --- src/frontend/logger.txt | 53 ++++++++++++++++++++++++++++ src/frontend/slash_user_interface.py | 11 +++--- 2 files changed, 60 insertions(+), 4 deletions(-) diff --git a/src/frontend/logger.txt b/src/frontend/logger.txt index 8ad06f9..228d366 100644 --- a/src/frontend/logger.txt +++ b/src/frontend/logger.txt @@ -90,3 +90,56 @@ amazon query:Milk amazon query:Milk amazon query:Milk amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Apple +amazon query:Apple +amazon query:Milk +amazon query:Milk +amazon query:Iphone +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Apple +amazon query:Apple +amazon query:Apple +amazon query:Apple +amazon query:Milk +amazon query:Milk +amazon query:Phone +amazon query:Apple +amazon query:Apples +amazon query:Apples +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk diff --git a/src/frontend/slash_user_interface.py b/src/frontend/slash_user_interface.py index cd9d2e4..ac76358 100644 --- a/src/frontend/slash_user_interface.py +++ b/src/frontend/slash_user_interface.py @@ -416,17 +416,20 @@ def get_random_value_from_list(lst): description.append(result['title']) url.append(result['link']) price_str = result['price'] - rating_value = get_random_value_from_list(my_list) - + rating_value = result.get('review', '0') # Safely access 'review' + print(rating_value) + image = result['image_url'] # Clean and extract price clean_price_str = re.sub(r'[^\d\.\,]', '', price_str) # Remove unwanted characters match = re.search(r'(\d{1,3}(?:,\d{3})*(?:\.\d{1,2})?)', clean_price_str) - + rating_matches = re.findall(r"\d+(?:\.\d+)?", rating_value) + print(rating_matches) + rating_float = [float(match) for match in rating_matches[:1]] if match: price_str = match.group(0).replace(',', '') # Remove commas for conversion price_f = float(price_str) price.append(price_f) - rating.append(rating_value) # Append rating only if price is valid + rating.append(rating_float) # Append rating only if price is valid else: print("Unable to extract a valid price from the string:", price_str) price.append(None) # Append None if price extraction fails From 270101999394ca36aa41821d4d7ac5fd426660fe Mon Sep 17 00:00:00 2001 From: Charles983 <97625324+Charles983@users.noreply.github.com> Date: Sat, 23 Nov 2024 17:42:48 -0500 Subject: [PATCH 05/15] Fixed amazon and added pictures and reviews --- src/configs_mt.py | 138 ++++++++++++++++++--------- src/frontend/logger.txt | 23 +++++ src/frontend/slash_user_interface.py | 69 +++++++++++--- src/scraper.py | 39 ++++---- src/scraper_mt.py | 16 ++-- 5 files changed, 198 insertions(+), 87 deletions(-) diff --git a/src/configs_mt.py b/src/configs_mt.py index e186269..d89592e 100644 --- a/src/configs_mt.py +++ b/src/configs_mt.py @@ -23,9 +23,9 @@ 'data-item-id': True }, 'title_indicator': 'span.lh-title', - 'price_indicator': 'div.lh-copy', + 'price_indicator': 'div[data-automation-id="product-price"] span.w_iUH7', 'link_indicator': 'a', - 'image_indicator': 'absolute top-0 left-0', + 'image_indicator': 'img.absolute.top-0.left-0', 'review_indicator': 'span.stars-container' } @@ -48,15 +48,29 @@ 'url': 'https://www.costco.com/CatalogSearch?dept=All&keyword=', 'item_component': 'div', 'item_indicator': { - 'class': 'product-tile-set' + 'data-testid': 'Grid' }, - 'title_indicator': 'span a', - 'price_indicator': 'div.price', - 'link_indicator': 'span.description a', - 'image_indicator': 'img.product-image', - 'review_indicator': 'div.reviews-container' + 'title_indicator': 'div[data-testid^="Text_ProductTile_"]', # Extract the title from the span + 'price_indicator': 'div.MuiTypography-root', # Extract the price element + 'link_indicator': 'a', # Anchor element for the product link + 'image_indicator': 'img', # Use `img` tag for the product image + 'review_indicator': 'div.product-rating' # Review container (verify its presence) } +# TARGET = { +# 'site': 'target', +# 'url': 'https://www.target.com/s?searchTerm=', +# 'item_component': 'div', +# 'item_indicator': { +# 'data-test': '@web/ProductCard/ProductCardVariantDefault' +# }, +# 'title_indicator': 'a[data-test="product-title"]', +# 'price_indicator': 'span[data-test="product-price"]', +# 'link_indicator': 'a[data-test="product-title"]', +# 'image_indicator': 'img[data-test="product-image"]', +# 'review_indicator': 'div[data-test="average-rating"]' +# } + BESTBUY = { 'site': 'bestbuy', 'url': 'https://www.bestbuy.com/site/searchpage.jsp?st=', @@ -93,43 +107,9 @@ def run(self): List of items from the dict """ - # api_url = 'https://redsky.target.com/redsky_aggregations/v1/web/plp_search_v1' - - # page = '/s/' + self.query - # params = { - # 'key': '5938CFDFD3FB4A7DB7C060583C86663C', - # 'channel': 'WEB', - # 'count': '24', - # 'default_purchasability_filter': 'false', - # 'include_sponsored': 'true', - # 'keyword': self.query, - # 'offset': '0', - # 'page': page, - # 'platform': 'desktop', - # 'pricing_store_id': '3991', - # 'useragent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:91.0) Gecko/20100101 Firefox/91.0', - # 'visitor_id': 'AAA', - # } - - # data = requests.get(api_url, params=params).json() - # items = [] - # if 'search' in data['data']: - # for p in data['data']['search']['products']: - # item = { - # 'timestamp': datetime.now().strftime("%d/%m/%Y %H:%M:%S"), - # 'title': html.unescape(p['item']['product_description']['title']), - # 'price': '$' + str(p['price']['current_retail']), - # 'website': 'target', - # #'link': shorten_url(p['item']['enrichment']['buy_url']) - # 'link': p['item']['enrichment']['buy_url'] - # } - # items.append(item) - - # self.result = items - # set up the request parameters params = { - 'api_key': '5938CFDFD3FB4A7DB7C060583C86663C', - 'search_term': 'Iphone', + 'api_key': 'A6D50D2FA74944AEB91D4B18CBFDE4B0', + 'search_term': self.query, 'type': 'search', 'sort_by': 'best_match' } @@ -186,7 +166,7 @@ def run(self): self.result = [] data = response.dict() - + print(data) items = [] for p in data['searchResult']['item']: item = { @@ -195,11 +175,77 @@ def run(self): 'price': '$' + p['sellingStatus']['currentPrice']['value'], 'website': 'ebay', #'link': shorten_url(p['viewItemURL']) - 'link': p['viewItemURL'] + 'link': p['viewItemURL'], + 'image_url': p['galleryURL'] if 'galleryURL' in p else 'https://via.placeholder.com/150', + 'review': p['sellerInfo']['positiveFeedbackPercent'] + '%' if 'sellerInfo' in p and 'positiveFeedbackPercent' in p['sellerInfo'] else 'No Reviews' } items.append(item) self.result = items + + +class scrape_amazon(Thread): + def __init__(self, query): + self.result = [] + self.query = query + super(scrape_amazon, self).__init__() + + def run(self): + """Scrape Amazon product data using Rainforest API + + Parameters + ---------- + query: str + Item to look for in the API + + Returns + ---------- + items: list + List of items from the API response + """ + API_KEY = '34C1B2689C074F3BB120B22625C22F72' + BASE_URL = 'https://api.rainforestapi.com/request' + + params = { + 'api_key': API_KEY, + 'type': 'search', + 'amazon_domain': 'amazon.com', + 'search_term': self.query, + } + + try: + # Send request to Rainforest API + response = requests.get(BASE_URL, params=params) + data = response.json() + + # Extract items from response + items = [] + if "search_results" in data: + for product in data["search_results"]: + try: + item = { + 'timestamp': datetime.now().strftime("%d/%m/%Y %H:%M:%S"), + 'title': product['title'] if 'title' in product else 'No Title', + 'price': f"${product['price']['value']}" if 'price' in product and 'value' in product['price'] else 'No Price', + 'currency': product['price']['currency'] if 'price' in product and 'currency' in product['price'] else 'USD', + 'website': 'amazon', + 'link': product['link'] if 'link' in product else 'No Link', + 'review': f"{product['rating']} stars" if 'rating' in product else 'No Rating', + 'image_url': product['image'] if 'image' in product else 'https://via.placeholder.com/150' + + } + items.append(item) + except Exception as e: + print(f"Error processing product: {e}") + continue + + self.result = items + + except Exception as e: + print(f"Error fetching data from Amazon API: {e}") + self.result = [] + + CONFIGS = [WALMART, COSTCO, BESTBUY] diff --git a/src/frontend/logger.txt b/src/frontend/logger.txt index 228d366..8ad7ed9 100644 --- a/src/frontend/logger.txt +++ b/src/frontend/logger.txt @@ -143,3 +143,26 @@ amazon query:Milk amazon query:Milk amazon query:Milk amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk +amazon query:Milk diff --git a/src/frontend/slash_user_interface.py b/src/frontend/slash_user_interface.py index ac76358..5b7ad7c 100644 --- a/src/frontend/slash_user_interface.py +++ b/src/frontend/slash_user_interface.py @@ -35,7 +35,7 @@ def create_app(db_client=None): def search_product(website, product_name): results = search_items_API(website, product_name) - print(results) + #print(results) return results @@ -390,6 +390,7 @@ def reset_button(): site = [] # rakuten = [] rating = [] + image = [] import random @@ -418,7 +419,7 @@ def get_random_value_from_list(lst): price_str = result['price'] rating_value = result.get('review', '0') # Safely access 'review' print(rating_value) - image = result['image_url'] + image.append(result['image_url']) # Clean and extract price clean_price_str = re.sub(r'[^\d\.\,]', '', price_str) # Remove unwanted characters match = re.search(r'(\d{1,3}(?:,\d{3})*(?:\.\d{1,2})?)', clean_price_str) @@ -441,7 +442,22 @@ def get_random_value_from_list(lst): if len(price): dataframe = pd.DataFrame( - {'Description': description, 'Price': price, 'Link': url, 'Website': site,'Ratings': rating}) + {'Image_URL': image, 'Description': description, 'Price': price, 'Link': url, 'Website': site,'Ratings': rating}) + + def add_http_if_not_present(url): + if url.startswith('http://') or url.startswith('https://'): + return url + else: + return 'https://' + url + dataframe['Link'] = dataframe['Link'].apply(add_http_if_not_present) + + dataframe['Image'] = dataframe.apply( + lambda row: f'', + axis=1 + ) + + dataframe = dataframe.drop(["Image_URL", "Link"], axis=1) + dataframe['Description'] = dataframe['Description'].apply( split_description) dataframe['Product'] = dataframe['Description'].str.split( @@ -458,13 +474,23 @@ def get_random_value_from_list(lst): dataframe = dataframe.reset_index(drop=True) dataframe['Price'] = dataframe['Price'].apply(lambda x: f'{x:.2f}' if x is not None else 'N/A') - def add_http_if_not_present(url): - if url.startswith('http://') or url.startswith('https://'): - return url - else: - return 'https://' + url - dataframe['Link'] = dataframe['Link'].apply(add_http_if_not_present) + st.session_state['dataframe'] = dataframe + + styled_table = ( + dataframe.style + .set_properties(**{'text-align': 'center'}) + .set_table_styles([ + {"selector": "th", "props": [("text-align", "center")]}, + {"selector": "td", "props": [("text-align", "center")]} + ]) + ) + + # st.markdown( + # styled_table.to_html(escape=False), # Allow HTML content + # unsafe_allow_html=True + # ) + st.success("Data successfully scraped and cached.") st.session_state.dataframe = dataframe @@ -596,13 +622,26 @@ def get_button_indices(button_ix): ) # Display styled DataFrame - st.dataframe( - styled_df, - column_config={"Link": st.column_config.LinkColumn("URL to website")}, - use_container_width=True # Ensure the DataFrame uses the maximum width + # st.dataframe( + # styled_df, + # column_config={"Link": st.column_config.LinkColumn("URL to website")}, + # use_container_width=True # Ensure the DataFrame uses the maximum width + # ) + styled_table = ( + filtered_df.style + .set_properties(**{'text-align': 'center'}) + .set_table_styles([ + {"selector": "th", "props": [("text-align", "center")]}, + {"selector": "td", "props": [("text-align", "center")]} + ]) + ) + + st.markdown( + styled_table.to_html(escape=False), # Allow HTML content + unsafe_allow_html=True ) - st.markdown("

Add for favourites

", - unsafe_allow_html=True) + # st.markdown("

Add for favourites

", + # unsafe_allow_html=True) # st.write('Add for favorites', unsafe_allow_html=True) # ////////////////////////////////////////////////////////////////////////////////////////////// # Prints the websites names from filter and dataframe to check diff --git a/src/scraper.py b/src/scraper.py index 754c957..a172a31 100644 --- a/src/scraper.py +++ b/src/scraper.py @@ -13,7 +13,7 @@ # local imports import formattr as form -from configs_mt import AMAZON, WALMART, COSTCO, BESTBUY, scrape_ebay, scrape_target +from configs_mt import WALMART, COSTCO, BESTBUY, scrape_ebay, scrape_target class search(Thread): def __init__(self, query, config): @@ -65,11 +65,11 @@ def run(self): review = res.select(self.config['review_indicator']) product = form.formatResult(self.config['site'], title, price, link) - print("Raw Image Data:", image) # Debugging image - print("Raw Review Data:", review) # Debugging review + #print("Raw Image Data:", image) # Debugging image + #print("Raw Review Data:", review) # Debugging review product['image_url'] = image[0]['src'] if image else "https://via.placeholder.com/150" - product['review'] = review[0].text.strip() if review else "No Reviews" + product['review'] = review[0].text.strip() if review else "0 0 0" if product['title'] != '' and product['price'] != '' and product['link'] != '': products.append(product) @@ -137,14 +137,14 @@ def scrape(args, scrapers): i = 0 while i < len(scrapers): - if scrapers[i] == 'amazon': - t_az = search(query, AMAZON) - t_az.start() - i += 1 - if i == len(scrapers): - break + # if scrapers[i] == 'amazon': + # t_az = search(query, AMAZON) + # t_az.start() + # i += 1 + # if i == len(scrapers): + # break if scrapers[i] == 'bestbuy': - print("Bestbuy") + #print("Bestbuy") t_bb = search(query, BESTBUY) t_bb.start() i += 1 @@ -164,6 +164,7 @@ def scrape(args, scrapers): break if scrapers[i] == 'target': t_tg = scrape_target(query) + # t_tg = search(query, TARGET) t_tg.start() i += 1 if i == len(scrapers): @@ -181,14 +182,14 @@ def scrape(args, scrapers): i = 0 while i < len(scrapers) : - if scrapers[i] == 'amazon': - t_az.join() - i += 1 - for sort_by in args['sort']: - local = form.sortList(t_az.result, sort_by, args['des'])[:args.get('num', len(t_az.result))] - overall.extend(local) - if i == len(scrapers): - break + # if scrapers[i] == 'amazon': + # t_az.join() + # i += 1 + # for sort_by in args['sort']: + # local = form.sortList(t_az.result, sort_by, args['des'])[:args.get('num', len(t_az.result))] + # overall.extend(local) + # if i == len(scrapers): + # break if scrapers[i] == 'bestbuy': t_bb.join() i += 1 diff --git a/src/scraper_mt.py b/src/scraper_mt.py index 08d8037..f94b7f3 100644 --- a/src/scraper_mt.py +++ b/src/scraper_mt.py @@ -17,7 +17,7 @@ # local imports import formattr as form -from configs_mt import AMAZON, WALMART, COSTCO, BESTBUY, scrape_ebay, scrape_target +from configs_mt import WALMART, COSTCO, BESTBUY, scrape_target, scrape_ebay, scrape_amazon class search(Thread): @@ -44,6 +44,7 @@ def run(self): """ if self.config['site'] == 'costco': self.query = form.formatSearchQueryForCostco(self.query) + print(self.query) elif self.config['site'] == 'target': self.query = self.query else: @@ -64,6 +65,7 @@ def run(self): results = None products = [] for res in results: + #print(res) title = res.select(self.config['title_indicator']) price = res.select(self.config['price_indicator']) link = res.select(self.config['link_indicator']) @@ -71,12 +73,10 @@ def run(self): review = res.select(self.config['review_indicator']) product = form.formatResult(self.config['site'], title, price, link) - #print("Raw Title Data:", title) # Debugging image - print("Raw Image Data:", image) # Debugging image - print("Raw Review Data:", review) # Debugging review + product['image_url'] = image[0]['src'] if image else "https://via.placeholder.com/150" - product['review'] = review[0].text.strip() if review else "No Reviews" + product['review'] = review[0].text.strip() if review else "0 0 0" if product['title'] != '' and product['price'] != '' and product['link'] != '': products.append(product) @@ -109,6 +109,7 @@ def httpsGet(self, URL): page = s.get(URL, headers=headers) if page.status_code == 200: soup1 = BeautifulSoup(page.content, 'html.parser') + return BeautifulSoup(soup1.prettify(), 'html.parser') else: # TODO add logger @@ -145,13 +146,13 @@ def scrape(args, scrapers): i = 0 while i < len(scrapers): if scrapers[i] == 'amazon': - t_az = search(query, AMAZON) + t_az = scrape_amazon(query) t_az.start() i += 1 if i == len(scrapers): break if scrapers[i] == 'bestbuy': - print("Bestbuy") + #print("Bestbuy") t_bb = search(query, BESTBUY) t_bb.start() i += 1 @@ -171,6 +172,7 @@ def scrape(args, scrapers): break if scrapers[i] == 'target': t_tg = scrape_target(query) + # t_tg = search(query, TARGET) t_tg.start() i += 1 if i == len(scrapers): From c403fec97510824b3916bff2f378945a69d15f28 Mon Sep 17 00:00:00 2001 From: Charles983 <97625324+Charles983@users.noreply.github.com> Date: Sun, 24 Nov 2024 15:17:37 -0500 Subject: [PATCH 06/15] Fixed images fully --- src/frontend/account.py | 2 +- src/frontend/favourites.py | 4 +- src/frontend/firebase.py | 2 +- src/frontend/logger.txt | 18 +++++ ...dc-firebase-adminsdk-60nyc-a335ead1ea.json | 13 +++ src/frontend/slash_user_interface.py | 80 ++++++++++--------- 6 files changed, 77 insertions(+), 42 deletions(-) create mode 100644 src/frontend/shopsync-9ecdc-firebase-adminsdk-60nyc-a335ead1ea.json diff --git a/src/frontend/account.py b/src/frontend/account.py index 855f0a0..210e9ac 100644 --- a/src/frontend/account.py +++ b/src/frontend/account.py @@ -28,7 +28,7 @@ def initialize_firebase(mock=False): firebase_admin.initialize_app() return True - json_path = os.path.join(os.path.dirname(__file__), 'shopsync-9ecdc-firebase-adminsdk-60nyc-05d8e88f22.json') + json_path = os.path.join(os.path.dirname(__file__), 'shopsync-9ecdc-firebase-adminsdk-60nyc-a335ead1ea.json') try: # Path to Firebase service account key cred = credentials.Certificate(json_path) diff --git a/src/frontend/favourites.py b/src/frontend/favourites.py index b30f16f..a0a57f3 100644 --- a/src/frontend/favourites.py +++ b/src/frontend/favourites.py @@ -14,7 +14,7 @@ def initialize_firebase(mock=False): firebase_admin.initialize_app() return True - json_path = os.path.join(os.path.dirname(__file__), 'shopsync-9ecdc-firebase-adminsdk-60nyc-05d8e88f22.json') + json_path = os.path.join(os.path.dirname(__file__), 'shopsync-9ecdc-firebase-adminsdk-60nyc-a335ead1ea.json') try: # Path to Firebase service account key cred = credentials.Certificate(json_path) @@ -50,8 +50,8 @@ def app(firestore_client=None): # Create a DataFrame from the user's favorites favorites_df = pd.DataFrame({ + "Image_URL": user_fav_data["Image_URL"], "Description": user_fav_data["Description"], - "Link": user_fav_data["Link"], "Price": user_fav_data["Price"], "Product": user_fav_data["Product"], "Website": user_fav_data["Website"], diff --git a/src/frontend/firebase.py b/src/frontend/firebase.py index cee8038..3076bd5 100644 --- a/src/frontend/firebase.py +++ b/src/frontend/firebase.py @@ -4,7 +4,7 @@ def initialize_firebase(): # Check if Firebase has already been initialized if not firebase_admin._apps: - cred = credentials.Certificate('shopsync-9ecdc-firebase-adminsdk-60nyc-05d8e88f22.json') + cred = credentials.Certificate('shopsync-9ecdc-firebase-adminsdk-60nyc-a335ead1ea.json') # cred = credentials.Certificate('shopsync-se-firebase-adminsdk-nkzuw-ca6838f54f.json') firebase_admin.initialize_app(cred) \ No newline at end of file diff --git a/src/frontend/logger.txt b/src/frontend/logger.txt index 8ad7ed9..b31952c 100644 --- a/src/frontend/logger.txt +++ b/src/frontend/logger.txt @@ -166,3 +166,21 @@ amazon query:Milk amazon query:Milk amazon query:Milk amazon query:Milk +amazon query:Milk +amazon query:milk +amazon query:milk +amazon query:milk +amazon query:milk +amazon query:milk +amazon query:milk +amazon query:milk +amazon query:Apple +amazon query:Apples +amazon query:Apples +amazon query:Apples +amazon query:Apples +amazon query:milk +amazon query:milk +amazon query:Apples +amazon query:Apples +amazon query:Apples diff --git a/src/frontend/shopsync-9ecdc-firebase-adminsdk-60nyc-a335ead1ea.json b/src/frontend/shopsync-9ecdc-firebase-adminsdk-60nyc-a335ead1ea.json new file mode 100644 index 0000000..3c795ac --- /dev/null +++ b/src/frontend/shopsync-9ecdc-firebase-adminsdk-60nyc-a335ead1ea.json @@ -0,0 +1,13 @@ +{ + "type": "service_account", + "project_id": "shopsync-9ecdc", + "private_key_id": "a335ead1ea5c35451f12689886264cd6c6e1a059", + "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEugIBADANBgkqhkiG9w0BAQEFAASCBKQwggSgAgEAAoIBAQDcWezpDyRYN8JX\nxMyY0k+N/bTV3WWT6+Z3VT3RYkRMoPWtQ3HHXF9pnYubsLl/q640nEvA1dLZIUZI\nKdAxRdeHjdEV91olvAAsy7WLAMJhJjDrP3vIfMfricsYc/9niFBfivZJP5AgwN3K\nPq6VH77KQrB9DTTGsTk6wPAtH6Doa2tAmxGct6FGgY5nG6Fs8BxXSCzvcfPCLvDv\nngPJaFeloXlA4kDmXPwT66cgC1nUjATN49DC7nw1o9Y8xJ9Jc5v8MxjHTy7C/7Pi\nmK6dyIcUb76zAwa+hc6MINaR4jlAnQWhXvtXm2jKGAZCZ22gyEuvfQLqLX/MgMrL\nW4pkfLBrAgMBAAECggEAAn8tTtRcQqlys4I6embSItpjlsROVWsn/bp/tBUGzMKf\n9IBhEULftsCgp2n9lEmk1fJk2+twVPenStAVhm2YGGQ70YlaSGfLcJ4e9f395p5t\n/2zIIWcgXdeEUPjg18ZoqchLGWjytZXTBGSfk0Z5Z8u7m1BH+HPplUNxTygPR+j8\nxuc8j+RGcri7BzuSyTUybsCI1x0cfq5uqp32Hqk6CfXBg6fxhpYO7yVk7N6wT6ec\nKHF6ngRHXQe1dzDHMGi+iHAT01kkx/nofmKdV+HqHyxPm4rbgQOD0pn1ZEADY1PG\n5cvR8cQjWkV1yVrQ9vF6LDoEkHzHPqcfF4GAsNJYBQKBgQD2TZk1h9nY4isqg40i\n00Vp6PqGPqABCRBm0msGMh+CsVy8byDAnLaIRDlgo+JI/FvzfIwZuoeCyFPTkdgN\nKFNZhykOm43FBLXR9PxGlk7CuUK5ASYmJfxufnDenatU+XfdeaL5xFMRSODexrSq\nFmAS5TEU2e+YLqdyc/r1C2Q7DQKBgQDlBsR73Sg6MGayCrs2ruHW3g8ERtahXODH\nACXf5sACUjsV6jr3aKGR31vnABTcZEjrof0Ni/8LbWtBG6J+bmcjsC5VyMLOC4jh\nmvwIewmtQutzOVLaBkvF3/p7wIt+UteYeSPvbjQAnwi540oEktk9N8qkv2GgzAH/\nRfGVjmtbVwKBgHF45JnN4aZS5FIs0yv1K6iUhj6swWhYta65SEdNdkjuz2ucwvkZ\n+dojnE+SkSDQ6sftXFpKHj45bq0tJt1A881uQJMTRSg8eEunU0Zt3xFE6qFzDxFK\nNNbu968H8rQuTnPBozzwnth6u+bGotstfcuWvZr+oKx66fgHyNl2CxJNAoGAYSda\nyFSL0QthNRu6STsskHKImj7Wo4L7008rwexn/VQWvngrZXKcP34pxTdSoh9kk5iW\n+V0u5xEWk3r+lnWNCSWeskNE6BUajuGpEovnEfm2WZ2ymMxc7mbSIhcO1Zqc3JBe\n/x2Xr7/G+twBNSl6QC7fpr2M06JXIovwLIpK3mcCfxLaw55CjnkF8QdPW22RE53g\n5tRzEHHkrCiQBeDnZAP7mCO8I7bd+7qcsuHi4aVAiRp3aeniUnAehW8W6oLgJiNk\nOLRpPU6IWkjFSyyfyvtYHGOkFJxqmQM6vIMkpQx9BfzjjuZXPk4HGhNq3lfW6lfc\nhZPy6tWfQYc4xwDQrEA=\n-----END PRIVATE KEY-----\n", + "client_email": "firebase-adminsdk-60nyc@shopsync-9ecdc.iam.gserviceaccount.com", + "client_id": "111221445997110214622", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", + "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/firebase-adminsdk-60nyc%40shopsync-9ecdc.iam.gserviceaccount.com", + "universe_domain": "googleapis.com" +} diff --git a/src/frontend/slash_user_interface.py b/src/frontend/slash_user_interface.py index 5b7ad7c..a26329c 100644 --- a/src/frontend/slash_user_interface.py +++ b/src/frontend/slash_user_interface.py @@ -388,7 +388,6 @@ def reset_button(): url = [] price = [] site = [] - # rakuten = [] rating = [] image = [] @@ -655,63 +654,68 @@ def get_button_indices(button_ix): #/////////////////////////////////////////////////////////////////////////////////////////////// if st.session_state.dataframe is not None: + # Add a label for the selectbox st.markdown('Select index to add to favourites', unsafe_allow_html=True) - # Display the selectbox - selected_index = st.selectbox("", [None] + list(range(len(st.session_state.get("dataframe", []))))) + # Create a selectbox to choose an index from the dataframe + selected_index = st.selectbox( + "", + [None] + list(range(len(st.session_state.dataframe))), # Include 'None' for no selection + format_func=lambda x: f"Row {x}" if x is not None else "Select a row" + ) # Close the container div st.markdown("", unsafe_allow_html=True) + # Check if a valid row is selected if selected_index is not None: - fav = pd.DataFrame([st.session_state.dataframe.iloc[selected_index]]) + # Extract the selected row with required columns + fav_row = st.session_state.dataframe.loc[ + [selected_index], + ['Product', 'Description', 'Price', 'Website', 'Ratings', 'Image'] + ] + + fav_row['Ratings'] = fav_row['Ratings'].apply(lambda x: x[0] if isinstance(x, list) and len(x) > 0 else None) + # Append the row to the favorites table in session state if 'fav' in st.session_state: st.session_state.fav = pd.concat( - [st.session_state.fav, fav], axis=0).drop_duplicates() - st.dataframe(st.session_state.fav.style, column_config={"Link": st.column_config.LinkColumn( - "URL to website"), "Button": st.column_config.LinkColumn("Add to fav")},) - + [st.session_state.fav, fav_row], axis=0 + ).drop_duplicates() else: - st.session_state.fav = fav.copy() - st.dataframe(fav.style, column_config={"Link": st.column_config.LinkColumn( - "URL to website"), "Button": st.column_config.LinkColumn("Add to fav")},) + st.session_state.fav = fav_row + + styled_table = ( + st.session_state.fav.style + .set_properties(**{'text-align': 'center'}) + .set_table_styles([ + {"selector": "th", "props": [("text-align", "center")]}, + {"selector": "td", "props": [("text-align", "center")]} + ]) + ) + st.markdown( + styled_table.to_html(escape=False), # Allow HTML content + unsafe_allow_html=True + ) + #st.dataframe(st.session_state.fav[['Product', 'Description', 'Price', 'Website', 'Ratings', 'Image']], use_container_width=True) + + # Save the favorites table to Firestore user = auth.get_user_by_email(st.session_state.user_email) # Replace with actual user email uid = user.uid # Reference to the user's document in "favourites" collection user_fav_ref = db.collection("favourites").document(uid) - # Get the user's current favorites data, or create a new structure if it doesn't exist - user_fav_doc = user_fav_ref.get() - - if user_fav_doc.exists: - # If the document exists, retrieve the current data - user_fav_data = user_fav_doc.to_dict() - else: - # Initialize empty arrays if document doesn't exist - user_fav_data = { - "Description": [], - "Link": [], - "Price": [], - "Product": [], - # "Rating": [], - "Website": [] - } - - user_fav_data["Description"].append(fav["Description"].values[0]) # Access the actual value - user_fav_data["Link"].append(fav["Link"].values[0]) # Access the actual value - user_fav_data["Price"].append(fav["Price"].values[0]) # Access the actual value - user_fav_data["Product"].append(fav["Product"].values[0]) # Access the actual value - # user_fav_data["Rating"].append(fav["Rating"].values[0]) # Access the actual value - user_fav_data["Website"].append(fav["Website"].values[0]) # Access the actual value - - # Update the user's document in Firestore with the new data + # Convert favorites table to a dictionary format compatible with Firestore + user_fav_data = st.session_state.fav[['Product', 'Description', 'Price', 'Website', 'Ratings', 'Image']].to_dict(orient='list') + + # Save to Firestore user_fav_ref.set(user_fav_data) - - st.success(f"{product} has been added to your favorites!") + st.success(f"{st.session_state.dataframe.loc[selected_index, 'Product']} has been added to your favorites!") + + # Add footer to UI footer = """ """, From 8fbeb0e24bd1f35d71b061ea921354fac92571ff Mon Sep 17 00:00:00 2001 From: Shawty 2084 Date: Mon, 25 Nov 2024 12:03:20 -0500 Subject: [PATCH 13/15] Final UI touches --- .DS_Store | Bin 10244 -> 10244 bytes src/.DS_Store | Bin 6148 -> 6148 bytes src/frontend/.DS_Store | Bin 6148 -> 6148 bytes src/frontend/logger.txt | 7 +++++++ src/frontend/slash_user_interface.py | 4 ++-- 5 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.DS_Store b/.DS_Store index b7110745066cd07f33025d99cfcf1f101f0d7d25..3ebfd57c0b30b391f0c66bade5ed78d39764a141 100644 GIT binary patch delta 114 zcmZn(XbISGOdvM@Gy?+z3xgg*IzuKyNp8N2OHxjL5>Sj|Zd6czLDexwRQVLRywbsf jW_0<248!2${M-Vd9tM^yht0}@#ym{Z)HX-RuHgaz7VI8r delta 114 zcmZn(XbISGOdwYG1_J{F3xgg*IzuKyNp8N2OHxjL5>SjocD3#Tv8rQ^sPZXr`EUp0 k529%D1sR6H$@#ejKs^jBnG-fE3mWqIIPi delta 15 WcmZoMXffFEn~5oB+vdMa?4kfOvjzwN diff --git a/src/frontend/.DS_Store b/src/frontend/.DS_Store index 3265f84ff7a010a9f917c573c63ed24e428437df..2c4eff545503c2d28103f4421b343ab0ae26af71 100644 GIT binary patch delta 18 ZcmZoMXffCj!^rwmbwYp9=0wKDVgN#?2KWE~ delta 18 ZcmZoMXffCj!^nE}t)X+l=0wKDVgN$l2Il|( diff --git a/src/frontend/logger.txt b/src/frontend/logger.txt index 58c1d2e..b84dc89 100644 --- a/src/frontend/logger.txt +++ b/src/frontend/logger.txt @@ -66,3 +66,10 @@ amazon query:popcorn amazon query:plate amazon query:banana amazon query:wine +amazon query:water +amazon query:mango +amazon query:bananna +amazon query:coffee +amazon query:cocoa +amazon query:apple +amazon query:shoes diff --git a/src/frontend/slash_user_interface.py b/src/frontend/slash_user_interface.py index 0d2ecfb..74222b7 100644 --- a/src/frontend/slash_user_interface.py +++ b/src/frontend/slash_user_interface.py @@ -162,7 +162,7 @@ def split_description(description): } .stSlider{ margin-top: -30px; - margin-left: -50px; + margin-left: -65px; width: 400px !important;} """, @@ -387,7 +387,7 @@ def get_button_indices(button_ix): st.markdown("

Result

", unsafe_allow_html=True) - col1, col2 = st.columns([1, 2])# adjust the columns for price range and filters + col1, col2 = st.columns([1, 1])# adjust the columns for price range and filters with col1: st.session_state.dataframe['Price'] = pd.to_numeric( From 6716dd9a9837208bb21938c46fabb635fb7fa76f Mon Sep 17 00:00:00 2001 From: Spencer Kersey Date: Mon, 25 Nov 2024 13:37:41 -0500 Subject: [PATCH 14/15] Implemented History --- src/frontend/app.py | 14 ++++-- src/frontend/history.py | 71 ++++++++++++++++++++++++++++ src/frontend/slash_user_interface.py | 27 +++++++++++ 3 files changed, 109 insertions(+), 3 deletions(-) create mode 100644 src/frontend/history.py diff --git a/src/frontend/app.py b/src/frontend/app.py index 4c530ed..e9bb35f 100644 --- a/src/frontend/app.py +++ b/src/frontend/app.py @@ -6,6 +6,7 @@ import favourites import logout # Import the logout module import json +import history import slash_user_interface as slash_user_interface import time from streamlit_cookies_controller import CookieController @@ -55,8 +56,8 @@ def run(self): with st.sidebar: app = option_menu( menu_title='ShopSync', - options=['Account', 'Home', 'Favourites', 'Logout'], - icons=['person-circle', 'house-fill', 'star-fill', 'box-arrow-right'], + options=['Account', 'Home', 'Favourites', 'History', 'Logout'], + icons=['person-circle', 'house-fill', 'star-fill', 'box-arrow-right', 'box-arrow-right'], menu_icon='shop', default_index=default_index, styles={ @@ -82,7 +83,13 @@ def run(self): favourites.app() else: st.warning("You need to log in to access the favourites page.") - + + elif app == "History": + if st.session_state.get('logged_in'): + history.app() + else: + st.warning("You need to log in to access the history page.") + elif app == "Logout": logout.app() # Call the logout function @@ -92,4 +99,5 @@ def run(self): app.add_app("Account", account.app) app.add_app("Home", slash_user_interface.app) app.add_app("Favourites", favourites.app) + app.add_app("History", history.app) app.run() \ No newline at end of file diff --git a/src/frontend/history.py b/src/frontend/history.py new file mode 100644 index 0000000..f59f99e --- /dev/null +++ b/src/frontend/history.py @@ -0,0 +1,71 @@ +import streamlit as st +import firebase_admin +from firebase_admin import credentials, firestore, auth +import pandas as pd +import os + +def fetch_title(): + return "History" + +def initialize_firebase(mock=False): + if mock: + # Mock initialization for testing purposes + if not firebase_admin._apps: + firebase_admin.initialize_app() + return True + + json_path = os.path.join(os.path.dirname(__file__), 'shopsync-se-firebase-adminsdk-nkzuw-e871ea65d4.json') + try: + # Path to Firebase service account key + cred = credentials.Certificate(json_path) + firebase_admin.initialize_app(cred) + return True + except Exception as e: + print(f"Error initializing Firebase: {e}") + raise e + +# db = firestore.client() +def app(firestore_client=None): + # Allow mock initialization + if not firebase_admin._apps: + initialize_firebase(mock=False) + + # If firestore_client is None, initialize it + if firestore_client is None: + firestore_client = firestore.client() + + st.title("History") + + # Fetch the user ID (UID) from Firebase Authentication + user_email = st.session_state.user_email # Ensure this is set + user = auth.get_user_by_email(user_email) # Get the user by email + uid = user.uid + + # Reference to the user's document in the "history" collection + user_his_ref = firestore_client.collection("history").document(uid) + user_his_doc = user_his_ref.get() + + if user_his_doc.exists: + user_his_data = user_his_doc.to_dict() + + timestamps = list() + for stamp in user_his_data["Timestamp"]: + temp = pd.Timestamp(stamp, unit='s', tz='US/Eastern') + # temp.floor(freq='h') + timestamps.append(temp.floor(freq='s')) + + # Create a DataFrame from the user's history + history_df = pd.DataFrame({ + "Search": user_his_data["Search"], + "Timestamp": timestamps + }) + + # Display the history DataFrame + st.dataframe(history_df.style, column_config={ + "Link": st.column_config.LinkColumn("URL to Website"), + "Button": st.column_config.LinkColumn("Add to history"), + }) + else: + st.write("You have no history yet.") + +# In your main app file, call history.app() to use this diff --git a/src/frontend/slash_user_interface.py b/src/frontend/slash_user_interface.py index df0c7ca..b8a7ad0 100644 --- a/src/frontend/slash_user_interface.py +++ b/src/frontend/slash_user_interface.py @@ -7,6 +7,7 @@ # Import Libraries import os import sys +import time sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) import streamlit.components.v1 as components import re @@ -245,6 +246,32 @@ def reset_button(): company_list = conf.getCompanies() # results = search_product(website, product) results = search_product(website_dict[website], product) + + user = auth.get_user_by_email(st.session_state.user_email) # Replace with actual user email + uid = user.uid + + # Reference to the user's document in "hisourites" collection + user_his_ref = db.collection("history").document(uid) + + # Get the user's current hisorites data, or create a new structure if it doesn't exist + user_his_doc = user_his_ref.get() + + if user_his_doc.exists: + # If the document exists, retrieve the current data + user_his_data = user_his_doc.to_dict() + else: + # Initialize empty arrays if document doesn't exist + user_his_data = { + "Search": [], + "Timestamp": [] + } + + user_his_data["Search"].append(product) # Access the actual value + user_his_data["Timestamp"].append(time.time()) # Access the actual value + + # Update the user's document in Firestore with the new data + user_his_ref.set(user_his_data) + # Use st.columns based on return values description = [] url = [] From 5e7a6a9f9f7a1a284295cf3f576d0b27875fe57c Mon Sep 17 00:00:00 2001 From: Aastha Gaudani <89483146+Shawty2084@users.noreply.github.com> Date: Mon, 25 Nov 2024 22:10:46 -0500 Subject: [PATCH 15/15] Delete def solution(n):.py deleted irrelevant file --- def solution(n):.py | 36 ------------------------------------ 1 file changed, 36 deletions(-) delete mode 100644 def solution(n):.py diff --git a/def solution(n):.py b/def solution(n):.py deleted file mode 100644 index 3fbc647..0000000 --- a/def solution(n):.py +++ /dev/null @@ -1,36 +0,0 @@ -def solution(segments): - # Find the maximum byte value (end of all ranges) - max_byte = max(end for _, end in segments) - - # Initialize a difference array - diff = [0] * (max_byte + 2) # +2 to handle 1-based indexing and boundary - - # Mark the start and end of each range in the diff array - for start, end in segments: - diff[start] += 1 - diff[end + 1] -= 1 - - # Sweep through the diff array to calculate cumulative unique bytes - unique_bytes = 0 - current_unique = 0 - result = [] - segment_index = 0 - - # Use a set to track when we reach the end of a segment - segment_ends = {end: idx for idx, (_, end) in enumerate(segments)} - - for i in range(1, max_byte + 1): - current_unique += diff[i] - if current_unique > 0: - unique_bytes += 1 - # Check if this index corresponds to the end of a segment - if i in segment_ends: - result.append(unique_bytes) - - return result - -# Example Input -segments = [[1, 9], [1, 3], [8, 15], [6, 9], [2, 5]] - -# Example Output -print(solution(segments)) # Expected: [9, 3, 15, 9, 5]