-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathcontentScript.js
More file actions
42 lines (42 loc) · 18.1 KB
/
contentScript.js
File metadata and controls
42 lines (42 loc) · 18.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
/**
 * Returns the current time as a bracketed ISO-8601 string. Every console
 * message in this script is prefixed with it.
 *
 * @returns {string} For example "[2024-05-01T12:00:00.000Z]".
 */
function getTimestamp() {
  const now = new Date();
  return "[" + now.toISOString() + "]";
}

// Script-wide mutable state for the leads panel. These stay `var` declarations
// so they remain ordinary script globals, exactly as before.
var auto_extract_flag = false; // true while the feed auto-scroll loop should keep going
var processing_flag = false; // raised by handleSearchMessage while it is working
var leads = []; // collected lead records, in insertion order
var leads_lnglat = new Set(); // identifiers (cid / place_id) of leads already collected

/**
 * Persists the in-memory leads and their de-duplication keys to
 * chrome.storage.local under the "gmb_scraper_leads" key.
 *
 * Failures are logged and swallowed, so callers never see a rejection.
 *
 * @returns {Promise<void>}
 */
async function saveLeadsToStorage() {
  try {
    // A Set cannot be stored directly, so it is flattened to an array.
    const snapshot = {
      leads: leads,
      leads_lnglat: [...leads_lnglat],
    };
    await chrome.storage.local.set({ gmb_scraper_leads: snapshot });
    console.log(getTimestamp(), "Leads saved to chrome storage:", leads.length);
  } catch (storageError) {
    console.error(getTimestamp(), "Error saving leads to storage:", storageError);
  }
}
/**
 * Restores `leads` and `leads_lnglat` from the "gmb_scraper_leads" entry in
 * chrome.storage.local.
 *
 * When nothing is stored the in-memory state is left untouched. When the read
 * throws, the error is logged and both collections are reset to empty.
 *
 * @returns {Promise<void>}
 */
async function loadLeadsFromStorage() {
  try {
    const stored = await chrome.storage.local.get("gmb_scraper_leads");
    const snapshot = stored.gmb_scraper_leads;
    if (!snapshot) {
      return;
    }
    leads = snapshot.leads || [];
    // Keys are stored as a plain array; rebuild the Set used for lookups.
    leads_lnglat = new Set(snapshot.leads_lnglat || []);
    console.log(getTimestamp(), "Leads loaded from chrome storage:", leads.length);
  } catch (storageError) {
    console.error(getTimestamp(), "Error loading leads from storage:", storageError);
    leads = [];
    leads_lnglat = new Set();
  }
}
/**
 * Appends previously unseen leads to `leads` and persists the result.
 *
 * An entry is accepted only when it is truthy, has a truthy `cid`, and that
 * cid is not already in `leads_lnglat`. Accepted entries are mutated in place:
 * they receive a `profileURL` built from the cid.
 *
 * @param {Array<object>} newLeads Candidate lead records. Any non-array value
 *   is rejected with a console error and nothing is saved.
 * @returns {Promise<void>}
 */
async function updateLeads(newLeads) {
  if (!Array.isArray(newLeads)) {
    console.error(getTimestamp(), "updateLeads expects an array");
    return;
  }

  for (const candidate of newLeads) {
    const isNew = candidate && candidate.cid && !leads_lnglat.has(candidate.cid);
    if (!isNew) {
      continue;
    }
    candidate.profileURL = `https://www.google.com/maps?cid=${candidate.cid}`;
    leads_lnglat.add(candidate.cid);
    leads.push(candidate);
  }

  await saveLeadsToStorage();
}

/**
 * Drops every collected lead and its de-duplication key, then persists the
 * now-empty state.
 *
 * @returns {Promise<void>}
 */
async function clearLeads() {
  leads = [];
  leads_lnglat.clear();
  await saveLeadsToStorage();
}

// Script-wide mutable state for the review panel. These must stay `var`: the
// review panel's Clear handler reaches them through `window.`.
var auto_extract_review_flag = false; // true while the review paging loop should keep going
var processing_review_flag = false; // the paging loop waits while this is true
var reviews = []; // collected review records
var review_ids = new Set(); // identifiers of reviews already collected
/**
 * Builds the leads control panel and inserts it as the first child of
 * <body>. The panel holds a status label plus Start/Stop, Export and Clear
 * buttons.
 *
 * A 10-second interval re-inserts the panel whenever it is no longer inside
 * <body>.
 *
 * - Start/Stop toggles `auto_extract_flag`. While the flag is set, a loop
 *   scrolls the `[role="feed"]` element to its bottom.
 * - Export sends the collected leads to the background via an "openPage"
 *   message.
 * - Clear calls clearLeads() and refreshes the Export counter.
 */
function renderLeadsUI() {
  const panel = document.createElement("div");
  panel.className = "extension_gms_page";
  panel.id = "extension_gms_page";

  const statusLabel = document.createElement("span");
  statusLabel.id = "extension_gms_leads_info";
  statusLabel.className = "extension_gms_status";

  const startButton = document.createElement("button");
  startButton.className = "extension_gms_button";
  startButton.innerText = "Start Auto Extract";
  startButton.id = "extension_gms_start_btn";
  startButton.addEventListener("click", async (event) => {
    const button = event.target;
    if (button.disabled) {
      return;
    }

    // Second click while running: only lower the flag. The running loop sees
    // it after its current sleep and performs the final cleanup itself.
    if (auto_extract_flag) {
      auto_extract_flag = false;
      button.innerText = "Start Auto Extract";
      button.style.backgroundColor = "";
      console.log(getTimestamp(), "Stopping auto extract...");
      return;
    }

    auto_extract_flag = true;
    button.innerText = "Stop Auto Extract";
    button.style.backgroundColor = "#ea4335";
    console.log(getTimestamp(), "Starting auto extract...");

    try {
      // Click the search button first, if present, and wait 3 s.
      const searchButton = document.querySelector('[role="search"] button');
      if (searchButton) {
        searchButton.click();
        await new Promise((resolve) => setTimeout(resolve, 3000));
      }

      const feed = document.querySelector('[role="feed"]');
      if (!feed) {
        console.error(getTimestamp(), "Feed list not found");
        throw Error("Feed list not found");
      }
      button.innerText = "Stop Auto Extract";

      let staleRounds = 0;
      let lastHeight = -1;
      while (auto_extract_flag) {
        console.log(getTimestamp(), "Scrolling feed...");
        feed.scrollTop = feed.scrollHeight;

        // Random 1-4 s pause between scrolls.
        const pauseMs = 1000 + Math.floor(3000 * Math.random());
        await new Promise((resolve) => setTimeout(resolve, pauseMs));

        // The loop treats the presence of an "HlvSq" element as the end of the list.
        if (document.getElementsByClassName("HlvSq").length > 0) {
          console.log(getTimestamp(), "Reached end of results");
          break;
        }

        if (lastHeight === feed.scrollHeight) {
          staleRounds++;
          if (staleRounds >= 20) {
            console.log(getTimestamp(), "Feed unchanged for 20 iterations, stopping");
            break;
          }
        } else {
          staleRounds = 0;
          lastHeight = feed.scrollHeight;
        }
        console.log(
          getTimestamp(),
          `Stale: ${staleRounds}, ScrollTop: ${feed.scrollTop}, ScrollHeight: ${feed.scrollHeight}`
        );
      }
    } catch (error) {
      console.error(getTimestamp(), "Auto extract error:", error);
    } finally {
      button.innerText = "Start Auto Extract";
      button.style.backgroundColor = "";
      auto_extract_flag = false;
      console.log(getTimestamp(), "Auto extract finished");
    }
  });

  const exportButton = document.createElement("button");
  exportButton.className = "extension_gms_button";
  exportButton.innerText = `Export Results(${leads.length})`;
  exportButton.id = "extension_gms_download_btn";
  exportButton.style = "background-color: #54aced";
  exportButton.addEventListener("click", async () => {
    chrome.runtime.sendMessage({ action: "openPage", data: leads });
  });
  exportButton.innerText = `Export Results(${leads.length})`;

  const clearButton = document.createElement("button");
  clearButton.className = "extension_gms_button";
  clearButton.innerText = "Clear";
  clearButton.id = "extension_gms_clear_btn";
  clearButton.style = "background-color: #4167b2";
  clearButton.addEventListener("click", async () => {
    await clearLeads();
    exportButton.innerText = `Export Results(${leads.length})`;
  });

  for (const child of [statusLabel, startButton, exportButton, clearButton]) {
    panel.appendChild(child);
  }

  // Insert the panel now, then keep checking every 10 s and re-insert it if it
  // is no longer inside <body>.
  const ensureMounted = () => {
    if (document.body && !document.body.contains(panel)) {
      document.body.insertBefore(panel, document.body.firstChild);
      console.log(getTimestamp(), "extension_gms_page inserted successfully");
    }
  };
  ensureMounted();
  setInterval(() => {
    ensureMounted();
  }, 10000);
}
/**
 * Builds the review control panel and inserts it as the first child of
 * <body>. The panel holds a status label plus Start/Stop, Export and Clear
 * buttons.
 *
 * - Start/Stop toggles `auto_extract_review_flag`. While the flag is set, a
 *   loop scrolls an `.eyxqWe` element into view to request the next page. It
 *   stops after three consecutive attempts that do not grow `reviews`.
 * - Export sends the collected reviews to the background via an
 *   "openReviewPage" message.
 * - Clear empties `reviews` / `review_ids` and refreshes the Export counter.
 */
function renderReviewUI() {
  const START_LABEL = "Start Auto Extract Review";
  const OPEN_PANEL_HINT = "Click one result in the left listing and Open the Reviews panel first!";

  const panel = document.createElement("div");
  panel.className = "extension_gms_page";
  panel.style = "background-color: #ccc;";

  const statusLabel = document.createElement("span");
  statusLabel.id = "extension_review_info";
  statusLabel.className = "extension_gms_status";

  const startButton = document.createElement("button");
  startButton.className = "extension_gms_button";
  startButton.innerText = START_LABEL;
  startButton.id = "extension_review_start_btn";
  startButton.addEventListener("click", async (event) => {
    const button = event.target;

    // Second click while running: only lower the flag; the running loop
    // notices it after its current wait and resets the button itself.
    if (auto_extract_review_flag) {
      button.innerText = START_LABEL;
      auto_extract_review_flag = false;
      console.log(getTimestamp(), "Begin to stop auto extract review!");
      return;
    }

    // The last "#reviews-panel" element must exist and be displayed as a block.
    const reviewPanels = document.querySelectorAll("#reviews-panel");
    if (reviewPanels.length <= 0) {
      alert(OPEN_PANEL_HINT);
      return;
    }
    const activePanel = reviewPanels[reviewPanels.length - 1];
    if (window.getComputedStyle(activePanel).getPropertyValue("display") !== "block") {
      alert(OPEN_PANEL_HINT);
      return;
    }

    button.innerText = "Stop Auto Extract Review";
    button.style = "background-color: #ea4335";
    auto_extract_review_flag = true;
    console.log(getTimestamp(), "Begin to start auto extract review!");

    // try/finally mirrors renderLeadsUI: whatever happens inside the loop, the
    // button and flag are always restored instead of being left in "Stop".
    try {
      while (auto_extract_review_flag) {
        // Wait while a previous batch of reviews is still being processed.
        while (processing_review_flag) {
          await new Promise((resolve) => setTimeout(resolve, 1000));
        }
        console.log(getTimestamp(), "Paging!");

        const countBefore = reviews.length;
        let failedAttempts = 0;
        while (failedAttempts < 3) {
          const anchors = document.querySelectorAll(".eyxqWe");
          const anchor = anchors.length >= 2 ? anchors[anchors.length - 2] : anchors[0];
          if (!anchor) {
            // Previously this was an unguarded TypeError on `undefined`.
            throw Error("Review list anchor not found");
          }
          anchor.scrollIntoView({ behavior: "smooth", block: "end" });

          // Random 6-11 s wait for the next page of reviews to arrive.
          const waitMs = 1000 * Math.floor(6 * Math.random() + 6);
          await new Promise((resolve) => setTimeout(resolve, waitMs));

          if (reviews.length > countBefore) {
            break;
          }
          failedAttempts += 1;
        }

        if (failedAttempts >= 3) {
          console.log(getTimestamp(), "No Next Page!");
          alert("Arrive at the Last Page!");
          break;
        }
      }
    } catch (error) {
      console.error(getTimestamp(), "Auto extract review error:", error);
    } finally {
      button.innerText = START_LABEL;
      button.style = "";
      auto_extract_review_flag = false;
      console.log(getTimestamp(), "Finish auto extract review!");
    }
  });

  const exportButton = document.createElement("button");
  exportButton.className = "extension_gms_button";
  exportButton.innerText = `Export Review Results(${reviews.length})`;
  exportButton.id = "extension_review_download_btn";
  exportButton.style = "background-color: #54aced";
  exportButton.addEventListener("click", async () => {
    chrome.runtime.sendMessage({ action: "openReviewPage", data: reviews });
  });

  const clearButton = document.createElement("button");
  clearButton.className = "extension_gms_button";
  clearButton.innerText = "Clear";
  clearButton.id = "extension_review_clear_btn";
  clearButton.style = "background-color: #4167b2";
  clearButton.addEventListener("click", async () => {
    // Use the script-level bindings directly rather than going through `window.`.
    reviews = [];
    review_ids.clear();
    // Keep the same label the button was created with (it used to flip to
    // "Export Results(…)" after a clear).
    exportButton.innerText = `Export Review Results(${reviews.length})`;
  });

  panel.appendChild(statusLabel);
  panel.appendChild(startButton);
  panel.appendChild(exportButton);
  panel.appendChild(clearButton);
  document.body.insertBefore(panel, document.body.firstChild);
}

// Script bootstrap: only the leads panel is rendered on load.
(() => {
  renderLeadsUI();
})();
/**
 * Converts a decoded APP_INITIALIZATION_STATE array into a flat lead record.
 *
 * The business details are read from `appState[3][6]`. That value is either
 * an array or a JSON string, optionally prefixed with the 5-character guard
 * `)]}'` plus a newline. All fields are looked up by fixed position inside
 * that structure, and missing values become "".
 *
 * @param {Array} appState Decoded APP_INITIALIZATION_STATE value.
 * @returns {object} A record with name, phone, address, website, category,
 *   ratingCount, averageRating, latitude, longitude, kg_id, place_id,
 *   business_profile_id, cid, reviewExample and one key per weekday
 *   ("Monday" … "Sunday"). Returns `{}` when the input does not have the
 *   expected shape or the details string is not valid JSON.
 */
function parseBusinessData(appState) {
  let record = {};
  try {
    if (!appState || !Array.isArray(appState) || appState.length <= 3) {
      return record;
    }
    const wrapper = appState[3];
    if (!wrapper || !Array.isArray(wrapper)) {
      return record;
    }

    let details = null;
    if (wrapper.length > 6 && wrapper[6]) {
      details = wrapper[6];
    }

    if (details && typeof details === "string") {
      // Strip the anti-JSON-hijacking guard before parsing.
      if (details.startsWith(")]}'\n")) {
        details = details.substring(5);
      }
      try {
        details = JSON.parse(details);
      } catch (parseError) {
        console.error(getTimestamp(), "Failed to parse details as JSON:", parseError);
        return record;
      }
    }

    if (!details || !Array.isArray(details)) {
      return record;
    }

    const info = details[6];
    const website = info?.[7]?.[0] || "";
    const name = info?.[11] || "";
    const latitude = info?.[9]?.[2] || "";
    const longitude = info?.[9]?.[3] || "";
    const address = Array.isArray(info?.[2]) ? info[2].join(",") : "";
    const ratingCount = info?.[4]?.[8] || "";
    const averageRating = info?.[4]?.[7] || "";
    const category = Array.isArray(info?.[13]) ? info[13].join(",") : "";
    const identifiers = info?.[227]?.[0];

    record = {
      name: name,
      phone: info?.[178]?.[0]?.[0] || "",
      address: address,
      website: website,
      category: category,
      ratingCount: ratingCount,
      averageRating: averageRating,
      latitude: latitude,
      longitude: longitude,
      kg_id: identifiers?.[3] || "",
      place_id: identifiers?.[4] || "",
      business_profile_id: identifiers?.[5] || "",
      cid: details[25]?.[3]?.[0]?.[13]?.[0]?.[0]?.[1] || "",
      reviewExample: info?.[175]?.[9]?.[0]?.[0]?.[2]?.[0]?.[2]?.[15]?.[0]?.[0] || "",
    };

    // Every weekday column exists, even when no opening hours are published.
    const weekdays = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"];
    for (const weekday of weekdays) {
      record[weekday] = "";
    }

    const schedule = info?.[203]?.[0] || [];
    if (Array.isArray(schedule)) {
      for (const entry of schedule) {
        // entry[1] is used as a 1-based index into `weekdays`; anything
        // falsy or out of range is skipped.
        const dayNumber = entry?.[1];
        const weekday = dayNumber ? weekdays[dayNumber - 1] : null;
        if (weekday) {
          record[weekday] = entry?.[3]?.[0]?.[0] || "";
        }
      }
    }
  } catch (error) {
    console.error(getTimestamp(), "Failed to parse APP_INITIALIZATION_STATE structure:", error);
  }
  return record;
}
/**
 * Scans forward from an opening "[" and returns the index just past its
 * matching "]".
 *
 * Brackets inside single- or double-quoted strings are ignored, and the
 * character following a backslash is always skipped.
 *
 * @param {string} text Source text.
 * @param {number} openIndex Index of the opening "[".
 * @returns {number} Index one past the matching "]", or -1 when no match is
 *   found within 10,000,000 characters.
 */
function findArrayLiteralEnd(text, openIndex) {
  let depth = 1;
  let escaped = false;
  let quote = null;
  let index = openIndex + 1;
  while (index < text.length && index - openIndex < 1e7 && depth > 0) {
    const ch = text[index];
    if (escaped) {
      escaped = false;
    } else if (ch === "\\") {
      escaped = true;
    } else if (quote !== null) {
      if (ch === quote) {
        quote = null;
      }
    } else if (ch === '"' || ch === "'") {
      quote = ch;
    } else if (ch === "[") {
      depth++;
    } else if (ch === "]") {
      depth--;
    }
    index++;
  }
  return depth === 0 ? index : -1;
}

/**
 * Extracts business data from the HTML of a place page.
 *
 * Step 1 (in-process): locate the APP_INITIALIZATION_STATE assignment,
 * bracket-match its array literal, JSON.parse it and hand the result to
 * parseBusinessData().
 *
 * Step 2 (fallback): load the extension's sandbox.html in a hidden iframe,
 * wait for its "sandboxReady" message, post the HTML to it, and parse the
 * "appStateExtracted" reply. The fallback gives up after 60 s.
 *
 * @param {string} a Raw HTML of the place page.
 * @returns {Promise<object>} The parsed record; `{}` when extraction fails or
 *   times out. The promise never rejects.
 */
async function extractBusinessData(a) {
  try {
    const markers = [
      "window.APP_INITIALIZATION_STATE",
      "var APP_INITIALIZATION_STATE",
      "APP_INITIALIZATION_STATE",
    ];
    let markerIndex = -1;
    let marker = null;
    for (const candidate of markers) {
      markerIndex = a.indexOf(candidate);
      if (markerIndex !== -1) {
        marker = candidate;
        break;
      }
    }

    if (markerIndex !== -1) {
      // The "=" must follow the marker closely, and the "[" must appear
      // within 50 characters of it; otherwise this is not the assignment.
      const equalsIndex = a.indexOf("=", markerIndex + marker.length);
      if (equalsIndex !== -1 && equalsIndex - markerIndex < 100) {
        let openIndex = -1;
        for (let i = equalsIndex + 1; i < equalsIndex + 50 && i < a.length; i++) {
          if (a[i] === "[") {
            openIndex = i;
            break;
          }
        }

        if (openIndex !== -1) {
          const endIndex = findArrayLiteralEnd(a, openIndex);
          if (endIndex !== -1) {
            const literal = a.substring(openIndex, endIndex);
            try {
              const appState = JSON.parse(literal);
              console.log(getTimestamp(), "Extracted APP_INITIALIZATION_STATE via manual bracket matching");
              return parseBusinessData(appState);
            } catch (parseError) {
              console.warn(
                getTimestamp(),
                "Manual extraction matched brackets but JSON parse failed:",
                parseError.message
              );
            }
          } else {
            console.warn(
              getTimestamp(),
              "Manual extraction could not find matching bracket or exceeded safety limit"
            );
          }
        }
      }
    }
  } catch (error) {
    console.warn(getTimestamp(), "Manual extraction error:", error.message);
  }

  console.log(getTimestamp(), "Manual extraction failed, falling back to sandbox iframe...");

  // `new Promise` is needed here to adapt the message-event protocol. The
  // executor is synchronous (it never awaited anything).
  return new Promise((resolve) => {
    try {
      const sandboxUrl = chrome.runtime.getURL("sandbox.html");
      const frame = document.createElement("iframe");
      frame.style.display = "none";
      frame.src = sandboxUrl;
      const requestId = "req_" + Date.now() + "_" + Math.random().toString(36).substring(2, 9);

      let onResult;
      let onReady;
      let timer;

      // Removes listeners, the timeout and the iframe. Safe to call once from
      // either the timeout or the result handler.
      const cleanup = () => {
        if (onResult) {
          window.removeEventListener("message", onResult);
        }
        if (onReady) {
          window.removeEventListener("message", onReady);
        }
        if (timer) {
          clearTimeout(timer);
        }
        if (frame.parentNode) {
          document.body.removeChild(frame);
        }
      };

      timer = setTimeout(() => {
        cleanup();
        console.error(getTimestamp(), "Timeout: Sandbox took too long to respond for request", requestId);
        resolve({});
      }, 60000);

      // The window receives messages from arbitrary senders, some with null
      // data. Check the source first and read `data` defensively so foreign
      // messages are ignored instead of throwing inside the listener.
      onReady = (event) => {
        if (event.source !== frame.contentWindow || event.data?.action !== "sandboxReady") {
          return;
        }
        window.removeEventListener("message", onReady);
        onReady = null;
        frame.contentWindow.postMessage(
          { action: "extractAppState", requestId: requestId, html: a },
          "*"
        );
      };

      onResult = (event) => {
        if (event.source !== frame.contentWindow) {
          return;
        }
        const reply = event.data;
        if (reply?.action !== "appStateExtracted" || reply.requestId !== requestId) {
          return;
        }
        cleanup();
        if (reply.success) {
          resolve(parseBusinessData(reply.data));
        } else {
          console.error(
            getTimestamp(),
            "Failed to extract APP_INITIALIZATION_STATE for request",
            requestId,
            ":",
            reply.error
          );
          resolve({});
        }
      };

      window.addEventListener("message", onReady);
      window.addEventListener("message", onResult);
      document.body.appendChild(frame);
    } catch (error) {
      console.error(getTimestamp(), "Failed to extract business data:", error.message);
      resolve({});
    }
  });
}
/**
 * Parses one place page into a lead record and, when the business has a
 * website, enriches it with the contact fields returned by the background
 * "email" action.
 *
 * @param {string} a CID of the place; used to skip places already in
 *   `leads_lnglat` and for logging.
 * @param {string} e Raw HTML of the place page.
 * @returns {Promise<object|null>} The (possibly enriched) record, or null when
 *   the CID is already known or extraction threw. Enrichment failures are
 *   logged and the un-enriched record is still returned.
 */
async function extractBusinessDataForCID(a, e) {
  try {
    const business = await extractBusinessData(e);
    if (!business || leads_lnglat.has(a)) {
      return null;
    }

    try {
      if (business.website) {
        // The name comes from scraped page data, so it is written as text, not
        // HTML. A missing status element must not prevent the email lookup.
        const statusEl = document.getElementById("extension_gms_leads_info");
        if (statusEl) {
          statusEl.textContent = "Searching Emails for " + business.name + " ... ";
        }

        const contacts = await chrome.runtime.sendMessage({
          action: "email",
          data: { website: business.website, name: business.name, deep_search: true },
        });
        console.log(getTimestamp(), "social_links:", contacts);

        if (contacts) {
          for (const field in contacts) {
            // Only list-valued fields are merged (same rule as the search
            // listener). A stray scalar no longer aborts the remaining fields.
            if (Array.isArray(contacts[field])) {
              business[field] = contacts[field].join(",");
            }
          }
        }
      }
    } catch (error) {
      console.warn(getTimestamp(), "collect email error: ", business, error);
    }
    return business;
  } catch (error) {
    console.warn(getTimestamp(), "extractBusinessDataForCID error: ", a, error);
  }
  return null;
}
/**
 * Builds a set of request headers describing the current browser.
 *
 * All values come from `navigator` and the current page URL; the fixed
 * values are those of a top-level document navigation. The `sec-ch-ua*`
 * client-hint headers are added only when `navigator.userAgentData` is
 * available.
 *
 * @returns {Object<string, string>} Lower-case header names mapped to values.
 */
function buildBrowserLikeHeaders() {
  // Accept-Language: the first language carries no weight; each following one
  // drops by 0.1, never going below q=0.1.
  const hasLanguageList = Array.isArray(navigator.languages) && navigator.languages.length > 0;
  const preferredLanguages = hasLanguageList
    ? navigator.languages
    : [navigator.language || "en-US"];
  const acceptLanguage = preferredLanguages
    .map((tag, rank) => {
      if (rank === 0) {
        return tag;
      }
      const weight = Math.max(0.1, 1 - 0.1 * rank).toFixed(1);
      return `${tag};q=${weight}`;
    })
    .join(", ");

  const headers = {
    "user-agent": navigator.userAgent,
    accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
    "accept-language": acceptLanguage,
    "cache-control": "max-age=0",
    pragma: "no-cache",
    "upgrade-insecure-requests": "1",
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "none",
    "sec-fetch-user": "?1",
    referer: window.location.href,
  };

  const uaData = navigator.userAgentData;
  if (uaData) {
    if (Array.isArray(uaData.brands)) {
      const brandList = uaData.brands
        .map((entry) => `"${entry.brand}";v="${entry.version}"`)
        .join(", ");
      // An empty brand list produces no header at all.
      if (brandList) {
        headers["sec-ch-ua"] = brandList;
      }
    }
    if (typeof uaData.mobile === "boolean") {
      headers["sec-ch-ua-mobile"] = uaData.mobile ? "?1" : "?0";
    }
    if (uaData.platform) {
      headers["sec-ch-ua-platform"] = `"${uaData.platform}"`;
    }
  }

  return headers;
}
/**
 * Fetches the HTML of a place page through the background script's
 * "fetchWithSorryDetection" action, making up to three attempts.
 *
 * Behaviour per attempt:
 * - success: the HTML is returned immediately.
 * - verification required and completed: wait 30-60 s, then move on to the
 *   next attempt.
 * - verification required but cancelled, timed out or unclear: give up on
 *   this place and return null.
 * - any other failure: back off and retry. The pause is 3-62 s when the
 *   reported error mentions "429", otherwise 1-2 s. From the second attempt
 *   on, a further 2-4 s pause is added.
 *
 * @param {string} a CID of the place.
 * @returns {Promise<string|null>} The page HTML, or null when the CID is
 *   already in `leads_lnglat`, verification was not completed, or all
 *   attempts failed. The promise never rejects.
 */
async function getPlacePageData(a) {
  const MAX_ATTEMPTS = 3;
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Status updates are cosmetic. If the panel is temporarily detached from the
  // page, the lookup returns null, and that must not abort the fetch.
  const setStatus = (text) => {
    const statusEl = document.getElementById("extension_gms_leads_info");
    if (statusEl) {
      statusEl.textContent = text;
    }
  };

  try {
    if (leads_lnglat.has(a)) {
      console.log(getTimestamp(), "Business data already extracted for CID: ", a);
      return null;
    }

    setStatus("Fetching page data for place id: " + a);
    console.log(getTimestamp(), "Fetching page data for CID: ", a);

    // The base URL is stored base64-encoded; it decodes to
    // "https://www.google.com/maps/".
    const url = `${atob("aHR0cHM6Ly93d3cuZ29vZ2xlLmNvbS9tYXBzLw==")}?cid=${a}`;
    let lastError;

    for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
      // Reset per attempt so the back-off decision below never looks at a
      // response left over from an earlier attempt.
      let response;

      try {
        if (attempt > 1) {
          const waitMs = 1000 * (Math.floor(3 * Math.random()) + 2);
          console.log(getTimestamp(), `[CONTENT] Waiting ${waitMs / 1000}s before attempt ${attempt}...`);
          await sleep(waitMs);
        }

        console.log(getTimestamp(), `[CONTENT] Attempt ${attempt}: Sending fetch request to background`);
        const browserContext = {
          userAgent: navigator.userAgent,
          referer: window.location.href,
          language: navigator.language || "en-US",
          headers: buildBrowserLikeHeaders(),
        };
        response = await chrome.runtime.sendMessage({
          action: "fetchWithSorryDetection",
          url: url,
          cid: a,
          timeout: 30000,
          browserContext: browserContext,
        });
        console.log(getTimestamp(), "[CONTENT] Fetch result:", response);

        if (!response) {
          // An empty reply is reported explicitly rather than as a TypeError
          // from reading a property of undefined.
          throw Error("No response from background fetch");
        }

        if (response.needsVerification) {
          console.warn(getTimestamp(), "[CONTENT] Google anti-bot verification required");

          if (response.verificationComplete) {
            console.log(getTimestamp(), "[CONTENT] \u2713 Verification completed automatically");
            const cooldownMs = 1000 * (Math.floor(31 * Math.random()) + 30);
            console.log(
              getTimestamp(),
              `[CONTENT] Sleeping for ${cooldownMs / 1000}s to avoid re-triggering anti-bot...`
            );
            setStatus(`Verification complete! Waiting ${Math.floor(cooldownMs / 1000)}s before continuing...`);
            await sleep(cooldownMs);
            console.log(getTimestamp(), "[CONTENT] Resuming after verification cooldown");
            setStatus("Resuming data collection...");
            continue;
          }

          if (response.userCanceled) {
            console.warn(
              getTimestamp(),
              "[CONTENT] User canceled verification, skipping this item and continuing..."
            );
            setStatus("Verification canceled. Continuing...");
          } else if (response.timeout) {
            console.warn(
              getTimestamp(),
              "[CONTENT] Verification timeout (1 minute), skipping this item and continuing..."
            );
            setStatus("Verification timeout. Continuing...");
          } else {
            console.warn(getTimestamp(), "[CONTENT] Verification needed but status unclear, skipping...");
            setStatus("Verification issue. Continuing...");
          }
          return null;
        }

        if (response.success) {
          console.log(getTimestamp(), `[CONTENT] Successfully fetched ${response.data.length} bytes`);
          return response.data;
        }

        console.warn(getTimestamp(), `[CONTENT] Fetch failed: ${response.error}`);
        lastError = Error(response.error);
      } catch (error) {
        console.error(getTimestamp(), "[CONTENT] Exception during fetch:", error);
        lastError = error;
      }

      if (attempt < MAX_ATTEMPTS) {
        // String() guards against a non-string error value, which would
        // otherwise throw here and end the retries early.
        const rateLimited = Boolean(response && response.error) && String(response.error).includes("429");
        if (rateLimited) {
          const backoffMs = 1000 * Math.floor(60 * Math.random() + 3);
          console.log(getTimestamp(), `Rate limited, sleeping for ${backoffMs / 1000}s before retry`);
          await sleep(backoffMs);
        } else {
          const backoffMs = 1000 * Math.floor(2 * Math.random() + 1);
          console.log(getTimestamp(), `Error occurred, sleeping for ${backoffMs / 1000}s before retry`);
          await sleep(backoffMs);
        }
      }
    }

    throw lastError || Error("Failed to fetch url after 3 attempts");
  } catch (error) {
    console.warn(getTimestamp(), "getPlacePageData error: ", a, error);
  }
  return null;
}

// Not referenced anywhere in the visible part of this file.
let messageQueue = Promise.resolve();
// CIDs already picked up by handleSearchMessage, so each one is fetched at most once.
let seen_cids = new Set();
/**
 * Processes a chunk of search-result HTML. It collects every new
 * `data-cid="…"` value, fetches each place page one at a time, extracts the
 * business data in batches of ten, and stores the resulting leads.
 *
 * A CID is new when it is in neither `seen_cids` nor `leads_lnglat`. It is
 * added to `seen_cids` before its page is fetched.
 *
 * `processing_flag` is raised for the duration of the call. Errors are logged
 * and swallowed.
 *
 * @param {string} a Search-result HTML.
 * @returns {Promise<void>}
 */
async function handleSearchMessage(a) {
  try {
    console.log(getTimestamp(), "Handle new search message received. Content length: ", a.length);
    processing_flag = true;

    const freshCids = [];
    for (const match of a.matchAll(/data-cid="([^"]*)"/g)) {
      const cid = match[1];
      if (seen_cids.has(cid) || leads_lnglat.has(cid)) {
        continue;
      }
      seen_cids.add(cid);
      freshCids.push(cid);
    }
    console.debug(getTimestamp(), "Found CIDs:", freshCids);
    console.debug(getTimestamp(), `\nTotal: ${freshCids.length} CIDs found`);

    // Pages are fetched strictly one at a time, with a random 0-2 s pause
    // after each.
    const pages = [];
    for (const cid of freshCids) {
      const html = await getPlacePageData(cid);
      if (html) {
        pages.push({ cid: cid, data: html });
      }
      const pauseMs = 1000 * Math.floor(3 * Math.random());
      await new Promise((resolve) => setTimeout(resolve, pauseMs));
    }

    // Extraction runs ten pages at a time.
    const extracted = [];
    for (let start = 0; start < pages.length; start += 10) {
      const batch = pages.slice(start, start + 10);
      const results = await Promise.all(
        batch.map((page) => extractBusinessDataForCID(page.cid, page.data))
      );
      extracted.push(...results);
    }
    console.debug(getTimestamp(), extracted);

    const validLeads = extracted.filter((item) => item && item.cid);
    await updateLeads(validLeads);

    document.getElementById("extension_gms_download_btn").innerText = `Export Results(${leads.length})`;
    document.getElementById("extension_gms_leads_info").innerHTML = "";
    processing_flag = false;
  } catch (error) {
    console.warn(getTimestamp(), error);
  } finally {
    processing_flag = false;
  }
}
/**
 * Flattens the opening-hours rows of one search result into columns.
 *
 * Each row contributes one key of the form `<weekDay>_<day>` whose value is
 * the row's time ranges joined with ", ". Rows are ordered by weekDay; rows
 * without a weekDay are dropped. Failures are logged and whatever was
 * collected so far is returned.
 *
 * @param {Array} info Positional detail array of one search result.
 * @returns {Object<string, string>} Opening-hours columns, possibly empty.
 */
function parseSearchResultHours(info) {
  const slots = [];
  try {
    const rows = info[203]?.[0];
    if (rows && Array.isArray(rows)) {
      for (const row of rows) {
        if (!row) {
          continue;
        }
        const weekDay = row[1];
        if (weekDay === undefined) {
          continue;
        }
        const hours = Array.isArray(row[3])
          ? row[3].map((range) => range?.[0] || "").filter(Boolean).join(", ")
          : "";
        slots.push({ day: row[0] || "", hours: hours, weekDay: weekDay });
      }
    }
  } catch (error) {
    console.warn(getTimestamp(), "Error processing working hours:", error);
  }

  slots.sort((left, right) => left.weekDay - right.weekDay);
  const columns = {};
  for (const slot of slots) {
    columns[`${slot.weekDay}_${slot.day}`] = slot.hours;
  }
  return columns;
}

/**
 * Converts one entry of the search feed into a lead record.
 *
 * The details are the last element of the entry; all fields are read by
 * fixed position and default to "".
 *
 * @param {Array} entry One element of the search feed array.
 * @returns {object|null} The lead, or null when the entry is not an array, has
 *   no details, or has no name.
 */
function parseSearchResultItem(entry) {
  if (!entry || !Array.isArray(entry)) {
    return null;
  }
  const info = entry[entry.length - 1];
  if (!info) {
    return null;
  }
  const name = info[11] || "";
  if (!name) {
    return null;
  }

  // Key order is kept as-is in case downstream export relies on it.
  return {
    name: name,
    phone: info[178]?.[0]?.[0] || "",
    website: info[7]?.[0] || "",
    address: Array.isArray(info[2]) ? info[2].join(",") : "",
    email: "",
    place_id: info[78] || "",
    cid: info[37]?.[0]?.[0]?.[29]?.[1] || "",
    category: Array.isArray(info[13]) ? info[13].join(";") : "",
    ratingCount: info[4]?.[8] || "",
    averageRating: info[4]?.[7] || "",
    latitude: info[9]?.[2] || "",
    longitude: info[9]?.[3] || "",
    ...parseSearchResultHours(info),
  };
}

/**
 * Handles window messages of type "search".
 *
 * `data.data` is expected to hold a JSON envelope. The envelope's `d` field
 * is a guarded JSON string, and index 64 of the decoded value is the result
 * feed. Each new result becomes a lead, leads with a website are enriched via
 * the background "email" action (50 at a time), and the leads panel is
 * refreshed.
 *
 * Leads are de-duplicated by `place_id` here.
 * NOTE(review): unlike updateLeads(), this path does not call
 * saveLeadsToStorage() - confirm whether that is intended.
 */
window.addEventListener("message", async function (event) {
  const message = event.data;
  if (!message || message.type !== "search" || !message.data) {
    return;
  }

  // UI updates are cosmetic. The panel can be temporarily detached from the
  // page, so look the elements up defensively. Scraped text is written as
  // text, not HTML.
  const setStatus = (text) => {
    const statusEl = document.getElementById("extension_gms_leads_info");
    if (statusEl) {
      statusEl.textContent = text;
    }
  };

  let feed;
  try {
    const envelope = JSON.parse(message.data.replace('/*""*/', ""));
    // Drop the 5-character guard prefix before decoding the payload.
    const payload = JSON.parse(envelope.d.slice(5));
    feed = payload[64];
  } catch (error) {
    console.error(getTimestamp(), "Failed to parse search data:", error);
    return;
  }

  if (!feed || !Array.isArray(feed)) {
    console.warn(getTimestamp(), "No valid feed data found in search results");
    return;
  }

  console.log(getTimestamp(), `Processing ${feed.length} items from search results`);

  const newLeads = [];
  for (let index = 0; index < feed.length; ++index) {
    try {
      const lead = parseSearchResultItem(feed[index]);
      if (lead && lead.place_id && !leads_lnglat.has(lead.place_id)) {
        newLeads.push(lead);
      }
    } catch (error) {
      console.warn(getTimestamp(), "Error processing item at index", index, ":", error);
    }
  }
  console.log(getTimestamp(), `Found ${newLeads.length} new leads to process`);

  // Email enrichment: 50 lookups in parallel, with a 500 ms pause between batches.
  for (let start = 0; start < newLeads.length; start += 50) {
    const lookups = newLeads.slice(start, start + 50).map(async (lead) => {
      try {
        if (lead.website) {
          setStatus(`Collecting email for ${lead.name}... (${start + 1}/${newLeads.length})`);
          const contacts = await chrome.runtime.sendMessage({
            action: "email",
            data: { website: lead.website, name: lead.name, deep_search: true },
          });
          if (contacts) {
            for (const field in contacts) {
              if (Array.isArray(contacts[field])) {
                lead[field] = contacts[field].join(",");
              }
            }
          }
        }
      } catch (error) {
        console.warn(getTimestamp(), "Collect email error for:", lead.name, error);
      }
      return lead;
    });
    await Promise.allSettled(lookups);
    if (start + 50 < newLeads.length) {
      await new Promise((resolve) => setTimeout(resolve, 500));
    }
  }

  // Re-check the key: the same place can occur twice within one feed.
  for (const lead of newLeads) {
    if (lead && lead.place_id && !leads_lnglat.has(lead.place_id)) {
      lead.profileURL = `https://www.google.com/maps?cid=${lead.cid}`;
      leads_lnglat.add(lead.place_id);
      leads.push(lead);
    }
  }

  console.log(getTimestamp(), `Total leads: ${leads.length}`);
  const exportButton = document.getElementById("extension_gms_download_btn");
  if (exportButton) {
    exportButton.innerText = `Export Results(${leads.length})`;
  }
  setStatus(`Leads: ${leads.length}`);
});