1+ <?php
2+
3+ namespace App \Console \Commands ;
4+
5+ use Illuminate \Console \Command ;
6+ use Illuminate \Support \Facades \Http ;
7+ use Illuminate \Support \Str ;
8+
/**
 * Artisan command that collects the URLs Google has indexed for a domain.
 *
 * Results are fetched page by page from SerpApi using a `site:` query,
 * de-duplicated (ignoring trailing slashes) and written to a single-column
 * CSV file.
 */
class ScrapeGoogleUrls extends Command
{
    /** Number of results requested from SerpApi per page. */
    private const PER_PAGE = 10;

    protected $signature = 'site:google-urls
                            {domain : The domain to search (e.g. example.com)}
                            {--output= : Output CSV file path (defaults to domain-urls.csv)}
                            {--limit=100 : Max number of URLs to retrieve}
                            {--delay=1 : Delay in seconds between API requests}';

    protected $description = 'Fetch all Google-indexed URLs for a domain via SerpApi and export to CSV';

    private string $apiKey;

    private string $baseUrl = 'https://serpapi.com/search';

    /**
     * Run the command: validate input, collect URLs and export them.
     *
     * @return int self::SUCCESS when at least one URL was exported, self::FAILURE otherwise.
     */
    public function handle(): int
    {
        // Cast first: assigning a null config value to the typed property would raise
        // a TypeError before the friendly "missing key" message could be shown.
        $this->apiKey = (string) config('services.serpapi.api_key');

        if ($this->apiKey === '') {
            $this->error('Missing SERPAPI_API_KEY in your .env file.');

            return self::FAILURE;
        }

        $domain = trim((string) $this->argument('domain'));
        $limit = (int) $this->option('limit');
        // sleep() rejects negative values on PHP 8, so clamp the delay at zero.
        $delay = max(0, (int) $this->option('delay'));

        if ($domain === '') {
            $this->error('The domain argument must not be empty.');

            return self::FAILURE;
        }

        if ($limit < 1) {
            $this->error('The --limit option must be a positive integer.');

            return self::FAILURE;
        }

        $output = $this->option('output');

        // Treat both an omitted option and an empty "--output=" as "use the default".
        if ($output === null || $output === '') {
            $output = Str::slug($domain) . '-urls.csv';
        }

        $this->info("Searching Google for indexed URLs on: {$domain}");
        $this->info("Output file: {$output}");
        $this->newLine();

        $urls = $this->collectUrls($domain, $limit, $delay);

        if ($urls === []) {
            $this->error('No URLs were found. Check your API key or try a different domain.');

            return self::FAILURE;
        }

        try {
            $this->writeCsv($urls, $output);
        } catch (\RuntimeException | \ErrorException $e) {
            $this->error("Could not write CSV file: {$e->getMessage()}");

            return self::FAILURE;
        }

        $this->info('✓ Exported ' . count($urls) . " URLs to {$output}");

        return self::SUCCESS;
    }

    /**
     * Page through SerpApi results until the limit is reached or results run out.
     *
     * Any request or API error is reported to the console and ends the paging;
     * URLs gathered up to that point are still returned.
     *
     * @param string $domain Domain used in the `site:` query.
     * @param int    $limit  Maximum number of unique URLs to collect (>= 1).
     * @param int    $delay  Seconds to wait between consecutive requests (>= 0).
     *
     * @return list<string> Unique URLs, in the order they were first seen.
     */
    private function collectUrls(string $domain, int $limit, int $delay): array
    {
        $urls = [];
        $seen = []; // URL => true, gives O(1) duplicate checks.
        $start = 0;

        $progress = $this->output->createProgressBar($limit);
        $progress->start();

        while (count($urls) < $limit) {
            try {
                $response = Http::get($this->baseUrl, [
                    'api_key' => $this->apiKey,
                    'engine' => 'google',
                    // No space after "site:" so the query uses the search operator.
                    'q' => "site:{$domain}",
                    'start' => $start,
                    'num' => self::PER_PAGE,
                ]);
            } catch (\Illuminate\Http\Client\ConnectionException $e) {
                // Network-level failure (DNS, timeout, ...): report it instead of crashing.
                $this->newLine();
                $this->error("API request failed: {$e->getMessage()}");
                break;
            }

            if ($response->failed()) {
                $this->newLine();
                $error = $response->json('error') ?? $response->status();
                $this->error("API request failed: {$error}");
                break;
            }

            $data = $response->json();

            if (!is_array($data)) {
                $data = [];
            }

            // SerpApi returns an error key in the JSON for soft errors.
            if (!empty($data['error'])) {
                $this->newLine();
                $this->error("SerpApi error: {$data['error']}");
                break;
            }

            $items = $data['organic_results'] ?? [];

            if (empty($items)) {
                $this->newLine();
                $this->info('No more results found.');
                break;
            }

            foreach ($items as $item) {
                $link = is_array($item) ? ($item['link'] ?? null) : null;

                // Skip malformed entries rather than failing on a missing key.
                if (!is_string($link) || $link === '') {
                    continue;
                }

                $url = rtrim($link, '/');

                if (!isset($seen[$url])) {
                    $seen[$url] = true;
                    $urls[] = $url;
                    $progress->advance();
                }

                if (count($urls) >= $limit) {
                    break;
                }
            }

            // Stop before sleeping when there is nothing left to fetch.
            // NOTE(review): a short page is treated as the end of results; confirm
            // against SerpApi's pagination data that this cannot stop too early.
            if (count($urls) >= $limit || count($items) < self::PER_PAGE) {
                break;
            }

            $start += self::PER_PAGE;

            sleep($delay);
        }

        $progress->finish();
        $this->newLine(2);

        return $urls;
    }

    /**
     * Write the URLs to a single-column CSV file, creating the directory if needed.
     *
     * @param array  $urls URLs to export, one per row.
     * @param string $path Destination file path; an existing file is overwritten.
     *
     * @throws \RuntimeException When the directory or the file cannot be created.
     */
    private function writeCsv(array $urls, string $path): void
    {
        $dir = dirname($path);

        // The trailing is_dir() covers the directory being created concurrently.
        if (!is_dir($dir) && !mkdir($dir, 0755, true) && !is_dir($dir)) {
            throw new \RuntimeException("Unable to create directory: {$dir}");
        }

        $handle = fopen($path, 'w');

        if ($handle === false) {
            throw new \RuntimeException("Unable to open file for writing: {$path}");
        }

        try {
            // UTF-8 BOM for Excel compatibility
            fwrite($handle, "\xEF\xBB\xBF");

            fputcsv($handle, ['Old URLs']);

            foreach ($urls as $url) {
                fputcsv($handle, [$url]);
            }
        } finally {
            fclose($handle);
        }
    }
}
0 commit comments