Skip to content

Commit 27f0552

Browse files
authored
Initial Commit (#57)
1 parent 4b004a8 commit 27f0552

2 files changed

Lines changed: 142 additions & 8 deletions

File tree

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
<?php
2+
3+
namespace App\Console\Commands;
4+
5+
use Illuminate\Console\Command;
6+
use Illuminate\Support\Facades\Http;
7+
use Illuminate\Support\Str;
8+
9+
/**
 * Artisan command that queries SerpApi's Google engine with a `site:<domain>`
 * search, collects the distinct result links page by page, and writes them to
 * a single-column CSV file.
 */
class ScrapeGoogleUrls extends Command
{
    protected $signature = 'site:google-urls
        {domain : The domain to search (e.g. example.com)}
        {--output= : Output CSV file path (defaults to domain-urls.csv)}
        {--limit=100 : Max number of URLs to retrieve}
        {--delay=1 : Delay in seconds between API requests}';

    protected $description = 'Fetch all Google-indexed URLs for a domain via SerpApi and export to CSV';

    private string $apiKey;
    private string $baseUrl = 'https://serpapi.com/search';

    /**
     * Runs the scrape and exports the collected URLs.
     *
     * If a request fails part-way through, the URLs gathered so far are still
     * exported and the command reports success; it only fails when nothing was
     * collected, when the options are invalid, or when the CSV cannot be written.
     *
     * @return int self::SUCCESS or self::FAILURE
     */
    public function handle(): int
    {
        // config() yields null when the key is unset; normalise to a string first,
        // because assigning null to the typed property would throw a TypeError
        // before the friendly message below could be shown.
        $configuredKey = config('services.serpapi.api_key');
        $this->apiKey = is_string($configuredKey) ? trim($configuredKey) : '';

        if ($this->apiKey === '') {
            $this->error('Missing SERPAPI_API_KEY in your .env file.');
            return self::FAILURE;
        }

        $domain = (string) $this->argument('domain');
        $limit = (int) $this->option('limit');
        // sleep() rejects negative values, so clamp instead of crashing mid-run.
        $delay = max(0, (int) $this->option('delay'));

        if ($limit < 1) {
            $this->error('The --limit option must be a positive integer.');
            return self::FAILURE;
        }

        // Treat an empty "--output=" the same as an omitted option.
        $output = $this->option('output');
        if (!is_string($output) || $output === '') {
            $output = Str::slug($domain) . '-urls.csv';
        }

        $this->info("Searching Google for indexed URLs on: {$domain}");
        $this->info("Output file: {$output}");
        $this->newLine();

        $urls = [];
        $seen = []; // URL => true, for constant-time duplicate checks
        $start = 0;
        $perPage = 10;

        $progress = $this->output->createProgressBar($limit);
        $progress->start();

        while (count($urls) < $limit) {
            $data = $this->fetchPage($domain, $start, $perPage);

            if ($data === null) {
                // fetchPage() has already reported the error.
                break;
            }

            $items = $data['organic_results'] ?? [];

            if (!is_array($items) || $items === []) {
                $this->newLine();
                $this->info('No more results found.');
                break;
            }

            foreach ($items as $item) {
                $link = is_array($item) ? ($item['link'] ?? null) : null;

                if (!is_string($link) || $link === '') {
                    // Result entry without a usable link; nothing to record.
                    continue;
                }

                $url = rtrim($link, '/');

                if (isset($seen[$url])) {
                    continue;
                }

                $seen[$url] = true;
                $urls[] = $url;
                $progress->advance();

                if (count($urls) >= $limit) {
                    // Limit reached: leave both loops without another sleep.
                    break 2;
                }
            }

            if (count($items) < $perPage) {
                // Fewer results than requested — end of results
                break;
            }

            $start += $perPage;

            if ($delay > 0) {
                sleep($delay);
            }
        }

        $progress->finish();
        $this->newLine(2);

        if ($urls === []) {
            $this->error('No URLs were found. Check your API key or try a different domain.');
            return self::FAILURE;
        }

        try {
            $this->writeCsv($urls, $output);
        } catch (\RuntimeException | \ErrorException $e) {
            // ErrorException is included because the framework's error handler may
            // turn filesystem warnings into exceptions before our own checks run.
            $this->error("Could not write {$output}: {$e->getMessage()}");
            return self::FAILURE;
        }

        $this->info('✓ Exported ' . count($urls) . " URLs to {$output}");

        return self::SUCCESS;
    }

    /**
     * Requests one page of results and returns the decoded payload, or null
     * after printing an error when the request or the API reports a failure.
     *
     * @param string $domain  Domain used in the `site:` query
     * @param int    $start   Result offset sent as the `start` parameter
     * @param int    $perPage Page size sent as the `num` parameter
     *
     * @return array<string, mixed>|null
     */
    private function fetchPage(string $domain, int $start, int $perPage): ?array
    {
        try {
            $response = Http::get($this->baseUrl, [
                'api_key' => $this->apiKey,
                'engine' => 'google',
                'q' => "site:{$domain}",
                'start' => $start,
                'num' => $perPage,
            ]);
        } catch (\Illuminate\Http\Client\ConnectionException $e) {
            // Network-level failure (DNS, timeout, refused connection).
            $this->newLine();
            $this->error("API request failed: {$e->getMessage()}");
            return null;
        }

        if ($response->failed()) {
            $this->newLine();
            $error = $response->json('error') ?? $response->status();
            $this->error("API request failed: {$error}");
            return null;
        }

        $data = $response->json();

        if (!is_array($data)) {
            $this->newLine();
            $this->error('API request failed: response body was not a JSON object.');
            return null;
        }

        // SerpApi returns an error key in the JSON for soft errors
        if (!empty($data['error'])) {
            $message = is_string($data['error']) ? $data['error'] : json_encode($data['error']);
            $this->newLine();
            $this->error("SerpApi error: {$message}");
            return null;
        }

        return $data;
    }

    /**
     * Writes the URLs to a CSV file with a UTF-8 BOM and an "Old URLs" header row,
     * creating the target directory when it does not exist.
     *
     * @param array<int, string> $urls URLs to write, one per row
     * @param string             $path Destination file path
     *
     * @throws \RuntimeException when the directory or file cannot be created
     */
    private function writeCsv(array $urls, string $path): void
    {
        $dir = dirname($path);

        // The second is_dir() covers another process creating the directory
        // between the first check and mkdir().
        if (!is_dir($dir) && !mkdir($dir, 0755, true) && !is_dir($dir)) {
            throw new \RuntimeException("Unable to create directory: {$dir}");
        }

        $handle = fopen($path, 'w');

        if ($handle === false) {
            throw new \RuntimeException("Unable to open file for writing: {$path}");
        }

        try {
            // UTF-8 BOM for Excel compatibility
            fwrite($handle, "\xEF\xBB\xBF");

            fputcsv($handle, ['Old URLs']);

            foreach ($urls as $url) {
                fputcsv($handle, [$url]);
            }
        } finally {
            // Always release the handle, even if a write throws.
            fclose($handle);
        }
    }
}

config/services.php

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,6 @@
1818
'token' => env('POSTMARK_TOKEN'),
1919
],
2020

21-
'resend' => [
22-
'key' => env('RESEND_KEY'),
23-
],
24-
2521
'ses' => [
2622
'key' => env('AWS_ACCESS_KEY_ID'),
2723
'secret' => env('AWS_SECRET_ACCESS_KEY'),
@@ -34,10 +30,9 @@
3430
'channel' => env('SLACK_BOT_USER_DEFAULT_CHANNEL'),
3531
],
3632
],
37-
38-
'google-maps' => [
39-
'key' => env('GOOGLE_MAPS_GEOCODING_API_KEY'),
40-
'map_id' => env('GOOGLE_MAP_ID'),
33+
34+
'serpapi' => [
35+
'api_key' => env('SERPAPI_API_KEY'),
4136
],
4237

4338
];

0 commit comments

Comments
 (0)