From 8c077113005d8da3e26bb82177416a3216910ce5 Mon Sep 17 00:00:00 2001
From: Jake Peterson
Date: Fri, 7 Nov 2025 07:56:29 -0800
Subject: [PATCH 1/2] minor: add websocket error code handlers

---
 firecrawl.go | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/firecrawl.go b/firecrawl.go
index 695dc40..8ff3349 100644
--- a/firecrawl.go
+++ b/firecrawl.go
@@ -698,7 +698,7 @@ func (app *FirecrawlApp) makeRequest(method, url string, data map[string]any, he
 	}
 	defer resp.Body.Close()
 
-	if resp.StatusCode != 502 {
+	if resp.StatusCode != 502 && resp.StatusCode != 503 {
 		break
 	}
 
@@ -821,12 +821,18 @@ func (app *FirecrawlApp) handleError(statusCode int, body []byte, action string)
 	var message string
 
 	switch statusCode {
+	case 401:
+		message = fmt.Sprintf("Unauthorized: Failed to %s. %s", action, errorMessage)
 	case 402:
 		message = fmt.Sprintf("Payment Required: Failed to %s. %s", action, errorMessage)
+	case 403:
+		message = fmt.Sprintf("Forbidden: Failed to %s. %s", action, errorMessage)
 	case 408:
 		message = fmt.Sprintf("Request Timeout: Failed to %s as the request timed out. %s", action, errorMessage)
 	case 409:
 		message = fmt.Sprintf("Conflict: Failed to %s due to a conflict. %s", action, errorMessage)
+	case 429:
+		message = fmt.Sprintf("Too Many Requests: Failed to %s. %s", action, errorMessage)
 	case 500:
 		message = fmt.Sprintf("Internal Server Error: Failed to %s. %s", action, errorMessage)
 	default:

From 583b577ff365f8c591ae0b7eca0269209932b5b6 Mon Sep 17 00:00:00 2001
From: Jake Peterson
Date: Fri, 7 Nov 2025 07:56:39 -0800
Subject: [PATCH 2/2] Add batch scrape

---
 firecrawl.go      | 198 ++++++++++++++++++++++++++++++++++++++++++++++
 firecrawl_test.go | 135 +++++++++++++++++++++++++++++++
 2 files changed, 333 insertions(+)

diff --git a/firecrawl.go b/firecrawl.go
index 8ff3349..b51d480 100644
--- a/firecrawl.go
+++ b/firecrawl.go
@@ -144,6 +144,96 @@ type CancelCrawlJobResponse struct {
 	Status string `json:"status"`
 }
 
+// WebhookSpec represents a webhook specification
+type WebhookSpec struct {
+	URL      string            `json:"url"`
+	Headers  map[string]string `json:"headers,omitempty"`
+	Metadata map[string]any    `json:"metadata,omitempty"`
+	Events   []string          `json:"events,omitempty"`
+}
+
+// FormatSpec represents a format specification
+type FormatSpec struct {
+	Type     string         `json:"type"`
+	FullPage *bool          `json:"fullPage,omitempty"`
+	Quality  *int           `json:"quality,omitempty"`
+	Viewport *Viewport      `json:"viewport,omitempty"`
+	Schema   map[string]any `json:"schema,omitempty"`
+	Prompt   *string        `json:"prompt,omitempty"`
+	Modes    []string       `json:"modes,omitempty"`
+	Tag      *string        `json:"tag,omitempty"`
+}
+
+// Viewport represents viewport dimensions
+type Viewport struct {
+	Width  int `json:"width"`
+	Height int `json:"height"`
+}
+
+// ParserSpec represents a parser specification
+type ParserSpec struct {
+	Type     string `json:"type"`
+	MaxPages *int   `json:"maxPages,omitempty"`
+}
+
+// ActionSpec represents an action specification
+type ActionSpec struct {
+	Type         string    `json:"type"`
+	Milliseconds *int      `json:"milliseconds,omitempty"`
+	Selector     *string   `json:"selector,omitempty"`
+	FullPage     *bool     `json:"fullPage,omitempty"`
+	Quality      *int      `json:"quality,omitempty"`
+	Viewport     *Viewport `json:"viewport,omitempty"`
+	All          *bool     `json:"all,omitempty"`
+	Text         *string   `json:"text,omitempty"`
+	Key          *string   `json:"key,omitempty"`
+	Direction    *string   `json:"direction,omitempty"`
+	Script       *string   `json:"script,omitempty"`
+	Format       *string   `json:"format,omitempty"`
+	Landscape    *bool     `json:"landscape,omitempty"`
+	Scale        *float64  `json:"scale,omitempty"`
+}
+
+// LocationSpec represents location settings
+type LocationSpec struct {
+	Country   *string  `json:"country,omitempty"`
+	Languages []string `json:"languages,omitempty"`
+}
+
+// BatchScrapeParams represents the parameters for a batch scrape request
+type BatchScrapeParams struct {
+	URLs                []string          `json:"urls"`
+	Webhook             *WebhookSpec      `json:"webhook,omitempty"`
+	MaxConcurrency      *int              `json:"maxConcurrency,omitempty"`
+	IgnoreInvalidURLs   *bool             `json:"ignoreInvalidURLs,omitempty"`
+	Formats             []interface{}     `json:"formats,omitempty"`
+	OnlyMainContent     *bool             `json:"onlyMainContent,omitempty"`
+	IncludeTags         []string          `json:"includeTags,omitempty"`
+	ExcludeTags         []string          `json:"excludeTags,omitempty"`
+	MaxAge              *int              `json:"maxAge,omitempty"`
+	Headers             map[string]string `json:"headers,omitempty"`
+	WaitFor             *int              `json:"waitFor,omitempty"`
+	Mobile              *bool             `json:"mobile,omitempty"`
+	SkipTlsVerification *bool             `json:"skipTlsVerification,omitempty"`
+	Timeout             *int              `json:"timeout,omitempty"`
+	Parsers             []interface{}     `json:"parsers,omitempty"`
+	Actions             []ActionSpec      `json:"actions,omitempty"`
+	Location            *LocationSpec     `json:"location,omitempty"`
+	RemoveBase64Images  *bool             `json:"removeBase64Images,omitempty"`
+	BlockAds            *bool             `json:"blockAds,omitempty"`
+	Proxy               *string           `json:"proxy,omitempty"`
+	StoreInCache        *bool             `json:"storeInCache,omitempty"`
+	ZeroDataRetention   *bool             `json:"zeroDataRetention,omitempty"`
+}
+
+// BatchScrapeResponse represents the response for batch scrape operations
+type BatchScrapeResponse struct {
+	Success     bool     `json:"success"`
+	ID          string   `json:"id"`
+	URL         string   `json:"url"`
+	InvalidURLs []string `json:"invalidURLs,omitempty"`
+}
+
 // MapParams represents the parameters for a map request.
 type MapParams struct {
 	IncludeSubdomains *bool `json:"includeSubdomains,omitempty"`
@@ -626,6 +716,114 @@ func (app *FirecrawlApp) MapURL(url string, params *MapParams) (*MapResponse, er
 	}
 }
 
+// BatchScrape starts a batch scrape job for the specified URLs using the Firecrawl API.
+//
+// Parameters:
+//   - params: Parameters for the batch scrape request, including URLs and optional configuration.
+//
+// Returns:
+//   - *BatchScrapeResponse: The batch scrape response with job ID and URL.
+//   - error: An error if the batch scrape request fails.
+func (app *FirecrawlApp) BatchScrape(params *BatchScrapeParams) (*BatchScrapeResponse, error) {
+	if params == nil || len(params.URLs) == 0 {
+		return nil, fmt.Errorf("urls are required")
+	}
+
+	headers := app.prepareHeaders(nil)
+	batchBody := map[string]any{
+		"urls": params.URLs,
+	}
+
+	if params.Webhook != nil {
+		batchBody["webhook"] = params.Webhook
+	}
+	if params.MaxConcurrency != nil {
+		batchBody["maxConcurrency"] = params.MaxConcurrency
+	}
+	if params.IgnoreInvalidURLs != nil {
+		batchBody["ignoreInvalidURLs"] = params.IgnoreInvalidURLs
+	}
+	if params.Formats != nil {
+		batchBody["formats"] = params.Formats
+	}
+	if params.OnlyMainContent != nil {
+		batchBody["onlyMainContent"] = params.OnlyMainContent
+	}
+	if params.IncludeTags != nil {
+		batchBody["includeTags"] = params.IncludeTags
+	}
+	if params.ExcludeTags != nil {
+		batchBody["excludeTags"] = params.ExcludeTags
+	}
+	if params.MaxAge != nil {
+		batchBody["maxAge"] = params.MaxAge
+	}
+	if params.Headers != nil {
+		batchBody["headers"] = params.Headers
+	}
+	if params.WaitFor != nil {
+		batchBody["waitFor"] = params.WaitFor
+	}
+	if params.Mobile != nil {
+		batchBody["mobile"] = params.Mobile
+	}
+	if params.SkipTlsVerification != nil {
+		batchBody["skipTlsVerification"] = params.SkipTlsVerification
+	}
+	if params.Timeout != nil {
+		batchBody["timeout"] = params.Timeout
+	}
+	if params.Parsers != nil {
+		batchBody["parsers"] = params.Parsers
+	}
+	if params.Actions != nil {
+		batchBody["actions"] = params.Actions
+	}
+	if params.Location != nil {
+		batchBody["location"] = params.Location
+	}
+	if params.RemoveBase64Images != nil {
+		batchBody["removeBase64Images"] = params.RemoveBase64Images
+	}
+	if params.BlockAds != nil {
+		batchBody["blockAds"] = params.BlockAds
+	}
+	if params.Proxy != nil {
+		batchBody["proxy"] = params.Proxy
+	}
+	if params.StoreInCache != nil {
+		batchBody["storeInCache"] = params.StoreInCache
+	}
+	if params.ZeroDataRetention != nil {
+		batchBody["zeroDataRetention"] = params.ZeroDataRetention
+	}
+
+	resp, err := app.makeRequest(
+		http.MethodPost,
+		fmt.Sprintf("%s/v2/batch/scrape", app.APIURL),
+		batchBody,
+		headers,
+		"start batch scrape",
+		withRetries(3),
+		withBackoff(500),
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	var batchResponse BatchScrapeResponse
+	err = json.Unmarshal(resp, &batchResponse)
+	if err != nil {
+		return nil, err
+	}
+
+	if !batchResponse.Success {
+		return nil, fmt.Errorf("failed to start batch scrape")
+	}
+
+	return &batchResponse, nil
+}
+
 // SearchURL searches for a URL using the Firecrawl API.
 //
 // Parameters:
diff --git a/firecrawl_test.go b/firecrawl_test.go
index d012bf8..5aff76d 100644
--- a/firecrawl_test.go
+++ b/firecrawl_test.go
@@ -3,6 +3,7 @@ package firecrawl
 import (
 	"log"
 	"os"
+	"strings"
 	"testing"
 	"time"
 
@@ -544,3 +545,137 @@ func TestScrapeURLWithJSONOptions(t *testing.T) {
 	// Check that the extracted data contains the expected fields
 	assert.Contains(t, response.JSON, "mission")
 }
+
+func TestBatchScrapeInvalidAPIKey(t *testing.T) {
+	app, err := NewFirecrawlApp("invalid_api_key", API_URL)
+	require.NoError(t, err)
+
+	params := &BatchScrapeParams{
+		URLs: []string{"https://www.scrapethissite.com"},
+	}
+	_, err = app.BatchScrape(params)
+	assert.Error(t, err)
+	assert.True(t,
+		strings.Contains(err.Error(), "Unauthorized") ||
+			strings.Contains(err.Error(), "Internal Server Error") ||
+			strings.Contains(err.Error(), "Status code 401") ||
+			strings.Contains(err.Error(), "Status code 500"),
+		"Expected error to contain 'Unauthorized' or server error, got: %s", err.Error())
+}
+
+func TestBatchScrapeE2E(t *testing.T) {
+	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
+	require.NoError(t, err)
+
+	params := &BatchScrapeParams{
+		URLs: []string{
+			"https://www.scrapethissite.com",
+			"https://roastmywebsite.ai",
+		},
+		Formats:         []interface{}{"markdown"},
+		OnlyMainContent: ptr(true),
+	}
+
+	response, err := app.BatchScrape(params)
+	require.NoError(t, err)
+	assert.NotNil(t, response)
+	assert.True(t, response.Success)
+	assert.NotEmpty(t, response.ID)
+	assert.NotEmpty(t, response.URL)
+}
+
+func TestBatchScrapeWithOptionsE2E(t *testing.T) {
+	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
+	require.NoError(t, err)
+
+	params := &BatchScrapeParams{
+		URLs: []string{
+			"https://www.scrapethissite.com",
+		},
+		Formats:            []interface{}{"markdown", "html"},
+		OnlyMainContent:    ptr(true),
+		MaxConcurrency:     ptr(2),
+		IgnoreInvalidURLs:  ptr(true),
+		MaxAge:             ptr(3600000),
+		WaitFor:            ptr(1000),
+		Mobile:             ptr(false),
+		BlockAds:           ptr(true),
+		RemoveBase64Images: ptr(true),
+		StoreInCache:       ptr(true),
+	}
+
+	response, err := app.BatchScrape(params)
+	require.NoError(t, err)
+	assert.NotNil(t, response)
+	assert.True(t, response.Success)
+	assert.NotEmpty(t, response.ID)
+	assert.NotEmpty(t, response.URL)
+}
+
+func TestBatchScrapeWithWebhookE2E(t *testing.T) {
+	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
+	require.NoError(t, err)
+
+	params := &BatchScrapeParams{
+		URLs: []string{
+			"https://www.scrapethissite.com",
+		},
+		Formats: []interface{}{"markdown"},
+		Webhook: &WebhookSpec{
+			URL:    "https://example.com/webhook",
+			Events: []string{"completed", "page"},
+		},
+	}
+
+	response, err := app.BatchScrape(params)
+	require.NoError(t, err)
+	assert.NotNil(t, response)
+	assert.True(t, response.Success)
+	assert.NotEmpty(t, response.ID)
+}
+
+func TestBatchScrapeWithInvalidURLs(t *testing.T) {
+	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
+	require.NoError(t, err)
+
+	params := &BatchScrapeParams{
+		URLs: []string{
+			"https://www.scrapethissite.com",
+			"not-a-valid-url",
+			"https://roastmywebsite.ai",
+		},
+		Formats:           []interface{}{"markdown"},
+		IgnoreInvalidURLs: ptr(true),
+	}
+
+	response, err := app.BatchScrape(params)
+	require.NoError(t, err)
+	assert.NotNil(t, response)
+	assert.True(t, response.Success)
+	assert.NotEmpty(t, response.ID)
+	if response.InvalidURLs != nil {
+		assert.Contains(t, response.InvalidURLs, "not-a-valid-url")
+	}
+}
+
+func TestBatchScrapeEmptyURLs(t *testing.T) {
+	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
+	require.NoError(t, err)
+
+	params := &BatchScrapeParams{
+		URLs: []string{},
+	}
+
+	_, err = app.BatchScrape(params)
+	assert.Error(t, err)
+	assert.Contains(t, err.Error(), "urls are required")
+}
+
+func TestBatchScrapeNilParams(t *testing.T) {
+	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
+	require.NoError(t, err)
+
+	_, err = app.BatchScrape(nil)
+	assert.Error(t, err)
+	assert.Contains(t, err.Error(), "urls are required")
+}
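
For reviewers, a minimal usage sketch of the new BatchScrape entry point (not part of the diff above). It assumes the module path github.com/mendableai/firecrawl-go with package name firecrawl, an API key in the FIRECRAWL_API_KEY environment variable, and https://api.firecrawl.dev as the base URL; all of these are illustrative and should be adjusted to your setup.

package main

import (
	"fmt"
	"log"
	"os"

	"github.com/mendableai/firecrawl-go"
)

func main() {
	// NewFirecrawlApp is the existing SDK constructor; key and base URL here are placeholders.
	app, err := firecrawl.NewFirecrawlApp(os.Getenv("FIRECRAWL_API_KEY"), "https://api.firecrawl.dev")
	if err != nil {
		log.Fatal(err)
	}

	onlyMain := true
	params := &firecrawl.BatchScrapeParams{
		URLs:            []string{"https://firecrawl.dev", "https://example.com"},
		Formats:         []interface{}{"markdown"},
		OnlyMainContent: &onlyMain,
	}

	// BatchScrape only starts the asynchronous job; the response carries the
	// job ID and a status URL that can be polled for the scraped documents.
	resp, err := app.BatchScrape(params)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("batch job started:", resp.ID, resp.URL)
}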