From c7a7284cd2c043bbdb6baff260d2433c42502e1a Mon Sep 17 00:00:00 2001 From: Jason Stirnaman Date: Fri, 2 Jan 2026 11:32:05 -0600 Subject: [PATCH 1/3] chore(link-checker): update configs for v1.3.0 severity classification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove exclusions for sites that return 403/429 (bot protection) and 5xx (server errors) - these are now handled by severity classification: - 403/401/429 → info (shown but don't fail CI) - 5xx/timeout → warning (shown but don't fail CI) - 404/410/DNS → error (fail CI) Removed exclusions: - GitHub, Slack, Reddit, StackOverflow - Docker Hub, Grafana, Microsoft Learn - Claude.ai, Dremio, Scarf, InfluxData support Kept exclusions: - Localhost/local network URLs - Example/placeholder URLs - CI-specific workarounds (canonical URLs, file fragments) Added [severity] configuration section with default thresholds. --- .ci/link-checker/default.lycherc.toml | 64 ++++++++-------- .ci/link-checker/production.lycherc.toml | 95 ++++++++---------------- .github/workflows/pr-link-check.yml | 2 +- 3 files changed, 62 insertions(+), 99 deletions(-) diff --git a/.ci/link-checker/default.lycherc.toml b/.ci/link-checker/default.lycherc.toml index f769afc367..c247f67309 100644 --- a/.ci/link-checker/default.lycherc.toml +++ b/.ci/link-checker/default.lycherc.toml @@ -1,38 +1,36 @@ # Lychee link checker configuration -# Generated by link-checker +# Updated for link-checker v1.3.0 with severity-based classification +# +# With severity levels, we no longer need to exclude sites that return: +# - 403/401/429 (classified as "info" - shown but don't fail CI) +# - 5xx/timeout (classified as "warning" - shown but don't fail CI) +# Only 404/410/DNS failures are classified as "error" and fail CI. 
+ [lychee] # Performance settings - -# Maximum number of retries for failed checks - max_retries = 3 - -# Timeout for each link check (in seconds) timeout = 30 - -# Maximum number of concurrent checks max_concurrency = 128 - skip_code_blocks = false # HTTP settings -# Identify the tool to external services user_agent = "Mozilla/5.0 (compatible; link-checker)" -# Accept these HTTP status codes as valid -accept = [200, 201, 202, 203, 204, 206, 301, 302, 303, 304, -307, 308] +# Accept these HTTP status codes as valid (2xx and redirects) +accept = [200, 201, 202, 203, 204, 206, 301, 302, 303, 304, 307, 308] # Skip these URL schemes scheme = ["file", "mailto", "tel"] # Exclude patterns (regex supported) +# NOTE: With v1.3.0 severity classification, we only need to exclude: +# - Local-only URLs (localhost, local networks) +# - Placeholder/example URLs +# - Known false positives (not HTTP status related) exclude = [ - # Localhost URLs + # Localhost and local network URLs "^https?://localhost", "^https?://127\\.0\\.0\\.1", - - # Common CI/CD environments "^https?://.*\\.local", # Example domains used in documentation @@ -42,22 +40,6 @@ exclude = [ "https://example.com/REMOVED_FROM_CODE_BLOCK", "example.com/INLINE_CODE_URL", - # URLs that require authentication - "^https?://.*\\.slack\\.com", - "^https?://.*\\.atlassian\\.net", - - # GitHub URLs (often fail due to rate limiting and bot - # detection) - "^https?://github\\.com", - - # StackExchange network URLs (often block automated requests) - "^https?://.*\\.stackexchange\\.com", - "^https?://stackoverflow\\.com", - "^https?://.*\\.stackoverflow\\.com", - - # Docker Hub URLs (rate limiting and bot detection) - "^https?://hub\\.docker\\.com", - # Common documentation placeholders "YOUR_.*", "REPLACE_.*", @@ -67,8 +49,22 @@ exclude = [ # Request headers [headers] # Add custom headers here if needed -# "Authorization" = "Bearer $GITHUB_TOKEN" # Cache settings cache = true -max_cache_age = "1d" \ No newline at end of file 
+max_cache_age = "1d" + +# Severity classification (link-checker v1.3.0+) +# These settings control which HTTP status codes fail CI vs show as warnings/info +[severity] +# Error codes fail CI - genuine broken links +error_codes = [404, 410] + +# Warning codes are shown but don't fail CI - transient issues +warning_codes = [500, 502, 503, 504] + +# Info codes are low priority - access restrictions, bot protection +info_codes = [401, 403, 429] + +# Set to true to treat warnings as errors (stricter validation) +strict = false diff --git a/.ci/link-checker/production.lycherc.toml b/.ci/link-checker/production.lycherc.toml index 04fd241a8b..6f72bab1ac 100644 --- a/.ci/link-checker/production.lycherc.toml +++ b/.ci/link-checker/production.lycherc.toml @@ -1,23 +1,20 @@ # Production Link Checker Configuration for InfluxData docs-v2 -# Optimized for performance, reliability, and reduced false positives +# Updated for link-checker v1.3.0 with severity-based classification +# +# With severity levels, we no longer need to exclude sites that return: +# - 403/401/429 (classified as "info" - shown but don't fail CI) +# - 5xx/timeout (classified as "warning" - shown but don't fail CI) +# Only 404/410/DNS failures are classified as "error" and fail CI. 
+ [lychee] # Performance settings - -# Maximum number of retries for failed checks - max_retries = 3 - -# Timeout for each link check (in seconds) timeout = 30 - -# Maximum number of concurrent checks max_concurrency = 128 - skip_code_blocks = false # HTTP settings -# Identify the tool to external services -"User-Agent" = "Mozilla/5.0 (compatible; influxdata-link-checker/1.0; +https://github.com/influxdata/docs-v2)" +"User-Agent" = "Mozilla/5.0 (compatible; influxdata-link-checker/1.3; +https://github.com/influxdata/docs-v2)" accept = [200, 201, 202, 203, 204, 206, 301, 302, 303, 304, 307, 308] # Skip these URL schemes @@ -31,12 +28,15 @@ max_cache_age = "1h" include_verbatim = false # Exclusion patterns for docs-v2 (regex supported) +# NOTE: With v1.3.0 severity classification, we only need to exclude: +# - Local-only URLs (localhost, local networks) +# - Placeholder/example URLs +# - Known false positives (not HTTP status related) +# - CI-specific workarounds exclude = [ - # Localhost URLs + # Localhost and local network URLs "^https?://localhost", "^https?://127\\.0\\.0\\.1", - - # Common CI/CD environments "^https?://.*\\.local", # Example domains used in documentation @@ -46,51 +46,6 @@ exclude = [ "https://example.com/REMOVED_FROM_CODE_BLOCK", "example.com/INLINE_CODE_URL", - # URLs that require authentication - "^https?://.*\\.slack\\.com", - "^https?://.*\\.atlassian\\.net", - - # GitHub URLs (often fail due to rate limiting and bot - # detection) - "^https?://github\\.com", - - # Social media URLs (often block bots) - "^https?://reddit\\.com", - "^https?://.*\\.reddit\\.com", - - # StackExchange network URLs (often block automated requests) - "^https?://.*\\.stackexchange\\.com", - "^https?://stackoverflow\\.com", - "^https?://.*\\.stackoverflow\\.com", - - # Docker Hub URLs (rate limiting and bot detection) - "^https?://hub\\.docker\\.com", - - # InfluxData support URLs (certificate/SSL issues in CI) - "^https?://support\\.influxdata\\.com", - 
"^https?://influxdata\\.com/slack", - - # NSSM download URLs (403 errors for automated requests) - "^https?://nssm\\.cc/download", - - # AI platforms (often block automated requests) - "^https?://claude\\.ai", - "^https?://.*\\.claude\\.ai", - - # Microsoft Learn documentation (bot detection/rate limiting) - "^https?://learn\\.microsoft\\.com", - "^https?://.*\\.microsoft\\.com/.*", - - # Dremio download URLs (403 errors for automated requests) - "^https?://download\\.dremio\\.com", - - # Scarf analytics tracking pixels (certificate/network errors) - "^https?://static\\.scarf\\.sh", - - # Grafana documentation (bot detection/rate limiting) - "^https?://grafana\\.com", - "^https?://.*\\.grafana\\.com", - # Production site URLs (when testing locally, these should be relative) # This excludes canonical URLs and other absolute production URLs # TODO: Remove after fixing canonical URL generation or link-checker domain replacement @@ -100,7 +55,6 @@ exclude = [ # link-checker converts /path/to/page#fragment to file:///path/to/page#fragment # but the actual file is at /path/to/page/index.html, causing false fragment errors # TODO: Remove after fixing link-checker to handle Hugo pretty URLs with fragments - # See: https://github.com/influxdata/docs-tooling/issues/XXX "^file://.*#", # Common documentation placeholders @@ -109,10 +63,8 @@ exclude = [ "<.*>", ] -# Request headers +# Request headers (help avoid bot detection on some sites) [headers] -# Add custom headers here if needed -# "Authorization" = "Bearer $GITHUB_TOKEN" "Accept" = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" "Accept-Language" = "en-US,en;q=0.5" "Accept-Encoding" = "gzip, deflate" @@ -120,6 +72,21 @@ exclude = [ "Connection" = "keep-alive" "Upgrade-Insecure-Requests" = "1" +# Severity classification (link-checker v1.3.0+) +# These settings control which HTTP status codes fail CI vs show as warnings/info +[severity] +# Error codes fail CI - genuine broken links +error_codes = [404, 
410] + +# Warning codes are shown but don't fail CI - transient issues +warning_codes = [500, 502, 503, 504] + +# Info codes are low priority - access restrictions, bot protection +info_codes = [401, 403, 429] + +# Set to true to treat warnings as errors (stricter validation) +strict = false + [ci] # CI-specific settings @@ -147,4 +114,4 @@ no_progress = true # Disable progress bar in CI # Summary settings show_success_count = true -show_skipped_count = true +show_skipped_count = true diff --git a/.github/workflows/pr-link-check.yml b/.github/workflows/pr-link-check.yml index 7f33806e0a..f4f280fa23 100644 --- a/.github/workflows/pr-link-check.yml +++ b/.github/workflows/pr-link-check.yml @@ -95,7 +95,7 @@ jobs: curl -L -H "Accept: application/vnd.github+json" \ -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \ -o link-checker-info.json \ - "https://api.github.com/repos/influxdata/docs-v2/releases/tags/link-checker-v1.2.5" + "https://api.github.com/repos/influxdata/docs-v2/releases/tags/link-checker-v1.3.0" # Extract download URL for linux binary DOWNLOAD_URL=$(jq -r '.assets[] | select(.name | test("link-checker.*linux")) | .url' link-checker-info.json) From cbfc64af4bd6286f49add5324c6babf1c847ca10 Mon Sep 17 00:00:00 2001 From: Jason Stirnaman Date: Sun, 4 Jan 2026 12:34:01 -0600 Subject: [PATCH 2/3] docs(influxdb3): document log-filter values and targeted filtering Add comprehensive documentation for the --log-filter configuration option: - Log levels table (error, warn, info, debug, trace) - Targeted filtering syntax for specific components - Common component names for Core and Enterprise - Debug logging section in write troubleshoot page closes influxdata/DAR#575 --- .gitignore | 1 + .../shared/influxdb3-cli/config-options.md | 87 ++++++++++++++++++- .../influxdb3-write-guides/troubleshoot.md | 27 +++++- 3 files changed, 113 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 5b5487a3d5..3cd0d666d0 100644 --- a/.gitignore +++ 
b/.gitignore @@ -43,6 +43,7 @@ tmp # User context files for AI assistant tools .context/* !.context/README.md +.task.md # External repos .ext/* diff --git a/content/shared/influxdb3-cli/config-options.md b/content/shared/influxdb3-cli/config-options.md index 46796551ca..bb68462f49 100644 --- a/content/shared/influxdb3-cli/config-options.md +++ b/content/shared/influxdb3-cli/config-options.md @@ -832,7 +832,92 @@ Sets the endpoint of an S3-compatible, HTTP/2-enabled object store cache. #### log-filter -Sets the filter directive for logs. +Sets the filter directive for logs. Use this option to control the verbosity of +server logs globally or for specific components. + +##### Log levels + +The following log levels are available (from least to most verbose): + +| Level | Description | +| :------ | :---------------------------------------------------------------------------------------------------- | +| `error` | Only errors that indicate serious problems | +| `warn` | Warnings and errors | +| `info` | Informational messages, warnings, and errors _(default)_ | +| `debug` | Debug information for troubleshooting, plus all above levels | +| `trace` | Very detailed tracing information, plus all above levels (produces high log volume) | + +##### Basic usage + +To set the log level globally, pass one of the log levels: + + + +```sh +influxdb3 serve --log-filter debug +``` + +##### Targeted filtering + +Globally enabling `debug` or `trace` produces a high volume of log output. +For more targeted debugging, you can set different log levels for specific +components using the format `<global_level>,<component>=<level>`. 
+ +###### Debug write buffer operations + + + +```sh +influxdb3 serve --log-filter info,influxdb3_write_buffer=debug +``` + +###### Trace WAL operations + + + +```sh +influxdb3 serve --log-filter info,influxdb3_wal=trace +``` + +###### Multiple targeted filters + + + +```sh +influxdb3 serve --log-filter info,influxdb3_write_buffer=debug,influxdb3_wal=debug +``` + +{{% show-in "enterprise" %}} + +###### Debug Enterprise storage engine operations + + + +```sh +influxdb3 serve --log-filter info,influxdb3_pacha_tree=debug +``` + +{{% /show-in %}} + +##### Common component names + +The following are common component names you can use for targeted filtering: + +| Component | Description | +| :------------------------------------ | :------------------------------------------------------- | +| `influxdb3_write_buffer` | Write buffer operations | +| `influxdb3_wal` | Write-ahead log operations | +| `influxdb3_catalog` | Catalog and schema operations | +| `influxdb3_cache` | Caching operations | +{{% show-in "enterprise" %}}`influxdb3_pacha_tree` | Enterprise storage engine operations | +`influxdb3_enterprise` | Enterprise-specific features | +{{% /show-in %}} + +> [!Note] +> Targeted filtering requires knowledge of the codebase component names. +> The component names correspond to Rust package names in the InfluxDB 3 source +> code. Use `debug` or `trace` sparingly on specific components to avoid +> excessive log output. 
| influxdb3 serve option | Environment variable | | :--------------------- | :------------------- | diff --git a/content/shared/influxdb3-write-guides/troubleshoot.md b/content/shared/influxdb3-write-guides/troubleshoot.md index 4b998e1523..bd4430cd05 100644 --- a/content/shared/influxdb3-write-guides/troubleshoot.md +++ b/content/shared/influxdb3-write-guides/troubleshoot.md @@ -6,7 +6,8 @@ Learn how to avoid unexpected results and recover from errors when writing to - [Review HTTP status codes](#review-http-status-codes) - [Troubleshoot failures](#troubleshoot-failures) - [Troubleshoot rejected points](#troubleshoot-rejected-points) -{{% show-in "core,enterprise" %}}- [Troubleshoot write performance issues](#troubleshoot-write-performance-issues){{% /show-in %}} +{{% show-in "core,enterprise" %}}- [Troubleshoot write performance issues](#troubleshoot-write-performance-issues) + - [Use debug logs for troubleshooting](#use-debug-logs-for-troubleshooting){{% /show-in %}} ## Handle write responses @@ -105,4 +106,28 @@ influxdb3 serve \ Replace {{% code-placeholder-key %}}`PERCENTAGE`{{% /code-placeholder-key %}} with the percentage of available memory to allocate (for example, `35%` for write-heavy workloads). +### Use debug logs for troubleshooting + +For deeper investigation of write issues, enable debug logging for specific +components. Debug logs provide detailed information about write buffer +operations and WAL activity. + +To enable debug logs for write operations, restart {{% product-name %}} with +targeted log filters: + + + +```sh +influxdb3 serve --log-filter info,influxdb3_write_buffer=debug +``` + + + +```sh +influxdb3 serve --log-filter info,influxdb3_wal=debug +``` + +For more information about log levels and targeted filtering, see +[log-filter configuration](/influxdb3/version/reference/config-options/#log-filter). 
+ {{% /show-in %}} From ef0849e2ddf951eb22db91526b9abe1bbc2199cf Mon Sep 17 00:00:00 2001 From: Jason Stirnaman Date: Tue, 6 Jan 2026 13:29:30 -0600 Subject: [PATCH 3/3] Update content/shared/influxdb3-cli/config-options.md --- content/shared/influxdb3-cli/config-options.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shared/influxdb3-cli/config-options.md b/content/shared/influxdb3-cli/config-options.md index bb68462f49..3723912dd1 100644 --- a/content/shared/influxdb3-cli/config-options.md +++ b/content/shared/influxdb3-cli/config-options.md @@ -841,7 +841,7 @@ The following log levels are available (from least to most verbose): | Level | Description | | :------ | :---------------------------------------------------------------------------------------------------- | -| `error` | Only errors that indicate serious problems | +| `error` | Only errors | | `warn` | Warnings and errors | | `info` | Informational messages, warnings, and errors _(default)_ | | `debug` | Debug information for troubleshooting, plus all above levels |