diff --git a/CMakeLists.txt b/CMakeLists.txt index afab7b0..52390c3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -129,6 +129,7 @@ set(SOURCES src/internal/logger.cpp src/internal/http_client.cpp src/internal/url_utils.cpp + src/internal/path_utils.cpp ) set(HEADERS @@ -154,6 +155,7 @@ set(INTERNAL_HEADERS src/internal/logger.h src/internal/http_client.h src/internal/url_utils.h + src/internal/path_utils.h ) # Create library target diff --git a/src/core/client.cpp b/src/core/client.cpp index b22c6fd..4967ade 100644 --- a/src/core/client.cpp +++ b/src/core/client.cpp @@ -5,6 +5,7 @@ #include "databricks/connection_pool.h" #include "../internal/logger.h" +#include "../internal/path_utils.h" #include "../internal/pool_manager.h" #include @@ -501,7 +502,21 @@ Client::Builder& Client::Builder::with_environment_config(const std::string& pro // Try to load from profile file const char* home = std::getenv("HOME"); if (home) { - std::ifstream file(std::string(home) + "/.databrickscfg"); + // Securely construct config file path to prevent path traversal + std::string config_path; + try { + config_path = internal::safe_join_path(home, ".databrickscfg"); + } catch (const std::invalid_argument& e) { + // If path validation fails, skip file loading and continue + internal::get_logger()->warn("Invalid HOME environment variable: {}. Skipping profile file.", e.what()); + config_path = ""; + } + + std::ifstream file; + if (!config_path.empty()) { + file.open(config_path); + } + if (file.is_open()) { std::string line; bool in_profile_section = false; diff --git a/src/core/config.cpp b/src/core/config.cpp index 229876d..6255f66 100644 --- a/src/core/config.cpp +++ b/src/core/config.cpp @@ -3,6 +3,7 @@ #include "databricks/core/config.h" #include "../internal/logger.h" +#include "../internal/path_utils.h" #include #include @@ -19,7 +20,15 @@ AuthConfig AuthConfig::from_profile(const std::string& profile) { throw std::runtime_error("HOME environment variable not set"); } - std::ifstream file(std::string(home) + "/.databrickscfg"); + // Securely construct config file path to prevent path traversal + std::string config_path; + try { + config_path = internal::safe_join_path(home, ".databrickscfg"); + } catch (const std::invalid_argument& e) { + throw std::runtime_error("Invalid HOME environment variable: " + std::string(e.what())); + } + + std::ifstream file(config_path); if (!file.is_open()) { throw std::runtime_error("Could not open ~/.databrickscfg"); } diff --git a/src/internal/logger.cpp b/src/internal/logger.cpp index ac19ff5..2bdef16 100644 --- a/src/internal/logger.cpp +++ b/src/internal/logger.cpp @@ -2,6 +2,8 @@ // SPDX-License-Identifier: MIT #include "logger.h" +#include "path_utils.h" + #include #include @@ -15,10 +17,23 @@ std::shared_ptr get_logger() { std::shared_ptr log; // Check for log file environment variable - const char* log_file = std::getenv("DATABRICKS_LOG_FILE"); + const char* log_file_env = std::getenv("DATABRICKS_LOG_FILE"); try { - if (log_file && std::strlen(log_file) > 0) { + if (log_file_env && std::strlen(log_file_env) > 0) { + // Validate the log file path to prevent path traversal attacks + std::string log_file; + try { + log_file = validate_env_path(log_file_env, "DATABRICKS_LOG_FILE"); + } catch (const std::invalid_argument& e) { + // If validation fails, log warning to stderr and use stderr sink + auto temp_log = spdlog::stderr_color_mt("databricks_temp"); + temp_log->warn("Invalid DATABRICKS_LOG_FILE path: {}. Using stderr instead.", e.what()); + spdlog::drop("databricks_temp"); + log = spdlog::stderr_color_mt("databricks"); + return log; + } + // Log to file log = spdlog::basic_logger_mt("databricks", log_file); } else { diff --git a/src/internal/path_utils.cpp b/src/internal/path_utils.cpp new file mode 100644 index 0000000..ae5b149 --- /dev/null +++ b/src/internal/path_utils.cpp @@ -0,0 +1,138 @@ +// Copyright (c) 2026 Calvin Min +// SPDX-License-Identifier: MIT +#include "path_utils.h" + +#include +#include +#include + +namespace databricks { +namespace internal { + +std::string validate_path(const std::string& path, bool allow_absolute) { + // Check for empty path + if (path.empty()) { + throw std::invalid_argument("Path cannot be empty"); + } + + // Check for null bytes (potential injection) + if (path.find('\0') != std::string::npos) { + throw std::invalid_argument("Path contains null byte"); + } + + // Check for excessively long paths (potential DoS) + const size_t MAX_PATH_LENGTH = 4096; + if (path.length() > MAX_PATH_LENGTH) { + throw std::invalid_argument("Path exceeds maximum length of " + std::to_string(MAX_PATH_LENGTH)); + } + + // Check for path traversal patterns + // Look for ".." as a complete path component (not part of a filename) + size_t pos = 0; + while ((pos = path.find("..", pos)) != std::string::npos) { + // Check if ".." is a standalone component + bool is_start = (pos == 0); + bool is_end = (pos + 2 >= path.length()); + bool before_separator = (pos > 0 && path[pos - 1] == '/'); + bool after_separator = (pos + 2 < path.length() && path[pos + 2] == '/'); + + if ((is_start || before_separator) && (is_end || after_separator)) { + throw std::invalid_argument("Path contains path traversal sequence (..)"); + } + pos++; + } + + // Check for absolute paths if not allowed + if (!allow_absolute && !path.empty() && path[0] == '/') { + throw std::invalid_argument("Absolute paths are not allowed"); + } + + // Additional security checks for suspicious patterns + if (path.find("/../") != std::string::npos || path.find("/./") != std::string::npos) { + throw std::invalid_argument("Path contains suspicious pattern"); + } + + // Check for paths starting with ".." + if (path.find("..") == 0 && (path.length() == 2 || path[2] == '/')) { + throw std::invalid_argument("Path starts with path traversal sequence"); + } + + // Check for paths ending with "/.." + if (path.length() >= 3 && path.substr(path.length() - 3) == "/..") { + throw std::invalid_argument("Path ends with path traversal sequence"); + } + + return path; +} + +std::string safe_join_path(const std::string& base_dir, const std::string& filename) { + // Validate base directory + if (base_dir.empty()) { + throw std::invalid_argument("Base directory cannot be empty"); + } + + validate_path(base_dir, true); + + // Validate filename + if (filename.empty()) { + throw std::invalid_argument("Filename cannot be empty"); + } + + // Filename should not contain directory separators or path traversal + if (filename.find('/') != std::string::npos) { + throw std::invalid_argument("Filename cannot contain directory separators"); + } + + if (filename.find("..") != std::string::npos) { + throw std::invalid_argument("Filename cannot contain path traversal sequence"); + } + + // Check for null bytes + if (filename.find('\0') != std::string::npos) { + throw std::invalid_argument("Filename contains null byte"); + } + + // Construct the path + std::string result = base_dir; + if (!result.empty() && result.back() != '/') { + result += '/'; + } + result += filename; + + // Final validation of the complete path + validate_path(result, true); + + return result; +} + +std::string validate_env_path(const std::string& path, const std::string& env_var_name) { + // Additional validation for environment variable paths + if (path.empty()) { + throw std::invalid_argument("Environment variable " + env_var_name + " is empty"); + } + + // Check for control characters (potential injection) + for (char c : path) { + if (std::iscntrl(static_cast(c)) && c != '\0') { + throw std::invalid_argument("Path from " + env_var_name + " contains control characters"); + } + } + + // Validate the path using standard validation + validate_path(path, true); + + // Additional check: ensure path doesn't start with suspicious patterns + const std::string dangerous_prefixes[] = {"/etc/", "/sys/", "/proc/", "/dev/"}; + + for (const auto& prefix : dangerous_prefixes) { + if (path.find(prefix) == 0) { + throw std::invalid_argument("Path from " + env_var_name + + " points to restricted system directory: " + prefix); + } + } + + return path; +} + +} // namespace internal +} // namespace databricks diff --git a/src/internal/path_utils.h b/src/internal/path_utils.h new file mode 100644 index 0000000..a596429 --- /dev/null +++ b/src/internal/path_utils.h @@ -0,0 +1,67 @@ +// Copyright (c) 2026 Calvin Min +// SPDX-License-Identifier: MIT +#pragma once + +#include + +namespace databricks { +namespace internal { + +/** + * @brief Validate and sanitize a file path to prevent path traversal attacks + * + * This function validates that a path: + * - Does not contain path traversal sequences like "../" or ".." + * - Does not contain null bytes + * - Does not start with unexpected absolute paths + * - Is within reasonable length limits + * + * @param path The file path to validate + * @param allow_absolute If true, allows absolute paths; if false, only relative paths + * @return std::string The validated path (same as input if valid) + * @throws std::invalid_argument if the path contains suspicious patterns + * + * @example + * std::string safe = validate_path("/home/user/.databrickscfg", true); + * // Throws if path contains ".." or other suspicious patterns + */ +std::string validate_path(const std::string& path, bool allow_absolute = true); + +/** + * @brief Safely construct a path by joining base directory with filename + * + * Validates both components and ensures the result does not escape the base directory. + * This is the recommended way to construct file paths from user input. + * + * @param base_dir The base directory (e.g., from HOME environment variable) + * @param filename The filename to append (e.g., ".databrickscfg") + * @return std::string The safely constructed path + * @throws std::invalid_argument if either component is invalid or result escapes base + * + * @example + * const char* home = std::getenv("HOME"); + * std::string config_path = safe_join_path(home, ".databrickscfg"); + */ +std::string safe_join_path(const std::string& base_dir, const std::string& filename); + +/** + * @brief Validate a path from an environment variable + * + * Performs additional validation specific to paths from environment variables, + * including checks for suspicious patterns and reasonable path lengths. + * + * @param path The path from the environment variable + * @param env_var_name Name of the environment variable (for error messages) + * @return std::string The validated path + * @throws std::invalid_argument if the path is invalid or suspicious + * + * @example + * const char* log_file = std::getenv("DATABRICKS_LOG_FILE"); + * if (log_file) { + * std::string safe_log = validate_env_path(log_file, "DATABRICKS_LOG_FILE"); + * } + */ +std::string validate_env_path(const std::string& path, const std::string& env_var_name); + +} // namespace internal +} // namespace databricks