Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
3b292a7
refactor(zip): remove unsafe transmute by sequential reader design\n\…
danalec Nov 24, 2025
acfc02c
refactor(env): replace unsafe env mutations with safe set_var\n\nUse …
danalec Nov 24, 2025
61971b7
fix(errors): remove unwrap/expect and harden ffmpeg handling\n\nUse c…
danalec Nov 24, 2025
eea76d1
fix(errors): remove unwraps in mbox adapter runtime paths\n\nHandle m…
danalec Nov 24, 2025
eea6bdb
fix(errors): harden external process handling and diagnostics\n\nCapt…
danalec Nov 24, 2025
ca52442
fix(errors): remove expect on mtime in cache key\n\nUse anyhow::Conte…
danalec Nov 24, 2025
b60754a
deps: align async_zip to 0.0.17\n\nPrefer newer version to eliminate …
danalec Nov 24, 2025
beab1f9
perf(preproc): use BufReader for file input\n\nReduce syscalls and im…
danalec Nov 24, 2025
8272830
deps: update Cargo.lock after dependency alignment
danalec Nov 24, 2025
95b917c
feat(config): adapter extensions configurable via RgaConfig\n\nAdd --…
danalec Nov 24, 2025
905f57d
perf(preproc): cache env config in rga-preproc\n\nUse OnceCell to cac…
danalec Nov 24, 2025
8f51b65
deps: remove duplicate tokio-util entry; apply adapter extension over…
danalec Nov 24, 2025
5677b35
docs/tests: document adapter extension overrides and add unit tests\n…
danalec Nov 24, 2025
2665c4a
deps: bump to stable minor ranges for core crates\n\nSet caret-compat…
danalec Nov 24, 2025
fc59c7a
deps: remove direct clap v4 to avoid duplicate clap versions\n\nStruc…
danalec Nov 24, 2025
3d02d62
deps: restore tokio-stream and revert async_zip to 0.0.12 for compati…
danalec Nov 24, 2025
3d8ff39
fix(build): use StreamReader for in-memory ZIP entry buffers and move…
danalec Nov 24, 2025
309b740
fix(zip): resolve borrow conflict by precomputing file metadata befor…
danalec Nov 24, 2025
f8bc550
fix(build): import anyhow::Context and remove unused SyncIoBridge import
danalec Nov 24, 2025
fd304ca
fix(build): avoid unsafe env mutations; set debug via env_logger buil…
danalec Nov 24, 2025
e3420b6
fix(tests): normalize CRLF to LF in custom and postproc adapters\n\nW…
danalec Nov 24, 2025
46454e3
deps/cli: migrate StructOpt to clap v4 derive\n\nReplace StructOpt wi…
danalec Nov 24, 2025
a8048e0
cli: fix clap v4 defaults and CommandFactory imports\n\nUse default_v…
danalec Nov 24, 2025
6ed7907
cli: update clap v4 attributes (override_usage, value_delimiter) and …
danalec Nov 24, 2025
add9ac9
deps: align async-compression to 0.3.15 to match async_zip chain and …
danalec Nov 24, 2025
b607e34
deps: bump schemars to 0.9, tree_magic_mini to 3.1.6; enable async-co…
danalec Nov 24, 2025
6e1ca76
deps: bump rusqlite to 0.31 to align hashbrown chain
danalec Nov 24, 2025
252d346
deps: Phase 3 complete and synced\n\nBuild, tests, and clippy passing…
danalec Nov 24, 2025
df936fd
deps(lock): sync Cargo.lock after Phase 3 Dependency Alignment
danalec Nov 24, 2025
4a48d9d
feat(fzf): add --rg-params, --rg-preview-params, and --fzf-params\n\n…
danalec Nov 24, 2025
256ee5e
feat(postproc): make binary marker and page-break prefix configurable…
danalec Nov 24, 2025
c6ebb2a
fix(postproc): resolve lifetime capture by switching to generic input…
danalec Nov 24, 2025
c9011e5
fix(postproc): capture anonymous lifetime in use<'_, T> for opaque type
danalec Nov 24, 2025
d34ef6e
fix(postproc): own page prefix string to satisfy lifetime; update tes…
danalec Nov 24, 2025
18830c9
fix(postproc): return Pin<Box<dyn AsyncRead + Send>> to avoid impl Tr…
danalec Nov 24, 2025
9a07749
fix(postproc): add 'static bound to input to satisfy StreamReader lif…
danalec Nov 24, 2025
5648c56
chore: remove unused anyhow::Context import in rga-fzf-open to fix wa…
danalec Nov 24, 2025
4980e54
fix(fzf): use anyhow::Context in rga-fzf-open and add context to fzf …
danalec Nov 24, 2025
0789897
test(postproc): update postproc_encoding call to include binary marke…
danalec Nov 24, 2025
baaaf12
docs: document adapter extension overrides in README and default conf…
danalec Nov 24, 2025
def6afd
fix(ffmpeg): avoid unwrap in subtitle time regex; propagate error
danalec Nov 24, 2025
69bb53e
fix(mbox): initialize FROM_REGEX without unwrap; propagate errors
danalec Nov 24, 2025
23505b8
initial infer fork
danalec Nov 24, 2025
d21ebf6
merge upstream master
danalec Mar 16, 2026
41062ca
feat(cache): add config hash and file mtime to cache key
danalec Mar 16, 2026
c265f21
refactor(adapters): pass active adapters through recursion to avoid r…
danalec Mar 16, 2026
1b4dfc5
feat(cli): add rga-doctor and cache management tools
danalec Mar 16, 2026
edb7f4a
feat(adapters): add tesseract OCR adapter
danalec Mar 16, 2026
683575c
feat(adapters): add unified streaming for ffmpeg
danalec Mar 16, 2026
8f292b6
feat: implement persistent daemon, password support, and enhanced fzf…
danalec Mar 17, 2026
fafab86
fix: cleanup clap attributes and ensure daemon module is registered
danalec Mar 17, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,141 changes: 446 additions & 695 deletions Cargo.lock

Large diffs are not rendered by default.

32 changes: 16 additions & 16 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,13 @@ perf-literal = ["regex/perf-literal"]
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
anyhow = {version = "1.0.71", features = ["backtrace"]}
async-compression = { version = "0.4.0", features = ["all", "all-algorithms", "tokio"] }
anyhow = {version = "1.0", features = ["backtrace"]}
async-compression = { version = "0.3.15", features = ["tokio", "gzip", "bzip2", "xz", "zstd"] }
async-stream = "0.3.5"
async-trait = "0.1.68"
async_zip = {version = "0.0.12", features = ["full"]}
bincode = "1.3.3"
bytes = "1.4.0"
clap = {version = "4.3.0", features = ["wrap_help"]}
crossbeam = "0.8.2"
crossbeam-channel = "0.5.8"
derive_more = "0.99.17"
Expand All @@ -35,32 +34,33 @@ dyn-clonable = "0.9.0"
dyn-clone = "1.0.11"
encoding_rs = "0.8.32"
encoding_rs_io = "0.1.7"
env_logger = "0.10.0"
env_logger = "0.10"
glob = "0.3.1"
json_comments = "0.2.1"
lazy_static = "1.4.0"
log = "0.4.17"
log = "0.4"
mailparse = "0.14.0"
memchr = "2.5.0"
mime2ext = "0.1.52"
open = "5"
paste = "1.0.12"
path-clean = "1.0.1"
pretty-bytes = "0.2.2"
regex = "1.8.2"
rusqlite = {version = "0.30.0", features = ["vtab", "bundled"]}
schemars = {version = "0.8.12", features = ["preserve_order"]}
regex = "1"
rusqlite = {version = "0.37", features = ["vtab", "bundled"]}
schemars = {version = "0.9", features = ["preserve_order"]}
serde = {version = "1.0.163", features = ["derive"]}
serde_json = "1.0.96"
serde_json = "1.0"
size_format = "1.0.2"
structopt = "0.3.26"
tempfile = "3.5.0"
tokio = {version = "1.28.1", features = ["full"]}
tokio-rusqlite = "0.5.0"
tokio-stream = {version = "0.1.14", features = ["io-util", "tokio-util"]}
clap = {version = "4", features = ["derive"]}
tempfile = "3"
tokio = {version = "1", features = ["full"]}
tokio-rusqlite = "0.7"
tokio-stream = {version = "0.1", features = ["io-util", "tokio-util"]}
astral-tokio-tar = "0.5.6"
tokio-util = {version = "0.7.8", features = ["io", "full"]}
tree_magic = {package = "tree_magic_mini", version = "3.0.3"}
tokio-util = {version = "0.7.17", features = ["io", "io-util"]}
infer = "0.19"
once_cell = "1.19.0"

[dev-dependencies]
async-recursion = "1.0.4"
Expand Down
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,15 @@ demo/

See [the wiki](https://github.com/phiresky/ripgrep-all/wiki/fzf-Integration) for instructions of integrating rga with fzf.

### rga-fzf flags
- `--rg-params=<...>`: extra parameters passed to the list command (`rga --files-with-matches`).
- `--rg-preview-params=<...>`: extra parameters passed to the preview command (`rga --pretty --context 5`).
- `--fzf-params=<...>`: extra parameters appended to the `fzf` invocation (space-separated tokens).

Examples:
- `rga-fzf --rg-params="--hidden --glob !node_modules" --fzf-params="--reverse --prompt='rga> '"`
- `rga-fzf --rg-preview-params="--rga-accurate" "initial query"`

## INSTALLATION

Linux x64, macOS and Windows binaries are available [in GitHub Releases][latestrelease].
Expand Down Expand Up @@ -275,6 +284,19 @@ The config file location leverage the mechanisms defined by
- the [Standard Directories](https://developer.apple.com/library/content/documentation/FileManagement/Conceptual/FileSystemProgrammingGuide/FileSystemOverview/FileSystemOverview.html#//apple_ref/doc/uid/TP40010672-CH2-SW6)
guidelines on macOS (ex: `~/Library/Application Support/ripgrep-all/config.jsonc`)

### Adapter Extension Overrides
- Configure built-in adapters to match different file extensions without changing code.
- CLI flags:
  - `--rga-zip-extensions=ext1,ext2,...` replaces the default ZIP set (e.g., `zip,jar`).
  - `--rga-ffmpeg-extensions=ext1,ext2,...` replaces the default FFmpeg set (e.g., `mkv,mp4,avi,mp3,ogg,flac,webm`).
- Config file keys (JSONC):
  - `zip_extensions`: array of strings, e.g., `["zip"]`.
  - `ffmpeg_extensions`: array of strings, e.g., `["mkv","mp4"]`.
- Notes:
  - Overrides fully replace the built-in lists when provided.
  - Overrides affect pre-glob filtering when `--rga-accurate` is off.
  - Leave unset to use defaults.


## Development

Expand Down
15 changes: 15 additions & 0 deletions doc/config.default.jsonc
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,19 @@
// See https://github.com/phiresky/ripgrep-all/wiki for more information
// to verify if your custom adapters are picked up correctly, run `rga --rga-list-adapters`
]
,
// Override built-in adapter extensions
// Example: limit ZIP adapter to only .zip entries
"zip_extensions": ["zip"],
// Example: set the FFmpeg adapter's extension list explicitly (this replaces the built-in list rather than adding to it)
"ffmpeg_extensions": ["mkv", "mp4", "avi", "mp3", "ogg", "flac", "webm"]
// These overrides fully replace built-in lists; omit to use defaults.

// rga-fzf CLI flags (documented here for convenience; not part of JSON config):
// --rg-params=<...> Extra parameters passed to rga list command
// --rg-preview-params=<...> Extra parameters passed to rga preview command
// --fzf-params=<...> Extra parameters appended to the fzf invocation
// Examples:
// rga-fzf --rg-params="--hidden --glob !node_modules" --fzf-params="--reverse"
// rga-fzf --rg-preview-params="--rga-accurate" "initial query"
}
80 changes: 80 additions & 0 deletions src/adapters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ pub struct AdaptInfo {
pub filepath_hint: PathBuf,
/// true if filepath_hint is an actual file on the file system
pub is_real_file: bool,
/// mtime of the file in unix milliseconds. Only set if is_real_file is true and metadata was already fetched.
pub file_mtime_unix_ms: Option<i64>,
/// depth at which this file is in archives. 0 for real filesystem
pub archive_recursion_depth: i32,
/// stream to read the file from. can be from a file or from some decoder
Expand All @@ -109,6 +111,24 @@ pub struct AdaptInfo {
/// (enabledAdapters, disabledAdapters)
type AdaptersTuple = (Vec<Arc<dyn FileAdapter>>, Vec<Arc<dyn FileAdapter>>);

/// Wraps another adapter and presents replacement metadata for it.
///
/// `metadata()` reports `meta`, while `adapt()` is forwarded to `inner` unchanged.
struct AdapterOverride {
/// The wrapped adapter that performs the actual adaptation.
inner: Arc<dyn FileAdapter>,
/// Metadata reported in place of the wrapped adapter's own metadata.
meta: AdapterMeta,
}
// The wrapper answers metadata queries from its own stored `meta`;
// the wrapped adapter is never consulted here.
impl GetMetadata for AdapterOverride {
    fn metadata(&self) -> &AdapterMeta {
        let Self { meta, .. } = self;
        meta
    }
}
// Adaptation is handed straight to the wrapped adapter with the same
// arguments; the wrapper only differs in the metadata it reports.
#[async_trait]
impl FileAdapter for AdapterOverride {
    async fn adapt(
        &self,
        info: AdaptInfo,
        reason: &FileMatcher,
    ) -> Result<AdaptedFilesIterBox> {
        let delegate = &self.inner;
        delegate.adapt(info, reason).await
    }
}

pub fn get_all_adapters(custom_adapters: Option<Vec<CustomAdapterConfig>>) -> AdaptersTuple {
// order in descending priority
let mut adapters: Vec<Arc<dyn FileAdapter>> = vec![];
Expand Down Expand Up @@ -150,6 +170,7 @@ pub fn get_all_adapters(custom_adapters: Option<Vec<CustomAdapterConfig>>) -> Ad
pub fn get_adapters_filtered<T: AsRef<str>>(
custom_adapters: Option<Vec<CustomAdapterConfig>>,
adapter_names: &[T],
config: &RgaConfig,
) -> Result<Vec<Arc<dyn FileAdapter>>> {
let (def_enabled_adapters, def_disabled_adapters) = get_all_adapters(custom_adapters);
let adapters = if !adapter_names.is_empty() {
Expand Down Expand Up @@ -204,6 +225,38 @@ pub fn get_adapters_filtered<T: AsRef<str>>(
} else {
def_enabled_adapters
};
// apply extension overrides
let adapters = adapters
.into_iter()
.map(|a| {
let name = a.metadata().name.clone();
let override_exts = match name.as_str() {
"zip" => config.zip_extensions.as_ref(),
"ffmpeg" => config.ffmpeg_extensions.as_ref(),
_ => None,
};
if let Some(exts) = override_exts {
let fast_matchers = exts
.iter()
.map(|s| FastFileMatcher::FileExtension(s.to_string()))
.collect::<Vec<_>>();
let m = a.metadata();
let meta = AdapterMeta {
name: m.name.clone(),
version: m.version,
description: m.description.clone(),
recurses: m.recurses,
fast_matchers,
slow_matchers: m.slow_matchers.clone(),
keep_fast_matchers_if_accurate: m.keep_fast_matchers_if_accurate,
disabled_by_default: m.disabled_by_default,
};
Arc::new(AdapterOverride { inner: a, meta }) as Arc<dyn FileAdapter>
} else {
a
}
})
.collect::<Vec<_>>();
debug!(
"Chosen available adapters: {}",
adapters
Expand All @@ -214,3 +267,30 @@ pub fn get_adapters_filtered<T: AsRef<str>>(
);
Ok(adapters)
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::RgaConfig;

    /// Returns the file extensions listed in an adapter's fast matchers, in order.
    fn fast_extensions(adapter: &dyn FileAdapter) -> Vec<&str> {
        adapter
            .metadata()
            .fast_matchers
            .iter()
            .map(|matcher| {
                let FastFileMatcher::FileExtension(ext) = matcher;
                ext.as_str()
            })
            .collect()
    }

    /// Builds the default adapter set under `cfg` and returns the extensions
    /// that the adapter called `name` matches on.
    fn extensions_for(name: &str, cfg: &RgaConfig) -> Vec<String> {
        let adapters = get_adapters_filtered(None, &Vec::<String>::new(), cfg)
            .expect("default adapter set should be constructible");
        let adapter = adapters
            .iter()
            .find(|a| a.metadata().name == name)
            .unwrap_or_else(|| panic!("adapter `{name}` should be enabled by default"));
        fast_extensions(adapter.as_ref())
            .into_iter()
            .map(str::to_owned)
            .collect()
    }

    #[test]
    fn zip_extensions_override_applied() {
        let mut cfg = RgaConfig::default();
        cfg.zip_extensions = Some(vec!["zzz".to_string()]);
        // Comparing the whole list checks both the length and the contents,
        // and prints the actual extensions if the assertion fails.
        assert_eq!(extensions_for("zip", &cfg), ["zzz"]);
    }

    #[test]
    fn ffmpeg_extensions_override_applied() {
        let mut cfg = RgaConfig::default();
        cfg.ffmpeg_extensions = Some(vec!["abc".to_string(), "DEF".to_string()]);
        // The override is taken verbatim: order and letter case are preserved.
        assert_eq!(extensions_for("ffmpeg", &cfg), ["abc", "DEF"]);
    }
}
55 changes: 46 additions & 9 deletions src/adapters/custom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,10 +142,22 @@ lazy_static! {
mimetypes: Some(strs(&["application/pdf"])),

binary: "pdftotext".to_string(),
args: strs(&["-", "-"]),
args: strs(&["-opw", "$password", "-", "-"]),
disabled_by_default: None,
match_only_by_mime: None,
output_path_hint: Some("${input_virtual_path}.txt.asciipagebreaks".into())
},
CustomAdapterConfig {
name: "tesseract".to_owned(),
version: 1,
description: "Uses tesseract to extract text from images".to_owned(),
extensions: strs(&["jpg", "jpeg", "png", "webp", "tiff", "bmp", "gif"]),
mimetypes: Some(strs(&["image/jpeg", "image/png", "image/webp", "image/tiff", "image/bmp", "image/gif"])),
binary: "tesseract".to_string(),
args: strs(&["stdin", "stdout"]),
disabled_by_default: Some(true),
match_only_by_mime: None,
output_path_hint: None
}
];
}
Expand All @@ -166,7 +178,13 @@ fn proc_wait(mut child: Child, context: impl FnOnce() -> String) -> impl AsyncRe
if res.success() {
yield std::io::Result::Ok(Bytes::new());
} else {
Err(format_err!("{:?}", res)).with_context(context).map_err(to_io_err)?;
let mut stderr_text = String::new();
if let Some(mut stderr) = child.stderr.take() {
use tokio::io::AsyncReadExt as _;
let _ = stderr.read_to_string(&mut stderr_text).await;
}
let err = if stderr_text.is_empty() { format!("{:?}", res) } else { format!("{:?}\n{}", res, stderr_text) };
Err(format_err!("{}", err)).with_context(context).map_err(to_io_err)?;
}
};
StreamReader::new(s)
Expand All @@ -183,17 +201,32 @@ pub fn pipe_output(
let mut cmd = cmd
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()
.map_err(|e| map_exe_error(e, exe_name, help))?;
let mut stdi = cmd.stdin.take().expect("is piped");
let stdo = cmd.stdout.take().expect("is piped");
let mut stdi = cmd.stdin.take().context("stdin not piped")?;
let stdo = cmd.stdout.take().context("stdout not piped")?;
let crlf = regex::bytes::Regex::new("\r\n").unwrap();
let stdo_stream = tokio_util::io::ReaderStream::new(stdo);
let normalized_stream = async_stream::stream! {
for await chunk in stdo_stream {
match chunk {
Err(e) => yield Err(e),
Ok(chunk) => {
let replaced = crlf.replace_all(&chunk, &b"\n"[..]);
yield Ok(bytes::Bytes::copy_from_slice(&replaced));
}
}
}
};
let stdo_norm = StreamReader::new(normalized_stream);

let join = tokio::spawn(async move {
let mut z = inp;
tokio::io::copy(&mut z, &mut stdi).await?;
std::io::Result::Ok(())
});
Ok(Box::pin(stdo.chain(
Ok(Box::pin(stdo_norm.chain(
proc_wait(cmd, move || format!("subprocess: {cmd_log}")).chain(join_handle_to_stream(join)),
)))
}
Expand All @@ -209,7 +242,7 @@ impl GetMetadata for CustomSpawningFileAdapter {
&self.meta
}
}
fn arg_replacer(arg: &str, filepath_hint: &Path) -> Result<String> {
fn arg_replacer(arg: &str, filepath_hint: &Path, config: &RgaConfig) -> Result<String> {
expand_str_ez(arg, |s| match s {
"input_virtual_path" => Ok(filepath_hint.to_string_lossy()),
"input_file_stem" => Ok(filepath_hint
Expand All @@ -220,19 +253,21 @@ fn arg_replacer(arg: &str, filepath_hint: &Path) -> Result<String> {
.extension()
.unwrap_or_default()
.to_string_lossy()),
"password" => Ok(config.password.clone().unwrap_or_default().into()),
e => Err(anyhow::format_err!("unknown replacer ${{{e}}}")),
})
}
impl CustomSpawningFileAdapter {
fn command(
&self,
filepath_hint: &std::path::Path,
config: &RgaConfig,
mut command: tokio::process::Command,
) -> Result<tokio::process::Command> {
command.args(
self.args
.iter()
.map(|arg| arg_replacer(arg, filepath_hint))
.map(|arg| arg_replacer(arg, filepath_hint, config))
.collect::<Result<Vec<_>>>()?,
);
log::debug!("running command {:?}", command);
Expand All @@ -258,7 +293,7 @@ impl FileAdapter for CustomSpawningFileAdapter {

let cmd = Command::new(&self.binary);
let cmd = self
.command(&filepath_hint, cmd)
.command(&filepath_hint, &config, cmd)
.with_context(|| format!("Could not set cmd arguments for {}", self.binary))?;
debug!("executing {:?}", cmd);
let output = pipe_output(&line_prefix, cmd, inp, &self.binary, "")?;
Expand All @@ -268,10 +303,12 @@ impl FileAdapter for CustomSpawningFileAdapter {
.as_deref()
.unwrap_or("${input_virtual_path}.txt"),
&filepath_hint,
&config,
)?),
inp: output,
line_prefix,
is_real_file: false,
file_mtime_unix_ms: None,
archive_recursion_depth: archive_recursion_depth + 1,
postprocess,
config,
Expand Down Expand Up @@ -330,7 +367,7 @@ mod test {

let (a, d) = simple_adapt_info(&filepath, Box::pin(File::open(&filepath).await?));
// let r = adapter.adapt(a, &d)?;
let r = loop_adapt(&adapter, d, a).await?;
let r = loop_adapt(&adapter, d, a, crate::adapters::get_all_adapters(None).0).await?;
let o = adapted_to_vec(r).await?;
assert_eq!(
String::from_utf8(o)?,
Expand Down
3 changes: 2 additions & 1 deletion src/adapters/decompress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ impl FileAdapter for DecompressAdapter {
Ok(one_file(AdaptInfo {
filepath_hint: get_inner_filename(&ai.filepath_hint),
is_real_file: false,
file_mtime_unix_ms: None,
archive_recursion_depth: ai.archive_recursion_depth + 1,
inp: decompress_any(detection_reason, ai.inp)?,
line_prefix: ai.line_prefix,
Expand Down Expand Up @@ -155,7 +156,7 @@ mod tests {
let filepath = test_data_dir().join("short.pdf.gz");

let (a, d) = simple_adapt_info(&filepath, Box::pin(File::open(&filepath).await?));
let r = loop_adapt(&adapter, d, a).await?;
let r = loop_adapt(&adapter, d, a, crate::adapters::get_all_adapters(None).0).await?;
let o = adapted_to_vec(r).await?;
assert_eq!(
String::from_utf8(o)?,
Expand Down
Loading