Skip to content

Commit 205f96c

Browse files
authored
Merge branch 'main' into file-stats-cache
2 parents 4542db8 + 48d20ad commit 205f96c

File tree

142 files changed

+11122
-5353
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

142 files changed

+11122
-5353
lines changed

Cargo.lock

Lines changed: 19 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ci/scripts/check_examples_docs.sh

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,15 +36,16 @@ ROOT_DIR="$(git rev-parse --show-toplevel)"
3636
EXAMPLES_DIR="$ROOT_DIR/datafusion-examples"
3737
README="$EXAMPLES_DIR/README.md"
3838
README_NEW="$EXAMPLES_DIR/README-NEW.md"
39+
PRETTIER_VERSION="2.7.1"
3940

4041
echo "▶ Generating examples README (Rust generator)…"
4142
cargo run --quiet \
4243
--manifest-path "$EXAMPLES_DIR/Cargo.toml" \
4344
--bin examples-docs \
4445
> "$README_NEW"
4546

46-
echo "▶ Formatting generated README with Prettier"
47-
npx prettier@2.7.1 \
47+
echo "▶ Formatting generated README with prettier ${PRETTIER_VERSION}"
48+
npx "prettier@${PRETTIER_VERSION}" \
4849
--parser markdown \
4950
--write "$README_NEW"
5051

@@ -60,7 +61,7 @@ if ! diff -u "$README" "$README_NEW" > /tmp/examples-readme.diff; then
6061
echo "To update the README locally, run:"
6162
echo ""
6263
echo " cargo run --bin examples-docs \\"
63-
echo " | npx prettier@2.7.1 --parser markdown --write \\"
64+
echo " | npx prettier@${PRETTIER_VERSION} --parser markdown --write \\"
6465
echo " > datafusion-examples/README.md"
6566
echo ""
6667
echo "Diff:"

datafusion-cli/src/exec.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ impl StatementExecutor {
300300
let curr_num_rows = batch.num_rows();
301301
// Stop collecting results if the number of rows exceeds the limit
302302
// results batch should include the last batch that exceeds the limit
303-
if row_count < max_rows + curr_num_rows {
303+
if row_count < max_rows.saturating_add(curr_num_rows) {
304304
// Try to grow the reservation to accommodate the batch in memory
305305
reservation.try_grow(get_record_batch_memory_size(&batch))?;
306306
results.push(batch);

datafusion-examples/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ arrow = { workspace = true }
4040
arrow-schema = { workspace = true }
4141
datafusion = { workspace = true, default-features = true, features = ["parquet_encryption"] }
4242
datafusion-common = { workspace = true }
43+
nom = "8.0.0"
4344
tempfile = { workspace = true }
4445
tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot", "fs"] }
4546

datafusion-examples/src/bin/examples-docs.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,9 @@
3434
//! cargo run --bin examples-docs -- dataframe
3535
//! ```
3636
37-
use datafusion_examples::utils::examples_docs::{RepoLayout, generate_examples_readme};
37+
use datafusion_examples::utils::example_metadata::{
38+
RepoLayout, generate_examples_readme,
39+
};
3840

3941
fn main() -> Result<(), Box<dyn std::error::Error>> {
4042
let layout = RepoLayout::detect()?;
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! Utilities for discovering example groups in the repository filesystem.
19+
//!
20+
//! An example group is defined as a directory containing a `main.rs` file
21+
//! under the examples root. This module is intentionally filesystem-focused
22+
//! and does not perform any parsing or rendering.
23+
24+
use std::fs;
25+
use std::path::{Path, PathBuf};
26+
27+
use datafusion::error::Result;
28+
29+
/// Discovers all example group directories under the given root.
///
/// A directory is considered an example group if it directly contains a
/// regular `main.rs` file. Only the immediate children of `root` are
/// inspected; the scan does not recurse into nested directories.
///
/// The returned paths are sorted so that the result is deterministic
/// regardless of the order in which the filesystem yields entries.
///
/// # Errors
///
/// Returns an error if `root` cannot be read as a directory or if any of
/// its entries cannot be read.
pub fn discover_example_groups(root: &Path) -> Result<Vec<PathBuf>> {
    let mut groups = Vec::new();
    for entry in fs::read_dir(root)? {
        let path = entry?.path();

        // `is_file` (rather than `exists`) keeps a *directory* that happens
        // to be named `main.rs` from qualifying its parent as a group.
        if path.is_dir() && path.join("main.rs").is_file() {
            groups.push(path);
        }
    }
    groups.sort();
    Ok(groups)
}
45+
46+
#[cfg(test)]
mod tests {
    use super::*;

    use std::fs::{self, File};

    use tempfile::TempDir;

    /// Only directories that hold a `main.rs` are reported as example groups.
    #[test]
    fn discover_example_groups_finds_dirs_with_main_rs() -> Result<()> {
        let scratch = TempDir::new()?;
        let base = scratch.path();

        // A directory with a `main.rs` inside counts as an example group ...
        let with_main = base.join("group1");
        fs::create_dir(&with_main)?;
        File::create(with_main.join("main.rs"))?;

        // ... while a directory without one does not.
        fs::create_dir(base.join("group2"))?;

        let found = discover_example_groups(base)?;

        assert_eq!(found, vec![with_main]);
        Ok(())
    }
}
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! Repository layout utilities.
19+
//!
20+
//! This module provides a small helper (`RepoLayout`) that encapsulates
21+
//! knowledge about the DataFusion repository structure, in particular
22+
//! where example groups are located relative to the repository root.
23+
24+
use std::path::{Path, PathBuf};
25+
26+
use datafusion::error::{DataFusionError, Result};
27+
28+
/// Describes the layout of a DataFusion repository.
29+
///
30+
/// This type centralizes knowledge about where example-related
31+
/// directories live relative to the repository root.
32+
#[derive(Debug, Clone)]
33+
pub struct RepoLayout {
34+
root: PathBuf,
35+
}
36+
37+
impl From<&Path> for RepoLayout {
38+
fn from(path: &Path) -> Self {
39+
Self {
40+
root: path.to_path_buf(),
41+
}
42+
}
43+
}
44+
45+
impl RepoLayout {
46+
/// Creates a layout from an explicit repository root.
47+
pub fn from_root(root: PathBuf) -> Self {
48+
Self { root }
49+
}
50+
51+
/// Detects the repository root based on `CARGO_MANIFEST_DIR`.
52+
///
53+
/// This is intended for use from binaries inside the workspace.
54+
pub fn detect() -> Result<Self> {
55+
let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
56+
57+
let root = manifest_dir.parent().ok_or_else(|| {
58+
DataFusionError::Execution(
59+
"CARGO_MANIFEST_DIR does not have a parent".to_string(),
60+
)
61+
})?;
62+
63+
Ok(Self {
64+
root: root.to_path_buf(),
65+
})
66+
}
67+
68+
/// Returns the repository root directory.
69+
pub fn root(&self) -> &Path {
70+
&self.root
71+
}
72+
73+
/// Returns the `datafusion-examples/examples` directory.
74+
pub fn examples_root(&self) -> PathBuf {
75+
self.root.join("datafusion-examples").join("examples")
76+
}
77+
78+
/// Returns the directory for a single example group.
79+
///
80+
/// Example: `examples/udf`
81+
pub fn example_group_dir(&self, group: &str) -> PathBuf {
82+
self.examples_root().join(group)
83+
}
84+
}
85+
86+
#[cfg(test)]
mod tests {
    use super::*;

    /// A detected layout must carry a non-empty root path.
    #[test]
    fn detect_sets_non_empty_root() -> Result<()> {
        let root_is_empty = RepoLayout::detect()?.root().as_os_str().is_empty();
        assert!(!root_is_empty);
        Ok(())
    }

    /// The examples directory lives below the root and has the expected tail.
    #[test]
    fn examples_root_is_under_repo_root() -> Result<()> {
        let detected = RepoLayout::detect()?;
        let dir = detected.examples_root();
        assert!(dir.starts_with(detected.root()));
        assert!(dir.ends_with("datafusion-examples/examples"));
        Ok(())
    }

    /// A group directory is the examples directory plus the group name.
    #[test]
    fn example_group_dir_appends_group_name() -> Result<()> {
        let dir = RepoLayout::detect()?.example_group_dir("foo");
        assert!(dir.ends_with("datafusion-examples/examples/foo"));
        Ok(())
    }
}
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! Documentation generator for DataFusion examples.
19+
//!
20+
//! # Design goals
21+
//!
22+
//! - Keep README.md in sync with runnable examples
23+
//! - Fail fast on malformed documentation
24+
//!
25+
//! # Overview
26+
//!
27+
//! Each example group corresponds to a directory under
28+
//! `datafusion-examples/examples/<group>` containing a `main.rs` file.
29+
//! Documentation is extracted from structured `//!` comments in that file.
30+
//!
31+
//! For each example group, the generator produces:
32+
//!
33+
//! ```text
34+
//! ## <Group Name> Examples
35+
//! ### Group: `<group>`
36+
//! #### Category: Single Process | Distributed
37+
//!
38+
//! | Subcommand | File Path | Description |
39+
//! ```
40+
//!
41+
//! # Usage
42+
//!
43+
//! Generate documentation for a single group only:
44+
//!
45+
//! ```bash
46+
//! cargo run --bin examples-docs -- dataframe
47+
//! ```
48+
//!
49+
//! Generate documentation for all examples:
50+
//!
51+
//! ```bash
52+
//! cargo run --bin examples-docs
53+
//! ```
54+
55+
pub mod discover;
56+
pub mod layout;
57+
pub mod model;
58+
pub mod parser;
59+
pub mod render;
60+
61+
#[cfg(test)]
62+
pub mod test_utils;
63+
64+
pub use layout::RepoLayout;
65+
pub use model::{Category, ExampleEntry, ExampleGroup, GroupName};
66+
pub use parser::parse_main_rs_docs;
67+
pub use render::generate_examples_readme;

0 commit comments

Comments
 (0)