From f0e0401cd9fbfe58f414a74ad3a0c94b0bf51c21 Mon Sep 17 00:00:00 2001 From: Shohei Fujii Date: Sat, 29 Nov 2025 08:27:16 +0900 Subject: [PATCH 1/6] wip --- src/bin/patto-preview.rs | 97 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 93 insertions(+), 4 deletions(-) diff --git a/src/bin/patto-preview.rs b/src/bin/patto-preview.rs index 8901bb5..46956e2 100644 --- a/src/bin/patto-preview.rs +++ b/src/bin/patto-preview.rs @@ -16,11 +16,12 @@ use patto::{ }; use rust_embed::RustEmbed; use serde::{Deserialize, Serialize}; -use std::collections::HashMap; +use std::collections::{hash_map::Entry, HashMap}; use std::path::{Path, PathBuf}; use std::sync::{Arc, Mutex}; -use tokio::fs; -use tokio::sync::oneshot; +use std::time::Duration; +use tokio::{fs, sync::oneshot, task::JoinHandle}; +use tokio::time::{sleep, Instant}; use tower_lsp::jsonrpc::Result as LspResult; use tower_lsp::lsp_types::{ DidChangeTextDocumentParams, DidOpenTextDocumentParams, InitializeParams, InitializeResult, @@ -71,10 +72,20 @@ struct AppState { line_trackers: Arc>>, } +const CONTENT_UPDATE_DEBOUNCE_MS: u64 = 50; +const CONTENT_UPDATE_MAX_WAIT_MS: u64 = 300; + +struct PendingContentUpdate { + latest_text: String, + first_pending_at: Instant, + scheduled_flush: Option>, +} + struct PreviewLspBackend { client: Client, repository: Arc, shutdown_tx: Mutex>>, + pending_updates: Arc>>, } impl PreviewLspBackend { @@ -87,6 +98,7 @@ impl PreviewLspBackend { client, repository, shutdown_tx: Mutex::new(shutdown_tx), + pending_updates: Arc::new(Mutex::new(HashMap::new())), } } @@ -119,8 +131,85 @@ impl PreviewLspBackend { return; } - self.repository.handle_live_file_change(path, text).await; + self.queue_live_content_update(path, text).await; } + + async fn queue_live_content_update(&self, path: PathBuf, text: String) { + let pending_updates = self.pending_updates.clone(); + let repository = self.repository.clone(); + let flush_text = { + let mut pending = pending_updates.lock().unwrap(); + match pending.entry(path.clone()) { + Entry::Vacant(entry) => { + let mut pending_entry = PendingContentUpdate { + latest_text: text.clone(), + first_pending_at: Instant::now(), + scheduled_flush: None, + }; + pending_entry.scheduled_flush = Some(spawn_flush_task( + pending_updates.clone(), + repository.clone(), + path.clone(), + )); + entry.insert(pending_entry); + None + } + Entry::Occupied(mut occupied) => { + let mut flush_now = false; + { + let pending_entry = occupied.get_mut(); + pending_entry.latest_text = text; + + if pending_entry.first_pending_at.elapsed() + >= Duration::from_millis(CONTENT_UPDATE_MAX_WAIT_MS) + { + flush_now = true; + if let Some(handle) = pending_entry.scheduled_flush.take() { + handle.abort(); + } + } else { + if let Some(handle) = pending_entry.scheduled_flush.take() { + handle.abort(); + } + pending_entry.scheduled_flush = Some(spawn_flush_task( + pending_updates.clone(), + repository.clone(), + path.clone(), + )); + } + } + + if flush_now { + Some(occupied.remove().latest_text) + } else { + None + } + } + } + }; + + if let Some(text) = flush_text { + self.repository.handle_live_file_change(path, text).await; + } + } +} + +fn spawn_flush_task( + pending_updates: Arc>>, + repository: Arc, + path: PathBuf, +) -> JoinHandle<()> { + tokio::spawn(async move { + sleep(Duration::from_millis(CONTENT_UPDATE_DEBOUNCE_MS)).await; + let text = { + let mut guard = pending_updates.lock().unwrap(); + guard.remove(&path).map(|entry| entry.latest_text) + }; + + if let Some(text) = text { + repository.handle_live_file_change(path, text).await; + } + }) } #[tower_lsp::async_trait] From 2e0ef33da5009cced64ba81b528f6c876483c971 Mon Sep 17 00:00:00 2001 From: Shohei Fujii Date: Mon, 1 Dec 2025 22:11:01 +0900 Subject: [PATCH 2/6] Extend preview debounce timing and switch to generation-based flush scheduling to avoid redundant live updates --- src/bin/patto-preview.rs | 62 ++++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/src/bin/patto-preview.rs b/src/bin/patto-preview.rs index 46956e2..4599ed4 100644 --- a/src/bin/patto-preview.rs +++ b/src/bin/patto-preview.rs @@ -20,7 +20,7 @@ use std::collections::{hash_map::Entry, HashMap}; use std::path::{Path, PathBuf}; use std::sync::{Arc, Mutex}; use std::time::Duration; -use tokio::{fs, sync::oneshot, task::JoinHandle}; +use tokio::{fs, sync::oneshot}; use tokio::time::{sleep, Instant}; use tower_lsp::jsonrpc::Result as LspResult; use tower_lsp::lsp_types::{ @@ -72,13 +72,13 @@ struct AppState { line_trackers: Arc>>, } -const CONTENT_UPDATE_DEBOUNCE_MS: u64 = 50; -const CONTENT_UPDATE_MAX_WAIT_MS: u64 = 300; +const CONTENT_UPDATE_DEBOUNCE_MS: u64 = 200; +const CONTENT_UPDATE_MAX_WAIT_MS: u64 = 500; struct PendingContentUpdate { latest_text: String, first_pending_at: Instant, - scheduled_flush: Option>, + generation: u64, } struct PreviewLspBackend { @@ -141,47 +141,44 @@ impl PreviewLspBackend { let mut pending = pending_updates.lock().unwrap(); match pending.entry(path.clone()) { Entry::Vacant(entry) => { - let mut pending_entry = PendingContentUpdate { + entry.insert(PendingContentUpdate { latest_text: text.clone(), first_pending_at: Instant::now(), - scheduled_flush: None, - }; - pending_entry.scheduled_flush = Some(spawn_flush_task( + generation: 0, + }); + schedule_debounce_flush( pending_updates.clone(), repository.clone(), path.clone(), - )); - entry.insert(pending_entry); + 0, + ); None } Entry::Occupied(mut occupied) => { let mut flush_now = false; - { + let scheduled_generation = { let pending_entry = occupied.get_mut(); pending_entry.latest_text = text; + pending_entry.generation = pending_entry.generation.wrapping_add(1); if pending_entry.first_pending_at.elapsed() >= Duration::from_millis(CONTENT_UPDATE_MAX_WAIT_MS) { flush_now = true; - if let Some(handle) = pending_entry.scheduled_flush.take() { - handle.abort(); - } - } else { - if let Some(handle) = pending_entry.scheduled_flush.take() { - handle.abort(); - } - pending_entry.scheduled_flush = Some(spawn_flush_task( - pending_updates.clone(), - repository.clone(), - path.clone(), - )); } - } + + pending_entry.generation + }; if flush_now { Some(occupied.remove().latest_text) } else { + schedule_debounce_flush( + pending_updates.clone(), + repository.clone(), + path.clone(), + scheduled_generation, + ); None } } @@ -194,22 +191,31 @@ impl PreviewLspBackend { } } -fn spawn_flush_task( +fn schedule_debounce_flush( pending_updates: Arc>>, repository: Arc, path: PathBuf, -) -> JoinHandle<()> { + generation: u64, +) { tokio::spawn(async move { sleep(Duration::from_millis(CONTENT_UPDATE_DEBOUNCE_MS)).await; let text = { let mut guard = pending_updates.lock().unwrap(); - guard.remove(&path).map(|entry| entry.latest_text) + if guard + .get(&path) + .map(|entry| entry.generation == generation) + .unwrap_or(false) + { + guard.remove(&path).map(|entry| entry.latest_text) + } else { + None + } }; if let Some(text) = text { repository.handle_live_file_change(path, text).await; } - }) + }); } #[tower_lsp::async_trait] From 38eb7a5e786677bf9ea13fc52244998665fb4502 Mon Sep 17 00:00:00 2001 From: Shohei Fujii Date: Tue, 2 Dec 2025 21:03:04 +0900 Subject: [PATCH 3/6] Add tree-sitter Patto grammar crate with build script, parser, tests, and metadata to support .pn syntax parsing. --- tree-sitter-patto/Cargo.lock | 215 ++++++++++++++++++++++++ tree-sitter-patto/Cargo.toml | 14 ++ tree-sitter-patto/build.rs | 12 ++ tree-sitter-patto/grammar.js | 167 ++++++++++++++++++ tree-sitter-patto/package.json | 14 ++ tree-sitter-patto/src/scanner.c | 184 ++++++++++++++++++++ tree-sitter-patto/test/corpus/basic.txt | 67 ++++++++ tree-sitter-patto/tree-sitter.json | 38 +++++ 8 files changed, 711 insertions(+) create mode 100644 tree-sitter-patto/Cargo.lock create mode 100644 tree-sitter-patto/Cargo.toml create mode 100644 tree-sitter-patto/build.rs create mode 100644 tree-sitter-patto/grammar.js create mode 100644 tree-sitter-patto/package.json create mode 100644 tree-sitter-patto/src/scanner.c create mode 100644 tree-sitter-patto/test/corpus/basic.txt create mode 100644 tree-sitter-patto/tree-sitter.json diff --git a/tree-sitter-patto/Cargo.lock b/tree-sitter-patto/Cargo.lock new file mode 100644 index 0000000..50d3f92 --- /dev/null +++ b/tree-sitter-patto/Cargo.lock @@ -0,0 +1,215 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "cc" +version = "1.2.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c481bdbf0ed3b892f6f806287d72acd515b352a4ec27a208489b8c1bc839633a" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "find-msvc-tools" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "indexmap" +version = "2.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "proc-macro2" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.145" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" +dependencies = [ + "indexmap", + "itoa", + "memchr", + "ryu", + "serde", + "serde_core", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + +[[package]] +name = "syn" +version = "2.0.111" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tree-sitter" +version = "0.25.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78f873475d258561b06f1c595d93308a7ed124d9977cb26b148c2084a4a3cc87" +dependencies = [ + "cc", + "regex", + "regex-syntax", + "serde_json", + "streaming-iterator", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-language" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4013970217383f67b18aef68f6fb2e8d409bc5755227092d32efb0422ba24b8" + +[[package]] +name = "tree-sitter-patto" +version = "0.1.0" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" diff --git a/tree-sitter-patto/Cargo.toml b/tree-sitter-patto/Cargo.toml new file mode 100644 index 0000000..b63c669 --- /dev/null +++ b/tree-sitter-patto/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "tree-sitter-patto" +version = "0.1.0" +edition = "2021" +build = "build.rs" + +[lib] +path = "src/lib.rs" + +[dependencies] +tree-sitter = "0.25" + +[build-dependencies] +cc = "1.0" diff --git a/tree-sitter-patto/build.rs b/tree-sitter-patto/build.rs new file mode 100644 index 0000000..fe0e741 --- /dev/null +++ b/tree-sitter-patto/build.rs @@ -0,0 +1,12 @@ +fn main() { + println!("cargo:rerun-if-changed=src/parser.c"); + println!("cargo:rerun-if-changed=src/scanner.c"); + println!("cargo:rerun-if-changed=src/grammar.json"); + println!("cargo:rerun-if-changed=src/node-types.json"); + + cc::Build::new() + .include("src") + .file("src/parser.c") + .file("src/scanner.c") + .compile("tree-sitter-patto"); +} diff --git a/tree-sitter-patto/grammar.js b/tree-sitter-patto/grammar.js new file mode 100644 index 0000000..ff23b1c --- /dev/null +++ b/tree-sitter-patto/grammar.js @@ -0,0 +1,167 @@ +const PREC = { + raw: -1, + command: 1, +}; + +module.exports = grammar({ + name: 'patto', + + extras: $ => [], + + externals: $ => [ + $._newline, + $._indent, + $._dedent, + ], + + conflicts: $ => [ + [$.url_title, $.mail_title] + ], + + rules: { + document: $ => seq( + repeat(choice($.blank_line, $.line_with_newline)), + optional($.line) + ), + + line_with_newline: $ => seq( + $.line, + $._newline, + optional($.block_body) + ), + + line: $ => $.statement, + + blank_line: $ => $._newline, + + block_body: $ => seq( + $._indent, + repeat(choice($.blank_line, $.line_with_newline)), + optional($.line), + $._dedent + ), + + statement: $ => seq( + repeat(choice($.expr_anchor, $.expr_task)), + repeat1(choice( + $.expr_command, + $.expr_img, + $.expr_builtin_symbols, + $.expr_code_inline, + $.expr_math_inline, + $.expr_wiki_link, + $.expr_url_link, + $.expr_local_file_link, + $.expr_mail_link, + $.expr_property, + $.expr_hr, + $.raw_sentence + )), + optional($.trailing_properties) + ), + + _WHITE_SPACE_INLINE: _ => token(/[ \t]/), + + raw_sentence: _ => token(prec(PREC.raw, /[^\[\]{}\n]+/)), + + trailing_properties: $ => repeat1(seq( + repeat1($._WHITE_SPACE_INLINE), + choice($.expr_property, $.expr_anchor, $.expr_task) + )), + + expr_command: $ => prec(PREC.command, seq( + '[', '@', $.builtin_commands, + repeat(seq(repeat1($._WHITE_SPACE_INLINE), $.parameter)), + repeat($._WHITE_SPACE_INLINE), + ']' + )), + + builtin_commands: $ => choice('code', 'math', 'quote', 'table'), + + parameter: $ => choice( + seq($.identifier, '=', $.quoted_string), + $.quoted_string, + $.identifier + ), + + identifier: _ => token(/[A-Za-z0-9\p{L}\-/:_=]+/u), + + quoted_string: _ => token(/"([^"\\]|\\.)*"/), + + expr_img: $ => seq('[', '@img', repeat1($._WHITE_SPACE_INLINE), $.img_body, ']'), + + img_body: $ => choice( + seq($.quoted_string, repeat1($._WHITE_SPACE_INLINE), $.img_path), + seq($.img_path, repeat1($._WHITE_SPACE_INLINE), $.quoted_string), + $.img_path + ), + + img_path: $ => choice($.URL, $.local_file), + + local_file: _ => token(/([\w\p{L}\._\- ]+\/)+[\w\p{L}\._\- ]+\.[A-Za-z0-9._-]+/u), + + expr_builtin_symbols: $ => seq( + '[', $.builtin_symbol_list, repeat1($._WHITE_SPACE_INLINE), $.nested_statement, ']' + ), + + builtin_symbol_list: $ => repeat1(choice('*', '/', '_', '-')), + + nested_statement: $ => repeat1(choice( + $.expr_code_inline, + $.expr_math_inline, + $.expr_wiki_link, + $.expr_url_link, + $.expr_local_file_link, + $.expr_mail_link, + $.raw_sentence + )), + + expr_wiki_link: $ => seq('[', choice($.wiki_link_anchored, $.wiki_link, $.self_link_anchored), ']'), + wiki_link_anchored: $ => seq($.wiki_link, $.expr_anchor), + wiki_link: _ => token(/[^@\[#\]\n][^\[#\]\n]*/), + self_link_anchored: $ => $.expr_anchor, + + expr_url_link: $ => seq('[', choice($.expr_title_url, $.expr_url_title, $.expr_url_only, $.expr_url_url), ']'), + expr_title_url: $ => seq($.url_title, repeat1($._WHITE_SPACE_INLINE), $.URL), + expr_url_title: $ => seq($.URL, repeat1($._WHITE_SPACE_INLINE), $.url_title), + expr_url_only: $ => $.URL, + expr_url_url: $ => seq($.URL, repeat1($._WHITE_SPACE_INLINE), $.URL), + + url_title: _ => token(/[^@\[\]\s#]+(?:\s+[^\[\]\s#]+)*/), + URL: _ => token(/[A-Za-z]+:\/\/[A-Za-z0-9\p{L}:/#%$&?@!()~.=+*_\-]+/u), + + expr_local_file_link: $ => seq('[', choice($.expr_local_file_title, $.expr_title_local_file, $.expr_local_file_only), ']'), + expr_local_file_title: $ => seq($.local_file, repeat1($._WHITE_SPACE_INLINE), $.local_file_title), + expr_title_local_file: $ => seq($.local_file_title, repeat1($._WHITE_SPACE_INLINE), $.local_file), + expr_local_file_only: $ => $.local_file, + local_file_title: _ => token(/[^@\[\] #]+(?: [^\[\] #]+)*/), + + expr_mail_link: $ => seq('[', choice($.expr_title_mail, $.expr_mail_title, $.expr_mail_only, $.expr_mail_mail), ']'), + expr_mail_title: $ => seq($.MAIL, repeat1($._WHITE_SPACE_INLINE), $.mail_title), + expr_title_mail: $ => seq($.mail_title, repeat1($._WHITE_SPACE_INLINE), $.MAIL), + expr_mail_only: $ => $.MAIL, + expr_mail_mail: $ => seq($.MAIL, repeat1($._WHITE_SPACE_INLINE), $.MAIL), + mail_title: _ => token(/[^@\[\]\s#]+(?:\s+[^\[\]\s#]+)*/), + MAIL: _ => token(/mailto:[A-Za-z0-9_+\-]+@[A-Za-z0-9_+\-]+(?:\.[A-Za-z0-9_+\-]+)+/), + + expr_code_inline: $ => seq('[', '`', repeat($._WHITE_SPACE_INLINE), $.inline_code_content, '`', ']'), + inline_code_content: _ => token(/[^`\]]+/), + + expr_math_inline: $ => seq('[', '$', repeat($._WHITE_SPACE_INLINE), $.inline_math_content, '$', ']'), + inline_math_content: _ => token(/[^$\]]+/), + + expr_property: $ => seq('{', '@', $.property_name, repeat(seq(repeat1($._WHITE_SPACE_INLINE), $.property_assignment)), '}'), + property_name: _ => token(/[A-Za-z0-9]+/), + property_assignment: $ => seq($.property_keyword_arg, '=', $.property_keyword_value), + property_keyword_arg: _ => token(/[A-Za-z0-9]+/), + property_keyword_value: _ => token(/[A-Za-z0-9\p{L}\-/:_]+/u), + + expr_anchor: $ => seq('#', $.anchor), + anchor: _ => token(/[A-Za-z0-9\p{L}_\-]+/u), + + expr_task: $ => seq(choice('!', '*', '-'), $.task_due), + task_due: _ => token(/\d{4}-\d{2}-\d{2}(T\d{2}:\d{2})?/), + + expr_hr: _ => token(/-{5,}/), + } +}); diff --git a/tree-sitter-patto/package.json b/tree-sitter-patto/package.json new file mode 100644 index 0000000..0a4efdc --- /dev/null +++ b/tree-sitter-patto/package.json @@ -0,0 +1,14 @@ +{ + "name": "tree-sitter-patto", + "version": "0.1.0", + "description": "Tree-sitter grammar for the Patto note format", + "main": "bindings/node", + "scripts": { + "test": "tree-sitter test" + }, + "keywords": ["tree-sitter", "patto"], + "author": "Patto contributors", + "license": "MIT", + "dependencies": {}, + "devDependencies": {} +} diff --git a/tree-sitter-patto/src/scanner.c b/tree-sitter-patto/src/scanner.c new file mode 100644 index 0000000..e37fa11 --- /dev/null +++ b/tree-sitter-patto/src/scanner.c @@ -0,0 +1,184 @@ +#include +#include +#include +#include +#include + +enum TokenType { + NEWLINE, + INDENT, + DEDENT, +}; + +#define MAX_INDENT_DEPTH 256 + +typedef struct { + uint16_t indent_stack[MAX_INDENT_DEPTH]; + uint32_t stack_size; + uint32_t dedent_count; + int32_t pending_indent; +} Scanner; + +static void scanner_reset(Scanner *scanner) { + scanner->indent_stack[0] = 0; + scanner->stack_size = 1; + scanner->dedent_count = 0; + scanner->pending_indent = -1; +} + +static inline bool is_newline(int32_t character) { + return character == '\n' || character == '\r'; +} + +static inline void advance_newline(TSLexer *lexer) { + if (lexer->lookahead == '\r') { + lexer->advance(lexer, true); + if (lexer->lookahead == '\n') { + lexer->advance(lexer, true); + } + } else { + lexer->advance(lexer, true); + } +} + +void *tree_sitter_patto_external_scanner_create() { + Scanner *scanner = (Scanner *)calloc(1, sizeof(Scanner)); + scanner_reset(scanner); + return scanner; +} + +void tree_sitter_patto_external_scanner_destroy(void *payload) { + free(payload); +} + +unsigned tree_sitter_patto_external_scanner_serialize(void *payload, char *buffer) { + Scanner *scanner = (Scanner *)payload; + uint32_t size = 0; + + memcpy(buffer + size, &scanner->stack_size, sizeof(scanner->stack_size)); + size += sizeof(scanner->stack_size); + + uint32_t stack_bytes = scanner->stack_size * sizeof(uint16_t); + memcpy(buffer + size, scanner->indent_stack, stack_bytes); + size += stack_bytes; + + memcpy(buffer + size, &scanner->dedent_count, sizeof(scanner->dedent_count)); + size += sizeof(scanner->dedent_count); + + memcpy(buffer + size, &scanner->pending_indent, sizeof(scanner->pending_indent)); + size += sizeof(scanner->pending_indent); + + return size; +} + +void tree_sitter_patto_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) { + Scanner *scanner = (Scanner *)payload; + scanner_reset(scanner); + + if (length == 0) { + return; + } + + uint32_t size = 0; + + if (size + sizeof(scanner->stack_size) > length) { + return; + } + memcpy(&scanner->stack_size, buffer + size, sizeof(scanner->stack_size)); + size += sizeof(scanner->stack_size); + + if (scanner->stack_size == 0 || scanner->stack_size > MAX_INDENT_DEPTH) { + scanner_reset(scanner); + return; + } + + uint32_t stack_bytes = scanner->stack_size * sizeof(uint16_t); + if (size + stack_bytes > length) { + scanner_reset(scanner); + return; + } + memcpy(scanner->indent_stack, buffer + size, stack_bytes); + size += stack_bytes; + + if (size + sizeof(scanner->dedent_count) > length) { + scanner_reset(scanner); + return; + } + memcpy(&scanner->dedent_count, buffer + size, sizeof(scanner->dedent_count)); + size += sizeof(scanner->dedent_count); + + if (size + sizeof(scanner->pending_indent) > length) { + scanner_reset(scanner); + return; + } + memcpy(&scanner->pending_indent, buffer + size, sizeof(scanner->pending_indent)); +} + +bool tree_sitter_patto_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) { + Scanner *scanner = (Scanner *)payload; + + if (scanner->dedent_count > 0) { + if (valid_symbols[DEDENT]) { + scanner->dedent_count--; + lexer->result_symbol = DEDENT; + return true; + } + } + + if (scanner->pending_indent >= 0) { + if (valid_symbols[INDENT]) { + if (scanner->stack_size < MAX_INDENT_DEPTH) { + scanner->indent_stack[scanner->stack_size++] = (uint16_t)scanner->pending_indent; + } + scanner->pending_indent = -1; + lexer->result_symbol = INDENT; + return true; + } + } + + if (lexer->eof(lexer)) { + if (valid_symbols[DEDENT] && scanner->stack_size > 1) { + scanner->stack_size--; + lexer->result_symbol = DEDENT; + return true; + } + return false; + } + + if (!is_newline(lexer->lookahead) || !valid_symbols[NEWLINE]) { + return false; + } + + advance_newline(lexer); + lexer->result_symbol = NEWLINE; + + while (lexer->lookahead == '\r') { + advance_newline(lexer); + } + + uint32_t indent_length = 0; + while (lexer->lookahead == '\t') { + indent_length++; + lexer->advance(lexer, true); + } + + int32_t next_char = lexer->lookahead; + bool line_is_blank = next_char == '\n' || next_char == '\r' || next_char == 0; + + if (!line_is_blank) { + uint32_t current_indent = scanner->indent_stack[scanner->stack_size - 1]; + if (indent_length > current_indent) { + scanner->pending_indent = (int32_t)indent_length; + } else if (indent_length < current_indent) { + while (scanner->stack_size > 1 && indent_length < scanner->indent_stack[scanner->stack_size - 1]) { + scanner->stack_size--; + scanner->dedent_count++; + } + if (indent_length > scanner->indent_stack[scanner->stack_size - 1]) { + scanner->pending_indent = (int32_t)indent_length; + } + } + } + + return true; +} diff --git a/tree-sitter-patto/test/corpus/basic.txt b/tree-sitter-patto/test/corpus/basic.txt new file mode 100644 index 0000000..4046968 --- /dev/null +++ b/tree-sitter-patto/test/corpus/basic.txt @@ -0,0 +1,67 @@ +======================== +Basic wiki link +======================== +[test note#anchor] + +--- +(document + (line_with_newline + (line + (statement + (expr_wiki_link + (wiki_link_anchored + (wiki_link) + (expr_anchor (anchor)))))))) +======================== +Command with property +======================== +[@code rust] #anchor {@task status=todo due=2024-12-31} + +--- +(document + (line_with_newline + (line + (statement + (expr_command (builtin_commands) (parameter (identifier))) + (trailing_properties + (expr_anchor (anchor)) + (expr_property (property_name) + (property_assignment (property_keyword_arg) (property_keyword_value)) + (property_assignment (property_keyword_arg) (property_keyword_value)))))))) +======================== +Block command +======================== +[@quote] + child + + another child +line + +--- +(document + (line_with_newline + (line + (statement + (expr_command (builtin_commands)))) + (block_body + (line_with_newline + (line + (statement + (raw_sentence)))) + (blank_line) + (line_with_newline + (line + (statement + (raw_sentence)))))) + (line_with_newline + (line + (statement + (raw_sentence)))) +) + +======================================================================== +Test that expects an error, and will fail fast if there's no parse error +:fail-fast +:error +======================================================================== +[@code diff --git a/tree-sitter-patto/tree-sitter.json b/tree-sitter-patto/tree-sitter.json new file mode 100644 index 0000000..a287d0c --- /dev/null +++ b/tree-sitter-patto/tree-sitter.json @@ -0,0 +1,38 @@ +{ + "$schema": "https://tree-sitter.github.io/tree-sitter/assets/schemas/config.schema.json", + "grammars": [ + { + "name": "patto", + "camelcase": "Patto", + "title": "Patto", + "scope": "source.patto", + "file-types": [ + "pn" + ], + "injection-regex": "^patto$", + "class-name": "TreeSitterPatto" + } + ], + "metadata": { + "version": "0.1.0", + "license": "MIT", + "description": "Tree-sitter grammar for the Patto note format", + "authors": [ + { + "name": "Patto contributors" + } + ], + "links": { + "repository": "https://github.com/ompugao/patto" + } + }, + "bindings": { + "c": true, + "go": true, + "node": true, + "python": true, + "rust": true, + "swift": false, + "zig": false + } +} From 51cbfc62a1a50317a712ca620817bb788449c04a Mon Sep 17 00:00:00 2001 From: Shohei Fujii Date: Tue, 2 Dec 2025 21:26:26 +0900 Subject: [PATCH 4/6] Update grammar tokens, expose the Rust language API, and extend the corpus to cover code commands and links --- tree-sitter-patto/grammar.js | 6 +- tree-sitter-patto/src/lib.rs | 20 +++ tree-sitter-patto/test/corpus/basic.txt | 154 ++++++++++++++++++++++++ 3 files changed, 177 insertions(+), 3 deletions(-) create mode 100644 tree-sitter-patto/src/lib.rs diff --git a/tree-sitter-patto/grammar.js b/tree-sitter-patto/grammar.js index ff23b1c..28afb20 100644 --- a/tree-sitter-patto/grammar.js +++ b/tree-sitter-patto/grammar.js @@ -84,7 +84,7 @@ module.exports = grammar({ $.identifier ), - identifier: _ => token(/[A-Za-z0-9\p{L}\-/:_=]+/u), + identifier: _ => token(/[A-Za-z0-9\p{L}\-/:_]+/u), quoted_string: _ => token(/"([^"\\]|\\.)*"/), @@ -118,7 +118,7 @@ module.exports = grammar({ expr_wiki_link: $ => seq('[', choice($.wiki_link_anchored, $.wiki_link, $.self_link_anchored), ']'), wiki_link_anchored: $ => seq($.wiki_link, $.expr_anchor), - wiki_link: _ => token(/[^@\[#\]\n][^\[#\]\n]*/), + wiki_link: _ => token(/[^@`$\[#\]\n][^\[#\]\n]*/), self_link_anchored: $ => $.expr_anchor, expr_url_link: $ => seq('[', choice($.expr_title_url, $.expr_url_title, $.expr_url_only, $.expr_url_url), ']'), @@ -127,7 +127,7 @@ module.exports = grammar({ expr_url_only: $ => $.URL, expr_url_url: $ => seq($.URL, repeat1($._WHITE_SPACE_INLINE), $.URL), - url_title: _ => token(/[^@\[\]\s#]+(?:\s+[^\[\]\s#]+)*/), + url_title: _ => token(/[^@`$#\[\]\s]+(?:\s+[^\[\]\s#`$]+)*/), URL: _ => token(/[A-Za-z]+:\/\/[A-Za-z0-9\p{L}:/#%$&?@!()~.=+*_\-]+/u), expr_local_file_link: $ => seq('[', choice($.expr_local_file_title, $.expr_title_local_file, $.expr_local_file_only), ']'), diff --git a/tree-sitter-patto/src/lib.rs b/tree-sitter-patto/src/lib.rs new file mode 100644 index 0000000..d0c235d --- /dev/null +++ b/tree-sitter-patto/src/lib.rs @@ -0,0 +1,20 @@ +use tree_sitter::Language; + +extern "C" { + fn tree_sitter_patto() -> Language; +} + +/// Returns the tree-sitter [`Language`] for the Patto grammar. +pub fn language() -> Language { + unsafe { tree_sitter_patto() } +} + +/// Returns the JSON description of the syntax tree nodes produced by this grammar. +pub fn node_types_json() -> &'static str { + include_str!("node-types.json") +} + +/// Returns the JSON representation of the grammar itself. +pub fn grammar_json() -> &'static str { + include_str!("grammar.json") +} diff --git a/tree-sitter-patto/test/corpus/basic.txt b/tree-sitter-patto/test/corpus/basic.txt index 4046968..9dc119e 100644 --- a/tree-sitter-patto/test/corpus/basic.txt +++ b/tree-sitter-patto/test/corpus/basic.txt @@ -59,6 +59,160 @@ line (raw_sentence)))) ) +======================== +Code command +======================== +[@code rust] + +--- +(document + (line_with_newline + (line + (statement + (expr_command + (builtin_commands) + (parameter (identifier))))))) + +======================== +Code command without language +======================== +[@code ] + +--- +(document + (line_with_newline + (line + (statement + (expr_command + (builtin_commands)))))) + +======================== +Indented code command with anchor and task +======================== + [@code ăȘでしこ] #anchor1 {@task status=todo due=2024-09-24} + +--- +(document + (line_with_newline + (line + (statement + (raw_sentence) + (expr_command + (builtin_commands) + (parameter (identifier))) + (trailing_properties + (expr_anchor (anchor)) + (expr_property + (property_name) + (property_assignment + (property_keyword_arg) + (property_keyword_value)) + (property_assignment + (property_keyword_arg) + (property_keyword_value)))))))) + +======================== +Math command +======================== +[@math ] + +--- +(document + (line_with_newline + (line + (statement + (expr_command + (builtin_commands)))))) + +======================== +Table command with caption parameter +======================== +[@table caption="test caption"] + +--- +(document + (line_with_newline + (line + (statement + (expr_command + (builtin_commands) + (parameter + (identifier) + (quoted_string))))))) + +======================== +Table command with quoted caption +======================== +[@table "test caption"] + +--- +(document + (line_with_newline + (line + (statement + (expr_command + (builtin_commands) + (parameter + (quoted_string))))))) + +======================== +Anchored wiki link +======================== +[test wiki_page#anchored] + +--- +(document + (line_with_newline + (line + (statement + (expr_wiki_link + (wiki_link_anchored + (wiki_link) + (expr_anchor (anchor)))))))) + +======================== +Self anchored link +======================== +[#anchored] + +--- +(document + (line_with_newline + (line + (statement + (expr_wiki_link + (self_link_anchored + (expr_anchor (anchor)))))))) + +======================== +Image with URL +======================== +[@img "img alt title" https://gyazo.com/path/to/icon.png] + +--- +(document + (line_with_newline + (line + (statement + (expr_img + (img_body + (quoted_string) + (img_path (URL)))))))) + +======================== +Image with local file path +======================== +[@img ./path/to/image.png] + +--- +(document + (line_with_newline + (line + (statement + (expr_img + (img_body + (img_path (local_file)))))))) + ======================================================================== Test that expects an error, and will fail fast if there's no parse error :fail-fast From 7b47416972e97fa801a6ea43391e095562668973 Mon Sep 17 00:00:00 2001 From: Shohei Fujii Date: Wed, 3 Dec 2025 10:39:05 +0900 Subject: [PATCH 5/6] Add tree-sitter query captures and ignore generated parser artifacts --- tree-sitter-patto/.gitignore | 5 ++ tree-sitter-patto/queries/folds.scm | 3 + tree-sitter-patto/queries/highlights.scm | 80 ++++++++++++++++++++++++ tree-sitter-patto/queries/locals.scm | 17 +++++ 4 files changed, 105 insertions(+) create mode 100644 tree-sitter-patto/.gitignore create mode 100644 tree-sitter-patto/queries/folds.scm create mode 100644 tree-sitter-patto/queries/highlights.scm create mode 100644 tree-sitter-patto/queries/locals.scm diff --git a/tree-sitter-patto/.gitignore b/tree-sitter-patto/.gitignore new file mode 100644 index 0000000..e0a122b --- /dev/null +++ b/tree-sitter-patto/.gitignore @@ -0,0 +1,5 @@ +src/grammar.json +src/node-types.json +src/tree_sitter/ +src/parser.c +patto.so diff --git a/tree-sitter-patto/queries/folds.scm b/tree-sitter-patto/queries/folds.scm new file mode 100644 index 0000000..7291c36 --- /dev/null +++ b/tree-sitter-patto/queries/folds.scm @@ -0,0 +1,3 @@ +; Fold capture defined per https://github.com/nvim-treesitter/nvim-treesitter/blob/master/CONTRIBUTING.md + +(block_body) @fold diff --git a/tree-sitter-patto/queries/highlights.scm b/tree-sitter-patto/queries/highlights.scm new file mode 100644 index 0000000..06274da --- /dev/null +++ b/tree-sitter-patto/queries/highlights.scm @@ -0,0 +1,80 @@ +; Capture groups follow the nvim-treesitter guidelines documented in +; https://github.com/nvim-treesitter/nvim-treesitter/blob/master/CONTRIBUTING.md + +; Links and resources +(expr_wiki_link) @markup.link +(expr_url_link) @markup.link +(expr_local_file_link) @markup.link +(expr_mail_link) @markup.link +(expr_img) @markup.link + +(wiki_link) @markup.link.label +(wiki_link_anchored + (wiki_link) @markup.link.label + (expr_anchor (anchor) @markup.link.url)) +(self_link_anchored + (expr_anchor (anchor) @markup.link.url)) + +(URL) @markup.link.url +(url_title) @markup.link.label +(local_file) @markup.link.url +(local_file_title) @markup.link.label +(mail_title) @markup.link.label +(MAIL) @markup.link.url +(img_path) @markup.link.url +(img_body (quoted_string) @markup.link.label) + +; Anchors +(expr_anchor) @markup.link +(anchor) @markup.link.label +("#") @punctuation.special + +; Commands and properties +(expr_command (builtin_commands) @function) +(builtin_commands) @function +(parameter (identifier) @variable.parameter) +(parameter (quoted_string) @string) +(expr_property) @markup.quote +(property_name) @property +(property_keyword_arg) @property +(property_keyword_value) @string +("{") @punctuation.bracket +("}") @punctuation.bracket +("@") @punctuation.special +("=") @operator + +; Inline constructs +(expr_code_inline) @markup.raw +(inline_code_content) @markup.raw +(expr_math_inline) @markup.math +(inline_math_content) @markup.math +(expr_hr) @markup.list + +; Decorated text +(builtin_symbol_list) @punctuation.special +((expr_builtin_symbols + (builtin_symbol_list) @_sym + (nested_statement) @markup.strong) + (#match? @_sym "\\*+")) +((expr_builtin_symbols + (builtin_symbol_list) @_sym + (nested_statement) @markup.italic) + (#match? @_sym "/+")) +((expr_builtin_symbols + (builtin_symbol_list) @_sym + (nested_statement) @markup.underline) + (#match? @_sym "_+")) +((expr_builtin_symbols + (builtin_symbol_list) @_sym + (nested_statement) @markup.strikethrough) + (#match? @_sym "-+")) + +; Tasks +((expr_task) @markup.list.checked + (#match? @markup.list.checked "^-")) +((expr_task) @markup.list.unchecked + (#match? @markup.list.unchecked "^[!*]")) +(task_due) @constant.numeric + +; General text +(raw_sentence) @markup.raw diff --git a/tree-sitter-patto/queries/locals.scm b/tree-sitter-patto/queries/locals.scm new file mode 100644 index 0000000..f2514bb --- /dev/null +++ b/tree-sitter-patto/queries/locals.scm @@ -0,0 +1,17 @@ +; Locals capture names follow https://github.com/nvim-treesitter/nvim-treesitter/blob/master/CONTRIBUTING.md + +(statement + (expr_anchor (anchor) @local.definition)) + +(trailing_properties + (expr_anchor (anchor) @local.definition)) + +(expr_wiki_link + (wiki_link) @local.reference) + +(wiki_link_anchored + (wiki_link) @local.reference + (expr_anchor (anchor) @local.reference)) + +(self_link_anchored + (expr_anchor (anchor) @local.reference)) From 2240bcece05e9cd0ff3cefde81c681612d4bff24 Mon Sep 17 00:00:00 2001 From: Shohei Fujii Date: Wed, 3 Dec 2025 11:16:19 +0900 Subject: [PATCH 6/6] Split builtin command grammar into named tokens and expand corpus expectations accordingly --- tree-sitter-patto/grammar.js | 6 +++- tree-sitter-patto/test/corpus/basic.txt | 44 ++++++++++++++++++++----- 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/tree-sitter-patto/grammar.js b/tree-sitter-patto/grammar.js index 28afb20..d6e73ef 100644 --- a/tree-sitter-patto/grammar.js +++ b/tree-sitter-patto/grammar.js @@ -76,7 +76,11 @@ module.exports = grammar({ ']' )), - builtin_commands: $ => choice('code', 'math', 'quote', 'table'), + builtin_commands: $ => choice($.code_command, $.math_command, $.quote_command, $.table_command), + code_command: $ => 'code', + math_command: $ => 'math', + quote_command: $ => 'quote', + table_command: $ => 'table', parameter: $ => choice( seq($.identifier, '=', $.quoted_string), diff --git a/tree-sitter-patto/test/corpus/basic.txt b/tree-sitter-patto/test/corpus/basic.txt index 9dc119e..d4d2584 100644 --- a/tree-sitter-patto/test/corpus/basic.txt +++ b/tree-sitter-patto/test/corpus/basic.txt @@ -13,6 +13,34 @@ Basic wiki link (wiki_link) (expr_anchor (anchor)))))))) ======================== +Basic decorations +======================== +[* strong] +[/ italic] +[_ underscore] + +--- +(document + (line_with_newline + (line + (statement + (expr_builtin_symbols + (builtin_symbol_list + (raw_sentence)))))) + (line_with_newline + (line + (statement + (expr_builtin_symbols + (builtin_symbol_list + (raw_sentence)))))) + (line_with_newline + (line + (statement + (expr_builtin_symbols + (builtin_symbol_list + (raw_sentence)))))) +) +======================== Command with property ======================== [@code rust] #anchor {@task status=todo due=2024-12-31} @@ -22,7 +50,7 @@ Command with property (line_with_newline (line (statement - (expr_command (builtin_commands) (parameter (identifier))) + (expr_command (builtin_commands (code_command)) (parameter (identifier))) (trailing_properties (expr_anchor (anchor)) (expr_property (property_name) @@ -42,7 +70,7 @@ line (line_with_newline (line (statement - (expr_command (builtin_commands)))) + (expr_command (builtin_commands (quote_command))))) (block_body (line_with_newline (line @@ -70,7 +98,7 @@ Code command (line (statement (expr_command - (builtin_commands) + (builtin_commands (code_command)) (parameter (identifier))))))) ======================== @@ -84,7 +112,7 @@ Code command without language (line (statement (expr_command - (builtin_commands)))))) + (builtin_commands (code_command))))))) ======================== Indented code command with anchor and task @@ -98,7 +126,7 @@ Indented code command with anchor and task (statement (raw_sentence) (expr_command - (builtin_commands) + (builtin_commands (code_command)) (parameter (identifier))) (trailing_properties (expr_anchor (anchor)) @@ -122,7 +150,7 @@ Math command (line (statement (expr_command - (builtin_commands)))))) + (builtin_commands (math_command))))))) ======================== Table command with caption parameter @@ -135,7 +163,7 @@ Table command with caption parameter (line (statement (expr_command - (builtin_commands) + (builtin_commands (table_command)) (parameter (identifier) (quoted_string))))))) @@ -151,7 +179,7 @@ Table command with quoted caption (line (statement (expr_command - (builtin_commands) + (builtin_commands (table_command)) (parameter (quoted_string)))))))