Skip to content

Commit bbebe77

Browse files
authored
feat(cli, nvim): Add vectorise subcommand to LSP server (#189)
* feat(cli): add `vectorise` subcommand to LSP server * Auto generate docs * test(cli): Raise JsonRpcInternalError on invalid action * test(cli): Raise JsonRpcInternalError on missing collection and improve test coverage * test(cli): improve test coverage * feat(cli): Return vectorise command execution results * fix(nvim): make sure logging works for all kinds of LSP results --------- Co-authored-by: Davidyz <Davidyz@users.noreply.github.com>
1 parent a805099 commit bbebe77

File tree

6 files changed

+239
-51
lines changed

6 files changed

+239
-51
lines changed

doc/VectorCode-cli.txt

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -765,9 +765,8 @@ Note that:
765765

766766
1. For easier parsing, `--pipe` is assumed to be enabled in LSP mode;
767767
2. At the moment this only works with a VectorCode setup that uses a **standalone ChromaDB server**, which is not difficult to set up using Docker;
768-
3. At the time this only work with `query` subcommand. I will consider adding
769-
support for other subcommand but first I need to figure out how to properly
770-
manage `project_root` across different requests if they change.
768+
3. The LSP server supports `vectorise`, `query` and `ls` subcommands. The other
769+
subcommands may be added in the future.
771770

772771

773772
MCP SERVER ~

docs/cli.md

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -690,9 +690,8 @@ Note that:
690690
1. For easier parsing, `--pipe` is assumed to be enabled in LSP mode;
691691
2. At the moment this only works with a VectorCode setup that uses a **standalone
692692
ChromaDB server**, which is not difficult to set up using Docker;
693-
3. At the time this only work with `query` subcommand. I will consider adding
694-
support for other subcommand but first I need to figure out how to properly
695-
manage `project_root` across different requests if they change.
693+
3. The LSP server supports `vectorise`, `query` and `ls` subcommands. The other
694+
subcommands may be added in the future.
696695

697696
### MCP Server
698697

lua/vectorcode/jobrunner/lsp.lua

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -101,14 +101,7 @@ function jobrunner.run_async(args, callback, bufnr)
101101
end
102102
vim.schedule_wrap(callback)(result, err_message, code)
103103
if result then
104-
logger.debug(
105-
"lsp jobrunner result:\n",
106-
vim.tbl_map(function(item)
107-
item.document = nil
108-
item.chunk = nil
109-
return item
110-
end, vim.deepcopy(result))
111-
)
104+
logger.debug("lsp jobrunner result:\n", result)
112105
end
113106
if err then
114107
logger.info("lsp jobrunner error:\n", err)

src/vectorcode/lsp_main.py

Lines changed: 78 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,13 @@
99

1010
import shtab
1111

12+
from vectorcode.subcommands.vectorise import (
13+
chunked_add,
14+
exclude_paths_by_spec,
15+
find_exclude_specs,
16+
load_files_from_include,
17+
)
18+
1219
try: # pragma: nocover
1320
from lsprotocol import types
1421
from pygls.exceptions import (
@@ -29,6 +36,7 @@
2936
Config,
3037
cleanup_path,
3138
config_logging,
39+
expand_globs,
3240
find_project_root,
3341
get_project_config,
3442
parse_cli_args,
@@ -86,14 +94,6 @@ async def execute_command(ls: LanguageServer, args: list[str]):
8694
logger.info("Received command arguments: %s", args)
8795
parsed_args = await parse_cli_args(args)
8896
logger.info("Parsed command arguments: %s", parsed_args)
89-
if parsed_args.action not in {CliAction.query, CliAction.ls}:
90-
error_message = (
91-
f"Unsupported vectorcode subcommand: {str(parsed_args.action)}"
92-
)
93-
logger.error(
94-
error_message,
95-
)
96-
raise JsonRpcInvalidRequest(error_message)
9797
if parsed_args.project_root is None:
9898
if DEFAULT_PROJECT_ROOT is not None:
9999
parsed_args.project_root = DEFAULT_PROJECT_ROOT
@@ -136,12 +136,12 @@ async def execute_command(ls: LanguageServer, args: list[str]):
136136
)
137137
final_results = []
138138
try:
139-
if collection is None:
140-
print("Please specify a project to search in.", file=sys.stderr)
141-
else:
142-
final_results.extend(
143-
await build_query_results(collection, final_configs)
144-
)
139+
assert collection is not None, (
140+
"Failed to find the correct collection."
141+
)
142+
final_results.extend(
143+
await build_query_results(collection, final_configs)
144+
)
145145
finally:
146146
log_message = f"Retrieved {len(final_results)} result{'s' if len(final_results) > 1 else ''} in {round(time.time() - start_time, 2)}s."
147147
ls.progress.end(
@@ -168,11 +168,73 @@ async def execute_command(ls: LanguageServer, args: list[str]):
168168
)
169169
logger.info(f"Retrieved {len(projects)} project(s).")
170170
return projects
171-
except Exception as e:
171+
case CliAction.vectorise:
172+
assert collection is not None, "Failed to find the correct collection."
173+
ls.progress.begin(
174+
progress_token,
175+
types.WorkDoneProgressBegin(
176+
title="VectorCode", message="Vectorising files...", percentage=0
177+
),
178+
)
179+
files = await expand_globs(
180+
final_configs.files
181+
or load_files_from_include(str(final_configs.project_root)),
182+
recursive=final_configs.recursive,
183+
include_hidden=final_configs.include_hidden,
184+
)
185+
if not final_configs.force: # pragma: nocover
186+
# tested in 'vectorise.py'
187+
for spec in find_exclude_specs(final_configs):
188+
if os.path.isfile(spec):
189+
logger.info(f"Loading ignore specs from {spec}.")
190+
files = exclude_paths_by_spec((str(i) for i in files), spec)
191+
stats = {"add": 0, "update": 0, "removed": 0}
192+
collection_lock = asyncio.Lock()
193+
stats_lock = asyncio.Lock()
194+
max_batch_size = await client.get_max_batch_size()
195+
semaphore = asyncio.Semaphore(os.cpu_count() or 1)
196+
tasks = [
197+
asyncio.create_task(
198+
chunked_add(
199+
str(file),
200+
collection,
201+
collection_lock,
202+
stats,
203+
stats_lock,
204+
final_configs,
205+
max_batch_size,
206+
semaphore,
207+
)
208+
)
209+
for file in files
210+
]
211+
for i, task in enumerate(asyncio.as_completed(tasks), start=1):
212+
await task
213+
ls.progress.report(
214+
progress_token,
215+
types.WorkDoneProgressReport(
216+
message="Vectorising files...",
217+
percentage=int(100 * i / len(tasks)),
218+
),
219+
)
220+
ls.progress.end(
221+
progress_token,
222+
types.WorkDoneProgressEnd(
223+
message=f"Vectorised {stats['add'] + stats['update']} files."
224+
),
225+
)
226+
return stats
227+
case _ as c: # pragma: nocover
228+
error_message = f"Unsupported vectorcode subcommand: {str(c)}"
229+
logger.error(
230+
error_message,
231+
)
232+
raise JsonRpcInvalidRequest(error_message)
233+
except Exception as e: # pragma: nocover
172234
if isinstance(e, JsonRpcException):
173235
# pygls exception. raise it as is.
174236
raise
175-
else: # pragma: nocover
237+
else:
176238
# wrap non-pygls errors for error codes.
177239
raise JsonRpcInternalError(message=traceback.format_exc()) from e
178240

src/vectorcode/subcommands/vectorise.py

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -141,10 +141,15 @@ def show_stats(configs: Config, stats):
141141
)
142142

143143

144-
def exclude_paths_by_spec(paths: Iterable[str], specs: pathspec.PathSpec) -> list[str]:
144+
def exclude_paths_by_spec(
    paths: Iterable[str], specs: pathspec.PathSpec | str | os.PathLike[str]
) -> list[str]:
    """
    Files matched by the specs will be excluded.

    Args:
        paths: Candidate file paths to filter. Any iterable is accepted and it
            is consumed exactly once.
        specs: Either an already-compiled `pathspec.PathSpec`, or the path
            (`str` or `os.PathLike`) of a gitignore-style file from which the
            patterns are loaded.

    Returns:
        The entries of `paths`, in their original order, that are NOT matched
        by `specs`.

    Raises:
        OSError: if `specs` is a path that cannot be opened.
        UnicodeDecodeError: if the spec file is not valid UTF-8.
    """
    if isinstance(specs, (str, os.PathLike)):
        # Decode explicitly as UTF-8 so that non-ASCII patterns are read the
        # same way on every platform instead of depending on the locale's
        # default encoding.
        with open(specs, encoding="utf-8") as fin:
            specs = pathspec.GitIgnoreSpec.from_lines(fin.readlines())
    return [path for path in paths if not specs.match_file(path)]
149154

150155

@@ -180,6 +185,25 @@ def load_files_from_include(project_root: str) -> list[str]:
180185
return []
181186

182187

188+
def find_exclude_specs(configs: Config) -> list[str]:
    """
    Collect the paths of the exclude-spec files that apply to a project.

    The project's `.gitignore` path always comes first and is returned without
    checking that the file exists. It is followed by at most one
    `vectorcode.exclude` path: the project-local
    `<project_root>/.vectorcode/vectorcode.exclude` if that file exists,
    otherwise `GLOBAL_EXCLUDE_SPEC` if that file exists.
    """
    root = str(configs.project_root)
    local_exclude = os.path.join(root, ".vectorcode", "vectorcode.exclude")

    candidates = [os.path.join(root, ".gitignore")]
    # The project-local exclude file takes precedence over the global one;
    # the two are never returned together.
    if os.path.isfile(local_exclude):
        candidates.append(local_exclude)
    elif os.path.isfile(GLOBAL_EXCLUDE_SPEC):
        candidates.append(GLOBAL_EXCLUDE_SPEC)
    return candidates
205+
206+
183207
async def vectorise(configs: Config) -> int:
184208
assert configs.project_root is not None
185209
client = await get_client(configs)
@@ -198,23 +222,10 @@ async def vectorise(configs: Config) -> int:
198222
)
199223

200224
if not configs.force:
201-
gitignore_path = os.path.join(str(configs.project_root), ".gitignore")
202-
specs = [
203-
gitignore_path,
204-
]
205-
exclude_spec_path = os.path.join(
206-
configs.project_root, ".vectorcode", "vectorcode.exclude"
207-
)
208-
if os.path.isfile(exclude_spec_path):
209-
specs.append(exclude_spec_path)
210-
elif os.path.isfile(GLOBAL_EXCLUDE_SPEC):
211-
specs.append(GLOBAL_EXCLUDE_SPEC)
212-
for spec_path in specs:
225+
for spec_path in find_exclude_specs(configs):
213226
if os.path.isfile(spec_path):
214227
logger.info(f"Loading ignore specs from {spec_path}.")
215-
with open(spec_path) as fin:
216-
spec = pathspec.GitIgnoreSpec.from_lines(fin.readlines())
217-
files = exclude_paths_by_spec((str(i) for i in files), spec)
228+
files = exclude_paths_by_spec((str(i) for i in files), spec_path)
218229
else: # pragma: nocover
219230
logger.info("Ignoring exclude specs.")
220231

0 commit comments

Comments
 (0)