Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions main/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ idf_component_register(
"tools/tool_files.c"
"tools/tool_gpio.c"
"tools/gpio_policy.c"
"tools/tool_search_notes.c"
"skills/skill_loader.c"
"onboard/wifi_onboard.c"
INCLUDE_DIRS
Expand Down
1 change: 1 addition & 0 deletions main/agent/context_builder.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ esp_err_t context_build_system_prompt(char *buf, size_t size)
"- write_file: Write/overwrite a file.\n"
"- edit_file: Find-and-replace edit a file.\n"
"- list_dir: List files, optionally filter by prefix.\n"
"- search_notes: Search daily notes for keywords. Returns matching filenames ranked by relevance. Use read_file to view full content.\n"
"- cron_add: Schedule a recurring or one-shot task. The message will trigger an agent turn when the job fires.\n"
"- cron_list: List all scheduled cron jobs.\n"
"- cron_remove: Remove a scheduled cron job by ID.\n"
Expand Down
13 changes: 13 additions & 0 deletions main/tools/tool_registry.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "tools/tool_files.h"
#include "tools/tool_cron.h"
#include "tools/tool_gpio.h"
#include "tools/tool_search_notes.h"

#include <string.h>
#include "esp_log.h"
Expand Down Expand Up @@ -133,6 +134,18 @@ esp_err_t tool_registry_init(void)
};
register_tool(&ld);

/* Register search_notes */
mimi_tool_t sn = {
.name = "search_notes",
.description = "Search daily notes for keywords. Returns matching filenames ranked by relevance. Use read_file to view full content of results.",
.input_schema_json =
"{\"type\":\"object\","
"\"properties\":{\"query\":{\"type\":\"string\",\"description\":\"Space-separated keywords to search for in daily notes\"}},"
"\"required\":[\"query\"]}",
.execute = tool_search_notes_execute,
};
register_tool(&sn);

/* Register cron_add */
mimi_tool_t ca = {
.name = "cron_add",
Expand Down
196 changes: 196 additions & 0 deletions main/tools/tool_search_notes.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
#include "tools/tool_search_notes.h"
#include "mimi_config.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <dirent.h>
#include "esp_log.h"
#include "cJSON.h"

static const char *TAG = "tool_search_notes";

#define MAX_QUERY_WORDS 10
#define MAX_RESULTS 64
#define READ_CAP 4096

typedef struct {
char path[128];
int matches;
} note_result_t;

/**
 * Find `needle` inside `haystack`, comparing characters through tolower().
 *
 * Returns a pointer to the start of the first match inside `haystack`.
 * Returns NULL when there is no match, when either argument is NULL,
 * or when `needle` is the empty string.
 */
static const char *contains_nocase(const char *haystack, const char *needle)
{
    if (haystack == NULL || needle == NULL || needle[0] == '\0') {
        return NULL;
    }

    /* Try every start offset; `k` counts how many needle characters matched. */
    for (size_t start = 0; haystack[start] != '\0'; start++) {
        size_t k = 0;
        while (needle[k] != '\0' &&
               haystack[start + k] != '\0' &&
               tolower((unsigned char)haystack[start + k]) ==
                   tolower((unsigned char)needle[k])) {
            k++;
        }
        /* The whole needle was consumed, so this offset is a match. */
        if (needle[k] == '\0') {
            return haystack + start;
        }
    }

    return NULL;
}

/**
 * Tool entry point: rank note files by how many distinct query words they contain.
 *
 * input_json   JSON text; the string value of its "query" key is split on spaces.
 * output       Caller buffer that receives either an error message or the result list.
 * output_size  Size of `output` in bytes; every write goes through snprintf with this bound.
 *
 * Returns ESP_ERR_INVALID_ARG when the JSON cannot be parsed, when "query" is
 * missing or empty, or when it yields no words; ESP_FAIL when the base directory
 * cannot be opened; ESP_OK otherwise (including the "no notes matching" case).
 */
esp_err_t tool_search_notes_execute(const char *input_json, char *output, size_t output_size)
{
cJSON *root = cJSON_Parse(input_json);
if (!root) {
snprintf(output, output_size, "Error: invalid JSON input");
return ESP_ERR_INVALID_ARG;
}

/* `query` is used until the end of the function, so `root` is only deleted on the exit paths. */
const char *query = cJSON_GetStringValue(cJSON_GetObjectItem(root, "query"));
if (!query || !*query) {
snprintf(output, output_size, "Error: missing or empty 'query' field");
cJSON_Delete(root);
return ESP_ERR_INVALID_ARG;
}

/* Split query into words */
/* Work on a local copy because strtok writes NULs into its input.
 * The copy is silently cut at sizeof(qbuf) - 1 bytes. */
char qbuf[256];
strncpy(qbuf, query, sizeof(qbuf) - 1);
qbuf[sizeof(qbuf) - 1] = '\0';

/* Each words[] entry points into qbuf; tokenizing stops once MAX_QUERY_WORDS
 * distinct words are stored, so any later words are ignored. */
char *words[MAX_QUERY_WORDS];
int word_count = 0;
/* NOTE(review): strtok keeps hidden static state; confirm no other task tokenizes concurrently. */
char *tok = strtok(qbuf, " ");
while (tok && word_count < MAX_QUERY_WORDS) {
/* Deduplicate: skip if this word was already added */
/* NOTE(review): bool and strcasecmp are not declared by the headers this file
 * includes directly; presumably they arrive transitively. Verify. */
bool dup = false;
for (int i = 0; i < word_count; i++) {
if (strcasecmp(words[i], tok) == 0) { dup = true; break; }
}
if (!dup) words[word_count++] = tok;
tok = strtok(NULL, " ");
}
Comment on lines +59 to +75
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Reject queries that exceed the local limits.

The parser silently truncates at 255 bytes and silently ignores unique tokens after MAX_QUERY_WORDS. The tool then echoes the original query back in the response, so the reported ranking can disagree with what was actually searched. Return ESP_ERR_INVALID_ARG once either limit is exceeded, or surface the applied limits explicitly.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@main/tools/tool_search_notes.c` around lines 59 - 75, The code silently
truncates queries into qbuf and drops tokens beyond MAX_QUERY_WORDS; update the
parser in tool_search_notes.c (around qbuf, strncpy, strtok, word_count) to
detect these conditions and return ESP_ERR_INVALID_ARG (or otherwise surface the
applied limits) instead of silently truncating: check if strlen(query) >=
sizeof(qbuf) before copying and return ESP_ERR_INVALID_ARG if true, and while
tokenizing count total tokens (separate from deduplicated word_count) and if
total tokens exceed MAX_QUERY_WORDS return ESP_ERR_INVALID_ARG; ensure the error
is returned from the function where qbuf/word_count are used so callers see the
failure.


/* A query made only of spaces produces no tokens. */
if (word_count == 0) {
snprintf(output, output_size, "Error: query contains no words");
cJSON_Delete(root);
return ESP_ERR_INVALID_ARG;
}

/* Scan notes directory */
DIR *dir = opendir(MIMI_SPIFFS_BASE);
if (!dir) {
snprintf(output, output_size, "Error: cannot open %s directory", MIMI_SPIFFS_BASE);
cJSON_Delete(root);
return ESP_FAIL;
}

/* MAX_RESULTS entries with a 128-byte path each: more than 8 KiB of stack.
 * NOTE(review): confirm the calling task's stack can hold this. */
note_result_t results[MAX_RESULTS];
int result_count = 0;

struct dirent *ent;
while ((ent = readdir(dir)) != NULL) {
/* SPIFFS stores flat names like "memory/2026-02-15.md" */
const char *name = ent->d_name;

/* Must be in memory/ subdirectory and end with .md */
if (strncmp(name, "memory/", 7) != 0) continue;
size_t nlen = strlen(name);
if (nlen < 4 || strcmp(name + nlen - 3, ".md") != 0) continue;

/* Skip MEMORY.md */
if (strcmp(name + 7, "MEMORY.md") == 0) continue;

/* Build full path — SPIFFS names are short; skip if somehow too long */
char full_path[128];
int plen = snprintf(full_path, sizeof(full_path), "%s/%s", MIMI_SPIFFS_BASE, name);
if (plen < 0 || (size_t)plen >= sizeof(full_path)) continue;

/* Read file content (up to READ_CAP bytes) */
/* Files that cannot be opened are skipped without reporting an error. */
FILE *f = fopen(full_path, "r");
if (!f) continue;

/* One heap buffer per file, with an extra byte for the terminating NUL.
 * On allocation failure the file is skipped and the scan continues. */
char *buf = malloc(READ_CAP + 1);
if (!buf) {
fclose(f);
continue;
}

/* Single read: only the first READ_CAP bytes of the file are searched. */
size_t n = fread(buf, 1, READ_CAP, f);
buf[n] = '\0';
Comment on lines +112 to +123
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Only the first 4 KiB of each note is searchable.

fread(..., READ_CAP, ...) stops at 4096 bytes, so hits later in a long note are never counted. main/agent/context_builder.c already instructs the agent to keep appending to daily notes, so this will skew rankings as those files grow. A chunked scan with early exit once all query terms are found would keep memory bounded without dropping tail content.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@main/tools/tool_search_notes.c` around lines 112 - 123, The current code only
reads up to READ_CAP into buf once (using fread) so hits past the first 4 KiB
are ignored; change the single fread into a chunked read loop that reuses the
allocated buffer and repeatedly fread()s up to READ_CAP bytes from FILE *f
(full_path) until EOF or until all query terms are found, checking each chunk
for the query terms as you go and exiting early when every term has been found;
ensure you fclose(f) and free(buf) on all paths, keep buf null-terminated for
string searches (buf[n]=0), and preserve a small overlap between chunks (carry
the last N bytes, where N is at least the longest query term length minus 1) to
avoid missing matches spanning chunk boundaries.

fclose(f);

/* Count distinct query words that appear */
/* Each word scores at most once per file, however often it occurs. */
int matches = 0;
for (int i = 0; i < word_count; i++) {
if (contains_nocase(buf, words[i])) {
matches++;
}
}
free(buf);

if (matches > 0) {
if (result_count < MAX_RESULTS) {
/* Table has room: append. */
strncpy(results[result_count].path, full_path, sizeof(results[result_count].path) - 1);
results[result_count].path[sizeof(results[result_count].path) - 1] = '\0';
results[result_count].matches = matches;
result_count++;
} else {
/* Replace the weakest result if this one scores higher */
/* Linear scan for the lowest score; on a tie the existing entry is kept. */
int min_idx = 0;
for (int k = 1; k < MAX_RESULTS; k++) {
if (results[k].matches < results[min_idx].matches) min_idx = k;
}
if (matches > results[min_idx].matches) {
strncpy(results[min_idx].path, full_path, sizeof(results[min_idx].path) - 1);
results[min_idx].path[sizeof(results[min_idx].path) - 1] = '\0';
results[min_idx].matches = matches;
}
}
}
}
closedir(dir);

/* Sort by match count descending (simple insertion sort) */
/* The strict '<' comparison never moves an entry past one with an equal score. */
for (int i = 1; i < result_count; i++) {
note_result_t tmp = results[i];
int j = i - 1;
while (j >= 0 && results[j].matches < tmp.matches) {
results[j + 1] = results[j];
j--;
}
results[j + 1] = tmp;
}

/* Format output */
/* No hits is reported as a normal result, not as an error. */
if (result_count == 0) {
snprintf(output, output_size, "No notes matching \"%s\".", query);
ESP_LOGI(TAG, "search_notes: 0 matches for \"%s\"", query);
cJSON_Delete(root);
Comment on lines +171 to +172
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Avoid logging raw query text.

Lines 153 and 169 log user-provided query strings directly. These may contain personal/sensitive data and should be redacted or omitted from logs.

Minimal log-safety change
-    ESP_LOGI(TAG, "search_notes: 0 matches for \"%s\"", query);
+    ESP_LOGI(TAG, "search_notes: 0 matches");
 ...
-    ESP_LOGI(TAG, "search_notes: %d matches for \"%s\"", result_count, query);
+    ESP_LOGI(TAG, "search_notes: %d matches", result_count);

Also applies to: 169-169

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@main/tools/tool_search_notes.c` around lines 153 - 154, The logs in
search_notes are printing the raw user-provided query (ESP_LOGI(TAG,
"search_notes: 0 matches for \"%s\"", query) and the similar log at the later
occurrence) which can expose sensitive data; update the logging calls in the
search_notes flow (references: function search_notes, variable query, logger
macro ESP_LOGI and TAG) to either omit the query or replace it with a redacted
placeholder (e.g., "[REDACTED]" or a boolean/length indicator) before logging,
ensuring you still convey useful status (e.g., "0 matches for query" or "0
matches for query (redacted)") while never writing the raw query to logs.

return ESP_OK;
}

/* `off` tracks how many bytes of `output` are filled so far. */
size_t off = 0;
int written = snprintf(output, output_size,
"Found %d notes matching \"%s\" (%d words):\n\n",
result_count, query, word_count);
/* If the header did not fit (or snprintf failed), park `off` at the last byte
 * so the loop below appends nothing. */
if (written > 0 && (size_t)written < output_size)
off = (size_t)written;
else
off = output_size > 0 ? output_size - 1 : 0;

/* One numbered line per result. Stop at the first entry that does not fit;
 * snprintf may already have written a truncated, NUL-terminated piece of it. */
for (int i = 0; i < result_count && off < output_size - 1; i++) {
written = snprintf(output + off, output_size - off,
"%d. %s (%d/%d words)\n",
i + 1, results[i].path, results[i].matches, word_count);
if (written < 0 || (size_t)written >= output_size - off) break;
off += (size_t)written;
}

/* NOTE(review): this logs the raw query text; confirm that is acceptable for this log level. */
ESP_LOGI(TAG, "search_notes: %d matches for \"%s\"", result_count, query);
cJSON_Delete(root);
return ESP_OK;
}
11 changes: 11 additions & 0 deletions main/tools/tool_search_notes.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#pragma once

#include "esp_err.h"
#include <stddef.h>

/**
 * Search daily notes for keywords.
 * Input JSON: {"query": "word1 word2 ..."}
 * Returns matching filenames ranked by number of distinct query words found.
 *
 * @param input_json   JSON object text containing a string "query" field.
 * @param output       Buffer that receives the result list or an error message.
 * @param output_size  Size of @p output in bytes.
 * @return ESP_OK on success (also when nothing matches),
 *         ESP_ERR_INVALID_ARG for invalid JSON or a missing/empty/wordless query,
 *         ESP_FAIL if the notes directory cannot be opened.
 */
esp_err_t tool_search_notes_execute(const char *input_json, char *output, size_t output_size);