Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 111 additions & 0 deletions mito-ai/mito_ai/rules/google_drive_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# Copyright (c) Saga Inc.
# Distributed under the terms of the GNU Affero General Public License v3.0 License.

import re
import requests
from typing import Optional, Dict, Any
from urllib.parse import urlparse, parse_qs
import logging

logger = logging.getLogger(__name__)

class GoogleDriveService:
    """Service for fetching the plain-text content of Google Docs URLs.

    Every method is static; the class only acts as a namespace.
    """

    # Compiled once at class creation. The hyphen sits last in each character
    # class so it is unambiguously a literal and never read as a range.
    _FILE_ID_PATTERNS = (
        # Standard Google Drive URL: .../d/<file_id>/...
        re.compile(r'/d/([a-zA-Z0-9_-]+)'),
        # URL with an ``id`` query parameter. Anchoring on the separator keeps
        # parameters that merely end in "id" (``uid=``, ``docid=``) from
        # being mistaken for the file ID.
        re.compile(r'(?:^|[?&])id=([a-zA-Z0-9_-]+)'),
    )

    @staticmethod
    def extract_file_id(url: str) -> Optional[str]:
        """Extract the file ID from a Google Drive URL.

        Args:
            url: URL that may contain ``/d/<id>`` or an ``id=<id>`` parameter.

        Returns:
            The first matching file ID, or None when no pattern matches.
        """
        for pattern in GoogleDriveService._FILE_ID_PATTERNS:
            match = pattern.search(url)
            if match:
                return match.group(1)

        return None

    @staticmethod
    def get_file_type(url: str) -> Optional[str]:
        """Return ``'document'`` when the URL contains ``/document/``, else None."""
        if '/document/' in url:
            return 'document'
        return None

    @staticmethod
    def get_export_url(file_id: str) -> str:
        """Build the plain-text export URL for a Google Docs file ID."""
        return f"https://docs.google.com/document/d/{file_id}/export?format=txt"

    @staticmethod
    def _error_result(message: str) -> Dict[str, Any]:
        """Build the failure payload returned by ``fetch_content``."""
        return {
            'content': None,
            'file_type': None,
            'file_id': None,
            'success': False,
            'error': message,
        }

    @staticmethod
    def fetch_content(url: str) -> Dict[str, Any]:
        """Fetch the plain-text content behind a Google Docs URL.

        Args:
            url: Google Docs URL.

        Returns:
            Dict with the keys ``content``, ``file_type``, ``file_id``,
            ``success`` and ``error``. On failure ``success`` is False,
            ``error`` holds a message and the other values are None. This
            method does not raise; every exception is converted to a failure
            payload.
        """
        try:
            # Extract file ID and type
            file_id = GoogleDriveService.extract_file_id(url)
            if not file_id:
                raise ValueError("Invalid Google Docs URL: Could not extract file ID")

            file_type = GoogleDriveService.get_file_type(url)
            if not file_type:
                raise ValueError("Unsupported file type. Only Google Docs are supported")

            # Only the extracted ID is used to build the request, so the
            # request always targets docs.google.com regardless of ``url``.
            export_url = GoogleDriveService.get_export_url(file_id)

            response = requests.get(export_url, timeout=30)
            response.raise_for_status()

            # A ``format=txt`` export is expected to be plain text. An HTML
            # body with a 2xx status presumably means the request was
            # redirected to a sign-in or error page (document not shared
            # publicly); storing that page as rule content would be wrong.
            content_type = response.headers.get('Content-Type', '')
            if 'text/html' in content_type.lower():
                raise ValueError(
                    "Google Docs returned an HTML page instead of text. "
                    "Make sure the document is shared publicly"
                )

            return {
                'content': response.text,
                'file_type': file_type,
                'file_id': file_id,
                'success': True,
                'error': None,
            }

        except requests.exceptions.RequestException as e:
            logger.error("Failed to fetch Google Docs content: %s", e)
            return GoogleDriveService._error_result(f"Failed to fetch content: {str(e)}")
        except Exception as e:
            # Top-level boundary: callers rely on always getting a result dict.
            logger.error("Error processing Google Docs URL: %s", e)
            return GoogleDriveService._error_result(f"Error processing URL: {str(e)}")

    @staticmethod
    def is_valid_google_docs_url(url: str) -> bool:
        """Check that the URL is an https Google Docs document URL with a file ID.

        Args:
            url: Candidate URL; empty or None values are rejected.

        Returns:
            True when the URL starts with
            ``https://docs.google.com/document/`` and a file ID can be
            extracted from it, otherwise False.
        """
        if not url:
            return False

        if not url.startswith('https://docs.google.com/document/'):
            return False

        return GoogleDriveService.extract_file_id(url) is not None
78 changes: 73 additions & 5 deletions mito-ai/mito_ai/rules/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
import tornado
import os
from jupyter_server.base.handlers import APIHandler
from mito_ai.rules.utils import RULES_DIR_PATH, get_all_rules, get_rule, set_rules_file
from mito_ai.rules.utils import RULES_DIR_PATH, get_all_rules, get_rule, set_rules_file, set_rule_with_metadata, refresh_google_drive_rules, get_rule_metadata
from mito_ai.rules.google_drive_service import GoogleDriveService


class RulesHandler(APIHandler):
Expand All @@ -21,13 +22,29 @@ def get(self, key: Union[str, None] = None) -> None:
rules = get_all_rules()
self.finish(json.dumps(rules))
else:
# Key provided, return specific rule
# Key provided, return specific rule with metadata
rule_content = get_rule(key)
if rule_content is None:
self.set_status(404)
self.finish(json.dumps({"error": f"Rule with key '{key}' not found"}))
else:
self.finish(json.dumps({"key": key, "content": rule_content}))
# Get rule metadata - strip .md extension if present
rule_name_for_metadata = key.replace('.md', '') if key.endswith('.md') else key
rule_metadata = get_rule_metadata(rule_name_for_metadata)
response_data = {
"key": key,
"content": rule_content
}

# Add metadata if available
if rule_metadata:
response_data.update({
"rule_type": rule_metadata.get("rule_type", "manual"),
"google_drive_url": rule_metadata.get("google_drive_url"),
"last_updated": rule_metadata.get("last_updated")
})

self.finish(json.dumps(response_data))

@tornado.web.authenticated
def put(self, key: str) -> None:
Expand All @@ -37,8 +54,59 @@ def put(self, key: str) -> None:
self.set_status(400)
self.finish(json.dumps({"error": "Content is required"}))
return

set_rules_file(key, data['content'])

# Use the enhanced storage system with metadata
rule_type = data.get('rule_type', 'manual')
google_drive_url = data.get('google_drive_url')
set_rule_with_metadata(key, data['content'], rule_type, google_drive_url)

self.finish(json.dumps({"status": "updated", "rules file ": key}))

@tornado.web.authenticated
def post(self) -> None:
    """Handle POST requests for Google Drive content fetching.

    The JSON body must be an object with an ``action`` key:

    * ``fetch_google_drive_content``: requires ``url``; responds with the
      fetched content, or with status 400 and an error message.
    * ``refresh_google_drive_rules``: responds with the result of
      ``refresh_google_drive_rules()``.

    Any other action, a missing action, or a body that is not a JSON
    object is answered with status 400.
    """
    # Malformed JSON (or undecodable bytes) raises a ValueError subclass;
    # answer with 400 instead of letting it surface as a server error.
    try:
        data = json.loads(self.request.body)
    except ValueError:
        self.set_status(400)
        self.finish(json.dumps({"error": "Invalid JSON body"}))
        return

    # Valid JSON may still be a list/string/number, which has no action.
    if not isinstance(data, dict) or 'action' not in data:
        self.set_status(400)
        self.finish(json.dumps({"error": "Action is required"}))
        return

    action = data['action']

    if action == 'fetch_google_drive_content':
        url = data.get('url')
        if not url:
            self.set_status(400)
            self.finish(json.dumps({"error": "URL is required"}))
            return

        # Validate URL
        if not GoogleDriveService.is_valid_google_docs_url(url):
            self.set_status(400)
            self.finish(json.dumps({"error": "Invalid Google Docs URL"}))
            return

        # Fetch content
        result = GoogleDriveService.fetch_content(url)

        if result['success']:
            self.finish(json.dumps({
                "success": True,
                "content": result['content'],
                "file_type": result['file_type'],
                "file_id": result['file_id']
            }))
        else:
            self.set_status(400)
            self.finish(json.dumps({
                "success": False,
                "error": result['error']
            }))
    elif action == 'refresh_google_drive_rules':
        # Refresh all Google Drive rules
        results = refresh_google_drive_rules()
        self.finish(json.dumps(results))
    else:
        self.set_status(400)
        self.finish(json.dumps({"error": "Unknown action"}))


80 changes: 79 additions & 1 deletion mito-ai/mito_ai/rules/utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
# Copyright (c) Saga Inc.
# Distributed under the terms of the GNU Affero General Public License v3.0 License.

from typing import Any, Final, List, Optional
from typing import Any, Final, List, Optional, Dict
import os
import json
from datetime import datetime
from mito_ai.utils.schema import MITO_FOLDER

RULES_DIR_PATH: Final[str] = os.path.join(MITO_FOLDER, 'rules')
# JSON file holding per-rule metadata. Declared Final, like RULES_DIR_PATH,
# so type checkers reject accidental reassignment of the path.
RULES_METADATA_FILE: Final[str] = os.path.join(RULES_DIR_PATH, 'metadata.json')
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Mutable Constant Path Causes File Operation Errors

The RULES_METADATA_FILE is declared as a mutable str instead of Final[str]. This makes the constant path susceptible to accidental reassignment, which could cause file operations to fail or write to incorrect locations.

Fix in Cursor Fix in Web


def set_rules_file(rule_name: str, value: Any) -> None:
"""
Expand Down Expand Up @@ -54,3 +57,78 @@ def get_all_rules() -> List[str]:
# Log the error if needed and return empty list
print(f"Error reading rules directory: {e}")
return []


def load_rules_metadata() -> Dict[str, Any]:
    """Load the rules metadata mapping from ``RULES_METADATA_FILE``.

    Returns:
        The parsed JSON object, or an empty dict when the file is missing,
        unreadable, not valid JSON, or holds something other than a JSON
        object.
    """
    # Open directly instead of checking for existence first: a missing file
    # raises FileNotFoundError (an OSError), which avoids a check-then-open race.
    try:
        with open(RULES_METADATA_FILE, 'r', encoding='utf-8') as f:
            loaded = json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and undecodable bytes.
        return {}

    # Callers index the result by rule name, so anything but a dict
    # (e.g. a file containing ``[]`` or ``null``) is treated as empty.
    return loaded if isinstance(loaded, dict) else {}


def save_rules_metadata(metadata: Dict[str, Any]) -> None:
    """Write the rules metadata mapping to ``RULES_METADATA_FILE`` as JSON.

    Args:
        metadata: JSON-serializable mapping to persist; it replaces the
            file's previous content.
    """
    # exist_ok avoids the race between checking for the directory and
    # creating it when another writer creates it in between.
    os.makedirs(RULES_DIR_PATH, exist_ok=True)

    with open(RULES_METADATA_FILE, 'w', encoding='utf-8') as f:
        json.dump(metadata, f, indent=2)


def set_rule_with_metadata(rule_name: str, content: str, rule_type: str = "manual", google_drive_url: Optional[str] = None) -> None:
    """Store a rule's content and record its metadata entry.

    Args:
        rule_name: Name of the rule; also the key of its metadata entry.
        content: Rule text, passed to ``set_rules_file``.
        rule_type: Value stored under ``rule_type`` (defaults to "manual").
        google_drive_url: Value stored under ``google_drive_url``; may be None.
    """
    # Persist the rule text first, then rewrite the metadata file.
    set_rules_file(rule_name, content)

    all_metadata = load_rules_metadata()
    entry = {
        'rule_type': rule_type,
        'google_drive_url': google_drive_url,
        'last_updated': datetime.now().isoformat()
    }
    all_metadata[rule_name] = entry
    save_rules_metadata(all_metadata)


def get_rule_metadata(rule_name: str) -> Optional[Dict[str, Any]]:
    """Return the metadata entry stored for ``rule_name``, or None if absent."""
    return load_rules_metadata().get(rule_name)


def refresh_google_drive_rules() -> Dict[str, Any]:
    """Re-fetch every rule whose metadata marks it as a Google Doc.

    Returns:
        Dict with ``success`` (names of the rules that were refreshed) and
        ``errors`` (one ``{'rule', 'error'}`` dict per rule that failed).
    """
    from mito_ai.rules.google_drive_service import GoogleDriveService

    refreshed: List[str] = []
    failures: List[Dict[str, Any]] = []

    for rule_name, rule_metadata in load_rules_metadata().items():
        # Only rules typed 'google_doc' that carry a source URL are refreshed.
        if rule_metadata.get('rule_type') != 'google_doc':
            continue
        drive_url = rule_metadata.get('google_drive_url')
        if not drive_url:
            continue

        try:
            fetched = GoogleDriveService.fetch_content(drive_url)

            if not fetched['success']:
                failures.append({
                    'rule': rule_name,
                    'error': fetched['error']
                })
                continue

            # Overwrite the stored rule text with the freshly fetched content.
            set_rule_with_metadata(rule_name, fetched['content'], 'google_doc', drive_url)
            refreshed.append(rule_name)

        except Exception as e:
            # One failing rule must not stop the remaining rules from refreshing.
            failures.append({
                'rule': rule_name,
                'error': str(e)
            })

    return {'success': refreshed, 'errors': failures}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Circular Dependency in Google Drive Refresh Function

The refresh_google_drive_rules function imports GoogleDriveService locally, inside the function body. A function-scope import defers a potential circular dependency rather than removing it: if GoogleDriveService or its dependencies ever import from this utils module, the cycle may still cause import errors or unexpected runtime behavior.

Fix in Cursor Fix in Web

20 changes: 19 additions & 1 deletion mito-ai/src/Extensions/ContextManager/ContextManagerPlugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import { Token } from '@lumino/coreutils';
import { fetchVariablesAndUpdateState, Variable } from './VariableInspector';
import { getFiles, File } from './FileInspector';
import { KernelMessage } from '@jupyterlab/services';
import { refreshGoogleDriveRules } from '../../restAPI/RestAPI';

// The provides field in JupyterLab's JupyterFrontEndPlugin expects a token
// that can be used to look up the service in the dependency injection system,
Expand Down Expand Up @@ -43,7 +44,10 @@ export class ContextManager implements IContextManager {
this.notebookTracker = notebookTracker;

// Setup the kernel listener to update context as kernel messages are received
this.setupKernelListener(app, notebookTracker);
this.setupKernelListener(app, notebookTracker);

// Refresh Google Drive rules on startup (non-blocking)
void this.refreshGoogleDriveRulesOnStartup();
}

getNotebookContext(notebookId: string): NotebookContext | undefined {
Expand All @@ -70,6 +74,20 @@ export class ContextManager implements IContextManager {
this.notebookContexts.set(notebookID, context);
}

/**
 * Ask the backend to refresh the Google Drive rules and log the outcome.
 * Any error is logged as a warning and never rethrown.
 */
private refreshGoogleDriveRulesOnStartup = async (): Promise<void> => {
    try {
        const { success, errors } = await refreshGoogleDriveRules();

        const anyRefreshed = success.length > 0;
        if (anyRefreshed) {
            console.log('Successfully refreshed Google Drive rules on startup:', success);
        }

        const anyFailed = errors.length > 0;
        if (anyFailed) {
            console.warn('Some Google Drive rules failed to refresh on startup:', errors);
        }
    } catch (error) {
        console.warn('Error refreshing Google Drive rules on startup:', error);
    }
};

private _startKernelListener = async (app: JupyterFrontEnd, notebookPanel: NotebookPanel | null): Promise<void> => {
if (notebookPanel === null) {
return;
Expand Down
Loading
Loading