class GoogleDriveService:
    """Helpers for reading the plain-text content of a shared Google Doc.

    Every method is a ``@staticmethod``; the class only groups the helpers.
    """

    # Compiled once when the class is created. The hyphen sits at the end of
    # each character class so it can never be read as a range operator.
    _FILE_ID_PATTERNS = (
        # .../document/d/<id>/edit
        re.compile(r'/d/([a-zA-Z0-9_-]+)'),
        # ...?id=<id> or ...&id=<id>. Requiring the separator stops the
        # pattern from matching inside other parameter names ("ouid=",
        # "docid=", ...).
        re.compile(r'[?&]id=([a-zA-Z0-9_-]+)'),
    )

    @staticmethod
    def extract_file_id(url: str) -> Optional[str]:
        """Return the file ID embedded in a Google Drive/Docs URL.

        Args:
            url: The URL to inspect.

        Returns:
            The first ID found (path form ``/d/<id>`` is tried before the
            ``id=<id>`` query form), or ``None`` when neither form is present.
        """
        for pattern in GoogleDriveService._FILE_ID_PATTERNS:
            match = pattern.search(url)
            if match:
                return match.group(1)
        return None

    @staticmethod
    def get_file_type(url: str) -> Optional[str]:
        """Return ``'document'`` when the URL contains ``/document/``, else ``None``."""
        if '/document/' in url:
            return 'document'
        return None

    @staticmethod
    def get_export_url(file_id: str) -> str:
        """Return the plain-text export URL for the Google Doc with ``file_id``."""
        return f"https://docs.google.com/document/d/{file_id}/export?format=txt"

    @staticmethod
    def _failure(message: str) -> Dict[str, Any]:
        """Build the result dict that ``fetch_content`` returns on any error."""
        return {
            'content': None,
            'file_type': None,
            'file_id': None,
            'success': False,
            'error': message
        }

    @staticmethod
    def fetch_content(url: str) -> Dict[str, Any]:
        """Download the text of a Google Doc.

        This method never raises; every failure is reported through the
        returned dict.

        Args:
            url: Google Docs URL.

        Returns:
            Dict with the keys ``content``, ``file_type``, ``file_id``,
            ``success`` and ``error``. On success ``error`` is ``None``; on
            failure ``success`` is ``False``, ``error`` holds a message and
            the remaining values are ``None``.
        """
        try:
            file_id = GoogleDriveService.extract_file_id(url)
            if not file_id:
                raise ValueError("Invalid Google Docs URL: Could not extract file ID")

            file_type = GoogleDriveService.get_file_type(url)
            if not file_type:
                raise ValueError("Unsupported file type. Only Google Docs are supported")

            export_url = GoogleDriveService.get_export_url(file_id)

            response = requests.get(export_url, timeout=30)
            response.raise_for_status()

            # A format=txt export is never legitimately HTML. An HTML body
            # with a 2xx status means we were served some other page.
            # NOTE(review): presumably Google's sign-in page for a document
            # that is not shared publicly - confirm. Storing that markup as
            # rule content would be wrong either way.
            content_type = response.headers.get('Content-Type', '')
            if 'text/html' in content_type.lower():
                raise ValueError(
                    "Google returned an HTML page instead of the document text. "
                    "Make sure the document is shared so anyone with the link can view it"
                )

            # Drop a leading byte-order mark so it does not end up in the rule.
            content = response.text.lstrip('\ufeff')

            return {
                'content': content,
                'file_type': file_type,
                'file_id': file_id,
                'success': True,
                'error': None
            }

        except requests.exceptions.RequestException as e:
            logger.error("Failed to fetch Google Docs content: %s", e)
            return GoogleDriveService._failure(f"Failed to fetch content: {str(e)}")
        except Exception as e:
            # Deliberate catch-all: callers rely on getting a result dict back.
            logger.error("Error processing Google Docs URL: %s", e)
            return GoogleDriveService._failure(f"Error processing URL: {str(e)}")

    @staticmethod
    def is_valid_google_docs_url(url: str) -> bool:
        """Return ``True`` for an https Google Docs document URL that carries a file ID."""
        if not url:
            return False

        if not url.startswith('https://docs.google.com/document/'):
            return False

        return GoogleDriveService.extract_file_id(url) is not None
@tornado.web.authenticated
def post(self) -> None:
    """Handle action-based POST requests for Google Drive backed rules.

    The JSON body must be an object with an ``action`` key:

    * ``fetch_google_drive_content`` - also requires ``url``; responds with
      the fetched ``content``, ``file_type`` and ``file_id``.
    * ``refresh_google_drive_rules`` - responds with the result of
      ``refresh_google_drive_rules()``.

    Every validation failure, and a failed fetch, is answered with status
    400 and a JSON ``error`` message.
    """
    # json.JSONDecodeError and UnicodeDecodeError both subclass ValueError,
    # so an empty, malformed or badly encoded body becomes a 400 response
    # instead of an unhandled exception.
    try:
        data = json.loads(self.request.body)
    except ValueError:
        self.set_status(400)
        self.finish(json.dumps({"error": "Request body must be valid JSON"}))
        return

    # Valid JSON that is not an object (a list, a string, ...) cannot carry
    # an action.
    if not isinstance(data, dict):
        self.set_status(400)
        self.finish(json.dumps({"error": "Request body must be a JSON object"}))
        return

    if 'action' not in data:
        self.set_status(400)
        self.finish(json.dumps({"error": "Action is required"}))
        return

    action = data['action']

    if action == 'fetch_google_drive_content':
        url = data.get('url')
        if not url:
            self.set_status(400)
            self.finish(json.dumps({"error": "URL is required"}))
            return

        # Validate URL
        if not GoogleDriveService.is_valid_google_docs_url(url):
            self.set_status(400)
            self.finish(json.dumps({"error": "Invalid Google Docs URL"}))
            return

        # NOTE(review): fetch_content issues a synchronous HTTP request
        # (30 second timeout) from this non-async handler - consider
        # moving it off the request thread.
        result = GoogleDriveService.fetch_content(url)

        if result['success']:
            self.finish(json.dumps({
                "success": True,
                "content": result['content'],
                "file_type": result['file_type'],
                "file_id": result['file_id']
            }))
        else:
            self.set_status(400)
            self.finish(json.dumps({
                "success": False,
                "error": result['error']
            }))
    elif action == 'refresh_google_drive_rules':
        # Refresh all Google Drive rules
        results = refresh_google_drive_rules()
        self.finish(json.dumps(results))
    else:
        self.set_status(400)
        self.finish(json.dumps({"error": "Unknown action"}))
def set_rule_with_metadata(rule_name: str, content: str, rule_type: str = "manual", google_drive_url: Optional[str] = None) -> None:
    """Write a rule's content and record its metadata.

    Args:
        rule_name: Name of the rule; also the key of its metadata entry.
        content: Rule text, passed to ``set_rules_file``.
        rule_type: Stored as ``rule_type`` in the metadata entry.
        google_drive_url: Stored as ``google_drive_url``; may be ``None``.
    """
    # Save the content first so a metadata entry never exists without a rule.
    set_rules_file(rule_name, content)

    # Replace (not merge) this rule's entry and stamp it with the current
    # local time.
    metadata = load_rules_metadata()
    metadata[rule_name] = {
        'rule_type': rule_type,
        'google_drive_url': google_drive_url,
        'last_updated': datetime.now().isoformat()
    }
    save_rules_metadata(metadata)


def get_rule_metadata(rule_name: str) -> Optional[Dict[str, Any]]:
    """Return the metadata entry for ``rule_name``, or ``None`` if there is none.

    ``RulesHandler.put`` stores metadata under the key exactly as it was
    received, while ``RulesHandler.get`` strips a trailing ``.md`` before
    calling this function. To keep both spellings working, the exact name is
    tried first and the name with the ``.md`` suffix added/removed second.
    """
    metadata = load_rules_metadata()
    if rule_name in metadata:
        return metadata[rule_name]

    suffix = '.md'
    if rule_name.endswith(suffix):
        alternate_name = rule_name[:-len(suffix)]
    else:
        alternate_name = rule_name + suffix
    return metadata.get(alternate_name)


def refresh_google_drive_rules() -> Dict[str, Any]:
    """Re-fetch every rule whose metadata marks it as a Google Doc.

    Returns:
        Dict with ``success`` (list of refreshed rule names) and ``errors``
        (list of ``{'rule': name, 'error': message}`` dicts).
    """
    from mito_ai.rules.google_drive_service import GoogleDriveService

    results: Dict[str, Any] = {'success': [], 'errors': []}

    for rule_name, rule_metadata in load_rules_metadata().items():
        # A hand-edited metadata file may hold entries that are not dicts;
        # skip them instead of aborting the refresh of every other rule.
        if not isinstance(rule_metadata, dict):
            continue

        google_drive_url = rule_metadata.get('google_drive_url')
        if rule_metadata.get('rule_type') != 'google_doc' or not google_drive_url:
            continue

        try:
            # Fetch fresh content from Google Drive
            result = GoogleDriveService.fetch_content(google_drive_url)

            if result['success']:
                # Rewrites the rule file and refreshes its last_updated stamp.
                set_rule_with_metadata(rule_name, result['content'], 'google_doc', google_drive_url)
                results['success'].append(rule_name)
            else:
                results['errors'].append({
                    'rule': rule_name,
                    'error': result['error']
                })

        except Exception as e:
            # Best effort: one failing rule must not stop the others.
            results['errors'].append({
                'rule': rule_name,
                'error': str(e)
            })

    return results
/**
 * Ask the backend to refresh all Google Drive rules and log the outcome.
 *
 * Never rejects: any error from the request is caught and logged as a
 * warning, so the caller can fire and forget it from the constructor.
 */
private refreshGoogleDriveRulesOnStartup = async (): Promise<void> => {
    try {
        const results = await refreshGoogleDriveRules();
        if (results.success.length > 0) {
            console.log('Successfully refreshed Google Drive rules on startup:', results.success);
        }
        if (results.errors.length > 0) {
            console.warn('Some Google Drive rules failed to refresh on startup:', results.errors);
        }
    } catch (error) {
        console.warn('Error refreshing Google Drive rules on startup:', error);
    }
};
// Forward every edit of the URL field to the parent, if it registered a callback.
// NOTE(review): the event's type argument is missing from the visible source;
// HTMLInputElement is assumed here - confirm against the input element used.
const handleGoogleDriveUrlChange = (e: React.ChangeEvent<HTMLInputElement>) => {
    onGoogleDriveUrlChange?.(e.target.value);
};

// Fetch the linked document through the parent callback, toggling the
// "fetching" flag around the request. Failures are only logged.
const handleFetchGoogleDocsContent = async () => {
    const docUrl = formData.googleDriveUrl;
    if (!docUrl || !onFetchGoogleDriveContent) {
        return;
    }

    setIsFetchingGoogleDocs(true);
    try {
        await onFetchGoogleDriveContent(docUrl);
    } catch (error) {
        console.error('Failed to fetch Google Docs content:', error);
    } finally {
        setIsFetchingGoogleDocs(false);
    }
};

// True when the string starts with https://docs.google.com/document/d/<id>.
const isValidGoogleDocsUrl = (url: string): boolean =>
    /^https:\/\/docs\.google\.com\/document\/d\/[a-zA-Z0-9-_]+/.test(url);
{formError &&

{formError}

} @@ -59,18 +102,87 @@ export const RulesForm: React.FC = ({ +
+ + +
+ + {ruleSource === 'google_doc' && ( +
+ + + Paste a public Google Docs link. Then fetch the content to double check you selected the correct content. + +
+ + +
+ {formData.googleDriveUrl && !isValidGoogleDocsUrl(formData.googleDriveUrl) && ( +

Please enter a valid Google Docs URL

+ )} +
+ )} +
+ {ruleSource === 'google_doc' && ( + + Preview the rule content below. Then click the Add Rule button below. + + )}