From 9502deef14d94646217e76da1ff9a8f251d4c7d2 Mon Sep 17 00:00:00 2001 From: Aaron Diamond-Reivich Date: Mon, 6 Oct 2025 15:22:50 -0400 Subject: [PATCH 1/8] mito-ai: add v1 google doc rule import --- mito-ai/mito_ai/rules/google_drive_service.py | 111 ++++++++++ mito-ai/mito_ai/rules/handlers.py | 62 +++++- mito-ai/mito_ai/rules/rules_storage.py | 122 ++++++++++ .../SettingsManager/rules/RulesForm.tsx | 125 ++++++++++- .../SettingsManager/rules/RulesPage.tsx | 67 +++++- .../SettingsManager/rules/models.ts | 3 + mito-ai/src/restAPI/RestAPI.tsx | 27 +++ mito-ai/style/RulesForm.css | 208 +++++++++++++++++- 8 files changed, 697 insertions(+), 28 deletions(-) create mode 100644 mito-ai/mito_ai/rules/google_drive_service.py create mode 100644 mito-ai/mito_ai/rules/rules_storage.py diff --git a/mito-ai/mito_ai/rules/google_drive_service.py b/mito-ai/mito_ai/rules/google_drive_service.py new file mode 100644 index 0000000000..e8ad39764e --- /dev/null +++ b/mito-ai/mito_ai/rules/google_drive_service.py @@ -0,0 +1,111 @@ +# Copyright (c) Saga Inc. +# Distributed under the terms of the GNU Affero General Public License v3.0 License. + +import re +import requests +from typing import Optional, Dict, Any +from urllib.parse import urlparse, parse_qs +import logging + +logger = logging.getLogger(__name__) + +class GoogleDriveService: + """Service for fetching content from Google Drive URLs""" + + @staticmethod + def extract_file_id(url: str) -> Optional[str]: + """Extract file ID from Google Drive URL""" + patterns = [ + r'/d/([a-zA-Z0-9-_]+)', # Standard Google Drive URL + r'id=([a-zA-Z0-9-_]+)', # URL with id parameter + ] + + for pattern in patterns: + match = re.search(pattern, url) + if match: + return match.group(1) + + return None + + @staticmethod + def get_file_type(url: str) -> Optional[str]: + """Determine if URL is a Google Docs URL""" + if '/document/' in url: + return 'document' + return None + + @staticmethod + def get_export_url(file_id: str) -> str: + """Generate export URL for Google Docs file""" + return f"https://docs.google.com/document/d/{file_id}/export?format=txt" + + @staticmethod + def fetch_content(url: str) -> Dict[str, Any]: + """ + Fetch content from Google Docs URL + + Args: + url: Google Docs URL + + Returns: + Dict containing content, file_type, and metadata + """ + try: + # Extract file ID and type + file_id = GoogleDriveService.extract_file_id(url) + if not file_id: + raise ValueError("Invalid Google Docs URL: Could not extract file ID") + + file_type = GoogleDriveService.get_file_type(url) + if not file_type: + raise ValueError("Unsupported file type. Only Google Docs are supported") + + # Generate export URL + export_url = GoogleDriveService.get_export_url(file_id) + + # Fetch content + response = requests.get(export_url, timeout=30) + response.raise_for_status() + + content = response.text + + return { + 'content': content, + 'file_type': file_type, + 'file_id': file_id, + 'success': True, + 'error': None + } + + except requests.exceptions.RequestException as e: + logger.error(f"Failed to fetch Google Docs content: {e}") + return { + 'content': None, + 'file_type': None, + 'file_id': None, + 'success': False, + 'error': f"Failed to fetch content: {str(e)}" + } + except Exception as e: + logger.error(f"Error processing Google Docs URL: {e}") + return { + 'content': None, + 'file_type': None, + 'file_id': None, + 'success': False, + 'error': f"Error processing URL: {str(e)}" + } + + @staticmethod + def is_valid_google_docs_url(url: str) -> bool: + """Check if URL is a valid Google Docs URL""" + if not url: + return False + + # Check if it's a Google Docs URL + if not url.startswith('https://docs.google.com/document/'): + return False + + # Check if it contains a file ID + file_id = GoogleDriveService.extract_file_id(url) + return file_id is not None diff --git a/mito-ai/mito_ai/rules/handlers.py b/mito-ai/mito_ai/rules/handlers.py index e7d4f44d6c..0c5cbb2b1b 100644 --- a/mito-ai/mito_ai/rules/handlers.py +++ b/mito-ai/mito_ai/rules/handlers.py @@ -8,6 +8,8 @@ import os from jupyter_server.base.handlers import APIHandler from mito_ai.rules.utils import RULES_DIR_PATH, get_all_rules, get_rule, set_rules_file +from mito_ai.rules.google_drive_service import GoogleDriveService +from mito_ai.rules.rules_storage import RulesStorage class RulesHandler(APIHandler): @@ -37,8 +39,64 @@ def put(self, key: str) -> None: self.set_status(400) self.finish(json.dumps({"error": "Content is required"})) return - - set_rules_file(key, data['content']) + + # Check if this is a Google Drive rule + google_drive_url = data.get('google_drive_url') + + if google_drive_url: + # Use the new storage system for Google Drive rules + RulesStorage.set_rule(key, data['content'], google_drive_url) + else: + # Use the legacy system for regular rules + set_rules_file(key, data['content']) + self.finish(json.dumps({"status": "updated", "rules file ": key})) + + @tornado.web.authenticated + def post(self) -> None: + """Handle POST requests for Google Drive content fetching""" + data = json.loads(self.request.body) + + if 'action' not in data: + self.set_status(400) + self.finish(json.dumps({"error": "Action is required"})) + return + + if data['action'] == 'fetch_google_drive_content': + url = data.get('url') + if not url: + self.set_status(400) + self.finish(json.dumps({"error": "URL is required"})) + return + + # Validate URL + if not GoogleDriveService.is_valid_google_docs_url(url): + self.set_status(400) + self.finish(json.dumps({"error": "Invalid Google Docs URL"})) + return + + # Fetch content + result = GoogleDriveService.fetch_content(url) + + if result['success']: + self.finish(json.dumps({ + "success": True, + "content": result['content'], + "file_type": result['file_type'], + "file_id": result['file_id'] + })) + else: + self.set_status(400) + self.finish(json.dumps({ + "success": False, + "error": result['error'] + })) + elif data['action'] == 'refresh_google_drive_rules': + # Refresh all Google Drive rules + results = RulesStorage.refresh_google_drive_rules() + self.finish(json.dumps(results)) + else: + self.set_status(400) + self.finish(json.dumps({"error": "Unknown action"})) diff --git a/mito-ai/mito_ai/rules/rules_storage.py b/mito-ai/mito_ai/rules/rules_storage.py new file mode 100644 index 0000000000..7a903c3d40 --- /dev/null +++ b/mito-ai/mito_ai/rules/rules_storage.py @@ -0,0 +1,122 @@ +# Copyright (c) Saga Inc. +# Distributed under the terms of the GNU Affero General Public License v3.0 License. + +import json +import os +from typing import Any, Dict, List, Optional +from datetime import datetime +from mito_ai.utils.schema import MITO_FOLDER +from mito_ai.rules.google_drive_service import GoogleDriveService + +RULES_DIR_PATH: str = os.path.join(MITO_FOLDER, 'rules') +RULES_METADATA_FILE: str = os.path.join(RULES_DIR_PATH, 'metadata.json') + +class RulesStorage: + """Enhanced rules storage with metadata support""" + + @staticmethod + def ensure_rules_directory(): + """Ensure the rules directory exists""" + if not os.path.exists(RULES_DIR_PATH): + os.makedirs(RULES_DIR_PATH) + + @staticmethod + def load_metadata() -> Dict[str, Any]: + """Load rules metadata from file""" + RulesStorage.ensure_rules_directory() + + if not os.path.exists(RULES_METADATA_FILE): + return {} + + try: + with open(RULES_METADATA_FILE, 'r') as f: + return json.load(f) + except (json.JSONDecodeError, IOError): + return {} + + @staticmethod + def save_metadata(metadata: Dict[str, Any]): + """Save rules metadata to file""" + RulesStorage.ensure_rules_directory() + + with open(RULES_METADATA_FILE, 'w') as f: + json.dump(metadata, f, indent=2) + + @staticmethod + def set_rule(rule_name: str, content: str, google_drive_url: Optional[str] = None) -> None: + """Set a rule with optional Google Drive URL""" + RulesStorage.ensure_rules_directory() + + # Save the content to the .md file + file_path = os.path.join(RULES_DIR_PATH, f"{rule_name}.md") + with open(file_path, 'w') as f: + f.write(content) + + # Update metadata + metadata = RulesStorage.load_metadata() + metadata[rule_name] = { + 'google_drive_url': google_drive_url, + 'last_updated': datetime.now().isoformat(), + 'is_google_drive_rule': google_drive_url is not None + } + RulesStorage.save_metadata(metadata) + + @staticmethod + def get_rule(rule_name: str) -> Optional[str]: + """Get rule content""" + if rule_name.endswith('.md'): + rule_name = rule_name[:-3] + + file_path = os.path.join(RULES_DIR_PATH, f"{rule_name}.md") + + if not os.path.exists(file_path): + return None + + with open(file_path, 'r') as f: + return f.read() + + @staticmethod + def get_rule_metadata(rule_name: str) -> Optional[Dict[str, Any]]: + """Get rule metadata""" + metadata = RulesStorage.load_metadata() + return metadata.get(rule_name) + + @staticmethod + def get_all_rules() -> List[str]: + """Get all rule names""" + RulesStorage.ensure_rules_directory() + + try: + return [f for f in os.listdir(RULES_DIR_PATH) if f.endswith('.md')] + except OSError: + return [] + + @staticmethod + def refresh_google_drive_rules() -> Dict[str, Any]: + """Refresh all Google Drive rules""" + metadata = RulesStorage.load_metadata() + results = {'success': [], 'errors': []} + + for rule_name, rule_metadata in metadata.items(): + if rule_metadata.get('is_google_drive_rule') and rule_metadata.get('google_drive_url'): + try: + # Fetch fresh content from Google Drive + result = GoogleDriveService.fetch_content(rule_metadata['google_drive_url']) + + if result['success']: + # Update the rule content + RulesStorage.set_rule(rule_name, result['content'], rule_metadata['google_drive_url']) + results['success'].append(rule_name) + else: + results['errors'].append({ + 'rule': rule_name, + 'error': result['error'] + }) + + except Exception as e: + results['errors'].append({ + 'rule': rule_name, + 'error': str(e) + }) + + return results diff --git a/mito-ai/src/Extensions/SettingsManager/rules/RulesForm.tsx b/mito-ai/src/Extensions/SettingsManager/rules/RulesForm.tsx index f8d43f9904..514fb53b97 100644 --- a/mito-ai/src/Extensions/SettingsManager/rules/RulesForm.tsx +++ b/mito-ai/src/Extensions/SettingsManager/rules/RulesForm.tsx @@ -3,7 +3,7 @@ * Distributed under the terms of the GNU Affero General Public License v3.0 License. */ -import React, { useState } from 'react'; +import React, { useState, useEffect } from 'react'; import LoadingCircle from '../../../components/LoadingCircle'; import '../../../../style/RulesForm.css'; import { Rule } from './models'; @@ -16,6 +16,8 @@ interface RuleFormProps { onSubmit: (e: React.FormEvent) => void; onClose: () => void; isEditing: boolean; + onGoogleDriveUrlChange?: (url: string) => void; + onFetchGoogleDriveContent?: (url: string) => Promise; } export const RulesForm: React.FC = ({ @@ -24,9 +26,25 @@ export const RulesForm: React.FC = ({ onInputChange, onSubmit, onClose, - isEditing + isEditing, + onGoogleDriveUrlChange, + onFetchGoogleDriveContent }) => { const [isLoading, setIsLoading] = useState(false); + const [isFetchingGoogleDocs, setIsFetchingGoogleDocs] = useState(false); + const [ruleSource, setRuleSource] = useState<'manual' | 'google-docs'>('manual'); + + const ruleSourceOptions = [ + { value: 'manual', label: 'Manual Entry' }, + { value: 'google-docs', label: 'Google Docs Link' } + ]; + + // Set initial rule source based on existing data + useEffect(() => { + if (formData.googleDriveUrl) { + setRuleSource('google-docs'); + } + }, [formData.googleDriveUrl]); const handleSubmit = async (e: React.FormEvent): Promise => { e.preventDefault(); @@ -38,6 +56,29 @@ export const RulesForm: React.FC = ({ } }; + const handleGoogleDriveUrlChange = (e: React.ChangeEvent) => { + const url = e.target.value; + onGoogleDriveUrlChange?.(url); + }; + + const handleFetchGoogleDocsContent = async () => { + if (!formData.googleDriveUrl || !onFetchGoogleDriveContent) return; + + setIsFetchingGoogleDocs(true); + try { + await onFetchGoogleDriveContent(formData.googleDriveUrl); + } catch (error) { + console.error('Failed to fetch Google Docs content:', error); + } finally { + setIsFetchingGoogleDocs(false); + } + }; + + const isValidGoogleDocsUrl = (url: string): boolean => { + const googleDocsPattern = /^https:\/\/docs\.google\.com\/document\/d\/[a-zA-Z0-9-_]+/; + return googleDocsPattern.test(url); + }; + return (
{formError &&

{formError}

} @@ -59,18 +100,87 @@ export const RulesForm: React.FC = ({ +
+ + +
+ + {ruleSource === 'google-docs' && ( +
+ + + Paste a public Google Docs link + +
+ + +
+ {formData.googleDriveUrl && !isValidGoogleDocsUrl(formData.googleDriveUrl) && ( +

Please enter a valid Google Docs URL

+ )} +
+ )} +
+ {ruleSource === 'google-docs' && ( + + Content will be automatically fetched from Google Docs. You can edit it below if needed. + + )}