Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 38 additions & 64 deletions demos/svc-sample.ipynb

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion extractor.py

This file was deleted.

186 changes: 95 additions & 91 deletions logllm/log_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,119 +4,123 @@
import json
import os
from dotenv import load_dotenv
import markdown as md
from openai import OpenAI

# Load environment variables
load_dotenv()

# Configure Google Generative AI
genai.configure(api_key=os.getenv('API_KEY'))

# Generation settings for the Gemini model: deterministic sampling and a
# JSON response body.
generation_config = dict(
    temperature=0,
    top_p=0.95,
    top_k=64,
    max_output_tokens=8192,
    response_mime_type="application/json",
)

# Module-level Gemini model instance shared by the extraction helpers below.
model = genai.GenerativeModel(
    model_name="gemini-1.5-flash",
    generation_config=generation_config,
)

def init_wandb(project_name):
    """Start a Weights & Biases run for *project_name* (system stats disabled)."""
    run_settings = wandb.Settings(_disable_stats=True)
    wandb.init(project=project_name, settings=run_settings)

def extract_experimental_conditions_gemini(code):
    """Ask the module-level Gemini ``model`` to extract experiment conditions as JSON.

    Parameters
    ----------
    code : str
        Source text of the user's Jupyter notebook.

    Returns
    -------
    str
        Pretty-printed JSON string with the extracted conditions.

    Raises
    ------
    json.JSONDecodeError
        If the model reply is not valid JSON.
    """
    # The literal is deliberately unindented so no whitespace post-processing
    # is needed (the old trailing .replace(" ", "") stripped *every* space and
    # garbled the prompt).
    user_input = f"""
You are an advanced machine learning experiment designer.
Extract all experimental conditions and results for logging via wandb API.
Add your original parameters in your JSON response if you want to log other parameters.
Extract all information you can find in the given script as int, bool, or float values.
If you cannot describe conditions with int, bool, or float values, use a list of natural language.
Give advice to improve the accuracy.
If you use natural language, the answers should be short.
Do not include information already provided in param_name_1 for `condition_as_natural_language`.

Here is a user's Jupyter Notebook script: {code}
"""

    # Seed the chat so the model answers with JSON only.
    chat_session = model.start_chat(
        history=[
            {"role": "user", "parts": ["Hello! help me analyse data in json format only"]},
            {"role": "model", "parts": ["Sure I can do that, provide me with data"]},
        ]
    )

    response = chat_session.send_message(user_input)
    result = response.candidates[0].content.parts[0].text

    # Round-trip through json to validate and pretty-print the reply.
    parsed_json = json.loads(result)
    formatted_json = json.dumps(parsed_json, indent=4, ensure_ascii=False)
    print("response: ", formatted_json)
    return formatted_json

def extract_experimental_conditions_openai(code):
    """Ask an OpenAI chat completion to extract experiment conditions as JSON.

    Parameters
    ----------
    code : str
        Source text of the user's Jupyter notebook.

    Returns
    -------
    str
        Pretty-printed JSON string with the extracted conditions (same
        contract as :func:`extract_experimental_conditions_gemini`).

    Raises
    ------
    json.JSONDecodeError
        If the model reply is not valid JSON.
    """
    client = OpenAI()

    # Unindented literal: no whitespace post-processing required.
    system_prompt = """
You are an advanced machine learning experiment designer.
Extract all experimental conditions and results for logging via wandb API.
Add your original parameters in your JSON response if you want to log other parameters.
Extract all information you can find in the given script as int, bool, or float values.
If you cannot describe conditions with int, bool, or float values, use a list of natural language.
Give advice to improve the accuracy.
If you use natural language, the answers should be very short.
Do not include information already provided in param_name_1 for `condition_as_natural_language`.
"""

    user_prompt = f"""
Here is a user's Jupyter Notebook script:{code}
"""

    # response_format forces a single JSON object in the reply.
    response = client.chat.completions.create(
        model="gpt-4o-mini-2024-07-18",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        response_format={"type": "json_object"},
    )

    # Validate and pretty-print; return the formatted string for consistency
    # with the gemini variant.
    parsed_json = json.loads(response.choices[0].message.content)
    formatted_json = json.dumps(parsed_json, indent=4, ensure_ascii=False)
    print(formatted_json)
    return formatted_json
def configure_google_genai():
    """Configure Google Generative AI lazily and return a ready Gemini model.

    Reads the API key from the ``API_KEY`` environment variable, applies
    deterministic generation settings with a JSON response type, and returns
    the configured ``GenerativeModel`` instance.
    """
    genai.configure(api_key=os.getenv('API_KEY'))

    # Deterministic sampling, JSON output.
    settings = dict(
        temperature=0,
        top_p=0.95,
        top_k=64,
        max_output_tokens=8192,
        response_mime_type="application/json",
    )

    return genai.GenerativeModel(
        model_name="gemini-1.5-flash",
        generation_config=settings,
    )

parsed_json = json.loads(response.choices[0].message.content)
formatted_json = json.dumps(parsed_json, indent=4, ensure_ascii=False)
print(formatted_json)
# System prompt shared by every provider: instructs the model to return the
# extracted experiment conditions as a single JSON object.
# BUG FIX: this is a plain (non-f) string, so the schema braces must be single
# '{' / '}' — the previous '{{' / '}}' escapes leaked literally into the
# prompt. The trailing .replace(" ", "") was dropped too: it removed every
# space and made the prompt unreadable.
system_prompt = """
You are an advanced machine learning experiment designer.
Extract all experimental conditions and results for logging via wandb API.
Add your original parameters in your JSON response if you want to log other parameters.
Extract all information you can find in the given script as int, bool, or float values.
If you cannot describe conditions with int, bool, or float values, use a list of natural language.
Give advice to improve the accuracy.
If you use natural language, the answers should be very short.
Do not include information already provided in param_name_1 for `condition_as_natural_language`.
Output JSON schema example:
This is just an example, make changes as necessary. Use nested dictionaries if necessary.
{
    "method":"str",
    "dataset":"str",
    "task":"str",
    "accuracy":"",
    "other_param_here":{
        "other_param_here":"",
        "other_param_here":"",
    },
    "other_param_here":"",
    ...
    "condition_as_natural_language":["Small dataset."],
    "advice_to_improve_acc":["Use a bigger dataset.","Use a simpler model."]
}
"""

# Extract experimental conditions using the specified provider (Google or OpenAI).
def extract_experimental_conditions(provider, code):
    """Send the notebook code to the chosen LLM and return the extracted JSON.

    Parameters
    ----------
    provider : str
        Either ``"gemini"`` or ``"openai"``.
    code : str
        Source text of the user's Jupyter notebook.

    Returns
    -------
    str
        Pretty-printed JSON string with the extracted conditions.

    Raises
    ------
    ValueError
        If *provider* is not one of the supported names.
    json.JSONDecodeError
        If the model reply is not valid JSON.
    """
    # Combine the shared system prompt with the user's code.
    user_input = f"{system_prompt}\n\nHere is a user's Jupyter Notebook script: {code}"

    if provider == "gemini":
        # Configure Google Generative AI lazily and run a short chat.
        model = configure_google_genai()
        chat_session = model.start_chat(
            history=[{"role": "user", "parts": ["Hello! help me analyze data in JSON format only and return only json object nothing else"]}]
        )
        response = chat_session.send_message(user_input)
        result = response.candidates[0].content.parts[0].text

    elif provider == "openai":
        # OpenAI path: force a JSON object via response_format.
        client = OpenAI()
        response = client.chat.completions.create(
            model="gpt-4o-mini-2024-07-18",
            messages=[
                {"role": "system", "content": user_input},
            ],
            response_format={"type": "json_object"},
        )
        result = response.choices[0].message.content

    else:
        raise ValueError("Invalid provider specified. Use 'gemini' or 'openai'.")

    # Validate the reply and re-serialize it pretty-printed.
    parsed = json.loads(result)
    return json.dumps(parsed, indent=4, ensure_ascii=False)

# Log the extracted information to Weights & Biases (W&B).
def log_to_wandb(response_text):
    """Parse *response_text* as JSON and log the resulting dict to W&B.

    Never raises: malformed JSON and W&B failures are reported to stdout so a
    logging problem does not abort the experiment run.
    """
    try:
        response_dict = json.loads(response_text)
        wandb.log(response_dict)
    except json.JSONDecodeError as e:
        # The provider returned something that is not valid JSON.
        print(f"Error parsing JSON: {e}")
    except Exception as e:
        # Anything raised by wandb.log (e.g. no active run).
        print(f"Error logging to W&B: {e}")

# Main entry point: extract and optionally log experimental conditions from a notebook.
def log_llm(notebook_path, project_name=None, is_logging=False, provider=None):
    """Extract experimental conditions from a notebook and optionally log them to W&B.

    Parameters
    ----------
    notebook_path : str
        Path to the ``.ipynb`` file to analyze.
    project_name : str, optional
        W&B project name; defaults to the notebook's file name.
    is_logging : bool
        When True, start a W&B run and log the extracted conditions.
    provider : str
        LLM provider to use: ``"gemini"`` or ``"openai"``.

    Returns
    -------
    str
        The pretty-printed JSON string of extracted conditions, so callers
        (e.g. the query module) can reuse it.
    """
    # Default the project name to the notebook's file name.
    project_name = project_name or os.path.basename(notebook_path).replace(".ipynb", "")

    if is_logging:
        # Initialize a new W&B run only when logging is requested.
        init_wandb(project_name)

    # Pull the code cells out of the notebook.
    code_string = extract_notebook_code(notebook_path)

    # Delegate extraction to the selected AI provider.
    parsed_json = extract_experimental_conditions(provider, code_string)

    if is_logging and parsed_json:
        log_to_wandb(parsed_json)

    print("Response from the provider processed and logged to W&B.")
    # BUG FIX: return the extracted JSON — the usage example assigns this
    # function's result, but the previous version returned None.
    return parsed_json
85 changes: 85 additions & 0 deletions logllm/plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import json
import numpy as np
import matplotlib.pyplot as plt

def plot_metrics(*models_results):
    """Draw a grouped bar chart comparing numeric metrics across models.

    Each positional argument is one model's results: a dict, or a JSON string
    encoding one. Metrics are gathered from ``result_name_i`` /
    ``result_value_i`` pairs and from any other numeric field not in the
    exclusion set. A model missing a metric is plotted as 0 for that metric.
    """
    # Normalize inputs: accept JSON strings as well as dicts.
    models = [json.loads(m) if isinstance(m, str) else m for m in models_results]

    # Bookkeeping/config fields that should not be plotted as metrics.
    exclude_keys = {'cache_size', 'random_state_tts', 'random_state', 'random_state_1', 'n_estimators'}

    metrics = []        # metric names, in first-seen order
    model_names = []
    per_model = []      # one {metric: value} dict per model (keeps alignment)

    for model in models:
        model_names.append(model.get("model_name", "Test Model"))
        values = {}

        # Explicit "result_name_i" / "result_value_i" pairs.
        for key, value in model.items():
            if key.startswith("result_name_"):
                index = key.split("_")[-1]  # e.g. "1" from "result_name_1"
                metric_value = model.get(f"result_value_{index}")
                if metric_value is not None:
                    if value not in metrics:
                        metrics.append(value)
                    values[value] = metric_value

        # Any other numeric field becomes a metric of its own.
        # BUG FIX: skip "result_value_i" fields here — they were already
        # consumed above, and previously appeared a second time under their
        # raw key name.
        for key, value in model.items():
            if (key not in exclude_keys
                    and not key.startswith("result_value_")
                    and isinstance(value, (int, float))):
                if key not in metrics:
                    metrics.append(key)
                values.setdefault(key, value)

        per_model.append(values)

    # Handle cases where no valid metrics were provided.
    if not metrics or not model_names:
        print("No valid metrics or model names found.")
        return

    # BUG FIX: fill missing values per model instead of blindly appending
    # zeros at the end of each list — the old code misattributed values when
    # an earlier model lacked a metric that a later model supplied.
    model_values = {metric: [vals.get(metric, 0) for vals in per_model]
                    for metric in metrics}

    # Grouped bar chart: one group per metric, one bar per model.
    x = np.arange(len(metrics))  # label locations
    bar_width = 0.15
    fig, ax = plt.subplots(figsize=(10, 6))

    for i, model_name in enumerate(model_names):
        heights = [model_values[metric][i] for metric in metrics]
        ax.bar(x + i * bar_width, heights, width=bar_width, label=model_name)

    # Plot customization.
    ax.set_xlabel('Metric', fontsize=14)
    ax.set_ylabel('Value', fontsize=14)
    ax.set_title('Comparison of Model Performance Metrics', fontsize=16)
    ax.set_xticks(x + bar_width * (len(model_names) - 1) / 2)
    ax.set_xticklabels(metrics, fontsize=12)
    ax.legend(title='Models')
    ax.grid(True, axis='y', linestyle='--', alpha=0.7)

    plt.xticks(rotation=45, ha='right')  # rotate labels for readability
    plt.tight_layout()
    plt.show()


35 changes: 18 additions & 17 deletions logllm/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@

# Configure Google Generative AI API Key
genai.configure(api_key=os.getenv('API_KEY'))
generation_config = {
"temperature": 0,
"top_p": 0.95,
"top_k": 64,
"max_output_tokens": 8192,
"response_mime_type": "text/plain",
}

# Function to query OpenAI
def query_openai(user_input: str):
Expand All @@ -32,20 +39,21 @@ def query_openai(user_input: str):
return response['choices'][0]['message']['content']

# Function to query Google Gemini
def query_gemini(user_input: str):
model = genai.GenerativeModel("gemini-1.5-flash")
def query_gemini(user_input: str, code):
model = genai.GenerativeModel("gemini-1.5-flash", generation_config=generation_config)
user_input = f"{code}"

system_prompt = """
Convert the following query to a W&B API query:
Please provide the data you want me to convert to a W&B API query:
""".strip()

user_prompt = f"""
Here is a user's query: {user_input}
""".strip()
"""

chat_session = model.start_chat(
history=[
{"role": "system", "parts": [system_prompt]},
{"role": "model", "parts": [system_prompt]},
{"role": "user", "parts": [user_prompt]},
]
)
Expand All @@ -54,22 +62,15 @@ def query_gemini(user_input: str):
return response.candidates[0].content.parts[0].text

# General query function that calls the appropriate provider.
def query(user_input: str, provider: str):
    """Route *user_input* to the requested LLM provider and return its reply.

    Parameters
    ----------
    user_input : str
        The query text to send to the provider.
    provider : str
        Either ``'openai'`` or ``'gemini'``.

    Raises
    ------
    ValueError
        If *provider* is not one of the supported names.
    """
    # BUG FIX: the refactor dropped user_input and called the helpers with no
    # arguments; restored so the visible caller query(text, provider=...) works.
    if provider == 'openai':
        return query_openai(user_input)
    elif provider == 'gemini':
        return query_gemini(user_input)
    else:
        raise ValueError("Invalid provider specified. Use 'openai' or 'gemini'.")


# Usage example: runs only when this module is executed as a script, so
# importing it no longer triggers network calls.
if __name__ == "__main__":
    notebook_path = "demos/svc-sample.ipynb"

    # Extract experimental conditions and results using log_llm.
    parsed_json = log_llm(notebook_path, project_name="Machine learning", is_logging=False, provider="gemini")

    # BUG FIX: the original string lacked the f-prefix, so the literal text
    # "{parsed_json}" was sent instead of the extracted JSON.
    response = query(f"what is the best model? :{parsed_json}", provider="gemini")

    # Print the response from the query.
    print(response)
Loading