Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 38 additions & 64 deletions demos/svc-sample.ipynb

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion extractor.py

This file was deleted.

186 changes: 95 additions & 91 deletions logllm/log_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,119 +4,123 @@
import json
import os
from dotenv import load_dotenv
import markdown as md
from openai import OpenAI

# Load environment variables
load_dotenv()

# Configure Google Generative AI
genai.configure(api_key=os.getenv('API_KEY'))

# Generation settings for the Gemini model: deterministic sampling and a
# JSON response body.
generation_config = dict(
    temperature=0,
    top_p=0.95,
    top_k=64,
    max_output_tokens=8192,
    response_mime_type="application/json",
)

# Module-level Gemini model instance shared by the extraction helpers below.
model = genai.GenerativeModel(
    model_name="gemini-1.5-flash",
    generation_config=generation_config,
)

def init_wandb(project_name):
    """Start a Weights & Biases run for *project_name* (system stats disabled)."""
    run_settings = wandb.Settings(_disable_stats=True)
    wandb.init(project=project_name, settings=run_settings)

def extract_experimental_conditions_gemini(code):
    """Ask the module-level Gemini ``model`` to extract experiment conditions as JSON.

    Parameters
    ----------
    code : str
        Source text of the user's Jupyter notebook.

    Returns
    -------
    str
        Pretty-printed JSON string with the extracted conditions.

    Raises
    ------
    json.JSONDecodeError
        If the model reply is not valid JSON.
    """
    # The literal is deliberately unindented so no whitespace post-processing
    # is needed (the old trailing .replace(" ", "") stripped *every* space and
    # garbled the prompt).
    user_input = f"""
You are an advanced machine learning experiment designer.
Extract all experimental conditions and results for logging via wandb API.
Add your original parameters in your JSON response if you want to log other parameters.
Extract all information you can find in the given script as int, bool, or float values.
If you cannot describe conditions with int, bool, or float values, use a list of natural language.
Give advice to improve the accuracy.
If you use natural language, the answers should be short.
Do not include information already provided in param_name_1 for `condition_as_natural_language`.

Here is a user's Jupyter Notebook script: {code}
"""

    # Seed the chat so the model answers with JSON only.
    chat_session = model.start_chat(
        history=[
            {"role": "user", "parts": ["Hello! help me analyse data in json format only"]},
            {"role": "model", "parts": ["Sure I can do that, provide me with data"]},
        ]
    )

    response = chat_session.send_message(user_input)
    result = response.candidates[0].content.parts[0].text

    # Round-trip through json to validate and pretty-print the reply.
    parsed_json = json.loads(result)
    formatted_json = json.dumps(parsed_json, indent=4, ensure_ascii=False)
    print("response: ", formatted_json)
    return formatted_json

def extract_experimental_conditions_openai(code):
    """Ask an OpenAI chat completion to extract experiment conditions as JSON.

    Parameters
    ----------
    code : str
        Source text of the user's Jupyter notebook.

    Returns
    -------
    str
        Pretty-printed JSON string with the extracted conditions (same
        contract as :func:`extract_experimental_conditions_gemini`).

    Raises
    ------
    json.JSONDecodeError
        If the model reply is not valid JSON.
    """
    client = OpenAI()

    # Unindented literal: no whitespace post-processing required.
    system_prompt = """
You are an advanced machine learning experiment designer.
Extract all experimental conditions and results for logging via wandb API.
Add your original parameters in your JSON response if you want to log other parameters.
Extract all information you can find in the given script as int, bool, or float values.
If you cannot describe conditions with int, bool, or float values, use a list of natural language.
Give advice to improve the accuracy.
If you use natural language, the answers should be very short.
Do not include information already provided in param_name_1 for `condition_as_natural_language`.
"""

    user_prompt = f"""
Here is a user's Jupyter Notebook script:{code}
"""

    # response_format forces a single JSON object in the reply.
    response = client.chat.completions.create(
        model="gpt-4o-mini-2024-07-18",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        response_format={"type": "json_object"},
    )

    # Validate and pretty-print; return the formatted string for consistency
    # with the gemini variant.
    parsed_json = json.loads(response.choices[0].message.content)
    formatted_json = json.dumps(parsed_json, indent=4, ensure_ascii=False)
    print(formatted_json)
    return formatted_json
def configure_google_genai():
    """Configure Google Generative AI lazily and return a ready Gemini model.

    Reads the API key from the ``API_KEY`` environment variable, applies
    deterministic generation settings with a JSON response type, and returns
    the configured ``GenerativeModel`` instance.
    """
    genai.configure(api_key=os.getenv('API_KEY'))

    # Deterministic sampling, JSON output.
    settings = dict(
        temperature=0,
        top_p=0.95,
        top_k=64,
        max_output_tokens=8192,
        response_mime_type="application/json",
    )

    return genai.GenerativeModel(
        model_name="gemini-1.5-flash",
        generation_config=settings,
    )

parsed_json = json.loads(response.choices[0].message.content)
formatted_json = json.dumps(parsed_json, indent=4, ensure_ascii=False)
print(formatted_json)
# System prompt shared by every provider: instructs the model to return the
# extracted experiment conditions as a single JSON object.
# BUG FIX: this is a plain (non-f) string, so the schema braces must be single
# '{' / '}' — the previous '{{' / '}}' escapes leaked literally into the
# prompt. The trailing .replace(" ", "") was dropped too: it removed every
# space and made the prompt unreadable.
system_prompt = """
You are an advanced machine learning experiment designer.
Extract all experimental conditions and results for logging via wandb API.
Add your original parameters in your JSON response if you want to log other parameters.
Extract all information you can find in the given script as int, bool, or float values.
If you cannot describe conditions with int, bool, or float values, use a list of natural language.
Give advice to improve the accuracy.
If you use natural language, the answers should be very short.
Do not include information already provided in param_name_1 for `condition_as_natural_language`.
Output JSON schema example:
This is just an example, make changes as necessary. Use nested dictionaries if necessary.
{
    "method":"str",
    "dataset":"str",
    "task":"str",
    "accuracy":"",
    "other_param_here":{
        "other_param_here":"",
        "other_param_here":"",
    },
    "other_param_here":"",
    ...
    "condition_as_natural_language":["Small dataset."],
    "advice_to_improve_acc":["Use a bigger dataset.","Use a simpler model."]
}
"""

# Extract experimental conditions using the specified provider (Google or OpenAI).
def extract_experimental_conditions(provider, code):
    """Send the notebook code to the chosen LLM and return the extracted JSON.

    Parameters
    ----------
    provider : str
        Either ``"gemini"`` or ``"openai"``.
    code : str
        Source text of the user's Jupyter notebook.

    Returns
    -------
    str
        Pretty-printed JSON string with the extracted conditions.

    Raises
    ------
    ValueError
        If *provider* is not one of the supported names.
    json.JSONDecodeError
        If the model reply is not valid JSON.
    """
    # Combine the shared system prompt with the user's code.
    user_input = f"{system_prompt}\n\nHere is a user's Jupyter Notebook script: {code}"

    if provider == "gemini":
        # Configure Google Generative AI lazily and run a short chat.
        model = configure_google_genai()
        chat_session = model.start_chat(
            history=[{"role": "user", "parts": ["Hello! help me analyze data in JSON format only and return only json object nothing else"]}]
        )
        response = chat_session.send_message(user_input)
        result = response.candidates[0].content.parts[0].text

    elif provider == "openai":
        # OpenAI path: force a JSON object via response_format.
        client = OpenAI()
        response = client.chat.completions.create(
            model="gpt-4o-mini-2024-07-18",
            messages=[
                {"role": "system", "content": user_input},
            ],
            response_format={"type": "json_object"},
        )
        result = response.choices[0].message.content

    else:
        raise ValueError("Invalid provider specified. Use 'gemini' or 'openai'.")

    # Validate the reply and re-serialize it pretty-printed.
    parsed = json.loads(result)
    return json.dumps(parsed, indent=4, ensure_ascii=False)

# Log the extracted information to Weights & Biases (W&B).
def log_to_wandb(response_text):
    """Parse *response_text* as JSON and log the resulting dict to W&B.

    Never raises: malformed JSON and W&B failures are reported to stdout so a
    logging problem does not abort the experiment run.
    """
    try:
        response_dict = json.loads(response_text)
        wandb.log(response_dict)
    except json.JSONDecodeError as e:
        # The provider returned something that is not valid JSON.
        print(f"Error parsing JSON: {e}")
    except Exception as e:
        # Anything raised by wandb.log (e.g. no active run).
        print(f"Error logging to W&B: {e}")

# Main entry point: extract and optionally log experimental conditions from a notebook.
def log_llm(notebook_path, project_name=None, is_logging=False, provider=None):
    """Extract experimental conditions from a notebook and optionally log them to W&B.

    Parameters
    ----------
    notebook_path : str
        Path to the ``.ipynb`` file to analyze.
    project_name : str, optional
        W&B project name; defaults to the notebook's file name.
    is_logging : bool
        When True, start a W&B run and log the extracted conditions.
    provider : str
        LLM provider to use: ``"gemini"`` or ``"openai"``.

    Returns
    -------
    str
        The pretty-printed JSON string of extracted conditions, so callers
        (e.g. the query module) can reuse it.
    """
    # Default the project name to the notebook's file name.
    project_name = project_name or os.path.basename(notebook_path).replace(".ipynb", "")

    if is_logging:
        # Initialize a new W&B run only when logging is requested.
        init_wandb(project_name)

    # Pull the code cells out of the notebook.
    code_string = extract_notebook_code(notebook_path)

    # Delegate extraction to the selected AI provider.
    parsed_json = extract_experimental_conditions(provider, code_string)

    if is_logging and parsed_json:
        log_to_wandb(parsed_json)

    print("Response from the provider processed and logged to W&B.")
    # BUG FIX: return the extracted JSON — the usage example assigns this
    # function's result, but the previous version returned None.
    return parsed_json
85 changes: 85 additions & 0 deletions logllm/plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import json
import numpy as np
import matplotlib.pyplot as plt

def plot_metrics(*models_results):
    """Draw a grouped bar chart comparing numeric metrics across models.

    Each positional argument is one model's results: a dict, or a JSON string
    encoding one. Metrics are gathered from ``result_name_i`` /
    ``result_value_i`` pairs and from any other numeric field not in the
    exclusion set. A model missing a metric is plotted as 0 for that metric.
    """
    # Normalize inputs: accept JSON strings as well as dicts.
    models = [json.loads(m) if isinstance(m, str) else m for m in models_results]

    # Bookkeeping/config fields that should not be plotted as metrics.
    exclude_keys = {'cache_size', 'random_state_tts', 'random_state', 'random_state_1', 'n_estimators'}

    metrics = []        # metric names, in first-seen order
    model_names = []
    per_model = []      # one {metric: value} dict per model (keeps alignment)

    for model in models:
        model_names.append(model.get("model_name", "Test Model"))
        values = {}

        # Explicit "result_name_i" / "result_value_i" pairs.
        for key, value in model.items():
            if key.startswith("result_name_"):
                index = key.split("_")[-1]  # e.g. "1" from "result_name_1"
                metric_value = model.get(f"result_value_{index}")
                if metric_value is not None:
                    if value not in metrics:
                        metrics.append(value)
                    values[value] = metric_value

        # Any other numeric field becomes a metric of its own.
        # BUG FIX: skip "result_value_i" fields here — they were already
        # consumed above, and previously appeared a second time under their
        # raw key name.
        for key, value in model.items():
            if (key not in exclude_keys
                    and not key.startswith("result_value_")
                    and isinstance(value, (int, float))):
                if key not in metrics:
                    metrics.append(key)
                values.setdefault(key, value)

        per_model.append(values)

    # Handle cases where no valid metrics were provided.
    if not metrics or not model_names:
        print("No valid metrics or model names found.")
        return

    # BUG FIX: fill missing values per model instead of blindly appending
    # zeros at the end of each list — the old code misattributed values when
    # an earlier model lacked a metric that a later model supplied.
    model_values = {metric: [vals.get(metric, 0) for vals in per_model]
                    for metric in metrics}

    # Grouped bar chart: one group per metric, one bar per model.
    x = np.arange(len(metrics))  # label locations
    bar_width = 0.15
    fig, ax = plt.subplots(figsize=(10, 6))

    for i, model_name in enumerate(model_names):
        heights = [model_values[metric][i] for metric in metrics]
        ax.bar(x + i * bar_width, heights, width=bar_width, label=model_name)

    # Plot customization.
    ax.set_xlabel('Metric', fontsize=14)
    ax.set_ylabel('Value', fontsize=14)
    ax.set_title('Comparison of Model Performance Metrics', fontsize=16)
    ax.set_xticks(x + bar_width * (len(model_names) - 1) / 2)
    ax.set_xticklabels(metrics, fontsize=12)
    ax.legend(title='Models')
    ax.grid(True, axis='y', linestyle='--', alpha=0.7)

    plt.xticks(rotation=45, ha='right')  # rotate labels for readability
    plt.tight_layout()
    plt.show()


35 changes: 18 additions & 17 deletions logllm/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@

# Configure Google Generative AI API Key
genai.configure(api_key=os.getenv('API_KEY'))
generation_config = {
"temperature": 0,
"top_p": 0.95,
"top_k": 64,
"max_output_tokens": 8192,
"response_mime_type": "text/plain",
}

# Function to query OpenAI
def query_openai(user_input: str):
Expand All @@ -32,20 +39,21 @@ def query_openai(user_input: str):
return response['choices'][0]['message']['content']

# Function to query Google Gemini
def query_gemini(user_input: str):
model = genai.GenerativeModel("gemini-1.5-flash")
def query_gemini(user_input: str, code):
model = genai.GenerativeModel("gemini-1.5-flash", generation_config=generation_config)
user_input = f"{code}"

system_prompt = """
Convert the following query to a W&B API query:
Please provide the data you want me to convert to a W&B API query:
""".strip()

user_prompt = f"""
Here is a user's query: {user_input}
""".strip()
"""

chat_session = model.start_chat(
history=[
{"role": "system", "parts": [system_prompt]},
{"role": "model", "parts": [system_prompt]},
{"role": "user", "parts": [user_prompt]},
]
)
Expand All @@ -54,22 +62,15 @@ def query_gemini(user_input: str):
return response.candidates[0].content.parts[0].text

# General query function that calls the appropriate provider.
def query(user_input: str, provider: str):
    """Route *user_input* to the requested LLM provider and return its reply.

    Parameters
    ----------
    user_input : str
        The query text to send to the provider.
    provider : str
        Either ``'openai'`` or ``'gemini'``.

    Raises
    ------
    ValueError
        If *provider* is not one of the supported names.
    """
    # BUG FIX: the refactor dropped user_input and called the helpers with no
    # arguments; restored so the visible caller query(text, provider=...) works.
    if provider == 'openai':
        return query_openai(user_input)
    elif provider == 'gemini':
        return query_gemini(user_input)
    else:
        raise ValueError("Invalid provider specified. Use 'openai' or 'gemini'.")


# Usage example: runs only when this module is executed as a script, so
# importing it no longer triggers network calls.
if __name__ == "__main__":
    notebook_path = "demos/svc-sample.ipynb"

    # Extract experimental conditions and results using log_llm.
    parsed_json = log_llm(notebook_path, project_name="Machine learning", is_logging=False, provider="gemini")

    # BUG FIX: the original string lacked the f-prefix, so the literal text
    # "{parsed_json}" was sent instead of the extracted JSON.
    response = query(f"what is the best model? :{parsed_json}", provider="gemini")

    # Print the response from the query.
    print(response)
Loading