# Databricks notebook source
# MAGIC %pip install --upgrade --quiet databricks-sdk lxml langchain databricks-vectorsearch cloudpickle openai pypdf llama_index langgraph==0.3.4 sqlalchemy mlflow[databricks] langchain_community databricks-agents databricks-langchain uv torch databricks-connect==16.1.* markdownify ftfy
# COMMAND ----------
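# Restart the Python process so the packages installed above are importable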
dbutils.library.restartPython()
# COMMAND ----------
# DBTITLE 1,Edit this cell with your resource names
# Unity Catalog locations for the demo
catalog = "genai_in_production_demo_catalog"
agent_schema = "agents"
demo_schema = "demo_data"

# Volume and folder that hold the sample PDFs for the RAG pipeline
volumeName = "rag_volume"
folderName = "sample_pdf_folder"

# Vector search index, chunking, and embedding settings
vectorSearchIndexName = "pdf_content_embeddings_index"
# vectorSearchIndexName = "databricks_documentation_index"
chunk_size = 1000  # size of each document chunk fed to the embedding model
chunk_overlap = 50  # overlap between consecutive chunks
embeddings_endpoint = "databricks-gte-large-en"
VECTOR_SEARCH_ENDPOINT_NAME = "one-env-shared-endpoint-4"

# Chat model endpoint and agent registration settings
chatBotModel = "databricks-meta-llama-3-3-70b-instruct"
max_tokens = 2000  # maximum tokens the chat model may generate per response
finalchatBotModelName = "rag_bot"
yourEmailAddress = "austin.choi@databricks.com"
# COMMAND ----------
dbutils.widgets.text("catalog_name", catalog)
dbutils.widgets.text("agent_schema", agent_schema)
# COMMAND ----------
# Widget values override the defaults above so the notebook can be
# parameterized when run as a job.
catalog_name = dbutils.widgets.get("catalog_name")
agent_schema = dbutils.widgets.get("agent_schema")
# COMMAND ----------
spark.sql(f"CREATE CATALOG IF NOT EXISTS {catalog}")
# COMMAND ----------
spark.sql(f"CREATE SCHEMA IF NOT EXISTS {catalog}.{demo_schema}")
# COMMAND ----------
spark.sql(f"CREATE SCHEMA IF NOT EXISTS {catalog}.{agent_schema}")
# COMMAND ----------
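# Optional sketch (not part of the original notebook): create the Unity Catalog
# volume and PDF folder referenced by volumeName/folderName above, assuming no
# other notebook in this repo owns that setup.
spark.sql(f"CREATE VOLUME IF NOT EXISTS {catalog_name}.{demo_schema}.{volumeName}")
dbutils.fs.mkdirs(f"/Volumes/{catalog_name}/{demo_schema}/{volumeName}/{folderName}")
# COMMAND ----------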
import pandas as pd

# Load each sample CSV with pandas, convert it to a Spark DataFrame, and
# overwrite the corresponding Delta table in the demo schema. File names
# under ./data match the target table names.
csv_tables = [
    "customers",
    "franchises",
    "reviews",
    "synthetic_car_data",
    "transactions",
    "fs_travel",
    "destinations",
]
for table in csv_tables:
    df = pd.read_csv(f"./data/{table}.csv")
    spark_df = spark.createDataFrame(df)
    spark_df.write.format("delta").mode("overwrite").saveAsTable(
        f"{catalog_name}.{demo_schema}.{table}"
    )
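# COMMAND ----------
# Optional sanity check (not in the original notebook): confirm each table
# was written and print its row count.
for table in csv_tables:
    row_count = spark.table(f"{catalog_name}.{demo_schema}.{table}").count()
    print(f"{catalog_name}.{demo_schema}.{table}: {row_count} rows")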