From f788079320c930805a887024aadfc08463674f81 Mon Sep 17 00:00:00 2001 From: Ryan Michael Date: Wed, 9 Aug 2023 13:56:56 -0400 Subject: [PATCH 01/10] Update script --- examples/slackbot/slackbot.py | 102 ++++++++++++++++------------------ 1 file changed, 47 insertions(+), 55 deletions(-) diff --git a/examples/slackbot/slackbot.py b/examples/slackbot/slackbot.py index 094b6d68d..bfae6eaf4 100644 --- a/examples/slackbot/slackbot.py +++ b/examples/slackbot/slackbot.py @@ -1,63 +1,36 @@ -import json, math, openai, os, pyarrow -from slack_sdk.web import WebClient -from slack_sdk.socket_mode import SocketModeClient +import json, math, datetime, openai, os, pyarrow, pandas, asyncio +#from slack_sdk.web import WebClient +from slack_sdk.web.async_client import AsyncWebClient +#from slack_sdk.socket_mode import SocketModeClient +from slack_sdk.socket_mode.aiohttp import SocketModeClient from slack_sdk.socket_mode.response import SocketModeResponse -import sparrow_pi as kt +import sparrow_py as kt -def build_conversation(messages): - message_time = messages.col("ts") - last_message_time = message_time.lag(1) # !!! - is_new_conversation = message_time.seconds_since(last_message_time) > 10 * 60 - - return messages \ - .select("user", "ts", "text", "reactions") \ - .collect(window=kt.windows.Since(is_new_conversation), max=100) - -def build_examples(messages): - duration = kt.minutes(5) # !!! - - coverstation = build_conversation(messages) - shifted_coversation = coverstation.shift_by(duration) # !!! - - reaction_users = coverstation.col("reactions").col("name").collect(kt.windows.Trailing(duration)).flatten() # !!! - participating_users = coverstation.col("user").collect(kt.windows.Trailing(duration)) # !!! - engaged_users = kt.union(reaction_users, participating_users) # !!! 
- - return kt.record({ "prompt": shifted_coversation, "completion": engaged_users}) \ - .filter(shifted_coversation.is_not_null()) - -def format_prompt(prompt): - return "start -> " + "\n\n".join([f' {msg["user"]} --> {msg["text"]} ' for msg in prompt]) + "\n\n###\n\n" - -def main(): +async def main(): + # Load user label map output_map = {} - with open('./user_output_map.json', 'r') as file: output_map = json.load(file) - print(f'Loaded output map: {output_map}') - - # Initialize Kaskada with a local execution context. + # Initialize clients kt.init_session() - - # Initialize OpenAI openai.api_key = os.environ.get("OPEN_AI_KEY") - - # Initialize Slack slack = SocketModeClient( app_token=os.environ.get("SLACK_APP_TOKEN"), - web_client=WebClient(token=os.environ.get("SLACK_BOT_TOKEN")) + web_client=AsyncWebClient(token=os.environ.get("SLACK_BOT_TOKEN")) ) - min_prob_for_response = 0.50 + # Backfill state with historical data + messages = kt.sources.ArrowSource( + data = pandas.read_parquet("./messages.parquet")[:1], + time_column_name = "ts", + key_column_name = "channel", + ) # Receive Slack messages in real-time - live_messages = kt.sources.read_stream(entity_column="channel", time_column="ts") - - # Receive messages from Slack - def handle_message(client, req): + async def handle_message(client, req): # Acknowledge the message back to Slack - client.send_socket_mode_response(SocketModeResponse(envelope_id=req.envelope_id)) + await client.send_socket_mode_response(SocketModeResponse(envelope_id=req.envelope_id)) if req.type == "events_api" and "event" in req.payload: e = req.payload["event"] @@ -68,26 +41,43 @@ def handle_message(client, req): if "previous_message" in e or e["type"] == "reaction_added": return + data = pyarrow.json.read_json(e) + print(f'Sending message event to kaskada: {e}') # Deliver the message to Kaskada - live_messages.add_event(pyarrow.json.read_json(e)) + messages.add_data(data) + print("Done sending message") 
slack.socket_mode_request_listeners.append(handle_message) - slack.connect() + await slack.connect() + + # Handle messages + message_time = messages.time_of() + #last_message_time = message_time.lag(1) # !!! + #is_new_conversation = True #message_time.seconds_since(last_message_time) > 10 * 60 + + conversations = messages \ + .select("user", "ts", "text") \ + .collect(max=100) #.collect(window=kt.SinceWindow(predicate=is_new_conversation), max=100) - # Handle messages in realtime # A "conversation" is a list of messages - for conversation in build_conversation(live_messages).start().to_generator(): - if len(conversation) == 0: - continue + start = now = datetime.datetime.now() + print("Listening for new messages...") + async for conversation in conversations.run(materialize=True).iter_rows_async(): + #if len(conversation) == 0 or conversation["_time"] < start: + # continue - print(f'Starting completion on conversation with first message text: {conversation[0]["text"]}') + print(f'Conversation: {conversation}') + print(f'Starting completion on conversation with first message text: {conversation["result"][0]["text"]}') - prompt = format_prompt(conversation) + prompt = "start -> " + "\n\n".join([f' {msg["user"]} --> {msg["text"]} ' for msg in conversation["result"]]) + "\n\n###\n\n" print(f'Using prompt: {prompt}') + # Credentials don't work yet... 
+ continue + # Ask the model who should be notified res = openai.Completion.create( model="davinci:ft-personal:coversation-users-full-kaskada-2023-08-05-14-25-30", @@ -105,7 +95,7 @@ def handle_message(client, req): print(f'Found logprobs: {logprobs}') for user in logprobs: - if math.exp(logprobs[user]) > min_prob_for_response: + if math.exp(logprobs[user]) > 0.50: user = users.strip() # if users include `nil`, stop processing if user == "nil": @@ -153,5 +143,7 @@ def handle_message(client, req): print(f'Posted alert message') + print("Done") + if __name__ == "__main__": - main() \ No newline at end of file + asyncio.run(main()) \ No newline at end of file From 4371412f83c404ff1d51effa485aa1eeabbbc7ce Mon Sep 17 00:00:00 2001 From: Ryan Michael Date: Wed, 9 Aug 2023 14:49:55 -0400 Subject: [PATCH 02/10] Update script --- examples/slackbot/slackbot.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/examples/slackbot/slackbot.py b/examples/slackbot/slackbot.py index bfae6eaf4..38de273ea 100644 --- a/examples/slackbot/slackbot.py +++ b/examples/slackbot/slackbot.py @@ -21,8 +21,10 @@ async def main(): ) # Backfill state with historical data + historical_data = pandas.read_parquet("./messages.parquet")[:1] + schema = pyarrow.Schema.from_pandas(historical_data) messages = kt.sources.ArrowSource( - data = pandas.read_parquet("./messages.parquet")[:1], + data = historical_data, time_column_name = "ts", key_column_name = "channel", ) @@ -41,13 +43,17 @@ async def handle_message(client, req): if "previous_message" in e or e["type"] == "reaction_added": return - data = pyarrow.json.read_json(e) - - print(f'Sending message event to kaskada: {e}') - - # Deliver the message to Kaskada - messages.add_data(data) - print("Done sending message") + try: + e["ts"] = datetime.datetime.fromtimestamp(float(e["ts"])) + del e["team"] + data = pyarrow.RecordBatch.from_pylist([e], schema=schema) + + print(f'Sending message event to kaskada: {e}') + 
+ # Deliver the message to Kaskada + messages.add_data(data) + print("Done sending message") + except Exception as e: print(e) slack.socket_mode_request_listeners.append(handle_message) await slack.connect() From 1c00798e5fc5a83e2e8810c41a2f00f7871c0d48 Mon Sep 17 00:00:00 2001 From: Ryan Michael Date: Wed, 9 Aug 2023 11:22:16 -0400 Subject: [PATCH 03/10] Update notebook to reflect what's possible --- examples/slackbot/Notebook.ipynb | 346 +++++++++++++++++------------ examples/slackbot/README.md | 10 - examples/slackbot/messages.parquet | Bin 6238 -> 6814 bytes 3 files changed, 204 insertions(+), 152 deletions(-) diff --git a/examples/slackbot/Notebook.ipynb b/examples/slackbot/Notebook.ipynb index 0708159eb..b143cb370 100644 --- a/examples/slackbot/Notebook.ipynb +++ b/examples/slackbot/Notebook.ipynb @@ -33,83 +33,207 @@ "outputs": [], "source": [ "from datetime import datetime, timedelta\n", - "from slack_sdk.socket_mode import SocketModeClient, SocketModeResponse\n", - "import sparrow_pi as kt\n", + "from slack_sdk.socket_mode import SocketModeClient\n", + "from slack_sdk.socket_mode.response import SocketModeResponse\n", + "import sparrow_py as kt\n", + "import pandas\n", "import openai\n", "import getpass\n", "import pyarrow\n", + "import datetime\n", "\n", "# Initialize Kaskada with a local execution context.\n", "kt.init_session()\n", "\n", "# Initialize OpenAI\n", - "openai.api_key = getpass.getpass('OpenAI: API Key')\n", - "\n", - "# Initialize Slack\n", - "slack = SocketModeClient(\n", - " app_token=getpass.getpass('Slack: App Token'),\n", - " web_client=getpass.getpass('Slack: Bot Token'),\n", - ")" + "#openai.api_key = getpass.getpass('OpenAI: API Key')" ] }, { "cell_type": "markdown", - "id": "9b8a144d-8d79-4943-b99b-d3470ee96283", + "id": "0035f558-23bd-4b4d-95a0-ed5e8fece673", "metadata": {}, "source": [ - "## Prompt Engineering" + "## Fine-tune the model" + ] + }, + { + "cell_type": "markdown", + "id": "6c3c5682-bfe0-44ca-9a5a-52a0da74e5de", + 
"metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "### Read Historical Messages\n", + "\n", + "***Issue***: `with_key` fails:\n", + "\n", + "```\n", + "$ messages.with_key(kt.record({\n", + " \"channel\": messages.col(\"channel\"),\n", + " \"thread\": messages.col(\"thread_ts\"),\n", + " }))\n", + "\n", + "RuntimeError: error in sparrow-py Rust code\n", + "├╴at src/error.rs:54:21\n", + "│\n", + "├─▶ execute query\n", + "│ ╰╴at /Users/ryan.michael/work/kaskada/crates/sparrow-session/src/session.rs:319:14\n", + "│\n", + "├─▶ internal compute error: spawn compute executor\n", + "│ ╰╴at /Users/ryan.michael/work/kaskada/crates/sparrow-runtime/src/execute.rs:268:6\n", + "│\n", + "├─▶ internal compute error: unable to create operation\n", + "│ ╰╴at /Users/ryan.michael/work/kaskada/crates/sparrow-runtime/src/execute/operation.rs:435:6\n", + "│\n", + "├─▶ internal compute error: spead failure\n", + "│ ╰╴at /Users/ryan.michael/work/kaskada/crates/sparrow-runtime/src/execute/operation/merge.rs:171:106\n", + "│\n", + "╰─▶ Unsupported type Struct([Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"name\", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"users\", data_type: List(Field { name: \"item\", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]) for list spread\n", + " ╰╴at /Users/ryan.michael/work/kaskada/crates/sparrow-runtime/src/execute/operation/merge.rs:171:92\n", + "```" ] }, { "cell_type": "code", "execution_count": null, - "id": "7e6fedb9", + "id": "9d224bec-e5a1-4c67-8764-e3dcdbc5e0ac", "metadata": {}, "outputs": [], "source": [ - "def build_conversation(messages):\n", - " message_time = messages.col(\"ts\")\n", - " last_message_time = message_time.lag(1) # !!!\n", - " is_new_conversation = 
message_time.seconds_since(last_message_time) > 10 * 60\n", - "\n", - " return messages \\\n", - " .select(\"user\", \"ts\", \"text\", \"reactions\") \\\n", - " .collect(window=kt.windows.Since(is_new_conversation), max=100)" + "messages = kt.sources.ArrowSource(\n", + " data = pandas.read_parquet(\"./messages.parquet\"), \n", + " time_column_name = \"ts\", \n", + " key_column_name = \"channel\",\n", + ")\n", + "#messages = messages.with_key(kt.record({ # !!!\n", + "# \"channel\": messages.col(\"channel\"),\n", + "# \"thread\": messages.col(\"thread_ts\"),\n", + "# }))\n", + "messages.preview(5)" ] }, { "cell_type": "markdown", - "id": "9247233a", + "id": "22dd5729-ee7b-4de4-88dc-1642424833f0", "metadata": {}, - "source": [] + "source": [ + "### Construct conversations\n", + "\n", + "***Issue:*** Missing `seconds_since` \n", + "\n", + "***Issue:*** `collect` failes when given a `kt.SinceWindow` with\n", + "\n", + "```\n", + "$ messages.select(\"user\", \"ts\", \"text\", \"reactions\") \\\n", + " .collect(window=kt.SinceWindow(predicate=is_new_conversation), max=100)\n", + "\n", + "ValueError: error[E0010]: Invalid argument type(s)\n", + " --> Builder:1:1\n", + " |\n", + "1 | builder\n", + " | ^^^^^^^\n", + " | |\n", + " | Invalid types for parameter 'window' in call to 'collect'\n", + " | Actual type: i64\n", + " |\n", + " --> built-in signature 'collect(input: T, const max: i64, const min: i64 = 0, window: bool = null, duration: i64 = null) -> list':1:71\n", + " |\n", + "1 | collect(input: T, const max: i64, const min: i64 = 0, window: bool = null, duration: i64 = null) -> list\n", + " | ---- Expected type: bool\n", + "\n", + "\n", + "Arg[0]: Timestream[struct>>>]\n", + "Arg[1]: Literal 100 ()\n", + "Arg[2]: Literal 0 ()\n", + "Arg[3]: Literal True ()\n", + "Arg[4]: Literal None ()\n", + "```" + ] }, { "cell_type": "code", "execution_count": null, - "id": "fdb2d959-d371-4026-9f8d-4ab26cfbf317", + "id": "088e52bd-8f30-4d6e-abbc-4896b88c0837", "metadata": {}, 
"outputs": [], "source": [ - "def build_examples(messages):\n", - " duration = datetime.timedelta(minutes=5)\n", - "\n", - " coverstation = build_conversation(messages)\n", - " shifted_coversation = coverstation.shift_by(duration) # !!!\n", + "# Construct conversations\n", + "message_time = messages.time_of()\n", + "last_message_time = message_time.lag(1) # !!!\n", + "is_new_conversation = True #message_time.seconds_since(last_message_time) > 10 * 60\n", "\n", - " reaction_users = coverstation.col(\"reactions\").col(\"name\").collect(kt.windows.Trailing(duration)).flatten() # !!!\n", - " participating_users = coverstation.col(\"user\").collect(kt.windows.Trailing(duration)) # !!!\n", - " engaged_users = kt.union(reaction_users, participating_users) # !!!\n", + "conversations = messages \\\n", + " .select(\"user\", \"ts\", \"text\", \"reactions\") \\\n", + " .collect(max=100) #.collect(window=kt.SinceWindow(predicate=is_new_conversation), max=100)\n", "\n", - " return kt.record({ \"prompt\": shifted_coversation, \"completion\": engaged_users}) \\\n", - " .filter(shifted_coversation.is_not_null())" + "conversations.preview(5)" ] }, { "cell_type": "markdown", - "id": "0035f558-23bd-4b4d-95a0-ed5e8fece673", + "id": "5076d2bf-6830-460b-a9cb-948d8f106edc", "metadata": {}, "source": [ - "## Fine-tune the model" + "### Build examples\n", + "\n", + "***Issue***: Unable to field-ref into a list of records\n", + "\n", + "```\n", + "$ conversations.col(\"reactions\").col(\"name\").preview(5)\n", + "\n", + "RuntimeError: error in sparrow-py Rust code\n", + "├╴at src/error.rs:54:21\n", + "│\n", + "├─▶ execute query\n", + "│ ╰╴at /Users/ryan.michael/work/kaskada/crates/sparrow-session/src/session.rs:319:14\n", + "│\n", + "├─▶ internal compute error: spawn compute executor\n", + "│ ╰╴at /Users/ryan.michael/work/kaskada/crates/sparrow-runtime/src/execute.rs:268:6\n", + "│\n", + "├─▶ internal compute error: unable to create executor\n", + "│ ╰╴at 
/Users/ryan.michael/work/kaskada/crates/sparrow-runtime/src/execute/operation.rs:193:18\n", + "│\n", + "╰─▶ Field-ref only works on lists of records, but was List(Field { name: \"item\", data_type: List(Field { name: \"item\", data_type: Struct([Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"name\", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"users\", data_type: List(Field { name: \"item\", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} })\n", + " ╰╴at /Users/ryan.michael/work/kaskada/crates/sparrow-runtime/src/execute/operation.rs:192:18\n", + "```\n", + "\n", + "***Issue***: Missing `union` function\n", + "\n", + "***Issue***: Unable to collect a field from a record\n", + "\n", + "```\n", + "$ conversations.col(\"user\").collect(max=10).preview(5)\n", + "\n", + "PanicException: not implemented: list collect evaluator is unsupported\n", + "```\n", + "\n", + "***Issue***: Unable to combine shifted and unshifted values\n", + "\n", + "```\n", + "$ kt.record({\n", + " \"prompt\": conversations.shift_by(datetime.timedelta(minutes=5)),\n", + " \"completion\": conversations,\n", + "}).preview(5)\n", + "\n", + "RuntimeError: error in sparrow-py Rust code\n", + "├╴at src/error.rs:54:21\n", + "│\n", + "├─▶ execute query\n", + "│ ╰╴at /Users/ryan.michael/work/kaskada/crates/sparrow-session/src/session.rs:319:14\n", + "│\n", + "├─▶ internal compute error: spawn compute executor\n", + "│ ╰╴at /Users/ryan.michael/work/kaskada/crates/sparrow-runtime/src/execute.rs:268:6\n", + "│\n", + "├─▶ internal compute error: unable to create operation\n", + "│ ╰╴at 
/Users/ryan.michael/work/kaskada/crates/sparrow-runtime/src/execute/operation.rs:435:6\n", + "│\n", + "├─▶ internal compute error: spead failure\n", + "│ ╰╴at /Users/ryan.michael/work/kaskada/crates/sparrow-runtime/src/execute/operation/merge.rs:171:106\n", + "│\n", + "╰─▶ Unsupported type Struct([Field { name: \"ts\", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"user\", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"text\", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"reactions\", data_type: List(Field { name: \"item\", data_type: Struct([Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"name\", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"users\", data_type: List(Field { name: \"item\", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]) for list spread\n", + " ╰╴at /Users/ryan.michael/work/kaskada/crates/sparrow-runtime/src/execute/operation/merge.rs:171:92\n", + "```" ] }, { @@ -119,17 +243,34 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas\n", - "import sparrow_pi.sources as sources\n", + "duration = datetime.timedelta(minutes=5)\n", "\n", - "messages = kt.sources.Parquet(\"./messages.parquet\", time = \"ts\", entity = \"channel\")\n", - "messages = messages.with_key(kt.record({ # !!!\n", - " \"channel\": messages.col(\"channel\"),\n", - " \"thread\": messages.col(\"thread_ts\"),\n", - " }))\n", - "examples = build_examples(messages)\n", + "shifted_coversation = 
conversations.shift_by(duration) # !!!\n", + "\n", + "#reaction_users = conversations.col(\"reactions\").col(\"name\") #.collect(kt.windows.Trailing(duration)).flatten() # !!!\n", + "#participating_users = conversations.col(\"user\").collect(kt.windows.Trailing(duration)) # !!!\n", + "engaged_users = conversations #kt.union(reaction_users, participating_users) # !!!\n", "\n", - "examples_df = examples.run().to_pandas()" + "examples = kt.record({ \"prompt\": shifted_coversation, \"completion\": engaged_users}) \\\n", + " .filter(shifted_coversation.is_not_null())\n", + "\n", + "examples.preview(5)" + ] + }, + { + "cell_type": "markdown", + "id": "fed3b924-e7de-414b-bba5-b119e40921f0", + "metadata": {}, + "source": [ + "## Fine-tune a model" + ] + }, + { + "cell_type": "markdown", + "id": "e78fa9bd-9c40-403d-a7ee-a15620a88418", + "metadata": {}, + "source": [ + "### Create training dataset" ] }, { @@ -141,6 +282,8 @@ "source": [ "from sklearn import preprocessing\n", "\n", + "examples_df = examples.run().to_pandas()\n", + "\n", "le = preprocessing.LabelEncoder()\n", "le.fit(examples_df.completion.explode())\n", "\n", @@ -157,6 +300,14 @@ "examples_df.to_json(\"examples.jsonl\", orient='records', lines=True)" ] }, + { + "cell_type": "markdown", + "id": "cfc60311-5ca1-49f3-8e35-4070174e0258", + "metadata": {}, + "source": [ + "### Upload to OpenAI" + ] + }, { "cell_type": "code", "execution_count": null, @@ -175,6 +326,14 @@ "training_id = cli.FineTune._get_or_upload('./examples_prepared_train.jsonl', True)" ] }, + { + "cell_type": "markdown", + "id": "f0808e6e-8239-4487-b22b-14b5a00948c6", + "metadata": {}, + "source": [ + "### Create the training job" + ] + }, { "cell_type": "code", "execution_count": null, @@ -193,103 +352,6 @@ ")\n", "print(f'Fine-tuning model with job ID: \"{resp[\"id\"]}\"')" ] - }, - { - "cell_type": "markdown", - "id": "b3e29109-cc00-4bf5-ba23-069e8db1f179", - "metadata": { - "jp-MarkdownHeadingCollapsed": true, - "tags": [] - }, - "source": [ 
- "## Notify users of conversations they need to know about" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "540afff7-4ebc-427f-8205-1ed145e0c59a", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "import json, math\n", - "\n", - "min_prob_for_response = 0.50\n", - "\n", - "# Receive Slack messages in real-time\n", - "live_messages = kt.sources.ArrowSource(entity_column=\"channel\", time_column=\"ts\")\n", - "\n", - "# Receive messages from Slack\n", - "def handle_message(client, req):\n", - " # Acknowledge the message back to Slack\n", - " client.send_socket_mode_response(SocketModeResponse(envelope_id=req.envelope_id))\n", - " \n", - " if req.type == \"events_api\" and \"event\" in req.payload:\n", - " e = req.payload[\"event\"]\n", - " if \"previous_message\" in e or e[\"type\"] == \"reaction_added\":\n", - " return\n", - " # send message events to Kaskada\n", - " live_messages.add_event(pyarrow.json.read_json(e))\n", - "\n", - "slack.socket_mode_request_listeners.append(handle_message)\n", - "slack.connect()\n", - "\n", - "# Handle messages in realtime\n", - "# A \"conversation\" is a list of messages\n", - "for conversation in build_conversation(live_messages).start().to_generator():\n", - " if len(conversation) == 0:\n", - " continue\n", - " \n", - " # Ask the model who should be notified\n", - " res = openai.Completion.create(\n", - " model=\"davinci:ft-personal:coversation-users-full-kaskada-2023-08-05-14-25-30\", \n", - " prompt=format_prompt(conversation),\n", - " logprobs=5,\n", - " max_tokens=1,\n", - " stop=\" end\",\n", - " temperature=0,\n", - " )\n", - "\n", - " users = []\n", - " logprobs = res[\"choices\"][0][\"logprobs\"][\"top_logprobs\"][0]\n", - " for user in logprobs:\n", - " if math.exp(logprobs[user]) > min_prob_for_response:\n", - " user = users.strip()\n", - " # if users include `nil`, stop processing\n", - " if user == \"nil\":\n", - " users = []\n", - " break\n", - " users.append(user)\n", - 
"\n", - " # alert on most recent message in conversation\n", - " msg = conversation.pop()\n", - " \n", - " # Send notification to users\n", - " for user in users:\n", - " user_id = le.inverse_transform(user)\n", - "\n", - " # get user channel for slackbot\n", - " app = slack.web_client.users_conversations(\n", - " types=\"im\",\n", - " user=user_id,\n", - " )\n", - " \n", - " # confirm user has slackbot installed\n", - " if len(app[\"channels\"]) == 0:\n", - " continue\n", - "\n", - " link = slack.web_client.chat_getPermalink(\n", - " channel=msg[\"channel\"],\n", - " message_ts=msg[\"ts\"],\n", - " )[\"permalink\"]\n", - " \n", - " slack.web_client.chat_postMessage(\n", - " channel=app[\"channels\"][0][\"id\"],\n", - " text=f'You may be interested in this converstation: <{link}|{msg[\"text\"]}>'\n", - " )" - ] } ], "metadata": { @@ -308,7 +370,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.11.3" } }, "nbformat": 4, diff --git a/examples/slackbot/README.md b/examples/slackbot/README.md index ee26b707b..08c68b778 100644 --- a/examples/slackbot/README.md +++ b/examples/slackbot/README.md @@ -37,15 +37,5 @@ copy( union_by_name=true) ) ) to 'messages.parquet' (FORMAT PARQUET) -; - - select - from ( - select * from read_json_auto('data/iloveai-initial-export/*/*.json', - format='array', - filename=true, - union_by_name=true) - ) - limit 10 ; ``` \ No newline at end of file diff --git a/examples/slackbot/messages.parquet b/examples/slackbot/messages.parquet index a3ad8a5ac3a2326a0515ccfd52465b1a16306007..170705f99f88049c7de482fa3fe31d6ed79fd5e1 100644 GIT binary patch delta 667 zcmca-Fwb^O22nOqHUn=(mo7NsU8mt^MW6^k+fU2V!D!IG0%Tq4TE z0F-1;%}LEo%_{*)i1Mg1NwDN4=BBDLfTUPUi&Kj*^ng{tbO9BLvH*3lCg+#tm8hyf z%-}4^09u(6Us9|(`5X6AM)}Qic;<7|i!w-hAlw3W1I!$-t)fg~24ZPqS!%bGY$RAf zAt=G5ZD;{!0e!8;AS22n2{b4Jhd~fGqnHggwvADYt&36Zh_cOOT@fipi^+N-l9LaJ 
zu!pmNOvB|Yh_}E7WQpaeol~)q;D7}@f@Oh4Y_gH4GV3c*;HQmh@yYPXao&k`?YdZj#>Um}=ki^}8_2|t#9DmD_6?@6dmUN5OXnM+cV>5=MW zRY?P;d1{lxCG}ZusllcF*&e8??NgVT{6I` Date: Wed, 9 Aug 2023 12:50:29 -0400 Subject: [PATCH 04/10] Updates per discussion --- examples/slackbot/Notebook.ipynb | 558 +++++++++++++++++++++++-------- 1 file changed, 419 insertions(+), 139 deletions(-) diff --git a/examples/slackbot/Notebook.ipynb b/examples/slackbot/Notebook.ipynb index b143cb370..7141f3abb 100644 --- a/examples/slackbot/Notebook.ipynb +++ b/examples/slackbot/Notebook.ipynb @@ -27,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "61ea2e95-6d9d-4068-ab98-8cf94bc4d9d0", "metadata": {}, "outputs": [], @@ -60,46 +60,231 @@ { "cell_type": "markdown", "id": "6c3c5682-bfe0-44ca-9a5a-52a0da74e5de", - "metadata": { - "jp-MarkdownHeadingCollapsed": true - }, + "metadata": {}, "source": [ - "### Read Historical Messages\n", - "\n", - "***Issue***: `with_key` fails:\n", - "\n", - "```\n", - "$ messages.with_key(kt.record({\n", - " \"channel\": messages.col(\"channel\"),\n", - " \"thread\": messages.col(\"thread_ts\"),\n", - " }))\n", - "\n", - "RuntimeError: error in sparrow-py Rust code\n", - "├╴at src/error.rs:54:21\n", - "│\n", - "├─▶ execute query\n", - "│ ╰╴at /Users/ryan.michael/work/kaskada/crates/sparrow-session/src/session.rs:319:14\n", - "│\n", - "├─▶ internal compute error: spawn compute executor\n", - "│ ╰╴at /Users/ryan.michael/work/kaskada/crates/sparrow-runtime/src/execute.rs:268:6\n", - "│\n", - "├─▶ internal compute error: unable to create operation\n", - "│ ╰╴at /Users/ryan.michael/work/kaskada/crates/sparrow-runtime/src/execute/operation.rs:435:6\n", - "│\n", - "├─▶ internal compute error: spead failure\n", - "│ ╰╴at /Users/ryan.michael/work/kaskada/crates/sparrow-runtime/src/execute/operation/merge.rs:171:106\n", - "│\n", - "╰─▶ Unsupported type Struct([Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: 
false, metadata: {} }, Field { name: \"name\", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"users\", data_type: List(Field { name: \"item\", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]) for list spread\n", - " ╰╴at /Users/ryan.michael/work/kaskada/crates/sparrow-runtime/src/execute/operation/merge.rs:171:92\n", - "```" + "### Read Historical Messages" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "9d224bec-e5a1-4c67-8764-e3dcdbc5e0ac", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_time_subsort_key_hash_keysubtypetsusertextteamuser_team...reactionsthread_tsreply_countreply_users_countlatest_replyis_lockedsubscribedlast_readparent_user_idchannel
02023-07-25 19:42:13515750806798332339587generalmessage2023-07-25 19:42:13U05JQJJDJ6P<@U05JQJJDJ6P> has joined the channelNoneNone...NoneNoneNaNNaNNoneNoneNoneNoneNonegeneral
12023-07-25 19:42:14143094307063304068259randommessage2023-07-25 19:42:14U05JQJJDJ6P<@U05JQJJDJ6P> has joined the channelNoneNone...NoneNoneNaNNaNNoneNoneNoneNoneNonerandom
22023-07-25 19:44:2702954779196800164886demomessage2023-07-25 19:44:27U05JQJJDJ6P<@U05JQJJDJ6P> has joined the channelNoneNone...NoneNoneNaNNaNNoneNoneNoneNoneNonedemo
32023-07-26 08:29:35615750806798332339587generalmessage2023-07-26 08:29:35U05JQJJDJ6Pold message 1T05JA5XCR9DT05JA5XCR9D...NoneNoneNaNNaNNoneNoneNoneNoneNonegeneral
42023-07-26 08:29:37715750806798332339587generalmessage2023-07-26 08:29:37U05JQJJDJ6Pold message 2T05JA5XCR9DT05JA5XCR9D...NoneNoneNaNNaNNoneNoneNoneNoneNonegeneral
\n", + "

5 rows × 24 columns

\n", + "
" + ], + "text/plain": [ + " _time _subsort _key_hash _key subtype \\\n", + "0 2023-07-25 19:42:13 5 15750806798332339587 general message \n", + "1 2023-07-25 19:42:14 14 3094307063304068259 random message \n", + "2 2023-07-25 19:44:27 0 2954779196800164886 demo message \n", + "3 2023-07-26 08:29:35 6 15750806798332339587 general message \n", + "4 2023-07-26 08:29:37 7 15750806798332339587 general message \n", + "\n", + " ts user text \\\n", + "0 2023-07-25 19:42:13 U05JQJJDJ6P <@U05JQJJDJ6P> has joined the channel \n", + "1 2023-07-25 19:42:14 U05JQJJDJ6P <@U05JQJJDJ6P> has joined the channel \n", + "2 2023-07-25 19:44:27 U05JQJJDJ6P <@U05JQJJDJ6P> has joined the channel \n", + "3 2023-07-26 08:29:35 U05JQJJDJ6P old message 1 \n", + "4 2023-07-26 08:29:37 U05JQJJDJ6P old message 2 \n", + "\n", + " team user_team ... reactions thread_ts reply_count \\\n", + "0 None None ... None None NaN \n", + "1 None None ... None None NaN \n", + "2 None None ... None None NaN \n", + "3 T05JA5XCR9D T05JA5XCR9D ... None None NaN \n", + "4 T05JA5XCR9D T05JA5XCR9D ... 
None None NaN \n", + "\n", + " reply_users_count latest_reply is_locked subscribed last_read \\\n", + "0 NaN None None None None \n", + "1 NaN None None None None \n", + "2 NaN None None None None \n", + "3 NaN None None None None \n", + "4 NaN None None None None \n", + "\n", + " parent_user_id channel \n", + "0 None general \n", + "1 None random \n", + "2 None demo \n", + "3 None general \n", + "4 None general \n", + "\n", + "[5 rows x 24 columns]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "messages = kt.sources.ArrowSource(\n", " data = pandas.read_parquet(\"./messages.parquet\"), \n", @@ -118,54 +303,113 @@ "id": "22dd5729-ee7b-4de4-88dc-1642424833f0", "metadata": {}, "source": [ - "### Construct conversations\n", - "\n", - "***Issue:*** Missing `seconds_since` \n", - "\n", - "***Issue:*** `collect` failes when given a `kt.SinceWindow` with\n", - "\n", - "```\n", - "$ messages.select(\"user\", \"ts\", \"text\", \"reactions\") \\\n", - " .collect(window=kt.SinceWindow(predicate=is_new_conversation), max=100)\n", - "\n", - "ValueError: error[E0010]: Invalid argument type(s)\n", - " --> Builder:1:1\n", - " |\n", - "1 | builder\n", - " | ^^^^^^^\n", - " | |\n", - " | Invalid types for parameter 'window' in call to 'collect'\n", - " | Actual type: i64\n", - " |\n", - " --> built-in signature 'collect(input: T, const max: i64, const min: i64 = 0, window: bool = null, duration: i64 = null) -> list':1:71\n", - " |\n", - "1 | collect(input: T, const max: i64, const min: i64 = 0, window: bool = null, duration: i64 = null) -> list\n", - " | ---- Expected type: bool\n", - "\n", - "\n", - "Arg[0]: Timestream[struct>>>]\n", - "Arg[1]: Literal 100 ()\n", - "Arg[2]: Literal 0 ()\n", - "Arg[3]: Literal True ()\n", - "Arg[4]: Literal None ()\n", - "```" + "### Construct conversations" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "088e52bd-8f30-4d6e-abbc-4896b88c0837", 
"metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_time_subsort_key_hash_keyresult
02023-07-25 19:42:13515750806798332339587general[{'ts': 1690314133000000000, 'user': 'U05JQJJD...
12023-07-25 19:42:14143094307063304068259random[{'ts': 1690314134000000000, 'user': 'U05JQJJD...
22023-07-25 19:44:2702954779196800164886demo[{'ts': 1690314267000000000, 'user': 'U05JQJJD...
32023-07-26 08:29:35615750806798332339587general[{'ts': 1690314133000000000, 'user': 'U05JQJJD...
42023-07-26 08:29:37715750806798332339587general[{'ts': 1690314133000000000, 'user': 'U05JQJJD...
\n", + "
" + ], + "text/plain": [ + " _time _subsort _key_hash _key \\\n", + "0 2023-07-25 19:42:13 5 15750806798332339587 general \n", + "1 2023-07-25 19:42:14 14 3094307063304068259 random \n", + "2 2023-07-25 19:44:27 0 2954779196800164886 demo \n", + "3 2023-07-26 08:29:35 6 15750806798332339587 general \n", + "4 2023-07-26 08:29:37 7 15750806798332339587 general \n", + "\n", + " result \n", + "0 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... \n", + "1 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... \n", + "2 [{'ts': 1690314267000000000, 'user': 'U05JQJJD... \n", + "3 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... \n", + "4 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Construct conversations\n", - "message_time = messages.time_of()\n", - "last_message_time = message_time.lag(1) # !!!\n", - "is_new_conversation = True #message_time.seconds_since(last_message_time) > 10 * 60\n", - "\n", "conversations = messages \\\n", " .select(\"user\", \"ts\", \"text\", \"reactions\") \\\n", - " .collect(max=100) #.collect(window=kt.SinceWindow(predicate=is_new_conversation), max=100)\n", + " .collect(max=20)\n", "\n", "conversations.preview(5)" ] @@ -175,83 +419,119 @@ "id": "5076d2bf-6830-460b-a9cb-948d8f106edc", "metadata": {}, "source": [ - "### Build examples\n", - "\n", - "***Issue***: Unable to field-ref into a list of records\n", - "\n", - "```\n", - "$ conversations.col(\"reactions\").col(\"name\").preview(5)\n", - "\n", - "RuntimeError: error in sparrow-py Rust code\n", - "├╴at src/error.rs:54:21\n", - "│\n", - "├─▶ execute query\n", - "│ ╰╴at /Users/ryan.michael/work/kaskada/crates/sparrow-session/src/session.rs:319:14\n", - "│\n", - "├─▶ internal compute error: spawn compute executor\n", - "│ ╰╴at /Users/ryan.michael/work/kaskada/crates/sparrow-runtime/src/execute.rs:268:6\n", - "│\n", - "├─▶ internal compute error: unable to create executor\n", - 
"│ ╰╴at /Users/ryan.michael/work/kaskada/crates/sparrow-runtime/src/execute/operation.rs:193:18\n", - "│\n", - "╰─▶ Field-ref only works on lists of records, but was List(Field { name: \"item\", data_type: List(Field { name: \"item\", data_type: Struct([Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"name\", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"users\", data_type: List(Field { name: \"item\", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} })\n", - " ╰╴at /Users/ryan.michael/work/kaskada/crates/sparrow-runtime/src/execute/operation.rs:192:18\n", - "```\n", - "\n", - "***Issue***: Missing `union` function\n", - "\n", - "***Issue***: Unable to collect a field from a record\n", - "\n", - "```\n", - "$ conversations.col(\"user\").collect(max=10).preview(5)\n", - "\n", - "PanicException: not implemented: list collect evaluator is unsupported\n", - "```\n", - "\n", - "***Issue***: Unable to combine shifted and unshifted values\n", - "\n", - "```\n", - "$ kt.record({\n", - " \"prompt\": conversations.shift_by(datetime.timedelta(minutes=5)),\n", - " \"completion\": conversations,\n", - "}).preview(5)\n", - "\n", - "RuntimeError: error in sparrow-py Rust code\n", - "├╴at src/error.rs:54:21\n", - "│\n", - "├─▶ execute query\n", - "│ ╰╴at /Users/ryan.michael/work/kaskada/crates/sparrow-session/src/session.rs:319:14\n", - "│\n", - "├─▶ internal compute error: spawn compute executor\n", - "│ ╰╴at /Users/ryan.michael/work/kaskada/crates/sparrow-runtime/src/execute.rs:268:6\n", - "│\n", - "├─▶ internal compute error: unable to create operation\n", - "│ ╰╴at 
/Users/ryan.michael/work/kaskada/crates/sparrow-runtime/src/execute/operation.rs:435:6\n", - "│\n", - "├─▶ internal compute error: spead failure\n", - "│ ╰╴at /Users/ryan.michael/work/kaskada/crates/sparrow-runtime/src/execute/operation/merge.rs:171:106\n", - "│\n", - "╰─▶ Unsupported type Struct([Field { name: \"ts\", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"user\", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"text\", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"reactions\", data_type: List(Field { name: \"item\", data_type: Struct([Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"name\", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"users\", data_type: List(Field { name: \"item\", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]) for list spread\n", - " ╰╴at /Users/ryan.michael/work/kaskada/crates/sparrow-runtime/src/execute/operation/merge.rs:171:92\n", - "```" + "### Build examples" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "af7d2a45-eb89-47ce-b471-a39ad8c7bbc7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_time_subsort_key_hash_keyresult
02023-07-25 19:42:13515750806798332339587general[U05JQJJDJ6P]
12023-07-25 19:42:14143094307063304068259random[U05JQJJDJ6P]
22023-07-25 19:44:2702954779196800164886demo[U05JQJJDJ6P]
32023-07-26 08:29:35615750806798332339587general[U05JQJJDJ6P, U05JQJJDJ6P]
42023-07-26 08:29:37715750806798332339587general[U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P]
\n", + "
" + ], + "text/plain": [ + " _time _subsort _key_hash _key \\\n", + "0 2023-07-25 19:42:13 5 15750806798332339587 general \n", + "1 2023-07-25 19:42:14 14 3094307063304068259 random \n", + "2 2023-07-25 19:44:27 0 2954779196800164886 demo \n", + "3 2023-07-26 08:29:35 6 15750806798332339587 general \n", + "4 2023-07-26 08:29:37 7 15750806798332339587 general \n", + "\n", + " result \n", + "0 [U05JQJJDJ6P] \n", + "1 [U05JQJJDJ6P] \n", + "2 [U05JQJJDJ6P] \n", + "3 [U05JQJJDJ6P, U05JQJJDJ6P] \n", + "4 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P] " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "duration = datetime.timedelta(minutes=5)\n", "\n", - "shifted_coversation = conversations.shift_by(duration) # !!!\n", + "shifted_coversation = conversations.shift_by(duration)\n", "\n", - "#reaction_users = conversations.col(\"reactions\").col(\"name\") #.collect(kt.windows.Trailing(duration)).flatten() # !!!\n", - "#participating_users = conversations.col(\"user\").collect(kt.windows.Trailing(duration)) # !!!\n", - "engaged_users = conversations #kt.union(reaction_users, participating_users) # !!!\n", + "reaction_users = messages.col(\"reactions\").flatten().col(\"users\").collect(kt.Trailing(duration)).flatten()\n", + "#participating_users = conversations.col(\"user\").collect(kt.windows.Trailing(duration))\n", + "engaged_users = reaction_users #kt.union(reaction_users, participating_users)\n", "\n", - "examples = kt.record({ \"prompt\": shifted_coversation, \"completion\": engaged_users}) \\\n", + "examples = kt.record({\"prompt\": shifted_coversation, \"completion\": engaged_users}) \\\n", " .filter(shifted_coversation.is_not_null())\n", "\n", "examples.preview(5)" From a6b69c72177de28ef85bb7fe70bdfaf075417a51 Mon Sep 17 00:00:00 2001 From: Ryan Michael Date: Wed, 9 Aug 2023 15:01:18 -0400 Subject: [PATCH 05/10] wip --- examples/slackbot/Notebook.ipynb | 424 +------------------------------ 1 file changed, 11 
insertions(+), 413 deletions(-) diff --git a/examples/slackbot/Notebook.ipynb b/examples/slackbot/Notebook.ipynb index 7141f3abb..ba71160df 100644 --- a/examples/slackbot/Notebook.ipynb +++ b/examples/slackbot/Notebook.ipynb @@ -27,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "61ea2e95-6d9d-4068-ab98-8cf94bc4d9d0", "metadata": {}, "outputs": [], @@ -67,234 +67,20 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "9d224bec-e5a1-4c67-8764-e3dcdbc5e0ac", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
_time_subsort_key_hash_keysubtypetsusertextteamuser_team...reactionsthread_tsreply_countreply_users_countlatest_replyis_lockedsubscribedlast_readparent_user_idchannel
02023-07-25 19:42:13515750806798332339587generalmessage2023-07-25 19:42:13U05JQJJDJ6P<@U05JQJJDJ6P> has joined the channelNoneNone...NoneNoneNaNNaNNoneNoneNoneNoneNonegeneral
12023-07-25 19:42:14143094307063304068259randommessage2023-07-25 19:42:14U05JQJJDJ6P<@U05JQJJDJ6P> has joined the channelNoneNone...NoneNoneNaNNaNNoneNoneNoneNoneNonerandom
22023-07-25 19:44:2702954779196800164886demomessage2023-07-25 19:44:27U05JQJJDJ6P<@U05JQJJDJ6P> has joined the channelNoneNone...NoneNoneNaNNaNNoneNoneNoneNoneNonedemo
32023-07-26 08:29:35615750806798332339587generalmessage2023-07-26 08:29:35U05JQJJDJ6Pold message 1T05JA5XCR9DT05JA5XCR9D...NoneNoneNaNNaNNoneNoneNoneNoneNonegeneral
42023-07-26 08:29:37715750806798332339587generalmessage2023-07-26 08:29:37U05JQJJDJ6Pold message 2T05JA5XCR9DT05JA5XCR9D...NoneNoneNaNNaNNoneNoneNoneNoneNonegeneral
\n", - "

5 rows × 24 columns

\n", - "
" - ], - "text/plain": [ - " _time _subsort _key_hash _key subtype \\\n", - "0 2023-07-25 19:42:13 5 15750806798332339587 general message \n", - "1 2023-07-25 19:42:14 14 3094307063304068259 random message \n", - "2 2023-07-25 19:44:27 0 2954779196800164886 demo message \n", - "3 2023-07-26 08:29:35 6 15750806798332339587 general message \n", - "4 2023-07-26 08:29:37 7 15750806798332339587 general message \n", - "\n", - " ts user text \\\n", - "0 2023-07-25 19:42:13 U05JQJJDJ6P <@U05JQJJDJ6P> has joined the channel \n", - "1 2023-07-25 19:42:14 U05JQJJDJ6P <@U05JQJJDJ6P> has joined the channel \n", - "2 2023-07-25 19:44:27 U05JQJJDJ6P <@U05JQJJDJ6P> has joined the channel \n", - "3 2023-07-26 08:29:35 U05JQJJDJ6P old message 1 \n", - "4 2023-07-26 08:29:37 U05JQJJDJ6P old message 2 \n", - "\n", - " team user_team ... reactions thread_ts reply_count \\\n", - "0 None None ... None None NaN \n", - "1 None None ... None None NaN \n", - "2 None None ... None None NaN \n", - "3 T05JA5XCR9D T05JA5XCR9D ... None None NaN \n", - "4 T05JA5XCR9D T05JA5XCR9D ... 
None None NaN \n", - "\n", - " reply_users_count latest_reply is_locked subscribed last_read \\\n", - "0 NaN None None None None \n", - "1 NaN None None None None \n", - "2 NaN None None None None \n", - "3 NaN None None None None \n", - "4 NaN None None None None \n", - "\n", - " parent_user_id channel \n", - "0 None general \n", - "1 None random \n", - "2 None demo \n", - "3 None general \n", - "4 None general \n", - "\n", - "[5 rows x 24 columns]" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "messages = kt.sources.ArrowSource(\n", " data = pandas.read_parquet(\"./messages.parquet\"), \n", " time_column_name = \"ts\", \n", " key_column_name = \"channel\",\n", ")\n", - "#messages = messages.with_key(kt.record({ # !!!\n", - "# \"channel\": messages.col(\"channel\"),\n", - "# \"thread\": messages.col(\"thread_ts\"),\n", - "# }))\n", + "messages = messages.with_key(kt.record({\n", + " \"channel\": messages.col(\"channel\"),\n", + " \"thread\": messages.col(\"thread_ts\"),\n", + " }))\n", "messages.preview(5)" ] }, @@ -308,104 +94,10 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "088e52bd-8f30-4d6e-abbc-4896b88c0837", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
_time_subsort_key_hash_keyresult
02023-07-25 19:42:13515750806798332339587general[{'ts': 1690314133000000000, 'user': 'U05JQJJD...
12023-07-25 19:42:14143094307063304068259random[{'ts': 1690314134000000000, 'user': 'U05JQJJD...
22023-07-25 19:44:2702954779196800164886demo[{'ts': 1690314267000000000, 'user': 'U05JQJJD...
32023-07-26 08:29:35615750806798332339587general[{'ts': 1690314133000000000, 'user': 'U05JQJJD...
42023-07-26 08:29:37715750806798332339587general[{'ts': 1690314133000000000, 'user': 'U05JQJJD...
\n", - "
" - ], - "text/plain": [ - " _time _subsort _key_hash _key \\\n", - "0 2023-07-25 19:42:13 5 15750806798332339587 general \n", - "1 2023-07-25 19:42:14 14 3094307063304068259 random \n", - "2 2023-07-25 19:44:27 0 2954779196800164886 demo \n", - "3 2023-07-26 08:29:35 6 15750806798332339587 general \n", - "4 2023-07-26 08:29:37 7 15750806798332339587 general \n", - "\n", - " result \n", - "0 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... \n", - "1 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... \n", - "2 [{'ts': 1690314267000000000, 'user': 'U05JQJJD... \n", - "3 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... \n", - "4 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... " - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "conversations = messages \\\n", " .select(\"user\", \"ts\", \"text\", \"reactions\") \\\n", @@ -424,104 +116,10 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "af7d2a45-eb89-47ce-b471-a39ad8c7bbc7", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
_time_subsort_key_hash_keyresult
02023-07-25 19:42:13515750806798332339587general[U05JQJJDJ6P]
12023-07-25 19:42:14143094307063304068259random[U05JQJJDJ6P]
22023-07-25 19:44:2702954779196800164886demo[U05JQJJDJ6P]
32023-07-26 08:29:35615750806798332339587general[U05JQJJDJ6P, U05JQJJDJ6P]
42023-07-26 08:29:37715750806798332339587general[U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P]
\n", - "
" - ], - "text/plain": [ - " _time _subsort _key_hash _key \\\n", - "0 2023-07-25 19:42:13 5 15750806798332339587 general \n", - "1 2023-07-25 19:42:14 14 3094307063304068259 random \n", - "2 2023-07-25 19:44:27 0 2954779196800164886 demo \n", - "3 2023-07-26 08:29:35 6 15750806798332339587 general \n", - "4 2023-07-26 08:29:37 7 15750806798332339587 general \n", - "\n", - " result \n", - "0 [U05JQJJDJ6P] \n", - "1 [U05JQJJDJ6P] \n", - "2 [U05JQJJDJ6P] \n", - "3 [U05JQJJDJ6P, U05JQJJDJ6P] \n", - "4 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P] " - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "duration = datetime.timedelta(minutes=5)\n", "\n", From 57c4e97059055e8da8cc3996cc30ff3b0da43891 Mon Sep 17 00:00:00 2001 From: Ryan Michael Date: Wed, 9 Aug 2023 15:14:29 -0400 Subject: [PATCH 06/10] Run script --- examples/slackbot/run.py | 128 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 examples/slackbot/run.py diff --git a/examples/slackbot/run.py b/examples/slackbot/run.py new file mode 100644 index 000000000..a2743cc21 --- /dev/null +++ b/examples/slackbot/run.py @@ -0,0 +1,128 @@ +import json, math, openai, os +from slack_sdk.web import WebClient +from slack_sdk.socket_mode import SocketModeClient +from slack_sdk.socket_mode.response import SocketModeResponse + +output_map = {} + +with open('./user_output_map.json', 'r') as file: + output_map = json.load(file) + +print(f'Loaded output map: {output_map}') + + +# Initialize OpenAI +openai.api_key = os.environ.get("OPEN_AI_KEY") + +# Initialize Slack +slack = SocketModeClient( + app_token=os.environ.get("SLACK_APP_TOKEN"), + web_client=WebClient(token=os.environ.get("SLACK_BOT_TOKEN")) +) + +min_prob_for_response = 0.50 + +# Format for the OpenAI API +def format_prompt(prompt): + return "start -> " + "\n\n".join([f' {msg["user"]} --> {msg["text"]} ' for msg in prompt]) + "\n\n###\n\n" + +def 
handle_conversation(conversation): + if len(conversation) == 0: + return + + print(f'Starting prediction on conversation: {conversation[0]["text"]}') + + # Ask the model who should be notified + res = openai.Completion.create( + model="davinci:ft-personal:coversation-users-full-kaskada-2023-08-05-14-25-30", + prompt=format_prompt(conversation), + max_tokens=1, + stop=" end", + temperature=0, + logprobs=5, + ) + + users = [] + logprobs = res["choices"][0]["logprobs"]["top_logprobs"][0] + + print(f'Recieved log probs: {logprobs}') + for user in logprobs: + if math.exp(logprobs[user]) > min_prob_for_response: + # if `nil` user is an option, stop processing + user = user.strip() + if user == "nil": + users = [] + print('Found nil, stopping.') + break + users.append(user) + + print(f'Found users to alert: {users}') + # alert on most recent message in conversation + msg = conversation.pop() + + # Send notification to users + for user_num in users: + if user_num not in output_map: + print(f'User: {user_num} not in output_map, stopping.') + else: + user_id = output_map[user_num] + + print(f'Found user {user_num} in output map: {user_id}') + + link = slack.web_client.chat_getPermalink( + channel=msg["channel"], + message_ts=msg["ts"], + )["permalink"] + + print(f'Got message link: {link}') + + res = slack.web_client.users_conversations( + types="im", + user=user_id, + ) + if len(res["channels"]) == 0: + print(f'User: {user} hasn\'t installed the slackbot yet') + else: + app_channel = res["channels"][0]["id"] + print(f'Got user\'s bot channel id: {app_channel}') + + slack.web_client.chat_postMessage( + channel=app_channel, + text=f'You may be interested in this converstation: <{link}|{msg["text"]}>' + ) + + print(f'Posted alert message') + +# Receive Slack messages in real-time +#live_messages = kt.sources.read_stream(entity_column="channel", time_column="ts") + +# Receive messages from Slack +def handle_message(client, req): + # Acknowledge the message back to Slack + 
client.send_socket_mode_response(SocketModeResponse(envelope_id=req.envelope_id)) + + # Deliver the message to Kaskada + #live_messages.add_event(pyarrow.json.read_json(req.payload)) + + if req.type == "events_api" and "event" in req.payload: + e = req.payload["event"] + + # ignore message edit, delete, reaction events + if "previous_message" in e or e["type"] == "reaction_added": + return + + # make single-message conversations for now + handle_conversation([e]) + + +# Handle messages in realtime +# A "conversation" is a list of messages +#for conversation in build_conversation(live_messages).start().to_generator(): + + +slack.socket_mode_request_listeners.append(handle_message) +slack.connect() + +# Just not to stop this process +from threading import Event +Event().wait() From 01d794e112a5ee82b06e2cf43d7acfde6209c807 Mon Sep 17 00:00:00 2001 From: Ryan Michael Date: Wed, 9 Aug 2023 20:24:16 -0400 Subject: [PATCH 07/10] wip --- examples/slackbot/Notebook.ipynb | 39 +++++++++++++------------ examples/slackbot/slackbot.py | 49 +++++++++++++++----------------- 2 files changed, 44 insertions(+), 44 deletions(-) diff --git a/examples/slackbot/Notebook.ipynb b/examples/slackbot/Notebook.ipynb index ba71160df..0d07db193 100644 --- a/examples/slackbot/Notebook.ipynb +++ b/examples/slackbot/Notebook.ipynb @@ -46,7 +46,7 @@ "kt.init_session()\n", "\n", "# Initialize OpenAI\n", - "#openai.api_key = getpass.getpass('OpenAI: API Key')" + "openai.api_key = getpass.getpass('OpenAI: API Key')" ] }, { @@ -77,10 +77,8 @@ " time_column_name = \"ts\", \n", " key_column_name = \"channel\",\n", ")\n", - "messages = messages.with_key(kt.record({\n", - " \"channel\": messages.col(\"channel\"),\n", - " \"thread\": messages.col(\"thread_ts\"),\n", - " }))\n", + "\n", + "\n", "messages.preview(5)" ] }, @@ -89,7 +87,7 @@ "id": "22dd5729-ee7b-4de4-88dc-1642424833f0", "metadata": {}, "source": [ - "### Construct conversations" + "### Construct prompts" ] }, { @@ -99,11 +97,16 @@ "metadata": 
{}, "outputs": [], "source": [ - "conversations = messages \\\n", + "messages = messages.with_key(kt.record({\n", + " \"channel\": messages.col(\"channel\"),\n", + " \"thread\": messages.col(\"thread_ts\"),\n", + " }))\n", + "\n", + "prompts = messages \\\n", " .select(\"user\", \"ts\", \"text\", \"reactions\") \\\n", " .collect(max=20)\n", "\n", - "conversations.preview(5)" + "prompts.preview(5)" ] }, { @@ -123,16 +126,16 @@ "source": [ "duration = datetime.timedelta(minutes=5)\n", "\n", - "shifted_coversation = conversations.shift_by(duration)\n", - "\n", - "reaction_users = messages.col(\"reactions\").flatten().col(\"users\").collect(kt.Trailing(duration)).flatten()\n", - "#participating_users = conversations.col(\"user\").collect(kt.windows.Trailing(duration))\n", + "shifted_conversation = conversations.shift_by(duration)\n", + "reaction_users = messages.col(\"reactions\").col(\"users\").flatten().collect(max=100).flatten()\n", + "#reaction_users = messages.col(\"reactions\").flatten().col(\"users\").collect(kt.Trailing(duration)).flatten()\n", + "#participating_users = conversations.col(\"user\").collect(max=100) #kt.windows.Trailing(duration))\n", "engaged_users = reaction_users #kt.union(reaction_users, participating_users)\n", "\n", - "examples = kt.record({\"prompt\": shifted_coversation, \"completion\": engaged_users}) \\\n", - " .filter(shifted_coversation.is_not_null())\n", - "\n", - "examples.preview(5)" + "#examples = kt.record({\"prompt\": shifted_conversation, \"completion\": engaged_users}) \\\n", + "# .filter(shifted_conversation.is_not_null())\n", + "examples = kt.record({\"prompt\": conversations, \"completion\": engaged_users})\n", + "examples.preview(5) # NOTE: completion shouldn't be None" ] }, { @@ -160,7 +163,7 @@ "source": [ "from sklearn import preprocessing\n", "\n", - "examples_df = examples.run().to_pandas()\n", + "examples_df = examples.run().to_pandas().drop([\"_time\", \"_subsort\", \"_key_hash\", \"_key\"], axis=1)\n", "\n", "le = 
preprocessing.LabelEncoder()\n", "le.fit(examples_df.completion.explode())\n", @@ -171,7 +174,7 @@ "examples_df.prompt = examples_df.prompt.apply(format_prompt)\n", "\n", "def format_completion(completion):\n", - " return \" \" + (\" \".join([le.transform(u) for u in completion]) if len(completion) > 0 else \"nil\") + \" end\"\n", + " return \" \" + (\" \".join(le.transform(completion).astype(str)) if len(completion) > 0 else \"nil\") + \" end\"\n", "examples_df.completion = examples_df.completion.apply(format_completion)\n", "\n", "# Write examples to file\n", diff --git a/examples/slackbot/slackbot.py b/examples/slackbot/slackbot.py index 38de273ea..23737b0f7 100644 --- a/examples/slackbot/slackbot.py +++ b/examples/slackbot/slackbot.py @@ -1,12 +1,11 @@ import json, math, datetime, openai, os, pyarrow, pandas, asyncio -#from slack_sdk.web import WebClient from slack_sdk.web.async_client import AsyncWebClient -#from slack_sdk.socket_mode import SocketModeClient from slack_sdk.socket_mode.aiohttp import SocketModeClient from slack_sdk.socket_mode.response import SocketModeResponse import sparrow_py as kt async def main(): + # Load user label map output_map = {} with open('./user_output_map.json', 'r') as file: @@ -20,6 +19,8 @@ async def main(): web_client=AsyncWebClient(token=os.environ.get("SLACK_BOT_TOKEN")) ) + + # Backfill state with historical data historical_data = pandas.read_parquet("./messages.parquet")[:1] schema = pyarrow.Schema.from_pandas(historical_data) @@ -29,6 +30,8 @@ async def main(): key_column_name = "channel", ) + + # Receive Slack messages in real-time async def handle_message(client, req): # Acknowledge the message back to Slack @@ -43,36 +46,33 @@ async def handle_message(client, req): if "previous_message" in e or e["type"] == "reaction_added": return - try: - e["ts"] = datetime.datetime.fromtimestamp(float(e["ts"])) - del e["team"] - data = pyarrow.RecordBatch.from_pylist([e], schema=schema) - - print(f'Sending message event to kaskada: 
{e}') - - # Deliver the message to Kaskada - messages.add_data(data) - print("Done sending message") - except Exception as e: print(e) + e["ts"] = datetime.datetime.fromtimestamp(float(e["ts"])) + del e["team"] + data = pyarrow.RecordBatch.from_pylist([e], schema=schema) + messages.add_data(data) slack.socket_mode_request_listeners.append(handle_message) await slack.connect() - # Handle messages - message_time = messages.time_of() - #last_message_time = message_time.lag(1) # !!! - #is_new_conversation = True #message_time.seconds_since(last_message_time) > 10 * 60 + + # Compute conversations from individual messages + messages = messages.with_key(kt.record({ + "channel": messages.col("channel"), + "thread": messages.col("thread_ts"), + })) conversations = messages \ - .select("user", "ts", "text") \ - .collect(max=100) #.collect(window=kt.SinceWindow(predicate=is_new_conversation), max=100) + .select("user", "ts", "text", "reactions") \ + .collect(max=20) - # A "conversation" is a list of messages + + + # Handle each conversation as it occurs start = now = datetime.datetime.now() print("Listening for new messages...") async for conversation in conversations.run(materialize=True).iter_rows_async(): - #if len(conversation) == 0 or conversation["_time"] < start: - # continue + if len(conversation) == 0:#or conversation["_time"] < start: + continue print(f'Conversation: {conversation}') print(f'Starting completion on conversation with first message text: {conversation["result"][0]["text"]}') @@ -81,9 +81,6 @@ async def handle_message(client, req): print(f'Using prompt: {prompt}') - # Credentials don't work yet... 
- continue - # Ask the model who should be notified res = openai.Completion.create( model="davinci:ft-personal:coversation-users-full-kaskada-2023-08-05-14-25-30", @@ -102,7 +99,7 @@ async def handle_message(client, req): print(f'Found logprobs: {logprobs}') for user in logprobs: if math.exp(logprobs[user]) > 0.50: - user = users.strip() + user = user.strip() # if users include `nil`, stop processing if user == "nil": users = [] From 72a9c5c5434308101940d3fc0f475fc15af3a112 Mon Sep 17 00:00:00 2001 From: Ryan Michael Date: Wed, 9 Aug 2023 20:32:32 -0400 Subject: [PATCH 08/10] wip --- examples/slackbot/Notebook.ipynb | 665 ++++++++++++++++++++++++++++++- 1 file changed, 645 insertions(+), 20 deletions(-) diff --git a/examples/slackbot/Notebook.ipynb b/examples/slackbot/Notebook.ipynb index 0d07db193..3a944217f 100644 --- a/examples/slackbot/Notebook.ipynb +++ b/examples/slackbot/Notebook.ipynb @@ -27,10 +27,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "61ea2e95-6d9d-4068-ab98-8cf94bc4d9d0", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdin", + "output_type": "stream", + "text": [ + "OpenAI: API Key ········\n" + ] + } + ], "source": [ "from datetime import datetime, timedelta\n", "from slack_sdk.socket_mode import SocketModeClient\n", @@ -67,10 +75,224 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "9d224bec-e5a1-4c67-8764-e3dcdbc5e0ac", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_time_subsort_key_hash_keysubtypetsusertextteamuser_team...reactionsthread_tsreply_countreply_users_countlatest_replyis_lockedsubscribedlast_readparent_user_idchannel
02023-07-25 19:42:13515750806798332339587generalmessage2023-07-25 19:42:13U05JQJJDJ6P<@U05JQJJDJ6P> has joined the channelNoneNone...NoneNoneNaNNaNNoneNoneNoneNoneNonegeneral
12023-07-25 19:42:14143094307063304068259randommessage2023-07-25 19:42:14U05JQJJDJ6P<@U05JQJJDJ6P> has joined the channelNoneNone...NoneNoneNaNNaNNoneNoneNoneNoneNonerandom
22023-07-25 19:44:2702954779196800164886demomessage2023-07-25 19:44:27U05JQJJDJ6P<@U05JQJJDJ6P> has joined the channelNoneNone...NoneNoneNaNNaNNoneNoneNoneNoneNonedemo
32023-07-26 08:29:35615750806798332339587generalmessage2023-07-26 08:29:35U05JQJJDJ6Pold message 1T05JA5XCR9DT05JA5XCR9D...NoneNoneNaNNaNNoneNoneNoneNoneNonegeneral
42023-07-26 08:29:37715750806798332339587generalmessage2023-07-26 08:29:37U05JQJJDJ6Pold message 2T05JA5XCR9DT05JA5XCR9D...NoneNoneNaNNaNNoneNoneNoneNoneNonegeneral
\n", + "

5 rows × 24 columns

\n", + "
" + ], + "text/plain": [ + " _time _subsort _key_hash _key subtype \\\n", + "0 2023-07-25 19:42:13 5 15750806798332339587 general message \n", + "1 2023-07-25 19:42:14 14 3094307063304068259 random message \n", + "2 2023-07-25 19:44:27 0 2954779196800164886 demo message \n", + "3 2023-07-26 08:29:35 6 15750806798332339587 general message \n", + "4 2023-07-26 08:29:37 7 15750806798332339587 general message \n", + "\n", + " ts user text \\\n", + "0 2023-07-25 19:42:13 U05JQJJDJ6P <@U05JQJJDJ6P> has joined the channel \n", + "1 2023-07-25 19:42:14 U05JQJJDJ6P <@U05JQJJDJ6P> has joined the channel \n", + "2 2023-07-25 19:44:27 U05JQJJDJ6P <@U05JQJJDJ6P> has joined the channel \n", + "3 2023-07-26 08:29:35 U05JQJJDJ6P old message 1 \n", + "4 2023-07-26 08:29:37 U05JQJJDJ6P old message 2 \n", + "\n", + " team user_team ... reactions thread_ts reply_count \\\n", + "0 None None ... None None NaN \n", + "1 None None ... None None NaN \n", + "2 None None ... None None NaN \n", + "3 T05JA5XCR9D T05JA5XCR9D ... None None NaN \n", + "4 T05JA5XCR9D T05JA5XCR9D ... None None NaN \n", + "\n", + " reply_users_count latest_reply is_locked subscribed last_read \\\n", + "0 NaN None None None None \n", + "1 NaN None None None None \n", + "2 NaN None None None None \n", + "3 NaN None None None None \n", + "4 NaN None None None None \n", + "\n", + " parent_user_id channel \n", + "0 None general \n", + "1 None random \n", + "2 None demo \n", + "3 None general \n", + "4 None general \n", + "\n", + "[5 rows x 24 columns]" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "messages = kt.sources.ArrowSource(\n", " data = pandas.read_parquet(\"./messages.parquet\"), \n", @@ -92,15 +314,109 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "088e52bd-8f30-4d6e-abbc-4896b88c0837", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_time_subsort_key_hash_keyresult
02023-07-25 19:42:13515750806798332339587general[{'ts': 1690314133000000000, 'user': 'U05JQJJD...
12023-07-25 19:42:14143094307063304068259random[{'ts': 1690314134000000000, 'user': 'U05JQJJD...
22023-07-25 19:44:2702954779196800164886demo[{'ts': 1690314267000000000, 'user': 'U05JQJJD...
32023-07-26 08:29:35615750806798332339587general[{'ts': 1690314133000000000, 'user': 'U05JQJJD...
42023-07-26 08:29:37715750806798332339587general[{'ts': 1690314133000000000, 'user': 'U05JQJJD...
\n", + "
" + ], + "text/plain": [ + " _time _subsort _key_hash _key \\\n", + "0 2023-07-25 19:42:13 5 15750806798332339587 general \n", + "1 2023-07-25 19:42:14 14 3094307063304068259 random \n", + "2 2023-07-25 19:44:27 0 2954779196800164886 demo \n", + "3 2023-07-26 08:29:35 6 15750806798332339587 general \n", + "4 2023-07-26 08:29:37 7 15750806798332339587 general \n", + "\n", + " result \n", + "0 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... \n", + "1 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... \n", + "2 [{'ts': 1690314267000000000, 'user': 'U05JQJJD... \n", + "3 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... \n", + "4 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... " + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "messages = messages.with_key(kt.record({\n", - " \"channel\": messages.col(\"channel\"),\n", - " \"thread\": messages.col(\"thread_ts\"),\n", - " }))\n", + "#messages = messages.with_key(kt.record({\n", + "# \"channel\": messages.col(\"channel\"),\n", + "# \"thread\": messages.col(\"thread_ts\"),\n", + "# }))\n", "\n", "prompts = messages \\\n", " .select(\"user\", \"ts\", \"text\", \"reactions\") \\\n", @@ -119,23 +435,332 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "af7d2a45-eb89-47ce-b471-a39ad8c7bbc7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_time_subsort_key_hash_keypromptcompletion
02023-07-25 20:42:13015750806798332339587general[{'ts': 1690314133000000000, 'user': 'U05JQJJD...None
12023-07-25 20:42:1413094307063304068259random[{'ts': 1690314134000000000, 'user': 'U05JQJJD...None
22023-07-25 20:44:2722954779196800164886demo[{'ts': 1690314267000000000, 'user': 'U05JQJJD...None
32023-07-26 09:29:35315750806798332339587general[{'ts': 1690314133000000000, 'user': 'U05JQJJD...None
42023-07-26 09:29:37415750806798332339587general[{'ts': 1690314133000000000, 'user': 'U05JQJJD...None
52023-07-26 09:30:1052954779196800164886demo[{'ts': 1690314267000000000, 'user': 'U05JQJJD...None
62023-07-26 09:30:1463094307063304068259random[{'ts': 1690314134000000000, 'user': 'U05JQJJD...None
72023-07-26 09:30:4073094307063304068259random[{'ts': 1690314134000000000, 'user': 'U05JQJJD...None
82023-07-26 09:30:4983094307063304068259random[{'ts': 1690314134000000000, 'user': 'U05JQJJD...None
92023-07-26 09:30:5393094307063304068259random[{'ts': 1690314134000000000, 'user': 'U05JQJJD...None
102023-07-26 09:30:57103094307063304068259random[{'ts': 1690314134000000000, 'user': 'U05JQJJD...None
112023-07-26 10:55:07113094307063304068259random[{'ts': 1690314134000000000, 'user': 'U05JQJJD...None
122023-07-26 10:55:351215750806798332339587general[{'ts': 1690314133000000000, 'user': 'U05JQJJD...None
132023-07-26 10:55:52132954779196800164886demo[{'ts': 1690314267000000000, 'user': 'U05JQJJD...None
142023-07-26 20:37:0002954779196800164886demo[{'ts': 1690314267000000000, 'user': 'U05JQJJD...None
152023-07-26 20:37:00115750806798332339587general[{'ts': 1690314133000000000, 'user': 'U05JQJJD...None
162023-07-26 20:37:0023094307063304068259random[{'ts': 1690314134000000000, 'user': 'U05JQJJD...None
172023-07-26 20:43:0733094307063304068259random[{'ts': 1690314134000000000, 'user': 'U05JQJJD...None
182023-07-26 21:16:3042954779196800164886demo[{'ts': 1690314267000000000, 'user': 'U05JQJJD...None
192023-07-26 21:16:30515750806798332339587general[{'ts': 1690314133000000000, 'user': 'U05JQJJD...None
202023-07-26 21:16:3063094307063304068259random[{'ts': 1690314134000000000, 'user': 'U05JQJJD...None
212023-07-26 21:18:25715750806798332339587general[{'ts': 1690314133000000000, 'user': 'U05JQJJD...None
222023-07-26 21:31:04815750806798332339587general[{'ts': 1690314133000000000, 'user': 'U05JQJJD...None
232023-07-26 21:31:19915750806798332339587general[{'ts': 1690314133000000000, 'user': 'U05JQJJD...None
\n", + "
" + ], + "text/plain": [ + " _time _subsort _key_hash _key \\\n", + "0 2023-07-25 20:42:13 0 15750806798332339587 general \n", + "1 2023-07-25 20:42:14 1 3094307063304068259 random \n", + "2 2023-07-25 20:44:27 2 2954779196800164886 demo \n", + "3 2023-07-26 09:29:35 3 15750806798332339587 general \n", + "4 2023-07-26 09:29:37 4 15750806798332339587 general \n", + "5 2023-07-26 09:30:10 5 2954779196800164886 demo \n", + "6 2023-07-26 09:30:14 6 3094307063304068259 random \n", + "7 2023-07-26 09:30:40 7 3094307063304068259 random \n", + "8 2023-07-26 09:30:49 8 3094307063304068259 random \n", + "9 2023-07-26 09:30:53 9 3094307063304068259 random \n", + "10 2023-07-26 09:30:57 10 3094307063304068259 random \n", + "11 2023-07-26 10:55:07 11 3094307063304068259 random \n", + "12 2023-07-26 10:55:35 12 15750806798332339587 general \n", + "13 2023-07-26 10:55:52 13 2954779196800164886 demo \n", + "14 2023-07-26 20:37:00 0 2954779196800164886 demo \n", + "15 2023-07-26 20:37:00 1 15750806798332339587 general \n", + "16 2023-07-26 20:37:00 2 3094307063304068259 random \n", + "17 2023-07-26 20:43:07 3 3094307063304068259 random \n", + "18 2023-07-26 21:16:30 4 2954779196800164886 demo \n", + "19 2023-07-26 21:16:30 5 15750806798332339587 general \n", + "20 2023-07-26 21:16:30 6 3094307063304068259 random \n", + "21 2023-07-26 21:18:25 7 15750806798332339587 general \n", + "22 2023-07-26 21:31:04 8 15750806798332339587 general \n", + "23 2023-07-26 21:31:19 9 15750806798332339587 general \n", + "\n", + " prompt completion \n", + "0 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... None \n", + "1 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... None \n", + "2 [{'ts': 1690314267000000000, 'user': 'U05JQJJD... None \n", + "3 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... None \n", + "4 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... None \n", + "5 [{'ts': 1690314267000000000, 'user': 'U05JQJJD... None \n", + "6 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... 
None \n", + "7 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... None \n", + "8 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... None \n", + "9 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... None \n", + "10 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... None \n", + "11 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... None \n", + "12 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... None \n", + "13 [{'ts': 1690314267000000000, 'user': 'U05JQJJD... None \n", + "14 [{'ts': 1690314267000000000, 'user': 'U05JQJJD... None \n", + "15 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... None \n", + "16 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... None \n", + "17 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... None \n", + "18 [{'ts': 1690314267000000000, 'user': 'U05JQJJD... None \n", + "19 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... None \n", + "20 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... None \n", + "21 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... None \n", + "22 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... None \n", + "23 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... 
None " + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "duration = datetime.timedelta(minutes=5)\n", + "duration = datetime.timedelta(minutes=60)\n", "\n", - "shifted_conversation = conversations.shift_by(duration)\n", - "reaction_users = messages.col(\"reactions\").col(\"users\").flatten().collect(max=100).flatten()\n", + "shifted_prompts = prompts.shift_by(duration)\n", + "#reaction_users = messages.col(\"reactions\").col(\"users\").flatten().collect(max=100).flatten()\n", "#reaction_users = messages.col(\"reactions\").flatten().col(\"users\").collect(kt.Trailing(duration)).flatten()\n", "#participating_users = conversations.col(\"user\").collect(max=100) #kt.windows.Trailing(duration))\n", - "engaged_users = reaction_users #kt.union(reaction_users, participating_users)\n", + "#engaged_users = reaction_users #kt.union(reaction_users, participating_users)\n", + "engaged_users = prompts.col(\"user\").collect(max=100)\n", "\n", - "#examples = kt.record({\"prompt\": shifted_conversation, \"completion\": engaged_users}) \\\n", - "# .filter(shifted_conversation.is_not_null())\n", - "examples = kt.record({\"prompt\": conversations, \"completion\": engaged_users})\n", - "examples.preview(5) # NOTE: completion shouldn't be None" + "examples = kt.record({\"prompt\": shifted_prompts, \"completion\": engaged_users}) \\\n", + " .filter(shifted_prompts.is_not_null())\n", + "examples.preview(100) # NOTE: completion shouldn't be None" ] }, { From 8dff70edebbab594c30ce2692e867be3ff25a43b Mon Sep 17 00:00:00 2001 From: Ryan Michael Date: Wed, 9 Aug 2023 21:22:08 -0400 Subject: [PATCH 09/10] wip --- examples/slackbot/Notebook.ipynb | 547 ++++++++++++++++++++++++------- examples/slackbot/slackbot.py | 58 ++-- 2 files changed, 452 insertions(+), 153 deletions(-) diff --git a/examples/slackbot/Notebook.ipynb b/examples/slackbot/Notebook.ipynb index 3a944217f..ff38d07e4 100644 --- a/examples/slackbot/Notebook.ipynb +++ 
b/examples/slackbot/Notebook.ipynb @@ -27,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "61ea2e95-6d9d-4068-ab98-8cf94bc4d9d0", "metadata": {}, "outputs": [ @@ -75,7 +75,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 3, "id": "9d224bec-e5a1-4c67-8764-e3dcdbc5e0ac", "metadata": {}, "outputs": [ @@ -288,7 +288,7 @@ "[5 rows x 24 columns]" ] }, - "execution_count": 18, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -300,7 +300,6 @@ " key_column_name = \"channel\",\n", ")\n", "\n", - "\n", "messages.preview(5)" ] }, @@ -314,7 +313,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 4, "id": "088e52bd-8f30-4d6e-abbc-4896b88c0837", "metadata": {}, "outputs": [ @@ -407,7 +406,7 @@ "4 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... " ] }, - "execution_count": 19, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -435,7 +434,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 10, "id": "af7d2a45-eb89-47ce-b471-a39ad8c7bbc7", "metadata": {}, "outputs": [ @@ -471,7 +470,25 @@ " \n", " \n", " 0\n", - " 2023-07-25 20:42:13\n", + " 2023-07-25 19:42:13\n", + " 5\n", + " 15750806798332339587\n", + " general\n", + " None\n", + " [U05JQJJDJ6P]\n", + " \n", + " \n", + " 1\n", + " 2023-07-25 19:42:14\n", + " 14\n", + " 3094307063304068259\n", + " random\n", + " None\n", + " [U05JQJJDJ6P]\n", + " \n", + " \n", + " 2\n", + " 2023-07-25 19:43:13\n", " 0\n", " 15750806798332339587\n", " general\n", @@ -479,8 +496,8 @@ " None\n", " \n", " \n", - " 1\n", - " 2023-07-25 20:42:14\n", + " 3\n", + " 2023-07-25 19:43:14\n", " 1\n", " 3094307063304068259\n", " random\n", @@ -488,8 +505,17 @@ " None\n", " \n", " \n", - " 2\n", - " 2023-07-25 20:44:27\n", + " 4\n", + " 2023-07-25 19:44:27\n", + " 0\n", + " 2954779196800164886\n", + " demo\n", + " None\n", + " [U05JQJJDJ6P]\n", + " \n", + " \n", + " 5\n", + " 
2023-07-25 19:45:27\n", " 2\n", " 2954779196800164886\n", " demo\n", @@ -497,8 +523,44 @@ " None\n", " \n", " \n", - " 3\n", - " 2023-07-26 09:29:35\n", + " 6\n", + " 2023-07-26 08:29:35\n", + " 6\n", + " 15750806798332339587\n", + " general\n", + " None\n", + " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P]\n", + " \n", + " \n", + " 7\n", + " 2023-07-26 08:29:37\n", + " 7\n", + " 15750806798332339587\n", + " general\n", + " None\n", + " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", + " \n", + " \n", + " 8\n", + " 2023-07-26 08:30:10\n", + " 1\n", + " 2954779196800164886\n", + " demo\n", + " None\n", + " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P]\n", + " \n", + " \n", + " 9\n", + " 2023-07-26 08:30:14\n", + " 15\n", + " 3094307063304068259\n", + " random\n", + " None\n", + " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P]\n", + " \n", + " \n", + " 10\n", + " 2023-07-26 08:30:35\n", " 3\n", " 15750806798332339587\n", " general\n", @@ -506,8 +568,8 @@ " None\n", " \n", " \n", - " 4\n", - " 2023-07-26 09:29:37\n", + " 11\n", + " 2023-07-26 08:30:37\n", " 4\n", " 15750806798332339587\n", " general\n", @@ -515,8 +577,44 @@ " None\n", " \n", " \n", - " 5\n", - " 2023-07-26 09:30:10\n", + " 12\n", + " 2023-07-26 08:30:40\n", + " 16\n", + " 3094307063304068259\n", + " random\n", + " None\n", + " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", + " \n", + " \n", + " 13\n", + " 2023-07-26 08:30:49\n", + " 17\n", + " 3094307063304068259\n", + " random\n", + " None\n", + " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", + " \n", + " \n", + " 14\n", + " 2023-07-26 08:30:53\n", + " 18\n", + " 3094307063304068259\n", + " random\n", + " None\n", + " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", + " \n", + " \n", + " 15\n", + " 2023-07-26 08:30:57\n", + " 19\n", + " 3094307063304068259\n", + " random\n", + " None\n", + " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", + " \n", + " \n", + " 16\n", + " 2023-07-26 08:31:10\n", " 5\n", " 2954779196800164886\n", " 
demo\n", @@ -524,8 +622,8 @@ " None\n", " \n", " \n", - " 6\n", - " 2023-07-26 09:30:14\n", + " 17\n", + " 2023-07-26 08:31:14\n", " 6\n", " 3094307063304068259\n", " random\n", @@ -533,8 +631,8 @@ " None\n", " \n", " \n", - " 7\n", - " 2023-07-26 09:30:40\n", + " 18\n", + " 2023-07-26 08:31:40\n", " 7\n", " 3094307063304068259\n", " random\n", @@ -542,8 +640,8 @@ " None\n", " \n", " \n", - " 8\n", - " 2023-07-26 09:30:49\n", + " 19\n", + " 2023-07-26 08:31:49\n", " 8\n", " 3094307063304068259\n", " random\n", @@ -551,8 +649,8 @@ " None\n", " \n", " \n", - " 9\n", - " 2023-07-26 09:30:53\n", + " 20\n", + " 2023-07-26 08:31:53\n", " 9\n", " 3094307063304068259\n", " random\n", @@ -560,8 +658,8 @@ " None\n", " \n", " \n", - " 10\n", - " 2023-07-26 09:30:57\n", + " 21\n", + " 2023-07-26 08:31:57\n", " 10\n", " 3094307063304068259\n", " random\n", @@ -569,8 +667,35 @@ " None\n", " \n", " \n", - " 11\n", - " 2023-07-26 10:55:07\n", + " 22\n", + " 2023-07-26 09:55:07\n", + " 20\n", + " 3094307063304068259\n", + " random\n", + " None\n", + " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", + " \n", + " \n", + " 23\n", + " 2023-07-26 09:55:35\n", + " 8\n", + " 15750806798332339587\n", + " general\n", + " None\n", + " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", + " \n", + " \n", + " 24\n", + " 2023-07-26 09:55:52\n", + " 2\n", + " 2954779196800164886\n", + " demo\n", + " None\n", + " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", + " \n", + " \n", + " 25\n", + " 2023-07-26 09:56:07\n", " 11\n", " 3094307063304068259\n", " random\n", @@ -578,8 +703,8 @@ " None\n", " \n", " \n", - " 12\n", - " 2023-07-26 10:55:35\n", + " 26\n", + " 2023-07-26 09:56:35\n", " 12\n", " 15750806798332339587\n", " general\n", @@ -587,8 +712,8 @@ " None\n", " \n", " \n", - " 13\n", - " 2023-07-26 10:55:52\n", + " 27\n", + " 2023-07-26 09:56:52\n", " 13\n", " 2954779196800164886\n", " demo\n", @@ -596,90 +721,180 @@ " None\n", " \n", " \n", - " 14\n", - " 2023-07-26 
20:37:00\n", - " 0\n", + " 28\n", + " 2023-07-26 19:37:00\n", + " 3\n", + " 2954779196800164886\n", + " demo\n", + " None\n", + " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", + " \n", + " \n", + " 29\n", + " 2023-07-26 19:37:00\n", + " 9\n", + " 15750806798332339587\n", + " general\n", + " None\n", + " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", + " \n", + " \n", + " 30\n", + " 2023-07-26 19:37:00\n", + " 21\n", + " 3094307063304068259\n", + " random\n", + " None\n", + " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", + " \n", + " \n", + " 31\n", + " 2023-07-26 19:38:00\n", + " 14\n", " 2954779196800164886\n", " demo\n", " [{'ts': 1690314267000000000, 'user': 'U05JQJJD...\n", " None\n", " \n", " \n", - " 15\n", - " 2023-07-26 20:37:00\n", - " 1\n", + " 32\n", + " 2023-07-26 19:38:00\n", + " 15\n", " 15750806798332339587\n", " general\n", " [{'ts': 1690314133000000000, 'user': 'U05JQJJD...\n", " None\n", " \n", " \n", - " 16\n", - " 2023-07-26 20:37:00\n", - " 2\n", + " 33\n", + " 2023-07-26 19:38:00\n", + " 16\n", " 3094307063304068259\n", " random\n", " [{'ts': 1690314134000000000, 'user': 'U05JQJJD...\n", " None\n", " \n", " \n", - " 17\n", - " 2023-07-26 20:43:07\n", - " 3\n", + " 34\n", + " 2023-07-26 19:43:07\n", + " 22\n", + " 3094307063304068259\n", + " random\n", + " None\n", + " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", + " \n", + " \n", + " 35\n", + " 2023-07-26 19:44:07\n", + " 17\n", " 3094307063304068259\n", " random\n", " [{'ts': 1690314134000000000, 'user': 'U05JQJJD...\n", " None\n", " \n", " \n", - " 18\n", - " 2023-07-26 21:16:30\n", + " 36\n", + " 2023-07-26 20:16:30\n", " 4\n", " 2954779196800164886\n", " demo\n", + " None\n", + " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", + " \n", + " \n", + " 37\n", + " 2023-07-26 20:16:30\n", + " 10\n", + " 15750806798332339587\n", + " general\n", + " None\n", + " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", + " \n", + " \n", + " 38\n", + " 2023-07-26 
20:16:30\n", + " 23\n", + " 3094307063304068259\n", + " random\n", + " None\n", + " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", + " \n", + " \n", + " 39\n", + " 2023-07-26 20:17:30\n", + " 18\n", + " 2954779196800164886\n", + " demo\n", " [{'ts': 1690314267000000000, 'user': 'U05JQJJD...\n", " None\n", " \n", " \n", - " 19\n", - " 2023-07-26 21:16:30\n", - " 5\n", + " 40\n", + " 2023-07-26 20:17:30\n", + " 19\n", " 15750806798332339587\n", " general\n", " [{'ts': 1690314133000000000, 'user': 'U05JQJJD...\n", " None\n", " \n", " \n", - " 20\n", - " 2023-07-26 21:16:30\n", - " 6\n", + " 41\n", + " 2023-07-26 20:17:30\n", + " 20\n", " 3094307063304068259\n", " random\n", " [{'ts': 1690314134000000000, 'user': 'U05JQJJD...\n", " None\n", " \n", " \n", - " 21\n", - " 2023-07-26 21:18:25\n", - " 7\n", + " 42\n", + " 2023-07-26 20:18:25\n", + " 11\n", + " 15750806798332339587\n", + " general\n", + " None\n", + " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", + " \n", + " \n", + " 43\n", + " 2023-07-26 20:19:25\n", + " 21\n", " 15750806798332339587\n", " general\n", " [{'ts': 1690314133000000000, 'user': 'U05JQJJD...\n", " None\n", " \n", " \n", - " 22\n", - " 2023-07-26 21:31:04\n", - " 8\n", + " 44\n", + " 2023-07-26 20:31:04\n", + " 12\n", + " 15750806798332339587\n", + " general\n", + " None\n", + " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", + " \n", + " \n", + " 45\n", + " 2023-07-26 20:31:19\n", + " 13\n", + " 15750806798332339587\n", + " general\n", + " None\n", + " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", + " \n", + " \n", + " 46\n", + " 2023-07-26 20:32:04\n", + " 0\n", " 15750806798332339587\n", " general\n", " [{'ts': 1690314133000000000, 'user': 'U05JQJJD...\n", " None\n", " \n", " \n", - " 23\n", - " 2023-07-26 21:31:19\n", - " 9\n", + " 47\n", + " 2023-07-26 20:32:19\n", + " 1\n", " 15750806798332339587\n", " general\n", " [{'ts': 1690314133000000000, 'user': 'U05JQJJD...\n", @@ -691,76 +906,176 @@ ], "text/plain": [ " 
_time _subsort _key_hash _key \\\n", - "0 2023-07-25 20:42:13 0 15750806798332339587 general \n", - "1 2023-07-25 20:42:14 1 3094307063304068259 random \n", - "2 2023-07-25 20:44:27 2 2954779196800164886 demo \n", - "3 2023-07-26 09:29:35 3 15750806798332339587 general \n", - "4 2023-07-26 09:29:37 4 15750806798332339587 general \n", - "5 2023-07-26 09:30:10 5 2954779196800164886 demo \n", - "6 2023-07-26 09:30:14 6 3094307063304068259 random \n", - "7 2023-07-26 09:30:40 7 3094307063304068259 random \n", - "8 2023-07-26 09:30:49 8 3094307063304068259 random \n", - "9 2023-07-26 09:30:53 9 3094307063304068259 random \n", - "10 2023-07-26 09:30:57 10 3094307063304068259 random \n", - "11 2023-07-26 10:55:07 11 3094307063304068259 random \n", - "12 2023-07-26 10:55:35 12 15750806798332339587 general \n", - "13 2023-07-26 10:55:52 13 2954779196800164886 demo \n", - "14 2023-07-26 20:37:00 0 2954779196800164886 demo \n", - "15 2023-07-26 20:37:00 1 15750806798332339587 general \n", - "16 2023-07-26 20:37:00 2 3094307063304068259 random \n", - "17 2023-07-26 20:43:07 3 3094307063304068259 random \n", - "18 2023-07-26 21:16:30 4 2954779196800164886 demo \n", - "19 2023-07-26 21:16:30 5 15750806798332339587 general \n", - "20 2023-07-26 21:16:30 6 3094307063304068259 random \n", - "21 2023-07-26 21:18:25 7 15750806798332339587 general \n", - "22 2023-07-26 21:31:04 8 15750806798332339587 general \n", - "23 2023-07-26 21:31:19 9 15750806798332339587 general \n", + "0 2023-07-25 19:42:13 5 15750806798332339587 general \n", + "1 2023-07-25 19:42:14 14 3094307063304068259 random \n", + "2 2023-07-25 19:43:13 0 15750806798332339587 general \n", + "3 2023-07-25 19:43:14 1 3094307063304068259 random \n", + "4 2023-07-25 19:44:27 0 2954779196800164886 demo \n", + "5 2023-07-25 19:45:27 2 2954779196800164886 demo \n", + "6 2023-07-26 08:29:35 6 15750806798332339587 general \n", + "7 2023-07-26 08:29:37 7 15750806798332339587 general \n", + "8 2023-07-26 08:30:10 1 
2954779196800164886 demo \n", + "9 2023-07-26 08:30:14 15 3094307063304068259 random \n", + "10 2023-07-26 08:30:35 3 15750806798332339587 general \n", + "11 2023-07-26 08:30:37 4 15750806798332339587 general \n", + "12 2023-07-26 08:30:40 16 3094307063304068259 random \n", + "13 2023-07-26 08:30:49 17 3094307063304068259 random \n", + "14 2023-07-26 08:30:53 18 3094307063304068259 random \n", + "15 2023-07-26 08:30:57 19 3094307063304068259 random \n", + "16 2023-07-26 08:31:10 5 2954779196800164886 demo \n", + "17 2023-07-26 08:31:14 6 3094307063304068259 random \n", + "18 2023-07-26 08:31:40 7 3094307063304068259 random \n", + "19 2023-07-26 08:31:49 8 3094307063304068259 random \n", + "20 2023-07-26 08:31:53 9 3094307063304068259 random \n", + "21 2023-07-26 08:31:57 10 3094307063304068259 random \n", + "22 2023-07-26 09:55:07 20 3094307063304068259 random \n", + "23 2023-07-26 09:55:35 8 15750806798332339587 general \n", + "24 2023-07-26 09:55:52 2 2954779196800164886 demo \n", + "25 2023-07-26 09:56:07 11 3094307063304068259 random \n", + "26 2023-07-26 09:56:35 12 15750806798332339587 general \n", + "27 2023-07-26 09:56:52 13 2954779196800164886 demo \n", + "28 2023-07-26 19:37:00 3 2954779196800164886 demo \n", + "29 2023-07-26 19:37:00 9 15750806798332339587 general \n", + "30 2023-07-26 19:37:00 21 3094307063304068259 random \n", + "31 2023-07-26 19:38:00 14 2954779196800164886 demo \n", + "32 2023-07-26 19:38:00 15 15750806798332339587 general \n", + "33 2023-07-26 19:38:00 16 3094307063304068259 random \n", + "34 2023-07-26 19:43:07 22 3094307063304068259 random \n", + "35 2023-07-26 19:44:07 17 3094307063304068259 random \n", + "36 2023-07-26 20:16:30 4 2954779196800164886 demo \n", + "37 2023-07-26 20:16:30 10 15750806798332339587 general \n", + "38 2023-07-26 20:16:30 23 3094307063304068259 random \n", + "39 2023-07-26 20:17:30 18 2954779196800164886 demo \n", + "40 2023-07-26 20:17:30 19 15750806798332339587 general \n", + "41 2023-07-26 20:17:30 20 
3094307063304068259 random \n", + "42 2023-07-26 20:18:25 11 15750806798332339587 general \n", + "43 2023-07-26 20:19:25 21 15750806798332339587 general \n", + "44 2023-07-26 20:31:04 12 15750806798332339587 general \n", + "45 2023-07-26 20:31:19 13 15750806798332339587 general \n", + "46 2023-07-26 20:32:04 0 15750806798332339587 general \n", + "47 2023-07-26 20:32:19 1 15750806798332339587 general \n", + "\n", + " prompt \\\n", + "0 None \n", + "1 None \n", + "2 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... \n", + "3 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... \n", + "4 None \n", + "5 [{'ts': 1690314267000000000, 'user': 'U05JQJJD... \n", + "6 None \n", + "7 None \n", + "8 None \n", + "9 None \n", + "10 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... \n", + "11 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... \n", + "12 None \n", + "13 None \n", + "14 None \n", + "15 None \n", + "16 [{'ts': 1690314267000000000, 'user': 'U05JQJJD... \n", + "17 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... \n", + "18 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... \n", + "19 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... \n", + "20 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... \n", + "21 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... \n", + "22 None \n", + "23 None \n", + "24 None \n", + "25 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... \n", + "26 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... \n", + "27 [{'ts': 1690314267000000000, 'user': 'U05JQJJD... \n", + "28 None \n", + "29 None \n", + "30 None \n", + "31 [{'ts': 1690314267000000000, 'user': 'U05JQJJD... \n", + "32 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... \n", + "33 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... \n", + "34 None \n", + "35 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... \n", + "36 None \n", + "37 None \n", + "38 None \n", + "39 [{'ts': 1690314267000000000, 'user': 'U05JQJJD... \n", + "40 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... 
\n", + "41 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... \n", + "42 None \n", + "43 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... \n", + "44 None \n", + "45 None \n", + "46 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... \n", + "47 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... \n", "\n", - " prompt completion \n", - "0 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... None \n", - "1 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... None \n", - "2 [{'ts': 1690314267000000000, 'user': 'U05JQJJD... None \n", - "3 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... None \n", - "4 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... None \n", - "5 [{'ts': 1690314267000000000, 'user': 'U05JQJJD... None \n", - "6 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... None \n", - "7 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... None \n", - "8 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... None \n", - "9 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... None \n", - "10 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... None \n", - "11 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... None \n", - "12 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... None \n", - "13 [{'ts': 1690314267000000000, 'user': 'U05JQJJD... None \n", - "14 [{'ts': 1690314267000000000, 'user': 'U05JQJJD... None \n", - "15 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... None \n", - "16 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... None \n", - "17 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... None \n", - "18 [{'ts': 1690314267000000000, 'user': 'U05JQJJD... None \n", - "19 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... None \n", - "20 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... None \n", - "21 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... None \n", - "22 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... None \n", - "23 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... 
None " + " completion \n", + "0 [U05JQJJDJ6P] \n", + "1 [U05JQJJDJ6P] \n", + "2 None \n", + "3 None \n", + "4 [U05JQJJDJ6P] \n", + "5 None \n", + "6 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P] \n", + "7 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", + "8 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P] \n", + "9 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P] \n", + "10 None \n", + "11 None \n", + "12 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", + "13 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", + "14 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", + "15 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", + "16 None \n", + "17 None \n", + "18 None \n", + "19 None \n", + "20 None \n", + "21 None \n", + "22 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", + "23 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", + "24 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", + "25 None \n", + "26 None \n", + "27 None \n", + "28 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", + "29 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", + "30 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", + "31 None \n", + "32 None \n", + "33 None \n", + "34 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", + "35 None \n", + "36 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", + "37 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", + "38 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", + "39 None \n", + "40 None \n", + "41 None \n", + "42 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", + "43 None \n", + "44 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", + "45 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... 
\n", + "46 None \n", + "47 None " ] }, - "execution_count": 23, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "duration = datetime.timedelta(minutes=60)\n", + "duration = datetime.timedelta(minutes=1)\n", "\n", "shifted_prompts = prompts.shift_by(duration)\n", "#reaction_users = messages.col(\"reactions\").col(\"users\").flatten().collect(max=100).flatten()\n", "#reaction_users = messages.col(\"reactions\").flatten().col(\"users\").collect(kt.Trailing(duration)).flatten()\n", "#participating_users = conversations.col(\"user\").collect(max=100) #kt.windows.Trailing(duration))\n", "#engaged_users = reaction_users #kt.union(reaction_users, participating_users)\n", - "engaged_users = prompts.col(\"user\").collect(max=100)\n", + "engaged_users = prompts.col(\"user\").collect(max=100).flatten()\n", "\n", "examples = kt.record({\"prompt\": shifted_prompts, \"completion\": engaged_users}) \\\n", " .filter(shifted_prompts.is_not_null())\n", - "examples.preview(100) # NOTE: completion shouldn't be None" + "examples = kt.record({\"prompt\": shifted_prompts, \"completion\": engaged_users})\n", + "examples.preview(100) # NOTE: completion shouldn't be None\n", + "#engaged_users.preview(100)" ] }, { diff --git a/examples/slackbot/slackbot.py b/examples/slackbot/slackbot.py index 23737b0f7..ba286c6f2 100644 --- a/examples/slackbot/slackbot.py +++ b/examples/slackbot/slackbot.py @@ -5,6 +5,7 @@ import sparrow_py as kt async def main(): + start = datetime.datetime.now() # Load user label map output_map = {} @@ -38,79 +39,62 @@ async def handle_message(client, req): await client.send_socket_mode_response(SocketModeResponse(envelope_id=req.envelope_id)) if req.type == "events_api" and "event" in req.payload: - e = req.payload["event"] - - print(f'Received event from slack websocket: {e}') - # ignore message edit, delete, reaction events - if "previous_message" in e or e["type"] == "reaction_added": + if "previous_message" in 
req.payload["event"] or req.payload["event"]["type"] == "reaction_added": return - e["ts"] = datetime.datetime.fromtimestamp(float(e["ts"])) - del e["team"] - data = pyarrow.RecordBatch.from_pylist([e], schema=schema) + req.payload["event"]["ts"] = datetime.datetime.fromtimestamp(float(req.payload["event"]["ts"])) + del req.payload["event"]["team"] + data = pyarrow.RecordBatch.from_pylist([req.payload["event"]], schema=schema) messages.add_data(data) slack.socket_mode_request_listeners.append(handle_message) await slack.connect() - + # Compute conversations from individual messages - messages = messages.with_key(kt.record({ + conversations = messages.with_key(kt.record({ "channel": messages.col("channel"), "thread": messages.col("thread_ts"), - })) - conversations = messages \ + })) \ .select("user", "ts", "text", "reactions") \ - .collect(max=20) - + .collect(max=3) + # Handle each conversation as it occurs - start = now = datetime.datetime.now() - print("Listening for new messages...") - async for conversation in conversations.run(materialize=True).iter_rows_async(): - if len(conversation) == 0:#or conversation["_time"] < start: + async for row in conversations.run(materialize=True).iter_rows_async(): + conversation = row[" result"] + if len(conversation) == 0 or row["_time"] < start: continue - print(f'Conversation: {conversation}') - print(f'Starting completion on conversation with first message text: {conversation["result"][0]["text"]}') - - prompt = "start -> " + "\n\n".join([f' {msg["user"]} --> {msg["text"]} ' for msg in conversation["result"]]) + "\n\n###\n\n" - - print(f'Using prompt: {prompt}') + print(f'Starting completion on conversation with first message text: {conversation[0]["text"]}') # Ask the model who should be notified res = openai.Completion.create( model="davinci:ft-personal:coversation-users-full-kaskada-2023-08-05-14-25-30", - prompt=prompt, + prompt="start -> " + "\n\n".join([f' {msg["user"]} --> {msg["text"]} ' for msg in conversation]) 
+ "\n\n###\n\n", logprobs=5, max_tokens=1, stop=" end", - temperature=0, + temperature=1, ) - print(f'Received completion response: {res}') - + msg = conversation.pop(0) users = [] logprobs = res["choices"][0]["logprobs"]["top_logprobs"][0] - - print(f'Found logprobs: {logprobs}') + print(f"Predicted interest logprobs: {logprobs}") + print(f"Notifying users: {users}") for user in logprobs: - if math.exp(logprobs[user]) > 0.50: + if math.exp(logprobs[user]) > 0.30: user = user.strip() # if users include `nil`, stop processing if user == "nil": users = [] break users.append(user) - - print(f'Found users to alert: {users}') - - # alert on most recent message in conversation - msg = conversation.pop() - + # Send notification to users for user_num in users: if user_num not in output_map: From 0b65677aa0aadeaeae44e54ddab77a7998327d21 Mon Sep 17 00:00:00 2001 From: Ryan Michael Date: Wed, 9 Aug 2023 21:56:29 -0400 Subject: [PATCH 10/10] Update notebook --- examples/slackbot/Notebook.ipynb | 690 ++++++------------------------- 1 file changed, 133 insertions(+), 557 deletions(-) diff --git a/examples/slackbot/Notebook.ipynb b/examples/slackbot/Notebook.ipynb index ff38d07e4..f758f46a7 100644 --- a/examples/slackbot/Notebook.ipynb +++ b/examples/slackbot/Notebook.ipynb @@ -27,18 +27,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "61ea2e95-6d9d-4068-ab98-8cf94bc4d9d0", "metadata": {}, - "outputs": [ - { - "name": "stdin", - "output_type": "stream", - "text": [ - "OpenAI: API Key ········\n" - ] - } - ], + "outputs": [], "source": [ "from datetime import datetime, timedelta\n", "from slack_sdk.socket_mode import SocketModeClient\n", @@ -51,10 +43,7 @@ "import datetime\n", "\n", "# Initialize Kaskada with a local execution context.\n", - "kt.init_session()\n", - "\n", - "# Initialize OpenAI\n", - "openai.api_key = getpass.getpass('OpenAI: API Key')" + "kt.init_session()" ] }, { @@ -303,127 +292,6 @@ "messages.preview(5)" ] }, - { - 
"cell_type": "markdown", - "id": "22dd5729-ee7b-4de4-88dc-1642424833f0", - "metadata": {}, - "source": [ - "### Construct prompts" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "088e52bd-8f30-4d6e-abbc-4896b88c0837", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
_time_subsort_key_hash_keyresult
02023-07-25 19:42:13515750806798332339587general[{'ts': 1690314133000000000, 'user': 'U05JQJJD...
12023-07-25 19:42:14143094307063304068259random[{'ts': 1690314134000000000, 'user': 'U05JQJJD...
22023-07-25 19:44:2702954779196800164886demo[{'ts': 1690314267000000000, 'user': 'U05JQJJD...
32023-07-26 08:29:35615750806798332339587general[{'ts': 1690314133000000000, 'user': 'U05JQJJD...
42023-07-26 08:29:37715750806798332339587general[{'ts': 1690314133000000000, 'user': 'U05JQJJD...
\n", - "
" - ], - "text/plain": [ - " _time _subsort _key_hash _key \\\n", - "0 2023-07-25 19:42:13 5 15750806798332339587 general \n", - "1 2023-07-25 19:42:14 14 3094307063304068259 random \n", - "2 2023-07-25 19:44:27 0 2954779196800164886 demo \n", - "3 2023-07-26 08:29:35 6 15750806798332339587 general \n", - "4 2023-07-26 08:29:37 7 15750806798332339587 general \n", - "\n", - " result \n", - "0 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... \n", - "1 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... \n", - "2 [{'ts': 1690314267000000000, 'user': 'U05JQJJD... \n", - "3 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... \n", - "4 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#messages = messages.with_key(kt.record({\n", - "# \"channel\": messages.col(\"channel\"),\n", - "# \"thread\": messages.col(\"thread_ts\"),\n", - "# }))\n", - "\n", - "prompts = messages \\\n", - " .select(\"user\", \"ts\", \"text\", \"reactions\") \\\n", - " .collect(max=20)\n", - "\n", - "prompts.preview(5)" - ] - }, { "cell_type": "markdown", "id": "5076d2bf-6830-460b-a9cb-948d8f106edc", @@ -434,7 +302,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 6, "id": "af7d2a45-eb89-47ce-b471-a39ad8c7bbc7", "metadata": {}, "outputs": [ @@ -470,435 +338,219 @@ " \n", " \n", " 0\n", - " 2023-07-25 19:42:13\n", - " 5\n", - " 15750806798332339587\n", - " general\n", - " None\n", - " [U05JQJJDJ6P]\n", - " \n", - " \n", - " 1\n", - " 2023-07-25 19:42:14\n", - " 14\n", - " 3094307063304068259\n", - " random\n", - " None\n", - " [U05JQJJDJ6P]\n", - " \n", - " \n", - " 2\n", " 2023-07-25 19:43:13\n", " 0\n", " 15750806798332339587\n", " general\n", " [{'ts': 1690314133000000000, 'user': 'U05JQJJD...\n", - " None\n", + " []\n", " \n", " \n", - " 3\n", + " 1\n", " 2023-07-25 19:43:14\n", " 1\n", " 3094307063304068259\n", " random\n", " [{'ts': 1690314134000000000, 
'user': 'U05JQJJD...\n", - " None\n", + " []\n", " \n", " \n", - " 4\n", - " 2023-07-25 19:44:27\n", - " 0\n", - " 2954779196800164886\n", - " demo\n", - " None\n", - " [U05JQJJDJ6P]\n", - " \n", - " \n", - " 5\n", + " 2\n", " 2023-07-25 19:45:27\n", " 2\n", " 2954779196800164886\n", " demo\n", " [{'ts': 1690314267000000000, 'user': 'U05JQJJD...\n", - " None\n", - " \n", - " \n", - " 6\n", - " 2023-07-26 08:29:35\n", - " 6\n", - " 15750806798332339587\n", - " general\n", - " None\n", - " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P]\n", - " \n", - " \n", - " 7\n", - " 2023-07-26 08:29:37\n", - " 7\n", - " 15750806798332339587\n", - " general\n", - " None\n", - " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", - " \n", - " \n", - " 8\n", - " 2023-07-26 08:30:10\n", - " 1\n", - " 2954779196800164886\n", - " demo\n", - " None\n", - " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P]\n", - " \n", - " \n", - " 9\n", - " 2023-07-26 08:30:14\n", - " 15\n", - " 3094307063304068259\n", - " random\n", - " None\n", - " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P]\n", + " []\n", " \n", " \n", - " 10\n", + " 3\n", " 2023-07-26 08:30:35\n", " 3\n", " 15750806798332339587\n", " general\n", " [{'ts': 1690314133000000000, 'user': 'U05JQJJD...\n", - " None\n", + " []\n", " \n", " \n", - " 11\n", + " 4\n", " 2023-07-26 08:30:37\n", " 4\n", " 15750806798332339587\n", " general\n", " [{'ts': 1690314133000000000, 'user': 'U05JQJJD...\n", - " None\n", - " \n", - " \n", - " 12\n", - " 2023-07-26 08:30:40\n", - " 16\n", - " 3094307063304068259\n", - " random\n", - " None\n", - " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", + " []\n", " \n", " \n", - " 13\n", - " 2023-07-26 08:30:49\n", - " 17\n", - " 3094307063304068259\n", - " random\n", - " None\n", - " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", - " \n", - " \n", - " 14\n", - " 2023-07-26 08:30:53\n", - " 18\n", - " 3094307063304068259\n", - " random\n", - " None\n", - " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", - " \n", 
- " \n", - " 15\n", - " 2023-07-26 08:30:57\n", - " 19\n", - " 3094307063304068259\n", - " random\n", - " None\n", - " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", - " \n", - " \n", - " 16\n", + " 5\n", " 2023-07-26 08:31:10\n", " 5\n", " 2954779196800164886\n", " demo\n", " [{'ts': 1690314267000000000, 'user': 'U05JQJJD...\n", - " None\n", + " []\n", " \n", " \n", - " 17\n", + " 6\n", " 2023-07-26 08:31:14\n", " 6\n", " 3094307063304068259\n", " random\n", " [{'ts': 1690314134000000000, 'user': 'U05JQJJD...\n", - " None\n", + " []\n", " \n", " \n", - " 18\n", + " 7\n", " 2023-07-26 08:31:40\n", " 7\n", " 3094307063304068259\n", " random\n", " [{'ts': 1690314134000000000, 'user': 'U05JQJJD...\n", - " None\n", + " []\n", " \n", " \n", - " 19\n", + " 8\n", " 2023-07-26 08:31:49\n", " 8\n", " 3094307063304068259\n", " random\n", " [{'ts': 1690314134000000000, 'user': 'U05JQJJD...\n", - " None\n", + " []\n", " \n", " \n", - " 20\n", + " 9\n", " 2023-07-26 08:31:53\n", " 9\n", " 3094307063304068259\n", " random\n", " [{'ts': 1690314134000000000, 'user': 'U05JQJJD...\n", - " None\n", + " []\n", " \n", " \n", - " 21\n", + " 10\n", " 2023-07-26 08:31:57\n", " 10\n", " 3094307063304068259\n", " random\n", " [{'ts': 1690314134000000000, 'user': 'U05JQJJD...\n", - " None\n", - " \n", - " \n", - " 22\n", - " 2023-07-26 09:55:07\n", - " 20\n", - " 3094307063304068259\n", - " random\n", - " None\n", - " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", - " \n", - " \n", - " 23\n", - " 2023-07-26 09:55:35\n", - " 8\n", - " 15750806798332339587\n", - " general\n", - " None\n", - " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", - " \n", - " \n", - " 24\n", - " 2023-07-26 09:55:52\n", - " 2\n", - " 2954779196800164886\n", - " demo\n", - " None\n", - " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", + " []\n", " \n", " \n", - " 25\n", + " 11\n", " 2023-07-26 09:56:07\n", " 11\n", " 3094307063304068259\n", " random\n", " [{'ts': 1690314134000000000, 'user': 
'U05JQJJD...\n", - " None\n", + " []\n", " \n", " \n", - " 26\n", + " 12\n", " 2023-07-26 09:56:35\n", " 12\n", " 15750806798332339587\n", " general\n", " [{'ts': 1690314133000000000, 'user': 'U05JQJJD...\n", - " None\n", + " []\n", " \n", " \n", - " 27\n", + " 13\n", " 2023-07-26 09:56:52\n", " 13\n", " 2954779196800164886\n", " demo\n", " [{'ts': 1690314267000000000, 'user': 'U05JQJJD...\n", - " None\n", - " \n", - " \n", - " 28\n", - " 2023-07-26 19:37:00\n", - " 3\n", - " 2954779196800164886\n", - " demo\n", - " None\n", - " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", + " []\n", " \n", " \n", - " 29\n", - " 2023-07-26 19:37:00\n", - " 9\n", - " 15750806798332339587\n", - " general\n", - " None\n", - " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", - " \n", - " \n", - " 30\n", - " 2023-07-26 19:37:00\n", - " 21\n", - " 3094307063304068259\n", - " random\n", - " None\n", - " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", - " \n", - " \n", - " 31\n", + " 14\n", " 2023-07-26 19:38:00\n", " 14\n", " 2954779196800164886\n", " demo\n", " [{'ts': 1690314267000000000, 'user': 'U05JQJJD...\n", - " None\n", + " []\n", " \n", " \n", - " 32\n", + " 15\n", " 2023-07-26 19:38:00\n", " 15\n", " 15750806798332339587\n", " general\n", " [{'ts': 1690314133000000000, 'user': 'U05JQJJD...\n", - " None\n", + " []\n", " \n", " \n", - " 33\n", + " 16\n", " 2023-07-26 19:38:00\n", " 16\n", " 3094307063304068259\n", " random\n", " [{'ts': 1690314134000000000, 'user': 'U05JQJJD...\n", - " None\n", - " \n", - " \n", - " 34\n", - " 2023-07-26 19:43:07\n", - " 22\n", - " 3094307063304068259\n", - " random\n", - " None\n", - " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", + " []\n", " \n", " \n", - " 35\n", + " 17\n", " 2023-07-26 19:44:07\n", " 17\n", " 3094307063304068259\n", " random\n", " [{'ts': 1690314134000000000, 'user': 'U05JQJJD...\n", - " None\n", - " \n", - " \n", - " 36\n", - " 2023-07-26 20:16:30\n", - " 4\n", - " 2954779196800164886\n", - " 
demo\n", - " None\n", - " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", - " \n", - " \n", - " 37\n", - " 2023-07-26 20:16:30\n", - " 10\n", - " 15750806798332339587\n", - " general\n", - " None\n", - " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", - " \n", - " \n", - " 38\n", - " 2023-07-26 20:16:30\n", - " 23\n", - " 3094307063304068259\n", - " random\n", - " None\n", - " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", + " [U05JQJJDJ6P]\n", " \n", " \n", - " 39\n", + " 18\n", " 2023-07-26 20:17:30\n", " 18\n", " 2954779196800164886\n", " demo\n", " [{'ts': 1690314267000000000, 'user': 'U05JQJJD...\n", - " None\n", + " []\n", " \n", " \n", - " 40\n", + " 19\n", " 2023-07-26 20:17:30\n", " 19\n", " 15750806798332339587\n", " general\n", " [{'ts': 1690314133000000000, 'user': 'U05JQJJD...\n", - " None\n", + " []\n", " \n", " \n", - " 41\n", + " 20\n", " 2023-07-26 20:17:30\n", " 20\n", " 3094307063304068259\n", " random\n", " [{'ts': 1690314134000000000, 'user': 'U05JQJJD...\n", - " None\n", - " \n", - " \n", - " 42\n", - " 2023-07-26 20:18:25\n", - " 11\n", - " 15750806798332339587\n", - " general\n", - " None\n", - " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", + " [U05JQJJDJ6P]\n", " \n", " \n", - " 43\n", + " 21\n", " 2023-07-26 20:19:25\n", " 21\n", " 15750806798332339587\n", " general\n", " [{'ts': 1690314133000000000, 'user': 'U05JQJJD...\n", - " None\n", - " \n", - " \n", - " 44\n", - " 2023-07-26 20:31:04\n", - " 12\n", - " 15750806798332339587\n", - " general\n", - " None\n", - " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", - " \n", - " \n", - " 45\n", - " 2023-07-26 20:31:19\n", - " 13\n", - " 15750806798332339587\n", - " general\n", - " None\n", - " [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ...\n", + " [U05JQJJDJ6P]\n", " \n", " \n", - " 46\n", + " 22\n", " 2023-07-26 20:32:04\n", " 0\n", " 15750806798332339587\n", " general\n", " [{'ts': 1690314133000000000, 'user': 'U05JQJJD...\n", - " None\n", + " 
[U05JQJJDJ6P]\n", " \n", " \n", - " 47\n", + " 23\n", " 2023-07-26 20:32:19\n", " 1\n", " 15750806798332339587\n", " general\n", " [{'ts': 1690314133000000000, 'user': 'U05JQJJD...\n", - " None\n", + " [U05JQJJDJ6P]\n", " \n", " \n", "\n", @@ -906,176 +558,93 @@ ], "text/plain": [ " _time _subsort _key_hash _key \\\n", - "0 2023-07-25 19:42:13 5 15750806798332339587 general \n", - "1 2023-07-25 19:42:14 14 3094307063304068259 random \n", - "2 2023-07-25 19:43:13 0 15750806798332339587 general \n", - "3 2023-07-25 19:43:14 1 3094307063304068259 random \n", - "4 2023-07-25 19:44:27 0 2954779196800164886 demo \n", - "5 2023-07-25 19:45:27 2 2954779196800164886 demo \n", - "6 2023-07-26 08:29:35 6 15750806798332339587 general \n", - "7 2023-07-26 08:29:37 7 15750806798332339587 general \n", - "8 2023-07-26 08:30:10 1 2954779196800164886 demo \n", - "9 2023-07-26 08:30:14 15 3094307063304068259 random \n", - "10 2023-07-26 08:30:35 3 15750806798332339587 general \n", - "11 2023-07-26 08:30:37 4 15750806798332339587 general \n", - "12 2023-07-26 08:30:40 16 3094307063304068259 random \n", - "13 2023-07-26 08:30:49 17 3094307063304068259 random \n", - "14 2023-07-26 08:30:53 18 3094307063304068259 random \n", - "15 2023-07-26 08:30:57 19 3094307063304068259 random \n", - "16 2023-07-26 08:31:10 5 2954779196800164886 demo \n", - "17 2023-07-26 08:31:14 6 3094307063304068259 random \n", - "18 2023-07-26 08:31:40 7 3094307063304068259 random \n", - "19 2023-07-26 08:31:49 8 3094307063304068259 random \n", - "20 2023-07-26 08:31:53 9 3094307063304068259 random \n", - "21 2023-07-26 08:31:57 10 3094307063304068259 random \n", - "22 2023-07-26 09:55:07 20 3094307063304068259 random \n", - "23 2023-07-26 09:55:35 8 15750806798332339587 general \n", - "24 2023-07-26 09:55:52 2 2954779196800164886 demo \n", - "25 2023-07-26 09:56:07 11 3094307063304068259 random \n", - "26 2023-07-26 09:56:35 12 15750806798332339587 general \n", - "27 2023-07-26 09:56:52 13 2954779196800164886 
demo \n", - "28 2023-07-26 19:37:00 3 2954779196800164886 demo \n", - "29 2023-07-26 19:37:00 9 15750806798332339587 general \n", - "30 2023-07-26 19:37:00 21 3094307063304068259 random \n", - "31 2023-07-26 19:38:00 14 2954779196800164886 demo \n", - "32 2023-07-26 19:38:00 15 15750806798332339587 general \n", - "33 2023-07-26 19:38:00 16 3094307063304068259 random \n", - "34 2023-07-26 19:43:07 22 3094307063304068259 random \n", - "35 2023-07-26 19:44:07 17 3094307063304068259 random \n", - "36 2023-07-26 20:16:30 4 2954779196800164886 demo \n", - "37 2023-07-26 20:16:30 10 15750806798332339587 general \n", - "38 2023-07-26 20:16:30 23 3094307063304068259 random \n", - "39 2023-07-26 20:17:30 18 2954779196800164886 demo \n", - "40 2023-07-26 20:17:30 19 15750806798332339587 general \n", - "41 2023-07-26 20:17:30 20 3094307063304068259 random \n", - "42 2023-07-26 20:18:25 11 15750806798332339587 general \n", - "43 2023-07-26 20:19:25 21 15750806798332339587 general \n", - "44 2023-07-26 20:31:04 12 15750806798332339587 general \n", - "45 2023-07-26 20:31:19 13 15750806798332339587 general \n", - "46 2023-07-26 20:32:04 0 15750806798332339587 general \n", - "47 2023-07-26 20:32:19 1 15750806798332339587 general \n", - "\n", - " prompt \\\n", - "0 None \n", - "1 None \n", - "2 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... \n", - "3 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... \n", - "4 None \n", - "5 [{'ts': 1690314267000000000, 'user': 'U05JQJJD... \n", - "6 None \n", - "7 None \n", - "8 None \n", - "9 None \n", - "10 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... \n", - "11 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... \n", - "12 None \n", - "13 None \n", - "14 None \n", - "15 None \n", - "16 [{'ts': 1690314267000000000, 'user': 'U05JQJJD... \n", - "17 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... \n", - "18 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... \n", - "19 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... 
\n", - "20 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... \n", - "21 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... \n", - "22 None \n", - "23 None \n", - "24 None \n", - "25 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... \n", - "26 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... \n", - "27 [{'ts': 1690314267000000000, 'user': 'U05JQJJD... \n", - "28 None \n", - "29 None \n", - "30 None \n", - "31 [{'ts': 1690314267000000000, 'user': 'U05JQJJD... \n", - "32 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... \n", - "33 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... \n", - "34 None \n", - "35 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... \n", - "36 None \n", - "37 None \n", - "38 None \n", - "39 [{'ts': 1690314267000000000, 'user': 'U05JQJJD... \n", - "40 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... \n", - "41 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... \n", - "42 None \n", - "43 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... \n", - "44 None \n", - "45 None \n", - "46 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... \n", - "47 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... 
\n", + "0 2023-07-25 19:43:13 0 15750806798332339587 general \n", + "1 2023-07-25 19:43:14 1 3094307063304068259 random \n", + "2 2023-07-25 19:45:27 2 2954779196800164886 demo \n", + "3 2023-07-26 08:30:35 3 15750806798332339587 general \n", + "4 2023-07-26 08:30:37 4 15750806798332339587 general \n", + "5 2023-07-26 08:31:10 5 2954779196800164886 demo \n", + "6 2023-07-26 08:31:14 6 3094307063304068259 random \n", + "7 2023-07-26 08:31:40 7 3094307063304068259 random \n", + "8 2023-07-26 08:31:49 8 3094307063304068259 random \n", + "9 2023-07-26 08:31:53 9 3094307063304068259 random \n", + "10 2023-07-26 08:31:57 10 3094307063304068259 random \n", + "11 2023-07-26 09:56:07 11 3094307063304068259 random \n", + "12 2023-07-26 09:56:35 12 15750806798332339587 general \n", + "13 2023-07-26 09:56:52 13 2954779196800164886 demo \n", + "14 2023-07-26 19:38:00 14 2954779196800164886 demo \n", + "15 2023-07-26 19:38:00 15 15750806798332339587 general \n", + "16 2023-07-26 19:38:00 16 3094307063304068259 random \n", + "17 2023-07-26 19:44:07 17 3094307063304068259 random \n", + "18 2023-07-26 20:17:30 18 2954779196800164886 demo \n", + "19 2023-07-26 20:17:30 19 15750806798332339587 general \n", + "20 2023-07-26 20:17:30 20 3094307063304068259 random \n", + "21 2023-07-26 20:19:25 21 15750806798332339587 general \n", + "22 2023-07-26 20:32:04 0 15750806798332339587 general \n", + "23 2023-07-26 20:32:19 1 15750806798332339587 general \n", "\n", - " completion \n", - "0 [U05JQJJDJ6P] \n", - "1 [U05JQJJDJ6P] \n", - "2 None \n", - "3 None \n", - "4 [U05JQJJDJ6P] \n", - "5 None \n", - "6 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P] \n", - "7 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", - "8 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P] \n", - "9 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P] \n", - "10 None \n", - "11 None \n", - "12 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", - "13 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... 
\n", - "14 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", - "15 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", - "16 None \n", - "17 None \n", - "18 None \n", - "19 None \n", - "20 None \n", - "21 None \n", - "22 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", - "23 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", - "24 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", - "25 None \n", - "26 None \n", - "27 None \n", - "28 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", - "29 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", - "30 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", - "31 None \n", - "32 None \n", - "33 None \n", - "34 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", - "35 None \n", - "36 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", - "37 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", - "38 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", - "39 None \n", - "40 None \n", - "41 None \n", - "42 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", - "43 None \n", - "44 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", - "45 [U05JQJJDJ6P, U05JQJJDJ6P, U05JQJJDJ6P, U05JQJ... \n", - "46 None \n", - "47 None " + " prompt completion \n", + "0 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... [] \n", + "1 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... [] \n", + "2 [{'ts': 1690314267000000000, 'user': 'U05JQJJD... [] \n", + "3 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... [] \n", + "4 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... [] \n", + "5 [{'ts': 1690314267000000000, 'user': 'U05JQJJD... [] \n", + "6 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... [] \n", + "7 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... [] \n", + "8 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... [] \n", + "9 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... [] \n", + "10 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... [] \n", + "11 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... 
[] \n", + "12 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... [] \n", + "13 [{'ts': 1690314267000000000, 'user': 'U05JQJJD... [] \n", + "14 [{'ts': 1690314267000000000, 'user': 'U05JQJJD... [] \n", + "15 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... [] \n", + "16 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... [] \n", + "17 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... [U05JQJJDJ6P] \n", + "18 [{'ts': 1690314267000000000, 'user': 'U05JQJJD... [] \n", + "19 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... [] \n", + "20 [{'ts': 1690314134000000000, 'user': 'U05JQJJD... [U05JQJJDJ6P] \n", + "21 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... [U05JQJJDJ6P] \n", + "22 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... [U05JQJJDJ6P] \n", + "23 [{'ts': 1690314133000000000, 'user': 'U05JQJJD... [U05JQJJDJ6P] " ] }, - "execution_count": 10, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# Group messages by thread (if present) or channel\n", + "#messages = messages.with_key(kt.record({\n", + "# \"channel\": messages.col(\"channel\"),\n", + "# \"thread\": messages.col(\"thread_ts\"),\n", + "# }))\n", + "\n", + "\n", + "# Build the input prompt from recent messages\n", + "prompts = messages \\\n", + " .select(\"user\", \"ts\", \"text\", \"reactions\") \\\n", + " .collect(max=20)\n", + "\n", + "\n", + "# Build the completion from users who engage after the prompt\n", "duration = datetime.timedelta(minutes=1)\n", "\n", "shifted_prompts = prompts.shift_by(duration)\n", - "#reaction_users = messages.col(\"reactions\").col(\"users\").flatten().collect(max=100).flatten()\n", - "#reaction_users = messages.col(\"reactions\").flatten().col(\"users\").collect(kt.Trailing(duration)).flatten()\n", - "#participating_users = conversations.col(\"user\").collect(max=100) #kt.windows.Trailing(duration))\n", - "#engaged_users = reaction_users #kt.union(reaction_users, participating_users)\n", - "engaged_users = 
prompts.col(\"user\").collect(max=100).flatten()\n", + "\n", + "reaction_users = messages.collect(max=100).col(\"reactions\").flatten().col(\"users\").flatten().last()\n", + "#reaction_users = messages.collect(kt.Trailing(duration), max=100).col(\"reactions\").flatten().col(\"users\").flatten().last()\n", + "#participating_users = prompts.col(\"user\").collect(max=100) #kt.windows.Trailing(duration))\n", + "engaged_users = reaction_users #kt.union(reaction_users, participating_users)\n", "\n", "examples = kt.record({\"prompt\": shifted_prompts, \"completion\": engaged_users}) \\\n", " .filter(shifted_prompts.is_not_null())\n", - "examples = kt.record({\"prompt\": shifted_prompts, \"completion\": engaged_users})\n", - "examples.preview(100) # NOTE: completion shouldn't be None\n", - "#engaged_users.preview(100)" + "\n", + "\n", + "prompts.preview(5)\n", + "examples.preview(100)" ] }, { @@ -1103,20 +672,24 @@ "source": [ "from sklearn import preprocessing\n", "\n", + "# Extract examples from historical data\n", "examples_df = examples.run().to_pandas().drop([\"_time\", \"_subsort\", \"_key_hash\", \"_key\"], axis=1)\n", "\n", + "\n", + "# Encode user ID labels\n", "le = preprocessing.LabelEncoder()\n", "le.fit(examples_df.completion.explode())\n", "\n", + "\n", "# Format for the OpenAI API\n", "def format_prompt(prompt):\n", " return \"start -> \" + \"\\n\\n\".join([f' {msg[\"user\"]} --> {msg[\"text\"]} ' for msg in prompt]) + \"\\n\\n###\\n\\n\"\n", "examples_df.prompt = examples_df.prompt.apply(format_prompt)\n", - "\n", "def format_completion(completion):\n", " return \" \" + (\" \".join(le.transform(completion).astype(str)) if len(completion) > 0 else \"nil\") + \" end\"\n", "examples_df.completion = examples_df.completion.apply(format_completion)\n", "\n", + "\n", "# Write examples to file\n", "examples_df.to_json(\"examples.jsonl\", orient='records', lines=True)" ] @@ -1141,7 +714,10 @@ "from types import SimpleNamespace\n", "from openai import cli\n", "\n", - 
"# verifiy data format, split for training & validation\n", + "# Initialize OpenAI\n", + "openai.api_key = getpass.getpass('OpenAI: API Key')\n", + "\n", + "# Verify data format, split for training & validation, upload to OpenAI\n", "args = SimpleNamespace(file='./examples.jsonl', quiet=True)\n", "cli.FineTune.prepare_data(args)\n", "training_id = cli.FineTune._get_or_upload('./examples_prepared_train.jsonl', True)"