-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: models.py
More file actions
146 lines (101 loc) · 3.12 KB
/
models.py
File metadata and controls
146 lines (101 loc) · 3.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
from typing import List, Dict, Union, Literal, Any, Optional
from pydantic import BaseModel
#### Internal API models between frontend and backend
# Value kinds a dataset column may hold; used by Column.type below.
DataType = Literal["text", "number", "date", "categorical"]
class DerivedSchema(BaseModel):
    """Provenance metadata for a column that was derived from another column."""

    # True when this column corresponds to a segment of a text column.
    is_segment: bool
    # Table that stores this derived data (presumably; confirm against callers).
    table_name: Union[str, None] = None
    # Name of the column this one was derived from.
    derived_from: Union[str, None] = None
    # Mechanism that produced the derivation: an LLM ("model") or user code ("code").
    derived_how: Union[Literal["model", "code"], None] = None
class Column(BaseModel):
    """One column of a dataset table."""

    name: str
    type: DataType
    # Present only when the column is derived from another (see DerivedSchema).
    derivedSchema: Union[DerivedSchema, None] = None
    # Free-form additional metadata for this column.
    extra: Union[Dict[str, Any], None] = None
class DatasetSchema(BaseModel):
    """Schema description of a dataset exchanged between frontend and backend."""

    name: str
    columns: List[Column]
    primary_key: Column
    # Whether the dataset is a bundled example or was uploaded by the user.
    origin: Literal["example", "uploaded"] = "uploaded"
    has_embeddings: bool = False
    has_projection: bool = False
    # Column holding search results, when one exists.
    search_result: Union[Column, None] = None
class ColumnSummary(BaseModel):
    """Summary statistics for a single column.

    String fields carry stringified statistic values; the numeric statistics
    (avg/std/quartiles) are optional since they do not apply to every column
    type (e.g. text columns).
    """

    column_name: str
    column_type: str
    min: str
    max: str
    approx_unique: str
    # Fix: these were declared `str = None`, an implicit-Optional that pydantic v2
    # rejects outright and static type checkers flag. Declaring Optional explicitly
    # keeps the same runtime default while being valid in pydantic v1 and v2.
    avg: Optional[str] = None
    std: Optional[str] = None
    q25: Optional[str] = None
    q50: Optional[str] = None
    q75: Optional[str] = None
    count: int
    null_percentage: str
class DuckQueryData(BaseModel):
    """A SQL query for the DuckDB layer, tagged with a client-supplied uuid."""

    uuid: str
    sql: str
    # Encoding the caller expects the result in.
    type: Literal["arrow", "exec", "json"]
    # NOTE: a mutable default is safe here — pydantic copies field defaults per instance.
    buffers: list = []
class ErrorResponse(BaseModel):
    """Failure reply to a DuckQueryData request; echoes the request uuid."""

    uuid: str
    type: Literal["error"] = "error"
    # Human-readable error description.
    error: str
class ExecResponse(BaseModel):
    """Reply to an "exec"-type query: acknowledges completion, carries no data."""

    uuid: str
    type: Literal["exec"] = "exec"
class JsonResponse(BaseModel):
    """Reply to a "json"-type query; `result` is any valid JSON container."""

    uuid: str
    type: Literal["json"] = "json"
    # Any valid JSON value at the top level (array or object).
    result: Union[List, Dict]
# Any reply to a DuckQueryData request; discriminated by each model's `type` field.
DuckQueryResult = Union[ExecResponse, JsonResponse, ErrorResponse]
class DatasetUploadResponse(BaseModel):
    """Outcome of a dataset upload; carries the inferred schema on success."""

    success: bool
    message: str
    # Fix: was declared `DatasetSchema = None`, an implicit-Optional that pydantic v2
    # rejects and type checkers flag. Explicit Optional keeps the same None default.
    datasetSchema: Optional[DatasetSchema] = None
class DatasetVerifyResponse(BaseModel):
    """Outcome of a dataset verification request."""

    success: bool
    message: str
class DatasetTokenizeResponse(BaseModel):
    """Outcome of a dataset tokenization request."""

    success: bool
    message: str
class VectorSearchResponse(BaseModel):
    """Result of a vector search: one dict per matching row."""

    success: bool
    result: List[Dict[str, Any]]
class TransformResponse(BaseModel):
    """Result of a transform run; `result` is any JSON container."""

    success: bool
    result: Union[List, Dict]
# TODO: rename this to TransformSchema.
class TaskFormat(BaseModel):
    """Shape of the output a transform task is expected to produce."""

    name: str
    # Scalar type of each produced value.
    type: Literal["number", "string", "bool"]
    # Whether the task yields one value or several per input.
    num_replies: Literal["single", "multiple"]
class LLMTransformRequest(BaseModel):
    """Request to run an LLM-based transform over a set of column values."""

    userPrompt: str
    taskFormat: TaskFormat
    columnData: List[str]
    exampleData: Optional[List[str]] = None
    # List of example replies; each dict wraps a string, number, or bool value.
    exampleResponse: Optional[List[Dict[str, Any]]] = None
class LLMTransformCommit(BaseModel):
    """Commit an LLM transform: persist results into a column of a table."""

    userPrompt: str
    taskFormat: TaskFormat
    columnName: str
    tableName: str
    exampleData: List[str]
    exampleResponse: List[Dict[str, Any]]
    # Row indices the transform should be applied to.
    applyToIndices: List[int]
class CodeTransformRequest(BaseModel):
    """Request to run a user-code transform over a set of column values."""

    codeString: str
    taskFormat: TaskFormat
    columnData: List[str]
class CodeTransformCommit(BaseModel):
    """Commit a user-code transform: persist results into a column of a table."""

    codeString: str
    taskFormat: TaskFormat
    columnName: str
    tableName: str
    # Row indices the transform should be applied to.
    applyToIndices: List[int]
#### public API for running
# Placeholder alias: accepts any dataframe-like object for now.
DataFrameType = Any  # Future: make sure this is pd.DataFrame or huggingface dataset