This repository was archived by the owner on Apr 19, 2026. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathllmint.go
More file actions
144 lines (127 loc) · 4.03 KB
/
llmint.go
File metadata and controls
144 lines (127 loc) · 4.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
// Package llmint provides foundational types and middleware primitives for
// LLM token economics — cost tracking, cache-aware usage, and provider composition.
package llmint
import (
"context"
"crypto/sha256"
"encoding/hex"
"encoding/json"
)
// Provider is the core interface every LLM backend must implement.
//
// It pairs one completion call with two descriptive accessors. Middleware
// values take a Provider and return another Provider, so anything that
// satisfies this interface can be wrapped.
type Provider interface {
// Complete submits req to the backend and returns its Response, or an error.
// ctx is handed to the implementation; presumably it carries cancellation
// and deadlines — confirm against the concrete providers.
Complete(ctx context.Context, req *Request) (*Response, error)
// Name returns a label identifying the provider. The format is left to the
// implementation.
Name() string
// Models returns the ModelInfo entries (pricing and capacity) the provider
// reports.
Models() []ModelInfo
}
// Middleware is a function that wraps a Provider with additional behavior and
// returns the wrapping Provider, which callers then use in place of the one
// that was passed in.
// Follows the same convention as net/http middleware: the first middleware
// applied is the outermost layer.
// NOTE(review): the ordering rule above describes how several middleware are
// composed; the helper that composes them is not visible in this section —
// confirm the rule against it.
type Middleware func(Provider) Provider
// ModelInfo describes pricing and capacity for a single model.
//
// The four *PerMTok rates are the values Usage.ComputeCost multiplies against
// the corresponding token counts. All rates are in USD per one million tokens.
type ModelInfo struct {
// ID identifies the model (presumably the provider's model name — confirm).
ID string
InputPerMTok float64 // USD per million input tokens
OutputPerMTok float64 // USD per million output tokens
CacheReadPerMTok float64 // USD per million cache-read tokens
CacheWritePerMTok float64 // USD per million cache-write tokens
// MaxContextTokens is the model's context capacity in tokens.
// NOTE(review): nothing in this file reads it.
MaxContextTokens int
}
// Message is a single turn in a conversation.
//
// Request.Hash JSON-encodes Messages using these untagged field names, so
// renaming a field or adding a json tag changes every existing request hash.
type Message struct {
// Role names the speaker of the turn. The listed values are the ones the
// author documented; nothing in this file validates them.
Role string // "user" | "assistant" | "system"
// Content is the text of the turn.
Content string
}
// Tool describes a function the model may invoke.
//
// Request.Hash JSON-encodes Tools using these untagged field names, so
// renaming a field or adding a json tag changes every existing request hash.
type Tool struct {
// Name is the identifier of the tool.
Name string
// Description is free-form text describing the tool.
Description string
// InputSchema is raw JSON kept undecoded (presumably a JSON Schema for the
// tool's arguments — confirm). encoding/json refuses to marshal a
// RawMessage that is not valid JSON, including a non-nil empty one.
InputSchema json.RawMessage
}
// Request is the canonical input sent to any Provider.
//
// Model, Messages, Tools, MaxTokens and System make up the request's identity
// for Hash; Metadata is deliberately excluded from it.
type Request struct {
// Model selects the model to use (presumably matching a ModelInfo.ID —
// confirm against the providers).
Model string
// Messages is the conversation, in order.
Messages []Message
// Tools lists the functions the model may invoke.
Tools []Tool
// MaxTokens is a token limit for the request (presumably a cap on output
// tokens — confirm).
MaxTokens int
// System is text carried separately from Messages (presumably the system
// prompt — confirm).
System string
// Metadata carries tracing/routing hints. It does not affect Hash.
Metadata map[string]string
}
// Hash returns a deterministic SHA-256 hex digest of the request contents.
// Requests with identical Model, Messages, Tools, MaxTokens and System always
// produce the same hash.
//
// Each field is JSON-encoded, one value per line, in a fixed order, and the
// encoded bytes are fed to the hash. Metadata is omitted intentionally — it
// carries tracing/routing hints, not semantic request content.
//
// Tools is the only field whose encoding can fail: encoding/json rejects the
// whole slice when any InputSchema is not valid JSON (a non-nil but empty
// RawMessage included) and writes nothing for it. Ignoring that error would
// make every such request hash as though the Tools line were absent, so
// requests differing only in their tools would collide. When it happens the
// tools are hashed field by field instead, with the schema treated as opaque
// bytes. Requests whose tools encode cleanly are unaffected by the fallback.
//
// Hash must not be called on a nil *Request.
func (r *Request) Hash() string {
	h := sha256.New()
	enc := json.NewEncoder(h)

	// Writes to a hash.Hash never return an error, and strings, ints and
	// []Message always marshal, so the errors discarded below cannot occur.
	_ = enc.Encode(r.Model)
	_ = enc.Encode(r.Messages)

	if err := enc.Encode(r.Tools); err != nil {
		// A failed Encode emits no bytes and leaves the encoder usable.
		// Each fallback line is a JSON string, whereas a successfully
		// encoded Tools line is an array or null, so the two forms can
		// never produce the same byte stream.
		for i := range r.Tools {
			tool := &r.Tools[i]
			_ = enc.Encode(tool.Name)
			_ = enc.Encode(tool.Description)
			// Converting to []byte drops RawMessage's marshaler, so the
			// schema is base64-encoded verbatim and cannot be rejected.
			_ = enc.Encode([]byte(tool.InputSchema))
		}
	}

	_ = enc.Encode(r.MaxTokens)
	_ = enc.Encode(r.System)

	return hex.EncodeToString(h.Sum(nil))
}
// ContentBlock is a typed segment of model output.
type ContentBlock struct {
// Type tags the kind of segment. The listed values are examples given by the
// author; the set is open-ended and not validated in this file.
Type string // "text" | "tool_use" | etc.
// Text holds the segment's text.
// NOTE(review): there is no separate field for tool-call payloads, so how a
// "tool_use" block carries its arguments is not visible here — confirm.
Text string
}
// CacheStatus indicates how the prompt cache was utilised for a response.
// It is an int enumeration; the zero value is CacheMiss, so a Response whose
// CacheStatus was never set reports a miss.
type CacheStatus int
// The values are assigned by iota in declaration order (0, 1, 2); reordering
// the list changes their numeric values.
const (
CacheMiss CacheStatus = iota // No cache tokens used
CacheHit // All input tokens served from cache
CachePartial // Some input tokens served from cache
)
// String renders the cache status as a short lowercase label: "hit" for
// CacheHit, "partial" for CachePartial, and "miss" for everything else —
// CacheMiss as well as any value outside the declared constants.
func (c CacheStatus) String() string {
	if c == CacheHit {
		return "hit"
	}
	if c == CachePartial {
		return "partial"
	}
	// CacheMiss and unrecognised values share the same label.
	return "miss"
}
// Usage records raw token counts and computed cost for a single completion.
//
// ComputeCost prices the four token counts against a ModelInfo. It has a
// value receiver and only returns the figure, so it never fills in Cost; the
// caller has to assign that field.
type Usage struct {
// InputTokens is priced at ModelInfo.InputPerMTok.
InputTokens int
// OutputTokens is priced at ModelInfo.OutputPerMTok.
OutputTokens int
// CacheReadTokens is priced at ModelInfo.CacheReadPerMTok.
CacheReadTokens int
// CacheWriteTokens is priced at ModelInfo.CacheWritePerMTok.
CacheWriteTokens int
// Cost is the computed cost of the completion (presumably USD, matching
// ComputeCost — confirm).
Cost float64
}
// ComputeCost prices this Usage against info and returns the total in USD.
//
// Every token count is converted to millions of tokens and multiplied by the
// matching per-million rate from info; the four products are summed in the
// order input, output, cache read, cache write. The receiver is not modified,
// and u.Cost is neither read nor written.
func (u Usage) ComputeCost(info ModelInfo) float64 {
	const tokensPerMillion = 1_000_000.0

	// charge prices one token bucket: (tokens / 1e6) * rate.
	charge := func(tokens int, usdPerMTok float64) float64 {
		return float64(tokens) / tokensPerMillion * usdPerMTok
	}

	total := charge(u.InputTokens, info.InputPerMTok)
	total += charge(u.OutputTokens, info.OutputPerMTok)
	total += charge(u.CacheReadTokens, info.CacheReadPerMTok)
	total += charge(u.CacheWriteTokens, info.CacheWritePerMTok)
	return total
}
// Savings records the tokens and cost saved by a single optimisation technique.
// A slice of these can be folded into one summary with TotalSavings.
type Savings struct {
// TokensSaved is the number of tokens the technique avoided.
TokensSaved int
// CostSaved is the cost the technique avoided (presumably USD, like the
// other cost figures in this file — confirm).
CostSaved float64
// Technique labels what produced the saving. TotalSavings uses the label
// "total" for its aggregate.
Technique string
}
// TotalSavings folds ss into one Savings value whose TokensSaved and CostSaved
// are the sums over all entries and whose Technique is always "total". The
// Technique labels of the individual entries are ignored. A nil or empty
// slice yields zero totals.
func TotalSavings(ss []Savings) Savings {
	sum := Savings{Technique: "total"}
	for i := range ss {
		sum.TokensSaved += ss[i].TokensSaved
		sum.CostSaved += ss[i].CostSaved
	}
	return sum
}
// Response is the canonical output from any Provider.
type Response struct {
// Content holds the output as an ordered list of typed segments.
Content []ContentBlock
// Usage carries the token counts and cost recorded for this completion.
Usage Usage
// Model names a model (presumably the one that produced the response —
// confirm against the providers).
Model string
// CacheStatus reports prompt-cache utilisation; its zero value is CacheMiss.
CacheStatus CacheStatus
// Savings lists per-technique savings attributed to this response; the
// entries can be aggregated with TotalSavings.
Savings []Savings
}