-
Notifications
You must be signed in to change notification settings - Fork 381
[Misc] SLO-aware router with profile support #1192
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
5652f12
e3e4d4f
dc7c12c
02b8392
91d401e
9f7d77c
d00c6b6
c48804f
f2bfc7d
8cebab0
9d77b54
fe2fb18
632966b
57f8a03
2a6abc5
8d787d9
b43d3b0
323acf2
618b248
0999e1f
35ec945
2c047f5
cea9419
093439e
97ed8c6
77cc884
313e73e
4d7347c
e721e29
ddc8d33
880448f
b19430e
0820696
4e78c55
1476244
3cc6269
07ba0d0
f592133
ff33ab0
b4d94d7
c48e4eb
088cf16
9d20c8e
bbdf5eb
bcf362f
1c076cf
da14870
dda2df3
2284339
2496d22
0d0574b
610c1aa
8f26347
a1690ea
66aff7c
ff1403a
1467ed8
fb4ba2a
3ef9d44
b10ef22
2ff8afc
455aa7e
cf4a1f0
872cf5d
daf3890
75b8459
3f92bb7
38ed814
a931453
db1ab57
e19c139
225bac8
6822b84
3270ca8
18562f2
449bf19
81d1cbc
cb00177
63a03af
57b8c77
54dfa61
7a1cbe6
f2faf80
7cddb5a
a2c4bb8
2de4a84
e12fa29
6148242
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
apiVersion: kustomize.config.k8s.io/v1beta1 | ||
kind: Kustomization | ||
|
||
namespace: aibrix-system | ||
|
||
namePrefix: aibrix- | ||
|
||
resources: | ||
- ../../../gateway/gateway-plugin | ||
|
||
images: | ||
- name: gateway-plugins | ||
newName: aibrix/gateway-plugins | ||
newTag: nightly | ||
|
||
patches: | ||
- patch: |- # Use the '|' and '-' for inline patching | ||
apiVersion: apps/v1 | ||
kind: Deployment | ||
metadata: | ||
name: gateway-plugins | ||
spec: | ||
template: | ||
spec: | ||
containers: | ||
- name: gateway-plugin | ||
args: | ||
- -v=5 | ||
env: | ||
- name: AIBRIX_POD_METRIC_REFRESH_INTERVAL_MS | ||
value: "60000" | ||
- name: AIBRIX_GPU_OPTIMIZER_TRACING_FLAG | ||
value: "true" | ||
target: | ||
kind: Deployment | ||
name: gateway-plugins | ||
namespace: system | ||
version: v1 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
apiVersion: apps/v1 | ||
kind: Deployment | ||
metadata: | ||
name: gateway-plugins | ||
namespace: aibrix-system | ||
spec: | ||
replicas: 1 | ||
template: | ||
spec: | ||
affinity: | ||
nodeAffinity: # prefer not to place the gateway pod on a GPU node (soft preference, not a hard rule). | ||
preferredDuringSchedulingIgnoredDuringExecution: | ||
- weight: 100 | ||
preference: | ||
matchExpressions: | ||
- key: vke.node.gpu.schedule | ||
operator: NotIn | ||
values: | ||
- nvidia | ||
containers: | ||
- name: gateway-plugin | ||
resources: | ||
limits: | ||
cpu: "2" | ||
memory: 8Gi | ||
requests: | ||
cpu: "2" | ||
memory: 8Gi | ||
env: | ||
- name: AIBRIX_PREFIX_CACHE_TOKENIZER_TYPE | ||
value: "character" | ||
- name: AIBRIX_PREFIX_CACHE_BLOCK_SIZE | ||
value: "128" | ||
- name: AIBRIX_PREFIX_CACHE_BLOCK_NUMBER | ||
value: "200000" | ||
- name: AIBRIX_PREFIX_CACHE_POD_RUNNING_REQUEST_IMBALANCE_ABS_COUNT | ||
value: "16" | ||
- name: AIBRIX_PREFIX_CACHE_STANDARD_DEVIATION_FACTOR | ||
value: "2" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,16 @@ | ||
apiVersion: kustomize.config.k8s.io/v1beta1 | ||
kind: Kustomization | ||
|
||
namespace: aibrix-system | ||
|
||
namePrefix: aibrix- | ||
|
||
resources: | ||
- ../../../gateway/gateway-plugin | ||
- ../../dev/gateway-plugin | ||
|
||
patches: | ||
- path: gateway_plugins_patch.yaml | ||
|
||
images: | ||
- name: busybox | ||
newName: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/busybox | ||
newTag: stable | ||
- name: gateway-plugins | ||
- name: aibrix/gateway-plugins | ||
newName: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/gateway-plugins | ||
newTag: nightly |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,6 +28,9 @@ type Cache interface { | |
ModelCache | ||
MetricCache | ||
RequestTracker | ||
ProfileCache | ||
types.OutputPredictorProvider | ||
types.RouterProvider | ||
} | ||
|
||
// PodCache defines operations for pod information caching | ||
|
@@ -106,7 +109,10 @@ type MetricCache interface { | |
|
||
// RequestTracker defines operations for tracking workload statistics | ||
type RequestTracker interface { | ||
// AddRequestCount starts tracking request count | ||
// AddRequestCount tracks the start of a request after routing. | ||
// To support realtime statistics update and access, AddRequestCount can be called multiple times for a request. | ||
// As a result, implementations should ensure thread-safe access to the counter and idempotency. | ||
// | ||
// Parameters: | ||
// ctx: Routing context | ||
// requestID: Unique request identifier | ||
|
@@ -115,14 +121,18 @@ type RequestTracker interface { | |
// int64: Trace term identifier | ||
AddRequestCount(ctx *types.RoutingContext, requestID string, modelName string) (traceTerm int64) | ||
|
||
// DoneRequestCount completes request count tracking, only one DoneRequestXXX should be called for a request | ||
// DoneRequestCount tracks the completion of a request without usage information like inputTokens and outputTokens. | ||
// Only one DoneRequestXXX should be called for a request. Idempotency is not required. | ||
// | ||
// Parameters: | ||
// requestID: Unique request identifier | ||
// modelName: Name of the model | ||
// traceTerm: Trace term identifier | ||
DoneRequestCount(ctx *types.RoutingContext, requestID string, modelName string, traceTerm int64) | ||
|
||
// DoneRequestTrace completes request tracing, only one DoneRequestXXX should be called for a request | ||
// DoneRequestTrace tracks the completion of a request with usage information like inputTokens and outputTokens. | ||
// Only one DoneRequestXXX should be called for a request. Idempotency is not required. | ||
// | ||
// Parameters: | ||
// ctx: Routing context | ||
// requestID: Unique request identifier | ||
|
@@ -132,3 +142,18 @@ type RequestTracker interface { | |
// traceTerm: Trace term identifier | ||
DoneRequestTrace(ctx *types.RoutingContext, requestID string, modelName string, inputTokens, outputTokens, traceTerm int64) | ||
} | ||
|
||
// ProfileCache defines operations for model profiles | ||
type ProfileCache interface { | ||
// GetModelProfileByPod gets model profile for a pod | ||
// Parameters: | ||
// pod: Pod object | ||
// modelName: Name of the model | ||
GetModelProfileByPod(pod *v1.Pod, modelName string) (*ModelGPUProfile, error) | ||
|
||
// GetModelProfileByDeploymentName gets model profile for a deployment | ||
// Parameters: | ||
// deploymentName: Name of the deployment | ||
// modelName: Name of the model | ||
GetModelProfileByDeploymentName(deploymentName string, modelName string) (*ModelGPUProfile, error) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. TODO: We may use other objects to orchestrate pods in the future, in which case the deployment-based lookup might need to change. This looks good at the moment. One more problem: a deployment name without a namespace cannot be used to identify a deployment, so we need to add the namespace field. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If deployments were replaced by other objects, the GPU optimizer would have to change as well (it monitors deployments only). For the support of Ray clusters, let me keep a note, leave this comment open, and add an issue after merging. Can you explain the cases where "deployment without namespace can not be used to identify a deployment"? |
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
system will eventually be overwritten to
aibrix-system
here, right? Changing to system
to be aligned with the default setting? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, all other configurations use system instead of aibrix-system. Just keep consistency here.