Skip to content

Commit

Permalink
Add routing algorithms in Gateway plugins (#143)
Browse files Browse the repository at this point in the history
* Add routing algorithms for least request and throughput based

* add local cache request tracker

* use local cache

* add redis for local testing

* remove each time initialization

* add separate service account for gateway-plugin

---------

Co-authored-by: varungupta <[email protected]>
  • Loading branch information
varungup90 and varungupta authored Sep 12, 2024
1 parent 6d28e0e commit 0316a20
Show file tree
Hide file tree
Showing 16 changed files with 454 additions and 105 deletions.
1 change: 0 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,6 @@ install: manifests kustomize ## Install CRDs into the K8s cluster specified in ~
## helm creates objects without aibrix prefix, hence deploying gateway components outside of kustomization
$(KUBECTL) create -k config/dependency


.PHONY: uninstall
uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.
$(KUSTOMIZE) build config/crd | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f -
Expand Down
57 changes: 25 additions & 32 deletions cmd/plugins/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,15 @@ import (
"syscall"
"time"

"github.com/aibrix/aibrix/pkg/plugins/gateway"
ratelimiter "github.com/aibrix/aibrix/pkg/plugins/gateway/rate_limiter"
redis "github.com/redis/go-redis/v9"
"google.golang.org/grpc"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"

"github.com/aibrix/aibrix/pkg/cache"
"github.com/aibrix/aibrix/pkg/plugins/gateway"
ratelimiter "github.com/aibrix/aibrix/pkg/plugins/gateway/rate_limiter"
extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
healthPb "google.golang.org/grpc/health/grpc_health_v1"
)
Expand All @@ -54,36 +55,7 @@ func getEnv(key, defaultValue string) string {
return value
}

//+kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch;create;update;patch;delete

// createClient builds a Kubernetes clientset.
//
// If kubeconfigPath is non-empty, the REST configuration is loaded from that
// kubeconfig file; otherwise the in-cluster configuration is used.
//
// It returns a nil client together with a non-nil error when the
// configuration cannot be loaded or the clientset cannot be constructed.
// The underlying error is wrapped with %w, so callers can inspect the cause
// with errors.Is / errors.As.
func createClient(kubeconfigPath string) (kubernetes.Interface, error) {
	var (
		kubeconfig *rest.Config
		err        error
	)

	if kubeconfigPath != "" {
		// An explicit kubeconfig path was supplied: load it from disk.
		kubeconfig, err = clientcmd.BuildConfigFromFlags("", kubeconfigPath)
		if err != nil {
			return nil, fmt.Errorf("unable to load kubeconfig from %s: %w", kubeconfigPath, err)
		}
	} else {
		// No path supplied: fall back to the in-cluster configuration.
		kubeconfig, err = rest.InClusterConfig()
		if err != nil {
			return nil, fmt.Errorf("unable to load in-cluster config: %w", err)
		}
	}

	client, err := kubernetes.NewForConfig(kubeconfig)
	if err != nil {
		return nil, fmt.Errorf("unable to create a client: %w", err)
	}

	return client, nil
}

// TODO (varun): one or multi plugin ext_proc
func main() {
kubeconfig := flag.String("kubeconfig", "", "absolute path to the kubeconfig file")
flag.IntVar(&grpc_port, "port", 50052, "gRPC port")
flag.Parse()

Expand All @@ -98,8 +70,29 @@ func main() {
}
fmt.Println("Connected to Redis:", pong)

fmt.Println("Starting cache")
stopCh := make(chan struct{})
defer close(stopCh)
var config *rest.Config

// ref: https://github.com/kubernetes-sigs/controller-runtime/issues/878#issuecomment-1002204308
kubeConfig := flag.Lookup("kubeconfig").Value.String()
if kubeConfig == "" {
log.Printf("using in-cluster configuration")
config, err = rest.InClusterConfig()
} else {
log.Printf("using configuration from '%s'", kubeConfig)
config, err = clientcmd.BuildConfigFromFlags("", kubeConfig)
}

if err != nil {
panic(err)
}

cache.NewCache(config, stopCh)

// Connect to K8s cluster
k8sClient, err := createClient(*kubeconfig)
k8sClient, err := kubernetes.NewForConfig(config)
if err != nil {
log.Fatal("Error creating kubernetes client:", err)
}
Expand Down
2 changes: 1 addition & 1 deletion config/gateway/gateway-plugin.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,4 @@ spec:
valueFrom:
fieldRef:
fieldPath: metadata.namespace

serviceAccountName: aibrix-gateway-plugin
1 change: 1 addition & 0 deletions config/rbac/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ resources:
# subjects if changing service account names.
- service_account.yaml
- role.yaml
- role_gateway.yaml
- role_binding.yaml
- leader_election_role.yaml
- leader_election_role_binding.yaml
Expand Down
16 changes: 16 additions & 0 deletions config/rbac/role_binding.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,19 @@ subjects:
- kind: ServiceAccount
name: controller-manager
namespace: system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
app.kubernetes.io/name: aibrix
app.kubernetes.io/managed-by: kustomize
name: gateway-plugin-rolebinding
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: gateway-plugin-role
subjects:
- kind: ServiceAccount
name: gateway-plugin
namespace: system
30 changes: 30 additions & 0 deletions config/rbac/role_gateway.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: gateway-plugin-role
rules:
- apiGroups:
- ""
resources:
- pods
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- model.aibrix.ai
resources:
- modeladapters
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
9 changes: 9 additions & 0 deletions config/rbac/service_account.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,12 @@ metadata:
app.kubernetes.io/managed-by: kustomize
name: controller-manager
namespace: system
---
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
app.kubernetes.io/name: aibrix
app.kubernetes.io/managed-by: kustomize
name: gateway-plugin
namespace: system
47 changes: 46 additions & 1 deletion docs/development/app/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,29 @@ curl http://localhost:8000/v1/chat/completions \
}'
```

```shell
kubectl delete -f docs/development/app/deployment.yaml
```


## Test with envoy gateway

Install Envoy Gateway and set up the HTTP route
```shell
# If setting up from scratch:

make docker-build && make docker-build-plugins
make install && make deploy

# OR

# If you only want to test the gateway plugins:

docker build -t aibrix/plugins:v0.1.0 -f gateway.Dockerfile .
kind load docker-image aibrix/plugins:v0.1.0

make install && make deploy
kubectl -n aibrix-system apply -f docs/development/app/redis.yaml
kubectl -n aibrix-system apply -f docs/development/app/gateway-plugin.yaml
```
Check status
Expand Down Expand Up @@ -73,11 +85,44 @@ curl -v http://localhost:8888/v1/chat/completions \
"messages": [{"role": "user", "content": "Say this is a test!"}],
"temperature": 0.7
}'
# least-request based
for i in {1..10}; do
curl -v http://localhost:8888/v1/chat/completions \
-H "user: your-user-name" \
-H "routing-strategy: least-request" \
-H "model: llama2-70b" \
-H "Content-Type: application/json" \
-H "Authorization: Bearer any_key" \
-d '{
"model": "llama2-70b",
"messages": [{"role": "user", "content": "Say this is a test!"}],
"temperature": 0.7
}' &
done
# throughput based
for i in {1..10}; do
curl -v http://localhost:8888/v1/chat/completions \
-H "user: your-user-name" \
-H "routing-strategy: throughput" \
-H "model: llama2-70b" \
-H "Content-Type: application/json" \
-H "Authorization: Bearer any_key" \
-d '{
"model": "llama2-70b",
"messages": [{"role": "user", "content": "Say this is a test!"}],
"temperature": 0.7
}' &
done
```
Delete envoy gateway and corresponding objects
```shell
kubectl -n aibrix-system delete -f docs/development/app/gateway-plugin.yaml
kubectl -n aibrix-system delete -f docs/development/app/redis.yaml
# OR
make undeploy && make uninstall
```
Expand Down
50 changes: 1 addition & 49 deletions docs/development/app/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,52 +71,4 @@ spec:
# value: llama2-70b
# backendRefs:
# - name: llama2-70b
# port: 8000
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: pod-reader
namespace: aibrix-system
rules:
- apiGroups: [""]
resources: ["pods"]
verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: read-pods
namespace: aibrix-system
subjects:
- kind: ServiceAccount
name: default
namespace: aibrix-system
roleRef:
kind: Role
name: pod-reader
apiGroup: rbac.authorization.k8s.io
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
namespace: aibrix-system
name: deployment-reader
rules:
- apiGroups: ["apps"]
resources: ["deployments"]
verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: deployment-reader-binding
namespace: aibrix-system
subjects:
- kind: ServiceAccount
name: default
namespace: aibrix-system
roleRef:
kind: Role
name: deployment-reader
apiGroup: rbac.authorization.k8s.io
# port: 8000
47 changes: 47 additions & 0 deletions docs/development/app/gateway-plugin.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
apiVersion: v1
kind: Service
metadata:
name: aibrix-gateway-plugins
namespace: aibrix-system
spec:
selector:
app: aibrix-gateway-plugins
ports:
- protocol: TCP
port: 50052
targetPort: 50052
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: aibrix-gateway-plugins
namespace: aibrix-system
spec:
replicas: 1
selector:
matchLabels:
app: aibrix-gateway-plugins
template:
metadata:
labels:
app: aibrix-gateway-plugins
spec:
containers:
- name: golang-app-container
image: aibrix/plugins:v0.1.0
ports:
- containerPort: 50052
env:
- name: REDIS_HOST
value: aibrix-redis-master
- name: REDIS_PORT
value: "6379"
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace

Loading

0 comments on commit 0316a20

Please sign in to comment.