yesod-mirror/k8s/configs/templates/dev/ai/tabbyml.libsonnet

85 lines
1.9 KiB
Jsonnet
Raw Normal View History

local kube = import "k8s/configs/base.libsonnet";
local linuxserver = import "k8s/configs/templates/core/linuxserver.libsonnet";
local images = import "k8s/configs/images.libsonnet";
// Builds an HTTP probe definition that polls "/v1/health" on the port named
// "http" every 30 seconds.
//
// delaySeconds: value emitted as the probe's initialDelaySeconds.
local probe(delaySeconds) =
  local healthCheck = { port: "http", path: "/v1/health" };
  {
    httpGet: healthCheck,
    periodSeconds: 30,
    initialDelaySeconds: delaySeconds,
  };
// Port number shared by the "ui" service spec and the container's "http" port.
local WebPort = 8081;
// Parameters for a TabbyML deployment.
//
// The fields namespace, name, filePath and storageClaimName are declared
// through kube.simpleFieldStruct (presumably required from the caller --
// confirm against base.libsonnet). `labels` and `gatekeeperSidecar` default
// to {} and null and may be overridden. `lsParams` is the derived
// linuxserver.AppParams object that App() passes to linuxserver.App.
local Params = kube.simpleFieldStruct([
  "namespace",
  "name",
  "filePath",
  "storageClaimName",
]) {
  labels: {},
  gatekeeperSidecar: null,
  lsParams: linuxserver.AppParams {
    name: $.name,
    namespace: $.namespace,
    filePath: $.filePath,
    templatePath: std.thisFile,
    baseAppName: "tabbyml",
    imageName: "tabbyml/tabby",
    labels+: $.labels,
    gatekeeperSidecar: $.gatekeeperSidecar,
    // Schedule only onto nodes labelled gpu=nvidia.
    nodeSelector: {
      "gpu": "nvidia",
    },
    services: [
      linuxserver.Service {
        suffix: "ui",
        spec: kube.SvcUtil.BasicHttpClusterIpSpec(WebPort)
      },
    ],
    env: linuxserver.Env {
      others: [
        kube.NameVal("NVIDIA_VISIBLE_DEVICES", "all"),
        kube.NameVal("NVIDIA_DRIVER_CAPABILITIES", "all"),
      ],
    },
    args: [
      "serve",
      "--model", "StarCoder-1B",
      "--chat-model", "Qwen2-1.5B-Instruct",
      "--device", "cuda",
      // Derived from WebPort so the listening port cannot drift from the
      // service spec and container port, which use the same constant.
      "--port", std.toString(WebPort),
    ],
    ports: [ kube.DeployUtil.ContainerPort("http", WebPort), ],
    pvcs: [
      linuxserver.Pvc{
        name: "storage",
        mountPath: "/data",
        bindName: $.storageClaimName,
      },
    ],
    resources: {
      requests: {
        cpu: "2000m",
        memory: "4Gi",
      },
      limits: {
        cpu: "5000m",
        memory: "8Gi",
      },
    },
    // Both probes use the same 240 second initial delay.
    livenessProbe: probe(/*delaySeconds=*/240),
    readinessProbe: probe(/*delaySeconds=*/240),
  },
};
// Builds the application by passing the `lsParams` member of `params` to
// linuxserver.App.
local App(params) =
  local lsParams = params.lsParams;
  linuxserver.App(lsParams);
// Public surface of this template: the web port constant, the parameter
// struct, and the App(params) constructor.
{
  WebPort: WebPort,
  Params: Params,
  App: App,
}