// Jsonnet template for a TabbyML ("tabbyml/tabby") deployment built on top of
// the shared linuxserver app template.
//
// Exports:
//   Params  - parameter struct callers extend to configure an instance.
//   WebPort - port the server listens on.
//   App     - function turning a Params object into the rendered app.
local kube = import "k8s/configs/base.libsonnet";
local linuxserver = import "k8s/configs/templates/core/linuxserver.libsonnet";
local images = import "k8s/configs/images.libsonnet";

// Builds an HTTP GET probe against /v1/health on the container port named
// "http", polled every 30 seconds.
//
// delaySeconds: value used for the probe's initialDelaySeconds.
local probe(delaySeconds) = {
  initialDelaySeconds: delaySeconds,
  periodSeconds: 30,
  httpGet: {
    path: "/v1/health",
    port: "http",
  },
};

// Single source of truth for the listening port: it feeds the Service spec,
// the container port, and the server's --port argument below.
local WebPort = 8081;

// Parameters for one instance. The four names passed to simpleFieldStruct are
// the fields callers are expected to supply (NOTE(review): presumably treated
// as required by kube.simpleFieldStruct - confirm in base.libsonnet).
local Params = kube.simpleFieldStruct([
  "namespace",
  "name",
  "filePath",
  "storageClaimName",
]) {
  // Optional extras; merged into / forwarded to lsParams below.
  labels: {},
  gatekeeperSidecar: null,

  // Parameters handed to linuxserver.App. `$` refers to this Params object,
  // so caller-provided fields flow through.
  lsParams: linuxserver.AppParams {
    name: $.name,
    namespace: $.namespace,
    filePath: $.filePath,
    templatePath: std.thisFile,
    baseAppName: "tabbyml",
    imageName: "tabbyml/tabby",
    labels+: $.labels,
    gatekeeperSidecar: $.gatekeeperSidecar,
    // Schedule only onto nodes labelled gpu=nvidia.
    nodeSelector: {
      "gpu": "nvidia",
    },
    services: [
      linuxserver.Service {
        suffix: "ui",
        spec: kube.SvcUtil.BasicHttpClusterIpSpec(WebPort),
      },
    ],
    env: linuxserver.Env {
      // NVIDIA container runtime settings; both are set to "all".
      others: [
        kube.NameVal("NVIDIA_VISIBLE_DEVICES", "all"),
        kube.NameVal("NVIDIA_DRIVER_CAPABILITIES", "all"),
      ],
    },
    args: [
      "serve",
      "--model",
      "StarCoder-1B",
      "--chat-model",
      "Qwen2-1.5B-Instruct",
      "--device",
      "cuda",
      "--port",
      // Derived from WebPort so the server, Service and container port can
      // never drift apart; renders as "8081" today.
      std.toString(WebPort),
    ],
    ports: [
      kube.DeployUtil.ContainerPort("http", WebPort),
    ],
    pvcs: [
      // Mounts the caller-named claim at /data.
      linuxserver.Pvc {
        name: "storage",
        mountPath: "/data",
        bindName: $.storageClaimName,
      },
    ],
    resources: {
      requests: {
        cpu: "2000m",
        memory: "4Gi",
      },
      limits: {
        cpu: "5000m",
        memory: "8Gi",
      },
    },
    // NOTE(review): the 240s initial delay is presumably sized to cover model
    // download/load at startup - confirm before tuning.
    livenessProbe: probe(/*delaySeconds=*/240),
    readinessProbe: probe(/*delaySeconds=*/240),
  },
};

// Renders the app from a Params object by delegating to the linuxserver
// template with params.lsParams.
local App(params) = linuxserver.App(params.lsParams);

{
  Params: Params,
  WebPort: WebPort,
  App(params): App(params),
}