local kube = import "k8s/configs/base.libsonnet";
local linuxserver = import "k8s/configs/templates/core/linuxserver.libsonnet";
local images = import "k8s/configs/images.libsonnet";

// Builds an HTTP GET probe definition that targets "/v1/health" on the port
// referenced as "http", re-checked every 30 seconds.
//
// delaySeconds: value emitted as `initialDelaySeconds`.
local probe(delaySeconds) =
  local healthEndpoint = { path: "/v1/health", port: "http" };
  {
    httpGet: healthEndpoint,
    periodSeconds: 30,
    initialDelaySeconds: delaySeconds,
  };

// Port number passed to the "ui" service's ClusterIP spec and to the "http"
// container port below; also exported from this file as `WebPort`.
local WebPort = 8081;

// Parameter struct for a Tabby (tabbyml/tabby) deployment built on the
// linuxserver app template.
//
// Fields listed in kube.simpleFieldStruct (`namespace`, `name`, `filePath`,
// `storageClaimName`) are expected from the caller -- presumably required;
// confirm against kube.simpleFieldStruct. `labels` and `gatekeeperSidecar`
// default to {} and null and may be overridden.
//
// `lsParams` is the derived linuxserver.AppParams object that App() passes to
// linuxserver.App.
local Params = kube.simpleFieldStruct([
  "namespace",
  "name",
  "filePath",
  "storageClaimName",
]) {
  labels: {},
  gatekeeperSidecar: null,
  lsParams: linuxserver.AppParams {
    name: $.name,
    namespace: $.namespace,
    filePath: $.filePath,
    templatePath: std.thisFile,
    baseAppName: "tabbyml",
    imageName: "tabbyml/tabby",
    // Caller-supplied labels are merged on top of the AppParams defaults.
    labels+: $.labels,
    gatekeeperSidecar: $.gatekeeperSidecar,
    // NOTE(review): presumably restricts scheduling to nodes labelled
    // gpu=nvidia -- confirm how linuxserver.AppParams consumes nodeSelector.
    nodeSelector: {
      "gpu": "nvidia",
    },
    services: [
      linuxserver.Service {
        suffix: "ui",
        spec: kube.SvcUtil.BasicHttpClusterIpSpec(WebPort)
      },
    ],
    env: linuxserver.Env {
      // NOTE(review): presumably read by the NVIDIA container runtime to
      // expose GPUs to the container -- confirm.
      others: [
        kube.NameVal("NVIDIA_VISIBLE_DEVICES", "all"),
        kube.NameVal("NVIDIA_DRIVER_CAPABILITIES", "all"),
      ],
    },
    args: [
         "serve",
         "--model", "StarCoder-1B",
         "--chat-model", "Qwen2-1.5B-Instruct",
         "--device", "cuda",
         // Derived from WebPort so the port given to the server cannot drift
         // from the service spec and container port, which also use WebPort.
         "--port", std.toString(WebPort),
    ],
    ports: [ kube.DeployUtil.ContainerPort("http", WebPort), ],
    pvcs: [
      linuxserver.Pvc{
        name: "storage",
        mountPath: "/data",
        bindName: $.storageClaimName,
      },
    ],
    resources: {
      requests: {
        cpu: "2000m",
        memory: "4Gi",
      },
      limits: {
        cpu: "5000m",
        memory: "8Gi",
      },
    },
    // Both probes use an initial delay of 240 seconds.
    livenessProbe: probe(/*delaySeconds=*/240),
    readinessProbe: probe(/*delaySeconds=*/240),
  },
};

// Builds the application by handing the `lsParams` field of `params` to
// linuxserver.App.
//
// params: an object exposing `lsParams` (e.g. a Params instance).
local App(params) =
  local lsParams = params.lsParams;
  linuxserver.App(lsParams);

// Value of this file when imported: the app constructor, the parameter
// struct, and the web port constant.
{
  App: App,
  Params: Params,
  WebPort: WebPort,
}