continuedev · ai-auxen · May 22, 2026
@@ -0,0 +1,24 @@
+import OpenAI from "./OpenAI.js";
+
+import type { LLMOptions } from "../../index.js";
+
+/**
+ * LLM provider for Auxen (https://auxen.ai), which serves open-source models
+ * through OpenAI-compatible endpoints.
+ *
+ * An Auxen instance is a dedicated, per-customer GPU hosting a single model.
+ * It is reached at a stable HTTPS URL shaped like
+ *   https://api.auxen.ai/v1/<instance_id>/v1
+ * and requests are authorized with that instance's `auxk_*` bearer token.
+ *
+ * The URL differs for every instance, so this class deliberately ships no
+ * default `apiBase`; the value issued by the Auxen dashboard has to be set
+ * as `apiBase` in the user's config.
+ */
+class Auxen extends OpenAI {
+  static providerName = "auxen";
+
+  // Intentionally empty: there is no shared endpoint to default `apiBase` to.
+  static defaultOptions: Partial<LLMOptions> = {};
+}
+
+export default Auxen;
@@ -11,6 +11,7 @@ import { renderTemplatedString } from "../../util/handlebars/renderTemplatedStri
 import { BaseLLM } from "../index";
 import Anthropic from "./Anthropic";
 import Asksage from "./Asksage";
+import Auxen from "./Auxen";
 import Azure from "./Azure";
 import Bedrock from "./Bedrock";
 import BedrockImport from "./BedrockImport";
@@ -120,6 +121,7 @@ export const LLMClasses = [
   TestLLM,
   Cerebras,
   Asksage,
+  Auxen,
   Nebius,
   Nous,
   Venice,

@@ -0,0 +1,62 @@
+---
+title: Auxen
+slug: ../auxen
+---
+
+[Auxen](https://auxen.ai) hosts per-customer **dedicated** LLM endpoints (Llama 3.1/3.2, Qwen 2.5, Mistral, Gemma 2, Mixtral, Phi-3, Command R) on stable HTTPS URLs with an OpenAI-compatible `/v1/chat/completions` API. Each Auxen instance is a dedicated GPU billed per minute of runtime, not per token.
+
+<Info>
+  Provision an instance from the [Auxen dashboard](https://auxen.ai). You will be issued two values:
+
+  - A per-instance **base URL** of the form `https://api.auxen.ai/v1/inst_xxx/v1`
+  - A per-instance **API key** prefixed `auxk_`
+</Info>
+
+## Configuration
+
+<Tabs>
+   <Tab title="YAML">
+   ```yaml title="config.yaml"
+   name: My Auxen Config
+   version: 0.0.1
+   schema: v1
+
+   models:
+     - name: Auxen Llama 3.1 8B
+       provider: auxen
+       model: llama-3.1-8b
+       apiBase: https://api.auxen.ai/v1/inst_xxx/v1
+       apiKey: auxk_...
+   ```
+   </Tab>
+   <Tab title="JSON">
+   ```json title="config.json"
+   {
+     "models": [
+       {
+         "title": "Auxen Llama 3.1 8B",
+         "provider": "auxen",
+         "model": "llama-3.1-8b",
+         "apiBase": "https://api.auxen.ai/v1/inst_xxx/v1",
+         "apiKey": "auxk_..."
+       }
+     ]
+   }
+   ```
+   </Tab>
+</Tabs>
+
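+The `model` field should match the model your Auxen instance is serving. Each Auxen instance is provisioned with one model at creation time. The `apiBase` field is required: every instance has its own URL, so the provider has no default to fall back on.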
+
+## Catalog
+
+Auxen-hosted models:
+
+- `llama-3.1-8b`, `llama-3.1-70b`, `llama-3.2-3b`
+- `qwen2.5-7b`, `qwen2.5-14b`, `qwen2.5-32b`
+- `mistral-7b`, `mistral-nemo-12b`, `mixtral-8x7b`
+- `gemma2-9b`, `phi-3-mini`, `command-r-7b`
+
+## Pricing
+
+Auxen bills per minute of dedicated GPU runtime, not per token. Rates are listed per hour and vary by model size; see [auxen.ai/pricing](https://auxen.ai/pricing).
@@ -236,7 +236,8 @@
             "ovhcloud",
             "venice",
             "inception",
-            "tars"
+            "tars",
+            "auxen"
           ],
           "markdownEnumDescriptions": [
             "### OpenAI\nUse gpt-4, gpt-3.5-turbo, or any other OpenAI model. See [here](https://openai.com/product#made-for-developers) to obtain an API key.\n\n> [Reference](https://docs.continue.dev/reference/Model%20Providers/openai)",
@@ -289,7 +290,8 @@
             "### OVHcloud AI Endpoints is a serverless inference API that provides access to a curated selection of models (e.g., Llama, Mistral, Qwen, Deepseek). It is designed with security and data privacy in mind and is compliant with GDPR. To get started, create an API key on the OVHcloud [AI Endpoints website](https://endpoints.ai.cloud.ovh.net/). For more information, including pricing, visit the OVHcloud [AI Endpoints product page](https://www.ovhcloud.com/en/public-cloud/ai-endpoints/).",
             "### Venice\n Venice.AI is a privacy-focused generative AI platform, allowing users to interact with open-source LLMs without storing any private user data.\nHosted models support the OpenAI API standard, providing seamless integration for users seeking privacy and flexibility.\nTo get started with the Venice API, either purchase a pro account, stake $VVV for daily inference allotments, or fund your account with USD.\nVisit the [API settings page](https://venice.ai/settings/api) or learn more at the [Venice API documentation](https://venice.ai/api).",
             "### Inception\n Inception Labs offer a new generation of diffusion-based LLMs.\nVisit the [API settings page](https://platform.inceptionlabs.ai/) or learn more at the [Inception docs](https://platform.inceptionlabs.ai/docs).",
-            "### TARS\nTARS is an OpenAI-compatible proxy router. To get started, obtain an API key and configure the provider in your config.json."
+            "### TARS\nTARS is an OpenAI-compatible proxy router. To get started, obtain an API key and configure the provider in your config.json.",
+            "### Auxen\n[Auxen](https://auxen.ai) hosts per-customer dedicated LLM endpoints (Llama, Qwen, Mistral, Gemma, Mixtral, Phi, Command R) on a per-instance HTTPS URL with an OpenAI-compatible API, billed per-minute of dedicated GPU runtime. Provision an instance from the [Auxen dashboard](https://auxen.ai) to obtain your per-instance `apiBase` (e.g. `https://api.auxen.ai/v1/inst_xxx/v1`) and `auxk_*` API key."
           ],
           "type": "string"
         },
@@ -536,7 +538,8 @@
                   "kindo",
                   "scaleway",
                   "ovhcloud",
-                  "venice"
+                  "venice",
+                  "auxen"
                 ]
               }
             },

@@ -166,6 +166,12 @@ export function constructLlmApi(config: LLMConfig): BaseLlmApi | undefined {
       return openAICompatible("https://api.scaleway.ai/v1/", config);
     case "fireworks":
       return openAICompatible("https://api.fireworks.ai/inference/v1/", config);
+    case "auxen":
+      // Auxen endpoints are per-instance, so the `apiBase` from the user's
+      // config (issued by the Auxen dashboard) is the URL that should be
+      // used. The root below is only a placeholder default for
+      // openAICompatible(); it is not a usable instance endpoint.
+      return openAICompatible("https://api.auxen.ai/v1/", config);
     case "together":
       return openAICompatible("https://api.together.xyz/v1/", config);
     case "ncompass":

@@ -33,6 +33,7 @@ export const OpenAIConfigSchema = BasePlusConfig.extend({
   useResponsesApi: z.boolean().optional(),
   provider: z.union([
     z.literal("openai"),
+    z.literal("auxen"),
     z.literal("mistral"),
     z.literal("voyage"),
     z.literal("deepinfra"),