Add a dedicated OpenAI-compatible LLM adapter #1895
New module `unstract.sdk1.adapters.llm1.openai_compatible` (46 added lines):

```python
from typing import Any

from unstract.sdk1.adapters.base1 import BaseAdapter, OpenAICompatibleLLMParameters
from unstract.sdk1.adapters.enums import AdapterTypes

DESCRIPTION = (
    "Adapter for servers that implement the OpenAI Chat Completions API "
    "(vLLM, LM Studio, self-hosted gateways, and third-party providers). "
    "Use OpenAI for the official OpenAI service."
)


class OpenAICompatibleLLMAdapter(OpenAICompatibleLLMParameters, BaseAdapter):
    @staticmethod
    def get_id() -> str:
        return "openaicompatible|b6d10f33-2c41-49fc-a8c2-58d2b247fc09"

    @staticmethod
    def get_metadata() -> dict[str, Any]:
        return {
            "name": "OpenAI Compatible",
            "version": "1.0.0",
            "adapter": OpenAICompatibleLLMAdapter,
            "description": DESCRIPTION,
            "is_active": True,
        }

    @staticmethod
    def get_name() -> str:
        return "OpenAI Compatible"

    @staticmethod
    def get_description() -> str:
        return DESCRIPTION

    @staticmethod
    def get_provider() -> str:
        return "custom_openai"

    @staticmethod
    def get_icon() -> str:
        return "/icons/adapter-icons/OpenAICompatible.png"

    @staticmethod
    def get_adapter_type() -> AdapterTypes:
        return AdapterTypes.LLM
```
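For orientation, here is a minimal sketch of how a registered adapter can be looked up and summarized. The `adapters` registry and the `Common.MODULE` key come from the SDK (the tests further down exercise exactly this lookup); the `describe_adapter` helper itself is hypothetical:

```python
from unstract.sdk1.adapters.constants import Common
from unstract.sdk1.adapters.llm1 import adapters


def describe_adapter(adapter_id: str) -> str:
    """Hypothetical helper: summarize a registered adapter by its id."""
    adapter_cls = adapters[adapter_id][Common.MODULE]  # registry entry -> class
    meta = adapter_cls.get_metadata()
    return f"{meta['name']} v{meta['version']}: {meta['description']}"


# The id is stable, so callers can pin it directly:
print(describe_adapter("openaicompatible|b6d10f33-2c41-49fc-a8c2-58d2b247fc09"))
```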
New JSON schema for the adapter's configuration form (61 added lines):

```json
{
  "title": "OpenAI Compatible LLM",
  "type": "object",
  "required": [
    "adapter_name",
    "api_base"
  ],
  "properties": {
    "adapter_name": {
      "type": "string",
      "title": "Name",
      "default": "",
      "description": "Provide a unique name for this adapter instance. Example: compatible-gateway-1"
    },
    "api_key": {
      "type": [
        "string",
        "null"
      ],
      "title": "API Key",
      "format": "password",
      "description": "API key for your OpenAI-compatible endpoint. Leave empty if the endpoint does not require one."
    },
    "model": {
      "type": "string",
      "title": "Model",
      "default": "gpt-4o-mini",
      "description": "The model name expected by your OpenAI-compatible endpoint. Examples: gateway-model, gpt-4o-mini, openai/gpt-4o"
    },
    "api_base": {
      "type": "string",
      "format": "url",
      "title": "API Base",
      "description": "Base URL for the OpenAI-compatible endpoint. Examples: https://gateway.example.com/v1, https://llm.example.net/openai/v1"
    },
    "max_tokens": {
      "type": "number",
      "minimum": 0,
      "multipleOf": 1,
      "title": "Maximum Output Tokens",
      "default": 4096,
      "description": "Maximum number of output tokens to limit LLM replies. Leave it empty to use the provider default."
    },
    "max_retries": {
      "type": "number",
      "minimum": 0,
      "multipleOf": 1,
      "title": "Max Retries",
      "default": 5,
      "description": "The maximum number of times to retry a request if it fails."
    },
    "timeout": {
      "type": "number",
      "minimum": 0,
      "multipleOf": 1,
      "title": "Timeout",
      "default": 900,
      "description": "Timeout in seconds."
    }
  }
}
```

Review thread on `api_key` (resolved):

> **Contributor** [Medium]: Consider either tightening the schema or validating the value. As-is, the description ("Leave empty if the endpoint does not require one") silently papers over a common misconfiguration.
>
> **Author**: Added the smaller validation step here: blank or whitespace-only `api_key` values are now normalized to `None`, so the keyless-endpoint case stays explicit without expanding the schema.
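A minimal sketch of the normalization the author describes, assuming `OpenAICompatibleLLMParameters.validate` behaves as a plain dict-in/dict-out transform (the real SDK implementation may differ); it matches the three behaviors the tests below assert:

```python
def validate(config: dict) -> dict:
    """Sketch of the adapter parameter validation, under stated assumptions.

    - A blank or whitespace-only api_key becomes None (keyless endpoint).
    - The model name gets the provider prefix unless it already has one.
    """
    validated = dict(config)

    api_key = validated.get("api_key")
    if api_key is None or not api_key.strip():
        validated["api_key"] = None  # keep the "no auth" case explicit

    model = validated["model"]
    if not model.startswith("custom_openai/"):
        validated["model"] = f"custom_openai/{model}"

    return validated


# Blank key is normalized; model is routed via the custom_openai provider.
assert validate({"api_key": " ", "model": "gateway-model"}) == {
    "api_key": None,
    "model": "custom_openai/gateway-model",
}
```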
Change to `_record_usage` in `unstract.sdk1.llm`: prompt-token accounting now prefers provider-reported usage and falls back to local estimation only when the provider omits it.

```diff
@@ -539,10 +539,22 @@ def _record_usage(
         usage: Mapping[str, int] | None,
         llm_api: str,
     ) -> None:
-        prompt_tokens = token_counter(model=model, messages=messages)
         usage_data: Mapping[str, int] = usage or {}
+        prompt_tokens = usage_data.get("prompt_tokens")
+        if prompt_tokens is None:
+            try:
+                prompt_tokens = token_counter(model=model, messages=messages)
+            except Exception as e:
+                prompt_tokens = 0
+                logger.warning(
+                    "[sdk1][LLM][%s][%s] Failed to estimate prompt tokens; "
+                    "recording 0 prompt tokens for usage audit: %s",
+                    model,
+                    llm_api,
+                    e,
+                )
         all_tokens = TokenCounterCompat(
-            prompt_tokens=usage_data.get("prompt_tokens", 0),
+            prompt_tokens=prompt_tokens or 0,
             completion_tokens=usage_data.get("completion_tokens", 0),
             total_tokens=usage_data.get("total_tokens", 0),
         )
```

Review thread on lines +543 to +555:

> **Contributor**: @pk-zipstack @johnyrahul is this a safe change?
>
> **Author**: Kept this scoped to usage accounting only. It still uses provider-reported prompt tokens when available, only estimates when they are missing, and the fallback paths are covered by tests now.

Review thread on lines +543 to 560:

> **Contributor**: Silent zero-token recording risks corrupting billing/usage audit data: when `token_counter` fails, the fallback records 0 prompt tokens with nothing but a log line. Consider surfacing the failure more visibly, since a warning log alone is easy to miss in aggregated usage reports. This answers the question raised in the prior review thread on this range.
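To make the new precedence easy to see at a glance, here is a condensed, self-contained sketch of the merged logic (an illustration, not the SDK source; `estimate` stands in for the `token_counter(model=..., messages=...)` call):

```python
from collections.abc import Callable, Mapping


def resolve_prompt_tokens(
    usage: Mapping[str, int | None] | None,
    estimate: Callable[[], int],
) -> int:
    """Illustration of the prompt-token precedence in _record_usage.

    1. Prefer provider-reported prompt_tokens from the usage payload.
    2. Fall back to a local estimate when the provider omits them.
    3. Record 0 (the real code also logs a warning) if estimation fails,
       e.g. for models the token counter does not recognize.
    """
    usage_data = usage or {}
    reported = usage_data.get("prompt_tokens")
    if reported is not None:
        return reported
    try:
        return estimate()
    except Exception:
        return 0


# The three cases the tests below pin down:
assert resolve_prompt_tokens({"prompt_tokens": 3}, lambda: 9) == 3     # reported wins
assert resolve_prompt_tokens({"prompt_tokens": None}, lambda: 9) == 9  # estimated
assert resolve_prompt_tokens(None, lambda: 1 // 0) == 0                # failure -> 0
```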
New test module (192 added lines) covering adapter registration, parameter validation, the JSON schema, and the `_record_usage` fallback paths:

```python
import json
from functools import lru_cache
from importlib import import_module
from unittest.mock import MagicMock, patch

from unstract.sdk1.adapters.base1 import OpenAICompatibleLLMParameters
from unstract.sdk1.adapters.constants import Common
from unstract.sdk1.adapters.llm1 import adapters
from unstract.sdk1.adapters.llm1.openai_compatible import OpenAICompatibleLLMAdapter

OPENAI_COMPATIBLE_DESCRIPTION = (
    "Adapter for servers that implement the OpenAI Chat Completions API "
    "(vLLM, LM Studio, self-hosted gateways, and third-party providers). "
    "Use OpenAI for the official OpenAI service."
)


@lru_cache(maxsize=1)
def _load_llm_module() -> object:
    import sys
    from types import ModuleType

    with patch.dict(
        sys.modules,
        {
            # Stub python-magic so importing LLM does not depend on libmagic
            # being available in the test environment.
            "magic": ModuleType("magic")
        },
    ):
        return import_module("unstract.sdk1.llm")


def _load_llm_class() -> type:
    return _load_llm_module().LLM


def test_openai_compatible_adapter_is_registered() -> None:
    adapter_id = OpenAICompatibleLLMAdapter.get_id()

    assert adapter_id in adapters
    assert adapters[adapter_id][Common.MODULE] is OpenAICompatibleLLMAdapter


def test_openai_compatible_validate_prefixes_model() -> None:
    validated = OpenAICompatibleLLMParameters.validate(
        {
            "api_base": "https://gateway.example.com/v1",
            "api_key": "test-key",
            "model": "gateway-model",
        }
    )

    assert validated["model"] == "custom_openai/gateway-model"


def test_openai_compatible_validate_preserves_prefixed_model() -> None:
    validated = OpenAICompatibleLLMParameters.validate(
        {
            "api_base": "https://gateway.example.com/v1",
            "model": "custom_openai/openai/gpt-4o",
        }
    )

    assert validated["model"] == "custom_openai/openai/gpt-4o"
    assert validated["api_key"] is None


def test_openai_compatible_validate_normalizes_blank_api_key_to_none() -> None:
    validated = OpenAICompatibleLLMParameters.validate(
        {
            "api_base": "https://gateway.example.com/v1",
            "api_key": " ",
            "model": "gateway-model",
        }
    )

    assert validated["api_key"] is None


def test_openai_compatible_schema_is_loadable() -> None:
    schema = json.loads(OpenAICompatibleLLMAdapter.get_json_schema())

    assert schema["title"] == "OpenAI Compatible LLM"
    assert schema["properties"]["api_key"]["type"] == ["string", "null"]
    assert "gateway-model" in schema["properties"]["model"]["description"]
    assert "ERNIE" not in schema["properties"]["model"]["description"]
    assert "qianfan" not in schema["properties"]["api_base"]["description"].lower()
    assert "default" not in schema["properties"]["api_base"]


def test_openai_compatible_adapter_uses_distinct_description_and_icon() -> None:
    metadata = OpenAICompatibleLLMAdapter.get_metadata()

    assert OpenAICompatibleLLMAdapter.get_description() == OPENAI_COMPATIBLE_DESCRIPTION
    assert metadata["description"] == OPENAI_COMPATIBLE_DESCRIPTION
    assert OpenAICompatibleLLMAdapter.get_icon() == (
        "/icons/adapter-icons/OpenAICompatible.png"
    )


def test_record_usage_uses_reported_prompt_tokens_without_estimating() -> None:
    llm_module = _load_llm_module()
    llm_cls = llm_module.LLM

    llm = llm_cls.__new__(llm_cls)
    llm._platform_api_key = "platform-key"
    llm.platform_kwargs = {"run_id": "run-1"}
    llm.adapter = MagicMock()
    llm.adapter.get_provider.return_value = "custom_openai"

    with (
        patch.object(llm_module, "token_counter") as mock_token_counter,
        patch.object(llm_module, "Audit") as mock_audit,
    ):
        llm._record_usage(
            model="custom_openai/gateway-model",
            messages=[{"role": "user", "content": "hello"}],
            usage={"prompt_tokens": 3, "completion_tokens": 4, "total_tokens": 7},
            llm_api="complete",
        )

    mock_token_counter.assert_not_called()
    mock_audit.return_value.push_usage_data.assert_called_once()
    assert (
        mock_audit.return_value.push_usage_data.call_args.kwargs[
            "token_counter"
        ].prompt_llm_token_count
        == 3
    )


def test_record_usage_tolerates_unmapped_models_without_prompt_tokens() -> None:
    llm_module = _load_llm_module()
    llm_cls = llm_module.LLM

    llm = llm_cls.__new__(llm_cls)
    llm._platform_api_key = "platform-key"
    llm.platform_kwargs = {"run_id": "run-1"}
    llm.adapter = MagicMock()
    llm.adapter.get_provider.return_value = "custom_openai"

    with (
        patch.object(llm_module, "token_counter", side_effect=Exception("unmapped")),
        patch.object(llm_module, "Audit") as mock_audit,
        patch.object(llm_module.logger, "warning") as mock_warning,
    ):
        llm._record_usage(
            model="custom_openai/gateway-model",
            messages=[{"role": "user", "content": "hello"}],
            usage={"completion_tokens": 4, "total_tokens": 7},
            llm_api="complete",
        )

    mock_audit.return_value.push_usage_data.assert_called_once()
    assert (
        mock_audit.return_value.push_usage_data.call_args.kwargs[
            "token_counter"
        ].prompt_llm_token_count
        == 0
    )
    assert "recording 0 prompt tokens for usage audit" in mock_warning.call_args.args[0]


def test_record_usage_uses_estimated_prompt_tokens_when_usage_has_none() -> None:
    llm_module = _load_llm_module()
    llm_cls = llm_module.LLM

    llm = llm_cls.__new__(llm_cls)
    llm._platform_api_key = "platform-key"
    llm.platform_kwargs = {"run_id": "run-1"}
    llm.adapter = MagicMock()
    llm.adapter.get_provider.return_value = "custom_openai"

    with (
        patch.object(llm_module, "token_counter", return_value=9) as mock_token_counter,
        patch.object(llm_module, "Audit") as mock_audit,
    ):
        llm._record_usage(
            model="custom_openai/gateway-model",
            messages=[{"role": "user", "content": "hello"}],
            usage={"prompt_tokens": None, "completion_tokens": 4, "total_tokens": 13},
            llm_api="complete",
        )

    mock_token_counter.assert_called_once()
    assert (
        mock_audit.return_value.push_usage_data.call_args.kwargs[
            "token_counter"
        ].prompt_llm_token_count
        == 9
    )
```