diff --git a/gallery/index.yaml b/gallery/index.yaml index d54fc8a582d6..7d4dd81b0070 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1,4 +1,41 @@ --- +- name: "glm-4.7-reap-268b-a32b" + url: "github:mudler/LocalAI/gallery/virtual.yaml@master" + urls: + - https://huggingface.co/mradermacher/GLM-4.7-REAP-268B-A32B-GGUF + description: | + **Model Name:** GLM-4.7-REAP-268B-A32B + **Base Model:** `cerebras/GLM-4.7-REAP-268B-A32B` + **Quantization:** Q4_K_M + **Format:** GGUF + **Use Cases:** Text generation, understanding, and reasoning. + **Optimization:** Efficient for deployment with low memory requirements. + **Features:** + - Based on the original GLM-4.7 model. + - Quantized for efficiency (Q4_K_M). + - Supports multi-part file concatenation. + - Optimized for speed and performance. + + This model is a quantized version of the original GLM-4.7, tailored for efficient deployment with minimal memory usage. + overrides: + parameters: + model: llama-cpp/models/GLM-4.7-REAP-268B-A32B.Q4_K_M.gguf + name: GLM-4.7-REAP-268B-A32B-GGUF + backend: llama-cpp + template: + use_tokenizer_template: true + known_usecases: + - chat + function: + grammar: + disable: true + description: Imported from https://huggingface.co/mradermacher/GLM-4.7-REAP-268B-A32B-GGUF + options: + - use_jinja:true + files: + - filename: llama-cpp/models/GLM-4.7-REAP-268B-A32B.Q4_K_M.gguf + sha256: fdf2d9293b70b361ec9a78bb39d78cfc09a197993f4345bd916d353c4a55120c + uri: https://huggingface.co/mradermacher/GLM-4.7-REAP-268B-A32B-GGUF/resolve/main/GLM-4.7-REAP-268B-A32B.Q4_K_M.gguf - &qwen-tts urls: - https://huggingface.co/Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice