diff --git a/models/ollama/qwen3.5-9b-q4_k_m.yaml b/models/ollama/qwen3.5-9b-q4_k_m.yaml
new file mode 100644
index 0000000..bb181b7
--- /dev/null
+++ b/models/ollama/qwen3.5-9b-q4_k_m.yaml
@@ -0,0 +1,52 @@
+provider: ollama  # provider this model entry is registered under
+authType: api_key  # NOTE(review): confirm the target deployment of this provider actually requires an API key
+model: qwen3.5:9b-q4_K_M  # model identifier; presumably the provider's `name:tag` form - confirm against the provider's model list
+params:  # one list entry per parameter: path, label, description, group, type, optional default, range
+  - path: max_tokens  # NOTE(review): siblings like num_ctx look like native Ollama option names, max_tokens does not - confirm how the consumer maps this path
+    label: Max tokens
+    description: Maximum number of output tokens the model may generate.
+    group: generation_length
+    type: integer
+    range:  # no default is declared for this entry
+      min: 1  # lower bound only; no max is declared
+
+  - path: temperature
+    label: Temperature
+    description: Controls randomness. Lower values make outputs more focused; higher values make them more varied.
+    group: sampling
+    type: number
+    default: 0.7
+    range:  # 0 to 2 in increments of 0.1
+      min: 0
+      max: 2
+      step: 0.1
+
+  - path: top_p
+    label: Top P
+    description: Controls nucleus sampling by limiting generation to tokens within the selected cumulative probability.
+    group: sampling
+    type: number
+    default: 0.9
+    range:  # 0 to 1 in increments of 0.01
+      min: 0
+      max: 1
+      step: 0.01
+
+  - path: top_k
+    label: Top K
+    description: Limits generation to the selected number of highest-probability tokens.
+    group: sampling
+    type: integer
+    default: 40
+    range:
+      min: 1  # lower bound only; no max is declared
+
+  - path: num_ctx
+    label: Context length
+    description: Controls the context window size in tokens.
+    group: generation_length
+    type: integer
+    default: 32768  # in tokens, per the description above
+    range:  # 2048 to 131072 tokens; NOTE(review): confirm the upper bound against the model's supported context length
+      min: 2048
+      max: 131072