> ## Documentation Index
> Fetch the complete documentation index at: https://docs.geekhub.mx/llms.txt
> Use this file to discover all available pages before exploring further.

# POST /v1/chat/completions

> Chat response generation with any model

## Request body

<ParamField body="model" type="string" required>
  The namespaced model ID, e.g. `anthropic/claude-sonnet-4-6`. See [Chat models](/en/models/chat).
</ParamField>

<ParamField body="messages" type="array" required>
  Conversation messages.

  <Expandable title="Each message structure">
    <ParamField body="role" type="'system' | 'user' | 'assistant' | 'tool'" required>
      Message role. `system` defines behavior, `user` is the human, `assistant` is the model, and `tool` carries the result of a tool call.
    </ParamField>

    <ParamField body="content" type="string" required>
      Message content.
    </ParamField>

    <ParamField body="name" type="string">
      Optional speaker identifier.
    </ParamField>
  </Expandable>
</ParamField>

<ParamField body="temperature" type="number" default="varies by model">
  Between 0 and 2. Higher = more creative, lower = more deterministic.
</ParamField>

<ParamField body="top_p" type="number">
  Nucleus sampling threshold, between 0 and 1. An alternative to `temperature` for controlling randomness.
</ParamField>

<ParamField body="max_tokens" type="integer">
  Maximum tokens to generate. Default varies by model.
</ParamField>

<ParamField body="stream" type="boolean" default="false">
  If `true`, the response is sent as Server-Sent Events. See the [Streaming](#streaming) section below.
</ParamField>

<ParamField body="stop" type="string | string[]">
  Sequences that end generation.
</ParamField>

## Response (non-streaming)

<ResponseField name="id" type="string">
  Your `request_id` (format `req_<24hex>`). Useful for tracing.
</ResponseField>

<ResponseField name="object" type="string">
  Always `"chat.completion"`.
</ResponseField>

<ResponseField name="created" type="integer">
  Unix timestamp.
</ResponseField>

<ResponseField name="model" type="string">
  The namespaced model id (e.g. `anthropic/claude-sonnet-4-6`).
</ResponseField>

<ResponseField name="choices" type="array">
  Array with a single element (`n > 1` not supported yet).

  <Expandable title="choice">
    <ResponseField name="index" type="integer">0</ResponseField>
    <ResponseField name="message.role" type="string">`assistant`</ResponseField>
    <ResponseField name="message.content" type="string">Generated text</ResponseField>

    <ResponseField name="finish_reason" type="string">
      `stop` | `length` | `content_filter` | `tool_calls`
    </ResponseField>
  </Expandable>
</ResponseField>

<ResponseField name="usage" type="object">
  Tokens consumed. Charged at request completion.

  <Expandable title="usage">
    <ResponseField name="prompt_tokens" type="integer" />

    <ResponseField name="completion_tokens" type="integer" />

    <ResponseField name="total_tokens" type="integer" />
  </Expandable>
</ResponseField>

## Streaming

For real-time responses, send `"stream": true`. You'll receive Server-Sent Events:

```
data: {"id":"req_xxx","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}

data: {"id":"req_xxx","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}

data: {"id":"req_xxx","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" world"},"finish_reason":null}]}

data: {"id":"req_xxx","object":"chat.completion.chunk","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}

data: [DONE]
```

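Each `data:` line except the final `[DONE]` sentinel carries a JSON chunk. When `choices[0].delta.content` is present, it is **the next text fragment** (can be a word, a syllable, or even a single character). The first chunk carries only the `role`, and the last chunk has an empty `delta` with `finish_reason` set.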

### TypeScript parser

```typescript theme={null}
// Stream a chat completion and print each text fragment as it arrives.
const res = await fetch("https://api.geekhub.mx/v1/chat/completions", {
  method: "POST",
  headers: {
    Authorization: `Bearer ${process.env.GEEKHUB_API_KEY}`,
    "Content-Type": "application/json",
  },
  body: JSON.stringify({
    model: "anthropic/claude-haiku-4-5",
    messages: [{ role: "user", content: "Count to 5" }],
    stream: true,
  }),
});

// A 4xx/5xx response is not an event stream; fail loudly instead of feeding
// it to the SSE parser, which would silently print nothing.
if (!res.ok || !res.body) {
  throw new Error(`Chat completion failed: HTTP ${res.status} ${await res.text()}`);
}

const reader = res.body.getReader();
const decoder = new TextDecoder();
let buffer = "";
let finished = false;

// Handles one SSE event block. Returns true once the `[DONE]` sentinel is seen.
const handleEvent = (block: string): boolean => {
  // An event may span several `data:` lines; SSE joins them with "\n".
  const data = block
    .split("\n")
    .filter((line) => line.startsWith("data:"))
    .map((line) => line.slice(5).trim())
    .join("\n");
  if (!data) return false; // comment or keep-alive block without a payload
  if (data === "[DONE]") return true;
  const chunk = JSON.parse(data);
  // The first and last chunks have no `delta.content`, hence the fallback.
  process.stdout.write(chunk.choices?.[0]?.delta?.content ?? "");
  return false;
};

while (!finished) {
  const { value, done } = await reader.read();
  // At end of stream, flush whatever bytes the decoder is still holding.
  buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });
  // Normalize CRLF so events are always separated by a blank "\n\n" line.
  // A lone trailing "\r" stays in the buffer until its "\n" arrives.
  buffer = buffer.replace(/\r\n/g, "\n");

  let nl: number;
  while (!finished && (nl = buffer.indexOf("\n\n")) !== -1) {
    finished = handleEvent(buffer.slice(0, nl));
    buffer = buffer.slice(nl + 2);
  }
  if (done) break;
}

// Stop reading once `[DONE]` has been received; nothing useful follows it.
if (finished) await reader.cancel();
```

## Examples per provider

<CodeGroup>
  ```python Claude Sonnet 4.6 theme={null}
  # System + user prompt sent to Claude Sonnet 4.6, generating at most 200 tokens.
  # NOTE: `client` must already exist; presumably an OpenAI-compatible SDK client — confirm against the setup guide.
  conversation = [
      {"role": "system", "content": "You are a concise assistant."},
      {"role": "user", "content": "Capital of Mexico?"},
  ]
  response = client.chat.completions.create(
      model="anthropic/claude-sonnet-4-6",
      messages=conversation,
      max_tokens=200,
  )
  ```

  ```python GPT-5 theme={null}
  # Single-turn request to GPT-5 with no optional parameters set.
  # NOTE: `client` must already exist; presumably an OpenAI-compatible SDK client — confirm against the setup guide.
  prompt = {"role": "user", "content": "Explain QPS in one line"}
  response = client.chat.completions.create(model="openai/gpt-5", messages=[prompt])
  ```

  ```python Gemini 2.5 Pro theme={null}
  # Single-turn request to Gemini 2.5 Pro with an explicit sampling temperature.
  # NOTE: `client` must already exist; presumably an OpenAI-compatible SDK client — confirm against the setup guide.
  greeting = {"role": "user", "content": "Hi"}
  response = client.chat.completions.create(
      temperature=0.5,
      messages=[greeting],
      model="google/gemini-2.5-pro",
  )
  ```

  ```python DeepSeek Reasoner theme={null}
  # Single-turn request to DeepSeek Reasoner asking for a worked multiplication.
  # NOTE: `client` must already exist; presumably an OpenAI-compatible SDK client — confirm against the setup guide.
  question = {"role": "user", "content": "Solve: 23 * 17 step by step"}
  response = client.chat.completions.create(
      messages=[question],
      model="deepseek/deepseek-reasoner",
  )
  ```
</CodeGroup>

## Common errors

See [Errors](/en/concepts/errors) for the full catalog. The most frequent in chat:

* `400 invalid_request_error` — malformed request body (the Zod validation error in `message` names the offending field)
* `402 insufficient_balance` — your account balance is insufficient to cover the request
* `404 model_not_found` — invalid model id (probably missing namespace)
* `502 provider_unavailable` — the upstream provider failed or rejected the request (sometimes because it refused the prompt)
