Implement real-time streaming of chat-completion responses from a local OpenAI-compatible server (SSE over `POST /v1/chat/completions`), shown in Python (sync and async), Node.js (raw fetch and the OpenAI SDK), and React.
from openai import OpenAI

# Talk to a local OpenAI-compatible server; the key is ignored but required
# by the client constructor.
client = OpenAI(base_url="http://localhost:8080/v1", api_key="not-needed")

# Ask for a streamed completion: the call returns an iterator of chunks
# instead of a single response object.
response_stream = client.chat.completions.create(
    model="llama-2-7b-chat",
    messages=[{"role": "user", "content": "Tell me a story"}],
    stream=True,
)

# Print each token as soon as it arrives; flush so output appears live.
for event in response_stream:
    token = event.choices[0].delta.content
    if token:
        print(token, end="", flush=True)
import asyncio

from openai import AsyncOpenAI


async def stream_chat():
    """Stream a chat completion from the local server, printing tokens live."""
    client = AsyncOpenAI(base_url="http://localhost:8080/v1", api_key="not-needed")
    completion = await client.chat.completions.create(
        model="llama-2-7b-chat",
        messages=[{"role": "user", "content": "Count to 20"}],
        stream=True,
    )
    # The async iterator yields one delta per server-sent event.
    async for part in completion:
        piece = part.choices[0].delta.content
        if piece:
            print(piece, end="", flush=True)


asyncio.run(stream_chat())
/**
 * Stream a chat completion from a local OpenAI-compatible server and write
 * each token to stdout as it arrives.
 *
 * Fixes over the naive version:
 *  - buffers partial lines across reads (an SSE event can be split between
 *    two chunks, which would otherwise crash JSON.parse);
 *  - decodes with `{ stream: true }` so multibyte UTF-8 sequences split at
 *    chunk boundaries are handled correctly;
 *  - skips the `data: [DONE]` end-of-stream sentinel, which is not JSON;
 *  - throws on non-2xx HTTP responses instead of parsing an error body.
 *
 * @param {string} message - The user message to send.
 * @returns {Promise<void>} Resolves when the stream is fully consumed.
 * @throws {Error} If the HTTP request fails.
 */
async function streamChat(message) {
  const response = await fetch('http://localhost:8080/v1/chat/completions', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model: 'llama-2-7b-chat',
      messages: [{ role: 'user', content: message }],
      stream: true,
    }),
  });
  if (!response.ok) {
    throw new Error(`Request failed: ${response.status} ${response.statusText}`);
  }
  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = '';
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });
    const lines = buffer.split('\n');
    buffer = lines.pop(); // keep the (possibly incomplete) trailing line
    for (const line of lines) {
      const trimmed = line.trim();
      if (!trimmed.startsWith('data: ')) continue;
      const payload = trimmed.slice(6);
      if (payload === '[DONE]') return; // end-of-stream sentinel, not JSON
      const content = JSON.parse(payload).choices[0]?.delta?.content;
      if (content) {
        process.stdout.write(content);
      }
    }
  }
}
// Don't leave the promise floating: surface stream/network errors.
streamChat("Tell me a joke").catch(console.error);
import OpenAI from 'openai';

// Point the SDK at the local OpenAI-compatible endpoint; no real key needed.
const client = new OpenAI({
  baseURL: 'http://localhost:8080/v1',
  apiKey: 'not-needed',
});

// With `stream: true` the SDK hands back an async iterable of chunks.
const stream = await client.chat.completions.create({
  model: 'llama-2-7b-chat',
  messages: [{ role: 'user', content: 'Write a poem' }],
  stream: true,
});

// Echo each delta to stdout as it arrives; absent deltas become ''.
for await (const chunk of stream) {
  const token = chunk.choices[0]?.delta?.content || '';
  process.stdout.write(token);
}
import { useState } from 'react';
function ChatComponent() {
const [messages, setMessages] = useState([]);
const [input, setInput] = useState('');
const [isStreaming, setIsStreaming] = useState(false);
async function handleSubmit(e) {
e.preventDefault();
if (!input.trim()) return;
setIsStreaming(true);
setMessages(prev => [...prev, { role: 'user', content: input }]);
setInput('');
const response = await fetch('http://localhost:8080/v1/chat/completions', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
model: 'llama-2-7b-chat',
messages: [...messages, { role: 'user', content: input }],
stream: true
})
});
const reader = response.body.getReader();
const decoder = new TextDecoder();
let assistantMessage = '';
while (true) {
const { done, value } = await reader.read();
if (done) break;
const chunk = decoder.decode(value);
const lines = chunk.split('\n').filter(line => line.trim());
for (const line of lines) {
if (line.startsWith('data: ')) {
const data = JSON.parse(line.slice(6));
const content = data.choices[0]?.delta?.content;
if (content) {
assistantMessage += content;
setMessages(prev => [
...prev.slice(0, -1),
{ role: 'assistant', content: assistantMessage }
]);
}
}
}
}
setIsStreaming(false);
}
return (
<div>
<div className="messages">
{messages.map((msg, i) => (
<div key={i} className={msg.role}>
{msg.content}
</div>
))}
</div>
<form onSubmit={handleSubmit}>
<input
value={input}
onChange={(e) => setInput(e.target.value)}
disabled={isStreaming}
/>
<button type="submit" disabled={isStreaming}>
Send
</button>
</form>
</div>
);
}