Implement real-time streaming of chat-completion responses from a local OpenAI-compatible server (SSE over `POST /v1/chat/completions`), shown in Python (sync and async), Node.js (raw fetch and the OpenAI SDK), and React.
from openai import OpenAI

# Talk to a local OpenAI-compatible server; the key is ignored but required
# by the client constructor.
client = OpenAI(base_url="http://localhost:8080/v1", api_key="not-needed")

# Ask for a streamed completion: the call returns an iterator of chunks
# instead of a single response object.
response_stream = client.chat.completions.create(
    model="llama-2-7b-chat",
    messages=[{"role": "user", "content": "Tell me a story"}],
    stream=True,
)

# Print each token as soon as it arrives; flush so output appears live.
for event in response_stream:
    token = event.choices[0].delta.content
    if token:
        print(token, end="", flush=True)
import asyncio

from openai import AsyncOpenAI


async def stream_chat():
    """Stream a chat completion from the local server, printing tokens live."""
    client = AsyncOpenAI(base_url="http://localhost:8080/v1", api_key="not-needed")
    completion = await client.chat.completions.create(
        model="llama-2-7b-chat",
        messages=[{"role": "user", "content": "Count to 20"}],
        stream=True,
    )
    # The async iterator yields one delta per server-sent event.
    async for part in completion:
        piece = part.choices[0].delta.content
        if piece:
            print(piece, end="", flush=True)


asyncio.run(stream_chat())
/**
 * Stream a chat completion from a local OpenAI-compatible server and write
 * each token to stdout as it arrives.
 *
 * Fixes over the naive version:
 *  - buffers partial lines across reads (an SSE event can be split between
 *    two chunks, which would otherwise crash JSON.parse);
 *  - decodes with `{ stream: true }` so multibyte UTF-8 sequences split at
 *    chunk boundaries are handled correctly;
 *  - skips the `data: [DONE]` end-of-stream sentinel, which is not JSON;
 *  - throws on non-2xx HTTP responses instead of parsing an error body.
 *
 * @param {string} message - The user message to send.
 * @returns {Promise<void>} Resolves when the stream is fully consumed.
 * @throws {Error} If the HTTP request fails.
 */
async function streamChat(message) {
  const response = await fetch('http://localhost:8080/v1/chat/completions', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model: 'llama-2-7b-chat',
      messages: [{ role: 'user', content: message }],
      stream: true,
    }),
  });
  if (!response.ok) {
    throw new Error(`Request failed: ${response.status} ${response.statusText}`);
  }
  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = '';
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });
    const lines = buffer.split('\n');
    buffer = lines.pop(); // keep the (possibly incomplete) trailing line
    for (const line of lines) {
      const trimmed = line.trim();
      if (!trimmed.startsWith('data: ')) continue;
      const payload = trimmed.slice(6);
      if (payload === '[DONE]') return; // end-of-stream sentinel, not JSON
      const content = JSON.parse(payload).choices[0]?.delta?.content;
      if (content) {
        process.stdout.write(content);
      }
    }
  }
}
// Don't leave the promise floating: surface stream/network errors.
streamChat("Tell me a joke").catch(console.error);
import OpenAI from 'openai';

// Point the SDK at the local OpenAI-compatible endpoint; no real key needed.
const client = new OpenAI({
  baseURL: 'http://localhost:8080/v1',
  apiKey: 'not-needed',
});

// With `stream: true` the SDK hands back an async iterable of chunks.
const stream = await client.chat.completions.create({
  model: 'llama-2-7b-chat',
  messages: [{ role: 'user', content: 'Write a poem' }],
  stream: true,
});

// Echo each delta to stdout as it arrives; absent deltas become ''.
for await (const chunk of stream) {
  const token = chunk.choices[0]?.delta?.content || '';
  process.stdout.write(token);
}
import { useState } from 'react';
function ChatComponent() {
const [messages, setMessages] = useState([]);
const [input, setInput] = useState('');
const [isStreaming, setIsStreaming] = useState(false);
async function handleSubmit(e) {
e.preventDefault();
if (!input.trim()) return;
setIsStreaming(true);
setMessages(prev => [...prev, { role: 'user', content: input }]);
setInput('');
const response = await fetch('http://localhost:8080/v1/chat/completions', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
model: 'llama-2-7b-chat',
messages: [...messages, { role: 'user', content: input }],
stream: true
})
});
const reader = response.body.getReader();
const decoder = new TextDecoder();
let assistantMessage = '';
while (true) {
const { done, value } = await reader.read();
if (done) break;
const chunk = decoder.decode(value);
const lines = chunk.split('\n').filter(line => line.trim());
for (const line of lines) {
if (line.startsWith('data: ')) {
const data = JSON.parse(line.slice(6));
const content = data.choices[0]?.delta?.content;
if (content) {
assistantMessage += content;
setMessages(prev => [
...prev.slice(0, -1),
{ role: 'assistant', content: assistantMessage }
]);
}
}
}
}
setIsStreaming(false);
}
return (
<div>
<div className="messages">
{messages.map((msg, i) => (
<div key={i} className={msg.role}>
{msg.content}
</div>
))}
</div>
<form onSubmit={handleSubmit}>
<input
value={input}
onChange={(e) => setInput(e.target.value)}
disabled={isStreaming}
/>
<button type="submit" disabled={isStreaming}>
Send
</button>
</form>
</div>
);
}