Interactive API Examples

Try editing the code examples below; each snippet shows a common way to call the Inferno API from JavaScript, TypeScript, or Python.

Simple Inference Request

This example shows how to make a basic inference request to the Inferno API.

Edit this code to see how the Inferno API works

Endpoint: POST /inference
// Configure the API
const API_KEY = 'your_api_key';
const BASE_URL = 'http://localhost:8080';

// Make an inference request
async function runInference() {
  const response = await fetch(`${BASE_URL}/inference`, {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model: 'llama-2-7b',
      prompt: 'What is machine learning?',
      max_tokens: 100,
      temperature: 0.7
    })
  });

  const result = await response.json();
  console.log(result.choices[0].text);
}

runInference();
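
When the request fails, response.json() may not have the shape you expect. Here is a minimal sketch of the same call with basic status checking; the error body format is an assumption, so it is logged as raw text.

// Sketch: the same request with basic status checking.
async function runInferenceChecked() {
  const response = await fetch(`${BASE_URL}/inference`, {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model: 'llama-2-7b',
      prompt: 'What is machine learning?',
      max_tokens: 100,
      temperature: 0.7
    })
  });

  if (!response.ok) {
    // The error body shape is not documented here, so log it verbatim.
    const body = await response.text();
    throw new Error(`Inference request failed (${response.status}): ${body}`);
  }

  const result = await response.json();
  return result.choices[0].text;
}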

Python Example

The same inference request in Python:

Python version using the requests library

Endpoint: POST /inference
import requests

API_KEY = "your_api_key"
BASE_URL = "http://localhost:8080"

headers = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json"
}

response = requests.post(
    f"{BASE_URL}/inference",
    headers=headers,
    json={
        "model": "llama-2-7b",
        "prompt": "What is machine learning?",
        "max_tokens": 100,
        "temperature": 0.7
    }
)

result = response.json()
print(result["choices"][0]["text"])

TypeScript Example

Type-safe inference with TypeScript:

TypeScript with proper type definitions

Endpoint: POST /inference
interface InferenceRequest {
  model: string;
  prompt: string;
  max_tokens: number;
  temperature: number;
}

interface InferenceResponse {
  choices: Array<{ text: string }>;
}

const API_KEY = 'your_api_key';
const BASE_URL = 'http://localhost:8080';

async function runInference(): Promise<void> {
  const response = await fetch(`${BASE_URL}/inference`, {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model: 'llama-2-7b',
      prompt: 'What is machine learning?',
      max_tokens: 100,
      temperature: 0.7
    } as InferenceRequest)
  });

  const result: InferenceResponse = await response.json();
  console.log(result.choices[0].text);
}

runInference();

Streaming Inference

Stream responses token by token as they’re generated:

Stream tokens as they're generated

Endpoint: POST /inference/stream
const API_KEY = 'your_api_key';
const BASE_URL = 'http://localhost:8080';

async function streamInference() {
  const response = await fetch(`${BASE_URL}/inference/stream`, {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${API_KEY}`,
      'Content-Type': 'application/json',
      'Accept': 'text/event-stream'
    },
    body: JSON.stringify({
      model: 'llama-2-7b',
      prompt: 'Tell me a short story',
      max_tokens: 200,
      stream: true
    })
  });

  const reader = response.body.getReader();
  const decoder = new TextDecoder();

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;

    const chunk = decoder.decode(value);
    const lines = chunk.split('\n');

    for (const line of lines) {
      if (line.startsWith('data: ')) {
        const data = JSON.parse(line.slice(6));
        if (data.token) {
          process.stdout.write(data.token);
        } else if (data.done) {
          console.log('\nStream complete!');
        }
      }
    }
  }
}

streamInference();
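
A network chunk is not guaranteed to end on a line boundary, so a single SSE event can be split across two reads. A slightly more defensive sketch of the same request (same endpoint and event format as above) buffers the trailing partial line between reads:

// Sketch: same streaming request, but partial SSE lines are buffered
// across reads so an event split between two chunks still parses correctly.
async function streamInferenceBuffered() {
  const response = await fetch(`${BASE_URL}/inference/stream`, {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${API_KEY}`,
      'Content-Type': 'application/json',
      'Accept': 'text/event-stream'
    },
    body: JSON.stringify({
      model: 'llama-2-7b',
      prompt: 'Tell me a short story',
      max_tokens: 200,
      stream: true
    })
  });

  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = '';

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;

    buffer += decoder.decode(value, { stream: true });
    const lines = buffer.split('\n');
    buffer = lines.pop();  // keep the trailing partial line for the next read

    for (const line of lines) {
      if (!line.startsWith('data: ')) continue;
      const data = JSON.parse(line.slice(6));
      if (data.token) process.stdout.write(data.token);
      else if (data.done) console.log('\nStream complete!');
    }
  }
}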

WebSocket Connection

Real-time bidirectional streaming with WebSocket:

Real-time streaming with WebSocket

Endpoint: WebSocket ws://localhost:8080/ws
const ws = new WebSocket('ws://localhost:8080/ws');

ws.onopen = () => {
  // Authenticate
  ws.send(JSON.stringify({
    type: 'auth',
    token: 'YOUR_API_KEY'
  }));

  // Send inference request
  ws.send(JSON.stringify({
    type: 'inference',
    id: 'req_' + Date.now(),
    model: 'llama-2-7b',
    prompt: 'Explain quantum physics',
    max_tokens: 200,
    stream: true
  }));
};

ws.onmessage = (event) => {
  const data = JSON.parse(event.data);

  if (data.type === 'token') {
    process.stdout.write(data.token);
  } else if (data.type === 'complete') {
    console.log('\nDone!');
    ws.close();
  }
};
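
In practice you will also want to handle connection errors and unexpected closes. A minimal sketch; the error and close events are standard WebSocket behavior, not Inferno-specific:

// Sketch: basic error and close handling for the same connection.
ws.onerror = (err) => {
  console.error('WebSocket error:', err);
};

ws.onclose = (event) => {
  console.log(`Connection closed (code ${event.code})`);
};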

WebSocket with Python

Python implementation using websocket-client:

Python WebSocket implementation

Endpoint: WebSocket ws://localhost:8080/ws
import websocket
import json

def on_message(ws, message):
    data = json.loads(message)

    if data['type'] == 'token':
        print(data['token'], end='', flush=True)
    elif data['type'] == 'complete':
        print('\nDone!')
        ws.close()

def on_open(ws):
    # Authenticate
    ws.send(json.dumps({
        'type': 'auth',
        'token': 'YOUR_API_KEY'
    }))

    # Send inference request
    ws.send(json.dumps({
        'type': 'inference',
        'id': 'req_123',
        'model': 'llama-2-7b',
        'prompt': 'Explain quantum physics',
        'max_tokens': 200,
        'stream': True
    }))

ws = websocket.WebSocketApp('ws://localhost:8080/ws',
                            on_message=on_message,
                            on_open=on_open)
ws.run_forever()

Batch Processing

Submit multiple requests as a batch:

Process multiple prompts efficiently

Endpoint: POST /batch
const API_KEY = 'your_api_key';
const BASE_URL = 'http://localhost:8080';

async function submitBatch() {
  const response = await fetch(`${BASE_URL}/batch`, {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model: 'llama-2-7b',
      requests: [
        { id: 'req1', prompt: 'What is AI?' },
        { id: 'req2', prompt: 'Explain machine learning' },
        { id: 'req3', prompt: 'What is deep learning?' }
      ],
      max_tokens: 100,
      webhook_url: 'https://example.com/webhook'
    })
  });

  const result = await response.json();
  console.log('Batch ID:', result.batch_id);

  // Check status
  const statusResponse = await fetch(`${BASE_URL}/batch/${result.batch_id}`, {
    headers: {
      'Authorization': `Bearer ${API_KEY}`
    }
  });

  const status = await statusResponse.json();
  console.log('Status:', status);
}

submitBatch();
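
If you do not use a webhook, you can poll the status endpoint until the batch finishes. The sketch below reuses BASE_URL and API_KEY from above and assumes the status response includes a status field that eventually reads 'completed' or 'failed'; check your server's actual response shape.

// Sketch: poll the batch until it completes (field names are assumptions).
async function waitForBatch(batchId) {
  while (true) {
    const res = await fetch(`${BASE_URL}/batch/${batchId}`, {
      headers: { 'Authorization': `Bearer ${API_KEY}` }
    });
    const status = await res.json();

    if (status.status === 'completed') return status;
    if (status.status === 'failed') throw new Error('Batch failed');

    // Wait a couple of seconds between polls.
    await new Promise((resolve) => setTimeout(resolve, 2000));
  }
}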

OpenAI SDK - Python

Use Inferno with the official OpenAI Python SDK:

OpenAI Python SDK pointing to Inferno

Endpoint: POST /v1/chat/completions (OpenAI SDK)
from openai import OpenAI

# Point the OpenAI client to your Inferno instance
client = OpenAI(
    base_url="http://localhost:8080/v1",
    api_key="your_api_key"  # or "not-needed" if auth disabled
)

# Use it exactly like the OpenAI API
response = client.chat.completions.create(
    model="llama-2-7b",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is the capital of France?"}
    ],
    temperature=0.7,
    max_tokens=100
)

print(response.choices[0].message.content)

OpenAI SDK - Node.js

Use Inferno with the official OpenAI Node.js SDK:

OpenAI Node.js SDK pointing to Inferno

Endpoint: POST /v1/chat/completions (OpenAI SDK)
import OpenAI from 'openai';

// Point the OpenAI client to your Inferno instance
const client = new OpenAI({
  baseURL: 'http://localhost:8080/v1',
  apiKey: 'your_api_key'  // or 'not-needed' if auth disabled
});

// Use it exactly like the OpenAI API
const response = await client.chat.completions.create({
  model: 'llama-2-7b',
  messages: [
    { role: 'system', content: 'You are a helpful assistant.' },
    { role: 'user', content: 'What is the capital of France?' }
  ],
  temperature: 0.7,
  max_tokens: 100
});

console.log(response.choices[0].message.content);
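
The OpenAI SDK also supports streaming. If Inferno's OpenAI-compatible endpoint honors stream: true (an assumption to verify against your server version), the standard SDK pattern applies:

// Sketch: streaming via the OpenAI Node.js SDK.
// Assumes the /v1 endpoint accepts stream: true; verify against your Inferno version.
const stream = await client.chat.completions.create({
  model: 'llama-2-7b',
  messages: [{ role: 'user', content: 'What is the capital of France?' }],
  stream: true
});

for await (const chunk of stream) {
  process.stdout.write(chunk.choices[0]?.delta?.content ?? '');
}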

Next Steps