Overview
WebSocket support enables real-time, bidirectional communication with AI providers. Use this for interactive applications that require low-latency streaming.
WebSocket support is currently available for relay functionality. Standard chat completions use Server-Sent Events (SSE) for streaming.
Endpoint
ws://localhost:18080/v1/ws
Upgrade from HTTP to WebSocket using the standard WebSocket handshake.
Authentication
Query Parameter (Recommended)
// Option 1: the API key is supplied in the `apiKey` query parameter of the URL.
const ws = new WebSocket (
'ws://localhost:18080/v1/ws?apiKey=sk-test-123'
);
// Option 2 (alternative to option 1 -- use one or the other): the API key is
// supplied in the constructor's second argument, the WebSocket subprotocol list.
// NOTE(review): presumably the server reads it from the Sec-WebSocket-Protocol
// request header -- confirm against the server implementation.
const ws = new WebSocket (
'ws://localhost:18080/v1/ws' ,
[ 'access_token' , 'sk-test-123' ]
);
Disable Authentication (Development Only)
websocket_auth : false # Not recommended for production
Disabling WebSocket authentication allows unauthenticated access. Only use in development environments.
Message Protocol
Client to Server
Send JSON messages to the server:
{
"type" : "chat.message" ,
"model" : "gemini-2.5-pro" ,
"messages" : [
{ "role" : "user" , "content" : "Hello!" }
],
"stream" : true
}
Server to Client
Receive streaming chunks or complete responses:
{
"type" : "chat.chunk" ,
"content" : "Hello" ,
"done" : false
}
{
"type" : "chat.chunk" ,
"content" : "!" ,
"done" : true
}
Examples
JavaScript/Browser
// Browser example: send one streaming chat request and log the assembled reply.
const ws = new WebSocket('ws://localhost:18080/v1/ws?apiKey=sk-test-123');

// Accumulates streamed chunks. Browsers have no `process.stdout`, so the
// reply is collected here and logged once the final chunk arrives.
let reply = '';

ws.onopen = () => {
  console.log('Connected');
  // Send chat request
  ws.send(JSON.stringify({
    type: 'chat.message',
    model: 'gemini-2.5-pro',
    messages: [
      { role: 'user', content: 'Tell me a joke' },
    ],
    stream: true,
  }));
};

ws.onmessage = (event) => {
  const data = JSON.parse(event.data);
  if (data.type !== 'chat.chunk') {
    return;
  }
  reply += data.content;
  if (data.done) {
    console.log(reply);
    console.log('\nComplete!');
    ws.close();
  }
};

ws.onerror = (error) => {
  console.error('WebSocket error:', error);
};

ws.onclose = () => {
  console.log('Disconnected');
};
Python
import asyncio
import websockets
import json
async def chat():
    """Send one streaming chat request and print the reply as chunks arrive.

    Stops on the final chunk (``done`` is true) or on an ``error`` message,
    so the loop cannot wait forever on a request that failed.
    """
    uri = "ws://localhost:18080/v1/ws?apiKey=sk-test-123"
    async with websockets.connect(uri) as ws:
        # Send request
        await ws.send(json.dumps({
            "type": "chat.message",
            "model": "gemini-2.5-pro",
            "messages": [
                {"role": "user", "content": "Tell me a joke"}
            ],
            "stream": True
        }))
        # Receive chunks
        async for message in ws:
            data = json.loads(message)
            kind = data.get("type")
            if kind == "error":
                # The protocol documents an `error` response with message/code fields.
                print(f"\nError {data.get('code')}: {data.get('message')}")
                break
            if kind == "chat.chunk":
                print(data["content"], end="", flush=True)
                if data["done"]:
                    print("\nComplete!")
                    break


asyncio.run(chat())
Node.js
import WebSocket from 'ws' ;
// Node.js example (ws library): send one streaming chat request and print the reply.
const ws = new WebSocket('ws://localhost:18080/v1/ws?apiKey=sk-test-123');

ws.on('open', () => {
  console.log('Connected');
  ws.send(JSON.stringify({
    type: 'chat.message',
    model: 'gemini-2.5-pro',
    messages: [
      { role: 'user', content: 'Tell me a joke' },
    ],
    stream: true,
  }));
});

ws.on('message', (data) => {
  let message;
  try {
    message = JSON.parse(data.toString());
  } catch (err) {
    // An exception thrown inside an event handler would otherwise crash the process.
    console.error('Ignoring malformed message:', err);
    return;
  }

  if (message.type === 'error') {
    // Documented response type: without this branch a failed request
    // leaves the client waiting until the connection times out.
    console.error(`Error ${message.code}: ${message.message}`);
    ws.close();
    return;
  }

  if (message.type !== 'chat.chunk') {
    return;
  }
  process.stdout.write(message.content);
  if (message.done) {
    console.log('\nComplete!');
    ws.close();
  }
});

ws.on('error', (error) => {
  console.error('WebSocket error:', error);
});

ws.on('close', () => {
  console.log('Disconnected');
});
Message Types
Request Types
| Type | Description | Required Fields |
| --- | --- | --- |
| `chat.message` | Send chat completion request | `model`, `messages` |
| `ping` | Keep-alive ping | None |
| `cancel` | Cancel ongoing request | `request_id` |
Response Types
| Type | Description | Fields |
| --- | --- | --- |
| `chat.chunk` | Streaming content chunk | `content`, `done` |
| `chat.complete` | Full response | `content`, `usage` |
| `error` | Error occurred | `message`, `code` |
| `pong` | Ping response | `timestamp` |
Configuration
Server Configuration
# WebSocket settings
websocket_auth : true
websocket_timeout : 300 # Seconds
websocket_max_message_size : 1048576 # 1MB
Custom Route
Register WebSocket on a custom path:
import (
" github.com/traylinx/switchAILocal/internal/api "
" github.com/traylinx/switchAILocal/internal/relay "
)
server := api . NewServer ( cfg , authManager , accessManager , configPath , luaEngine )
// Custom WebSocket relay
relay := relay . NewRelay ( server . GetHandlers ())
server . AttachWebsocketRoute ( "/custom/ws" , relay )
Advanced Features
Connection Management
/**
 * Owns a single WebSocket connection and re-establishes it with exponential
 * backoff when it drops unexpectedly. A deliberate `close()` does not
 * trigger a reconnect.
 */
class WebSocketManager {
  /**
   * @param {string} url - WebSocket endpoint without a query string.
   * @param {string} apiKey - API key sent as the `apiKey` query parameter.
   */
  constructor(url, apiKey) {
    // Encode the key so characters such as `&`, `#` or spaces cannot corrupt the URL.
    this.url = `${url}?apiKey=${encodeURIComponent(apiKey)}`;
    this.ws = null;
    this.reconnectAttempts = 0;
    this.maxReconnects = 5;
    // Handle of the pending reconnect timer, or null when none is scheduled.
    this.reconnectTimer = null;
    // True once close() was called; suppresses the automatic reconnect.
    this.closedByUser = false;
  }

  /** Opens the connection and wires up the lifecycle handlers. */
  connect() {
    this.closedByUser = false;
    this.clearReconnectTimer();
    this.ws = new WebSocket(this.url);

    this.ws.onopen = () => {
      console.log('Connected');
      this.reconnectAttempts = 0;
    };

    this.ws.onclose = () => {
      console.log('Disconnected');
      // Only reconnect when the connection was lost, not when it was closed on purpose.
      if (!this.closedByUser) {
        this.reconnect();
      }
    };

    this.ws.onerror = (error) => {
      console.error('Error:', error);
    };
  }

  /**
   * Schedules another connect() with exponential backoff
   * (2s, 4s, 8s, ... capped at 30s), up to `maxReconnects` attempts.
   */
  reconnect() {
    if (this.reconnectAttempts >= this.maxReconnects) {
      console.error('Max reconnection attempts reached');
      return;
    }
    this.reconnectAttempts += 1;
    const delay = Math.min(1000 * 2 ** this.reconnectAttempts, 30000);
    console.log(`Reconnecting in ${delay}ms...`);
    this.clearReconnectTimer();
    this.reconnectTimer = setTimeout(() => this.connect(), delay);
  }

  /**
   * Sends `message` as JSON if the socket is open.
   * @param {*} message - JSON-serializable payload.
   * @returns {boolean} true when the message was handed to the socket,
   *   false when it was dropped because the socket is not open.
   */
  send(message) {
    if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
      return false;
    }
    this.ws.send(JSON.stringify(message));
    return true;
  }

  /** Closes the connection for good and cancels any pending reconnect. */
  close() {
    this.closedByUser = true;
    this.clearReconnectTimer();
    if (this.ws) {
      this.ws.close();
    }
  }

  /** Cancels the pending reconnect timer, if any. */
  clearReconnectTimer() {
    if (this.reconnectTimer !== null) {
      clearTimeout(this.reconnectTimer);
      this.reconnectTimer = null;
    }
  }
}
// Example wiring: one manager for the local relay endpoint.
const RELAY_URL = 'ws://localhost:18080/v1/ws';
const API_KEY = 'sk-test-123';

const manager = new WebSocketManager(RELAY_URL, API_KEY);
manager.connect();
Keep-Alive
Send periodic pings to maintain connection:
const PING_INTERVAL = 30000; // 30 seconds

const ws = new WebSocket('ws://localhost:18080/v1/ws?apiKey=sk-test-123');

// Handle of the running ping timer; set on open, cleared on close.
let pingInterval;

// Emits a single ping frame, but only while the socket is open.
function sendPing() {
  if (ws.readyState !== WebSocket.OPEN) {
    return;
  }
  ws.send(JSON.stringify({ type: 'ping' }));
}

// Start pinging as soon as the connection is established.
ws.onopen = () => {
  pingInterval = setInterval(sendPing, PING_INTERVAL);
};

// Stop pinging once the connection is gone.
ws.onclose = () => {
  clearInterval(pingInterval);
};
Request Cancellation
// Identifier of the most recently sent request, or null when there is
// nothing left to cancel.
let currentRequestId = null;

/**
 * Sends a streaming chat request and remembers its id so it can be cancelled.
 *
 * @param {string} content - Text of the user message.
 * @returns {string} The `request_id` attached to the request.
 */
function sendRequest(content) {
  currentRequestId = Date.now().toString();
  ws.send(JSON.stringify({
    type: 'chat.message',
    request_id: currentRequestId,
    model: 'gemini-2.5-pro',
    messages: [{ role: 'user', content }],
    stream: true,
  }));
  return currentRequestId;
}

/**
 * Cancels the most recently sent request, if any.
 * The remembered id is cleared first, so calling this twice sends only
 * one `cancel` frame.
 */
function cancelRequest() {
  if (currentRequestId === null) {
    return;
  }
  const requestId = currentRequestId;
  currentRequestId = null;
  ws.send(JSON.stringify({
    type: 'cancel',
    request_id: requestId,
  }));
}
// Usage: start a long-running request...
sendRequest('Tell me a long story');

// ...then cancel it after 2 seconds
const CANCEL_AFTER_MS = 2000;
setTimeout(cancelRequest, CANCEL_AFTER_MS);
Error Handling
// Logs server-side `error` messages and a short hint for the known error codes.
ws.onmessage = (event) => {
  let data;
  try {
    data = JSON.parse(event.data);
  } catch (err) {
    // A frame that is not valid JSON must not throw out of the handler.
    console.error('Received malformed message:', err);
    return;
  }

  if (data?.type !== 'error') {
    return;
  }

  console.error(`Error ${data.code}: ${data.message}`);
  switch (data.code) {
    case 'auth_error':
      console.log('Authentication failed');
      break;
    case 'rate_limit':
      console.log('Rate limit exceeded');
      break;
    case 'provider_error':
      console.log('Provider unavailable');
      break;
    default:
      console.log('Unknown error');
  }
};
Reuse WebSocket connections instead of creating new ones for each request:
// Good: Single persistent connection
const ws = new WebSocket(url);
// Reuse for multiple requests

// Avoid: New connection per request
function sendMessage(content) {
  // Don't do this
  const throwawaySocket = new WebSocket(url);
  throwawaySocket.send(content);
  throwawaySocket.close();
}
Buffer and send messages in batches when possible:
const messageQueue = [];
// Handle of the pending flush timer, or null when no flush is scheduled.
let sendTimer = null;

/**
 * Queues `message` and schedules a flush of the whole queue, sent as one
 * JSON array, 100ms later. Messages queued while a flush is already pending
 * ride along with it.
 *
 * @param {*} message - JSON-serializable payload to batch.
 */
function queueMessage(message) {
  messageQueue.push(message);
  if (sendTimer !== null) {
    return;
  }
  sendTimer = setTimeout(() => {
    // Reset first: if send() throws (e.g. the socket is not open), the queue
    // stays intact and the next queueMessage() call can schedule a new flush.
    sendTimer = null;
    ws.send(JSON.stringify(messageQueue));
    messageQueue.length = 0;
  }, 100); // Batch every 100ms
}
Enable WebSocket compression for large messages:
// Browser (automatic with permessage-deflate)
// Browser variant: the constructor takes no compression option here.
const ws = new WebSocket ( url );
// Node.js with ws library
// (alternative to the browser variant above -- use one or the other, since
// both declare `ws`). Compression is requested through the `perMessageDeflate`
// option in the constructor's second argument.
import WebSocket from 'ws' ;
const ws = new WebSocket ( url , {
perMessageDeflate: true
});
Troubleshooting
Connection Refused
Cause : WebSocket endpoint not available
Solution :
Verify server is running: http://localhost:18080
Check WebSocket is enabled in config
Try HTTP endpoint first: /v1/models
Authentication Failed
Cause : Invalid or missing API key
Solution :
Verify API key in query parameter or header
Check key is configured in config.yaml
Confirm websocket_auth: true if using authentication
Connection Timeout
Cause : Idle connection exceeded timeout
Solution :
Implement periodic ping messages
Increase websocket_timeout in config
Enable automatic reconnection
Next Steps