Skip to main content

Overview

While switchAILocal uses MiniLM by default, you can integrate custom embedding providers for specialized use cases like:
  • Domain-specific models (medical, legal, code)
  • Multilingual embeddings
  • Higher-dimensional vectors
  • Cloud embedding APIs

Embedding Engine Interface

The semantic tier expects an implementation of the EmbeddingEngine interface:
// EmbeddingEngine is the contract the semantic tier depends on: it turns
// text into fixed-length vectors and compares them. Any custom provider
// (ONNX model, cloud API, hybrid) must implement all three methods.
type EmbeddingEngine interface {
    // Embed computes the embedding vector for a text.
    // All vectors returned by one engine must share the same dimensionality,
    // or CosineSimilarity comparisons between them are invalid.
    Embed(text string) ([]float32, error)

    // CosineSimilarity computes similarity between two vectors,
    // typically in [-1, 1], higher meaning more similar.
    CosineSimilarity(a, b []float32) float64

    // IsEnabled returns whether the engine is ready to serve Embed calls.
    IsEnabled() bool
}

Custom ONNX Model

Replace the default MiniLM model with your own ONNX model.

Step 1: Export Your Model

# Example: Export a SentenceTransformer model to ONNX so switchAILocal's
# semantic tier can load it in place of the default MiniLM model.
from sentence_transformers import SentenceTransformer
import torch

# Load your model (any SentenceTransformer checkpoint name or local path).
model = SentenceTransformer('your-custom-model')

# Export to ONNX.
# Dummy shapes are batch=1, seq=128; the dynamic_axes below relax both,
# so these sizes only matter for tracing.
dummy_input = {
    'input_ids': torch.randint(0, 1000, (1, 128)),
    'attention_mask': torch.ones(1, 128, dtype=torch.long),
    'token_type_ids': torch.zeros(1, 128, dtype=torch.long)
}

# NOTE(review): this relies on torch.onnx.export treating a trailing dict in
# args as keyword inputs to the model's forward() — confirm your model's
# forward signature accepts input_ids/attention_mask/token_type_ids by name.
torch.onnx.export(
    model,
    (dummy_input,),
    'custom_model.onnx',
    input_names=['input_ids', 'attention_mask', 'token_type_ids'],
    output_names=['last_hidden_state'],
    # Mark batch and sequence dims dynamic so the exported model accepts
    # variable batch sizes and sequence lengths at inference time.
    dynamic_axes={
        'input_ids': {0: 'batch', 1: 'sequence'},
        'attention_mask': {0: 'batch', 1: 'sequence'},
        'token_type_ids': {0: 'batch', 1: 'sequence'},
        'last_hidden_state': {0: 'batch', 1: 'sequence'}
    }
)

Step 2: Configure switchAILocal

intelligence:
  embedding:
    enabled: true                               # turn on the semantic embedding tier
    model-path: "/path/to/custom_model.onnx"    # ONNX model exported in Step 1
    vocab-path: "/path/to/custom_vocab.txt"     # BERT-style vocabulary for tokenization

Step 3: Verify Compatibility

Ensure your model:
  • Accepts input_ids, attention_mask, token_type_ids as inputs
  • Outputs last_hidden_state tensor
  • Uses BERT-style tokenization

Custom Go Implementation

Implement a completely custom embedding engine.

Step 1: Implement the Interface

package customembed

import (
    "bytes"
    "encoding/json"
    "fmt"
    "io"
    "math"
    "net/http"
    "sync"
    "time"
)

// CloudEmbeddingEngine calls an external embedding API.
// It implements the semantic tier's EmbeddingEngine interface.
type CloudEmbeddingEngine struct {
    apiURL    string       // embedding endpoint, e.g. https://api.openai.com/v1/embeddings
    apiKey    string       // bearer token sent in the Authorization header
    dimension int          // expected vector dimensionality (e.g. 1536 for ada-002)
    enabled   bool         // guarded by mu; Embed refuses to run when false
    mu        sync.RWMutex // protects enabled
}

// NewCloudEmbeddingEngine constructs an engine that targets apiURL,
// authenticates with apiKey, and expects vectors of the given dimension.
// The engine starts out enabled.
func NewCloudEmbeddingEngine(apiURL, apiKey string, dimension int) *CloudEmbeddingEngine {
    engine := &CloudEmbeddingEngine{
        apiURL:    apiURL,
        apiKey:    apiKey,
        dimension: dimension,
    }
    engine.enabled = true
    return engine
}

// Embed computes the embedding vector for text by POSTing it to the
// configured API. It returns an error when the engine is disabled, the
// request cannot be built or sent, the API responds with a non-2xx status,
// or the response carries no embedding data.
func (e *CloudEmbeddingEngine) Embed(text string) ([]float32, error) {
    e.mu.RLock()
    defer e.mu.RUnlock()

    if !e.enabled {
        return nil, fmt.Errorf("engine not enabled")
    }

    // Prepare request payload. Marshal can fail for exotic values, so the
    // error is checked rather than discarded.
    reqBody, err := json.Marshal(map[string]interface{}{
        "input": text,
        "model": "text-embedding-ada-002",
    })
    if err != nil {
        return nil, fmt.Errorf("encoding request: %w", err)
    }

    req, err := http.NewRequest("POST", e.apiURL, bytes.NewReader(reqBody))
    if err != nil {
        return nil, err
    }

    req.Header.Set("Content-Type", "application/json")
    req.Header.Set("Authorization", "Bearer "+e.apiKey)

    // Use a client with a timeout so a stalled API call cannot block callers
    // indefinitely. The zero Transport falls back to http.DefaultTransport,
    // so connection pooling is preserved.
    client := &http.Client{Timeout: 30 * time.Second}
    resp, err := client.Do(req)
    if err != nil {
        return nil, err
    }
    defer resp.Body.Close()

    // Read the body before the status check so error responses can be
    // surfaced in the error message.
    body, err := io.ReadAll(resp.Body)
    if err != nil {
        return nil, fmt.Errorf("reading response: %w", err)
    }

    if resp.StatusCode < 200 || resp.StatusCode >= 300 {
        return nil, fmt.Errorf("embedding API returned status %d: %s", resp.StatusCode, body)
    }

    var result struct {
        Data []struct {
            Embedding []float32 `json:"embedding"`
        } `json:"data"`
    }
    if err := json.Unmarshal(body, &result); err != nil {
        return nil, err
    }

    if len(result.Data) == 0 {
        return nil, fmt.Errorf("no embedding returned")
    }

    return result.Data[0].Embedding, nil
}

// CosineSimilarity returns the cosine of the angle between vectors a and b.
// It yields 0.0 when the vectors differ in length, are empty, or either
// vector has zero magnitude.
func (e *CloudEmbeddingEngine) CosineSimilarity(a, b []float32) float64 {
    if len(a) == 0 || len(a) != len(b) {
        return 0.0
    }

    var dot, magA, magB float64
    for i, av := range a {
        x, y := float64(av), float64(b[i])
        dot += x * y
        magA += x * x
        magB += y * y
    }

    magA = math.Sqrt(magA)
    magB = math.Sqrt(magB)
    if magA == 0 || magB == 0 {
        return 0.0
    }

    return dot / (magA * magB)
}

// IsEnabled reports whether the engine is ready to serve Embed calls.
func (e *CloudEmbeddingEngine) IsEnabled() bool {
    e.mu.RLock()
    ready := e.enabled
    e.mu.RUnlock()
    return ready
}

Step 2: Integrate with Intelligence Service

package main

import (
    "log"

    // Blank import keeps the example compiling until the intelligence
    // service is actually wired in below.
    _ "github.com/traylinx/switchAILocal/internal/intelligence"
    "github.com/traylinx/switchAILocal/internal/intelligence/semantic"
    "yourpackage/customembed"
)

func main() {
    // Create the custom embedding engine pointed at the OpenAI API.
    embedEngine := customembed.NewCloudEmbeddingEngine(
        "https://api.openai.com/v1/embeddings",
        "your-api-key",
        1536, // OpenAI ada-002 dimension
    )

    // Create the semantic tier with the custom engine and a 0.85
    // similarity threshold.
    semanticTier := semantic.NewTier(embedEngine, 0.85)
    if err := semanticTier.Initialize("intents.yaml"); err != nil {
        log.Fatal(err)
    }

    // Use in intelligence service
    // (Implementation depends on your integration approach)
}

Hybrid Approach

Combine local and cloud embeddings for different use cases.
package hybridembed

import (
    "github.com/traylinx/switchAILocal/internal/intelligence/embedding"
    "yourpackage/customembed"
)

// HybridEngine routes embedding requests between a local ONNX engine and a
// cloud API engine based on input length and configuration.
// Fields are gofmt-aligned (the original misaligned useCloud).
type HybridEngine struct {
    local    *embedding.Engine                  // default engine for short inputs
    cloud    *customembed.CloudEmbeddingEngine  // used for long inputs when useCloud is set
    useCloud bool                               // enables cloud routing for long inputs
}

// Embed generates an embedding for text. Inputs under 500 bytes always use
// the local engine; longer inputs go to the cloud engine when cloud routing
// is enabled, otherwise they fall back to the local engine.
func (e *HybridEngine) Embed(text string) ([]float32, error) {
    // Single guard: only long inputs with cloud routing enabled leave the box.
    if len(text) >= 500 && e.useCloud {
        return e.cloud.Embed(text)
    }
    return e.local.Embed(text)
}

// CosineSimilarity delegates to the local engine; the computation is the
// same regardless of which engine produced the vectors.
func (e *HybridEngine) CosineSimilarity(a, b []float32) float64 {
    sim := e.local.CosineSimilarity(a, b)
    return sim
}

// IsEnabled reports whether at least one of the underlying engines is ready.
func (e *HybridEngine) IsEnabled() bool {
    if e.local.IsEnabled() {
        return true
    }
    return e.cloud.IsEnabled()
}

Caching Layer

Add caching to reduce API calls:
package cachedembed

import (
    "crypto/sha256"
    "fmt"
    "sync"
)

// CachedEmbeddingEngine wraps another EmbeddingEngine and memoizes results
// keyed by a SHA-256 hash of the input text, reducing repeated API calls.
// NOTE(review): assumes the EmbeddingEngine interface is visible in this
// package (declare or import it here) — confirm during integration.
// Fields are gofmt-aligned (the original misaligned maxSize).
type CachedEmbeddingEngine struct {
    engine  EmbeddingEngine      // wrapped engine that actually computes embeddings
    cache   map[string][]float32 // hash(text) -> embedding
    mu      sync.RWMutex         // protects cache
    maxSize int                  // entry count that triggers wholesale eviction
}

// NewCachedEmbeddingEngine wraps engine with an in-memory cache that holds
// at most maxSize entries before being cleared.
func NewCachedEmbeddingEngine(engine EmbeddingEngine, maxSize int) *CachedEmbeddingEngine {
    c := new(CachedEmbeddingEngine)
    c.engine = engine
    c.cache = make(map[string][]float32)
    c.maxSize = maxSize
    return c
}

// Embed returns the cached embedding for text when available; otherwise it
// delegates to the wrapped engine and stores the result. When the cache has
// reached maxSize entries, the entire cache is dropped before inserting
// (simple wholesale eviction).
func (e *CachedEmbeddingEngine) Embed(text string) ([]float32, error) {
    // Key by SHA-256 of the input so arbitrary-length text maps to a
    // fixed-size map key.
    sum := sha256.Sum256([]byte(text))
    cacheKey := fmt.Sprintf("%x", sum)

    // Fast path: serve from cache under a read lock.
    e.mu.RLock()
    cached, hit := e.cache[cacheKey]
    e.mu.RUnlock()
    if hit {
        return cached, nil
    }

    // Miss: compute via the wrapped engine, outside any lock.
    embedding, err := e.engine.Embed(text)
    if err != nil {
        return nil, err
    }

    // Insert under the write lock, evicting wholesale when full.
    e.mu.Lock()
    defer e.mu.Unlock()
    if len(e.cache) >= e.maxSize {
        // Simple eviction: clear entire cache
        e.cache = make(map[string][]float32)
    }
    e.cache[cacheKey] = embedding

    return embedding, nil
}

// CosineSimilarity delegates to the wrapped engine unchanged; caching only
// applies to Embed.
func (e *CachedEmbeddingEngine) CosineSimilarity(a, b []float32) float64 {
    sim := e.engine.CosineSimilarity(a, b)
    return sim
}

// IsEnabled reports the readiness of the wrapped engine; the cache layer
// itself is always available.
func (e *CachedEmbeddingEngine) IsEnabled() bool {
    ready := e.engine.IsEnabled()
    return ready
}

Multilingual Models

| Model | Dimensions | Languages | Use Case |
|-------|------------|-----------|----------|
| paraphrase-multilingual-MiniLM-L12-v2 | 384 | 50+ | General multilingual |
| LaBSE | 768 | 109 | Cross-lingual search |
| multilingual-e5-large | 1024 | 100+ | High-quality multilingual |

Domain-Specific Models

| Model | Dimensions | Domain | Use Case |
|-------|------------|--------|----------|
| BiomedNLP-PubMedBERT | 768 | Medical | Biomedical text |
| legal-bert-base-uncased | 768 | Legal | Legal documents |
| codebert-base | 768 | Code | Source code similarity |

High-Dimensional Models

| Model | Dimensions | Deployment | Use Case |
|-------|------------|------------|----------|
| text-embedding-ada-002 (OpenAI) | 1536 | Cloud API | General-purpose |
| gte-large | 1024 | Local ONNX | High accuracy |
| e5-large-v2 | 1024 | Local ONNX | Balanced quality |

Testing Your Custom Engine

package customembed_test

import (
    "testing"
    "yourpackage/customembed"
)

// TestCustomEngine exercises embedding generation and verifies that related
// texts score higher similarity than unrelated ones.
// NOTE(review): this is an integration-style test — it performs real HTTP
// calls against the configured endpoint and needs a valid API key.
func TestCustomEngine(t *testing.T) {
    engine := customembed.NewCloudEmbeddingEngine(
        "https://api.openai.com/v1/embeddings",
        "test-key",
        1536,
    )

    // Test embedding generation
    vec, err := engine.Embed("test query")
    if err != nil {
        t.Fatalf("Failed to generate embedding: %v", err)
    }

    if len(vec) != 1536 {
        t.Errorf("Expected 1536 dimensions, got %d", len(vec))
    }

    // Test similarity computation. Fail fast on any embedding error instead
    // of silently comparing garbage vectors (the original discarded these
    // errors with _).
    vec1, err := engine.Embed("machine learning")
    if err != nil {
        t.Fatalf("Embed(machine learning): %v", err)
    }
    vec2, err := engine.Embed("artificial intelligence")
    if err != nil {
        t.Fatalf("Embed(artificial intelligence): %v", err)
    }
    vec3, err := engine.Embed("baking recipes")
    if err != nil {
        t.Fatalf("Embed(baking recipes): %v", err)
    }

    sim1 := engine.CosineSimilarity(vec1, vec2)
    sim2 := engine.CosineSimilarity(vec1, vec3)

    if sim1 <= sim2 {
        t.Errorf("Expected higher similarity for related texts (related=%f, unrelated=%f)", sim1, sim2)
    }
}

Best Practices

Match Dimensions - Ensure all embeddings have the same dimensionality for valid comparisons.
Normalize Vectors - Apply L2 normalization for optimal cosine similarity computation.
API Rate Limits - Implement caching and rate limiting when using cloud embedding APIs.
Performance - Local models (5-10ms) are significantly faster than API calls (50-200ms).

Next Steps