npm install cactus-react-native react-native-nitro-modules

Get started with Cactus in just a few lines of code:
import { CactusLM, type CactusLMMessage } from 'cactus-react-native';
// Create a new instance
const cactusLM = new CactusLM();
// Download the model
await cactusLM.download({
onProgress: (progress) => console.log(`Download: ${Math.round(progress * 100)}%`)
});
// Generate a completion
const messages: CactusLMMessage[] = [
{ role: 'user', content: 'What is the capital of France?' }
];
const result = await cactusLM.complete({ messages });
console.log(result.response); // "The capital of France is Paris."
// Clean up resources
await cactusLM.destroy();

Using the React Hook:
import { useCactusLM } from 'cactus-react-native';
const App = () => {
const cactusLM = useCactusLM();
useEffect(() => {
// Download the model if not already available
if (!cactusLM.isDownloaded) {
cactusLM.download();
}
}, []);
const handleGenerate = () => {
// Generate a completion
cactusLM.complete({
messages: [{ role: 'user', content: 'Hello!' }],
});
};
if (cactusLM.isDownloading) {
return (
<Text>
Downloading model: {Math.round(cactusLM.downloadProgress * 100)}%
</Text>
);
}
return (
<>
<Button onPress={handleGenerate} title="Generate" />
<Text>{cactusLM.completion}</Text>
</>
);
};

Choose model quantization and NPU acceleration with Pro models.
import { CactusLM } from 'cactus-react-native';
// Use int8 for better accuracy (default)
const cactusLM = new CactusLM({
model: 'lfm2-vl-450m',
options: {
quantization: 'int8', // 'int4' or 'int8'
pro: false
}
});
// Use pro models for NPU acceleration
const cactusPro = new CactusLM({
model: 'lfm2-vl-450m',
options: {
quantization: 'int8',
pro: true
}
});

Generate text responses from the model by providing a conversation history.
import { CactusLM, type CactusLMMessage } from 'cactus-react-native';
const cactusLM = new CactusLM();
const messages: CactusLMMessage[] = [{ role: 'user', content: 'Hello, World!' }];
const onToken = (token: string) => { console.log('Token:', token) };
const result = await cactusLM.complete({ messages, onToken });
console.log('Completion result:', result);

import { useCactusLM, type CactusLMMessage } from 'cactus-react-native';
const App = () => {
const cactusLM = useCactusLM();
const handleComplete = async () => {
const messages: CactusLMMessage[] = [{ role: 'user', content: 'Hello, World!' }];
const result = await cactusLM.complete({ messages });
console.log('Completion result:', result);
};
return (
<>
<Button title="Complete" onPress={handleComplete} />
<Text>{cactusLM.completion}</Text>
</>
);
};

Vision allows you to pass images along with text prompts, enabling the model to analyze and understand visual content.
import { CactusLM, type CactusLMMessage } from 'cactus-react-native';
// Vision-capable model
const cactusLM = new CactusLM({ model: 'lfm2-vl-450m' });
const messages: CactusLMMessage[] = [
{
role: 'user',
content: "What's in the image?",
images: ['path/to/your/image'],
},
];
const result = await cactusLM.complete({ messages });
console.log('Response:', result.response);

import { useCactusLM, type CactusLMMessage } from 'cactus-react-native';
const App = () => {
// Vision-capable model
const cactusLM = useCactusLM({ model: 'lfm2-vl-450m' });
const handleAnalyze = async () => {
const messages: CactusLMMessage[] = [
{
role: 'user',
content: "What's in the image?",
images: ['path/to/your/image'],
},
];
await cactusLM.complete({ messages });
};
return (
<>
<Button title="Analyze Image" onPress={handleAnalyze} />
<Text>{cactusLM.completion}</Text>
</>
);
};

Enable the model to generate function calls by defining available tools and their parameters.
import { CactusLM, type CactusLMMessage, type CactusLMTool } from 'cactus-react-native';
const tools: CactusLMTool[] = [
{
name: 'get_weather',
description: 'Get current weather for a location',
parameters: {
type: 'object',
properties: {
location: {
type: 'string',
description: 'City name',
},
},
required: ['location'],
},
},
];
const cactusLM = new CactusLM();
const messages: CactusLMMessage[] = [
{ role: 'user', content: "What's the weather in San Francisco?" },
];
const result = await cactusLM.complete({ messages, tools });
console.log('Response:', result.response);
console.log('Function calls:', result.functionCalls);

import { useCactusLM, type CactusLMMessage, type CactusLMTool } from 'cactus-react-native';
const tools: CactusLMTool[] = [
{
name: 'get_weather',
description: 'Get current weather for a location',
parameters: {
type: 'object',
properties: {
location: {
type: 'string',
description: 'City name',
},
},
required: ['location'],
},
},
];
const App = () => {
const cactusLM = useCactusLM();
const handleComplete = async () => {
const messages: CactusLMMessage[] = [
{ role: 'user', content: "What's the weather in San Francisco?" },
];
const result = await cactusLM.complete({ messages, tools });
console.log('Response:', result.response);
console.log('Function calls:', result.functionCalls);
};
return <Button title="Complete" onPress={handleComplete} />;
};

RAG allows you to provide a corpus of documents that the model can reference during generation, enabling it to answer questions based on your data.
import { CactusLM, type CactusLMMessage } from 'cactus-react-native';
const cactusLM = new CactusLM({
corpusDir: 'path/to/your/corpus', // Directory containing .txt files
});
const messages: CactusLMMessage[] = [
{ role: 'user', content: 'What information is in the documents?' },
];
const result = await cactusLM.complete({ messages });
console.log(result.response);

import { useCactusLM, type CactusLMMessage } from 'cactus-react-native';
const App = () => {
const cactusLM = useCactusLM({
corpusDir: 'path/to/your/corpus', // Directory containing .txt files
});
const handleAsk = async () => {
const messages: CactusLMMessage[] = [
{ role: 'user', content: 'What information is in the documents?' },
];
await cactusLM.complete({ messages });
};
return (
<>
<Button title="Ask Question" onPress={handleAsk} />
<Text>{cactusLM.completion}</Text>
</>
);
};

Convert text into tokens using the model's tokenizer.
import { CactusLM } from 'cactus-react-native';
const cactusLM = new CactusLM();
const result = await cactusLM.tokenize({ text: 'Hello, World!' });
console.log('Token IDs:', result.tokens);

import { useCactusLM } from 'cactus-react-native';
const App = () => {
const cactusLM = useCactusLM();
const handleTokenize = async () => {
const result = await cactusLM.tokenize({ text: 'Hello, World!' });
console.log('Token IDs:', result.tokens);
};
return <Button title="Tokenize" onPress={handleTokenize} />;
};

Calculate perplexity scores for a window of tokens within a sequence.
import { CactusLM } from 'cactus-react-native';
const cactusLM = new CactusLM();
const tokens = [123, 456, 789, 101, 112];
const result = await cactusLM.scoreWindow({
tokens,
start: 1,
end: 3,
context: 2
});
console.log('Score:', result.score);

import { useCactusLM } from 'cactus-react-native';
const App = () => {
const cactusLM = useCactusLM();
const handleScoreWindow = async () => {
const tokens = [123, 456, 789, 101, 112];
const result = await cactusLM.scoreWindow({
tokens,
start: 1,
end: 3,
context: 2
});
console.log('Score:', result.score);
};
return <Button title="Score Window" onPress={handleScoreWindow} />;
};

Convert text and images into numerical vector representations that capture semantic meaning, useful for similarity search and semantic understanding.
import { CactusLM } from 'cactus-react-native';
const cactusLM = new CactusLM();
const result = await cactusLM.embed({ text: 'Hello, World!' });
console.log('Embedding vector:', result.embedding);
console.log('Embedding vector length:', result.embedding.length);

import { useCactusLM } from 'cactus-react-native';
const App = () => {
const cactusLM = useCactusLM();
const handleEmbed = async () => {
const result = await cactusLM.embed({ text: 'Hello, World!' });
console.log('Embedding vector:', result.embedding);
console.log('Embedding vector length:', result.embedding.length);
};
return <Button title="Embed" onPress={handleEmbed} />;
};

import { CactusLM } from 'cactus-react-native';
const cactusLM = new CactusLM({ model: 'lfm2-vl-450m' });
const result = await cactusLM.imageEmbed({ imagePath: 'path/to/your/image.jpg' });
console.log('Image embedding vector:', result.embedding);
console.log('Embedding vector length:', result.embedding.length);

import { useCactusLM } from 'cactus-react-native';
const App = () => {
const cactusLM = useCactusLM({ model: 'lfm2-vl-450m' });
const handleImageEmbed = async () => {
const result = await cactusLM.imageEmbed({ imagePath: 'path/to/your/image.jpg' });
console.log('Image embedding vector:', result.embedding);
console.log('Embedding vector length:', result.embedding.length);
};
return <Button title="Embed Image" onPress={handleImageEmbed} />;
};

The CactusSTT class provides audio transcription and audio embedding capabilities using speech-to-text models such as Whisper and Moonshine.
Transcribe audio to text with streaming support. Accepts either a file path or raw PCM audio samples.
import { CactusSTT } from 'cactus-react-native';
const cactusSTT = new CactusSTT({ model: 'whisper-small' });
// Transcribe from file path
const result = await cactusSTT.transcribe({
audio: 'path/to/audio.wav',
onToken: (token) => console.log('Token:', token)
});
console.log('Transcription:', result.response);
// Or transcribe from raw PCM samples
const pcmSamples: number[] = [/* ... */];
const result2 = await cactusSTT.transcribe({
audio: pcmSamples,
onToken: (token) => console.log('Token:', token)
});
console.log('Transcription:', result2.response);

import { useCactusSTT } from 'cactus-react-native';
const App = () => {
const cactusSTT = useCactusSTT({ model: 'whisper-small' });
const handleTranscribe = async () => {
// Transcribe from file path
const result = await cactusSTT.transcribe({
audio: 'path/to/audio.wav',
});
console.log('Transcription:', result.response);
const pcmSamples: number[] = [/* ... */];
const result2 = await cactusSTT.transcribe({
audio: pcmSamples,
});
console.log('Transcription:', result2.response);
};
return (
<>
<Button onPress={handleTranscribe} title="Transcribe" />
<Text>{cactusSTT.transcription}</Text>
</>
);
};

Transcribe audio in real-time with incremental results. Each call to streamTranscribeProcess feeds an audio chunk and returns the currently confirmed and pending text.
import { CactusSTT } from 'cactus-react-native';
const cactusSTT = new CactusSTT({ model: 'whisper-small' });
await cactusSTT.streamTranscribeStart({
confirmationThreshold: 0.99, // confidence required to confirm text
minChunkSize: 32000, // minimum samples before processing
});
const audioChunk: number[] = [/* PCM samples as bytes */];
const result = await cactusSTT.streamTranscribeProcess({ audio: audioChunk });
console.log('Confirmed:', result.confirmed);
console.log('Pending:', result.pending);
const final = await cactusSTT.streamTranscribeStop();
console.log('Final confirmed:', final.confirmed);

import { useCactusSTT } from 'cactus-react-native';
const App = () => {
const cactusSTT = useCactusSTT({ model: 'whisper-small' });
const handleStart = async () => {
await cactusSTT.streamTranscribeStart({ confirmationThreshold: 0.99 });
};
const handleChunk = async (audioChunk: number[]) => {
const result = await cactusSTT.streamTranscribeProcess({ audio: audioChunk });
console.log('Confirmed:', result.confirmed);
console.log('Pending:', result.pending);
};
const handleStop = async () => {
const final = await cactusSTT.streamTranscribeStop();
console.log('Final:', final.confirmed);
};
return (
<>
<Button onPress={handleStart} title="Start" />
<Button onPress={handleStop} title="Stop" />
<Text>{cactusSTT.streamTranscribeConfirmed}</Text>
<Text>{cactusSTT.streamTranscribePending}</Text>
</>
);
};

Generate embeddings from audio files for audio understanding.
import { CactusSTT } from 'cactus-react-native';
const cactusSTT = new CactusSTT();
const result = await cactusSTT.audioEmbed({
audioPath: 'path/to/audio.wav'
});
console.log('Audio embedding vector:', result.embedding);
console.log('Embedding vector length:', result.embedding.length);

import { useCactusSTT } from 'cactus-react-native';
const App = () => {
const cactusSTT = useCactusSTT();
const handleAudioEmbed = async () => {
const result = await cactusSTT.audioEmbed({
audioPath: 'path/to/audio.wav'
});
console.log('Audio embedding vector:', result.embedding);
console.log('Embedding vector length:', result.embedding.length);
};
return <Button title="Embed Audio" onPress={handleAudioEmbed} />;
};

Detect the spoken language in an audio file. Only available on the class, not the hook.
import { CactusSTT } from 'cactus-react-native';
const cactusSTT = new CactusSTT({ model: 'whisper-small' });
const result = await cactusSTT.detectLanguage({
audio: 'path/to/audio.wav',
options: { useVad: true },
});
console.log('Language:', result.language); // e.g. 'en'
console.log('Confidence:', result.confidence);

The CactusAudio class provides voice activity detection (VAD), speaker diarization, and speaker embedding extraction.
import { CactusAudio } from 'cactus-react-native';
const cactusAudio = new CactusAudio({ model: 'silero-vad' });
const result = await cactusAudio.vad({
audio: 'path/to/audio.wav',
options: {
threshold: 0.5,
minSpeechDurationMs: 250,
minSilenceDurationMs: 100,
}
});
console.log('Speech segments:', result.segments);
// [{ start: 0, end: 16000 }, { start: 32000, end: 48000 }, ...]
console.log('Total time (ms):', result.totalTime);

import { CactusAudio } from 'cactus-react-native';
const cactusAudio = new CactusAudio({ model: 'silero-vad' });
const result = await cactusAudio.diarize({
audio: 'path/to/audio.wav',
options: {
numSpeakers: 2,
minSpeakers: 1,
maxSpeakers: 4,
}
});
console.log('Number of speakers:', result.numSpeakers);
console.log('Scores:', result.scores);

import { CactusAudio } from 'cactus-react-native';
const cactusAudio = new CactusAudio({ model: 'silero-vad' });
const result = await cactusAudio.embedSpeaker({
audio: 'path/to/audio.wav',
});
console.log('Speaker embedding:', result.embedding);

import { useCactusAudio } from 'cactus-react-native';
const App = () => {
const cactusAudio = useCactusAudio({ model: 'silero-vad' });
const handleVAD = async () => {
const result = await cactusAudio.vad({
audio: 'path/to/audio.wav',
});
console.log('Speech segments:', result.segments);
};
const handleDiarize = async () => {
const result = await cactusAudio.diarize({
audio: 'path/to/audio.wav',
});
console.log('Speakers:', result.numSpeakers);
};
return (
<>
<Button title="Detect Speech" onPress={handleVAD} />
<Button title="Diarize" onPress={handleDiarize} />
</>
);
};

The CactusIndex class provides a vector database for storing and querying embeddings with metadata, enabling similarity search and retrieval.
import { CactusIndex } from 'cactus-react-native';
const cactusIndex = new CactusIndex('my-index', 1024);
await cactusIndex.init();

import { useCactusIndex } from 'cactus-react-native';
const App = () => {
const cactusIndex = useCactusIndex({
name: 'my-index',
embeddingDim: 1024
});
const handleInit = async () => {
await cactusIndex.init();
};
return <Button title="Initialize Index" onPress={handleInit} />
};

Add documents with their embeddings and metadata to the index.
import { CactusIndex } from 'cactus-react-native';
const cactusIndex = new CactusIndex('my-index', 1024);
await cactusIndex.init();
await cactusIndex.add({
ids: [1, 2, 3],
documents: ['First document', 'Second document', 'Third document'],
embeddings: [
[0.1, 0.2, ...],
[0.3, 0.4, ...],
[0.5, 0.6, ...]
],
metadatas: ['metadata1', 'metadata2', 'metadata3']
});

import { useCactusIndex } from 'cactus-react-native';
const App = () => {
const cactusIndex = useCactusIndex({
name: 'my-index',
embeddingDim: 1024
});
const handleAdd = async () => {
await cactusIndex.add({
ids: [1, 2, 3],
documents: ['First document', 'Second document', 'Third document'],
embeddings: [
[0.1, 0.2, ...],
[0.3, 0.4, ...],
[0.5, 0.6, ...]
],
metadatas: ['metadata1', 'metadata2', 'metadata3']
});
};
return <Button title="Add Documents" onPress={handleAdd} />;
};

Search for similar documents using embedding vectors.
import { CactusIndex } from 'cactus-react-native';
const cactusIndex = new CactusIndex('my-index', 1024);
await cactusIndex.init();
const result = await cactusIndex.query({
embeddings: [[0.1, 0.2, ...]],
options: {
topK: 5,
scoreThreshold: 0.7
}
});
console.log('IDs:', result.ids);
console.log('Scores:', result.scores);

import { useCactusIndex } from 'cactus-react-native';
const App = () => {
const cactusIndex = useCactusIndex({
name: 'my-index',
embeddingDim: 1024
});
const handleQuery = async () => {
const result = await cactusIndex.query({
embeddings: [[0.1, 0.2, ...]],
options: {
topK: 5,
scoreThreshold: 0.7
}
});
console.log('IDs:', result.ids);
console.log('Scores:', result.scores);
};
return <Button title="Query Index" onPress={handleQuery} />;
};

Get documents by their IDs.
import { CactusIndex } from 'cactus-react-native';
const cactusIndex = new CactusIndex('my-index', 1024);
await cactusIndex.init();
const result = await cactusIndex.get({ ids: [1, 2, 3] });
console.log('Documents:', result.documents);
console.log('Metadatas:', result.metadatas);
console.log('Embeddings:', result.embeddings);

import { useCactusIndex } from 'cactus-react-native';
const App = () => {
const cactusIndex = useCactusIndex({
name: 'my-index',
embeddingDim: 1024
});
const handleGet = async () => {
const result = await cactusIndex.get({ ids: [1, 2, 3] });
console.log('Documents:', result.documents);
console.log('Metadatas:', result.metadatas);
console.log('Embeddings:', result.embeddings);
};
return <Button title="Get Documents" onPress={handleGet} />;
};

Mark documents as deleted by their IDs.
import { CactusIndex } from 'cactus-react-native';
const cactusIndex = new CactusIndex('my-index', 1024);
await cactusIndex.init();
await cactusIndex.delete({ ids: [1, 2, 3] });

import { useCactusIndex } from 'cactus-react-native';
const App = () => {
const cactusIndex = useCactusIndex({
name: 'my-index',
embeddingDim: 1024
});
const handleDelete = async () => {
await cactusIndex.delete({ ids: [1, 2, 3] });
};
return <Button title="Delete Documents" onPress={handleDelete} />;
};

Optimize the index by removing deleted documents and reorganizing data.
import { CactusIndex } from 'cactus-react-native';
const cactusIndex = new CactusIndex('my-index', 1024);
await cactusIndex.init();
await cactusIndex.compact();

import { useCactusIndex } from 'cactus-react-native';
const App = () => {
const cactusIndex = useCactusIndex({
name: 'my-index',
embeddingDim: 1024
});
const handleCompact = async () => {
await cactusIndex.compact();
};
return <Button title="Compact Index" onPress={handleCompact} />;
};

new CactusLM(params?: CactusLMParams)
Parameters:
- `model` — Model slug or absolute path to a model file (default: `'qwen3-0.6b'`).
- `corpusDir` — Directory containing text files for RAG (default: `undefined`).
- `cacheIndex` — Whether to cache the RAG corpus index on disk (default: `false`).
- `options` — Model options for quantization and NPU acceleration:
  - `quantization` — Quantization type: `'int4' | 'int8'` (default: `'int8'`).
  - `pro` — Enable NPU-accelerated models (default: `false`).
download(params?: CactusLMDownloadParams): Promise<void>
Downloads the model. If the model is already downloaded, returns immediately with progress 1. Throws an error if a download is already in progress.
Parameters:
- `onProgress` — Callback for download progress (0-1).
init(): Promise<void>
Initializes the model and prepares it for inference. Safe to call multiple times (idempotent). Throws an error if the model is not downloaded yet.
complete(params: CactusLMCompleteParams): Promise<CactusLMCompleteResult>
Performs text completion with optional streaming and tool support. Automatically calls init() if not already initialized. Throws an error if a generation (completion or embedding) is already in progress.
Parameters:
- `messages` — Array of `CactusLMMessage` objects.
- `options` — Generation options:
  - `temperature` — Sampling temperature.
  - `topP` — Nucleus sampling threshold.
  - `topK` — Top-K sampling limit.
  - `maxTokens` — Maximum number of tokens to generate (default: `512`).
  - `stopSequences` — Array of strings to stop generation.
  - `forceTools` — Force the model to call one of the provided tools (default: `false`).
  - `telemetryEnabled` — Enable telemetry for this request (default: `true`).
  - `confidenceThreshold` — Confidence threshold below which cloud handoff is triggered (default: `0.7`).
  - `toolRagTopK` — Number of tools to select via RAG when tool list is large (default: `2`).
  - `includeStopSequences` — Whether to include stop sequences in the response (default: `false`).
  - `useVad` — Whether to use VAD preprocessing (default: `true`).
  - `enableThinking` — Whether to enable thinking/reasoning output if supported by the model (default: unset).
- `tools` — Array of `CactusLMTool` objects for function calling.
- `onToken` — Callback for streaming tokens.
prefill(params: CactusLMPrefillParams): Promise<CactusLMPrefillResult>
Runs prompt prefill without generating any output tokens. Useful for measuring prefill performance or warming up the model's KV cache. Automatically calls init() if not already initialized. Throws an error if a generation is already in progress.
Parameters:
- `messages` — Array of `CactusLMMessage` objects.
- `options` — Same options as `complete`.
- `tools` — Array of `CactusLMTool` objects.
tokenize(params: CactusLMTokenizeParams): Promise<CactusLMTokenizeResult>
Converts text into tokens using the model's tokenizer.
Parameters:
- `text` — Text to tokenize.
scoreWindow(params: CactusLMScoreWindowParams): Promise<CactusLMScoreWindowResult>
Calculates the log-probability score for a window of tokens within a sequence.
Parameters:
- `tokens` — Array of token IDs.
- `start` — Start index of the window.
- `end` — End index of the window.
- `context` — Number of context tokens before the window.
embed(params: CactusLMEmbedParams): Promise<CactusLMEmbedResult>
Generates embeddings for the given text. Automatically calls init() if not already initialized. Throws an error if a generation (completion or embedding) is already in progress.
Parameters:
- `text` — Text to embed.
- `normalize` — Whether to normalize the embedding vector (default: `false`).
imageEmbed(params: CactusLMImageEmbedParams): Promise<CactusLMImageEmbedResult>
Generates embeddings for the given image. Requires a vision-capable model. Automatically calls init() if not already initialized. Throws an error if a generation (completion or embedding) is already in progress.
Parameters:
- `imagePath` — Path to the image file.
stop(): Promise<void>
Stops ongoing generation.
reset(): Promise<void>
Resets the model's internal state, clearing any cached context. Automatically calls stop() first.
destroy(): Promise<void>
Releases all resources associated with the model. Automatically calls stop() first. Safe to call even if the model is not initialized.
getModels(): Promise<CactusModel[]>
Returns available models.
getModelName(): string
Returns the computed model identifier including quantization and pro suffix (e.g., 'qwen3-0.6b-int8', 'lfm2-vl-450m-int4-pro').
The useCactusLM hook manages a CactusLM instance with reactive state. When model parameters (model, corpusDir, cacheIndex, options) change, the hook creates a new instance and resets all state. The hook automatically cleans up resources when the component unmounts.
- `completion: string` — Current generated text. Automatically accumulated during streaming. Cleared before each new completion and when calling `reset()` or `destroy()`.
- `isGenerating: boolean` — Whether the model is currently running an operation. Shared by `complete`, `tokenize`, `scoreWindow`, `embed`, and `imageEmbed`.
- `isInitializing: boolean` — Whether the model is initializing.
- `isDownloaded: boolean` — Whether the model is downloaded locally. Automatically checked when the hook mounts or model changes.
- `isDownloading: boolean` — Whether the model is being downloaded.
- `downloadProgress: number` — Download progress (0-1). Reset to `0` after download completes.
- `error: string | null` — Last error message from any operation, or `null` if there is no error. Cleared before starting new operations.
- `download(params?: CactusLMDownloadParams): Promise<void>` — Downloads the model. Updates `isDownloading` and `downloadProgress` state during download. Sets `isDownloaded` to `true` on success.
- `init(): Promise<void>` — Initializes the model for inference. Sets `isInitializing` to `true` during initialization.
- `complete(params: CactusLMCompleteParams): Promise<CactusLMCompleteResult>` — Generates text completions. Automatically accumulates tokens in the `completion` state during streaming. Sets `isGenerating` to `true` while generating. Clears `completion` before starting.
- `tokenize(params: CactusLMTokenizeParams): Promise<CactusLMTokenizeResult>` — Converts text into tokens. Sets `isGenerating` to `true` during operation.
- `scoreWindow(params: CactusLMScoreWindowParams): Promise<CactusLMScoreWindowResult>` — Calculates log-probability scores for a window of tokens. Sets `isGenerating` to `true` during operation.
- `embed(params: CactusLMEmbedParams): Promise<CactusLMEmbedResult>` — Generates embeddings for the given text. Sets `isGenerating` to `true` during operation.
- `imageEmbed(params: CactusLMImageEmbedParams): Promise<CactusLMImageEmbedResult>` — Generates embeddings for the given image. Sets `isGenerating` to `true` while generating.
- `stop(): Promise<void>` — Stops ongoing generation. Clears any errors.
- `reset(): Promise<void>` — Resets the model's internal state, clearing cached context. Also clears the `completion` state.
- `destroy(): Promise<void>` — Releases all resources associated with the model. Clears the `completion` state. Automatically called when the component unmounts.
- `getModels(): Promise<CactusModel[]>` — Returns available models.
new CactusSTT(params?: CactusSTTParams)
Parameters:
- `model` — Model slug or absolute path to a model file (default: `'whisper-small'`).
- `options` — Model options for quantization and NPU acceleration:
  - `quantization` — Quantization type: `'int4' | 'int8'` (default: `'int8'`).
  - `pro` — Enable NPU-accelerated models (default: `false`).
download(params?: CactusSTTDownloadParams): Promise<void>
Downloads the model. If the model is already downloaded, returns immediately with progress 1. Throws an error if a download is already in progress.
Parameters:
- `onProgress` — Callback for download progress (0-1).
init(): Promise<void>
Initializes the model and prepares it for inference. Safe to call multiple times (idempotent). Throws an error if the model is not downloaded yet.
transcribe(params: CactusSTTTranscribeParams): Promise<CactusSTTTranscribeResult>
Transcribes audio to text with optional streaming support. Accepts either a file path or raw PCM audio samples. Automatically calls init() if not already initialized. Throws an error if a generation is already in progress.
Parameters:
- `audio` — Path to the audio file or raw PCM samples as a byte array.
- `prompt` — Optional prompt to guide transcription (default: `'<|startoftranscript|><|en|><|transcribe|><|notimestamps|>'`).
- `options` — Transcription options:
  - `temperature` — Sampling temperature.
  - `topP` — Nucleus sampling threshold.
  - `topK` — Top-K sampling limit.
  - `maxTokens` — Maximum number of tokens to generate (default: `384`).
  - `stopSequences` — Array of strings to stop generation.
  - `useVad` — Whether to apply VAD to strip silence before transcription (default: `true`).
  - `telemetryEnabled` — Enable telemetry for this request (default: `true`).
  - `confidenceThreshold` — Confidence threshold for quality assessment (default: `0.7`).
  - `cloudHandoffThreshold` — Max entropy threshold above which cloud handoff is triggered.
  - `includeStopSequences` — Whether to include stop sequences in the response (default: `false`).
- `onToken` — Callback for streaming tokens.
streamTranscribeStart(options?: CactusSTTStreamTranscribeStartOptions): Promise<void>
Starts a streaming transcription session. Automatically calls init() if not already initialized. If a session is already active, returns immediately.
Parameters:
- `confirmationThreshold` — Fuzzy match ratio required to confirm a transcription segment (default: `0.99`).
- `minChunkSize` — Minimum number of audio samples before processing (default: `32000`).
- `telemetryEnabled` — Enable telemetry for this session (default: `true`).
- `language` — Language code for transcription (e.g., `'en'`, `'es'`, `'fr'`). If not set, language is auto-detected.
streamTranscribeProcess(params: CactusSTTStreamTranscribeProcessParams): Promise<CactusSTTStreamTranscribeProcessResult>
Feeds audio samples into the streaming session and returns the current transcription state. Throws an error if no session is active.
Parameters:
- `audio` — PCM audio samples as a byte array.
streamTranscribeStop(): Promise<CactusSTTStreamTranscribeStopResult>
Stops the streaming session and returns the final confirmed transcription text. Throws an error if no session is active.
detectLanguage(params: CactusSTTDetectLanguageParams): Promise<CactusSTTDetectLanguageResult>
Detects the spoken language in the given audio. Automatically calls init() if not already initialized. Throws an error if a generation is already in progress.
Parameters:
- `audio` — Path to the audio file or raw PCM samples as a byte array.
- `options`:
  - `useVad` — Whether to apply VAD before detection (default: `true`).
audioEmbed(params: CactusSTTAudioEmbedParams): Promise<CactusSTTAudioEmbedResult>
Generates embeddings for the given audio file. Automatically calls init() if not already initialized. Throws an error if a generation is already in progress.
Parameters:
- `audioPath` — Path to the audio file.
stop(): Promise<void>
Stops ongoing transcription or embedding generation.
reset(): Promise<void>
Resets the model's internal state. Automatically calls stop() first.
destroy(): Promise<void>
Releases all resources associated with the model. Stops any active streaming session. Automatically calls stop() first. Safe to call even if the model is not initialized.
getModels(): Promise<CactusModel[]>
Returns available speech-to-text models.
getModelName(): string
Returns the computed model identifier including quantization and pro suffix (e.g., 'whisper-small-int8').
The useCactusSTT hook manages a CactusSTT instance with reactive state. When model parameters (model, options) change, the hook creates a new instance and resets all state. The hook automatically cleans up resources when the component unmounts.
- `transcription: string` — Current transcription text. Automatically accumulated during streaming. Cleared before each new transcription and when calling `reset()` or `destroy()`.
- `streamTranscribeConfirmed: string` — Accumulated confirmed text from the active streaming session. Updated after each successful `streamTranscribeProcess` call and finalized by `streamTranscribeStop`.
- `streamTranscribePending: string` — Uncommitted (in-progress) text from the current audio chunk. Cleared when the session stops.
- `isGenerating: boolean` — Whether the model is currently transcribing or embedding. Both operations share this flag.
- `isStreamTranscribing: boolean` — Whether a streaming transcription session is currently active.
- `isInitializing: boolean` — Whether the model is initializing.
- `isDownloaded: boolean` — Whether the model is downloaded locally. Automatically checked when the hook mounts or model changes.
- `isDownloading: boolean` — Whether the model is being downloaded.
- `downloadProgress: number` — Download progress (0-1). Reset to `0` after download completes.
- `error: string | null` — Last error message from any operation, or `null` if there is no error. Cleared before starting new operations.
- `download(params?: CactusSTTDownloadParams): Promise<void>` — Downloads the model. Updates `isDownloading` and `downloadProgress` state during download. Sets `isDownloaded` to `true` on success.
- `init(): Promise<void>` — Initializes the model for inference. Sets `isInitializing` to `true` during initialization.
- `transcribe(params: CactusSTTTranscribeParams): Promise<CactusSTTTranscribeResult>` — Transcribes audio to text. Automatically accumulates tokens in the `transcription` state during streaming. Sets `isGenerating` to `true` while generating. Clears `transcription` before starting.
- `audioEmbed(params: CactusSTTAudioEmbedParams): Promise<CactusSTTAudioEmbedResult>` — Generates embeddings for the given audio. Sets `isGenerating` to `true` during operation.
- `streamTranscribeStart(options?: CactusSTTStreamTranscribeStartOptions): Promise<void>` — Starts a streaming transcription session. If a session is already active, returns immediately. Clears `streamTranscribeConfirmed` and `streamTranscribePending` before starting. Sets `isStreamTranscribing` to `true`.
- `streamTranscribeProcess(params: CactusSTTStreamTranscribeProcessParams): Promise<CactusSTTStreamTranscribeProcessResult>` — Feeds audio and returns incremental results. Appends confirmed text to `streamTranscribeConfirmed` and updates `streamTranscribePending`.
- `streamTranscribeStop(): Promise<CactusSTTStreamTranscribeStopResult>` — Stops the session and returns the final result. Sets `isStreamTranscribing` to `false`. Appends final confirmed text to `streamTranscribeConfirmed` and clears `streamTranscribePending`.
- `stop(): Promise<void>` — Stops ongoing generation. Clears any errors.
- `reset(): Promise<void>` — Resets the model's internal state. Also clears the `transcription` state.
- `destroy(): Promise<void>` — Releases all resources associated with the model. Clears the `transcription`, `streamTranscribeConfirmed`, and `streamTranscribePending` state. Automatically called when the component unmounts.
- `getModels(): Promise<CactusModel[]>` — Returns available speech-to-text models.
new CactusAudio(params?: CactusAudioParams)
Parameters:
model - Model slug or absolute path to an audio model file (default: 'silero-vad'). options - Model options: quantization - Quantization type: 'int4' | 'int8' (default: 'int8'). pro - Enable NPU-accelerated models (default: false).
download(params?: CactusAudioDownloadParams): Promise<void>
Downloads the audio model. If the model is already downloaded, returns immediately with progress 1. Throws an error if a download is already in progress.
Parameters:
onProgress - Callback for download progress (0-1).
init(): Promise<void>
Initializes the audio model. Safe to call multiple times (idempotent). Throws an error if the model is not downloaded yet.
vad(params: CactusAudioVADParams): Promise<CactusAudioVADResult>
Runs voice activity detection on the given audio. Automatically calls init() if not already initialized.
Parameters:
audio - Path to the audio file or raw PCM samples as a byte array. options - VAD options: threshold - Speech probability threshold (default: model default). negThreshold - Silence probability threshold. minSpeechDurationMs - Minimum speech segment duration in ms. maxSpeechDurationS - Maximum speech segment duration in seconds. minSilenceDurationMs - Minimum silence duration before ending a segment. speechPadMs - Padding added to each speech segment in ms. windowSizeSamples - Processing window size in samples. samplingRate - Audio sampling rate. minSilenceAtMaxSpeech - Minimum silence at max speech duration. useMaxPossSilAtMaxSpeech - Whether to use maximum possible silence at max speech.
diarize(params: CactusAudioDiarizeParams): Promise<CactusAudioDiarizeResult>
Runs speaker diarization on the given audio. Automatically calls init() if not already initialized.
Parameters:
audio - Path to the audio file or raw PCM samples as a byte array. options - Diarize options: stepMs - Step size in milliseconds. threshold - Diarization threshold. numSpeakers - Expected number of speakers. minSpeakers - Minimum number of speakers. maxSpeakers - Maximum number of speakers.
embedSpeaker(params: CactusAudioEmbedSpeakerParams): Promise<CactusAudioEmbedSpeakerResult>
Extracts a speaker embedding vector from the given audio. Automatically calls init() if not already initialized.
Parameters:
audio - Path to the audio file or raw PCM samples as a byte array.
destroy(): Promise<void>
Releases all resources associated with the model. Safe to call even if the model is not initialized.
getModels(): Promise<CactusModel[]>
Returns available audio models.
getModelName(): string
Returns the computed model identifier including quantization and pro suffix (e.g., 'silero-vad-int8').
The useCactusAudio hook manages a CactusAudio instance with reactive state. When model parameters (model, options) change, the hook creates a new instance and resets all state. The hook automatically cleans up resources when the component unmounts.
isInitializing: boolean - Whether the model is initializing. isDownloaded: boolean - Whether the model is downloaded locally. Automatically checked when the hook mounts or model changes. isDownloading: boolean - Whether the model is being downloaded. downloadProgress: number - Download progress (0-1). Reset to 0 after download completes. error: string | null - Last error message, or null.
download(params?: CactusAudioDownloadParams): Promise<void> - Downloads the model. Updates isDownloading and downloadProgress state during download. Sets isDownloaded to true on success. init(): Promise<void> - Initializes the model. vad(params: CactusAudioVADParams): Promise<CactusAudioVADResult> - Runs voice activity detection. diarize(params: CactusAudioDiarizeParams): Promise<CactusAudioDiarizeResult> - Runs speaker diarization. embedSpeaker(params: CactusAudioEmbedSpeakerParams): Promise<CactusAudioEmbedSpeakerResult> - Extracts a speaker embedding. destroy(): Promise<void> - Releases all resources. Automatically called when the component unmounts. getModels(): Promise<CactusModel[]> - Returns available audio models.
new CactusIndex(name: string, embeddingDim: number)
Parameters:
name - Name of the index. embeddingDim - Dimension of the embedding vectors.
init(): Promise<void>
Initializes the index and prepares it for operations. Must be called before using any other methods.
add(params: CactusIndexAddParams): Promise<void>
Adds documents with their embeddings and metadata to the index.
Parameters:
ids - Array of document IDs. documents - Array of document texts. embeddings - Array of embedding vectors (each vector must match embeddingDim). metadatas - Optional array of metadata strings.
query(params: CactusIndexQueryParams): Promise<CactusIndexQueryResult>
Searches for similar documents using embedding vectors.
Parameters:
embeddings - Array of query embedding vectors. options - Query options: topK - Number of top results to return (default: 10). scoreThreshold - Minimum similarity score threshold (default: -1.0).
get(params: CactusIndexGetParams): Promise<CactusIndexGetResult>
Retrieves documents by their IDs.
Parameters:
ids - Array of document IDs to retrieve.
delete(params: CactusIndexDeleteParams): Promise<void>
Deletes documents from the index by their IDs.
Parameters:
ids - Array of document IDs to delete.
compact(): Promise<void>
Optimizes the index by removing deleted documents and reorganizing data for better performance. Call after a series of deletions.
destroy(): Promise<void>
Releases all resources associated with the index from memory.
The useCactusIndex hook manages a CactusIndex instance with reactive state. When index parameters (name or embeddingDim) change, the hook creates a new instance and resets all state. The hook automatically cleans up resources when the component unmounts.
isInitializing: boolean - Whether the index is initializing. isProcessing: boolean - Whether the index is processing an operation (add, query, get, delete, or compact). error: string | null - Last error message from any operation, or null if there is no error. Cleared before starting new operations.
init(): Promise<void> - Initializes the index. Sets isInitializing to true during initialization. add(params: CactusIndexAddParams): Promise<void> - Adds documents to the index. Sets isProcessing to true during operation. query(params: CactusIndexQueryParams): Promise<CactusIndexQueryResult> - Searches for similar documents. Sets isProcessing to true during operation. get(params: CactusIndexGetParams): Promise<CactusIndexGetResult> - Retrieves documents by IDs. Sets isProcessing to true during operation. delete(params: CactusIndexDeleteParams): Promise<void> - Deletes documents. Sets isProcessing to true during operation. compact(): Promise<void> - Optimizes the index. Sets isProcessing to true during operation. destroy(): Promise<void> - Releases all resources. Automatically called when the component unmounts.
getRegistry(): Promise<{ [key: string]: CactusModel }>
Returns all available models from HuggingFace, keyed by model slug. Result is cached across calls.
import { getRegistry } from 'cactus-react-native';
const registry = await getRegistry();
const model = registry['qwen3-0.6b'];
console.log(model);

interface CactusLMParams {
model?: string;
corpusDir?: string;
cacheIndex?: boolean;
options?: CactusModelOptions;
}interface CactusLMDownloadParams {
onProgress?: (progress: number) => void;
}interface CactusLMMessage {
role: 'user' | 'assistant' | 'system';
content?: string;
images?: string[];
}interface CactusLMCompleteOptions {
temperature?: number;
topP?: number;
topK?: number;
maxTokens?: number;
stopSequences?: string[];
forceTools?: boolean;
telemetryEnabled?: boolean;
confidenceThreshold?: number;
toolRagTopK?: number;
includeStopSequences?: boolean;
useVad?: boolean;
enableThinking?: boolean;
}interface CactusLMTool {
name: string;
description: string;
parameters: {
type: 'object';
properties: {
[key: string]: {
type: string;
description: string;
};
};
required: string[];
};
}interface CactusLMCompleteParams {
messages: CactusLMMessage[];
options?: CactusLMCompleteOptions;
tools?: CactusLMTool[];
onToken?: (token: string) => void;
}interface CactusLMPrefillParams {
messages: CactusLMMessage[];
options?: CactusLMCompleteOptions;
tools?: CactusLMTool[];
}interface CactusLMPrefillResult {
success: boolean;
error: string | null;
prefillTokens: number;
prefillTps: number;
totalTimeMs: number;
ramUsageMb: number;
}interface CactusLMCompleteResult {
success: boolean;
response: string;
thinking?: string;
functionCalls?: {
name: string;
arguments: { [key: string]: any };
}[];
cloudHandoff?: boolean;
confidence?: number;
timeToFirstTokenMs: number;
totalTimeMs: number;
prefillTokens: number;
prefillTps: number;
decodeTokens: number;
decodeTps: number;
totalTokens: number;
ramUsageMb?: number;
}interface CactusLMTokenizeParams {
text: string;
}interface CactusLMTokenizeResult {
tokens: number[];
}interface CactusLMScoreWindowParams {
tokens: number[];
start: number;
end: number;
context: number;
}interface CactusLMScoreWindowResult {
score: number;
}interface CactusLMEmbedParams {
text: string;
normalize?: boolean;
}interface CactusLMEmbedResult {
embedding: number[];
}interface CactusLMImageEmbedParams {
imagePath: string;
}interface CactusLMImageEmbedResult {
embedding: number[];
}interface CactusModel {
slug: string;
capabilities: string[];
quantization: {
int4: {
sizeMb: number;
url: string;
pro?: {
apple: string;
};
};
int8: {
sizeMb: number;
url: string;
pro?: {
apple: string;
};
};
};
}interface CactusModelOptions {
quantization?: 'int4' | 'int8';
pro?: boolean;
}interface CactusSTTParams {
model?: string;
options?: CactusModelOptions;
}interface CactusSTTDownloadParams {
onProgress?: (progress: number) => void;
}interface CactusSTTTranscribeOptions {
temperature?: number;
topP?: number;
topK?: number;
maxTokens?: number;
stopSequences?: string[];
useVad?: boolean;
telemetryEnabled?: boolean;
confidenceThreshold?: number;
cloudHandoffThreshold?: number;
includeStopSequences?: boolean;
}interface CactusSTTTranscribeParams {
audio: string | number[];
prompt?: string;
options?: CactusSTTTranscribeOptions;
onToken?: (token: string) => void;
}interface CactusSTTTranscribeResult {
success: boolean;
response: string;
cloudHandoff?: boolean;
confidence?: number;
timeToFirstTokenMs: number;
totalTimeMs: number;
prefillTokens: number;
prefillTps: number;
decodeTokens: number;
decodeTps: number;
totalTokens: number;
ramUsageMb?: number;
}interface CactusSTTAudioEmbedParams {
audioPath: string;
}interface CactusSTTAudioEmbedResult {
embedding: number[];
}interface CactusSTTStreamTranscribeStartOptions {
confirmationThreshold?: number;
minChunkSize?: number;
telemetryEnabled?: boolean;
language?: string;
}interface CactusSTTStreamTranscribeProcessParams {
audio: number[];
}interface CactusSTTStreamTranscribeProcessResult {
success: boolean;
confirmed: string;
pending: string;
bufferDurationMs?: number;
confidence?: number;
cloudHandoff?: boolean;
cloudResult?: string;
cloudJobId?: number;
cloudResultJobId?: number;
timeToFirstTokenMs?: number;
totalTimeMs?: number;
prefillTokens?: number;
prefillTps?: number;
decodeTokens?: number;
decodeTps?: number;
totalTokens?: number;
ramUsageMb?: number;
}interface CactusSTTStreamTranscribeStopResult {
success: boolean;
confirmed: string;
}interface CactusSTTDetectLanguageOptions {
useVad?: boolean;
}interface CactusSTTDetectLanguageParams {
audio: string | number[];
options?: CactusSTTDetectLanguageOptions;
}interface CactusSTTDetectLanguageResult {
language: string;
confidence?: number;
}interface CactusAudioParams {
model?: string;
options?: CactusModelOptions;
}interface CactusAudioDownloadParams {
onProgress?: (progress: number) => void;
}interface CactusAudioVADOptions {
threshold?: number;
negThreshold?: number;
minSpeechDurationMs?: number;
maxSpeechDurationS?: number;
minSilenceDurationMs?: number;
speechPadMs?: number;
windowSizeSamples?: number;
samplingRate?: number;
minSilenceAtMaxSpeech?: number;
useMaxPossSilAtMaxSpeech?: boolean;
}interface CactusAudioVADSegment {
start: number;
end: number;
}interface CactusAudioVADResult {
segments: CactusAudioVADSegment[];
totalTime: number;
ramUsage: number;
}interface CactusAudioVADParams {
audio: string | number[];
options?: CactusAudioVADOptions;
}interface CactusAudioDiarizeOptions {
stepMs?: number;
threshold?: number;
numSpeakers?: number;
minSpeakers?: number;
maxSpeakers?: number;
}interface CactusAudioDiarizeParams {
audio: string | number[];
options?: CactusAudioDiarizeOptions;
}interface CactusAudioDiarizeResult {
success: boolean;
error: string | null;
numSpeakers: number;
scores: number[];
totalTimeMs: number;
ramUsageMb: number;
}interface CactusAudioEmbedSpeakerParams {
audio: string | number[];
}interface CactusAudioEmbedSpeakerResult {
success: boolean;
error: string | null;
embedding: number[];
totalTimeMs: number;
ramUsageMb: number;
}interface CactusIndexParams {
name: string;
embeddingDim: number;
}interface CactusIndexAddParams {
ids: number[];
documents: string[];
embeddings: number[][];
metadatas?: string[];
}interface CactusIndexGetParams {
ids: number[];
}interface CactusIndexGetResult {
documents: string[];
metadatas: string[];
embeddings: number[][];
}interface CactusIndexQueryOptions {
topK?: number;
scoreThreshold?: number;
}interface CactusIndexQueryParams {
embeddings: number[][];
options?: CactusIndexQueryOptions;
}interface CactusIndexQueryResult {
ids: number[][];
scores: number[][];
}interface CactusIndexDeleteParams {
ids: number[];
}

- Model Selection - Choose smaller models for faster inference on mobile devices.
- Memory Management - Always call destroy() when you're done with models to free up resources.
- VAD - Use useVad: true (the default) when transcribing audio with silence, to strip non-speech regions and speed up transcription.
Check out our example app for a complete React Native implementation.
