
tessl/npm-google--genai

Google Gen AI JavaScript SDK for building applications powered by Gemini with content generation, image/video generation, function calling, caching, and real-time live sessions


docs/live.md

Live API (Experimental)

The Live module provides real-time bidirectional communication with models via WebSocket, supporting streaming multimodal input and output, including text, audio, and video.

Capabilities

connect

Establish a WebSocket connection to a model for real-time interaction.

/**
 * Establish WebSocket connection to model
 * @param params - Live connection parameters
 * @returns Promise resolving to session
 */
function connect(
  params: LiveConnectParameters
): Promise<Session>;

interface LiveConnectParameters {
  /** Model name (e.g., 'gemini-2.0-flash-exp') */
  model: string;
  /** Connection configuration */
  config?: LiveConnectConfig;
  /** Event callbacks */
  callbacks?: LiveCallbacks;
}

Usage Examples:

import { GoogleGenAI } from '@google/genai';

const client = new GoogleGenAI({ apiKey: 'YOUR_API_KEY' });

// Connect to model
const session = await client.live.connect({
  model: 'gemini-2.0-flash-exp',
  config: {
    responseModalities: ['TEXT', 'AUDIO']
  },
  callbacks: {
    onopen: () => {
      console.log('Session opened');
    },
    onmessage: (message) => {
      console.log('Message received:', message);
    },
    onerror: (error) => {
      console.error('Error:', error);
    },
    onclose: () => {
      console.log('Session closed');
    }
  }
});

// Send text input
session.send({
  clientContent: {
    turns: [{
      role: 'user',
      parts: [{ text: 'Hello, how are you?' }]
    }],
    turnComplete: true
  }
});

// Close when done
session.close();

Session Class

Represents an active WebSocket session for real-time communication.

/**
 * Live WebSocket session
 */
class Session {
  /**
   * Send client message via WebSocket
   * @param message - Message to send
   */
  send(message: LiveClientMessage): void;

  /**
   * Send real-time input (audio/video)
   * @param params - Realtime input parameters
   */
  sendRealtimeInput(params: LiveSendRealtimeInputParameters): void;

  /**
   * Send tool/function response
   * @param params - Tool response parameters
   */
  sendToolResponse(params: LiveSendToolResponseParameters): void;

  /**
   * Close the WebSocket connection
   */
  close(): void;
}

Live Music

Specialized sub-module for live music generation.

/**
 * Live music generation
 */
class LiveMusic {
  /**
   * Connect to live music generation
   * @param params - Music connection parameters
   * @returns Promise resolving to music session
   */
  connect(params: LiveMusicConnectParameters): Promise<LiveMusicSession>;
}

interface LiveMusicConnectParameters {
  /** Model name */
  model: string;
  /** Music generation configuration */
  config?: LiveMusicConfig;
  /** Event callbacks */
  callbacks?: LiveCallbacks;
}

Usage Examples:

import { Scale, MusicGenerationMode } from '@google/genai';

// Access live music
const musicSession = await client.live.music.connect({
  model: 'music-generation-model',
  config: {
    tempo: 120,
    scale: Scale.C_MAJOR_A_MINOR,
    mode: MusicGenerationMode.QUALITY
  },
  callbacks: {
    onmessage: (message) => {
      // Handle music chunks
      console.log('Music chunk received');
    }
  }
});

Types

LiveConnectConfig

Configuration for a live connection.

interface LiveConnectConfig {
  /** Output modalities (TEXT, AUDIO, IMAGE) */
  responseModalities?: Modality[];
  /** System instruction */
  systemInstruction?: Content | string;
  /** Tools available */
  tools?: ToolListUnion;
  /** Tool configuration */
  toolConfig?: ToolConfig;
  /** Generation config */
  generationConfig?: GenerationConfig;
  /** Speech config */
  speechConfig?: SpeechConfig;
}

interface GenerationConfig {
  /** Temperature */
  temperature?: number;
  /** Top-P */
  topP?: number;
  /** Top-K */
  topK?: number;
  /** Max output tokens */
  maxOutputTokens?: number;
}

interface SpeechConfig {
  /** Voice preset */
  voiceConfig?: VoiceConfig;
}

interface VoiceConfig {
  /** Preset voice name */
  presetVoice?: string;
}
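Putting the pieces together, a fully populated config might look like the sketch below. The interfaces are re-declared locally so the snippet is self-contained; in an application they come from `@google/genai`, and the voice name is a placeholder.

```typescript
// Local mirrors of the interfaces above, for illustration only.
type Modality = 'TEXT' | 'AUDIO' | 'IMAGE';

interface VoiceConfig { presetVoice?: string; }
interface SpeechConfig { voiceConfig?: VoiceConfig; }
interface GenerationConfig {
  temperature?: number;
  topP?: number;
  topK?: number;
  maxOutputTokens?: number;
}
interface LiveConnectConfig {
  responseModalities?: Modality[];
  systemInstruction?: string;
  generationConfig?: GenerationConfig;
  speechConfig?: SpeechConfig;
}

// A config requesting text and audio output with a tuned generation setup.
const config: LiveConnectConfig = {
  responseModalities: ['TEXT', 'AUDIO'],
  systemInstruction: 'You are a concise assistant',
  generationConfig: { temperature: 0.7, maxOutputTokens: 1024 },
  speechConfig: { voiceConfig: { presetVoice: 'en-US-Standard-A' } }
};
```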

LiveCallbacks

Event callbacks for a live session.

interface LiveCallbacks {
  /** Called when connection opens */
  onopen?: () => void;
  /** Called when message received */
  onmessage?: (message: LiveServerMessage) => void;
  /** Called on error */
  onerror?: (error: Error) => void;
  /** Called when connection closes */
  onclose?: () => void;
}

LiveClientMessage

Messages sent from the client to the model.

interface LiveClientMessage {
  /** Setup configuration */
  setup?: LiveClientSetup;
  /** Client content */
  clientContent?: LiveClientContent;
  /** Realtime input */
  realtimeInput?: RealtimeInput;
  /** Tool response */
  toolResponse?: ToolResponse;
}

interface LiveClientSetup {
  /** Generation config */
  generationConfig?: GenerationConfig;
  /** System instruction */
  systemInstruction?: Content;
  /** Tools */
  tools?: Tool[];
}

interface LiveClientContent {
  /** Turn information */
  turns?: Content[];
  /** Turn complete flag */
  turnComplete?: boolean;
}
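Since most client messages are a text turn wrapped in this shape, a small helper keeps call sites short. The types are re-declared locally for illustration; `textMessage` is a hypothetical helper, not part of the SDK.

```typescript
// Local mirrors of the message types above, for illustration only.
interface Part { text?: string; }
interface Content { role?: string; parts?: Part[]; }
interface LiveClientContent { turns?: Content[]; turnComplete?: boolean; }
interface LiveClientMessage { clientContent?: LiveClientContent; }

// Wrap a user utterance in a LiveClientMessage, completing the turn by default.
function textMessage(text: string, turnComplete = true): LiveClientMessage {
  return {
    clientContent: {
      turns: [{ role: 'user', parts: [{ text }] }],
      turnComplete
    }
  };
}

const msg = textMessage('Hello');
// session.send(msg);
```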

LiveServerMessage

Messages received from the model.

interface LiveServerMessage {
  /** Setup complete */
  setupComplete?: LiveSetupComplete;
  /** Server content */
  serverContent?: LiveServerContent;
  /** Tool call */
  toolCall?: ToolCall;
  /** Tool call cancellation */
  toolCallCancellation?: ToolCallCancellation;
}

interface LiveServerContent {
  /** Model turn */
  modelTurn?: Content;
  /** Turn complete flag */
  turnComplete?: boolean;
  /** Interrupted flag */
  interrupted?: boolean;
}
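An `onmessage` handler typically branches on which of these optional fields is present. The sketch below, with types re-declared locally for illustration, shows one way to classify an incoming message before dispatching it.

```typescript
// Local mirrors of the server message types above, for illustration only.
interface LiveServerContent {
  modelTurn?: unknown;
  turnComplete?: boolean;
  interrupted?: boolean;
}
interface LiveServerMessage {
  setupComplete?: object;
  serverContent?: LiveServerContent;
  toolCall?: object;
  toolCallCancellation?: object;
}

type MessageKind = 'setup' | 'content' | 'toolCall' | 'cancellation' | 'unknown';

// Decide which branch of the handler a message belongs to.
function classify(message: LiveServerMessage): MessageKind {
  if (message.setupComplete) return 'setup';
  if (message.serverContent) return 'content';
  if (message.toolCall) return 'toolCall';
  if (message.toolCallCancellation) return 'cancellation';
  return 'unknown';
}
```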

RealtimeInput

Real-time audio/video input.

interface RealtimeInput {
  /** Media chunks */
  mediaChunks?: MediaChunk[];
}

interface MediaChunk {
  /** MIME type */
  mimeType?: string;
  /** Base64-encoded data */
  data?: string;
}
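Because `data` carries base64 rather than raw bytes, captured audio has to be encoded before it is sent. Assuming a Node environment with `Buffer` available, a raw PCM buffer can be wrapped like this (local type, for illustration):

```typescript
// Local mirror of the MediaChunk type above, for illustration only.
interface MediaChunk { mimeType?: string; data?: string; }

// Wrap raw PCM bytes as a base64-encoded MediaChunk.
function pcmChunk(raw: Buffer): MediaChunk {
  return { mimeType: 'audio/pcm', data: raw.toString('base64') };
}

const chunk = pcmChunk(Buffer.from([0, 1, 2, 3]));
// chunk.data === 'AAECAw=='
```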

LiveSendRealtimeInputParameters

Parameters for sending realtime input.

interface LiveSendRealtimeInputParameters {
  /** Realtime input */
  realtimeInput: RealtimeInput;
}

LiveSendToolResponseParameters

Parameters for sending tool response.

interface LiveSendToolResponseParameters {
  /** Tool response */
  toolResponse: ToolResponse;
}

ToolResponse

Tool execution response.

interface ToolResponse {
  /** Function responses */
  functionResponses?: FunctionResponse[];
}
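When answering a tool call, the response and the call's `id` are packaged into this shape. A hypothetical builder, with types re-declared locally for illustration (`FunctionResponse` follows the fields used in the function-calling example below):

```typescript
// Local mirrors, for illustration only.
interface FunctionResponse {
  name?: string;
  id?: string;
  response?: Record<string, unknown>;
}
interface ToolResponse { functionResponses?: FunctionResponse[]; }

// Package one function result as a ToolResponse, echoing the call id back.
function buildToolResponse(
  name: string,
  id: string | undefined,
  result: Record<string, unknown>
): ToolResponse {
  return { functionResponses: [{ name, id, response: result }] };
}

const tr = buildToolResponse('getWeather', 'call-1', { temperature: 22 });
// session.sendToolResponse({ toolResponse: tr });
```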

Music Generation Types

interface LiveMusicConfig {
  /** Tempo (BPM) */
  tempo?: number;
  /** Musical scale */
  scale?: Scale;
  /** Generation mode */
  mode?: MusicGenerationMode;
}

enum Scale {
  C_MAJOR_A_MINOR = 'C_MAJOR_A_MINOR',
  C_SHARP_D_FLAT_MAJOR_A_SHARP_B_FLAT_MINOR = 'C_SHARP_D_FLAT_MAJOR_A_SHARP_B_FLAT_MINOR',
  // ... additional scales
}

enum MusicGenerationMode {
  QUALITY = 'QUALITY',
  DIVERSITY = 'DIVERSITY',
  VOCALIZATION = 'VOCALIZATION'
}

Complete Examples

Text-based Live Chat

import { GoogleGenAI } from '@google/genai';

const client = new GoogleGenAI({ apiKey: 'YOUR_API_KEY' });

// Connect with text modality
const session = await client.live.connect({
  model: 'gemini-2.0-flash-exp',
  config: {
    responseModalities: ['TEXT']
  },
  callbacks: {
    onopen: () => {
      console.log('Connected');
    },
    onmessage: (message) => {
      if (message.serverContent?.modelTurn) {
        const text = message.serverContent.modelTurn.parts?.[0]?.text;
        if (text) {
          console.log('Model:', text);
        }
      }
    },
    onerror: (error) => {
      console.error('Error:', error);
    }
  }
});

// Send messages
session.send({
  clientContent: {
    turns: [{
      role: 'user',
      parts: [{ text: 'What is quantum computing?' }]
    }],
    turnComplete: true
  }
});

// Continue conversation
setTimeout(() => {
  session.send({
    clientContent: {
      turns: [{
        role: 'user',
        parts: [{ text: 'Can you explain it more simply?' }]
      }],
      turnComplete: true
    }
  });
}, 3000);

// Close after 10 seconds
setTimeout(() => {
  session.close();
}, 10000);

Audio Input and Output

// Connect with audio modalities
const session = await client.live.connect({
  model: 'gemini-2.0-flash-exp',
  config: {
    responseModalities: ['TEXT', 'AUDIO'],
    speechConfig: {
      voiceConfig: {
        presetVoice: 'en-US-Standard-A'
      }
    }
  },
  callbacks: {
    onmessage: (message) => {
      if (message.serverContent?.modelTurn) {
        message.serverContent.modelTurn.parts?.forEach(part => {
          if (part.text) {
            console.log('Text:', part.text);
          }
          if (part.inlineData?.mimeType?.startsWith('audio/')) {
            console.log('Received audio chunk');
            // Process audio data
            const audioData = part.inlineData.data;
            // Play or save audio
          }
        });
      }
    }
  }
});

// Send audio input (e.g., from microphone)
const audioChunk = getAudioFromMicrophone(); // Your audio capture logic; must return base64-encoded PCM (see MediaChunk)

session.sendRealtimeInput({
  realtimeInput: {
    mediaChunks: [{
      mimeType: 'audio/pcm',
      data: audioChunk
    }]
  }
});

Live Function Calling

import { Tool, Type } from '@google/genai';

const weatherTool: Tool = {
  functionDeclarations: [{
    name: 'getWeather',
    description: 'Get current weather',
    parameters: {
      type: Type.OBJECT,
      properties: {
        location: { type: Type.STRING }
      },
      required: ['location']
    }
  }]
};

const session = await client.live.connect({
  model: 'gemini-2.0-flash-exp',
  config: {
    responseModalities: ['TEXT'],
    tools: [weatherTool]
  },
  callbacks: {
    onmessage: (message) => {
      // Handle regular content
      if (message.serverContent?.modelTurn) {
        console.log('Model:', message.serverContent.modelTurn.parts?.[0]?.text);
      }

      // Handle function calls
      if (message.toolCall) {
        console.log('Tool call requested:', message.toolCall);

        // Execute function
        const functionCall = message.toolCall.functionCalls?.[0];
        if (functionCall?.name === 'getWeather') {
          const location = functionCall.args?.location;
          const weather = { temperature: 22, condition: 'sunny' };

          // Send response
          session.sendToolResponse({
            toolResponse: {
              functionResponses: [{
                name: 'getWeather',
                response: weather,
                id: functionCall.id
              }]
            }
          });
        }
      }
    }
  }
});

// Ask weather question
session.send({
  clientContent: {
    turns: [{
      role: 'user',
      parts: [{ text: 'What is the weather in Tokyo?' }]
    }],
    turnComplete: true
  }
});

Screen Sharing / Video Input

// Connect for video processing
const session = await client.live.connect({
  model: 'gemini-2.0-flash-exp',
  config: {
    responseModalities: ['TEXT']
  },
  callbacks: {
    onmessage: (message) => {
      if (message.serverContent?.modelTurn) {
        console.log('Analysis:', message.serverContent.modelTurn.parts?.[0]?.text);
      }
    }
  }
});

// Send video frames (e.g., from screen capture); frameData must be base64-encoded JPEG
function sendVideoFrame(frameData: string) {
  session.sendRealtimeInput({
    realtimeInput: {
      mediaChunks: [{
        mimeType: 'image/jpeg',
        data: frameData
      }]
    }
  });
}

// Send initial question
session.send({
  clientContent: {
    turns: [{
      role: 'user',
      parts: [{ text: 'Analyze what you see on the screen' }]
    }],
    turnComplete: true
  }
});

// Continuously send frames
const frameInterval = setInterval(() => {
  const frame = captureScreenFrame(); // Your screen capture logic
  sendVideoFrame(frame);
}, 1000); // Send 1 frame per second

// Stop after 30 seconds
setTimeout(() => {
  clearInterval(frameInterval);
  session.close();
}, 30000);

Interruption Handling

const session = await client.live.connect({
  model: 'gemini-2.0-flash-exp',
  config: {
    responseModalities: ['TEXT', 'AUDIO']
  },
  callbacks: {
    onmessage: (message) => {
      // Check if model was interrupted
      if (message.serverContent?.interrupted) {
        console.log('Model response was interrupted');
      }

      if (message.serverContent?.modelTurn && !message.serverContent.interrupted) {
        console.log('Model:', message.serverContent.modelTurn.parts?.[0]?.text);
      }
    }
  }
});

// Send first message
session.send({
  clientContent: {
    turns: [{
      role: 'user',
      parts: [{ text: 'Tell me a long story' }]
    }],
    turnComplete: true
  }
});

// Interrupt with new message
setTimeout(() => {
  session.send({
    clientContent: {
      turns: [{
        role: 'user',
        parts: [{ text: 'Actually, just tell me a joke instead' }]
      }],
      turnComplete: true
    }
  });
}, 2000);

Multi-turn Conversation

const conversationHistory: Content[] = [];

const session = await client.live.connect({
  model: 'gemini-2.0-flash-exp',
  config: {
    responseModalities: ['TEXT'],
    systemInstruction: 'You are a helpful coding assistant'
  },
  callbacks: {
    onmessage: (message) => {
      if (message.serverContent?.modelTurn && message.serverContent.turnComplete) {
        // Save to history
        conversationHistory.push(message.serverContent.modelTurn);

        const text = message.serverContent.modelTurn.parts?.[0]?.text;
        console.log('Assistant:', text);
      }
    }
  }
});

// Helper to send message
function sendMessage(text: string) {
  const userTurn: Content = {
    role: 'user',
    parts: [{ text }]
  };

  conversationHistory.push(userTurn);

  session.send({
    clientContent: {
      turns: [userTurn],
      turnComplete: true
    }
  });
}

// Conversation
sendMessage('How do I read a file in Python?');

setTimeout(() => {
  sendMessage('Can you show me an example with error handling?');
}, 3000);

setTimeout(() => {
  sendMessage('What about reading CSV files?');
}, 6000);

Error Recovery

let session: Session | null = null;
let reconnectAttempts = 0;
const MAX_RECONNECT_ATTEMPTS = 3;

async function connectWithRetry() {
  try {
    session = await client.live.connect({
      model: 'gemini-2.0-flash-exp',
      config: {
        responseModalities: ['TEXT']
      },
      callbacks: {
        onopen: () => {
          console.log('Connected');
          reconnectAttempts = 0;
        },
        onmessage: (message) => {
          // Handle messages
        },
        onerror: (error) => {
          console.error('Error:', error);
        },
        onclose: () => {
          console.log('Connection closed');

          // Attempt reconnect
          if (reconnectAttempts < MAX_RECONNECT_ATTEMPTS) {
            reconnectAttempts++;
            console.log(`Reconnecting... (attempt ${reconnectAttempts})`);
            setTimeout(connectWithRetry, 2000);
          }
        }
      }
    });
  } catch (error) {
    console.error('Connection failed:', error);

    if (reconnectAttempts < MAX_RECONNECT_ATTEMPTS) {
      reconnectAttempts++;
      setTimeout(connectWithRetry, 2000);
    }
  }
}

// Start connection
await connectWithRetry();

Live Music Generation

import { Scale, MusicGenerationMode } from '@google/genai';

const musicSession = await client.live.music.connect({
  model: 'music-generation-model',
  config: {
    tempo: 120,
    scale: Scale.C_MAJOR_A_MINOR,
    mode: MusicGenerationMode.QUALITY
  },
  callbacks: {
    onmessage: (message) => {
      if (message.serverContent?.modelTurn) {
        message.serverContent.modelTurn.parts?.forEach(part => {
          if (part.inlineData?.mimeType?.startsWith('audio/')) {
            console.log('Music chunk received');
            // Play or save audio chunk
            const audioData = part.inlineData.data;
            playAudioChunk(audioData);
          }
        });
      }
    }
  }
});

// Request music generation
musicSession.send({
  clientContent: {
    turns: [{
      role: 'user',
      parts: [{ text: 'Generate upbeat electronic music' }]
    }],
    turnComplete: true
  }
});

Real-time Translation

const session = await client.live.connect({
  model: 'gemini-2.0-flash-exp',
  config: {
    responseModalities: ['TEXT', 'AUDIO'],
    systemInstruction: 'Translate speech from English to Spanish in real-time'
  },
  callbacks: {
    onmessage: (message) => {
      if (message.serverContent?.modelTurn) {
        const translated = message.serverContent.modelTurn.parts?.[0]?.text;
        console.log('Translated:', translated);

        // Output audio translation if available
        const audioPart = message.serverContent.modelTurn.parts?.find(
          p => p.inlineData?.mimeType?.startsWith('audio/')
        );
        if (audioPart?.inlineData?.data) {
          playAudioChunk(audioPart.inlineData.data);
        }
      }
    }
  }
});

// Send audio in chunks as user speaks
microphoneStream.on('data', (audioChunk) => {
  session.sendRealtimeInput({
    realtimeInput: {
      mediaChunks: [{
        mimeType: 'audio/pcm',
        data: audioChunk.toString('base64')
      }]
    }
  });
});

Install with Tessl CLI

npx tessl i tessl/npm-google--genai
