Implementation of the Discord Voice API for Node.js with comprehensive audio playback, reception, and end-to-end encryption support
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Functionality for receiving and processing audio streams from other users in voice channels, with support for per-user audio streams, speaking state tracking, and flexible stream ending behaviors.
Main class for receiving audio from other users in a voice connection.
/**
* Attaches to a VoiceConnection to receive audio packets from other users
* Note: This feature is not officially documented by Discord and may not work reliably
*/
class VoiceReceiver {
/** The voice connection this receiver is attached to */
readonly voiceConnection: VoiceConnection;
/** Maps audio SSRCs to Discord user IDs */
readonly ssrcMap: SSRCMap;
/** Tracks speaking states of users */
readonly speaking: SpeakingMap;
/** Current audio stream subscriptions by user ID */
readonly subscriptions: Map<string, AudioReceiveStream>;
constructor(voiceConnection: VoiceConnection);
}

Usage Example:
import { VoiceConnectionStatus, EndBehaviorType, entersState } from "@discordjs/voice";
// Wait for connection to be ready
await entersState(connection, VoiceConnectionStatus.Ready, 30_000);
// Access the receiver
const receiver = connection.receiver;
// Subscribe to a user's audio
const userId = "123456789012345678";
const audioStream = receiver.subscribe(userId, {
end: {
behavior: EndBehaviorType.AfterSilence,
duration: 100, // End after 100ms of silence
},
});
// Process received audio
audioStream.on("data", (chunk) => {
console.log(`Received ${chunk.length} bytes of audio from ${userId}`);
});
audioStream.on("end", () => {
console.log(`Audio stream from ${userId} ended`);
});

Readable streams of Opus packets received from specific users.
/**
* Readable stream of Opus packets from a specific user
*/
class AudioReceiveStream extends Readable {
/** End behavior configuration for this stream */
readonly end: EndBehavior;
constructor(options: AudioReceiveStreamOptions);
/** Push audio data to the stream */
push(buffer: Buffer | null): boolean;
}
interface AudioReceiveStreamOptions extends ReadableOptions {
/** Configuration for when the stream should end */
end: EndBehavior;
}
/**
* Creates default options for audio receive streams
* @returns Default configuration with manual end behavior
*/
function createDefaultAudioReceiveStreamOptions(): AudioReceiveStreamOptions;

Configuration for when audio receive streams should automatically end.
enum EndBehaviorType {
/** Stream only ends when manually destroyed */
Manual,
/** Stream ends after period of silence/no audio packets */
AfterSilence,
/** Stream ends after period of no audio packets (including silence) */
AfterInactivity
}
type EndBehavior =
| {
behavior: EndBehaviorType.AfterInactivity | EndBehaviorType.AfterSilence;
/** Duration in milliseconds before ending stream */
duration: number;
}
| {
behavior: EndBehaviorType.Manual;
};

Usage Example:
// Stream that ends after 5 seconds of silence
const silenceEndStream = receiver.subscribe(userId, {
end: {
behavior: EndBehaviorType.AfterSilence,
duration: 5000,
},
});
// Stream that ends after 10 seconds of no packets
const inactivityEndStream = receiver.subscribe(userId, {
end: {
behavior: EndBehaviorType.AfterInactivity,
duration: 10000,
},
});
// Stream that never auto-ends
const manualEndStream = receiver.subscribe(userId, {
end: {
behavior: EndBehaviorType.Manual,
},
});
// Manually end the stream later
setTimeout(() => {
manualEndStream.destroy();
}, 60000);

Maps Synchronization Source (SSRC) identifiers to Discord user information.
/**
* Maps audio SSRCs to user data in voice connections
*/
class SSRCMap extends EventEmitter {
/**
* Update the map with new user data
* @param data - User voice data to update
*/
update(data: VoiceUserData): void;
/**
* Remove user data by user ID
* @param userId - Discord user ID to remove
*/
delete(userId: string): VoiceUserData | undefined;
/**
* Get user data by audio SSRC
* @param ssrc - Audio SSRC to look up
*/
get(ssrc: number): VoiceUserData | undefined;
/**
* Get user data by user ID
* @param userId - Discord user ID to look up
*/
getByUserId(userId: string): VoiceUserData | undefined;
}
interface VoiceUserData {
/** SSRC of the user's audio stream */
audioSSRC: number;
/** Discord user ID */
userId: string;
/** SSRC of user's video stream (if exists, cannot be 0) */
videoSSRC?: number;
}
// SSRCMap Events
interface SSRCMap extends EventEmitter {
/** Emitted when new user data is created */
on(event: "create", listener: (newData: VoiceUserData) => void): this;
/** Emitted when existing user data is updated */
on(event: "update", listener: (oldData: VoiceUserData | undefined, newData: VoiceUserData) => void): this;
/** Emitted when user data is deleted */
on(event: "delete", listener: (deletedData: VoiceUserData) => void): this;
}

Usage Example:
const receiver = connection.receiver;
// Monitor SSRC changes
receiver.ssrcMap.on("create", (userData) => {
console.log(`New user joined: ${userData.userId} (SSRC: ${userData.audioSSRC})`);
});
receiver.ssrcMap.on("update", (oldData, newData) => {
console.log(`User ${newData.userId} SSRC updated: ${oldData?.audioSSRC} -> ${newData.audioSSRC}`);
});
receiver.ssrcMap.on("delete", (userData) => {
console.log(`User left: ${userData.userId}`);
});
// Get user by SSRC
const userData = receiver.ssrcMap.get(12345);
if (userData) {
console.log(`SSRC 12345 belongs to user ${userData.userId}`);
}

Tracks which users are currently speaking in the voice channel.
/**
* Tracks speaking states of users in a voice channel
*/
class SpeakingMap extends EventEmitter {
/** Delay in ms after packet received before marking user as not speaking */
static readonly DELAY: 100;
/** Currently speaking users mapped to start timestamps */
readonly users: Map<string, number>;
/** Handle incoming audio packet from user */
onPacket(userId: string): void;
}
// SpeakingMap Events
interface SpeakingMap extends EventEmitter {
/** Emitted when a user starts speaking */
on(event: "start", listener: (userId: string) => void): this;
/** Emitted when a user stops speaking */
on(event: "end", listener: (userId: string) => void): this;
}

Usage Example:
const receiver = connection.receiver;
// Monitor speaking state changes
receiver.speaking.on("start", (userId) => {
console.log(`User ${userId} started speaking`);
});
receiver.speaking.on("end", (userId) => {
console.log(`User ${userId} stopped speaking`);
});
// Check current speaking users
for (const [userId, startTime] of receiver.speaking.users) {
const duration = Date.now() - startTime;
console.log(`User ${userId} has been speaking for ${duration}ms`);
}

Here's a comprehensive example showing how to receive and process audio from multiple users:
import {
joinVoiceChannel,
VoiceConnectionStatus,
EndBehaviorType,
entersState,
} from "@discordjs/voice";
import { createWriteStream } from "fs";
// Join voice channel and wait for ready state
const connection = joinVoiceChannel({
channelId: "123456789012345678",
guildId: "987654321098765432",
adapterCreator: guild.voiceAdapterCreator,
});
await entersState(connection, VoiceConnectionStatus.Ready, 30_000);
const receiver = connection.receiver;
// Track users joining/leaving
receiver.ssrcMap.on("create", (user) => {
console.log(`${user.userId} joined the voice channel`);
// Start recording this user
const audioStream = receiver.subscribe(user.userId, {
end: {
behavior: EndBehaviorType.AfterSilence,
duration: 1000, // End after 1 second of silence
},
});
// Save audio to file
const outputPath = `./recordings/${user.userId}-${Date.now()}.pcm`;
const writeStream = createWriteStream(outputPath);
audioStream.pipe(writeStream);
audioStream.on("end", () => {
console.log(`Finished recording ${user.userId}`);
writeStream.end();
});
});
// Monitor speaking activity
receiver.speaking.on("start", (userId) => {
console.log(`🎤 ${userId} started speaking`);
});
receiver.speaking.on("end", (userId) => {
console.log(`🔇 ${userId} stopped speaking`);
});
// Process received audio in real-time
receiver.subscriptions.forEach((stream, userId) => {
stream.on("data", (chunk) => {
// Process audio chunk (e.g., voice recognition, audio analysis)
console.log(`Received ${chunk.length} bytes from ${userId}`);
});
});

Handle errors that may occur during audio reception:
// Note: VoiceReceiver itself is not an EventEmitter and does not emit "error"
// events. Handle errors on the individual audio streams and on the voice
// connection instead, as shown below.
// Handle individual stream errors
audioStream.on("error", (error) => {
console.error(`Stream error for user ${userId}:`, error);
});
// Handle connection issues
connection.on("stateChange", (oldState, newState) => {
if (newState.status === VoiceConnectionStatus.Disconnected) {
console.log("Connection lost - audio reception stopped");
}
});

Install with Tessl CLI
npx tessl i tessl/npm-discordjs--voice