net.dv8tion.jda.internal.audio.AudioPacket Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of JDA Show documentation
Show all versions of JDA Show documentation
Java wrapper for the popular chat & VOIP service: Discord https://discord.com
/*
* Copyright 2015 Austin Keener, Michael Ritter, Florian Spieß, and the JDA contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package net.dv8tion.jda.internal.audio;
import com.iwebpp.crypto.TweetNaclFast;
import net.dv8tion.jda.internal.utils.IOUtil;
import java.net.DatagramPacket;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.util.Arrays;
/**
 * Represents the contents of an audio packet that was either received from Discord or
 * will be sent to Discord.
 *
 * <p>The first {@link #RTP_HEADER_BYTE_LENGTH} bytes of every packet are the fixed RTP header
 * (version/flags, payload type, sequence, timestamp, SSRC); everything after it is treated as payload.
 *
 * @see <a href="https://www.rfc-editor.org/rfc/rfc3550">RFC 3550 - RTP: A Transport Protocol for Real-Time Applications</a>
 */
public class AudioPacket
{
    /** Length in bytes of the fixed RTP header that precedes the payload. */
    public static final int RTP_HEADER_BYTE_LENGTH = 12;

    /**
     * Bit index 0 and 1 represent the RTP Protocol version used. Discord uses the latest RTP protocol version, 2.
     * Bit index 2 represents whether or not we pad. Opus uses an internal padding system, so RTP padding is not used.
     * Bit index 3 represents if we use extensions.
     * Bit index 4 to 7 represent the CC or CSRC count. CSRC is the list of contributing sources (RFC 3550).
     */
    public static final byte RTP_VERSION_PAD_EXTEND = (byte) 0x80;  //Binary: 1000 0000

    /**
     * This is Discord's RTP Profile Payload type.
     * <br>Per RFC 3550 the top bit of this byte is the marker bit and the lower 7 bits are the payload type,
     * so this value is marker = 0 with payload type 120.
     */
    public static final byte RTP_PAYLOAD_TYPE = (byte) 0x78;        //Binary: 0111 1000

    /**
     * This defines the extension type used by discord for presumably video?
     */
    public static final short RTP_DISCORD_EXTENSION = (short) 0xBEDE;

    /** Byte offset of the payload type within the RTP header. */
    public static final int PT_INDEX = 1;
    /** Byte offset of the 16-bit sequence number within the RTP header. */
    public static final int SEQ_INDEX = 2;
    /** Byte offset of the 32-bit timestamp within the RTP header. */
    public static final int TIMESTAMP_INDEX = 4;
    /** Byte offset of the 32-bit SSRC within the RTP header. */
    public static final int SSRC_INDEX = 8;

    private final byte type;
    private final char seq;
    private final int timestamp;
    private final int ssrc;
    private final byte[] rawPacket;
    private final ByteBuffer encodedAudio;

    /**
     * Parses a packet from the used portion of the provided datagram.
     * <br>The datagram's data is copied (truncated to {@link DatagramPacket#getLength()}) before parsing.
     *
     * @param packet
     *        The datagram to read the RTP packet from
     */
    public AudioPacket(DatagramPacket packet)
    {
        this(Arrays.copyOf(packet.getData(), packet.getLength()));
    }

    /**
     * Parses the provided bytes as an RTP packet.
     * <br>The array is stored as-is (not copied) and is what {@link #getRawPacket()} returns.
     * The payload (everything after the header, the CSRC list and, if present, the Discord extension)
     * is copied into a separate buffer available through {@link #getEncodedAudio()}.
     *
     * @param rawPacket
     *        The complete packet, starting with the 12 byte RTP header
     */
    public AudioPacket(byte[] rawPacket)
    {
        this.rawPacket = rawPacket;

        ByteBuffer buffer = ByteBuffer.wrap(rawPacket);
        this.seq = buffer.getChar(SEQ_INDEX);
        this.timestamp = buffer.getInt(TIMESTAMP_INDEX);
        this.ssrc = buffer.getInt(SSRC_INDEX);
        this.type = buffer.get(PT_INDEX);

        final byte profile = buffer.get(0);
        final byte[] data = buffer.array();
        final boolean hasExtension = (profile & 0x10) != 0; // extension bit is at 000X
        final byte cc = (byte) (profile & 0x0f);            // CSRC count - we ignore this for now
        final int csrcLength = cc * 4;                      // defines count of 4-byte words
        // it seems as if extensions only exist without a csrc list being present
        final short extension = hasExtension ? IOUtil.getShortBigEndian(data, RTP_HEADER_BYTE_LENGTH + csrcLength) : 0;

        // By default the payload starts right after the header and the CSRC list
        int offset = RTP_HEADER_BYTE_LENGTH + csrcLength;
        if (hasExtension && extension == RTP_DISCORD_EXTENSION)
            offset = getPayloadOffset(data, csrcLength);

        this.encodedAudio = ByteBuffer.allocate(data.length - offset);
        this.encodedAudio.put(data, offset, encodedAudio.capacity());
        // Cast to Buffer so flip() resolves to Buffer.flip() rather than a ByteBuffer-specific overload
        ((Buffer) this.encodedAudio).flip();
    }

    /**
     * Creates an outgoing packet from its parts and builds the raw packet bytes for it.
     *
     * @param buffer
     *        Buffer to write the raw packet into, or {@code null} to allocate one of exactly the required size
     * @param seq
     *        The RTP sequence number
     * @param timestamp
     *        The RTP timestamp
     * @param ssrc
     *        The RTP synchronization source identifier
     * @param encodedAudio
     *        The payload; it is stored by reference and returned by {@link #getEncodedAudio()}
     */
    public AudioPacket(ByteBuffer buffer, char seq, int timestamp, int ssrc, ByteBuffer encodedAudio)
    {
        this.seq = seq;
        this.ssrc = ssrc;
        this.timestamp = timestamp;
        this.encodedAudio = encodedAudio;
        this.type = RTP_PAYLOAD_TYPE;
        this.rawPacket = generateRawPacket(buffer, seq, timestamp, ssrc, encodedAudio);
    }

    /**
     * Computes the index of the first payload byte for a packet that carries the Discord RTP extension.
     * <br>Zero bytes directly following the extension are skipped. If only zero bytes (or nothing) follow
     * the extension, the returned offset equals {@code data.length}, i.e. the payload is empty.
     *
     * @param  data
     *         The complete raw packet
     * @param  csrcLength
     *         Length of the CSRC list in bytes
     *
     * @throws ArrayIndexOutOfBoundsException
     *         If the extension claims to extend beyond the end of the packet
     *
     * @return Offset of the payload within {@code data}, in range {@code [0, data.length]}
     */
    private int getPayloadOffset(byte[] data, int csrcLength)
    {
        // headerLength defines number of 4-byte words in the extension.
        // The field is an unsigned 16-bit value, so mask it to avoid negative lengths for values >= 0x8000.
        final int headerLength = IOUtil.getShortBigEndian(data, RTP_HEADER_BYTE_LENGTH + 2 + csrcLength) & 0xFFFF;
        int i = RTP_HEADER_BYTE_LENGTH // RTP header = 12 bytes
                + 4                    // header which defines a profile and length each 2-bytes = 4 bytes
                + csrcLength           // length of CSRC list (this seems to be always 0 when an extension exists)
                + headerLength * 4;    // number of 4-byte words in extension = len * 4 bytes

        // A truncated/malformed packet keeps failing with the same exception type as before, but with context
        if (i > data.length)
            throw new ArrayIndexOutOfBoundsException(
                "RTP extension ends at offset " + i + " but the packet is only " + data.length + " bytes long");

        // strip excess 0 bytes, without running past the end of the packet
        while (i < data.length && data[i] == 0)
            i++;

        return i;
    }

    /**
     * Returns a copy of the RTP header of this packet.
     *
     * @return New array holding the first 12 bytes of the raw packet
     */
    @SuppressWarnings("unused")
    public byte[] getHeader()
    {
        //The first 12 bytes of the rawPacket are the RTP Discord Nonce.
        return Arrays.copyOf(rawPacket, RTP_HEADER_BYTE_LENGTH);
    }

    /**
     * Returns the RTP header of this packet, zero-padded to the nonce length expected by
     * {@link TweetNaclFast.SecretBox}.
     *
     * @return New array of {@code TweetNaclFast.SecretBox.nonceLength} bytes starting with the 12 header bytes
     */
    public byte[] getNoncePadded()
    {
        byte[] nonce = new byte[TweetNaclFast.SecretBox.nonceLength];
        //The first 12 bytes of the rawPacket are the RTP Discord Nonce.
        System.arraycopy(rawPacket, 0, nonce, 0, RTP_HEADER_BYTE_LENGTH);
        return nonce;
    }

    /**
     * Returns the raw bytes backing this packet.
     * <br>This is the internal array, not a copy.
     *
     * @return The raw packet bytes
     */
    public byte[] getRawPacket()
    {
        return rawPacket;
    }

    /**
     * Returns the payload of this packet.
     * <br>This is the internal buffer, not a copy.
     *
     * @return The payload buffer
     */
    public ByteBuffer getEncodedAudio()
    {
        return encodedAudio;
    }

    /**
     * @return The RTP sequence number (unsigned 16-bit, hence {@code char})
     */
    public char getSequence()
    {
        return seq;
    }

    /**
     * @return The RTP synchronization source identifier
     */
    public int getSSRC()
    {
        return ssrc;
    }

    /**
     * @return The RTP timestamp
     */
    public int getTimestamp()
    {
        return timestamp;
    }

    /**
     * Encrypts the payload of this packet and writes header, encrypted payload and (optionally) the nonce
     * into a buffer ready to be sent.
     *
     * <p>Reads the payload through {@link ByteBuffer#array()}, so the payload buffer must be array-backed.
     *
     * @param  boxer
     *         The secret box used for encryption
     * @param  buffer
     *         Buffer to reuse for the output; it is cleared first and replaced by a newly allocated buffer
     *         if it is too small
     * @param  nonce
     *         The nonce to encrypt with; ignored for encryption when {@code nlen == 0}
     * @param  nlen
     *         Number of bytes of {@code nonce} to append after the encrypted payload,
     *         or {@code 0} to use the zero-padded RTP header as nonce and append nothing
     *
     * @return The (possibly newly allocated) buffer, flipped and ready for reading
     */
    protected ByteBuffer asEncryptedPacket(TweetNaclFast.SecretBox boxer, ByteBuffer buffer, byte[] nonce, int nlen)
    {
        //Xsalsa20's Nonce is 24 bytes long, however RTP (and consequently Discord)'s nonce is a different length
        // so we need to create a 24 byte array, and copy the nonce into it.
        // we will leave the extra bytes as nulls. (Java sets non-populated bytes as 0).
        byte[] extendedNonce = nonce;
        if (nlen == 0) // this means the header is the nonce!
            extendedNonce = getNoncePadded();

        //Encrypt the readable region of the payload with the provided SecretBox.
        byte[] array = encodedAudio.array();
        int offset = encodedAudio.arrayOffset() + encodedAudio.position();
        int length = encodedAudio.remaining();
        byte[] encryptedAudio = boxer.box(array, offset, length, extendedNonce);

        ((Buffer) buffer).clear();
        int capacity = RTP_HEADER_BYTE_LENGTH + encryptedAudio.length + nlen;
        if (capacity > buffer.remaining())
            buffer = ByteBuffer.allocate(capacity);
        populateBuffer(seq, timestamp, ssrc, ByteBuffer.wrap(encryptedAudio), buffer);
        if (nlen > 0) // this means we append the nonce to the payload
            buffer.put(nonce, 0, nlen);

        ((Buffer) buffer).flip();
        return buffer;
    }

    /**
     * Decrypts a received datagram into an {@link AudioPacket} holding the plain payload.
     *
     * @param  encryption
     *         The encryption mode, which determines where the nonce is located
     * @param  packet
     *         The received datagram
     * @param  secretKey
     *         The key used to create the secret box
     *
     * @return The decrypted packet, or {@code null} if the payload type is not {@link #RTP_PAYLOAD_TYPE},
     *         the encryption mode is unsupported, the payload is too short to contain the nonce,
     *         or decryption failed
     */
    protected static AudioPacket decryptAudioPacket(AudioEncryption encryption, DatagramPacket packet, byte[] secretKey)
    {
        TweetNaclFast.SecretBox boxer = new TweetNaclFast.SecretBox(secretKey);
        AudioPacket encryptedPacket = new AudioPacket(packet);
        if (encryptedPacket.type != RTP_PAYLOAD_TYPE)
            return null;

        // Number of bytes at the very end of the packet that carry the nonce (0 = the RTP header is the nonce)
        final int suffixLength;
        switch (encryption)
        {
            case XSALSA20_POLY1305:
                suffixLength = 0;
                break;
            case XSALSA20_POLY1305_SUFFIX:
                suffixLength = TweetNaclFast.SecretBox.nonceLength;
                break;
            case XSALSA20_POLY1305_LITE:
                suffixLength = 4;
                break;
            default:
                AudioConnection.LOG.debug("Failed to decrypt audio packet, unsupported encryption mode!");
                return null;
        }

        ByteBuffer encodedAudio = encryptedPacket.encodedAudio;
        int offset = encodedAudio.arrayOffset() + encodedAudio.position();
        // The nonce suffix is not part of the ciphertext
        int length = encodedAudio.remaining() - suffixLength;
        if (length < 0)
        {
            // Payload is too short to even hold the nonce - this packet cannot be decrypted
            AudioConnection.LOG.trace("Failed to decrypt audio packet");
            return null;
        }

        final byte[] extendedNonce;
        if (suffixLength == 0)
        {
            extendedNonce = encryptedPacket.getNoncePadded();
        }
        else
        {
            // Copy the trailing nonce bytes; any remaining bytes of the 24 byte nonce stay 0
            byte[] rawPacket = encryptedPacket.getRawPacket();
            extendedNonce = new byte[TweetNaclFast.SecretBox.nonceLength];
            System.arraycopy(rawPacket, rawPacket.length - suffixLength, extendedNonce, 0, suffixLength);
        }

        final byte[] decryptedAudio = boxer.open(encodedAudio.array(), offset, length, extendedNonce);

        if (decryptedAudio == null)
        {
            AudioConnection.LOG.trace("Failed to decrypt audio packet");
            return null;
        }
        final byte[] decryptedRawPacket = new byte[RTP_HEADER_BYTE_LENGTH + decryptedAudio.length];

        //first 12 bytes of rawPacket are the RTP header
        //the rest is the audio data we just decrypted
        System.arraycopy(encryptedPacket.rawPacket, 0, decryptedRawPacket, 0, RTP_HEADER_BYTE_LENGTH);
        System.arraycopy(decryptedAudio, 0, decryptedRawPacket, RTP_HEADER_BYTE_LENGTH, decryptedAudio.length);

        return new AudioPacket(decryptedRawPacket);
    }

    /**
     * Writes header and payload into the buffer and returns the buffer's backing array.
     * <br>Note that the whole backing array is returned; for a caller-supplied buffer this may be
     * larger than the bytes written.
     *
     * @param  buffer
     *         Buffer to write into, or {@code null} to allocate one of exactly the required size
     * @param  seq
     *         The RTP sequence number
     * @param  timestamp
     *         The RTP timestamp
     * @param  ssrc
     *         The RTP synchronization source identifier
     * @param  data
     *         The payload
     *
     * @return The backing array of the buffer that was written to
     */
    private static byte[] generateRawPacket(ByteBuffer buffer, char seq, int timestamp, int ssrc, ByteBuffer data)
    {
        if (buffer == null)
            buffer = ByteBuffer.allocate(RTP_HEADER_BYTE_LENGTH + data.remaining());
        populateBuffer(seq, timestamp, ssrc, data, buffer);
        return buffer.array();
    }

    /**
     * Writes the 12 byte RTP header followed by the payload into the buffer at its current position.
     * <br>The payload buffer is consumed by the write and flipped afterwards.
     *
     * @param seq
     *        The RTP sequence number
     * @param timestamp
     *        The RTP timestamp
     * @param ssrc
     *        The RTP synchronization source identifier
     * @param data
     *        The payload to append after the header
     * @param buffer
     *        The destination buffer
     */
    private static void populateBuffer(char seq, int timestamp, int ssrc, ByteBuffer data, ByteBuffer buffer)
    {
        buffer.put(RTP_VERSION_PAD_EXTEND);
        buffer.put(RTP_PAYLOAD_TYPE);
        buffer.putChar(seq);
        buffer.putInt(timestamp);
        buffer.putInt(ssrc);
        buffer.put(data);
        // put(data) advanced data's position to its limit; flip it so the payload can be read again
        ((Buffer) data).flip();
    }
}