
// christophedetroyer.bencoding.Reader Maven / Gradle / Ivy
package christophedetroyer.bencoding;
import christophedetroyer.bencoding.types.*;
import org.apache.commons.io.IOUtils;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Parser for bencoded data (the serialization format used by BitTorrent).
 *
 * The complete input is held in memory as a byte array and parsed with a
 * simple cursor ({@link #currentByteIndex}). {@link #read()} is synchronized
 * and resets the cursor on every call, so it may be invoked repeatedly on the
 * same instance.
 *
 * Syntax errors are reported by throwing {@link Error}; input that ends before
 * a value is complete is reported with an
 * {@link ArrayIndexOutOfBoundsException}.
 */
public class Reader
{
    /** Offset into {@link #datablob} of the next byte to be parsed. */
    private int currentByteIndex;
    /** The complete bencoded input. */
    private final byte[] datablob;

    ////////////////////////////////////////////////////////////////////////////
    //// CONSTRUCTORS //////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////

    /**
     * Creates a reader over the full contents of a file.
     *
     * @param file file containing the bencoded data.
     * @throws IOException if the file cannot be opened, read or closed.
     */
    public Reader(File file) throws IOException
    {
        FileInputStream in = new FileInputStream(file);
        try
        {
            datablob = IOUtils.toByteArray(in);
        }
        finally
        {
            // IOUtils.toByteArray does not close the stream it is given, so
            // release the file handle here whether or not the read succeeded.
            in.close();
        }
    }

    /**
     * Creates a reader over the bytes of a string.
     *
     * @param s bencoded data as text.
     */
    public Reader(String s)
    {
        // NOTE(review): getBytes() uses the platform default charset, so
        // non-ASCII input may yield different bytes on different JVMs.
        datablob = s.getBytes();
    }

    ////////////////////////////////////////////////////////////////////////////
    //// PARSER ////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////

    /**
     * Parses the whole input from the first byte, reading one value after
     * another until every byte has been consumed.
     *
     * @return a list holding one {@code IBencodable} per top-level value, in
     *         the order they appear in the input; empty if the input is empty.
     * @throws Error if a byte that cannot start or continue a value is found.
     * @throws ArrayIndexOutOfBoundsException if the input ends in the middle
     *         of a value.
     */
    public synchronized List read()
    {
        this.currentByteIndex = 0;
        List<IBencodable> dataTypes = new ArrayList<IBencodable>();
        while (currentByteIndex < datablob.length)
        {
            dataTypes.add(readSingleType());
        }
        return dataTypes;
    }

    /**
     * Reads the single value that starts at the current byte index,
     * dispatching on its first byte: a digit starts a byte string, {@code 'd'}
     * a dictionary, {@code 'i'} an integer and {@code 'l'} a list.
     *
     * @return the parsed BByteString, BDictionary, BInt or BList.
     * @throws Error if the current byte does not start any known type.
     */
    private IBencodable readSingleType()
    {
        switch (readCurrentByte())
        {
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                return readByteString();
            case 'd':
                return readDictionary();
            case 'i':
                return readInteger();
            case 'l':
                return readList();
            default:
                throw new Error("Parser in invalid state at byte " + currentByteIndex);
        }
    }

    ////////////////////////////////////////////////////////////////////////////
    //// BENCODING READ TYPES //////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////

    /**
     * Reads a list starting at the current byte index.
     *
     * A list is encoded as {@code l<contents>e}, where the contents are the
     * bencoded elements in order, concatenated without separators. A list
     * holding the string "spam" and the number 42 is encoded as
     * {@code l4:spami42ee}.
     *
     * @return the parsed BList.
     * @throws Error if the current byte is not {@code 'l'}.
     */
    private BList readList()
    {
        if (readCurrentByte() != 'l')
            throw new Error("Error parsing list. Was expecting a 'l' but got " + readCurrentByte());
        currentByteIndex++; // Skip over the 'l'

        BList list = new BList();
        while (readCurrentByte() != 'e')
        {
            list.add(readSingleType());
        }
        currentByteIndex++; // Skip the 'e'
        return list;
    }

    /**
     * Reads a byte string starting at the current byte index.
     *
     * A byte string (a sequence of bytes, not necessarily characters) is
     * encoded as {@code <length>:<contents>}. The length is written in base 10
     * and must be non-negative (zero is allowed); the contents are the raw
     * bytes. The string "spam" is encoded as {@code 4:spam}. The format says
     * nothing about character encodings outside ASCII.
     *
     * @return the parsed BByteString.
     * @throws Error if the length is not followed by {@code ':'}.
     * @throws NumberFormatException if the length does not fit in an int.
     * @throws ArrayIndexOutOfBoundsException if the declared length exceeds
     *         the number of bytes left in the input.
     */
    private BByteString readByteString()
    {
        // Collect the ASCII digits that make up the length prefix.
        StringBuilder lengthDigits = new StringBuilder();
        byte current = readCurrentByte();
        while (current >= '0' && current <= '9')
        {
            lengthDigits.append((char) current);
            currentByteIndex++;
            current = readCurrentByte();
        }
        int length = Integer.parseInt(lengthDigits.toString());

        if (current != ':')
            throw new Error("Read length of byte string and was expecting ':' but got " + current);
        currentByteIndex++; // Skip over the ':'.

        // The length comes from the input itself, so validate it against the
        // bytes actually available before allocating a buffer of that size.
        int remaining = datablob.length - currentByteIndex;
        if (length > remaining)
            throw new ArrayIndexOutOfBoundsException("Byte string at byte " + currentByteIndex
                    + " declares " + length + " bytes but only " + remaining + " remain");

        byte[] bsData = new byte[length];
        System.arraycopy(datablob, currentByteIndex, bsData, 0, length);
        currentByteIndex += length;
        return new BByteString(bsData);
    }

    /**
     * Reads a dictionary starting at the current byte index.
     *
     * A dictionary is encoded as {@code d<contents>e}, where the contents are
     * key/value pairs with each key immediately followed by its value. Per the
     * format, keys are byte strings in lexicographical order; this method does
     * not check the ordering. The dictionary {"bar": "spam", "foo": 42} is
     * encoded as {@code d3:bar4:spam3:fooi42ee}
     * (with spaces for readability: {@code d 3:bar 4:spam 3:foo i42e e}).
     *
     * @return the parsed BDictionary.
     * @throws Error if the current byte is not {@code 'd'}.
     * @throws ClassCastException if a key is not a byte string.
     */
    private BDictionary readDictionary()
    {
        if (readCurrentByte() != 'd')
            throw new Error("Error parsing dictionary. Was expecting a 'd' but got " + readCurrentByte());
        currentByteIndex++; // Skip over the 'd'

        BDictionary dict = new BDictionary();
        while (readCurrentByte() != 'e')
        {
            // Each dictionary *must* map BByteStrings to any other value.
            BByteString key = (BByteString) readSingleType();
            IBencodable value = readSingleType();
            dict.add(key, value);
        }
        currentByteIndex++; // Skip the 'e'
        return dict;
    }

    /**
     * Reads an integer starting at the current byte index.
     *
     * An integer is encoded as {@code i<base-10 number>e}; negative values
     * carry a leading minus sign. 42 is encoded as {@code i42e}, 0 as
     * {@code i0e} and -42 as {@code i-42e}. The format forbids leading zeros
     * and negative zero, but this method does not reject them.
     *
     * @return BInt holding the parsed value.
     * @throws Error if the current byte is not {@code 'i'} or the digits are
     *         not followed by {@code 'e'}.
     * @throws NumberFormatException if the collected characters are not a
     *         valid long (for example empty, or a misplaced minus sign).
     */
    private BInt readInteger()
    {
        if (readCurrentByte() != 'i')
            throw new Error("Error parsing integer. Was expecting an 'i' but got " + readCurrentByte());
        currentByteIndex++; // Skip the 'i'.

        // Collect ASCII digits and minus signs; Long.parseLong validates them.
        StringBuilder digits = new StringBuilder();
        byte current = readCurrentByte();
        while ((current >= '0' && current <= '9') || current == '-')
        {
            digits.append((char) current);
            currentByteIndex++;
            current = readCurrentByte();
        }

        if (current != 'e')
            throw new Error("Error parsing integer. Was expecting 'e' at end but got " + current);
        currentByteIndex++; // Skip past 'e'
        return new BInt(Long.parseLong(digits.toString()));
    }

    ////////////////////////////////////////////////////////////////////////////
    //// HELPERS ///////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////

    /**
     * Returns the byte at the current position without advancing.
     *
     * @return the byte at {@link #currentByteIndex}.
     * @throws ArrayIndexOutOfBoundsException if the position is past the end
     *         of the input, i.e. the data is truncated.
     */
    private byte readCurrentByte()
    {
        if (currentByteIndex >= datablob.length)
            throw new ArrayIndexOutOfBoundsException("Unexpected end of bencoded data at byte " + currentByteIndex);
        return datablob[currentByteIndex];
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy