// sx.blah.discord.util.MessageTokenizer (Discord4J) -- Maven / Gradle / Ivy
// Show all versions of Discord4J. Show documentation.
/*
* This file is part of Discord4J.
*
* Discord4J is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Discord4J is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Discord4J. If not, see <http://www.gnu.org/licenses/>.
*/
package sx.blah.discord.util;
import com.vdurmont.emoji.Emoji;
import com.vdurmont.emoji.EmojiManager;
import sx.blah.discord.Discord4J;
import sx.blah.discord.api.IDiscordClient;
import sx.blah.discord.handle.obj.*;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Used to traverse through a message's content and step through tokens like mentions, characters, words, etc.
 *
 * <p>The tokenizer has a pointer of the index it is at in the message. Every time a {@code next} method is
 * called, the pointer moves past the next found token.
 *
 * <p>For example, if the content is {@code this is a string of words}, when {@link #nextWord()} is first
 * called, it will return {@code this} and move past the whitespace that follows it, to {@code i}. Calling
 * {@link #nextChar()} will then return that {@code i} and move to {@code s}.
 *
 * @author chrislo27
 */
public class MessageTokenizer {
/**
 * Regex for matching any Discord mention format: {@code <@id>}, {@code <@!id>}, {@code <@&id>} and
 * {@code <#id>}. Capture group 1 holds the numeric ID.
 */
public static final String ANY_MENTION_REGEX = "<(?:(?:@[!&]?)|#)(\\d+)>";
/**
 * Regex for matching custom emoji of the form {@code <:name:id>}, where the name is at least two
 * characters from {@code [A-Za-z0-9_]} and the ID is numeric.
 */
public static final String CUSTOM_EMOJI_REGEX = "<:[A-Za-z0-9_]{2,}:\\d+>";
/**
 * Regex for matching invite URLs of the form {@code discord.gg/CODE}. Capture group 1 holds the code,
 * which consists of word characters and hyphens.
 */
public static final String INVITE_REGEX = "(?:discord\\.gg/)([\\w-]+)";
/**
 * Regex for the delimiter between words: a run of one or more whitespace characters. Despite the name,
 * it matches the separators rather than the words themselves; {@link #nextWord()} uses it to find where
 * a word ends.
 */
public static final String WORD_REGEX = "(?:\\s|\\n)+";
/**
 * Compiled form of {@link #ANY_MENTION_REGEX}.
 */
public static final Pattern ANY_MENTION_PATTERN = Pattern.compile(ANY_MENTION_REGEX);
/**
 * Compiled form of {@link #CUSTOM_EMOJI_REGEX}.
 */
public static final Pattern CUSTOM_EMOJI_PATTERN = Pattern.compile(CUSTOM_EMOJI_REGEX);
/**
 * Compiled form of {@link #INVITE_REGEX}.
 */
public static final Pattern INVITE_PATTERN = Pattern.compile(INVITE_REGEX);
/**
 * Compiled form of {@link #WORD_REGEX} (the whitespace delimiter between words).
 */
public static final Pattern WORD_PATTERN = Pattern.compile(WORD_REGEX);
/**
 * The content of the message that is being tokenized. Never null or empty (checked by the constructor).
 */
private final String content;
/**
 * The client that owns the tokenizer. Never null (checked by the constructor).
 */
private final IDiscordClient client;
/**
 * The current position of the pointer in the message, always within {@code [0, content.length()]}.
 *
 * NOTE(review): volatile only provides visibility; stepTo writes this field and {@code remaining} in two
 * separate steps, so the pair is not updated atomically -- confirm whether concurrent use is intended.
 */
private volatile int currentPosition = 0;
/**
 * The remaining substring, i.e. the content from the current pointer position to the end.
 */
private volatile String remaining;
/**
 * Creates a tokenizer for the given message, using the message's client and content.
 *
 * @param message The message to tokenize.
 */
public MessageTokenizer(IMessage message) {
    this(message.getClient(), message.getContent());
}
/**
 * Creates a tokenizer for the given content with the pointer at the start.
 *
 * @param client The client that owns the tokenizer.
 * @param content The content to tokenize.
 * @throws IllegalArgumentException If the content is null or empty, or if the client is null.
 */
public MessageTokenizer(IDiscordClient client, String content) {
    if (content == null) {
        throw new IllegalArgumentException("Content cannot be null!");
    }
    if (content.isEmpty()) {
        throw new IllegalArgumentException("Content must have length!");
    }
    if (client == null) {
        throw new IllegalArgumentException("Client cannot be null!");
    }

    this.client = client;
    this.content = content;

    // A zero-length step keeps the pointer at 0 and initializes the remaining substring.
    stepForward(0);
}
/**
 * Moves the pointer by the given amount relative to its current position.
 *
 * <p>The move is delegated to {@link #stepTo(int)}, so the resulting position is clamped to the bounds
 * of the content.
 *
 * @param amount The amount to move forward.
 * @return The new pointer position.
 */
public int stepForward(int amount) {
    final int target = currentPosition + amount;
    return stepTo(target);
}
/**
 * Moves the pointer to the given position and refreshes the remaining substring.
 *
 * <p>The index is clamped to the range {@code [0, content.length()]}.
 *
 * @param index The index to move to.
 * @return The new pointer position.
 */
public int stepTo(int index) {
    final int upperBound = content.length();
    final int clamped = Math.max(0, Math.min(index, upperBound));

    currentPosition = clamped;
    remaining = content.substring(clamped);
    return clamped;
}
/**
 * Reports whether there is still content after the pointer.
 *
 * @return Whether the pointer is not at the end of the content.
 */
public boolean hasNext() {
    return content.length() > currentPosition;
}
/**
 * Reports whether there is at least one more character after the pointer.
 *
 * <p>This is equivalent to {@link #hasNext()}.
 *
 * @return Whether the pointer is not at the end of the content.
 */
public boolean hasNextChar() {
    return hasNext();
}
/**
 * Reads the character at the pointer and advances the pointer by one.
 *
 * @return The next character in the content.
 * @throws IllegalStateException If there is no next character.
 */
public char nextChar() {
    if (!hasNextChar()) {
        throw new IllegalStateException("Reached end of string!");
    }

    final char next = content.charAt(currentPosition);
    stepForward(1);
    return next;
}
/**
 * Reports whether the given string sequence occurs anywhere in the remaining content, not only
 * directly at the pointer.
 *
 * @param sequence The string sequence to look for.
 * @return Whether the remaining content has the given string sequence.
 */
public boolean hasNextSequence(String sequence) {
    return remaining.indexOf(sequence) >= 0;
}
/**
 * Finds the first occurrence of the given sequence in the remaining content, returns it as a token and
 * moves the pointer just past it. Anything between the pointer and the occurrence is skipped.
 *
 * @param sequence The string sequence to look for.
 * @return The next sequence as a token.
 * @throws IllegalStateException If the sequence does not occur in the remaining content.
 * @throws IllegalArgumentException If the sequence is empty, because {@link Token} rejects empty ranges.
 */
public Token nextSequence(String sequence) {
    if (!hasNextSequence(sequence)) {
        throw new IllegalStateException("The sequence \"" + sequence + "\" was not found!");
    }

    final int offset = remaining.indexOf(sequence);
    final int tokenStart = currentPosition + offset;
    final int tokenEnd = tokenStart + sequence.length();

    final Token found = new Token(this, tokenStart, tokenEnd);
    stepForward(offset + sequence.length());
    return found;
}
/**
 * Reports whether the content may have a next word. A word is delimited by whitespace.
 *
 * <p>This is equivalent to {@link #hasNext()}: it only checks that the pointer is not at the end, so it
 * also returns true when nothing but whitespace remains, in which case {@link #nextWord()} can still throw.
 *
 * @return Whether the pointer is not at the end of the content.
 */
public boolean hasNextWord() {
    return hasNext();
}
/**
 * Gets the next word in the content and moves the pointer forward.
 *
 * <p>Whitespace directly at the pointer is skipped first. The word then runs up to the next run of
 * whitespace (or the end of the content), and the pointer is moved past that whitespace.
 *
 * <p>If only whitespace remains, the pointer is moved to the end of the content and an
 * {@link IllegalStateException} is thrown.
 *
 * @return The next word in the content.
 * @throws IllegalStateException If the pointer is at the end of the content or only whitespace remains.
 */
public Token nextWord() {
    if (!hasNextWord()) {
        throw new IllegalStateException("No more words found!");
    }

    // Skip a whitespace run sitting directly at the pointer so the word starts on a non-whitespace char.
    final Matcher leading = WORD_PATTERN.matcher(remaining);
    if (leading.lookingAt()) {
        stepTo(currentPosition + leading.end());
    }

    // Only whitespace was left. Without this guard an out-of-range Token would be built and the caller
    // would see an unrelated IllegalArgumentException instead of the documented "no more words" error.
    if (!hasNext()) {
        throw new IllegalStateException("No more words found!");
    }

    final int wordStart = currentPosition;
    final Matcher delimiter = WORD_PATTERN.matcher(remaining);
    if (delimiter.find()) {
        // The word ends where the delimiter starts; the pointer moves past the whole delimiter.
        final Token word = new Token(this, wordStart, wordStart + delimiter.start());
        stepTo(wordStart + delimiter.end());
        return word;
    }

    // No further whitespace: the word is everything that is left.
    final Token lastWord = new Token(this, wordStart, content.length());
    stepTo(content.length());
    return lastWord;
}
/**
 * Reports whether there is any content left to read as a line.
 *
 * <p>This is equivalent to {@link #hasNext()}.
 *
 * @return Whether the pointer is not at the end of the content.
 */
public boolean hasNextLine() {
    return hasNext();
}
/**
 * Gets the content from the pointer up to (not including) the next {@code '\n'} as a token, and moves
 * the pointer past that newline. If there is no newline, the line runs to the end of the content.
 *
 * @return The next line of content as a token.
 * @throws IllegalStateException If the pointer is at the end of the content.
 * @throws IllegalArgumentException If the pointer is directly on a newline (an empty line), because
 *                                  {@link Token} rejects empty ranges.
 */
public Token nextLine() {
    if (!hasNextLine()) {
        throw new IllegalStateException("No more lines found!");
    }

    final int newlineOffset = remaining.indexOf('\n');
    final int lineLength;
    if (newlineOffset == -1) {
        // No newline left: the line is everything after the pointer.
        lineLength = content.length() - currentPosition;
    } else {
        lineLength = newlineOffset;
    }

    final Token line = new Token(this, currentPosition, currentPosition + lineLength);
    // The extra step skips the newline itself; stepTo clamps the position at the end of the content.
    stepForward(lineLength + 1);
    return line;
}
/**
 * Reports whether the given pattern can be found anywhere in the remaining content.
 *
 * @param pattern The pattern to match with.
 * @return Whether the pointer is not at the end and the remaining content contains a match.
 */
public boolean hasNextRegex(Pattern pattern) {
    if (!hasNext()) {
        return false;
    }
    return pattern.matcher(remaining).find();
}
/**
 * Finds the first match of the given pattern in the remaining content, moves the pointer to the end of
 * the match and returns the matched text as a token.
 *
 * @param pattern The pattern to match with.
 * @return The next string of the content that matches the given pattern as a token.
 * @throws IllegalStateException If the remaining content contains no match.
 */
public Token nextRegex(Pattern pattern) {
    if (!hasNextRegex(pattern)) {
        throw new IllegalStateException("No more occurrences found!");
    }

    final Matcher match = pattern.matcher(remaining);
    if (!match.find()) {
        throw new IllegalStateException("Couldn't find any matches!");
    }

    // Matcher offsets are relative to the remaining substring; shift them into content coordinates.
    final int base = currentPosition;
    final int tokenStart = base + match.start();
    final int tokenEnd = base + match.end();

    stepTo(tokenEnd);
    return new Token(this, tokenStart, tokenEnd);
}
/**
 * Reports whether the remaining content contains an invite URL.
 *
 * <p>This is equivalent to {@code hasNextRegex(INVITE_PATTERN)}.
 *
 * @return Whether the content has an invite.
 */
public boolean hasNextInvite() {
    return hasNextRegex(INVITE_PATTERN);
}
/**
 * Finds the next invite URL in the remaining content, moves the pointer past it and returns it as an
 * invite token.
 *
 * @return The next invite in the content.
 * @throws IllegalStateException If the remaining content contains no invite.
 */
public InviteToken nextInvite() {
    if (!hasNextInvite()) {
        throw new IllegalStateException("No more invites found!");
    }

    final Matcher match = INVITE_PATTERN.matcher(remaining);
    if (!match.find()) {
        throw new IllegalStateException("Couldn't find any matches!");
    }

    // Matcher offsets are relative to the remaining substring; shift them into content coordinates.
    final int base = currentPosition;
    final int tokenStart = base + match.start();
    final int tokenEnd = base + match.end();

    stepTo(tokenEnd);
    return new InviteToken(this, tokenStart, tokenEnd);
}
/**
 * Reports whether the remaining content contains a mention.
 *
 * <p>This is equivalent to {@code hasNextRegex(ANY_MENTION_PATTERN)}.
 *
 * @return Whether the content has a mention.
 */
public boolean hasNextMention() {
    return hasNextRegex(ANY_MENTION_PATTERN);
}
/**
 * Finds the next mention in the remaining content, moves the pointer past it and returns a token of the
 * matching kind: a role token for {@code <@&...>}, a user token for any other {@code <@...>}, and a
 * channel token for {@code <#...>}.
 *
 * @return The next mention in the content.
 * @throws IllegalStateException If the remaining content contains no mention.
 */
public MentionToken nextMention() {
    if (!hasNextMention()) {
        throw new IllegalStateException("No more mentions found!");
    }

    final Token raw = nextRegex(ANY_MENTION_PATTERN);
    final int start = raw.getStartIndex();
    final int end = raw.getEndIndex();
    final String text = raw.getContent();

    // Index 0 is always '<'; the character after it tells the mention kind apart.
    switch (text.charAt(1)) {
        case '@':
            if (text.charAt(2) == '&') {
                return new RoleMentionToken(this, start, end);
            }
            return new UserMentionToken(this, start, end);
        case '#':
            return new ChannelMentionToken(this, start, end);
        default:
            // should NEVER happen because the mention pattern only matches '@' or '#' after '<'
            throw new IllegalStateException("Couldn't find a mention even though it was found!");
    }
}
/**
 * Reports whether the remaining content contains a custom emoji.
 *
 * <p>This is equivalent to {@code hasNextRegex(CUSTOM_EMOJI_PATTERN)}.
 *
 * @return Whether the content has a custom emoji.
 */
public boolean hasNextEmoji() {
    return hasNextRegex(CUSTOM_EMOJI_PATTERN);
}
/**
 * Finds the next custom emoji in the remaining content, moves the pointer past it and returns it as a
 * custom emoji token.
 *
 * @return The next custom emoji in the content.
 * @throws IllegalStateException If the remaining content contains no custom emoji.
 */
public CustomEmojiToken nextEmoji() {
    if (!hasNextEmoji()) {
        throw new IllegalStateException("No more custom server emojis found!");
    }

    final Token raw = nextRegex(CUSTOM_EMOJI_PATTERN);
    return new CustomEmojiToken(this, raw.getStartIndex(), raw.getEndIndex());
}
/**
 * Reports whether the unicode form of the given emoji occurs in the remaining content.
 *
 * @param emoji The unicode emoji to search for.
 * @return Whether the content has the unicode emoji.
 */
public boolean hasNextUnicodeEmoji(Emoji emoji) {
    final String unicode = emoji.getUnicode();
    return hasNextSequence(unicode);
}
/**
 * Finds the next occurrence of the given emoji's unicode form in the remaining content, moves the
 * pointer past it and returns it as a unicode emoji token.
 *
 * @param emoji The unicode emoji to search for.
 * @return The next unicode emoji in the content.
 * @throws IllegalStateException If the emoji does not occur in the remaining content.
 */
public UnicodeEmojiToken nextUnicodeEmoji(Emoji emoji) {
    final Token raw = nextSequence(emoji.getUnicode());
    return new UnicodeEmojiToken(this, raw.getStartIndex(), raw.getEndIndex());
}
/**
 * Returns the full content the tokenizer was created with, independent of the pointer position.
 *
 * @return The content of the tokenizer.
 */
public String getContent() {
    return this.content;
}
/**
 * Returns the client the tokenizer was created with.
 *
 * @return The client that owns the tokenizer.
 */
public IDiscordClient getClient() {
    return this.client;
}
/**
 * Returns the index in the content at which the pointer currently sits.
 *
 * @return The current position of the pointer in the message.
 */
public int getCurrentPosition() {
    return this.currentPosition;
}
/**
 * Returns the part of the original content from the pointer position to the end.
 *
 * @return The remaining substring.
 */
public String getRemainingContent() {
    return this.remaining;
}
/**
 * A non-empty slice of a tokenizer's content, identified by its start and end indices.
 */
public static class Token {

    /** The tokenizer which produced the token. */
    private final MessageTokenizer tokenizer;

    /** The start index within the tokenizer's content. (Inclusive) */
    private final int startIndex;

    /** The end index within the tokenizer's content. (Exclusive) */
    private final int endIndex;

    /** The content of the token, i.e. the tokenizer's content between the two indices. */
    private final String content;

    /**
     * Creates a token covering {@code [startIndex, endIndex)} of the tokenizer's content.
     *
     * @param tokenizer The tokenizer which produced the token.
     * @param startIndex The start index (inclusive); must be a valid index into the content.
     * @param endIndex The end index (exclusive); must be greater than the start index and at most the
     *                 content's length.
     * @throws IllegalArgumentException If either index violates the constraints above.
     */
    Token(MessageTokenizer tokenizer, int startIndex, int endIndex) {
        final String source = tokenizer.getContent();
        final int length = source.length();

        if (startIndex < 0 || startIndex >= length) {
            throw new IllegalArgumentException("Start index must be within range of content! (Got "
                    + startIndex + " for startIndex, must be between 0 and " + (length - 1)
                    + ", inclusive)");
        }
        if (endIndex <= startIndex) {
            throw new IllegalArgumentException("End index cannot be before or at start index! (Start index is "
                    + startIndex + ", got " + endIndex + ")");
        }
        if (endIndex > length) {
            throw new IllegalArgumentException("End index must be within content's length! (End index is "
                    + endIndex + ", length is " + length + ")");
        }

        this.tokenizer = tokenizer;
        this.startIndex = startIndex;
        this.endIndex = endIndex;
        this.content = source.substring(startIndex, endIndex);
    }

    /**
     * Returns the tokenizer this token came from.
     *
     * @return The tokenizer which produced the token.
     */
    public MessageTokenizer getTokenizer() {
        return this.tokenizer;
    }

    /**
     * Returns the text covered by the token.
     *
     * @return The content of the token.
     */
    public String getContent() {
        return this.content;
    }

    /**
     * Returns where the token starts in the tokenizer's content. (Inclusive)
     *
     * @return The start index of the tokenizer's contents.
     */
    public int getStartIndex() {
        return this.startIndex;
    }

    /**
     * Returns where the token ends in the tokenizer's content. (Exclusive)
     *
     * @return The end index of the tokenizer's contents.
     */
    public int getEndIndex() {
        return this.endIndex;
    }

    /**
     * Returns the token's content.
     *
     * @return The content of the token.
     */
    @Override
    public String toString() {
        return this.content;
    }
}
/**
 * A token for a mention.
 *
 * @param <T> The type of object that is mentioned.
 */
public static abstract class MentionToken<T> extends Token {

    /**
     * The mentioned object. Protected and non-final so that a subclass can assign it after calling the
     * super constructor.
     */
    protected T mention;

    /**
     * Creates a mention token covering {@code [startIndex, endIndex)} of the tokenizer's content.
     *
     * @param tokenizer The tokenizer which produced the token.
     * @param startIndex The start index (inclusive).
     * @param endIndex The end index (exclusive).
     * @param mentionObject The initial mentioned object; stored as given.
     */
    private MentionToken(MessageTokenizer tokenizer, int startIndex, int endIndex, T mentionObject) {
        super(tokenizer, startIndex, endIndex);
        mention = mentionObject;
    }

    /**
     * Gets the mentioned object.
     *
     * @return The mentioned object.
     */
    public T getMentionObject() {
        return mention;
    }
}
/**
 * A mention token for a user.
 */
public static class UserMentionToken extends MentionToken {

    /** Whether the mention was a nickname mention, i.e. written as {@code <@!id>}. */
    private final boolean isNickname;

    /**
     * Creates the token and looks the mentioned user up through the tokenizer's client, using the ID
     * parsed from the token's content.
     *
     * @param tokenizer The tokenizer which produced the token.
     * @param startIndex The start index (inclusive).
     * @param endIndex The end index (exclusive).
     */
    private UserMentionToken(MessageTokenizer tokenizer, int startIndex, int endIndex) {
        super(tokenizer, startIndex, endIndex, null);

        final String raw = getContent();
        // Strip the "<@" or "<@!" prefix and the ">" suffix, leaving only the numeric ID.
        final String idText = raw.replaceAll("<@!?", "").replace(">", "");

        mention = tokenizer.getClient().getUserByID(Long.parseUnsignedLong(idText));
        isNickname = raw.contains("<@!");
    }

    /**
     * Reports whether the mention used the nickname form.
     *
     * @return Whether the mention was a nickname mention.
     */
    public boolean isNickname() {
        return this.isNickname;
    }
}
/**
 * A mention token for a role.
 */
public static class RoleMentionToken extends MentionToken {

    /**
     * Creates the token and looks the mentioned role up through the tokenizer's client, using the ID
     * parsed from the token's content.
     *
     * @param tokenizer The tokenizer which produced the token.
     * @param startIndex The start index (inclusive).
     * @param endIndex The end index (exclusive).
     */
    private RoleMentionToken(MessageTokenizer tokenizer, int startIndex, int endIndex) {
        super(tokenizer, startIndex, endIndex, null);

        // Strip the "<@&" prefix and the ">" suffix, leaving only the numeric ID.
        final String idText = getContent().replace("<@&", "").replace(">", "");
        mention = tokenizer.getClient().getRoleByID(Long.parseUnsignedLong(idText));
    }
}
/**
 * A mention token for a channel.
 */
public static class ChannelMentionToken extends MentionToken {

    /**
     * Creates the token and looks the mentioned channel up through the tokenizer's client, using the ID
     * parsed from the token's content.
     *
     * @param tokenizer The tokenizer which produced the token.
     * @param startIndex The start index (inclusive).
     * @param endIndex The end index (exclusive).
     */
    private ChannelMentionToken(MessageTokenizer tokenizer, int startIndex, int endIndex) {
        super(tokenizer, startIndex, endIndex, null);

        // Strip the "<#" prefix and the ">" suffix, leaving only the numeric ID.
        final String idText = getContent().replace("<#", "").replace(">", "");
        mention = tokenizer.getClient().getChannelByID(Long.parseUnsignedLong(idText));
    }
}
/**
 * A token for a custom emoji.
 */
public static class CustomEmojiToken extends Token {

    /** The custom emoji, or null if none of the client's guilds has an emoji with the parsed ID. */
    private final IEmoji emoji;

    /**
     * Creates the token and searches the client's guilds for an emoji with the ID parsed from the
     * token's content. The first guild that returns a non-null emoji wins.
     *
     * @param tokenizer The tokenizer which produced the token.
     * @param startIndex The start index (inclusive).
     * @param endIndex The end index (exclusive).
     */
    private CustomEmojiToken(MessageTokenizer tokenizer, int startIndex, int endIndex) {
        super(tokenizer, startIndex, endIndex);

        final String raw = getContent();
        // The ID sits between the last ':' and the closing '>'.
        final int idStart = raw.lastIndexOf(":") + 1;
        final int idEnd = raw.lastIndexOf('>');
        final long emojiId = Long.parseUnsignedLong(raw.substring(idStart, idEnd));

        emoji = tokenizer.getClient().getGuilds().stream()
                .map(guild -> guild.getEmojiByID(emojiId))
                .filter(Objects::nonNull)
                .findFirst()
                .orElse(null);
    }

    /**
     * Gets the custom emoji.
     *
     * @return The custom emoji, or null if it could not be found.
     */
    public IEmoji getEmoji() {
        return this.emoji;
    }
}
/**
 * A token for an invite.
 */
public static class InviteToken extends Token {

    /** The invite, or null if requesting it failed with a {@code DiscordException}. */
    private final IInvite invite;

    /**
     * Creates the token and requests the invite for the code found in the token's content.
     *
     * <p>The request is issued through {@code RequestBuffer} and its result is awaited with
     * {@code get()} inside this constructor.
     *
     * @param tokenizer The tokenizer which produced the token.
     * @param startIndex The start index (inclusive).
     * @param endIndex The end index (exclusive).
     */
    private InviteToken(MessageTokenizer tokenizer, int startIndex, int endIndex) {
        super(tokenizer, startIndex, endIndex);

        final String matched = getContent();
        // The code is everything after the last slash. The "+ 1" skips the slash itself, which must
        // not be part of the code handed to the client.
        final String code = matched.substring(matched.lastIndexOf('/') + 1);

        invite = RequestBuffer.request(() -> {
            try {
                return tokenizer.getClient().getInviteForCode(code);
            } catch (DiscordException e) {
                // Best effort: log the failure and leave the invite unresolved.
                Discord4J.LOGGER.error(LogMarkers.UTIL, "Discord4J Internal Exception", e);
            }
            return null;
        }).get();
    }

    /**
     * Gets the invite.
     *
     * @return The invite, or null if it could not be retrieved.
     */
    public IInvite getInvite() {
        return invite;
    }
}
/**
 * A token for a unicode emoji.
 */
public static class UnicodeEmojiToken extends Token {

    /** The unicode emoji resolved from the token's content. */
    private final Emoji emoji;

    /**
     * Creates the token and resolves its content to an emoji: by unicode when
     * {@code EmojiManager.isEmoji} accepts the content, otherwise by alias.
     *
     * @param tokenizer The tokenizer which produced the token.
     * @param startIndex The start index (inclusive).
     * @param endIndex The end index (exclusive).
     */
    private UnicodeEmojiToken(MessageTokenizer tokenizer, int startIndex, int endIndex) {
        super(tokenizer, startIndex, endIndex);

        final String raw = getContent();
        if (EmojiManager.isEmoji(raw)) {
            emoji = EmojiManager.getByUnicode(raw);
        } else {
            emoji = EmojiManager.getForAlias(raw);
        }
    }

    /**
     * Gets the unicode emoji.
     *
     * @return The unicode emoji.
     */
    public Emoji getEmoji() {
        return this.emoji;
    }
}
}