All downloads are free. Search and download functionality uses the official Maven repository.

com.vdurmont.emoji.EmojiParser Maven / Gradle / Ivy

There is a newer version: 5.1.1
Show newest version
package com.vdurmont.emoji;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Provides methods to parse strings with emojis.
 *
 * @author Vincent DURMONT [[email protected]]
 */
public class EmojiParser {
  private static final Pattern ALIAS_CANDIDATE_PATTERN =
    Pattern.compile("(?<=:)\\+?(\\w|\\||\\-)+(?=:)");

  /**
   * See {@link #parseToAliases(String, FitzpatrickAction)} with the action
   * "PARSE"
   *
   * @param input the string to parse
   *
   * @return the string with the emojis replaced by their alias.
   */
  public static String parseToAliases(String input) {
    return parseToAliases(input, FitzpatrickAction.PARSE);
  }

  /**
   * Replaces the emoji's unicode occurrences by one of their alias
   * (between 2 ':').
* Example: 😄 will be replaced by :smile:
*
* When a fitzpatrick modifier is present with a PARSE action, a "|" will be * appendend to the alias, with the fitzpatrick type.
* Example: 👦🏿 will be replaced by * :boy|type_6:
* The fitzpatrick types are: type_1_2, type_3, type_4, type_5, type_6
*
* When a fitzpatrick modifier is present with a REMOVE action, the modifier * will be deleted.
* Example: 👦🏿 will be replaced by :boy:
*
* When a fitzpatrick modifier is present with a IGNORE action, the modifier * will be ignored.
* Example: 👦🏿 will be replaced by :boy:🏿
* * @param input the string to parse * @param fitzpatrickAction the action to apply for the fitzpatrick modifiers * * @return the string with the emojis replaced by their alias. */ public static String parseToAliases( String input, final FitzpatrickAction fitzpatrickAction ) { EmojiTransformer emojiTransformer = new EmojiTransformer() { public String transform(UnicodeCandidate unicodeCandidate) { switch (fitzpatrickAction) { default: case PARSE: if (unicodeCandidate.hasFitzpatrick()) { return ":" + unicodeCandidate.getEmoji().getAliases().get(0) + "|" + unicodeCandidate.getFitzpatrickType() + ":"; } case REMOVE: return ":" + unicodeCandidate.getEmoji().getAliases().get(0) + ":"; case IGNORE: return ":" + unicodeCandidate.getEmoji().getAliases().get(0) + ":" + unicodeCandidate.getFitzpatrickUnicode(); } } }; return parseFromUnicode(input, emojiTransformer); } /** * Replaces the emoji's aliases (between 2 ':') occurrences and the html * representations by their unicode.
* Examples:
* :smile: will be replaced by 😄
* &#128516; will be replaced by 😄
* :boy|type_6: will be replaced by 👦🏿 * * @param input the string to parse * * @return the string with the aliases and html representations replaced by * their unicode. */ public static String parseToUnicode(String input) { // Get all the potential aliases List candidates = getAliasCandidates(input); // Replace the aliases by their unicode String result = input; for (AliasCandidate candidate : candidates) { Emoji emoji = EmojiManager.getForAlias(candidate.alias); if (emoji != null) { if ( emoji.supportsFitzpatrick() || (!emoji.supportsFitzpatrick() && candidate.fitzpatrick == null) ) { String replacement = emoji.getUnicode(); if (candidate.fitzpatrick != null) { replacement += candidate.fitzpatrick.unicode; } result = result.replace( ":" + candidate.fullString + ":", replacement ); } } } // Replace the html for (Emoji emoji : EmojiManager.getAll()) { result = result.replace(emoji.getHtmlHexadecimal(), emoji.getUnicode()); result = result.replace(emoji.getHtmlDecimal(), emoji.getUnicode()); } return result; } protected static List getAliasCandidates(String input) { List candidates = new ArrayList(); Matcher matcher = ALIAS_CANDIDATE_PATTERN.matcher(input); matcher = matcher.useTransparentBounds(true); while (matcher.find()) { String match = matcher.group(); if (!match.contains("|")) { candidates.add(new AliasCandidate(match, match, null)); } else { String[] splitted = match.split("\\|"); if (splitted.length == 2 || splitted.length > 2) { candidates.add(new AliasCandidate(match, splitted[0], splitted[1])); } else { candidates.add(new AliasCandidate(match, match, null)); } } } return candidates; } /** * See {@link #parseToHtmlDecimal(String, FitzpatrickAction)} with the action * "PARSE" * * @param input the string to parse * * @return the string with the emojis replaced by their html decimal * representation. 
*/ public static String parseToHtmlDecimal(String input) { return parseToHtmlDecimal(input, FitzpatrickAction.PARSE); } /** * Replaces the emoji's unicode occurrences by their html representation.
* Example: 😄 will be replaced by &#128516;
*
* When a fitzpatrick modifier is present with a PARSE or REMOVE action, the * modifier will be deleted from the string.
* Example: 👦🏿 will be replaced by * &#128102;
*
* When a fitzpatrick modifier is present with a IGNORE action, the modifier * will be ignored and will remain in the string.
* Example: 👦🏿 will be replaced by * &#128102;🏿 * * @param input the string to parse * @param fitzpatrickAction the action to apply for the fitzpatrick modifiers * * @return the string with the emojis replaced by their html decimal * representation. */ public static String parseToHtmlDecimal( String input, final FitzpatrickAction fitzpatrickAction ) { EmojiTransformer emojiTransformer = new EmojiTransformer() { public String transform(UnicodeCandidate unicodeCandidate) { switch (fitzpatrickAction) { default: case PARSE: case REMOVE: return unicodeCandidate.getEmoji().getHtmlDecimal(); case IGNORE: return unicodeCandidate.getEmoji().getHtmlDecimal() + unicodeCandidate.getFitzpatrickUnicode(); } } }; return parseFromUnicode(input, emojiTransformer); } /** * See {@link #parseToHtmlHexadecimal(String, FitzpatrickAction)} with the * action "PARSE" * * @param input the string to parse * * @return the string with the emojis replaced by their html hex * representation. */ public static String parseToHtmlHexadecimal(String input) { return parseToHtmlHexadecimal(input, FitzpatrickAction.PARSE); } /** * Replaces the emoji's unicode occurrences by their html hex * representation.
* Example: 👦 will be replaced by &#x1f466;
*
* When a fitzpatrick modifier is present with a PARSE or REMOVE action, the * modifier will be deleted.
* Example: 👦🏿 will be replaced by * &#x1f466;
*
* When a fitzpatrick modifier is present with a IGNORE action, the modifier * will be ignored and will remain in the string.
* Example: 👦🏿 will be replaced by * &#x1f466;🏿 * * @param input the string to parse * @param fitzpatrickAction the action to apply for the fitzpatrick modifiers * * @return the string with the emojis replaced by their html hex * representation. */ public static String parseToHtmlHexadecimal( String input, final FitzpatrickAction fitzpatrickAction ) { EmojiTransformer emojiTransformer = new EmojiTransformer() { public String transform(UnicodeCandidate unicodeCandidate) { switch (fitzpatrickAction) { default: case PARSE: case REMOVE: return unicodeCandidate.getEmoji().getHtmlHexadecimal(); case IGNORE: return unicodeCandidate.getEmoji().getHtmlHexadecimal() + unicodeCandidate.getFitzpatrickUnicode(); } } }; return parseFromUnicode(input, emojiTransformer); } /** * Removes all emojis from a String * * @param str the string to process * * @return the string without any emoji */ public static String removeAllEmojis(String str) { EmojiTransformer emojiTransformer = new EmojiTransformer() { public String transform(UnicodeCandidate unicodeCandidate) { return ""; } }; return parseFromUnicode(str, emojiTransformer); } /** * Removes a set of emojis from a String * * @param str the string to process * @param emojisToRemove the emojis to remove from this string * * @return the string without the emojis that were removed */ public static String removeEmojis( String str, final Collection emojisToRemove ) { EmojiTransformer emojiTransformer = new EmojiTransformer() { public String transform(UnicodeCandidate unicodeCandidate) { if (!emojisToRemove.contains(unicodeCandidate.getEmoji())) { return unicodeCandidate.getEmoji().getUnicode() + unicodeCandidate.getFitzpatrickUnicode(); } return ""; } }; return parseFromUnicode(str, emojiTransformer); } /** * Removes all the emojis in a String except a provided set * * @param str the string to process * @param emojisToKeep the emojis to keep in this string * * @return the string without the emojis that were removed */ public static String 
removeAllEmojisExcept( String str, final Collection emojisToKeep ) { EmojiTransformer emojiTransformer = new EmojiTransformer() { public String transform(UnicodeCandidate unicodeCandidate) { if (emojisToKeep.contains(unicodeCandidate.getEmoji())) { return unicodeCandidate.getEmoji().getUnicode() + unicodeCandidate.getFitzpatrickUnicode(); } return ""; } }; return parseFromUnicode(str, emojiTransformer); } /** * Detects all unicode emojis in input string and replaces them with the * return value of transformer.transform() * * @param input the string to process * @param transformer emoji transformer to apply to each emoji * * @return input string with all emojis transformed */ public static String parseFromUnicode( String input, EmojiTransformer transformer ) { int prev = 0; StringBuilder sb = new StringBuilder(); List replacements = getUnicodeCandidates(input); for (UnicodeCandidate candidate : replacements) { sb.append(input.substring(prev, candidate.getEmojiStartIndex())); sb.append(transformer.transform(candidate)); prev = candidate.getFitzpatrickEndIndex(); } return sb.append(input.substring(prev)).toString(); } public static List extractEmojis(String input) { List emojis = getUnicodeCandidates(input); List result = new ArrayList(); for (UnicodeCandidate emoji : emojis) { result.add(emoji.getEmoji().getUnicode()); } return result; } /** * Generates a list UnicodeCandidates found in input string. A * UnicodeCandidate is created for every unicode emoticon found in input * string, additionally if Fitzpatrick modifier follows the emoji, it is * included in UnicodeCandidate. Finally, it contains start and end index of * unicode emoji itself (WITHOUT Fitzpatrick modifier whether it is there or * not!). 
* * @param input String to find all unicode emojis in * @return List of UnicodeCandidates for each unicode emote in text */ protected static List getUnicodeCandidates(String input) { char[] inputCharArray = input.toCharArray(); List candidates = new ArrayList(); UnicodeCandidate next; for (int i = 0; (next = getNextUnicodeCandidate(inputCharArray, i)) != null; i = next.getFitzpatrickEndIndex()) { candidates.add(next); } return candidates; } /** * Finds the next UnicodeCandidate after a given starting index * * @param chars char array to find UnicodeCandidate in * @param start starting index for search * @return the next UnicodeCandidate or null if no UnicodeCandidate is found after start index */ protected static UnicodeCandidate getNextUnicodeCandidate(char[] chars, int start) { for (int i = start; i < chars.length; i++) { int emojiEnd = getEmojiEndPos(chars, i); if (emojiEnd != -1) { Emoji emoji = EmojiManager.getByUnicode(new String(chars, i, emojiEnd - i)); String fitzpatrickString = (emojiEnd + 2 <= chars.length) ? new String(chars, emojiEnd, 2) : null; return new UnicodeCandidate( emoji, fitzpatrickString, i ); } } return null; } /** * Returns end index of a unicode emoji if it is found in text starting at * index startPos, -1 if not found. * This returns the longest matching emoji, for example, in * "\uD83D\uDC68\u200D\uD83D\uDC69\u200D\uD83D\uDC66" * it will find alias:family_man_woman_boy, NOT alias:man * * @param text the current text where we are looking for an emoji * @param startPos the position in the text where we should start looking for * an emoji end * * @return the end index of the unicode emoji starting at startPos. 
-1 if not * found */ protected static int getEmojiEndPos(char[] text, int startPos) { int best = -1; for (int j = startPos + 1; j <= text.length; j++) { EmojiTrie.Matches status = EmojiManager.isEmoji(Arrays.copyOfRange( text, startPos, j )); if (status.exactMatch()) { best = j; } else if (status.impossibleMatch()) { return best; } } return best; } public static class UnicodeCandidate { private final Emoji emoji; private final Fitzpatrick fitzpatrick; private final int startIndex; private UnicodeCandidate(Emoji emoji, String fitzpatrick, int startIndex) { this.emoji = emoji; this.fitzpatrick = Fitzpatrick.fitzpatrickFromUnicode(fitzpatrick); this.startIndex = startIndex; } public Emoji getEmoji() { return emoji; } public boolean hasFitzpatrick() { return getFitzpatrick() != null; } public Fitzpatrick getFitzpatrick() { return fitzpatrick; } public String getFitzpatrickType() { return hasFitzpatrick() ? fitzpatrick.name().toLowerCase() : ""; } public String getFitzpatrickUnicode() { return hasFitzpatrick() ? fitzpatrick.unicode : ""; } public int getEmojiStartIndex() { return startIndex; } public int getEmojiEndIndex() { return startIndex + emoji.getUnicode().length(); } public int getFitzpatrickEndIndex() { return getEmojiEndIndex() + (fitzpatrick != null ? 2 : 0); } } protected static class AliasCandidate { public final String fullString; public final String alias; public final Fitzpatrick fitzpatrick; private AliasCandidate( String fullString, String alias, String fitzpatrickString ) { this.fullString = fullString; this.alias = alias; if (fitzpatrickString == null) { this.fitzpatrick = null; } else { this.fitzpatrick = Fitzpatrick.fitzpatrickFromType(fitzpatrickString); } } } /** * Enum used to indicate what should be done when a Fitzpatrick modifier is * found. 
*/ public enum FitzpatrickAction { /** * Tries to match the Fitzpatrick modifier with the previous emoji */ PARSE, /** * Removes the Fitzpatrick modifier from the string */ REMOVE, /** * Ignores the Fitzpatrick modifier (it will stay in the string) */ IGNORE } public interface EmojiTransformer { String transform(UnicodeCandidate unicodeCandidate); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy