com.twitter.twittertext.Autolink Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of twitter-text Show documentation
Show all versions of twitter-text Show documentation
Text processing routines for Twitter Tweets
The newest version!
// Copyright 2018 Twitter, Inc.
// Licensed under the Apache License, Version 2.0
// http://www.apache.org/licenses/LICENSE-2.0
// CHECKSTYLE:OFF JavadocMethodRegex
package com.twitter.twittertext;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import com.twitter.twittertext.Extractor.Entity;
/**
* A class for adding HTML links to hashtag, username and list references in Tweet text.
*/
public class Autolink {
/**
* Default CSS class for auto-linked list URLs
*/
public static final String DEFAULT_LIST_CLASS = "tweet-url list-slug";
/**
* Default CSS class for auto-linked username URLs
*/
public static final String DEFAULT_USERNAME_CLASS = "tweet-url username";
/**
* Default CSS class for auto-linked hashtag URLs
*/
public static final String DEFAULT_HASHTAG_CLASS = "tweet-url hashtag";
/**
* Default CSS class for auto-linked cashtag URLs
*/
public static final String DEFAULT_CASHTAG_CLASS = "tweet-url cashtag";
/**
* Default href for username links (the username without the @ will be appended)
*/
public static final String DEFAULT_USERNAME_URL_BASE = "https://twitter.com/";
/**
* Default href for list links (the username/list without the @ will be appended)
*/
public static final String DEFAULT_LIST_URL_BASE = "https://twitter.com/";
/**
* Default href for hashtag links (the hashtag without the # will be appended)
*/
public static final String DEFAULT_HASHTAG_URL_BASE = "https://twitter.com/search?q=%23";
/**
* Default href for cashtag links (the cashtag without the $ will be appended)
*/
public static final String DEFAULT_CASHTAG_URL_BASE = "https://twitter.com/search?q=%24";
/**
* Default attribute for invisible span tag
*/
public static final String DEFAULT_INVISIBLE_TAG_ATTRS =
"style='position:absolute;left:-9999px;'";
public interface LinkAttributeModifier {
/**
* This lets you modify the attributes depending on the entity.
*/
void modify(Entity entity, Map attributes);
}
public interface LinkTextModifier {
/**
* This lets you modify the text depending on the entity.
*/
CharSequence modify(Entity entity, CharSequence text);
}
protected String urlClass = null;
protected String listClass;
protected String usernameClass;
protected String hashtagClass;
protected String cashtagClass;
protected String usernameUrlBase;
protected String listUrlBase;
protected String hashtagUrlBase;
protected String cashtagUrlBase;
protected String invisibleTagAttrs;
protected boolean noFollow = true;
protected boolean usernameIncludeSymbol = false;
protected String symbolTag = null;
protected String textWithSymbolTag = null;
protected String urlTarget = null;
protected LinkAttributeModifier linkAttributeModifier = null;
protected LinkTextModifier linkTextModifier = null;
private Extractor extractor = new Extractor();
private static CharSequence escapeHTML(CharSequence text) {
StringBuilder builder = new StringBuilder(text.length() * 2);
for (int i = 0; i < text.length(); i++) {
char c = text.charAt(i);
switch (c) {
case '&':
builder.append("&");
break;
case '>':
builder.append(">");
break;
case '<':
builder.append("<");
break;
case '"':
builder.append(""");
break;
case '\'':
builder.append("'");
break;
default:
builder.append(c);
break;
}
}
return builder;
}
public Autolink() {
this(true);
}
public Autolink(boolean noFollow) {
urlClass = null;
listClass = DEFAULT_LIST_CLASS;
usernameClass = DEFAULT_USERNAME_CLASS;
hashtagClass = DEFAULT_HASHTAG_CLASS;
cashtagClass = DEFAULT_CASHTAG_CLASS;
usernameUrlBase = DEFAULT_USERNAME_URL_BASE;
listUrlBase = DEFAULT_LIST_URL_BASE;
hashtagUrlBase = DEFAULT_HASHTAG_URL_BASE;
cashtagUrlBase = DEFAULT_CASHTAG_URL_BASE;
invisibleTagAttrs = DEFAULT_INVISIBLE_TAG_ATTRS;
extractor.setExtractURLWithoutProtocol(false);
this.noFollow = noFollow;
}
public String escapeBrackets(String text) {
final int len = text.length();
if (len == 0) {
return text;
}
final StringBuilder sb = new StringBuilder(len + 16);
for (int i = 0; i < len; ++i) {
char c = text.charAt(i);
if (c == '>') {
sb.append(">");
} else if (c == '<') {
sb.append("<");
} else {
sb.append(c);
}
}
return sb.toString();
}
public void linkToText(Entity entity, CharSequence originalText, Map attributes,
StringBuilder builder) {
if (noFollow) {
attributes.put("rel", "nofollow");
}
if (linkAttributeModifier != null) {
linkAttributeModifier.modify(entity, attributes);
}
CharSequence text = originalText;
if (linkTextModifier != null) {
text = linkTextModifier.modify(entity, originalText);
}
// append tag
builder.append(" entry : attributes.entrySet()) {
builder.append(" ").append(escapeHTML(entry.getKey())).append("=\"").
append(escapeHTML(entry.getValue())).append("\"");
}
builder.append(">").append(text).append("");
}
public void linkToTextWithSymbol(Entity entity, CharSequence symbol, CharSequence originalText,
Map attributes, StringBuilder builder) {
final CharSequence taggedSymbol = symbolTag == null || symbolTag.length() == 0 ? symbol :
String.format("<%s>%s%s>", symbolTag, symbol, symbolTag);
final CharSequence text = escapeHTML(originalText);
final CharSequence taggedText = textWithSymbolTag == null ||
textWithSymbolTag.length() == 0 ? text :
String.format("<%s>%s%s>", textWithSymbolTag, text, textWithSymbolTag);
final boolean includeSymbol =
usernameIncludeSymbol || !Regex.AT_SIGNS.matcher(symbol).matches();
if (includeSymbol) {
linkToText(entity, taggedSymbol.toString() + taggedText, attributes, builder);
} else {
builder.append(taggedSymbol);
linkToText(entity, taggedText, attributes, builder);
}
}
public void linkToHashtag(Entity entity, String text, StringBuilder builder) {
// Get the original hash char from text as it could be a full-width char.
final CharSequence hashChar = text.subSequence(entity.getStart(), entity.getStart() + 1);
final CharSequence hashtag = entity.getValue();
final Map attrs = new LinkedHashMap<>();
attrs.put("href", hashtagUrlBase + hashtag);
attrs.put("title", "#" + hashtag);
if (Regex.RTL_CHARACTERS.matcher(text).find()) {
attrs.put("class", hashtagClass + " rtl");
} else {
attrs.put("class", hashtagClass);
}
linkToTextWithSymbol(entity, hashChar, hashtag, attrs, builder);
}
public void linkToCashtag(Entity entity, String text, StringBuilder builder) {
final CharSequence cashtag = entity.getValue();
final Map attrs = new LinkedHashMap<>();
attrs.put("href", cashtagUrlBase + cashtag);
attrs.put("title", "$" + cashtag);
attrs.put("class", cashtagClass);
linkToTextWithSymbol(entity, "$", cashtag, attrs, builder);
}
public void linkToMentionAndList(Entity entity, String text, StringBuilder builder) {
String mention = entity.getValue();
// Get the original at char from text as it could be a full-width char.
final CharSequence atChar = text.subSequence(entity.getStart(), entity.getStart() + 1);
final Map attrs = new LinkedHashMap<>();
if (entity.listSlug != null) {
mention += entity.listSlug;
attrs.put("class", listClass);
attrs.put("href", listUrlBase + mention);
} else {
attrs.put("class", usernameClass);
attrs.put("href", usernameUrlBase + mention);
}
linkToTextWithSymbol(entity, atChar, mention, attrs, builder);
}
public void linkToURL(Entity entity, String text, StringBuilder builder) {
final CharSequence url = entity.getValue();
CharSequence linkText = escapeHTML(url);
if (entity.displayURL != null && entity.expandedURL != null) {
// Goal: If a user copies and pastes a tweet containing t.co'ed link, the resulting paste
// should contain the full original URL (expanded_url), not the display URL.
//
// Method: Whenever possible, we actually emit HTML that contains expanded_url, and use
// font-size:0 to hide those parts that should not be displayed
// (because they are not part of display_url).
// Elements with font-size:0 get copied even though they are not visible.
// Note that display:none doesn't work here. Elements with display:none don't get copied.
//
// Additionally, we want to *display* ellipses, but we don't want them copied.
// To make this happen we wrap the ellipses in a tco-ellipsis class and provide an onCopy
// handler that sets display:none on everything with the tco-ellipsis class.
//
// As an example: The user tweets "hi http://longdomainname.com/foo"
// This gets shortened to "hi http://t.co/xyzabc", with display_url = "…nname.com/foo"
// This will get rendered as:
//
// …
//
// http://longdomai
//
//
// nname.com/foo
//
//
//
// …
//
//
// Exception: pic.twitter.com images, for which expandedUrl =
// "https://twitter.com/username/status/1234/photo/1
// For those URLs, display_url is not a substring of expanded_url,
// so we don't do anything special to render the elided parts.
// For a pic.twitter.com URL, the only elided part will be the "https://", so this is fine.
final String displayURLSansEllipses = entity.displayURL.replace("…", "");
final int diplayURLIndexInExpandedURL = entity.expandedURL.indexOf(displayURLSansEllipses);
if (diplayURLIndexInExpandedURL != -1) {
final String beforeDisplayURL =
entity.expandedURL.substring(0, diplayURLIndexInExpandedURL);
final String afterDisplayURL = entity.expandedURL.substring(
diplayURLIndexInExpandedURL + displayURLSansEllipses.length());
final String precedingEllipsis = entity.displayURL.startsWith("…") ? "…" : "";
final String followingEllipsis = entity.displayURL.endsWith("…") ? "…" : "";
final String invisibleSpan = "";
final StringBuilder sb = new StringBuilder("");
sb.append(precedingEllipsis);
sb.append(invisibleSpan).append(" ");
sb.append(invisibleSpan).append(escapeHTML(beforeDisplayURL)).append("");
sb.append("").append(escapeHTML(displayURLSansEllipses))
.append("");
sb.append(invisibleSpan).append(escapeHTML(afterDisplayURL)).append("");
sb.append("").append(invisibleSpan).append(" ")
.append(followingEllipsis).append("");
linkText = sb;
} else {
linkText = entity.displayURL;
}
}
final Map attrs = new LinkedHashMap<>();
attrs.put("href", url.toString());
if (urlClass != null) {
attrs.put("class", urlClass);
}
if (urlClass != null && urlClass.length() != 0) {
attrs.put("class", urlClass);
}
if (urlTarget != null && urlTarget.length() != 0) {
attrs.put("target", urlTarget);
}
linkToText(entity, linkText, attrs, builder);
}
public String autoLinkEntities(String text, List entities) {
final StringBuilder builder = new StringBuilder(text.length() * 2);
int beginIndex = 0;
for (Entity entity : entities) {
builder.append(text.subSequence(beginIndex, entity.start));
switch (entity.type) {
case URL:
linkToURL(entity, text, builder);
break;
case HASHTAG:
linkToHashtag(entity, text, builder);
break;
case MENTION:
linkToMentionAndList(entity, text, builder);
break;
case CASHTAG:
linkToCashtag(entity, text, builder);
break;
default:
break;
}
beginIndex = entity.end;
}
builder.append(text.subSequence(beginIndex, text.length()));
return builder.toString();
}
/**
* Auto-link hashtags, URLs, usernames and lists.
*
* @param originalText of the Tweet to auto-link
* @return text with auto-link HTML added
*/
public String autoLink(String originalText) {
final String text = escapeBrackets(originalText);
// extract entities
final List entities = extractor.extractEntitiesWithIndices(text);
return autoLinkEntities(text, entities);
}
/**
* Auto-link the @username and @username/list references in the provided text.
* Links to @username references will have the usernameClass CSS classes added.
* Links to @username/list references will have the listClass CSS class added.
*
* @param text of the Tweet to auto-link
* @return text with auto-link HTML added
*/
public String autoLinkUsernamesAndLists(String text) {
return autoLinkEntities(text, extractor.extractMentionsOrListsWithIndices(text));
}
/**
* Auto-link #hashtag references in the provided Tweet text. The #hashtag links will have the
* hashtagClass CSS class added.
*
* @param text of the Tweet to auto-link
* @return text with auto-link HTML added
*/
public String autoLinkHashtags(String text) {
return autoLinkEntities(text, extractor.extractHashtagsWithIndices(text));
}
/**
* Auto-link URLs in the Tweet text provided.
* This only auto-links URLs with protocol.
*
* @param text of the Tweet to auto-link
* @return text with auto-link HTML added
*/
public String autoLinkURLs(String text) {
return autoLinkEntities(text, extractor.extractURLsWithIndices(text));
}
/**
* Auto-link $cashtag references in the provided Tweet text. The $cashtag links will have the
* cashtagClass CSS class added.
*
* @param text of the Tweet to auto-link
* @return text with auto-link HTML added
*/
public String autoLinkCashtags(String text) {
return autoLinkEntities(text, extractor.extractCashtagsWithIndices(text));
}
/**
* Get CSS class for auto-linked URLs
*
* @return String that represents the CSS class
*/
public String getUrlClass() {
return urlClass;
}
/**
* Set the CSS class for auto-linked URLs
*
* @param urlClass new CSS value.
*/
public void setUrlClass(String urlClass) {
this.urlClass = urlClass;
}
/**
* Get CSS class for auto-linked list URLs
*
* @return String that represents the CSS class
*/
public String getListClass() {
return listClass;
}
/**
* Set the CSS class for auto-linked list URLs
*
* @param listClass new CSS value.
*/
public void setListClass(String listClass) {
this.listClass = listClass;
}
/**
* Get CSS class for auto-linked username URLs
*
* @return String that represents the CSS class
*/
public String getUsernameClass() {
return usernameClass;
}
/**
* Set the CSS class for auto-linked username URLs
*
* @param usernameClass new CSS value.
*/
public void setUsernameClass(String usernameClass) {
this.usernameClass = usernameClass;
}
/**
* Get CSS class for auto-linked hashtag URLs
*
* @return String that represents the CSS class
*/
public String getHashtagClass() {
return hashtagClass;
}
/**
* Set the CSS class for auto-linked hashtag URLs
*
* @param hashtagClass new CSS value.
*/
public void setHashtagClass(String hashtagClass) {
this.hashtagClass = hashtagClass;
}
/**
* Get CSS class for auto-linked cashtag URLs
*
* @return String that represents the CSS class
*/
public String getCashtagClass() {
return cashtagClass;
}
/**
* Set the CSS class for auto-linked cashtag URLs
*
* @param cashtagClass new CSS value.
*/
public void setCashtagClass(String cashtagClass) {
this.cashtagClass = cashtagClass;
}
/**
* Get the href value for username links (to which the username will be appended)
*
* @return String that represents the href value of username link
*/
public String getUsernameUrlBase() {
return usernameUrlBase;
}
/**
* Set the href base for username links.
*
* @param usernameUrlBase new href base value
*/
public void setUsernameUrlBase(String usernameUrlBase) {
this.usernameUrlBase = usernameUrlBase;
}
/**
* Get the href value for list links (to which the username/list will be appended)
*
* @return String that represents the href value of username/list link
*/
public String getListUrlBase() {
return listUrlBase;
}
/**
* Set the href base for list links.
*
* @param listUrlBase new href base value
*/
public void setListUrlBase(String listUrlBase) {
this.listUrlBase = listUrlBase;
}
/**
* Get the href value for hashtag links (to which the hashtag will be appended)
*
* @return String that represents the href value of hashtag link
*/
public String getHashtagUrlBase() {
return hashtagUrlBase;
}
/**
* Set the href base for hashtag links.
*
* @param hashtagUrlBase new href base value
*/
public void setHashtagUrlBase(String hashtagUrlBase) {
this.hashtagUrlBase = hashtagUrlBase;
}
/**
* Get the href value for cashtag links (to which the cashtag will be appended)
*
* @return String that represents the href value of cashtag link
*/
public String getCashtagUrlBase() {
return cashtagUrlBase;
}
/**
* Set the href base for cashtag links.
*
* @param cashtagUrlBase new href base value
*/
public void setCashtagUrlBase(String cashtagUrlBase) {
this.cashtagUrlBase = cashtagUrlBase;
}
/**
* Checks if the current URL links will include rel="nofollow" (true by default)
*
* @return Boolean indicating if there's a rel=nofollow
*/
public boolean isNoFollow() {
return noFollow;
}
/**
* Set if the current URL links will include rel="nofollow" (true by default)
*
* @param noFollow new noFollow value
*/
public void setNoFollow(boolean noFollow) {
this.noFollow = noFollow;
}
/**
* Set if the at mark '@' should be included in the link (false by default)
*
* @param usernameIncludeSymbol if username includes symbol
*/
public void setUsernameIncludeSymbol(boolean usernameIncludeSymbol) {
this.usernameIncludeSymbol = usernameIncludeSymbol;
}
/**
* Set HTML tag to be applied around #/@/# symbols in hashtags/usernames/lists/cashtag
*
* @param tag HTML tag without bracket. e.g., "b" or "s"
*/
public void setSymbolTag(String tag) {
this.symbolTag = tag;
}
/**
* Set HTML tag to be applied around text part of hashtags/usernames/lists/cashtag
*
* @param tag HTML tag without bracket. e.g., "b" or "s"
*/
public void setTextWithSymbolTag(String tag) {
this.textWithSymbolTag = tag;
}
/**
* Set the value of the target attribute in auto-linked URLs
*
* @param target target value e.g., "_blank"
*/
public void setUrlTarget(String target) {
this.urlTarget = target;
}
/**
* Set a modifier to modify attributes of a link based on an entity
*
* @param modifier LinkAttributeModifier instance
*/
public void setLinkAttributeModifier(LinkAttributeModifier modifier) {
this.linkAttributeModifier = modifier;
}
/**
* Set a modifier to modify text of a link based on an entity
*
* @param modifier LinkTextModifier instance
*/
public void setLinkTextModifier(LinkTextModifier modifier) {
this.linkTextModifier = modifier;
}
}