
gate.corpora.twitter.Tweet Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of format-twitter Show documentation
Show all versions of format-twitter Show documentation
Document Format plugin to support reading and writing Twitter style JSON files
The newest version!
/*
* Copyright (c) 1995-2014, The University of Sheffield. See the file
* COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
*
* This file is part of GATE (see http://gate.ac.uk/), and is free
* software, licenced under the GNU Library General Public License,
* Version 2, June 1991 (in the distribution as file licence.html,
* and also available at http://gate.ac.uk/gate/licence.html).
*
* $Id: Tweet.java 19779 2016-11-24 10:18:32Z markagreenwood $
*/
package gate.corpora.twitter;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.fasterxml.jackson.databind.JsonNode;
import gate.Factory;
import gate.FeatureMap;
import gate.corpora.RepositioningInfo;
public class Tweet {
// Accumulated text content of the tweet; text segments are appended to it
// while the JSON structure is unpacked.
private StringBuilder string;
// Annotations collected while building the tweet. The visible code only ever
// adds PreAnnotation instances to this set.
private Set annotations;
/**
 * Returns the annotations gathered for this tweet.
 *
 * @return the internal annotation set itself (no defensive copy is made)
 */
public Set getAnnotations() {
  return annotations;
}
/**
 * Returns the length of the accumulated tweet text.
 *
 * @return the number of characters currently held in the text buffer
 */
public int getLength() {
  final int textLength = string.length();
  return textLength;
}
/**
 * Returns the accumulated tweet text.
 *
 * @return a snapshot of the text buffer as an immutable {@code String}
 */
public String getString() {
  final String text = string.toString();
  return text;
}
/**
 * Builds a tweet from its parsed JSON representation. Used by the
 * JSONTweetFormat: the document content contains only the tweet text, while
 * the feature map of the covering annotation contains the other JSON data,
 * recursively.
 *
 * <p>Every top-level JSON field is converted with {@code TweetUtils.process}
 * and stored in a feature map. {@code unpackTweets} then extracts the text
 * segments into the text buffer, and finally one annotation of type
 * {@code TweetUtils.TWEET_ANNOTATION_TYPE}, spanning the whole accumulated
 * text and carrying that feature map, is added to the annotation set.
 *
 * @param json the JSON object describing the tweet; must not be {@code null}
 * @param handleEntities not consulted by this constructor; the parameter is
 *          kept so that existing callers continue to compile
 */
public Tweet(JsonNode json, boolean handleEntities) {
  string = new StringBuilder();
  // JsonNode.fieldNames() yields the field names as strings; the iterator
  // must be typed so that next() can be assigned to a String.
  Iterator<String> keys = json.fieldNames();
  FeatureMap features = Factory.newFeatureMap();
  annotations = new HashSet<PreAnnotation>();
  while (keys.hasNext()) {
    String key = keys.next();
    features.put(key, TweetUtils.process(json.get(key)));
  }
  // Fills the text buffer and adds the per-segment annotations.
  unpackTweets(features, null, "tweet");
  // One annotation over the complete text, holding the JSON-derived features.
  annotations.add(new PreAnnotation(0L, string.length(), TweetUtils.TWEET_ANNOTATION_TYPE, features));
}
/**
 * Walks a tweet's feature map and hands every text-bearing entry to
 * {@code unpackTextAndEntities}, recursing into nested tweets.
 *
 * <ul>
 * <li>If {@code retweeted_status} is present, only that nested tweet is
 * processed (with type {@code "retweet"}) and nothing else at this level.</li>
 * <li>Otherwise the text is taken from {@code full_text} if present, else by
 * recursing into {@code extended_tweet}, else from {@code text}.</li>
 * <li>Afterwards a {@code quoted_status}, if present, is processed as a
 * nested tweet with type {@code "quotedTweet"}.</li>
 * </ul>
 *
 * @param features feature map of the tweet (or nested tweet) being unpacked
 * @param path dotted path of {@code features} relative to the top-level
 *          tweet, or {@code null} at the top level
 * @param type label passed on for the text segments found at this level
 */
private void unpackTweets(FeatureMap features, String path, String type) {
  // Prefix for anything nested below this level: empty at the top level,
  // otherwise the current path followed by a dot.
  String prefix;
  if (path == null) {
    prefix = "";
  } else {
    prefix = path + ".";
  }

  // A retweet is represented solely by the tweet it wraps.
  if (features.containsKey("retweeted_status")) {
    FeatureMap retweeted = (FeatureMap) features.get("retweeted_status");
    unpackTweets(retweeted, prefix + "retweeted_status", "retweet");
    return;
  }

  // Pick the text source in order of preference.
  if (features.containsKey("full_text")) {
    unpackTextAndEntities(features, prefix, "full_text", type);
  } else if (features.containsKey("extended_tweet")) {
    FeatureMap extended = (FeatureMap) features.get("extended_tweet");
    unpackTweets(extended, prefix + "extended_tweet", type);
  } else if (features.containsKey("text")) {
    unpackTextAndEntities(features, prefix, "text", type);
  }

  // A quoted tweet is handled in addition to the tweet's own text.
  if (features.containsKey("quoted_status")) {
    FeatureMap quoted = (FeatureMap) features.get("quoted_status");
    unpackTweets(quoted, prefix + "quoted_status", "quotedTweet");
  }
}
private void unpackTextAndEntities(FeatureMap features, String expandedPath, String key, String type) {
if (string.length() != 0) {
string.append("\n\n");
}
String content = features.remove(key).toString();
boolean hasEntities = features.containsKey(TweetUtils.ENTITIES_ATTRIBUTE);
RepositioningInfo repos = new RepositioningInfo();
content = unescape(content, repos);
long start = string.length();
string.append(content);
FeatureMap segmentFeatures = Factory.newFeatureMap();
segmentFeatures.put("textPath", expandedPath+key);
segmentFeatures.put("tweetType", type);
if (hasEntities) segmentFeatures.put("entitiesPath", expandedPath+TweetUtils.ENTITIES_ATTRIBUTE);
annotations.add(new PreAnnotation(start, string.length(), "TweetSegment", segmentFeatures));
if (!hasEntities) return;
FeatureMap entities = (FeatureMap)features.remove(TweetUtils.ENTITIES_ATTRIBUTE);
for (Map.Entry
© 2015 - 2025 Weber Informatics LLC | Privacy Policy