
// org.lumongo.example.twitter.TwitterCrawler Maven / Gradle / Ivy
// The newest version!
package org.lumongo.example.twitter;
import com.mongodb.BasicDBObject;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import com.mongodb.client.model.UpdateOptions;
import org.apache.log4j.Logger;
import org.bson.Document;
import org.lumongo.util.LogUtil;
import twitter4j.GeoLocation;
import twitter4j.StallWarning;
import twitter4j.Status;
import twitter4j.StatusDeletionNotice;
import twitter4j.StatusListener;
import twitter4j.Twitter;
import twitter4j.TwitterException;
import twitter4j.TwitterFactory;
import twitter4j.TwitterStream;
import twitter4j.TwitterStreamFactory;
import twitter4j.auth.AccessToken;
import java.io.IOException;
/**
 * Example crawler that listens to the Twitter sample stream and stores each
 * received status as a document in the {@code sample} collection of the
 * {@code twitter} MongoDB database.
 *
 * <p>Documents are keyed by the status id and written with an upsert, so a
 * status delivered more than once replaces its earlier copy instead of
 * creating a duplicate.
 */
public class TwitterCrawler {

    private static final Logger LOG = Logger.getLogger(TwitterCrawler.class);

    //TODO make these configurable
    private static final String CONSUMER_KEY = "xxx";
    private static final String CONSUMER_KEY_SECRET = "xxx";
    private static final String ACCESS_TOKEN = "xxx";
    private static final String ACCESS_TOKEN_SECRET = "xxx";

    /**
     * Opens the MongoDB collection, registers a listener that upserts every
     * incoming status, starts the sample stream, and installs a JVM shutdown
     * hook that releases the stream and the MongoDB client.
     *
     * @throws TwitterException declared for backward compatibility with existing callers
     * @throws IOException declared for backward compatibility with existing callers
     */
    public void start() throws TwitterException, IOException {
        AccessToken accessToken = new AccessToken(ACCESS_TOKEN, ACCESS_TOKEN_SECRET);

        // Captured by the anonymous classes below, hence final.
        final MongoClient mongo = new MongoClient();
        MongoDatabase db = mongo.getDatabase("twitter");
        final MongoCollection<Document> collection = db.getCollection("sample");

        StatusListener listener = new StatusListener() {
            @Override
            public void onStatus(Status status) {
                Document tweet = toDocument(status);
                Document query = new Document("_id", status.getId());
                // Upsert so that a re-delivered status overwrites its previous copy.
                collection.replaceOne(query, tweet, new UpdateOptions().upsert(true));
            }

            @Override
            public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
                // Intentionally ignored: stored tweets are not removed.
            }

            @Override
            public void onTrackLimitationNotice(int numberOfLimitedStatuses) {
                // Intentionally ignored.
            }

            @Override
            public void onException(Exception ex) {
                // Route stream errors through the configured logger rather than stderr.
                LOG.error("Twitter stream error", ex);
            }

            @Override
            public void onScrubGeo(long userId, long upToStatusId) {
                // Intentionally ignored.
            }

            @Override
            public void onStallWarning(StallWarning warning) {
                // Intentionally ignored.
            }
        };

        final TwitterStream twitterStream = new TwitterStreamFactory().getInstance();
        twitterStream.setOAuthConsumer(CONSUMER_KEY, CONSUMER_KEY_SECRET);
        twitterStream.setOAuthAccessToken(accessToken);
        twitterStream.addListener(listener);
        twitterStream.sample();

        Runtime.getRuntime().addShutdownHook(new Thread() {
            @Override
            public void run() {
                try {
                    twitterStream.cleanUp();
                }
                finally {
                    // Close the client even if stream cleanup fails.
                    mongo.close();
                }
            }
        });
    }

    /**
     * Builds the MongoDB document stored for a status.
     *
     * <p>{@code lat}/{@code long} are present only when the status carries a
     * geo location, and {@code retweetedStatus} only when it is a retweet.
     *
     * @param status the status received from the stream
     * @return a document whose {@code _id} is the status id
     */
    private static Document toDocument(Status status) {
        Document tweet = new Document();
        tweet.put("_id", status.getId());
        tweet.put("createdAt", status.getCreatedAt());
        tweet.put("favoriteCount", status.getFavoriteCount());
        tweet.put("retweetCount", status.getRetweetCount());

        GeoLocation location = status.getGeoLocation();
        if (location != null) {
            tweet.put("lat", location.getLatitude());
            tweet.put("long", location.getLongitude());
        }

        tweet.put("screenName", status.getUser().getScreenName());

        Status retweeted = status.getRetweetedStatus();
        if (retweeted != null) {
            tweet.put("retweetedStatus", retweeted.getText());
        }

        tweet.put("text", status.getText());
        return tweet;
    }

    /**
     * Loads the logging configuration and starts the crawler.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the crawler fails to start
     */
    public static void main(String[] args) throws Exception {
        LogUtil.loadLogConfig();
        TwitterCrawler tm = new TwitterCrawler();
        tm.start();
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy