All downloads are FREE. The search and download functionality uses the official Maven repository.

gr.iti.mklab.sfc.subscribers.impl.TwitterSubscriber Maven / Gradle / Ivy

Go to download

Monitors a set of social streams (e.g. Twitter status updates) and collects the incoming content.

The newest version!
package gr.iti.mklab.sfc.subscribers.impl;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import twitter4j.FilterQuery;
import twitter4j.ResponseList;
import twitter4j.StallWarning;
import twitter4j.Status;
import twitter4j.StatusDeletionNotice;
import twitter4j.StatusListener;
import twitter4j.Twitter;
import twitter4j.TwitterException;
import twitter4j.TwitterFactory;
import twitter4j.TwitterStreamFactory;
import twitter4j.User;
import twitter4j.conf.ConfigurationBuilder;
import gr.iti.mklab.framework.abstractions.socialmedia.items.TwitterItem;
import gr.iti.mklab.framework.common.domain.Item;
import gr.iti.mklab.framework.common.domain.config.Configuration;
import gr.iti.mklab.framework.common.domain.feeds.Feed;
import gr.iti.mklab.framework.common.domain.feeds.AccountFeed;
import gr.iti.mklab.framework.common.domain.feeds.KeywordsFeed;
import gr.iti.mklab.framework.common.domain.feeds.LocationFeed;
import gr.iti.mklab.sfc.streams.StreamException;
import gr.iti.mklab.sfc.subscribers.Subscriber;

/**
 * Retrieves real-time Twitter content by subscribing to the Twitter Streaming API.
 * Content can be selected by keywords, Twitter users or locations, or it can be
 * a random sample (1%) of the statuses currently being posted.
 * 
 * @author Manos Schinas - [email protected]
 *
 */
public class TwitterSubscriber extends Subscriber {
	
	private Logger  logger = LogManager.getLogger(TwitterSubscriber.class);
	
	// Buffer of incoming statuses. The stream listener's onStatus callback appends to it
	// and empties it once it holds more than 2000 entries.
	private BlockingQueue<Status> queue = new LinkedBlockingQueue<Status>();
	
	/**
	 * Access tiers of the streaming filter endpoint. Each tier carries the maximum
	 * number of keywords, followed users and locations that a single filter built
	 * by this subscriber may contain.
	 */
	public enum AccessLevel {

		PUBLIC(400, 5000, 25),
		EXTENDED(400, 75000, 25);

		// Enum constants are shared singletons, so their limits must never change after construction.
		private final int filterMaxKeywords;
		private final int filterMaxFollows;
		private final int filterMaxLocations;

		AccessLevel(int filterMaxKeywords,
					int filterMaxFollows,
					int filterMaxLocations) {
			this.filterMaxKeywords = filterMaxKeywords;
			this.filterMaxFollows = filterMaxFollows;
			this.filterMaxLocations = filterMaxLocations;
		}

		/**
		 * @return the maximum number of keywords allowed in a filter
		 */
		public int getFilterMaxKeywords() {
			return filterMaxKeywords;
		}

		/**
		 * @return the maximum number of user ids allowed in a filter
		 */
		public int getFilterMaxFollows() {
			return filterMaxFollows;
		}

		/**
		 * @return the maximum number of locations allowed in a filter
		 */
		public int getFilterMaxLocations() {
			return filterMaxLocations;
		}

	}
	
	// Tier whose limits are applied when a filter is built; PUBLIC unless changed.
	private AccessLevel accessLevel = AccessLevel.PUBLIC;
	private StatusListener listener = null;
	
	// Stays null until a stream is attached; subscribe(...) rejects calls while it is null
	// and stop() resets it to null.
	private twitter4j.TwitterStream twitterStream  = null;
	// REST client used to resolve screen names to numeric user ids.
	private Twitter twitterApi;

	// Size of the thread pool created in open(...).
	private int numberOfConsumers = 10;
	// Consumers that stop() shuts down one by one.
	private List<TwitterStreamConsumer> streamConsumers = new ArrayList<TwitterStreamConsumer>();

	private ExecutorService executorService;
	
	/**
	 * Creates a subscriber with no stream attached. While the stream is unset,
	 * {@code subscribe(...)} fails with a "Stream is closed" error.
	 */
	public TwitterSubscriber() {
		super();
	}
	
	@Override
	public synchronized void subscribe(Set feeds) throws StreamException {
		
		if (twitterStream == null) {
			logger.error("Stream is closed");
			throw new StreamException("Stream is closed", null);
		} 
		else {
			
			List keywordsList = new ArrayList();
			Set users = new HashSet();
			Set userids = new HashSet();
			List locs = new ArrayList();
			
			logger.info(feeds.size() + " feeds to track");
			for(Feed feed : feeds) {
				if(KeywordsFeed.class.isInstance(feed)) {
					KeywordsFeed keywordFeed = (KeywordsFeed) feed;
					keywordsList.addAll(keywordFeed.getKeywords());	
				}
				else if(AccountFeed.class.isInstance(feed)) {
					AccountFeed accountFeed = (AccountFeed) feed;	
					users.add(accountFeed.getUsername());
				}
				else if(LocationFeed.class.isInstance(feed)) {
					double[] location = new double[2];
					
					location[0] = ((LocationFeed) feed).getLocation().getLatitude();
					location[1] = ((LocationFeed) feed).getLocation().getLongitude();
					locs.add(location);
				}
			}
			
			userids.addAll(getUserIds(users));
			
			String[] keywords = new String[keywordsList.size()];
			long[] follows = new long[Math.min(userids.size(), accessLevel.filterMaxFollows)];
			double[][] locations = new double[locs.size()][2];
			
			for(int i=0; i= accessLevel.filterMaxKeywords) {
					break;
				}
			}
			
			int index = 0;
			for(Long userId : userids) {
				follows[index++] = userId;
				if(index >= accessLevel.filterMaxFollows) {
					break;
				}
			}
			
			for(int i=0; i= accessLevel.filterMaxLocations) {
					break;
				}
			}
			
			if (!ensureFilterLimits(keywords, follows, locations)) {
				logger.error("Filter exceeds Twitter's public access level limits");
				throw new StreamException("Filter exceeds Twitter's public access level limits");
			}

			FilterQuery filterQuery = getFilterQuery(keywords, follows, locations);
			if (filterQuery != null) {			
				logger.info("Start tracking from twitter stream");
				twitterStream.shutdown();
				twitterStream.filter(filterQuery);	
			}
			else {
				logger.info("Start sampling from twitter stream");
				twitterStream.sample();
			}
		}
		
	}
	
	/**
	 * Resolves Twitter screen names to numeric user ids.
	 * <p>
	 * Names are looked up in consecutive batches of at most 100 per request. The
	 * first failed request ends the lookup, and the ids gathered so far are returned.
	 *
	 * @param followsUsernames screen names to resolve
	 * @return the ids of the users that were found; possibly incomplete after a
	 *         failed request, never {@code null}
	 */
	private Set<Long> getUserIds(Collection<String> followsUsernames) {
		
		final int batchSize = 100;
		
		Set<Long> ids = new HashSet<Long>();
		List<String> usernames = new ArrayList<String>(followsUsernames);
		int size = usernames.size();
		
		// subList's end index is exclusive, so the next batch starts exactly at the
		// previous end; advancing by the batch size therefore covers every name once.
		for (int start = 0; start < size; start += batchSize) {
			int end = Math.min(start + batchSize, size);
			List<String> sublist = usernames.subList(start, end);
			String[] screenNames = sublist.toArray(new String[sublist.size()]);
			try {
				logger.info("Request for " + screenNames.length + " users ");
				ResponseList<User> users = twitterApi.lookupUsers(screenNames);
				logger.info(users.size() + " users ");
				for (User user : users) {
					ids.add(user.getId());
				}
			} catch (TwitterException e) {
				logger.error("Error while getting user ids from twitter...");
				logger.error("Exception in getUserIds: ", e);
				// Stop after the first failure and keep what has been resolved so far.
				break;
			}
		}
		
		return ids;
	}

	/**
	 * Shuts down the Twitter stream, stops every stream consumer and shuts down
	 * the executor service. Safe to call when no stream or executor has been
	 * created yet.
	 */
	@Override
	public void stop() {
		// Close the stream whenever one exists, whether or not a listener is registered.
		if (twitterStream != null) {
			logger.info("Shutdown TwiterStream.");
			twitterStream.shutdown();
		}
		listener = null;
		twitterStream = null;
		
		for (TwitterStreamConsumer consumer : this.streamConsumers) {
			consumer.stop();
		}
		
		// The executor is only created in open(...), so it is still null if stop() comes first.
		if (this.executorService != null) {
			this.executorService.shutdown();
		}
	}
	
	/**
	 * Builds the listener that receives the callbacks of the Twitter stream.
	 * <p>
	 * Incoming statuses are appended to {@code queue}; deletion notices are
	 * forwarded to {@code storageHandler} when one is set; all other events are
	 * only logged.
	 *
	 * @return a new listener with its own item and deletion counters
	 */
	private StatusListener getListener() { 
		return new StatusListener() {
			// Running totals, reported every 5000 received statuses.
			long items = 0, deletion = 0;
			
			@Override
			public void onStatus(Status status) {
				if(status != null) {
					try {
						queue.add(status);
						if((++items)%5000==0) {
							logger.info(items + " incoming items from twitter. " + deletion + " deletions.");
							logger.info(queue.size() + " statuses in queue");
						}
						
						// Once the backlog passes 2000 entries the whole queue is dropped,
						// trading lost statuses for bounded memory use.
						if(queue.size() > 2000) {
							logger.info("Twitter Queue size > 2000. Clear to prevent heapsize overflow.");
							queue.clear();
						}
					}
					catch(Exception e) {
						logger.error("Exception onStatus: ", e);
					}
				}
			}
			
			@Override
			public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
					try {
						deletion++;
						// Deleted items are addressed by the "Twitter#" prefix plus the status id.
						String id = "Twitter#" + statusDeletionNotice.getStatusId();
						
						if(storageHandler != null) {
							storageHandler.delete(id);
						}
					}
					catch(Exception e) {
						logger.error("Exception onDeletionNotice: ", e);
					}
				
			}
			
			@Override
			public void onTrackLimitationNotice(int numOfLimitedStatuses) {
				synchronized(this) {
					logger.error("Rate limit: " + numOfLimitedStatuses);
				}
			}
			
			@Override
			public void onException(Exception ex) {
				synchronized(this) {
					// Pass the exception itself so the stack trace is kept in the log.
					logger.error("Internal stream error occured: " + ex.getMessage(), ex);
				}
			}
			@Override
			public void onScrubGeo(long userid, long id) {
				logger.info("Remove appropriate geolocation information for user " + userid + " up to tweet with id " + id);
			}

			@Override
			public void onStallWarning(StallWarning warn) {	
				if(warn != null) {
					logger.error("Stall Warning " + warn.getMessage() + "(" + warn.getPercentFull() + ")");
				}
			}
		};
	}

	
	/**
	 * Checks a filter against the limits of the current access level.
	 * A {@code null} array always passes. The location array is counted in
	 * pairs: half its length is compared against the location limit.
	 *
	 * @param keywords  keywords to track, may be {@code null}
	 * @param follows   user ids to follow, may be {@code null}
	 * @param locations location points, may be {@code null}
	 * @return {@code true} when none of the three limits is exceeded
	 */
	private boolean ensureFilterLimits(String[] keywords, long[] follows, double[][] locations) {
		final boolean keywordsWithinLimit =
				keywords == null || keywords.length <= accessLevel.getFilterMaxKeywords();
		final boolean followsWithinLimit =
				follows == null || follows.length <= accessLevel.getFilterMaxFollows();
		final boolean locationsWithinLimit =
				locations == null || (locations.length/2) <= accessLevel.getFilterMaxLocations();
		
		return keywordsWithinLimit && followsWithinLimit && locationsWithinLimit;
	}
	
	/**
	 * Assembles the streaming filter from whichever criteria are present.
	 * A criterion counts as present when its array is non-null and non-empty.
	 *
	 * @param keywords  keywords to track
	 * @param follows   user ids to follow
	 * @param locations locations to track
	 * @return the filter query, or {@code null} when no criterion is present
	 */
	private FilterQuery getFilterQuery(String[] keywords, long[] follows, double[][] locations) {
		final boolean hasKeywords = keywords != null && keywords.length > 0;
		final boolean hasFollows = follows != null && follows.length > 0;
		final boolean hasLocations = locations != null && locations.length > 0;
		
		// Nothing to filter on: the caller decides what to do with a null query.
		if (!hasKeywords && !hasFollows && !hasLocations) {
			return null;
		}
		
		FilterQuery filter = new FilterQuery();
		if (hasKeywords) {
			logger.info(keywords.length + " keywords to track.");
			filter = filter.track(keywords);
		}
		if (hasFollows) {
			logger.info(follows.length + " users to follow.");
			filter = filter.follow(follows);
		}
		if (hasLocations) {
			logger.info(locations.length + " locations to track.");
			filter = filter.locations(locations);
		}
		return filter;
	}

	@Override
	public void open(Configuration config) throws StreamException {

		if (twitterStream != null) {
			logger.error("#Twitter : Stream is already opened");
			try {
				throw new StreamException("Stream is already opened", null);
			} catch (StreamException e) {
				e.printStackTrace();
			}
		}
		
		String oAuthConsumerKey 		= 	config.getParameter(KEY);
		String oAuthConsumerSecret 		= 	config.getParameter(SECRET);
		String oAuthAccessToken 		= 	config.getParameter(ACCESS_TOKEN);
		String oAuthAccessTokenSecret 	= 	config.getParameter(ACCESS_TOKEN_SECRET);
		
		String accessLevel = config.getParameter("AccessLevel");
		if(accessLevel != null && accessLevel.equals("public")) {
			this.accessLevel = AccessLevel.PUBLIC;
		}
		
		if (oAuthConsumerKey == null || oAuthConsumerSecret == null ||
				oAuthAccessToken == null || oAuthAccessTokenSecret == null) {
			logger.error("#Twitter : Stream requires authentication");
			throw new StreamException("Stream requires authentication");
		}
		
		logger.info("Twitter Credentials: \n" + 
				"\t\t\toAuthConsumerKey:  " + oAuthConsumerKey  + "\n" +
				"\t\t\toAuthConsumerSecret:  " + oAuthConsumerSecret  + "\n" +
				"\t\t\toAuthAccessToken:  " + oAuthAccessToken + "\n" +
				"\t\t\toAuthAccessTokenSecret:  " + oAuthAccessTokenSecret);
		
		ConfigurationBuilder cb = new ConfigurationBuilder();
		cb.setJSONStoreEnabled(false)
			.setOAuthConsumerKey(oAuthConsumerKey)
			.setOAuthConsumerSecret(oAuthConsumerSecret)
			.setOAuthAccessToken(oAuthAccessToken)
			.setOAuthAccessTokenSecret(oAuthAccessTokenSecret);
		twitter4j.conf.Configuration conf = cb.build();
		
		this.executorService = Executors.newFixedThreadPool(numberOfConsumers);
		for(int i=0; i feeds = new HashSet();
		feeds.add(feed);
		
		TwitterSubscriber sub = new TwitterSubscriber();
		sub.open(config);
		
		sub.subscribe(feeds);
		
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy