All downloads are free. The search and download functionality uses the official Maven repository.

cz.mallat.uasparser.UASparser Maven / Gradle / Ivy

The newest version!
package cz.mallat.uasparser;

import cz.mallat.uasparser.fileparser.Entry;
import cz.mallat.uasparser.fileparser.PHPFileParser;
import cz.mallat.uasparser.fileparser.Section;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * User agent parser.
 * <p>
 * Classifies a user agent string as a robot, or as a browser and operating system, using lookup
 * tables that are built from the sections of a definition file read through {@link PHPFileParser}.
 * 
 * @author oli
 * 
 */
public class UASparser {

	static final String INFO_URL = "http://user-agent-string.info";

	/**
	 * Flags used for every regex of the definition file. The Perl modifiers found in the file are dropped by
	 * {@link #convertPerlToJavaRegex(String)}; these flags are applied instead.
	 */
	private static final int REGEX_FLAGS = Pattern.CASE_INSENSITIVE | Pattern.DOTALL;

	// Every table starts out empty, so that parse() returns an empty result instead of failing with a
	// NullPointerException when no definition data (or only some of the sections) has been loaded.

	/** user agent string -> robot data */
	private Map<String, RobotEntry> robotsMap = Collections.emptyMap();
	/** os id -> os data */
	private Map<Long, OsEntry> osMap = Collections.emptyMap();
	/** browser id -> browser data */
	private Map<Long, BrowserEntry> browserMap = Collections.emptyMap();
	/** browser type id -> browser type name */
	private Map<Long, String> browserTypeMap = Collections.emptyMap();
	/** compiled browser regex -> browser id, in definition file order */
	private Map<Pattern, Long> browserRegMap = Collections.emptyMap();
	/** browser id -> id of the os that is mapped to this browser */
	private Map<Long, Long> browserOsMap = Collections.emptyMap();
	/** compiled os regex -> os id, in definition file order */
	private Map<Pattern, Long> osRegMap = Collections.emptyMap();

	/**
	 * Use the given filename to load the definition file from the local filesystem
	 * 
	 * @param localDefinitionFilename
	 *            path of the definition file
	 * @throws IOException
	 *             declared by the underlying loading of the file
	 */
	public UASparser(String localDefinitionFilename) throws IOException {
		loadDataFromFile(new File(localDefinitionFilename));
	}

	/**
	 * Use the given input stream to load the definition file
	 * 
	 * @param inputStreamToDefinitionFile
	 *            stream that delivers the content of the definition file
	 * @throws IOException
	 *             declared by the underlying loading of the stream
	 */
	public UASparser(InputStream inputStreamToDefinitionFile) throws IOException {
		loadDataFromFile(inputStreamToDefinitionFile);
	}

	/**
	 * Constructor for inherited classes. No definition data is loaded; until a subclass loads some, every
	 * lookup table is empty.
	 */
	protected UASparser() {
		// empty
	}

	/**
	 * Hook that is called by {@link #parse(String)} before the lookup tables are used. When a class inherits
	 * from this class, it probably has to override this method.
	 * 
	 * @throws IOException
	 *             never thrown by this base implementation
	 */
	protected void checkDataMaps() throws IOException {
		// empty for this base class
	}

	/**
	 * Parse the given user agent string and returns a UserAgentInfo object with the related data
	 * 
	 * @param useragent
	 *            the user agent string, surrounding whitespace is ignored; may be null
	 * @throws IOException
	 *             may happen when the retrieval of the data file fails
	 * @return a new UserAgentInfo object; it is left untouched when useragent is null or nothing matched
	 */
	public UserAgentInfo parse(String useragent) throws IOException {
		UserAgentInfo retObj = new UserAgentInfo();

		if (useragent == null) {
			return retObj;
		}
		useragent = useragent.trim();

		// check that the data maps are up-to-date
		checkDataMaps();

		// first check if it's a robot
		if (!processRobot(useragent, retObj)) {
			// search for a browser on the browser regex patterns
			boolean osFound = processBrowserRegex(useragent, retObj);

			if (!osFound) {
				// the browser did not determine the OS, so search the OS regex patterns
				processOsRegex(useragent, retObj);
			}
		}
		return retObj;
	}

	/**
	 * Searches in the os regex table. If a match is found, copies the os data. Only the first matching
	 * pattern is used.
	 * 
	 * @param useragent
	 *            the trimmed user agent string
	 * @param retObj
	 *            the result object that receives the os data
	 */
	private void processOsRegex(String useragent, UserAgentInfo retObj) {
		for (Map.Entry<Pattern, Long> entry : osRegMap.entrySet()) {
			if (entry.getKey().matcher(useragent).find()) {
				// simply copy the OS data into the result object
				OsEntry os = osMap.get(entry.getValue());
				if (os != null) {
					os.copyTo(retObj);
				}
				return;
			}
		}
	}

	/**
	 * Searches in the browser regex table. If a match is found, copies the browser data and if possible the
	 * os data. Only the first matching pattern is used.
	 * 
	 * @param useragent
	 *            the trimmed user agent string
	 * @param retObj
	 *            the result object that receives the browser (and os) data
	 * @return true if the matched browser has an os mapped to it, else false
	 */
	private boolean processBrowserRegex(String useragent, UserAgentInfo retObj) {
		for (Map.Entry<Pattern, Long> entry : browserRegMap.entrySet()) {
			Matcher matcher = entry.getKey().matcher(useragent);
			if (!matcher.find()) {
				continue;
			}
			// a browser was found
			Long idBrowser = entry.getValue();
			BrowserEntry be = browserMap.get(idBrowser);
			if (be != null) {
				// the type is set before the rest of the browser data is copied
				copyType(retObj, be);
				// the browser version is taken from the first subgroup of the regex, if there is one
				String browserVersionInfo = null;
				if (matcher.groupCount() > 0) {
					browserVersionInfo = matcher.group(1);
				}
				be.copyTo(retObj, browserVersionInfo);
			}
			// check if this browser has an OS mapped
			Long idOs = browserOsMap.get(idBrowser);
			if (idOs == null) {
				return false;
			}
			OsEntry os = osMap.get(idOs);
			if (os != null) {
				os.copyTo(retObj);
			}
			// the OS counts as found even if the os table has no data for the mapped id
			return true;
		}
		return false;
	}

	/**
	 * Sets the source type, if possible
	 * 
	 * @param retObj
	 *            the result object that receives the type
	 * @param be
	 *            the matched browser, never null
	 */
	private void copyType(UserAgentInfo retObj, BrowserEntry be) {
		Long type = be.getType();
		if (type == null) {
			return;
		}
		String typeString = browserTypeMap.get(type);
		if (typeString != null) {
			retObj.setTyp(typeString);
		}
	}

	/**
	 * Checks if the useragent comes from a robot. If yes, copies all the data to the result object. The
	 * robot table is searched for the exact user agent string.
	 * 
	 * @param useragent
	 *            the trimmed user agent string
	 * @param retObj
	 *            the result object that receives the robot (and os) data
	 * @return true if the useragent belongs to a robot, else false
	 */
	private boolean processRobot(String useragent, UserAgentInfo retObj) {
		RobotEntry robotEntry = robotsMap.get(useragent);
		if (robotEntry == null) {
			return false;
		}
		retObj.setTyp("Robot");
		robotEntry.copyTo(retObj);
		if (robotEntry.getOsId() != null) {
			OsEntry os = osMap.get(robotEntry.getOsId());
			if (os != null) {
				os.copyTo(retObj);
			}
		}
		return true;
	}

	/**
	 * loads the data file and creates all internal data structs
	 * 
	 * @param definitionFile
	 *            the definition file
	 * @throws IOException
	 *             declared by the underlying file parser
	 */
	protected void loadDataFromFile(File definitionFile) throws IOException {
		PHPFileParser fp = new PHPFileParser(definitionFile);
		createInternalDataStructre(fp.getSections());
	}

	/**
	 * loads the data file and creates all internal data structs. This method does not close the stream
	 * itself.
	 * 
	 * @param is
	 *            stream that delivers the content of the definition file
	 * @throws IOException
	 *             declared by the underlying file parser
	 */
	protected void loadDataFromFile(InputStream is) throws IOException {
		PHPFileParser fp = new PHPFileParser(is);
		createInternalDataStructre(fp.getSections());
	}

	/**
	 * Creates the internal data structures from the sectionList.
	 * <p>
	 * Each known section is first read completely into a new map, which then replaces the current map. The
	 * map of a section that is not part of the list is kept as it is. Sections with unknown names are
	 * ignored.
	 * 
	 * @param sectionList
	 *            the sections of the definition file
	 * @throws java.util.regex.PatternSyntaxException
	 *             if a regex of the "browser_reg" or "os_reg" section is not a valid Java regex
	 * @throws IllegalArgumentException
	 *             if a regex is not written in the Perl /regex/modifiers style or an id is not a number
	 */
	protected void createInternalDataStructre(List<Section> sectionList) {
		for (Section sec : sectionList) {
			String name = sec.getName();
			if ("robots".equals(name)) {
				Map<String, RobotEntry> robotsMapTmp = new HashMap<String, RobotEntry>();
				for (Entry en : sec.getEntries()) {
					RobotEntry re = new RobotEntry(en.getData());
					robotsMapTmp.put(re.getUserAgentString(), re);
				}
				robotsMap = robotsMapTmp;
			} else if ("os".equals(name)) {
				Map<Long, OsEntry> osMapTmp = new HashMap<Long, OsEntry>();
				for (Entry en : sec.getEntries()) {
					OsEntry oe = new OsEntry(en.getData());
					osMapTmp.put(Long.parseLong(en.getKey()), oe);
				}
				osMap = osMapTmp;
			} else if ("browser".equals(name)) {
				Map<Long, BrowserEntry> browserMapTmp = new HashMap<Long, BrowserEntry>();
				for (Entry en : sec.getEntries()) {
					BrowserEntry be = new BrowserEntry(en.getData());
					browserMapTmp.put(Long.parseLong(en.getKey()), be);
				}
				browserMap = browserMapTmp;
			} else if ("browser_type".equals(name)) {
				Map<Long, String> browserTypeMapTmp = new HashMap<Long, String>();
				for (Entry en : sec.getEntries()) {
					browserTypeMapTmp.put(Long.parseLong(en.getKey()), en.getData().iterator().next());
				}
				browserTypeMap = browserTypeMapTmp;
			} else if ("browser_reg".equals(name)) {
				browserRegMap = createRegexMap(sec);
			} else if ("browser_os".equals(name)) {
				Map<Long, Long> browserOsMapTmp = new HashMap<Long, Long>();
				for (Entry en : sec.getEntries()) {
					browserOsMapTmp.put(Long.parseLong(en.getKey()), Long.parseLong(en.getData().iterator().next()));
				}
				browserOsMap = browserOsMapTmp;
			} else if ("os_reg".equals(name)) {
				osRegMap = createRegexMap(sec);
			}
		}
	}

	/**
	 * Reads a section whose entries consist of a Perl style regex followed by an id, and compiles the
	 * regexes once, so that they do not have to be compiled again for every parsed user agent.
	 * 
	 * @param sec
	 *            the "browser_reg" or "os_reg" section
	 * @return the compiled patterns with their ids, in the order of the section
	 */
	private Map<Pattern, Long> createRegexMap(Section sec) {
		// Collect by regex text first: a regex that occurs more than once keeps the position of its first
		// occurrence and gets the id of its last one, exactly like a map keyed by the regex text does.
		Map<String, Long> idByRegex = new LinkedHashMap<String, Long>();
		for (Entry en : sec.getEntries()) {
			Iterator<String> it = en.getData().iterator();
			String regex = convertPerlToJavaRegex(it.next());
			idByRegex.put(regex, Long.parseLong(it.next()));
		}

		Map<Pattern, Long> idByPattern = new LinkedHashMap<Pattern, Long>();
		for (Map.Entry<String, Long> regexAndId : idByRegex.entrySet()) {
			idByPattern.put(Pattern.compile(regexAndId.getKey(), REGEX_FLAGS), regexAndId.getValue());
		}
		return idByPattern;
	}

	/**
	 * Converts a PERL style regex into the Java style. That means it removes the leading and the last /
	 * and removes the modifiers that follow the last /
	 * 
	 * @param regex
	 *            a regex in the form /regex/modifiers
	 * @return the part between the leading and the last /
	 * @throws IllegalArgumentException
	 *             if the regex is null, does not start with / or has no closing /
	 */
	private String convertPerlToJavaRegex(String regex) {
		int closingSlash = (regex == null) ? -1 : regex.lastIndexOf('/');
		// closingSlash == 0 means that the only / is the leading one
		if (closingSlash < 1 || regex.charAt(0) != '/') {
			throw new IllegalArgumentException("Not a Perl style /regex/modifiers expression: " + regex);
		}
		return regex.substring(1, closingSlash);
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy