com.moviejukebox.plugin.ImdbInfo Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of yamj Show documentation
Show all versions of yamj Show documentation
Static analysis of MovieJukebox project
/*
* Copyright (c) 2004-2012 YAMJ Members
* http://code.google.com/p/moviejukebox/people/list
*
* Web: http://code.google.com/p/moviejukebox/
*
* This software is licensed under a Creative Commons License
* See this page: http://code.google.com/p/moviejukebox/wiki/License
*
* For any reuse or distribution, you must make clear to others the
* license terms of this work.
*/
package com.moviejukebox.plugin;
import com.moviejukebox.imdbapi.ImdbApi;
import com.moviejukebox.imdbapi.model.ImdbMovieDetails;
import com.moviejukebox.imdbapi.search.SearchObject;
import com.moviejukebox.model.ImdbSiteDataDefinition;
import com.moviejukebox.model.Movie;
import com.moviejukebox.tools.HTMLTools;
import com.moviejukebox.tools.PropertiesUtil;
import com.moviejukebox.tools.StringTools;
import com.moviejukebox.tools.WebBrowser;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
public class ImdbInfo {
/** Log4j logger for this class. */
private static final Logger logger = Logger.getLogger(ImdbInfo.class);
/** Prefix prepended to the log messages written by this class. */
private static final String logMessage = "ImdbInfo: ";
/** Site key used when the "imdb.site" property is not set, and as the fallback for unknown keys. */
private static final String DEFAULT_SITE = "us";
/** Value of {@link #objectType} for movie lookups (IDs prefixed "tt"). */
private static final String OBJECT_MOVIE = "movie";
/** Value of {@link #objectType} for person lookups (IDs prefixed "nm"). */
private static final String OBJECT_PERSON = "person";
/**
 * Site definitions keyed by site code; filled by the static initializer.
 * Parameterised so that {@code get} yields an {@link ImdbSiteDataDefinition}
 * directly; with a raw map the assignments to {@link #siteDef} would need a cast.
 */
private static final Map<String, ImdbSiteDataDefinition> MATCHES_DATA_PER_SITE = new HashMap<String, ImdbSiteDataDefinition>();
/** Site key read from the "imdb.site" property, defaulting to {@link #DEFAULT_SITE}. */
private final String imdbSite = PropertiesUtil.getProperty("imdb.site", DEFAULT_SITE);
/** Name of the search engine used by {@code getImdbId}: "google", "yahoo", "none" or anything else for the IMDb API. */
private String preferredSearchEngine;
/** Browser used to issue the HTTP requests. */
private WebBrowser webBrowser;
/** Kind of object currently being searched for; one of {@link #OBJECT_MOVIE} or {@link #OBJECT_PERSON}. */
private String objectType = OBJECT_MOVIE;
/** Definition of the site selected by {@link #imdbSite}. */
private ImdbSiteDataDefinition siteDef;
/**
 * Headings of the sections of an IMDb search result page.
 * NOTE(review): presumably the order in which the sections are examined; the usage is not visible here - confirm.
 */
private static final String[] SEARCH_ORDER = {"Titles (Exact Matches)", "Popular Titles", "Titles (Partial Matches)", "Titles (Approx Matches)"};
static {
MATCHES_DATA_PER_SITE.put("us", new ImdbSiteDataDefinition("http://www.imdb.com/", "ISO-8859-1", "Director|Directed by", "Cast", "Release Date", "Runtime", "Country",
"Company", "Genre", "Quotes", "Plot", "Rated", "Certification", "Original Air Date", "Writer|Writing credits", "Taglines"));
MATCHES_DATA_PER_SITE.put("fr", new ImdbSiteDataDefinition("http://www.imdb.fr/", "ISO-8859-1", "Réalisateur|Réalisé par", "Ensemble", "Date de sortie", "Durée", "Pays",
"Société", "Genre", "Citation", "Intrigue", "Rated", "Classification", "Date de sortie", "Scénaristes|Scénaristes", "Taglines"));
MATCHES_DATA_PER_SITE.put("es", new ImdbSiteDataDefinition("http://www.imdb.es/", "ISO-8859-1", "Director|Dirigida por", "Reparto", "Fecha de Estreno", "Duración", "País",
"Compañía", "Género", "Quotes", "Trama", "Rated", "Clasificación", "Fecha de Estreno", "Escritores|Créditos del guión", "Taglines"));
MATCHES_DATA_PER_SITE.put("de", new ImdbSiteDataDefinition("http://www.imdb.de/", "ISO-8859-1", "Regisseur|Regie", "Besetzung", "Premierendatum", "Länge", "Land",
"Firma", "Genre", "Quotes", "Handlung", "Rated", "Altersfreigabe", "Premierendatum", "Guionista|Buch", "Taglines"));
MATCHES_DATA_PER_SITE.put("it", new ImdbSiteDataDefinition("http://www.imdb.it/", "ISO-8859-1", "Regista|Registi|Regia di", "Cast", "Data di uscita", "Durata",
"Nazionalità", "Compagnia", "Genere", "Quotes", "Trama", "Rated", "Certification", "Data di uscita", "Sceneggiatore|Scritto da", "Taglines"));
MATCHES_DATA_PER_SITE.put("pt", new ImdbSiteDataDefinition("http://www.imdb.pt/", "ISO-8859-1", "Diretor|Dirigido por", "Elenco", "Data de Lançamento", "Duração",
"País", "Companhia", "Gênero", "Quotes", "Argumento", "Rated", "Certificação", "Data de Lançamento",
"Roteirista|Créditos como roteirista", "Taglines"));
// Use this as a workaround for English speakers abroad who get localised versions of imdb.com
MATCHES_DATA_PER_SITE.put("labs", new ImdbSiteDataDefinition("http://akas.imdb.com/", "ISO-8859-1", "Director|Directors|Directed by", "Cast", "Release Date", "Runtime", "Country",
"Production Co", "Genres", "Quotes", "Storyline", "Rated", "Certification", "Original Air Date", "Writer|Writers|Writing credits", "Taglines"));
// TODO: Leaving this as labs.imdb.com for the time being, but will be updated to www.imdb.com
MATCHES_DATA_PER_SITE.put("us2", new ImdbSiteDataDefinition("http://labs.imdb.com/", "ISO-8859-1", "Director|Directors|Directed by", "Cast", "Release Date", "Runtime", "Country",
"Production Co", "Genres", "Quotes", "Storyline", "Rated", "Certification", "Original Air Date", "Writer|Writers|Writing credits", "Taglines"));
// Not 100% sure these are correct
MATCHES_DATA_PER_SITE.put("it2", new ImdbSiteDataDefinition("http://www.imdb.it/", "ISO-8859-1", "Regista|Registi|Regia di", "Attori", "Data di uscita", "Durata",
"Nazionalità", "Compagnia", "Genere", "Quotes", "Trama", "Rated", "Certification", "Data di uscita", "Sceneggiatore|Scritto da", "Taglines"));
}
/**
 * Override the search engine used to look up IMDb IDs.
 *
 * @param preferredSearchEngine engine name; "google", "yahoo" and "none" are
 *        recognised (case-insensitively) by {@code getImdbId}, any other value
 *        selects the IMDb API lookup
 */
public void setPreferredSearchEngine(String preferredSearchEngine) {
    this.preferredSearchEngine = preferredSearchEngine;
}
public ImdbInfo() {
webBrowser = new WebBrowser();
preferredSearchEngine = PropertiesUtil.getProperty("imdb.id.search", "imdb");
siteDef = MATCHES_DATA_PER_SITE.get(imdbSite);
if (siteDef == null) {
logger.warn(logMessage + "No site definition for " + imdbSite + " using the default instead " + DEFAULT_SITE);
siteDef = MATCHES_DATA_PER_SITE.get(DEFAULT_SITE);
}
}
/**
 * Look up the IMDb ID of the movie with the given name and year, using the
 * preferred search engine.
 *
 * The engine name is compared case-insensitively: "google" and "yahoo" use the
 * corresponding web search, "none" disables the lookup, and any other value
 * (including {@code null}) uses the IMDb API.
 *
 * @param movieName the title to search for
 * @param year the release year, passed through to the selected lookup
 * @return the result of the selected lookup, or {@code Movie.UNKNOWN} when the
 *         engine is "none"
 */
public String getImdbId(String movieName, String year) {
    // Every lookup through this entry point is a movie lookup.
    objectType = OBJECT_MOVIE;

    final String engine = preferredSearchEngine;
    if ("google".equalsIgnoreCase(engine)) {
        return getImdbIdFromGoogle(movieName, year);
    }
    if ("yahoo".equalsIgnoreCase(engine)) {
        return getImdbIdFromYahoo(movieName, year);
    }
    if ("none".equalsIgnoreCase(engine)) {
        return Movie.UNKNOWN;
    }
    return getImdbIdFromImdbApi(movieName, year);
}
/**
 * Get the IMDb ID for a person.
 *
 * When {@code movieId} is a valid string, the request URL is built from the
 * site address followed by {@code search/name?name=}, the URL-encoded person
 * name, {@code &role=} and the movie ID, and the page is fetched with the web
 * browser.
 *
 * NOTE(review): the body of this method is damaged in this copy of the file;
 * see the note inside the method. The handling of the response cannot be
 * documented from what is visible here.
 *
 * @param personName the name of the person; URL-encoded with the site charset
 * @param movieId appended as the "role" query parameter when it is a valid string
 * @return NOTE(review): the return statements of this method are not visible;
 *         presumably the person ID or {@code Movie.UNKNOWN} - confirm
 */
public String getImdbPersonId(String personName, String movieId) {
try {
if (StringTools.isValidString(movieId)) {
// Build the name search URL, restricted to the given movie through the "role" parameter.
StringBuilder sb = new StringBuilder(siteDef.getSite());
sb.append("search/name?name=");
sb.append(URLEncoder.encode(personName, siteDef.getCharset().displayName())).append("&role=").append(movieId);
logger.debug(logMessage + "Querying IMDB for " + sb.toString());
String xml = webBrowser.request(sb.toString());
// Check if this is an exact match (we got a person page instead of a results list)
// NOTE(review): the next line is truncated in this copy (text inside the string
// literals appears to have been stripped), and the code after it uses movieName,
// beginIndex and imdbId, none of which are declared in this method as shown. It
// looks like the tail of a different lookup method; restore from the upstream source.
Pattern titleregex = Pattern.compile(Pattern.quote("", ""), " -1) {
// Take the first token delimited by '/' or '"', starting 7 characters after beginIndex, as the ID.
StringTokenizer st = new StringTokenizer(xml.substring(beginIndex + 7), "/\"");
imdbId = st.nextToken();
}
// Accept the ID only when its prefix matches the object type: "tt" for movies, "nm" otherwise.
if (imdbId.startsWith(objectType.equals(OBJECT_MOVIE) ? "tt" : "nm")) {
logger.debug("Found IMDb ID: " + imdbId);
return imdbId;
} else {
return Movie.UNKNOWN;
}
} catch (Exception error) {
// Any failure is logged and reported as an unknown ID.
logger.error(logMessage + "Failed retreiving IMDb Id for movie : " + movieName);
logger.error(logMessage + "Error : " + error.getMessage());
return Movie.UNKNOWN;
}
}
/**
* Retrieve the IMDb ID matching the specified movie name and year. This
* routine is based on an IMDb request.
*/
private String getImdbIdFromImdb(String movieName, String year) {
/*
* IMDb matches seem to come in several "flavours".
*
* Firstly, if there is one exact match it returns the matching IMDb page.
*
* If that fails to produce a unique hit then a list of possible matches are returned categorised as:
* Popular Titles (Displaying ? Results)
* Titles (Exact Matches) (Displaying ? Results)
* Titles (Partial Matches) (Displaying ? Results)
*
* We should check the Exact match section first, then the popular titles and finally the partial matches.
*
* Note: That even with exact matches there can be more than 1 hit, for example "Star Trek"
*/
// logger.info(logMessage + "Movie Name: '" + movieName + "' (" + year + ")"); // XXX DEBUG
StringBuilder sb = new StringBuilder(siteDef.getSite());
sb.append("find?q=");
try {
sb.append(URLEncoder.encode(movieName, siteDef.getCharset().displayName()));
} catch (UnsupportedEncodingException ex) {
// Failed to encode the movie name for some reason!
logger.debug(logMessage + "Failed to encode movie name: " + movieName);
sb.append(movieName);
}
if (StringTools.isValidString(year)) {
sb.append("+%28").append(year).append("%29");
}
sb.append(";s=");
sb.append(objectType.equals(OBJECT_MOVIE) ? "tt" : "nm");
sb.append(";site=aka");
logger.debug(logMessage + "Querying IMDB for " + sb.toString());
String xml;
try {
xml = webBrowser.request(sb.toString());
} catch (IOException ex) {
logger.error(logMessage + "Failed retreiving IMDb Id for movie : " + movieName);
logger.error(logMessage + "Error : " + ex.getMessage());
return Movie.UNKNOWN;
}
// Check if this is an exact match (we got a movie page instead of a results list)
Pattern titleregex = Pattern.compile(Pattern.quote("", "\"."), "", "").toLowerCase();
String formattedMovieName;
if (StringTools.isValidString(otherMovieName)) {
if (StringTools.isValidString(year) && otherMovieName.endsWith(")") && otherMovieName.contains("(")) {
otherMovieName = otherMovieName.substring(0, otherMovieName.lastIndexOf('(') - 1);
formattedMovieName = otherMovieName + " (" + year + ")";
} else {
formattedMovieName = otherMovieName + "";
}
} else {
sb = new StringBuilder();
try {
sb.append(URLEncoder.encode(movieName, siteDef.getCharset().displayName()).replace("+", " "));
} catch (UnsupportedEncodingException ex) {
logger.debug(logMessage + "Failed to encode movie name: " + movieName);
sb.append(movieName);
}
sb.append("");
if (StringTools.isValidString(year)) {
sb.append(" (").append(year).append(")");
}
otherMovieName = sb.toString();
formattedMovieName = otherMovieName;
}
// logger.debug(logMessage + "Title search: '" + formattedMovieName + "'"); // XXX DEBUG
for (String searchResult : HTMLTools.extractTags(xml, "