org.dspace.app.util.GoogleMetadata Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of dspace-api Show documentation
Show all versions of dspace-api Show documentation
DSpace core data model and service APIs.
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.app.util;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ListMultimap;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.authorize.factory.AuthorizeServiceFactory;
import org.dspace.content.Bitstream;
import org.dspace.content.Bundle;
import org.dspace.content.Item;
import org.dspace.content.MetadataField;
import org.dspace.content.MetadataSchema;
import org.dspace.content.MetadataValue;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.ItemService;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.handle.factory.HandleServiceFactory;
import org.dspace.services.ConfigurationService;
import org.dspace.services.factory.DSpaceServicesFactory;
import org.jdom.Element;
/**
* Configuration and mapping for Google Scholar output metadata
*
* @author Sands Fish
*/
@SuppressWarnings("deprecation")
public class GoogleMetadata {
/** Class-wide logger. */
private static final Logger log = LogManager.getLogger(GoogleMetadata.class);

/** Prefix identifying the properties in the configuration file that belong to this class. */
protected static final String GOOGLE_PREFIX = "google.";

/** Service used to read metadata and bundles of {@link #item}. */
protected ItemService itemService;

/** The item this instance was built for. */
protected Item item;

/** URL the item's handle resolves to; used for the {@code $handle} substitution. */
protected String itemURL;

// Configuration keys and fields. Static, i.e. shared by every instance;
// populated by loadGoogleScholarSettings() with the "google." prefix removed from the keys.
protected static Map<String, String> googleScholarSettings = new HashMap<>();

// Google field names (e.g. citation_fieldname) and formatted metadata
// values. A multimap because one Google field may carry several values.
protected ListMultimap<String, String> metadataMappings = ArrayListMultimap.create();

// Names of the Google Scholar meta tags. They double as lookup keys into
// googleScholarSettings.
protected final String TITLE = "citation_title";
protected final String JOURNAL_TITLE = "citation_journal_title";
protected final String PUBLISHER = "citation_publisher";
protected final String AUTHORS = "citation_author";
protected final String DATE = "citation_date";
protected final String VOLUME = "citation_volume";
protected final String ISSUE = "citation_issue";
protected final String FIRSTPAGE = "citation_firstpage";
protected final String LASTPAGE = "citation_lastpage";
protected final String DOI = "citation_doi";
protected final String PMID = "citation_pmid";
protected final String ABSTRACT = "citation_abstract_html_url";
protected final String FULLTEXT = "citation_fulltext_html_url";
protected final String PDF = "citation_pdf_url";
protected final String ISSN = "citation_issn";
protected final String ISBN = "citation_isbn";
protected final String LANGUAGE = "citation_language";
protected final String KEYWORDS = "citation_keywords";
protected final String CONFERENCE = "citation_conference";

// The *_ID constants are configuration keys only (they hold the
// field:value pairs used to recognise an item type), not meta tag names.
protected final String DISSERTATION_ID = "identifiers.dissertation";
protected final String DISSERTATION_NAME = "citation_dissertation_name";
protected final String DISSERTATION_INSTITUTION = "citation_dissertation_institution";
protected final String PATENT_ID = "identifiers.patent";
protected final String PATENT_NUMBER = "citation_patent_number";
protected final String PATENT_COUNTRY = "citation_patent_country";
protected final String TECH_REPORT_ID = "identifiers.technical_report";
protected final String TECH_REPORT_NUMBER = "citation_technical_report_number";
protected final String TECH_REPORT_INSTITUTION = "citation_technical_report_institution";

// Return modes understood by resolveMetadata().
protected final int SINGLE = 0;
protected final int MULTI = 1;
protected final int ALL_FIELDS_IN_OPTION = 2;

// Comparator used to order candidate bitstreams. Static, yet re-assigned by
// every constructor call.
private static GoogleBitstreamComparator googleBitstreamComparator = null;

private final ConfigurationService configurationService
    = DSpaceServicesFactory.getInstance().getConfigurationService();
/**
 * Load configured fields from the properties file named by the
 * {@code google-metadata.config} configuration property.
 * <p>
 * Every property whose key starts with {@link #GOOGLE_PREFIX} and whose
 * name and value are both non-blank is stored in
 * {@code googleScholarSettings}, keyed by the name without the prefix.
 *
 * @throws MalformedURLException if the configured path cannot be turned into a URL.
 * @throws IOException           if the file cannot be opened or read.
 */
private void loadGoogleScholarSettings()
    throws MalformedURLException, IOException {
    String googleConfigFile = configurationService
        .getProperty("google-metadata.config");
    log.info("Using [{}] for Google Metadata configuration", googleConfigFile);

    File loadedFile = new File(googleConfigFile);
    URL url;
    try {
        url = loadedFile.toURI().toURL();
    } catch (MalformedURLException mux) {
        log.error("Can't find Google Metadata configuration file: {}",
                  googleConfigFile, mux);
        throw mux;
    }

    Properties properties = new Properties();
    // try-with-resources: the stream used to be left open after loading.
    try (InputStream is = url.openStream()) {
        properties.load(is);
    } catch (IOException iox) {
        log.error("Could not read Google Metadata configuration file: {}",
                  googleConfigFile, iox);
        throw iox;
    }

    for (String rawKey : properties.stringPropertyNames()) {
        String key = rawKey.trim();
        if (!key.startsWith(GOOGLE_PREFIX)) {
            continue;
        }
        String name = key.substring(GOOGLE_PREFIX.length()).trim();
        // Look the value up with the untrimmed key: that is the key the
        // Properties object actually holds.
        String field = properties.getProperty(rawKey);
        if (null == field) {
            continue;
        }
        field = field.trim();
        if (!name.equals("") && !field.equals("")) {
            googleScholarSettings.put(name, field);
        }
    }

    logConfiguration();
}
/**
 * Write the currently loaded field mapping to the log, one line per entry.
 * Does nothing unless debug logging is enabled.
 */
public static void logConfiguration() {
    if (log.isDebugEnabled()) {
        log.debug("Google Metadata Configuration Mapping:");
        for (Entry<String, String> mapping : googleScholarSettings.entrySet()) {
            log.debug(" {} => {}", mapping.getKey(), mapping.getValue());
        }
    }
}
/**
* Wrap the item, parse all configured fields and generate metadata field
* values.
*
* The field mapping is loaded from the configuration file only when the
* shared settings map is still empty; afterwards the item's handle is
* resolved to a URL, the bitstream comparator is rebuilt and
* {@link #parseItem()} fills {@code metadataMappings}.
*
* @param context context
* @param item The item being viewed to extract metadata from
* @throws SQLException if database error.
* @throws java.io.IOException passed through.
*/
public GoogleMetadata(Context context, Item item)
throws SQLException, IOException {
// The settings map is static, so the file is read only while it is empty.
if (googleScholarSettings.isEmpty()) {
loadGoogleScholarSettings();
}
// Hold onto the item in case we need to refresh a stale parse
this.item = item;
this.itemService = ContentServiceFactory.getInstance().getItemService();
itemURL = HandleServiceFactory.getInstance().getHandleService().resolveToURL(context, item.getHandle());
// NOTE(review): this writes a static field from an instance constructor,
// so the comparator is shared by all instances and replaced on each construction.
googleBitstreamComparator = new GoogleBitstreamComparator(context, googleScholarSettings);
// NOTE(review): parseItem() is protected and overridable; a subclass
// override runs before the subclass constructor body has executed.
parseItem();
}
/**
 * Map one value onto the given Google field.
 * <p>
 * The configured mapping is either the literal {@code $handle} (use the
 * item URL), the literal {@code $simple-pdf} (use the URL produced by
 * {@link #getPDFSimpleUrl(Item)}) or a metadata field filter, in which
 * case the first matching, non-blank value of the item is used.
 *
 * @param fieldName metadata field name
 * @return {@code true} if a value was added, {@code false} otherwise
 */
protected boolean addSingleField(String fieldName) {
    final String mapping = googleScholarSettings.get(fieldName);
    if (mapping == null || mapping.isEmpty()) {
        return false;
    }

    if (log.isDebugEnabled()) {
        log.debug("Processing " + fieldName);
    }

    if ("$handle".equals(mapping)) {
        if (itemURL == null || itemURL.isEmpty()) {
            return false;
        }
        metadataMappings.put(fieldName, itemURL);
        return true;
    }

    if ("$simple-pdf".equals(mapping)) {
        final String pdfUrl = getPDFSimpleUrl(item);
        if (pdfUrl.isEmpty()) {
            return false;
        }
        metadataMappings.put(fieldName, pdfUrl);
        return true;
    }

    final MetadataValue match = resolveMetadataField(mapping);
    if (match == null) {
        // No values found
        return false;
    }
    final String value = match.getValue();
    if (value == null || value.trim().isEmpty()) {
        return false;
    }
    // The value is stored as-is; trimming is only used for the blank check.
    metadataMappings.put(fieldName, value);
    return true;
}
/**
 * A singular version of resolveMetadata to return only one field value
 * instead of an aggregate.
 *
 * @param configFilter list of DC metadata fields separated by "|" characters
 * @return The first configured match of metadata field for the item, or
 *         {@code null} when nothing matches or the filter is invalid.
 */
protected MetadataValue resolveMetadataField(String configFilter) {
    ArrayList<MetadataValue> fields = resolveMetadata(configFilter, SINGLE);
    if (null == fields || fields.isEmpty()) {
        return null;
    }
    return fields.get(0);
}
/**
 * A plural version of resolveMetadata for aggregate fields.
 *
 * @param configFilter list of DC metadata fields separated by "|" characters
 * @return Aggregate of all matching metadata fields configured in the first
 *         option field-set to return any number of filter matches, or
 *         {@code null} (never an empty list) when nothing matches.
 */
protected ArrayList<MetadataValue> resolveMetadataFields(String configFilter) {
    ArrayList<MetadataValue> fields = resolveMetadata(configFilter, MULTI);
    if (null == fields || fields.isEmpty()) {
        return null;
    }
    return fields;
}
/**
 * Aggregate the metadata values present on the current item that pass the
 * configuration filter.
 * <p>
 * The filter is split into options ("|"), each option into fields (","),
 * and the options are tried in order:
 * <ul>
 * <li>{@code SINGLE}: the very first value found is returned on its own;</li>
 * <li>{@code MULTI}: all values of the first option with at least one
 * value are returned;</li>
 * <li>{@code ALL_FIELDS_IN_OPTION}: all values of the first option in
 * which <em>every</em> field has at least one value are returned.</li>
 * </ul>
 *
 * @param configFilter list of DC metadata fields separated by "|" characters
 * @param returnType   GoogleMetadata.SINGLE / GoogleMetadata.MULTI / GoogleMetadata.ALL_FIELDS_IN_OPTION
 * @return the matching values, or {@code null} when the filter is invalid
 *         or no option matches
 */
protected ArrayList<MetadataValue> resolveMetadata(String configFilter,
                                                   int returnType) {
    if (null == configFilter || configFilter.trim().equals("")
        || !configFilter.contains(".")) {
        log.error("The configuration string [" + configFilter
                      + "] is invalid.");
        return null;
    }
    configFilter = configFilter.trim();

    ArrayList<ArrayList<String>> parsedOptions = parseOptions(configFilter);
    if (null == parsedOptions) {
        return null;
    }

    if (log.isDebugEnabled()) {
        log.debug("Resolved Fields For This Item Per Configuration Filter:");
        for (int i = 0; i < parsedOptions.size(); i++) {
            log.debug("Option " + (i + 1) + ":");
            for (String f : parsedOptions.get(i)) {
                log.debug("{" + f + "}");
            }
        }
    }

    // Iterate through each configured option's field-set until
    // we have a match.
    for (ArrayList<String> optionFields : parsedOptions) {
        // Number of fields of this option that have at least one value.
        // (Previously declared but never incremented, which made the
        // ALL_FIELDS_IN_OPTION comparison below impossible to satisfy.)
        int optionMatches = 0;
        ArrayList<MetadataValue> resolvedFields = new ArrayList<>();

        for (String field : optionFields) {
            String[] components = parseComponents(field);
            List<MetadataValue> values = itemService.getMetadata(item, components[0], components[1],
                                                                 components[2], Item.ANY);
            if (null == values || values.isEmpty()) {
                continue;
            }

            if (returnType == SINGLE) {
                // First value wins; nothing else needs to be looked at.
                resolvedFields.add(values.get(0));
                if (log.isDebugEnabled()) {
                    log.debug("Resolved Field Value For This Item:");
                    for (MetadataValue r : resolvedFields) {
                        log.debug("{" + r.getValue() + "}");
                    }
                }
                return resolvedFields;
            }

            optionMatches++;
            resolvedFields.addAll(values);
        }

        // If the item had any of the fields contained in this option,
        // return them, otherwise move on to the next option's field-set.
        if (!resolvedFields.isEmpty()) {
            if (log.isDebugEnabled()) {
                log.debug("Resolved Field Values For This Item:");
                for (MetadataValue v : resolvedFields) {
                    log.debug("{" + v.getValue() + "}");
                }
            }

            if (ALL_FIELDS_IN_OPTION == returnType) {
                // Full option match: every field of the option is present.
                if (optionMatches == optionFields.size()) {
                    return resolvedFields;
                }
            } else if (MULTI == returnType) {
                // Any match within the option is good enough.
                return resolvedFields;
            }
        }
    }
    return null;
}
/**
* Parse first-match path of metadata field-group options for the given
* configuration.
*
* @param configFilter list of DC metadata fields separated by "|" characters
* @return array of parsed options or null
*/
protected ArrayList> parseOptions(String configFilter) {
ArrayList options = new ArrayList<>();
ArrayList> parsedOptions = new ArrayList<>();
if (null == configFilter || configFilter.equals("")) {
return null;
}
if (configFilter.contains("|")) {
String[] configOptions = configFilter.split("\\|");
for (String option : configOptions) {
options.add(option.trim());
}
} else {
options = new ArrayList<>();
options.add(configFilter);
}
// Parse first-match path options. The first option (field-set)
// to match fields present in the item is used.
ArrayList parsedFields;
// Parse the fields for each field-set in order.
for (String option : options) {
ArrayList fields;
parsedFields = new ArrayList<>();
if (option.contains(",")) {
fields = parseFields(option);
} else {
fields = new ArrayList<>();
fields.add(option);
}
// Parse field list for this field-set, expanding any wildcards.
for (String field : fields) {
if (field.contains("*")) {
ArrayList wc = parseWildcard(field);
for (String wcField : wc) {
if (!parsedFields.contains(wcField)) {
parsedFields.add(wcField);
}
}
} else {
if (!parsedFields.contains(field)) {
parsedFields.add(field);
}
}
}
parsedOptions.add(parsedFields);
}
if (null != parsedOptions) {
return parsedOptions;
} else {
return null;
}
}
/**
 * Split a comma-separated field configuration into its individual,
 * trimmed field names. The returned list is mutable so that callers can
 * add to it when expanding wildcards.
 *
 * @param configString - Value of one metadata field configuration
 * @return A list of raw field configurations, in configuration order.
 */
protected ArrayList<String> parseFields(String configString) {
    ArrayList<String> fields = new ArrayList<>();
    for (String field : configString.split(",")) {
        fields.add(field.trim());
    }
    return fields;
}
/**
 * Pull apart an individual field structure.
 * <p>
 * The field is split on "." and each part is trimmed. Missing trailing
 * parts are left {@code null}; parts beyond the third are ignored instead
 * of overflowing the result array.
 *
 * @param field The configured field for one metadata field map
 * @return Schema, Element, Qualifier of metadata field (always length 3)
 */
protected String[] parseComponents(String field) {
    String[] components = new String[3];
    String[] parts = field.split("\\.");
    int count = Math.min(parts.length, components.length);
    for (int index = 0; index < count; index++) {
        components[index] = parts[index].trim();
    }
    return components;
}
/**
 * Expand any wildcard characters to an array of all matching fields for
 * this item. No order consistency is implied.
 * <p>
 * Each "*" component is replaced by {@code Item.ANY} before the item's
 * metadata is queried; the distinct field names of the returned values
 * form the result.
 *
 * @param field The field identifier containing a wildcard character.
 * @return Expanded field list, or {@code null} if {@code field} contains
 *         no wildcard.
 */
protected ArrayList<String> parseWildcard(String field) {
    if (!field.contains("*")) {
        return null;
    }

    String[] components = parseComponents(field);
    for (int i = 0; i < components.length; i++) {
        // A component is null when the field has fewer than three parts
        // (e.g. "dc.*"); that used to fail with a NullPointerException.
        if (null != components[i] && components[i].trim().equals("*")) {
            components[i] = Item.ANY;
        }
    }

    List<MetadataValue> allMD = itemService.getMetadata(item, components[0], components[1],
                                                        components[2], Item.ANY);

    ArrayList<String> expandedDC = new ArrayList<>();
    for (MetadataValue v : allMD) {
        // De-dup multiple occurrences of field names in item
        String fieldName = buildFieldName(v);
        if (!expandedDC.contains(fieldName)) {
            expandedDC.add(fieldName);
        }
    }

    if (log.isDebugEnabled()) {
        log.debug("Field Names From Expanded Wildcard \"" + field
                      + "\"");
        for (String v : expandedDC) {
            log.debug(" " + v);
        }
    }

    return expandedDC;
}
/**
 * Assemble the dotted name ({@code schema.element[.qualifier]}) of the
 * metadata field a value belongs to. The qualifier part is left out when
 * the field has none.
 *
 * @param v The metadata value to construct a name for.
 * @return The complete metadata field name.
 */
protected String buildFieldName(MetadataValue v) {
    final MetadataField field = v.getMetadataField();
    final MetadataSchema schema = v.getMetadataField().getMetadataSchema();

    final String unqualified = schema.getName() + "." + field.getElement();
    final String qualifier = field.getQualifier();
    return (null == qualifier) ? unqualified : unqualified + "." + qualifier;
}
/**
 * Using metadata field mappings contained in the loaded configuration,
 * parse through configured metadata fields, building valid Google metadata
 * value strings. Field names and values contained in metadataMappings.
 * <p>
 * Besides the generic citation fields, type-specific fields are added for
 * items recognised as dissertation, patent or technical report. The
 * publisher is added for patents and for every item that is neither a
 * dissertation nor a technical report, and it is added only once.
 */
protected void parseItem() {
    // TITLE
    addSingleField(TITLE);

    // AUTHORS (multi)
    addMultipleValues(AUTHORS);

    // DATE
    addSingleField(DATE);

    // ISSN
    addSingleField(ISSN);

    // ISBN
    addSingleField(ISBN);

    // JOURNAL_TITLE
    addSingleField(JOURNAL_TITLE);

    // VOLUME
    addSingleField(VOLUME);

    // ISSUE
    addSingleField(ISSUE);

    // FIRSTPAGE
    addSingleField(FIRSTPAGE);

    // LASTPAGE
    addSingleField(LASTPAGE);

    // DOI
    addSingleField(DOI);

    // PMID
    addSingleField(PMID);

    // ABSTRACT_HTML_URL ('$handle' variable substitution if present)
    addSingleField(ABSTRACT);

    // FULLTEXT_HTML_URL ('$handle' variable substitution if present)
    addSingleField(FULLTEXT);

    // PDF_URL ('$handle' variable substitution if present)
    addSingleField(PDF);

    // LANGUAGE
    addSingleField(LANGUAGE);

    // KEYWORDS (multi)
    addAggregateValues(KEYWORDS, ";");

    // CONFERENCE
    addSingleField(CONFERENCE);

    // Dissertations
    // Each type check re-parses its configuration, so evaluate it once and
    // reuse the result for the publisher decision at the end.
    final boolean dissertation = itemIsDissertation();
    if (dissertation) {
        if (log.isDebugEnabled()) {
            log.debug("ITEM TYPE: DISSERTATION");
        }

        addSingleField(DISSERTATION_NAME);
        addSingleField(DISSERTATION_INSTITUTION);
    }

    // Patents
    final boolean patent = itemIsPatent();
    if (patent) {
        if (log.isDebugEnabled()) {
            log.debug("ITEM TYPE: PATENT");
        }

        addSingleField(PATENT_NUMBER);

        // Use config value for patent country. Should be a literal.
        String countryConfig = googleScholarSettings.get(PATENT_COUNTRY);
        if (null != countryConfig && !countryConfig.trim().equals("")) {
            metadataMappings.put(PATENT_COUNTRY, countryConfig.trim());
        }
    }

    // Tech Reports
    final boolean techReport = itemIsTechReport();
    if (techReport) {
        if (log.isDebugEnabled()) {
            log.debug("ITEM TYPE: TECH REPORT");
        }
        addSingleField(TECH_REPORT_NUMBER);
        addSingleField(TECH_REPORT_INSTITUTION);
    }

    // PUBLISHER
    // Added in one place only: previously a patent that was neither a
    // dissertation nor a tech report received the publisher twice.
    if (patent || (!dissertation && !techReport)) {
        addSingleField(PUBLISHER);
    }
}
/**
* Fetch retaining the order of the values for any given key in which they
* where added (like authors).
*
* Usage: {@code GoogleMetadata gmd = new GoogleMetadata(item); for(Entry mapping : googlemd.getMappings()) ...}
*
* @return Iterable of metadata fields mapped to Google-formatted values
*/
public Collection> getMappings() {
return metadataMappings.entries();
}
/**
 * Produce meta elements that can easily be put into the head.
 * <p>
 * One {@code meta} element without namespace is created per mapping, with
 * the Google field name in its {@code name} attribute and the value in its
 * {@code content} attribute.
 *
 * @return List of elements
 */
public List<Element> disseminateList() {
    List<Element> metas = new ArrayList<>();

    for (Entry<String, String> m : getMappings()) {
        Element e = new Element("meta");
        e.setNamespace(null);
        e.setAttribute("name", m.getKey());
        e.setAttribute("content", m.getValue());
        metas.add(e);
    }
    return metas;
}
// Getters for individual metadata fields. Each one returns the values
// collected in metadataMappings for the corresponding Google field.

/**
 * @return the citation_title
 */
public List<String> getTitle() {
    return metadataMappings.get(TITLE);
}

/**
 * @return the citation_journal_title
 */
public List<String> getJournalTitle() {
    return metadataMappings.get(JOURNAL_TITLE);
}

/**
 * @return the citation_publisher
 */
public List<String> getPublisher() {
    return metadataMappings.get(PUBLISHER);
}

/**
 * @return the citation_author values
 */
public List<String> getAuthors() {
    return metadataMappings.get(AUTHORS);
}

/**
 * @return the citation_date
 */
public List<String> getDate() {
    return metadataMappings.get(DATE);
}

/**
 * @return the citation_volume
 */
public List<String> getVolume() {
    return metadataMappings.get(VOLUME);
}

/**
 * @return the citation_issue
 */
public List<String> getIssue() {
    return metadataMappings.get(ISSUE);
}

/**
 * @return the citation_firstpage
 */
public List<String> getFirstpage() {
    return metadataMappings.get(FIRSTPAGE);
}

/**
 * @return the citation_lastpage
 */
public List<String> getLastpage() {
    return metadataMappings.get(LASTPAGE);
}

/**
 * @return the citation_doi
 */
public List<String> getDOI() {
    return metadataMappings.get(DOI);
}

/**
 * @return the citation_pmid
 */
public List<String> getPmid() {
    return metadataMappings.get(PMID);
}

/**
 * @return the citation_abstract_html_url
 */
public List<String> getAbstractHTMLURL() {
    return metadataMappings.get(ABSTRACT);
}

/**
 * @return the citation_fulltext_html_url
 */
public List<String> getFulltextHTMLURL() {
    return metadataMappings.get(FULLTEXT);
}

/**
 * @return the citation_pdf_url
 */
public List<String> getPDFURL() {
    return metadataMappings.get(PDF);
}

/**
 * @return the citation_issn
 */
public List<String> getISSN() {
    return metadataMappings.get(ISSN);
}

/**
 * @return the citation_isbn
 */
public List<String> getISBN() {
    return metadataMappings.get(ISBN);
}

/**
 * @return the citation_language
 */
public List<String> getLanguage() {
    return metadataMappings.get(LANGUAGE);
}

/**
 * @return the citation_keywords
 */
public List<String> getKeywords() {
    return metadataMappings.get(KEYWORDS);
}

/**
 * @return the citation_conference
 */
public List<String> getConference() {
    return metadataMappings.get(CONFERENCE);
}

/**
 * @return the citation_dissertation_name
 */
public List<String> getDissertationName() {
    return metadataMappings.get(DISSERTATION_NAME);
}

/**
 * @return the citation_dissertation_institution
 */
public List<String> getDissertationInstitution() {
    return metadataMappings.get(DISSERTATION_INSTITUTION);
}

/**
 * @return the citation_patent_number
 */
public List<String> getPatentNumber() {
    return metadataMappings.get(PATENT_NUMBER);
}

/**
 * @return the citation_patent_country
 */
public List<String> getPatentCountry() {
    return metadataMappings.get(PATENT_COUNTRY);
}

/**
 * @return the citation_technical_report_number
 */
public List<String> getTechnicalReportNumber() {
    return metadataMappings.get(TECH_REPORT_NUMBER);
}

/**
 * @return the citation_technical_report_institution
 */
public List<String> getTechnicalReportInstitution() {
    return metadataMappings.get(TECH_REPORT_INSTITUTION);
}
/**
 * Gets the URL to a PDF using a very basic strategy: the bitstream chosen
 * by {@link #findLinkableFulltext(Item)} is assumed to be the PDF.
 * <p>
 * The URL is built from the {@code dspace.ui.url} configuration property,
 * followed by {@code /bitstream/<handle>/<sequence id>/} when the item has
 * a handle or {@code /retrieve/<bitstream id>/} otherwise, and ends with
 * the encoded bitstream name.
 *
 * @param item item to get PDF URL from
 * @return URL that the PDF can be directly downloaded from, or the empty
 *         string if no linkable bitstream exists or an error occurred
 */
protected String getPDFSimpleUrl(Item item) {
    try {
        Bitstream bitstream = findLinkableFulltext(item);
        if (bitstream != null) {
            StringBuilder path = new StringBuilder();
            path.append(configurationService.getProperty("dspace.ui.url"));

            if (item.getHandle() != null) {
                path.append("/bitstream/");
                path.append(item.getHandle());
                path.append("/");
                path.append(bitstream.getSequenceID());
            } else {
                path.append("/retrieve/");
                path.append(bitstream.getID());
            }

            path.append("/");
            path.append(Util.encodeBitstreamName(bitstream.getName(), Constants.DEFAULT_ENCODING));
            return path.toString();
        }
    } catch (UnsupportedEncodingException | SQLException ex) {
        // Still best effort (the field is simply left out), but keep the
        // stack trace and make the failure visible above debug level.
        log.warn("Could not determine the PDF URL of the item, omitting it", ex);
    }

    return "";
}
/**
 * Pick the bitstream of the item's ORIGINAL bundles that should be linked
 * as full text.
 * <p>
 * Bundles are visited in the order returned by the item service and the
 * bitstreams of each bundle in the order defined by the configured
 * comparator:
 * <ul>
 * <li>a publicly readable primary bitstream is returned immediately;</li>
 * <li>otherwise the first publicly readable non-primary bitstream
 * encountered is remembered and returned once all bundles have been
 * examined.</li>
 * </ul>
 *
 * @param item bitstream's parent item
 * @return a linkable bitstream or null if none found
 * @throws SQLException if database error
 */
protected Bitstream findLinkableFulltext(Item item) throws SQLException {
    Bitstream bestSoFar = null;

    List<Bundle> contentBundles = itemService.getBundles(item, "ORIGINAL");
    for (Bundle bundle : contentBundles) {
        // Sort a private copy so that the list handed out by the bundle is
        // never reordered as a side effect of this lookup.
        List<Bitstream> bitstreams = new ArrayList<>(bundle.getBitstreams());
        Collections.sort(bitstreams, googleBitstreamComparator);

        Bitstream primary = bundle.getPrimaryBitstream();
        for (Bitstream candidate : bitstreams) {
            if (candidate.equals(primary)) { // is primary -> use this one
                if (isPublic(candidate)) {
                    return candidate;
                }
            } else if (bestSoFar == null && isPublic(candidate)) {
                // A non-public candidate is skipped; keep looking for another.
                bestSoFar = candidate;
            }
        }
    }

    return bestSoFar;
}
/**
 * Check whether the given bitstream may be read without authentication.
 * The check is made with a freshly created {@code Context}; a database
 * error is logged and treated as "not public".
 *
 * @param bitstream the target bitstream
 * @return whether bitstream is readable by the Anonymous group;
 *         {@code false} for {@code null}
 */
protected boolean isPublic(Bitstream bitstream) {
    if (null == bitstream) {
        return false;
    }

    try {
        // NOTE(review): this Context is never completed or aborted here -
        // confirm whether it needs explicit cleanup.
        Context context = new Context();
        return AuthorizeServiceFactory.getInstance().getAuthorizeService()
                                      .authorizeActionBoolean(context, bitstream, Constants.READ, true);
    } catch (SQLException e) {
        log.error(
            "Cannot determine whether bitstream is public, assuming it isn't. bitstream_id=" + bitstream.getID(),
            e);
        return false;
    }
}
/**
 * Join all values of the first matching option into one string and map it
 * onto the given Google field. Values are separated by the delimiter
 * followed by a single space. Nothing is added when the field is not
 * configured or the item has no matching values.
 *
 * @param field     to aggregate all values of in a matching option
 * @param delimiter to delimit field values with
 */
protected void addAggregateValues(String field, String delimiter) {
    String fieldConfig = googleScholarSettings.get(field);
    // Like addSingleField: an unconfigured field is skipped quietly rather
    // than reported as an invalid configuration string.
    if (null == fieldConfig || fieldConfig.trim().equals("")) {
        return;
    }

    ArrayList<MetadataValue> fields = resolveMetadataFields(fieldConfig);
    if (null == fields || fields.isEmpty()) {
        return;
    }

    StringBuilder fieldMetadata = new StringBuilder();
    boolean first = true;
    for (MetadataValue metadataValue : fields) {
        if (!first) {
            fieldMetadata.append(delimiter).append(" ");
        }
        fieldMetadata.append(metadataValue.getValue());
        first = false;
    }
    metadataMappings.put(field, fieldMetadata.toString());
}
/**
 * If metadata field contains multiple values, then add each value to the map separately.
 * Nothing is added when the field is not configured or the item has no
 * matching values.
 *
 * @param FIELD metadata field
 */
protected void addMultipleValues(String FIELD) {
    String fieldConfig = googleScholarSettings.get(FIELD);
    // Like addSingleField: an unconfigured field is skipped quietly rather
    // than reported as an invalid configuration string.
    if (null == fieldConfig || fieldConfig.trim().equals("")) {
        return;
    }

    ArrayList<MetadataValue> fields = resolveMetadataFields(fieldConfig);
    if (null == fields || fields.isEmpty()) {
        return;
    }

    for (MetadataValue field : fields) {
        //TODO if this is author field, first-name first
        metadataMappings.put(FIELD, field.getValue());
    }
}
/**
 * Tell whether the item matches the type identifiers configured for
 * dissertations. Without such a configuration the answer is {@code false}.
 *
 * @return boolean
 */
protected boolean itemIsDissertation() {
    final String identifiers = googleScholarSettings.get(DISSERTATION_ID);
    final boolean configured = identifiers != null && !identifiers.trim().equals("");
    return configured && identifyItemType(identifiers);
}
/**
 * Tell whether the item matches the type identifiers configured for
 * patents. Without such a configuration the answer is {@code false}.
 *
 * @return boolean
 */
protected boolean itemIsPatent() {
    final String identifiers = googleScholarSettings.get(PATENT_ID);
    final boolean configured = identifiers != null && !identifiers.trim().equals("");
    return configured && identifyItemType(identifiers);
}
/**
 * Tell whether the item matches the type identifiers configured for
 * technical reports. Without such a configuration the answer is
 * {@code false}.
 *
 * @return boolean
 */
protected boolean itemIsTechReport() {
    final String identifiers = googleScholarSettings.get(TECH_REPORT_ID);
    final boolean configured = identifiers != null && !identifiers.trim().equals("");
    return configured && identifyItemType(identifiers);
}
/**
 * Identifies if this item matches a particular configuration of fields and
 * values for those fields to identify the type based on a type-cataloging
 * metadata practice.
 * <p>
 * The configuration is a "|"-separated list of {@code field:value} pairs.
 * The item matches when one of the metadata values resolved for the
 * configured fields equals one of the values configured for its field.
 *
 * @param dConfig configured fields (from google-metadata.properties)
 * @return item matches configuration
 */
protected boolean identifyItemType(String dConfig) {
    // FIXME: Shouldn't have to parse identifiers for every identification.
    ArrayList<ArrayList<String>> options = parseOptions(dConfig);
    if (null == options) {
        return false;
    }

    HashMap<String, ArrayList<String>> mdPairs = new HashMap<>();

    // Parse field/value pairs from field identifier string
    for (ArrayList<String> option : options) {
        if (option.isEmpty()) {
            continue;
        }

        String pair = option.get(0);
        String[] parsedPair = pair.split("\\:");
        if (2 == parsedPair.length) {
            // Trim both parts for every occurrence; the value of a repeated
            // field used to be stored untrimmed and could then never match.
            String fieldName = parsedPair[0].trim();
            String fieldValue = parsedPair[1].trim();

            ArrayList<String> configuredValues = mdPairs.get(fieldName);
            if (null == configuredValues) {
                // First occurrence of this field
                configuredValues = new ArrayList<>();
                mdPairs.put(fieldName, configuredValues);
            }
            configuredValues.add(fieldValue);

            if (log.isDebugEnabled()) {
                log.debug("Registering Type Identifier: " + parsedPair[0] + " => " + parsedPair[1]);
            }
        } else {
            log.error("Malformed field identifier name/value pair");
        }
    }

    if (mdPairs.isEmpty()) {
        return false;
    }

    // Build config string without values, only field names. Joining avoids
    // the trailing separator that used to add an empty option.
    // NOTE(review): HashMap key order is unspecified, so with several
    // distinct fields the order in which they are tried is not the
    // configuration order.
    String fieldNamesOnly = String.join(" | ", mdPairs.keySet());

    // Check resolved/present metadata fields against configured values
    ArrayList<MetadataValue> presentMD = resolveMetadataFields(fieldNamesOnly);
    if (null != presentMD && !presentMD.isEmpty()) {
        for (MetadataValue v : presentMD) {
            ArrayList<String> configuredValues = mdPairs.get(buildFieldName(v));
            if (null == configuredValues) {
                continue;
            }
            for (String configValue : configuredValues) {
                if (configValue.equals(v.getValue())) {
                    return true;
                }
            }
        }
    }
    return false;
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy