// Source listing: edu.indiana.lib.twinpeaks.search.singlesearch.musepeer.Response (Maven / Gradle / Ivy)
/**********************************************************************************
*
* Copyright (c) 2006, 2007, 2008, 2009 The Sakai Foundation
*
* Licensed under the Educational Community License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.opensource.org/licenses/ECL-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
**********************************************************************************/
package edu.indiana.lib.twinpeaks.search.singlesearch.musepeer;
import java.util.Iterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import lombok.extern.slf4j.Slf4j;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import edu.indiana.lib.osid.base.repository.http.CreatorPartStructure;
import edu.indiana.lib.osid.base.repository.http.DOIPartStructure;
import edu.indiana.lib.osid.base.repository.http.DataSource;
import edu.indiana.lib.osid.base.repository.http.DatePartStructure;
import edu.indiana.lib.osid.base.repository.http.EditionPartStructure;
import edu.indiana.lib.osid.base.repository.http.EndPagePartStructure;
import edu.indiana.lib.osid.base.repository.http.InLineCitationPartStructure;
import edu.indiana.lib.osid.base.repository.http.IsnIdentifierPartStructure;
import edu.indiana.lib.osid.base.repository.http.IssuePartStructure;
import edu.indiana.lib.osid.base.repository.http.LanguagePartStructure;
import edu.indiana.lib.osid.base.repository.http.PagesPartStructure;
import edu.indiana.lib.osid.base.repository.http.PublisherPartStructure;
import edu.indiana.lib.osid.base.repository.http.SourceTitlePartStructure;
import edu.indiana.lib.osid.base.repository.http.StartPagePartStructure;
import edu.indiana.lib.osid.base.repository.http.SubjectPartStructure;
import edu.indiana.lib.osid.base.repository.http.TypePartStructure;
import edu.indiana.lib.osid.base.repository.http.URLPartStructure;
import edu.indiana.lib.osid.base.repository.http.VolumePartStructure;
import edu.indiana.lib.osid.base.repository.http.YearPartStructure;
import edu.indiana.lib.twinpeaks.search.MatchItem;
import edu.indiana.lib.twinpeaks.search.QueryBase;
import edu.indiana.lib.twinpeaks.search.SearchResultBase;
import edu.indiana.lib.twinpeaks.util.DomUtils;
import edu.indiana.lib.twinpeaks.util.SearchException;
import edu.indiana.lib.twinpeaks.util.SessionContext;
import edu.indiana.lib.twinpeaks.util.StatusUtils;
import edu.indiana.lib.twinpeaks.util.StringUtils;
/**
* Parse the Musepeer XML response
*/
@Slf4j
public class Response extends SearchResultBase
{
/** Session context obtained in initialize(); handed to StatusUtils.updateHits() while parsing. */
private SessionContext sessionContext;
/**
* Constructor
*/
public Response() {
super();
}
/**
 * Record the attributes of the search request, then look up the session
 * context used later when hit counts are updated.
 *
 * @param query
 *          The QueryBase extension that sent the search request
 */
public void initialize(QueryBase query) {
  // Let the superclass capture the request details first; _sessionId is not
  // declared in this class (presumably populated by the superclass call).
  super.initialize(query);
  this.sessionContext = SessionContext.getInstance(_sessionId);
}
/**
 * Parse the raw search engine response bytes as XML.
 * (Intended to override SearchResultBase#parseResponse().)
 *
 * @return Response as a DOM Document
 * @throws SearchException
 *           if the response cannot be parsed; the original failure is
 *           attached as the cause so its stack trace is not lost
 */
protected Document parseResponse() throws SearchException {
  try {
    return DomUtils.parseXmlBytes(_searchResponseBytes);
  } catch (Exception exception) {
    // Keep the historical message text (exception.toString()) but also chain
    // the underlying exception for diagnosis.
    SearchException searchException = new SearchException(exception.toString());
    searchException.initCause(exception);
    throw searchException;
  }
}
/**
 * Parse the response.
 * <p>
 * Every RECORD element found under the document element of the search
 * response is turned into one MatchItem: the title, description and record
 * identifier are set directly, the remaining DATA child elements are stored
 * as PartStructure id/value pairs, and finally doRegexParse() tries to fill
 * in fields that are still missing from the in-line citation text. Each
 * finished item is handed to addItem().
 *
 * @throws SearchException
 *           if a RECORD has no DATA element
 */
public void doParse()
{
  Document responseDocument = getSearchResponseDocument();
  Element resultElement = responseDocument.getDocumentElement();
  NodeList recordList = DomUtils.getElementList(resultElement, "RECORD");

  /*
   * Examine each RECORD. A response without any RECORD elements is not
   * treated as an error here: the loop body simply never runs.
   */
  for (int i = 0; i < recordList.getLength(); i++)
  {
    /*
     * Pick up the database name & related information
     */
    Element recordElement = (Element) recordList.item(i);
    String target = recordElement.getAttribute("sourceID");
    String database = recordElement.getAttribute("source");
    String recordId = recordElement.getAttribute("identifier");

    /*
     * Update hit count
     */
    StatusUtils.updateHits(sessionContext, target);

    /*
     * The information we want resides in the DATA portion of the document
     */
    Element dataElement = DomUtils.getElement(recordElement, "DATA");
    if (dataElement == null) {
      log.error("No DATA element present in server response");
      displayXml(recordElement);
      throw new SearchException(
          "Missing mandatory element in server response");
    }

    String title = getText(dataElement, "TITLE");
    if (StringUtils.isNull(title)) {
      log.debug("No TITLE text in server response");
      title = "";
    }

    String description = getText(dataElement, "DESCRIPTION");
    if (StringUtils.isNull(description)) {
      log.debug("No DESCRIPTION text in server response");
      description = "";
    }

    /*
     * Save select search result data: title, abstract, record ID
     */
    MatchItem item = new MatchItem();

    log.debug("Adding TITLE: {}", title);
    item.setDisplayName(title);
    item.setDescription(description);
    item.setId(recordId);

    /*
     * Publisher, language
     */
    addPartStructure(dataElement, "PUBLICATION", item,
        PublisherPartStructure.getPartStructureId());
    addPartStructure(dataElement, "LANGUAGE", item,
        LanguagePartStructure.getPartStructureId());

    /*
     * In-line Citation information: use the first of CITATION, SOURCE,
     * DESCRIPTION, TITLE that has text (short-circuit evaluation stops at
     * the first element that was added).
     */
    if (!addPartStructure(dataElement, "CITATION", item,
            InLineCitationPartStructure.getPartStructureId())
        && !addPartStructure(dataElement, "SOURCE", item,
            InLineCitationPartStructure.getPartStructureId())
        && !addPartStructure(dataElement, "DESCRIPTION", item,
            InLineCitationPartStructure.getPartStructureId())) {
      addPartStructure(dataElement, "TITLE", item,
          InLineCitationPartStructure.getPartStructureId());
    }

    /*
     * Source title (falls back to SOURCE), volume, issue, edition
     */
    if (!addPartStructure(dataElement, "CITATION-JOURNAL-TITLE", item,
        SourceTitlePartStructure.getPartStructureId())) {
      addPartStructure(dataElement, "SOURCE", item,
          SourceTitlePartStructure.getPartStructureId());
    }
    addPartStructure(dataElement, "CITATION-VOLUME", item,
        VolumePartStructure.getPartStructureId());
    addPartStructure(dataElement, "CITATION-ISSUE", item,
        IssuePartStructure.getPartStructureId());
    addPartStructure(dataElement, "CITATION-PART", item,
        EditionPartStructure.getPartStructureId());

    /*
     * Pages
     */
    addPartStructure(dataElement, "CITATION-PAGES", item,
        PagesPartStructure.getPartStructureId());
    addPartStructure(dataElement, "CITATION-START-PAGE", item,
        StartPagePartStructure.getPartStructureId());
    addPartStructure(dataElement, "CITATION-END-PAGE", item,
        EndPagePartStructure.getPartStructureId());

    /*
     * Date and Year (the full date doubles as the year when no explicit
     * year element is present)
     */
    addPartStructure(dataElement, "CITATION-DATE", item,
        DatePartStructure.getPartStructureId());
    if (!addPartStructure(dataElement, "CITATION-DATE-YEAR", item,
        YearPartStructure.getPartStructureId())) {
      addPartStructure(dataElement, "CITATION-DATE", item,
          YearPartStructure.getPartStructureId());
    }

    /*
     * Type of publication: TYPE, then PUBLICATION-TYPE; failing both, a
     * record that carries journal title text is labelled "Journal". The
     * emptiness test uses StringUtils.isNull() like every other text check
     * in this class, so an empty element does not count as a journal title.
     */
    if (!addPartStructure(dataElement, "TYPE", item,
            TypePartStructure.getPartStructureId())
        && !addPartStructure(dataElement, "PUBLICATION-TYPE", item,
            TypePartStructure.getPartStructureId())
        && !StringUtils.isNull(getText(dataElement, "CITATION-JOURNAL-TITLE"))) {
      item.addPartStructure(TypePartStructure.getPartStructureId(),
          "Journal");
    }

    /*
     * URL
     */
    addPartStructure(dataElement, "URL", item,
        URLPartStructure.getPartStructureId());

    /*
     * Identifiers (ISSN, ISBN, DOI)
     */
    addPartStructure(dataElement, "ISBN", item,
        IsnIdentifierPartStructure.getPartStructureId());
    addPartStructure(dataElement, "ISSN", item,
        IsnIdentifierPartStructure.getPartStructureId());
    if (!addPartStructure(dataElement, "CITATION-DOI", item,
        DOIPartStructure.getPartStructureId())) {
      addPartStructure(dataElement, "DOI", item,
          DOIPartStructure.getPartStructureId());
    }

    /*
     * Author (add each in turn)
     */
    addPartStructureList(dataElement, "AUTHOR", item,
        CreatorPartStructure.getPartStructureId());

    /*
     * Subject (add each)
     */
    addPartStructureList(dataElement, "SUBJECT", item,
        SubjectPartStructure.getPartStructureId());

    /*
     * Try to derive still-missing fields from the in-line citation
     */
    doRegexParse(database, item);

    /*
     * Save the asset component we just created
     */
    addItem(item);
  }
}
/**
 * Does its best to map data contained in the record's in-line citation to
 * other fields (volume, issue, date, year, pages, source title) when those
 * fields are still missing from the record.
 * <p>
 * A DataSource is created from the database name and the citation text; if
 * its findRegExp() reports no match, nothing is changed. Otherwise each
 * missing field is looked up in the citation using the corresponding token
 * pattern supplied by the DataSource. Records without an in-line citation
 * are left untouched.
 *
 * @param database
 *          Database (source) name of the record, passed to DataSource
 * @param item
 *          The record to inspect and, where possible, complete
 */
private void doRegexParse(String database, MatchItem item)
{
  try
  {
    MatchItem.PartPair citationPair = getPartPair(
        InLineCitationPartStructure.getPartStructureId(), item);
    if (citationPair == null) {
      // No in-line citation was stored for this record: nothing to parse
      return;
    }

    String citation = (String) citationPair.getValue();
    if (citation == null) {
      return;
    }

    DataSource dataSource = new DataSource(database, citation);
    if (!dataSource.findRegExp())
    {
      return;
    }

    // Snapshot which parts already exist before anything is added
    boolean hasVolume = recordHasPart(
        VolumePartStructure.getPartStructureId(), item);
    boolean hasIssue = recordHasPart(
        IssuePartStructure.getPartStructureId(), item);
    boolean hasDate = recordHasPart(
        DatePartStructure.getPartStructureId(), item);
    boolean hasYear = recordHasPart(
        YearPartStructure.getPartStructureId(), item);
    boolean hasStartPage = recordHasPart(
        StartPagePartStructure.getPartStructureId(), item);
    boolean hasEndPage = recordHasPart(
        EndPagePartStructure.getPartStructureId(), item);
    boolean hasSourceTitle = recordHasPart(
        SourceTitlePartStructure.getPartStructureId(), item);

    Matcher matcher;

    if (!hasVolume) {
      matcher = Pattern.compile(dataSource.getVolumeToken()).matcher(citation);
      if (matcher.find()) {
        addPartStructure(item, VolumePartStructure.getInstance().getId(),
            matcher.group());
      }
    }

    if (!hasIssue) {
      matcher = Pattern.compile(dataSource.getIssueToken()).matcher(citation);
      if (matcher.find()) {
        // Keep only the digits of the matched issue text
        addPartStructure(item, IssuePartStructure.getInstance().getId(),
            matcher.group().replaceAll("\\D", ""));
      }
    }

    if (!hasDate) {
      matcher = Pattern.compile(dataSource.getDateToken()).matcher(citation);
      if (matcher.find()) {
        String date = trimMatchedText(matcher.group(),
            dataSource.getReplaceStartToken(),
            dataSource.getReplaceEndToken());
        addPartStructure(item, DatePartStructure.getInstance().getId(), date);
      }
    }

    if (!hasYear) {
      matcher = Pattern.compile(dataSource.getYearToken()).matcher(citation);
      if (matcher.find()) {
        String year = trimMatchedText(matcher.group(),
            dataSource.getReplaceStartToken(),
            dataSource.getReplaceEndToken());
        addPartStructure(item, YearPartStructure.getInstance().getId(), year);
      }
    }

    if (!hasStartPage || !hasEndPage) {
      matcher = Pattern.compile(dataSource.getPagesToken()).matcher(citation);
      if (matcher.find()) {
        createPagesPart(matcher.group(), item);
      }
    }

    if (!hasSourceTitle) {
      matcher = Pattern.compile(dataSource.getSourceTitleToken()).matcher(citation);
      if (matcher.find()) {
        // Drop the final character of the match (presumably a trailing
        // delimiter captured by the pattern)
        String sourceTitle = trimMatchedText(matcher.group(), 0, 1);
        addPartStructure(item,
            SourceTitlePartStructure.getInstance().getId(), sourceTitle);
      }
    }
  } catch (org.osid.repository.RepositoryException e) {
    log.warn("doRegexParse() failed", e);
  }
}

/**
 * Remove a fixed number of leading and trailing characters from matched text.
 *
 * @param matched
 *          Text returned by the matcher
 * @param leading
 *          Number of characters to drop from the front
 * @param trailing
 *          Number of characters to drop from the end
 * @return the trimmed text, or null when the requested range does not fit
 *         inside the matched text (a null value is never stored as a part)
 */
private static String trimMatchedText(String matched, int leading, int trailing) {
  int end = matched.length() - trailing;

  if (leading < 0 || trailing < 0 || leading > end) {
    return null;
  }
  return matched.substring(leading, end);
}
/**
 * Store page information taken from a citation: the page text as matched
 * and, where it can be split on '-', the start page and end page.
 * <p>
 * Null or empty text, and text that begins with a comma (treated as a
 * poorly formatted field), is ignored.
 *
 * @param text
 *          Page text matched in the citation (may be null)
 * @param item
 *          Record to which the page parts are added
 * @throws org.osid.repository.RepositoryException
 *           declared for the PartStructure id lookups performed here
 */
private void createPagesPart(String text, MatchItem item)
    throws org.osid.repository.RepositoryException {
  try {
    if (text == null || text.isEmpty()) {
      return;
    }
    if (text.charAt(0) == ',') {
      // getting a poorly formatted field
      return;
    }

    addPartStructure(item, PagesPartStructure.getInstance().getId(), text);

    // get start and end page if possible
    String[] pages = text.split("-");
    if (pages.length == 0) {
      // cannot create start/end page.
      return;
    }

    // delete all non-digit chars (ie: p., pp., etc.)
    String spage = pages[0].trim().replaceAll("\\D", "");
    log.debug("Start page: {}", spage);
    addPartStructure(item, StartPagePartStructure.getInstance().getId(),
        spage);

    // end page: only when the text split into exactly two pieces
    if (pages.length == 2) {
      String epage = pages[1].trim().replaceAll("\\D", "");
      addPartStructure(item, EndPagePartStructure.getInstance().getId(),
          epage);
    }
  } catch (StringIndexOutOfBoundsException e) {
    log.warn("createPagesPart()", e);
  }
}
/**
 * Report whether the record already holds a Part with the given
 * PartStructure id.
 *
 * @param partStructureId
 *          PartStructure id of the Part to look for
 * @param item
 *          Record to search
 * @return true if getPartPair() finds a matching part, false otherwise
 */
private boolean recordHasPart(org.osid.shared.Id partStructureId,
    MatchItem item) {
  return this.getPartPair(partStructureId, item) != null;
}
/**
 * Find the first id/value pair of the record whose id equals the given
 * PartStructure id.
 *
 * @param partStructureId
 *          PartStructure id to look for
 * @param item
 *          Record to search
 * @return the first matching pair, or null if the record has none
 */
private MatchItem.PartPair getPartPair(org.osid.shared.Id partStructureId,
    MatchItem item) {
  for (Iterator pairs = item.partPairIterator(); pairs.hasNext();) {
    MatchItem.PartPair candidate = (MatchItem.PartPair) pairs.next();

    if (candidate.getId().equals(partStructureId)) {
      return candidate;
    }
  }
  return null;
}
/*
* Helpers
*/
/**
 * Locate all elements with the given name and save the text of each one
 * that has any as a PartStructure id/value pair.
 *
 * @param parentElement
 *          Start looking here
 * @param partDataName
 *          Name of the XML element we're looking for
 * @param item
 *          Current MatchItem (eg Asset)
 * @param id
 *          Part ID
 * @return true if at least one element with text was found, false if none
 */
private boolean addPartStructureList(Element parentElement,
    String partDataName, MatchItem item, org.osid.shared.Id id) {
  NodeList matches = DomUtils.getElementList(parentElement, partDataName);
  boolean anyAdded = false;

  for (int index = 0; index < matches.getLength(); index++) {
    String text = DomUtils.getText((Element) matches.item(index));

    if (StringUtils.isNull(text)) {
      continue;
    }
    addPartStructure(item, id, text);
    anyAdded = true;
  }
  return anyAdded;
}
/**
 * Save (add new) PartStructure data. The value is trimmed first; nothing
 * is stored when StringUtils.isNull() rejects the trimmed text.
 *
 * @param item
 *          Current MatchItem (eg Asset)
 * @param id
 *          Part ID
 * @param value
 *          Part value (may be null)
 * @return true if Part data was added, false if there was no data to add
 */
private boolean addPartStructure(MatchItem item, org.osid.shared.Id id,
    String value) {
  String text = (value == null) ? null : value.trim();

  if (StringUtils.isNull(text)) {
    return false;
  }
  item.addPartStructure(id, text);
  return true;
}
/**
 * Look up the text of a named child element in the response XML and save
 * it as PartStructure data.
 *
 * @param parentElement
 *          Parent Element - the search starts here
 * @param partDataName
 *          The name of the child element where Part data is found
 * @param item
 *          Current MatchItem (eg Asset)
 * @param id
 *          Part ID
 * @return true if Part data was added, false if no data was found
 */
private boolean addPartStructure(Element parentElement,
    String partDataName, MatchItem item, org.osid.shared.Id id) {
  return addPartStructure(item, id, getText(parentElement, partDataName));
}
/**
 * Fetch the text of a named element.
 *
 * @param parent
 *          Search from here
 * @param name
 *          Find this element
 * @return whatever DomUtils.getText() yields for the element, or null when
 *         no such element is found
 */
private String getText(Element parent, String name) {
  Element element = DomUtils.getElement(parent, name);

  return (element == null) ? null : DomUtils.getText(element);
}
/**
 * Write a header text followed by an XML object to the debug log.
 *
 * @param errorText
 *          Header text logged in front of the XML object
 * @param xmlObject
 *          The XML object to display (Document, Element)
 */
private static void displayXml(String errorText, Object xmlObject) {
  try {
    log.debug("{} {}", errorText, xmlObject);
  } catch (Exception ignored) {
    // Display is best effort: a failure here is deliberately not propagated
  }
}
/**
 * Write an XML object to the debug log.
 *
 * @param xmlObject
 *          XML to display (Document, Element)
 */
private void displayXml(Object xmlObject) {
  try {
    log.debug("{}", xmlObject);
  } catch (Exception ignored) {
    // Display is best effort: a failure here is deliberately not propagated
  }
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy