// com.hfg.citation.ncbi.NCBI_eFetch
// Part of com_hfg: the com.hfg xml, html, svg, and bioinformatics utility library.
package com.hfg.citation.ncbi;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.hfg.bio.seq.format.SeqCitation;
import com.hfg.citation.CitationRetriever;
import com.hfg.citation.Journal;
import com.hfg.util.StringBuilderPlus;
import com.hfg.util.StringUtil;
import com.hfg.util.User;
import com.hfg.util.collection.CollectionUtil;
import com.hfg.util.io.HTTPUtil;
import com.hfg.xml.XMLTag;
/**
 * {@link CitationRetriever} that looks up a publication through NCBI's eFetch
 * service (PubMed database) and returns it as a {@link MedlineCitation}.
 * <p>
 * If the query carries a DOI but no PubMed id, the DOI is first submitted to the
 * NCBI ID converter service and the resulting PubMed id is stored back on the query
 * object before the eFetch request is made.
 * </p>
 * <p>
 * Every request identifies the caller with <code>tool=com_hfg</code> and the email
 * address of the {@link User} given at construction.
 * </p>
 */
public class NCBI_eFetch implements CitationRetriever
{
   private User mUser;

   // Ex: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=22368089&tool=my_tool&email=my_email@example.com
   private String mBaseURL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi";

   // Ex: https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/?tool=my_tool&email=my_email@example.com&ids=10.1093/nar/gks1195
   private String mIDConvertBaseURL = "https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/";

   // Ex: Science. 2002 Nov 8;298(5596):1248-51
   // Groups: 1 = journal abbreviation (text before the first period), 2 = four-digit year,
   //         3 = volume, 4 = issue (optional, in parentheses), 5 = pages.
   private static final Pattern JOURNAL_CITATION_PATTERN = Pattern.compile("([^\\.]+)\\.\\s+(\\d{4})[^\\;]+\\;(\\d+)(?:\\((\\d+)\\))?\\:([\\d\\-]+)");

   //---------------------------------------------------------------------------
   /**
    * @param inUser the user whose email address is sent along with each request
    */
   public NCBI_eFetch(User inUser)
   {
      mUser = inUser;
   }

   //###########################################################################
   // PUBLIC METHODS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    * @return the base URL (without query string) used for eFetch requests
    */
   public String getBaseQueryURL()
   {
      return mBaseURL;
   }

   //---------------------------------------------------------------------------
   /**
    * Overrides the base URL (without query string) used for eFetch requests.
    *
    * @param inValue the new base URL
    * @return this object, to allow method chaining
    */
   public NCBI_eFetch setBaseQueryURL(String inValue)
   {
      mBaseURL = inValue;
      return this;
   }

   //---------------------------------------------------------------------------
   /**
    * Retrieves the PubMed record for the specified query.
    * <p>
    * Side effect: if the query has a DOI but no PubMed id and the DOI can be converted,
    * the PubMed id is set on <code>inQueryData</code>.
    * </p>
    *
    * @param inQueryData the citation to look up; must carry a PubMed id or a DOI
    * @return the retrieved citation (including its reference list if the response has one),
    *         or <code>null</code> if no PubMed id could be determined or the service
    *         did not respond with HTTP 200
    * @throws IOException if communication with the service fails
    */
   @Override
   public MedlineCitation fetch(SeqCitation inQueryData)
      throws IOException
   {
      if (! StringUtil.isSet(inQueryData.getPubMedId())
          && StringUtil.isSet(inQueryData.getDOI()))
      {
         lookupPubMedIdFromDOI(inQueryData);
      }

      // Without a PubMed id the request would go out as "id=null"; there is nothing to fetch.
      if (! StringUtil.isSet(inQueryData.getPubMedId()))
      {
         return null;
      }

      MedlineCitation citation = null;

      HttpURLConnection conn = HTTPUtil.openConnection(composeQueryURL(inQueryData));
      try
      {
         if (200 == conn.getResponseCode())
         {
            // All parsing happens inside the try block so the stream is still open while it is read.
            try (InputStream stream = new BufferedInputStream(conn.getInputStream()))
            {
               XMLTag xmlTag = new XMLTag(stream);

               XMLTag pubmedArticleTag   = xmlTag.getRequiredSubtagByName(PubmedXML.PUBMED_ARTICLE);
               XMLTag medlineCitationTag = pubmedArticleTag.getRequiredSubtagByName(PubmedXML.MEDLINE_CITATION);

               citation = new MedlineCitation(medlineCitationTag);

               // References
               XMLTag pubmedDataTag = pubmedArticleTag.getOptionalSubtagByName(PubmedXML.PUBMED_DATA);
               if (pubmedDataTag != null)
               {
                  XMLTag refListTag = pubmedDataTag.getOptionalSubtagByName(PubmedXML.REFERENCE_LIST);
                  if (refListTag != null)
                  {
                     citation.setReferences(parseReferences(refListTag));
                  }
               }
            }
         }
      }
      finally
      {
         conn.disconnect();
      }

      return citation;
   }

   //---------------------------------------------------------------------------
   /**
    * Builds the eFetch request URL for the PubMed id of the specified query.
    * <p>
    * NOTE(review): the email and id values are appended verbatim (no URL-encoding is
    * applied here) - confirm whether HTTPUtil.openConnection() takes care of that.
    * </p>
    */
   private String composeQueryURL(SeqCitation inQueryData)
   {
      StringBuilderPlus url = new StringBuilderPlus(getBaseQueryURL()).setDelimiter("&")
            .append("?")
            .append("tool=com_hfg")
            .delimitedAppend("email=" + mUser.getEmail())
            .delimitedAppend("db=pubmed")
            .delimitedAppend("format=xml")
            .delimitedAppend("id=" + inQueryData.getPubMedId());

      return url.toString();
   }

   //---------------------------------------------------------------------------
   /**
    * Submits the query's DOI to the ID converter service and, if the response contains
    * at least one <code>record</code> tag, copies the <code>pmid</code> attribute of the
    * first one onto the query as its PubMed id.
    *
    * @param inQueryData the citation whose DOI should be converted
    * @throws IOException if communication with the service fails
    */
   private void lookupPubMedIdFromDOI(SeqCitation inQueryData)
      throws IOException
   {
      StringBuilderPlus url = new StringBuilderPlus(mIDConvertBaseURL).setDelimiter("&")
            .append("?")
            .append("tool=com_hfg")
            .delimitedAppend("email=" + mUser.getEmail())
            .delimitedAppend("ids=" + inQueryData.getDOI());

      HttpURLConnection conn = HTTPUtil.openConnection(url.toString());
      try (InputStream stream = new BufferedInputStream(conn.getInputStream()))
      {
         XMLTag xmlTag = new XMLTag(stream);

         /*
           Example response (abbreviated; only the 'record' tag and its 'pmid' attribute are used here):

           <echo>tool=my_tool;email=my_email%40example.com;ids=10.1093%2Fnar%2Fgks1195</echo>
           <record requested-id="10.1093/nar/gks1195" pmid="..." ... />
          */
         List<XMLTag> recordTags = xmlTag.getSubtagsByName("record");
         if (CollectionUtil.hasValues(recordTags))
         {
            XMLTag recordTag = recordTags.get(0);
            inQueryData.setPubMedId(recordTag.getAttributeValue("pmid"));
         }
      }
      finally
      {
         conn.disconnect();
      }
   }

   //---------------------------------------------------------------------------
   /**
    * Converts the reference tags of a reference list into citations.
    * <p>
    * For each reference the raw citation text is stored and, if it has the form
    * "Journal. Year ...;Volume(Issue):Pages", the journal abbreviation, year, volume,
    * issue, and pages are extracted from it. A PubMed id is set if the reference's
    * article id list contains an id of type "pubmed".
    * </p>
    *
    * @param inRefListTag the reference list tag
    * @return one citation per reference tag, or <code>null</code> if there are no reference tags
    */
   private List<MedlineCitation> parseReferences(XMLTag inRefListTag)
   {
      List<MedlineCitation> references = null;

      List<XMLTag> referenceTags = inRefListTag.getSubtagsByName(PubmedXML.REFERENCE);
      if (CollectionUtil.hasValues(referenceTags))
      {
         references = new ArrayList<>(referenceTags.size());
         for (XMLTag referenceTag : referenceTags)
         {
            MedlineCitation citation = new MedlineCitation();
            references.add(citation);

            XMLTag citationTag = referenceTag.getOptionalSubtagByName(PubmedXML.CITATION);
            if (citationTag != null)
            {
               String rawCitation = citationTag.getContent().trim();
               citation.setRawContent(rawCitation);

               // Match against the raw citation text itself rather than citation.toString(),
               // whose output format is not defined in this file.
               Matcher m = JOURNAL_CITATION_PATTERN.matcher(rawCitation);
               if (m.matches())
               {
                  Journal journal = new Journal().setAbbrev(m.group(1));
                  citation.setJournal(journal);
                  citation.setYear(Integer.parseInt(m.group(2)));
                  citation.setVolume(m.group(3));
                  citation.setIssue(m.group(4)); // null when the citation has no "(issue)" part
                  citation.setPages(m.group(5));
               }
            }

            XMLTag articleIdListTag = referenceTag.getOptionalSubtagByName(PubmedXML.ARTICLE_ID_LIST);
            if (articleIdListTag != null)
            {
               // The article ids are looked up on the id list tag, not on the reference tag.
               List<XMLTag> articleIdTags = articleIdListTag.getSubtagsByName(PubmedXML.ARTICLE_ID);
               if (CollectionUtil.hasValues(articleIdTags))
               {
                  for (XMLTag articleIdTag : articleIdTags)
                  {
                     // Constant-first comparison so a missing id type attribute is skipped instead of throwing.
                     if ("pubmed".equalsIgnoreCase(articleIdTag.getAttributeValue(PubmedXML.ID_TYPE_ATT)))
                     {
                        citation.setPubMedId(articleIdTag.getContent().trim());
                     }
                  }
               }
            }
         }
      }

      return references;
   }
}