// org.apache.tika.parser.geoinfo.GeographicInformationParser Maven / Gradle / Ivy
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of aem-sdk-api Show documentation
// The Adobe Experience Manager SDK
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.geoinfo;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.apache.sis.internal.util.CheckedArrayList;
import org.apache.sis.internal.util.CheckedHashSet;
import org.apache.sis.metadata.iso.DefaultMetadata;
import org.apache.sis.metadata.iso.DefaultMetadataScope;
import org.apache.sis.metadata.iso.constraint.DefaultLegalConstraints;
import org.apache.sis.metadata.iso.extent.DefaultGeographicBoundingBox;
import org.apache.sis.metadata.iso.extent.DefaultGeographicDescription;
import org.apache.sis.metadata.iso.identification.DefaultDataIdentification;
import org.apache.sis.storage.DataStore;
import org.apache.sis.storage.DataStoreException;
import org.apache.sis.storage.DataStores;
import org.apache.sis.storage.UnsupportedStorageException;
import org.apache.sis.util.collection.CodeListSet;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.XHTMLContentHandler;
import org.opengis.metadata.Identifier;
import org.opengis.metadata.citation.Citation;
import org.opengis.metadata.citation.CitationDate;
import org.opengis.metadata.citation.OnlineResource;
import org.opengis.metadata.citation.ResponsibleParty;
import org.opengis.metadata.constraint.Restriction;
import org.opengis.metadata.distribution.DigitalTransferOptions;
import org.opengis.metadata.distribution.Distribution;
import org.opengis.metadata.distribution.Distributor;
import org.opengis.metadata.distribution.Format;
import org.opengis.metadata.extent.Extent;
import org.opengis.metadata.extent.GeographicExtent;
import org.opengis.metadata.identification.Identification;
import org.opengis.metadata.identification.Keywords;
import org.opengis.metadata.identification.Progress;
import org.opengis.metadata.identification.TopicCategory;
import org.opengis.util.InternationalString;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
/**
 * Tika parser for geographic metadata documents of type {@code text/iso19139+xml}.
 *
 * <p>The input is spooled to a file, opened as an Apache SIS {@link DataStore}, and the
 * metadata SIS reports is copied into the Tika {@link Metadata} object under the keys
 * used below. A short XHTML summary (citation titles, cited responsible parties,
 * abstracts and geographic bounding boxes) is written to the content handler.</p>
 *
 * <p>Metadata keys and emitted text are part of the observable output and are kept
 * exactly as they were, including trailing spaces in key names.</p>
 */
public class GeographicInformationParser extends AbstractParser {

    private static final Logger LOG = LoggerFactory.getLogger(GeographicInformationParser.class);

    public static final String geoInfoType = "text/iso19139+xml";

    private final Set<MediaType> SUPPORTED_TYPES =
            Collections.singleton(MediaType.text("iso19139+xml"));

    /**
     * Returns the single media type handled by this parser.
     *
     * @param parseContext the parse context (not consulted)
     * @return a singleton set containing {@code text/iso19139+xml}
     */
    @Override
    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        return SUPPORTED_TYPES;
    }

    /**
     * Parses the stream with Apache SIS and reports metadata and XHTML content.
     *
     * @param inputStream    the document to parse
     * @param contentHandler receives the XHTML summary
     * @param metadata       receives the content type and all extracted properties
     * @param parseContext   the parse context (not consulted)
     * @throws IOException   if the stream cannot be spooled to a file
     * @throws SAXException  if the content handler rejects an event
     * @throws TikaException if SIS cannot open or read the document
     */
    @Override
    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata,
                      ParseContext parseContext) throws IOException, SAXException, TikaException {
        metadata.set(Metadata.CONTENT_TYPE, geoInfoType);
        XHTMLContentHandler xhtmlContentHandler = new XHTMLContentHandler(contentHandler, metadata);
        // Only own (and later dispose) temporary resources when the caller did not
        // already hand us a TikaInputStream.
        TemporaryResources tmp = TikaInputStream.isTikaInputStream(inputStream) ? null
                : new TemporaryResources();
        DataStore dataStore = null;
        try {
            TikaInputStream tikaInputStream = TikaInputStream.get(inputStream, tmp);
            File file = tikaInputStream.getFile();
            dataStore = DataStores.open(file);
            DefaultMetadata defaultMetadata = new DefaultMetadata(dataStore.getMetadata());
            extract(xhtmlContentHandler, metadata, defaultMetadata);
        } catch (UnsupportedStorageException e) {
            throw new TikaException("UnsupportedStorageException", e);
        } catch (DataStoreException e) {
            throw new TikaException("DataStoreException", e);
        } finally {
            try {
                // Release the store before the temporary file it reads from is disposed.
                closeQuietly(dataStore);
            } finally {
                if (tmp != null) {
                    tmp.dispose();
                }
            }
        }
    }

    /** Closes the data store if one was opened; a failure to close is logged, not rethrown. */
    private static void closeQuietly(DataStore dataStore) {
        if (dataStore == null) {
            return;
        }
        try {
            dataStore.close();
        } catch (DataStoreException e) {
            LOG.warn("Unable to close the SIS data store", e);
        }
    }

    /** Copies every supported metadata section into {@code metadata}, then writes the XHTML body. */
    private void extract(XHTMLContentHandler xhtmlContentHandler, Metadata metadata,
                         DefaultMetadata defaultMetadata) throws SAXException {
        getMetaDataCharacterSet(metadata, defaultMetadata);
        getMetaDataContact(metadata, defaultMetadata);
        getMetaDataIdentificationInfo(metadata, defaultMetadata);
        getMetaDataDistributionInfo(metadata, defaultMetadata);
        getMetaDataDateInfo(metadata, defaultMetadata);
        getMetaDataResourceScope(metadata, defaultMetadata);
        getMetaDataParentMetaDataTitle(metadata, defaultMetadata);
        getMetaDataIdetifierCode(metadata, defaultMetadata);
        getMetaDataStandard(metadata, defaultMetadata);
        extractContent(xhtmlContentHandler, defaultMetadata);
    }

    /**
     * Writes the XHTML summary: per identification an {@code h1} title, one {@code h3} per
     * cited responsible party, a {@code p} with the abstract and one {@code tr} per
     * bounding-box edge. Absent values are skipped instead of raising an exception.
     */
    private void extractContent(XHTMLContentHandler xhtmlContentHandler,
                                DefaultMetadata defaultMetadata) throws SAXException {
        xhtmlContentHandler.startDocument();
        xhtmlContentHandler.newline();
        xhtmlContentHandler.newline();
        for (Identification identification : defaultMetadata.getIdentificationInfo()) {
            Citation citation = identification.getCitation();
            if (citation != null) {
                if (citation.getTitle() != null) {
                    xhtmlContentHandler.startElement("h1");
                    xhtmlContentHandler.characters(citation.getTitle().toString());
                    xhtmlContentHandler.endElement("h1");
                    xhtmlContentHandler.newline();
                }
                for (ResponsibleParty party : citation.getCitedResponsibleParties()) {
                    xhtmlContentHandler.startElement("h3");
                    xhtmlContentHandler.newline();
                    if (party.getRole() != null) {
                        xhtmlContentHandler.characters(
                                "CitedResponsiblePartyRole " + party.getRole().toString());
                    }
                    if (party.getIndividualName() != null) {
                        xhtmlContentHandler.characters(
                                "CitedResponsiblePartyName " + party.getIndividualName().toString());
                    }
                    xhtmlContentHandler.endElement("h3");
                    xhtmlContentHandler.newline();
                }
            }
            if (identification.getAbstract() != null) {
                xhtmlContentHandler.startElement("p");
                xhtmlContentHandler.newline();
                xhtmlContentHandler.characters(
                        "IdentificationInfoAbstract " + identification.getAbstract().toString());
                xhtmlContentHandler.endElement("p");
                xhtmlContentHandler.newline();
            }
            // Extents are read through the SIS data-identification class; other
            // identification kinds are skipped rather than force-cast.
            if (identification instanceof DefaultDataIdentification) {
                for (Extent extent : ((DefaultDataIdentification) identification).getExtents()) {
                    for (GeographicExtent element : extent.getGeographicElements()) {
                        if (element instanceof DefaultGeographicBoundingBox) {
                            writeBoundingBox(xhtmlContentHandler, (DefaultGeographicBoundingBox) element);
                        }
                    }
                }
            }
        }
        xhtmlContentHandler.newline();
        xhtmlContentHandler.endDocument();
    }

    /** Emits the four edges of a bounding box as label/value table rows. */
    private static void writeBoundingBox(XHTMLContentHandler xhtmlContentHandler,
                                         DefaultGeographicBoundingBox box) throws SAXException {
        // NOTE(review): the west/east rows carry longitude values but are labelled
        // "...Latitude". The labels are emitted text and are left unchanged here.
        writeRow(xhtmlContentHandler, "GeographicElementWestBoundLatitude",
                String.valueOf(box.getWestBoundLongitude()));
        writeRow(xhtmlContentHandler, "GeographicElementEastBoundLatitude",
                String.valueOf(box.getEastBoundLongitude()));
        writeRow(xhtmlContentHandler, "GeographicElementNorthBoundLatitude",
                String.valueOf(box.getNorthBoundLatitude()));
        writeRow(xhtmlContentHandler, "GeographicElementSouthBoundLatitude",
                String.valueOf(box.getSouthBoundLatitude()));
    }

    /** Emits one {@code tr} holding two {@code td} cells: the label and the value. */
    private static void writeRow(XHTMLContentHandler xhtmlContentHandler, String label,
                                 String value) throws SAXException {
        xhtmlContentHandler.startElement("tr");
        xhtmlContentHandler.startElement("td");
        xhtmlContentHandler.characters(label);
        xhtmlContentHandler.endElement("td");
        xhtmlContentHandler.startElement("td");
        xhtmlContentHandler.characters(value);
        xhtmlContentHandler.endElement("td");
        xhtmlContentHandler.endElement("tr");
    }

    /** Adds the name of every non-null charset declared by the metadata as {@code CharacterSet}. */
    private void getMetaDataCharacterSet(Metadata metadata, DefaultMetadata defaultMetaData) {
        Map<Locale, Charset> localesAndCharsets = defaultMetaData.getLocalesAndCharsets();
        for (Charset charset : localesAndCharsets.values()) {
            if (charset != null) {
                metadata.add("CharacterSet", charset.name());
            }
        }
    }

    /** Adds role and organisation name of every metadata contact. */
    private void getMetaDataContact(Metadata metadata, DefaultMetadata defaultMetaData) {
        for (ResponsibleParty contact : defaultMetaData.getContacts()) {
            if (contact.getRole() != null) {
                metadata.add("ContactRole", contact.getRole().name());
            }
            if (contact.getOrganisationName() != null) {
                metadata.add("ContactPartyName-", contact.getOrganisationName().toString());
            }
        }
    }

    /**
     * Adds, for every identification entry: citation details, abstract, status, resource
     * formats, languages, topic categories, keywords, legal constraints and geographic
     * descriptions. Properties that are absent are skipped.
     */
    private void getMetaDataIdentificationInfo(Metadata metadata, DefaultMetadata defaultMetaData) {
        for (Identification identification : defaultMetaData.getIdentificationInfo()) {
            DefaultDataIdentification dataIdentification =
                    identification instanceof DefaultDataIdentification
                            ? (DefaultDataIdentification) identification : null;

            addCitation(metadata, identification.getCitation());

            if (identification.getAbstract() != null) {
                metadata.add("IdentificationInfoAbstract ", identification.getAbstract().toString());
            }
            for (Progress progress : identification.getStatus()) {
                metadata.add("IdentificationInfoStatus ", progress.name());
            }
            for (Format format : identification.getResourceFormats()) {
                if (format.getName() != null) {
                    metadata.add("ResourceFormatSpecificationAlternativeTitle ",
                            format.getName().toString());
                }
            }
            if (dataIdentification != null) {
                Map<Locale, Charset> localesAndCharsets = dataIdentification.getLocalesAndCharsets();
                for (Locale locale : localesAndCharsets.keySet()) {
                    if (locale != null) {
                        metadata.add("IdentificationInfoLanguage-->",
                                locale.getDisplayLanguage(Locale.ENGLISH));
                    }
                }
                for (TopicCategory category : dataIdentification.getTopicCategories()) {
                    metadata.add("IdentificationInfoTopicCategory-->", category.name());
                }
            }

            addKeywords(metadata, identification);
            addLegalConstraints(metadata, identification);

            if (dataIdentification != null) {
                for (Extent extent : dataIdentification.getExtents()) {
                    for (GeographicExtent element : extent.getGeographicElements()) {
                        if (element instanceof DefaultGeographicDescription) {
                            addGeographicDescription(metadata, (DefaultGeographicDescription) element);
                        }
                    }
                }
            }
        }
    }

    /** Adds title, dates and cited responsible parties of a citation; a null citation adds nothing. */
    private static void addCitation(Metadata metadata, Citation citation) {
        if (citation == null) {
            return;
        }
        if (citation.getTitle() != null) {
            metadata.add("IdentificationInfoCitationTitle ", citation.getTitle().toString());
        }
        for (CitationDate date : citation.getDates()) {
            if (date.getDateType() != null) {
                metadata.add("CitationDate ", date.getDateType().name() + "-->" + date.getDate());
            }
        }
        for (ResponsibleParty party : citation.getCitedResponsibleParties()) {
            if (party.getRole() != null) {
                metadata.add("CitedResponsiblePartyRole ", party.getRole().toString());
            }
            if (party.getIndividualName() != null) {
                metadata.add("CitedResponsiblePartyName ", party.getIndividualName().toString());
            }
            if (party.getOrganisationName() != null) {
                metadata.add("CitedResponsiblePartyOrganizationName ",
                        party.getOrganisationName().toString());
            }
            if (party.getPositionName() != null) {
                metadata.add("CitedResponsiblePartyPositionName ", party.getPositionName().toString());
            }
            if (party.getContactInfo() != null && party.getContactInfo().getAddress() != null) {
                for (String email : party.getContactInfo().getAddress().getElectronicMailAddresses()) {
                    metadata.add("CitedResponsiblePartyEMail ", email);
                }
            }
        }
    }

    /** Adds keyword groups, their type and their thesaurus citation details. */
    private static void addKeywords(Metadata metadata, Identification identification) {
        // The group counter is incremented before first use, so key suffixes start at 2.
        // This numbering is kept as is because the suffix is part of the metadata key.
        int groupNumber = 1;
        for (Keywords keywords : identification.getDescriptiveKeywords()) {
            groupNumber++;
            for (InternationalString keyword : keywords.getKeywords()) {
                metadata.add("Keywords " + groupNumber, keyword.toString());
            }
            if (keywords.getType() != null) {
                metadata.add("KeywordsType " + groupNumber, keywords.getType().name());
            }
            Citation thesaurus = keywords.getThesaurusName();
            if (thesaurus == null) {
                continue;
            }
            if (thesaurus.getTitle() != null) {
                metadata.add("ThesaurusNameTitle " + groupNumber, thesaurus.getTitle().toString());
            }
            if (thesaurus.getAlternateTitles() != null) {
                metadata.add("ThesaurusNameAlternativeTitle " + groupNumber,
                        thesaurus.getAlternateTitles().toString());
            }
            for (CitationDate date : thesaurus.getDates()) {
                if (date.getDateType() != null) {
                    metadata.add("ThesaurusNameDate ",
                            date.getDateType().name() + "-->" + date.getDate());
                }
            }
        }
    }

    /** Adds access, other and use constraints of every legal-constraints entry. */
    private static void addLegalConstraints(Metadata metadata, Identification identification) {
        Collection<?> constraints = identification.getResourceConstraints();
        for (Object constraint : constraints) {
            // Only SIS legal constraints expose the three lists read below; any other
            // constraint kind is skipped instead of failing on a cast.
            if (!(constraint instanceof DefaultLegalConstraints)) {
                continue;
            }
            DefaultLegalConstraints legal = (DefaultLegalConstraints) constraint;
            for (Restriction restriction : legal.getAccessConstraints()) {
                metadata.add("AccessContraints ", restriction.name());
            }
            for (InternationalString other : legal.getOtherConstraints()) {
                metadata.add("OtherConstraints ", other.toString());
            }
            for (Restriction restriction : legal.getUseConstraints()) {
                metadata.add("UserConstraints ", restriction.name());
            }
        }
    }

    /** Adds the identifier code and authority citation details of a geographic description. */
    private static void addGeographicDescription(Metadata metadata,
                                                 DefaultGeographicDescription description) {
        Identifier identifier = description.getGeographicIdentifier();
        if (identifier == null) {
            return;
        }
        if (identifier.getCode() != null) {
            metadata.add("GeographicIdentifierCode ", identifier.getCode().toString());
        }
        Citation authority = identifier.getAuthority();
        if (authority == null) {
            return;
        }
        if (authority.getTitle() != null) {
            metadata.add("GeographicIdentifierAuthorityTitle ", authority.getTitle().toString());
        }
        for (InternationalString alternateTitle : authority.getAlternateTitles()) {
            metadata.add("GeographicIdentifierAuthorityAlternativeTitle ", alternateTitle.toString());
        }
        for (CitationDate date : authority.getDates()) {
            if (date.getDateType() != null && date.getDate() != null) {
                metadata.add("GeographicIdentifierAuthorityDate ",
                        date.getDateType().name() + " " + date.getDate().toString());
            }
        }
    }

    /** Adds distribution formats, distributor contacts and online transfer options, if any. */
    private void getMetaDataDistributionInfo(Metadata metadata, DefaultMetadata defaultMetaData) {
        Distribution distribution = defaultMetaData.getDistributionInfo();
        if (distribution == null) {
            return;
        }
        for (Format format : distribution.getDistributionFormats()) {
            if (format.getName() != null) {
                metadata.add("DistributionFormatSpecificationAlternativeTitle ",
                        format.getName().toString());
            }
        }
        for (Distributor distributor : distribution.getDistributors()) {
            if (distributor == null || distributor.getDistributorContact() == null) {
                continue;
            }
            ResponsibleParty contact = distributor.getDistributorContact();
            if (contact.getRole() != null) {
                metadata.add("Distributor Contact ", contact.getRole().name());
            }
            if (contact.getOrganisationName() != null) {
                metadata.add("Distributor Organization Name ",
                        contact.getOrganisationName().toString());
            }
        }
        for (DigitalTransferOptions options : distribution.getTransferOptions()) {
            for (OnlineResource online : options.getOnLines()) {
                if (online.getLinkage() != null) {
                    metadata.add("TransferOptionsOnlineLinkage ", online.getLinkage().toString());
                }
                if (online.getProtocol() != null) {
                    metadata.add("TransferOptionsOnlineProtocol ", online.getProtocol());
                }
                if (online.getApplicationProfile() != null) {
                    metadata.add("TransferOptionsOnlineProfile ", online.getApplicationProfile());
                }
                if (online.getName() != null) {
                    metadata.add("TransferOptionsOnlineName ", online.getName());
                }
                if (online.getDescription() != null) {
                    metadata.add("TransferOptionsOnlineDescription ",
                            online.getDescription().toString());
                }
                if (online.getFunction() != null) {
                    metadata.add("TransferOptionsOnlineFunction ", online.getFunction().name());
                }
            }
        }
    }

    /** Adds every metadata date that has a date type as {@code DateInfo }. */
    private void getMetaDataDateInfo(Metadata metadata, DefaultMetadata defaultMetaData) {
        for (CitationDate date : defaultMetaData.getDateInfo()) {
            if (date.getDateType() != null) {
                metadata.add("DateInfo ", date.getDateType().name() + " " + date.getDate());
            }
        }
    }

    /** Adds the resource scope of every metadata scope that declares one. */
    private void getMetaDataResourceScope(Metadata metadata, DefaultMetadata defaultMetaData) {
        for (DefaultMetadataScope scope : defaultMetaData.getMetadataScopes()) {
            if (scope.getResourceScope() != null) {
                metadata.add("MetaDataResourceScope ", scope.getResourceScope().name());
            }
        }
    }

    /** Adds the title of the parent metadata citation, if present. */
    private void getMetaDataParentMetaDataTitle(Metadata metadata, DefaultMetadata defaultMetaData) {
        Citation parentMetaData = defaultMetaData.getParentMetadata();
        if (parentMetaData != null && parentMetaData.getTitle() != null) {
            metadata.add("ParentMetaDataTitle", parentMetaData.getTitle().toString());
        }
    }

    /** Adds the code of the metadata identifier, if both identifier and code are present. */
    private void getMetaDataIdetifierCode(Metadata metadata, DefaultMetadata defaultMetaData) {
        Identifier identifier = defaultMetaData.getMetadataIdentifier();
        if (identifier != null && identifier.getCode() != null) {
            metadata.add("MetaDataIdentifierCode", identifier.getCode());
        }
    }

    /** Adds title and edition of every metadata standard citation. */
    private void getMetaDataStandard(Metadata metadata, DefaultMetadata defaultMetaData) {
        for (Citation standard : defaultMetaData.getMetadataStandards()) {
            if (standard.getTitle() != null) {
                metadata.add("MetaDataStandardTitle ", standard.getTitle().toString());
            }
            if (standard.getEdition() != null) {
                metadata.add("MetaDataStandardEdition ", standard.getEdition().toString());
            }
        }
    }
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy