
// it.tidalwave.bluebill.mobile.resources.ResourcesGenerator (Maven / Gradle / Ivy)
// The newest version!
/***********************************************************************************************************************
*
* blueBill Resources - open source birding
* Copyright (C) 2009-2011 by Tidalwave s.a.s. (http://www.tidalwave.it)
*
***********************************************************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
***********************************************************************************************************************
*
* WWW: http://bluebill.tidalwave.it
* SCM: https://java.net/hg/bluebill~resources-src
*
**********************************************************************************************************************/
package it.tidalwave.bluebill.mobile.resources;
import javax.annotation.Nonnull;
import java.util.Collections;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import it.tidalwave.util.Id;
import org.json.me.JSONException;
import org.openrdf.model.Statement;
import org.openrdf.model.ValueFactory;
import org.openrdf.repository.RepositoryResult;
import org.openrdf.repository.Repository;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.repository.RepositoryException;
import org.openrdf.repository.sail.SailRepository;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.RDFParseException;
import org.openrdf.sail.memory.MemoryStore;
import org.openrdf.elmo.ElmoManager;
import org.openrdf.elmo.ElmoModule;
import org.openrdf.elmo.sesame.SesameManagerFactory;
import it.tidalwave.util.NotFoundException;
import it.tidalwave.netbeans.util.Locator;
import it.tidalwave.openrdf.elmo.ElmoManagerThreadLocal;
import it.tidalwave.bluebill.taxonomy.Taxonomy;
import it.tidalwave.bluebill.taxonomy.TaxonomyManager;
import it.tidalwave.bluebill.taxonomy.elmo.impl.ElmoExporter;
import it.tidalwave.bluebill.taxonomy.io.Exporter;
import it.tidalwave.bluebill.taxonomy.io.Importer;
import it.tidalwave.role.Identifiable;
import lombok.Cleanup;
import lombok.extern.slf4j.Slf4j;
import static it.tidalwave.bluebill.taxonomy.elmo.ElmoTaxonomyVocabulary.*;
import static it.tidalwave.bluebill.mobile.resources.RdfUtils.*;
/***********************************************************************************************************************
 *
 * Command-line tool that imports a fixed set of RDF taxonomy files into in-memory Sesame repositories and generates
 * the derived resources in the target folder:
 * <ul>
 * <li>for each taxonomy, a {@code .n3} and a {@code .json} export;</li>
 * <li>{@code factsheets.zip}, containing one {@code .n3} entry per taxon;</li>
 * <li>{@code taxonomies.n3}, the catalog of the imported taxonomies.</li>
 * </ul>
 *
 * @author Fabrizio Giudici
 * @version $Id$
 *
 **********************************************************************************************************************/
@Slf4j
public class ResourcesGenerator
{
    /** The taxonomy whose statements are left out of the taxonomy catalog. */
    private static final Id COL_ID = new Id("http://bluebill.tidalwave.it/taxonomy/CatalogueOfLife");

    /** The object value used to find the subjects that are taxonomies. */
    private static final String TAXONOMY_SCHEME_URI = "http://bluebill.tidalwave.it/rdf/taxo/2010/08/28#TaxonomyScheme";

    /** The RDF files, relative to the target folder, merged together to generate the fact sheets. */
    private static final String[] FACT_SHEET_SOURCES =
    {
        "AOU7th.rdf",
        "BirdsOfIndia2009.rdf",
        "EBNItalia2003.rdf",
        "EBNItalia2011.rdf",
        "RobertsVII.rdf",
        "ITIS.rdf",
        "Wikipedia.rdf",
        "WikipediaPhotos.rdf"
    };

    // NOTE(review): this field is assigned but never read in this class - confirm whether inputs should be read from
    // here rather than from targetFolder.
    protected final File resourcesFolder;

    protected final File targetFolder;

    protected SesameManagerFactory sesameManagerFactory;

    protected Repository repository;

    /** The number of fact sheets generated so far; only used for logging. */
    private int count = 0;

    /*******************************************************************************************************************
     *
     * Runs the generator: first exports each taxonomy, then creates the fact sheets archive and the taxonomy catalog.
     *
     * @param args the command line arguments; the first one is the base directory of the project
     *
     ******************************************************************************************************************/
    public static void main (final @Nonnull String ... args)
      throws RepositoryException, IOException, RDFParseException, RDFHandlerException, NotFoundException, JSONException
    {
        final ResourcesGenerator resourceGenerator = new ResourcesGenerator(args);
        resourceGenerator.loadTaxonomies();
        resourceGenerator.createFactSheetsZip();
    }

    /*******************************************************************************************************************
     *
     * Creates a generator working on the {@code target} folder under the given base directory.
     *
     * @param args the command line arguments; the first one is the base directory of the project
     * @throws IllegalArgumentException if no base directory has been given
     *
     ******************************************************************************************************************/
    private ResourcesGenerator (final @Nonnull String ... args)
      throws IOException
    {
        // Fail with a readable message rather than with an ArrayIndexOutOfBoundsException.
        if ((args == null) || (args.length == 0))
        {
            throw new IllegalArgumentException("Missing argument: the base directory is required");
        }

        final String baseDir = args[0];
        resourcesFolder = new File(baseDir + "/target/");
        targetFolder = new File(baseDir + "/target/");
    }

    /*******************************************************************************************************************
     *
     * Imports each taxonomy into its own fresh repository and exports it into the target folder.
     *
     ******************************************************************************************************************/
    private void loadTaxonomies()
      throws RepositoryException, IOException, RDFParseException, RDFHandlerException, NotFoundException, JSONException
    {
        log.info("loadTaxonomies()");
        final Importer importer = Locator.find(TaxonomyManager.class).as(Importer.class);
        importAndExportTaxonomy(importer, "AOU7th.rdf", "AOU 7th edition");
        importAndExportTaxonomy(importer, "BirdsOfIndia2009.rdf", "Birds of India 2009");
        importAndExportTaxonomy(importer, "EBNItalia2003.rdf", "EBN Italia 2003");
        importAndExportTaxonomy(importer, "EBNItalia2011.rdf", "EBN Italia 2011");
        importAndExportTaxonomy(importer, "RobertsVII.rdf", "Roberts VII Edition");
    }

    /*******************************************************************************************************************
     *
     * Imports a single RDF file into a fresh repository, exports the taxonomy with the given name and releases the
     * repository resources, even when the import or the export fails.
     *
     * @param importer the importer
     * @param rdfFileName the name of the RDF file to import, relative to the target folder
     * @param taxonomyName the name of the taxonomy to export
     *
     ******************************************************************************************************************/
    private void importAndExportTaxonomy (final @Nonnull Importer importer,
                                          final @Nonnull String rdfFileName,
                                          final @Nonnull String taxonomyName)
      throws RepositoryException, IOException, RDFParseException, RDFHandlerException, NotFoundException, JSONException
    {
        initialize();

        try
        {
            importer.import_(repository, new File(targetFolder, rdfFileName));
            exportTaxonomy(targetFolder, taxonomyName);
        }
        finally
        {
            close();
        }
    }

    /*******************************************************************************************************************
     *
     * Merges all the sources into a single repository, then generates {@code factsheets.zip} and the taxonomy catalog.
     *
     ******************************************************************************************************************/
    private void createFactSheetsZip()
      throws RepositoryException, IOException, RDFParseException, RDFHandlerException, NotFoundException, JSONException
    {
        log.info("createFactSheetsZip()");
        final Importer importer = Locator.find(TaxonomyManager.class).as(Importer.class);
        initialize();

        try
        {
            for (final String rdfFileName : FACT_SHEET_SOURCES)
            {
                importer.import_(repository, new File(targetFolder, rdfFileName));
            }

            writeFactSheets(); // the archive is complete and closed before the catalog is generated
            createTaxonomyCatalog();
            // Debugging aid: new ElmoExporter(repository).export(new File(targetFolder, "Merged.n3"));
        }
        finally
        {
            close();
        }
    }

    /*******************************************************************************************************************
     *
     * Writes {@code factsheets.zip} into the target folder, with one entry for each taxon id found in the repository.
     *
     ******************************************************************************************************************/
    private void writeFactSheets()
      throws RepositoryException, IOException, RDFParseException, RDFHandlerException, NotFoundException, JSONException
    {
        @Cleanup final ZipOutputStream zos =
                new ZipOutputStream(new FileOutputStream(new File(targetFolder, "factsheets.zip")));

        for (final Id id : getTaxaIds())
        {
            createTaxonRepository(zos, id);
        }
    }

    /*******************************************************************************************************************
     *
     * Copies the statements whose subject is a taxonomy (with the exception of {@link #COL_ID}) into a temporary
     * repository and exports it as {@code taxonomies.n3} into the target folder.
     *
     ******************************************************************************************************************/
    private void createTaxonomyCatalog()
      throws RepositoryException, IOException, RDFParseException, RDFHandlerException, NotFoundException, JSONException
    {
        final Repository taxonomyCatalogRepository = new SailRepository(new MemoryStore());
        taxonomyCatalogRepository.initialize();

        try
        {
            final RepositoryConnection catalogConnection = taxonomyCatalogRepository.getConnection();

            try
            {
                final RepositoryConnection repoConnection = repository.getConnection();

                try
                {
                    for (final Id taxonomyId : getTaxonomyIds())
                    {
                        if (!taxonomyId.equals(COL_ID))
                        {
                            addStatements(catalogConnection, findStatementsWithSubject(repoConnection, taxonomyId));
                        }
                    }
                }
                finally
                {
                    repoConnection.close();
                }

                catalogConnection.commit();
            }
            finally
            {
                catalogConnection.close();
            }

            new ElmoExporter(taxonomyCatalogRepository).export(new File(targetFolder, "taxonomies.n3"));
        }
        finally
        {
            // Same as for the per-taxon repositories: the temporary repository is shut down after the export.
            taxonomyCatalogRepository.shutDown();
        }
    }

    /*******************************************************************************************************************
     *
     * Returns the ids of the subjects of all the statements in the repository whose object is
     * {@link #TAXONOMY_SCHEME_URI}.
     *
     * @return the ids, sorted
     *
     ******************************************************************************************************************/
    @Nonnull
    private SortedSet<Id> getTaxonomyIds()
      throws RepositoryException
    {
        log.debug("getTaxonomyIds()");
        final RepositoryConnection conn = repository.getConnection();

        try
        {
            final ValueFactory valueFactory = conn.getValueFactory();
            final RepositoryResult<Statement> statements =
                    conn.getStatements(null, null, valueFactory.createURI(TAXONOMY_SCHEME_URI), false);

            try
            {
                final SortedSet<Id> ids = new TreeSet<Id>();

                while (statements.hasNext())
                {
                    ids.add(new Id(statements.next().getSubject().stringValue()));
                }

                return ids;
            }
            finally
            {
                statements.close();
            }
        }
        finally
        {
            conn.close();
        }
    }

    /*******************************************************************************************************************
     *
     * Returns the objects of all the statements in the repository whose predicate is {@code URI_SCIENTIFIC_NAME_ID},
     * as ids.
     *
     * @return the ids, sorted
     *
     * FIXME: should only return species?
     *
     ******************************************************************************************************************/
    @Nonnull
    private SortedSet<Id> getTaxaIds()
      throws RepositoryException
    {
        log.debug("getTaxaIds()");
        final RepositoryConnection conn = repository.getConnection();

        try
        {
            final ValueFactory valueFactory = conn.getValueFactory();
            final RepositoryResult<Statement> statements =
                    conn.getStatements(null, valueFactory.createURI(URI_SCIENTIFIC_NAME_ID), null, false);

            try
            {
                final SortedSet<Id> ids = new TreeSet<Id>();

                while (statements.hasNext())
                {
                    ids.add(new Id(statements.next().getObject().stringValue()));
                }

                return ids;
            }
            finally
            {
                statements.close();
            }
        }
        finally
        {
            conn.close();
        }
    }

    /*******************************************************************************************************************
     *
     * Creates a repository with all the information for the given taxon and adds it to the resources .zip as a .n3
     * entry. The entry name is the taxon id with each ':' replaced by '_', followed by {@code .n3}.
     *
     * @param zos the .zip resource
     * @param taxonId the id of the taxon
     *
     ******************************************************************************************************************/
    private void createTaxonRepository (final @Nonnull ZipOutputStream zos, final @Nonnull Id taxonId)
      throws RDFHandlerException, NotFoundException, JSONException, IOException, RepositoryException, RDFParseException
    {
        log.info("createTaxonRepository({})", taxonId);
        final Repository taxonRepository = new SailRepository(new MemoryStore());
        taxonRepository.initialize();

        try
        {
            final RepositoryConnection taxonRepoConnection = taxonRepository.getConnection();

            try
            {
                final RepositoryConnection repoConnection = repository.getConnection();

                try
                {
                    for (final Id subjectId : findAllRelevantSubjectIds(taxonId, repoConnection))
                    {
                        addStatements(taxonRepoConnection, findStatementsWithSubject(repoConnection, subjectId));
                    }
                }
                finally
                {
                    repoConnection.close();
                }

                addStatements(taxonRepoConnection, loadXenoCantoStatements(taxonId));
                taxonRepoConnection.commit();
            }
            finally
            {
                taxonRepoConnection.close();
            }

            final String entryName = taxonId.stringValue().replace(':', '_') + ".n3";
            log.info(">>>> generating {}: {}...", ++count, entryName);
            zos.putNextEntry(new ZipEntry(entryName));
            new ElmoExporter(taxonRepository).export(zos, entryName);
        }
        finally
        {
            taxonRepository.shutDown();
        }
    }

    /*******************************************************************************************************************
     *
     * XenoCanto stuff must be read taxon by taxon because it contains internal ids which are not unique.
     * TODO: have XenoCanto to export a single file, then import it once and navigate through statements to get the
     * required stuff.
     *
     * @param taxonId the id of the taxon
     * @return the statements read from the archive entry of the taxon; an empty list if there is no such entry
     *
     ******************************************************************************************************************/
    @Nonnull
    private List<Statement> loadXenoCantoStatements (final @Nonnull Id taxonId)
      throws IOException, RepositoryException, RDFParseException
    {
        log.info("loadXenoCantoStatements({})", taxonId);
        final String zipEntryName = "xenocanto-" + taxonId.stringValue().replace(':', '_') + ".n3";
        log.debug(">>>> zip entry: {}", zipEntryName);
        // NOTE(review): this path is resolved against the current working directory, not against the base directory
        // passed on the command line - confirm whether that is intended.
        @Cleanup final ZipFile zipFile = new ZipFile("target/xeno-canto.zip");
        final ZipEntry zipEntry = zipFile.getEntry(zipEntryName);

        if (zipEntry == null)
        {
            log.warn(">>>> resource not found");
            return Collections.emptyList();
        }

        @Cleanup final InputStream is = zipFile.getInputStream(zipEntry);
        @Cleanup final Reader r = new InputStreamReader(is, "UTF-8");
        final Repository taxonXenoCantoRepository = new SailRepository(new MemoryStore());
        taxonXenoCantoRepository.initialize();

        try
        {
            final RepositoryConnection taxonXenoCantoRepoConnection = taxonXenoCantoRepository.getConnection();

            try
            {
                taxonXenoCantoRepoConnection.add(r, "http://foo.bar", RDFFormat.N3); // FIXME
                // The statements are copied into a list before the connection and the repository are released.
                return taxonXenoCantoRepoConnection.getStatements(null, null, null, false).asList();
            }
            finally
            {
                taxonXenoCantoRepoConnection.close();
            }
        }
        finally
        {
            taxonXenoCantoRepository.shutDown();
        }
    }

    /*******************************************************************************************************************
     *
     * Initializes the repository: creates a new in-memory repository, binds a new Elmo manager to
     * {@link ElmoManagerThreadLocal} and makes sure that the target folder exists.
     *
     ******************************************************************************************************************/
    protected void initialize()
      throws RepositoryException
    {
        repository = new SailRepository(new MemoryStore());
        repository.initialize();
        final ElmoModule elmoModule = new ElmoModule();
        sesameManagerFactory = new SesameManagerFactory(elmoModule, repository);
        final ElmoManager em = sesameManagerFactory.createElmoManager();
        ElmoManagerThreadLocal.set(em);
        targetFolder.mkdirs();
    }

    /*******************************************************************************************************************
     *
     * Closes the repository: closes the Elmo manager bound by {@link #initialize()}, unbinds it and closes the manager
     * factory. Must be called after {@link #initialize()}.
     *
     ******************************************************************************************************************/
    protected void close()
    {
        try
        {
            ElmoManagerThreadLocal.get().close();
        }
        finally
        {
            // Unbind the manager and release the factory even when closing the manager fails.
            ElmoManagerThreadLocal.set(null);
            sesameManagerFactory.close();
        }
    }

    /*******************************************************************************************************************
     *
     * Exports a taxonomy into .n3 and .json formats. The file names are derived from the taxonomy id, with each ':',
     * ',' and '/' replaced by '_'.
     *
     * @param folder the folder where to export
     * @param taxonomyName the name of the taxonomy to export
     *
     ******************************************************************************************************************/
    protected void exportTaxonomy (final @Nonnull File folder,
                                   final @Nonnull String taxonomyName)
      throws RepositoryException, IOException, RDFHandlerException, NotFoundException, JSONException
    {
        final Taxonomy taxonomy = Locator.find(TaxonomyManager.class).findTaxonomyByName(taxonomyName, repository);
        final String fileName = taxonomy.as(Identifiable.class).getId().stringValue().replaceAll("[:,/]", "_");
        taxonomy.as(Exporter.class).export(new File(folder, fileName + ".n3"));
        final File jsonFile = new File(folder, fileName + ".json");
        // The writer is closed even when the JSON export fails.
        @Cleanup final Writer jsonWriter = new OutputStreamWriter(new FileOutputStream(jsonFile), "UTF-8");
        new TaxonomyJSONExporter().export(taxonomy, jsonWriter);
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy