All Downloads are FREE. Search and download functionalities are using the official Maven repository.

it.tidalwave.bluebill.mobile.resources.ResourcesGenerator Maven / Gradle / Ivy

The newest version!
/***********************************************************************************************************************
 *
 * blueBill Resources - open source birding
 * Copyright (C) 2009-2011 by Tidalwave s.a.s. (http://www.tidalwave.it)
 *
 ***********************************************************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations under the License.
 *
 ***********************************************************************************************************************
 *
 * WWW: http://bluebill.tidalwave.it
 * SCM: https://java.net/hg/bluebill~resources-src
 *
 **********************************************************************************************************************/
package it.tidalwave.bluebill.mobile.resources;

import javax.annotation.Nonnull;
import java.util.Collections;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import it.tidalwave.util.Id;
import org.json.me.JSONException;
import org.openrdf.model.Statement;
import org.openrdf.model.ValueFactory;
import org.openrdf.repository.RepositoryResult;
import org.openrdf.repository.Repository;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.repository.RepositoryException;
import org.openrdf.repository.sail.SailRepository;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.RDFParseException;
import org.openrdf.sail.memory.MemoryStore;
import org.openrdf.elmo.ElmoManager;
import org.openrdf.elmo.ElmoModule;
import org.openrdf.elmo.sesame.SesameManagerFactory;
import it.tidalwave.util.NotFoundException;
import it.tidalwave.netbeans.util.Locator;
import it.tidalwave.openrdf.elmo.ElmoManagerThreadLocal;
import it.tidalwave.bluebill.taxonomy.Taxonomy;
import it.tidalwave.bluebill.taxonomy.TaxonomyManager;
import it.tidalwave.bluebill.taxonomy.elmo.impl.ElmoExporter;
import it.tidalwave.bluebill.taxonomy.io.Exporter;
import it.tidalwave.bluebill.taxonomy.io.Importer;
import it.tidalwave.role.Identifiable;
import lombok.Cleanup;
import lombok.extern.slf4j.Slf4j;
import static it.tidalwave.bluebill.taxonomy.elmo.ElmoTaxonomyVocabulary.*;
import static it.tidalwave.bluebill.mobile.resources.RdfUtils.*;

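/***********************************************************************************************************************
 *
 * Command line tool that imports the taxonomy .rdf files found in the target folder and generates the derived
 * resources: a .n3 and a .json export of each taxonomy, a factsheets.zip archive with one .n3 file per taxon and a
 * taxonomies.n3 catalog.
 *
 * @author  Fabrizio Giudici
 * @version $Id$
 *
 **********************************************************************************************************************/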
@Slf4j
public class ResourcesGenerator
  {
    /** Id of the Catalogue of Life taxonomy; createTaxonomyCatalog() leaves it out of the exported catalog. */
    private static final Id COL_ID = new Id("http://bluebill.tidalwave.it/taxonomy/CatalogueOfLife");
        
    /** Set by the constructor to the "target" folder under the base directory; not read elsewhere in this class. */
    protected final File resourcesFolder;

    /** Set by the constructor to the "target" folder under the base directory; inputs are read from it and outputs
        are written to it. */
    protected final File targetFolder;

    /** The Elmo manager factory bound to the current repository; created by initialize(), closed by close(). */
    protected SesameManagerFactory sesameManagerFactory;

    /** The in-memory repository currently in use; replaced at each call to initialize(). */
    protected Repository repository;

    /** The number of per-taxon files generated so far; only used for progress logging. */
    private int count = 0;

    /*******************************************************************************************************************
     *
     * Command line entry point: first exports each taxonomy on its own, then builds the fact sheets archive.
     *
     * @param  args   the command line arguments; the first one is the base directory
     *
     ******************************************************************************************************************/
    public static void main (final @Nonnull String ... args)
      throws RepositoryException, IOException, RDFParseException, RDFHandlerException, NotFoundException, JSONException
      {
        final ResourcesGenerator generator = new ResourcesGenerator(args);
        generator.loadTaxonomies();
        generator.createFachSheetsZip();
      }

    /*******************************************************************************************************************
     *
     * Creates a generator working on the "target" folder under the given base directory.
     *
     * @param  args                      the command line arguments; the first one is the base directory
     * @throws IllegalArgumentException  if the base directory argument is missing
     *
     ******************************************************************************************************************/
    private ResourcesGenerator (final @Nonnull String ... args)
      throws IOException
      {
        // Fail with a readable message rather than an ArrayIndexOutOfBoundsException when no argument is given.
        if (args.length == 0)
          {
            throw new IllegalArgumentException("Usage: ResourcesGenerator <baseDir>");
          }

        final String baseDir = args[0];
        resourcesFolder = new File(baseDir + "/target/");
        targetFolder = new File(baseDir + "/target/");
      }

    /*******************************************************************************************************************
     *
     * Imports each taxonomy, one at a time, into a fresh repository and exports it by means of
     * exportTaxonomy(). The repository is closed after each taxonomy, even when the import or the export fails.
     *
     ******************************************************************************************************************/
    private void loadTaxonomies()
      throws RepositoryException, IOException, RDFParseException, RDFHandlerException, NotFoundException, JSONException
      {
        log.info("loadTaxonomies()");
        final Importer importer = Locator.find(TaxonomyManager.class).as(Importer.class);

        // Each row pairs a source file in the target folder with the name of the taxonomy it contains.
        final String[][] taxonomies =
          {
            { "AOU7th.rdf",           "AOU 7th edition"     },
            { "BirdsOfIndia2009.rdf", "Birds of India 2009" },
            { "EBNItalia2003.rdf",    "EBN Italia 2003"     },
            { "EBNItalia2011.rdf",    "EBN Italia 2011"     },
            { "RobertsVII.rdf",       "Roberts VII Edition" }
          };

        for (final String[] taxonomy : taxonomies)
          {
            initialize();

            try
              {
                importer.import_(repository, new File(targetFolder, taxonomy[0]));
                exportTaxonomy(targetFolder, taxonomy[1]);
              }
            finally
              {
                close(); // don't leave the Elmo manager bound to the thread if something went wrong
              }
          }
      }

    /*******************************************************************************************************************
     *
     * Imports all the taxonomies and the related resources into a single repository, then writes factsheets.zip in
     * the target folder (one entry per taxon, see createTaxonRepository()) and eventually the taxonomy catalog (see
     * createTaxonomyCatalog()). The repository is closed even in case of failure.
     *
     ******************************************************************************************************************/
    private void createFachSheetsZip()
      throws RepositoryException, IOException, RDFParseException, RDFHandlerException, NotFoundException, JSONException
      {
        log.info("createFactSheetsZip()");
        final Importer importer = Locator.find(TaxonomyManager.class).as(Importer.class);
        final String[] sourceFileNames =
          {
            "AOU7th.rdf", "BirdsOfIndia2009.rdf", "EBNItalia2003.rdf", "EBNItalia2011.rdf", "RobertsVII.rdf",
            "ITIS.rdf", "Wikipedia.rdf", "WikipediaPhotos.rdf"
          };

        initialize();

        try
          {
            for (final String sourceFileName : sourceFileNames)
              {
                importer.import_(repository, new File(targetFolder, sourceFileName));
              }

            final ZipOutputStream zos =
                    new ZipOutputStream(new FileOutputStream(new File(targetFolder, "factsheets.zip")));

            try
              {
                for (final Id id : getTaxaIds())
                  {
                    createTaxonRepository(zos, id);
                  }
              }
            finally
              {
                zos.close(); // the archive is completed before the catalog is generated
              }

            createTaxonomyCatalog();
            // For debugging, the merged repository can be exported here with ElmoExporter (e.g. to "Merged.n3").
          }
        finally
          {
            close();
          }
      }
    
    /*******************************************************************************************************************
     *
     * Creates the catalog of taxonomies: the statements whose subject is the id of a taxonomy in the current
     * repository (with the exception of COL_ID) are copied into a temporary in-memory repository, which is then
     * exported as taxonomies.n3 in the target folder. Connections and the temporary repository are always released.
     *
     ******************************************************************************************************************/
    private void createTaxonomyCatalog()
      throws RepositoryException, IOException, RDFParseException, RDFHandlerException, NotFoundException, JSONException
      {
        final Repository taxonomyCatalogRepository = new SailRepository(new MemoryStore());
        taxonomyCatalogRepository.initialize();

        try
          {
            final RepositoryConnection catalogConnection = taxonomyCatalogRepository.getConnection();

            try
              {
                final RepositoryConnection repoConnection = repository.getConnection();

                try
                  {
                    for (final Id taxonomyId : getTaxonomyIds())
                      {
                        if (!taxonomyId.equals(COL_ID))
                          {
                            addStatements(catalogConnection, findStatementsWithSubject(repoConnection, taxonomyId));
                          }
                      }
                  }
                finally
                  {
                    repoConnection.close();
                  }

                catalogConnection.commit();
              }
            finally
              {
                catalogConnection.close();
              }

            new ElmoExporter(taxonomyCatalogRepository).export(new File(targetFolder, "taxonomies.n3"));
          }
        finally
          {
            taxonomyCatalogRepository.shutDown(); // consistent with what createTaxonRepository() does
          }
      }

    /*******************************************************************************************************************
     *
     * Returns all the ids of taxonomies imported into the repository, that is the subjects of all the statements
     * (with any predicate) whose object is the TaxonomyScheme URI.
     * 
     * @return  the ids, sorted
     * 
     ******************************************************************************************************************/
    @Nonnull
    private SortedSet<Id> getTaxonomyIds()
      throws RepositoryException
      {
        log.debug("getTaxonomyIds()");
        final SortedSet<Id> ids = new TreeSet<Id>();
        final RepositoryConnection conn = repository.getConnection();

        try
          {
            final ValueFactory valueFactory = conn.getValueFactory();
            final RepositoryResult<Statement> statements = conn.getStatements(null, null,
                    valueFactory.createURI("http://bluebill.tidalwave.it/rdf/taxo/2010/08/28#TaxonomyScheme"), false);

            try
              {
                while (statements.hasNext())
                  {
                    ids.add(new Id(statements.next().getSubject().stringValue()));
                  }
              }
            finally
              {
                statements.close(); // results hold resources of the connection until closed
              }
          }
        finally
          {
            conn.close();
          }

        return ids;
      }

    /*******************************************************************************************************************
     *
     * Returns all the ids of taxa imported into the repository, that is the objects of all the statements whose
     * predicate is URI_SCIENTIFIC_NAME_ID.
     * 
     * @return  the ids, sorted
     * 
     * FIXME: should only return species?
     *
     ******************************************************************************************************************/
    @Nonnull
    private SortedSet<Id> getTaxaIds()
      throws RepositoryException
      {
        log.debug("getTaxaIds()");
        final SortedSet<Id> ids = new TreeSet<Id>();
        final RepositoryConnection conn = repository.getConnection();

        try
          {
            final ValueFactory valueFactory = conn.getValueFactory();
            final RepositoryResult<Statement> statements =
                    conn.getStatements(null, valueFactory.createURI(URI_SCIENTIFIC_NAME_ID), null, false);

            try
              {
                while (statements.hasNext())
                  {
                    ids.add(new Id(statements.next().getObject().stringValue()));
                  }
              }
            finally
              {
                statements.close(); // results hold resources of the connection until closed
              }
          }
        finally
          {
            conn.close();
          }

        return ids;
      }

    /*******************************************************************************************************************
     *
     * Creates a temporary repository with all the information for the given taxon and adds it to the resources .zip
     * as a .n3 entry, named after the taxon id with ':' replaced by '_'. The information is made of the statements
     * about all the relevant subjects found in the main repository plus the XenoCanto statements for the taxon.
     * Connections and the temporary repository are released even in case of failure.
     * 
     * @param  zos       the .zip resource
     * @param  taxonId   the id of the taxon
     *
     ******************************************************************************************************************/
    private void createTaxonRepository (final @Nonnull ZipOutputStream zos, final @Nonnull Id taxonId)
      throws RDFHandlerException, NotFoundException, JSONException, IOException, RepositoryException, RDFParseException
      {
        log.info("createTaxonRepository({})", taxonId);

        final Repository taxonRepository = new SailRepository(new MemoryStore());
        taxonRepository.initialize();

        try
          {
            final RepositoryConnection taxonRepoConnection = taxonRepository.getConnection();

            try
              {
                final RepositoryConnection repoConnection = repository.getConnection();

                try
                  {
                    for (final Id subjectId : findAllRelevantSubjectIds(taxonId, repoConnection))
                      {
                        addStatements(taxonRepoConnection, findStatementsWithSubject(repoConnection, subjectId));
                      }
                  }
                finally
                  {
                    repoConnection.close();
                  }

                addStatements(taxonRepoConnection, loadXenoCantoStatements(taxonId));
                taxonRepoConnection.commit();
              }
            finally
              {
                taxonRepoConnection.close();
              }

            final String entryName = taxonId.stringValue().replace(':', '_') + ".n3";
            log.info(">>>> generating {}: {}...", ++count, entryName);
            zos.putNextEntry(new ZipEntry(entryName));
            new ElmoExporter(taxonRepository).export(zos, entryName);
          }
        finally
          {
            taxonRepository.shutDown();
          }
      }
    
    /*******************************************************************************************************************
     *
     * Loads the XenoCanto statements for the given taxon from the entry "xenocanto-<id>.n3" (with ':' replaced by '_'
     * in the id) of target/xeno-canto.zip. The entry is parsed into a temporary in-memory repository, which is
     * released before returning.
     *
     * XenoCanto stuff must be read taxon by taxon because it contains internal ids which are not unique.
     * TODO: have XenoCanto export a single file, then import it once and navigate through statements to get the
     * required stuff.
     *
     * @param  taxonId   the id of the taxon
     * @return           the statements; an empty list if the archive has no entry for the taxon
     *
     ******************************************************************************************************************/
    @Nonnull
    private List<Statement> loadXenoCantoStatements (final @Nonnull Id taxonId) 
      throws IOException, RepositoryException, RDFParseException
      { 
        log.info("loadXenoCantoStatements({})", taxonId);
        final String zipEntryName = "xenocanto-" + taxonId.stringValue().replace(':', '_') + ".n3";
        log.debug(">>>> zip entry: {}", zipEntryName);
        // NOTE(review): this path is relative to the working directory, not to resourcesFolder - confirm it's meant.
        @Cleanup final ZipFile zipFile = new ZipFile("target/xeno-canto.zip");
        final ZipEntry zipEntry = zipFile.getEntry(zipEntryName);

        if (zipEntry == null)
          {
            log.warn(">>>> resource not found");
            return Collections.<Statement>emptyList();
          }

        // @Cleanup closes the reader, the stream and the .zip file at the end of the method, whatever happens.
        @Cleanup final InputStream is = zipFile.getInputStream(zipEntry);
        @Cleanup final Reader r = new InputStreamReader(is, "UTF-8");

        final Repository taxonXenoCantoRepository = new SailRepository(new MemoryStore());
        taxonXenoCantoRepository.initialize();

        try
          {
            final RepositoryConnection taxonXenoCantoRepoConnection = taxonXenoCantoRepository.getConnection();

            try
              {
                taxonXenoCantoRepoConnection.add(r, "http://foo.bar", RDFFormat.N3); // FIXME
                // asList() copies the statements into a list, so they can be returned after the connection is closed.
                return taxonXenoCantoRepoConnection.getStatements(null, null, null, false).asList();
              }
            finally
              {
                taxonXenoCantoRepoConnection.close();
              }
          }
        finally
          {
            taxonXenoCantoRepository.shutDown();
          }
      }

    /*******************************************************************************************************************
     *
     * Creates and initializes a new in-memory repository, creates the Elmo manager factory on top of it, binds a new
     * Elmo manager to the current thread and makes sure that the target folder exists.
     *
     ******************************************************************************************************************/
    protected void initialize()
      throws RepositoryException
      {
        repository = new SailRepository(new MemoryStore());
        repository.initialize();
        sesameManagerFactory = new SesameManagerFactory(new ElmoModule(), repository);
        ElmoManagerThreadLocal.set(sesameManagerFactory.createElmoManager());
        targetFolder.mkdirs();
      }

    /*******************************************************************************************************************
     *
     * Closes the Elmo manager bound to the current thread, unbinds it and closes the Elmo manager factory. It must be
     * called after initialize().
     *
     ******************************************************************************************************************/
    protected void close()
      {
        final ElmoManager currentManager = ElmoManagerThreadLocal.get();
        currentManager.close();
        ElmoManagerThreadLocal.set(null);
        sesameManagerFactory.close();
      }

    /*******************************************************************************************************************
     *
     * Exports a taxonomy into .n3 and .json formats. The name of both files is the id of the taxonomy with each
     * ':', ',' and '/' replaced by '_'. The .json file is written in UTF-8 and always closed, even when the export
     * fails.
     * 
     * @param  folder        the folder where to export
     * @param  taxonomyName  the name of the taxonomy to export
     *
     ******************************************************************************************************************/
    protected void exportTaxonomy (final @Nonnull File folder,
                                   final @Nonnull String taxonomyName)
      throws RepositoryException, IOException, RDFHandlerException, NotFoundException, JSONException
      {
        final Taxonomy taxonomy = Locator.find(TaxonomyManager.class).findTaxonomyByName(taxonomyName, repository);
        final String fileName = taxonomy.as(Identifiable.class).getId().stringValue().replaceAll("[:,/]", "_");
        taxonomy.as(Exporter.class).export(new File(folder, fileName + ".n3"));

        final File jsonFile = new File(folder, fileName + ".json");
        final Writer w3 = new OutputStreamWriter(new FileOutputStream(jsonFile), "UTF-8");

        try
          {
            new TaxonomyJSONExporter().export(taxonomy, w3);
          }
        finally
          {
            w3.close();
          }
      }
  }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy