All downloads are free. Search and download functionality uses the official Maven repository.

it.tidalwave.bluebill.taxonomy.birds.clements.Clements2008Importer Maven / Gradle / Ivy

The newest version!
/***********************************************************************************************************************
 *
 * blueBill Resources - open source birding
 * Copyright (C) 2009-2011 by Tidalwave s.a.s. (http://www.tidalwave.it)
 *
 ***********************************************************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations under the License.
 *
 ***********************************************************************************************************************
 *
 * WWW: http://bluebill.tidalwave.it
 * SCM: https://java.net/hg/bluebill~resources-src
 *
 **********************************************************************************************************************/
package it.tidalwave.bluebill.taxonomy.birds.clements;

import it.tidalwave.role.LocalizedDisplayable;
import it.tidalwave.util.logging.Logger;
import javax.annotation.Nonnull;
import java.util.Scanner;
import java.util.regex.MatchResult;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import org.openrdf.repository.Repository;
import it.tidalwave.bluebill.taxonomy.Taxonomy;
import it.tidalwave.bluebill.taxonomy.Taxon;
import it.tidalwave.bluebill.taxonomy.birds.BirdTaxonomyImporter;
import java.io.InputStream;
import java.util.zip.GZIPInputStream;
import static it.tidalwave.bluebill.taxonomy.elmo.ElmoTaxonomyVocabulary.*;

/***********************************************************************************************************************
 *
 * Imports a Clements bird checklist into a {@link Taxonomy}. The checklist is read from a gzip-compressed, UTF-8
 * encoded classpath resource in which every line is a record whose columns are separated by '$'. The taxonomy is
 * registered under the id prefix {@code ID_TAXONOMY_PREFIX + "Clements/6.3.2"}.
 *
 * @author  Fabrizio Giudici
 * @version $Id$
 *
 **********************************************************************************************************************/
public class Clements2008Importer extends BirdTaxonomyImporter
  {
    private static final String CLASS = Clements2008Importer.class.getName();
    private static final Logger logger = Logger.getLogger(CLASS);

    private static final String[] LANGUAGES = new String[] { "en" };

    /** Number of leading lines of the resource that are discarded before records are parsed. */
    private static final int SKIPPED_LEADING_LINES = 3;

    /**
     * Splits the family column, e.g. "Struthionidae (Ostrich)", into the family name (group 1) and its English name
     * (group 2).
     * NOTE(review): group 2 is {@code \w*}, so only the first word inside the parentheses is captured - confirm that
     * multi-word English family names are meant to be truncated.
     */
    private static final String FAMILY_REGEX = "(\\w*) \\((\\w*)";

    /*******************************************************************************************************************
     *
     * Creates the taxonomy and populates it with every "species", "subspecies" and "group" record of the source
     * resource (named by {@code sourceResourceName}, a member not declared in this class - presumably inherited).
     * Records whose third scientific-name token contains '/' or '[' are not imported; they are collected in
     * {@code brokenSpecies} instead.
     *
     * The resource is always closed, even when a record cannot be parsed; {@code taxonUniqueIdManager} is closed only
     * after a successful import, as before.
     *
     * @param  repository             the repository the taxonomy is created in
     * @param  taxonomyName           not used by this implementation
     * @return                        the populated taxonomy
     * @throws IllegalStateException  if the source resource cannot be found on the classpath
     * @throws RuntimeException       if the family column of a record does not look like "Name (English"
     * @throws Exception              on I/O errors or on any failure of the underlying taxonomy operations
     *
     ******************************************************************************************************************/
    @Nonnull
    public Taxonomy run (final @Nonnull Repository repository,
                         final @Nonnull String taxonomyName)
      throws Exception
      {
        init(repository);

        final String idPrefix = ID_TAXONOMY_PREFIX + "Clements/6.3.2";
        final Taxonomy taxonomy = createTaxonomy(repository, idPrefix, Clements2008Importer.class, LANGUAGES);
        final Taxon aves = createAves(taxonomy, idPrefix);

        final InputStream resource = getClass().getResourceAsStream(sourceResourceName);

        if (resource == null)
          {
            // Fail with the resource name rather than with an anonymous NullPointerException from GZIPInputStream.
            throw new IllegalStateException("Resource not found: " + sourceResourceName);
          }

        try
          {
            final InputStream is = new GZIPInputStream(resource);
            final BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"));

            try
              {
                importClementsRecords(br, aves, idPrefix);
              }
            finally
              {
                br.close();
              }
          }
        finally
          {
            // Also covers a failure of the GZIPInputStream constructor; closing an already closed stream is harmless.
            resource.close();
          }

        taxonUniqueIdManager.close();

        return taxonomy;
      }

    /*******************************************************************************************************************
     *
     * Reads all the records from the given reader and creates the corresponding taxon paths below {@code aves}.
     * Columns, in order: sort index (integer), page, category, scientific name, English name, range, order,
     * family with English name.
     *
     * @param  br         the reader positioned at the very beginning of the resource
     * @param  aves       the root taxon the paths are created under
     * @param  idPrefix   the id prefix passed on to {@code createPath}
     * @throws Exception  on I/O errors, malformed records or failures while creating taxa
     *
     ******************************************************************************************************************/
    private void importClementsRecords (final @Nonnull BufferedReader br,
                                        final @Nonnull Taxon aves,
                                        final @Nonnull String idPrefix)
      throws Exception
      {
        for (int i = 0; i < SKIPPED_LEADING_LINES; i++)
          {
            br.readLine(); // presumably header lines - they are discarded without being inspected
          }

        // Records with an empty English name reuse the name of the most recently imported record.
        String previousEnglishName = "";

        for (String line = br.readLine(); line != null; line = br.readLine())
          {
            final Scanner columns = new Scanner(line).useDelimiter("\\$");
            final int sort = columns.nextInt();
            columns.next(); // page
            final String categoryName = columns.next();
            final String scientificName = columns.next();
            String englishName = columns.next();

            if ("".equals(englishName))
              {
                englishName = previousEnglishName;
              }

            if (!categoryName.equals("species") && !categoryName.equals("subspecies") && !categoryName.equals("group"))
              {
                continue;
              }

            columns.next(); // range - not imported, but the column must be consumed
            final String orderName = columns.next();
            String familyNameWithEnglishName = columns.next();

            if (familyNameWithEnglishName.equals("Tyranni Incertae Sedis"))
              {
                // This value has no "(English name)" part: append one so that FAMILY_REGEX can match.
                // NOTE(review): the regex then yields family "Sedis" / English name "Tyranni" - confirm it's intended.
                familyNameWithEnglishName += " (" + familyNameWithEnglishName + ")";
              }

            final Scanner nameTokens = new Scanner(scientificName).useDelimiter(" ");
            final String genusName = nameTokens.next();
            final String speciesName = nameTokens.next();
            final String subSpeciesName = nameTokens.hasNext() ? nameTokens.next() : null;

            final Scanner familyScanner = new Scanner(familyNameWithEnglishName);

            // findInLine() returns null when nothing matches: test that instead of catching the
            // IllegalStateException that match() would throw afterwards.
            if (familyScanner.findInLine(FAMILY_REGEX) == null)
              {
                throw new RuntimeException("Cannot parse family name from: " + familyNameWithEnglishName);
              }

            final MatchResult family = familyScanner.match();
            final String familyName = family.group(1);
            final String familyEnglishName = family.group(2);

            logger.info("%d %s %s %s %s %s %s %s", sort, categoryName, englishName, orderName, familyName, genusName, speciesName, subSpeciesName);

            //
            // FIXME: some names contains / and note between [...] - I don't know how to handle them
            //
            if ((subSpeciesName != null) && (subSpeciesName.contains("/") || subSpeciesName.contains("[")))
              {
                brokenSpecies.add(scientificName + " / " + englishName);
                continue; // previousEnglishName is deliberately left untouched, as in the original flow
              }

            previousEnglishName = englishName;
            final LocalizedDisplayable familyCommonNames  = createDisplayable("en", familyEnglishName);
            final LocalizedDisplayable speciesCommonNames = createDisplayable("en", englishName);
            createPath(aves, idPrefix, orderName, familyName, genusName, speciesName, subSpeciesName, familyCommonNames, speciesCommonNames);
          }
      }
  }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy