All Downloads are FREE. The search and download functionality uses the official Maven repository.

it.tidalwave.metadata.text.NameCanonicalizer Maven / Gradle / Ivy

The newest version!
/***********************************************************************************************************************
 *
 * blueMarine Metadata - open source media workflow
 * Copyright (C) 2007-2011 by Tidalwave s.a.s. (http://www.tidalwave.it)
 *
 ***********************************************************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations under the License.
 *
 ***********************************************************************************************************************
 *
 * WWW: http://bluemarine.tidalwave.it
 * SCM: https://kenai.com/hg/bluemarine~metadata-src
 *
 **********************************************************************************************************************/
package it.tidalwave.metadata.text;

import javax.annotation.Nonnull;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import it.tidalwave.util.logging.Logger;
import java.util.Locale;

/*******************************************************************************
 *
 * Maps raw camera maker / camera model names to canonical names.
 *
 * Each instance loads a lookup table from a classpath resource located relative
 * to this class. The resource is read line by line: blank lines and lines
 * starting with '#' are ignored, every other line is expected to have the form
 * <code>name : canonical name</code> (split at the first ':').
 *
 * The two static entry points, {@link #canonicalCameraModel(String)} and
 * {@link #canonicalMaker(String)}, first strip a fixed list of tokens from the
 * input and then look the result up in the table loaded from
 * "CameraModel.properties" and "Maker.properties" respectively.
 *
 * NOTE(review): the strip tokens are passed to String.replaceAll() and are thus
 * unanchored regular expressions; a short token such as "CO" also matches inside
 * a longer word (e.g. "RICOH"). This is left unchanged because the lookup tables
 * (not visible here) may have been written against that behaviour - confirm
 * before anchoring the tokens to word boundaries.
 *
 * @author  fritz
 * @version $Id$
 *
 ******************************************************************************/
public class NameCanonicalizer
  {
    private final static String CLASS = NameCanonicalizer.class.getName();
    private final static Logger logger = Logger.getLogger(CLASS);

    /** Regular expressions removed from a maker name before it is looked up. */
    private static final List<String> STRIP_FROM_CAMERA_MAKER =
            Arrays.asList("CORPORATION", "CAMERA", "Camera", "COMPANY", "Company", "COMPUTER", "Computer", "Corporation", "CORP", "IMAGING",
            "INC", "Inc", "Co", "CO", "Ltd", "LTD", ",", "\\.",
            "OPTICAL", "Optical");

    /** Regular expressions removed from a camera model name before it is looked up. */
    private static final List<String> STRIP_FROM_CAMERA_MODEL =
            Arrays.asList("HP", "KODAK", "Konica", "NIKON", "Nikon", "PENTAX");

    /** Raw name -> canonical name, as loaded from the resource given to the constructor. */
    private final Map<String, String> nameMap = new HashMap<String, String>();

    // These must stay below the fields above: static initializers run in textual order and the
    // constructor uses the logger.
    private final static NameCanonicalizer makerCanonicalizer = new NameCanonicalizer("Maker.properties");
    private final static NameCanonicalizer cameraModelCanonicalizer = new NameCanonicalizer("CameraModel.properties");

    /*******************************************************************************
     *
     * Creates a canonicalizer whose lookup table is read from the given classpath
     * resource. A missing resource, an I/O error or a malformed line is logged and
     * does not prevent the instance from being created; names without an entry in
     * the table are handled by the fallback described in {@link #normalized(String)}.
     *
     * @param fileName  the resource name, resolved relative to this class
     *
     *******************************************************************************/
    public NameCanonicalizer (@Nonnull final String fileName)
      {
        final InputStream is = NameCanonicalizer.class.getResourceAsStream(fileName);

        if (is == null)
          {
            // getResourceAsStream() returns null for a missing resource; without this guard the
            // NullPointerException would break the static initialization of the whole class.
            logger.info("Normalization resource not found: " + fileName);
            return;
          }

        BufferedReader br = null;

        try
          {
            br = new BufferedReader(new InputStreamReader(is));

            for (;;)
              {
                String string = br.readLine();

                if (string == null)
                  {
                    break;
                  }

                string = string.trim();

                if (string.equals("") || string.startsWith("#"))
                  {
                    continue;
                  }

                final int i = string.indexOf(':');

                if (i < 0)
                  {
                    // No separator: substring(0, -1) would throw, so skip the line instead.
                    logger.info("Ignoring malformed line in " + fileName + ": " + string);
                    continue;
                  }

                final String name = string.substring(0, i).trim();
                final String normalizedName = string.substring(i + 1).trim();
                nameMap.put(name, normalizedName);
              }

            logger.info("Normalization map: " + nameMap);
          }
        catch (IOException e)
          {
            logger.throwing(CLASS, "ctor", e);
          }
        finally
          {
            if (br != null)
              {
                try
                  {
                    br.close();
                  }
                catch (IOException e)
                  {
                    logger.throwing(CLASS, "ctor", e);
                  }
              }
          }
      }

    /*******************************************************************************
     *
     * Returns the canonical form of a name. The trimmed name is looked up in the
     * table; when there is no entry, the original (untrimmed) name is returned with
     * every '-' replaced by a space.
     *
     * @param name  the name to normalize
     * @return      the canonical name
     *
     *******************************************************************************/
    @Nonnull
    public String normalized (@Nonnull final String name)
      {
        String normalized = nameMap.get(name.trim());
        normalized = (normalized != null) ? normalized : name.replace("-", " ");
        logger.finest("name normalization: " + name + " -> " + normalized);

        return normalized;
      }

    /*******************************************************************************
     *
     * Returns the canonical form of a camera model name: the tokens listed in
     * STRIP_FROM_CAMERA_MODEL are removed, then the result is normalized against
     * the table loaded from "CameraModel.properties".
     *
     * @param name  the raw camera model name
     * @return      the canonical camera model name
     *
     *******************************************************************************/
    @Nonnull
    public static String canonicalCameraModel (@Nonnull final String name)
      {
        return cameraModelCanonicalizer.normalized(strip(name, STRIP_FROM_CAMERA_MODEL));
      }

    /*******************************************************************************
     *
     * Returns the canonical form of a camera maker name: the tokens listed in
     * STRIP_FROM_CAMERA_MAKER are removed, the remaining words (separated by spaces
     * or '-') are capitalized and joined by single spaces, then the result is
     * normalized against the table loaded from "Maker.properties".
     *
     * @param name  the raw maker name
     * @return      the canonical maker name
     *
     *******************************************************************************/
    @Nonnull
    public static String canonicalMaker (@Nonnull String name)
      {
        final String result = strip(name, STRIP_FROM_CAMERA_MAKER);
        final StringBuilder builder = new StringBuilder();

        for (final String s : result.split("[ -]"))
          {
            if (s.length() > 0)
              {
                if (builder.length() > 0)
                  {
                    builder.append(" ");
                  }

                // Locale.ROOT keeps the conversion independent of the user's locale: with a
                // Turkish default locale "NIKON" would become "Nıkon" and miss the lookup table.
                builder.append(s.substring(0, 1).toUpperCase(Locale.ROOT));

                if (s.length() > 1)
                  {
                    builder.append(s.substring(1).toLowerCase(Locale.ROOT));
                  }
              }
          }

        return makerCanonicalizer.normalized(builder.toString());
      }

    /*******************************************************************************
     *
     * Removes every match of each given regular expression from a name. After each
     * removal, pairs of spaces are collapsed into one (a single pass) and the
     * result is trimmed.
     *
     * @param name      the name to clean up
     * @param patterns  the regular expressions to remove, applied in order
     * @return          the cleaned-up name
     *
     *******************************************************************************/
    @Nonnull
    private static String strip (@Nonnull final String name, @Nonnull final List<String> patterns)
      {
        String result = name;

        for (final String pattern : patterns)
          {
            result = result.replaceAll(pattern, "");
            result = result.replaceAll("  ", " ").trim();
          }

        return result;
      }
  }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy