All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.analysis.sinks.DateRecognizerSinkTokenizer Maven / Gradle / Ivy

There is a newer version: 3.6.2
Show newest version
package org.apache.lucene.analysis.sinks;
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.analysis.SinkTokenizer;
import org.apache.lucene.analysis.Token;

import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.text.ParseException;
import java.util.List;
import java.util.Date;


/**
 * Attempts to parse the {@link org.apache.lucene.analysis.Token#termBuffer()} as a Date using a {@link java.text.DateFormat}.
 * If the value is a Date, it will add it to the sink.
 * 

* Also marks the sink token with {@link org.apache.lucene.analysis.Token#type()} equal to {@link #DATE_TYPE} * * **/ public class DateRecognizerSinkTokenizer extends SinkTokenizer { public static final String DATE_TYPE = "date"; protected DateFormat dateFormat; /** * Uses {@link java.text.SimpleDateFormat#getDateInstance()} as the {@link java.text.DateFormat} object. */ public DateRecognizerSinkTokenizer() { this(null, SimpleDateFormat.getDateInstance()); } public DateRecognizerSinkTokenizer(DateFormat dateFormat) { this(null, dateFormat); } /** * Uses {@link java.text.SimpleDateFormat#getDateInstance()} as the {@link java.text.DateFormat} object. * @param input The input list of Tokens that are already Dates. They should be marked as type {@link #DATE_TYPE} for completeness */ public DateRecognizerSinkTokenizer(List/**/ input) { this(input, SimpleDateFormat.getDateInstance()); } /** * * @param input * @param dateFormat The date format to use to try and parse the date. Note, this SinkTokenizer makes no attempt to synchronize the DateFormat object */ public DateRecognizerSinkTokenizer(List/**/ input, DateFormat dateFormat) { super(input); this.dateFormat = dateFormat; } public void add(Token t) { //Check to see if this token is a date if (t != null) { try { Date date = dateFormat.parse(t.term());//We don't care about the date, just that we can parse it as a date if (date != null) { t.setType(DATE_TYPE); super.add(t); } } catch (ParseException e) { } } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy