All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.owasp.dependencycheck.data.lucene.AlphaNumericFilter Maven / Gradle / Ivy

/*
 * This file is part of dependency-check-core.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Copyright (c) 2017 Jeremy Long. All Rights Reserved.
 */
package org.owasp.dependencycheck.data.lucene;

import java.io.IOException;
import java.util.ArrayDeque;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

/**
 * A simple alphanumeric filter that removes non-alphanumeric characters from
 * the terms. If a term contains a non-alphanumeric character it may be split
 * into multiple terms:
 *
 * 
 * 
 * 
 * 
 * 
 * 
termresults in
bobbob
bob-catbob cat
#$%[skipped]
* * @author jeremy long */ public final class AlphaNumericFilter extends AbstractTokenizingFilter { /** * The position increment attribute. */ private final PositionIncrementAttribute posIncrementAttribute = addAttribute(PositionIncrementAttribute.class); /** * Used to count the number of terms skipped as they were only made up of * special characters. */ private int skipCounter; /** * Constructs a new AlphaNumericFilter. * * @param stream the TokenStream that this filter will process */ public AlphaNumericFilter(TokenStream stream) { super(stream); } /** * {@inheritDoc} */ @Override public boolean incrementToken() throws IOException { final ArrayDeque tokens = getTokens(); final CharTermAttribute termAtt = getTermAtt(); if (tokens.isEmpty()) { String[] parts; skipCounter = 0; while (input.incrementToken()) { final String text = new String(termAtt.buffer(), 0, termAtt.length()); if (text.isEmpty()) { return true; } parts = text.split("[^a-zA-Z0-9]"); if (parts.length == 0) { skipCounter += posIncrementAttribute.getPositionIncrement(); } else { if (skipCounter != 0) { posIncrementAttribute.setPositionIncrement(posIncrementAttribute.getPositionIncrement() + skipCounter); } for (String part : parts) { if (!part.isEmpty()) { tokens.add(part); } } break; } } } return addTerm(); } /** * {@inheritDoc} */ @Override public void reset() throws IOException { super.reset(); skipCounter = 0; } /** * {@inheritDoc} */ @Override public void end() throws IOException { super.end(); posIncrementAttribute.setPositionIncrement(posIncrementAttribute.getPositionIncrement() + skipCounter); } /** * {@inheritDoc} */ @Override public int hashCode() { return new HashCodeBuilder(13, 103) .appendSuper(super.hashCode()) .append(posIncrementAttribute) .append(skipCounter) .build(); } /** * {@inheritDoc} */ @Override public boolean equals(Object obj) { if (obj == null || !(obj instanceof AlphaNumericFilter)) { return false; } if (this == obj) { return true; } final AlphaNumericFilter rhs = (AlphaNumericFilter) obj; return new EqualsBuilder() .appendSuper(super.equals(obj)) .append(skipCounter, rhs.skipCounter) .append(posIncrementAttribute, rhs.posIncrementAttribute) .isEquals(); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy