All Downloads are FREE. Search and download functionalities are using the official Maven repository.

it.unimi.dsi.io.DelimitedWordReader Maven / Gradle / Ivy

Go to download

Blazegraph modifications to the DSI utils. These are forked from version 1.10.0 under LGPLv2.1.

There is a newer version: 2.1.4
Show newest version
package it.unimi.dsi.io;

/*		 
 * DSI utilities
 *
 * Copyright (C) 2009 Sebastiano Vigna 
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 2.1 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 */


import it.unimi.dsi.fastutil.Hash;
import it.unimi.dsi.fastutil.chars.CharOpenHashSet;
import it.unimi.dsi.fastutil.chars.CharSet;
import it.unimi.dsi.fastutil.chars.CharSets;
import it.unimi.dsi.lang.MutableString;

import java.io.Reader;

/** A word reader that breaks words on a given set of characters.
 * 
 * 

This class is a simple subclass of {@link FastBufferedReader}. It * overwrites {@link #isWordConstituent(char)} so that word constituents * are defined negatively by a set of delimiters defined at construction time. * There is a {@link DelimitedWordReader#DelimitedWordReader(String) constructor * accepting the delimiter set as a string}. Note that LF and CR are always considered to be delimiters. * */ public class DelimitedWordReader extends FastBufferedReader { private static final long serialVersionUID = 1L; /** The set of delimiters used to break the character stream into words. */ private final CharOpenHashSet delimiters; @Override protected boolean isWordConstituent( char c ) { return ! delimiters.contains( c ); } private void addCrLf() { delimiters.add( '\n' ); delimiters.add( '\r' ); } /** Creates a new delimited word reader with a given buffer size and set of delimiters. * The wrapped reader will have to be set later using {@link #setReader(Reader)}. * * @param bufferSize the size in bytes of the internal buffer. * @param delimiters a set of characters that will be considered word delimiters. */ public DelimitedWordReader( int bufferSize, CharSet delimiters ) { super( bufferSize ); this.delimiters = new CharOpenHashSet( delimiters, Hash.VERY_FAST_LOAD_FACTOR ); addCrLf(); } /** Creates a new delimited word reader with a buffer of {@link #DEFAULT_BUFFER_SIZE} characters. * The wrapped reader will have to be set later using {@link #setReader(Reader)}. * @param delimiters a set of characters that will be considered word delimiters. */ public DelimitedWordReader( CharSet delimiters ) { this.delimiters = new CharOpenHashSet( delimiters, Hash.VERY_FAST_LOAD_FACTOR ); addCrLf(); } /** Creates a new delimited word reader with a buffer of {@link #DEFAULT_BUFFER_SIZE} characters. * The wrapped reader will have to be set later using {@link #setReader(Reader)}. * @param delimiters a set of characters that will be considered word delimiters, specified as a string. 
*/ public DelimitedWordReader( String delimiters ) { this( new CharOpenHashSet( delimiters.toCharArray() ) ); } /** Creates a new delimited word reader with a given buffer size and set of delimiters. * The wrapped reader will have to be set later using {@link #setReader(Reader)}. * * @param bufferSize the size in bytes of the internal buffer, specified as a string. * @param delimiters a set of characters that will be considered word delimiters, specified as a string. */ public DelimitedWordReader( String bufferSize, String delimiters ) { this( Integer.parseInt( bufferSize ), new CharOpenHashSet( delimiters.toCharArray() ) ); } /** Creates a new delimited word reader by wrapping a given reader with a given buffer size and using a set of delimiters. * * @param r a reader to wrap. * @param bufferSize the size in bytes of the internal buffer. * @param delimiters a set of characters that will be considered word delimiters. */ public DelimitedWordReader( Reader r, int bufferSize, CharSet delimiters ) { super( r, bufferSize ); this.delimiters = new CharOpenHashSet( delimiters, Hash.VERY_FAST_LOAD_FACTOR ); addCrLf(); } /** Creates a new delimited word reader by wrapping a given reader with a buffer of {@link #DEFAULT_BUFFER_SIZE} characters using a given set of delimiters. * * @param r a reader to wrap. * @param delimiters a set of characters that will be considered word delimiters. */ public DelimitedWordReader( Reader r, CharSet delimiters ) { super( r ); this.delimiters = new CharOpenHashSet( delimiters, Hash.VERY_FAST_LOAD_FACTOR ); addCrLf(); } /** Creates a new delimited word reader by wrapping a given fragment of a character array and using a set delimiters. * *

The effect of {@link #setReader(Reader)} on a buffer created with * this constructor is undefined. * * @param array the array that will be wrapped by the reader. * @param offset the first character to be used. * @param length the number of character to be used. * @param delimiters a set of characters that will be considered word delimiters. */ public DelimitedWordReader( char[] array, int offset, int length, CharSet delimiters ) { super( array, offset, length ); this.delimiters = new CharOpenHashSet( delimiters, Hash.VERY_FAST_LOAD_FACTOR ); addCrLf(); } /** Creates a new delimited word reader by wrapping a given character array and using a set delimiters. * *

The effect of {@link #setReader(Reader)} on a buffer created with * this constructor is undefined. * * @param array the array that will be wrapped by the reader. * @param delimiters a set of characters that will be considered word delimiters. */ public DelimitedWordReader( char[] array, CharSet delimiters ) { super( array ); this.delimiters = new CharOpenHashSet( delimiters, Hash.VERY_FAST_LOAD_FACTOR ); addCrLf(); } /** Creates a new delimited word reader by wrapping a given mutable string and using a set of delimiters. * *

The effect of {@link #setReader(Reader)} on a buffer created with * this constructor is undefined. * * @param s the mutable string that will be wrapped by the reader. * @param delimiters a set of characters that will be considered word delimiters. */ public DelimitedWordReader( MutableString s, CharSet delimiters ) { super( s, CharSets.EMPTY_SET ); this.delimiters = new CharOpenHashSet( delimiters, Hash.VERY_FAST_LOAD_FACTOR ); addCrLf(); } public String toSpec() { return toString(); } public String toString() { final String className = getClass().getName(); CharOpenHashSet additionalDelimiters = (CharOpenHashSet)delimiters.clone(); additionalDelimiters.remove( '\n' ); additionalDelimiters.remove( '\r' ); String delimiters = new String( additionalDelimiters.toCharArray() ); if ( bufferSize == DEFAULT_BUFFER_SIZE ) return className + "(\"" + delimiters + "\")"; return className + "(" + bufferSize + ",\"" + delimiters + "\")"; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy