de.uni_leipzig.asv.utils.IniReader Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of toolbox-utils Show documentation
Show all versions of toolbox-utils Show documentation
ASV Toolbox is a modular collection of tools for the exploration of written language data. They work either on word lists or text and solve several linguistic classification and clustering tasks. The topics covered contain language detection, POS-tagging, base form reduction, named entity recognition, and terminology extraction. On a more abstract level, the algorithms deal with various kinds of word similarity, using pattern based and statistical approaches. The collection can be used to work on large real world data sets as well as for studying the underlying algorithms. The ASV Toolbox can work on plain text files and connect to a MySQL database. While it is especially designed to work with corpora of the Leipzig Corpora Collection, it can easily be adapted to other sources.
The newest version!
/*******************************************************************************
* The MIT License (MIT)
* Copyright (c) 2007, University of Leipzig, Institut für Informatik,
* Abteilung Automatische Sprachverarbeitung
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
******************************************************************************/
package de.uni_leipzig.asv.utils;
// standard imports
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.StringTokenizer;
import java.util.Vector;
/**
* This class reads file for the IniFile class.
*
# Format of Inifiles should look like this:
*
# comment
*
* [PRIMARY_KEY]
* SECONDARY_KEY_1=value
* SECONDARY_KEY_2=value
*
* @author Stefan Bordag
* @date 28.12.2001
* @see de.uni_leipzig.asv.bordag.sgz.util.IniFile
* @see de.uni_leipzig.asv.bordag.sgz.util.IniWriter
*/
public class IniReader {
// actually a double Hashtable
protected Hashtable> entries = null;
// the file which was used to read the information
protected String file = null;
/**
* Default constructor hidden away to prevent creating nonsense instances
*/
private IniReader() {
}
/**
* Convenience constructor
*/
public IniReader(File file) throws FileNotFoundException {
init(file.getAbsolutePath());
}
/**
* Creates an instance of the IniReader and reads the file, thus cashing the
* data
*/
public IniReader(String fileName) throws FileNotFoundException {
init(fileName);
}
/**
* Initializes the reader
*/
private void init(String fileName) throws FileNotFoundException {
this.file = fileName;
try {
this.entries = new Hashtable>();
BufferedReader reader = null;
if (fileName != null) {
reader = new BufferedReader(new FileReader(fileName));
}
Vector fileLines = getValidLines(reader);
this.entries = fillEntries(this.entries, fileLines);
} catch (FileNotFoundException fnf) {
this.entries = new Hashtable>();
throw fnf;
} catch (Exception e) {
e.printStackTrace();
}
}
/**
* Returns an ordered list containing only valid IniReader-valid lines, i.e.
* containing either at least one
*/
private Vector getValidLines(BufferedReader reader)
throws IOException {
Vector fileLines = new Vector();
if (reader == null) {
return fileLines;
}
String line = reader.readLine();
while (line != null) {
if (valid(line)) {
fileLines.add(line);
}
line = reader.readLine();
}
return fileLines;
}
/**
* Fills the double Hashtable
*/
private Hashtable> fillEntries(
Hashtable> table,
Vector lines) {
Hashtable temp = new Hashtable();
String line = null;
lines = reverse(lines);
for (Enumeration e = lines.elements(); e.hasMoreElements();) {
line = e.nextElement();
if (isPrimaryKey(line)) {
if (temp != null) {
table.put(line.substring(1, line.length() - 1), temp); // done
// with
// a
// primary
// key
}
temp = new Hashtable();
} else if (isSecondaryKey(line)) {
if (temp == null) {
Debugger.getInstance().println("malformed ini file.",
Debugger.MED_LEVEL);
}
temp.put(getKey(line), convertNewLines(getValue(line)));
}
}
return table;
}
private String convertNewLines(String string) {
if (string.lastIndexOf("~") > -1) {
StringTokenizer tokenizer = new StringTokenizer(string, "~");
String retVal = "";
int i = 0;
while (tokenizer.hasMoreTokens()) {
String curToken = tokenizer.nextToken();
if (i > 0) {
retVal = retVal + "\n" + curToken;
} else {
retVal = retVal + "" + curToken;
}
i++;
}
return retVal;
}
return string;
}
/**
* Reverses a Vector
*/
private Vector reverse(Vector lines) {
Vector newLines = new Vector(lines.capacity());
for (String string : lines) {
newLines.add(0, string);
}
return newLines;
}
/**
* Returns the String before the ASSIGNMENT string, removing all whitespace
* between the last char and the assignment string
*/
private String getKey(String line) {
String retVal = line.substring(0, line.indexOf(IniFile.ASSIGNMENT));
retVal = retVal.trim();
return retVal;
}
/**
* Returns the String after the ASSIGNMENT string, removing all whitespace
* between the assignment string and the first non-whitespace char
*/
private String getValue(String line) {
String retVal = line.substring(line.indexOf(IniFile.ASSIGNMENT)
+ IniFile.ASSIGNMENT.length(), line.length());
retVal = retVal.trim();
return retVal;
}
/**
* Computes whether the given line of a file is valid for IniReader or not
*/
private boolean valid(String line) {
if (line.indexOf(IniFile.COMMENT) == 0) {
return false; // it's a comment
}
if (line.indexOf(IniFile.ASSIGNMENT) != -1) // check whether it as a
// secondary key entry
{
return true; // it's a secondary key entry
}
if ((line.indexOf(IniFile.PKEY_LEFT_BRACKET) != -1) && // or a primary
(// key
line.indexOf(IniFile.PKEY_RIGHT_BRACKET) != -1)) // but that
// one
// mus thave
// both brackets
// present
{
return true; // it's a primary key
}
return false; // some other line, probably malformed
}
/**
* Returns whether the line contains a secondary key or not
*/
private boolean isSecondaryKey(String line) {
if ((line.indexOf(IniFile.PKEY_LEFT_BRACKET) != -1) && // or a primary
(// key
line.indexOf(IniFile.PKEY_RIGHT_BRACKET) != -1)) // but that
// one
// mus thave
// both brackets
// present
{
return false; // it's a primary key
}
if (line.indexOf(IniFile.ASSIGNMENT) != -1) // check whether it as a
// secondary key entry
{
return true; // it's a secondary key entry
}
return false;
}
/**
* Returns whether the line contains a primary key or not
*/
private boolean isPrimaryKey(String line) {
if ((line.indexOf(IniFile.PKEY_LEFT_BRACKET) != -1) && // or a primary
(// key
line.indexOf(IniFile.PKEY_RIGHT_BRACKET) != -1)) // but that
// one
// mus thave
// both brackets
// present
{
return true; // it's a primary key
}
return false;
}
/**
* Returns the whole Hashtable
*/
public Hashtable> getPrimaryKeys() {
return this.entries;
}
/**
* Returns a printrepresentation of this class
*/
@Override
public String toString() {
return "IniReader with filename " + this.file;
}
}