// org.daisy.braille.utils.pef.TextInputDetector Maven / Gradle / Ivy
/*
* Braille Utils (C) 2010-2011 Daisy Consortium
*
* This library is free software; you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 2.1 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this library; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package org.daisy.braille.utils.pef;
import org.daisy.dotify.api.factory.FactoryProperties;
import org.daisy.dotify.api.table.BrailleConstants;
import org.daisy.dotify.api.table.BrailleConverter;
import org.daisy.dotify.api.table.Table;
import org.daisy.dotify.api.table.TableCatalogService;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
* Provides a method for detecting a table based on text input.
*
* @author Joel Håkansson
*/
public class TextInputDetector {
private final TableCatalogService factory;
/**
* Creates a new TextInputDetector.
*
* @param factory the table catalog
*/
public TextInputDetector(TableCatalogService factory) {
this.factory = factory;
}
private BitSet analyze(InputStream is) throws IOException {
BitSet set = new BitSet(256);
int val;
while ((val = is.read()) > -1) {
set.set(val);
}
is.close();
return set;
}
private BitSet getTableThumbprint(BrailleConverter c) throws IOException {
ByteArrayInputStream bis = new ByteArrayInputStream(
c.toText(BrailleConstants.BRAILLE_PATTERNS_256).getBytes(c.getPreferredCharset().name())
);
return analyze(bis);
}
/**
* @param input
* @param clearCodes set to true to clear the LF, CR, FF, and SUB fields.
* @return returns a thumb print for the input stream
* @throws InputDetectionException
*/
private BitSet readInput(InputStream input, boolean clearCodes) throws IOException {
BitSet inputThumbprint;
inputThumbprint = analyze(input);
if (clearCodes) {
inputThumbprint.clear(0x0a); //LF
inputThumbprint.clear(0x0c); //FF
inputThumbprint.clear(0x0d); //CR
inputThumbprint.clear(0x1a); //SUB
}
return inputThumbprint;
}
private Map> analyzeTableCatalog(boolean eightDot) {
Logger logger = Logger.getLogger(TextHandler.class.getCanonicalName());
Map> tables = new HashMap<>();
BitSet tableThumbprint;
for (FactoryProperties fp : factory.list()) {
try {
Table type = factory.newTable(fp.getIdentifier());
BrailleConverter c = type.newBrailleConverter();
if (!eightDot && c.supportsEightDot()) {
continue;
}
tableThumbprint = getTableThumbprint(c);
Map t = tables.get(tableThumbprint);
String sample = c.toText(BrailleConstants.BRAILLE_PATTERNS_256);
if (t == null) {
t = new HashMap<>();
t.put(sample, type);
tables.put(tableThumbprint, t);
} else {
Table x = t.get(sample);
if (x != null) {
logger.fine(
"Ignoring " + type.getDisplayName() + " since it is identical to " + x.getDisplayName()
);
} else {
logger.fine(type.getDisplayName() + " has the same bit set as another table.");
t.put(sample, type);
}
}
} catch (IOException e) {
logger.log(Level.WARNING, "Could not read table thumbprint", e);
}
}
return tables;
}
/**
* Detects tables matching the supplied text input. Only tables that create
* non-identical output are included. Therefore, more than one match
* indicates that the output would look different depending on the
* table chosen.
*
* @param is text input stream
* @return returns a list of matching tables
* @throws IOException if input cannot be read
*/
public List detect(InputStream is) throws IOException {
BitSet inputThumbprint = readInput(is, true);
List res = new ArrayList();
//BitSet tableThumbprint;
int size = 0;
for (int i = inputThumbprint.nextSetBit(0); i >= 0; i = inputThumbprint.nextSetBit(i + 1)) {
size++;
}
Logger logger = Logger.getLogger(TextHandler.class.getCanonicalName());
Map> tables = analyzeTableCatalog(size > (64 - 4));
// 6-dot minus cleared codes
for (BitSet key : tables.keySet()) {
BitSet tableThumbprint = (BitSet) key.clone();
tableThumbprint.and(inputThumbprint);
if (tableThumbprint.equals(inputThumbprint)) {
Map yy = tables.get(key);
if (yy != null) {
Collection coll = yy.values();
StringBuilder sb = new StringBuilder();
for (Table t : coll) {
sb.append(" '" + t.getDisplayName() + "'");
}
if (coll.size() > 1) {
// there are more than one table with the same bit set producing different text results
logger.warning(
"These (matching) tables uses the same bytes for different braille characters:" +
sb.toString()
);
}
logger.fine("Input matches table(s):" + sb.toString());
res.addAll(coll);
}
}
}
return res;
}
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy