eu.interedition.collatex.simple.SimpleWitness Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of collatex-core Show documentation
Show all versions of collatex-core Show documentation
A Java library for collating textual sources, for example, to produce an apparatus.
/*
* Copyright (c) 2015 The Interedition Development Group.
*
* This file is part of CollateX.
*
* CollateX is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* CollateX is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with CollateX. If not, see <http://www.gnu.org/licenses/>.
*/
package eu.interedition.collatex.simple;
import eu.interedition.collatex.Token;
import eu.interedition.collatex.Witness;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
public class SimpleWitness implements Iterable, Witness, Comparator {
private final String sigil;
private final List tokens = new ArrayList<>();
public SimpleWitness(String sigil) {
this.sigil = sigil;
}
public SimpleWitness(String sigil, String content) {
this(sigil, content, SimplePatternTokenizer.BY_WS_OR_PUNCT, SimpleTokenNormalizers.LC_TRIM_WS);
}
public SimpleWitness(String sigil,
String content,
Function> tokenizer,
Function normalizer) {
this(sigil);
setTokenContents(tokenizer.apply(content), normalizer);
}
public List getTokens() {
return tokens;
}
public void setTokenContents(Stream tokenContents, Function normalizer) {
setTokens(tokenContents.map(content -> new SimpleToken(SimpleWitness.this, content, normalizer.apply(content))).collect(Collectors.toList()));
}
public void setTokens(List tokens) {
this.tokens.clear();
this.tokens.addAll(tokens);
}
@Override
public String getSigil() {
return sigil;
}
@Override
public Iterator iterator() {
return Collections.unmodifiableList(tokens).iterator();
}
@Override
public String toString() {
return getSigil();
}
@Override
public int compare(SimpleToken o1, SimpleToken o2) {
final int o1Index = tokens.indexOf(o1);
final int o2Index = tokens.indexOf(o2);
if (o1Index < 0) {
throw new IllegalArgumentException(o1.toString());
}
if (o2Index < 0) {
throw new IllegalArgumentException();
}
return (o1Index - o2Index);
}
public static final Pattern PUNCT = Pattern.compile("\\p{Punct}");
public static final Function TOKEN_NORMALIZER = input -> {
final String normalized = PUNCT.matcher(input.trim().toLowerCase()).replaceAll("");
return (normalized == null || normalized.length() == 0 ? input : normalized);
};
}