// Source listing: net.sf.okapi.lib.verification.QualityCheckSession
/*===========================================================================
Copyright (C) 2010-2017 by the Okapi Framework contributors
-----------------------------------------------------------------------------
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
===========================================================================*/
package net.sf.okapi.lib.verification;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.regex.Pattern;
import net.sf.okapi.common.Event;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.XMLWriter;
import net.sf.okapi.common.exceptions.OkapiException;
import net.sf.okapi.common.exceptions.OkapiIOException;
import net.sf.okapi.common.filters.IFilter;
import net.sf.okapi.common.filters.IFilterConfigurationMapper;
import net.sf.okapi.common.resource.ITextUnit;
import net.sf.okapi.common.resource.RawDocument;
import net.sf.okapi.common.resource.StartDocument;
import net.sf.okapi.common.resource.StartSubDocument;
public class QualityCheckSession {
/** File extension of a saved session file. */
public static final String FILE_EXTENSION = ".qcs";
/** Signature written as the first bytes of a session file. */
private static final String SERIALSIGNATURE = "OQCS";
/** Format version in which the parameters are stored as a counted sequence of UTF blocks. */
private static final long SERIALVERSIONUID = 2L;
private static final long SERIALVERSIONUID_1BLOCK = 1L; // Versions using a simple writeUTF for the parameters
// Keep block size way below 64K because of UTF character size for non-ASCII
private static final int MAXBLOCKLEN = (65000/3);
// Documents of the session, keyed by their input URI
Map<URI, RawDocument> rawDocs; // Temporary solution waiting for the DB
IFilterConfigurationMapper fcMapper;
private Parameters params;
// Issues found by the checker; iterated as Issue objects throughout the class
private List<Issue> issues;
private QualityChecker checker;
private LocaleId sourceLocale = LocaleId.ENGLISH;
private LocaleId targetLocale = LocaleId.FRENCH;
// Filter instance handed back to the mapper on each createFilter() call
private IFilter filter;
private boolean modified;
private boolean autoRefresh;
/**
 * Creates a new session and initializes its state through {@link #reset()}.
 */
public QualityCheckSession() {
    this.reset();
}
/**
 * Indicates whether the session is flagged as modified.
 *
 * @return the current value of the modified flag.
 */
public boolean isModified() {
    return this.modified;
}
/**
 * Sets the modified flag of the session.
 *
 * @param modified the new value of the flag.
 */
public void setModified(boolean modified) {
    this.modified = modified;
}
/**
 * Gets the list of issues held by this session.
 * The returned object is the session's own list, not a copy.
 *
 * @return the live list of issues.
 */
public List<Issue> getIssues() {
    return issues;
}
/**
 * Gets the parameters currently used by this session.
 *
 * @return the parameters object.
 */
public Parameters getParameters() {
    return this.params;
}
/**
 * Replaces the parameters used by this session.
 *
 * @param params the new parameters object.
 */
public void setParameters(Parameters params) {
    this.params = params;
}
/**
 * Gets the auto-refresh flag.
 *
 * @return the current value of the flag.
 */
public boolean getAutoRefresh() {
    return this.autoRefresh;
}
/**
 * Sets the auto-refresh flag.
 *
 * @param autoRefresh the new value of the flag.
 */
public void setAutoRefresh(boolean autoRefresh) {
    this.autoRefresh = autoRefresh;
}
/**
 * Registers a raw document in the session under its input URI and flags
 * the session as modified. When the session holds exactly one document
 * after the call, the session locales are replaced by the source and
 * target locales of that document.
 *
 * @param rawDoc the raw document to add (it must have an input URI and its
 * source and target locale set).
 */
public void addRawDocument(RawDocument rawDoc) {
    rawDocs.put(rawDoc.getInputURI(), rawDoc);
    boolean onlyDocument = (rawDocs.size() == 1);
    if (onlyDocument) {
        // A lone document dictates the default locales of the session
        this.sourceLocale = rawDoc.getSourceLocale();
        this.targetLocale = rawDoc.getTargetLocale();
    }
    this.modified = true;
}
/**
 * Gets the documents of this session.
 *
 * @return a new list holding the documents; changing the list does not
 * change the session.
 */
public List<RawDocument> getDocuments() {
    return new ArrayList<>(rawDocs.values());
}
/**
 * Gets the map of the documents of this session, keyed by input URI.
 * The returned object is the session's own map, not a copy.
 *
 * @return the live map of documents.
 */
public Map<URI, RawDocument> getDocumentsMap() {
    return rawDocs;
}
/**
 * Sets the mapper used to create the filter for each document to check.
 *
 * @param fcMapper the filter configuration mapper to use.
 */
public void setFilterConfigurationMapper(IFilterConfigurationMapper fcMapper) {
    this.fcMapper = fcMapper;
}
/**
 * Gets the mapper used to create the filter for each document to check.
 *
 * @return the filter configuration mapper (null if none was set).
 */
public IFilterConfigurationMapper getFilterConfigurationMapper() {
    return this.fcMapper;
}
/**
 * Gets the source locale of this session.
 *
 * @return the source locale.
 */
public LocaleId getSourceLocale() {
    return this.sourceLocale;
}
/**
 * Sets the source locale of this session. The session is flagged as
 * modified when the new value differs from the current one.
 *
 * @param sourceLocale the new source locale.
 */
public void setSourceLocale(LocaleId sourceLocale) {
    // Null-safe comparison: the current locale can be null after
    // addRawDocument() copied the locale of a document that had none.
    if (!Objects.equals(this.sourceLocale, sourceLocale)) {
        modified = true;
    }
    this.sourceLocale = sourceLocale;
}
/**
 * Gets the target locale of this session.
 *
 * @return the target locale.
 */
public LocaleId getTargetLocale() {
    return this.targetLocale;
}
/**
 * Sets the target locale of this session. The session is flagged as
 * modified when the new value differs from the current one.
 *
 * @param targetLocale the new target locale.
 */
public void setTargetLocale(LocaleId targetLocale) {
    // Null-safe comparison: the current locale can be null after
    // addRawDocument() copied the locale of a document that had none.
    if (!Objects.equals(this.targetLocale, targetLocale)) {
        modified = true;
    }
    this.targetLocale = targetLocale;
}
/**
 * Discards the documents, issues, parameters and checker of this session
 * and replaces each of them with a fresh instance. The locales and the
 * modified and auto-refresh flags are left as they are.
 */
public void reset() {
    this.rawDocs = new HashMap<>();
    this.issues = new ArrayList<>();
    this.params = new Parameters();
    this.checker = new QualityChecker();
}
/**
 * Enables every issue of the session and flags the session as modified.
 */
public void resetDisabledIssues() {
    Iterator<Issue> it = issues.iterator();
    while (it.hasNext()) {
        it.next().setEnabled(true);
    }
    this.modified = true;
}
/**
 * Gets the number of documents in this session.
 *
 * @return the number of documents.
 */
public int getDocumentCount() {
    final int count = rawDocs.size();
    return count;
}
/**
 * Re-runs the verification on a single document of the session.
 * The checker process is started with the session locales before the
 * document is looked up; nothing else happens if the URI is unknown.
 *
 * @param docId the input URI of the document to re-check.
 */
public void recheckDocument(URI docId) {
    startProcess(sourceLocale, targetLocale);
    RawDocument doc = rawDocs.get(docId);
    if (doc == null) {
        return;
    }
    // No signature list: it is rebuilt from the current issues of the document
    executeRecheck(doc, null);
}
/**
 * Re-runs the verification on all documents of the session.
 * If the session has no document, the issue list is simply emptied.
 *
 * @param sigList signatures passed on to the checker for each document,
 * or null to have the list built per document from the disabled issues
 * currently held for that document.
 */
public void recheckAll(List<String> sigList) {
    if (rawDocs.isEmpty()) {
        issues.clear();
        return;
    }
    startProcess(sourceLocale, targetLocale);
    for (RawDocument rd : rawDocs.values()) {
        executeRecheck(rd, sigList);
    }
}
/**
 * Runs the verification on one document: creates the filter for the
 * document, optionally validates the XLIFF schema, then feeds the
 * start-document, start-sub-document and text-unit events to the checker.
 * The filter is closed when done, including on error.
 *
 * @param rd the document to process.
 * @param sigList signatures to pass to the checker, or null to build the
 * list from the disabled issues currently held for this document.
 * @throws OkapiException if no filter configuration mapper is set or if
 * no filter can be created for the document.
 */
private void executeRecheck(RawDocument rd,
    List<String> sigList) {
    try {
        if (fcMapper == null) {
            // Explicit error instead of a NullPointerException below
            throw new OkapiException("No filter configuration mapper is set.");
        }
        // Process the document
        filter = fcMapper.createFilter(rd.getFilterConfigId(), filter);
        if (filter == null) {
            throw new OkapiException("Unsupported filter type.");
        }
        if (params.getCheckXliffSchema() && "okf_xliff".equals(filter.getName())) {
            ValidateXliffSchema.validateXliffSchema(rd.getInputURI());
        }
        filter.open(rd);
        while (filter.hasNext()) {
            Event event = filter.next();
            switch (event.getEventType()) {
            case START_DOCUMENT:
                StartDocument sd = event.getStartDocument();
                // If signatures exists, don't create the list from the current issues
                if (sigList == null) {
                    sigList = clearIssues(rd.getInputURI(), true);
                } else {
                    clearIssues(rd.getInputURI(), false);
                }
                processStartDocument(sd, sigList);
                break;
            case START_SUBDOCUMENT:
                processStartSubDocument(event.getStartSubDocument());
                break;
            case TEXT_UNIT:
                processTextUnit(event.getTextUnit());
                break;
            default: // Do nothing
                break;
            }
        }
    } finally {
        if (filter != null) {
            filter.close();
        }
    }
}
/**
 * Collects the signatures of the issues that are currently disabled.
 * Enabled issues are not included.
 *
 * @return a new list of signatures (possibly empty, never null).
 */
private List<String> getAllSignatures() {
    List<String> list = new ArrayList<>();
    Iterator<Issue> iter = issues.iterator();
    while (iter.hasNext()) {
        Issue issue = iter.next();
        if (!issue.getEnabled()) {
            list.add(issue.getSignature());
        }
    }
    return list;
}
/**
 * Removes from the session all issues that belong to a given document.
 *
 * @param docId the URI of the document whose issues are removed.
 * @param generateSigList true to collect the signatures of the removed
 * issues that were disabled.
 * @return the signatures of the removed disabled issues when
 * generateSigList is true (possibly empty), or null when it is false.
 */
public List<String> clearIssues(URI docId,
    boolean generateSigList) {
    List<String> sigList = null;
    // Create signature list if needed
    if (generateSigList) {
        sigList = new ArrayList<>();
    }
    // An explicit iterator is needed to remove entries while iterating
    Iterator<Issue> iter = issues.iterator();
    while (iter.hasNext()) {
        Issue issue = iter.next();
        if (issue.getDocumentURI().equals(docId)) {
            // Generate signature if the issue is disabled
            if (generateSigList && !issue.getEnabled()) {
                sigList.add(issue.getSignature());
            }
            // Remove issue
            iter.remove();
        }
    }
    return sigList;
}
/**
 * Saves this session to a file.
 *
 * @param path the path of the file to write.
 * @throws OkapiIOException if the file cannot be opened or written.
 */
public void saveSession(String path) {
    OutputStream target;
    try {
        target = new FileOutputStream(path);
    } catch (IOException e) {
        throw new OkapiIOException("Error while saving session.", e);
    }
    // The stream is closed by the callee
    saveSessionToStream(target);
}
/**
 * Writes this session to a stream and clears the modified flag.
 * Layout: signature, version, source and target locales, parameters,
 * document list (URI, filter configuration id, encoding), then the
 * signatures of the disabled issues. The stream is closed before returning.
 *
 * @param outputStream the stream to write to.
 * @throws OkapiIOException if writing or closing the stream fails.
 */
private void saveSessionToStream(OutputStream outputStream) {
    DataOutputStream dos = null;
    try {
        dos = new DataOutputStream(outputStream);
        // Serialize the parameters once; the string is used for both the
        // version decision and the actual output
        String paramsData = params.toString();
        // Header
        dos.writeBytes(SERIALSIGNATURE);
        // Try to write backward compatible file if possible
        long version = (getLongStringBlockCount(paramsData) > 1)
            ? SERIALVERSIONUID
            : SERIALVERSIONUID_1BLOCK;
        dos.writeLong(version);
        // Locales
        dos.writeUTF(sourceLocale.toString());
        dos.writeUTF(targetLocale.toString());
        // Parameters
        // Save differently depending on the version
        if (version == SERIALVERSIONUID_1BLOCK) {
            dos.writeUTF(paramsData);
        } else {
            writeLongString(dos, paramsData);
        }
        // Document list
        dos.writeInt(rawDocs.size());
        for (RawDocument rd : rawDocs.values()) {
            dos.writeUTF(rd.getInputURI().toString());
            dos.writeUTF(rd.getFilterConfigId());
            dos.writeUTF(rd.getEncoding());
        }
        // Issues to keep disabled
        List<String> list = getAllSignatures();
        dos.writeInt(list.size());
        for (String sig : list) {
            dos.writeUTF(sig);
        }
        modified = false;
    } catch (IOException e) {
        throw new OkapiIOException("Error while saving session.", e);
    } finally {
        if (dos != null) {
            try {
                dos.close();
            } catch (IOException e) {
                throw new OkapiIOException("Error closing session file.", e);
            }
        }
    }
}
/**
 * Loads a session from a file, replacing the current content of this session.
 *
 * @param path the path of the session file to read.
 * @throws OkapiIOException if anything goes wrong while opening or
 * reading the file; the original failure is attached as the cause.
 */
public void loadSession(String path) {
    try {
        InputStream source = new FileInputStream(path);
        loadSessionFromStream(source);
    } catch (Throwable e) {
        String reason = e.getMessage();
        throw new OkapiIOException("Error reading session file.\n" + reason, e);
    }
}
/**
 * Resets this session, then reads a session from a stream: signature,
 * version, locales, parameters, document list and the signatures of the
 * issues to keep disabled. All documents are then re-checked and the
 * modified flag is cleared. The stream is closed before returning.
 *
 * @param inputStream the stream to read from.
 * @throws OkapiIOException if the signature or version is not accepted,
 * or if any other failure occurs while reading or re-checking.
 */
private void loadSessionFromStream(InputStream inputStream) {
    reset();
    DataInputStream dis = null;
    try {
        dis = new DataInputStream(inputStream);
        // Header
        // A single read() may deliver fewer bytes than requested, so keep
        // reading until the signature is complete or the stream ends
        byte[] buf = new byte[SERIALSIGNATURE.length()];
        int filled = 0;
        while (filled < buf.length) {
            int n = dis.read(buf, filled, buf.length - filled);
            if (n < 0) {
                break;
            }
            filled += n;
        }
        // The signature is plain ASCII: do not depend on the platform charset
        String tmp = new String(buf, 0, filled, StandardCharsets.US_ASCII);
        if (!tmp.equals(SERIALSIGNATURE)) {
            throw new OkapiIOException("Invalid signature: This file is not a QCS file, or is corrupted.");
        }
        long version = dis.readLong();
        if (version > SERIALVERSIONUID) {
            // For now just check the number, later we may have different ways of reading
            throw new OkapiIOException("Invalid version number: This file is not a QCS file, or is corrupted.");
        }
        // Locales
        tmp = dis.readUTF(); // Source
        sourceLocale = LocaleId.fromString(tmp);
        tmp = dis.readUTF(); // Target
        targetLocale = LocaleId.fromString(tmp);
        // Parameters
        // Allow for backward compatibility
        if (version == SERIALVERSIONUID_1BLOCK) {
            tmp = dis.readUTF();
        } else {
            tmp = readLongString(dis);
        }
        params.fromString(tmp);
        // Document list
        int count = dis.readInt();
        for (int i = 0; i < count; i++) {
            tmp = dis.readUTF();
            URI uri = new URI(tmp);
            String configId = dis.readUTF();
            String encoding = dis.readUTF();
            RawDocument rd = new RawDocument(uri, encoding, sourceLocale, targetLocale);
            rd.setFilterConfigId(configId);
            rawDocs.put(uri, rd);
        }
        // Signatures of issues to keep disabled
        List<String> sigList = new ArrayList<>();
        count = dis.readInt();
        for (int i = 0; i < count; i++) {
            sigList.add(dis.readUTF());
        }
        recheckAll(sigList);
        modified = false;
    } catch (Throwable e) {
        throw new OkapiIOException("Error reading session file.\n" + e.getMessage(), e);
    } finally {
        if (dis != null) {
            try {
                dis.close();
            } catch (IOException e) {
                throw new OkapiIOException("Error closing session file.", e);
            }
        }
    }
}
/**
 * Starts a verification process on the checker, handing it the given
 * locales together with the parameters and the issue list of this session.
 *
 * @param srcLoc the source locale.
 * @param trgLoc the target locale.
 */
public void startProcess(LocaleId srcLoc, LocaleId trgLoc) {
    this.checker.startProcess(srcLoc, trgLoc, this.params, this.issues);
}
/**
 * Forwards a start-document resource to the checker.
 *
 * @param startDoc the start-document resource.
 * @param sigList the list of signatures to pass to the checker (as used
 * in this class: signatures of issues to keep disabled); can be null.
 */
public void processStartDocument(StartDocument startDoc,
    List<String> sigList) {
    checker.processStartDocument(startDoc, sigList);
}
/**
 * Forwards a start-sub-document resource to the checker.
 *
 * @param startSubDoc the start-sub-document resource.
 */
public void processStartSubDocument(StartSubDocument startSubDoc) {
    this.checker.processStartSubDocument(startSubDoc);
}
/**
 * Forwards a text unit to the checker.
 *
 * @param textUnit the text unit to verify.
 */
public void processTextUnit(ITextUnit textUnit) {
    this.checker.processTextUnit(textUnit);
}
/**
 * Generates the report of the issues in the format selected in the
 * parameters: output type 1 is tab-delimited text, 2 is XML, and any
 * other value is HTML.
 *
 * @param rootDir the value used to resolve the root directory variable
 * in the output path of the parameters.
 */
public void generateReport (String rootDir) {
    // Resolve the root directory variable of the output path
    final String finalPath = Util.fillRootDirectoryVariable(params.getOutputPath(), rootDir);
    // The common root is only needed when paths are shown relative to it
    final String inputRoot = params.getShowFullPath() ? null : buildIssuesDocRoot();
    // Dispatch on the selected format
    final int outputType = params.getOutputType();
    if (outputType == 1) { // Text
        generateTabDelimitedReport(finalPath, inputRoot);
    } else if (outputType == 2) { // XML
        generateXMLReport(finalPath, inputRoot);
    } else { // HTML
        generateHTMLReport(finalPath, inputRoot);
    }
}
/**
 * Gets the common root for the document URIs of the current issues.
 * The comparison ignores case when the OS is not case-sensitive.
 * @return the common path or an empty string (never null).
 */
private String buildIssuesDocRoot () {
    // Note that rawDocs is not always set, so we cannot use it
    List<String> dirs = new ArrayList<>(issues.size());
    for ( Issue issue : issues ) {
        dirs.add(Util.getDirectoryName(issue.getDocumentURI().getPath()));
    }
    if ( dirs.isEmpty() ) {
        return "";
    }
    String root = Util.longestCommonDir(!Util.isOSCaseSensitive(), dirs.toArray(new String[0]));
    return Util.ensureSeparator(root, true); // The slash should be cross-platform on URIs
}
private void generateHTMLReport (String finalPath,
String inputRoot)
{
XMLWriter writer = null;
try {
// Create the output file
writer = new XMLWriter(finalPath);
writer.writeStartDocument();
writer.writeStartElement("html");
writer.writeRawXML(""
+ "Quality Check Report ");
writer.writeStartElement("body");
writer.writeLineBreak();
writer.writeElementString("h1", "Quality Check Report");
// Process the issues
URI docId = null;
String inputPath = null;
for (Issue issue : issues) {
// Skip disabled issues
if ( !issue.getEnabled() ) {
continue;
}
// Do we start a new input document?
if ((docId == null) || !docId.equals(issue.getDocumentURI())) {
// Ruler only after first input document
if ( docId != null ) {
writer.writeRawXML("
");
}
docId = issue.getDocumentURI();
// Determine if showing full or relative path
inputPath = docId.getPath();
if ( !params.getShowFullPath() ) {
inputPath = inputPath.replaceFirst(inputRoot, "");
}
writer.writeElementString("p", "Input: " + inputPath);
}
String position = String.format("ID=%s", issue.getTuId());
if (issue.getTuName() != null) {
position += (" (" + issue.getTuName() + ")");
}
if (issue.getSegId() != null) {
position += String.format(", segment=%s", issue.getSegId());
}
writer.writeStartElement("p");
writer.writeString(position + ":");
writer.writeRawXML("
");
writer.writeString(issue.getMessage());
writer.writeEndElementLineBreak(); // p
writer.writeRawXML("");
writer.writeRawXML("S: '" + highlight(issue.getSource(), issue.getSourceStart(), issue.getSourceEnd()) + "'");
writer.writeRawXML("
");
writer.writeRawXML("T: '" + highlight(issue.getTarget(), issue.getTargetStart(), issue.getTargetEnd()) + "'");
writer.writeRawXML("
");
writer.writeLineBreak();
} // End of for issues
// If docId is still null it means there was nothing to report
if ( docId == null ) {
writer.writeRawXML("No issue detected.
");
}
// Write end of document
writer.writeEndElementLineBreak(); // body
writer.writeEndElementLineBreak(); // html
writer.writeEndDocument();
}
finally {
if (writer != null) {
writer.close();
}
}
}
/**
 * Writes the enabled issues of the session as a UTF-8 tab-delimited text
 * report. Each issue is one line of four columns: position, message,
 * source and target. A line holding the document path is written each
 * time the document URI changes.
 *
 * @param finalPath the path of the report file to create.
 * @param inputRoot the common root to strip from the document paths; only
 * used when the parameters ask for relative paths.
 * @throws OkapiIOException if anything fails while creating the report.
 */
private void generateTabDelimitedReport (String finalPath,
    String inputRoot)
{
    // Create the output file
    try (PrintWriter writer = new PrintWriter(new File(finalPath), "UTF-8")) {
        writer.println("Quality Check Report\t\t\t");
        // Process the issues
        URI docId = null;
        for (Issue issue : issues) {
            // Skip disabled issues
            if (!issue.getEnabled()) {
                continue;
            }
            // Do we start a new input document?
            if ((docId == null) || !docId.equals(issue.getDocumentURI())) {
                docId = issue.getDocumentURI();
                // Determine if showing full or relative path
                String inputPath = docId.getPath();
                if ( !params.getShowFullPath() ) {
                    // Quote the root: it is a path, not a regular expression
                    inputPath = inputPath.replaceFirst(Pattern.quote(inputRoot), "");
                }
                writer.println(inputPath + "\t\t\t");
            }
            String position = String.format("ID=%s", issue.getTuId());
            if (issue.getTuName() != null) {
                position += (" (" + issue.getTuName() + ")");
            }
            if (issue.getSegId() != null) {
                position += String.format(", segment=%s", issue.getSegId());
            }
            // Columns: position TAB message TAB source TAB target
            writer.print(position + "\t");
            writer.print(issue.getMessage() + "\t");
            writer.print(escape(issue.getSource()) + "\t");
            writer.println(escape(issue.getTarget()));
        } // End of for issues
    } catch ( Throwable e ) {
        throw new OkapiIOException("Error when creating the report.\n" + e.getMessage(), e);
    }
}
private void generateXMLReport (String finalPath,
String inputRoot)
{
XMLWriter writer = null;
try {
// Create the output file
writer = new XMLWriter(finalPath);
writer.writeStartDocument();
writer.writeStartElement("qualityCheckReport");
writer.writeLineBreak();
writer.writeStartElement("issues");
writer.writeLineBreak();
// Process the issues
URI docId = null;
String inputPath = null;
for (Issue issue : issues) {
// Skip disabled issues
if ( !issue.getEnabled() ) {
continue;
}
// Do we start a new input document?
if ((docId == null) || !docId.equals(issue.getDocumentURI())) {
// Ruler only after first input document
if (docId != null) {
writer.writeRawXML("
");
}
docId = issue.getDocumentURI();
// Determine if showing full or relative path
inputPath = docId.getPath();
if ( !params.getShowFullPath() ) {
inputPath = inputPath.replaceFirst(inputRoot, "");
}
}
writer.writeStartElement("issue");
writer.writeLineBreak();
writeIndentedElementString(writer, "input", inputPath);
writeIndentedElementString(writer, "tuName", issue.getTuName());
writeIndentedElementString(writer, "tuId", issue.getTuId());
writeIndentedElementString(writer, "segId", issue.getSegId());
writeIndentedElementString(writer, "severity", Integer.toString(issue.getDisplaySeverity()));
writeIndentedElementString(writer, "issueType", issue.getIssueType().toString());
writeIndentedElementString(writer, "message", issue.getMessage());
writeIndentedElementStringHilite(writer, "source", issue.getSource(), issue.getSourceStart(), issue.getSourceEnd());
writeIndentedElementStringHilite(writer, "target", issue.getTarget(), issue.getTargetStart(), issue.getTargetEnd());
writer.writeEndElementLineBreak(); // issue
} // End of for issues
// Write end of document
writer.writeEndElementLineBreak(); // issues
writer.writeEndElementLineBreak(); // qualityCheckReport
writer.writeEndDocument();
}
finally {
if (writer != null) {
writer.close();
}
}
}
/**
 * Writes one element on its own line: a tab, the element with its text
 * content, then a line break.
 *
 * @param writer the writer to output to.
 * @param element the name of the element.
 * @param text the text content of the element.
 */
private static void writeIndentedElementString (XMLWriter writer, String element, String text) {
    final String indent = "\t";
    writer.writeString(indent);
    writer.writeElementString(element, text);
    writer.writeLineBreak();
}
/**
 * Writes one indented element whose text has the span between start and
 * end wrapped in a nested "hi" element. When end is zero or negative
 * there is no span and the text is written as a plain indented element.
 *
 * @param writer the writer to output to.
 * @param element the name of the element.
 * @param text the text content of the element.
 * @param start the start position of the span to wrap.
 * @param end the end position of the span to wrap.
 */
private static void writeIndentedElementStringHilite(XMLWriter writer,
    String element, String text, int start, int end) {
    if (end <= 0) {
        // Nothing to highlight
        writeIndentedElementString(writer, element, text);
        return;
    }
    writer.writeString("\t");
    writer.writeStartElement(element);
    writer.writeString(text.substring(0, start)); // text before the span
    writer.writeElementString("hi", text.substring(start, end)); // the span itself
    writer.writeString(text.substring(end)); // text after the span
    writer.writeEndElementLineBreak(); // element
}
/**
 * Escapes the tab characters of a text so it fits in one column of the
 * tab-delimited report: each tab becomes the two characters backslash
 * and 't'.
 *
 * @param text the text to escape.
 * @return the text with its tabs escaped.
 */
private String escape(String text) {
    // Literal replacement: with replaceAll() the backslash of the
    // replacement is an escape and each tab would become a plain 't'.
    return text.replace("\t", "\\t");
}
private String highlight(String text,
int start,
int end) {
if (end > 0) {
// Add placeholder for the highlights
StringBuilder buf = new StringBuilder(text);
buf.insert(start, '\u0017');
buf.insert(end + 1, '\u0018');
String tmp = Util.escapeToXML(buf.toString(), 0, false, null);
tmp = tmp.replace("\u0017", "");
tmp = tmp.replace("\u0018", "");
return tmp.replace("\n", "
");
}
// Else: just escape the string
return Util.escapeToXML(text, 0, false, null).replace("\n", "
");
}
/**
 * Computes how many blocks of at most MAXBLOCKLEN characters are needed
 * to hold a string.
 *
 * @param data the string to measure.
 * @return the number of blocks (0 for an empty string).
 */
private int getLongStringBlockCount (String data) {
    final int length = data.length();
    // Ceiling division: a partial last block counts as one block
    return (length + MAXBLOCKLEN - 1) / MAXBLOCKLEN;
}
private void writeLongString (DataOutputStream dos,
String data)
throws IOException
{
int r = (data.length() % MAXBLOCKLEN);
int n = (data.length() / MAXBLOCKLEN);
int count = n + ((r > 0) ? 1 : 0);
dos.writeInt(count); // Number of blocks
int pos = 0;
// Write the full blocks
for ( int i=0; i 0 ) {
dos.writeUTF(data.substring(pos));
}
}
private String readLongString (DataInputStream dis)
throws IOException
{
StringBuilder tmp = new StringBuilder();
int count = dis.readInt(); // Read number of blocks
for ( int i=0; i
© 2015 - 2025 Weber Informatics LLC | Privacy Policy