// net.sf.okapi.steps.diffleverage.FileAligner Maven / Gradle / Ivy
package net.sf.okapi.steps.diffleverage;
import java.net.URI;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;

import net.sf.okapi.common.exceptions.OkapiException;
/**
* Match up (align) files based on full path name. There is a bi-lingual case (source matched with target files) and
* tri-lingual (new source matched with old source, matched with old target).
*
* @author HARGRAVEJE
* @param
*
*/
public class FileAligner implements Iterable> {
// used for both new source and source in the bi-lingual case
private List> newFiles;
private Map> trgFilesMap;
private URI newRootUri;
private Map> oldSrcFilesMap;
private Map> oldTrgFilesMap;
private List> alignedFiles;
private boolean lowerCase;
private boolean trilingual;
/**
* Tri-lingual alignment (new source matched with old source, matched with old target). This method will lower case
* the file paths by default.
*
* @param newFiles
* - new source files
* @param oldSrcFiles
* - old source files (i.e, from previous translation)
* @param oldTrgFiles
* - old target files that match the old source files.
* @param newRootUri
* - root directory of the new source files.
* @param oldSrcRootUri
* - root directory of the old source files.
* @param oldTrgRootUri
* - root directory of the new target files.
*/
public FileAligner(List> newFiles, List> oldSrcFiles,
List> oldTrgFiles, URI newRootUri, URI oldSrcRootUri, URI oldTrgRootUri) {
this(true, newFiles, oldSrcFiles, oldTrgFiles, newRootUri, oldSrcRootUri, oldTrgRootUri);
}
/**
* Tri-lingual alignment (new source matched with old source, matched with old target).
*
* @param lowerCase
* - true to lower case file paths before matching, false to leave as-is
* @param newFiles
* - new source files
* @param oldSrcFiles
* - old source files (i.e, from previous translation)
* @param oldTrgFiles
* - old target files that match the old source files.
* @param newRootUri
* - root directory of the new source files.
* @param oldSrcRootUri
* - root directory of the old source files.
* @param oldTrgRootUri
* - root directory of the new target files.
*/
public FileAligner(boolean lowerCase, List> newFiles,
List> oldSrcFiles, List> oldTrgFiles, URI newRootUri,
URI oldSrcRootUri, URI oldTrgRootUri) {
this.lowerCase = lowerCase;
this.trilingual = true;
this.newFiles = newFiles;
this.newRootUri = newRootUri;
oldTrgFilesMap = new TreeMap<>();
oldSrcFilesMap = new TreeMap<>();
createMatchingMap(lowerCase, oldSrcFilesMap, newFiles, oldSrcFiles, newRootUri,
oldSrcRootUri);
// put old files into our sorted map
for (FileLikeThing f : oldTrgFiles) {
String key = getRealtivePath(f.getPath(), oldTrgRootUri);
if (lowerCase) {
key = key.toLowerCase();
}
if (oldTrgFilesMap.containsKey(key)) {
// FIXME: somehow we have a duplicate, throw an exception for now
throw new OkapiException("Duplicate path entry: " + key);
} else {
oldTrgFilesMap.put(key, f);
}
}
}
/**
* Bi-lingual alignment (match source with target files). This method will lower case the file paths by default.
*
* @param srcFiles
* - source files
* @param trgFiles
* - target files
* @param srcRootUri
* - source root directory
* @param trgRootUri
* - target root directory
*/
public FileAligner(List> srcFiles, List> trgFiles,
URI srcRootUri, URI trgRootUri) {
this(true, srcFiles, trgFiles, srcRootUri, trgRootUri);
}
/**
* Bi-lingual alignment (match source with target files)
*
* @param lowerCase
* - true to lower case file paths before matching, false to leave as-is.
* @param srcFiles
* - source files
* @param trgFiles
* - target files
* @param srcRootUri
* - source root directory
* @param trgRootUri
* - target root directory
*/
public FileAligner(boolean lowerCase, List> srcFiles,
List> trgFiles, URI srcRootUri, URI trgRootUri) {
this.newFiles = srcFiles; // reuse newFiles field for bi-lingual alignment
this.newRootUri = srcRootUri;
this.lowerCase = lowerCase;
this.trilingual = false;
this.trgFilesMap = new TreeMap<>();
createMatchingMap(lowerCase, trgFilesMap, srcFiles, trgFiles, srcRootUri, trgRootUri);
}
/*
* Create map of the matching files. Either old source or target.
*/
private void createMatchingMap(boolean lowerCase,
Map> matchingFileMap, List> newFiles,
List> matchingFiles, URI newRootUri, URI matchingRotUri) {
// put old files into our sorted map
for (FileLikeThing f : matchingFiles) {
String key = getRealtivePath(f.getPath(), matchingRotUri);
if (lowerCase) {
key = key.toLowerCase();
}
if (matchingFileMap.containsKey(key)) {
// FIXME: somehow we have a duplicate, throw an exception for now
throw new OkapiException("Duplicate path entry: " + key);
} else {
matchingFileMap.put(key, f);
}
}
}
/**
* match up (align) new files (i.e., source our new source) with
*/
public void align() {
alignedFiles = new LinkedList<>();
for (FileLikeThing f : newFiles) {
String key = getRealtivePath(f.getPath(), newRootUri);
if (lowerCase) {
key = key.toLowerCase();
}
FileLikeThing o = null;
if (trilingual) {
o = oldSrcFilesMap.get(key);
if (o != null) {
if (oldTrgFilesMap != null) {
FileLikeThing t = oldTrgFilesMap.get(key);
// we found an old source and matching target
alignedFiles.add(new FileAlignment<>(f, o, t));
} else {
// we found an old source file without a matching target
alignedFiles.add(new FileAlignment<>(f, o));
}
} else {
// this is a new file not found in the old source
alignedFiles.add(new FileAlignment<>(f));
}
} else {
o = trgFilesMap.get(key);
if (o != null) {
// source file matches target
alignedFiles.add(new FileAlignment<>(f, o));
} else {
// source file without a target
alignedFiles.add(new FileAlignment<>(f));
}
}
}
}
/**
* Iterator over aligned {@link FileLikeThing}s
*/
public Iterator> iterator() {
return alignedFiles.iterator();
}
/**
* Get {@link List} of {@link FileLikeThing}s
*
* @return {@link FileAlignment} as a list
*/
public List> getAlignments() {
return alignedFiles;
}
/*
* Return path minus the root
*/
private static String getRealtivePath(URI path, URI root) {
String r = path.relativize(root).toString();
return path.toString().replaceFirst(r, "");
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy