org.apache.uima.tools.migration.IbmUimaToApacheUima Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of uimaj-tools Show documentation
Show all versions of uimaj-tools Show documentation
Tooling supporting UIMA use
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.tools.migration;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.uima.util.FileUtils;
/**
* Migration utility for converting from IBM UIMA to Apache UIMA.
* Updates package names and does various other string replacements.
* Should be run on java code, descriptors, and other files that may have UIMA
* package names in them (e.g., launch configurations, scripts).
*/
public class IbmUimaToApacheUima {
private static List replacements = new ArrayList();
private static int MAX_FILE_SIZE = 1000000; //don't update files bigger than this
private static Set extensions = new HashSet();
private static int filesScanned = 0;
private static int filesModified = 0;
private static List filesNeedingManualAttention = new ArrayList();
private static Set ibmPackageNames = new HashSet();
private static final Pattern IMPORT_PATTERN = Pattern.compile("(?m)^\\s*import\\s+([^;]*);\\s*$");
private static final Pattern CLASS_NAME_PATTERN = Pattern.compile("public\\s+(final\\s+|abstract\\s+)*class\\s+([A-Za-z0-9_]+)");
private static final Pattern GET_NEXT_INDEX_PATTERN = Pattern.compile("JCas\\.getNextIndex\\(\\)");
private static final Pattern THROW_FEAT_MISSING_PATTERN = Pattern.compile("JCas\\.throwFeatMissing");
private static final Pattern PACKAGE_PATTERN = Pattern.compile("(?m)^\\s*package\\s+([A-Za-z0-9_.]+)\\s*;") ;
private static final Pattern GETDOCANNOT_PATTERN = Pattern.compile("[Jj][Cc][Aa][Ss](\\(\\))?\\s*\\.\\s*getDocumentAnnotation\\(");
/**
* Main program. Expects one argument, the name of a directory containing files to
* update. Subdirectories are processed recursively.
* @param args Command line arguments
* @throws IOException if an I/O error occurs
*/
public static void main(String[] args) throws IOException{
//parse command line
String dir = null;
for (int i = 0; i < args.length; i++) {
if (args[i].startsWith("-")) {
if (args[i].equals("-ext")) {
if (i + 1 >= args.length) {
printUsageAndExit();
}
parseCommaSeparatedList(args[++i], extensions);
}
else {
System.err.println("Unknown switch " + args[i]);
printUsageAndExit();
}
}
else {
if (dir != null) {
printUsageAndExit();
}
else {
dir = args[i];
}
}
}
if (dir == null) {
printUsageAndExit();
}
//read resource files
//map from IBM UIMA package names to Apache UIMA package names
readMapping("packageMapping.txt", replacements, true);
//other string replacements
readMapping("stringReplacements.txt", replacements, false);
//from system property, get list of file extensions to exclude
//do the replacements
System.out.println("Migrating your files...");
replaceInAllFiles(new File(args[0]));
System.out.println("Migration complete.");
System.out.println("Scanned " + filesScanned + " files. " + filesModified + " files modified.");
if (filesNeedingManualAttention.size() > 0) {
System.out.println("The following files may need manual attention:");
for (int i = 0; i < filesNeedingManualAttention.size(); i++) {
System.out.println(" " + filesNeedingManualAttention.get(i));
}
System.out.println("See the \"Migrating from IBM UIMA to Apache UIMA\" chapter in the " +
"\"UIMA Overview and Setup\" document for details.");
}
else {
System.out.println("No problems were detected. However, if the code does not compilie " +
"and run, see the \"Migrating from IBM UIMA to Apache UIMA\" chapter in the " +
"\"UIMA Overview and Setup\" document for assistance.");
}
}
/**
* Parses a comma separated list, entering each value into the results Collection.
* Trailing empty strings are included in the results Collection.
* @param string string to parse
* @param results Collection to which each value will be added
*/
private static void parseCommaSeparatedList(String string, Collection results) {
String[] components = string.split(",",-1);
for (int i = 0; i < components.length; i++) {
results.add(components[i]);
}
}
private static void printUsageAndExit() {
System.err.println("Usage: java " + IbmUimaToApacheUima.class.getName() + " [-ext ]");
System.err.println(" is a comma separated list of file extensions to process, e.g.: java,xml,properties");
System.err.println("\tUse a trailing comma to include files with no extension (meaning their name contains no dot)");
System.exit(1);
}
/**
* Applies the necessary replacements to all files in the given directory.
* Subdirectories are processed recursively.
*
* @param dir diretory containing files to replace
* @throws IOException if an I/O error occurs
*/
private static void replaceInAllFiles(File dir) throws IOException {
File[] fileList = dir.listFiles();
for (int i = 0; i < fileList.length; i++) {
File file = fileList[i];
if (file.isFile()) {
//skip files with extensions specified in the excludes list
if (!extensions.isEmpty()) {
String filename = file.getName();
String ext="";
int lastDot = filename.lastIndexOf('.');
if (lastDot > -1) {
ext = filename.substring(lastDot+1);
}
if (!extensions.contains(ext.toLowerCase())) {
continue;
}
}
//skip files that we can't read and write
if (!file.canRead()) {
System.err.println("Skipping unreadable file: " + file.getCanonicalPath());
continue;
}
if (!file.canWrite()) {
System.err.println("Skipping unwritable file: " + file.getCanonicalPath());
continue;
}
//skip files that are too big
if (file.length() > MAX_FILE_SIZE) {
System.out.println("Skipping file " + file.getCanonicalPath() + " with size: " + file.length() + " bytes");
continue;
}
//do the replacements
replaceInFile(file);
}
//recursively call on subdirectories
if (file.isDirectory()) {
replaceInAllFiles(file);
}
}
}
/**
* Applies replacements to a single file.
* @param file the file to process
*/
private static void replaceInFile(File file) throws IOException {
//read file
String original;
try {
original = FileUtils.file2String(file);
}
catch(IOException e) {
System.err.println("Error reading " + file.getCanonicalPath());
System.err.println(e.getMessage());
return;
}
String contents = original;
//apply replacements
Iterator iter = replacements.iterator();
while (iter.hasNext()) {
Replacement replacement = (Replacement)iter.next();
contents = contents.replaceAll(replacement.regex, replacement.replacementStr);
}
//for .java files do some additional processing
if (file.getName().endsWith(".java")) {
//updates for JCas/JCasRegistry refactoring
contents = applyJCasRefactoring(contents);
//remove duplicate imports (can be caused by some replacements)
contents = removeDuplicateImports(contents);
}
//write file if it changed
if (!contents.equals(original)) {
FileUtils.saveString2File(contents, file);
filesModified++;
}
filesScanned++;
//check for situations that may need manual attention,
//updates filesNeedingManualAttention field
checkForManualAttentionNeeded(file, original);
}
/*
* Applies changes needed due to JCas/JCasRegistry refactoring. These are a little
* more complicated than simple regex replacements.
*
* JCas.getNextIndex -> JCasRegistry.register(ThisClass.class)
* JCas.throwFeatMissing -> jcasType.jcas.throwFeatMissing [in cover class]
* JCas.throwFeatMissing -> jcas.throwFeatMissing [in _Type class]
*/
private static String applyJCasRefactoring(String contents) {
//find the class name, we'll need it later
Matcher classNameMatcher = CLASS_NAME_PATTERN.matcher(contents);
if (!classNameMatcher.find())
return contents;
String className = classNameMatcher.group(2);
//replace getNextIndex
Matcher getNextIndexMatcher = GET_NEXT_INDEX_PATTERN.matcher(contents);
String replacement = "org.apache.uima.jcas.JCasRegistry.register(" + className + ".class)";
contents = getNextIndexMatcher.replaceAll(replacement);
//replace throwFeatMissing (replacement depends on whether we're in _Type object or not)
Matcher throwFeatMissingMatcher = THROW_FEAT_MISSING_PATTERN.matcher(contents);
if (className.endsWith("_Type")) {
contents = throwFeatMissingMatcher.replaceAll("this.jcas.throwFeatMissing");
}
else {
contents = throwFeatMissingMatcher.replaceAll("this.jcasType.jcas.throwFeatMissing");
}
return contents;
}
/**
* Remove duplicate imports from a Java source file.
*/
private static String removeDuplicateImports(String contents) {
HashSet classes = new HashSet();
Matcher matcher = IMPORT_PATTERN.matcher(contents);
int pos = 0;
int endOfLastDuplicate = 0;
StringBuffer result = null;
while (matcher.find(pos)) {
String className = matcher.group(1);
//account for whitespace in class name
className = className.replaceAll("\\s*","");
if (!classes.add(className)) {
//duplicate import found. Do not append the import,
//but get everything else before it.
if (result == null) {
result = new StringBuffer(contents.length());
}
result.append(contents.substring(endOfLastDuplicate, matcher.start()));
endOfLastDuplicate = matcher.end();
}
pos = matcher.end();
}
if (result == null) {
//no duplicates found
return contents;
}
else {
result.append(contents.substring(endOfLastDuplicate));
return result.toString();
}
}
/**
* Scans for certain patterns in the string that indicate situations
* that the migration tool doesn't resolve and may require user
* attention. Updated the filesNeedingManualAttention field with a String
* which is the file path plus the reason the file was flagged.
*
* @param contents string to scan
* @return true if the file needs manual attention
*/
private static void checkForManualAttentionNeeded(File file, String contents) {
// UIMA package name (includes most common case of DocumentAnnotation)
Matcher packageNameMatcher = PACKAGE_PATTERN.matcher(contents);
if (packageNameMatcher.find()) {
String packageName = packageNameMatcher.group(1);
if (ibmPackageNames.contains(packageName)) {
filesNeedingManualAttention.add(file.getPath() + " (Uses an IBM UIMA Package Name)");
return;
}
}
//JCas.getDocumentAnnotation (fuzzy, only matches if variable name / method
//ends with jcas)
if (GETDOCANNOT_PATTERN.matcher(contents).find()) {
filesNeedingManualAttention.add(file.getPath() + " (Calls JCas.getDocumentAnnotation())");
return;
}
//xi:include
if (contents.indexOf("= 0) {
filesNeedingManualAttention.add(file.getPath() + " (Uses xi:include)");
return;
}
}
/**
* Reads a mapping from a resource file, and populates a List of
* Replacement objects. We don't use a Map because the order in which
* the replacements are applied can be important.
*
* @param fileName name of file to read from (looked up looking using Class.getResource())
* @param mappings List to which Replacement objects will be added.
* Each object contains the regex to search for and the replacement string.
* @param treatAsPackageNames if true, the keys in the resource file will be considered
* package names, and this routine will produce regexes that replace any fully-qualified
* class name belonging to that package. Also in this case updates the
* static ibmPackageNames HashSet.
*/
private static void readMapping(String fileName, List mappings, boolean treatAsPackageNames) throws IOException {
URL pkgListFile = IbmUimaToApacheUima.class.getResource(fileName);
InputStream inStream = pkgListFile.openStream();
BufferedReader reader = new BufferedReader(new InputStreamReader(inStream));
String line = reader.readLine();
while (line != null) {
String[] mapping = line.split(" ");
String regex, replaceStr;
if (treatAsPackageNames) {
//we do special processing for package names to try to handle the case where
//user code exists in a package prefixed by com.ibm.uima.
//We only replace the package name when it appears as part of a fully-qualified
//class name in that package, not as a prefix of another package.
//turn package name into a regex (have to escape the . and,
//technically, should allow whitepsace around dots)
String pkgRegex = mapping[0].replaceAll("\\.", "\\\\s*\\\\.\\\\s*");
//form regex that will find any fully-qualified class name in this package
regex = pkgRegex+"(\\.(\\*|[A-Z]\\w*))";
replaceStr = mapping[1] + "$1";
ibmPackageNames.add(mapping[0]);
}
else {
//form regex from src, by escaping dots and allowing whitespace
regex = mapping[0].replaceAll("\\.", "\\\\s*\\\\.\\\\s*");
replaceStr = mapping[1];
}
Replacement replacement = new Replacement(regex, replaceStr);
mappings.add(replacement);
line = reader.readLine();
}
inStream.close();
}
private static class Replacement {
String regex;
String replacementStr;
Replacement(String regex, String replacement) {
this.regex = regex;
this.replacementStr = replacement;
}
}
}