org.apache.uima.ep_launcher.RemoteLauncher Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of uimaj-ep-launcher Show documentation
Show all versions of uimaj-ep-launcher Show documentation
Adds launch support for Analysis Engines
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.ep_launcher;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import org.apache.uima.UIMAFramework;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.impl.XCASDeserializer;
import org.apache.uima.cas.impl.XmiCasDeserializer;
import org.apache.uima.cas.impl.XmiCasSerializer;
import org.apache.uima.ep_launcher.LauncherConstants.InputFormat;
import org.apache.uima.resource.ResourceSpecifier;
import org.apache.uima.util.FileUtils;
import org.apache.uima.util.XMLInputSource;
import org.apache.uima.util.XMLSerializer;
import org.apache.uima.util.impl.ProcessTrace_impl;
import org.xml.sax.SAXException;
/**
 * The RemoteLauncher runs the actual Analysis Engine in the launched VM.
 *
 * <p>
 * The launcher is configured through command line arguments, each of which is a parameter name
 * followed by exactly one value:
 * <ul>
 * <li>{@link #DESCRIPTOR_PARAM} - the analysis engine descriptor file (mandatory)</li>
 * <li>{@link #INPUT_RESOURCE_PARAM} - the input file or directory (mandatory)</li>
 * <li>{@link #INPUT_RECURSIVE_PARAM} - whether sub-directories of the input are visited</li>
 * <li>{@link #INPUT_FORMAT_PARAM} - the name of the {@link InputFormat} of the input files</li>
 * <li>{@link #INPUT_ENCODING_PARAM} - the encoding used to read plain text input</li>
 * <li>{@link #INPUT_LANGUAGE_PARAM} - the document language set on plain text input</li>
 * <li>{@link #OUTPUT_FOLDER_PARAM} - the folder the resulting XMI files are written to</li>
 * <li>{@link #OUTPUT_CLEAR_PARAM} - whether the output folder is emptied before processing</li>
 * </ul>
 * Arguments which are not one of these parameter names are ignored.
 */
public class RemoteLauncher {

  public static final String DESCRIPTOR_PARAM = "-descriptor";

  public static final String INPUT_RESOURCE_PARAM = "-inputResource";

  public static final String INPUT_RECURSIVE_PARAM = "-recursive";

  public static final String INPUT_FORMAT_PARAM = "-format";

  public static final String INPUT_ENCODING_PARAM = "-encoding";

  public static final String INPUT_LANGUAGE_PARAM = "-language";

  public static final String OUTPUT_FOLDER_PARAM = "-output";

  public static final String OUTPUT_CLEAR_PARAM = "-clear";

  // Launch configuration, filled in by parseCmdLineArgs(String[]).
  private static File descriptor;

  private static File inputResource;

  private static boolean inputRecursive;

  private static InputFormat inputFormat = InputFormat.CAS;

  private static String inputEncoding = java.nio.charset.Charset.defaultCharset().name();

  private static String inputLanguage;

  private static File outputFolder;

  private static boolean outputFolderClear;

  /**
   * Checks whether the argument is one of the parameter names understood by the launcher.
   *
   * @param arg
   *          the command line argument to test
   * @return true if the argument is a known parameter name
   */
  private static boolean isKnownParameter(String arg) {
    return DESCRIPTOR_PARAM.equals(arg) || INPUT_RESOURCE_PARAM.equals(arg)
            || INPUT_RECURSIVE_PARAM.equals(arg) || INPUT_FORMAT_PARAM.equals(arg)
            || INPUT_ENCODING_PARAM.equals(arg) || INPUT_LANGUAGE_PARAM.equals(arg)
            || OUTPUT_FOLDER_PARAM.equals(arg) || OUTPUT_CLEAR_PARAM.equals(arg);
  }

  /**
   * Parses the command line arguments into the static launch configuration.
   *
   * @param args
   *          the command line arguments as passed to {@link #main(String[])}
   * @return true if the arguments are valid; false if a parameter is missing its value, the input
   *         format is not known, or the descriptor and the input resource are not each specified
   *         exactly once
   */
  private static boolean parseCmdLineArgs(String[] args) {

    // The mandatory parameters are counted separately, a shared counter would accept
    // e.g. two descriptors and no input resource.
    int descriptorCount = 0;
    int inputResourceCount = 0;

    int index = 0;
    while (index < args.length) {
      String arg = args[index++];

      if (!isKnownParameter(arg)) {
        // Unknown arguments are skipped
        continue;
      }

      // Every known parameter must be followed by its value
      if (index >= args.length) {
        return false;
      }

      String value = args[index++];

      if (DESCRIPTOR_PARAM.equals(arg)) {
        descriptor = new File(value);
        descriptorCount++;
      } else if (INPUT_RESOURCE_PARAM.equals(arg)) {
        inputResource = new File(value);
        inputResourceCount++;
      } else if (INPUT_RECURSIVE_PARAM.equals(arg)) {
        inputRecursive = Boolean.parseBoolean(value);
      } else if (INPUT_FORMAT_PARAM.equals(arg)) {
        if (InputFormat.CAS.toString().equals(value)) {
          inputFormat = InputFormat.CAS;
        } else if (InputFormat.PLAIN_TEXT.toString().equals(value)) {
          inputFormat = InputFormat.PLAIN_TEXT;
        } else {
          System.err.println("Unkown input format: " + value);
          return false;
        }
      } else if (INPUT_ENCODING_PARAM.equals(arg)) {
        inputEncoding = value;
      } else if (INPUT_LANGUAGE_PARAM.equals(arg)) {
        inputLanguage = value;
      } else if (OUTPUT_FOLDER_PARAM.equals(arg)) {
        outputFolder = new File(value);
      } else if (OUTPUT_CLEAR_PARAM.equals(arg)) {
        outputFolderClear = Boolean.parseBoolean(value);
      }
    }

    return descriptorCount == 1 && inputResourceCount == 1;
  }

  /**
   * Computes the XMI output file for an input file. The location of the input file relative to
   * the input directory is reproduced below the output folder and the file ending is replaced
   * with ".xmi".
   *
   * @param inputFile
   *          the input file which was processed
   * @return the output file, or null if the input file is not located below the input directory
   */
  private static File resolveOutputFile(File inputFile) {

    File inputDirectory;
    if (inputResource.isFile()) {
      inputDirectory = inputResource.getParentFile();
    } else {
      inputDirectory = inputResource;
    }

    // getParentFile() returns null for a file name without a directory part,
    // in this case the whole input file path is the relative path
    String inputDirectoryPath = inputDirectory != null ? inputDirectory.getPath() : "";

    String inputFilePath = inputFile.getPath();
    if (!inputFilePath.startsWith(inputDirectoryPath)) {
      return null;
    }

    String relativeInputFilePath = inputFilePath.substring(inputDirectoryPath.length());

    String outputFilePath = new File(outputFolder.getPath(), relativeInputFilePath).getPath();

    // cutoff file ending, a dot inside a directory name is not a file ending
    int fileTypeIndex = outputFilePath.lastIndexOf('.');
    if (fileTypeIndex > outputFilePath.lastIndexOf(File.separatorChar)) {
      outputFilePath = outputFilePath.substring(0, fileTypeIndex);
    }

    return new File(outputFilePath + ".xmi");
  }

  /**
   * Serializes the CAS as XMI into the output file, missing parent directories are created.
   *
   * @param aCAS
   *          the CAS to serialize
   * @param outputFile
   *          the file to write
   * @throws IOException
   *           if the file cannot be written or the serialization fails
   */
  private static void writeXmi(CAS aCAS, File outputFile) throws IOException {

    // Create sub-directories
    File outputDirectory = outputFile.getParentFile();
    if (outputDirectory != null && !outputDirectory.exists()) {
      outputDirectory.mkdirs();
    }

    try (FileOutputStream out = new FileOutputStream(outputFile)) {
      XmiCasSerializer ser = new XmiCasSerializer(aCAS.getTypeSystem());
      XMLSerializer xmlSer = new XMLSerializer(out, false);
      ser.serialize(aCAS, xmlSer.getContentHandler());
    } catch (SAXException e) {
      // keep the cause, otherwise the origin of the failure is lost
      throw new IOException(e.getMessage(), e);
    }
  }

  /**
   * Loads one input file into the CAS, runs the analysis engine on it and, if an output folder
   * is configured, writes the result as XMI. The CAS is reset afterwards.
   *
   * @param inputFile
   *          the file to process
   * @param format
   *          the format of the input file
   * @param aAE
   *          the analysis engine to run
   * @param aCAS
   *          the CAS which is filled from the input file
   * @throws IOException
   *           if the input cannot be read or parsed, or the output cannot be written
   * @throws AnalysisEngineProcessException
   *           if the analysis engine fails to process the CAS
   * @throws IllegalStateException
   *           if the format is neither plain text nor CAS
   */
  private static void processFile(File inputFile, InputFormat format, AnalysisEngine aAE, CAS aCAS)
          throws IOException, AnalysisEngineProcessException {

    if (InputFormat.PLAIN_TEXT.equals(format)) {
      String document = FileUtils.file2String(inputFile, inputEncoding);
      document = document.trim();

      // put document text in CAS
      aCAS.setDocumentText(document);

      if (inputLanguage != null) {
        aCAS.setDocumentLanguage(inputLanguage);
      }
    } else if (InputFormat.CAS.equals(format)) {
      // Note: a file which ends neither with .xmi nor with .xcas loads nothing into the CAS
      if (inputFile.getName().endsWith(".xmi")) {
        try (FileInputStream inputStream = new FileInputStream(inputFile)) {
          XmiCasDeserializer.deserialize(inputStream, aCAS, true);
        } catch (SAXException e) {
          throw new IOException(e.getMessage(), e);
        }
      } else if (inputFile.getName().endsWith(".xcas")) {
        try (FileInputStream inputStream = new FileInputStream(inputFile)) {
          XCASDeserializer.deserialize(inputStream, aCAS, true);
        } catch (SAXException e) {
          throw new IOException(e.getMessage(), e);
        }
      }
    } else {
      throw new IllegalStateException("Unexpected format!");
    }

    // process
    aAE.process(aCAS);

    if (outputFolder != null) {
      File outputFile = resolveOutputFile(inputFile);

      if (outputFile != null) {
        writeXmi(aCAS, outputFile);
      } else {
        // Do not return here, the CAS must still be reset for the next document
        System.err.println(
                "Error: Unable to construct output file path, output file will not be written!");
      }
    }

    // reset the CAS to prepare it for processing the next document
    aCAS.reset();
  }

  /**
   * Processes the input resource. A directory is searched for files accepted by the file filter,
   * accepted sub-directories are searched in turn. A single file is processed directly without
   * consulting the file filter.
   *
   * @param inputResource
   *          the file or directory to process
   * @param fileFilter
   *          the filter applied to the content of directories
   * @param aAE
   *          the analysis engine to run
   * @param aCAS
   *          the CAS used for processing
   * @throws IOException
   *           if a file cannot be read or written
   * @throws AnalysisEngineProcessException
   *           if the analysis engine fails to process a file
   */
  private static void findAndProcessFiles(File inputResource, FileFilter fileFilter,
          AnalysisEngine aAE, CAS aCAS) throws IOException, AnalysisEngineProcessException {

    // Figure out if input resource is file or directory
    if (inputResource.isDirectory()) {
      // get all files in the input directory
      File[] files = inputResource.listFiles(fileFilter);
      if (files != null) {
        for (File file : files) {
          if (file.isDirectory()) {
            findAndProcessFiles(file, fileFilter, aAE, aCAS);
          } else {
            processFile(file, inputFormat, aAE, aCAS);
          }
        }
      }
    } else if (inputResource.isFile()) {
      // Just process the single file
      processFile(inputResource, inputFormat, aAE, aCAS);
    }
  }

  /**
   * Deletes a file, or a directory together with everything it contains. The deletion continues
   * after a failure, so that as much as possible is removed.
   *
   * @param file
   *          the file or directory to delete
   * @return true if the file and, for a directory, all of its content was deleted
   */
  private static boolean deleteFile(File file) {
    boolean success = true;

    if (file.isDirectory()) {
      File[] subFiles = file.listFiles();

      if (subFiles != null) {
        for (File subFile : subFiles) {
          // deleteFile must be the left operand, otherwise it is skipped after the first failure
          success = deleteFile(subFile) && success;
        }
      } else {
        // the content of the directory could not be listed
        success = false;
      }
    }

    // a directory can only be deleted after its content is gone
    return file.delete() && success;
  }

  /**
   * Creates the file filter for the configured input format. The filter accepts the files with
   * the file ending expected for the format, and directories if the input is processed
   * recursively.
   *
   * @return the file filter
   * @throws IllegalStateException
   *           if the input format is neither CAS nor plain text
   */
  private static FileFilter createFileFilter() {
    if (InputFormat.CAS.equals(inputFormat)) {
      return new FileFilter() {
        @Override
        public boolean accept(File file) {
          return file.getName().endsWith(".xmi") || file.getName().endsWith(".xcas")
                  || (inputRecursive && file.isDirectory());
        }
      };
    } else if (InputFormat.PLAIN_TEXT.equals(inputFormat)) {
      return new FileFilter() {
        @Override
        public boolean accept(File file) {
          return file.getName().endsWith(".txt") || (inputRecursive && file.isDirectory());
        }
      };
    } else {
      throw new IllegalStateException("Unexpected input format!");
    }
  }

  /**
   * Runs the analysis engine described by the descriptor on the input resource.
   *
   * @param args
   *          the command line arguments, see the class description
   * @throws IllegalArgumentException
   *           if the arguments are invalid
   * @throws Exception
   *           if the analysis engine cannot be created or the processing fails
   */
  public static void main(String[] args) throws Exception {

    if (!parseCmdLineArgs(args)) {
      throw new IllegalArgumentException("Passed arguments are invalid!");
    }

    if (outputFolder != null && outputFolderClear) {
      // listFiles() returns null if the output folder does not exist (yet),
      // in this case there is nothing to clear
      File[] filesToDelete = outputFolder.listFiles();
      if (filesToDelete != null) {
        for (File file : filesToDelete) {
          if (!deleteFile(file)) {
            System.err.println("Warning: Unable to delete " + file.getPath());
          }
        }
      }
    }

    // get Resource Specifier from XML file
    XMLInputSource in = new XMLInputSource(descriptor);
    ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);

    // create Analysis Engine
    AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(specifier);

    try {
      // create a CAS
      CAS cas = ae.newCAS();

      // Create a file filter depending on the format
      // to filter out all files which do not have the
      // expected file ending
      FileFilter fileFilter = createFileFilter();

      findAndProcessFiles(inputResource, fileFilter, ae, cas);

      ae.collectionProcessComplete(new ProcessTrace_impl());
    } finally {
      // release the analysis engine even if the processing failed
      ae.destroy();
    }
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy