// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
// You can buy this project and download/modify it as often as you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.eval;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import org.apache.log4j.Level;
import org.apache.tika.eval.db.Cols;
import org.apache.tika.eval.db.H2Util;
import org.apache.tika.eval.db.JDBCUtil;
import org.apache.tika.eval.db.TableInfo;
import org.apache.tika.eval.io.XMLLogMsgHandler;
import org.apache.tika.eval.io.XMLLogReader;
import org.apache.tika.eval.reports.ResultsReporter;
import org.apache.tika.io.IOExceptionWithCause;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This is a very task specific class that reads a log file and updates
* the "comparisons" table. It should not be run in a multithreaded environment.
*/
public class XMLErrorLogUpdater {
private static final Logger LOG = LoggerFactory.getLogger(ResultsReporter.class);
private Statement statement;
public static void main(String[] args) throws Exception {
XMLErrorLogUpdater writer = new XMLErrorLogUpdater();
Path xmlLogFileA = Paths.get(args[0]);
Path xmlLogFileB = Paths.get(args[1]);
Path db = Paths.get(args[2]);
JDBCUtil dbUtil = new H2Util(db);
Connection connection = dbUtil.getConnection();
writer.update(connection, ExtractComparer.EXTRACT_EXCEPTION_TABLE_A, xmlLogFileA);
writer.update(connection, ExtractComparer.EXTRACT_EXCEPTION_TABLE_B, xmlLogFileB);
connection.commit();
}
public void update(Connection connection, TableInfo tableInfo, Path xmlLogFile) throws Exception {
statement = connection.createStatement();
XMLLogReader reader = new XMLLogReader();
try (InputStream is = Files.newInputStream(xmlLogFile)) {
reader.read(is, new ErrorMsgUpdater(tableInfo.getName()));
} catch (IOException e) {
throw new RuntimeException("Problem reading: "+xmlLogFile.toAbsolutePath().toString());
} finally {
try {
connection.commit();
statement.close();
} catch (SQLException e) {
throw new RuntimeException("Failed to close db connection!", e);
}
}
}
private class ErrorMsgUpdater implements XMLLogMsgHandler {
private final String errorTablename;
private ErrorMsgUpdater(String errorTablename) {
this.errorTablename = errorTablename;
}
@Override
public void handleMsg(Level level, String xml) throws SQLException, IOException {
if (! level.equals(Level.ERROR)) {
return;
}
XMLStreamReader reader = null;
try {
reader = XMLInputFactory.newInstance().createXMLStreamReader(new StringReader(xml));
} catch (XMLStreamException e) {
throw new IOExceptionWithCause(e);
}
String type = null;
String resourceId = null;
try {
while (reader.hasNext() && type == null && resourceId == null) {
reader.next();
switch (reader.getEventType()) {
case XMLStreamConstants.START_ELEMENT:
if ("timed_out".equals(reader.getLocalName())) {
resourceId = reader.getAttributeValue("", "resourceId");
update(errorTablename, resourceId,
AbstractProfiler.PARSE_ERROR_TYPE.TIMEOUT);
} else if ("oom".equals(reader.getLocalName())) {
resourceId = reader.getAttributeValue("", "resourceId");
update(errorTablename, resourceId, AbstractProfiler.PARSE_ERROR_TYPE.OOM);
}
break;
}
}
reader.close();
} catch (XMLStreamException e) {
throw new IOExceptionWithCause(e);
}
}
private void update(String errorTableName,
String filePath, AbstractProfiler.PARSE_ERROR_TYPE type) throws SQLException {
int containerId = getContainerId(filePath);
String sql = "SELECT count(1) from "+errorTableName +
" where "+Cols.CONTAINER_ID +
" = "+containerId + " or "+
Cols.FILE_PATH + "='"+filePath+"'";
ResultSet rs = statement.executeQuery(sql);
//now try to figure out if that file already exists
//in parse errors
int hitCount = 0;
while (rs.next()) {
hitCount = rs.getInt(1);
}
//if it does, update all records matching that path or container id
if (hitCount > 0) {
sql = "UPDATE " + errorTableName +
" SET " + Cols.PARSE_ERROR_ID +
" = " + type.ordinal() + ","+
Cols.FILE_PATH + "='" +filePath+"'"+
" where "+Cols.CONTAINER_ID +
"="+containerId + " or "+
Cols.FILE_PATH + "='"+filePath+"'";;
} else {
//if not and container id > -1
//insert full record
if (containerId > -1) {
sql = "INSERT INTO " + errorTableName +
" ("+Cols.CONTAINER_ID+","+Cols.FILE_PATH +","+Cols.PARSE_ERROR_ID +")"+
" values (" + containerId + ", '" + filePath + "'," +
type.ordinal() + ");";
} else {
//if container id == -1, insert only file path and parse error type id
sql = "INSERT INTO " + errorTableName +
" ("+Cols.FILE_PATH.name()+","+Cols.PARSE_ERROR_ID +")"+
"values ('" + filePath + "'," +
type.ordinal() + ");";
}
}
int updated = statement.executeUpdate(sql);
if (updated == 0) {
//TODO: log
LOG.warn("made no updates in xmlerrorlogupdater!");
} else if (updated > 1) {
LOG.warn("made too many updates");
}
}
private int getContainerId(String resourceId) throws SQLException {
int containerId = -1;
String sql = "SELECT " + Cols.CONTAINER_ID.name() +
" from " + ExtractProfiler.CONTAINER_TABLE.getName()+
" where " + Cols.FILE_PATH +
" ='"+resourceId+"'";
ResultSet rs = statement.executeQuery(sql);
int resultCount = 0;
while (rs.next()) {
containerId = rs.getInt(1);
resultCount++;
}
rs.close();
if (resultCount == 0) {
LOG.warn("Should have found a container for: {}", resourceId);
} else if (resultCount > 1) {
LOG.error("Records ids should be unique: {}", resourceId);
}
/*
if (containerId < 0) {
System.err.println("CONTAINER ID < 0!!!");
sql = "SELECT MAX("+ Cols.CONTAINER_ID.name() +
") from "+ExtractProfiler.CONTAINER_TABLE.getName();
rs = statement.executeQuery(sql);
while (rs.next()) {
containerId = rs.getInt(1);
}
rs.close();
if (containerId < 0) {
//log and abort
//return -1?
} else {
containerId++;
}
}*/
return containerId;
}
}
}