com.jaeksoft.searchlib.crawler.database.DatabaseCrawlSqlThread Maven / Gradle / Ivy
OpenSearchServer is a powerful, enterprise-class search engine program. Using the web user interface, the crawlers (web, file, database, etc.) and the REST/RESTful API, you can quickly and easily integrate advanced full-text search capabilities into your application. OpenSearchServer runs on Windows and Linux/Unix/BSD.
/**
* License Agreement for OpenSearchServer
*
* Copyright (C) 2010-2015 Emmanuel Keller / Jaeksoft
*
* http://www.open-search-server.com
*
* This file is part of OpenSearchServer.
*
* OpenSearchServer is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* OpenSearchServer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with OpenSearchServer.
* If not, see <http://www.gnu.org/licenses/>.
**/
package com.jaeksoft.searchlib.crawler.database;
import java.io.IOException;
import java.net.URISyntaxException;
import java.security.NoSuchAlgorithmException;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import com.jaeksoft.pojodbc.Query;
import com.jaeksoft.pojodbc.Transaction;
import com.jaeksoft.pojodbc.connection.JDBCConnection;
import com.jaeksoft.searchlib.Client;
import com.jaeksoft.searchlib.Logging;
import com.jaeksoft.searchlib.SearchLibException;
import com.jaeksoft.searchlib.analysis.LanguageEnum;
import com.jaeksoft.searchlib.crawler.FieldMapContext;
import com.jaeksoft.searchlib.crawler.common.process.CrawlStatus;
import com.jaeksoft.searchlib.function.expression.SyntaxError;
import com.jaeksoft.searchlib.index.IndexDocument;
import com.jaeksoft.searchlib.query.ParseException;
import com.jaeksoft.searchlib.util.DatabaseUtils;
import com.jaeksoft.searchlib.util.InfoCallback;
import com.jaeksoft.searchlib.util.ReadWriteLock;
import com.jaeksoft.searchlib.util.Variables;
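/**
 * Crawl thread dedicated to SQL database crawls. It runs the configured SELECT
 * statement, maps each returned row to an index document (or to a deletion key
 * when a unique key delete field is configured) and optionally executes a SQL
 * update statement against the crawled rows once they have been processed.
 */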
public class DatabaseCrawlSqlThread extends DatabaseCrawlThread {
private final ReadWriteLock rwl = new ReadWriteLock();
private final DatabaseCrawlSql databaseCrawl;
public DatabaseCrawlSqlThread(Client client, DatabaseCrawlMaster crawlMaster, DatabaseCrawlSql databaseCrawl,
Variables variables, InfoCallback infoCallback) {
super(client, crawlMaster, databaseCrawl, infoCallback);
this.databaseCrawl = (DatabaseCrawlSql) databaseCrawl.duplicate();
this.databaseCrawl.applyVariables(variables);
}
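/**
 * Flushes the buffered documents to the index once the buffer has reached the
 * given limit, runs the optional post-crawl SQL update on the buffered primary
 * keys, then pauses for the configured delay.
 *
 * @return true if an indexation was triggered, false if the buffer was empty
 *         or still below the limit
 */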
private boolean index(Transaction transaction, List<IndexDocument> indexDocumentList, int limit,
List<String> pkList) throws NoSuchAlgorithmException, IOException, URISyntaxException, SearchLibException,
InstantiationException, IllegalAccessException, ClassNotFoundException, SQLException,
InterruptedException {
int i = indexDocumentList.size();
if (i == 0 || i < limit)
return false;
setStatus(CrawlStatus.INDEXATION);
client.updateDocuments(indexDocumentList);
rwl.w.lock();
try {
pendingIndexDocumentCount -= i;
updatedIndexDocumentCount += i;
} finally {
rwl.w.unlock();
}
DatabaseUtils.update(transaction, pkList, null, databaseCrawl.getSqlUpdateMode(), databaseCrawl.getSqlUpdate());
pkList.clear();
indexDocumentList.clear();
if (infoCallback != null)
infoCallback.setInfo(updatedIndexDocumentCount + " document(s) indexed");
sleepMs(databaseCrawl.getMsSleep());
return true;
}
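/**
 * Flushes the buffered unique keys as deletions once the buffer has reached
 * the given limit, runs the optional post-crawl SQL update on the deleted
 * keys, then pauses for the configured delay.
 *
 * @return true if a deletion was triggered, false if the buffer was empty or
 *         still below the limit
 */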
private boolean delete(Transaction transaction, List<String> deleteDocumentList, int limit)
throws NoSuchAlgorithmException, IOException, URISyntaxException, SearchLibException,
InstantiationException, IllegalAccessException, ClassNotFoundException, SQLException, InterruptedException {
int i = deleteDocumentList.size();
if (i == 0 || i < limit)
return false;
setStatus(CrawlStatus.DELETION);
client.deleteDocuments(client.getSchema().getUniqueField(), deleteDocumentList);
rwl.w.lock();
try {
pendingDeleteDocumentCount -= i;
updatedDeleteDocumentCount += i;
} finally {
rwl.w.unlock();
}
DatabaseUtils.update(transaction, deleteDocumentList, null, databaseCrawl.getSqlUpdateMode(),
databaseCrawl.getSqlUpdate());
deleteDocumentList.clear();
if (infoCallback != null)
infoCallback.setInfo(updatedDeleteDocumentCount + " document(s) deleted");
sleepMs(databaseCrawl.getMsSleep());
return true;
}
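/**
 * Iterates over the SELECT result set and maps each row to an index document
 * using the configured field map. Consecutive rows sharing the same primary
 * key are merged into a single document, and SQL errors raised while iterating
 * are tolerated up to ten consecutive failures before being rethrown.
 */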
final private void runner_update(Transaction transaction, ResultSet resultSet, TreeSet<String> columns)
throws NoSuchAlgorithmException, SQLException, IOException, URISyntaxException, SearchLibException,
InstantiationException, IllegalAccessException, ClassNotFoundException, ParseException, SyntaxError,
InterruptedException {
String dbPrimaryKey = databaseCrawl.getPrimaryKey();
DatabaseFieldMap databaseFieldMap = databaseCrawl.getFieldMap();
int bufferSize = databaseCrawl.getBufferSize();
IndexDocument indexDocument = null;
IndexDocument lastFieldContent = null;
boolean merge = false;
String lastPrimaryKey = null;
List<IndexDocument> indexDocumentList = new ArrayList<IndexDocument>(0);
List<String> pkList = new ArrayList<String>(0);
FieldMapContext context = new FieldMapContext(client, databaseCrawl.getLang());
Set<String> filePathSet = new TreeSet<String>();
int faultTolerancy = 10;
while (!isAborted()) {
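// Tolerate up to ten consecutive SQL errors while fetching the next row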
try {
if (!resultSet.next())
break;
faultTolerancy = 10;
} catch (SQLException e) {
if (faultTolerancy <= 0)
throw e;
Logging.error(e.getMessage() + " Vendor Error Number: " + e.getErrorCode() + " Counters: "
+ this.getCountInfo(), e);
faultTolerancy--;
continue;
}
if (dbPrimaryKey != null && dbPrimaryKey.length() == 0)
dbPrimaryKey = null;
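// Rows sharing the same primary key as the previous row are merged into a single document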
if (dbPrimaryKey != null) {
merge = false;
String pKey = resultSet.getString(dbPrimaryKey);
if (pKey != null && lastPrimaryKey != null)
if (pKey.equals(lastPrimaryKey))
merge = true;
lastPrimaryKey = pKey;
}
if (!merge) {
if (index(transaction, indexDocumentList, bufferSize, pkList))
setStatus(CrawlStatus.CRAWL);
indexDocument = new IndexDocument(context.lang);
indexDocumentList.add(indexDocument);
filePathSet.clear();
pendingIndexDocumentCount++;
pkList.add(lastPrimaryKey);
}
LanguageEnum lang = databaseCrawl.getLang();
IndexDocument newFieldContents = new IndexDocument(lang);
databaseFieldMap.mapResultSet(context, resultSet, columns, newFieldContents, filePathSet);
if (merge && lastFieldContent != null) {
indexDocument.addIfNotAlreadyHere(newFieldContents);
} else
indexDocument.add(newFieldContents);
lastFieldContent = newFieldContents;
}
index(transaction, indexDocumentList, 0, pkList);
}
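/**
 * Iterates over the SELECT result set and collects the unique key of each row
 * into the deletion buffer, flushing the buffer whenever it reaches the
 * configured size.
 */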
final private void runner_delete(Transaction transaction, ResultSet resultSet, TreeSet<String> columns)
throws NoSuchAlgorithmException, SQLException, IOException, URISyntaxException, SearchLibException,
InstantiationException, IllegalAccessException, ClassNotFoundException, InterruptedException {
List<String> deleteKeyList = new ArrayList<String>(0);
String uniqueKeyDeleteField = databaseCrawl.getUniqueKeyDeleteField();
int bf = databaseCrawl.getBufferSize();
while (resultSet.next() && !isAborted()) {
if (delete(transaction, deleteKeyList, bf))
setStatus(CrawlStatus.CRAWL);
String uKey = resultSet.getString(uniqueKeyDeleteField);
if (uKey != null) {
deleteKeyList.add(uKey);
pendingDeleteDocumentCount++;
}
}
delete(transaction, deleteKeyList, 0);
}
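/**
 * Crawl entry point. Opens a JDBC transaction, executes the configured SELECT
 * statement, then runs either the deletion loop or the indexation loop
 * depending on whether a unique key delete field is set. The transaction is
 * committed and the index reloaded only if documents were updated or deleted.
 */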
@Override
public void runner() throws Exception {
setStatus(CrawlStatus.STARTING);
JDBCConnection connectionManager = databaseCrawl.getNewJdbcConnection();
String sqlUpdate = databaseCrawl.getSqlUpdate();
if (sqlUpdate != null && sqlUpdate.length() == 0)
sqlUpdate = null;
Transaction transaction = null;
try {
transaction = databaseCrawl.getNewTransaction(connectionManager);
Query query = transaction.prepare(databaseCrawl.getSqlSelect());
query.getStatement().setFetchSize(databaseCrawl.getFetchSize());
ResultSet resultSet = query.getResultSet();
setStatus(CrawlStatus.CRAWL);
// Store the list of columns in a treeset
ResultSetMetaData metaData = resultSet.getMetaData();
TreeSet<String> columns = new TreeSet<String>();
int columnCount = metaData.getColumnCount();
for (int i = 1; i <= columnCount; i++)
columns.add(metaData.getColumnLabel(i));
String ukDeleteField = databaseCrawl.getUniqueKeyDeleteField();
if (ukDeleteField != null && ukDeleteField.length() == 0)
ukDeleteField = null;
if (ukDeleteField != null)
runner_delete(transaction, resultSet, columns);
else
runner_update(transaction, resultSet, columns);
if (updatedIndexDocumentCount > 0 || updatedDeleteDocumentCount > 0) {
transaction.commit();
client.reload();
}
} finally {
if (transaction != null)
transaction.close();
}
}
}