
com.jaeksoft.searchlib.crawler.database.DatabaseCrawlMongoDb Maven / Gradle / Ivy
/**
* License Agreement for OpenSearchServer
*
* Copyright (C) 2010-2014 Emmanuel Keller / Jaeksoft
*
* http://www.open-search-server.com
*
* This file is part of OpenSearchServer.
*
* OpenSearchServer is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* OpenSearchServer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with OpenSearchServer.
* If not, see <http://www.gnu.org/licenses/>.
**/
package com.jaeksoft.searchlib.crawler.database;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.UnknownHostException;
import java.util.Arrays;
import java.util.Set;
import javax.xml.xpath.XPathExpressionException;
import org.apache.commons.lang3.StringUtils;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;
import com.jaeksoft.searchlib.SearchLibException;
import com.jaeksoft.searchlib.util.Variables;
import com.jaeksoft.searchlib.util.XPathParser;
import com.jaeksoft.searchlib.util.XmlWriter;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;
import com.mongodb.MongoCredential;
import com.mongodb.ServerAddress;
import com.mongodb.util.JSON;
/**
 * Database crawl definition whose source is a MongoDB collection.
 * <p>
 * In addition to the settings held by {@link DatabaseCrawlAbstract} (URL,
 * user, password, ...), this class stores a database name, a collection name
 * and two optional JSON strings: the query criteria and the projection.
 */
public class DatabaseCrawlMongoDb extends DatabaseCrawlAbstract {

    /** XML attribute holding the database name. */
    protected final static String DBCRAWL_ATTR_DB_NAME = "databaseName";

    /** XML attribute holding the collection name. */
    protected final static String DBCRAWL_ATTR_COLLECTION_NAME = "collectionName";

    /** XML child node holding the criteria text. */
    protected final static String DBCRAWL_NODE_NAME_CRITERIA = "criteria";

    /** XML child node holding the projection text. */
    protected final static String DBCRAWL_NODE_NAME_PROJECTION = "projection";

    private String databaseName;
    private String collectionName;
    private String criteria;
    private String projection;

    /**
     * Creates a named crawl with no database, collection, criteria or
     * projection set.
     * 
     * @param crawlMaster
     *            the crawl master passed to the super class
     * @param propertyManager
     *            the property manager passed to the super class
     * @param name
     *            the crawl name (may be null)
     */
    public DatabaseCrawlMongoDb(DatabaseCrawlMaster crawlMaster,
            DatabasePropertyManager propertyManager, String name) {
        super(crawlMaster, propertyManager, name);
        databaseName = null;
        collectionName = null;
        criteria = null;
        projection = null;
    }

    /**
     * Creates an unnamed crawl.
     * 
     * @param crawlMaster
     *            the crawl master passed to the super class
     * @param propertyManager
     *            the property manager passed to the super class
     */
    public DatabaseCrawlMongoDb(DatabaseCrawlMaster crawlMaster,
            DatabasePropertyManager propertyManager) {
        this(crawlMaster, propertyManager, null);
    }

    /**
     * Copy constructor used by {@link #duplicate()}.
     * 
     * @param crawl
     *            the crawl whose settings are copied into the new instance
     */
    protected DatabaseCrawlMongoDb(DatabaseCrawlMongoDb crawl) {
        super((DatabaseCrawlMaster) crawl.threadMaster, crawl.propertyManager);
        crawl.copyTo(this);
    }

    /**
     * Builds a crawl from its XML representation: the database and collection
     * names are read from attributes of the item, the criteria and projection
     * from optional child nodes.
     * 
     * @param crawlMaster
     *            the crawl master passed to the super class
     * @param propertyManager
     *            the property manager passed to the super class
     * @param xpp
     *            the XPath parser used to locate the child nodes
     * @param item
     *            the XML node describing this crawl
     * @throws XPathExpressionException
     *             if an XPath lookup fails
     */
    public DatabaseCrawlMongoDb(DatabaseCrawlMaster crawlMaster,
            DatabasePropertyManager propertyManager, XPathParser xpp, Node item)
            throws XPathExpressionException {
        super(crawlMaster, propertyManager, xpp, item);
        setDatabaseName(XPathParser.getAttributeString(item,
                DBCRAWL_ATTR_DB_NAME));
        setCollectionName(XPathParser.getAttributeString(item,
                DBCRAWL_ATTR_COLLECTION_NAME));
        Node criteriaNode = xpp.getNode(item, DBCRAWL_NODE_NAME_CRITERIA);
        if (criteriaNode != null)
            setCriteria(xpp.getNodeString(criteriaNode, true));
        Node projectionNode = xpp.getNode(item, DBCRAWL_NODE_NAME_PROJECTION);
        if (projectionNode != null)
            setProjection(xpp.getNodeString(projectionNode, true));
    }

    /**
     * Passes the database name, collection name, criteria and projection
     * through {@link Variables#replace} and stores the results.
     * 
     * @param variables
     *            the variables to apply; nothing happens when null
     */
    public void applyVariables(Variables variables) {
        if (variables == null)
            return;
        databaseName = variables.replace(databaseName);
        collectionName = variables.replace(collectionName);
        criteria = variables.replace(criteria);
        projection = variables.replace(projection);
    }

    /**
     * @return a new instance carrying a copy of this crawl's settings
     */
    @Override
    public DatabaseCrawlAbstract duplicate() {
        return new DatabaseCrawlMongoDb(this);
    }

    /**
     * Copies the inherited settings and the MongoDB specific ones to the
     * given crawl.
     * 
     * @param crawlAbstract
     *            the target; it is cast to {@link DatabaseCrawlMongoDb}
     */
    @Override
    public void copyTo(DatabaseCrawlAbstract crawlAbstract) {
        super.copyTo(crawlAbstract);
        DatabaseCrawlMongoDb target = (DatabaseCrawlMongoDb) crawlAbstract;
        target.databaseName = this.databaseName;
        target.collectionName = this.collectionName;
        target.criteria = this.criteria;
        target.projection = this.projection;
    }

    /**
     * @return {@link DatabaseCrawlEnum#DB_MONGO_DB}
     */
    @Override
    public DatabaseCrawlEnum getType() {
        return DatabaseCrawlEnum.DB_MONGO_DB;
    }

    /**
     * Writes this crawl as an XML element: the settings as attributes, then
     * the field map, then the criteria and projection nodes when they are not
     * empty.
     * 
     * @param xmlWriter
     *            the writer receiving the XML
     * @throws SAXException
     *             if the writer fails
     */
    @Override
    public void writeXml(XmlWriter xmlWriter) throws SAXException {
        xmlWriter.startElement(DBCRAWL_NODE_NAME, DBCRAWL_ATTR_NAME, getName(),
                DBCRAWL_ATTR_TYPE, getType().name(), DBCRAWL_ATTR_USER,
                getUser(), DBCRAWL_ATTR_PASSWORD, getPassword(),
                DBCRAWL_ATTR_URL, getUrl(), DBCRAWL_ATTR_LANG, getLang()
                        .getCode(), DBCRAWL_ATTR_BUFFER_SIZE,
                Integer.toString(getBufferSize()), DBCRAWL_ATTR_MSSLEEP,
                Integer.toString(getMsSleep()), DBCRAWL_ATTR_DB_NAME,
                getDatabaseName(), DBCRAWL_ATTR_COLLECTION_NAME,
                getCollectionName());

        xmlWriter.startElement(DBCRAWL_NODE_NAME_MAP);
        getFieldMap().store(xmlWriter);
        xmlWriter.endElement();

        String criteriaText = getCriteria();
        if (!StringUtils.isEmpty(criteriaText)) {
            xmlWriter.startElement(DBCRAWL_NODE_NAME_CRITERIA);
            xmlWriter.textNode(criteriaText);
            xmlWriter.endElement();
        }

        String projectionText = getProjection();
        if (!StringUtils.isEmpty(projectionText)) {
            xmlWriter.startElement(DBCRAWL_NODE_NAME_PROJECTION);
            xmlWriter.textNode(projectionText);
            xmlWriter.endElement();
        }

        xmlWriter.endElement();
    }

    /**
     * @return the databaseName
     */
    public String getDatabaseName() {
        return databaseName;
    }

    /**
     * @param databaseName
     *            the databaseName to set
     */
    public void setDatabaseName(String databaseName) {
        this.databaseName = databaseName;
    }

    /**
     * @return the criteria
     */
    public String getCriteria() {
        return criteria;
    }

    /**
     * @param criteria
     *            the criteria to set
     */
    public void setCriteria(String criteria) {
        this.criteria = criteria;
    }

    /**
     * @return the projection
     */
    public String getProjection() {
        return projection;
    }

    /**
     * @param projection
     *            the projection to set
     */
    public void setProjection(String projection) {
        this.projection = projection;
    }

    /**
     * @return the collectionName
     */
    public String getCollectionName() {
        return collectionName;
    }

    /**
     * @param collectionName
     *            the collectionName to set
     */
    public void setCollectionName(String collectionName) {
        this.collectionName = collectionName;
    }

    /**
     * Builds the server address from the host and port of the given URI.
     * <p>
     * {@link URI#getPort()} returns -1 when the URL carries no explicit port
     * (for instance <code>mongodb://localhost</code>); the driver's default
     * port is used in that case instead of handing -1 to the driver.
     * 
     * @param uri
     *            the parsed crawl URL
     * @return the address of the MongoDB server
     * @throws UnknownHostException
     *             declared by the {@link ServerAddress} constructor
     */
    private static ServerAddress getServerAddress(URI uri)
            throws UnknownHostException {
        int port = uri.getPort();
        if (port == -1)
            port = ServerAddress.defaultPort();
        return new ServerAddress(uri.getHost(), port);
    }

    /**
     * Opens a client on the server designated by the crawl URL. A MONGODB-CR
     * credential bound to the configured database is supplied when both the
     * user and the password are set; otherwise the connection is anonymous.
     * The caller is responsible for closing the returned client.
     * 
     * @return a new client
     * @throws URISyntaxException
     *             if the crawl URL cannot be parsed
     * @throws UnknownHostException
     *             declared by the {@link ServerAddress} constructor
     */
    MongoClient getMongoClient() throws URISyntaxException,
            UnknownHostException {
        String user = getUser();
        String password = getPassword();
        ServerAddress serverAddress = getServerAddress(new URI(getUrl()));
        if (StringUtils.isEmpty(user) || StringUtils.isEmpty(password))
            return new MongoClient(serverAddress);
        MongoCredential credential = MongoCredential.createMongoCRCredential(
                user, databaseName, password.toCharArray());
        return new MongoClient(serverAddress, Arrays.asList(credential));
    }

    /**
     * Returns the configured collection from the given client.
     * 
     * @param mongoClient
     *            an open client
     * @return the collection named by the collection name, in the database
     *         named by the database name
     * @throws IOException
     *             if the database name or the collection name is empty
     */
    DBCollection getCollection(MongoClient mongoClient) throws IOException {
        if (StringUtils.isEmpty(databaseName))
            throw new IOException("No database name.");
        DB db = mongoClient.getDB(databaseName);
        if (StringUtils.isEmpty(collectionName))
            throw new IOException("No collection name.");
        return db.getCollection(collectionName);
    }

    /**
     * @return the criteria parsed as JSON, or null when the criteria is empty
     */
    DBObject getCriteriaObject() {
        if (StringUtils.isEmpty(criteria))
            return null;
        return (DBObject) JSON.parse(criteria);
    }

    /**
     * @return the projection parsed as JSON, or null when the projection is
     *         empty
     */
    DBObject getProjectionObject() {
        if (StringUtils.isEmpty(projection))
            return null;
        return (DBObject) JSON.parse(projection);
    }

    /**
     * Checks the configuration against the server and reports what was found.
     * <p>
     * The URL scheme must be <code>mongodb</code>. A connection is opened;
     * when a database name is set its collection names are listed; when a
     * collection name is also set its document count is reported; when a
     * criteria is also set the number of documents returned by the query is
     * reported. The client is always closed before returning.
     * 
     * @return a multi-line, human readable report
     * @throws Exception
     *             a {@link SearchLibException} on a wrong scheme or a missing
     *             database/collection, or any error raised while connecting
     *             or querying
     */
    @Override
    public String test() throws Exception {
        URI uri = new URI(getUrl());
        if (!"mongodb".equals(uri.getScheme()))
            throw new SearchLibException("Wrong scheme: " + uri.getScheme()
                    + ". The URL should start with: mongodb://");
        StringBuilder sb = new StringBuilder();
        MongoClient mongoClient = null;
        try {
            mongoClient = getMongoClient();
            sb.append("Connection established.");
            sb.append(StringUtils.LF);
            // Without a database name there is nothing more to inspect.
            if (StringUtils.isEmpty(databaseName))
                return sb.toString();

            DB db = mongoClient.getDB(databaseName);
            if (db == null)
                throw new SearchLibException("Database not found: "
                        + databaseName);
            Set<String> collections = db.getCollectionNames();
            if (collections == null)
                throw new SearchLibException("No collection found.");
            sb.append("Collections found:");
            sb.append(StringUtils.LF);
            for (String collection : collections) {
                sb.append(collection);
                sb.append(StringUtils.LF);
            }
            if (StringUtils.isEmpty(collectionName))
                return sb.toString();

            DBCollection dbCollection = db.getCollection(collectionName);
            if (dbCollection == null)
                throw new SearchLibException("Collection " + collectionName
                        + " not found.");
            sb.append("Collection ").append(collectionName)
                    .append(" contains ").append(dbCollection.count())
                    .append(" document(s).");
            sb.append(StringUtils.LF);
            // The query is only evaluated when a criteria is configured.
            if (StringUtils.isEmpty(criteria))
                return sb.toString();

            DBCursor cursor = dbCollection.find(getCriteriaObject(),
                    getProjectionObject());
            try {
                sb.append("Query returns ").append(cursor.count())
                        .append(" document(s).");
                sb.append(StringUtils.LF);
            } finally {
                cursor.close();
            }
            return sb.toString();
        } finally {
            if (mongoClient != null)
                mongoClient.close();
        }
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy