All downloads are FREE. The search and download functionality uses the official Maven repository.

com.jaeksoft.searchlib.crawler.database.DatabaseCrawlMongoDb Maven / Gradle / Ivy

Go to download

OpenSearchServer is a powerful, enterprise-class, search engine program. Using the web user interface, the crawlers (web, file, database, ...) and the REST/RESTFul API you will be able to integrate quickly and easily advanced full-text search capabilities in your application. OpenSearchServer runs on Windows and Linux/Unix/BSD.

The newest version!
/**   
 * License Agreement for OpenSearchServer
 *
 * Copyright (C) 2010-2014 Emmanuel Keller / Jaeksoft
 * 
 * http://www.open-search-server.com
 * 
 * This file is part of OpenSearchServer.
 *
 * OpenSearchServer is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 * OpenSearchServer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with OpenSearchServer. 
 *  If not, see <http://www.gnu.org/licenses/>.
 **/

package com.jaeksoft.searchlib.crawler.database;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.UnknownHostException;
import java.util.Arrays;

import javax.xml.xpath.XPathExpressionException;

import org.apache.commons.lang3.StringUtils;
import org.bson.Document;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;

import com.jaeksoft.searchlib.SearchLibException;
import com.jaeksoft.searchlib.util.Variables;
import com.jaeksoft.searchlib.util.XPathParser;
import com.jaeksoft.searchlib.util.XmlWriter;
import com.mongodb.MongoClient;
import com.mongodb.MongoCredential;
import com.mongodb.ServerAddress;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import com.mongodb.client.MongoIterable;

/**
 * Database crawl definition targeting a MongoDB collection.
 * <p>
 * On top of the settings inherited from {@link DatabaseCrawlAbstract} (name,
 * URL, user, password, ...), this class stores a database name, a collection
 * name and two optional JSON strings: the query criteria and the projection.
 * Both JSON strings are converted to {@link Document} instances with
 * {@link Document#parse(String)} when requested.
 */
public class DatabaseCrawlMongoDb extends DatabaseCrawlAbstract {

	private String databaseName;
	private String collectionName;
	private String criteria;
	private String projection;

	/**
	 * Creates a named crawl with no database, collection, criteria or
	 * projection defined.
	 * 
	 * @param crawlMaster
	 *            the crawl master passed to the parent constructor
	 * @param propertyManager
	 *            the property manager passed to the parent constructor
	 * @param name
	 *            the name of the crawl
	 */
	public DatabaseCrawlMongoDb(DatabaseCrawlMaster crawlMaster, DatabasePropertyManager propertyManager, String name) {
		super(crawlMaster, propertyManager, name);
		databaseName = null;
		collectionName = null;
		criteria = null;
		projection = null;
	}

	/**
	 * Substitutes the variables in the database name, the collection name, the
	 * criteria and the projection. Does nothing when no variables are given.
	 * 
	 * @param variables
	 *            the variables to apply, may be null
	 */
	public void applyVariables(Variables variables) {
		if (variables == null)
			return;
		databaseName = variables.replace(databaseName);
		collectionName = variables.replace(collectionName);
		criteria = variables.replace(criteria);
		projection = variables.replace(projection);
	}

	/**
	 * Creates an unnamed crawl (the name is null).
	 * 
	 * @param crawlMaster
	 *            the crawl master passed to the parent constructor
	 * @param propertyManager
	 *            the property manager passed to the parent constructor
	 */
	public DatabaseCrawlMongoDb(DatabaseCrawlMaster crawlMaster, DatabasePropertyManager propertyManager) {
		this(crawlMaster, propertyManager, null);
	}

	/**
	 * Copy constructor: builds a new instance sharing the master and the
	 * property manager of the given crawl, then copies its settings using
	 * {@link #copyTo(DatabaseCrawlAbstract)}.
	 * 
	 * @param crawl
	 *            the crawl to copy
	 */
	protected DatabaseCrawlMongoDb(DatabaseCrawlMongoDb crawl) {
		super((DatabaseCrawlMaster) crawl.threadMaster, crawl.propertyManager);
		crawl.copyTo(this);
	}

	@Override
	public DatabaseCrawlAbstract duplicate() {
		return new DatabaseCrawlMongoDb(this);
	}

	/**
	 * Copies the inherited settings and the MongoDB specific ones to the
	 * target.
	 * 
	 * @param crawlAbstract
	 *            the target, which must be a {@link DatabaseCrawlMongoDb}
	 *            (it is cast without check)
	 */
	@Override
	public void copyTo(DatabaseCrawlAbstract crawlAbstract) {
		super.copyTo(crawlAbstract);
		DatabaseCrawlMongoDb target = (DatabaseCrawlMongoDb) crawlAbstract;
		target.databaseName = this.databaseName;
		target.collectionName = this.collectionName;
		target.criteria = this.criteria;
		target.projection = this.projection;
	}

	@Override
	public DatabaseCrawlEnum getType() {
		return DatabaseCrawlEnum.DB_MONGO_DB;
	}

	protected final static String DBCRAWL_ATTR_DB_NAME = "databaseName";
	protected final static String DBCRAWL_ATTR_COLLECTION_NAME = "collectionName";
	protected final static String DBCRAWL_NODE_NAME_CRITERIA = "criteria";
	protected final static String DBCRAWL_NODE_NAME_PROJECTION = "projection";

	/**
	 * Builds a crawl from its XML representation (see
	 * {@link #writeXml(XmlWriter)} for the layout).
	 * 
	 * @param crawlMaster
	 *            the crawl master passed to the parent constructor
	 * @param propertyManager
	 *            the property manager passed to the parent constructor
	 * @param xpp
	 *            the parser used to read the child nodes
	 * @param item
	 *            the XML node describing the crawl
	 * @throws XPathExpressionException
	 *             if a node cannot be evaluated
	 */
	public DatabaseCrawlMongoDb(DatabaseCrawlMaster crawlMaster, DatabasePropertyManager propertyManager,
			XPathParser xpp, Node item) throws XPathExpressionException {
		super(crawlMaster, propertyManager, xpp, item);
		setDatabaseName(XPathParser.getAttributeString(item, DBCRAWL_ATTR_DB_NAME));
		setCollectionName(XPathParser.getAttributeString(item, DBCRAWL_ATTR_COLLECTION_NAME));
		// Both child nodes are optional: the fields stay null when absent.
		Node criteriaNode = xpp.getNode(item, DBCRAWL_NODE_NAME_CRITERIA);
		if (criteriaNode != null)
			setCriteria(xpp.getNodeString(criteriaNode, true));
		Node projectionNode = xpp.getNode(item, DBCRAWL_NODE_NAME_PROJECTION);
		if (projectionNode != null)
			setProjection(xpp.getNodeString(projectionNode, true));
	}

	/**
	 * Writes the crawl as an XML element: the settings as attributes, the
	 * field map as a child element, and the criteria and projection as child
	 * elements when they are not empty.
	 * 
	 * @param xmlWriter
	 *            the writer receiving the elements
	 * @throws SAXException
	 *             if the writer fails
	 */
	@Override
	public void writeXml(XmlWriter xmlWriter) throws SAXException {
		xmlWriter.startElement(DBCRAWL_NODE_NAME, DBCRAWL_ATTR_NAME, getName(), DBCRAWL_ATTR_TYPE, getType().name(),
				DBCRAWL_ATTR_USER, getUser(), DBCRAWL_ATTR_PASSWORD, getPassword(), DBCRAWL_ATTR_URL, getUrl(),
				DBCRAWL_ATTR_LANG, getLang().getCode(), DBCRAWL_ATTR_BUFFER_SIZE, Integer.toString(getBufferSize()),
				DBCRAWL_ATTR_MSSLEEP, Integer.toString(getMsSleep()), DBCRAWL_ATTR_DB_NAME, getDatabaseName(),
				DBCRAWL_ATTR_COLLECTION_NAME, getCollectionName());
		xmlWriter.startElement(DBCRAWL_NODE_NAME_MAP);
		getFieldMap().store(xmlWriter);
		xmlWriter.endElement();
		String criteriaText = getCriteria();
		if (!StringUtils.isEmpty(criteriaText)) {
			xmlWriter.startElement(DBCRAWL_NODE_NAME_CRITERIA);
			xmlWriter.textNode(criteriaText);
			xmlWriter.endElement();
		}
		String projectionText = getProjection();
		if (!StringUtils.isEmpty(projectionText)) {
			xmlWriter.startElement(DBCRAWL_NODE_NAME_PROJECTION);
			xmlWriter.textNode(projectionText);
			xmlWriter.endElement();
		}
		xmlWriter.endElement();
	}

	/**
	 * @return the databaseName
	 */
	public String getDatabaseName() {
		return databaseName;
	}

	/**
	 * @param databaseName
	 *            the databaseName to set
	 */
	public void setDatabaseName(String databaseName) {
		this.databaseName = databaseName;
	}

	/**
	 * @return the criteria
	 */
	public String getCriteria() {
		return criteria;
	}

	/**
	 * @param criteria
	 *            the criteria to set
	 */
	public void setCriteria(String criteria) {
		this.criteria = criteria;
	}

	/**
	 * @return the projection
	 */
	public String getProjection() {
		return projection;
	}

	/**
	 * @param projection
	 *            the projection to set
	 */
	public void setProjection(String projection) {
		this.projection = projection;
	}

	/**
	 * @return the collectionName
	 */
	public String getCollectionName() {
		return collectionName;
	}

	/**
	 * @param collectionName
	 *            the collectionName to set
	 */
	public void setCollectionName(String collectionName) {
		this.collectionName = collectionName;
	}

	/**
	 * Builds the server address from the host and port of the URI.
	 * <p>
	 * {@link URI#getPort()} returns -1 when the URL has no explicit port (e.g.
	 * <code>mongodb://localhost</code>). In that case the single-argument
	 * constructor is used so that the driver applies its default port instead
	 * of receiving -1.
	 * 
	 * @param uri
	 *            the parsed crawl URL
	 * @return the address of the MongoDB server
	 */
	private static ServerAddress toServerAddress(URI uri) {
		final int port = uri.getPort();
		if (port < 0)
			return new ServerAddress(uri.getHost());
		return new ServerAddress(uri.getHost(), port);
	}

	/**
	 * Opens a client on the server designated by the crawl URL. Credentials
	 * are only supplied when both the user and the password are not empty;
	 * they are bound to the configured database name.
	 * <p>
	 * The caller is responsible for closing the returned client.
	 * 
	 * @return a new client
	 * @throws URISyntaxException
	 *             if the crawl URL cannot be parsed
	 * @throws UnknownHostException
	 *             declared for callers; not thrown directly by this method
	 */
	MongoClient getMongoClient() throws URISyntaxException, UnknownHostException {
		final ServerAddress address = toServerAddress(new URI(getUrl()));
		final String user = getUser();
		final String password = getPassword();
		if (StringUtils.isEmpty(user) || StringUtils.isEmpty(password))
			return new MongoClient(address);
		final MongoCredential credential = MongoCredential.createMongoCRCredential(user, databaseName,
				password.toCharArray());
		return new MongoClient(address, Arrays.asList(credential));
	}

	/**
	 * Returns the configured collection of the configured database.
	 * 
	 * @param mongoClient
	 *            an open client
	 * @return the collection
	 * @throws IOException
	 *             if the database name or the collection name is empty
	 */
	MongoCollection<Document> getCollection(MongoClient mongoClient) throws IOException {
		if (StringUtils.isEmpty(databaseName))
			throw new IOException("No database name.");
		MongoDatabase db = mongoClient.getDatabase(databaseName);
		if (StringUtils.isEmpty(collectionName))
			throw new IOException("No collection name.");
		return db.getCollection(collectionName);
	}

	/**
	 * @return the criteria parsed as a document, or null if the criteria is
	 *         empty
	 */
	Document getCriteriaObject() {
		if (StringUtils.isEmpty(criteria))
			return null;
		return Document.parse(criteria);
	}

	/**
	 * @return the projection parsed as a document, or null if the projection
	 *         is empty
	 */
	Document getProjectionObject() {
		if (StringUtils.isEmpty(projection))
			return null;
		return Document.parse(projection);
	}

	/**
	 * Checks the configuration by connecting to the server and returns a
	 * human readable report: the collection names of the database (when a
	 * database name is set), the number of documents of the collection (when
	 * a collection name is set) and the number of documents matching the
	 * criteria (when a criteria is set).
	 * 
	 * @return the report, one information per line
	 * @throws Exception
	 *             if the URL scheme is not <code>mongodb</code>, or if any
	 *             step of the check fails
	 */
	@Override
	public String test() throws Exception {
		URI uri = new URI(getUrl());
		StringBuilder sb = new StringBuilder();
		if (!"mongodb".equals(uri.getScheme()))
			throw new SearchLibException(
					"Wrong scheme: " + uri.getScheme() + ". The URL should start with: mongodb://");
		MongoClient mongoClient = null;
		try {
			mongoClient = getMongoClient();
			sb.append("Connection established.");
			sb.append(StringUtils.LF);
			if (!StringUtils.isEmpty(databaseName)) {
				MongoDatabase db = mongoClient.getDatabase(databaseName);
				if (db == null)
					throw new SearchLibException("Database not found: " + databaseName);
				MongoIterable<String> collections = db.listCollectionNames();
				if (collections == null)
					throw new SearchLibException("No collection found.");
				sb.append("Collections found:");
				sb.append(StringUtils.LF);
				for (String collection : collections) {
					sb.append(collection);
					sb.append(StringUtils.LF);
				}
				if (!StringUtils.isEmpty(collectionName)) {
					MongoCollection<Document> dbCollection = db.getCollection(collectionName);
					if (dbCollection == null)
						throw new SearchLibException("Collection " + collectionName + " not found.");
					sb.append("Collection " + collectionName + " contains " + dbCollection.count() + " document(s).");
					sb.append(StringUtils.LF);
					if (!StringUtils.isEmpty(criteria)) {
						long count = dbCollection.count(getCriteriaObject());
						sb.append("Query returns " + count + " document(s).");
						sb.append(StringUtils.LF);
					}
				}
			}
		} finally {
			// Always release the connection, even when a check fails.
			if (mongoClient != null)
				mongoClient.close();
		}
		return sb.toString();
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy