com.jaeksoft.searchlib.crawler.web.database.AbstractPatternNameValueManager Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of opensearchserver Show documentation
Show all versions of opensearchserver Show documentation
OpenSearchServer is a powerful, enterprise-class search engine program. Using the web user interface, the crawlers (web, file, database, ...) and the REST/RESTful API, you will be able to quickly and easily integrate advanced full-text search capabilities into your application. OpenSearchServer runs on Windows and Linux/Unix/BSD.
The newest version!
/**
* License Agreement for OpenSearchServer
*
* Copyright (C) 2014 Emmanuel Keller / Jaeksoft
*
* http://www.open-search-server.com
*
* This file is part of OpenSearchServer.
*
* OpenSearchServer is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* OpenSearchServer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with OpenSearchServer.
* If not, see <http://www.gnu.org/licenses/>.
**/
package com.jaeksoft.searchlib.crawler.web.database;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.xpath.XPathExpressionException;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import com.jaeksoft.searchlib.SearchLibException;
import com.jaeksoft.searchlib.util.LinkUtils;
import com.jaeksoft.searchlib.util.ReadWriteLock;
import com.jaeksoft.searchlib.util.XPathParser;
import com.jaeksoft.searchlib.util.XmlWriter;
public abstract class AbstractPatternNameValueManager {
final protected ReadWriteLock rwl = new ReadWriteLock();
// For better performances, pattern are grouped by hostname in a map
private Map> itemMap = null;
private File xmlFile;
public AbstractPatternNameValueManager(File indexDir, String filename)
throws SearchLibException {
xmlFile = new File(indexDir, filename);
itemMap = new TreeMap>();
try {
load();
} catch (ParserConfigurationException e) {
throw new SearchLibException(e);
} catch (SAXException e) {
throw new SearchLibException(e);
} catch (IOException e) {
throw new SearchLibException(e);
} catch (XPathExpressionException e) {
throw new SearchLibException(e);
}
}
protected abstract void load() throws ParserConfigurationException,
SAXException, IOException, XPathExpressionException,
SearchLibException;
protected abstract T getNewItem(Node node);
protected void load(String XPP_PATH) throws ParserConfigurationException,
SAXException, IOException, XPathExpressionException,
SearchLibException {
if (!xmlFile.exists())
return;
XPathParser xpp = new XPathParser(xmlFile);
NodeList nodeList = xpp.getNodeList(XPP_PATH);
int l = nodeList.getLength();
for (int i = 0; i < l; i++) {
Node node = nodeList.item(i);
T item = getNewItem(node);
addItem(item);
}
}
protected abstract void store() throws IOException,
TransformerConfigurationException, SAXException;
protected void store(String rootNodeName) throws IOException,
TransformerConfigurationException, SAXException {
if (!xmlFile.exists())
xmlFile.createNewFile();
PrintWriter pw = new PrintWriter(xmlFile);
try {
XmlWriter xmlWriter = new XmlWriter(pw, "UTF-8");
xmlWriter.startElement(rootNodeName);
Iterator> it = itemMap.values().iterator();
while (it.hasNext())
for (AbstractPatternNameValueItem item : it.next())
item.writeXml(xmlWriter);
xmlWriter.endElement();
xmlWriter.endDocument();
} finally {
pw.close();
}
}
public List getItems(String url) throws MalformedURLException,
URISyntaxException {
rwl.r.lock();
try {
String host = LinkUtils.newEncodedURL(url).getHost();
List itList = itemMap.get(host);
if (itList == null)
return null;
List itemList = new ArrayList(0);
Iterator it = itList.iterator();
while (it.hasNext()) {
T item = it.next();
if (url.startsWith(item.getPattern()))
itemList.add(item);
}
return itemList;
} finally {
rwl.r.unlock();
}
}
private void delItemWithoutLock(T item) throws MalformedURLException,
URISyntaxException {
String host = LinkUtils.newEncodedURL(item.getPattern()).getHost();
List itemList = itemMap.get(host);
if (itemList == null)
return;
itemList.remove(item);
if (itemList.size() == 0)
itemMap.remove(host);
}
public void delItem(T item) throws SearchLibException {
rwl.w.lock();
try {
delItemWithoutLock(item);
store();
} catch (MalformedURLException e) {
throw new SearchLibException(e);
} catch (TransformerConfigurationException e) {
throw new SearchLibException(e);
} catch (IOException e) {
throw new SearchLibException(e);
} catch (SAXException e) {
throw new SearchLibException(e);
} catch (URISyntaxException e) {
throw new SearchLibException(e);
} finally {
rwl.w.unlock();
}
}
private void addItemWithoutLock(T item) throws MalformedURLException,
URISyntaxException {
String host = item.extractUrl().getHost();
List itemList = itemMap.get(host);
if (itemList == null) {
itemList = new ArrayList(0);
itemMap.put(host, itemList);
}
itemList.add(item);
}
public void addItem(T item) throws SearchLibException {
rwl.w.lock();
try {
addItemWithoutLock(item);
store();
} catch (TransformerConfigurationException e) {
throw new SearchLibException(e);
} catch (IOException e) {
throw new SearchLibException(e);
} catch (SAXException e) {
throw new SearchLibException(e);
} catch (URISyntaxException e) {
throw new SearchLibException(e);
} finally {
rwl.w.unlock();
}
}
public void updateItem(AbstractPatternNameValueItem item)
throws SearchLibException {
rwl.w.lock();
try {
boolean found = false;
Iterator> it = itemMap.values().iterator();
while (it.hasNext() && !found)
found = it.next().contains(item);
if (!found)
throw new SearchLibException("Unknown item");
store();
} catch (TransformerConfigurationException e) {
throw new SearchLibException(e);
} catch (IOException e) {
throw new SearchLibException(e);
} catch (SAXException e) {
throw new SearchLibException(e);
} finally {
rwl.w.unlock();
}
}
public int getItems(String startsWith, long start, long rows, List list)
throws SearchLibException {
rwl.r.lock();
try {
Iterator> it = itemMap.values().iterator();
long end = start + rows;
int pos = 0;
int total = 0;
while (it.hasNext())
for (T item : it.next()) {
if (startsWith != null) {
if (!item.getPattern().startsWith(startsWith)) {
pos++;
continue;
}
}
if (pos >= start && pos < end)
list.add(item);
total++;
pos++;
}
return total;
} finally {
rwl.r.unlock();
}
}
}