
com.jaeksoft.searchlib.crawler.web.database.PatternManager Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of opensearchserver Show documentation
Show all versions of opensearchserver Show documentation
OpenSearchServer is a powerful, enterprise-class search engine program. Using the web user interface,
the crawlers (web, file, database, ...) and the REST/RESTful API, you will be able to quickly and
easily integrate advanced full-text search capabilities into your application. OpenSearchServer runs on Windows and
Linux/Unix/BSD.
/**
* License Agreement for OpenSearchServer
*
* Copyright (C) 2008-2013 Emmanuel Keller / Jaeksoft
*
* http://www.open-search-server.com
*
* This file is part of OpenSearchServer.
*
* OpenSearchServer is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* OpenSearchServer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with OpenSearchServer.
* If not, see <http://www.gnu.org/licenses/>.
**/
package com.jaeksoft.searchlib.crawler.web.database;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringReader;
import java.io.StringWriter;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.xpath.XPathExpressionException;
import org.apache.commons.lang3.StringUtils;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import com.jaeksoft.searchlib.SearchLibException;
import com.jaeksoft.searchlib.util.DomUtils;
import com.jaeksoft.searchlib.util.IOUtils;
import com.jaeksoft.searchlib.util.ReadWriteLock;
import com.jaeksoft.searchlib.util.XPathParser;
import com.jaeksoft.searchlib.util.XmlWriter;
public class PatternManager {
final private ReadWriteLock rwl = new ReadWriteLock();
// For better performances, pattern are grouped by hostname in a map
private Map> patternMap = null;
private File patternFile;
public PatternManager(File indexDir, String filename)
throws SearchLibException {
patternFile = new File(indexDir, filename);
patternMap = new TreeMap>();
try {
load();
} catch (ParserConfigurationException e) {
throw new SearchLibException(e);
} catch (SAXException e) {
throw new SearchLibException(e);
} catch (IOException e) {
throw new SearchLibException(e);
} catch (XPathExpressionException e) {
throw new SearchLibException(e);
} catch (URISyntaxException e) {
throw new SearchLibException(e);
}
}
private void load() throws ParserConfigurationException, SAXException,
IOException, XPathExpressionException, SearchLibException,
URISyntaxException {
if (!patternFile.exists())
return;
XPathParser xpp = new XPathParser(patternFile);
NodeList nodeList = xpp.getNodeList("/patterns/pattern");
int l = nodeList.getLength();
List patternList = new ArrayList(l);
for (int i = 0; i < l; i++)
patternList
.add(new PatternItem(DomUtils.getText(nodeList.item(i))));
addListWithoutStoreAndLock(patternList, true);
}
private void store() throws IOException, TransformerConfigurationException,
SAXException {
if (!patternFile.exists())
patternFile.createNewFile();
PrintWriter pw = new PrintWriter(patternFile);
try {
XmlWriter xmlWriter = new XmlWriter(pw, "UTF-8");
xmlWriter.startElement("patterns");
Iterator> it = patternMap.values().iterator();
while (it.hasNext()) {
for (PatternItem item : it.next()) {
xmlWriter.startElement("pattern");
xmlWriter.textNode(item.getPattern());
xmlWriter.endElement();
}
}
xmlWriter.endElement();
xmlWriter.endDocument();
} finally {
pw.close();
}
}
private void addListWithoutStoreAndLock(List patternList,
boolean bDeleteAll) throws SearchLibException,
MalformedURLException, URISyntaxException {
if (bDeleteAll)
patternMap.clear();
if (patternList == null)
return;
for (PatternItem item : patternList) {
if (!bDeleteAll && findPattern(item) != null)
item.setStatus(PatternItem.Status.ALREADY);
else {
addPatternWithoutLock(item);
item.setStatus(PatternItem.Status.INJECTED);
}
}
}
public void addList(List patternList, boolean bDeleteAll)
throws SearchLibException {
rwl.w.lock();
try {
addListWithoutStoreAndLock(patternList, bDeleteAll);
store();
} catch (IOException e) {
throw new SearchLibException(e);
} catch (TransformerConfigurationException e) {
throw new SearchLibException(e);
} catch (SAXException e) {
throw new SearchLibException(e);
} catch (URISyntaxException e) {
throw new SearchLibException(e);
} finally {
rwl.w.unlock();
}
}
private int delPatternWithoutLock(String sPattern)
throws MalformedURLException, URISyntaxException {
String mapKey = new PatternItem(sPattern).getTopDomainOrHost();
List itemList = patternMap.get(mapKey);
if (itemList == null)
return 0;
int count = 0;
Iterator it = itemList.iterator();
while (it.hasNext()) {
if (it.next().sPattern.equals(sPattern)) {
it.remove();
count++;
}
}
return count;
}
public int delPattern(Collection patterns)
throws SearchLibException {
rwl.w.lock();
try {
int count = 0;
for (String pattern : patterns)
count += delPatternWithoutLock(pattern);
store();
return count;
} catch (MalformedURLException e) {
throw new SearchLibException(e);
} catch (TransformerConfigurationException e) {
throw new SearchLibException(e);
} catch (IOException e) {
throw new SearchLibException(e);
} catch (SAXException e) {
throw new SearchLibException(e);
} catch (URISyntaxException e) {
throw new SearchLibException(e);
} finally {
rwl.w.unlock();
}
}
public void delPatternItem(Collection patterns)
throws SearchLibException {
rwl.w.lock();
try {
for (PatternItem pattern : patterns)
delPatternWithoutLock(pattern.sPattern);
store();
} catch (MalformedURLException e) {
throw new SearchLibException(e);
} catch (TransformerConfigurationException e) {
throw new SearchLibException(e);
} catch (IOException e) {
throw new SearchLibException(e);
} catch (SAXException e) {
throw new SearchLibException(e);
} catch (URISyntaxException e) {
throw new SearchLibException(e);
} finally {
rwl.w.unlock();
}
}
private void addPatternWithoutLock(PatternItem patternItem)
throws MalformedURLException, URISyntaxException {
String mapKey = patternItem.getTopDomainOrHost();
List itemList = patternMap.get(mapKey);
if (itemList == null) {
itemList = new ArrayList();
patternMap.put(mapKey, itemList);
}
itemList.add(patternItem);
}
public void addPattern(PatternItem patternItem) throws SearchLibException {
rwl.w.lock();
try {
addPatternWithoutLock(patternItem);
store();
} catch (TransformerConfigurationException e) {
throw new SearchLibException(e);
} catch (IOException e) {
throw new SearchLibException(e);
} catch (SAXException e) {
throw new SearchLibException(e);
} catch (URISyntaxException e) {
throw new SearchLibException(e);
} finally {
rwl.w.unlock();
}
}
public int getPatterns(String startsWith, long start, long rows,
List list) throws SearchLibException {
rwl.r.lock();
try {
if (StringUtils.isEmpty(startsWith))
startsWith = null;
Iterator> it = patternMap.values().iterator();
long end = start + rows;
int pos = 0;
int total = 0;
while (it.hasNext())
for (PatternItem item : it.next()) {
if (startsWith != null) {
if (!item.getPattern().startsWith(startsWith)) {
pos++;
continue;
}
}
if (rows == 0 || pos < end) {
if (pos >= start)
list.add(item);
}
total++;
pos++;
}
return total;
} finally {
rwl.r.unlock();
}
}
public int getPatterns(String startsWith, List list)
throws SearchLibException {
rwl.r.lock();
try {
if (StringUtils.isEmpty(startsWith))
startsWith = null;
Iterator> it = patternMap.values().iterator();
int total = 0;
while (it.hasNext())
for (PatternItem item : it.next()) {
if (startsWith != null)
if (!item.getPattern().startsWith(startsWith))
continue;
list.add(item.sPattern);
total++;
}
return total;
} finally {
rwl.r.unlock();
}
}
private PatternItem findPattern(PatternItem pattern)
throws MalformedURLException, URISyntaxException {
rwl.r.lock();
try {
List patternList = patternMap.get(pattern
.getTopDomainOrHost());
if (patternList == null)
return null;
String sPattern = pattern.getPattern();
for (PatternItem patternItem : patternList)
if (patternItem.getPattern().equals(sPattern))
return patternItem;
return null;
} finally {
rwl.r.unlock();
}
}
// TODO Matcher should be lock free
final public boolean matchPattern(URL url) {
rwl.r.lock();
try {
if (url == null)
return false;
List patternList = patternMap.get(PatternItem
.getTopDomainOrHost(url.getHost()));
if (patternList == null)
return false;
String sUrl = url.toExternalForm();
for (PatternItem patternItem : patternList)
if (patternItem.match(sUrl))
return true;
return false;
} finally {
rwl.r.unlock();
}
}
final private static void addLine(List list, String pattern) {
pattern = pattern.trim();
if (pattern.length() == 0)
return;
if (pattern.indexOf(':') == -1)
pattern = "http://" + pattern;
PatternItem item = new PatternItem();
item.setPattern(pattern);
list.add(item);
}
final private static void addLines(List list, String lines)
throws IOException {
if (lines == null)
return;
StringReader sr = null;
BufferedReader br = null;
try {
sr = new StringReader(lines);
br = new BufferedReader(sr);
String line;
while ((line = br.readLine()) != null)
addLine(list, line);
} finally {
IOUtils.close(br, sr);
}
}
public static List getPatternList(String pattern)
throws IOException {
List patternList = new ArrayList(0);
addLines(patternList, pattern);
return patternList;
}
public static List getPatternList(List patterns)
throws IOException {
List patternList = new ArrayList(0);
if (patterns != null)
for (String sPattern : patterns)
addLines(patternList, sPattern);
return patternList;
}
public static List getPatternList(BufferedReader reader)
throws IOException {
List patternList = new ArrayList();
String line;
while ((line = reader.readLine()) != null)
addLine(patternList, line);
return patternList;
}
public static String getStringPatternList(List patternList) {
StringWriter sw = null;
PrintWriter pw = null;
try {
sw = new StringWriter();
pw = new PrintWriter(sw);
for (PatternItem item : patternList)
pw.println(item.getPattern());
return sw.toString();
} finally {
if (pw != null)
IOUtils.closeQuietly(pw);
if (sw != null)
IOUtils.closeQuietly(sw);
}
}
public static final int countStatus(List patternList,
PatternItem.Status status) {
if (patternList == null)
return 0;
int count = 0;
for (PatternItem patternItem : patternList)
if (patternItem.getStatus() == status)
count++;
return count;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy