
org.structr.web.common.DownloadHelper Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of structr-ui Show documentation
Show all versions of structr-ui Show documentation
Structr is an open source framework based on the popular Neo4j graph database.
The newest version!
/**
* Copyright (C) 2010-2016 Structr GmbH
*
* This file is part of Structr.
*
* Structr is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* Structr is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Structr. If not, see <http://www.gnu.org/licenses/>.
*/
package org.structr.web.common;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.lang3.StringUtils;
import org.structr.core.GraphObjectMap;
import org.structr.core.property.GenericProperty;
import org.structr.files.text.FulltextTokenizer;
import org.structr.web.Importer;
//~--- classes ----------------------------------------------------------------
/**
 * Download utility class.
 *
 * Offers a helper that opens an HTTP GET stream for a URL and a helper that
 * extracts the words surrounding each occurrence of a search term in a text.
 */
public class DownloadHelper {

	private static final Logger logger = Logger.getLogger(DownloadHelper.class.getName());

	//~--- methods --------------------------------------------------------

	/**
	 * Issues an HTTP GET request for the given address and returns the
	 * response body as a stream.
	 *
	 * Redirects are followed. The HTTP status code returned by the server is
	 * not inspected, so the stream may contain an error page.
	 *
	 * @param address the URL to download from
	 * @return the response body stream, or {@code null} if the address is not
	 *         a valid URL, an I/O error occurred, or the HTTP client reports
	 *         no response body. The caller is expected to close the stream.
	 */
	public static InputStream getInputStream(final String address) {

		GetMethod get = null;

		try {

			final URL originalUrl   = new URL(address);
			final HttpClient client = Importer.getHttpClient();

			get = new GetMethod(originalUrl.toString());
			get.addRequestHeader("User-Agent", "curl/7.35.0");
			get.addRequestHeader("Connection", "close");
			get.getParams().setParameter("http.protocol.single-cookie-header", true);
			get.setFollowRedirects(true);

			client.executeMethod(get);

			return get.getResponseBodyAsStream();

		} catch (MalformedURLException ex) {

			logger.log(Level.SEVERE, "Can't download content from malformed URL " + address, ex);

		} catch (IOException ex) {

			logger.log(Level.SEVERE, "Can't download content from URL " + address, ex);

			// no stream is handed to the caller on this path, so nobody
			// else can release the connection
			if (get != null) {
				get.releaseConnection();
			}
		}

		return null;
	}

	/**
	 * Collects the textual context around every occurrence of the given
	 * search term(s) in the given text.
	 *
	 * The search term is split at whitespace, commas and semicolons; each
	 * resulting part is searched for case-insensitively. For every hit, the
	 * surrounding words are collected (see
	 * {@link #extractContext(String, int, int)}) and stored in an
	 * insertion-ordered set, so identical context strings appear only once.
	 *
	 * @param searchTerm one or more search words separated by whitespace,
	 *                   comma or semicolon; {@code null} yields an empty result
	 * @param text the text to search in; {@code null} yields an empty result
	 * @param contextLength the approximate number of words to collect per hit
	 * @return a map that contains the set of context strings under the key "context"
	 */
	public static GraphObjectMap getContextObject(final String searchTerm, final String text, final int contextLength) {

		final GraphObjectMap contextObject = new GraphObjectMap();
		final Set<String> contextValues    = new LinkedHashSet<>();
		final GenericProperty contextKey   = new GenericProperty("context");

		if (searchTerm != null && text != null) {

			// these do not depend on the individual search part
			final String lowerCaseText = text.toLowerCase();
			final int textLength       = text.length();

			for (final String searchString : searchTerm.split("[\\s,;]+")) {

				// split() yields a leading empty string if the search term starts
				// with a separator; an empty needle would match at every index
				if (searchString.isEmpty()) {
					continue;
				}

				final String lowerCaseSearchString = searchString.toLowerCase();

				// indexOf() signals "not found" with -1, so a hit at index 0 is valid.
				// Hits are located in the lower-cased copy but characters are read
				// from the original text; lower-casing can lengthen a string, hence
				// the upper bound check.
				int pos = lowerCaseText.indexOf(lowerCaseSearchString);
				while (pos >= 0 && pos < textLength) {

					contextValues.add(extractContext(text, pos, contextLength));

					// find next occurrence
					pos = lowerCaseText.indexOf(lowerCaseSearchString, pos + 1);
				}
			}
		}

		contextObject.put(contextKey, contextValues);

		return contextObject;
	}

	//~--- private methods --------------------------------------------------------

	/**
	 * Extracts the words around a single search hit.
	 *
	 * Walks backwards from the hit position until more than half of
	 * {@code contextLength} words have been collected, then forwards from the
	 * position after the hit until the total exceeds {@code contextLength}.
	 * Either direction also stops at the text boundary or after two
	 * consecutive newline characters.
	 *
	 * @param text the original text
	 * @param pos the index of the first character of the hit
	 * @param contextLength the approximate number of words to collect
	 * @return the collected context, trimmed
	 */
	private static String extractContext(final String text, final int pos, final int contextLength) {

		final StringBuilder wordBuffer = new StringBuilder();
		final StringBuilder lineBuffer = new StringBuilder();
		final int textLength           = text.length();
		int newlineCount               = 0;
		int wordCount                  = 0;

		// fetch context words before search hit (including its first character)
		for (int i = pos; i >= 0; i--) {

			final char c = text.charAt(i);

			if (isWordCharacter(c)) {

				wordBuffer.insert(0, c);
				newlineCount = 0;

			} else {

				// a separator ends the current word; the separator itself
				// becomes the start of the next buffer
				wordCount += flushWordBuffer(lineBuffer, wordBuffer, true);
				wordBuffer.insert(0, c);

				newlineCount = (c == '\n') ? newlineCount + 1 : 0;

				// paragraph boundary reached
				if (newlineCount > 1) {
					break;
				}

				// stop if we collected half of the desired word count
				if (wordCount > contextLength / 2) {
					break;
				}
			}
		}

		wordCount += flushWordBuffer(lineBuffer, wordBuffer, true);
		wordBuffer.setLength(0);

		// fetch context words after search hit
		for (int i = pos + 1; i < textLength; i++) {

			final char c = text.charAt(i);

			if (isWordCharacter(c)) {

				wordBuffer.append(c);
				newlineCount = 0;

			} else {

				wordCount += flushWordBuffer(lineBuffer, wordBuffer, false);
				wordBuffer.append(c);

				newlineCount = (c == '\n') ? newlineCount + 1 : 0;

				// paragraph boundary reached
				if (newlineCount > 1) {
					break;
				}

				// stop if we collected enough words
				if (wordCount > contextLength) {
					break;
				}
			}
		}

		flushWordBuffer(lineBuffer, wordBuffer, false);

		return lineBuffer.toString().trim();
	}

	/**
	 * Indicates whether the given character counts as part of a word, i.e.
	 * it is alphabetic, a digit, or contained in
	 * {@code FulltextTokenizer.SpecialChars}.
	 */
	private static boolean isWordCharacter(final char c) {
		return Character.isAlphabetic(c) || Character.isDigit(c) || FulltextTokenizer.SpecialChars.contains(c);
	}

	/**
	 * Moves the content of the word buffer into the line buffer and clears
	 * the word buffer.
	 *
	 * Runs of newline and tab characters are replaced by a single space.
	 * Content that is blank after that replacement is discarded.
	 *
	 * @param lineBuffer the buffer that accumulates the context line
	 * @param wordBuffer the buffer that holds the current word, cleared on return
	 * @param prepend whether to insert at the start of the line buffer instead of appending
	 * @return 1 if a non-blank word was added to the line buffer, 0 otherwise
	 */
	private static int flushWordBuffer(final StringBuilder lineBuffer, final StringBuilder wordBuffer, final boolean prepend) {

		int wordCount = 0;

		if (wordBuffer.length() > 0) {

			final String word = wordBuffer.toString().replaceAll("[\\n\\t]+", " ");

			if (StringUtils.isNotBlank(word)) {

				if (prepend) {

					lineBuffer.insert(0, word);

				} else {

					lineBuffer.append(word);
				}

				// increase word count
				wordCount = 1;
			}

			wordBuffer.setLength(0);
		}

		return wordCount;
	}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy