// com.hfg.util.io.HTTPFileObj Maven / Gradle / Ivy
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of com_hfg Show documentation
// Show all versions of com_hfg Show documentation
// com.hfg xml, html, svg, and bioinformatics utility library
package com.hfg.util.io;
import java.io.*;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Base64;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.net.URL;
import java.net.HttpURLConnection;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.hfg.security.LoginCredentials;
import com.hfg.util.StringBuilderPlus;
import com.hfg.util.StringUtil;
import com.hfg.util.collection.CollectionUtil;
import org.apache.commons.net.ftp.FTPFile;
//------------------------------------------------------------------------------
/**
* Implementation of FileObj for HTTP files.
*
* @author J. Alex Taylor, hairyfatguy.com
*/
//------------------------------------------------------------------------------
// com.hfg XML/HTML Coding Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------
public class HTTPFileObj extends FileObjImpl
{
//###########################################################################
// PRIVATE FIELDS
//###########################################################################
// The URL this object refers to. getFileInfo() replaces it with the connection's
// final URL after a successful (HTTP 200) request, since redirects may have been followed.
private String mURL;
// Protocol of mURL as parsed by init().
private String mScheme;
// Cached size in bytes; null while unknown. getFileInfo() resets it to null before each lookup.
private Long mSize;
// Cached last-modified time; null while unknown.
private Calendar mTimestamp;
// Number of redirects followed by establishConnectionFollowingRedirects();
// callers reset it to 0 before starting a new lookup.
private int mNumRedirects;
// User-Agent header sent on HEAD requests when set. Starts out as the class-wide
// default that is in effect when the instance is created.
private String mUserAgentString = sDefaultUserAgentString;
// Path as originally requested; initialized from getPath() in init() and
// replaceable via setRequestedPath().
private String mRequestedPath;
// Cached result of isDirectory(); null until first computed.
private Boolean mIsDirectory;
// Optional credentials sent as HTTP Basic authentication on HEAD requests.
private LoginCredentials mCredentials;
// Class-wide default User-Agent for new instances. No initial value is assigned
// in this declaration, so it is null until setDefaultUserAgentString() is called.
// NOTE(review): possibly intended to default to USER_AGENT - confirm.
private static String sDefaultUserAgentString;
// Maximum number of redirects followed before an IOException is raised.
private static final int MAX_REDIRECTS = 10;
// Some places don't like to talk to or don't recognize Java. Pretend we're IE
// (sent by getInputStream()).
private static final String USER_AGENT = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)";
// Class logger, exposed through getLogger().
private final static Logger LOGGER = Logger.getLogger(HTTPFileObj.class.getName());
//###########################################################################
// CONSTRUCTORS
//###########################################################################
//---------------------------------------------------------------------------
/**
 * Creates an HTTPFileObj for the specified URL string.
 *
 * @param inURL the URL of the remote file or directory
 */
public HTTPFileObj(String inURL)
{
   this.mURL = inURL;
   // Derives the requested path and the scheme from the URL just stored.
   init();
}
//---------------------------------------------------------------------------
public HTTPFileObj(URI inURI)
{
if (! inURI.getScheme().startsWith("http"))
{
throw new RuntimeIOException("URIs with scheme " + StringUtil.singleQuote(inURI.getScheme())
+ " cannot be used to create a " + getClass().getSimpleName() + "!");
}
mURL = inURI.toString();
init();
}
//---------------------------------------------------------------------------
/**
 * Shared constructor logic: records the requested path and parses the
 * scheme (protocol) out of the URL.
 *
 * @throws RuntimeIOException if the URL cannot be parsed
 */
private void init()
{
   mRequestedPath = getPath();

   final URL parsedURL;
   try
   {
      parsedURL = new URL(mURL);
   }
   catch (MalformedURLException e)
   {
      throw new RuntimeIOException(e);
   }

   mScheme = parsedURL.getProtocol();
}
//###########################################################################
// PUBLIC METHODS
//###########################################################################
//---------------------------------------------------------------------------
/**
 * @return the scheme (protocol) captured from the URL
 */
@Override
public String getScheme()
{
   return this.mScheme;
}
//---------------------------------------------------------------------------
/**
 * Sets the class-wide default User-Agent string. Instances pick this value
 * up when they are created.
 *
 * @param inValue the default User-Agent string
 */
public static void setDefaultUserAgentString(String inValue)
{
   HTTPFileObj.sDefaultUserAgentString = inValue;
}
//---------------------------------------------------------------------------
/**
 * @return the class-wide default User-Agent string
 */
public static String getDefaultUserAgentString()
{
   return HTTPFileObj.sDefaultUserAgentString;
}
//---------------------------------------------------------------------------
/**
 * Sets the User-Agent string for this instance.
 *
 * @param inValue the User-Agent string
 * @return this object, to allow method chaining
 */
public HTTPFileObj setUserAgentString(String inValue)
{
   this.mUserAgentString = inValue;
   return this;
}
//---------------------------------------------------------------------------
/**
 * @return the User-Agent string configured for this instance
 */
public String geUserAgentString()
{
   return this.mUserAgentString;
}
//---------------------------------------------------------------------------
/**
 * @return the logger used by this class
 */
public static Logger getLogger()
{
   return HTTPFileObj.LOGGER;
}
//---------------------------------------------------------------------------
/**
 * @return the URL of this object as a URI
 * @throws RuntimeIOException if the URL is not a syntactically valid URI
 */
@Override
public URI getURI()
{
   try
   {
      return new URI(mURL);
   }
   catch (URISyntaxException e)
   {
      throw new RuntimeIOException(e);
   }
}
//---------------------------------------------------------------------------
/**
 * @return the protocol of the current URL
 */
public String getProtocol()
{
   final URL parsedURL = toURL();
   return parsedURL.getProtocol();
}
//---------------------------------------------------------------------------
/**
 * @return the host name of the current URL
 */
public String getHost()
{
   final URL parsedURL = toURL();
   return parsedURL.getHost();
}
//---------------------------------------------------------------------------
/**
 * @return the port of the current URL as reported by {@link URL#getPort()}
 */
public int getPort()
{
   final URL parsedURL = toURL();
   return parsedURL.getPort();
}
//---------------------------------------------------------------------------
/**
 * @return the query portion of the current URL as reported by {@link URL#getQuery()}
 */
public String getQuery()
{
   final URL parsedURL = toURL();
   return parsedURL.getQuery();
}
//---------------------------------------------------------------------------
/**
 * Parses the current URL string into a {@link URL}.
 *
 * @return the parsed URL
 * @throws RuntimeException wrapping the MalformedURLException if the URL cannot be parsed
 */
private URL toURL()
{
   try
   {
      return new URL(mURL);
   }
   catch (MalformedURLException e)
   {
      throw new RuntimeException(e);
   }
}
//---------------------------------------------------------------------------
/**
 * @return the current URL string
 */
public String getURL()
{
   return this.mURL;
}
//---------------------------------------------------------------------------
/**
 * Returns the last path segment of the URL, without any query arguments and
 * without a trailing '/'.
 * <p>
 * The query is removed before the last '/' is located, so that a '/' inside
 * the query arguments (e.g. {@code http://host/file?path=a/b}) cannot be
 * mistaken for a path separator.
 *
 * @return the name of the file or directory
 */
@Override
public String getName()
{
   String name = mURL;

   // Don't include arguments in the name
   int index = name.indexOf('?');
   if (index >= 0)
   {
      name = name.substring(0, index);
   }

   // Remove any trailing '/'
   if (name.endsWith("/"))
   {
      name = name.substring(0, name.length() - 1);
   }

   // Keep only what follows the last remaining '/'
   index = name.lastIndexOf('/');
   if (index > 0)
   {
      name = name.substring(index + 1);
   }

   return name;
}
//---------------------------------------------------------------------------
/**
 * Returns the directory that contains this file or directory.
 * <p>
 * Query arguments and a single trailing '/' are ignored when locating the
 * parent, so the parent of {@code http://host/dir/} is {@code http://host}
 * rather than the directory itself. The returned parent does not inherit this
 * object's credentials or User-Agent setting.
 *
 * @return the parent directory, or null if the URL has no path segment below
 *         the server root
 */
@Override
public HTTPFileObj getParentDir()
{
   String location = mURL;

   // A '/' inside the query arguments is not a path separator
   int index = location.indexOf('?');
   if (index >= 0)
   {
      location = location.substring(0, index);
   }

   // A trailing '/' marks a directory; it does not start a new path segment
   if (location.endsWith("/"))
   {
      location = location.substring(0, location.length() - 1);
   }

   // The slashes of the "//" that follows the scheme must not be treated as path separators
   index = location.indexOf("//");
   int pathSearchStart = (index >= 0 ? index + 2 : 0);

   HTTPFileObj parent = null;
   index = location.lastIndexOf('/');
   if (index > 0
       && index >= pathSearchStart)
   {
      parent = new HTTPFileObj(location.substring(0, index));
   }

   return parent;
}
//---------------------------------------------------------------------------
/**
 * Lists the entries linked from this directory's page.
 * <p>
 * The raw {@code List} return type is kept exactly as declared in this file so
 * that the override signature is unchanged; the elements added are HTTPFileObj
 * instances. The link list is given an element type locally because a
 * for-each loop with a String variable does not compile over a raw List.
 *
 * @return a list of HTTPFileObj objects, one per link found, or null if this
 *         is not a directory or no links were found
 */
@Override
@SuppressWarnings({"rawtypes", "unchecked"})
public List listFiles()
{
   List fileObjs = null;

   if (isDirectory())
   {
      List<String> links = getLinksInDir(mURL);
      if (CollectionUtil.hasValues(links))
      {
         fileObjs = new ArrayList<>(links.size());
         for (String link : links)
         {
            fileObjs.add(new HTTPFileObj(link));
         }
      }
   }

   return fileObjs;
}
//---------------------------------------------------------------------------
/**
 * Returns the path portion of the URL: everything after the first '/' that
 * follows the server (or server:port), without query arguments.
 * <p>
 * The query is removed first so that neither a '/' inside the arguments nor a
 * query that directly follows the server or a '/' can leak into the path. A
 * URL that names only the server (e.g. {@code http://host}) has an empty
 * path, the same as {@code http://host/}.
 *
 * @return the path, without a leading '/'
 */
@Override
public String getPath()
{
   String path = mURL;

   // Don't include arguments in the path
   int index = path.indexOf('?');
   if (index >= 0)
   {
      path = path.substring(0, index);
   }

   // Remove http://
   index = path.indexOf("//");
   final boolean schemeRemoved = (index > 0);
   if (schemeRemoved)
   {
      path = path.substring(index + 2);
   }

   // Remove the server or server:port
   index = path.indexOf("/");
   if (index > 0)
   {
      path = path.substring(index + 1);
   }
   else if (index < 0
            && schemeRemoved)
   {
      // Only the server remains, so there is no path at all
      path = "";
   }

   return path;
}
//---------------------------------------------------------------------------
/**
 * Overrides the requested path recorded for this object.
 *
 * @param inValue the requested path
 * @return this object, to allow method chaining
 */
public HTTPFileObj setRequestedPath(String inValue)
{
   this.mRequestedPath = inValue;
   return this;
}
//---------------------------------------------------------------------------
/**
 * @return the recorded requested path, or the current path if none is recorded
 */
@Override
public String getRequestedPath()
{
   if (mRequestedPath != null)
   {
      return mRequestedPath;
   }

   return getPath();
}
//---------------------------------------------------------------------------
/**
 * Returns the size of the remote file, querying the server if the size has
 * not been cached yet.
 *
 * @return the size in bytes, or -1 if it could not be determined
 */
@Override
public long length()
{
   if (mSize == null)
   {
      getFileInfo();
   }

   if (mSize == null)
   {
      return -1;
   }

   return mSize;
}
//---------------------------------------------------------------------------
/**
 * Returns the last-modified time in milliseconds. A non-null value from the
 * superclass takes precedence; otherwise the cached timestamp is used,
 * querying the server first if no timestamp has been cached.
 *
 * @return the last-modified time in milliseconds, or null if unknown
 */
@Override
public Long lastModified()
{
   final Long superValue = super.lastModified();
   if (superValue != null)
   {
      return superValue;
   }

   if (mTimestamp == null)
   {
      getFileInfo();
   }

   if (mTimestamp == null)
   {
      return null;
   }

   return mTimestamp.getTimeInMillis();
}
//---------------------------------------------------------------------------
/**
 * Returns the cached last-modified timestamp, querying the server first if
 * none has been cached.
 *
 * @return the timestamp, or null if it could not be determined
 */
@Override
public Calendar getTimestamp()
{
   final boolean timestampUnknown = (mTimestamp == null);
   if (timestampUnknown)
   {
      getFileInfo();
   }

   return mTimestamp;
}
//---------------------------------------------------------------------------
/**
 * Opens a buffered stream for reading the content at this URL.
 * <p>
 * The request uses the same identity as the HEAD requests issued by this
 * class: the User-Agent configured for this instance (falling back to the
 * built-in browser-like USER_AGENT when none is set) and, if credentials
 * have been provided, an HTTP Basic Authorization header.
 *
 * @return a buffered InputStream over the response body
 * @throws IOException if the connection cannot be established or read
 */
@Override
public InputStream getInputStream()
   throws IOException
{
   HttpURLConnection conn = (HttpURLConnection) new URL(mURL).openConnection();

   // Honor the configured User-Agent; otherwise pretend to be a browser as before.
   String userAgent = StringUtil.isSet(geUserAgentString()) ? geUserAgentString() : USER_AGENT;
   conn.setRequestProperty("User-Agent", userAgent);

   if (getCredentials() != null)
   {
      String userAndPassword = getCredentials().getUser() + ":" + new String(getCredentials().getPassword());
      String base64Auth = Base64.getEncoder().encodeToString(userAndPassword.getBytes(java.nio.charset.StandardCharsets.UTF_8));
      conn.setRequestProperty("Authorization", "Basic " + base64Auth);
   }

   conn.connect();

   return new BufferedInputStream(conn.getInputStream());
}
//---------------------------------------------------------------------------
/**
 * Not supported: HTTPFileObj does not currently implement HTTP PUT, so no
 * stream for writing to this location can be provided.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public OutputStream getOutputStream()
   throws IOException
{
   throw new UnsupportedOperationException();
}
//---------------------------------------------------------------------------
/**
 * Delegates to the superclass and, if the size is still unknown (null or
 * negative), records the length of the file named after this object in the
 * specified directory.
 *
 * @param inLocalDir the local directory to write into
 * @return the result reported by the superclass
 * @throws IOException if the superclass write fails
 */
@Override
public boolean writeToLocalDir(File inLocalDir)
   throws IOException
{
   final boolean written = super.writeToLocalDir(inLocalDir);

   final boolean sizeUnknown = (mSize == null || mSize < 0);
   if (sizeUnknown)
   {
      File localCopy = new File(inLocalDir, getName());
      mSize = localCopy.length();
   }

   return written;
}
//---------------------------------------------------------------------------
/**
 * Delegates to the superclass and, if the size is still unknown (null or
 * negative), records the length of the file at this object's path beneath
 * the specified directory.
 *
 * @param inLocalDir the local base directory
 * @return the result reported by the superclass
 * @throws IOException if the superclass copy fails
 */
@Override
public boolean copyLocallyPreservingPath(File inLocalDir)
   throws IOException
{
   final boolean copied = super.copyLocallyPreservingPath(inLocalDir);

   final boolean sizeUnknown = (mSize == null || mSize < 0);
   if (sizeUnknown)
   {
      File localCopy = new File(inLocalDir, getPath());
      mSize = localCopy.length();
   }

   return copied;
}
//---------------------------------------------------------------------------
/**
 * Delegates to the superclass and, if the size is still unknown (null or
 * negative), records the value the superclass returned as the size.
 *
 * @param stream the stream to write the content to
 * @return the value returned by the superclass
 * @throws IOException if the superclass write fails
 */
@Override
public long writeToStream(OutputStream stream)
   throws IOException
{
   final long bytesReported = super.writeToStream(stream);

   final boolean sizeUnknown = (mSize == null || mSize < 0);
   if (sizeUnknown)
   {
      mSize = bytesReported;
   }

   return bytesReported;
}
//---------------------------------------------------------------------------
/**
 * Delegates to the superclass and, if the size is still unknown (null or
 * negative), records the length of the specified local file.
 *
 * @param inLocalFile the local file to write to
 * @return the result reported by the superclass
 * @throws IOException if the superclass write fails
 */
@Override
public boolean writeToLocalFile(File inLocalFile)
   throws IOException
{
   final boolean written = super.writeToLocalFile(inLocalFile);

   final boolean sizeUnknown = (mSize == null || mSize < 0);
   if (sizeUnknown)
   {
      mSize = inLocalFile.length();
   }

   return written;
}
//---------------------------------------------------------------------------
/**
 * Queries the server and reports whether a size was recorded, which only
 * happens when the request ends with an HTTP 200 response.
 *
 * @return true if the file information lookup recorded a size
 */
@Override
public boolean exists()
{
   getFileInfo();

   final boolean sizeRecorded = (null != mSize);
   return sizeRecorded;
}
//---------------------------------------------------------------------------
/**
 * @return always true; no check is made against the server
 */
@Override
public boolean canRead()
{
   final boolean readable = true;
   return readable;
}
//---------------------------------------------------------------------------
// TODO
/**
 * @return always false
 */
@Override
public boolean canWrite()
{
   final boolean writable = false;
   return writable;
}
//---------------------------------------------------------------------------
/**
 * @return true if this object is not considered a directory
 */
@Override
public boolean isFile()
{
   final boolean directory = isDirectory();
   return ! directory;
}
//---------------------------------------------------------------------------
/**
 * Reports whether this URL refers to a directory. The answer is computed
 * once and cached.
 * <ul>
 *   <li>A path ending in '/' is a directory.</li>
 *   <li>A URL with a query is assumed to return a file.</li>
 *   <li>Otherwise the server is probed: the URL is a directory only if the
 *       same location with a trailing '/' answers with HTTP 200.</li>
 * </ul>
 *
 * @return true if this URL is considered a directory
 */
@Override
public boolean isDirectory()
{
   if (null == mIsDirectory)
   {
      String path = getPath();

      boolean result = path.endsWith("/");
      if (! result
          && null == getQuery()) // If a query was specified, assume it returns a file
      {
         result = respondsWithTrailingSlash(path);
      }

      mIsDirectory = result;
   }

   return mIsDirectory;
}

//---------------------------------------------------------------------------
/**
 * Checks if adding a '/' to the end of the URL returns successfully.
 * Any failure while probing is treated as "not a directory" (best effort).
 *
 * @param inPath the path of this URL, without a leading or trailing '/'
 * @return true only if the probe request answered with HTTP 200
 */
private boolean respondsWithTrailingSlash(String inPath)
{
   boolean success = false;

   HttpURLConnection conn = null;
   try
   {
      mNumRedirects = 0;

      URL currentURL = toURL();

      StringBuilder url = new StringBuilder(currentURL.getProtocol() + "://" + currentURL.getHost());

      // Keep an explicitly specified port unless it is the default for this protocol
      int port = currentURL.getPort();
      if (port > 0
          && port != currentURL.getDefaultPort())
      {
         url.append(":").append(port);
      }

      url.append("/");
      if (inPath.length() > 0)
      {
         url.append(inPath)
            .append("/"); // adding a trailing '/'
      }

      conn = establishConnectionFollowingRedirects(url.toString());

      success = (conn.getResponseCode() == HttpURLConnection.HTTP_OK);
   }
   catch (Exception e)
   {
      // Deliberately best-effort: a failed probe just means we can't call it a directory
      LOGGER.log(Level.FINE, "Directory probe failed for " + mURL, e);
   }
   finally
   {
      if (conn != null) conn.disconnect();
   }

   return success;
}
//---------------------------------------------------------------------------
/**
 * Directory creation is not implemented for HTTP locations.
 *
 * @return always false
 * @throws IOException never thrown by this implementation
 */
@Override
public boolean mkdirs()
   throws IOException
{
   // TODO: Should this throw an UnimplementedMethodException?
   final boolean created = false;
   return created;
}
//---------------------------------------------------------------------------
/**
 * Deletion is not implemented for HTTP locations.
 *
 * @return always false
 */
@Override
public boolean delete()
{
   // TODO: Should this throw an UnimplementedMethodException?
   final boolean deleted = false;
   return deleted;
}
//---------------------------------------------------------------------------
/**
 * Sets the credentials to send with requests made for this object.
 *
 * @param inValue the login credentials
 * @return this object, to allow method chaining
 */
public HTTPFileObj setCredentials(LoginCredentials inValue)
{
   this.mCredentials = inValue;
   return this;
}
//---------------------------------------------------------------------------
/**
 * @return the credentials configured for this object, or null if none were set
 */
public LoginCredentials getCredentials()
{
   return this.mCredentials;
}
//###########################################################################
// PRIVATE METHODS
//###########################################################################
//---------------------------------------------------------------------------
/**
 * Issues a HEAD request (following redirects) and caches the size and
 * last-modified time reported by the server.
 * <p>
 * The cached size and timestamp are cleared first and are only repopulated
 * when the final response is HTTP 200. In that case the stored URL and scheme
 * are also updated to the connection's final URL, since redirects may have
 * been followed. The size is -1 when the server does not report a content
 * length.
 *
 * @throws RuntimeException if the request fails; the original exception is
 *         attached as the cause
 */
private void getFileInfo()
{
   HttpURLConnection conn = null;
   try
   {
      mSize = null;
      mTimestamp = null;
      mNumRedirects = 0;

      conn = establishConnectionFollowingRedirects(mURL);

      final int responseCode = conn.getResponseCode();
      if (responseCode == HttpURLConnection.HTTP_OK)
      {
         // Since we may have been redirected, update the URL (and the scheme derived from it)
         mURL = conn.getURL().toString();
         mScheme = conn.getURL().getProtocol();

         // The long variant avoids truncating content lengths above Integer.MAX_VALUE
         mSize = conn.getContentLengthLong();

         long lastModifiedMilliseconds = conn.getLastModified();
         if (lastModifiedMilliseconds > 0)
         {
            mTimestamp = new GregorianCalendar();
            mTimestamp.setTimeInMillis(lastModifiedMilliseconds);
         }
      }
   }
   catch (Exception e)
   {
      // Same message as before, but keep the cause so the stack trace is not lost
      throw new RuntimeException(e.toString(), e);
   }
   finally
   {
      if (conn != null) conn.disconnect();
   }
}
//---------------------------------------------------------------------------
/**
 * Opens a HEAD connection to the specified URL, following redirects
 * (301, 302, 303, 307, 308) manually up to MAX_REDIRECTS times.
 * <p>
 * Automatic redirect handling is turned off so that each hop can be logged
 * and counted. A relative Location header is resolved against the URL that
 * was actually requested on that hop. The redirect counter (mNumRedirects)
 * is not reset here; callers reset it before starting a new lookup.
 * <p>
 * NOTE(review): the configured credentials are sent on every hop, including
 * redirects to a different host - confirm that this is intended.
 *
 * @param inURL the URL to connect to
 * @return the connection for the final (non-redirect) response; the caller
 *         is responsible for disconnecting it
 * @throws IOException if the connection fails, a redirect has no Location
 *         header, or more than MAX_REDIRECTS redirects are encountered
 */
private HttpURLConnection establishConnectionFollowingRedirects(String inURL)
   throws IOException
{
   // HttpURLConnection declares no constants for these two status codes
   final int HTTP_TEMPORARY_REDIRECT = 307;
   final int HTTP_PERMANENT_REDIRECT = 308;

   final URL requestedURL = new URL(inURL);

   HttpURLConnection conn = (HttpURLConnection) requestedURL.openConnection();
   conn.setRequestMethod("HEAD");
   conn.setUseCaches(false);
   conn.setInstanceFollowRedirects(false);

   if (StringUtil.isSet(geUserAgentString()))
   {
      // Pretend to be a browser. Some sites don't want to talk to Java
      conn.setRequestProperty("User-Agent", geUserAgentString());
   }

   if (getCredentials() != null)
   {
      String userAndPassword = getCredentials().getUser() + ":" + new String(getCredentials().getPassword());
      // Explicit charset so the encoded header does not depend on the platform default
      String base64Auth = Base64.getEncoder().encodeToString(userAndPassword.getBytes(java.nio.charset.StandardCharsets.UTF_8));
      conn.setRequestProperty("Authorization", "Basic " + base64Auth);
   }

   conn.connect();

   final int responseCode = conn.getResponseCode();

   if (getLogger().isLoggable(Level.FINE))
   {
      LOGGER.log(Level.FINE, "RESPONSE CODE: " + responseCode);
      LOGGER.log(Level.FINE, "RESPONSE MSG: " + conn.getResponseMessage());
      LOGGER.log(Level.FINE, "CONTENT LENGTH: " + conn.getContentLength());
      LOGGER.log(Level.FINE, "LAST MODIFIED: " + conn.getLastModified());
   }

   final boolean permanentRedirect =    responseCode == HttpURLConnection.HTTP_MOVED_PERM
                                     || responseCode == HTTP_PERMANENT_REDIRECT;

   if (   permanentRedirect
       || responseCode == HttpURLConnection.HTTP_MOVED_TEMP
       || responseCode == HttpURLConnection.HTTP_SEE_OTHER
       || responseCode == HTTP_TEMPORARY_REDIRECT)
   {
      final String newLocation = conn.getHeaderField("Location");

      // Done with this hop; release it before recursing or failing
      conn.disconnect();

      if (! StringUtil.isSet(newLocation))
      {
         throw new IOException("Redirect response " + responseCode + " from " + inURL + " did not include a Location header!");
      }

      final String msg = inURL + (permanentRedirect ? " permanently" : "") + " moved to " + newLocation;
      LOGGER.log(Level.INFO, msg);

      mNumRedirects++;
      if (mNumRedirects > MAX_REDIRECTS)
      {
         throw new IOException("More than " + MAX_REDIRECTS + " redirects!");
      }

      // Resolve a relative Location against the URL requested on this hop
      final URL newURL = new URL(requestedURL, newLocation);

      conn = establishConnectionFollowingRedirects(newURL.toString()); // Recurse
   }

   return conn;
}
//---------------------------------------------------------------------------
private List getLinksInDir(String inDirURL)
{
List links = new ArrayList<>();
try
{
String url = inDirURL;
HttpURLConnection conn = null;
int numAttempts = 0;
while (numAttempts < 3)
{
conn = establishConnectionFollowingRedirects(url);
if (conn.getResponseCode() == HttpURLConnection.HTTP_MOVED_PERM
|| conn.getResponseCode() == HttpURLConnection.HTTP_MOVED_PERM)
{
url = conn.getHeaderField("Location");
numAttempts++;
}
else
{
break;
}
}
// System.out.println("RESPONSE CODE: " + conn.getResponseCode());
// System.out.println("RESPONSE MSG: " + conn.getResponseMessage());
// System.out.println("CONTENT LENGTH: " + conn.getContentLength());
// System.out.println("CONTENT TYPE: " + conn.getContentType());
if (conn.getResponseCode() == HttpURLConnection.HTTP_OK)
{
BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream()));
String line;
StringBuilderPlus dirHTML = new StringBuilderPlus();
while ((line = reader.readLine()) != null)
{
dirHTML.appendln(line);
}
reader.close();
// Painful but we need to scrape the page looking for the directory's contents.
// (Assuming it IS a directory.)
Pattern p = Pattern.compile("