All downloads are free. The search and download functionality uses the official Maven repository.

com.hfg.util.io.HTTPFileObj Maven / Gradle / Ivy

There is a newer version: 20240423
Show newest version
package com.hfg.util.io;

import java.io.*;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Base64;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.net.URL;
import java.net.HttpURLConnection;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.hfg.security.LoginCredentials;
import com.hfg.util.StringBuilderPlus;
import com.hfg.util.StringUtil;
import com.hfg.util.collection.CollectionUtil;
import org.apache.commons.net.ftp.FTPFile;

//------------------------------------------------------------------------------
/**
 * Implementation of FileObj for HTTP files.
 *
 * @author J. Alex Taylor, hairyfatguy.com
 */
//------------------------------------------------------------------------------
// com.hfg XML/HTML Coding Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------

public class HTTPFileObj extends FileObjImpl
{

   //###########################################################################
   // PRIVATE FIELDS
   //###########################################################################

   // URL of the remote resource. getFileInfo() replaces it with the final URL after redirects.
   private String mURL;
   // Protocol of mURL, extracted in init().
   private String mScheme;
   // Cached size; null until getFileInfo() or one of the write/copy methods records a value.
   private Long   mSize;
   // Cached last-modified time; null until getFileInfo() obtains a positive Last-Modified value.
   private Calendar mTimestamp;
   // Number of redirects followed so far; reset to 0 before each lookup.
   private int    mNumRedirects;
   // User agent sent with HEAD requests; starts as the class-wide default in effect at construction.
   private String mUserAgentString = sDefaultUserAgentString;
   // Path as originally requested; initialized from getPath() in init().
   private String mRequestedPath;
   // Cached answer of isDirectory(); null until first computed.
   private Boolean mIsDirectory;
   // Optional credentials used for HTTP Basic authentication of HEAD requests.
   private LoginCredentials mCredentials;

   // Class-wide default user agent for new instances. No initial value is assigned here.
   private static String sDefaultUserAgentString;

   // Upper limit on the number of redirects followed before giving up with an IOException.
   private static final int MAX_REDIRECTS = 10;

   // Some places don't like to talk to or don't recognize Java. Pretend we're IE
   // (This is the value sent by getInputStream().)
   private static final String USER_AGENT = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)";

   // Logger shared by all instances; exposed via getLogger().
   private final static Logger LOGGER = Logger.getLogger(HTTPFileObj.class.getName());

   //###########################################################################
   // CONSTRUCTORS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    * Creates a file object for the resource at the specified URL.
    *
    * @param inURL the URL of the remote resource
    * @throws RuntimeIOException if init() cannot parse the value as a URL
    */
   public HTTPFileObj(String inURL)
   {
      this.mURL = inURL;
      init();
   }

   //---------------------------------------------------------------------------
   public HTTPFileObj(URI inURI)
   {
      if (! inURI.getScheme().startsWith("http"))
      {
         throw new RuntimeIOException("URIs with scheme " + StringUtil.singleQuote(inURI.getScheme())
                                      + " cannot be used to create a " + getClass().getSimpleName() + "!");
      }

      mURL = inURI.toString();
      init();
   }

   //---------------------------------------------------------------------------
   /**
    * Shared constructor logic: records the requested path and extracts the protocol from the URL.
    *
    * @throws RuntimeIOException if the URL string is malformed
    */
   private void init()
   {
      mRequestedPath = getPath();

      final URL parsedURL;
      try
      {
         parsedURL = new URL(mURL);
      }
      catch (MalformedURLException e)
      {
         throw new RuntimeIOException(e);
      }

      mScheme = parsedURL.getProtocol();
   }


   //###########################################################################
   // PUBLIC METHODS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    * Returns the protocol that was extracted from the URL when this object was created.
    *
    * @return the URL's scheme
    */
   @Override
   public String getScheme()
   {
      return this.mScheme;
   }

   //---------------------------------------------------------------------------
   /**
    * Sets the class-wide default user agent. Instances copy this value when they are
    * constructed, so objects that already exist are not affected.
    *
    * @param inValue the user agent string to use as the default for new instances
    */
   public static void setDefaultUserAgentString(String inValue)
   {
      HTTPFileObj.sDefaultUserAgentString = inValue;
   }

   //---------------------------------------------------------------------------
   /**
    * Returns the class-wide default user agent.
    *
    * @return the default user agent string, or null if none has been set
    */
   public static String getDefaultUserAgentString()
   {
      return HTTPFileObj.sDefaultUserAgentString;
   }


   //---------------------------------------------------------------------------
   /**
    * Sets the user agent for this object.
    *
    * @param inValue the user agent string to use
    * @return this object, to allow method chaining
    */
   public HTTPFileObj setUserAgentString(String inValue)
   {
      this.mUserAgentString = inValue;
      return this;
   }

   //---------------------------------------------------------------------------
   /**
    * Returns the user agent configured for this object.
    * (The method name is missing a 't'; it is part of the public API and so is left as is.)
    *
    * @return the user agent string, which may be null
    */
   public String geUserAgentString()
   {
      return this.mUserAgentString;
   }

   //---------------------------------------------------------------------------
   /**
    * Returns the logger used by this class so that callers can adjust its level or handlers.
    *
    * @return the class logger
    */
   public static Logger getLogger()
   {
      return HTTPFileObj.LOGGER;
   }

   //---------------------------------------------------------------------------
   /**
    * Returns the current URL as a URI.
    *
    * @return a URI parsed from the URL string
    * @throws RuntimeIOException if the URL string is not a valid URI
    */
   @Override
   public URI getURI()
   {
      try
      {
         return new URI(mURL);
      }
      catch (URISyntaxException e)
      {
         throw new RuntimeIOException(e);
      }
   }

   //---------------------------------------------------------------------------
   /**
    * Returns the protocol of the current URL.
    *
    * @return the URL's protocol
    */
   public String getProtocol()
   {
      final URL url = toURL();
      return url.getProtocol();
   }

   //---------------------------------------------------------------------------
   /**
    * Returns the host name of the current URL.
    *
    * @return the URL's host
    */
   public String getHost()
   {
      final URL url = toURL();
      return url.getHost();
   }

   //---------------------------------------------------------------------------
   /**
    * Returns the port number of the current URL.
    *
    * @return the URL's port as reported by java.net.URL (-1 if the URL does not specify one)
    */
   public int getPort()
   {
      final URL url = toURL();
      return url.getPort();
   }

   //---------------------------------------------------------------------------
   /**
    * Returns the query portion of the current URL.
    *
    * @return the URL's query as reported by java.net.URL (null if the URL has none)
    */
   public String getQuery()
   {
      final URL url = toURL();
      return url.getQuery();
   }

   //---------------------------------------------------------------------------
   /**
    * Parses the current URL string into a java.net.URL.
    *
    * @return the parsed URL
    * @throws RuntimeException wrapping the MalformedURLException if the string cannot be parsed
    */
   private URL toURL()
   {
      try
      {
         return new URL(mURL);
      }
      catch (MalformedURLException e)
      {
         throw new RuntimeException(e);
      }
   }

   //---------------------------------------------------------------------------
   /**
    * Returns the current URL string. Note that getFileInfo() replaces the stored URL with
    * the final location when the original request was redirected.
    *
    * @return the URL as a string
    */
   public String getURL()
   {
      return this.mURL;
   }

   //---------------------------------------------------------------------------
   /**
    * Returns the last segment of the URL, without any query arguments and without a
    * trailing '/'.
    * <p>
    * Examples: "http://host/a/b.txt?x=1/2" yields "b.txt", "http://host/dir/" yields "dir"
    * and "http://host" yields "host".
    * </p>
    *
    * @return the name of the file or directory the URL refers to
    */
   @Override
   public String getName()
   {
      String name = mURL;

      // Don't include arguments in the name. This is done first so that a '/' inside
      // an argument value isn't mistaken for a path separator.
      int index = name.indexOf('?');
      if (index >= 0)
      {
         name = name.substring(0, index);
      }

      // Remove any trailing '/'
      if (name.endsWith("/"))
      {
         name = name.substring(0, name.length() - 1);
      }

      // Keep only what follows the last remaining '/'
      index = name.lastIndexOf('/');
      if (index > 0)
      {
         name = name.substring(index + 1);
      }

      return name;
   }

   //---------------------------------------------------------------------------
   /**
    * Returns a file object for the directory containing this URL's last path segment.
    * <p>
    * Query arguments and a trailing '/' are ignored when locating the parent, so
    * "http://host/a/b.txt" and "http://host/a/b/" both yield "http://host/a".
    * The returned object is created from the URL string only; it does not inherit this
    * object's credentials or user agent.
    * </p>
    *
    * @return the parent directory, or null if the URL has no path segment to step out of
    *         (e.g. "http://host" or "http://host/")
    */
   @Override
   public HTTPFileObj getParentDir()
   {
      String url = mURL;

      // Ignore arguments so that a '/' inside an argument value isn't treated as a path separator
      int index = url.indexOf('?');
      if (index >= 0)
      {
         url = url.substring(0, index);
      }

      // Ignore a trailing '/' so that a directory URL doesn't return itself as its parent
      if (url.endsWith("/"))
      {
         url = url.substring(0, url.length() - 1);
      }

      // A path separator has to come after the "//" that introduces the server
      int serverStartIndex = url.indexOf("//");
      int minSeparatorIndex = (serverStartIndex >= 0 ? serverStartIndex + 2 : 0);

      HTTPFileObj parent = null;

      index = url.lastIndexOf('/');
      if (index > minSeparatorIndex)
      {
         parent = new HTTPFileObj(url.substring(0, index));
      }

      return parent;
   }

   //---------------------------------------------------------------------------
   /**
    * Lists the entries of this URL if it is a directory, creating one HTTPFileObj per link
    * returned by getLinksInDir().
    * <p>
    * The created objects are built from the link strings only; they do not inherit this
    * object's credentials or user agent.
    * </p>
    *
    * @return the directory's entries, or null if this URL is not a directory or no links
    *         were found
    */
   // NOTE(review): the raw List return type is kept as declared; confirm against the
   // signature in the superclass before adding a type argument.
   @Override
   public List listFiles()
   {
      List<HTTPFileObj> fileObjs = null;

      if (isDirectory())
      {
         // getLinksInDir() is declared with a raw List. Its elements are presumed to be
         // the link URLs as Strings, which is what the loop below requires.
         @SuppressWarnings("unchecked")
         List<String> links = getLinksInDir(mURL);
         if (CollectionUtil.hasValues(links))
         {
            fileObjs = new ArrayList<>(links.size());
            for (String link : links)
            {
               fileObjs.add(new HTTPFileObj(link));
            }
         }
      }

      return fileObjs;
   }

   //---------------------------------------------------------------------------
   /**
    * Returns the path portion of the URL: everything after the server (or server:port) and
    * its following '/', without any query arguments.
    * <p>
    * Examples: "http://host/a/b.txt?x=1" yields "a/b.txt", "http://host/dir/" yields "dir/"
    * and both "http://host" and "http://host/?x=1" yield "".
    * </p>
    *
    * @return the path without a leading '/'
    */
   @Override
   public String getPath()
   {
      String path = mURL;

      // Don't include arguments in the path. This is done first so that a '/' inside an
      // argument value can't be mistaken for part of the URL's structure.
      int index = path.indexOf('?');
      if (index >= 0)
      {
         path = path.substring(0, index);
      }

      // Remove http://
      index = path.indexOf("//");
      boolean hasServer = (index > 0);
      if (hasServer)
      {
         path = path.substring(index + 2);
      }

      // Remove the server or server:port
      index = path.indexOf('/');
      if (index > 0)
      {
         path = path.substring(index + 1);
      }
      else if (hasServer
               && index < 0)
      {
         // Nothing follows the server, so there is no path (don't report the server as the path)
         path = "";
      }

      return path;
   }

   //---------------------------------------------------------------------------
   /**
    * Overrides the requested path that was derived from the URL at construction.
    *
    * @param inValue the path to report from getRequestedPath()
    * @return this object, to allow method chaining
    */
   public HTTPFileObj setRequestedPath(String inValue)
   {
      this.mRequestedPath = inValue;
      return this;
   }

   //---------------------------------------------------------------------------
   /**
    * Returns the requested path, falling back to the path of the current URL if no
    * requested path is set.
    *
    * @return the requested path
    */
   @Override
   public String getRequestedPath()
   {
      if (null == mRequestedPath)
      {
         return getPath();
      }

      return mRequestedPath;
   }

   //---------------------------------------------------------------------------
   /**
    * Returns the size of the remote resource, querying the server if the size has not
    * been cached yet.
    *
    * @return the cached or retrieved size, or -1 if no size could be determined
    */
   @Override
   public long length()
   {
      if (null == mSize)
      {
         getFileInfo();
      }

      if (null == mSize)
      {
         return -1;
      }

      return mSize;
   }

   //---------------------------------------------------------------------------
   /**
    * Returns the last-modified time in milliseconds. A non-null value from the superclass
    * takes precedence; otherwise the timestamp reported by the server is used, querying
    * the server if it has not been cached yet.
    *
    * @return the last-modified time in milliseconds, or null if it is not known
    */
   @Override
   public Long lastModified()
   {
      Long override = super.lastModified();
      if (override != null)
      {
         return override;
      }

      if (null == mTimestamp)
      {
         getFileInfo();
      }

      if (null == mTimestamp)
      {
         return null;
      }

      return mTimestamp.getTimeInMillis();
   }

   //---------------------------------------------------------------------------
   /**
    * Returns the last-modified timestamp reported by the server, querying the server if
    * it has not been cached yet.
    *
    * @return the timestamp, or null if the server did not provide one
    */
   @Override
   public Calendar getTimestamp()
   {
      boolean timestampCached = (mTimestamp != null);
      if (! timestampCached)
      {
         getFileInfo();
      }

      return mTimestamp;
   }

   //---------------------------------------------------------------------------
   /**
    * Opens a connection to the URL and returns a buffered stream of its content.
    * <p>
    * The request uses this object's user agent if one is set (otherwise a browser-like
    * default) and, if credentials have been set, HTTP Basic authentication. These are the
    * same settings used when retrieving the file's metadata.
    * </p>
    *
    * @return a buffered stream of the response body. The caller is responsible for closing it.
    * @throws IOException if the connection cannot be established or the content cannot be read
    */
   @Override
   public InputStream getInputStream()
   throws IOException
   {
      HttpURLConnection conn = (HttpURLConnection) new URL(mURL).openConnection();

      // Honor a configured user agent. Otherwise pretend to be a browser since some
      // sites don't want to talk to Java.
      String userAgent = geUserAgentString();
      conn.setRequestProperty("User-Agent", StringUtil.isSet(userAgent) ? userAgent : USER_AGENT);

      // Without this, content that requires a login could be sized but never downloaded
      LoginCredentials credentials = getCredentials();
      if (credentials != null)
      {
         String base64Auth = Base64.getEncoder().encodeToString((credentials.getUser() + ":" + new String(credentials.getPassword())).getBytes());
         conn.setRequestProperty("Authorization", "Basic " + base64Auth);
      }

      conn.connect();

      return new BufferedInputStream(conn.getInputStream());
   }

   //---------------------------------------------------------------------------
   /**
    * Not supported: HTTPFileObj does not currently implement HTTP PUT operations, so this
    * method always fails.
    *
    * @return never returns normally
    * @throws UnsupportedOperationException always
    */
   @Override
   public OutputStream getOutputStream()
   throws IOException
   {
      throw new UnsupportedOperationException();
   }


   //---------------------------------------------------------------------------
   /**
    * Delegates the download to the superclass and then, if no valid size is cached,
    * records the length of the file named getName() in the target directory as the size.
    *
    * @param inLocalDir the local directory to write into
    * @return the result reported by the superclass
    * @throws IOException if the superclass fails to write the file
    */
   @Override
   public boolean writeToLocalDir(File inLocalDir)
         throws IOException
   {
      final boolean written = super.writeToLocalDir(inLocalDir);

      boolean sizeUnknown = (null == mSize || mSize < 0);
      if (sizeUnknown)
      {
         File localFile = new File(inLocalDir, getName());
         mSize = localFile.length();
      }

      return written;
   }

   //---------------------------------------------------------------------------
   /**
    * Delegates the copy to the superclass and then, if no valid size is cached, records
    * the length of the file at getPath() beneath the target directory as the size.
    *
    * @param inLocalDir the local directory to copy into
    * @return the result reported by the superclass
    * @throws IOException if the superclass fails to copy the file
    */
   @Override
   public boolean copyLocallyPreservingPath(File inLocalDir)
   throws IOException
   {
      final boolean copied = super.copyLocallyPreservingPath(inLocalDir);

      boolean sizeUnknown = (null == mSize || mSize < 0);
      if (sizeUnknown)
      {
         File localFile = new File(inLocalDir, getPath());
         mSize = localFile.length();
      }

      return copied;
   }

   //---------------------------------------------------------------------------
   /**
    * Delegates the transfer to the superclass and then, if no valid size is cached,
    * records the value returned by the superclass as the size.
    *
    * @param stream the stream to write the content to
    * @return the value reported by the superclass (presumably the number of bytes written)
    * @throws IOException if the superclass fails to write the content
    */
   @Override
   public long writeToStream(OutputStream stream)
   throws IOException
   {
      final long transferred = super.writeToStream(stream);

      boolean sizeUnknown = (null == mSize || mSize < 0);
      if (sizeUnknown)
      {
         mSize = transferred;
      }

      return transferred;
   }

   //---------------------------------------------------------------------------
   /**
    * Delegates the download to the superclass and then, if no valid size is cached,
    * records the length of the local file as the size.
    *
    * @param inLocalFile the local file to write to
    * @return the result reported by the superclass
    * @throws IOException if the superclass fails to write the file
    */
   @Override
   public boolean writeToLocalFile(File inLocalFile)
         throws IOException
   {
      final boolean written = super.writeToLocalFile(inLocalFile);

      boolean sizeUnknown = (null == mSize || mSize < 0);
      if (sizeUnknown)
      {
         mSize = inLocalFile.length();
      }

      return written;
   }

   //---------------------------------------------------------------------------
   /**
    * Queries the server and reports whether it answered with a successful response.
    * The file information is refreshed on every call.
    *
    * @return true if a size was recorded for the URL by getFileInfo()
    */
   @Override
   public boolean exists()
   {
      getFileInfo();

      boolean sizeRecorded = (mSize != null);
      return sizeRecorded;
   }

   //---------------------------------------------------------------------------
   /**
    * Always reports the resource as readable; no check is made against the server.
    *
    * @return true
    */
   @Override
   public boolean canRead()
   {
      final boolean readable = true;
      return readable;
   }

   //---------------------------------------------------------------------------
   // TODO
   /**
    * Always reports the resource as not writable.
    *
    * @return false
    */
   @Override
   public boolean canWrite()
   {
      final boolean writable = false;
      return writable;
   }

   //---------------------------------------------------------------------------
   /**
    * Reports whether the URL refers to a file, defined as anything that isDirectory()
    * does not consider a directory.
    *
    * @return true if the URL is not a directory
    */
   @Override
   public boolean isFile()
   {
      boolean directory = isDirectory();
      return ! directory;
   }

   //---------------------------------------------------------------------------
   /**
    * Reports whether the URL refers to a directory. The answer is computed once and cached.
    * <ul>
    *   <li>A path ending in '/' is a directory.</li>
    *   <li>A URL with a query is assumed to return a file.</li>
    *   <li>Otherwise the server is asked (via a HEAD request) for the URL with a '/'
    *       appended; the URL is a directory only if that request succeeds. If the request
    *       cannot be made, the URL is treated as a file.</li>
    * </ul>
    *
    * @return true if the URL is considered to be a directory
    */
   @Override
   public boolean isDirectory()
   {
      if (null == mIsDirectory)
      {
         boolean result = getPath().endsWith("/");

         // If a query was specified, assume it returns a file
         if (! result
             && null == getQuery())
         {
            // Check if adding a '/' to the end of the URL returns successfully
            HttpURLConnection conn = null;
            try
            {
               mNumRedirects = 0;

               // Rebuild the URL from its parts so that only the path gets the trailing '/'.
               // An explicitly specified port is always retained.
               URL url = toURL();
               URL dirURL = new URL(url.getProtocol(), url.getHost(), url.getPort(), url.getPath() + "/");

               conn = establishConnectionFollowingRedirects(dirURL.toString());

               // The outcome of the probe decides; anything but OK means it is not a directory
               result = (conn.getResponseCode() == HttpURLConnection.HTTP_OK);
            }
            catch (Exception e)
            {
               // Best effort: a failed probe leaves the URL classified as a file
               LOGGER.log(Level.FINE, "Could not determine whether " + mURL + " is a directory", e);
            }
            finally
            {
               if (conn != null) conn.disconnect();
            }
         }

         mIsDirectory = result;
      }

      return mIsDirectory;
   }

   //---------------------------------------------------------------------------
   /**
    * Directory creation is not implemented for HTTP locations; nothing is attempted.
    *
    * @return false
    * @throws IOException never thrown by this implementation
    */
   @Override
   public boolean mkdirs()
         throws IOException
   {
      // TODO: Should this throw an UnimplementedMethodException?
      final boolean created = false;
      return created;
   }

   //---------------------------------------------------------------------------
   /**
    * Deletion is not implemented for HTTP locations; nothing is attempted.
    *
    * @return false
    */
   @Override
   public boolean delete()
   {
      // TODO: Should this throw an UnimplementedMethodException?
      final boolean deleted = false;
      return deleted;
   }

   //---------------------------------------------------------------------------
   /**
    * Sets the credentials to send as HTTP Basic authentication.
    *
    * @param inValue the login credentials, or null to send none
    * @return this object, to allow method chaining
    */
   public HTTPFileObj setCredentials(LoginCredentials inValue)
   {
      this.mCredentials = inValue;
      return this;
   }

   //---------------------------------------------------------------------------
   /**
    * Returns the credentials configured for this object.
    *
    * @return the login credentials, or null if none have been set
    */
   public LoginCredentials getCredentials()
   {
      return this.mCredentials;
   }

   //###########################################################################
   // PRIVATE METHODS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    * Queries the server with a HEAD request (following redirects) and caches the result.
    * <p>
    * The cached size and timestamp are cleared first. If the final response is OK, the
    * stored URL is replaced by the URL that finally answered, the size is set to the
    * reported content length (-1 if the server didn't report one) and the timestamp is
    * set if the server reported a last-modified time. For any other response the size and
    * timestamp stay null.
    * </p>
    *
    * @throws RuntimeException if the request fails; the original exception is attached as the cause
    */
   private void getFileInfo()
   {
      HttpURLConnection conn = null;
      try
      {
         mSize = null;
         mTimestamp = null;
         mNumRedirects = 0;

         conn = establishConnectionFollowingRedirects(mURL);

         if (conn.getResponseCode() == HttpURLConnection.HTTP_OK)
         {
            // Since we may have been redirected, update the URL
            mURL = conn.getURL().toString();

            // The int-valued getContentLength() can't represent sizes over 2GB
            mSize = conn.getContentLengthLong();

            long lastModifiedMilliseconds = conn.getLastModified();
            if (lastModifiedMilliseconds > 0)
            {
               mTimestamp = new GregorianCalendar();
               mTimestamp.setTimeInMillis(lastModifiedMilliseconds);
            }
         }
      }
      catch (Exception e)
      {
         // Same message as before, but keep the cause so that the stack trace isn't lost
         throw new RuntimeException(e.toString(), e);
      }
      finally
      {
         if (conn != null) conn.disconnect();
      }
   }

   //---------------------------------------------------------------------------
   /**
    * Sends a HEAD request to the specified URL and follows redirects (301, 302, 303, 307
    * and 308) manually, recursing until a non-redirect response is received.
    * <p>
    * Redirects are followed by hand rather than by HttpURLConnection so that each hop can
    * be logged and counted. The caller is expected to reset mNumRedirects beforehand and
    * to disconnect the returned connection.
    * </p>
    *
    * @param inURL the URL to request
    * @return the connection that produced the final (non-redirect) response
    * @throws IOException if a connection fails, a redirect lacks a Location header, or
    *                     more than MAX_REDIRECTS redirects are encountered
    */
   private HttpURLConnection establishConnectionFollowingRedirects(String inURL)
         throws IOException
   {
      HttpURLConnection conn = (HttpURLConnection) new URL(inURL).openConnection();
      conn.setRequestMethod("HEAD");
      conn.setUseCaches(false);
      conn.setInstanceFollowRedirects(false);
      if (StringUtil.isSet(geUserAgentString()))
      {
         // Pretend to be a browser. Some sites don't want to talk to Java
         conn.setRequestProperty("User-Agent", geUserAgentString());
      }

      if (getCredentials() != null)
      {
         // NOTE(review): the credentials are sent to every URL in a redirect chain,
         // including URLs on other hosts. Confirm that this is intended.
         String base64Auth = Base64.getEncoder().encodeToString((getCredentials().getUser() + ":" + new String(getCredentials().getPassword())).getBytes());
         conn.setRequestProperty("Authorization", "Basic "+ base64Auth);
      }


      conn.connect();

      if (getLogger().isLoggable(Level.FINE))
      {
         LOGGER.log(Level.FINE, "RESPONSE CODE: " + conn.getResponseCode());
         LOGGER.log(Level.FINE, "RESPONSE MSG: " + conn.getResponseMessage());
         LOGGER.log(Level.FINE, "CONTENT LENGTH: " + conn.getContentLength());
         LOGGER.log(Level.FINE, "LAST MODIFIED: " + conn.getLastModified());
      }

      final int responseCode = conn.getResponseCode();
      if (   responseCode == HttpURLConnection.HTTP_MOVED_PERM
          || responseCode == HttpURLConnection.HTTP_MOVED_TEMP
          || responseCode == HttpURLConnection.HTTP_SEE_OTHER
          || responseCode == 307  // Temporary Redirect (HttpURLConnection has no constant for it)
          || responseCode == 308) // Permanent Redirect (HttpURLConnection has no constant for it)
      {
         final String newLocation = conn.getHeaderField("Location");

         // Done with this hop. Release it before anything below can throw.
         conn.disconnect();

         if (null == newLocation)
         {
            throw new IOException("Redirect response " + responseCode + " from " + inURL + " did not include a Location header!");
         }

         final boolean permanent = (   responseCode == HttpURLConnection.HTTP_MOVED_PERM
                                    || responseCode == 308);
         final String msg = inURL + (permanent ? " permanently" : "") + " moved to " + newLocation;
         LOGGER.log(Level.INFO, msg);

         mNumRedirects++;
         if (mNumRedirects > MAX_REDIRECTS)
         {
            throw new IOException("More than " + MAX_REDIRECTS + " redirects!");
         }

         // A relative Location is relative to the URL that issued the redirect,
         // which in a chain of redirects is not necessarily the original URL
         final URL newURL = new URL(new URL(inURL), newLocation);

         conn = establishConnectionFollowingRedirects(newURL.toString()); // Recurse
      }

      return conn;
   }

   //---------------------------------------------------------------------------
   private List getLinksInDir(String inDirURL)
   {
      List links = new ArrayList<>();

      try
      {
         String url = inDirURL;

         HttpURLConnection conn = null;
         int numAttempts = 0;
         while (numAttempts < 3)
         {
            conn = establishConnectionFollowingRedirects(url);
            if (conn.getResponseCode() == HttpURLConnection.HTTP_MOVED_PERM
                || conn.getResponseCode() == HttpURLConnection.HTTP_MOVED_PERM)
            {
               url = conn.getHeaderField("Location");
               numAttempts++;
            }
            else
            {
               break;
            }
         }


//         System.out.println("RESPONSE CODE: " + conn.getResponseCode());
//         System.out.println("RESPONSE MSG: " + conn.getResponseMessage());
//         System.out.println("CONTENT LENGTH: " + conn.getContentLength());
//         System.out.println("CONTENT TYPE: " + conn.getContentType());

         if (conn.getResponseCode() == HttpURLConnection.HTTP_OK)
         {
            BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream()));
            String line;
            StringBuilderPlus dirHTML = new StringBuilderPlus();
            while ((line = reader.readLine()) != null)
            {
               dirHTML.appendln(line);
            }
            reader.close();

            // Painful but we need to scrape the page looking for the directory's contents.
            // (Assuming it IS a directory.)
            Pattern p = Pattern.compile("




© 2015 - 2024 Weber Informatics LLC | Privacy Policy