com.hfg.util.mime.MHTML_Generator Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of com_hfg Show documentation
Show all versions of com_hfg Show documentation
com.hfg xml, html, svg, and bioinformatics utility library
package com.hfg.util.mime;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import com.hfg.html.HTMLDoc;
import com.hfg.html.HTMLTag;
import com.hfg.html.Img;
import com.hfg.image.ImageFormat;
import com.hfg.util.collection.CollectionUtil;
import com.hfg.util.io.FileBytes;
import com.hfg.xml.XMLizable;
//------------------------------------------------------------------------------
/**
Tool for creating an mhtml web archive.
@author J. Alex Taylor, hairyfatguy.com
*/
//------------------------------------------------------------------------------
// com.hfg XML/HTML Coding Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------
/**
 Tool for creating an mhtml web archive: a multipart/related MIME message
 holding one or more HTML pages plus the images those pages reference.
 <p>
 Pages are registered with one of the <code>addPage()</code> methods and the
 archive is then written out with {@link #generate(OutputStream)}.
 </p>
 @author J. Alex Taylor, hairyfatguy.com
 */
public class MHTML_Generator
{
   // URL used for the Snapshot-Content-Location / Subject headers and as the
   // fallback base for resolving relative image URLs. Only set by the URL constructor.
   private URL mRootURL;

   // The MIME parts of the archive in the order they were added.
   private List<MimeEntity> mEntities = new ArrayList<>(25);

   // Fixed multipart boundary marker.
   private String mBoundary = "974767299852498929531610575";

   private static final String CRLF = "\r\n";

   // Date layout of the sample header documented in generate(),
   // e.g. "Tue, 4 Apr 2023 22:23:38 +0000".
   private static final String HEADER_DATE_FORMAT = "EEE, d MMM yyyy HH:mm:ss Z";

   //##########################################################################
   // CONSTRUCTORS
   //##########################################################################

   //--------------------------------------------------------------------------
   /**
    Creates an empty generator with no root URL.
    */
   public MHTML_Generator()
   {
   }

   //--------------------------------------------------------------------------
   /**
    Creates a generator rooted at the specified URL and immediately adds that page.
    @param inRootURL the URL of the page to archive
    @throws IOException propagated from {@link #addPage(URL)}
    */
   public MHTML_Generator(URL inRootURL)
      throws IOException
   {
      mRootURL = inRootURL;
      addPage(inRootURL);
   }

   //##########################################################################
   // PUBLIC METHODS
   //##########################################################################

   //--------------------------------------------------------------------------
   /**
    Builds an HTMLDoc from the specified URL and adds it (and its images) to the archive.
    Relative image references are resolved against the specified URL.
    @param inURL the URL of the page to add
    @return this generator to enable method chaining
    @throws IOException declared by the HTMLDoc constructor and the page-adding logic
    */
   public MHTML_Generator addPage(URL inURL)
      throws IOException
   {
      HTMLDoc doc = new HTMLDoc(inURL);

      // The page's own URL is the correct base for its relative image references,
      // and is the only base available when no root URL was given at construction.
      return addPageWithBase(doc, inURL);
   }

   //--------------------------------------------------------------------------
   /**
    Builds an HTMLDoc from the specified HTML text and adds it (and its images) to the archive.
    @param inHTML the HTML source of the page to add
    @return this generator to enable method chaining
    @throws IOException declared by the HTMLDoc constructor and the page-adding logic
    */
   public MHTML_Generator addPage(CharSequence inHTML)
      throws IOException
   {
      // NOTE(review): the text is encoded with the platform default charset. How HTMLDoc
      // decodes the stream is not visible from this file, so the encoding is left unchanged.
      BufferedInputStream stream = new BufferedInputStream(new ByteArrayInputStream(inHTML.toString().getBytes()));
      HTMLDoc doc = new HTMLDoc(stream);
      addPage(doc);

      return this;
   }

   //--------------------------------------------------------------------------
   /**
    Adds the specified document as a quoted-printable text/html entity followed by
    a base64 entity for each image tag found in the document. Relative image
    references are resolved against the root URL (if one was given at construction).
    @param inHTMLDoc the page to add
    @return this generator to enable method chaining
    @throws IOException declared for callers; image retrieval failures are reported
            on stderr and the affected image is skipped
    */
   public MHTML_Generator addPage(HTMLDoc inHTMLDoc)
      throws IOException
   {
      return addPageWithBase(inHTMLDoc, mRootURL);
   }

   //--------------------------------------------------------------------------
   /**
    Writes the archive (message header followed by every added entity) to the specified stream.
    The stream is flushed but not closed.
    @param inStream the destination for the archive
    @throws IOException if writing to the stream fails
    */
   public void generate(OutputStream inStream)
      throws IOException
   {
      // Generate the header
      /*
         From:
         Snapshot-Content-Location: http://hairyfatguy.com/
         Subject: hairyfatguy.com
         Date: Tue, 4 Apr 2023 22:23:38 -0000
         MIME-Version: 1.0
         Content-Type: multipart/related;
           type="text/html";
           boundary="----MultipartBoundary--vZx2mSzm4I514tSUI1vnL5GCUmWrMJFQNvWcDoNt5X----"
       */
      writeLine(inStream, "From: ");

      if (mRootURL != null)
      {
         writeLine(inStream, "Snapshot-Content-Location: " + mRootURL);
         writeLine(inStream, "Subject: " + mRootURL.getFile());
      }

      // Date.toString() does not produce the mail-style date shown above, so format explicitly.
      // Locale.US keeps the day and month names in English regardless of the default locale.
      writeLine(inStream, "Date: " + new SimpleDateFormat(HEADER_DATE_FORMAT, Locale.US).format(new Date()));
      writeLine(inStream, "MIME-Version: 1.0");

      // Folded header: continuation lines start with a space.
      writeLine(inStream, "Content-Type: multipart/related;");
      writeLine(inStream, " type=\"" + MimeType.TEXT_HTML + "\";");
      writeLine(inStream, " boundary=\"" + mBoundary + "\"");

      // Blank line separating the header from the body.
      writeLine(inStream, "");

      // Now write out the MIME entities
      if (CollectionUtil.hasValues(mEntities))
      {
         for (MimeEntity entity : mEntities)
         {
            writeLine(inStream, "--" + mBoundary);
            entity.write(inStream);
         }

         // Closing boundary
         writeLine(inStream, "--" + mBoundary + "--");
         writeLine(inStream, "");
      }

      inStream.flush();
   }

   //##########################################################################
   // PRIVATE METHODS
   //##########################################################################

   //--------------------------------------------------------------------------
   // Adds the page entity and then one entity per image, resolving relative
   // image references against inBaseURL (which may be null).
   private MHTML_Generator addPageWithBase(HTMLDoc inHTMLDoc, URL inBaseURL)
      throws IOException
   {
      /*
         ------MultipartBoundary--vZx2mSzm4I514tSUI1vnL5GCUmWrMJFQNvWcDoNt5X----
         Content-Type: text/html
         Content-ID:
         Content-Transfer-Encoding: quoted-printable
         Content-Location: http://hairyfatguy.com/
       */
      MimeEntity pageEntity = new MimeEntity()
            .setContentType(MimeType.TEXT_HTML)
            .setContentTransferEncoding(ContentTransferEncoding.QUOTED_PRINTABLE)
            .setContentLocation(inHTMLDoc.getURL());
      pageEntity.setContent(inHTMLDoc.toHTML());

      mEntities.add(pageEntity);

      // Note that scripts don't appear to be part of the spec.

      // Now add any external CSS
      // TODO

      // Now add the images
      Set<Img> images = new HashSet<>();
      recursivelyExtractImages(inHTMLDoc.getRootNode(), images);
      for (Img image : images)
      {
         addImage(image, inBaseURL);
      }

      return this;
   }

   //--------------------------------------------------------------------------
   // Retrieves the data for one image tag and appends it as a base64 entity.
   // Best effort: a failure is reported on stderr and the image is skipped.
   private void addImage(Img inImage, URL inBaseURL)
   {
      String src = inImage.getSrc();
      if (null == src
          || src.trim().isEmpty())
      {
         // An img tag without a usable src has nothing to retrieve.
         return;
      }

      try
      {
         // URL(context, spec) ignores the context when the spec is already absolute
         // and otherwise performs standard relative reference resolution. A relative
         // src with a null base throws MalformedURLException, which is handled below.
         URL url = new URL(inBaseURL, src);
         URLConnection conn = url.openConnection();

         // NOTE(review): the connection's input stream is not closed here. Whether
         // FileBytes.setData() closes it is not visible from this file - confirm.
         FileBytes rawImgData = new FileBytes(src).setData(conn.getInputStream());

         // Use only the path portion so that a query string or fragment
         // does not interfere with guessing the format from the file extension.
         String imgFilename = new File(url.getPath()).getName();

         /*
            ------MultipartBoundary--vZx2mSzm4I514tSUI1vnL5GCUmWrMJFQNvWcDoNt5X----
            Content-Type: image/jpeg
            Content-Transfer-Encoding: base64
            Content-Location: http://hairyfatguy.com/images/hfg_logo.jpg
          */
         MimeEntity imgEntity = new MimeEntity(null, rawImgData)
               .setContentType(ImageFormat.guessFormatFromName(imgFilename).getMimeType())
               .setContentTransferEncoding(ContentTransferEncoding.BASE64)
               .setContentLocation(url.toString());

         mEntities.add(imgEntity);
      }
      catch (Exception e)
      {
         // Deliberately broad: one unretrievable or unrecognized image should not abort the archive.
         e.printStackTrace();
      }
   }

   //--------------------------------------------------------------------------
   // Writes the specified text followed by CRLF using an explicit charset
   // so that the output does not depend on the platform default.
   private static void writeLine(OutputStream inStream, String inLine)
      throws IOException
   {
      inStream.write((inLine + CRLF).getBytes(StandardCharsets.UTF_8));
   }

   //--------------------------------------------------------------------------
   // Depth-first walk of the tag tree collecting every Img tag into inImgSet.
   private void recursivelyExtractImages(HTMLTag inHTMLTag, Set<Img> inImgSet)
   {
      if (null == inHTMLTag)
      {
         return;
      }

      // Wildcard element type: each subtag is checked with instanceof before use.
      List<?> subtags = inHTMLTag.getSubtags();
      if (CollectionUtil.hasValues(subtags))
      {
         for (Object subtag : subtags)
         {
            if (subtag instanceof Img)
            {
               inImgSet.add((Img) subtag);
            }
            else if (subtag instanceof HTMLTag)
            {
               recursivelyExtractImages((HTMLTag) subtag, inImgSet);
            }
         }
      }
   }
}