All downloads are free. The search and download functionality uses the official Maven repository.

com.hfg.util.mime.MHTML_Generator Maven / Gradle / Ivy

There is a newer version: 20240423
Show newest version
package com.hfg.util.mime;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;

import com.hfg.html.HTMLDoc;
import com.hfg.html.HTMLTag;
import com.hfg.html.Img;
import com.hfg.image.ImageFormat;
import com.hfg.util.collection.CollectionUtil;
import com.hfg.util.io.FileBytes;
import com.hfg.xml.XMLizable;

//------------------------------------------------------------------------------
/**
 Tool for creating an mhtml web archive.
 
@author J. Alex Taylor, hairyfatguy.com
*/ //------------------------------------------------------------------------------ // com.hfg XML/HTML Coding Library // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com // [email protected] //------------------------------------------------------------------------------ public class MHTML_Generator { private URL mRootURL; private List mEntities = new ArrayList<>(25); private String mBoundary = "974767299852498929531610575"; private static final String CRLF = "\r\n"; //########################################################################## // CONSTRUCTORS //########################################################################## //-------------------------------------------------------------------------- public MHTML_Generator() { } //-------------------------------------------------------------------------- public MHTML_Generator(URL inRootURL) throws IOException { mRootURL = inRootURL; addPage(inRootURL); } //########################################################################## // PUBLIC METHODS //########################################################################## //-------------------------------------------------------------------------- public MHTML_Generator addPage(URL inURL) throws 
IOException { HTMLDoc doc = new HTMLDoc(inURL); addPage(doc); return this; } //-------------------------------------------------------------------------- public MHTML_Generator addPage(CharSequence inHTML) throws IOException { BufferedInputStream stream = new BufferedInputStream(new ByteArrayInputStream(inHTML.toString().getBytes())); HTMLDoc doc = new HTMLDoc(stream); addPage(doc); return this; } //-------------------------------------------------------------------------- public MHTML_Generator addPage(HTMLDoc inHTMLDoc) throws IOException { /* ------MultipartBoundary--vZx2mSzm4I514tSUI1vnL5GCUmWrMJFQNvWcDoNt5X---- Content-Type: text/html Content-ID: Content-Transfer-Encoding: quoted-printable Content-Location: http://hairyfatguy.com/ */ MimeEntity pageEntity = new MimeEntity() .setContentType(MimeType.TEXT_HTML) .setContentTransferEncoding(ContentTransferEncoding.QUOTED_PRINTABLE) .setContentLocation(inHTMLDoc.getURL()); pageEntity.setContent(inHTMLDoc.toHTML()); mEntities.add(pageEntity); // Note that scripts don't appear to be part of the spec. // Now add any external CSS // TODO // Now add the images Set images = new HashSet<>(); recursivelyExtractImages(inHTMLDoc.getRootNode(), images); if (CollectionUtil.hasValues(images)) { for (Img image : images) { String urlString = image.getSrc(); if (! urlString.startsWith("http") && mRootURL != null) { urlString = mRootURL + (! mRootURL.toString().endsWith("/") && ! urlString.startsWith("/") ? 
"/" : "") + urlString; } try { URL url = new URL(urlString); URLConnection conn = url.openConnection(); FileBytes rawImgData = new FileBytes(image.getSrc()).setData(conn.getInputStream()); /* ------MultipartBoundary--vZx2mSzm4I514tSUI1vnL5GCUmWrMJFQNvWcDoNt5X---- Content-Type: image/jpeg Content-Transfer-Encoding: base64 Content-Location: http://hairyfatguy.com/images/hfg_logo.jpg */ MimeEntity imgEntity = new MimeEntity(null, rawImgData) .setContentType(ImageFormat.guessFormatFromName(new File(url.toString()).getName()).getMimeType()) .setContentTransferEncoding(ContentTransferEncoding.BASE64) .setContentLocation(urlString); mEntities.add(imgEntity); } catch (Exception e) { e.printStackTrace(); } } } return this; } //-------------------------------------------------------------------------- public void generate(OutputStream inStream) throws IOException { // Generate the header /* From: Snapshot-Content-Location: http://hairyfatguy.com/ Subject: hairyfatguy.com Date: Tue, 4 Apr 2023 22:23:38 -0000 MIME-Version: 1.0 Content-Type: multipart/related; type="text/html"; boundary="----MultipartBoundary--vZx2mSzm4I514tSUI1vnL5GCUmWrMJFQNvWcDoNt5X----" */ inStream.write("From: ".getBytes()); inStream.write(CRLF.getBytes()); if (mRootURL != null) { inStream.write(("Snapshot-Content-Location: " + mRootURL).getBytes()); inStream.write(CRLF.getBytes()); inStream.write(("Subject: " + mRootURL.getFile()).getBytes()); inStream.write(CRLF.getBytes()); } inStream.write(("Date: " + new Date()).getBytes()); inStream.write(CRLF.getBytes()); inStream.write("MIME-Version: 1.0".getBytes()); inStream.write(CRLF.getBytes()); inStream.write(("Content-Type: multipart/related;" + CRLF + " type=\"" + MimeType.TEXT_HTML + "\";" + CRLF + " boundary=\"" + mBoundary + "\"").getBytes()); inStream.write(CRLF.getBytes()); inStream.write(CRLF.getBytes()); // Now write out the MIME entities if (CollectionUtil.hasValues(mEntities)) { for (MimeEntity entity : mEntities) { inStream.write("--".getBytes()); 
inStream.write(mBoundary.getBytes()); inStream.write(CRLF.getBytes()); entity.write(inStream); } inStream.write("--".getBytes()); inStream.write(mBoundary.getBytes()); inStream.write("--".getBytes()); inStream.write(CRLF.getBytes()); inStream.write(CRLF.getBytes()); } inStream.flush(); } //-------------------------------------------------------------------------- private void recursivelyExtractImages(HTMLTag inHTMLTag, Set inImgSet) { List subtags = inHTMLTag.getSubtags(); if (CollectionUtil.hasValues(subtags)) { for (XMLizable subtag : subtags) { if (subtag instanceof Img) { inImgSet.add((Img) subtag); } else if (subtag instanceof HTMLTag) { recursivelyExtractImages((HTMLTag)subtag, inImgSet); } } } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy