All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.pdfbox.pdmodel.font.ToUnicodeWriter Maven / Gradle / Ivy

Go to download

The Apache PDFBox library is an open source Java tool for working with PDF documents.

There is a newer version: 3.0.2
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.pdfbox.pdmodel.font;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.pdfbox.util.Charsets;
import org.apache.pdfbox.util.Hex;

/**
 * Writes ToUnicode Mapping Files.
 *
 * @author John Hewson
 */
final class ToUnicodeWriter
{
    private final Map cidToUnicode = new TreeMap();
    private int wMode;

    /**
     * To test corner case of PDFBOX-4302.
     */
    static final int MAX_ENTRIES_PER_OPERATOR = 100;

    /**
     * Creates a new ToUnicode CMap writer.
     */
    ToUnicodeWriter()
    {
        this.wMode = 0;
    }

    /**
     * Sets the WMode (writing mode) of this CMap.
     *
     * @param wMode 1 for vertical, 0 for horizontal (default)
     */
    public void setWMode(int wMode)
    {
        this.wMode = wMode;
    }

    /**
     * Adds the given CID to Unicode mapping.
     *
     * @param cid CID
     * @param text Unicode text, up to 512 bytes.
     */
    public void add(int cid, String text)
    {
        if (cid < 0 || cid > 0xFFFF)
        {
            throw new IllegalArgumentException("CID is not valid");
        }

        if (text == null || text.isEmpty())
        {
            throw new IllegalArgumentException("Text is null or empty");
        }

        cidToUnicode.put(cid, text);
    }

    /**
     * Writes the CMap as ASCII to the given output stream.
     *
     * @param out ASCII output stream
     * @throws IOException if the stream could not be written
     */
    public void writeTo(OutputStream out) throws IOException
    {
        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, Charsets.US_ASCII));

        writeLine(writer, "/CIDInit /ProcSet findresource begin");
        writeLine(writer, "12 dict begin\n");

        writeLine(writer, "begincmap");
        writeLine(writer, "/CIDSystemInfo");
        writeLine(writer, "<< /Registry (Adobe)");
        writeLine(writer, "/Ordering (UCS)");
        writeLine(writer, "/Supplement 0");
        writeLine(writer, ">> def\n");

        writeLine(writer, "/CMapName /Adobe-Identity-UCS" + " def");
        writeLine(writer, "/CMapType 2 def\n"); // 2 = ToUnicode

        if (wMode != 0)
        {
            writeLine(writer, "/WMode /" + wMode + " def");
        }

        // ToUnicode always uses 16-bit CIDs
        writeLine(writer, "1 begincodespacerange");
        writeLine(writer, "<0000> ");
        writeLine(writer, "endcodespacerange\n");

        // CID -> Unicode mappings, we use ranges to generate a smaller CMap
        List srcFrom = new ArrayList();
        List srcTo = new ArrayList();
        List dstString = new ArrayList();

        int srcPrev = -1;
        String dstPrev = null;

        int srcCode1 = -1;

        for (Map.Entry entry : cidToUnicode.entrySet())
        {
            int cid = entry.getKey();
            String text = entry.getValue();

            if (cid == srcPrev + 1 &&                                 // CID must be last CID + 1
                dstPrev.codePointCount(0, dstPrev.length()) == 1 &&   // no UTF-16 surrogates
                text.codePointAt(0) == dstPrev.codePointAt(0) + 1 &&  // dstString must be prev + 1
                dstPrev.codePointAt(0) + 1 <= 255 - (cid - srcCode1)) // increment last byte only
            {
                // extend range
                srcTo.set(srcTo.size() - 1, cid);
            }
            else
            {
                // begin range
                srcCode1 = cid;
                srcFrom.add(cid);
                srcTo.add(cid);
                dstString.add(text);
            }
            srcPrev = cid;
            dstPrev = text;
        }

        // limit entries per operator
        int batchCount = (int) Math.ceil(srcFrom.size() /
                                         (double) MAX_ENTRIES_PER_OPERATOR);
        for (int batch = 0; batch < batchCount; batch++)
        {
            int count = batch == batchCount - 1 ?
                            srcFrom.size() - MAX_ENTRIES_PER_OPERATOR * batch :
                            MAX_ENTRIES_PER_OPERATOR;
            writer.write(count + " beginbfrange\n");
            for (int j = 0; j < count; j++)
            {
                int index = batch * MAX_ENTRIES_PER_OPERATOR + j;
                writer.write('<');
                writer.write(Hex.getChars(srcFrom.get(index).shortValue()));
                writer.write("> ");

                writer.write('<');
                writer.write(Hex.getChars(srcTo.get(index).shortValue()));
                writer.write("> ");

                writer.write('<');
                writer.write(Hex.getCharsUTF16BE(dstString.get(index)));
                writer.write(">\n");
            }
            writeLine(writer, "endbfrange\n");
        }

        // footer
        writeLine(writer, "endcmap");
        writeLine(writer, "CMapName currentdict /CMap defineresource pop");
        writeLine(writer, "end");
        writeLine(writer, "end");

        writer.flush();
    }

    private void writeLine(BufferedWriter writer, String text) throws IOException
    {
        writer.write(text);
        writer.write('\n');
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy