All Downloads are FREE. Search and download functionalities are using the official Maven repository.

nu.validator.io.NcrEscapingWindows1252OutputStreamWriter Maven / Gradle / Ivy

Go to download

An HTML-checking library (used by https://html5.validator.nu and the HTML5 facet of the W3C Validator)

There is a newer version: 20.7.2
Show newest version
/*
 * Copyright (c) 2006 Henri Sivonen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a 
 * copy of this software and associated documentation files (the "Software"), 
 * to deal in the Software without restriction, including without limitation 
 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 * and/or sell copies of the Software, and to permit persons to whom the 
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in 
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 * DEALINGS IN THE SOFTWARE.
 */

package nu.validator.io;

import java.io.IOException;
import java.io.OutputStream;
import java.io.Writer;
import java.util.Arrays;

public class NcrEscapingWindows1252OutputStreamWriter extends Writer {

    private static final int SURROGATE_OFFSET = 0x10000 - (0xD800 << 10) - 0xDC00;

    private static int[] CODE_POINTS = {
        0x0152,
        0x0153,
        0x0160,
        0x0161,
        0x0178,
        0x017D,
        0x017E,
        0x0192,
        0x02C6,
        0x02DC,
        0x2013,
        0x2014,
        0x2018,
        0x2019,
        0x201A,
        0x201C,
        0x201D,
        0x201E,
        0x2020,
        0x2021,
        0x2022,
        0x2026,
        0x2030,
        0x2039,
        0x203A,
        0x20AC,
        0x2122
    };
    
    private static int[] BYTES = {
        0x8C,
        0x9C,
        0x8A,
        0x9A,
        0x9F,
        0x8E,
        0x9E,
        0x83,
        0x88,
        0x98,
        0x96,
        0x97,
        0x91,
        0x92,
        0x82,
        0x93,
        0x94,
        0x84,
        0x86,
        0x87,
        0x95,
        0x85,
        0x89,
        0x8B,
        0x9B,
        0x80,
        0x99  
    };
    
    private OutputStream out;
    
    private int prev;
    
    public NcrEscapingWindows1252OutputStreamWriter(OutputStream out) {
        super();
        this.out = out;
        this.prev = 0;
    }

    @Override
    public void write(char[] buf, int offset, int count) throws IOException {
        int end = offset + count;
        for (int i = offset; i < end; i++) {
            this.write(buf[i]);
        }
    }

    @Override
    public void flush() throws IOException {
        out.flush();
    }

    @Override
    public void close() throws IOException {
        out.close();
    }

    /**
     * @see java.io.Writer#write(int)
     */
    @Override
    public void write(int c) throws IOException {
        int i = -1;
        c &= 0xFFFF; // per API contract
        if (c < 0x80) {
            out.write(c);
            prev = 0;
            return;
        } else if (c < 0xA0) {
            prev = 0;
            // silent loss
            return;
        } else if (c < 0x100) {
            out.write(c);
            prev = 0;
            return;            
        } else if ((i = Arrays.binarySearch(CODE_POINTS, c)) >= 0) {
            out.write(BYTES[i]);
            prev = 0;
            return;                        
        } if ((c & 0xFC00) == 0xDC00) {
            // Got a low surrogate. See if prev was high surrogate
            if (prev != 0) {
                int intVal = (prev << 10) + c + SURROGATE_OFFSET;
                prev = 0;
                this.writeNcr(intVal);
                return;
            } else {
                prev = 0;
                // silent loss
                return;
            }
        } else if ((c & 0xFC00) == 0xD800) {
            // silent loss if prev already was surrogate
            prev = c;
            return;
        } else {
            this.writeNcr(c);
            prev = 0;
            return;
        }
    }

    private void writeNcr(int c) throws IOException {
        out.write('&');
        out.write('#');
        
        String str = Integer.toString(c);
        for (int i = 0; i < str.length(); i++) {
            out.write(str.charAt(i));
        }
        
        out.write(';');
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy