All downloads are free. Search and download functionality uses the official Maven repository.

common.amp.armor_decoder.go Maven / Gradle / Ivy

There is a newer version: 2.9.1
Show newest version
package amp

import (
	"bufio"
	"bytes"
	"encoding/base64"
	"fmt"
	"io"

	"golang.org/x/net/html"
)

// ErrUnknownVersion reports an unrecognized armor version indicator: the
// first character inside the element encoding (but outside the base64
// encoding) was something other than '0'. The error's underlying value is
// the offending byte.
type ErrUnknownVersion byte

// Error implements the error interface. The offending byte is rendered as a
// single-quoted, ASCII-only character literal.
func (v ErrUnknownVersion) Error() string {
	indicator := byte(v)
	return fmt.Sprintf("unknown armor version indicator %+q", indicator)
}

// isASCIIWhitespace reports whether b is one of the five bytes classified as
// ASCII whitespace by the WHATWG Infra Standard: tab, line feed, form feed,
// carriage return, or space. Vertical tab is not included.
//
// https://infra.spec.whatwg.org/#ascii-whitespace
func isASCIIWhitespace(b byte) bool {
	return b == '\t' || b == '\n' || b == '\f' || b == '\r' || b == ' '
}

// splitASCIIWhitespace is a bufio.SplitFunc that yields maximal runs of
// non-whitespace bytes, using isASCIIWhitespace to decide what counts as a
// separator. Leading whitespace is skipped and the single whitespace byte
// that terminates a token is consumed along with it. A trailing run with no
// terminator is only returned as a token once atEOF is true; before that the
// function consumes the leading whitespace and asks for more data.
func splitASCIIWhitespace(data []byte, atEOF bool) (advance int, token []byte, err error) {
	// Step over leading whitespace to find where the token begins.
	start := 0
	for start < len(data) && isASCIIWhitespace(data[start]) {
		start++
	}
	// Scan forward for the whitespace byte that terminates the token.
	for end := start; end < len(data); end++ {
		if isASCIIWhitespace(data[end]) {
			return end + 1, data[start:end], nil
		}
	}
	// No terminator in the buffer. The remainder is a complete token only
	// if no more input is coming and it is non-empty.
	if atEOF && start < len(data) {
		return len(data), data[start:], nil
	}
	// Drop the whitespace we skipped and wait for more data.
	return start, nil, nil
}

func decodeToWriter(w io.Writer, r io.Reader) (int64, error) {
	tokenizer := html.NewTokenizer(r)
	// Set a memory limit on token sizes, otherwise the tokenizer will
	// buffer text indefinitely if it is not broken up by other token types.
	tokenizer.SetMaxBuf(elementSizeLimit)
	active := false
	total := int64(0)
	for {
		tt := tokenizer.Next()
		switch tt {
		case html.ErrorToken:
			err := tokenizer.Err()
			if err == io.EOF {
				err = nil
			}
			if err == nil && active {
				return total, fmt.Errorf("missing 
tag") } return total, err case html.TextToken: if active { // Re-join the separate chunks of text and // feed them to the decoder. scanner := bufio.NewScanner(bytes.NewReader(tokenizer.Text())) scanner.Split(splitASCIIWhitespace) for scanner.Scan() { n, err := w.Write(scanner.Bytes()) total += int64(n) if err != nil { return total, err } } if err := scanner.Err(); err != nil { return total, err } } case html.StartTagToken: tn, _ := tokenizer.TagName() if string(tn) == "pre" { if active { // nesting not allowed return total, fmt.Errorf("unexpected %s", tokenizer.Token()) } active = true } case html.EndTagToken: tn, _ := tokenizer.TagName() if string(tn) == "pre" { if !active { // stray end tag return total, fmt.Errorf("unexpected %s", tokenizer.Token()) } active = false } } } } // NewArmorDecoder returns a new AMP armor decoder. func NewArmorDecoder(r io.Reader) (io.Reader, error) { pr, pw := io.Pipe() go func() { _, err := decodeToWriter(pw, r) pw.CloseWithError(err) }() // The first byte inside the element encoding is a server–client // protocol version indicator. var version [1]byte _, err := pr.Read(version[:]) if err != nil { pr.CloseWithError(err) return nil, err } switch version[0] { case '0': return base64.NewDecoder(base64.StdEncoding, pr), nil default: err := ErrUnknownVersion(version[0]) pr.CloseWithError(err) return nil, err } }



© 2015 - 2024 Weber Informatics LLC | Privacy Policy