common.amp.armor_decoder.go Maven / Gradle / Ivy
package amp
import (
"bufio"
"bytes"
"encoding/base64"
"fmt"
"io"
"golang.org/x/net/html"
)
// ErrUnknownVersion reports an unrecognized armor version indicator. Its value
// is the offending byte: the first character found inside the element
// encoding (but outside the base64 encoding) when that character is not '0'.
type ErrUnknownVersion byte

// Error implements the error interface. The unexpected version byte is
// rendered as a quoted, ASCII-only character literal.
func (v ErrUnknownVersion) Error() string {
	indicator := byte(v)
	return fmt.Sprintf("unknown armor version indicator %+q", indicator)
}
// isASCIIWhitespace reports whether b is one of the five bytes that the WHATWG
// Infra Standard classifies as ASCII whitespace: tab, line feed, form feed,
// carriage return, or space.
//
// https://infra.spec.whatwg.org/#ascii-whitespace
func isASCIIWhitespace(b byte) bool {
	return b == '\t' || b == '\n' || b == '\f' || b == '\r' || b == ' '
}
// splitASCIIWhitespace is a split function for a bufio.Scanner that yields
// maximal runs of bytes that are not ASCII whitespace (as defined by
// isASCIIWhitespace). Leading whitespace is always consumed. A run that
// reaches the end of data is only returned as a token when atEOF is true;
// otherwise the function asks for more data so the run is not cut in two.
// It never returns an error.
func splitASCIIWhitespace(data []byte, atEOF bool) (advance int, token []byte, err error) {
	// Find the first byte of the token, stepping over leading whitespace.
	start := 0
	for start < len(data) && isASCIIWhitespace(data[start]) {
		start++
	}

	// Find the whitespace byte that terminates the token. The terminator
	// itself is consumed along with the token.
	for end := start; end < len(data); end++ {
		if isASCIIWhitespace(data[end]) {
			return end + 1, data[start:end], nil
		}
	}

	// No terminator in data. The trailing bytes count as a token only when
	// no further input can arrive and there is at least one byte.
	if atEOF && start < len(data) {
		return len(data), data[start:], nil
	}

	// Drop the whitespace seen so far and wait for more input.
	return start, nil, nil
}
func decodeToWriter(w io.Writer, r io.Reader) (int64, error) {
tokenizer := html.NewTokenizer(r)
// Set a memory limit on token sizes, otherwise the tokenizer will
// buffer text indefinitely if it is not broken up by other token types.
tokenizer.SetMaxBuf(elementSizeLimit)
active := false
total := int64(0)
for {
tt := tokenizer.Next()
switch tt {
case html.ErrorToken:
err := tokenizer.Err()
if err == io.EOF {
err = nil
}
if err == nil && active {
return total, fmt.Errorf("missing
tag")
}
return total, err
case html.TextToken:
if active {
// Re-join the separate chunks of text and
// feed them to the decoder.
scanner := bufio.NewScanner(bytes.NewReader(tokenizer.Text()))
scanner.Split(splitASCIIWhitespace)
for scanner.Scan() {
n, err := w.Write(scanner.Bytes())
total += int64(n)
if err != nil {
return total, err
}
}
if err := scanner.Err(); err != nil {
return total, err
}
}
case html.StartTagToken:
tn, _ := tokenizer.TagName()
if string(tn) == "pre" {
if active {
// nesting not allowed
return total, fmt.Errorf("unexpected %s", tokenizer.Token())
}
active = true
}
case html.EndTagToken:
tn, _ := tokenizer.TagName()
if string(tn) == "pre" {
if !active {
// stray end tag
return total, fmt.Errorf("unexpected %s", tokenizer.Token())
}
active = false
}
}
}
}
// NewArmorDecoder returns a new AMP armor decoder.
//
// The armored input is read from r by a background goroutine that runs
// decodeToWriter and feeds its output into a pipe. NewArmorDecoder blocks
// until the first byte of that output, the version indicator, is available.
//
// If the version indicator is '0', the returned reader yields the standard
// base64 decoding of the remaining output. Any other indicator results in an
// ErrUnknownVersion error. If the version indicator cannot be read at all
// (for example because the input is empty or malformed), the underlying error
// is returned.
func NewArmorDecoder(r io.Reader) (io.Reader, error) {
	pr, pw := io.Pipe()
	go func() {
		_, err := decodeToWriter(pw, r)
		// Propagate the decoder's result to the read side; a nil error
		// is seen by the reader as io.EOF.
		pw.CloseWithError(err)
	}()

	// The first byte inside the element encoding is a server–client
	// protocol version indicator. io.ReadFull guarantees that exactly one
	// byte was read whenever it returns a nil error, which a single bare
	// Read call does not.
	var version [1]byte
	if _, err := io.ReadFull(pr, version[:]); err != nil {
		// Closing the read side makes any pending or future write in
		// the goroutine fail, so it does not block forever.
		pr.CloseWithError(err)
		return nil, err
	}

	switch version[0] {
	case '0':
		return base64.NewDecoder(base64.StdEncoding, pr), nil
	default:
		err := ErrUnknownVersion(version[0])
		pr.CloseWithError(err)
		return nil, err
	}
}