All Downloads are FREE. Search and download functionalities are using the official Maven repository.

vendor.github.com.andybalholm.brotli.context.go Maven / Gradle / Ivy

The newest version!
package brotli

/* Lookup table to map the previous two bytes to a context id.

There are four different context modeling modes defined here:
  contextLSB6: context id is the least significant 6 bits of the last byte,
  contextMSB6: context id is the most significant 6 bits of the last byte,
  contextUTF8: second-order context model tuned for UTF8-encoded text,
  contextSigned: second-order context model tuned for signed integers.

If |p1| and |p2| are the previous two bytes, and |mode| is current context
mode, we calculate the context as:

  context = ContextLut(mode)[p1] | ContextLut(mode)[p2 + 256].

For contextUTF8 mode, if the previous two bytes are ASCII characters
(i.e. < 128), this will be equivalent to

  context = 4 * context1(p1) + context2(p2),

where context1 is based on the previous byte in the following way:

  0  : non-ASCII control
  1  : \t, \n, \r
  2  : space
  3  : other punctuation
  4  : " '
  5  : %
  6  : ( < [ {
  7  : ) > ] }
  8  : , ; :
  9  : .
  10 : =
  11 : number
  12 : upper-case vowel
  13 : upper-case consonant
  14 : lower-case vowel
  15 : lower-case consonant

and context2 is based on the second last byte:

  0 : control, space
  1 : punctuation
  2 : upper-case letter, number
  3 : lower-case letter

If the last byte is ASCII, and the second last byte is not (in a valid UTF8
stream it will be a continuation byte, value between 128 and 191), the
context is the same as if the second last byte was an ASCII control or space.

If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
be a continuation byte and the context id is 2 or 3 depending on the LSB of
the last byte and to a lesser extent on the second last byte if it is ASCII.

If the last byte is a UTF8 continuation byte, the second last byte can be:
  - continuation byte: the next byte is probably ASCII or lead byte (assuming
    4-byte UTF8 characters are rare) and the context id is 0 or 1.
  - lead byte (192 - 223): next byte is ASCII or lead byte, context is 0 or 1
  - lead byte (224 - 255): next byte is continuation byte, context is 2 or 3

The possible value combinations of the previous two bytes, the range of
context ids and the type of the next byte is summarized in the table below:

|--------\-----------------------------------------------------------------|
|         \                         Last byte                              |
| Second   \---------------------------------------------------------------|
| last byte \    ASCII            |   cont. byte        |   lead byte      |
|            \   (0-127)          |   (128-191)         |   (192-)         |
|=============|===================|=====================|==================|
|  ASCII      | next: ASCII/lead  |  not valid          |  next: cont.     |
|  (0-127)    | context: 4 - 63   |                     |  context: 2 - 3  |
|-------------|-------------------|---------------------|------------------|
|  cont. byte | next: ASCII/lead  |  next: ASCII/lead   |  next: cont.     |
|  (128-191)  | context: 4 - 63   |  context: 0 - 1     |  context: 2 - 3  |
|-------------|-------------------|---------------------|------------------|
|  lead byte  | not valid         |  next: ASCII/lead   |  not valid       |
|  (192-223)  |                   |  context: 0 - 1     |                  |
|-------------|-------------------|---------------------|------------------|
|  lead byte  | not valid         |  next: cont.        |  not valid       |
|  (224-)     |                   |  context: 2 - 3     |                  |
|-------------|-------------------|---------------------|------------------|
*/

/* Context mode identifiers, numbered 0 through 3 in declaration order.
   getContextLUT shifts the mode left by 9 bits to locate the mode's section
   of kContextLookup, so the numeric values must not change. */
const (
	contextLSB6 = iota
	contextMSB6
	contextUTF8
	contextSigned
)

/* Shared context lookup table for all four context modes.

   Each mode owns 512 consecutive entries: the first 256 are indexed by the
   last byte, the next 256 by the second-last byte. The sections appear in
   mode order (LSB6, MSB6, UTF8, SIGNED), so mode m starts at offset m * 512.
   Every row below holds 16 entries. */
var kContextLookup = [2048]byte{
	/* CONTEXT_LSB6, last byte: the low six bits of the byte. */
	/* bytes 0 - 63 */
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
	16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
	32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
	48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
	/* bytes 64 - 127 */
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
	16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
	32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
	48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
	/* bytes 128 - 191 */
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
	16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
	32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
	48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
	/* bytes 192 - 255 */
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
	16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
	32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
	48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,

	/* CONTEXT_LSB6, second last byte: contributes nothing. */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

	/* CONTEXT_MSB6, last byte: the high six bits of the byte. */
	0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
	4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
	8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11,
	12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15,
	16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19,
	20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
	24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27,
	28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31,
	32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35,
	36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39,
	40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43,
	44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47,
	48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51,
	52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55,
	56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59,
	60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63,

	/* CONTEXT_MSB6, second last byte: contributes nothing. */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

	/* CONTEXT_UTF8, last byte. */
	/* ASCII range (bytes 0 - 127). */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
	44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
	12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
	52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
	12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
	60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0,
	/* UTF8 continuation byte range (bytes 128 - 191). */
	0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
	0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
	0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
	0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
	/* UTF8 lead byte range (bytes 192 - 255). */
	2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
	2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
	2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
	2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,

	/* CONTEXT_UTF8, second last byte. */
	/* ASCII range (bytes 0 - 127). */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
	1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
	1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
	/* UTF8 continuation byte range (bytes 128 - 191). */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* UTF8 lead byte range: bytes 192 - 223 map to 0, bytes 224 - 255 to 2. */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

	/* CONTEXT_SIGNED, last byte: the second-last-byte values (next section)
	   shifted left by 3 bits. */
	0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
	16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
	16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
	16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
	24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
	24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
	24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
	24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
	32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
	32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
	32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
	32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
	40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
	40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
	40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
	48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 56,

	/* CONTEXT_SIGNED, second last byte. */
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
}

/* contextLUT is a byte slice used as a context lookup table. getContext reads
   entry p1 for the last byte and entry 256+p2 for the second-last byte, so a
   table needs at least 512 entries for every byte pair to be addressable. */
type contextLUT []byte

/* getContextLUT returns the part of kContextLookup that begins at the section
   for the given context mode. The start offset is mode shifted left by 9 bits
   (mode * 512). The slice is not truncated at the end of the section: it runs
   to the end of kContextLookup. A mode whose offset falls outside the table
   makes the slice expression panic. */
func getContextLUT(mode int) contextLUT {
	const modeShift = 9 // each mode's section holds 1 << 9 = 512 entries
	start := mode << modeShift
	return kContextLookup[start:]
}

/* getContext derives a context id from the two preceding bytes: the table
   entry for the last byte p1 (offsets 0 - 255) is OR-ed with the entry for
   the second-last byte p2 (offsets 256 - 511). Indexing panics if lut is too
   short to contain the requested entries. */
func getContext(p1 byte, p2 byte, lut contextLUT) byte {
	fromLast := lut[p1]
	fromSecondLast := lut[int(p2)+256]
	return fromLast | fromSecondLast
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy