All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.teavm.backend.c.utf8.c Maven / Gradle / Ivy

There is a newer version: 0.2.8
Show newest version
#include "runtime.h"
#include 

/*
 * Writes the three-byte UTF-8 form (1110xxxx 10xxxxxx 10xxxxxx) of the 16-bit
 * value `ch` through the `char*` lvalue `target`, advancing `target` by 3.
 *
 * `ch` is evaluated exactly once. `target` is evaluated several times and must
 * therefore be a plain pointer variable without side effects.
 * The body is wrapped in do { } while (0) so that the macro behaves as a single
 * statement (e.g. inside an unbraced if/else); callers supply the trailing ';'.
 */
#define teavm_utf8_encodeSingle16(ch, target) \
    do { \
        unsigned int teavm_utf8_ch_ = (unsigned int) (ch); \
        *(target)++ = (char) (0xE0 | (teavm_utf8_ch_ >> 12)); \
        *(target)++ = (char) (0x80 | ((teavm_utf8_ch_ >> 6) & 0x3F)); \
        *(target)++ = (char) (0x80 | (teavm_utf8_ch_ & 0x3F)); \
    } while (0)

/**
 * Encodes a sequence of UTF-16 code units as UTF-8.
 *
 * A high surrogate immediately followed by a low surrogate is combined into one
 * supplementary code point and written as four bytes. An unpaired surrogate
 * (a high surrogate at the end of input or not followed by a low surrogate, or
 * a stray low surrogate) is not rejected; it is written on its own using the
 * three-byte form.
 *
 * @param source      UTF-16 code units to encode.
 * @param sourceSize  number of code units available in `source`.
 * @param target      output buffer. No bounds checking is performed and no
 *                    terminator is written; at most 3 bytes are produced per
 *                    input code unit.
 * @return the number of bytes written to `target`.
 */
int32_t teavm_utf8_encode(char16_t* source, int32_t sourceSize, char* target) {
    char* initialTarget = target;
    while (sourceSize-- > 0) {
        char16_t ch = *source++;
        if (ch < 0x80) {
            *target++ = (char) ch;
        } else if (ch < 0x800) {
            *target++ = (char) (0xC0 | (ch >> 6));
            *target++ = (char) (0x80 | (ch & 0x3F));
        } else if ((ch & TEAVM_SURROGATE_BIT_MASK) == TEAVM_HIGH_SURROGATE_BITS) {
            /* Only peek here: sourceSize must keep counting the units that are
               still unread, so it is decremented solely when one is consumed. */
            if (sourceSize == 0) {
                /* High surrogate is the last unit: emit it unpaired. */
                teavm_utf8_encodeSingle16(ch, target);
                break;
            }

            char16_t nextCh = *source;
            if ((nextCh & TEAVM_SURROGATE_BIT_MASK) != TEAVM_LOW_SURROGATE_BITS) {
                /* Not a valid pair: emit the high surrogate alone and let the
                   next iteration handle nextCh normally. */
                teavm_utf8_encodeSingle16(ch, target);
                continue;
            }
            source++;
            sourceSize--;

            int32_t codePoint = (int32_t) ((((int32_t) ch & TEAVM_SURROGATE_BIT_INV_MASK) << 10)
                    | ((int32_t) nextCh & TEAVM_SURROGATE_BIT_INV_MASK))
                    + TEAVM_MIN_SUPPLEMENTARY_CODE_POINT;
            *target++ = (char) (0xF0 | (codePoint >> 18));
            *target++ = (char) (0x80 | ((codePoint >> 12) & 0x3F));
            *target++ = (char) (0x80 | ((codePoint >> 6) & 0x3F));
            *target++ = (char) (0x80 | (codePoint & 0x3F));
        } else {
            teavm_utf8_encodeSingle16(ch, target);
        }
    }
    return (int32_t) (target - initialTarget);
}

/**
 * Decodes a UTF-8 byte sequence into UTF-16 code units.
 *
 * One-, two- and three-byte sequences produce a single code unit; four-byte
 * sequences produce a high/low surrogate pair. Decoding is lenient: when a lead
 * byte is not followed by the required continuation bytes (10xxxxxx), or when a
 * byte is not a valid lead byte at all, that single byte is copied to the
 * output as a code unit with the same value (0x00-0xFF) and decoding resumes at
 * the following byte. Overlong forms and out-of-range code points are not
 * detected.
 *
 * @param source      UTF-8 bytes to decode.
 * @param sourceSize  number of bytes available in `source`.
 * @param target      output buffer. No bounds checking is performed and no
 *                    terminator is written; at most one code unit is produced
 *                    per input byte.
 * @return the number of code units written to `target`.
 */
int32_t teavm_utf8_decode(char* source, int32_t sourceSize, char16_t* target) {
    char16_t* initialTarget = target;
    while (sourceSize-- > 0) {
        /* Read bytes as unsigned so that values >= 0x80 do not sign-extend on
           platforms where plain char is signed. */
        unsigned char b = (unsigned char) *source++;
        if ((b & 0x80) == 0) {
            *target++ = (char16_t) b;
        } else if ((b & 0xE0) == 0xC0) {
            /* Only peek here: sourceSize is decremented solely when the
               continuation byte is actually consumed below. */
            if (sourceSize == 0) {
                *target++ = (char16_t) b;
                break;
            }

            unsigned char b2 = (unsigned char) *source;
            if ((b2 & 0xC0) != 0x80) {
                *target++ = (char16_t) b;
                continue;
            }
            source++;
            sourceSize--;

            *target++ = (char16_t) (((b & 0x1F) << 6) | (b2 & 0x3F));
        } else if ((b & 0xF0) == 0xE0) {
            if (sourceSize < 2) {
                *target++ = (char16_t) b;
                continue;
            }

            unsigned char b2 = (unsigned char) source[0];
            unsigned char b3 = (unsigned char) source[1];
            if ((b2 & 0xC0) != 0x80 || (b3 & 0xC0) != 0x80) {
                *target++ = (char16_t) b;
                continue;
            }
            source += 2;
            sourceSize -= 2;

            *target++ = (char16_t) (((b & 0x0F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F));
        } else if ((b & 0xF8) == 0xF0) {
            if (sourceSize < 3) {
                *target++ = (char16_t) b;
                continue;
            }

            unsigned char b2 = (unsigned char) source[0];
            unsigned char b3 = (unsigned char) source[1];
            unsigned char b4 = (unsigned char) source[2];
            if ((b2 & 0xC0) != 0x80 || (b3 & 0xC0) != 0x80 || (b4 & 0xC0) != 0x80) {
                *target++ = (char16_t) b;
                continue;
            }
            source += 3;
            sourceSize -= 3;

            int32_t code = (int32_t)
                     ((((int32_t) b  & 0x07) << 18)
                    | (((int32_t) b2 & 0x3F) << 12)
                    | (((int32_t) b3 & 0x3F) << 6)
                    | (((int32_t) b4 & 0x3F)));

            /* Split into a surrogate pair: the high unit carries the top ten
               bits of (code - 0x10000), the low unit the bottom ten. Subtracting
               the supplementary base does not affect the low ten bits, so the
               low unit can be masked from `code` directly. */
            *target++ = (char16_t) (TEAVM_HIGH_SURROGATE_BITS
                    | (((code - TEAVM_MIN_SUPPLEMENTARY_CODE_POINT) >> 10) & TEAVM_SURROGATE_BIT_INV_MASK));
            *target++ = (char16_t) (TEAVM_LOW_SURROGATE_BITS | (code & TEAVM_SURROGATE_BIT_INV_MASK));
        } else {
            /* Stray continuation byte or unsupported lead byte (0xF8-0xFF). */
            *target++ = (char16_t) b;
        }
    }

    return (int32_t) (target - initialTarget);
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy