org.bytedeco.tesseract.UNICHAR Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of tesseract Show documentation
There is a newer version: 5.5.0-1.5.11
// Targeted by JavaCPP version 1.5.3: DO NOT EDIT THIS FILE

package org.bytedeco.tesseract;

import java.nio.*;
import org.bytedeco.javacpp.*;
import org.bytedeco.javacpp.annotation.*;

import static org.bytedeco.javacpp.presets.javacpp.*;
import org.bytedeco.leptonica.*;
import static org.bytedeco.leptonica.global.lept.*;

import static org.bytedeco.tesseract.global.tesseract.*;


// The UNICHAR class holds a single classification result. This may be
// a single Unicode character (stored as between 1 and 4 utf8 bytes) or
// multiple Unicode characters representing the NFKC expansion of a ligature
// such as fi, ffl etc. These are also stored as utf8.
@Namespace("tesseract") @NoOffset @Properties(inherit = org.bytedeco.tesseract.presets.tesseract.class)
public class UNICHAR extends Pointer {
    static { Loader.load(); }
    /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
    public UNICHAR(Pointer p) { super(p); }

  public UNICHAR() { super((Pointer)null); allocate(); }
  private native void allocate();

  // Construct from a utf8 string. If len<0 then the string is null terminated.
  // If the string is too long to fit in the UNICHAR then it takes only what
  // will fit.
  public UNICHAR(@Cast("const char*") BytePointer utf8_str, int len) { super((Pointer)null); allocate(utf8_str, len); }
  private native void allocate(@Cast("const char*") BytePointer utf8_str, int len);
  public UNICHAR(String utf8_str, int len) { super((Pointer)null); allocate(utf8_str, len); }
  private native void allocate(String utf8_str, int len);

  // Construct from a single UCS4 character.
  public UNICHAR(int unicode) { super((Pointer)null); allocate(unicode); }
  private native void allocate(int unicode);

  // Default copy constructor and operator= are OK.

  // Get the first character as UCS-4.
  public native int first_uni();

  // Get the length of the UTF8 string.
  public native int utf8_len();

  // Get a UTF8 string, but NOT nullptr terminated.
  public native @Cast("const char*") BytePointer utf8();

  // Get a terminated UTF8 string: Must delete[] it after use.
  public native @Cast("char*") BytePointer utf8_str();

  // Get the number of bytes in the first character of the given utf8 string.
  public static native int utf8_step(@Cast("const char*") BytePointer utf8_str);
  public static native int utf8_step(String utf8_str);

  // A class to simplify iterating over and accessing elements of a UTF8
  // string. Note that unlike the UNICHAR class, const_iterator does NOT COPY or
  // take ownership of the underlying byte array. It also does not permit
  // modification of the array (as the name suggests).
  //
  // Example:
  //   for (UNICHAR::const_iterator it = UNICHAR::begin(str, str_len);
  //        it != UNICHAR::end(str, len);
  //        ++it) {
  //     tprintf("UCS-4 symbol code = %d\n", *it);
  //     char buf[5];
  //     int char_len = it.get_utf8(buf); buf[char_len] = '\0';
  //     tprintf("Char = %s\n", buf);
  //   }
  @NoOffset public static class const_iterator extends Pointer {
      static { Loader.load(); }
      /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
      public const_iterator(Pointer p) { super(p); }
  
    // Step to the next UTF8 character.
    // If the current position is at an illegal UTF8 character, then print an
    // error message and step by one byte. If the current position is at a
    // nullptr value, don't step past it.
    public native @ByRef @Name("operator ++") const_iterator increment();

    // Return the UCS-4 value at the current position.
    // If the current position is at an illegal UTF8 value, return a single
    // space character.
    public native @Name("operator *") int multiply();

    // Store the UTF-8 encoding of the current codepoint into buf, which must be
    // at least 4 bytes long. Return the number of bytes written.
    // If the current position is at an illegal UTF8 value, writes a single
    // space character and returns 1.
    // Note that this method does not null-terminate the buffer.
    public native int get_utf8(@Cast("char*") BytePointer buf);
    public native int get_utf8(@Cast("char*") ByteBuffer buf);
    public native int get_utf8(@Cast("char*") byte[] buf);
    // Returns the number of bytes of the current codepoint. Returns 1 if the
    // current position is at an illegal UTF8 value.
    public native int utf8_len();
    // Returns true if the UTF-8 encoding at the current position is legal.
    public native @Cast("bool") boolean is_legal();

    // Return the pointer into the string at the current position.
    public native @Cast("const char*") BytePointer utf8_data();

    // Iterator equality operators.
    
    
  }

  // Create a start/end iterator pointing to a string. Note that these methods
  // are static and do NOT create a copy or take ownership of the underlying
  // array.
  public static native @ByVal const_iterator begin(@Cast("const char*") BytePointer utf8_str, int byte_length);
  public static native @ByVal const_iterator begin(String utf8_str, int byte_length);
  public static native @ByVal const_iterator end(@Cast("const char*") BytePointer utf8_str, int byte_length);
  public static native @ByVal const_iterator end(String utf8_str, int byte_length);

  // Converts a utf-8 string to a vector of unicodes.
  // Returns an empty vector if the input contains invalid UTF-8.
  public static native @Cast("tesseract::char32*") @StdVector IntPointer UTF8ToUTF32(@Cast("const char*") BytePointer utf8_str);
  public static native @Cast("tesseract::char32*") @StdVector IntBuffer UTF8ToUTF32(String utf8_str);
  // Converts a vector of unicodes to a utf8 string.
  // Returns an empty string if the input contains an invalid unicode.
  public static native @StdString BytePointer UTF32ToUTF8(@Cast("tesseract::char32*") @StdVector IntPointer str32);
  public static native @StdString String UTF32ToUTF8(@Cast("tesseract::char32*") @StdVector IntBuffer str32);
  public static native @StdString BytePointer UTF32ToUTF8(@Cast("tesseract::char32*") @StdVector int[] str32);
}