All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.treesitter.TSParser Maven / Gradle / Ivy

There is a newer version: 0.24.4
Show newest version
package org.treesitter;

import org.treesitter.utils.NativeUtils;

import java.io.*;
import java.lang.ref.Cleaner;

public class TSParser {
    /**
     * The latest ABI version that is supported by the current version of the
     * library. When Languages are generated by the Tree-sitter CLI, they are
     * assigned an ABI version number that corresponds to the current CLI version.
     * The Tree-sitter library is generally backwards-compatible with languages
     * generated using older CLI versions, but is not forwards-compatible.
     */
    public static final int TREE_SITTER_LANGUAGE_VERSION = 14;

    /**
     * The earliest ABI version that is supported by the current version of the
     * library.
     */
    public static final int TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION = 13;

    // Load the native library before any of the native methods below can be called.
    static {
        NativeUtils.loadLib("lib/tree-sitter");
    }

    // ---------------------------------------------------------------------
    // Native bindings. The method names are part of the JNI contract and
    // must not be renamed.
    // ---------------------------------------------------------------------

    // Parser
    public static native long ts_parser_new();
    public static native void ts_parser_delete(long tree_parser_ptr);
    private static native boolean ts_parser_set_language(long ts_parser_ptr, long ts_language_ptr);
    private static native long ts_parser_language(long ts_parser_ptr);
    private static native boolean ts_parser_set_included_ranges(long ts_parser_ptr, TSRange[] ranges);
    private static native TSRange[] ts_parser_included_ranges(long ts_parser_ptr);
    private static native long ts_parser_parse(long ts_parser_ptr, byte[] buf, long ts_tree_ptr, TSReader reader, int tsInputEncoding);
    private static native long ts_parser_parse_string(long ts_parser_ptr, long ts_tree_ptr, String input);
    private static native long ts_parser_parse_string_encoding(long ts_parser_ptr, long ts_tree_ptr, String input, int tsInputEncoding);
    private static native void ts_parser_reset(long ts_parser_ptr);
    private static native void ts_parser_set_timeout_micros(long ts_parser_ptr, long timeout);
    private static native long ts_parser_timeout_micros(long ts_parser_ptr);
    private static native void ts_parser_set_cancellation_flag(long ts_parser_ptr, long flag_ptr);
    private static native long ts_parser_cancellation_flag(long ts_parser_ptr);
    private static native long alloc_cancellation_flag();
    private static native long get_cancellation_flag_value(long flag_ptr);
    private static native void free_cancellation_flag(long flag_ptr);
    private static native void write_cancellation_flag(long flag_ptr, long value);
    protected static native int ts_language_version(long ts_language_ptr);
    private static native void ts_parser_set_logger(long ts_parser_ptr, TSLogger logger);
    private static native void free_logger(long ts_parser_ptr);
    private static native void ts_parser_print_dot_graphs(long ts_parser_ptr, FileDescriptor fileDescriptor);

    // Tree
    protected static native long ts_tree_copy(long tree_ptr);
    protected static native void ts_tree_delete(long tree_ptr);
    protected static native TSNode ts_tree_root_node(long tree_ptr);
    protected static native TSNode ts_tree_root_node_with_offset(long tree_ptr, int offsetBytes, TSPoint offsetPoint);
    protected static native long ts_tree_language(long tree_ptr);
    protected static native TSRange[] ts_tree_included_ranges(long tree_ptr);
    protected static native void ts_tree_edit(long tree_ptr, TSInputEdit edit);
    protected static native TSRange[] ts_tree_get_changed_ranges(long old_tree_ptr, long new_tree_ptr);

    // Node
    protected static native String ts_node_type(TSNode node);
    protected static native int ts_node_symbol(TSNode node);
    protected static native int ts_node_start_byte(TSNode node);
    protected static native TSPoint ts_node_start_point(TSNode node);
    protected static native int ts_node_end_byte(TSNode node);
    protected static native TSPoint ts_node_end_point(TSNode node);
    protected static native String ts_node_string(TSNode node);
    protected static native boolean ts_node_is_null(TSNode node);
    protected static native boolean ts_node_is_named(TSNode node);
    protected static native boolean ts_node_is_missing(TSNode node);
    protected static native boolean ts_node_is_extra(TSNode node);
    protected static native boolean ts_node_has_changes(TSNode node);
    protected static native boolean ts_node_has_error(TSNode node);
    protected static native TSNode ts_node_parent(TSNode node);
    protected static native TSNode ts_node_child(TSNode node, int index);
    protected static native String ts_node_field_name_for_child(TSNode node, int index);
    protected static native int ts_node_child_count(TSNode node);
    protected static native TSNode ts_node_named_child(TSNode node, int index);
    protected static native int ts_node_named_child_count(TSNode node);
    protected static native TSNode ts_node_child_by_field_name(TSNode node, String field_name);
    protected static native TSNode ts_node_child_by_field_id(TSNode node, int ts_field_id);
    protected static native TSNode ts_node_next_sibling(TSNode node);
    protected static native TSNode ts_node_prev_sibling(TSNode node);
    protected static native TSNode ts_node_next_named_sibling(TSNode node);
    protected static native TSNode ts_node_prev_named_sibling(TSNode node);
    protected static native TSNode ts_node_first_child_for_byte(TSNode node, int start_byte);
    protected static native TSNode ts_node_first_named_child_for_byte(TSNode node, int start_byte);
    protected static native TSNode ts_node_descendant_for_byte_range(TSNode node, int start_byte, int end_byte);
    protected static native TSNode ts_node_descendant_for_point_range(TSNode node, TSPoint start_point, TSPoint end_point);
    protected static native TSNode ts_node_named_descendant_for_byte_range(TSNode node, int start_byte, int end_byte);
    protected static native TSNode ts_node_named_descendant_for_point_range(TSNode node, TSPoint start_point, TSPoint end_point);
    protected static native TSNode ts_node_edit(TSNode node, TSInputEdit edit);
    protected static native boolean ts_node_eq(TSNode a, TSNode b);

    // Tree cursor
    protected static native void free_cursor(long ts_tree_cursor_ptr);
    protected static native long ts_tree_cursor_new(TSNode node);
    protected static native void ts_tree_cursor_delete(long cursor_ptr);
    protected static native void ts_tree_cursor_reset(long cursor_ptr, TSNode node);
    protected static native TSNode ts_tree_cursor_current_node(long cursor_ptr);
    protected static native String ts_tree_cursor_current_field_name(long cursor_ptr);
    protected static native int ts_tree_cursor_current_field_id(long cursor_ptr);
    protected static native boolean ts_tree_cursor_goto_parent(long cursor_ptr);
    protected static native boolean ts_tree_cursor_goto_next_sibling(long cursor_ptr);
    protected static native boolean ts_tree_cursor_goto_first_child(long cursor_ptr);
    protected static native int ts_tree_cursor_goto_first_child_for_byte(long cursor_ptr, int startByte);
    protected static native int ts_tree_cursor_goto_first_child_for_point(long cursor_ptr, TSPoint startPoint);
    protected static native long ts_tree_cursor_copy(long cursor_ptr);

    // Query
    protected static native long ts_query_new(long ts_language_ptr, String source);
    protected static native void ts_query_delete(long ts_query_ptr);
    protected static native int ts_query_pattern_count(long ts_query_ptr);
    protected static native int ts_query_capture_count(long ts_query_ptr);
    protected static native int ts_query_string_count(long ts_query_ptr);
    protected static native int ts_query_start_byte_for_pattern(long ts_query_ptr, int pattern_index);
    protected static native TSQueryPredicateStep[] ts_query_predicates_for_pattern(long ts_query_ptr, int pattern_index);
    protected static native boolean ts_query_is_pattern_rooted(long ts_query_ptr, int pattern_index);
    protected static native boolean ts_query_is_pattern_non_local(long ts_query_ptr, int pattern_index);
    protected static native boolean ts_query_is_pattern_guaranteed_at_step(long ts_query_ptr, int byte_offset);
    protected static native String ts_query_capture_name_for_id(long ts_query_ptr, int index);
    protected static native int ts_query_capture_quantifier_for_id(long ts_query_ptr, int pattern_id, int index);
    protected static native String ts_query_string_value_for_id(long ts_query_ptr, int index);
    protected static native void ts_query_disable_capture(long ts_query_ptr, String name);
    protected static native void ts_query_disable_pattern(long ts_query_ptr, int patter_index);

    // Query cursor
    protected static native long ts_query_cursor_new();
    protected static native void ts_query_cursor_delete(long ts_query_cursor_ptr);
    protected static native void ts_query_cursor_exec(long ts_query_cursor_ptr, long ts_query_ptr, TSNode node);
    protected static native boolean ts_query_cursor_did_exceed_match_limit(long ts_query_cursor_ptr);
    protected static native int ts_query_cursor_match_limit(long ts_query_cursor_ptr);
    protected static native void ts_query_cursor_set_match_limit(long ts_query_cursor_ptr, int limit);
    protected static native void ts_query_cursor_set_byte_range(long ts_query_cursor_ptr, int start_byte, int end_byte);
    protected static native void ts_query_cursor_set_point_range(long ts_query_cursor_ptr, TSPoint start_point, TSPoint end_point);
    protected static native boolean ts_query_cursor_next_match(long ts_query_cursor_ptr, TSQueryMatch match);
    protected static native void ts_query_cursor_remove_match(long ts_query_cursor_ptr, int match_id);
    protected static native boolean ts_query_cursor_next_capture(long ts_query_cursor_ptr, TSQueryMatch match);

    // Debugging and language introspection
    protected static native void ts_tree_print_dot_graph(long ts_tree_ptr, FileDescriptor fileDescriptor);
    protected static native int ts_language_field_count(long ts_language_ptr);
    protected static native String ts_language_field_name_for_id(long ts_language_ptr, int ts_field_id);
    protected static native int ts_language_field_id_for_name(long ts_language_ptr, String field_name);
    protected static native int ts_language_symbol_type(long ts_language_ptr, int ts_symbol);
    protected static native int ts_language_symbol_count(long ts_language_ptr);
    protected static native String ts_language_symbol_name(long ts_language_ptr, int ts_symbol);
    protected static native int ts_language_symbol_for_name(long ts_language_ptr, String name, boolean is_named);

    /** Pointer to the native parser created by {@code ts_parser_new()}. */
    private final long ptr;

    /**
     * Cleanup action registered with the {@link Cleaner} for each parser.
     * It is a static nested class that holds only the native pointer, so the
     * action itself never references the {@link TSParser} instance it cleans up.
     */
    static class TSParserCleaner implements Runnable {
        private final long ptr;

        public TSParserCleaner(long ptr) {
            this.ptr = ptr;
        }

        /**
         * Frees the parser's cancellation flag (when one is set), then its
         * logger, and finally deletes the native parser itself.
         */
        @Override
        public void run() {
            long flagPtr = ts_parser_cancellation_flag(this.ptr);
            if (flagPtr != 0) {
                free_cancellation_flag(flagPtr);
            }
            free_logger(ptr);
            ts_parser_delete(ptr);
        }
    }

    /** Cleaner on which every parser registers its {@link TSParserCleaner}. */
    static final Cleaner cleaner = Cleaner.create();

    /** The logger most recently passed to {@link #setLogger(TSLogger)}, if any. */
    private TSLogger logger;

    /**
     * Create a new parser.
     *
     * <p>A cancellation flag is allocated, initialised to 0 and attached to
     * the native parser. The native resources are released by a
     * {@link TSParserCleaner} registered on the shared cleaner.
     */
    public TSParser() {
        this.ptr = ts_parser_new();
        long cancellationFlagPtr = alloc_cancellation_flag();
        write_cancellation_flag(cancellationFlagPtr, 0);
        ts_parser_set_cancellation_flag(ptr, cancellationFlagPtr);
        cleaner.register(this, new TSParserCleaner(this.ptr));
    }

    /**
     * Get the parser's current logger.
     *
     * @return The logger that the parser is using.
     */
    public TSLogger getLogger() {
        return logger;
    }

    /**
     * Set the logger that a parser should use during parsing.
     *
     * @param logger The logger that the parser should use.
     */
    public void setLogger(TSLogger logger) {
        this.logger = logger;
        ts_parser_set_logger(ptr, logger);
    }

    /**
     * Set the language that the parser should use for parsing.
     *
     * <p>Returns a boolean indicating whether the language was successfully
     * assigned. True means assignment succeeded. False means there was a version
     * mismatch: the language was generated with an incompatible version of the
     * Tree-sitter CLI. Check the language's version using {@code ts_language_version}
     * and compare it to this library's {@link TSParser#TREE_SITTER_LANGUAGE_VERSION} and
     * {@link TSParser#TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION} constants.
     *
     * @param language The language that the parser should use.
     *
     * @return True if the language was successfully applied. False otherwise.
     */
    public boolean setLanguage(TSLanguage language) {
        return ts_parser_set_language(ptr, language.getPtr());
    }

    /**
     * Use the parser to parse some source code stored in one contiguous string.
     *
     * <p>{@code oldTree} has the same meaning as in
     * {@link #parse(byte[], TSTree, TSReader, TSInputEncoding) parse()}.
     *
     * @param oldTree The old tree to use. If any.
     * @param input the source code to parse.
     *
     * @return {@link TSTree} if success, null when the native parser returns
     *         no tree (consistent with
     *         {@link #parse(byte[], TSTree, TSReader, TSInputEncoding) parse()}).
     */
    public TSTree parseString(TSTree oldTree, String input) {
        long treePtr = ts_parser_parse_string(ptr, pointerOf(oldTree), input);
        return wrapTree(treePtr);
    }

    /**
     * Use the parser to parse some source code stored in one contiguous string
     * with a given encoding. The first two parameters work the same as in the
     * {@link #parseString(TSTree, String) parseString()} method. The final
     * parameter indicates whether the text is encoded as UTF8 or UTF16.
     *
     * @param oldTree The old tree to use. If any.
     * @param input The source code to parse.
     * @param encoding The encoding of the source code.
     *
     * @return {@link TSTree} if success, null when the native parser returns
     *         no tree (consistent with
     *         {@link #parse(byte[], TSTree, TSReader, TSInputEncoding) parse()}).
     */
    public TSTree parseStringEncoding(TSTree oldTree, String input, TSInputEncoding encoding) {
        // NOTE(review): the native layer receives the enum ordinal, so this relies on
        // the declaration order of TSInputEncoding matching the native values - confirm.
        long treePtr = ts_parser_parse_string_encoding(ptr, pointerOf(oldTree), input, encoding.ordinal());
        return wrapTree(treePtr);
    }

    /**
     * Use the parser to parse some source code and create a syntax tree.
     *
     * <p>If you are parsing this document for the first time, pass null for the
     * {@code oldTree} parameter. Otherwise, if you have already parsed an earlier
     * version of this document and the document has since been edited, pass the
     * previous syntax tree so that the unchanged parts of it can be reused.
     * This will save time and memory. For this to work correctly, you must have
     * already edited the old syntax tree using the {@link TSTree#edit(TSInputEdit) edit}
     * function in a way that exactly matches the source code changes.
     *
     * <p>This function returns a syntax tree on success, and null on failure. There
     * are three possible reasons for failure:
     * <ol>
     *   <li>The parser does not have a language assigned. Check for this using the
     *       {@link #getLanguage()} function.</li>
     *   <li>Parsing was cancelled due to a timeout that was set by an earlier call to
     *       the {@link #setTimeoutMicros(long) setTimeoutMicros()} function. You can
     *       resume parsing from where the parser left off by calling
     *       {@link #parse(byte[], TSTree, TSReader, TSInputEncoding) parse()} again with
     *       the same arguments. Or you can start parsing from scratch by first calling
     *       {@link #reset()}.</li>
     *   <li>Parsing was cancelled using a cancellation flag that was set by an
     *       earlier call to {@link #setCancellationFlag(long) setCancellationFlag()}.
     *       You can resume parsing from where the parser left off by calling
     *       {@link #parse(byte[], TSTree, TSReader, TSInputEncoding) parse()} again with
     *       the same arguments.</li>
     * </ol>
     *
     * @param buf Buffer to use while reading from reader.
     * @param oldTree The old tree to use. If any.
     * @param reader The reader to read source code from.
     * @param encoding The encoding of the source code.
     *
     * @return {@link TSTree} if success, null otherwise.
     */
    public TSTree parse(byte[] buf, TSTree oldTree, TSReader reader, TSInputEncoding encoding) {
        long treePtr = ts_parser_parse(ptr, buf, pointerOf(oldTree), reader, encoding.ordinal());
        return wrapTree(treePtr);
    }

    /** Returns the native pointer of {@code tree}, or 0 when {@code tree} is null. */
    private static long pointerOf(TSTree tree) {
        return tree == null ? 0 : tree.getPtr();
    }

    /**
     * Wraps a native tree pointer, mapping a zero pointer to null so that a
     * failed parse is never handed out as a {@link TSTree} around a null pointer.
     */
    private static TSTree wrapTree(long treePtr) {
        return treePtr == 0 ? null : new TSTree(treePtr);
    }

    /**
     * Get the parser's current language.
     *
     * <p>The returned object looks up the parser's language pointer each time
     * it is asked for it, rather than capturing the pointer at call time.
     *
     * @return {@link TSLanguage}
     */
    public TSLanguage getLanguage() {
        return () -> ts_parser_language(ptr);
    }

    /**
     * Set the ranges of text that the parser should include when parsing.
     *
     * <p>By default, the parser will always include entire documents. This function
     * allows you to parse only a <em>portion</em> of a document but still return a
     * syntax tree whose ranges match up with the document as a whole. You can also
     * pass multiple disjoint ranges.
     *
     * <p>The parser does <em>not</em> take ownership of these ranges; it copies
     * the data, so it doesn't matter how these ranges are allocated.
     *
     * <p>If the length is zero, then the entire document will be parsed. Otherwise,
     * the given ranges must be ordered from earliest to latest in the document,
     * and they must not overlap. That is, the following must hold for all
     * {@code i < length - 1}:
     * {@code ranges[i].end_byte <= ranges[i + 1].start_byte}
     *
     * <p>If this requirement is not satisfied, the operation will fail, the ranges
     * will not be assigned, and this function will return false. On success,
     * this function returns true.
     *
     * @param ranges The ranges to include.
     * @return true on success, false otherwise.
     */
    public boolean setIncludedRanges(TSRange[] ranges) {
        return ts_parser_set_included_ranges(ptr, ranges);
    }

    /**
     * Get the ranges of text that the parser will include when parsing.
     *
     * @return {@link TSRange}[]
     */
    public TSRange[] getIncludedRanges() {
        return ts_parser_included_ranges(ptr);
    }

    /**
     * Instruct the parser to start the next parse from the beginning.
     *
     * <p>If the parser previously failed because of a timeout or a cancellation, then
     * by default, it will resume where it left off on the next call to
     * {@link #parse(byte[], TSTree, TSReader, TSInputEncoding) parse()} or other
     * parsing functions. If you don't want to resume, and instead intend to use this
     * parser to parse some other document, you must call {@link #reset()} first.
     */
    public void reset() {
        ts_parser_reset(ptr);
    }

    /**
     * Set the maximum duration in microseconds that parsing should be allowed to
     * take before halting.
     *
     * <p>If parsing takes longer than this, it will halt early, returning null.
     * See {@link #parse(byte[], TSTree, TSReader, TSInputEncoding) parse()} for more
     * information.
     *
     * @param timeoutMicros The maximum duration in microseconds.
     */
    public void setTimeoutMicros(long timeoutMicros) {
        ts_parser_set_timeout_micros(ptr, timeoutMicros);
    }

    /**
     * Get the duration in microseconds that parsing is allowed to take.
     *
     * @return The maximum duration in microseconds.
     */
    public long getTimeoutMicros() {
        return ts_parser_timeout_micros(ptr);
    }

    /**
     * Set the value of the parser's cancellation flag.
     *
     * <p>The value is written into the flag that was attached to the parser
     * when it was constructed. If a non-zero value is assigned, then the parser
     * will periodically read it during parsing and, on reading a non-zero value,
     * halt early, returning null. See
     * {@link #parse(byte[], TSTree, TSReader, TSInputEncoding) parse()} for more
     * information.
     *
     * @param flag The cancellation flag.
     */
    public void setCancellationFlag(long flag) {
        write_cancellation_flag(ts_parser_cancellation_flag(ptr), flag);
    }

    /**
     * Get the current value of the parser's cancellation flag.
     *
     * @return The cancellation flag.
     */
    public long getCancellationFlag() {
        return get_cancellation_flag_value(ts_parser_cancellation_flag(ptr));
    }

    /**
     * Set the file to which the parser should write debugging graphs
     * during parsing. The graphs are formatted in the DOT language.
     * You may want to pipe these graphs directly to a dot(1) process in order to
     * generate SVG output. You can turn off this logging by passing null as file.
     *
     * @param file the file to which the parser should write debugging graphs.
     *             Passing null to disable logging and close the file if any.
     *
     * @throws IOException if the file cannot be written to.
     */
    public void printDotGraphs(File file) throws IOException {
        if (file == null) {
            ts_parser_print_dot_graphs(ptr, null);
            return;
        }
        // NOTE(review): this stream is neither closed nor retained after the call.
        // Whether that is safe depends on how the native side treats the descriptor
        // (duplicates it vs. takes ownership) - confirm against the JNI implementation
        // before changing the stream's lifetime here.
        FileOutputStream outputStream = new FileOutputStream(file);
        ts_parser_print_dot_graphs(ptr, outputStream.getFD());
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy