org.treesitter.TSParser Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of tree-sitter Show documentation
Show all versions of tree-sitter Show documentation
Next generation Tree Sitter Java binding
package org.treesitter;
import org.treesitter.utils.NativeUtils;
import java.io.File;
import java.io.FileDescriptor;
import java.io.FileOutputStream;
import java.io.IOException;
public class TSParser {
/**
* The latest ABI version that is supported by the current version of the
* library. When Languages are generated by the Tree-sitter CLI, they are
* assigned an ABI version number that corresponds to the current CLI version.
* The Tree-sitter library is generally backwards-compatible with languages
* generated using older CLI versions, but is not forwards-compatible.
*
* Together with {@link #TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION} this gives
* the inclusive range of language ABI versions advertised by this class.
*/
public static final int TREE_SITTER_LANGUAGE_VERSION = 14;
/**
* The earliest ABI version that is supported by the current version of the
* library.
*/
public static final int TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION = 13;
// Runs once when the class is initialized, i.e. before any of the native
// methods declared below can be invoked: asks NativeUtils to load
// "lib/tree-sitter".
static {
NativeUtils.loadLib("lib/tree-sitter");
}
// Language most recently handed to setLanguage(). It is only written there and
// is not read anywhere in the visible part of this class.
// NOTE(review): presumably kept so the language object stays strongly
// reachable while the parser uses it — confirm.
private TSLanguage language;
// ---- Parser-level native methods ----
// Apart from ts_parser_new() and the flag helpers, each method takes the parser
// handle (the long returned by ts_parser_new()) as its first argument.
// ts_parser_new/ts_parser_delete are public; the remaining declarations in this
// group are private and are used by the constructor, the clean action and the
// instance methods of this class.
public static native long ts_parser_new();
public static native void ts_parser_delete(long tree_parser_ptr);
private static native boolean ts_parser_set_language(long ts_parser_ptr, long ts_language_ptr);
private static native long ts_parser_language(long ts_parser_ptr);
private static native boolean ts_parser_set_included_ranges(long ts_parser_ptr, TSRange[] ranges);
private static native TSRange[] ts_parser_included_ranges(long ts_parser_ptr);
// Parse entry points. Each returns a tree handle; parse() treats a returned 0
// as "no tree".
private static native long ts_parser_parse(long ts_parser_ptr, byte[] buf, long ts_tree_ptr,TSReader reader, int tsInputEncoding);
private static native long ts_parser_parse_string(long ts_parser_ptr, long ts_tree_ptr, String input);
private static native long ts_parser_parse_string_encoding(long ts_parser_ptr, long ts_tree_ptr, String input, int tsInputEncoding);
private static native void ts_parser_reset(long ts_parser_ptr);
private static native void ts_parser_set_timeout_micros(long ts_parser_ptr, long timeout);
private static native long ts_parser_timeout_micros(long ts_parser_ptr);
// Cancellation flag: the parser stores a handle to a separately allocated
// flag. The constructor allocates one, writes 0 into it and attaches it; the
// clean action frees it again.
private static native void ts_parser_set_cancellation_flag(long ts_parser_ptr, long flag_ptr);
private static native long ts_parser_cancellation_flag(long ts_parser_ptr);
private static native long alloc_cancellation_flag();
private static native long get_cancellation_flag_value(long flag_ptr);
private static native void free_cancellation_flag(long flag_ptr);
private static native void write_cancellation_flag(long flag_ptr, long value);
// Logger hooks: free_logger is invoked by the clean action right before the
// parser itself is deleted.
private static native void ts_parser_set_logger(long ts_parser_ptr, TSLogger logger);
private static native void free_logger(long ts_parser_ptr);
// Called by printDotGraphs(); that method passes a null descriptor when it is
// given a null file.
private static native void ts_parser_print_dot_graphs(long ts_parser_ptr, FileDescriptor fileDescriptor);
// ---- Tree-level native methods ----
// These operate on a tree handle (long). They are protected static, so they
// are callable from subclasses and from other classes in this package.
// NOTE(review): presumably used by the sibling wrapper classes (TSTree, TSNode,
// ...) — none of the declarations in this group is called inside this class.
protected static native long ts_tree_copy(long tree_ptr);
protected static native void ts_tree_delete(long tree_ptr);
protected static native TSNode ts_tree_root_node(long tree_ptr);
protected static native TSNode ts_tree_root_node_with_offset(long tree_ptr, int offsetBytes, TSPoint offsetPoint);
protected static native long ts_tree_language(long tree_ptr);
protected static native TSRange[] ts_tree_included_ranges(long tree_ptr);
protected static native void ts_tree_edit(long tree_ptr, TSInputEdit edit);
protected static native TSRange[] ts_tree_get_changed_ranges(long old_tree_ptr, long new_tree_ptr);
// ---- Node-level native methods ----
// These take TSNode objects rather than raw handles.
protected static native String ts_node_type(TSNode node);
protected static native int ts_node_symbol(TSNode node);
protected static native String ts_node_grammar_type(TSNode node);
protected static native int ts_node_grammar_symbol(TSNode node);
protected static native int ts_node_start_byte(TSNode node);
protected static native TSPoint ts_node_start_point(TSNode node);
protected static native int ts_node_end_byte(TSNode node);
protected static native TSPoint ts_node_end_point(TSNode node);
protected static native String ts_node_string(TSNode node);
protected static native boolean ts_node_is_null(TSNode node);
protected static native boolean ts_node_is_named(TSNode node);
protected static native boolean ts_node_is_missing(TSNode node);
protected static native boolean ts_node_is_extra(TSNode node);
protected static native boolean ts_node_has_changes(TSNode node);
protected static native boolean ts_node_has_error(TSNode node);
protected static native boolean ts_node_is_error(TSNode node);
protected static native int ts_node_parse_state(TSNode node);
protected static native int ts_node_next_parse_state(TSNode node);
protected static native TSNode ts_node_parent(TSNode node);
protected static native TSNode ts_node_child(TSNode node, int index);
protected static native String ts_node_field_name_for_child(TSNode node, int index);
protected static native int ts_node_child_count(TSNode node);
protected static native TSNode ts_node_named_child(TSNode node, int index);
protected static native int ts_node_named_child_count(TSNode node);
protected static native TSNode ts_node_child_by_field_name(TSNode node, String field_name);
protected static native TSNode ts_node_child_by_field_id(TSNode node, int ts_field_id);
protected static native TSNode ts_node_next_sibling(TSNode node);
protected static native TSNode ts_node_prev_sibling(TSNode node);
protected static native TSNode ts_node_next_named_sibling(TSNode node);
protected static native TSNode ts_node_prev_named_sibling(TSNode node);
protected static native TSNode ts_node_first_child_for_byte(TSNode node, int start_byte);
protected static native TSNode ts_node_first_named_child_for_byte(TSNode node, int start_byte);
protected static native TSNode ts_node_descendant_for_byte_range(TSNode node, int start_byte, int end_byte);
protected static native TSNode ts_node_descendant_for_point_range(TSNode node, TSPoint start_point, TSPoint end_point);
protected static native TSNode ts_node_named_descendant_for_byte_range(TSNode node, int start_byte, int end_byte);
protected static native TSNode ts_node_named_descendant_for_point_range(TSNode node, TSPoint start_point, TSPoint end_point);
protected static native TSNode ts_node_edit(TSNode node, TSInputEdit edit);
protected static native boolean ts_node_eq(TSNode a, TSNode b);
// ---- Tree-cursor native methods ----
// These operate on a cursor handle (long) obtained from ts_tree_cursor_new or
// ts_tree_cursor_copy.
// NOTE(review): both free_cursor and ts_tree_cursor_delete accept a cursor
// handle; how the two differ is not visible from this file — confirm before
// choosing one.
protected static native void free_cursor(long ts_tree_cursor_ptr);
protected static native long ts_tree_cursor_new(TSNode node);
protected static native void ts_tree_cursor_delete(long cursor_ptr);
protected static native void ts_tree_cursor_reset(long cursor_ptr, TSNode node);
protected static native TSNode ts_tree_cursor_current_node(long cursor_ptr);
protected static native String ts_tree_cursor_current_field_name(long cursor_ptr);
protected static native int ts_tree_cursor_current_field_id(long cursor_ptr);
protected static native boolean ts_tree_cursor_goto_parent(long cursor_ptr);
protected static native boolean ts_tree_cursor_goto_next_sibling(long cursor_ptr);
protected static native boolean ts_tree_cursor_goto_first_child(long cursor_ptr);
protected static native int ts_tree_cursor_goto_first_child_for_byte(long cursor_ptr, int startByte);
protected static native int ts_tree_cursor_goto_first_child_for_point(long cursor_ptr, TSPoint startPoint);
protected static native long ts_tree_cursor_copy(long cursor_ptr);
// ---- Query native methods ----
// ts_query_new builds a query handle from a language handle and a source
// string; the other methods in this group take that query handle.
protected static native long ts_query_new(long ts_language_ptr, String source);
protected static native void ts_query_delete(long ts_query_ptr);
protected static native int ts_query_pattern_count(long ts_query_ptr);
protected static native int ts_query_capture_count(long ts_query_ptr);
protected static native int ts_query_string_count(long ts_query_ptr);
protected static native int ts_query_start_byte_for_pattern(long ts_query_ptr, int pattern_index);
protected static native TSQueryPredicateStep[] ts_query_predicates_for_pattern(long ts_query_ptr, int pattern_index);
protected static native boolean ts_query_is_pattern_rooted(long ts_query_ptr, int pattern_index);
protected static native boolean ts_query_is_pattern_non_local(long ts_query_ptr, int pattern_index);
protected static native boolean ts_query_is_pattern_guaranteed_at_step(long ts_query_ptr, int byte_offset);
protected static native String ts_query_capture_name_for_id(long ts_query_ptr, int index);
protected static native int ts_query_capture_quantifier_for_id(long ts_query_ptr, int pattern_id, int index);
protected static native String ts_query_string_value_for_id(long ts_query_ptr, int index);
protected static native void ts_query_disable_capture(long ts_query_ptr, String name);
protected static native void ts_query_disable_pattern(long ts_query_ptr, int patter_index);
// ---- Query-cursor native methods ----
// These operate on a query-cursor handle from ts_query_cursor_new().
// next_match/next_capture receive a TSQueryMatch object and return a boolean.
// NOTE(review): presumably the passed TSQueryMatch is filled in by the native
// side and the boolean reports whether a result was produced — confirm.
protected static native long ts_query_cursor_new();
protected static native void ts_query_cursor_delete(long ts_query_cursor_ptr);
protected static native void ts_query_cursor_exec(long ts_query_cursor_ptr, long ts_query_ptr, TSNode node);
protected static native boolean ts_query_cursor_did_exceed_match_limit(long ts_query_cursor_ptr);
protected static native int ts_query_cursor_match_limit(long ts_query_cursor_ptr);
protected static native void ts_query_cursor_set_match_limit(long ts_query_cursor_ptr, int limit);
protected static native void ts_query_cursor_set_byte_range(long ts_query_cursor_ptr, int start_byte, int end_byte);
protected static native void ts_query_cursor_set_point_range(long ts_query_cursor_ptr, TSPoint start_point, TSPoint end_point);
protected static native boolean ts_query_cursor_next_match(long ts_query_cursor_ptr, TSQueryMatch match);
protected static native void ts_query_cursor_remove_match(long ts_query_cursor_ptr, int match_id);
protected static native boolean ts_query_cursor_next_capture(long ts_query_cursor_ptr, TSQueryMatch match);
// Tree-level counterpart of ts_parser_print_dot_graphs: takes a tree handle
// and a file descriptor.
protected static native void ts_tree_print_dot_graph(long ts_tree_ptr, FileDescriptor fileDescriptor);
// ---- Language native methods ----
// These operate on a language handle (long), the same kind of handle that
// setLanguage() obtains from TSLanguage.getPtr().
protected static native long ts_language_copy(long ts_language_ptr);
protected static native void ts_language_delete(long ts_language_ptr);
protected static native int ts_language_state_count(long ts_language_ptr);
protected static native int ts_language_next_state(long ts_language_ptr, int ts_state_id, int ts_symbol);
protected static native int ts_language_field_count(long ts_language_ptr);
protected static native String ts_language_field_name_for_id(long ts_language_ptr, int ts_field_id);
protected static native int ts_language_field_id_for_name(long ts_language_ptr, String field_name);
protected static native int ts_language_symbol_type(long ts_language_ptr, int ts_symbol);
protected static native int ts_language_symbol_count(long ts_language_ptr);
protected static native String ts_language_symbol_name(long ts_language_ptr, int ts_symbol);
protected static native int ts_language_symbol_for_name(long ts_language_ptr, String name, boolean is_named);
// Result is meant to be compared against TREE_SITTER_LANGUAGE_VERSION and
// TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION (see the setLanguage javadoc).
protected static native int ts_language_version(long ts_language_ptr);
// ---- Lookahead-iterator native methods ----
// ts_lookahead_iterator_new creates an iterator handle from a language handle
// and a state id; the remaining methods take that iterator handle.
protected static native long ts_lookahead_iterator_new(long ts_language_ptr, int ts_state_id);
protected static native void ts_lookahead_iterator_delete(long ts_lookahead_iterator_ptr);
protected static native boolean ts_lookahead_iterator_reset_state(long ts_lookahead_iterator_ptr, int ts_state_id);
protected static native boolean ts_lookahead_iterator_reset(long ts_lookahead_iterator_ptr, long ts_lang_ptr, int ts_state_id);
protected static native long ts_lookahead_iterator_language(long ts_lookahead_iterator_ptr);
protected static native boolean ts_lookahead_iterator_next(long ts_lookahead_iterator_ptr);
protected static native int ts_lookahead_iterator_current_symbol(long ts_lookahead_iterator_ptr);
protected static native String ts_lookahead_iterator_current_symbol_name(long ts_lookahead_iterator_ptr);
// Native parser handle: assigned once in the constructor from ts_parser_new()
// and passed as the first argument to every parser-level native call.
private final long ptr;
/**
 * Cleanup task for one native parser handle.
 *
 * When run, it frees the cancellation flag attached to the parser (if the
 * parser reports a non-zero flag handle), then frees the logger, and finally
 * deletes the parser itself.
 */
private static class TSParserCleanAction implements Runnable {
    /** Native parser handle this action is responsible for. */
    private final long ptr;

    public TSParserCleanAction(long ptr) {
        this.ptr = ptr;
    }

    @Override
    public void run() {
        // The flag handle has to be looked up while the parser still exists.
        final long cancellationFlagPtr = ts_parser_cancellation_flag(ptr);
        final boolean hasCancellationFlag = cancellationFlagPtr != 0;
        if (hasCancellationFlag) {
            free_cancellation_flag(cancellationFlagPtr);
        }
        free_logger(ptr);
        // The parser goes last: the two calls above still need its handle.
        ts_parser_delete(ptr);
    }
}
// Logger most recently passed to setLogger(); returned as-is by getLogger().
private TSLogger logger;
/**
 * Create a new parser.
 *
 * Allocates the native parser, attaches a freshly allocated cancellation flag
 * whose value is initialised to 0, and registers a clean action for the native
 * handle with {@code CleanerRunner}.
 */
public TSParser() {
    this.ptr = ts_parser_new();

    // Every parser gets its own cancellation flag, starting out as 0.
    final long flagPtr = alloc_cancellation_flag();
    write_cancellation_flag(flagPtr, 0L);
    ts_parser_set_cancellation_flag(this.ptr, flagPtr);

    // The clean action is given only the raw handle, not this object.
    final TSParserCleanAction cleanAction = new TSParserCleanAction(this.ptr);
    CleanerRunner.register(this, cleanAction);
}
/**
 * Get the parser's current logger.
 *
 * @return The logger last passed to {@link #setLogger(TSLogger)}, or
 *         {@code null} if none has been set.
 */
public TSLogger getLogger() {
    return this.logger;
}
/**
 * Set the logger that the parser should use during parsing.
 *
 * The logger is remembered on this object (see {@link #getLogger()}) and then
 * handed to the native parser.
 *
 * @param logger The logger that the parser should use.
 */
public void setLogger(TSLogger logger) {
    // Remember the logger first, then install it on the native parser.
    this.logger = logger;
    ts_parser_set_logger(this.ptr, logger);
}
/**
 * Set the language that the parser should use for parsing.
 *
 * Returns a boolean indicating whether the language was successfully
 * assigned. True means assignment succeeded. False means there was a version
 * mismatch: the language was generated with an incompatible version of the
 * Tree-sitter CLI. Check the language's version using {@code ts_language_version}
 * and compare it to this library's {@link TSParser#TREE_SITTER_LANGUAGE_VERSION} and
 * {@link TSParser#TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION} constants.
 *
 * @param language The language that the parser should use. Must not be {@code null}.
 *
 * @return True if the language was successfully applied. False otherwise.
 *
 * @throws NullPointerException if {@code language} is {@code null}; in that case
 *         neither this object nor the native parser is modified.
 */
public boolean setLanguage(TSLanguage language) {
    // Validate before touching any state: previously a null argument cleared
    // the remembered language and only then failed on language.getPtr().
    if (language == null) {
        throw new NullPointerException("language must not be null");
    }
    this.language = language;
    return ts_parser_set_language(ptr, language.getPtr());
}
/**
 * Use the parser to parse some source code stored in one contiguous string.
 *
 * {@code oldTree} has the same meaning as in
 * {@link #parse(byte[], TSTree, TSReader, TSInputEncoding) parse()}: pass
 * {@code null} for a first parse, or the previously parsed (and edited) tree
 * to allow reuse of unchanged parts.
 *
 * @param oldTree The old tree to use, if any; may be {@code null}.
 * @param input The source code to parse.
 *
 * @return The parsed {@link TSTree}, or {@code null} if the native parser
 *         produced no tree (see
 *         {@link #parse(byte[], TSTree, TSReader, TSInputEncoding) parse()}
 *         for the possible reasons).
 */
public TSTree parseString(TSTree oldTree, String input) {
    long oldTreePtr = oldTree == null ? 0 : oldTree.getPtr();
    long treePtr = ts_parser_parse_string(ptr, oldTreePtr, input);
    // A zero handle means no tree was produced; report that as null, as
    // parse() does, instead of wrapping an unusable handle in a TSTree.
    if (treePtr == 0) {
        return null;
    }
    return new TSTree(treePtr);
}
/**
 * Use the parser to parse some source code stored in one contiguous string
 * with a given encoding.
 *
 * {@code oldTree} and {@code input} work the same as in
 * {@link #parseString(TSTree, String) parseString()}. The final parameter
 * indicates whether the text is encoded as UTF8 or UTF16.
 *
 * @param oldTree The old tree to use, if any; may be {@code null}.
 * @param input The source code to parse.
 * @param encoding The encoding of the source code.
 *
 * @return The parsed {@link TSTree}, or {@code null} if the native parser
 *         produced no tree (see
 *         {@link #parse(byte[], TSTree, TSReader, TSInputEncoding) parse()}
 *         for the possible reasons).
 */
public TSTree parseStringEncoding(TSTree oldTree, String input, TSInputEncoding encoding) {
    long oldTreePtr = oldTree == null ? 0 : oldTree.getPtr();
    // The enum's ordinal is what the native side receives as the encoding id.
    long treePtr = ts_parser_parse_string_encoding(ptr, oldTreePtr, input, encoding.ordinal());
    // A zero handle means no tree was produced; report that as null, as
    // parse() does, instead of wrapping an unusable handle in a TSTree.
    if (treePtr == 0) {
        return null;
    }
    return new TSTree(treePtr);
}
/**
 * Use the parser to parse some source code and create a syntax tree.
 *
 * If you are parsing this document for the first time, pass {@code null} for
 * the {@code oldTree} parameter. Otherwise, if you have already parsed an
 * earlier version of this document and the document has since been edited,
 * pass the previous syntax tree so that the unchanged parts of it can be
 * reused. This will save time and memory. For this to work correctly, you
 * must have already edited the old syntax tree using the
 * {@link TSTree#edit(TSInputEdit) edit} function in a way that exactly
 * matches the source code changes.
 *
 * This function returns a syntax tree on success, and {@code null} on
 * failure. There are three possible reasons for failure:
 *
 * - The parser does not have a language assigned. Check for this using the
 *   {@link #getLanguage()} function.
 * - Parsing was cancelled due to a timeout that was set by an earlier call to
 *   the {@link #setTimeoutMicros(long) setTimeoutMicros()} function. You can
 *   resume parsing from where the parser left off by calling
 *   {@link #parse(byte[], TSTree, TSReader, TSInputEncoding) parse()} again
 *   with the same arguments. Or you can start parsing from scratch by first
 *   calling {@link #reset()}.
 * - Parsing was cancelled using a cancellation flag that was set by an
 *   earlier call to {@link #setCancellationFlag(long) setCancellationFlag()}.
 *   You can resume parsing from where the parser left off by calling
 *   {@link #parse(byte[], TSTree, TSReader, TSInputEncoding) parse()} again
 *   with the same arguments.
 *
 * @param buf Buffer to use while reading from reader.
 * @param oldTree The old tree to use, if any; may be {@code null}.
 * @param reader The reader to read source code from.
 * @param encoding The encoding of the source code.
 *
 * @return {@link TSTree} on success, {@code null} otherwise.
 */
public TSTree parse(byte[] buf, TSTree oldTree, TSReader reader, TSInputEncoding encoding) {
    // 0 stands for "no previous tree" on the native side.
    long previousTreePtr = 0;
    if (oldTree != null) {
        previousTreePtr = oldTree.getPtr();
    }
    final long parsedTreePtr =
            ts_parser_parse(ptr, buf, previousTreePtr, reader, encoding.ordinal());
    // A zero handle means the parse produced no tree.
    return parsedTreePtr != 0 ? new TSTree(parsedTreePtr) : null;
}
/**
 * Get the parser's current language.
 *
 * Each call asks the native parser for its language handle and wraps it in a
 * new {@code AnonymousLanguage}; the object passed to
 * {@link #setLanguage(TSLanguage)} is not returned.
 *
 * @return {@link TSLanguage} wrapping the native parser's language handle.
 */
public TSLanguage getLanguage() {
    final long languagePtr = ts_parser_language(ptr);
    return new AnonymousLanguage(languagePtr);
}
/**
 * Set the ranges of text that the parser should include when parsing.
 *
 * By default, the parser will always include entire documents. This function
 * allows you to parse only a *portion* of a document but still return a syntax
 * tree whose ranges match up with the document as a whole. You can also pass
 * multiple disjoint ranges.
 *
 * The parser does *not* take ownership of these ranges; it copies the data,
 * so it doesn't matter how these ranges are allocated.
 *
 * If the array's length is zero, then the entire document will be parsed.
 * Otherwise, the given ranges must be ordered from earliest to latest in the
 * document, and they must not overlap. That is, the following must hold for
 * all {@code i < length - 1}:
 * {@code ranges[i].end_byte <= ranges[i + 1].start_byte}
 *
 * If this requirement is not satisfied, the operation will fail, the ranges
 * will not be assigned, and this function will return {@code false}. On
 * success, this function returns {@code true}.
 *
 * @param ranges The ranges to include.
 * @return {@code true} on success, {@code false} otherwise.
 */
public boolean setIncludedRanges(TSRange[] ranges) {
    final boolean assigned = ts_parser_set_included_ranges(this.ptr, ranges);
    return assigned;
}
/**
 * Get the ranges of text that the parser will include when parsing.
 *
 * @return The array of {@link TSRange} reported by the native parser.
 */
public TSRange[] getIncludedRanges() {
    final TSRange[] includedRanges = ts_parser_included_ranges(this.ptr);
    return includedRanges;
}
/**
 * Instruct the parser to start the next parse from the beginning.
 *
 * If the parser previously failed because of a timeout or a cancellation,
 * then by default it will resume where it left off on the next call to
 * {@link #parse(byte[], TSTree, TSReader, TSInputEncoding) parse()} or other
 * parsing functions. If you don't want to resume, and instead intend to use
 * this parser to parse some other document, you must call {@link #reset()}
 * first.
 */
public void reset() {
    ts_parser_reset(this.ptr);
}
/**
 * Set the maximum duration in microseconds that parsing should be allowed to
 * take before halting.
 *
 * If parsing takes longer than this, it will halt early, returning
 * {@code null}. See
 * {@link #parse(byte[], TSTree, TSReader, TSInputEncoding) parse()} for more
 * information.
 *
 * @param timeoutMicros The maximum duration in microseconds.
 */
public void setTimeoutMicros(long timeoutMicros) {
    ts_parser_set_timeout_micros(this.ptr, timeoutMicros);
}
/**
 * Get the duration in microseconds that parsing is allowed to take.
 *
 * @return The maximum duration in microseconds, as reported by the native parser.
 */
public long getTimeoutMicros() {
    final long timeoutMicros = ts_parser_timeout_micros(this.ptr);
    return timeoutMicros;
}
/**
 * Set the value of the parser's cancellation flag.
 *
 * The given value is written into the flag that is currently attached to the
 * native parser (one is allocated and attached by the constructor); the flag
 * itself is not replaced. If the parser reads a non-zero value from the flag
 * during parsing, it will halt early, returning {@code null}. See
 * {@link #parse(byte[], TSTree, TSReader, TSInputEncoding) parse()} for more
 * information.
 *
 * @param flag The value to store in the cancellation flag.
 */
public void setCancellationFlag(long flag) {
    final long flagPtr = ts_parser_cancellation_flag(this.ptr);
    write_cancellation_flag(flagPtr, flag);
}
/**
 * Get the current value of the parser's cancellation flag.
 *
 * The value is read from the flag that is currently attached to the native
 * parser.
 *
 * @return The value stored in the cancellation flag.
 */
public long getCancellationFlag() {
    final long flagPtr = ts_parser_cancellation_flag(this.ptr);
    return get_cancellation_flag_value(flagPtr);
}
/**
* Set the file to which the parser should write debugging graphs
* during parsing. The graphs are formatted in the DOT language.
* You may want to pipe these graphs directly to a {@code dot(1)} process in order to generate
* SVG output. You can turn off this logging by passing {@code null} as file.
*
* @param file the file to which the parser should write debugging graphs. Pass {@code null}
* to disable logging and close the file if any.
*
* @throws IOException if the file cannot be written to.
*/
public void printDotGraphs(File file) throws IOException {
if(file == null) {
// No target file: hand a null descriptor to the native side.
ts_parser_print_dot_graphs(ptr, null);
return;
}
// The stream is opened only to obtain its FileDescriptor, and this method
// never closes it.
// NOTE(review): presumably the native side takes over the descriptor — confirm,
// and confirm nothing on the Java side can close it while the parser is still
// writing graphs (no reference to the stream is kept after this method returns).
FileOutputStream outputStream = new FileOutputStream(file);
ts_parser_print_dot_graphs(ptr, outputStream.getFD());
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy