All Downloads are FREE. Search and download functionalities are using the official Maven repository.

ai.vespa.schemals.index.SchemaIndex Maven / Gradle / Ivy

There is a newer version: 8.441.21
Show newest version
package ai.vespa.schemals.index;

import java.net.URI;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Optional;

import ai.vespa.schemals.common.ClientLogger;
import ai.vespa.schemals.index.Symbol.SymbolStatus;
import ai.vespa.schemals.index.Symbol.SymbolType;
import ai.vespa.schemals.parser.ast.annotationElm;
import ai.vespa.schemals.parser.ast.annotationOutside;
import ai.vespa.schemals.parser.ast.constantName;
import ai.vespa.schemals.parser.ast.documentElm;
import ai.vespa.schemals.parser.ast.documentSummary;
import ai.vespa.schemals.parser.ast.fieldElm;
import ai.vespa.schemals.parser.ast.fieldSetElm;
import ai.vespa.schemals.parser.ast.functionElm;
import ai.vespa.schemals.parser.ast.inputName;
import ai.vespa.schemals.parser.ast.namedDocument;
import ai.vespa.schemals.parser.ast.onnxModel;
import ai.vespa.schemals.parser.ast.onnxModelInProfile;
import ai.vespa.schemals.parser.ast.rankProfile;
import ai.vespa.schemals.parser.ast.rootSchema;
import ai.vespa.schemals.parser.ast.structDefinitionElm;
import ai.vespa.schemals.parser.ast.structFieldDefinition;

public class SchemaIndex {
    public static final HashMap, SymbolType> IDENTIFIER_TYPE_MAP = new HashMap<>() {{
        put(annotationElm.class, SymbolType.ANNOTATION);
        put(annotationOutside.class, SymbolType.ANNOTATION);
        put(rootSchema.class, SymbolType.SCHEMA);
        put(documentElm.class, SymbolType.DOCUMENT);
        put(namedDocument.class, SymbolType.DOCUMENT);
        put(fieldElm.class, SymbolType.FIELD);
        put(fieldSetElm.class, SymbolType.FIELDSET);
        put(structDefinitionElm.class, SymbolType.STRUCT);
        put(structFieldDefinition.class, SymbolType.FIELD);
        put(functionElm.class, SymbolType.FUNCTION);
        put(inputName.class, SymbolType.QUERY_INPUT);
        put(constantName.class, SymbolType.RANK_CONSTANT);
        put(onnxModel.class, SymbolType.ONNX_MODEL);
    }};

    public static final HashMap, SymbolType> IDENTIFIER_WITH_DASH_TYPE_MAP = new HashMap<>() {{
        put(rankProfile.class, SymbolType.RANK_PROFILE);
        put(documentSummary.class, SymbolType.DOCUMENT_SUMMARY);
    }};

    private ClientLogger logger;
    private FieldIndex fieldIndex;

    private Map> symbolDefinitions;
    private Map> symbolReferences;
    private Map definitionOfReference;
    private Set unresolvedSymbols;

    // TODO: bad to use string as node type here.
    private InheritanceGraph documentInheritanceGraph;
    private InheritanceGraph structInheritanceGraph;
    private InheritanceGraph rankProfileInheritanceGraph;
    private InheritanceGraph documentSummaryInheritanceGraph;

    // This is an inheritance graph, even though it doesn't model *inheritance* per se.
    private InheritanceGraph documentReferenceGraph;
    
    public SchemaIndex(ClientLogger logger) {
        this.logger = logger;
        this.documentInheritanceGraph        = new InheritanceGraph<>();
        this.structInheritanceGraph          = new InheritanceGraph<>();
        this.rankProfileInheritanceGraph     = new InheritanceGraph<>();
        this.documentSummaryInheritanceGraph = new InheritanceGraph<>();
        this.documentReferenceGraph          = new InheritanceGraph<>();

        this.symbolDefinitions     = new HashMap<>();
        this.symbolReferences      = new HashMap<>();
        this.definitionOfReference = new HashMap<>();
        this.unresolvedSymbols     = new HashSet<>();

        for (SymbolType type : SymbolType.values()) {
            this.symbolDefinitions.put(type, new ArrayList());
        }

        this.fieldIndex = new FieldIndex(logger, this);
    }

    public FieldIndex fieldIndex() {
        return this.fieldIndex;
    }

    /**
     * Returns the inheritance graph for documents in the index.
     *
     * @return the inheritance graph for documents
     */
    public InheritanceGraph getDocumentInheritanceGraph() { return documentInheritanceGraph; }

    /**
     * Returns the inheritance graph for rank-profiles in the index.
     *
     * @return a reference to the inheritance graph for rank-profiles
     */
    public InheritanceGraph getRankProfileInheritanceGraph() { return rankProfileInheritanceGraph; }

    /**
     * Returns the graph over document references
     *
     * @return the document reference graph
     */
    public InheritanceGraph getDocumentReferenceGraph() { return documentReferenceGraph; }

    /**
     * Clears the index for symbols in the specified file
     *
     * @param fileURI the URI of the document to be cleared
     */
    public void clearDocument(String fileURI) {

        // create once instead of repeatedly for each symbol. Slow operation.
        URI fileURIURI = URI.create(fileURI);

        // First: remove symbols listed as a reference for a definition
        // At the same time, remove the reference from the reference -> definition lookup table
        for (Map.Entry> entry : symbolReferences.entrySet()) {
            List references = entry.getValue();

            List replacedReferences = new ArrayList<>();
            for (Symbol symbol : references) {
                if (symbol.fileURIEquals(fileURIURI)) {
                    definitionOfReference.remove(symbol);
                } else {
                    replacedReferences.add(symbol);
                }
            }

            entry.setValue(replacedReferences);
        }

        // For each definition: remove their list of references and then remove the definition itself.
        for (var list : symbolDefinitions.values()) {
            for (int i = list.size() - 1; i >= 0; i--) {
                Symbol symbol = list.get(i);
                if (symbol.fileURIEquals(fileURIURI)) {
                    symbolReferences.remove(symbol);

                    structInheritanceGraph.clearInheritsList(symbol);
                    rankProfileInheritanceGraph.clearInheritsList(symbol);
                    documentSummaryInheritanceGraph.clearInheritsList(symbol);
                    documentReferenceGraph.clearInheritsList(symbol);

                    list.remove(i);
                }
            }
        }

        // Clear unresolved symbols from this document
        this.unresolvedSymbols.removeIf(symbol -> symbol.fileURIEquals(fileURIURI));

        this.fieldIndex.clearFieldsByURI(fileURIURI);

        if (fileURI.endsWith(".sd")) {
            documentInheritanceGraph.clearInheritsList(fileURI);
            // Add the node back:)
            documentInheritanceGraph.createNodeIfNotExists(fileURI);
        }
    }

    /**
     * Searches for the specified symbol in the index.
     *
     * @param symbol The symbol to find, should be UNRESOLVED
     * @return An Optional containing the found symbol, or an empty Optional if the symbol is not found.
     */
    public Optional findSymbol(Symbol symbol) {
        return findSymbol(symbol.getScope(), symbol.getType(), symbol.getShortIdentifier());
    }

    public Optional findSymbol(Symbol scope, SymbolType type, String shortIdentifier) {
        List results = findSymbols(scope, type, shortIdentifier);
        if (results.isEmpty()) return Optional.empty();

        return Optional.of(results.get(0));
    }

    /**
     * Uses symbol.getScope as the scope to search in
     */
    public Optional findSymbolInScope(Symbol symbol) {
        return findSymbolInScope(symbol.getScope(), symbol.getType(), symbol.getShortIdentifier());
    }

    /**
     * Searches for the specified symbol in the given scope. Checks inherited scopes as well,
     * but not containing scopes.
     * */
    public Optional findSymbolInScope(Symbol scope, SymbolType type, String shortIdentifier) {
        List results = findSymbolsInScope(scope, type, shortIdentifier);
        if (results.isEmpty()) return Optional.empty();

        return Optional.of(results.get(0));
    }

    public List findSymbols(Symbol symbol) {
        return findSymbols(symbol.getScope(), symbol.getType(), symbol.getShortIdentifier());
    }

    /**
     * Searches for symbols in the schema index that match the given symbol.
     *
     * @return A list of symbols that match the given symbol.
     */
    public List findSymbols(Symbol scope, SymbolType type, String shortIdentifier) {
        // First candidates are all symbols with correct type and correct short identifier

        // Special case for schema and document because a schema can sometimes refer to a document and vice versa
        if (type == SymbolType.SCHEMA || type == SymbolType.DOCUMENT) {
            SymbolType firstCheck = (type == SymbolType.SCHEMA ? SymbolType.SCHEMA : SymbolType.DOCUMENT);
            List schemaDefinitions = 
                symbolDefinitions.get(firstCheck)
                               .stream()
                               .filter(symbolDefinition -> symbolDefinition.getShortIdentifier().equals(shortIdentifier))
                               .toList();

            if (!schemaDefinitions.isEmpty()) return schemaDefinitions;
            return symbolDefinitions.get(firstCheck == SymbolType.SCHEMA ? SymbolType.DOCUMENT : SymbolType.SCHEMA)
                               .stream()
                               .filter(symbolDefinition -> symbolDefinition.getShortIdentifier().equals(shortIdentifier))
                               .toList();
        }

        // logger.println("Looking for symbol: " + shortIdentifier + " type " + type.toString());
        while (scope != null) {
            // logger.println("  Checking scope: " + scope.getLongIdentifier());
            List result = findSymbolsInScope(scope, type, shortIdentifier);

            if (!result.isEmpty()) {
                return result;
            }
            scope = scope.getScope();
        }

        return new ArrayList<>();
    }

    /*
     * Given a scope, type and short identifier, find definitions defined inside the actual scope. 
     * Will not search inheritance graphs.
     */
    private Optional findSymbolInConcreteScope(Symbol scope, SymbolType type, String shortIdentifier) {
        for (Symbol symbolDefinition : symbolDefinitions.get(type)) {
            if (!symbolDefinition.getShortIdentifier().equals(shortIdentifier)) continue;
            if (!scope.equals(symbolDefinition.getScope())) continue;
            return Optional.of(symbolDefinition);
        }
        return Optional.empty();
    }


    /*
     * Find symbols with given type and short identifier that are valid in scope
     * Will search in inherited scopes
     */
    public List findSymbolsInScope(Symbol scope, SymbolType type, String shortIdentifier) {
        if (scope.getType() == SymbolType.RANK_PROFILE) {
            return rankProfileInheritanceGraph.findFirstMatches(scope, rankProfileDefinitionSymbol -> {
                var definedInScope = findSymbolInConcreteScope(rankProfileDefinitionSymbol, type, shortIdentifier);
                if (definedInScope.isEmpty()) return null;
                return definedInScope;
            }).stream().map(result -> result.result.get()).toList();
        } else if (scope.getType() == SymbolType.STRUCT) {
            return structInheritanceGraph.findFirstMatches(scope, rankProfileDefinitionSymbol -> {
                var definedInScope = findSymbolInConcreteScope(rankProfileDefinitionSymbol, type, shortIdentifier);
                if (definedInScope.isEmpty()) return null;
                return definedInScope;
            }).stream().map(result -> result.result.get()).toList();
        } else if (scope.getType() == SymbolType.DOCUMENT || scope.getType() == SymbolType.SCHEMA) {
            return documentInheritanceGraph.findFirstMatches(scope.getFileURI(), ancestorURI -> {

                List match = symbolDefinitions.get(type)
                    .stream()
                    .filter(symbolDefinition -> symbolDefinition.getScope() != null
                            && (symbolDefinition.getScope().getType() == SymbolType.SCHEMA || symbolDefinition.getScope().getType() == SymbolType.DOCUMENT)
                            && symbolDefinition.getShortIdentifier().equals(shortIdentifier)
                            && symbolDefinition.getScope().getFileURI().equals(ancestorURI))
                    .toList();

                if (match.isEmpty()) return null;

                return Optional.of(match.get(0));
            }).stream().map(result -> result.result.get()).toList();
        }

        Optional symbol = findSymbolInConcreteScope(scope, type, shortIdentifier);
        List result = new ArrayList<>();
        if (symbol.isPresent())result.add(symbol.get());
        return result;
    }

    public List findSymbolsInScope(Symbol reference) {
        return findSymbolsInScope(reference.getScope(), reference.getType(), reference.getFileURI());
    }

    public boolean isInScope(Symbol symbol, Symbol scope) {
        if (scope == null) return true; // lets say every symbol is in the empty scope
        if (scope.getType() == SymbolType.RANK_PROFILE) {
            return !rankProfileInheritanceGraph.findFirstMatches(scope, 
                    rankProfileDefinitionSymbol -> {
                        if (rankProfileDefinitionSymbol.equals(symbol.getScope())) {
                            return Boolean.valueOf(true);
                        }
                        return null;
            }).isEmpty();
        } else if (scope.getType() == SymbolType.STRUCT) {
            return !structInheritanceGraph.findFirstMatches(scope, 
                    structDefinitionSymbol -> {
                        if (structDefinitionSymbol.equals(symbol.getScope())) {
                            return Boolean.valueOf(true);
                        }
                        return null;
            }).isEmpty();
        } else if ((symbol.getScope() == null || symbol.getScope().getType() == SymbolType.SCHEMA || symbol.getScope().getType() == SymbolType.DOCUMENT) && 
                (scope.getType() == SymbolType.SCHEMA || scope.getType() == SymbolType.DOCUMENT)) {
            return !documentInheritanceGraph.findFirstMatches(scope.getFileURI(), 
                ancestorURI -> {
                    if (symbol.getFileURI().equals(ancestorURI)) {
                        return Boolean.valueOf(true);
                    }
                    return null;
                }
            ).isEmpty();
        }

        return scope.equals(symbol.getScope());
    }

    /**
     * Retrieves the definition of a symbol from a map.
     * We have three versions of this, because definitions can reference other definitions. 
     * For example, struct-field is both a definition (you want to jump there if you go-to-definition on field.structfield somewhere else), 
     * and a reference (to the field inside the struct).
     *  {@link getSymbolDefinition} returns identity if you supply a definition.
     *  {@link getNextSymbolDefinition} always tries to interpret the argument as a reference (jumping one step even if argument is a definition).
     *  {@link getFirstSymbolDefinition} can be thought of as successive applications of getNextSymbolDefinition and returns the last valid definition.
     *
     *
     * @param reference The reference to retrieve the definition for.
     * @return An Optional containing the definition of the symbol, or an empty Optional if the symbol is not found.
     */
    public Optional getSymbolDefinition(Symbol reference) {
        if (reference.getStatus() == SymbolStatus.DEFINITION) return Optional.of(reference);
        return Optional.ofNullable(definitionOfReference.get(reference));
    }

    public Optional getNextSymbolDefinition(Symbol reference) {
        return Optional.ofNullable(definitionOfReference.get(reference));
    }

    public Optional getFirstSymbolDefinition(Symbol symbol) {
        while (definitionOfReference.get(symbol) != null) {
            symbol = definitionOfReference.get(symbol);
        }
        if (symbol.getStatus() == SymbolStatus.DEFINITION) return Optional.of(symbol);
        return Optional.empty();
    }

    /**
     * Returns a list of symbol references for the given symbol definition.
     *
     * @param definition The symbol for which to retrieve the references.
     * @return A list of symbol references.
     */
    public List getSymbolReferences(Symbol definition) {
        List results = symbolReferences.get(definition);

        if (results == null) return new ArrayList<>();

        return List.copyOf(results);
    }

    /**
     * Retrieves the symbol definition of the specified schema name.
     *
     * @param shortIdentifier the short identifier of the symbol to retrieve
     * @return an Optional containing the symbol if found, or an empty Optional if not found
     */
    public Optional getSchemaDefinition(String shortIdentifier) {
        List list = symbolDefinitions.get(SymbolType.SCHEMA);

        for (Symbol symbol : list) {
            if (symbol.getShortIdentifier().equals(shortIdentifier)) {
                return Optional.of(symbol);
            }
        }

        list = symbolDefinitions.get(SymbolType.DOCUMENT);
        for (Symbol symbol : list) {
            if (symbol.getShortIdentifier().equals(shortIdentifier)) {
                return Optional.of(symbol);
            }
        }

        return Optional.empty();
    }

    /**
     * Retrieves a list of symbols of the specified type from the schema index.
     *
     * @param type The type of symbols to retrieve.
     * @return A list of symbols of the specified type, or an empty list if no symbols are found.
     */
    public List getSymbolsByType(SymbolType type) {
        return symbolDefinitions.get(type);
    }

    /**
     * Inserts a symbol definition into the schema index.
     *
     * @param symbol the symbol to be inserted
     */
    public void insertSymbolDefinition(Symbol symbol) {

        List list = symbolDefinitions.get(symbol.getType());
        list.add(symbol);

        symbolReferences.put(symbol, new ArrayList<>());

        if (symbol.getType() == SymbolType.FIELD) {
            fieldIndex.insertFieldDefinition(symbol);
        }
    }

    /**
     * Inserts a symbol reference into the schema index.
     * Make sure that he definition exists before inserting
     *
     * @param definition the symbol being defined
     * @param reference the symbol being referenced
     */
    public void insertSymbolReference(Symbol definition, Symbol reference) {
        List list = symbolReferences.get(definition);
        if (list == null) {
            throw new IllegalArgumentException("Could not insert symbol reference" + reference + " before the definition is inserted");
        }

        definitionOfReference.put(reference, definition);
        list.add(reference);

        if (reference.getType() == SymbolType.RANK_PROFILE && reference.getScope() != null) {
            tryRegisterRankProfileInheritance(reference.getScope(), definition);
        }

        if (reference.getType() == SymbolType.STRUCT && reference.getScope() != null) {
            tryRegisterStructInheritance(reference.getScope(), definition);
        }

        if (reference.getType() == SymbolType.DOCUMENT_SUMMARY && reference.getScope() != null) {
            tryRegisterDocumentSummaryInheritance(reference.getScope(), definition);
        }
    }


    /**
     * Deletes the symbol reference from the schema index.
     *
     * @param symbol The symbol reference to be deleted.
     */
    public void deleteSymbolReference(Symbol symbol) {
        Symbol definition = definitionOfReference.remove(symbol);
        if (definition != null) {
            List references = symbolReferences.get(definition);
            if (references != null) {
                references.remove(symbol);
            }
        }
    }

    /**
     * Tries to register document inheritance between a child document and a parent document.
     *
     * @param childURI  the URI of the child document
     * @param parentURI the URI of the parent document
     * @return true if the document inheritance was successfully registered, false otherwise
     */
    public boolean tryRegisterDocumentInheritance(String childURI, String parentURI) {
        return documentInheritanceGraph.addInherits(childURI, parentURI);
    }

    /**
     * Tries to register struct inheritance between the child symbol and the parent symbol.
     *
     * @param childSymbol The child symbol representing the struct.
     * @param parentSymbol The parent symbol representing the inherited struct.
     * @return false if the struct inheritance was successfully registered, false otherwise.
     */
    public boolean tryRegisterStructInheritance(Symbol childSymbol, Symbol parentSymbol) {
        return structInheritanceGraph.addInherits(childSymbol, parentSymbol);
    }

    /**
     * Tries to register the inheritance relationship between a child rank profile and a parent rank profile.
     *
     * @param childSymbol The symbol representing the child rank profile.
     * @param parentSymbol The symbol representing the parent rank profile.
     * @return true if the inheritance relationship was successfully registered, false otherwise.
     */
    public boolean tryRegisterRankProfileInheritance(Symbol childSymbol, Symbol parentSymbol) {
        return rankProfileInheritanceGraph.addInherits(childSymbol, parentSymbol);
    }

    /**
     * Tries to register the inheritance relationship between a child document-summary and a parent document-summary.
     *
     * @param childSymbol The symbol representing the child document-summary.
     * @param parentSymbol The symbol representing the parent document-summary.
     * @return true if the inheritance relationship was successfully registered, false otherwise.
     */
    public boolean tryRegisterDocumentSummaryInheritance(Symbol childSymbol, Symbol parentSymbol) {
        return documentSummaryInheritanceGraph.addInherits(childSymbol, parentSymbol);
    }

    public boolean tryRegisterDocumentReference(Symbol childSymbol, Symbol parentSymbol) {
        return documentReferenceGraph.addInherits(childSymbol, parentSymbol);
    }

    /**
     * Searches for symbols in the specified scope of the given type.
     *
     * @param scope The symbol representing the scope to search in.
     * @param type The type of symbols to find.
     * @return A list of symbols found in the specified scope and of the given type.
     */
    public List listSymbolsInScope(Symbol scope, SymbolType type) {
        return listSymbolsInScope(scope, EnumSet.of(type));
    }

    /**
     * Searches for symbols in the specified scope of one of the given types.
     *
     * @param scope The symbol representing the scope to search in. If null it will return all symbols with the given type.
     * @param types The types of symbols to find.
     * @return A list of symbols found in the specified scope and of one of the given types.
     */
    public List listSymbolsInScope(Symbol scope, EnumSet types) {
        List ret = new ArrayList<>();
        for (SymbolType type : types) {
            ret.addAll(symbolDefinitions.get(type)
                                    .stream()
                                    .filter(symbol -> isInScope(symbol, scope))
                                    .toList()
            );
        }
        return ret;
    }

    public void addUnresolvedSymbol(Symbol unresolvedSymbol) {
        unresolvedSymbols.add(unresolvedSymbol);
    }

    public List getUnresolvedSymbols() {
        unresolvedSymbols.removeIf(symbol -> getSymbolDefinition(symbol).isPresent());
        return List.copyOf(unresolvedSymbols);
    }

    /**
     * Dumps the index to the console.
     */
    public void dumpIndex() {

        logger.info(" === SYMBOL DEFINITIONS === ");
        for (var entry : symbolDefinitions.entrySet()) {
            logger.info("TYPE: " + entry.getKey());

            for (var symbol : entry.getValue()) {
                logger.info("    " + symbol);
            }
        }

        logger.info("\n === SYMBOL DEFINITION REFERENCES === ");
        for (var entry : symbolReferences.entrySet()) {
            logger.info(entry.getKey());

            for (var symbol : entry.getValue()) {
                logger.info("    " + symbol);
            }
        }

        logger.info("\n === REFERENCES TO DEFINITIONS ===");
        for (var entry : definitionOfReference.entrySet()) {
            String toPrint = String.format("%-50s -> %s", entry.getKey(), entry.getValue());
            logger.info(toPrint);
        }

        logger.info("\n === DOCUMENT INHERITANCE === ");
        documentInheritanceGraph.dumpAllEdges(logger);

        logger.info(" === STRUCT INHERITANCE === ");
        structInheritanceGraph.dumpAllEdges(logger);

        logger.info(" === RANK PROFILE INHERITANCE === ");
        rankProfileInheritanceGraph.dumpAllEdges(logger);

        logger.info(" === DOCUMENT REFERENCES === ");
        documentReferenceGraph.dumpAllEdges(logger);

        logger.info(" === FIELD INDEX === ");
        fieldIndex.dumpIndex();
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy