All downloads are free. The search and download features use the official Maven repository.

ai.vespa.schemals.index.FieldIndex Maven / Gradle / Ivy

There is a newer version: 8.458.13
Show newest version
package ai.vespa.schemals.index;

import java.net.URI;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Optional;
import java.util.Set;

import com.yahoo.schema.parser.ParsedType.Variant;

import ai.vespa.schemals.common.ClientLogger;
import ai.vespa.schemals.index.Symbol.SymbolStatus;
import ai.vespa.schemals.index.Symbol.SymbolType;
import ai.vespa.schemals.parser.ast.dataType;
import ai.vespa.schemals.parser.ast.fieldOutsideDoc;
import ai.vespa.schemals.parser.ast.importField;
import ai.vespa.schemals.tree.SchemaNode;

/**
 * FieldIndex
 * For storing metadata about fields: the node holding the data type, the indexing types
 * registered for the field, and whether the field is considered to be inside the document.
 * It is owned by a SchemaIndex, so there is a one-to-one correspondence
 */
public class FieldIndex {

    public enum IndexingType {
        ATTRIBUTE,
        INDEX,
        SUMMARY
    }

    /*
     * Simulating a struct because record is immutable
     */
    public class FieldIndexEntry {
        // May be null when the data type node could not be resolved (yet).
        public SchemaNode dataTypeNode;
        public EnumSet<IndexingType> indexingTypes = EnumSet.noneOf(IndexingType.class);
        public boolean isInsideDoc = true;

        FieldIndexEntry(SchemaNode dataTypeNode, boolean isInsideDoc) {
            this.dataTypeNode = dataTypeNode;
            this.isInsideDoc = isInsideDoc;
        }

        @Override
        public String toString() {
            return (dataTypeNode == null ? " unknown type" : dataTypeNode.toString()) + " " + indexingTypes.toString() + " " + (isInsideDoc ? " in document" : " in schema");
        }
    }

    // Key is a field definition symbol.
    private final Map<Symbol, FieldIndexEntry> database = new HashMap<>();
    private final ClientLogger logger;
    private final SchemaIndex schemaIndex;

    public FieldIndex(ClientLogger logger, SchemaIndex schemaIndex) {
        this.logger = logger;
        this.schemaIndex = schemaIndex;
    }

    /**
     * Removes every entry whose field definition symbol reports (via fileURIEquals)
     * that it belongs to the given file.
     *
     * @param fileURI URI of the file whose fields should be dropped
     */
    public void clearFieldsByURI(URI fileURI) {
        // Removing from the key set view also removes the corresponding mapping.
        database.keySet().removeIf(fieldDefinition -> fieldDefinition.fileURIEquals(fileURI));
    }

    /**
     * Registers a field definition. Does nothing if the definition is already stored.
     *
     * @param fieldDefinition symbol with status DEFINITION and type FIELD
     * @throws IllegalArgumentException if the symbol is not a field definition
     */
    public void insertFieldDefinition(Symbol fieldDefinition) {
        if (fieldDefinition.getStatus() != SymbolStatus.DEFINITION || fieldDefinition.getType() != SymbolType.FIELD) {
            throw new IllegalArgumentException("Only field definitions should be stored in FieldIndex!");
        }
        if (database.containsKey(fieldDefinition)) return;

        SchemaNode dataTypeNode = resolveFieldDataTypeNode(fieldDefinition);
        database.put(fieldDefinition, new FieldIndexEntry(dataTypeNode, resolveIsInsideDoc(fieldDefinition)));
    }

    /**
     * Adds an indexing type to a field, inserting the field first if it is not stored yet.
     *
     * @param fieldDefinition symbol of the field definition
     * @param indexingType    the indexing type to add
     * @throws IllegalArgumentException if the symbol is not a field definition
     */
    public void addFieldIndexingType(Symbol fieldDefinition, IndexingType indexingType) {
        insertFieldDefinition(fieldDefinition);

        FieldIndexEntry entry = database.get(fieldDefinition);
        entry.indexingTypes.add(indexingType);

        // Attribute propagates from struct-field
        if (indexingType == IndexingType.ATTRIBUTE && isScopedByFieldDefinition(fieldDefinition)) {
            addFieldIndexingType(fieldDefinition.getScope(), indexingType);
        }
    }

    /**
     * Looks up the node holding the data type of a stored field.
     * If the node was not resolved when the field was inserted, resolution is attempted again
     * and the result is stored in the entry.
     *
     * @param fieldDefinition symbol of the field definition
     * @return the data type node, or empty if the field is not stored or the node cannot be resolved
     */
    public Optional<SchemaNode> getFieldDataTypeNode(Symbol fieldDefinition) {
        FieldIndexEntry entry = database.get(fieldDefinition);
        if (entry == null) return Optional.empty();
        if (entry.dataTypeNode != null) return Optional.of(entry.dataTypeNode);

        // Try to resolve it
        entry.dataTypeNode = resolveFieldDataTypeNode(fieldDefinition);
        return Optional.ofNullable(entry.dataTypeNode);
    }

    /**
     * @param fieldDefinition symbol of the field definition
     * @return a copy of the indexing types registered for the field; an empty set if the field is not stored
     */
    public EnumSet<IndexingType> getFieldIndexingTypes(Symbol fieldDefinition) {
        FieldIndexEntry entry = database.get(fieldDefinition);

        if (entry == null) return EnumSet.noneOf(IndexingType.class);

        // Copy so callers cannot modify the stored set.
        return EnumSet.copyOf(entry.indexingTypes);
    }

    /**
     * @param fieldDefinition symbol of the field definition
     * @return the stored inside-document flag; false if the field is not stored
     */
    public boolean getIsInsideDoc(Symbol fieldDefinition) {
        FieldIndexEntry entry = database.get(fieldDefinition);
        if (entry == null) return false;
        return entry.isInsideDoc;
    }

    /*
     * Some fields have struct, array, map etc. as their data type.
     * Fields inside the struct (or key/value for the case of map) are accessible from the field
     * with '.'-syntax.
     * This function tries to find the definition of said struct (or map) if it exists.
     * For a map, the field definition itself is returned.
     */
    public Optional<Symbol> findFieldStructDefinition(Symbol fieldDefinition) {
        Optional<SchemaNode> dataTypeNode = getFieldDataTypeNode(fieldDefinition);
        if (dataTypeNode.isEmpty()) return Optional.empty();

        SchemaNode typeNode = dataTypeNode.get();

        if (typeNode.hasSymbol()) {
            // TODO: handle non struct?
            if (typeNode.getSymbol().getType() != SymbolType.STRUCT) return Optional.empty();

            return schemaIndex.getSymbolDefinition(typeNode.getSymbol());
        }

        dataType originalNode = (dataType) typeNode.getOriginalSchemaNode();
        Variant variant = originalNode.getParsedType().getVariant();

        if (variant == Variant.MAP) {
            return Optional.of(fieldDefinition);
        }

        if (variant == Variant.ARRAY) {
            // NOTE(review): child index 2 is presumably the element type of the array - confirm against the grammar.
            if (typeNode.size() < 3 || !typeNode.get(2).isASTInstance(dataType.class)) return Optional.empty();

            SchemaNode innerType = typeNode.get(2);

            if (!innerType.hasSymbol() || innerType.getSymbol().getType() != SymbolType.STRUCT) return Optional.empty();

            return schemaIndex.getSymbolDefinition(innerType.getSymbol());
        }

        return Optional.empty();
    }

    /**
     * Try to find the node that holds the dataType element
     * Also try to not fall into an infinite loop. It could possibly happen if there are cyclic field references somehow
     *
     * @return the data type node, or null if it could not be found
     */
    private SchemaNode resolveFieldDataTypeNode(Symbol fieldDefinition) {
        // Fall back to the given symbol if the schema index does not report a first definition,
        // instead of failing on an empty Optional.
        Symbol firstDefinition = schemaIndex.getFirstSymbolDefinition(fieldDefinition).orElse(fieldDefinition);
        return resolveFieldDataTypeNode(firstDefinition, new HashSet<>());
    }

    private SchemaNode resolveFieldDataTypeNode(Symbol fieldDefinition, Set<Symbol> visited) {
        // add() returns false when the symbol was already visited, i.e. we are in a cycle.
        if (!visited.add(fieldDefinition)) return null;

        SchemaNode fieldDefinitionNode = fieldDefinition.getNode();

        if (fieldDefinitionNode.isASTInstance(dataType.class)) {
            // For map key and value
            return fieldDefinitionNode;
        }

        if (fieldDefinitionNode.getParent().isASTInstance(importField.class)) {
            // NOTE(review): two siblings back is presumably the reference to the imported field - confirm against the grammar.
            SchemaNode importReferenceNode = fieldDefinitionNode.getPreviousSibling().getPreviousSibling();
            if (!importReferenceNode.hasSymbol() || importReferenceNode.getSymbol().getStatus() != SymbolStatus.REFERENCE) return null;
            Optional<Symbol> referencedField = schemaIndex.getSymbolDefinition(importReferenceNode.getSymbol());
            if (referencedField.isEmpty()) return null;
            // The imported field takes the data type of the field it refers to.
            return resolveFieldDataTypeNode(referencedField.get(), visited);
        }

        // Otherwise the data type node, if any, is expected two siblings after the definition node.
        SchemaNode nextSibling = fieldDefinitionNode.getNextSibling();
        if (nextSibling == null) return null;

        SchemaNode candidate = nextSibling.getNextSibling();
        if (candidate != null && candidate.isASTInstance(dataType.class)) {
            return candidate;
        }
        return null;
    }

    /**
     * Determines the inside-document flag for a field definition.
     * A definition scoped by another field definition takes the answer of that enclosing field.
     * Imported fields and fields under a fieldOutsideDoc node are not inside the document.
     */
    private boolean resolveIsInsideDoc(Symbol fieldDefinition) {
        // todo: struct field definition
        if (isScopedByFieldDefinition(fieldDefinition)) {
            return resolveIsInsideDoc(fieldDefinition.getScope());
        }
        SchemaNode fieldDefinitionNode = fieldDefinition.getNode();
        if (fieldDefinitionNode.getParent().isASTInstance(importField.class)) return false;
        if (fieldDefinitionNode.getParent().getParent().isASTInstance(fieldOutsideDoc.class)) return false;
        return true;
    }

    /**
     * Checks whether the scope of the given symbol is itself a field definition.
     */
    private static boolean isScopedByFieldDefinition(Symbol symbol) {
        Symbol scope = symbol.getScope();
        return scope != null
                && scope.getType() == SymbolType.FIELD
                && scope.getStatus() == SymbolStatus.DEFINITION;
    }

    /**
     * Logs every stored field definition together with its entry.
     */
    public void dumpIndex() {
        for (var entry : database.entrySet()) {
            logger.info(entry.getKey().toString() + " -> " + entry.getValue().toString());
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy