All Downloads are FREE. The search and download functionality uses the official Maven repository.

org.codelibs.fess.suggest.entity.SuggestItem Maven / Gradle / Ivy

There is a newer version: 14.19.0
Show newest version
/*
 * Copyright 2012-2024 CodeLibs Project and the Others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 */
package org.codelibs.fess.suggest.entity;

import java.time.Clock;
import java.time.Instant;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.codelibs.core.lang.StringUtil;
import org.codelibs.fess.suggest.constants.FieldNames;
import org.codelibs.fess.suggest.constants.SuggestConstants;
import org.codelibs.fess.suggest.util.SuggestUtil;
import org.opensearch.common.Nullable;

/**
 * A single suggestion entry: the suggestion text, its per-word readings, the
 * metadata attached to it (fields, tags, roles, languages, kinds), its
 * frequency counters and user boost, and a timestamp.
 *
 * <p>Instances can be converted to and from the {@code Map<String, Object>}
 * document form keyed by {@link FieldNames}, merged with each other, and
 * rendered as a JSON string.</p>
 */
public class SuggestItem {

    /**
     * Kind of source a suggestion came from. {@link #toString()} returns the
     * lower-case name that is written to and read from the stored document.
     */
    public enum Kind {
        DOCUMENT("document"), QUERY("query"), USER("user");

        private final String kind;

        Kind(final String kind) {
            this.kind = kind;
        }

        @Override
        public String toString() {
            return kind;
        }
    }

    private String text;

    private ZonedDateTime timestamp;

    private long queryFreq;

    private long docFreq;

    private float userBoost;

    /** Readings per word: {@code readings[i]} holds the readings stored under {@code READING_PREFIX + i}. */
    private String[][] readings;

    private String[] fields;

    private String[] tags;

    private String[] roles;

    private String[] languages;

    private Kind[] kinds;

    private Map<String, Object> emptySource;

    private String id;

    /** Creates a blank item; used internally by {@link #parseSource(Map)} and {@link #merge(SuggestItem, SuggestItem)}. */
    private SuggestItem() {
    }

    /**
     * Creates a suggestion item stamped with the current time.
     *
     * @param text words of the suggestion; joined with {@link SuggestConstants#TEXT_SEPARATOR}
     * @param readings readings per word; must not be {@code null}
     * @param fields field names, {@code null} is treated as empty
     * @param docFreq document frequency
     * @param queryFreq query frequency
     * @param userBoost user boost; values not greater than 1 are replaced by 1
     * @param tags tags, {@code null} is treated as empty
     * @param roles roles; {@code null} or empty falls back to {@link SuggestConstants#DEFAULT_ROLE}
     * @param languages languages, {@code null} is treated as empty
     * @param kind the single kind of this item
     */
    public SuggestItem(final String[] text, final String[][] readings, final String[] fields, final long docFreq, final long queryFreq,
            final float userBoost, @Nullable final String[] tags, @Nullable final String[] roles, @Nullable final String[] languages,
            final Kind kind) {
        this.text = String.join(SuggestConstants.TEXT_SEPARATOR, text);
        this.readings = readings;
        this.fields = fields != null ? fields : new String[] {};
        this.tags = tags != null ? tags : new String[] {};

        if (roles == null || roles.length == 0) {
            this.roles = new String[] { SuggestConstants.DEFAULT_ROLE };
        } else {
            // Roles are copied so later changes to the caller's array do not leak in.
            this.roles = new String[roles.length];
            System.arraycopy(roles, 0, this.roles, 0, roles.length);
        }

        this.languages = languages != null ? languages : new String[] {};

        kinds = new Kind[] { kind };
        if (userBoost > 1) {
            this.userBoost = userBoost;
        } else {
            this.userBoost = 1;
        }
        this.docFreq = docFreq;
        this.queryFreq = queryFreq;
        timestamp = ZonedDateTime.now();
        // NOTE: createEmptyMap() is overridable and reads this.readings, so it must run after the assignment above.
        emptySource = createEmptyMap();
        id = SuggestUtil.createSuggestTextId(this.text);
    }

    /** @return the suggestion text */
    public String getText() {
        return text;
    }

    /** @return the internal readings array (not a copy) */
    public String[][] getReadings() {
        return readings;
    }

    /** @return the internal tags array (not a copy) */
    public String[] getTags() {
        return tags;
    }

    /** @return the internal roles array (not a copy) */
    public String[] getRoles() {
        return roles;
    }

    /** @return the internal languages array (not a copy) */
    public String[] getLanguages() {
        return languages;
    }

    /** @return the internal fields array (not a copy) */
    public String[] getFields() {
        return fields;
    }

    /** @return the internal kinds array (not a copy) */
    public Kind[] getKinds() {
        return kinds;
    }

    /** @return the query frequency */
    public long getQueryFreq() {
        return queryFreq;
    }

    /** @return the document frequency */
    public long getDocFreq() {
        return docFreq;
    }

    /** @return the user boost */
    public float getUserBoost() {
        return userBoost;
    }

    /** @return the timestamp of this item */
    public ZonedDateTime getTimestamp() {
        return timestamp;
    }

    /** @param text the suggestion text; the id is not recomputed */
    public void setText(final String text) {
        this.text = text;
    }

    /** @param timestamp the timestamp of this item */
    public void setTimestamp(final ZonedDateTime timestamp) {
        this.timestamp = timestamp;
    }

    /** @param queryFreq the query frequency */
    public void setQueryFreq(final long queryFreq) {
        this.queryFreq = queryFreq;
    }

    /** @param docFreq the document frequency */
    public void setDocFreq(final long docFreq) {
        this.docFreq = docFreq;
    }

    /** @param userBoost the user boost; stored as given (no lower bound is applied here) */
    public void setUserBoost(final float userBoost) {
        this.userBoost = userBoost;
    }

    /** @param readings readings per word; stored by reference */
    public void setReadings(final String[][] readings) {
        this.readings = readings;
    }

    /** @param fields field names; stored by reference */
    public void setFields(final String[] fields) {
        this.fields = fields;
    }

    /** @param tags tags; stored by reference */
    public void setTags(final String[] tags) {
        this.tags = tags;
    }

    /** @param roles roles; stored by reference */
    public void setRoles(final String[] roles) {
        this.roles = roles;
    }

    /** @param languages languages; stored by reference */
    public void setLanguages(final String[] languages) {
        this.languages = languages;
    }

    /** @param kinds kinds; stored by reference */
    public void setKinds(final Kind[] kinds) {
        this.kinds = kinds;
    }

    /** @param emptySource the map returned by {@link #toEmptyMap()} */
    public void setEmptySource(final Map<String, Object> emptySource) {
        this.emptySource = emptySource;
    }

    /** @param id the item id */
    public void setId(final String id) {
        this.id = id;
    }

    /** @return the placeholder document held by this item (see {@link #createEmptyMap()}) */
    public Map<String, Object> toEmptyMap() {
        return emptySource;
    }

    /**
     * Builds a placeholder document with the same keys as {@link #getSource()}
     * but empty/default values: empty text and arrays, score and user boost of
     * 1, zero frequencies, and the current time formatted as an ISO instant.
     *
     * @return a new mutable map
     */
    protected Map<String, Object> createEmptyMap() {
        final Map<String, Object> map = new HashMap<>();
        map.put(FieldNames.TEXT, StringUtil.EMPTY);

        for (int i = 0; i < readings.length; i++) {
            map.put(FieldNames.READING_PREFIX + i, new String[] {});
        }

        map.put(FieldNames.FIELDS, new String[] {});
        map.put(FieldNames.TAGS, new String[] {});
        map.put(FieldNames.ROLES, new String[] {});
        map.put(FieldNames.LANGUAGES, new String[] {});
        map.put(FieldNames.KINDS, new String[] {});
        map.put(FieldNames.SCORE, 1.0F);
        map.put(FieldNames.QUERY_FREQ, 0L);
        map.put(FieldNames.DOC_FREQ, 0L);
        map.put(FieldNames.USER_BOOST, 1.0F);
        map.put(FieldNames.TIMESTAMP, DateTimeFormatter.ISO_INSTANT.format(ZonedDateTime.now()));
        return map;
    }

    /** @return the item id */
    public String getId() {
        return id;
    }

    /**
     * Converts this item to its document form.
     *
     * <p>Readings are de-duplicated per word, kinds are written as their string
     * names, the score is {@code (queryFreq + docFreq) * userBoost} and the
     * timestamp is written as epoch milliseconds.</p>
     *
     * @return a new mutable map
     */
    public Map<String, Object> getSource() {
        final Map<String, Object> map = new HashMap<>();
        map.put(FieldNames.TEXT, text);

        for (int i = 0; i < readings.length; i++) {
            map.put(FieldNames.READING_PREFIX + i, distinctReadings(i));
        }

        map.put(FieldNames.FIELDS, fields);
        map.put(FieldNames.TAGS, tags);
        map.put(FieldNames.ROLES, roles);
        map.put(FieldNames.LANGUAGES, languages);
        map.put(FieldNames.KINDS, kindNames());
        map.put(FieldNames.QUERY_FREQ, queryFreq);
        map.put(FieldNames.DOC_FREQ, docFreq);
        map.put(FieldNames.USER_BOOST, userBoost);
        map.put(FieldNames.SCORE, (queryFreq + docFreq) * userBoost);
        map.put(FieldNames.TIMESTAMP, timestamp.toInstant().toEpochMilli());
        return map;
    }

    /**
     * Rebuilds an item from a document map such as the one produced by
     * {@link #getSource()}.
     *
     * <p>Readings are read from consecutive {@code READING_PREFIX + i} keys until
     * one is missing or is not a list. Kind names that match no {@link Kind}
     * are skipped. The id is recomputed from the text.</p>
     *
     * @param source the document map; text, frequencies, user boost and
     *        timestamp (epoch milliseconds) must be present
     * @return the parsed item
     * @throws NumberFormatException if a numeric field cannot be parsed
     */
    public static SuggestItem parseSource(final Map<String, Object> source) {
        final String text = source.get(FieldNames.TEXT).toString();
        final List<String[]> readings = new ArrayList<>();
        for (int i = 0;; i++) {
            final Object readingObj = source.get(FieldNames.READING_PREFIX + i);
            if (!(readingObj instanceof List)) {
                break;
            }
            final List<?> readingList = (List<?>) readingObj;
            readings.add(readingList.toArray(new String[0]));
        }
        final List<String> fields = SuggestUtil.getAsList(source.get(FieldNames.FIELDS));
        final long docFreq = Long.parseLong(source.get(FieldNames.DOC_FREQ).toString());
        final long queryFreq = Long.parseLong(source.get(FieldNames.QUERY_FREQ).toString());
        final float userBoost = Float.parseFloat(source.get(FieldNames.USER_BOOST).toString());
        final List<String> tags = SuggestUtil.getAsList(source.get(FieldNames.TAGS));
        final List<String> roles = SuggestUtil.getAsList(source.get(FieldNames.ROLES));
        final List<String> languages = SuggestUtil.getAsList(source.get(FieldNames.LANGUAGES));
        final List<String> kinds = SuggestUtil.getAsList(source.get(FieldNames.KINDS));
        final long timestamp = Long.parseLong(source.get(FieldNames.TIMESTAMP).toString());

        final SuggestItem item = new SuggestItem();
        item.text = text;
        item.readings = readings.toArray(new String[0][]);
        item.fields = fields.toArray(new String[0]);
        item.docFreq = docFreq;
        item.queryFreq = queryFreq;
        item.userBoost = userBoost;
        item.tags = tags.toArray(new String[0]);
        item.roles = roles.toArray(new String[0]);
        item.languages = languages.toArray(new String[0]);

        // Unknown kind names are dropped instead of leaving null slots, which
        // would make every later Kind::toString mapping throw.
        final List<Kind> parsedKinds = new ArrayList<>(kinds.size());
        for (final String kindName : kinds) {
            final Kind kind = toKind(kindName);
            if (kind != null) {
                parsedKinds.add(kind);
            }
        }
        item.kinds = parsedKinds.toArray(new Kind[0]);

        item.id = SuggestUtil.createSuggestTextId(item.text);
        item.timestamp = ZonedDateTime.ofInstant(Instant.ofEpochMilli(timestamp), Clock.systemDefaultZone().getZone());
        return item;
    }

    /**
     * Builds the document that results from applying this item on top of an
     * already stored document.
     *
     * <p>For each list-valued field present in {@code existingSource}, the
     * existing values are kept and this item's values that are not yet
     * contained are appended; otherwise this item's values are used as is.
     * Frequencies are added to the existing ones. Text, user boost and
     * timestamp come from this item. {@code existingSource} is not modified.</p>
     *
     * @param existingSource the stored document
     * @return a new mutable map
     * @throws NumberFormatException if an existing frequency cannot be parsed
     */
    public Map<String, Object> getUpdatedSource(final Map<String, Object> existingSource) {
        final Map<String, Object> map = new HashMap<>();
        map.put(FieldNames.TEXT, text);

        for (int i = 0; i < readings.length; i++) {
            final String key = FieldNames.READING_PREFIX + i;
            if (existingSource.get(key) instanceof List<?> existingValues) {
                final List<Object> merged = new ArrayList<>(existingValues);
                concatValues(merged, (Object[]) readings[i]);
                map.put(key, merged.stream().distinct().toList());
            } else {
                map.put(key, distinctReadings(i));
            }
        }

        map.put(FieldNames.FIELDS, mergeWithExisting(existingSource.get(FieldNames.FIELDS), fields));
        map.put(FieldNames.TAGS, mergeWithExisting(existingSource.get(FieldNames.TAGS), tags));
        map.put(FieldNames.ROLES, mergeWithExisting(existingSource.get(FieldNames.ROLES), roles));
        map.put(FieldNames.LANGUAGES, mergeWithExisting(existingSource.get(FieldNames.LANGUAGES), languages));
        map.put(FieldNames.KINDS, mergeWithExisting(existingSource.get(FieldNames.KINDS), kindNames()));

        final long updatedQueryFreq = queryFreq + existingCount(existingSource.get(FieldNames.QUERY_FREQ));
        map.put(FieldNames.QUERY_FREQ, updatedQueryFreq);

        final long updatedDocFreq = docFreq + existingCount(existingSource.get(FieldNames.DOC_FREQ));
        map.put(FieldNames.DOC_FREQ, updatedDocFreq);

        map.put(FieldNames.USER_BOOST, userBoost);
        map.put(FieldNames.SCORE, (updatedQueryFreq + updatedDocFreq) * userBoost);
        map.put(FieldNames.TIMESTAMP, timestamp.toInstant().toEpochMilli());
        return map;
    }

    /**
     * Appends to {@code dest} every element of {@code newValues} that
     * {@code dest} does not already contain.
     *
     * @param dest the list to extend; must be modifiable
     * @param newValues the candidate values; a {@code null} array is ignored
     * @param <T> the element type
     */
    @SafeVarargs
    protected static <T> void concatValues(final List<T> dest, final T... newValues) {
        if (newValues == null) {
            return;
        }
        for (final T value : newValues) {
            if (!dest.contains(value)) {
                dest.add(value);
            }
        }
    }

    /**
     * Concatenates two kind arrays, appending only those of {@code newKinds}
     * not already present.
     *
     * @param kinds the base kinds; if {@code null}, {@code newKinds} is returned as is
     * @param newKinds the kinds to add; if {@code null}, {@code kinds} is returned as is
     * @return the combined kinds
     */
    protected static Kind[] concatKinds(final Kind[] kinds, final Kind... newKinds) {
        if (kinds == null) {
            return newKinds;
        }
        if (newKinds == null) {
            return kinds;
        }

        final List<Kind> list = new ArrayList<>(kinds.length + newKinds.length);
        Collections.addAll(list, kinds);
        concatValues(list, newKinds);
        return list.toArray(new Kind[0]);
    }

    /**
     * Merges two items that share the same id into a new item.
     *
     * <p>Array-valued properties are the values of {@code item1} followed by
     * the values of {@code item2} not already present; frequencies are summed;
     * timestamp, user boost and the empty source are taken from
     * {@code item2}. The number of reading rows equals the number of parts the
     * text splits into on {@link SuggestConstants#TEXT_SEPARATOR}.</p>
     *
     * @param item1 the first item; supplies id and text
     * @param item2 the second item
     * @return the merged item
     * @throws IllegalArgumentException if the ids differ
     */
    public static SuggestItem merge(final SuggestItem item1, final SuggestItem item2) {
        if (!item1.getId().equals(item2.getId())) {
            throw new IllegalArgumentException("Item id is mismatch.");
        }

        final SuggestItem mergedItem = new SuggestItem();

        mergedItem.id = item1.getId();
        mergedItem.text = item1.getText();

        final int readingCount = mergedItem.text.split(SuggestConstants.TEXT_SEPARATOR).length;
        mergedItem.readings = new String[readingCount][];
        for (int i = 0; i < readingCount; i++) {
            mergedItem.readings[i] = mergeDistinct(readingsAt(item1, i), readingsAt(item2, i));
        }

        mergedItem.fields = mergeDistinct(item1.getFields(), item2.getFields());
        mergedItem.tags = mergeDistinct(item1.getTags(), item2.getTags());
        mergedItem.languages = mergeDistinct(item1.getLanguages(), item2.getLanguages());
        mergedItem.roles = mergeDistinct(item1.getRoles(), item2.getRoles());

        mergedItem.kinds = concatKinds(item1.kinds, item2.kinds);
        mergedItem.timestamp = item2.timestamp;
        mergedItem.queryFreq = item1.queryFreq + item2.queryFreq;
        mergedItem.docFreq = item1.docFreq + item2.docFreq;
        mergedItem.userBoost = item2.userBoost;
        mergedItem.emptySource = item2.emptySource;

        return mergedItem;
    }

    /**
     * Checks whether the text contains any of the given words as a substring.
     *
     * @param badWords the words to look for
     * @return {@code true} if at least one word occurs in the text
     */
    public boolean isBadWord(final String[] badWords) {
        for (final String badWord : badWords) {
            if (text.contains(badWord)) {
                return true;
            }
        }
        return false;
    }

    @Override
    public String toString() {
        // readings is two-dimensional, so deepToString is needed to print its contents.
        return "SuggestItem [text=" + text + ", timestamp=" + timestamp + ", queryFreq=" + queryFreq + ", docFreq=" + docFreq
                + ", userBoost=" + userBoost + ", readings=" + Arrays.deepToString(readings) + ", fields=" + Arrays.toString(fields)
                + ", tags=" + Arrays.toString(tags) + ", roles=" + Arrays.toString(roles) + ", languages=" + Arrays.toString(languages)
                + ", kinds=" + Arrays.toString(kinds) + ", emptySource=" + emptySource + ", id=" + id + "]";
    }

    /**
     * Renders this item as a JSON object with the same fields as
     * {@link #getSource()}.
     *
     * @return the JSON text
     */
    public String toJsonString() {
        final StringBuilder buf = new StringBuilder();
        buf.append('{');
        appendJsonField(buf, FieldNames.TEXT, convertJsonString(text));

        for (int i = 0; i < readings.length; i++) {
            appendJsonField(buf, FieldNames.READING_PREFIX + i, convertJsonStrings(distinctReadings(i)));
        }

        appendJsonField(buf, FieldNames.FIELDS, convertJsonStrings(fields));
        appendJsonField(buf, FieldNames.TAGS, convertJsonStrings(tags));
        appendJsonField(buf, FieldNames.ROLES, convertJsonStrings(roles));
        appendJsonField(buf, FieldNames.LANGUAGES, convertJsonStrings(languages));
        appendJsonField(buf, FieldNames.KINDS, convertJsonStrings(kindNames()));
        appendJsonField(buf, FieldNames.QUERY_FREQ, Long.toString(queryFreq));
        appendJsonField(buf, FieldNames.DOC_FREQ, Long.toString(docFreq));
        appendJsonField(buf, FieldNames.USER_BOOST, Float.toString(userBoost));
        appendJsonField(buf, FieldNames.SCORE, Float.toString((queryFreq + docFreq) * userBoost));
        appendJsonField(buf, FieldNames.TIMESTAMP, Long.toString(timestamp.toInstant().toEpochMilli()));
        return buf.append('}').toString();
    }

    /** Appends {@code "name":rawJson} to the buffer, preceded by a comma unless it is the first member. */
    private static void appendJsonField(final StringBuilder buf, final String name, final String rawJson) {
        if (buf.length() > 1) {
            buf.append(',');
        }
        buf.append('"').append(name).append("\":").append(rawJson);
    }

    /** Quotes a string as a JSON string literal, escaping quotes, backslashes and control characters. */
    private static String convertJsonString(final String value) {
        final StringBuilder buf = new StringBuilder(value.length() + 2);
        buf.append('"');
        for (int i = 0; i < value.length(); i++) {
            final char c = value.charAt(i);
            switch (c) {
            case '"' -> buf.append("\\\"");
            case '\\' -> buf.append("\\\\");
            case '\b' -> buf.append("\\b");
            case '\f' -> buf.append("\\f");
            case '\n' -> buf.append("\\n");
            case '\r' -> buf.append("\\r");
            case '\t' -> buf.append("\\t");
            default -> {
                if (c < 0x20) {
                    // Remaining control characters must be written as \\uXXXX in JSON.
                    buf.append(String.format("\\u%04x", (int) c));
                } else {
                    buf.append(c);
                }
            }
            }
        }
        return buf.append('"').toString();
    }

    /** Renders an array as a JSON array of string literals; {@code null} becomes {@code []}. */
    private static String convertJsonStrings(final String[] values) {
        if (values == null) {
            return "[]";
        }
        return "[" + Arrays.stream(values).map(SuggestItem::convertJsonString).collect(Collectors.joining(",")) + "]";
    }

    /** Returns the de-duplicated readings of the word at {@code index}, or {@code null} if that row is {@code null}. */
    private String[] distinctReadings(final int index) {
        final String[] values = readings[index];
        return values == null ? null : Arrays.stream(values).distinct().toArray(String[]::new);
    }

    /** Returns the string names of this item's kinds. */
    private String[] kindNames() {
        return Stream.of(kinds).map(Kind::toString).toArray(String[]::new);
    }

    /** Returns the {@link Kind} whose string name equals {@code name}, or {@code null} if there is none. */
    private static Kind toKind(final String name) {
        for (final Kind kind : Kind.values()) {
            if (kind.toString().equals(name)) {
                return kind;
            }
        }
        return null;
    }

    /**
     * Returns a copy of {@code existing} extended by the not-yet-contained
     * {@code values} when {@code existing} is a list; otherwise returns
     * {@code values} unchanged.
     */
    private static Object mergeWithExisting(final Object existing, final String[] values) {
        if (existing instanceof List<?> existingValues) {
            // Copy first so the caller's document is left untouched and unmodifiable lists are accepted.
            final List<Object> merged = new ArrayList<>(existingValues);
            concatValues(merged, (Object[]) values);
            return merged;
        }
        return values;
    }

    /** Parses a stored counter, treating a missing value as zero. */
    private static long existingCount(final Object value) {
        return value == null ? 0L : Long.parseLong(value.toString());
    }

    /** Returns the reading row {@code index} of {@code item}, or {@code null} if the item has no such row. */
    private static String[] readingsAt(final SuggestItem item, final int index) {
        final String[][] all = item.getReadings();
        return all != null && all.length > index ? all[index] : null;
    }

    /** Returns all of {@code first} followed by the elements of {@code second} not already present; {@code null} counts as empty. */
    private static String[] mergeDistinct(final String[] first, final String[] second) {
        final List<String> merged = new ArrayList<>();
        if (first != null) {
            Collections.addAll(merged, first);
        }
        concatValues(merged, second);
        return merged.toArray(new String[0]);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy