All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.visallo.phoneNumber.PhoneNumberGraphPropertyWorker Maven / Gradle / Ivy

The newest version!
package org.visallo.phoneNumber;

import com.google.common.base.Charsets;
import com.google.common.io.CharStreams;
import com.google.i18n.phonenumbers.PhoneNumberMatch;
import com.google.i18n.phonenumbers.PhoneNumberUtil;
import org.vertexium.Element;
import org.vertexium.Property;
import org.vertexium.Vertex;
import org.visallo.core.ingest.graphProperty.GraphPropertyWorkData;
import org.visallo.core.ingest.graphProperty.GraphPropertyWorker;
import org.visallo.core.ingest.graphProperty.GraphPropertyWorkerPrepareData;
import org.visallo.core.model.Description;
import org.visallo.core.model.Name;
import org.visallo.core.model.properties.VisalloProperties;
import org.visallo.core.model.termMention.TermMentionBuilder;
import org.visallo.core.util.VisalloLogger;
import org.visallo.core.util.VisalloLoggerFactory;
import org.visallo.web.clientapi.model.VisibilityJson;

import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

import static org.vertexium.util.IterableUtils.count;
import static org.visallo.core.model.ontology.OntologyRepository.PUBLIC;

@Name("Phone Number Extractor")
@Description("Extracts phone numbers from text")
public class PhoneNumberGraphPropertyWorker extends GraphPropertyWorker {
    private static final VisalloLogger LOGGER = VisalloLoggerFactory.getLogger(PhoneNumberGraphPropertyWorker.class);
    public static final String PHONE_NUMBER_CONCEPT_INTENT = "phoneNumber";
    private static final String DEFAULT_REGION_CODE = "phoneNumber.defaultRegionCode";
    private static final String DEFAULT_DEFAULT_REGION_CODE = "US";

    private final PhoneNumberUtil phoneNumberUtil = PhoneNumberUtil.getInstance();
    private String defaultRegionCode;
    private String publicEntityType;

    @Override
    public void prepare(GraphPropertyWorkerPrepareData workerPrepareData) throws Exception {
        super.prepare(workerPrepareData);

        defaultRegionCode = (String) workerPrepareData.getConfiguration().get(DEFAULT_REGION_CODE);
        if (defaultRegionCode == null) {
            defaultRegionCode = DEFAULT_DEFAULT_REGION_CODE;
        }

        publicEntityType = getOntologyRepository().getRequiredConceptIRIByIntent(PHONE_NUMBER_CONCEPT_INTENT, PUBLIC);
    }

    @Override
    public void execute(InputStream in, GraphPropertyWorkData data) throws Exception {
        LOGGER.debug("Extracting phone numbers from provided text");

        final String text = CharStreams.toString(new InputStreamReader(in, Charsets.UTF_8));

        Vertex outVertex = (Vertex) data.getElement();
        VisibilityJson visibilityJson = VisalloProperties.VISIBILITY_JSON.getPropertyValue(outVertex);
        final Iterable phoneNumbers = phoneNumberUtil.findNumbers(text, defaultRegionCode);
        List termMentions = new ArrayList<>();
        for (final PhoneNumberMatch phoneNumber : phoneNumbers) {
            final String formattedNumber = phoneNumberUtil.format(phoneNumber.number(), PhoneNumberUtil.PhoneNumberFormat.E164);
            int start = phoneNumber.start();
            int end = phoneNumber.end();

            Vertex termMention = new TermMentionBuilder()
                    .outVertex(outVertex)
                    .propertyKey(data.getProperty().getKey())
                    .propertyName(data.getProperty().getName())
                    .start(start)
                    .end(end)
                    .title(formattedNumber)
                    .conceptIri(publicEntityType)
                    .visibilityJson(visibilityJson)
                    .process(getClass().getName())
                    .save(getGraph(), getVisibilityTranslator(), getUser(), getAuthorizations());
            termMentions.add(termMention);
        }
        getGraph().flush();
        applyTermMentionFilters(outVertex, termMentions);
        pushTextUpdated(data);

        LOGGER.debug("Number of phone numbers extracted: %d", count(phoneNumbers));
    }

    @Override
    public boolean isHandled(Element element, Property property) {
        if (property == null) {
            return false;
        }

        if (property.getName().equals(VisalloProperties.RAW.getPropertyName())) {
            return false;
        }

        String mimeType = VisalloProperties.MIME_TYPE_METADATA.getMetadataValue(property.getMetadata(), null);
        return !(mimeType == null || !mimeType.startsWith("text"));
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy