All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.tika.parser.ner.grobid.GrobidNERecogniser Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.parser.ner.grobid;

import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.tika.parser.ner.NERecogniser;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import java.io.IOException;
import java.util.*;

public class GrobidNERecogniser implements NERecogniser{

	private static final Logger LOG = LoggerFactory.getLogger(GrobidNERecogniser.class);
    private static boolean available = false;
    private static final String GROBID_REST_HOST = "http://localhost:8080";
    private String restHostUrlStr;
	
    /*
     * Useful Entities from Grobid NER 
     */
    public static final Set ENTITY_TYPES = new HashSet(){{
        add("MEASUREMENT_NUMBERS");
        add("MEASUREMENT_UNITS");
        add("MEASUREMENTS");
        add("NORMALIZED_MEASUREMENTS");
        add("MEASUREMENT_TYPES");
    }};


    public GrobidNERecogniser(){
        try {

	            String restHostUrlStr="";
	            try {
	                restHostUrlStr = readRestUrl();
	            } catch (IOException e) {
	            	LOG.warn("couldn't read rest url", e);

	            }

	            if (restHostUrlStr == null || restHostUrlStr.equals("")) {
	                this.restHostUrlStr = GROBID_REST_HOST;
	            } else {
	                this.restHostUrlStr = restHostUrlStr;
	            }

	            Response response = WebClient.create(restHostUrlStr).accept(MediaType.APPLICATION_JSON).get();
	            int responseCode = response.getStatus();
	            if(responseCode == 200){
	                available = true;
	            }
	            else{
	                LOG.info("Grobid REST Server is not running");
	            }
	
        	}
        	catch (Exception e) {
            	LOG.info(e.getMessage(), e);
        	}
    }

    /**
     * Reads the GROBID REST URL from the properties file
     * returns the GROBID REST URL
     */
    private static String readRestUrl() throws IOException {
    	Properties grobidProperties = new Properties();
        grobidProperties.load(GrobidNERecogniser.class.getResourceAsStream("GrobidServer.properties"));
        return grobidProperties.getProperty("grobid.server.url");
    }
    
    /**
     * Reads the GROBID REST Endpoint from the properties file
     * returns the GROBID REST Endpoint
     */
    private static String readRestEndpoint() throws IOException {
    	Properties grobidProperties = new Properties();
        grobidProperties.load(GrobidNERecogniser.class.getResourceAsStream("GrobidServer.properties"));
        return grobidProperties.getProperty("grobid.endpoint.text");
    }

    /**
     * @return {@code true} if server endpoint is available.
     * returns {@code false} if server endpoint is not avaliable for service.
     */
    public boolean isAvailable() {
        return available;
    }

    /**
     * Gets set of entity types recognised by this recogniser
     * @return set of entity classes/types
     */
    public Set getEntityTypes() {
        return ENTITY_TYPES;
    }

    /**
     * Converts JSON Object to JSON Array 
     * @return a JSON array
     */
    public JSONArray convertToJSONArray(JSONObject obj, String key){
    	JSONArray jsonArray = new JSONArray();
    	try{
    		jsonArray = (JSONArray) obj.get(key);
    	}
    	catch(Exception e){
    	   	LOG.info(e.getMessage(), e);
        }
    	return jsonArray;
    }
    
    /**
     * Parses a JSON String and converts it to a JSON Object 
     * @return a JSON Object
     */
    public JSONObject convertToJSONObject(String jsonString){
    	JSONParser parser = new JSONParser();
    	JSONObject jsonObject = new JSONObject();
    	try{
    		jsonObject = (JSONObject) parser.parse(jsonString);
    	}
    	catch(Exception e){
    	   	LOG.info(e.getMessage(), e);
        }
		return jsonObject;
    }
    /**
     * recognises names of entities in the text
     * @param text text which possibly contains names
     * @return map of entity type -> set of names
     */
    public Map> recognise(String text) {
       
    	Map> entities = new HashMap>();
        Set measurementNumberSet = new HashSet();
        Set unitSet = new HashSet();
        Set measurementSet = new HashSet();
        Set normalizedMeasurementSet = new HashSet();
        Set measurementTypeSet = new HashSet();
        
        try {
            String url = restHostUrlStr + readRestEndpoint();
            Response response = WebClient.create(url).accept(MediaType.APPLICATION_JSON).post("text=" + text);
            int responseCode = response.getStatus();
    
            if (responseCode == 200) {
                String result = response.readEntity(String.class);
                JSONObject jsonObject = convertToJSONObject(result);
                JSONArray measurements = convertToJSONArray(jsonObject, "measurements");
                for(int i=0; i




© 2015 - 2025 Weber Informatics LLC | Privacy Policy