All Downloads are FREE. Search and download functionalities are using the official Maven repository.

lv.lnb.ner.NECounterFull Maven / Gradle / Ivy

/*******************************************************************************
 * Copyright 2012 Institute of Mathematics and Computer Science, University of Latvia
 * Author: Pēteris Paikens
 * 
 *     This program is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 * 
 *     This program is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 * 
 *     You should have received a copy of the GNU General Public License
 *     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *******************************************************************************/
package lv.lnb.ner;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;

public class NECounterFull {
	private HashMap entities = new HashMap();
	private HashMap count = new HashMap();
	private HashMap> documents = new HashMap>();
	
	void add(String document, String name, String normalform, String tag) {
		NE entity = entities.get(tag+"|"+normalform);
		if (entity==null) {
			entity = new NE(name, normalform, tag);
			entities.put(tag+"|"+normalform, entity);
		}
		entity.add(name, document);
				
		HashMap doc = documents.get(document);
		if (doc == null) {
			doc = new HashMap();
			documents.put(document,doc);
		}
		
		Integer prevcount = count.get(entity);
		if (prevcount == null) prevcount = 0;
		count.put(entity,prevcount+1);
		
		prevcount = doc.get(entity);
		if (prevcount == null) prevcount = 0;
		doc.put(entity,prevcount+1);
	}
	
	void show() {
		List sorted = new LinkedList( new TreeSet(count.keySet())); 
		for (NE entity : sorted) {
			//System.out.printf("%s\t%s\t%d\n%s",entity.tag, entity.normalform, count.get(entity), entity.alternatives());
			System.out.printf("%s\t%s\t%d\n%s",entity.tag, entity.normalform, count.get(entity), "");
		}		
	}
	
	void db_insert() {
		String infoSource = "PP imports 27jun";
		for (NE entity : count.keySet()) {
			System.out.printf("insert into entity (categoryid, definition, infoSource) values (%d, \"%s\", \"%s\");\n", entity.category(), entity.getName(), infoSource);
			System.out.printf("insert into name (name, infoSource) values (\"%s\", \"%s\");\n", entity.getName(), infoSource);
			System.out.printf("insert into entityName(nameID, entityID, infoSource) select LAST_INSERT_ID() as nameID, ID as entityID, infoSource from entity where infoSource=\"%s\" and definition=\"%s\";\n", entity.getName(), infoSource);
		}
		
		for (Map.Entry> doc : documents.entrySet()) {
			String url = "http://www.periodika.lv/periodika2-viewer/view/index-dev.html#panel:pp|issue:/" + doc.getKey();
			System.out.printf("insert into document (reference, infoSource) values (\"%s\",\"PP imports\");\n",url);
			for (Map.Entry entry : doc.getValue().entrySet()) {
				System.out.printf("insert into nameDocument (nameID, documentID, occurrences, infoSource) select min(name.ID) as nameID, min(document.ID) as documentID, %d as occurrences, \"PP imports\" from name, document where name = \"%s\" and reference = \"%s\";\n", 
						entry.getValue(), entry.getKey().getName(), url);
			}
		}
	}
	
	void filter(int limit) {
		HashMap newentities = new HashMap();
		for (Map.Entry entry : entities.entrySet()) {
			NE entity = entry.getValue();
			int num = count.get(entity);
			if (num doc = documents.get(document);
					if (doc != null) doc.remove(entity);
				}
			} else newentities.put(entry.getKey(), entry.getValue());				
		}
		entities = newentities;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy