Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
uk.ac.shef.dcs.sti.io.TAnnotationWriter Maven / Gradle / Ivy
package uk.ac.shef.dcs.sti.io;
import uk.ac.shef.dcs.sti.util.TripleGenerator;
import uk.ac.shef.dcs.sti.util.DataTypeClassifier;
import uk.ac.shef.dcs.sti.core.model.*;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.util.Collections;
import java.util.List;
import java.util.Map;
/**
*/
public class TAnnotationWriter {
protected String linkPrefix = "http://www.freebase.com";
protected boolean showLosingCandidates = true;
protected TripleGenerator tripleGenerator;
public TAnnotationWriter(TripleGenerator tripleGenerator) {
this.tripleGenerator = tripleGenerator;
}
public void writeHTML(Table table, TAnnotation tab_annotations, String outFile) throws FileNotFoundException {
StringBuilder table_sb = new StringBuilder();
table_sb.append("\n");
String sourceId = table.getSourceId();
sourceId = sourceId.replaceAll("\\\\", "/");
int trimStart = sourceId.lastIndexOf("/");
int trimEnd = sourceId.lastIndexOf(".htm");
trimStart = trimStart == -1 ? 0 : trimStart;
trimEnd = trimEnd == -1 ? sourceId.length() : trimEnd;
sourceId = sourceId.substring(trimStart + 1, trimEnd);
List triples = tripleGenerator.generate_newTriples(tab_annotations, table);
table_sb.append("source:" + sourceId + " with " + triples.size() + " new attributes .");
writeTriples(triples, outFile + ".triples");
table_sb.append("Table column types and entity disambiguation \n");
table_sb.append("");
table_sb.append(writeHeader(table, tab_annotations));
table_sb.append(writeCell(table, tab_annotations));
table_sb.append("
");
table_sb.append("Table subject column and binary relations ");
table_sb.append("");
table_sb.append(writeRelation_inHeader(table, tab_annotations));
table_sb.append(writeRelation_inCell(table, tab_annotations));
table_sb.append("
\n");
table_sb.append("");
PrintWriter p = new PrintWriter(outFile);
p.println(table_sb.toString());
p.close();
File f = new File(outFile);
String header_key = f.getPath() + ".header.keys";
String relation_key = f.getPath() + ".relation.keys";
String cell_key = f.getPath() + ".cell.keys";
writeHeaderKeyFile(table, tab_annotations, header_key);
writeRelationKeyFile(tab_annotations, relation_key);
writeCellKeyFile(table, tab_annotations, cell_key);
}
protected void writeCellKeyFile(Table table, TAnnotation table_annotation, String cell_key) throws FileNotFoundException {
PrintWriter p = new PrintWriter(cell_key);
for (int r = 0; r < table.getNumRows(); r++) {
for (int c = 0; c < table.getNumCols(); c++) {
TCellAnnotation[] cans = table_annotation.getContentCellAnnotations(r, c);
if (cans != null && cans.length > 0) {
StringBuilder s = new StringBuilder();
s.append(r).append(",").append(c).append("=");
double prevScore=0.0;
for (TCellAnnotation ca : cans) {
if (prevScore == 0.0) {
s.append(ca.getAnnotation().getId());
prevScore=ca.getFinalScore();
}
else{
if(ca.getFinalScore()==prevScore){
s.append("=").append(ca.getAnnotation().getId());
}
else
s.append("|").append(ca.getAnnotation().getId());
}
}
p.println(s.toString());
}
}
}
p.close();
}
protected void writeRelationKeyFile(TAnnotation table_annotation, String relation_key) throws FileNotFoundException {
PrintWriter p = new PrintWriter(relation_key);
for (Map.Entry> e :
table_annotation.getColumncolumnRelations().entrySet()) {
int subCol = e.getKey().getSubjectCol();
int objCol = e.getKey().getObjectCol();
List relations = e.getValue();
Collections.sort(relations);
StringBuilder s = new StringBuilder();
double prevScore=0.0;
for (TColumnColumnRelationAnnotation hr : relations) {
if (prevScore == 0.0) {
s.append(hr.getRelationURI());
prevScore=hr.getFinalScore();
}
else{
if(hr.getFinalScore()==prevScore){
s.append("=").append(hr.getRelationURI());
}
else
s.append("|").append(hr.getRelationURI());
}
}
p.println(subCol + "," + objCol + "=" + s.toString());
}
p.close();
}
protected void writeHeaderKeyFile(Table table, TAnnotation table_annotation, String header_key) throws FileNotFoundException {
PrintWriter p = new PrintWriter(header_key);
for (int c = 0; c < table.getNumCols(); c++) {
TColumnHeaderAnnotation[] anns = table_annotation.getHeaderAnnotation(c);
if (anns != null && anns.length > 0) {
StringBuilder s = new StringBuilder();
s.append(c).append("=");
double prevScore = 0.0;
for (TColumnHeaderAnnotation ha : anns) {
if (prevScore == 0.0) {
s.append(ha.getAnnotation().getId());
prevScore=ha.getFinalScore();
}
else{
if(ha.getFinalScore()==prevScore){
s.append("=").append(ha.getAnnotation().getId());
}
else
s.append("|").append(ha.getAnnotation().getId());
}
}
if(table.getColumnHeader(c).getFeature().getMostFrequentDataType().getType().equals(
DataTypeClassifier.DataType.NAMED_ENTITY
))
s.append("\t\t\t___NE");
p.println(s.toString());
}
}
p.close();
}
protected void writeTriples(List triples, String outFile) throws FileNotFoundException {
PrintWriter p = new PrintWriter(outFile);
p.println("");
for (TableTriple ltt : triples) {
p.println(" <" + ltt.getSubject_annotation() + "," + ltt.getRelation_annotation() + "," + ltt.getObject_annotation() + ">, " +
"(" + ltt.getSubject() + "," + ltt.getObject() + "), " + "[" + ltt.getSubject_position()[0] + "," + ltt.getSubject_position()[1] + "][" +
ltt.getObject_position()[0] + "," + ltt.getObject_position()[1] + "]");
}
p.println("");
p.close();
}
protected String writeRelation_inCell(Table table, TAnnotation tab_annotations) {
StringBuilder out = new StringBuilder();
out.append("\n");
for (int row = 0; row < table.getNumRows(); row++) {
for (int col = 0; col < table.getNumCols(); col++) {
String color = col == tab_annotations.getSubjectColumn() ? " bgcolor=\"yellow\"" : "";
TCell cell = table.getContentCell(row, col);
out.append("\t").append(cell.getText()).append(cell.getOtherText()).append(" \n");
//then annotations
if (col == tab_annotations.getSubjectColumn()) {
out.append("\t-");
else {
annotation.append(" bgcolor=\"#00FF00\">");
for (int i = 0; i < cAnns.length; i++) {
TCellAnnotation cAnn = cAnns[i];
if (i == 0) { //the winning annotation
annotation.append("").append(generateCellAnnotationString(cAnn)).append(" ");
} else if (showLosingCandidates) { //others
annotation.append("").
append(generateCellAnnotationString(cAnn)).append(" ");
}
}
}
annotation.append("\t \n");
out.append(annotation);
} else {
out.append("\t> tmp = tab_annotations.getRelationAnnotationsBetween(key.getSubjectCol(), key.getObjectCol());
if (tmp == null) {
annotation.append(">-");
annotation.append(" \n");
out.append(annotation);
continue;
}
List crAnns = tmp.get(row);
if (crAnns == null)
annotation.append(">-");
else {
Collections.sort(crAnns);
annotation.append(" bgcolor=\"#00FF00\">");
for (int i = 0; i < crAnns.size(); i++) {
TCellCellRelationAnotation crAnn = crAnns.get(i);
if (i == 0) { //the winning annotation
annotation.append("").append(generateAcrossCellRelationString(crAnn)).append(" ");
} else if (showLosingCandidates) { //others
annotation.append("").
append(generateAcrossCellRelationString(crAnn)).append(" ");
}
}
}
annotation.append("\t\n");
out.append(annotation);
}
}
out.append(" \n");
}
return out.toString();
}
protected String writeRelation_inHeader(Table table, TAnnotation tab_annotations) {
StringBuilder out = new StringBuilder();
out.append("\n");
for (int col = 0; col < table.getNumCols(); col++) {
String color = col == tab_annotations.getSubjectColumn() ? " bgcolor=\"yellow\"" : "";
TColumnHeader header = table.getColumnHeader(col);
if(header==null)
continue;
out.append("\t").append(header.getHeaderText()).append(" \n");
//then annotations
if (col == tab_annotations.getSubjectColumn()) {
out.append("\t-");
else {
annotation.append(" bgcolor=\"#00FF00\">");
double best_score = 0.0;
for (int i = 0; i < hAnns.length; i++) {
TColumnHeaderAnnotation hAnn = hAnns[i];
if (i == 0) { //the winning annotation
annotation.append("").append(generateHeaderAnnotationString(hAnn)).append(" ");
best_score = hAnn.getFinalScore();
} else if (hAnn.getFinalScore() == best_score) {
annotation.append("").append(generateHeaderAnnotationString(hAnn)).append(" ");
} else if (showLosingCandidates) { //others
annotation.append("").
append(generateHeaderAnnotationString(hAnn)).append(" ");
}
}
}
annotation.append("\t \n");
out.append(annotation);
} else {
out.append("\t hAnns = tab_annotations.getColumncolumnRelations().get(key);
if (hAnns == null)
annotation.append(">-");
else {
Collections.sort(hAnns);
annotation.append(" bgcolor=\"#00FF00\">");
for (int i = 0; i < hAnns.size(); i++) {
TColumnColumnRelationAnnotation hAnn = hAnns.get(i);
if (i == 0) { //the winning annotation
annotation.append("").append(generateAcrossHeaderRelationString(hAnn)).append(" ");
} else if (showLosingCandidates) { //others
annotation.append("").
append(generateAcrossHeaderRelationString(hAnn)).append(" ");
}
}
}
annotation.append("\t \n");
out.append(annotation);
}
}
out.append(" \n");
return out.toString();
}
protected String writeHeader(Table table, TAnnotation tab_annotations) {
StringBuilder out = new StringBuilder();
out.append("\n");
for (int col = 0; col < table.getNumCols(); col++) {
TColumnHeader header = table.getColumnHeader(col);
if(header==null)
continue;
out.append("\t").append(header.getHeaderText()).append(" \n");
//then annotations
out.append("\t-");
else {
annotation.append(" bgcolor=\"#00FF00\">");
double best_score = 0.0;
for (int i = 0; i < hAnns.length; i++) {
TColumnHeaderAnnotation hAnn = hAnns[i];
if (i == 0) { //the winning annotation
annotation.append("").append(generateHeaderAnnotationString(hAnn)).append(" ");
best_score = hAnn.getFinalScore();
} else if (hAnn.getFinalScore() == best_score) {
annotation.append("").append(generateHeaderAnnotationString(hAnn)).append(" ");
} else if (showLosingCandidates) { //others
annotation.append("").
append(generateHeaderAnnotationString(hAnn)).append(" ");
}
}
}
annotation.append("\t \n");
out.append(annotation);
}
out.append(" \n");
return out.toString();
}
protected String writeCell(Table table, TAnnotation tab_annotations) {
StringBuilder out = new StringBuilder();
for (int row = 0; row < table.getNumRows(); row++) {
out.append("\n");
for (int col = 0; col < table.getNumCols(); col++) {
TCell tcc = table.getContentCell(row, col);
out.append("\t").append(tcc.getText()).append(tcc.getOtherText()).append("").append(" [").
append(tcc.getType().getValue()).append("] ").
append(" \n");
//then annotations
out.append("\t-");
else {
annotation.append(" bgcolor=\"#00FF00\">");
for (int i = 0; i < cAnns.length; i++) {
TCellAnnotation cAnn = cAnns[i];
if (i == 0) { //the winning annotation
annotation.append("").append(generateCellAnnotationString(cAnn)).append(" ");
} else if (showLosingCandidates) { //others
annotation.append("").
append(generateCellAnnotationString(cAnn)).append(" ");
}
}
}
annotation.append("\t \n");
out.append(annotation);
}
out.append(" \n");
}
return out.toString();
}
protected Object generateCellAnnotationString(TCellAnnotation cAnn) {
StringBuilder sb = new StringBuilder();
sb.append("").
append(cAnn.getAnnotation().getLabel()).append(" ").
append("=").append(Math.round(cAnn.getFinalScore() * 100.0) / 100.0).append(cAnn.getAnnotation().getTypeIds());
return sb.toString();
}
protected String generateHeaderAnnotationString(TColumnHeaderAnnotation ha) {
StringBuilder sb = new StringBuilder();
sb.append("").
append(ha.getAnnotation().getId()).append("(").append(ha.getAnnotation().getId()).append(") ").
append("=").append(Math.round(ha.getFinalScore() * 100.0) / 100.0).append(ha.getSupportingRows());
return sb.toString();
}
protected String generateAcrossHeaderRelationString(TColumnColumnRelationAnnotation ha) {
StringBuilder sb = new StringBuilder();
sb.append("").
append(ha.getRelationURI()).
append(" ").
append("=").append(Math.round(ha.getFinalScore() * 100.0) / 100.0).append(ha.getSupportingRows());
return sb.toString();
}
protected String generateAcrossCellRelationString(TCellCellRelationAnotation ca) {
StringBuilder sb = new StringBuilder();
sb.append("").
append(ca.getRelationURI()).
append(" ").
append("=").append(Math.round(ca.getWinningAttributeMatchScore() * 100.0) / 100.0);
return sb.toString();
}
}