All Downloads are FREE. Search and download functionalities are using the official Maven repository.

uk.ac.shef.dcs.sti.util.TripleGenerator Maven / Gradle / Ivy

The newest version!
package uk.ac.shef.dcs.sti.util;

import uk.ac.shef.dcs.kbsearch.model.Entity;
import uk.ac.shef.dcs.sti.core.model.*;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

/**
 */
public class TripleGenerator {

    private String kbNamespace;
    private String defaultNamespace;

    public TripleGenerator(String kbNamespace, String dummyNamespace) {
        this.kbNamespace = kbNamespace;
        this.defaultNamespace = dummyNamespace;
    }

    public List generate_newTriples(TAnnotation tab_annotation, Table table) {
        List result = new ArrayList<>();

        //column typing instances
        for (int col = 0; col < table.getNumCols(); col++) {
            TColumnHeader header = table.getColumnHeader(col);

            List bestHeaderAnnotations = tab_annotation.getWinningHeaderAnnotations(col);
            if (bestHeaderAnnotations.size() == 0)
                continue;


            for (int row = 0; row < table.getNumRows(); row++) {
                for (TColumnHeaderAnnotation final_type_for_the_column : bestHeaderAnnotations) {

                    /*if (final_type_for_the_column.getSupportingRows().contains(row))
                        continue;*/
                    TCell tcc = table.getContentCell(row, col);
                    TCellAnnotation[] cell_annotations = tab_annotation.getContentCellAnnotations(row, col);
                    if (cell_annotations == null || cell_annotations.length == 0) //no entity found for this cell
                        continue;

                    TCellAnnotation final_cell_annotation = cell_annotations[0];
                    Entity entity = final_cell_annotation.getAnnotation();

                    //new triple
                    TableTriple ltt = new TableTriple();
                    ltt.setSubject_position(new int[]{row, col});
                    ltt.setSubject(tcc.getText());
                    ltt.setSubject_annotation(kbNamespace + entity.getId());
                    ltt.setObject(header.getHeaderText());
                    ltt.setObject_annotation(kbNamespace + final_type_for_the_column.getAnnotation().getId());
                    ltt.setObject_position(new int[]{-1, -1});
                    ltt.setRelation_annotation("rdf:type");
                    result.add(ltt);
                }
            }
        }

        //across column relations at each row
        List related_columns_with_subject = new ArrayList();
        int main_subject_column = 0;
        Map>
                relations_across_columns = tab_annotation.getColumncolumnRelations();
        for (Map.Entry> entry :
                relations_across_columns.entrySet()) {
            RelationColumns the_two_columns = entry.getKey();
            int subCol = the_two_columns.getSubjectCol();
            int objCol = the_two_columns.getObjectCol();
            related_columns_with_subject.add(objCol);
            main_subject_column = subCol;

            Collections.sort(entry.getValue());
            TColumnColumnRelationAnnotation relation_annotation = entry.getValue().get(0);

            for (int row = 0; row < table.getNumRows(); row++) {
                if (relation_annotation.getSupportingRows().contains(row))
                    continue;

                TCell subject_cell = table.getContentCell(row, subCol);
                TCell object_cell = table.getContentCell(row, objCol);
                TCellAnnotation[] subject_cell_annotations = tab_annotation.getContentCellAnnotations(row, subCol);
                if (subject_cell_annotations == null || subject_cell_annotations.length == 0)
                    continue;
                TCellAnnotation final_subject_cell_annotation = subject_cell_annotations[0];
                TCellAnnotation[] object_cell_annotations = tab_annotation.getContentCellAnnotations(row, objCol);
                TCellAnnotation final_object_cell_annotation = object_cell_annotations == null || object_cell_annotations.length == 0 ? null : object_cell_annotations[0];

                TableTriple triple = new TableTriple();
                triple.setSubject_position(new int[]{row, subCol});
                triple.setSubject(final_subject_cell_annotation.getTerm());
                triple.setSubject_annotation(kbNamespace + final_subject_cell_annotation.getAnnotation().getId());

                triple.setObject_position(new int[]{row, objCol});
                if (final_object_cell_annotation != null) {
                    triple.setObject_annotation(kbNamespace + final_object_cell_annotation.getAnnotation().getId());
                    triple.setObject(final_object_cell_annotation.getTerm());
                } else {
                    triple.setObject_annotation("'" + object_cell.getText() + "'");
                    triple.setObject(object_cell.getText());
                }
                triple.setRelation_annotation(kbNamespace + relation_annotation.getRelationURI());
                result.add(triple);

            }
        }

        //remaining columns with subject column create dummy relations
        for (int col = 0; col < table.getNumCols(); col++) {
            if (col == main_subject_column || related_columns_with_subject.contains(col))
                continue;

            TColumnHeader header = table.getColumnHeader(col);
            if (header!=null&&header.getTypes() != null) {
                if (header.getTypes().get(0).getType().equals(DataTypeClassifier.DataType.ORDERED_NUMBER))
                    continue;
            }
            else{
                continue;
            }

            for (int row = 0; row < table.getNumRows(); row++) {
                TCell subject_cell = table.getContentCell(row, main_subject_column);
                TCell object_cell = table.getContentCell(row, col);
                TCellAnnotation[] subject_cell_annotations = tab_annotation.getContentCellAnnotations(row, main_subject_column);
                if (subject_cell_annotations == null || subject_cell_annotations.length == 0)
                    continue;
                TCellAnnotation final_subject_cell_annotation = subject_cell_annotations[0];

                TableTriple triple = new TableTriple();
                triple.setSubject_position(new int[]{row, main_subject_column});
                triple.setSubject(final_subject_cell_annotation.getTerm());
                triple.setSubject_annotation(kbNamespace + final_subject_cell_annotation.getAnnotation().getId());

                triple.setObject_position(new int[]{row, col});
                triple.setObject(object_cell.getText());
                triple.setObject_annotation("'" + object_cell.getText() + "'");
                triple.setRelation_annotation(defaultNamespace + "/" + header.getHeaderText());
                result.add(triple);
            }
        }

        return result;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy