All downloads are free. The search and download functionality uses the official Maven repository.

sp2.bin-ext.rdf2schema.pl Maven / Gradle / Ivy

There is a newer version: 0.9.8
Show newest version
#!/usr/bin/perl

use strict;

### validate the command line: exactly two arguments are required, a database
### flag (--mysql, --oracle or --monet) followed by the input file
if (scalar(@ARGV)!=2) {
    &abort("Main",
           "Wrong number of arguments; expecting --mysql/--oracle/--monet and file");
} elsif ($ARGV[0] ne "--mysql"
        && $ARGV[0] ne "--oracle"
        && $ARGV[0] ne "--monet") {
    &abort("Main","Illegal first argument; expected --mysql/--oracle/--monet");
}


### global variables
# NOTE(review): neither variable is used in the part of the file reviewed
# here; presumably they are shared state for the reference handling -- confirm.
my $last_refblank_id;
my $last_refblank;

###############################################################################
############# STEP 0: process cmd line and prepare
### numeric codes for the supported SQL dialects
my $TYPE_MYSQL=0;
my $TYPE_ORACLE=1;
my $TYPE_MONET=2;

### translate the dialect flag (first argument) into its numeric code
my $db_type;
if ($ARGV[0] eq "--mysql") {
    $db_type=$TYPE_MYSQL;
} elsif ($ARGV[0] eq "--oracle") {
    $db_type=$TYPE_ORACLE;
} elsif ($ARGV[0] eq "--monet") {
    $db_type=$TYPE_MONET;
}

### open the input file (second argument) and the fixed output file
my $infile=$ARGV[1];
my $outfile="./out.sql"; # fixed
# Three-argument open keeps the mode apart from the file name, so a name that
# contains mode characters such as '>' or '|' is opened literally instead of
# being interpreted as part of the open mode.
&abort("Main","Input file '$infile' could not be opened.")
    if (!open(IN,"<",$infile));
&abort("Main","Output file '$outfile' could not be opened.")
    if (!open(OUT,">",$outfile));


###############################################################################
########### STEP 1: create tables
### SQL column types used by the table definitions; they depend on the
### selected database dialect
my ($shortstring,$mediumstring,$largestring,$integer);
if ($db_type==$TYPE_MYSQL) {
    ($shortstring,$mediumstring,$largestring,$integer)
        =("VARCHAR(100)","VARCHAR(200)","VARCHAR(20000)","INTEGER");
} elsif ($db_type==$TYPE_ORACLE) {
    ($shortstring,$mediumstring,$largestring,$integer)
        =("VARCHAR(100)","VARCHAR(200)","CLOB","INTEGER");
} elsif ($db_type==$TYPE_MONET) {
    ($shortstring,$mediumstring,$largestring,$integer)
        =("VARCHAR(100)","VARCHAR(500)","VARCHAR(20000)","INTEGER");
} else {
    &abort("Main","Undefined SQL database type");
}


### delayed editors are collected here
# (an editor may be mentioned before the corresponding person is defined;
#  such editors are inserted right at the end of processing)
my @delay;

### numeric IDs of the venue kinds (written to the venue type table)
my %venue_types=(journal => 0, proceedings => 1);

### numeric IDs of the publication kinds (written to the publication type table)
my %publication_types=(
    article       => 0,
    inproceedings => 1,
    book          => 2,
    incollection  => 3,
    www           => 4,
    phdthesis     => 5,
    mastersthesis => 6,
);


#######################
###### TABLE DEFINITION
# Every table is described by a hash (column name => SQL column type), a
# primary key string (a column list; a false value makes createTableSQL omit
# the PRIMARY KEY clause) and foreign key hashes of the form
# (DEST => referenced table, local column => referenced column).

##################################################################
### Document table (and corresponding multi attribute tables)
my $tbl_doc="Document";
my %doc_attributes=(
    ID        => $integer,
    address   => $shortstring,
    booktitle => $shortstring,
    isbn      => $shortstring,
    issued    => $integer,
    mnth      => $integer,
    note      => $mediumstring,
    nr        => $integer,
    publisher => $shortstring,
    series    => $shortstring,
    stringid  => $shortstring,
    title     => $mediumstring,
    volume    => $integer,
);
my $doc_attributes_pkey="ID";
# multi-valued attributes; each one is stored in its own
# <table>_<attribute> table (see createRepeatTables)
my %doc_multi_attributes=(
    homepage => $mediumstring,
    seeAlso  => $mediumstring,
);
my @doc_fk;    # the document table has no foreign keys

##################################################################
### Venue type table
my $tbl_venue_type="VenueType";
my %venue_type_attributes=(
    ID   => $integer,
    name => $shortstring,
);
my $venue_type_attributes_pkey="ID";

##################################################################
### Venue table (and corresponding multi attribute tables)
my $tbl_venue="Venue";
my %venue_attributes=(
    ID            => $integer,
    fk_document   => $integer,
    fk_venue_type => $integer,
);
my $venue_attributes_pkey="ID";
my %venue_multi_attributes;    # venues have no multi-valued attributes
my %venue_fk1=(
    DEST        => $tbl_doc,
    fk_document => "ID",
);
my %venue_fk2=(
    DEST          => $tbl_venue_type,
    fk_venue_type => "ID",
);
my @venue_fk=(\%venue_fk1,\%venue_fk2);


##################################################################
### Publication type table
my $tbl_publication_type="PublicationType";
my %publication_type_attributes=(
    ID   => $integer,
    name => $shortstring,
);
my $publication_type_attributes_pkey="ID";


##################################################################
### Publication table (and corresponding multi attribute tables)
my $tbl_publication="Publication";
my %publication_attributes=(
    ID                  => $integer,
    chapter             => $integer,
    fk_document         => $integer,
    fk_publication_type => $integer,
    fk_venue            => $integer,
    pages               => $integer,
);
my $publication_attributes_pkey="ID";
my %publication_multi_attributes=(
    cdrom => $mediumstring,
);
my %publication_fk1=(
    DEST        => $tbl_doc,
    fk_document => "ID",
);
my %publication_fk2=(
    DEST                => $tbl_publication_type,
    fk_publication_type => "ID",
);
my %publication_fk3=(
    DEST     => $tbl_venue,
    fk_venue => "ID",
);
my @publication_fk=(\%publication_fk1,\%publication_fk2,\%publication_fk3);


##################################################################
### Person table
my $tbl_person="Person";
my %person_attributes=(
    ID       => $integer,
    name     => $shortstring,
    stringid => $shortstring,
);
my $person_attributes_pkey="ID";


##################################################################
### References table
my $tbl_references="Reference";
my %references_attributes=(
    fk_from => $integer,
    fk_to   => $integer,
);
my $references_attributes_pkey=0; # external primary key
my %references_fk1=(
    DEST    => $tbl_publication,
    fk_from => "ID",
);
my %references_fk2=(
    DEST  => $tbl_publication,
    fk_to => "ID",
);
my @references_fk=(\%references_fk1,\%references_fk2);


##################################################################
### Abstract table
my $tbl_abstract="Abstract";
my %abstract_attributes=(
    fk_publication => $integer,
    txt            => $largestring,
);
my $abstract_attributes_pkey="fk_publication";
my %abstract_fk1=(
    DEST           => $tbl_publication,
    fk_publication => "ID",
);
my @abstract_fk=(\%abstract_fk1);


##################################################################
### Author table
my $tbl_author="Author";
my %author_attributes=(
    fk_person      => $integer,
    fk_publication => $integer,
);
my $author_attributes_pkey="fk_person,fk_publication";
my %author_fk1=(
    DEST      => $tbl_person,
    fk_person => "ID",
);
my %author_fk2=(
    DEST           => $tbl_publication,
    fk_publication => "ID",
);
my @author_fk=(\%author_fk1,\%author_fk2);


##################################################################
### Editor table
my $tbl_editor="Editor";
my %editor_attributes=(
    fk_document => $integer,
    fk_person   => $integer,
);
my $editor_attributes_pkey="fk_person,fk_document";
my %editor_fk1=(
    DEST      => $tbl_person,
    fk_person => "ID",
);
my %editor_fk2=(
    DEST        => $tbl_doc,
    fk_document => "ID",
);
my @editor_fk=(\%editor_fk1,\%editor_fk2);


#####################
###### Table creation
# Turn the table descriptions into CREATE TABLE statements. The statements
# are only built here; they are written to the output file further down.

### Document table (and corresponding multi attribute tables)
my $sql_document=createTableSQL(
    $tbl_doc,\%doc_attributes,$doc_attributes_pkey,\@doc_fk);
my $sql_repeat_document=createRepeatTables(
    $tbl_doc,"fk_document",$integer,\%doc_multi_attributes);

### Publication table (and corresponding multi attribute tables)
my $sql_publication=createTableSQL(
    $tbl_publication,\%publication_attributes,
    $publication_attributes_pkey,\@publication_fk);
my $sql_repeat_publication=createRepeatTables(
    $tbl_publication,"fk_publication",$integer,
    \%publication_multi_attributes);

### Publication type table (no foreign keys)
my $sql_publication_type=createTableSQL(
    $tbl_publication_type,\%publication_type_attributes,
    $publication_type_attributes_pkey);

### Venue table (and corresponding multi attribute tables)
my $sql_venue=createTableSQL(
    $tbl_venue,\%venue_attributes,$venue_attributes_pkey,\@venue_fk);
my $sql_repeat_venue=createRepeatTables(
    $tbl_venue,"fk_venue",$integer,\%venue_multi_attributes);

### Venue type table (no foreign keys)
my $sql_venue_type=createTableSQL(
    $tbl_venue_type,\%venue_type_attributes,$venue_type_attributes_pkey);

### Person table (no foreign keys)
my $sql_person=createTableSQL(
    $tbl_person,\%person_attributes,$person_attributes_pkey);

### References table
my $sql_references=createTableSQL(
    $tbl_references,\%references_attributes,
    $references_attributes_pkey,\@references_fk);

### Abstract table
my $sql_abstract=createTableSQL(
    $tbl_abstract,\%abstract_attributes,
    $abstract_attributes_pkey,\@abstract_fk);

### Author table
my $sql_author=createTableSQL(
    $tbl_author,\%author_attributes,$author_attributes_pkey,\@author_fk);

### Editor table
my $sql_editor=createTableSQL(
    $tbl_editor,\%editor_attributes,$editor_attributes_pkey,\@editor_fk);


### Fixed inserts
# one row per known venue kind / publication kind, pairing the numeric ID
# from %venue_types / %publication_types with a display name
my $sql_venue_journal=
    "INSERT INTO $tbl_venue_type VALUES ($venue_types{journal},'Journal');";
my $sql_venue_proceeding=
    "INSERT INTO $tbl_venue_type VALUES ($venue_types{proceedings},'Proceedings');";
my $sql_publication_article=
    "INSERT INTO $tbl_publication_type VALUES ($publication_types{article},'Article');";
my $sql_publication_inproceeding=
    "INSERT INTO $tbl_publication_type VALUES ($publication_types{inproceedings},'Inproceedings');";
my $sql_publication_book=
    "INSERT INTO $tbl_publication_type VALUES ($publication_types{book},'Book');";
my $sql_publication_incollection=
    "INSERT INTO $tbl_publication_type VALUES ($publication_types{incollection},'Incollection');";
my $sql_publication_www=
    "INSERT INTO $tbl_publication_type VALUES ($publication_types{www},'Www');";
my $sql_publication_phd=
    "INSERT INTO $tbl_publication_type VALUES ($publication_types{phdthesis},'PhDThesis');";
my $sql_publication_masters=
    "INSERT INTO $tbl_publication_type VALUES ($publication_types{mastersthesis},'MastersThesis');";


### write tables (order matters)
# MonetDB output is wrapped in a transaction; the matching COMMIT comes from
# createExitSQL
print OUT "START TRANSACTION;\n" if ($db_type==$TYPE_MONET);
foreach my $create_statement ($sql_document,
                              $sql_publication_type,
                              $sql_venue_type,
                              $sql_venue,
                              $sql_publication,
                              $sql_person,
                              $sql_references,
                              $sql_abstract,
                              $sql_author,
                              $sql_editor,
                              $sql_repeat_document,
                              $sql_repeat_publication,
                              $sql_repeat_venue) {
    print OUT "$create_statement\n";
}
print OUT "\n";

### write default inserts
foreach my $insert_statement ($sql_venue_journal,
                              $sql_venue_proceeding,
                              $sql_publication_article,
                              $sql_publication_inproceeding,
                              $sql_publication_book,
                              $sql_publication_incollection,
                              $sql_publication_www,
                              $sql_publication_phd,
                              $sql_publication_masters) {
    print OUT "$insert_statement\n";
}
print OUT "\n";


###############################################################################
########### STEP 2: parse input file
### prefixes (for more efficient internal storage)
# short name => namespace URI (stored without the enclosing angle brackets)
my %std_prefixes=(
    journal       => "http://localhost/publications/journals/",
    article       => "http://localhost/publications/articles/",
    incollection  => "http://localhost/publications/incolls/",
    inproceedings => "http://localhost/publications/inprocs/",
    proceedings   => "http://localhost/publications/procs/",
    book          => "http://localhost/publications/books/",
    www           => "http://localhost/publications/wwws/",
    mastersthesis => "http://localhost/publications/masters/",
    phdthesis     => "http://localhost/publications/phds/",
    misc          => "http://localhost/misc/",
);
&log("Main","Manually added standard prefixes");

### set up environment (for IDs), initially empty
# each environment maps a node name to its numeric ID; the reserved key
# ID_CTR holds the next free ID
my %publication_env=(ID_CTR => 0);
my %venue_env=(ID_CTR => 0);
my %doc_env=(ID_CTR => 0);
my %blank_env=(ID_CTR => 0);

### venue ID => document ID and publication ID => document ID
my %venue_to_doc;
my %publication_to_doc;

### parse prefix section
my %prefixes=%std_prefixes; # will be extended

&log("Main","Parsing input file '$infile' now");
my $cnt=0;      # number of input lines handled so far (prefix lines included)
my $initial=1;  # true while the leading @prefix section is still being read
# read the input line by line from the IN handle opened during preparation
while (my $line=<IN>) {
    chomp($line);
    $line=~s/\.$//g;    # drop the dot that terminates the statement

    ### process prefix section
    if ($initial) {
        if ($line!~/^\@prefix/) {
            # first non-prefix line: this very line is handled as data below
            $initial=0;
        } else {
            # expected shape: "@prefix short: <namespace>"
            my @prefix=split(/ +/,$line);
            my $short=$prefix[1];
            $short=~s/://;
            my $long=$prefix[2];
            # strip the enclosing angle brackets: namespaces are stored bare
            # (like the standard prefixes) and parseElement adds the '<'
            # itself when it matches a URI against them
            $long=~s/[<>]//g;
            &log("Main","Extracted prefix '$short' -> '$long'");
            $prefixes{$short}=$long;
        }
    }

    ### process data section
    if (!$initial) {
        my $result=&parseLine($line,\%prefixes);
        my ($subject,$predicate,$object)=@$result;

        #&log("Main","Processing the following tuple");
        #&log("Main","subject=" . &toRDF($subject),1);
        #&log("Main","predicate=" . &toRDF($predicate),1);
        #&log("Main","object=" . &toRDF($object),1);

        # ignore triples with "bench:..." subject
        if ($subject->{"type"} eq "U" && $subject->{"prefix"} eq "bench") {
            &log("Main","Ignoring " . $subject->{"value"} . " typing (not required)");
        } elsif ($subject->{"type"} eq "U" && $subject->{"prefix"} eq "Misc") {
            # NOTE(review): the standard prefix is spelled "misc" (lower
            # case), so this branch may never match -- confirm
            &log("Main","Ignoring UnknownDocument typing (not required)");
        } else {

            # a defined ID means the subject is already known, so the triple
            # updates an existing row; otherwise the subject is a fresh URI
            # (never been used before)
            my $id=&lookup($subject,\%publication_env,\%venue_env,
                            \%blank_env,\%doc_env,\%publication_types,
                            \%venue_types);
            if (defined($id)) {
                print OUT &createUpdateSQL($id,$subject,$predicate,$object,
                                            \%publication_env,\%venue_env,
                                            \%blank_env,\%publication_types,
                                            \%venue_types,$cnt,
                                            \%venue_to_doc,
                                            \%publication_to_doc) . "\n";
            } else {
                # ... then this should be an rdf:type declaration
                if ($predicate->{"prefix"} eq "rdf" &&
                    $predicate->{"value"} eq "type" &&
                    !($object->{"prefix"} eq "rdf" &&
                      $object->{"value"} eq "Bag")) {

                    if ($subject->{"prefix"} eq "misc" &&
                        $subject->{"value"} eq "UnknownDocument") {
                        &log("Main","Ignoring UnknownDocument");
                    } else {
                        # we insert this node in the environment
                        my $id=&insert($subject,$object,\%publication_env,
                                        \%venue_env,\%blank_env,\%doc_env,
                                        \%publication_types,\%venue_types,
                                        \%venue_to_doc,\%publication_to_doc);
                        print OUT &createInsertSQL($id,$subject,$predicate,$object,
                                                    \%venue_to_doc,
                                                    \%publication_to_doc) . "\n";
                    }
                } elsif ($predicate->{"prefix"} eq "rdf" &&
                    $predicate->{"value"} eq "type" &&
                    $object->{"prefix"} eq "rdf" &&
                    $object->{"value"} eq "Bag") {
                    # ignore this triple
                } elsif ($predicate->{"prefix"} eq "rdf" &&
                            $predicate->{"value"}=~/^_[1-9][0-9]*$/) {
                    # rdf:_1, rdf:_2, ... are the members of a reference bag
                    print OUT &createReferenceSQL($subject,$predicate,$object,
                                                    \%publication_types,
                                                    \%publication_env) . "\n";
                } else {
                    &abort("Main","Expected type declaration for fresh URI");
                }

            }
        }
    }

    $cnt++;
    &log("Main","$cnt triples processed") if ($cnt%10000==0);
}
# the input has been read completely; release the handle
close(IN);

### flush the delayed editors
# Every queued entry must have the shape
#   INSERT INTO <editor table> VALUES (<first>,<second>);
# The second field is replaced by the value stored under it in %blank_env
# (presumably the person's name, resolved to the ID assigned in the
# meantime -- the queueing code is not in view here, confirm).
# $#delay is the last index rather than the number of elements, so the
# count is reported with scalar(@delay).
&log("Main","Processing " . scalar(@delay) . " delayed editors");
foreach my $delayed (@delay) {
    if ($delayed=~/^INSERT INTO $tbl_editor VALUES \((.*),(.*)\);$/) {
        my $sql="INSERT INTO $tbl_editor VALUES ("
                . $1 . "," . $blank_env{$2} . ");";
        print OUT "$sql\n";
    } else {
        &abort("Main","Wrong format for delayed editor");
    }
}
# Returns the statement that closes the generated SQL script for the given
# database type code: "exit;" for Oracle, "COMMIT;" for MonetDB and the
# empty string for every other code.
sub createExitSQL {
    my ($db_type)=@_;

    return "exit;" if ($db_type==$TYPE_ORACLE);
    return "COMMIT;" if ($db_type==$TYPE_MONET);
    return "";
}

### add the foreign key constraints with ALTER TABLE statements
# (createTableSQL does not emit them inline); the order below is the order
# in which the statements are written
foreach my $constraint ([$tbl_editor,\%editor_fk1],
                        [$tbl_editor,\%editor_fk2],
                        [$tbl_author,\%author_fk1],
                        [$tbl_author,\%author_fk2],
                        [$tbl_abstract,\%abstract_fk1],
                        [$tbl_references,\%references_fk1],
                        [$tbl_references,\%references_fk2],
                        [$tbl_publication,\%publication_fk1],
                        [$tbl_publication,\%publication_fk2],
                        [$tbl_publication,\%publication_fk3],
                        [$tbl_venue,\%venue_fk1],
                        [$tbl_venue,\%venue_fk2]) {
    print OUT &createAlterTableSQL($constraint->[0],$constraint->[1]);
}

### the multi attribute tables reference their base table
my %document_fk=(
    "DEST"        => $tbl_doc,
    "fk_document" => "ID");
print OUT &createAlterTableSQL("Document_homepage",\%document_fk);
print OUT &createAlterTableSQL("Document_seeAlso",\%document_fk);
my %publication_fk=(
    "DEST"           => $tbl_publication,
    "fk_publication" => "ID");
print OUT &createAlterTableSQL("Publication_cdrom",\%publication_fk);

### terminate the script in the dialect specific way
my $exit_sql=&createExitSQL($db_type);
print OUT "$exit_sql";
# buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so the result of close must be checked for the output file
&abort("Main","Output file '$outfile' could not be closed.")
    if (!close(OUT));

###############################################################################
########### Function library
# Builds a CREATE TABLE statement.
#   $table_name  name of the table
#   $fields      hash ref: column name => SQL column type
#   $pkey        primary key column list; a false value omits the clause
#   $fkeys       accepted but not used: foreign keys are not emitted inline
# Columns are listed sorted by name, which gives the table a stable column
# order regardless of hash ordering.
sub createTableSQL {
    my ($table_name,$fields,$pkey,$fkeys)=@_;

    my @column_defs=map { "$_ " . $fields->{$_} } sort keys %$fields;

    my $sql="CREATE TABLE $table_name (" . join(",",@column_defs);
    $sql.=",PRIMARY KEY($pkey)" if ($pkey);

    return $sql . ");";
}

# Builds the ALTER TABLE statement (terminated by ";\n") that adds one
# foreign key to a table.
#   $table_name  table that receives the constraint
#   $fkey        foreign key hash ref as understood by foreignKeySQL
# The result is a single string. The pieces are concatenated with "." on
# purpose: joined by a comma the sub would return a two element list, and a
# caller in scalar context would only receive the trailing FOREIGN KEY part.
sub createAlterTableSQL {
    my $table_name=shift;
    my $fkey=shift;

    return "ALTER TABLE $table_name ADD " . &foreignKeySQL($fkey) . ";\n";
}



# Builds the "FOREIGN KEY (...) REFERENCES table(...)" clause for one
# foreign key description.
#   $fk  hash ref: the key "DEST" names the referenced table, every other
#        entry maps a local column to the referenced column
# Aborts when the destination table or the column lists turn out empty.
sub foreignKeySQL {
    my ($fk)=@_;

    my $dest="";
    my @local_columns;
    my @referenced_columns;
    foreach my $entry (keys %$fk) {
        if ($entry eq "DEST") {
            $dest=$fk->{$entry};
            next;
        }
        # local and referenced columns are collected pairwise, so both
        # lists keep the same order
        push(@local_columns,$entry);
        push(@referenced_columns,$fk->{$entry});
    }
    my $from=join(",",@local_columns);
    my $to=join(",",@referenced_columns);

    &abort("foreignKeySQL","Foreign Key creation failed!")
        if ($dest eq "" || $from eq "" || $to eq "");

    # build result string
    return "FOREIGN KEY ($from) REFERENCES $dest($to)";
}


# Builds the CREATE TABLE statements for the multi-valued attributes of a
# base table: one table "<base table>_<attribute>" per attribute, holding
# the key of the base row plus the attribute value, with both columns
# forming the primary key.
#   $base_table         name of the base table
#   $base_key           name of the column that refers to the base table
#   $base_key_datatype  SQL type of that column
#   $fields             hash ref: attribute name => SQL column type
# Returns the statements joined by newlines, ordered by attribute name.
# For an empty attribute set the result is the empty string (not undef), so
# callers can interpolate it without special cases.
sub createRepeatTables {
    my $base_table=shift;
    my $base_key=shift;
    my $base_key_datatype=shift;
    my $fields=shift;

    my @statements;
    foreach my $key (sort keys %$fields) {
        push(@statements,
             "CREATE TABLE $base_table" . "_$key "
             . "($base_key $base_key_datatype,$key $fields->{$key},"
             . "PRIMARY KEY($base_key,$key));");
    }

    return join("\n",@statements);
}


# Splits one input line into subject, predicate and object and parses each
# part with parseElement.
#   $line      the line, split at runs of blanks
#   $prefixes  hash ref of known prefixes, handed on to parseElement
# The first two tokens are subject and predicate; all remaining tokens are
# re-joined with single blanks and form the object.
# Returns a reference to an array holding three hash refs:
# [subject, predicate, object].
sub parseLine {
    my ($line,$prefixes)=@_;

    my ($subject_token,$predicate_token,@object_tokens)=split(/ +/,$line);

    my %subject=&parseElement($subject_token,$prefixes);
    my %predicate=&parseElement($predicate_token,$prefixes);

    # without any object token the object text stays undefined
    my $object_text=@object_tokens ? join(" ",@object_tokens) : undef;
    my %object=&parseElement($object_text,$prefixes);

    return [\%subject,\%predicate,\%object];
}

# Parses one element (subject, predicate or object) of a triple.
#   $element   textual form of the element
#   $prefixes  hash ref: prefix name => namespace URI (without brackets)
# Returns a hash (as a list) with the keys
#   type      "U" (URI), "B" (blank node) or "L" (literal)
#   value     local name, blank node name or literal text
#   prefix    prefix name (URIs only)
#   datatype  xsd datatype name (literals only)
# Aborts when the element is inconsistent, uses an unknown prefix, or when a
# bracketed URI does not match exactly one known namespace.
sub parseElement {
    my $element=shift;
    my $prefixes=shift;

    my %elem;

    ################################################## parsing
    # (1) special case: UnknownDocument
    # NOTE(review): the compared literal is the empty string here; confirm
    # that this is really the intended marker for the unknown document.
    if ($element eq "") {
        $elem{"type"}="U";
        $elem{"prefix"}="misc";
        $elem{"value"}="UnknownDocument";

    # (2) URI enclosed in angle brackets
    } elsif ($element=~/^<(.*)>$/) {

        my $match=0;
        $elem{"type"}="U";
        foreach my $key (keys %$prefixes) {
            my $cur_prefix=$prefixes->{$key};
            # \Q...\E makes the namespace match literally; without it
            # characters such as '.', '+' or '?' inside the namespace would
            # act as regex operators and could match foreign URIs
            if ($element=~/^<\Q$cur_prefix\E(.*)>$/) {
                $elem{"value"}=$1;
                $elem{"prefix"}=$key;
                $match++;
            }
        }
        # the URI must belong to exactly one known namespace
        if ($match!=1) {
            &abort("parseElement","'$element' matches $match prefixes");
        }

    # (3) Literal of the form "some text"^^xsd:datatype
    } elsif ($element=~/^"(.*)"\^\^xsd:(.*)$/) {
        $elem{"value"}=$1;
        $elem{"datatype"}=$2;
        $elem{"type"}="L";

    # (4) URI of the form prefix:value or blank node (_:value)
    } else {
        my @spl=split(/:/,$element);
        if ($spl[0] eq "_") {
            $elem{"type"}="B";
        } else {
            $elem{"prefix"}=$spl[0];
            $elem{"type"}="U";
        }
        $elem{"value"}=$spl[1];
    }

    # some consistency checks
    # (a) type must be defined
    &abort("parseElement","No type defined") if (!defined($elem{"type"}));
    # (b) if prefix defined, it must be listed
    &abort("parseElement","Undefined prefix '" . $elem{"prefix"} . "'")
        if (!($elem{"prefix"} eq "") && !defined($prefixes->{$elem{"prefix"}}));
    # (c) URI must have prefix and value defined, but no datatype
    if ($elem{"type"} eq "U") {
        &abort("parseElement","URI without prefix/value detected")
            if ($elem{"prefix"} eq "" || $elem{"value"} eq "");
        &abort("parseElement","URI with datatype detected")
            if (defined($elem{"datatype"}));
    }
    # (d) Blank node must have a value, but neither prefix nor datatype
    if ($elem{"type"} eq "B") {
        &abort("parseElement","Blank node without value detected")
            if ($elem{"value"} eq "");
        &abort("parseElement","Blank node with datatype/prefix detected")
            if (defined($elem{"prefix"}) || defined($elem{"datatype"}));
    }
    # (e) Literal must have datatype defined, but no prefix
    if ($elem{"type"} eq "L") {
        &abort("parseElement","Literal without data type detected")
            if ($elem{"datatype"} eq "");
        &abort("parseElement","Literal with prefix detected")
            if (defined($elem{"prefix"}));
    }

    # OK, all fine... so return the element
    return %elem;
}

# Reports a fatal error on STDOUT, tagged with the name of the reporting
# component, and terminates the program with exit status 1.
#   $caller   name of the component that reports the error
#   $message  error text (an exclamation mark is appended)
sub abort {
    my ($caller,$message)=@_;

    print STDOUT "[$caller] ERROR: $message!\nABORTING!\n";
    exit(1);
}

# Writes a log line to STDOUT.
#   $caller   name of the component that logs
#   $message  log text (a full stop is appended)
#   $indent   when true, the line is indented with a tab and the component
#             tag is left out
# Must be called as &log(...), since a plain log(...) would be the builtin.
sub log {
    my ($caller,$message,$indent)=@_;

    my $line=$indent ? "\t$message.\n" : "[$caller] $message.\n";
    print STDOUT $line;
}

# Renders a parsed element (hash ref as produced by parseElement) in its
# textual form:
#   type "U"  ->  prefix:value
#   type "B"  ->  _:value
#   type "L"  ->  "value"^^xsd:datatype
# Any other type yields undef.
sub toRDF {
    my ($element)=@_;

    my $kind=$element->{"type"};
    my $rendered;    # stays undefined for unknown types
    if ($kind eq "U") {
        $rendered=join(":",$element->{"prefix"},$element->{"value"});
    } elsif ($kind eq "B") {
        $rendered="_:" . $element->{"value"};
    } elsif ($kind eq "L") {
        $rendered='"' . $element->{"value"} . '"^^xsd:' . $element->{"datatype"};
    }

    return $rendered;
}


# Looks up the numeric ID that was assigned to an element earlier.
#   $elem               parsed element (hash ref)
#   $publication_env    hash ref: publication name => ID
#   $venue_env          hash ref: venue name => ID
#   $blank_env          hash ref: person name => ID
#   $doc_env            not used by the lookup
#   $publication_types  hash ref whose keys are the publication prefixes
#   $venue_types        hash ref whose keys are the venue prefixes
# Returns the ID, or undef when the element is not known yet (and for
# misc:UnknownDocument). Aborts for any other kind of element.
sub lookup {
    my ($elem,$publication_env,$venue_env,$blank_env,
        $doc_env,$publication_types,$venue_types)=@_;

    my $kind=$elem->{"type"};
    my $prefix=$elem->{"prefix"};
    my $name=$elem->{"value"};

    # blank nodes and the person Paul_Erdoes are persons
    return $blank_env->{$name}
        if ($kind eq "B" || ($prefix eq "person" && $name eq "Paul_Erdoes"));

    if ($kind eq "U") {
        return $venue_env->{$name}
            if (defined($venue_types->{$prefix}));
        return $publication_env->{$name}
            if (defined($publication_types->{$prefix}));
        if ($prefix eq "misc" && $name eq "UnknownDocument") {
            &log("lookup","Ignoring UnknownDocument");
            return undef;
        }
    }

    &abort("lookup","Unexpected element " . &toRDF($elem)); #failure
}

# Registers a fresh subject in the matching environment and returns the ID
# assigned to it.
#   $subject, $object    parsed subject and object of the type declaration
#   $publication_env, $venue_env, $blank_env, $doc_env
#                        hash refs: name => ID; the key ID_CTR holds the
#                        next free ID
#   $publication_types, $venue_types
#                        hash refs whose keys are the known prefixes
#   $venue_to_doc, $publication_to_doc
#                        hash refs that receive the mapping
#                        venue/publication ID => document ID
# Persons (blank nodes or prefix "person") get an ID from $blank_env; venues
# and publications get an ID of their own plus a document ID.
# Aborts on every inconsistency.
sub insert {
    my ($subject,$object,$publication_env,$venue_env,$blank_env,$doc_env,
        $publication_types,$venue_types,
        $venue_to_doc,$publication_to_doc)=@_;

    &abort("insert","Unexpected object type " . $object->{"type"})
        if ($object->{"type"} ne "U");

    my $prefix=$subject->{"prefix"};
    my $name=$subject->{"value"};

    if ($subject->{"type"} eq "B" || $prefix eq "person") {

        # a person must be declared as foaf:Person
        &abort("insert","Blank node must be a person")
            unless ($object->{"prefix"} eq "foaf"
                    && $object->{"value"} eq "Person");

        $blank_env->{$name}=$blank_env->{"ID_CTR"}++;
        return $blank_env->{$name};
    }

    if ($subject->{"type"} eq "U") {

        # consistency check: the subject prefix is the lower-cased
        # bench: class name of the object
        if ($prefix ne lc($object->{"value"})
                || $object->{"prefix"} ne "bench") {
            &log("insert",&toRDF($subject) . " --- " . &toRDF($object) . "\n");
            &abort("insert","Subject prefix and object value must coincide");
        }

        # take fresh IDs from the environments and record them
        if (defined($venue_types->{$prefix})) {
            $venue_env->{$name}=$venue_env->{"ID_CTR"}++;
            $doc_env->{$name}=$doc_env->{"ID_CTR"}++;
            my $venue_id=$venue_env->{$name};

            # add mapping to venue_to_doc hash
            $venue_to_doc->{$venue_id}=$doc_env->{$name};

            return $venue_id;
        } elsif (defined($publication_types->{$prefix})) {
            $publication_env->{$name}=$publication_env->{"ID_CTR"}++;
            $doc_env->{$name}=$doc_env->{"ID_CTR"}++;
            my $publication_id=$publication_env->{$name};

            # add mapping to publication_to_doc hash
            $publication_to_doc->{$publication_id}=$doc_env->{$name};

            return $publication_id;
        }
    }

    &abort("insert","Insertion failed");
}

# Builds the INSERT statement(s) for a freshly registered subject.
#   $id                  ID assigned to the subject
#   $subject             parsed subject (hash ref)
#   $predicate, $object  accepted but not used
#   $venue_to_doc        hash ref: venue ID => document ID
#   $publication_to_doc  hash ref: publication ID => document ID
# A person yields one row in the person table. A venue or a publication
# yields a document row followed by the venue/publication row, separated by
# a newline. The VALUES lists are positional: they follow the alphabetical
# column order produced by createTableSQL, so the subject name ends up in
# the stringid column.
# Aborts when the subject prefix is neither a venue nor a publication type.
sub createInsertSQL {
    my ($id,$subject,$predicate,$object,
        $venue_to_doc,$publication_to_doc)=@_;

    my $prefix=$subject->{"prefix"};
    my $name=$subject->{"value"};

    # the nine document columns between ID and stringid stay empty
    my $leading_nulls=join(",",("NULL") x 9);

    my $sql;
    if ($subject->{"type"} eq "B" ||
        ($subject->{"type"} eq "U" && $prefix eq "person")) {
        $sql="INSERT INTO $tbl_person VALUES ($id,NULL,'$name');";
    } elsif (defined($venue_types{$prefix})) {
        my $docid=$venue_to_doc->{$id};
        $sql="INSERT INTO $tbl_doc VALUES ($docid,$leading_nulls,"
            . "'$name',NULL,NULL);\n"
            . "INSERT INTO $tbl_venue VALUES ($id,$docid,"
            . "$venue_types{$prefix});";
    } elsif (defined($publication_types{$prefix})) {
        my $docid=$publication_to_doc->{$id};
        $sql="INSERT INTO $tbl_doc VALUES ($docid,$leading_nulls,"
            . "'$name',NULL,NULL);\n"
            . "INSERT INTO $tbl_publication VALUES ($id,NULL,$docid,"
            . "$publication_types{$prefix},NULL,NULL);";
    } else {
        &abort("createInsertSQL","Undefined type '" . $prefix . "'");
    }

    return $sql;
}

sub createUpdateSQL {
    my $id=shift;
    my $subject=shift;
    my $predicate=shift;
    my $object=shift;
    my $publication_env=shift;
    my $venue_env=shift;
    my $blank_env=shift;
    my $publication_types=shift;
    my $venue_types=shift;
    my $line_nr=shift;
    my $venue_to_doc=shift;
    my $publication_to_doc=shift;

    $object->{"value"}=~s/'/-/;

    my $sql;
    if ($subject->{"type"} eq "B" ||
        ($subject->{"type"} eq "U" && $subject->{"prefix"} eq "person")) {

        ### update name table
        if ($predicate->{"type"} eq "U" &&
            $predicate->{"prefix"} eq "foaf" &&
            $predicate->{"value"} eq "name") {
            $sql="UPDATE $tbl_person SET name='" . $object->{"value"} 
                . "' WHERE ID=$id;";
        } else {
            &log("createUpdateSQL","WARNING: duplicate definition of "
                . &toRDF($subject));
            &abort("createUpdateSQL","Unhandled blank predicate");
        }

    } elsif (defined($venue_types{$subject->{"prefix"}})) {

        # get corresponding parent id
        my $docid=$venue_to_doc->{$id};

        if ($predicate->{"type"} eq "U") {

            ###################################### Common fields
            ###################################### (single)
            ### swrc:address
            if ($predicate->{"prefix"} eq "swrc" && 
                        $predicate->{"value"} eq "address") {
                $sql="UPDATE $tbl_doc SET address='"
                    . $object->{"value"} . "' WHERE ID=$docid;";
                
            ### swrc:note
            } elsif ($predicate->{"prefix"} eq "swrc" && 
                        $predicate->{"value"} eq "note") {
                $sql="UPDATE $tbl_doc SET note='"
                    . $object->{"value"} . "' WHERE ID=$docid;";
                
            ### swrc:number
            } elsif ($predicate->{"prefix"} eq "swrc" && 
                $predicate->{"value"} eq "number") {
                $sql="UPDATE $tbl_doc SET nr="
                    . $object->{"value"} . " WHERE ID=$docid;";

            ### dc:title
            } elsif ($predicate->{"prefix"} eq "dc" && 
                        $predicate->{"value"} eq "title") {
                $sql="UPDATE $tbl_doc SET title='"
                    . $object->{"value"} . "' WHERE ID=$docid;";

            ### swrc: volume
            } elsif ($predicate->{"prefix"} eq "swrc" && 
                        $predicate->{"value"} eq "volume") {
                $sql="UPDATE $tbl_doc SET volume="
                    . $object->{"value"} . " WHERE ID=$docid;";

            ### dcterms:issued
            } elsif ($predicate->{"prefix"} eq "dcterms" && 
                        $predicate->{"value"} eq "issued") {
                $sql="UPDATE $tbl_doc SET issued="
                    . $object->{"value"} . " WHERE ID=$docid;";

            ### bench:booktitle
            } elsif ($predicate->{"prefix"} eq "bench" && 
                        $predicate->{"value"} eq "booktitle") {
                $sql="UPDATE $tbl_doc SET booktitle='"
                    . $object->{"value"} . "' WHERE ID=$docid;";

            ### swrc:isbn
            } elsif ($predicate->{"prefix"} eq "swrc" && 
                        $predicate->{"value"} eq "isbn") {
                $sql="UPDATE $tbl_doc SET isbn='"
                    . $object->{"value"} . "' WHERE ID=$docid;";

            ### dc:publisher
            } elsif ($predicate->{"prefix"} eq "dc" && 
                        $predicate->{"value"} eq "publisher") {
                $sql="UPDATE $tbl_doc SET publisher='"
                    . $object->{"value"} . "' WHERE ID=$docid;";

            ### swrc:series 
            } elsif ($predicate->{"prefix"} eq "swrc" && 
                        $predicate->{"value"} eq "series") {
                $sql="UPDATE $tbl_doc SET series="
                    . $object->{"value"} . " WHERE ID=$docid;";

            ### swrc:month
            } elsif ($predicate->{"prefix"} eq "swrc" && 
                        $predicate->{"value"} eq "month") {
                $sql="UPDATE $tbl_doc SET mnth="
                    . $object->{"value"} . " WHERE ID=$docid;";


            ###################################### Common fields
            ###################################### (multi)
            ### foaf:homepage
            } elsif ($predicate->{"prefix"} eq "foaf" && 
                        $predicate->{"value"} eq "homepage") {
                $sql="INSERT INTO $tbl_doc" . "_homepage VALUES ("
                    . "$docid,'" . $object->{"value"} . "');";

            ### rdfs:seeAlso
            } elsif ($predicate->{"prefix"} eq "rdfs" && 
                        $predicate->{"value"} eq "seeAlso") {
                $sql="INSERT INTO $tbl_doc" . "_seeAlso VALUES ("
                    . "$docid,'" . $object->{"value"} . "');";


            ###################################### Persons 
            ### swrc:editor
            } elsif ($predicate->{"prefix"} eq "swrc" && 
                        $predicate->{"value"} eq "editor") {
                if ($object->{"prefix"} eq "person" &&
                        $object->{"value"} eq "Paul_Erdoes") {
                    $sql="INSERT INTO $tbl_editor VALUES ("
                        . $docid . ",0);"; # Paul Erdoes has ID 0
                } elsif (defined($blank_env->{$object->{"value"}})) {
                    $sql="INSERT INTO $tbl_editor VALUES ("
                        . $docid . ","
                        . $blank_env->{$object->{"value"}} . ");";
                } else {
                    push(@delay,"INSERT INTO $tbl_editor VALUES ($docid,"
                                . $object->{"value"} . ");");
                }

            } else {
                &abort("createUpdateSQL","Unhandled venue triple type "
                        . &toRDF($subject) . " - " . &toRDF($predicate)
                        . " - " . &toRDF($object));
            }
        } else {
            &abort("createUpdateSQL","Non-URI predicate detected");
        }

    } elsif (defined($publication_types{$subject->{"prefix"}})) {

        # get corresponding parent id
        my $docid=$publication_to_doc->{$id};

        if ($predicate->{"type"} eq "U") {

            ###################################### Common fields
            ###################################### (single)
            ### swrc:address
            if ($predicate->{"prefix"} eq "swrc" && 
                        $predicate->{"value"} eq "address") {
                $sql="UPDATE $tbl_doc SET address='"
                    . $object->{"value"} . "' WHERE ID=$docid;";
                
            ### swrc:note
            } elsif ($predicate->{"prefix"} eq "swrc" && 
                        $predicate->{"value"} eq "note") {
                $sql="UPDATE $tbl_doc SET note='"
                    . $object->{"value"} . "' WHERE ID=$docid;";
                
            ### swrc:number
            } elsif ($predicate->{"prefix"} eq "swrc" && 
                $predicate->{"value"} eq "number") {
                $sql="UPDATE $tbl_doc SET nr="
                    . $object->{"value"} . " WHERE ID=$docid;";

            ### dc:title
            } elsif ($predicate->{"prefix"} eq "dc" && 
                        $predicate->{"value"} eq "title") {
                $sql="UPDATE $tbl_doc SET title='"
                    . $object->{"value"} . "' WHERE ID=$docid;";

            ### swrc: volume
            } elsif ($predicate->{"prefix"} eq "swrc" && 
                        $predicate->{"value"} eq "volume") {
                $sql="UPDATE $tbl_doc SET volume="
                    . $object->{"value"} . " WHERE ID=$docid;";

            ### dcterms:issued
            } elsif ($predicate->{"prefix"} eq "dcterms" && 
                        $predicate->{"value"} eq "issued") {
                $sql="UPDATE $tbl_doc SET issued="
                    . $object->{"value"} . " WHERE ID=$docid;";

            ### bench:booktitle
            } elsif ($predicate->{"prefix"} eq "bench" && 
                        $predicate->{"value"} eq "booktitle") {
                $sql="UPDATE $tbl_doc SET booktitle='"
                    . $object->{"value"} . "' WHERE ID=$docid;";

            ### swrc:isbn
            } elsif ($predicate->{"prefix"} eq "swrc" && 
                        $predicate->{"value"} eq "isbn") {
                $sql="UPDATE $tbl_doc SET isbn='"
                    . $object->{"value"} . "' WHERE ID=$docid;";

            ### dc:publisher
            } elsif ($predicate->{"prefix"} eq "dc" && 
                        $predicate->{"value"} eq "publisher") {
                $sql="UPDATE $tbl_doc SET publisher='"
                    . $object->{"value"} . "' WHERE ID=$docid;";

            ### swrc:series 
            } elsif ($predicate->{"prefix"} eq "swrc" && 
                        $predicate->{"value"} eq "series") {
                $sql="UPDATE $tbl_doc SET series="
                    . $object->{"value"} . " WHERE ID=$docid;";

            ### swrc:month
            } elsif ($predicate->{"prefix"} eq "swrc" && 
                        $predicate->{"value"} eq "month") {
                $sql="UPDATE $tbl_doc SET mnth="
                    . $object->{"value"} . " WHERE ID=$docid;";


            ###################################### Common fields
            ###################################### (multi)
            ### foaf:homepage
            } elsif ($predicate->{"prefix"} eq "foaf" && 
                        $predicate->{"value"} eq "homepage") {
                $sql="INSERT INTO $tbl_doc" . "_homepage VALUES ("
                    . "$docid,'" . $object->{"value"} . "');";

            ### rdfs:seeAlso
            } elsif ($predicate->{"prefix"} eq "rdfs" && 
                        $predicate->{"value"} eq "seeAlso") {
                $sql="INSERT INTO $tbl_doc" . "_seeAlso VALUES ("
                    . "$docid,'" . $object->{"value"} . "');";

                
            ###################################### Individual fields 
            ###################################### (single)
            ### swrc:pages
            } elsif ($predicate->{"prefix"} eq "swrc" && 
                        $predicate->{"value"} eq "pages") {
                $sql="UPDATE $tbl_publication SET pages="
                    . $object->{"value"} . " WHERE ID=$id;";
                
            ### swrc:chapter
            } elsif ($predicate->{"prefix"} eq "swrc" && 
                        $predicate->{"value"} eq "chapter") {
                $sql="UPDATE $tbl_publication SET chapter="
                    . $object->{"value"} . " WHERE ID=$id;";
                
            ### swrc:journal
            } elsif ($predicate->{"prefix"} eq "swrc" && 
                        $predicate->{"value"} eq "journal") {
                $sql="UPDATE $tbl_publication SET fk_venue="
                    . $venue_env->{$object->{"value"}}
                    . " WHERE ID=$id;";

            ### dcterms:partOf
            } elsif ($predicate->{"prefix"} eq "dcterms" && 
                        $predicate->{"value"} eq "partOf") {
                # this is exactly the same as swrc:journal
                # (but links inproceedings to proceedings
                #  instead of articles to journals)
                $sql="UPDATE $tbl_publication SET fk_venue="
                    . $venue_env->{$object->{"value"}}
                    . " WHERE ID=$id;";

            ###################################### Individual fields 
            ###################################### (multi)
            ### bench:cdrom
            } elsif ($predicate->{"prefix"} eq "bench" && 
                        $predicate->{"value"} eq "cdrom") {
                $sql="INSERT INTO $tbl_publication" . "_cdrom VALUES "
                    . "($id,'" . $object->{"value"} . "');";

            ###################################### Persons, 
            ###################################### References, 
            ###################################### Abstract
            ### dc:creator
            } elsif ($predicate->{"prefix"} eq "dc" && 
                        $predicate->{"value"} eq "creator") {
                $sql="INSERT INTO $tbl_author VALUES ("
                    . $blank_env->{$object->{"value"}} . ",$id);";

            ###################################### Persons 
            ### swrc:editor
            } elsif ($predicate->{"prefix"} eq "swrc" && 
                        $predicate->{"value"} eq "editor") {
                if ($object->{"prefix"} eq "person" &&
                        $object->{"value"} eq "Paul_Erdoes") {
                    $sql="INSERT INTO $tbl_editor VALUES ("
                        . $docid . ",0);"; # Paul Erdoes has ID 0
                } elsif (defined($blank_env->{$object->{"value"}})) {
                    $sql="INSERT INTO $tbl_editor VALUES ("
                        . $docid . ","
                        . $blank_env->{$object->{"value"}} . ");";
                } else {
                    push(@delay,"INSERT INTO $tbl_editor VALUES ($docid,"
                                . $object->{"value"} . ");");
                }

            ### dcterms:references
            } elsif ($predicate->{"prefix"} eq "dcterms" && 
                        $predicate->{"value"} eq "references") {

                $last_refblank_id=$publication_env->{$subject->{"value"}};
                $last_refblank=&toRDF($object);

            ### bench:abstract 
            } elsif ($predicate->{"prefix"} eq "bench" && 
                        $predicate->{"value"} eq "abstract") {
                $sql="INSERT INTO $tbl_abstract VALUES ($id,'"
                    . $object->{"value"} . "');";

            } else {
                &abort("createUpdateSQL","Unhandled publication triple type "
                        . &toRDF($subject) . " - " . &toRDF($predicate)
                        . " - " . &toRDF($object));
            }
        } else {
            &abort("createUpdateSQL","Non-URI predicate detected");
        }
    } else {
        &abort("createUpdateSQL","Undefined type '" . $subject->{"prefix"} . "'");
    }
    return $sql;
}    


###############################################################################
# createReferenceSQL
#
# Builds the INSERT statement for one entry of a reference list, i.e. a
# triple whose subject is the blank node remembered by the most recent
# dcterms:references triple (file-level $last_refblank / $last_refblank_id).
#
# Arguments (positional):
#   $subject           - hash ref describing the subject; serialized with
#                        toRDF() and compared against $last_refblank
#   $predicate         - predicate of the triple (accepted, but not inspected)
#   $object            - hash ref with at least the keys "prefix" and "value"
#   $publication_types - hash ref; a defined entry for the object's prefix
#                        marks the object as a publication
#   $publication_env   - hash ref mapping the object's "value" to the ID that
#                        is written into the reference table
#
# Returns the SQL statement as a string:
#   INSERT INTO <references table> VALUES (<citing id>,<cited id or NULL>);
#
# Aborts (via abort(), which is expected not to return) when
#   - the subject is not the current reference blank node,
#   - the object is neither "UnknownDocument" nor of a publication type,
#   - the object is of a publication type but has no ID in $publication_env
#     (previously this silently produced the malformed "VALUES (<id>,);").
###############################################################################
sub createReferenceSQL {

    my ($subject, $predicate, $object, $publication_types, $publication_env) = @_;

    ### consistency check: the entry must belong to the reference list that
    ### was opened by the last dcterms:references triple
    if (&toRDF($subject) ne $last_refblank) {
        &abort("createReferenceSQL","Inconstistent reference");
        return undef; # only reached if abort() should ever return
    }

    ### determine the right-hand side of the reference
    my $cited_id;
    if ($object->{"value"} eq "UnknownDocument") {
        # references to unknown documents are stored as NULL
        $cited_id="NULL";
    } else {
        &abort("createReferenceSQL","Reference to non-publication-type")
            if (!defined($publication_types->{$object->{"prefix"}}));

        $cited_id=$publication_env->{$object->{"value"}};

        # without this check an unknown publication would yield invalid SQL
        &abort("createReferenceSQL","Reference to unknown publication "
                . &toRDF($object))
            if (!defined($cited_id));
    }

    return "INSERT INTO $tbl_references VALUES ($last_refblank_id,$cited_id);";
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy