All downloads are free. The search and download functionality uses the official Maven repository.

org.w3.banana.io.NTriplesReaderTestSuite.scala Maven / Gradle / Ivy

package org.w3.banana.io

import org.w3.banana._
import scala.util._
import org.scalatest._
import java.io._

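/**
 * Test suite for N-Triples parsing, parameterised over the RDF implementation `Rdf`.
 *
 * It exercises the parser on single components (IRIs, literals, blank nodes), on single
 * triples, on whole documents, and on positive and negative syntax tests taken from the
 * W3C N-Triples test suite (http://www.w3.org/2013/N-TriplesTests/).
 */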
class NTriplesReaderTestSuite[Rdf <: RDF](implicit
  ops: RDFOps[Rdf],
  reader: RDFReader[Rdf, Try, NTriples]
) extends WordSpec with Matchers {

  import ops._
  import NTriplesParser.toGraph

  // FOAF vocabulary for the RDF implementation under test.
  val foaf = FOAFPrefix[Rdf]

  // Subject IRI and plain name used by the hand-written documents in this suite.
  val bblfish: String = "http://bblfish.net/people/henry/card#me"
  val name: String = "Henry Story"

  // The rdf:type IRI as a plain string, so it can be spliced into N-Triples text.
  val typ: String = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"

  /** Returns the full IRI string of the FOAF term whose local name is `n`. */
  def foafstr(n: String): String = s"http://xmlns.com/foaf/0.1/$n"

  /**
   * Reads the N-Triples text `nt` with the implicit `reader`.
   * The empty string is passed as the reader's second argument (presumably the base IRI — confirm).
   */
  def rd(nt: String): Try[Rdf#Graph] = {
    val input = new StringReader(nt)
    reader.read(input, "")
  }
  /**
   * Renders `node` in N-Triples-like syntax: `<iri>` for IRIs, `_:label` for blank nodes and a
   * quoted lexical form (plus `@lang` or `^^<datatype>`) for literals.
   *
   * Note: the lexical form is inserted verbatim, it is not escaped.
   */
  def st(node: Rdf#Node) =
    node.fold(
      uri => s"<${uri.getString}>",
      bn => s"_:${fromBNode(bn)}",
      {
        // Language-tagged literals come first and match whatever their datatype is: in RDF 1.1
        // that datatype is rdf:langString, which a case restricted to xsd:string never matches
        // and which previously fell through to a MatchError.
        case Literal(lexical, _, Some(lang)) => s""""$lexical"@$lang"""
        case Literal(lexical, Literal.xsdString, _) => s""""$lexical""""
        // Use getString, as for IRI nodes above, instead of relying on the IRI's toString.
        case Literal(lexical, tp, _) => s""""$lexical"^^<${tp.getString}>"""
      })


  /**
   * Creates an `NTriplesParser` reading from the text `ntstring`.
   * `skip` is handed to the parser as its second constructor argument.
   */
  def ntparser(ntstring: String, skip: Boolean=false) = {
    val source = new StringReader(ntstring)
    new NTriplesParser[Rdf](source, skip)
  }

  "test that the parser can parse single components. Parser " should  {

    // The inputs below omit the opening delimiter ('<', '"' or '_'); presumably the component
    // parsers expect it to have been consumed already — confirm against NTriplesParser.

    "parse a URL" in {
      val parsed = ntparser(s"$bblfish>").parseIRI()
      parsed should equal (URI(bblfish))
    }

    "parse a URL with an encoded character \\u" in {
      val parser = ntparser("""http://example/\u0053>""")
      parser.parseIRI() should equal (URI("http://example/S"))
    }

    "parse a URL with an encoded character \\U" in {
      val parser = ntparser("""http://example/\U00000053>""")
      parser.parseIRI() should equal (URI("http://example/S"))
    }


    "parse a plain Literal" in {
      val parser = ntparser(name + "\"")
      parser.parsePlainLiteral() should equal(name)
    }

    "not parse a plain Literal that does not close" in {
      // The parser is built outside the Try: only the parse itself is expected to fail.
      val parser = ntparser(name)
      val attempt = Try(parser.parseLiteral())
      attempt should be a 'failure
    }

    "parse a PlainLiteral"  in {
      val parsed = ntparser(name + "\" ").parseLiteral()
      parsed should equal(Literal(name))
    }

    "parse a LangLiteral" in {
      val simpleTag = ntparser(name + "\"@en ").parseLiteral()
      simpleTag should equal(Literal.tagged(name,Lang("en")))

      val withSubtag = ntparser(name + "\"@en-us ").parseLiteral()
      withSubtag should equal(Literal.tagged(name,Lang("en-us")))
    }

    "parse an TypedLiteral" in {
      val source = s"""123"^^<${xsd.integer.getString}> """
      val parsed = ntparser(source).parseLiteral()
      parsed should equal(Literal("123",xsd.integer))
    }

    "parse a Bnode" in {
      val attempt = Try (ntparser(":123 ").parseBNode())
      attempt should equal(Success(BNode("123")))
    }

    "not parse an illegal BNode" in {
      val attempt = Try (ntparser(":-123 ").parseBNode())
      attempt should be a 'failure
    }

  }


  "test that parser can parse one Triple. The parser" should {

    // parseTriple receives the first character of the statement as its argument,
    // so each input text starts right after that character.

    "not fail on a triple containing only URIs" in {
      val line = s"$bblfish> <$typ> <${foafstr("Person")}> ."
      val result = ntparser(line).parseTriple('<')
      val expected = Triple(URI(bblfish),rdf.`type`,foaf.Person)
      result should be (Success(expected))
    }

    "not fail on a triple containing a Literal" in {
      val line = s"""$bblfish> <${foafstr("name")}> "$name"."""
      val result = ntparser(line).parseTriple('<')
      val expected = Triple(URI(bblfish),foaf.name,Literal(name))
      result should be (Success(expected))
    }

    "not fail on a triple containing a Literal and a bnode" in {
      val line = s""":nolate <${foafstr("name")}> "$name"@en."""
      val result = ntparser(line).parseTriple('_')
      val expected = Triple(BNode("nolate"),foaf.name,Literal.tagged(name,Lang("en")))
      result should be (Success(expected))
    }

    "not fail on a triple containing two bnodes" in {
      val line = s""":jane <${foafstr("knows")}> _:tarzan ."""
      val result = ntparser(line).parseTriple('_')
      val expected = Triple(BNode("jane"),foaf.knows,BNode("tarzan"))
      result should be (Success(expected))
    }

  }




  "Test that parser can parse a document containing one triple. The parser " should {

    "not fail with one triple" in {
      val parser = ntparser(s"<$bblfish> <$typ> <${foafstr("Person")}> .")
      parser.hasNext should be(true)
      parser.next() should be(Success(Triple(URI(bblfish), rdf.`type`, foaf.Person)))
      // Reading past the single statement yields a failure whose character is -1.
      val eof = parser.next()
      eof.isFailure should be(true)
      eof.failed.get.asInstanceOf[ParseException].character should be (-1)
      parser.hasNext should be(false)
    }

    "not fail when parsing a document with one triple" in {
      val doc = s"""<$bblfish>     <${foafstr("name")}>      "$name"@de      ."""
      val parsed = toGraph(ntparser(doc))
      val expected = Graph(Triple(URI(bblfish), foaf.name, Literal.tagged(name, Lang("de"))))
      assert(parsed.get isIsomorphicWith expected)
    }

    "not fail when parsing a document with one triple and whitespace" in {
      val doc =
        s"""
         # a document with a comment
           <$bblfish>     <${foafstr("knows")}>      _:anton      .  # and some whitespace

           """
      val parsed = toGraph(ntparser(doc))
      val expected = Graph(Triple(URI(bblfish), foaf.knows, BNode("anton")))
      assert( parsed.get isIsomorphicWith expected)
    }


  }


  "Test that the parser can parse a document containing more triples. The parser " should {
      "parse a document with 5 triples and a comment" in {
        // The last statement must carry its object IRI: the expected graph below contains
        // <http://bertails.org/> as the homepage of _:betehess, and without it the document
        // only holds four well-formed statements.
        val str =
          s"""
            <$bblfish> <${foafstr("name")}> "Henry Story"@en .
            <$bblfish> <${foafstr("knows")}> _:anton .
            <$bblfish> <${foafstr("knows")}> _:betehess .
        # Anton info
        _:anton <${foafstr("name")}> "Anton".
        _:betehess <${foafstr("homepage")}> <http://bertails.org/> .
          """
        val graphTry = toGraph(ntparser(str))
        assert (graphTry.get isIsomorphicWith Graph(
          Triple(URI(bblfish), foaf.name, Literal.tagged(name, Lang("en"))),
          Triple(URI(bblfish), foaf.knows, BNode("anton")),
          Triple(URI(bblfish), foaf.knows, BNode("betehess")),
          Triple(BNode("anton"), foaf.name, Literal("Anton")),
          Triple(BNode("betehess"), foaf.homepage, URI("http://bertails.org/"))
        ))

      }

    "parse a document with 5 triples ( skipping two which do not parse ) and a comment" in {
      // Two statements are broken on purpose: "_|:anton" is not a blank node and the
      // "Anton" statement lacks its final dot. The homepage statement must be well formed
      // (object IRI included), otherwise only two of the three expected triples can be read.
      val str =
        s"""
            <$bblfish> <${foafstr("name")}> "Henry Story" .
            <$bblfish> <${foafstr("knows")}> _|:anton .
            <$bblfish> <${foafstr("knows")}> _:betehess .
        # Anton info
        _:anton <${foafstr("name")}> "Anton"
        _:betehess <${foafstr("homepage")}> <http://bertails.org/> .
          """
      val graphTry = toGraph(ntparser(str,skip=true))
      graphTry.get.size should be (3)
      assert (graphTry.get isIsomorphicWith Graph(
        Triple(URI(bblfish), foaf.name, Literal(name)),
        Triple(URI(bblfish), foaf.knows, BNode("betehess")),
        Triple(BNode("betehess"), foaf.homepage, URI("http://bertails.org/"))
      ))

    }

  }


  /** Parses `string` with a fresh parser (no skipping) and collects the result with `toGraph`. */
  def ntparse(string: String): Try[Rdf#Graph] = {
    val parser = ntparser(string)
    toGraph(parser)
  }



  "w3c tests of type rdft:TestNTriplesPositiveSyntax ( from http://www.w3.org/2013/N-TriplesTests/ ) " should {
    /**
     * Parses the N-Triples document `s` and checks the outcome.
     *
     * @param s    the document text
     * @param size the number of triples the resulting graph must contain
     * @param test an extra predicate on the parsed graph; accepts every graph by default
     */
    def test(s: String, size: Int)(implicit test: Rdf#Graph => Boolean = _ => true) = {
      val parseAttempt = ntparse(s)
      // Check for success first, so that a parse error is reported by the matcher
      // instead of escaping as an exception from `.get`.
      parseAttempt should be a 'success
      val graph = parseAttempt.get
      // Size before predicate: predicates such as `graph.triples.head` assume a non-empty graph.
      graph.size should be(size)
      assert(test(graph))
    }

    "verify that empty files parse with success" in {
      // Three documents without any statement: empty text, a lone comment,
      // and a comment followed by an empty line.
      val commentAndEmptyLine =
        """#One comment, one empty line
          |
        """.stripMargin
      val emptyDocuments = Seq("", "#Empty file", commentAndEmptyLine)
      emptyDocuments.foreach { doc => test(doc, 0) }
    }

    "verify that triples with IRIs parse with success" in {
      // Statements follow the W3C nt-syntax-uri tests. The two escaped forms of "S" are what
      // the assertions on http://example/S below rely on.
      test( """<http://example/s> <http://example/p> <http://example/o> .""", 1)
      test(
        """# x53 is capital S
          |<http://example/\u0053> <http://example/p> <http://example/o> .""".stripMargin, 1) { graph =>
        graph.triples.head == Triple(URI("http://example/S"), URI("http://example/p"), URI("http://example/o"))
      }

      test(
        """# x53 is capital S
          |<http://example/\U00000053> <http://example/p> <http://example/o> .""".stripMargin, 1) { graph =>
        graph.triples.head == Triple(URI("http://example/S"), URI("http://example/p"), URI("http://example/o"))
      }

      test( """# IRI with all chars in it.
              |<http://example/s> <http://example/p> <scheme:!$%25&'()*+,-./0123456789:/@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~?#> .
              | """.stripMargin,
        1
      )
    }


    "verify that Literals parse with Success" in {

      // Each statement needs the subject and predicate that the last assertion expects
      // (http://example/s, http://example/p); a bare literal is not a triple.
      test( """<http://example/s> <http://example/p> "string" .""", 1)
      test( """<http://example/s> <http://example/p> "string"@en .""", 1)
      test( """<http://example/s> <http://example/p> "string"@en-uk .""", 1) { graph =>
        graph isIsomorphicWith Graph(Triple(URI("http://example/s"), URI("http://example/p"),
          Literal.tagged("string", Lang("en-uk"))))
      }
    }

    "verify that Literals containing a string escape parse successfully" in {
      // Subject and predicate are required for the line to be a statement;
      // the assertions only look at the object.
      test( """<http://example/s> <http://example/p> "a\n" .""", 1) { graph =>
        val Triple(_, _, o) = graph.triples.head
        o == Literal("a\n")
      }
      test( """<http://example/s> <http://example/p> "a\u0020b" .""", 1) { graph =>
        val Triple(_, _, o) = graph.triples.head
        o == Literal("a b")
      }
      test( """<http://example/s> <http://example/p> "a\U00000020b" .""", 1) { graph =>
        val Triple(_, _, o) = graph.triples.head
        o == Literal("a b")
      }
    }

    "verify that bnodes parse successfully" in {
      // Blank nodes in subject and in object position. The predicate/object IRIs are the
      // ones the last assertion looks for (http://example/p, http://example/o).
      test( """_:a  <http://example/p> <http://example/o> .""", 1)
      test( """<http://example/s> <http://example/p> _:a .
      _:a  <http://example/p> <http://example/o> .""", 2)

      test( """<http://example/s> <http://example/p> _:1a .
      _:1a  <http://example/p> <http://example/o> .""", 2) { graph => graph.contains(Triple(BNode("1a"), URI("http://example/p"), URI("http://example/o")))
      }

    }

    "verify that datatypes parse successfully" in {
      // Two typed literals with the same lexical form but different datatype IRIs,
      // hence two distinct triples.
      test("""<http://example/s> <http://example/p> "123"^^<http://www.w3.org/2001/XMLSchema#byte> .
             |<http://example/s> <http://example/p> "123"^^<http://www.w3.org/2001/XMLSchema#string> .""".stripMargin,2)
    }

    "verify a large chunk of NTriples" in {

      // Every statement line carries its full IRIs, named after the resourceN scheme that the
      // document's own comments refer to. What matters for the assertion is that the 30
      // statements are well formed and pairwise distinct.
      // NOTE(review): the IRIs and XML literal bodies were reconstructed from the RDF Core
      // test.nt file — confirm against the upstream test data.
      val doc =  """#
          |# Copyright World Wide Web Consortium, (Massachusetts Institute of
          |# Technology, Institut National de Recherche en Informatique et en
          |# Automatique, Keio University).
          |#
          |# All Rights Reserved.
          |#
          |# Please see the full Copyright clause at
          |# 
          |#
          |# Test file with a variety of legal N-Triples
          |#
          |# Dave Beckett - http://purl.org/net/dajobe/
          |#
          |# $Id: test.nt,v 1.7 2003/10/06 15:52:19 dbeckett2 Exp $
          |#
          |#####################################################################
          |
          |# comment lines
          |  	  	   # comment line after whitespace
          |# empty blank line, then one with spaces and tabs
          |
          |
          |<http://example.org/resource1> <http://example.org/property> <http://example.org/resource2> .
          |_:anon <http://example.org/property> <http://example.org/resource2> .
          |<http://example.org/resource2> <http://example.org/property> _:anon .
          |# spaces and tabs throughout:
          | 	 <http://example.org/resource3> 	 <http://example.org/property>	 <http://example.org/resource2> 	.
          |
          |# line ending with CR NL (ASCII 13, ASCII 10)
          |<http://example.org/resource4> <http://example.org/property> <http://example.org/resource2> .
          |
          |# 2 statement lines separated by single CR (ASCII 10)
          |<http://example.org/resource5> <http://example.org/property> <http://example.org/resource2> .
          |<http://example.org/resource6> <http://example.org/property> <http://example.org/resource2> .
          |
          |
          |# All literal escapes
          |<http://example.org/resource7> <http://example.org/property> "simple literal" .
          |<http://example.org/resource8> <http://example.org/property> "backslash:\\" .
          |<http://example.org/resource9> <http://example.org/property> "dquote:\"" .
          |<http://example.org/resource10> <http://example.org/property> "newline:\n" .
          |<http://example.org/resource11> <http://example.org/property> "return\r" .
          |<http://example.org/resource12> <http://example.org/property> "tab:\t" .
          |
          |# Space is optional before final .
          |<http://example.org/resource13> <http://example.org/property> <http://example.org/resource2>.
          |<http://example.org/resource14> <http://example.org/property> "x".
          |<http://example.org/resource15> <http://example.org/property> _:anon.
          |
          |# \\u and \\U escapes
          |# latin small letter e with acute symbol \u00E9 - 3 UTF-8 bytes #xC3 #A9
          |<http://example.org/resource16> <http://example.org/property> "\u00E9" .
          |# Euro symbol \u20ac  - 3 UTF-8 bytes #xE2 #x82 #xAC
          |<http://example.org/resource17> <http://example.org/property> "\u20AC" .
          |# resource18 test removed
          |# resource19 test removed
          |# resource20 test removed
          |
          |# XML Literals as Datatyped Literals
          |<http://example.org/resource21> <http://example.org/property> ""^^<http://www.w3.org/2000/01/rdf-schema#XMLLiteral> .
          |<http://example.org/resource22> <http://example.org/property> " "^^<http://www.w3.org/2000/01/rdf-schema#XMLLiteral> .
          |<http://example.org/resource23> <http://example.org/property> "x"^^<http://www.w3.org/2000/01/rdf-schema#XMLLiteral> .
          |<http://example.org/resource23> <http://example.org/property> "\""^^<http://www.w3.org/2000/01/rdf-schema#XMLLiteral> .
          |<http://example.org/resource24> <http://example.org/property> "<a></a>"^^<http://www.w3.org/2000/01/rdf-schema#XMLLiteral> .
          |<http://example.org/resource25> <http://example.org/property> "a <b></b>"^^<http://www.w3.org/2000/01/rdf-schema#XMLLiteral> .
          |<http://example.org/resource26> <http://example.org/property> "a <b></b> c"^^<http://www.w3.org/2000/01/rdf-schema#XMLLiteral> .
          |<http://example.org/resource26> <http://example.org/property> "a\n<b></b>\nc"^^<http://www.w3.org/2000/01/rdf-schema#XMLLiteral> .
          |<http://example.org/resource27> <http://example.org/property> "chat"^^<http://www.w3.org/2000/01/rdf-schema#XMLLiteral> .
          |# resource28 test removed 2003-08-03
          |# resource29 test removed 2003-08-03
          |
          |# Plain literals with languages
          |<http://example.org/resource30> <http://example.org/property> "chat"@fr .
          |<http://example.org/resource31> <http://example.org/property> "chat"@en .
          |
          |# Typed Literals
          |<http://example.org/resource32> <http://example.org/property> "abc"^^<http://example.org/datatype1> .
          |# resource33 test removed 2003-08-03
          | """.stripMargin
      test(doc, 30)
    }

    "comment following triple" in {
      // Five complete statements, one per kind of object, each followed by a trailing comment.
      test("""<http://example/s> <http://example/p> <http://example/o> . # comment
        |<http://example/s> <http://example/p> _:o . # comment
        |<http://example/s> <http://example/p> "o" . # comment
        |<http://example/s> <http://example/p> "o"^^<http://example/dt> . # comment
        |<http://example/s> <http://example/p> "o"@en . # comment""".stripMargin,5)
    }

    "literal ascii boundary" in {
      //note we are using Scala encoding of chars here
      // Subject and predicate are required for the line to be a statement; only the object is inspected.
      test(
        """<http://example/s> <http://example/p> "\u0000\u0009\u000b\u000c\u000e\u0026\u0028\u005b\u005d\u007f".""",
        1){graph=>
       val Triple(_,_,o) = graph.triples.head
        foldNode(o)(_=>false,_=>false,lit=>lit.lexicalForm.length ==10)
      }
    }

    "literal with UTF-8 boundary" in {
      // Same statement frame as above; the lexical form is expected to hold 26 characters.
      test("<http://example/s> <http://example/p> \"" +
       """\uc280\udfbf\ue0a0\u80e0\ubfbf\ue180\u80ec\ubfbf\ued80\u80ed\u9fbf\uee80\u80ef\ubfbd\uf090\u8080\uf0bf\ubfbd\uf180\u8080\uf3bf\ubfbd\uf480\u8080\uf48f\ubfbd" . """,1) { graph =>
        val Triple(_,_,o) = graph.triples.head
        foldNode(o)(_=>false,_=>false,lit=>{
          val lexical = lit.lexicalForm
          lexical.size == 26 &&
            lexical.contains("龿")
        } )
      }
    }

    "literal all controls" in {
      // The escapes are written with a doubled backslash so that they reach the parser as text.
      val lit="""<http://example/s> <http://example/p> "\\u0000\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007\\u0008\t""" +
        """\\u000B\\u000C\\u000E\\u000F\\u0010\\u0011\\u0012\\u0013\\u0014\\u0015\\u0016\\u0017\\u0018\\u0019\\u001A\\u001B\\u001C""" +
        """\\u001D\\u001E\\u001F" ."""
      test(lit,1)
    }

    // In the tests below every statement is written with an explicit subject and predicate;
    // the assertions only inspect the object literal.

    "literal all punctuation" in {
      test("""<http://example/s> <http://example/p> " !\"#$%&():;<=>?@[]^_`{|}~" .""",1)
    }

    "literal with single quote" in {
      test("""<http://example/s> <http://example/p> "x'y" .""",1)
    }


    "literal_with_2_squotes" in {
      test( """<http://example/s> <http://example/p> "x''y" .""", 1){g=>
        g.triples.head.objectt.fold(uri=>false,bn=>false,lit=>lit.lexicalForm=="x''y")
      }
    }

    "literal_with_dquote" in {
      // This test used to repeat the two-single-quotes input; it now checks one escaped double quote.
      test( """<http://example/s> <http://example/p> "x\"y" .""", 1){g=>
        g.triples.head.objectt.fold(uri=>false,bn=>false,lit=>lit.lexicalForm=="x\"y")
      }
    }
    "literal_with_2_dquotes" in {
      test( """<http://example/s> <http://example/p> "x\"\"y" .""", 1){g=>
        g.triples.head.objectt.fold(uri=>false,bn=>false,lit=>lit.lexicalForm=="""x""y""")
      }
    }
    "literal_with_REVERSE_SOLIDUS2" in {
      test( """<http://example/s> <http://example/p> "test-\\" .""", 1){g=>
        g.triples.head.objectt.fold(uri=>false,bn=>false,lit=>lit.lexicalForm=="""test-\""")
      }
    }
    // Each input is a complete statement (subject, predicate, escaped literal, dot);
    // the assertions compare the unescaped lexical form of the object.
    "literal_with_CHARACTER_TABULATION" in {
      test( "<http://example/s> <http://example/p> \"\\t\" .", 1){g=>
        g.triples.head.objectt.fold(uri=>false,bn=>false,lit=>lit.lexicalForm=="\t")
      }
    }
    "literal_with_BACKSPACE" in {
      test( "<http://example/s> <http://example/p> \"\\b\" .", 1){g=>
        g.triples.head.objectt.fold(uri=>false,bn=>false,lit=>lit.lexicalForm=="\b")
      }
    }
    "literal_with_LINE_FEED" in {
      test( "<http://example/s> <http://example/p> \"\\n\" .", 1){g=>
        g.triples.head.objectt.fold(uri=>false,bn=>false,lit=>lit.lexicalForm=="\n")
      }
    }
    "literal_with_CARRIAGE_RETURN" in {
      test( """<http://example/s> <http://example/p> "\r" .""", 1){g=>
        g.triples.head.objectt.fold(uri=>false,bn=>false,lit=>lit.lexicalForm=="\r")
      }
    }
    "literal_with_FORM_FEED" in {
      test( """<http://example/s> <http://example/p> "\f" .""", 1){g=>
        g.triples.head.objectt.fold(uri=>false,bn=>false,lit=>lit.lexicalForm=="\f")
      }
    }
    "literal_with_REVERSE_SOLIDUS" in {
      test( """<http://example/s> <http://example/p> "\\" .""", 1){g=>
        g.triples.head.objectt.fold(uri=>false,bn=>false,lit=>lit.lexicalForm=="""\""")
      }
    }
    "literal_with_numeric_escape4" in {
      test( """<http://example/s> <http://example/p> "\u006F" .""", 1){g=>
        g.triples.head.objectt.fold(uri=>false,bn=>false,lit=>lit.lexicalForm=="\u006F")
      }
    }
    "literal_with_numeric_escape8" in {
      test( """<http://example/s> <http://example/p> "\U0000006F" .""", 1){g=>
        g.triples.head.objectt.fold(uri=>false,bn=>false,lit=>lit.lexicalForm=="\u006F")
      }
    }
    "langtagged_string" in {
      // Complete statement; the assertion checks lexical form and language tag of the object.
      test( """<http://example/s> <http://example/p> "chat"@en .""", 1){g=>
        val o = g.triples.head.objectt
        o.fold(uri=>false,bn=>false,lit=>lit.lexicalForm=="chat" && lit.lang == Some(Lang("en")))
      }
    }
    "lantag_with_subtag" in {
      test( """<http://example/s> <http://example/p> "Cheers"@en-UK .""", 1){g=>
        val o = g.triples.head.objectt
        o.fold(uri=>false,
               bn=>false,
               lit=> lit.lexicalForm=="Cheers" && lit.lang == Some(Lang("en-UK")))
      }
    }
    "minimal_whitespace" in {
      // Six distinct statements written without any optional whitespace between the terms.
      test( """<http://example/s><http://example/p><http://example/o>.
              |<http://example/s><http://example/p>"Alice".
              |<http://example/s><http://example/p>_:o.
              |_:s<http://example/p><http://example/o>.
              |_:s<http://example/p>"Alice".
              |_:s<http://example/p>_:bnode1.""".stripMargin, 6)
    }

  }

  "w3c tests of type rdft:TestNTriplesNegativeSyntax" should {

    /**
     * Parses `s` in skip mode and checks how many results the parser produced besides the
     * end-of-input failure (a `ParseException` whose character is -1).
     *
     * Inside this group the name hides ScalaTest's own `fail`.
     *
     * @param s     the document text
     * @param erros the expected number of counted results
     * @param test  an extra predicate on the full result list; accepts everything by default
     */
    def fail(s: String,erros: Int, test: List[Try[Rdf#Triple]] => Boolean = _ => true) = {
      val results = ntparser(s, skip = true).toList
      assert(test(results))
      // NOTE(review): successes are counted as well, so a document that is wrongly accepted
      // as one triple still satisfies `erros == 1` — confirm whether that is intended.
      val counted = results.count {
        case Failure(ParseException(_, -1, _)) => false
        case _ => true
      }
      assert(counted == erros)
    }

    // Each document consists of the comment naming the defect plus one statement containing the
    // offending IRI, as in the W3C nt-syntax-bad-uri tests. The numeric escapes are written
    // with a doubled backslash, like the bad-esc tests of this suite, so that the Scala
    // compiler never tries to interpret them itself.
    "nt-syntax-bad-uri-01" in {
      fail("""# Bad IRI : space.
           |<http://example/ space> <http://example/p> <http://example/o> .""".stripMargin,1)
    }
    "nt-syntax-bad-uri-02" in {
      fail("""# Bad IRI : bad escape
        |<http://example/\\u00ZZ11> <http://example/p> <http://example/o> .""".stripMargin,1)
    }
    "nt-syntax-bad-uri-03" in {
      fail("""# Bad IRI : bad escape
             |<http://example/\\U00ZZ1111> <http://example/p> <http://example/o> .""".stripMargin,1)
    }
    "nt-syntax-bad-uri-04" in {
      fail("""# Bad IRI : character escapes not allowed.
             |<http://example/\n> <http://example/p> <http://example/o> .""".stripMargin,1)
    }
    "nt-syntax-bad-uri-05" in {
      fail("""# Bad IRI : character escapes not allowed.
             |<http://example/\/> <http://example/p> <http://example/o> .""".stripMargin,1)
    }
    "nt-syntax-bad-uri-06" in {
      fail("""# No relative IRIs in N-Triples
        |<s> <http://example/p> <http://example/o> .""".stripMargin,1)
    }
    "nt-syntax-bad-uri-07" in {
      fail("""# No relative IRIs in N-Triples
             |<http://example/s> <p> <http://example/o> .""".stripMargin,1)
    }
    "nt-syntax-bad-uri-08" in {
      fail("""# No relative IRIs in N-Triples
             |<http://example/s> <http://example/p> <o> .""".stripMargin,1)
    }
    "nt-syntax-bad-uri-09" in {
      fail("""# No relative IRIs in N-Triples
             |<http://example/s> <http://example/p> "foo"^^<dt> .""".stripMargin,1)
    }
    "nt-syntax-bad-prefix-01" in {
      fail("""@prefix : <http://example/> .""".stripMargin,1)
    }
    "nt-syntax-bad-base-01" in {
      fail("""@base <http://example/> .""".stripMargin,1)
    }
    "nt-syntax-bad-struct-01" in {
      fail("""<http://example/s> <http://example/p> <http://example/o>, <http://example/o2> .""".stripMargin,1)
    }
    "nt-syntax-bad-struct-02" in {
      fail("""<http://example/s> <http://example/p> <http://example/o>; <http://example/p2>, <http://example/o2> .""".stripMargin, 1)
    }
    "nt-syntax-bad-lang-01" in {
      fail("""# Bad lang tag
             |<http://example/s> <http://example/p> "string"@1 .""".stripMargin,1)
    }
    "nt-syntax-bad-esc-01" in {
      fail("""# Bad string escape
             |<http://example/s> <http://example/p> "a\zb" .""".stripMargin,1)
    }
    "nt-syntax-bad-esc-02" in {
      fail("""# Bad string escape
             |<http://example/s> <http://example/p> "\\uWXYZ" .""".stripMargin,1)
    }
    "nt-syntax-bad-esc-03" in {
      fail("""# Bad string escape
             |<http://example/s> <http://example/p> "\\U0000WXYZ" .""".stripMargin,1)
    }
    "nt-syntax-bad-string-01" in {
      fail("""<http://example/s> <http://example/p> "abc' .""".stripMargin,0)
      //we get an eof before the end of the string
    }
    "nt-syntax-bad-string-02" in {
      fail("""<http://example/s> <http://example/p> 1.0 .""".stripMargin,1)
    }
    "nt-syntax-bad-string-03" in {
      fail("""<http://example/s> <http://example/p> 1.0e1 .""".stripMargin,1)
    }
    "nt-syntax-bad-string-04" in {
      fail("""<http://example/s> <http://example/p> '''abc''' .""".stripMargin,1)
    }
    "nt-syntax-bad-string-05" in {
      fail("""<http://example/s> <http://example/p> ""\"abc\""\" .""".stripMargin,1)
    }
    "nt-syntax-bad-string-06" in {
      fail("""<http://example/s> <http://example/p> "abc .""".stripMargin,0)
      // we get an eof before the end of the string
    }
    "nt-syntax-bad-string-07" in {
      fail("""<http://example/s> <http://example/p> abc" .""".stripMargin,1)
    }
    "nt-syntax-bad-num-01" in {
      fail("""<http://example/s> <http://example/p> 1 .""".stripMargin,1)
    }
    "nt-syntax-bad-num-02" in {
      fail("""<http://example/s> <http://example/p> 1.0 .""".stripMargin,1)
    }
    "nt-syntax-bad-num-03" in {
      fail("""<http://example/s> <http://example/p> 1.0e0 .""".stripMargin,1)
    }
  }

  /**
   * Useful method for parsing large files to do speed tests
   * see: Data Set RDF Dumps
   *
   * Reads the whole file in skip mode, prints every result that is a failure and finally
   * prints the number of results, the number of failures and the elapsed time.
   *
   * @param args path_to_NTriplesFile [encoding]
   */
  def main(args: Array[String]): Unit = {
    import java.io._
    require(args.nonEmpty, "usage: path_to_NTriplesFile [encoding]")
    // The optional encoding is the second argument, so it is present as soon as there are two.
    val encoding = if (args.length > 1) args(1) else "UTF-8"
    val stream = new FileInputStream(new File(args(0)))
    // Close the file whatever happens while decoding or parsing.
    try {
      val ntp = new NTriplesParser[Rdf](new InputStreamReader(stream, encoding), true)
      val t1 = System.currentTimeMillis()
      var x = 0
      var failures = 0
      while (ntp.hasNext) {
        val t = ntp.next()
        x = x + 1
        if (t.isFailure) {
          println(s"\r\ntriple=$t")
          failures = failures + 1
        }
      }
      val t2 = System.currentTimeMillis()
      println(s"time to parse $x triples was ${t2 - t1} milliseconds. Found $failures failures. ")
    } finally {
      stream.close()
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy