org.chelona.Main.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of chelona_2.12 Show documentation
Show all versions of chelona_2.12 Show documentation
W3C RDF 1.1 Turtle-, TriG-, Quad- and NTriples-Parser
The newest version!
import java.io.{ StringWriter, Writer }
import java.nio.charset.StandardCharsets
import org.chelona.GetCmdLineArgs.{ Config, argsParser }
import org.chelona.NQuadMain.args
import org.chelona._
import org.parboiled2.{ ParseError, ParserInput }
import scala.io.BufferedSource
import scala.util.{ Failure, Success, Try }
/**
 * Experimental command-line entry point that counts the statements of an
 * N-Triples file.
 *
 * Flow of the body:
 *  1. Parse the command line via `argsParser`; exit with status 1 when
 *     parsing fails, or status 2 after printing the version when the
 *     `version` flag is set.
 *  2. Open the hard-coded file `./tmp.nt` as UTF-8; exit with status 3 when
 *     it cannot be opened.
 *  3. Hand the source to `NTriplesParser.parseAll` together with a callback
 *     that only counts the statements it receives, then print the total.
 *
 * NOTE(review): the file named on the command line is only echoed in the
 * verbose message; the data actually parsed always comes from `./tmp.nt`.
 * This looks like scratch/benchmark code — confirm before relying on it.
 */
object Main extends App with RDFTurtleOutput {

  val cmdLineArgs = argsParser.parse(args, Config())

  // `sys.exit` has type Nothing, so this yields the parsed configuration or
  // terminates with status 1 when the command line could not be parsed.
  private val config = cmdLineArgs.getOrElse(sys.exit(1))

  if (config.version) {
    // `chelona_version` is resolved from outside this file.
    System.err.println(chelona_version)
    sys.exit(2)
  }

  // Fail with a readable message instead of a NoSuchElementException when the
  // list of input files is empty.
  val file = config.file.headOption.getOrElse {
    System.err.println("Error: no input file given")
    sys.exit(1)
  }
  val validate = config.validate
  val verbose = config.verbose
  val fmt = config.fmt

  println("FMT=" + fmt.mkString)

  if (verbose) {
    System.err.println((if (!validate) "Convert: " else "Validate: ") + file.getCanonicalPath)
  }

  // Sample Turtle document; not referenced by the code below.
  val in =
    """@base .
@prefix rdf: .
@prefix rdfs: .
@prefix foaf: .
@prefix rel: .
<#green-goblin> rel:enemyOf <#spiderman> ;
a foaf:Person ; # in the context of the Marvel universe
foaf:name 'Green Goblin' ;
foaf:mail '[email protected]' .
<#spiderman>
rel:enemyOf <#green-goblin> ;
a foaf:Person ;
foaf:name 'Spiderman', 'Человек-паук'@ru ."""

  // Not referenced by the code below.
  val output = new StringWriter()

  /* Disabled experiment: writer that renders triples as escaped text.
  def turtleHTMLWriter(bo: Writer)(triple: List[RDFReturnType]): Int = {
    def formatter(token: String, `type`: Int) = {
      if (TurtleBitValue.isIRIREF(`type`))
        "<" + token.substring(1, token.length - 1) + ">"
      else
        token
    }
    triple.map {
      case TurtleTriple(s, p, o) ⇒ {
        val subject = formatter(s.text, s.tokenType)
        val predicate = formatter(p.text, p.tokenType)
        val `object` = formatter(o.text, o.tokenType)
        bo.write(subject + " " + predicate + " " + `object` + " .\n")
      }
    }.length
  }
  */

  System.out.println("Jetzt geht's los...")

  val inputfile: Try[BufferedSource] = Try { io.Source.fromFile("./tmp.nt")(StandardCharsets.UTF_8) }

  // Unwrap the source, or report the failure and terminate with status 3.
  private val source: BufferedSource = inputfile match {
    case Success(opened) => opened
    case Failure(cause) =>
      System.err.println("Error: " + cause)
      sys.exit(3)
  }

  /* Disabled experiment: read the input in chunks of three lines.
  val input = inputfile.get
  val iterator = input.getLines()
  var count = 0
  while (iterator.hasNext && count < 100) {
    val l = iterator.take(3)
    val s = l.toStream
    count += l.length
    System.out.println("--" + count + "+++" + s.toString())
  }
  input.close()
  System.out.println("LINES = " + count)
  sys.exit(-1)
  */

  //lazy val input: ParserInput = inputfile.get.mkString

  /** Mutable holder for the number of statements seen so far. */
  object myCount { var counter = 0 }

  /**
   * Statement callback that only counts its invocations.
   *
   * Prints a progress line whenever the count before this call is a multiple
   * of 1,000,000 (this includes the very first call, at 0), then increments
   * the counter.
   *
   * @param counterObject holder whose `counter` field is read and incremented
   * @param s             unused
   * @param p             unused
   * @param o             unused
   * @param g             unused
   * @return always 1
   */
  def tripleCounter(counterObject: myCount.type)(s: Term, p: Term, o: Term, g: Term): Int = {
    // Go through the parameter rather than the enclosing singleton, so the
    // argument passed by the caller is actually the one being updated.
    if ((counterObject.counter % 1000000) == 0) System.out.println("COUNTER=" + counterObject.counter)
    counterObject.counter += 1
    1
  }

  /* AST evaluation procedure. Here is the point to provide your own flavour, if you like. */
  val writer = tripleCounter(myCount) _

  // The meaning of the positional arguments is defined by
  // NTriplesParser.parseAll, which is not visible in this file.
  // The source is closed once parseAll returns or throws, so the file handle
  // is not leaked.
  val parser =
    try NTriplesParser.parseAll("./tmp.nt", source, writer, false, "", "", true, false, 25000000)
    finally source.close()

  println("Counted triples = " + myCount.counter)

  /* Disabled: earlier variant that ran the parser rule directly.
  parser.ntriplesDoc.run() match {
    case Success(tripleCount) ⇒
      System.err.println("Input converted to " + tripleCount + " triples.")
      println("Counted triples = " + myCount.counter)
    case Failure(e: ParseError) ⇒ System.err.println("Unexpected error during parsing run: " + parser.formatError(e))
    case Failure(e) ⇒ System.err.println("Unexpected error during parsing run: " + e)
  }*/
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy