All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.dbpedia.extraction.server.resources.Extraction.scala Maven / Gradle / Ivy

package org.dbpedia.extraction.server.resources

import java.net.{URL, URI}
import org.dbpedia.extraction.destinations.formatters.{RDFJSONFormatter, TerseFormatter}
import org.dbpedia.extraction.util.Language
import javax.ws.rs._
import javax.ws.rs.core.{MediaType, Response}
import java.util.logging.{Logger,Level}
import scala.xml.Elem
import scala.io.{Source,Codec}
import org.dbpedia.extraction.server.Server
import org.dbpedia.extraction.wikiparser.WikiTitle
import org.dbpedia.extraction.destinations.{DeduplicatingDestination, WriterDestination}
import org.dbpedia.extraction.sources.{XMLSource, WikiSource}
import stylesheets.TriX
import java.io.StringWriter

/**
 * Holds the page titles read once from the classpath resource
 * `/extractionPageTitles.txt` when this object is initialised.
 */
object Extraction
{
    private val logger = Logger.getLogger(getClass.getName)

    /**
     * Entries parsed from the title resource. Only lines of the form `[[key:value]]`
     * are used: the key is the text between `[[` and the first colon, the value is the
     * text between that colon and the closing `]]`. All other lines are ignored.
     *
     * The map is empty when the resource is missing or cannot be read; in both cases
     * a warning is logged and no exception escapes.
     */
    val lines : Map[String, String] = {
        val file = "/extractionPageTitles.txt"
        // getResourceAsStream reports a missing resource by returning null. Wrapping it in
        // Option lets us log that case explicitly instead of tripping over a
        // NullPointerException while reading (and again while closing) the stream.
        Option(getClass.getResourceAsStream(file)) match {
            case None =>
                logger.log(Level.WARNING, "could not load extraction page titles from classpath resource "+file+" (resource not found)")
                Map.empty[String, String]
            case Some(in) =>
                try {
                    try {
                        val entries = Source.fromInputStream(in)(Codec.UTF8).getLines().collect {
                            case line if line.startsWith("[[") && line.endsWith("]]") && line.contains(':') =>
                                val colon = line.indexOf(':')
                                (line.substring(2, colon), line.substring(colon + 1, line.length - 2))
                        }
                        // The iterator is lazy: materialise the map before the stream is closed.
                        entries.toMap
                    }
                    finally in.close()
                }
                catch {
                    case e : Exception =>
                        logger.log(Level.WARNING, "could not load extraction page titles from classpath resource "+file, e)
                        Map.empty[String, String]
                }
        }
    }
}

/**
 * JAX-RS resource bound to the path `/extraction/{lang}/`.
 *
 * Construction validates the `lang` path parameter: the fallback passed to
 * `Language.getOrElse` throws a `WebApplicationException` with status 404
 * ("invalid language ..."), and a second 404 is thrown when
 * `Server.instance.managers` does not contain the resolved language.
 *
 * TODO: merge Extraction.scala and Mappings.scala
 *
 * NOTE(review): in this copy of the file the XML literal inside `get` appears to have
 * lost its markup (only text fragments remain) and the following methods are collapsed
 * onto very long lines - presumably an extraction artifact. Compare with the upstream
 * source before building.
 */
@Path("/extraction/{lang}/")
class Extraction(@PathParam("lang") langCode : String)
{
    // Language for the requested code; the fallback expression throws a 404 instead of yielding a value.
    private val language = Language.getOrElse(langCode, throw new WebApplicationException(new Exception("invalid language "+langCode), 404))

    // Reject languages that are not among the server's configured managers.
    if(!Server.instance.managers.contains(language))
        throw new WebApplicationException(new Exception("language "+langCode+" not configured in server"), 404)
    
    // Title stored under this language code in Extraction.lines, or "Berlin" when there is none.
    private def getTitle : String = Extraction.lines.getOrElse(langCode, "Berlin")

    // Handles GET on the resource root; declared to produce application/xhtml+xml.
    @GET
    @Produces(Array("application/xhtml+xml"))
    def get = 
    {
       
         {ServerHeader.getHeader("Extractor a page")}
         
           

Extract a page

Page title

Revision ID (optional, overrides title)

Output format


} /** * Extracts a MediaWiki article */ @GET @Path("extract") def extract(@QueryParam("title") title: String, @QueryParam("revid") @DefaultValue("-1") revid: Long, @QueryParam("format") format: String, @QueryParam("extractors") extractors: String) : Response = { if (title == null && revid < 0) throw new WebApplicationException(new Exception("title or revid must be given"), Response.Status.NOT_FOUND) val writer = new StringWriter val formatter = format match { case "turtle-triples" => new TerseFormatter(false, true) case "turtle-quads" => new TerseFormatter(true, true) case "n-triples" => new TerseFormatter(false, false) case "n-quads" => new TerseFormatter(true, false) case "rdf-json" => new RDFJSONFormatter() case _ => TriX.writeHeader(writer, 2) } val customExtraction = extractors match { case "mappings" => false case "custom" => true case _ => false } val source = if (revid >= 0) WikiSource.fromRevisionIDs(List(revid), new URL(language.apiUri), language) else WikiSource.fromTitles(List(WikiTitle.parse(title, language)), new URL(language.apiUri), language) // See https://github.com/dbpedia/extraction-framework/issues/144 // We should mimic the extraction framework behavior val destination = new DeduplicatingDestination(new WriterDestination(() => writer, formatter)) Server.instance.extractor.extract(source, destination, language, customExtraction) Response.ok(writer.toString).`type`(selectContentType(format)).build() } private def selectContentType(format: String): String = { format match { case "trix" => MediaType.APPLICATION_XML case _ => MediaType.TEXT_PLAIN } } /** * Extracts a MediaWiki article */ @POST @Path("extract") @Consumes(Array("application/xml")) @Produces(Array("application/xml")) def extract(xml : Elem) = { val writer = new StringWriter val formatter = TriX.writeHeader(writer, 2) val source = XMLSource.fromXML(xml, language) val destination = new WriterDestination(() => writer, formatter) Server.instance.extractor.extract(source, destination, 
language) writer.toString } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy