![JAR search and dependency download from the Maven repository](/logo.png)
org.culturegraph.mf.mediawiki.WikiTextParser Maven / Gradle / Ivy
/*
* Copyright 2013 Deutsche Nationalbibliothek
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.culturegraph.mf.mediawiki;
import java.io.IOException;
import javax.xml.bind.JAXBException;
import org.culturegraph.mf.framework.MetafactureException;
import org.culturegraph.mf.framework.ObjectReceiver;
import org.culturegraph.mf.framework.annotations.Description;
import org.culturegraph.mf.framework.annotations.In;
import org.culturegraph.mf.framework.annotations.Out;
import org.culturegraph.mf.framework.helpers.DefaultObjectPipe;
import org.culturegraph.mf.mediawiki.objects.WikiPage;
import org.sweble.wikitext.engine.CompiledPage;
import org.sweble.wikitext.engine.Compiler;
import org.sweble.wikitext.engine.CompilerException;
import org.sweble.wikitext.engine.PageId;
import org.sweble.wikitext.engine.PageTitle;
import org.sweble.wikitext.engine.config.WikiConfigurationInterface;
import org.sweble.wikitext.engine.utils.SimpleWikiConfiguration;
import org.sweble.wikitext.lazy.LinkTargetException;
/**
* Parses a wiki page and adds the AST to the {@link WikiPage} object.
*
* The parser does not support expansion (inclusion of template contents)
* based on the assumption that the page should already contain all
* information. Templates can only transform this information into a
* different form but cannot add new knowledge which is not somehow
* already part of the page.
*
* @author Christoph Böhme
*
*/
@Description("Parses a wiki page and adds the AST to the {@link WikiPage} object.")
@In(WikiPage.class)
@Out(WikiPage.class)
public final class WikiTextParser
extends DefaultObjectPipe> {
/**
* Processing levels of the Wiki parser. See http://sweble.org/downloads/diwp-preprint.pdf
* for an explanation of the different levels.
*/
public enum ParseLevel { PREPROCESS, PARSE, POSTPROCESS }
public static final String DEFAULT_CONFIG = "classpath:/org/sweble/wikitext/engine/SimpleWikiConfiguration.xml";
private ParseLevel parseLevel = ParseLevel.PREPROCESS;
private final WikiConfigurationInterface config;
private final Compiler compiler;
public WikiTextParser() throws IOException {
this(DEFAULT_CONFIG);
}
public WikiTextParser(final String configFile) throws IOException {
super();
try {
config = new SimpleWikiConfiguration(configFile);
} catch (JAXBException e) {
throw new MetafactureException(e);
}
compiler = new Compiler(config);
}
public ParseLevel getParseLevel() {
return parseLevel;
}
public void setParseLevel(final ParseLevel parseLevel) {
this.parseLevel = parseLevel;
}
@Override
public void process(final WikiPage page) {
final CompiledPage compiledPage;
try {
final PageTitle pageTitle = PageTitle.make(config, page.getTitle());
final PageId pageId = new PageId(pageTitle, page.getRevisionId());
switch(parseLevel) {
case PREPROCESS:
compiledPage = compiler.preprocess(pageId, page.getWikiText(),
false, null);
break;
case PARSE:
compiledPage = compiler.parse(pageId, page.getWikiText(), null);
break;
case POSTPROCESS:
compiledPage = compiler.postprocess(pageId, page.getWikiText(), null);
break;
default:
throw new AssertionError("Illegal value for parseLevel");
}
} catch (LinkTargetException e) {
throw new MetafactureException(e);
} catch (CompilerException e) {
throw new MetafactureException(e);
}
page.setWikiAst(compiledPage);
page.setParseLevel(parseLevel);
getReceiver().process(page);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy