![JAR search and dependency download from the Maven repository](/logo.png)
eu.fbk.twm.wiki.xmldump.WikiDataClassExtractor Maven / Gradle / Ivy
/*
* Copyright (2013) Fondazione Bruno Kessler (http://www.fbk.eu/)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package eu.fbk.twm.wiki.xmldump;
import eu.fbk.twm.utils.CommandLineWithLogger;
import eu.fbk.twm.utils.Defaults;
import eu.fbk.twm.utils.ExtractorParameters;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.OptionBuilder;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.codehaus.jackson.map.ObjectMapper;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class WikiDataClassExtractor extends AbstractWikipediaExtractor {
// Matches strings of the form "Q<digits>" (anchored at both ends); group 1
// captures the digits. Applied to page titles in contentPage.
Pattern q = Pattern.compile("^Q([0-9]+)$");
// Output path as last passed to start(String, String).
private String outputFile;
// Writer over the output file; stays null until start(String, String) opens it.
BufferedWriter writer = null;
// Integer code -> label table, filled in by the constructor.
// NOTE(review): the codes look like Wikidata property ids (e.g. P279) -- confirm.
HashMap types = new HashMap();
// log4j logger named after this class.
Logger logger = Logger.getLogger(WikiDataClassExtractor.class.getName());
public WikiDataClassExtractor(int numThreads, int numPages, Locale locale) {
super(numThreads, numPages, locale);
types.put(279, "subclass_of");
types.put(361, "part_of");
types.put(31, "instance_of");
types.put(646, "freebase");
types.put(508, "bncf");
logger.info(types);
}
/**
 * Opens the output file for writing and then starts processing the input.
 *
 * <p>If the output file cannot be opened, the failure is logged together with
 * its stack trace and the method returns without calling
 * {@code startProcess}; no exception is propagated to the caller.
 *
 * @param fileName   handed to {@code startProcess} (presumably the path of the
 *                   dump to read -- confirm against the base class)
 * @param outputFile path of the file the {@code writer} field is opened on;
 *                   also stored in the {@code outputFile} field
 */
public void start(String fileName, String outputFile) {
    this.outputFile = outputFile;
    try {
        writer = new BufferedWriter(new FileWriter(outputFile));
    } catch (Exception e) {
        // The broad catch is kept on purpose: any failure to open the output
        // (I/O error, null path, security restriction) aborts the run without
        // throwing, exactly as before. The exception itself is passed to the
        // logger because getMessage() alone may be null and loses the stack trace.
        logger.error("Unable to open output file '" + outputFile + "' for writing", e);
        return;
    }
    startProcess(fileName);
}
/**
 * Empty override: this overload performs no work and ignores its argument.
 *
 * <p>NOTE(review): because the body is empty, calling this overload starts
 * nothing; the working entry point in this class is
 * {@code start(String, String)}. Confirm that no caller relies on this one.
 *
 * @param extractorParameters unused
 */
@Override
public void start(ExtractorParameters extractorParameters) {
// Empty override: no action is taken.
}
/**
 * Empty override: this extractor does nothing with the page passed in.
 *
 * @param text   unused
 * @param title  unused
 * @param wikiID unused
 */
@Override
public void disambiguationPage(String text, String title, int wikiID) {
// Empty override: no action is taken.
}
/**
 * Empty override: this extractor does nothing with the page passed in.
 *
 * @param text   unused
 * @param title  unused
 * @param wikiID unused
 */
@Override
public void categoryPage(String text, String title, int wikiID) {
// Empty override: no action is taken.
}
/**
 * Empty override: this extractor does nothing with the page passed in.
 *
 * @param text   unused
 * @param title  unused
 * @param wikiID unused
 */
@Override
public void templatePage(String text, String title, int wikiID) {
// Empty override: no action is taken.
}
/**
 * Empty override: this extractor does nothing with the page passed in.
 *
 * @param text   unused
 * @param title  unused
 * @param wikiID unused
 */
@Override
public void redirectPage(String text, String title, int wikiID) {
// Empty override: no action is taken.
}
/**
 * Empty override: this extractor does nothing with the page passed in.
 *
 * @param text   unused
 * @param title  unused
 * @param wikiID unused
 */
@Override
public void portalPage(String text, String title, int wikiID) {
// Empty override: no action is taken.
}
/**
 * Empty override: this extractor does nothing with the page passed in.
 *
 * @param text   unused
 * @param title  unused
 * @param wikiID unused
 */
@Override
public void projectPage(String text, String title, int wikiID) {
// Empty override: no action is taken.
}
/**
 * Empty override: this extractor does nothing with the page passed in.
 *
 * @param text   unused
 * @param title  unused
 * @param wikiID unused
 */
@Override
public void filePage(String text, String title, int wikiID) {
// Empty override: no action is taken.
}
@Override
public void contentPage(String text, String title, int wikiID) {
Matcher m = q.matcher(title);
if (!m.find()) {
logger.trace("Invalid title: " + title);
return;
}
String id = m.group(1);
logger.trace("ID: " + id);
ObjectMapper mapper = new ObjectMapper();
Map pageData = null;
try {
pageData = mapper.readValue(text, Map.class);
} catch (Exception ignored) {
return;
}
StringBuffer sb = new StringBuffer();
ArrayList
© 2015 - 2025 Weber Informatics LLC | Privacy Policy