
// eu.fbk.twm.utils.ExtractorParameters Maven / Gradle / Ivy
// The newest version!
/*
* Copyright (2013) Fondazione Bruno Kessler (http://www.fbk.eu/)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package eu.fbk.twm.utils;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import java.io.File;
import java.util.HashMap;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Holds the file and index names derived from a Wikipedia XML dump file name
 * and an extraction output directory.
 *
 * <p>The dump file name is expected to look like
 * {@code [lang]wiki-[version]-pages-articles.xml}; the language and version
 * parsed from it are combined with the output directory into a common prefix
 * ({@code <outputDir>/<lang>wiki-<version>-}) from which every other name is
 * built by {@link #setNames(String)}.
 *
 * @author giuliano
 */
public class ExtractorParameters {

    /**
     * Static logger named after the fully qualified name of this class.
     */
    static Logger logger = Logger.getLogger(ExtractorParameters.class.getName());

    /** Extension appended to the section-related output files. */
    public static final String OUTPUT_EXT = ".csv";

    /**
     * Shape of a dump file name: group 1 is the language code, group 2 the
     * numeric version. Compiled once instead of on every parse.
     */
    private static final Pattern DUMP_FILE_NAME_PATTERN =
            Pattern.compile("(\\w+)wiki-(\\d+)-pages-articles\\.xml");

    // ------------------------------------------------------------------
    // State. Unless noted otherwise every field below is assigned by
    // setNames(String); lang, version, locale, wikipediaDirName and
    // wikipediaXmlFileName are assigned by parseXmlFileName(String).
    // ------------------------------------------------------------------

    String version;
    String lang;
    String wikipediaXmlFileName;
    String extractionOutputDirName;
    String wikipediaDirName;
    String wikipediaOutputFilePrefixName;
    String wikipediaRedirFileName;
    String wikipediaRedirIndexName;
    String wikipediaDisambiguationFileName;
    String wikipediaDisambiguationIndexName;
    String wikipediaAnalysisFileName;
    String wikipediaTitleIdFileName;
    String wikipediaSeeAlsoFileName;
    String wikipediaSeeAlsoIndexName;
    String commonsFileName;
    String wikipediaFileName;
    String wikipediaFileSourceName;
    String wikipediaPageFreqIndexName;
    String wikipediaPageTrafficFileName;
    String wikipediaPersonInfoFileName;
    String wikipediaPersonInfoIndexName;
    String wikipediaOutgoingFileName;
    String wikipediaPageCategoryPrefix;
    String wikipediaPageTopCategoryIndexName;
    String wikipediaPageCategoryFileName;
    String wikipediaPageCategoryXmlIndex;
    String wikipediaPageCategoryMainFileName;
    String wikipediaPageCategoryMainSortedCategoryFileName;
    String wikipediaPageCategoryIndexName;
    String wikipediaPageTopCategoryFileName;
    String wikipediaCategorySuperCategoryIndexName;
    String wikipediaCategorySubCategoryIndexName;
    String wikipediaCategoryPageIndexName;
    String wikipediaFormIdFileName;
    /** Category-related file names, keyed by a short label (e.g. {@code "index-p2c"}). */
    HashMap<String, String> wikipediaCategoryFileNames = new HashMap<String, String>();
    String wikipediaCategorySuperCategoryFileName;
    String wikipediaCategorySubCategoryFileName;
    String wikipediaCategoryPageFileName;
    String wikipediaCrossLanguageLinkFileName;
    String wikipediaTemplateFilePrefixName;
    /** Template-related file names, keyed by a short label (e.g. {@code "freq"}). */
    HashMap<String, String> wikipediaTemplateFileNames = new HashMap<String, String>();
    String wikipediaInfoboxFileName;
    String wikipediaFirstNameFileName;
    String wikipediaExampleFileName;
    //WikipediaSectionExtractor filenames
    String wikipediaSectionFileName;
    String wikipediaPageSectionFreqFileName;
    String wikipediaSectionFreqFileName;
    String wikipediaSectionTitlePrefix;
    String wikipediaSectionTitleFileName;
    /** Section-title-related file names, keyed by a short label (e.g. {@code "index-p2s"}). */
    HashMap<String, String> wikipediaSectionTitleFileNames = new HashMap<String, String>();
    String wikipediaSortedPageFileName;
    String wikipediaSortedFormFileName;
    String wikipediaIncomingFileName;
    String wikipediaFormFreqFileName;
    String wikipediaPageFreqFileName;
    String wikipediaTextFileName;
    String wikipediaNGramFileName;
    String wikipediaVectorFileName;
    String wikipediaDBPediaFileName;
    String wikipediaFilteredExampleFileName;
    String wikipediaFirstNameIndexName;
    String wikipediaPageFormIndexName;
    String wikipediaFormPageIndexName;
    String wikipediaNGramIndexName;
    String wikipediaTypeIndexName;
    String wikipediaCrossLanguageLinkIndexName;
    String extractionLogFileName;
    String preprocessingLogFileName;
    String sortingLogFileName;
    String indexingLogFileName;
    String wikipediaVectorIndexName;
    String vectorLogFileName;
    String outgoingLogFileName;
    String wikipediaContentPageFileName;
    String wikipediaOutgoingIndexName;
    String wikipediaIncomingIndexName;
    String wikipediaIncomingOutgoingFileName;
    String wikipediaIncomingOutgoingWeightedIndexName;
    String wikipediaIncomingOutgoingIndexName;
    String incomingOutgoingLogFileName;
    String wikipediaSortedIncomingOutgoingFileName;
    String wikipediaUnigramFileName;
    String oneExamplePerSenseFileName;
    String oneExamplePerSenseIndexName;
    String wikipediaTextIndexName;
    String wikipediaFileSourceIndexName;
    String wikipediaPageAirPediaClassIndexName;
    String wikipediaAbstractIndexName;
    String wikipediaAbstractFileName;
    String wikipediaFirstSentenceFileName;
    String wikipediaFirstSentenceIndexName;
    String wikipediaPageAllCategoryFileName;
    String wikipediaPagePerCategoryCountFileName;
    String wikipediaNomFileName;
    String wikipediaCategoryFileName;
    String wikipediaPageTypeFileName;
    String wikipediaPageTypeIndexName;
    String wikipediaPageNavigationTemplateIndexName;
    String wikipediaPageNavigationTemplateFileName;
    String wikipediaPagePortalIndexName;
    String wikipediaPagePortalFileName;
    String wikipediaPageTopicsIndexName;
    String wikipediaPageTopicsFileName;
    String wikipediaDBPediaClassesIndexName;
    String wikipediaNamNomIndexName;
    String wikipediaAirpedia2IndexName;
    String wikipediCategoryIndexFileName;
    String wikipediaSortedPagePerCategoryCountFileName;
    String wikipediaPageAllCategoryIndexName;
    Locale locale;

    // ------------------------------------------------------------------
    // Construction
    // ------------------------------------------------------------------

    /**
     * Parses the dump file name and derives all output names.
     *
     * @param wikipediaXmlFileName path of the Wikipedia XML dump
     * @param baseDir              output directory; a trailing separator is added if missing
     * @param useAsBaseDir         if {@code true} the names are placed under
     *                             {@code baseDir/<lang>/<version>/}, otherwise directly
     *                             under {@code baseDir}
     */
    public ExtractorParameters(String wikipediaXmlFileName, String baseDir, boolean useAsBaseDir) {
        parseXmlFileName(wikipediaXmlFileName);
        if (!baseDir.endsWith(File.separator)) {
            baseDir += File.separator;
        }
        if (useAsBaseDir) {
            setNames(baseDir + lang + File.separator + version);
        }
        else {
            setNames(baseDir);
        }
    }

    /**
     * Equivalent to {@code ExtractorParameters(wikipediaXmlFileName, extractionOutputDirName, false)}.
     *
     * @param wikipediaXmlFileName    path of the Wikipedia XML dump
     * @param extractionOutputDirName directory the output names are placed in
     */
    public ExtractorParameters(String wikipediaXmlFileName, String extractionOutputDirName) {
        this(wikipediaXmlFileName, extractionOutputDirName, false);
    }

    // ------------------------------------------------------------------
    // Name derivation
    // ------------------------------------------------------------------

    /**
     * Records the dump path and extracts the directory, language, locale and
     * version from it. Does nothing if a dump path has already been recorded.
     *
     * <p>If the file name does not look like
     * {@code [lang]wiki-[version]-pages-articles.xml}, {@code lang},
     * {@code locale} and {@code version} are left untouched and a warning is
     * logged, because every name derived afterwards would otherwise silently
     * contain the text {@code null}.
     *
     * @param wikipediaXmlFileName path of the Wikipedia XML dump
     */
    protected void parseXmlFileName(String wikipediaXmlFileName) {
        if (this.wikipediaXmlFileName != null) {
            // Already parsed once; keep the first value.
            return;
        }
        this.wikipediaXmlFileName = wikipediaXmlFileName;
        logger.debug(wikipediaXmlFileName);
        File wikipediaXmlFile = new File(wikipediaXmlFileName);
        wikipediaDirName = wikipediaXmlFile.getParent();
        String name = wikipediaXmlFile.getName();
        //[lang]wiki-[version]-pages-articles.xml
        logger.debug(name);
        Matcher matcher = DUMP_FILE_NAME_PATTERN.matcher(name);
        if (matcher.find()) {
            lang = matcher.group(1);
            locale = new Locale(lang);
            version = matcher.group(2);
        }
        else {
            logger.warn("Cannot extract language and version from '" + name
                    + "': expected [lang]wiki-[version]-pages-articles.xml");
        }
    }

    /**
     * Derives every file, index and log name from the given output directory
     * and the previously parsed {@code lang} and {@code version}.
     *
     * <p>All names share the prefix
     * {@code <extractionOutputDirName>/<lang>wiki-<version>-}. The three label
     * maps are (re)populated as well; existing entries with the same label
     * are overwritten.
     *
     * @param extractionOutputDirName output directory; a trailing separator is added if missing
     */
    protected void setNames(String extractionOutputDirName) {
        this.extractionOutputDirName = extractionOutputDirName;
        if (!this.extractionOutputDirName.endsWith(File.separator)) {
            this.extractionOutputDirName += File.separator;
        }
        wikipediaOutputFilePrefixName = this.extractionOutputDirName + lang + "wiki-" + version + '-';
        // Short local alias for the common prefix used by almost every name below.
        final String prefix = wikipediaOutputFilePrefixName;

        wikipediaAnalysisFileName = prefix + "analysis.csv";
        wikipediaRedirFileName = prefix + "redirect.csv";
        wikipediaRedirIndexName = prefix + "redirect-index";
        wikipediaDisambiguationFileName = prefix + "disambiguation.csv";
        wikipediaDisambiguationIndexName = prefix + "disambiguation-index-raw";
        wikipediaTitleIdFileName = prefix + "title-id.csv";
        wikipediaPageTrafficFileName = prefix + "page-traffic.csv";
        wikipediaContentPageFileName = prefix + "content-page.csv";
        wikipediaTextFileName = prefix + "text.csv";
        wikipediaVectorFileName = prefix + "vector.csv";
        wikipediaVectorIndexName = prefix + "page-vector-index";
        wikipediaPersonInfoFileName = prefix + "person-info.csv";
        wikipediaPersonInfoIndexName = prefix + "person-info-index";
        wikipediaFirstNameFileName = prefix + "first-name.csv";
        wikipediaOutgoingFileName = prefix + "outgoing.csv";
        wikipediaFileName = prefix + "file.csv";
        // NOTE(review): same name as wikipediaFileName — confirm this is intended.
        commonsFileName = prefix + "file.csv";
        wikipediaAbstractFileName = prefix + "abstract.csv";
        wikipediaFirstSentenceFileName = prefix + "links-first-sentence.csv";
        wikipediaSeeAlsoFileName = prefix + "page-see-also.csv";
        wikipediaSeeAlsoIndexName = prefix + "page-see-also-index";
        wikipediaAbstractIndexName = prefix + "abstract-index";
        wikipediaFirstSentenceIndexName = prefix + "links-first-sentence-index";
        wikipediaFileSourceName = prefix + "page-file-source.csv";
        wikipediaFileSourceIndexName = prefix + "page-file-source-index";
        wikipediaIncomingOutgoingFileName = prefix + "incoming-outgoing.csv";
        wikipediaSortedIncomingOutgoingFileName = prefix + "sorted-incoming-outgoing.csv";

        wikipediaPageCategoryPrefix = prefix + "page-category";
        wikipediaPageTopCategoryFileName = prefix + "page-top-category.csv";
        wikipediaPageAllCategoryFileName = prefix + "page-all-category.csv";
        wikipediaPageAllCategoryIndexName = prefix + "page-all-category-index";
        wikipediaPagePerCategoryCountFileName = prefix + "page-per-category-count.csv";
        wikipediaSortedPagePerCategoryCountFileName = prefix + "sorted-page-per-category-count.csv";
        wikipediCategoryIndexFileName = prefix + "category-index.csv";
        wikipediaPageTopCategoryIndexName = prefix + "page-top-category-index";
        wikipediaPageCategoryFileName = wikipediaPageCategoryPrefix + ".csv";
        wikipediaPageCategoryIndexName = prefix + "page-category-index";
        wikipediaPageCategoryMainFileName = wikipediaPageCategoryPrefix + "-main.csv";
        wikipediaPageCategoryMainSortedCategoryFileName = wikipediaPageCategoryPrefix + "-main-sorted-category.csv";
        wikipediaPageCategoryXmlIndex = wikipediaPageCategoryPrefix + "-xml-index-raw";
        wikipediaCategorySuperCategoryFileName = prefix + "category-super-category.csv";
        wikipediaCategorySuperCategoryIndexName = prefix + "category-super-category-index";
        wikipediaCategorySubCategoryFileName = prefix + "category-sub-category.csv";
        wikipediaCategorySubCategoryIndexName = prefix + "category-sub-category-index";
        wikipediaCategoryPageFileName = prefix + "category-page.csv";
        wikipediaCategoryPageIndexName = prefix + "category-page-index";
        wikipediaCategoryFileName = prefix + "category.csv";
        wikipediaCrossLanguageLinkFileName = prefix + "cross-lang.csv";
        wikipediaTemplateFilePrefixName = prefix + "template-";
        wikipediaExampleFileName = prefix + "example.csv";
        wikipediaFilteredExampleFileName = prefix + "filtered-example.csv";
        wikipediaSortedPageFileName = prefix + "sorted-page.csv";
        wikipediaNomFileName = prefix + "nom.csv";
        wikipediaSortedFormFileName = prefix + "sorted-form.csv";

        //WikipediaSectionExtractor filenames
        wikipediaSectionFileName = prefix + "section-text" + OUTPUT_EXT;
        wikipediaPageSectionFreqFileName = prefix + "page-section-freq" + OUTPUT_EXT;
        wikipediaSectionFreqFileName = prefix + "section-freq" + OUTPUT_EXT;
        wikipediaSectionTitlePrefix = prefix + "section-title";
        wikipediaSectionTitleFileName = wikipediaSectionTitlePrefix + ".csv";

        wikipediaIncomingFileName = prefix + "incoming.csv";
        wikipediaFormFreqFileName = prefix + "form-freq.csv";
        wikipediaPageFreqFileName = prefix + "page-freq.csv";
        wikipediaPageFreqIndexName = prefix + "page-freq-index";
        wikipediaFormIdFileName = prefix + "form-id.csv";
        wikipediaInfoboxFileName = prefix + "infobox.csv";
        wikipediaDBPediaFileName = prefix + "dbpedia.csv";
        wikipediaPageFormIndexName = prefix + "page-form-index";
        wikipediaFirstNameIndexName = prefix + "first-name-index";
        wikipediaFormPageIndexName = prefix + "form-page-index";
        wikipediaOutgoingIndexName = prefix + "outgoing-index";
        wikipediaIncomingIndexName = prefix + "incoming-index";
        wikipediaIncomingOutgoingIndexName = prefix + "incoming-outgoing-index";
        wikipediaIncomingOutgoingWeightedIndexName = prefix + "incoming-outgoing-weighted-index";
        wikipediaNGramFileName = prefix + "ngram.csv";
        wikipediaUnigramFileName = prefix + "unigram.csv";
        wikipediaNGramIndexName = prefix + "ngram-index";
        wikipediaTypeIndexName = prefix + "type-index";
        wikipediaPageTypeIndexName = prefix + "page-type-index";
        wikipediaPageTypeFileName = prefix + "page-type.csv";

        preprocessingLogFileName = prefix + "preprocessing.log";
        vectorLogFileName = prefix + "lsa.log";
        extractionLogFileName = prefix + "extraction.log";
        sortingLogFileName = prefix + "sorting.log";
        indexingLogFileName = prefix + "indexing.log";
        outgoingLogFileName = prefix + "outgoing.log";
        incomingOutgoingLogFileName = prefix + "incoming-outgoing.log";

        oneExamplePerSenseFileName = prefix + "one-example-per-sense.csv";
        oneExamplePerSenseIndexName = prefix + "one-example-per-sense-index";
        wikipediaTextIndexName = prefix + "text-index";
        wikipediaPageAirPediaClassIndexName = prefix + "airpedia-class-index";

        wikipediaTemplateFileNames.put("name", wikipediaTemplateFilePrefixName + "name.csv");
        wikipediaTemplateFileNames.put("freq", wikipediaTemplateFilePrefixName + "freq.csv");
        wikipediaTemplateFileNames.put("map", wikipediaTemplateFilePrefixName + "map.csv");
        wikipediaTemplateFileNames.put("map-rep", wikipediaTemplateFilePrefixName + "map-rep.csv");
        wikipediaTemplateFileNames.put("map-prop", wikipediaTemplateFilePrefixName + "map-prop.csv");
        wikipediaTemplateFileNames.put("complete", wikipediaTemplateFilePrefixName + "complete.csv");
        wikipediaTemplateFileNames.put("good", wikipediaTemplateFilePrefixName + "good.csv");
        wikipediaTemplateFileNames.put("pruned", wikipediaTemplateFilePrefixName + "pruned.csv");
        wikipediaTemplateFileNames.put("pruned-s-page", wikipediaTemplateFilePrefixName + "pruned-s-page.csv");
        wikipediaTemplateFileNames.put("pruned-s-tpl", wikipediaTemplateFilePrefixName + "pruned-s-tpl.csv");
        wikipediaTemplateFileNames.put("index-p2t", wikipediaTemplateFilePrefixName + "index-p2t");
        wikipediaTemplateFileNames.put("index-t2p", wikipediaTemplateFilePrefixName + "index-t2p");
        wikipediaTemplateFileNames.put("index-id", wikipediaTemplateFilePrefixName + "index-id");
        wikipediaTemplateFileNames.put("properties", wikipediaTemplateFilePrefixName + "properties-index");
        wikipediaTemplateFileNames.put("infoboxes", wikipediaTemplateFilePrefixName + "infoboxes.csv");
        wikipediaTemplateFileNames.put("navigation", wikipediaTemplateFilePrefixName + "navigation.csv");

        wikipediaPagePortalIndexName = prefix + "page-portal-index";
        wikipediaPageNavigationTemplateIndexName = prefix + "page-navigation-index";
        wikipediaPagePortalFileName = prefix + "page-portal.csv";
        wikipediaPageNavigationTemplateFileName = prefix + "page-navigation.csv";
        wikipediaDBPediaClassesIndexName = prefix + "page-dbpediaclass-index";

        wikipediaCategoryFileNames.put("s-cat", wikipediaPageCategoryPrefix + "-s-cat.csv");
        wikipediaCategoryFileNames.put("index-p2c", wikipediaPageCategoryPrefix + "-index-p2c");
        wikipediaCategoryFileNames.put("index-c2p", wikipediaPageCategoryPrefix + "-index-c2p");
        wikipediaCategoryFileNames.put("index-id", wikipediaPageCategoryPrefix + "-index-id");
        wikipediaCategoryFileNames.put("tokens", wikipediaPageCategoryPrefix + "-tokens.csv");
        wikipediaCategoryFileNames.put("tokens-s-tok", wikipediaPageCategoryPrefix + "-tokens-s-tok.csv");
        wikipediaCategoryFileNames.put("tokens-index-p2k", wikipediaPageCategoryPrefix + "-tokens-index-p2k");
        wikipediaCategoryFileNames.put("tokens-index-k2p", wikipediaPageCategoryPrefix + "-tokens-index-k2p");
        wikipediaCategoryFileNames.put("tokens-index-id", wikipediaPageCategoryPrefix + "-tokens-index-id");

        wikipediaSectionTitleFileNames.put("s-sec", wikipediaSectionTitlePrefix + "-s-sec.csv");
        wikipediaSectionTitleFileNames.put("index-p2s", wikipediaSectionTitlePrefix + "-index-p2s");
        wikipediaSectionTitleFileNames.put("index-s2p", wikipediaSectionTitlePrefix + "-index-s2p");
        wikipediaSectionTitleFileNames.put("index-id", wikipediaSectionTitlePrefix + "-index-id");
        wikipediaSectionTitleFileNames.put("tokens", wikipediaSectionTitlePrefix + "-tokens.csv");
        wikipediaSectionTitleFileNames.put("tokens-s-tok", wikipediaSectionTitlePrefix + "-tokens-s-tok.csv");
        wikipediaSectionTitleFileNames.put("tokens-index-p2k", wikipediaSectionTitlePrefix + "-tokens-index-p2k");
        wikipediaSectionTitleFileNames.put("tokens-index-k2p", wikipediaSectionTitlePrefix + "-tokens-index-k2p");
        wikipediaSectionTitleFileNames.put("tokens-index-id", wikipediaSectionTitlePrefix + "-tokens-index-id");

        // Links to global models
        wikipediaNamNomIndexName = prefix + "page-namnom-index";
        wikipediaAirpedia2IndexName = prefix + "page-airpedia2class-index";
        wikipediaCrossLanguageLinkIndexName = prefix + "cross-lang-index";
        wikipediaPageTopicsIndexName = prefix + "page-topics-index";
        wikipediaPageTopicsFileName = prefix + "page-topics.csv";
    }

    // ------------------------------------------------------------------
    // Accessors. Each getter returns the current value of the field of
    // the same name; each setter overwrites it.
    // ------------------------------------------------------------------

    public String getWikipediaPageTopicsIndexName() {
        return wikipediaPageTopicsIndexName;
    }

    public String getWikipediaPageTopicsFileName() {
        return wikipediaPageTopicsFileName;
    }

    public String getExtractionOutputDirName() {
        return extractionOutputDirName;
    }

    public String getWikipediaRedirIndexName() {
        return wikipediaRedirIndexName;
    }

    public String getWikipediaPageCategoryMainSortedCategoryFileName() {
        return wikipediaPageCategoryMainSortedCategoryFileName;
    }

    public String getWikipediaPageCategoryMainFileName() {
        return wikipediaPageCategoryMainFileName;
    }

    public String getWikipediaPageCategoryXmlIndex() {
        return wikipediaPageCategoryXmlIndex;
    }

    public String getWikipediaFirstSentenceFileName() {
        return wikipediaFirstSentenceFileName;
    }

    public String getWikipediaFirstSentenceIndexName() {
        return wikipediaFirstSentenceIndexName;
    }

    public String getWikipediaAirpedia2IndexName() {
        return wikipediaAirpedia2IndexName;
    }

    public String getWikipediaNamNomIndexName() {
        return wikipediaNamNomIndexName;
    }

    public String getWikipediaPageNavigationTemplateIndexName() {
        return wikipediaPageNavigationTemplateIndexName;
    }

    public String getWikipediaPageNavigationTemplateFileName() {
        return wikipediaPageNavigationTemplateFileName;
    }

    public String getWikipediaPagePortalIndexName() {
        return wikipediaPagePortalIndexName;
    }

    public String getWikipediaPagePortalFileName() {
        return wikipediaPagePortalFileName;
    }

    /**
     * @return the live (mutable) map of section-title file names keyed by label
     */
    public HashMap<String, String> getWikipediaSectionTitleFileNames() {
        return wikipediaSectionTitleFileNames;
    }

    /**
     * @return the live (mutable) map of template file names keyed by label
     */
    public HashMap<String, String> getWikipediaTemplateFileNames() {
        return wikipediaTemplateFileNames;
    }

    /**
     * @return the live (mutable) map of category file names keyed by label
     */
    public HashMap<String, String> getWikipediaCategoryFileNames() {
        return wikipediaCategoryFileNames;
    }

    public String getWikipediaDBPediaClassesIndexName() {
        return wikipediaDBPediaClassesIndexName;
    }

    public String getWikipediaDisambiguationIndexName() {
        return wikipediaDisambiguationIndexName;
    }

    public void setWikipediaDisambiguationIndexName(String wikipediaDisambiguationIndexName) {
        this.wikipediaDisambiguationIndexName = wikipediaDisambiguationIndexName;
    }

    public String getWikipediaSectionFreqFileName() {
        return wikipediaSectionFreqFileName;
    }

    public String getWikipediaCategoryPageFileName() {
        return wikipediaCategoryPageFileName;
    }

    public String getWikipediaCategoryPageIndexName() {
        return wikipediaCategoryPageIndexName;
    }

    public String getWikipediaCategoryFileName() {
        return wikipediaCategoryFileName;
    }

    public String getWikipediaPageAllCategoryIndexName() {
        return wikipediaPageAllCategoryIndexName;
    }

    public String getWikipediaSortedPagePerCategoryCountFileName() {
        return wikipediaSortedPagePerCategoryCountFileName;
    }

    public String getWikipediCategoryIndexFileName() {
        return wikipediCategoryIndexFileName;
    }

    public String getWikipediaPageTypeIndexName() {
        return wikipediaPageTypeIndexName;
    }

    public String getWikipediaPageTypeFileName() {
        return wikipediaPageTypeFileName;
    }

    public String getWikipediaNomFileName() {
        return wikipediaNomFileName;
    }

    public String getWikipediaPagePerCategoryCountFileName() {
        return wikipediaPagePerCategoryCountFileName;
    }

    public String getWikipediaSeeAlsoFileName() {
        return wikipediaSeeAlsoFileName;
    }

    public String getWikipediaSeeAlsoIndexName() {
        return wikipediaSeeAlsoIndexName;
    }

    public String getWikipediaPageAllCategoryFileName() {
        return wikipediaPageAllCategoryFileName;
    }

    public String getWikipediaPageTopCategoryFileName() {
        return wikipediaPageTopCategoryFileName;
    }

    public String getWikipediaPageTopCategoryIndexName() {
        return wikipediaPageTopCategoryIndexName;
    }

    public String getWikipediaPageFreqIndexName() {
        return wikipediaPageFreqIndexName;
    }

    public void setWikipediaPageFreqIndexName(String wikipediaPageFreqIndexName) {
        this.wikipediaPageFreqIndexName = wikipediaPageFreqIndexName;
    }

    public String getWikipediaFormIdFileName() {
        return wikipediaFormIdFileName;
    }

    public void setWikipediaFormIdFileName(String wikipediaFormIdFileName) {
        this.wikipediaFormIdFileName = wikipediaFormIdFileName;
    }

    public String getWikipediaIncomingOutgoingWeightedIndexName() {
        return wikipediaIncomingOutgoingWeightedIndexName;
    }

    public void setWikipediaIncomingOutgoingWeightedIndexName(String wikipediaIncomingOutgoingWeightedIndexName) {
        this.wikipediaIncomingOutgoingWeightedIndexName = wikipediaIncomingOutgoingWeightedIndexName;
    }

    public String getWikipediaPageTrafficFileName() {
        return wikipediaPageTrafficFileName;
    }

    public void setWikipediaPageTrafficFileName(String wikipediaPageTrafficFileName) {
        this.wikipediaPageTrafficFileName = wikipediaPageTrafficFileName;
    }

    public String getWikipediaCategorySubCategoryFileName() {
        return wikipediaCategorySubCategoryFileName;
    }

    public String getWikipediaCategorySubCategoryIndexName() {
        return wikipediaCategorySubCategoryIndexName;
    }

    public String getWikipediaCategorySuperCategoryIndexName() {
        return wikipediaCategorySuperCategoryIndexName;
    }

    public String getWikipediaPageCategoryIndexName() {
        return wikipediaPageCategoryIndexName;
    }

    public String getWikipediaPersonInfoIndexName() {
        return wikipediaPersonInfoIndexName;
    }

    public String getWikipediaAbstractIndexName() {
        return wikipediaAbstractIndexName;
    }

    public String getWikipediaAbstractFileName() {
        return wikipediaAbstractFileName;
    }

    public String getWikipediaPageAirPediaClassIndexName() {
        return wikipediaPageAirPediaClassIndexName;
    }

    public String getWikipediaFirstNameFileName() {
        return wikipediaFirstNameFileName;
    }

    public String getWikipediaFirstNameIndexName() {
        return wikipediaFirstNameIndexName;
    }

    public String getWikipediaFileSourceName() {
        return wikipediaFileSourceName;
    }

    public String getWikipediaFileSourceIndexName() {
        return wikipediaFileSourceIndexName;
    }

    public String getCommonsFileName() {
        return commonsFileName;
    }

    public String getWikipediaFileName() {
        return wikipediaFileName;
    }

    public String getWikipediaTextIndexName() {
        return wikipediaTextIndexName;
    }

    public String getOneExamplePerSenseFileName() {
        return oneExamplePerSenseFileName;
    }

    public String getOneExamplePerSenseIndexName() {
        return oneExamplePerSenseIndexName;
    }

    public String getWikipediaSectionTitleFileName() {
        return wikipediaSectionTitleFileName;
    }

    public String getWikipediaUnigramFileName() {
        return wikipediaUnigramFileName;
    }

    public String getWikipediaSortedIncomingOutgoingFileName() {
        return wikipediaSortedIncomingOutgoingFileName;
    }

    public String getIncomingOutgoingLogFileName() {
        return incomingOutgoingLogFileName;
    }

    public String getWikipediaIncomingOutgoingFileName() {
        return wikipediaIncomingOutgoingFileName;
    }

    public String getWikipediaIncomingOutgoingIndexName() {
        return wikipediaIncomingOutgoingIndexName;
    }

    public String getWikipediaOutgoingIndexName() {
        return wikipediaOutgoingIndexName;
    }

    public String getWikipediaIncomingIndexName() {
        return wikipediaIncomingIndexName;
    }

    public String getWikipediaContentPageFileName() {
        return wikipediaContentPageFileName;
    }

    public String getOutgoingLogFileName() {
        return outgoingLogFileName;
    }

    public String getVectorLogFileName() {
        return vectorLogFileName;
    }

    public String getWikipediaVectorIndexName() {
        return wikipediaVectorIndexName;
    }

    public String getWikipediaCrossLanguageLinkIndexName() {
        return wikipediaCrossLanguageLinkIndexName;
    }

    public String getExtractionLogFileName() {
        return extractionLogFileName;
    }

    public String getPreprocessingLogFileName() {
        return preprocessingLogFileName;
    }

    public String getSortingLogFileName() {
        return sortingLogFileName;
    }

    public String getIndexingLogFileName() {
        return indexingLogFileName;
    }

    public String getWikipediaTypeIndexName() {
        return wikipediaTypeIndexName;
    }

    public String getWikipediaNGramIndexName() {
        return wikipediaNGramIndexName;
    }

    public String getWikipediaNGramFileName() {
        return wikipediaNGramFileName;
    }

    public String getWikipediaPageFormIndexName() {
        return wikipediaPageFormIndexName;
    }

    public void setWikipediaPageFormIndexName(String wikipediaPageFormIndexName) {
        this.wikipediaPageFormIndexName = wikipediaPageFormIndexName;
    }

    public String getWikipediaFormPageIndexName() {
        return wikipediaFormPageIndexName;
    }

    public void setWikipediaFormPageIndexName(String wikipediaFormPageIndexName) {
        this.wikipediaFormPageIndexName = wikipediaFormPageIndexName;
    }

    public String getWikipediaFilteredExampleFileName() {
        return wikipediaFilteredExampleFileName;
    }

    public String getWikipediaInfoboxFileName() {
        return wikipediaInfoboxFileName;
    }

    public String getWikipediaDBPediaFileName() {
        return wikipediaDBPediaFileName;
    }

    public String getVersion() {
        return version;
    }

    public void setVersion(String version) {
        this.version = version;
    }

    public Locale getLocale() {
        return locale;
    }

    public String getLang() {
        return lang;
    }

    public String getWikipediaDirName() {
        return wikipediaDirName;
    }

    public String getWikipediaRedirFileName() {
        return wikipediaRedirFileName;
    }

    public String getWikipediaDisambiguationFileName() {
        return wikipediaDisambiguationFileName;
    }

    public String getWikipediaAnalysisFileName() {
        return wikipediaAnalysisFileName;
    }

    public String getWikipediaTitleIdFileName() {
        return wikipediaTitleIdFileName;
    }

    public String getWikipediaPersonInfoFileName() {
        return wikipediaPersonInfoFileName;
    }

    public String getWikipediaOutgoingFileName() {
        return wikipediaOutgoingFileName;
    }

    public String getWikipediaPageCategoryFileName() {
        return wikipediaPageCategoryFileName;
    }

    public String getWikipediaCategorySuperCategoryFileName() {
        return wikipediaCategorySuperCategoryFileName;
    }

    public String getWikipediaCrossLanguageLinkFileName() {
        return wikipediaCrossLanguageLinkFileName;
    }

    public String getWikipediaTemplateFilePrefixName() {
        return wikipediaTemplateFilePrefixName;
    }

    public String getWikipediaExampleFileName() {
        return wikipediaExampleFileName;
    }

    /**
     * Returns the prefix shared by all section-title files
     * ({@code ...section-title}, without extension).
     *
     * <p>Previously this returned the full {@code ...section-title.csv} file
     * name, which is what {@link #getWikipediaSectionTitleFileName()} is for.
     *
     * @return the section-title file prefix
     */
    public String getWikipediaSectionTitleFilePrefixName() {
        return wikipediaSectionTitlePrefix;
    }

    public String getWikipediaSortedPageFileName() {
        return wikipediaSortedPageFileName;
    }

    public String getWikipediaSortedFormFileName() {
        return wikipediaSortedFormFileName;
    }

    public String getWikipediaIncomingFileName() {
        return wikipediaIncomingFileName;
    }

    public String getWikipediaFormFreqFileName() {
        return wikipediaFormFreqFileName;
    }

    public String getWikipediaPageFreqFileName() {
        return wikipediaPageFreqFileName;
    }

    public String getWikipediaTextFileName() {
        return wikipediaTextFileName;
    }

    public String getWikipediaVectorFileName() {
        return wikipediaVectorFileName;
    }

    public String getWikipediaXmlFileName() {
        return wikipediaXmlFileName;
    }

    public String getWikipediaSectionFileName() {
        return wikipediaSectionFileName;
    }

    public String getWikipediaPageSectionFreqFileName() {
        return wikipediaPageSectionFreqFileName;
    }

    // ------------------------------------------------------------------
    // Diagnostics
    // ------------------------------------------------------------------

    /**
     * Renders a subset of the fields for debugging; not every field of the
     * class is included.
     */
    @Override
    public String toString() {
        return "ExtractorParameters{" +
                "version='" + version + '\'' +
                ", lang='" + lang + '\'' +
                ", wikipediaXmlFileName='" + wikipediaXmlFileName + '\'' +
                ", extractionOutputDirName='" + extractionOutputDirName + '\'' +
                ", wikipediaDirName='" + wikipediaDirName + '\'' +
                ", wikipediaOutputFilePrefixName='" + wikipediaOutputFilePrefixName + '\'' +
                ", wikipediaRedirFileName='" + wikipediaRedirFileName + '\'' +
                ", wikipediaDisambiguationFileName='" + wikipediaDisambiguationFileName + '\'' +
                ", wikipediaAnalysisFileName='" + wikipediaAnalysisFileName + '\'' +
                ", wikipediaTitleIdFileName='" + wikipediaTitleIdFileName + '\'' +
                ", wikipediaPersonInfoFileName='" + wikipediaPersonInfoFileName + '\'' +
                ", wikipediaOutgoingFileName='" + wikipediaOutgoingFileName + '\'' +
                ", wikipediaPageCategoryPrefix='" + wikipediaPageCategoryPrefix + '\'' +
                ", wikipediaPageCategoryFileName='" + wikipediaPageCategoryFileName + '\'' +
                ", wikipediaCategoryFileNames=" + wikipediaCategoryFileNames +
                ", wikipediaCategorySuperCategoryFileName='" + wikipediaCategorySuperCategoryFileName + '\'' +
                ", wikipediaCrossLanguageLinkFileName='" + wikipediaCrossLanguageLinkFileName + '\'' +
                ", wikipediaTemplateFilePrefixName='" + wikipediaTemplateFilePrefixName + '\'' +
                ", wikipediaTemplateFileNames=" + wikipediaTemplateFileNames +
                ", wikipediaInfoboxFileName='" + wikipediaInfoboxFileName + '\'' +
                ", wikipediaExampleFileName='" + wikipediaExampleFileName + '\'' +
                ", wikipediaSectionTitlePrefix='" + wikipediaSectionTitlePrefix + '\'' +
                ", wikipediaSectionTitleFileName='" + wikipediaSectionTitleFileName + '\'' +
                ", wikipediaSectionTitleFileNames=" + wikipediaSectionTitleFileNames +
                ", wikipediaSortedPageFileName='" + wikipediaSortedPageFileName + '\'' +
                ", wikipediaSortedFormFileName='" + wikipediaSortedFormFileName + '\'' +
                ", wikipediaIncomingFileName='" + wikipediaIncomingFileName + '\'' +
                ", wikipediaFormFreqFileName='" + wikipediaFormFreqFileName + '\'' +
                ", wikipediaPageFreqFileName='" + wikipediaPageFreqFileName + '\'' +
                ", wikipediaTextFileName='" + wikipediaTextFileName + '\'' +
                ", wikipediaNGramFileName='" + wikipediaNGramFileName + '\'' +
                ", wikipediaVectorFileName='" + wikipediaVectorFileName + '\'' +
                ", wikipediaDBPediaFileName='" + wikipediaDBPediaFileName + '\'' +
                ", wikipediaFilteredExampleFileName='" + wikipediaFilteredExampleFileName + '\'' +
                ", wikipediaPageFormIndexName='" + wikipediaPageFormIndexName + '\'' +
                ", wikipediaFormPageIndexName='" + wikipediaFormPageIndexName + '\'' +
                ", wikipediaNGramIndexName='" + wikipediaNGramIndexName + '\'' +
                ", wikipediaTypeIndexName='" + wikipediaTypeIndexName + '\'' +
                ", wikipediaCrossLanguageLinkIndexName='" + wikipediaCrossLanguageLinkIndexName + '\'' +
                ", extractionLogFileName='" + extractionLogFileName + '\'' +
                ", preprocessingLogFileName='" + preprocessingLogFileName + '\'' +
                ", sortingLogFileName='" + sortingLogFileName + '\'' +
                ", indexingLogFileName='" + indexingLogFileName + '\'' +
                ", wikipediaVectorIndexName='" + wikipediaVectorIndexName + '\'' +
                ", vectorLogFileName='" + vectorLogFileName + '\'' +
                ", outgoingLogFileName='" + outgoingLogFileName + '\'' +
                ", wikipediaContentPageFileName='" + wikipediaContentPageFileName + '\'' +
                ", wikipediaOutgoingIndexName='" + wikipediaOutgoingIndexName + '\'' +
                ", wikipediaIncomingIndexName='" + wikipediaIncomingIndexName + '\'' +
                ", wikipediaIncomingOutgoingFileName='" + wikipediaIncomingOutgoingFileName + '\'' +
                ", wikipediaIncomingOutgoingIndexName='" + wikipediaIncomingOutgoingIndexName + '\'' +
                ", incomingOutgoingLogFileName='" + incomingOutgoingLogFileName + '\'' +
                ", wikipediaSortedIncomingOutgoingFileName='" + wikipediaSortedIncomingOutgoingFileName + '\'' +
                ", wikipediaUnigramFileName='" + wikipediaUnigramFileName + '\'' +
                ", oneExamplePerSenseFileName='" + oneExamplePerSenseFileName + '\'' +
                ", oneExamplePerSenseIndexName='" + oneExamplePerSenseIndexName + '\'' +
                ", wikipediaTextIndexName='" + wikipediaTextIndexName + '\'' +
                ", locale=" + locale +
                '}';
    }

    /**
     * Command-line entry point: configures log4j, builds the parameters for
     * the given dump and output directory and logs them at debug level.
     *
     * <p>The log4j configuration file is taken from the {@code log-config}
     * system property, defaulting to {@code configuration/log-config.txt}.
     *
     * @param args {@code args[0]} is the Wikipedia XML dump path,
     *             {@code args[1]} the extraction output directory
     * @throws IllegalArgumentException if fewer than two arguments are given
     * @throws Exception                declared for backward compatibility
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            // Fail with a readable message rather than an ArrayIndexOutOfBoundsException.
            throw new IllegalArgumentException(
                    "Usage: java -cp dist/thewikimachine.jar eu.fbk.twm.utils.ExtractorParameters"
                            + " <wikipedia-xml-file> <extraction-output-dir>");
        }
        String logConfig = System.getProperty("log-config");
        if (logConfig == null) {
            logConfig = "configuration/log-config.txt";
        }
        PropertyConfigurator.configure(logConfig);
        //java -cp dist/thewikimachine.jar eu.fbk.twm.utils.ExtractorParameters
        ExtractorParameters extractorParameters = new ExtractorParameters(args[0], args[1]);
        logger.debug(extractorParameters);
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy