// org.fbk.cit.hlt.thewikimachine.index.Prova (Maven / Gradle / Ivy)
/*
* Copyright (2013) Fondazione Bruno Kessler (http://www.fbk.eu/)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.fbk.cit.hlt.thewikimachine.index;
import org.apache.commons.cli.*;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.fbk.cit.hlt.core.lsa.LSM;
import org.fbk.cit.hlt.core.lsa.BOW;
import org.fbk.cit.hlt.core.math.Node;
import org.fbk.cit.hlt.core.math.Vector;
import org.fbk.cit.hlt.thewikimachine.analysis.HardTokenizer;
import org.fbk.cit.hlt.thewikimachine.analysis.Tokenizer;
import org.fbk.cit.hlt.thewikimachine.csv.OneExamplePerSenseExtractor;
import org.fbk.cit.hlt.thewikimachine.index.util.AbstractSearcher;
import org.fbk.cit.hlt.thewikimachine.util.CharacterTable;
import org.fbk.cit.hlt.thewikimachine.util.GenericFileUtils;
import org.fbk.cit.hlt.thewikimachine.util.StringTable;
import org.fbk.cit.hlt.thewikimachine.xmldump.AbstractWikipediaExtractor;
import java.io.*;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.DecimalFormat;
import java.util.*;
import java.util.regex.Pattern;
/**
 * Scratch command-line entry point: configures log4j, then resolves the
 * index resources associated with a path given on the command line and logs
 * what was found.
 *
 * <p>Usage:
 * <pre>
 * java -cp dist/thewikimachine.jar org.fbk.cit.hlt.thewikimachine.index.Prova &lt;arg0&gt; &lt;arg1&gt;
 * </pre>
 *
 * <p>Original author note: the best configuration is PageIncomingOutgoingSearcher.
 *
 * @author giuliano (created 1/22/13)
 */
public class Prova {
    /**
     * Static logger that references the Logger instance named after this class.
     */
    static Logger logger = Logger.getLogger(Prova.class.getName());

    /**
     * Configures logging and looks up the known resource files.
     *
     * <p>The log4j configuration is read from the file named by the
     * {@code log-config} system property, falling back to
     * {@code configuration/log-config.txt} when the property is not set.
     *
     * @param args two required arguments; they are concatenated as-is (no
     *             separator is inserted) and the result is passed as the first
     *             argument of
     *             {@code GenericFileUtils.searchForFilesInTheSameFolder}
     * @throws IllegalArgumentException if fewer than two arguments are given
     * @throws Exception                if the resource lookup fails
     */
    public static void main(String[] args) throws Exception {
        String logConfig = System.getProperty("log-config");
        if (logConfig == null) {
            logConfig = "configuration/log-config.txt";
        }
        PropertyConfigurator.configure(logConfig);

        // Fail with an explicit message instead of an ArrayIndexOutOfBoundsException
        // when the two path components are missing.
        if (args == null || args.length < 2) {
            throw new IllegalArgumentException(
                    "Expected 2 arguments but got " + (args == null ? 0 : args.length)
                            + ". Usage: java -cp dist/thewikimachine.jar "
                            + Prova.class.getName() + " <arg0> <arg1>");
        }

        // NOTE(review): the two arguments are joined without a separator; presumably
        // args[0] is a folder ending with '/' and args[1] a file-name prefix - confirm.
        Map<?, ?> resourceMap = GenericFileUtils.searchForFilesInTheSameFolder(
                args[0] + args[1],
                "type-index",
                "page-form-index",
                "form-page-index",
                "ngram-index",
                "page-freq.csv",
                "form-freq",
                "cross-lang-index",
                "ngram.csv",
                "unigram",
                "one-example-per-sense-index",
                "page-file-source-index",
                "first-name-index",
                "person-info-index",
                "airpedia-class-index",
                "abstract-index",
                "page-category-index",
                "category-super-category-index",
                "page-vector-index",
                "incoming-outgoing-weighted-index");
        logger.debug(resourceMap);
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy