All downloads are free. The search and download functionality uses the official Maven repository.

org.fbk.cit.hlt.thewikimachine.index.Prova Maven / Gradle / Ivy

/*
 * Copyright (2013) Fondazione Bruno Kessler (http://www.fbk.eu/)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.fbk.cit.hlt.thewikimachine.index;

import org.apache.commons.cli.*;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.fbk.cit.hlt.core.lsa.LSM;
import org.fbk.cit.hlt.core.lsa.BOW;
import org.fbk.cit.hlt.core.math.Node;
import org.fbk.cit.hlt.core.math.Vector;
import org.fbk.cit.hlt.thewikimachine.analysis.HardTokenizer;
import org.fbk.cit.hlt.thewikimachine.analysis.Tokenizer;
import org.fbk.cit.hlt.thewikimachine.csv.OneExamplePerSenseExtractor;
import org.fbk.cit.hlt.thewikimachine.index.util.AbstractSearcher;
import org.fbk.cit.hlt.thewikimachine.util.CharacterTable;

import org.fbk.cit.hlt.thewikimachine.util.GenericFileUtils;
import org.fbk.cit.hlt.thewikimachine.util.StringTable;
import org.fbk.cit.hlt.thewikimachine.xmldump.AbstractWikipediaExtractor;

import java.io.*;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.DecimalFormat;
import java.util.*;
import java.util.regex.Pattern;

/**
 * Created with IntelliJ IDEA.
 * User: giuliano
 * Date: 1/22/13
 * Time: 6:11 PM
 * To change this template use File | Settings | File Templates.
 * 

* The best configuration is PageIncomingOutgoingSearcher */ public class Prova { /** * Define a static logger variable so that it references the * Logger instance named Prova. */ static Logger logger = Logger.getLogger(Prova.class.getName()); public static void main(String args[]) throws Exception { // java -cp dist/thewikimachine.jar org.fbk.cit.hlt.thewikimachine.index.Prova String logConfig = System.getProperty("log-config"); if (logConfig == null) { logConfig = "configuration/log-config.txt"; } PropertyConfigurator.configure(logConfig); Map resourceMap = null; resourceMap = GenericFileUtils.searchForFilesInTheSameFolder(args[0] + args[1], "type-index", "page-form-index", "form-page-index", "ngram-index", "page-freq.csv", "form-freq", "cross-lang-index", "ngram.csv", "unigram", "one-example-per-sense-index", "page-file-source-index", "first-name-index", "person-info-index", "airpedia-class-index", "abstract-index", "page-category-index", "category-super-category-index", "page-vector-index", "incoming-outgoing-weighted-index"); logger.debug(resourceMap); /*String name = args[0]; byte[] bytesOfMessage = new byte[0]; try { logger.debug(name); bytesOfMessage = name.getBytes("UTF-8"); } catch (UnsupportedEncodingException e) { logger.error(e); } logger.debug(Arrays.toString(bytesOfMessage)); MessageDigest md = null; try { md = MessageDigest.getInstance("MD5"); } catch (NoSuchAlgorithmException e) { logger.error(e); } byte[] digest = md.digest(bytesOfMessage); logger.debug(Arrays.toString(digest)); BigInteger bigInt = new BigInteger(1, digest); String hash16 = bigInt.toString(16); String hash32 = bigInt.toString(32); logger.debug(hash16 + " (" + hash16.length() + ")"); logger.debug(hash32 + " (" + hash32.length() + ")"); while (hash16.length() < 32) { hash16 = "0" + hash16; } logger.debug(hash16 + " (" + hash16.length() + ")");*/ } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy