All Downloads are FREE. Search and download functionalities are using the official Maven repository.

api.wikibrain-core.0.7.2.source-code.reference.conf Maven / Gradle / Ivy

There is a newer version: 0.9.1
Show newest version
// Default configuration file, written in HOCON (a human-friendly superset of JSON).
// The file format is described at https://github.com/typesafehub/config.


// Root directory under which data files, downloads, scripts, etc. are kept.
baseDir = .


// Location for temporary files.
// Override it if your system's tmp directory does not have hundreds of GBs free.
tmpDir = ${baseDir}"/.tmp"


// Upper bound on the number of threads running at the same time.
// Defaults to Runtime.getRuntime().availableProcessors()
maxThreads = -1


// Named language sets.
// A custom language set can also be supplied on the command line;
// see EnvBuilder for more information.
languages {

    // the default set is the one made of languages that have local pages
    default = loaded

    // languages whose local pages have been loaded
    loaded { type = loaded }

    // languages whose article files have been downloaded
    downloaded { type = downloaded }

    // languages of the largest world economies
    big-economies {
        type = custom
        langCodes = [
            "en", "de", "fr", "nl", "it", "pl", "es", "ru", "ja", "pt", "zh",
            "sv", "uk", "ca", "no", "fi", "cs", "hu", "ko", "id", "tr", "ro",
            "sk", "da", "he", "simple"
        ]
    }

    // the English-language editions
    all-english {
        type = custom
        langCodes = ["en", "simple"]
    }

    // empty here; may be filled in dynamically through EnvBuilder or the command line
    manual {
        type = custom
        langCodes = []
    }
}


// File sets fetched by default, plus the locations used for downloads
download {
    matcher = ["articles", "links"]
    path = ${baseDir}"/download"
    // listFile lives inside download.path
    listFile = ${download.path}"/list.tsv"
}


// Settings for the Lucene search engine.
lucene {
    version = "4.3"
    directory = ${baseDir}"/db/lucene"

    // Named option sets; both reuse lucene.version and lucene.directory.
    options {
        default = plaintext

        plaintext {
            type = plaintext

            version = ${lucene.version}
            directory = ${lucene.directory}
            namespaces = ["article"]

            // TokenizerOptions
            caseInsensitive = true
            useStopWords = true
            useStem = true

            // TextFieldElements
            title = 0
            redirects = false
            plaintext = true
        }

        // Differs from "plaintext" only in type and in the title / redirects fields.
        esa {
            type = esa

            version = ${lucene.version}
            directory = ${lucene.directory}
            namespaces = ["article"]

            // TokenizerOptions
            caseInsensitive = true
            useStopWords = true
            useStem = true

            // TextFieldElements
            title = 1
            redirects = true
            plaintext = true
        }
    }

    // Named searchers; "esa" points at the "esa" option set above.
    searcher {
        esa {
            options = esa
        }
    }
}

// String normalizers that work across languages
stringnormalizers {
    default = identity

    // leaves strings untouched
    identity {
        type = identity
    }

    // strips punctuation
    simple {
        type = lucene
        version = ${lucene.version}
        caseInsensitive = false
        useStopWords = false
        useStem = false
    }

    // strips punctuation and folds case
    foldcase {
        type = lucene
        version = ${lucene.version}
        caseInsensitive = true
        useStopWords = false
        useStem = false
    }

    // folds case and applies porter stemming
    stemmer {
        type = lucene
        version = ${lucene.version}
        caseInsensitive = true
        useStopWords = false
        useStem = true
    }
}

// Phrase analyzers map phrases to articles and articles back to phrases
phrases {
    // true while phrase analyzers are being loaded;
    // overridden while corpora are saved to the daos
    loading = false

    // root path shared by every phrase analyzer database
    path = ${baseDir}"/db/phrases/"

    // analyzers the loader loads by default
    toLoad = ["anchortext"]

    // Analyzers
    analyzer {
        default = fast-cascading

        stanford {
            phraseDao = stanford
            localPageDao = default
            path = ${download.path}"/stanford-dictionary.bz2"
            url = "http://www-nlp.stanford.edu/pubs/crosswikis-data.tar.bz2/dictionary.bz2"
            type = stanford
            phrasePruner {
                type = string
                minCount = 3
                maxRank = 10
                minFraction = 0.001
            }
            pagePruner {
                type = simple
                minCount = 3
                maxRank = 15
                minFraction = 0.001
            }
            dao {
                // follows phrases.loading
                isNew = ${phrases.loading}
                type = objectdb
                normalizer = default
            }
        }

        anchortext {
            phraseDao = anchortext
            localPageDao = default
            localLinkDao = default
            type = anchortext
            phrasePruner {
                type = string
                minCount = 1
                maxRank = 10
                minFraction = 0.001
            }
            pagePruner {
                type = simple
                minCount = 1
                maxRank = 15
                minFraction = 0.001
            }
            dao {
                // follows phrases.loading
                isNew = ${phrases.loading}
                type = objectdb
                normalizer = default
            }
        }

        // The next two inherit everything from "anchortext" and swap only the dao normalizer.
        anchortext-foldcase = ${phrases.analyzer.anchortext} {
            dao.normalizer = foldcase
        }
        anchortext-stemmed = ${phrases.analyzer.anchortext} {
            dao.normalizer = stemmer
        }

        lucene {
            type = lucene
            localPageDao = default
        }

        // Cascading analyzers list their delegates by analyzer name.
        cascading {
            type = cascading
            delegates = ["stanford", "lucene"]
        }
        fast-cascading {
            type = cascading
            delegates = ["anchortext", "lucene"]
        }

        titleRedirect {
            type = titleRedirect
            useRedirects = true
            localPageDao = default
            redirectDao = default
        }
    }

    linkProbability {
        objectDb {
            path = ${baseDir}"/db/phrases/linkProbability"
            phraseAnalyzer = anchortext
        }
    }
}


// Data access objects (DAOs)
dao {
    // JDBC data sources; "default" selects h2.
    dataSource {
        default = h2

        h2 {
            driver = org.h2.Driver
            url = "jdbc:h2:"${baseDir}"/db/h2;LOG=0;CACHE_SIZE=65536;LOCK_MODE=0;UNDO_LOG=0;MAX_OPERATION_MEMORY=100000000"
            username = sa
            password = ""

            // Connection pooling
            // Yields a minimum of 16 jdbc connections in total:
            // partitions defaults to max(8, num-logical-cores)
            partitions = default
            connectionsPerPartition = 2
        }

        psql {
            driver = org.postgresql.Driver
            url = "jdbc:postgresql://localhost/wikibrain"
            // NOTE(review): looks like a developer-specific account name; override for your database.
            username = toby
            password = ""

            // Connection pooling
            // Yields a minimum of 16 jdbc connections in total:
            // partitions defaults to max(8, num-logical-cores)
            partitions = default
            connectionsPerPartition = 2
        }
    }

    metaInfo {
        default = sql
        sql {
            type = sql
            dataSource = default
        }
        live {}
    }

    sqlCachePath = ${baseDir}"/db/sql-cache"

    localPage {
        default = sql
        sql {
            type = sql
            dataSource = default
        }
        live {
            type = live
        }
    }

    pageView {
        default = sql
        sql {
            type = sql
            dir = ${baseDir}"/download/pageviews"
            dataSource = default
        }
        db {
            type = db
        }
    }

    interLanguageLink {
        default = sql
        sql {
            type = sql
            dataSource = default
        }
    }

    localLink {
        default = matrix
        sql {
            type = sql
            dataSource = default
        }
        // the matrix dao names the sql dao as its delegate
        matrix {
            type = matrix
            delegate = sql
            path = ${baseDir}"/db/matrix/local-link"
        }
        live {
            type = live
        }
    }

    rawPage {
        default = sql
        sql {
            type = sql
            dataSource = default
            localPageDao = sql
        }
        live {}
    }

    wikidata {
        default = sql
        sql {
            type = sql
            dataSource = default
            localPageDao = sql
        }
        live {}
    }

    universalPage {
        default = wikidata
        wikidata {
            type = sql
            mapper = purewikidata
            dataSource = default
        }
        monolingual {
            type = sql
            mapper = monolingual
            dataSource = default
        }
        live {}
    }

    localCategoryMember {
        default = sql
        sql {
            type = sql
            dataSource = default
        }
        live {
            type = live
        }
    }

    localArticle {
        default = sql
        sql {
            type = sql
            dataSource = default
        }
        live {
            type = live
        }
    }

    localCategory {
        default = sql
        sql {
            type = sql
            dataSource = default
        }
        live {
            type = live
        }
    }

    universalLink {
        default = sql-wikidata
        sql-wikidata {
            type = sql
            dataSource = default
            mapper = purewikidata
            localLinkDao = sql
        }
        skeletal-sql-wikidata {
            type = skeletal-sql
            mapper = purewikidata
            dataSource = default
        }
        live {}
    }

    redirect {
        default = sql
        sql {
            type = sql
            dataSource = default
        }
        live {
            type = live
        }
    }

    diagnostic {
        default = parse
        parse {
            tokenFile = ${baseDir}"/dat/token.txt"
            log = ${baseDir}"/dat/stages.csv"
            // NOTE(review): service identifiers are hard-coded in this shared default
            // config. Confirm they are meant to be public; otherwise move them out.
            appId = "6VaQtzvMSzzXXm0VMlm9IHKUYXAYwKRlsW19faV7"
            restApiId = "cxkuWsM6rIEVamS0OU5zpbKvuChzK8nw4XXQVwjJ"
        }
    }

}


// Concept mappers. Every algorithm needs its own unique algorithmId.
mapper {
    default = purewikidata

    monolingual {
        type = monolingual
        algorithmId = 0
        localPageDao = sql
    }

    purewikidata {
        type = purewikidata
        algorithmId = 1
        localPageDao = sql
    }
}


sr : {

    // Disambiguators; "default" selects the similarity-based one.
    disambig {
        default = similarity

        topResult {
            type = topResult
            phraseAnalyzer = default
        }

        // same type as topResult, but driven by the titleRedirect phrase analyzer
        titleRedirect {
            type = topResult
            phraseAnalyzer = titleRedirect
        }

        topResultConsensus {
            type = topResultConsensus
            phraseAnalyzers = ["lucene", "stanford", "anchortext"]
        }

        milnewitten {
            type = milnewitten
            metric = milnewitten
            phraseAnalyzer = default
        }

        similarity {
            type = similarity
            metric = inlinknotrain
            phraseAnalyzer = default

            // Scoring rule for candidate senses. One of:
            //      popularity: popularity only
            //      similarity: similarity only
            //      product: similarity * popularity
            //      sum: similarity + popularity
            criteria = sum
        }
    }

    // Directory holding SR concept files
    concepts {
        path = ${baseDir}"/dat/sr/concepts/"
    }

    // Blacklist path; empty string here
    blacklist {
        path = ""
    }

    // Base configuration inherited by the sparse-vector SR metrics
    sparsevectorbase {
        type = sparsevector
        pageDao = default
        disambiguator = default

        // Placeholder: each concrete metric has to supply its own generator
        generator { type = OVERRIDE_THIS }

        // Cosine similarity is the default vector similarity
        similarity { type = cosine }

        // normalizers
        similaritynormalizer = percentile
        mostsimilarnormalizer = percentile

        // How phrase vectors are built. Allowed values:
        //      none: build no phrase vectors; disambiguate instead.
        //      generator: the feature generator builds the phrase vectors
        //      creator: the phrase vector creator builds the phrase vectors
        //      both: try the generator first, then the creator
        phraseMode = none
    }



    // Base configuration inherited by the dense-vector SR metrics
    densevectorbase {
        type = densevector
        pageDao = default
        disambiguator = default

        // Placeholder: each concrete metric has to supply its own generator
        generator { type = OVERRIDE_THIS }

        // Cosine similarity is the default vector similarity
        similarity { type = cosine }

        // normalizers
        similaritynormalizer = percentile
        mostsimilarnormalizer = percentile
    }

    // Semantic-relatedness metrics.
    // Vector-based metrics inherit from sr.sparsevectorbase / sr.densevectorbase
    // and override the generator (and optionally similarity and normalizers).
    metric {
        // when training, normalizers are not read from disk
        training : false

        path : ${baseDir}"/dat/sr/"
        local : {
            default : milnewitten
            ESA : ${sr.sparsevectorbase} {
                generator : {
                    type : esa
                    luceneSearcher : esa
                    concepts : ${sr.concepts.path}
                }
                similarity : { type : cosine }
            }
            word2vec-nophrase : ${sr.densevectorbase} {
                generator : {
                    type : word2vec
                    corpus : plain
                    modelDir : ${baseDir}"/dat/word2vec"
                }
            }
            word2vec : ${sr.densevectorbase} {
                generator : {
                    type : word2vec
                    corpus : wikified
                    modelDir : ${baseDir}"/dat/word2vec"
                }
                reliesOn : [ "word2vecRaw" ]
            }

            // Used for performance reasons in the websail wikifier
            word2vecRaw :  ${sr.densevectorbase} {
                generator : {
                    type : word2vec
                    corpus : plain
                    modelDir : ${baseDir}"/dat/word2vecRaw"
                }
                similaritynormalizer : identity
                mostsimilarnormalizer : identity
            }

            // Uses prebuilt 300-dimensional vectors from Google
            // Requires you to download "GoogleNews-vectors-negative300.bin.gz"
            // and place it in your download directory.
            prebuiltword2vec : ${sr.densevectorbase} {
                generator : {
                    type : word2vec
                    corpus : NONE
                    binfile  : ${baseDir}"/download/GoogleNews-vectors-negative300.bin.gz"
                    languages : ["simple", "en"]
                    modelDir : ${baseDir}"/dat/prebuiltword2vec"
                    url : "https://code.google.com/p/word2vec/"
                    prebuilt : true
                }
                similarity : { type : cosine }
            }

            // Same generator as ESA, but with identity normalizers
            ESAnotrain : ${sr.sparsevectorbase} {
                generator : {
                    type : esa
                    luceneSearcher : esa
                    concepts : ${sr.concepts.path}
                }
                similarity : { type : cosine }
                similaritynormalizer : identity
                mostsimilarnormalizer : identity
            }
            outlink : ${sr.sparsevectorbase} {
                generator : {
                    type : links
                    outLinks : true
                    weightByPopularity : true
                    logTransform : true
                }
                similarity : {
                    type : cosine
                }
            }
            inlink : ${sr.sparsevectorbase} {
                generator : {
                    type : links
                    outLinks : false
                }
                similarity : {
                    type : google
                }
            }
            // Same generator and similarity as inlink, but with identity normalizers
            inlinknotrain : ${sr.sparsevectorbase} {
                generator : {
                    type : links
                    outLinks : false
                }
                similarity : {
                    type : google
                }
                similaritynormalizer : identity
                mostsimilarnormalizer : identity
            }

            directlink : {
                type : directlink
                disambiguator : default
                similaritynormalizer : identity
                mostsimilarnormalizer : identity
            }

            // Refers to the inlink and outlink metrics defined above by name
            milnewitten : {
                type : milnewitten
                inlink : inlink
                outlink : outlink
                disambiguator : milnewitten
                similaritynormalizer : identity
                mostsimilarnormalizer : identity
            }
            synrank : {
                type : synrank
                similaritynormalizer : identity
                mostsimilarnormalizer : identity
                disambiguator : titleRedirect
            }
            simplemilnewitten : {
                type : simplemilnewitten
            }
            fast-ensemble : {
                type : ensemble
                // NOTE(review): no metric named "milnewittenout" is defined in this
                // file -- confirm it is provided elsewhere, or correct the name.
                metrics : ["milnewitten","milnewittenout"]
                similaritynormalizer : identity
                mostsimilarnormalizer : identity
                ensemble : linear
                disambiguator : default
                pageDao : default
            }
            ensemble : {
                type : ensemble
                metrics : ["ESA", "category", "milnewitten"]
                similaritynormalizer : percentile
                mostsimilarnormalizer : percentile
                ensemble : linear
                resolvephrases : false
                disambiguator : default
                pageDao : default
            }
            word2vec-ensemble : {
                type : ensemble
                metrics : ["ESA","inlink","outlink","category","word2vec","milnewitten","directlink"]
                similaritynormalizer : percentile
                mostsimilarnormalizer : percentile
                ensemble : linear
                resolvephrases : false
                disambiguator : default
                pageDao : default
            }
            super-ensemble : {
                type : ensemble
                metrics : ["ESA","inlink","outlink","category","mostsimilarconcepts","milnewitten"]
                similaritynormalizer : percentile
                mostsimilarnormalizer : percentile
                ensemble : linear
                resolvephrases : false
                disambiguator : default
                pageDao : default
            }
            // Built on top of the msc-ensemble metric defined just below
            mostsimilarconcepts : ${sr.sparsevectorbase} {
                generator : {
                    type : mostsimilarconcepts
                    basemetric : "msc-ensemble"
                    concepts : ${sr.concepts.path}
                    maxResults : 1000
                }
            }
            // for most similar concepts
            msc-ensemble : {
                // Set defaults for training the cosimilarity matrix.
                mostSimilarConcepts : ${sr.concepts.path}
                maxResults : 1000

                type : ensemble
                metrics : ["ESA","inlink","outlink","category","milnewitten"]
                similaritynormalizer : percentile
                // set exactly once; a second identical assignment was removed
                mostsimilarnormalizer : rank
                ensemble : linear
                resolvephrases : false
                disambiguator : default
                pageDao : default
                buildMostSimilarCache : true
            }
            category :{
                type : categorygraphsimilarity
                disambiguator : default
                pageDao : default
                categoryMemberDao : default
                similaritynormalizer : percentile
                mostsimilarnormalizer : percentile
            }
            // metrics and coefficients are parallel lists (four entries each)
            known-phrase : {
                type: knownphrase
                stringnormalizer: simple
                metrics: ["prebuiltword2vec", "ESA", "outlink", "inlink"]
                coefficients: [0.25, 0.45, 0.12, 0.18]
            }
        }
    }

    // Corpora; the two entries differ in output path and in the wikifier they use.
    corpus {
        plain {
            path = ${baseDir}"/dat/corpus/plain/"
            wikifier = identity
            rawPageDao = default
            localPageDao = default
            phraseAnalyzer = anchortext
        }

        wikified {
            path = ${baseDir}"/dat/corpus/wikified/"
            wikifier = websail
            rawPageDao = default
            localPageDao = default
            phraseAnalyzer = anchortext
        }
    }

    // Ensemble combiners; "default" selects linear.
    ensemble {
        default = linear
        even { type = even }
        linear { type = linear }
    }

    // Score normalizers that metrics refer to by name
    normalizer {
        defaultmaxresults = 100

        identity { type = identity }

        // loess with the log flag switched on
        logLoess {
            type = loess
            log = true
        }

        loess { type = loess }

        log { type = log }

        percentile { type = percentile }

        range {
            type = range
            min = 0.0
            max = 1.0
            truncate = true
        }

        rank { type = rank }
    }

    // The key is intentionally nested under itself: sr.explanationformatter.explanationformatter
    explanationformatter {
        explanationformatter {
            localpagedao = sql
        }
    }

    // Evaluation datasets
    dataset {
        dao {
            resource {
                type = resource
                disambig = topResult
                resolvePhrases = true
            }
        }

        defaultsets = ["wordsim353.txt", "MC.txt"]

        groups {
            // large, commonly used datasets
            major-en = ["wordsim353.txt", "MTURK-771.csv", "atlasify240.txt", "radinsky.txt"]
        }

        // pairs that fall under this threshold are left out of most similar training.
        mostSimilarThreshold = 0.7
        records = ${baseDir}"/dat/records/"
    }

    // Wikifiers; "default" selects identity.
    wikifier {
        default = identity

        milnewitten {
            type = milnewitten
            phraseAnalyzer = anchortext
            sr = inlinknotrain
            localLinkDao = matrix
            useLinkProbabilityCache = true
        }

        identity {
            type = identity
            localLinkDao = sql
        }

        // names the identity wikifier above as its identityWikifier
        websail {
            type = websail
            phraseAnalyzer = anchortext
            sr = word2vecRaw
            identityWikifier = identity
            localLinkDao = matrix
            useLinkProbabilityCache = true
        }
    }

}

// Spatial data: storage location, data sources, DAOs and downloadable datasets

spatial {
    dir = ${baseDir}"/dat/spatial"

    dao {

        dataSource {

            // All keys here are the standard Geotools JDBC ones;
            // see: http://docs.geotools.org/stable/userguide/library/jdbc/datastore.html
            // Adjust this section to match your own DB settings.
            default = postgis
            postgis {
                dbtype = postgis
                host = localhost
                port = 5432
                schema = public
                database = wikibrain_spatial
                // NOTE(review): looks like a developer-specific account name; override for your database.
                user = toby
                passwd = ""
                max connections = 19
            }
        }

        spatialData {
            default = postgis
            postgis {
                dataSource = postgis
            }
        }

        spatialContainment {
            default = postgis
            postgis {
                dataSource = postgis
            }
        }

        spatialNeighbor {
            default = postgis
            postgis {
                dataSource = postgis
            }
        }

    }


    datasets {
        earth.country.naturalEarth {
            url = "http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries.zip"

            mappingUrl = "http://www.shilad.com/wikibrain/naturalEarth.country.zip"

            encoding = "UTF-8"

            // Name of the shapefile within the zip archive
            shp = ne_10m_admin_0_countries.shp

            // Fields that may hold titles, highest priority first
            titles = ["NAME", "NAME_LONG", "FORMAL_EN", "ADMIN"]

            // Fields that may hold words providing context
            context = ["SUBREGION", "CONTINENT"]

            // Fields concatenated to form a persistent key in the file
            key = ["NAME_LONG"]

            // Additional fields to include in the spreadsheet
            other = []

            scorers = [
                { type = instanceOf, file = /spatial-matcher/country.txt, weight = 1.0 }
                { type = wikidataValue, property = "FIPS 10-4 (countries and regions)", column = FIPS_10_, weight = 4.0 }
            ]

            dab = topResult
        }

        earth.state.naturalEarth {
            url = "http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_1_states_provinces.zip"

            mappingUrl = "http://www.shilad.com/wikibrain/naturalEarth.state.zip"

            encoding = "UTF-8"

            // Name of the shapefile within the zip archive
            shp = ne_10m_admin_1_states_provinces.shp

            // Fields that may hold titles, highest priority first
            titles = ["NAME", "NAME_LOCAL", "NAME_ALT", "WOE_NAME", "GN_NAME"]

            country = ["ADMIN", "GEONUNIT"]

            fips = ["FIPS"]
            // Fields that may hold words providing context
            context = ["REGION"]

            // Fields concatenated to form a persistent key in the file
            key = ["NAME", "ADMIN"]

            // Additional fields to include in the spreadsheet
            other = []

            scorers = [
                { type = instanceOf, file = /spatial-matcher/state.txt, weight = 1.0 }
                { type = contains, weight = 1.0 }
                { type = wikidataValue, property = "FIPS 10-4 (countries and regions)", column = FIPS, weight = 4.0 }
                { type = wikidataValue, property = "ISO 3166-2", column = "ISO_3166_2", weight = 4.0 }
            ]

            dab = topResult
        }

        earth.marine.naturalEarth {
            url = "http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/physical/ne_10m_geography_marine_polys.zip"

            mappingUrl = "http://www.shilad.com/wikibrain/naturalEarth.marine.zip"

            encoding = "UTF-8"

            // Name of the shapefile within the zip archive
            shp = ne_10m_geography_marine_polys.shp

            // Fields that may hold titles, highest priority first
            titles = ["NAME", "NAMEALT"]

            // Fields concatenated to form a persistent key in the file
            key = ["NAME"]

            context = []

            // Additional fields to include in the spreadsheet
            other = []

            scorers = [
                { type = instanceOf, file = /spatial-matcher/marine.txt, weight = 1.0 }
                { type = contains, weight = 1.0 }
            ]

            dab = topResult
        }
    }

}

// Loading pipeline: named stage groups and the individual stages
loader {
    groups {
        core = ["fetchlinks", "download", "dumploader", "redirects", "wikitext", "lucene", "phrases"]
        // the core group with "concepts" appended
        multilingual-core = ${loader.groups.core} ["concepts"]
    }

    // Pipeline stages, consumed by PipelineLoader.
    // runtime / diskSpace / downloadSize are string expressions that may contain
    // #{...} placeholders (articles, links, singleCoreSpeed, multiCoreSpeed).
    stages = [
        {
            name = fetchlinks
            class = org.wikibrain.download.RequestedLinkGetter
            extraArgs = []
            runtime = "0"
            diskSpace = "1"
        },
        {
            name = download
            class = org.wikibrain.download.DumpFileDownloader
            dependsOnStage = fetchlinks
            extraArgs = []
            runtime = "0"
            diskSpace = "#{articles} / 250"
            downloadSize = "#{articles} / 250"
        },
        {
            name = dumploader
            class = org.wikibrain.loader.DumpLoader
            runtime = "#{articles} / #{multiCoreSpeed} / 300.0 + #{articles} / #{singleCoreSpeed} / 4000.0"
            dependsOnStage = download
            loadsClass = LocalPage
            extraArgs = ["-d"]
            diskSpace = "#{articles} / 175"
        },
        {
            name = redirects
            class = org.wikibrain.loader.RedirectLoader
            dependsOnStage = dumploader
            loadsClass = Redirect
            extraArgs = ["-d"]
            runtime = "#{articles} / #{singleCoreSpeed} / 15000.0"
            diskSpace = "#{articles} / 3500"
        },
        {
            name = wikitext
            class = org.wikibrain.loader.WikiTextLoader
            loadsClass = LocalLink
            dependsOnStage = redirects
            extraArgs = ["-d"]
            runtime = "#{links} / #{multiCoreSpeed} / 3000.0 + #{links} / #{singleCoreSpeed} / 25000.0"
            diskSpace = "#{links} / 8000"
        },
        {
            name = lucene
            class = org.wikibrain.loader.LuceneLoader
            loadsClass = LuceneSearcher
            dependsOnStage = wikitext
            extraArgs = []
            runtime = "#{articles} / #{multiCoreSpeed} / 90.0"
            diskSpace = "#{articles} / 140"
        },
        {
            name = phrases
            class = org.wikibrain.loader.PhraseLoader
            loadsClass = PrunedCounts
            dependsOnStage = wikitext
            extraArgs = ["-p", "anchortext"]
            runtime = "#{links} / #{singleCoreSpeed} / 90000.0"
            diskSpace = "#{links} / 40000"
        },
        {
            name = concepts
            class = org.wikibrain.loader.ConceptLoader
            dependsOnStage = redirects
            loadsClass = UniversalPage
            extraArgs = ["-d"]
            runtime = "#{articles} / #{singleCoreSpeed} / 3000.0 + 300"
            diskSpace = "#{articles} / 3500"
            downloadSize = "550"
        },
        {
            name = universal
            class = org.wikibrain.loader.UniversalLinkLoader
            dependsOnStage = ["concepts", "wikitext"]
            loadsClass = UniversalLink
            extraArgs = ["-d"]
            runtime = "#{links} / #{multiCoreSpeed} / 3000.0 + #{links} / #{singleCoreSpeed} / 25000.0"
            diskSpace = "#{links} / 4000"
        },
        {
            name = wikidata
            class = org.wikibrain.wikidata.WikidataDumpLoader
            dependsOnStage = concepts
            loadsClass = WikidataEntity
            extraArgs = ["-d"]
            runtime = "900 + #{articles} / #{multiCoreSpeed} / 300.0"
            diskSpace = "#{links} / 60000"
            downloadSize = "2400"
        },
        {
            name = spatial
            class = org.wikibrain.spatial.loader.SpatialDataLoader
            dependsOnStage = wikidata
            loadsClass = Geometry
            extraArgs = ["-d"]
            runtime = "900 / #{singleCoreSpeed}"
            diskSpace = "200"
            downloadSize = "50"
        },
        {
            name = sr
            class = org.wikibrain.sr.SRBuilder
            dependsOnStage = ["wikitext", "phrases", "lucene"]
            extraArgs = ["-m", "milnewitten", "-o", "both"]
            runtime = "#{articles} / #{singleCoreSpeed} / 15000.0 + #{articles} / #{multiCoreSpeed} / 2000.0"
            diskSpace = "#{links} / 24000"
        }
    ]
}


// Backup directory used by the integration tests
integration {
    dir = ${baseDir}"/backup"
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy