ach_2.11.1.3.2.source-code.application.conf Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of reach_2.11 Show documentation
reach
The newest version!
#
# Configuration file for reach
#

# Input directory: holds the raw papers to read (nxml, .csv, and/or .tsv files).
# This directory *must* already exist.
# NOTE(review): ${HOME} is not defined in the visible part of this file, so it
# presumably resolves from the environment -- confirm it is set wherever this runs.
papersDir = ${HOME}/Documents/reach/papers

# Output directory: the files containing the extracted mentions are stored here.
# If this directory doesn't exist it will be created.
outDir = ${HOME}/Documents/reach/output

# Output format for mentions. Listed values: text, fries, indexcard, or
# assembly-csv (default is 'fries').
# NOTE(review): the assembly section of this file describes json/TSV output files;
# confirm that 'assembly-csv' is the exact value the reader of this key expects.
outputType = "fries"

# Whether or not assembly should be run.
withAssembly = true

# Directory where the context files will be stored.
# If this directory doesn't exist it will be created.
contextDir = ${HOME}/Documents/reach/context

# Directory where the brat standoff and text files are dumped.
# NOTE(review): unlike outDir/contextDir, nothing here states whether this
# directory is created when missing -- confirm.
bratDir = ${HOME}/Documents/reach/brat

# Verbose logging.
# Note: the assembly section of this file carries its own, separate `verbose` key.
verbose = true

# The character encoding of input and output files.
encoding = "utf-8"


# List of paper sections that should be ignored.
# NOTE(review): "materials|methods" is a single entry containing a literal '|';
# whether the consumer treats it as an exact section name or as an alternation
# is not visible here -- confirm.
ignoreSections = ["references", "materials", "materials|methods", "methods", "supplementary-material"]

# Context engine configuration: the engine type and its parameters.
contextEngine {
    # engine type
    type = "Policy4"

    # engine parameters; only `bound` is set here
    params.bound = 3
}

# Logging settings, written as path expressions under the `logging` object.
# loglevel defines the project-wide logging level; logfile is the log file path.
logging.loglevel = INFO
logging.logfile = ${HOME}/.reach.log

# Log file written by ReachCLI. It gets overwritten every time ReachCLI is executed,
# so copy it elsewhere if you want to keep it around.
# Note: this top-level `logFile` key is distinct from `logging.logfile`, which is
# also set in this file and points to a different path.
logFile = ${HOME}/Documents/reach/log.txt

# Grounding configuration.
grounding {
  # AdHoc grounding files to insert, in order, into the grounding search sequence.
  # Every list element is a map holding the KB filename (`kb`) plus optional
  # meta info (not yet used), for example:
  #   { kb: "adhoc.tsv", source: "NMZ at CMU" }
  adHocFiles = [
    {
      kb = "NER-Grounding-Override.tsv.gz"
      source = "MITRE/NMZ/BG feedback overrides"
    }
  ]

  # Flag to turn off the influence of species on grounding.
  overrideSpecies = true
}

# Number of simultaneous threads to use for parallelization.
threadLimit = 2

# ReadPapers settings.
# Note: `ReadPapers.papersDir` is a separate key from the top-level `papersDir`
# and is a relative path.
ReadPapers {
  papersDir = "src/test/resources/inputs/nxml/"
  serializedPapers = "mentions.ser"
}

# Settings for assembly.
#
# Assembly can be run directly over the directory of papers given by the
# papersDir property (see ReachCLI). Its output (outDir) takes the form of
# json or tsv files.
#
# TSV-style output -- currently two tsv files are produced for each paper:
#   1. assembly matching MITRE's (March 2016) requirements
#   2. unconstrained
# Two additional output files show assembly across all papers:
#   1. assembly matching MITRE's (March 2016) requirements
#   2. unconstrained
assembly {
  # Assembly can also be performed against a serialized dataset
  # (see AssembleFromDataset).
  serializedDataset = ${HOME}/Documents/reach/serialized

  # A relation corpus (json).
  corpusFile = "annotations.json"

  # Assembly relation classifier.
  classifier {
    # Which algorithm to use.
    classifier = "lr-l1"

    # The trained model file (for reading and writing).
    # The file referenced here is an lr-l1 model.
    model = "src/main/resources/org/clulab/assembly/fbc.model"

    # Report of results.
    results = "results.tsv"
  }

  # Serialized gold PrecedenceRelations for evaluation.
  evalGold = "evalGold.ser"

  # Serialized mentions prior to applying rule-based sieves.
  evalMentions = "evalMentions.ser"

  # Allow additional information output.
  verbose = true
}