# ach_2.11.1.3.2.source-code.application.conf Maven / Gradle / Ivy
# The newest version!
#
# Configuration file for reach
#
# Input directory holding the raw papers (nxml, .csv, and/or .tsv files).
# This directory *must* already exist.
papersDir = ${HOME}/Documents/reach/papers

# Output directory for the files containing the extracted mentions.
# It is created if it does not exist.
outDir = ${HOME}/Documents/reach/output

# Output format for mentions; one of: text, fries, indexcard, assembly-csv.
# The default is 'fries'.
outputType = "fries"

# Whether or not assembly should be run.
withAssembly = true

# Directory where the context files are stored.
# It is created if it does not exist.
contextDir = ${HOME}/Documents/reach/context

# Directory where the brat standoff and text files are dumped.
bratDir = ${HOME}/Documents/reach/brat

# Verbose logging.
verbose = true

# Character encoding of input and output files.
encoding = "utf-8"

# Sections that should be ignored.
ignoreSections = [
  "references",
  "materials",
  "materials|methods",
  "methods",
  "supplementary-material"
]
# Context engine configuration.
contextEngine {
  # which context engine type to use
  type = Policy4

  # parameters for the selected context engine
  params {
    bound = 3
  }
}
# Logging configuration.
logging {
  # project-wide logging level
  loglevel = INFO

  # log file path; note that this key (logging.logfile) is distinct from the
  # top-level logFile key defined elsewhere in this file
  logfile = ${HOME}/.reach.log
}
# This log file gets overwritten every time ReachCLI is executed,
# so copy it elsewhere if you want to keep it around.
# Note: this top-level key is separate from the logging.logfile key defined elsewhere in this file.
logFile = ${HOME}/Documents/reach/log.txt
# Grounding configuration.
grounding: {
  # AdHoc grounding files to insert, in order, into the grounding search sequence.
  # Each list element is a map holding the KB filename plus optional meta info
  # (the meta info is not yet used), for example:
  #   { kb: "adhoc.tsv", source: "NMZ at CMU" }
  adHocFiles: [
    {
      kb: "NER-Grounding-Override.tsv.gz",
      source: "MITRE/NMZ/BG feedback overrides"
    }
  ]

  # Flag to turn off the influence of species on grounding.
  overrideSpecies = true
}
# Number of simultaneous threads to use for parallelization.
threadLimit = 2
# Settings under the ReadPapers prefix.
# papersDir: input directory; the configured path points at nxml inputs under src/test/resources.
ReadPapers.papersDir = src/test/resources/inputs/nxml/
# serializedPapers: name of a .ser file.
# NOTE(review): presumably the file that holds the serialized mentions -- confirm against ReadPapers.
ReadPapers.serializedPapers = mentions.ser
# Settings for assembly.
assembly {
  # Assembly can be run directly over a directory of papers (see ReachCLI);
  # that directory is set by the papersDir property.
  #
  # Assembly output (outDir) takes the form of json or tsv files.
  #
  # TSV-style output
  #   Currently, two tsv files are produced for each paper:
  #     1. assembly matching MITRE's (March 2016) requirements
  #     2. unconstrained
  #
  #   Additionally, two output files are produced to show assembly across all papers:
  #     1. assembly matching MITRE's (March 2016) requirements
  #     2. unconstrained

  # Assembly can also be performed against a serialized dataset (see AssembleFromDataset).
  serializedDataset = ${HOME}/Documents/reach/serialized

  # A relation corpus (json).
  corpusFile = annotations.json

  # Assembly relation classifier.
  classifier {
    # Which algorithm to use.
    classifier = lr-l1

    # The trained model file (for reading and writing).
    # This is an lr-l1 model.
    model = src/main/resources/org/clulab/assembly/fbc.model

    # Report of results.
    results = results.tsv
  }

  # Serialized gold PrecedenceRelations for evaluation.
  evalGold = evalGold.ser

  # Serialized mentions prior to applying rule-based sieves.
  evalMentions = evalMentions.ser

  # Allow additional information output.
  verbose = true
}
# © 2015 - 2025 Weber Informatics LLC | Privacy Policy