edu.emory.mathcs.nlp.configuration.config-train-ner.xml Maven / Gradle / Ivy
The newest version!
<!-- Training configuration: named entity recognition -->
<configuration>
    <!-- Maps each TSV column index to the token field it supplies. -->
    <tsv>
        <column index="1" field="form"/>
        <column index="2" field="lemma"/>
        <column index="3" field="pos"/>
        <column index="4" field="feats"/>
        <column index="5" field="nament"/>
    </tsv>

    <!-- Lexicon resources; each entry names the token field it is keyed on via "field". -->
    <lexica>
        <word_clusters field="word_form_simplified_lowercase">edu/emory/mathcs/nlp/lexica/en-brown-clusters-simplified-lowercase.xz</word_clusters>
        <word_embeddings field="word_form_undigitalized">edu/emory/mathcs/nlp/lexica/en-word-embeddings-undigitalized.xz</word_embeddings>
        <named_entity_gazetteers field="word_form_simplified">edu/emory/mathcs/nlp/lexica/en-named-entity-gazetteers-simplified.xz</named_entity_gazetteers>
    </lexica>

    <!-- Optimizer settings (algorithm and its hyper-parameters). -->
    <optimizer>
        <l1_regularization>0.00001</l1_regularization>
        <algorithm>adagrad-mini-batch</algorithm>
        <learning_rate>0.02</learning_rate>
        <feature_cutoff>0</feature_cutoff>
        <lols fixed="0" decaying="0.95"/>
        <batch_size>5</batch_size>
        <max_epoch>20</max_epoch>
        <bias>0</bias>
    </optimizer>

    <!-- Reducer settings. NOTE(review): lower_bound looks like a score threshold; confirm against the consumer. -->
    <reducer>
        <lower_bound>86.98</lower_bound>
        <increment>0.01</increment>
        <iteration>2</iteration>
        <start>0.05</start>
        <range>0.005</range>
    </reducer>

    <!--
        Feature templates. Each f0/f1/f2 value has the form "offset:field",
        where the offset is written relative to i (e.g. i-1, i, i+1).
        A feature with several fN attributes combines those fields.
    -->
    <feature_template>
        <!-- 1-gram features: simplified word form -->
        <feature f0="i-1:word_form_simplified"/>
        <feature f0="i:word_form_simplified"/>
        <feature f0="i+1:word_form_simplified"/>

        <!-- 1-gram features: simplified lowercase word form -->
        <feature f0="i-2:word_form_simplified_lowercase"/>
        <feature f0="i-1:word_form_simplified_lowercase"/>
        <feature f0="i:word_form_simplified_lowercase"/>
        <feature f0="i+1:word_form_simplified_lowercase"/>
        <feature f0="i+2:word_form_simplified_lowercase"/>

        <!-- 1-gram features: word shape -->
        <feature f0="i-1:word_shape"/>
        <feature f0="i:word_shape"/>
        <feature f0="i+1:word_shape"/>

        <!-- 1-gram features: part-of-speech tag -->
        <feature f0="i-1:part_of_speech_tag"/>
        <feature f0="i:part_of_speech_tag"/>
        <feature f0="i+1:part_of_speech_tag"/>

        <!-- 1-gram features: named entity tags at offsets i-2 and i-1 -->
        <feature f0="i-2:named_entity_tag"/>
        <feature f0="i-1:named_entity_tag"/>

        <!-- 1-gram features: gazetteer lookups (set="true") -->
        <feature set="true" f0="i-1:named_entity_gazetteers"/>
        <feature set="true" f0="i:named_entity_gazetteers"/>
        <feature set="true" f0="i+1:named_entity_gazetteers"/>

        <!-- 2-gram features -->
        <feature f0="i-2:part_of_speech_tag" f1="i-1:part_of_speech_tag"/>
        <feature f0="i+1:part_of_speech_tag" f1="i+2:part_of_speech_tag"/>
        <feature f0="i:lemma" f1="i:part_of_speech_tag"/>
        <feature f0="i+1:lemma" f1="i:part_of_speech_tag"/>
        <feature f0="i-1:lemma" f1="i:lemma"/>
        <feature f0="i:lemma" f1="i+1:lemma"/>
        <feature f0="i+1:lemma" f1="i+2:lemma"/>

        <!-- 3-gram features -->
        <feature f0="i-1:lemma" f1="i-1:part_of_speech_tag" f2="i-1:named_entity_tag"/>
        <feature f0="i:lemma" f1="i:part_of_speech_tag" f2="i-1:named_entity_tag"/>

        <!-- affix features -->
        <feature f0="i:suffix:3"/>
        <feature f0="i+1:prefix:3"/>
        <feature f0="i:suffix:3" f1="i:word_form_simplified_lowercase"/>
        <feature f0="i-1:suffix:3" f1="i:word_form_simplified_lowercase"/>

        <!-- word cluster features -->
        <feature set="true" f0="i-2:word_clusters"/>
        <feature set="true" f0="i-1:word_clusters"/>
        <feature set="true" f0="i:word_clusters"/>
        <feature set="true" f0="i+1:word_clusters"/>
        <feature set="true" f0="i+2:word_clusters"/>

        <!-- word embedding feature -->
        <feature f0="i:word_embedding"/>
    </feature_template>
</configuration>
© 2015 - 2024 Weber Informatics LLC | Privacy Policy