All downloads are free. Search and download functionality uses the official Maven repository.

edu.emory.mathcs.nlp.configuration.config-train-dep.xml Maven / Gradle / Ivy

The newest version!
<!-- dependency parsing -->
<configuration>
	<tsv>
        <column index="1" field="form"/>
        <column index="3" field="lemma"/>
        <column index="5" field="pos"/>
        <column index="7" field="feats"/>
        <column index="8" field="dhead"/>
        <column index="9" field="deprel"/>
    </tsv>

    <!-- Lexical resources. The single entry names a word-cluster resource by
         path and tags it with the field "word_form_simplified_lowercase".
         NOTE(review): presumably that field selects the word form used as the
         lookup key; confirm against the lexicon loader. -->
    <lexica>
        <word_clusters field="word_form_simplified_lowercase">edu/emory/mathcs/nlp/lexica/en-brown-clusters-simplified-lowercase.xz</word_clusters>
    </lexica>

    <!-- Optimizer settings: algorithm "adagrad-mini-batch" with an L1
         regularization value, learning rate, feature cutoff, batch size,
         maximum epoch count and bias.
         NOTE(review): the meaning of <lols fixed=".." decaying=".."/> is not
         defined in this file; confirm against the trainer documentation. -->
    <optimizer>
        <l1_regularization>0.00001</l1_regularization>
        <algorithm>adagrad-mini-batch</algorithm>
        <learning_rate>0.02</learning_rate>
        <feature_cutoff>2</feature_cutoff>
        <lols fixed="2" decaying="0.95"/>
        <batch_size>5</batch_size>
        <max_epoch>20</max_epoch>
        <bias>0</bias>
    </optimizer>

        <reducer>
        <lower_bound>88.91</lower_bound>
        <increment>0.01</increment>
        <iteration>2</iteration>
        <start>0.04</start>
        <range>0.005</range>
    </reducer>


    <!-- Feature templates. Each <feature> combines one to three items
         (attributes f0, f1, f2), each written as "source:field"; some entries
         also carry set="true".
         NOTE(review): the source notation is interpreted by the consumer and
         is not defined in this file. Presumably i, j and k name parser state
         positions, +N and -N are offsets from them, and the suffixes _h, _lmd,
         _rmd and _lns (optionally followed by 2) select related nodes in the
         partial tree; confirm against the feature-template documentation
         before editing. -->
    <feature_template>
        <!-- basic features -->
        <feature f0="i:lemma"/>
        <feature f0="j:lemma"/>
        <feature f0="i:part_of_speech_tag"/>
        <feature f0="j:part_of_speech_tag"/>

        <feature f0="i:part_of_speech_tag" f1="i:lemma"/>
        <feature f0="j:part_of_speech_tag" f1="j:lemma"/>

        <feature f0="i:part_of_speech_tag" f1="j:part_of_speech_tag"/>
        <feature f0="i:part_of_speech_tag" f1="j:lemma"/>
        <feature f0="i:lemma"              f1="j:part_of_speech_tag"/>
        <feature f0="i:lemma"              f1="j:lemma"/>

        <!-- 1-gram features -->
        <feature f0="k-1:lemma"/>
        <feature f0="i-1:lemma"/>
        <feature f0="i+1:lemma"/>
        <feature f0="j-2:lemma"/>
        <feature f0="j-1:lemma"/>
        <feature f0="j+1:lemma"/>
        <feature f0="j+2:lemma"/>

        <feature f0="i-2:part_of_speech_tag"/>
        <feature f0="i-1:part_of_speech_tag"/>
        <feature f0="i+1:part_of_speech_tag"/>
        <feature f0="i+2:part_of_speech_tag"/>
        <feature f0="j-1:part_of_speech_tag"/>
        <feature f0="j+1:part_of_speech_tag"/>

        <!-- 2-gram features -->
        <feature f0="i:part_of_speech_tag" f1="k-1:part_of_speech_tag"/>
        <feature f0="i:part_of_speech_tag" f1="j+1:part_of_speech_tag"/>
        <feature f0="j:part_of_speech_tag" f1="k-1:part_of_speech_tag"/>

        <feature f0="i:lemma" f1="j-1:part_of_speech_tag"/>
        <feature f0="i:lemma" f1="j+1:part_of_speech_tag"/>
        <feature f0="j:lemma" f1="j+1:part_of_speech_tag"/>

        <feature f0="j+1:lemma" f1="i:part_of_speech_tag"/>
        <feature f0="j+1:lemma" f1="j:part_of_speech_tag"/>
        <feature f0="i+1:lemma" f1="i:lemma"/>
        <feature f0="i+1:lemma" f1="j:lemma"/>

        <!-- 3-gram features -->
        <feature f0="i-2:part_of_speech_tag" f1="i-1:part_of_speech_tag" f2="i:part_of_speech_tag"/>
        <feature f0="i-1:part_of_speech_tag" f1="i:part_of_speech_tag"   f2="i+1:part_of_speech_tag"/>
        <feature f0="j-1:part_of_speech_tag" f1="j:part_of_speech_tag"   f2="j+1:part_of_speech_tag"/>
        <feature f0="j:part_of_speech_tag"   f1="j+1:part_of_speech_tag" f2="j+2:part_of_speech_tag"/>

        <feature f0="k-2:part_of_speech_tag" f1="i:part_of_speech_tag" f2="j:part_of_speech_tag"/>
        <feature f0="i-1:part_of_speech_tag" f1="i:part_of_speech_tag" f2="j:part_of_speech_tag"/>
        <feature f0="i+1:part_of_speech_tag" f1="i:part_of_speech_tag" f2="j:part_of_speech_tag"/>
        <feature f0="j-2:part_of_speech_tag" f1="i:part_of_speech_tag" f2="j:part_of_speech_tag"/>
        <feature f0="j-1:part_of_speech_tag" f1="i:part_of_speech_tag" f2="j:part_of_speech_tag"/>
        <feature f0="j+1:part_of_speech_tag" f1="i:part_of_speech_tag" f2="j:part_of_speech_tag"/>
        <feature f0="j+2:part_of_speech_tag" f1="i:part_of_speech_tag" f2="j:part_of_speech_tag"/>
        <feature f0="j+3:part_of_speech_tag" f1="i:part_of_speech_tag" f2="j:part_of_speech_tag"/>

        <!-- valency features -->
        <feature f0="i:valency:all" f1="i:lemma"/>
        <feature f0="j:valency:all" f1="j:lemma"/>

        <!-- 2nd-order features -->
        <feature f0="i:dependency_label"/>
        <feature f0="j:dependency_label"/>
        <feature f0="i_lmd:dependency_label"/>

        <feature f0="i_h:lemma"/>
        <feature f0="i_lmd:lemma"/>
        <feature f0="i_rmd:lemma"/>
        <feature f0="j_lmd:lemma"/>

        <feature f0="i_h:part_of_speech_tag"/>
        <feature f0="i_rmd:part_of_speech_tag"/>
        <feature f0="j_lmd:part_of_speech_tag"/>

        <feature f0="i:dependency_label" f1="i:lemma"/>
        <feature f0="i:dependency_label" f1="j:lemma"/>
        <feature f0="i:dependency_label" f1="i:part_of_speech_tag"/>
        <feature f0="i:dependency_label" f1="j:part_of_speech_tag"/>

        <feature f0="i_lmd:dependency_label"   f1="i:part_of_speech_tag" f2="j:part_of_speech_tag"/>
        <feature f0="i_rmd:dependency_label"   f1="i:part_of_speech_tag" f2="j:part_of_speech_tag"/>
        <feature f0="j_lmd:dependency_label"   f1="i:part_of_speech_tag" f2="j:part_of_speech_tag"/>
        <feature f0="i_lns:dependency_label"   f1="i:part_of_speech_tag" f2="j:part_of_speech_tag"/>

        <feature f0="i_lmd:part_of_speech_tag" f1="i:part_of_speech_tag" f2="j:part_of_speech_tag"/>
        <feature f0="i_rmd:part_of_speech_tag" f1="i:part_of_speech_tag" f2="j:part_of_speech_tag"/>
        <feature f0="j_lmd:part_of_speech_tag" f1="i:part_of_speech_tag" f2="j:part_of_speech_tag"/>

        <!-- 3rd-order features -->
        <feature f0="i_h:dependency_label"/>
        <feature f0="j_h:dependency_label"/>

        <feature f0="i_h2:lemma"/>
        <feature f0="j_lmd2:lemma"/>

        <feature f0="i_lmd2:part_of_speech_tag"/>
        <feature f0="i_rmd2:part_of_speech_tag"/>
        <feature f0="j_lmd2:part_of_speech_tag"/>

        <feature f0="i_h:dependency_label" f1="i:lemma"/>
        <feature f0="i_h:dependency_label" f1="j:lemma"/>
        <feature f0="i_h:dependency_label" f1="j:part_of_speech_tag"/>

        <feature f0="i_lns2:dependency_label"   f1="i:part_of_speech_tag" f2="j:part_of_speech_tag"/>
        <feature f0="i_lmd2:part_of_speech_tag" f1="i:part_of_speech_tag" f2="j:part_of_speech_tag"/>
        <feature f0="i_rmd2:part_of_speech_tag" f1="i:part_of_speech_tag" f2="j:part_of_speech_tag"/>
        <feature f0="j_lmd2:part_of_speech_tag" f1="i:part_of_speech_tag" f2="j:part_of_speech_tag"/>

        <feature f0="i_lmd2:part_of_speech_tag" f1="i_lmd:part_of_speech_tag" f2="i:part_of_speech_tag"/>

        <!-- distributional semantics features -->
        <feature set="true" f0="i:word_clusters"/>
        <feature set="true" f0="j:word_clusters"/>
        <feature set="true" f0="i+1:word_clusters"/>
        <feature set="true" f0="j+1:word_clusters"/>

        <!-- positional features -->
        <feature set="true" f0="i:positional"/>
        <feature set="true" f0="j:positional"/>
    </feature_template>
</configuration>




© 2015 - 2024 Weber Informatics LLC | Privacy Policy