All downloads are free. Search and download functionality uses the official Maven repository.

org.languagetool.resource.km.disambiguation.xml Maven / Gradle / Ivy

The newest version!
<?xml version="1.0" encoding="utf-8"?>
<!--
	Khmer Disambiguation Rules for LanguageTool
	Copyright (C) 2010 Nathan Wells		
-->
<rules lang="km" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xsi:noNamespaceSchemaLocation="../../../../../../../../../languagetool-core/src/main/resources/org/languagetool/resource/disambiguation.xsd">
    <rule name="Time as an adjective" id="TIME_ADJ">
        <!-- Matches the token ពេល immediately followed by ដែល and assigns the POS tag JJ.
             NOTE(review): the pattern has no <marker>, so which of the matched tokens
             receives the tag depends on LanguageTool's default handling; confirm. -->
        <pattern>
            <token>ពេល</token>
            <token>ដែល</token>
        </pattern>
        <disambig postag="JJ"/>
    </rule>
    <rule name="TAG AS NOUN" id="TAG_AS_NOUN">
        <!-- When one of ការ, ភាព or សេចក្ដី is followed by a token tagged VB, JJ or RB,
             the marked (second) token is assigned the POS tag NN. -->
        <pattern>
            <token regexp="yes">ការ|ភាព|សេចក្ដី</token>
            <marker><token postag="VB|JJ|RB" postag_regexp="yes"/></marker>
        </pattern>
        <disambig postag="NN"/>
    </rule>
    <rule name="TAG AS NOUN END" id="TAG_AS_NOUN_END">
        <!-- When a token tagged VB, JJ or RB is followed by one of ធម៌, និយម or ករ,
             the marked (first) token is assigned the POS tag NN. -->
        <pattern>
            <marker><token postag="VB|JJ|RB" postag_regexp="yes"/></marker>
            <token regexp="yes">ធម៌|និយម|ករ</token>
        </pattern>
        <disambig postag="NN"/>
    </rule>
    <rule name="USAGE of អស់" id="USAGE_ALL">
        <!-- The token អស់ standing between a PRO-tagged token and an NN-tagged token
             is assigned the POS tag RB. -->
        <pattern>
            <token postag="PRO"/>
            <marker><token>អស់</token></marker>
            <token postag="NN"/>
        </pattern>
        <disambig postag="RB"/>
    </rule>
    <rule name="CONJUNCTION ដោយសារ" id="DOWY_SA">
        <!-- The token សារ directly after ដោយ is assigned the POS tag CC. -->
        <pattern>
            <token>ដោយ</token>
            <marker><token>សារ</token></marker>
        </pattern>
        <disambig postag="CC"/>
    </rule>
    <rule name="ពេល as Adjective" id="TIME_AS_ADJ">
        <!-- The token ពេល is assigned the POS tag JJ when it is preceded by a JJ-tagged
             token and followed by a token tagged NN, VB or PRO. -->
        <pattern>
            <token postag="JJ"/>
            <marker><token>ពេល</token></marker>
            <token postag="NN|VB|PRO" postag_regexp="yes"/>
        </pattern>
        <disambig postag="JJ"/>
    </rule>
    <rule name="ជា + អ្នក + JJ NN JJ" id="TOBE_YOU_as_Noun">
        <!-- After the sequence ជា អ្នក, a token tagged VB, NN or JJ is assigned the POS tag NN.
             Sample: ជា​អ្នក​សង្គ្រោះ​របស់​គេ -->
        <pattern>
            <token>ជា</token>
            <token>អ្នក</token>
            <marker><token postag="VB|NN|JJ" postag_regexp="yes"/></marker>
        </pattern>
        <disambig postag="NN"/>
    </rule>
    <rule name="ជា + អ្នក + JJ NN JJ2" id="TOBE_YOU_as_Noun2">
        <!-- Same three-token pattern as TOBE_YOU_as_Noun, but here the marker is on អ្នក,
             so that token is assigned the POS tag NN as well.
             Sample: ជា​អ្នក​សង្គ្រោះ​របស់​គេ -->
        <pattern>
            <token>ជា</token>
            <marker><token>អ្នក</token></marker>
            <token postag="VB|NN|JJ" postag_regexp="yes"/>
        </pattern>
        <disambig postag="NN"/>
    </rule>
    <rule name="CLS + អ្នក + VB NN JJ" id="CLS_YOU_as_Noun">
        <!-- After a CLS-tagged token followed by អ្នក, a token tagged VB, NN or JJ
             is assigned the POS tag NN.
             Sample: អាន​ជា​មួយ​ក្រុម​អ្នក​សិល្បៈ​នោះ -->
        <pattern>
            <token postag="CLS"/>
            <token>អ្នក</token>
            <marker><token postag="VB|NN|JJ" postag_regexp="yes"/></marker>
        </pattern>
        <disambig postag="NN"/>
    </rule>
    <rule name="CLS + អ្នក + VB NN JJ2" id="CLS_YOU_as_Noun2">
        <!-- Same three-token pattern as CLS_YOU_as_Noun, but here the marker is on អ្នក,
             so that token is assigned the POS tag NN as well.
             Sample: អាន​ជា​មួយ​ក្រុម​អ្នក​សិល្បៈ​នោះ -->
        <pattern>
            <token postag="CLS"/>
            <marker><token>អ្នក</token></marker>
            <token postag="VB|NN|JJ" postag_regexp="yes"/>
        </pattern>
        <disambig postag="NN"/>
    </rule>
    <rule name="Noun + ផ្ទាល់" id="NOUN_PTOAL">
        <!-- The token ផ្ទាល់ is assigned the POS tag PRO when it follows an NN-tagged token
             (one that does not match the PRO|PRP exception) and precedes a PRO-tagged token.
             Sample: រឿង​ផ្ទាល់​ខ្លួន -->
        <pattern>
            <token postag="NN"><exception postag="PRO|PRP" postag_regexp="yes"/></token>
            <marker><token>ផ្ទាល់</token></marker>
            <token postag="PRO"/>
        </pattern>
        <disambig postag="PRO"/>
    </rule>
    <rule name="VERB + ផ្ទាល់" id="VERB_PTOAL">
        <!-- The token ផ្ទាល់ is assigned the POS tag PRO when it follows a VB-tagged token
             and then a PRO-tagged token.
             Sample: បាន​ជា​គេ​ផ្ទាល់​ឲ្យ -->
        <pattern>
            <token postag="VB"/>
            <token postag="PRO"/>
            <marker><token>ផ្ទាល់</token></marker>
        </pattern>
        <disambig postag="PRO"/>
    </rule>
    <rule name="ADJ + ផ្ទាល់" id="ADJ_PTOAL">
        <!-- The token ផ្ទាល់ is assigned the POS tag PRO when it follows an RB-tagged token
             (one that does not match the PRO|PRP exception) and precedes a PRO-tagged token.
             NOTE(review): the rule name says ADJ, but the first token is matched on
             postag RB; confirm this is intended.
             Sample: ទង្វើ​ល្អ​ផ្ទាល់​ខ្លួន -->
        <pattern>
            <token postag="RB"><exception postag="PRO|PRP" postag_regexp="yes"/></token>
            <marker><token>ផ្ទាល់</token></marker>
            <token postag="PRO"/>
        </pattern>
        <disambig postag="PRO"/>
    </rule>
    <rule name="theself" id="self_PTOAL">
        <!-- Rule 1 of 3 for the sequence ខ្លួន ឯង ផ្ទាល់: the marker is on the middle
             token ឯង, which is assigned the POS tag PRO.
             Sample: ខ្លួនឯងផ្ទាល់ -->
        <pattern>
            <token>ខ្លួន</token>
            <marker><token>ឯង</token></marker>
            <token>ផ្ទាល់</token>
        </pattern>
        <disambig postag="PRO"/>
    </rule>
    <rule name="theself2" id="self_PTOAL2">
        <!-- Rule 2 of 3 for the sequence ខ្លួន ឯង ផ្ទាល់: the marker is on the last
             token ផ្ទាល់, which is assigned the POS tag PRO.
             Sample: ខ្លួនឯងផ្ទាល់ -->
        <pattern>
            <token>ខ្លួន</token>
            <token>ឯង</token>
            <marker><token>ផ្ទាល់</token></marker>
        </pattern>
        <disambig postag="PRO"/>
    </rule>
    <rule name="theself3" id="self_PTOAL3">
        <!-- Rule 3 of 3 for the sequence ខ្លួន ឯង ផ្ទាល់: the marker is on the first
             token ខ្លួន, which is assigned the POS tag PRO.
             Sample: ខ្លួនឯងផ្ទាល់ -->
        <pattern>
            <marker><token>ខ្លួន</token></marker>
            <token>ឯង</token>
            <token>ផ្ទាល់</token>
        </pattern>
        <disambig postag="PRO"/>
    </rule>
    <rule name="ផ្ទាល់ as verb" id="VERB_PTOAL2">
        <!-- The token ផ្ទាល់ is assigned the POS tag VB when both of its neighbours are
             tagged PRO or NNP.
             NOTE(review): the later rule ptoal-pronoun also matches ផ្ទាល់ after a PRO|NNP
             token and assigns PRO; if rules are applied in file order it may override
             the tag set here. Confirm.
             Sample: ខចាំ​មើល​អញ​ផ្ទាល់​វា​​ម្ដង -->
        <pattern>
            <token postag="PRO|NNP" postag_regexp="yes"/>
            <marker><token>ផ្ទាល់</token></marker>
            <token postag="PRO|NNP" postag_regexp="yes"/>
        </pattern>
        <disambig postag="VB"/>
    </rule>
    <rule name="ឯង as pronoun" id="Ing-pronoun">
        <!-- The token ឯង is assigned the POS tag PRO when it directly follows a token
             tagged PRO or NNP.
             Sample: ខ្លួនឯង -->
        <pattern>
            <token postag="PRO|NNP" postag_regexp="yes"/>
            <marker><token>ឯង</token></marker>
        </pattern>
        <disambig postag="PRO"/>
    </rule>
    <rule name="ផ្ទាល់ as pronoun" id="ptoal-pronoun">
        <!-- The token ផ្ទាល់ is assigned the POS tag PRO when it directly follows a token
             tagged PRO or NNP.
             Sample: ខ្លួនផ្ទាល់ -->
        <pattern>
            <token postag="PRO|NNP" postag_regexp="yes"/>
            <marker><token>ផ្ទាល់</token></marker>
        </pattern>
        <disambig postag="PRO"/>
    </rule>
    <rule name="តែមួយ" id="timoy">
        <!-- Matches an NN-tagged token followed by តែ and មួយ and assigns the POS tag JJ.
             NOTE(review): the pattern has no <marker>, so which of the three matched tokens
             receives the tag depends on LanguageTool's default handling; confirm that the
             intended target (presumably តែ មួយ rather than the noun) is the one affected.
             Sample: និង​ជា​ព្រះ​ក្នុង​ពេល​តែ​មួយ -->
        <pattern>
            <token postag="NN"/>
            <token>តែ</token>
            <token>មួយ</token>
        </pattern>
        <disambig postag="JJ"/>
    </rule>
    <rule name="ជាយូរ" id="TOBE_LONG">
        <!-- The token យូរ is assigned the POS tag RB when it follows ជា and precedes an
             NN-tagged token. The rule name now uses the same spelling (យូរ) as the matched
             token and the sample; the id is unchanged.
             Sample: ជា​យូរ​ឆ្នាំ​មក​ហើយ -->
        <pattern>
            <token>ជា</token>
            <marker><token>យូរ</token></marker>
            <token postag="NN"/>
        </pattern>
        <disambig postag="RB"/>
    </rule>
    <rule name="ADJ + ចិត្ត" id="ADJ_HEART">
        <!-- The token ចិត្ត is assigned the POS tag JJ when it follows a PRO-tagged token
             and then a JJ-tagged token.
             Sample: អ្នក​សប្បាយ​ចិត្ត -->
        <pattern>
            <token postag="PRO"/>
            <token postag="JJ"/>
            <marker><token>ចិត្ត</token></marker>
        </pattern>
        <disambig postag="JJ"/>
    </rule>
    <rule name="NUM +ចំនួន" id="NUM_AMOUNT">
        <!-- A NUM-tagged token directly before ចំនួន is assigned the POS tag NUM
             (presumably to drop any other readings of that token; confirm).
             Sample: ក្មេង​មួយ​ចំនួន​ពុំ​មាន -->
        <pattern>
            <marker><token postag="NUM"/></marker>
            <token>ចំនួន</token>
        </pattern>
        <disambig postag="NUM"/>
    </rule>
    <rule name="ជាមួយ" id="WITH_PREP">
        <!-- Matches ជា followed by មួយ and assigns the POS tag IN.
             NOTE(review): the pattern has no <marker>, so which of the matched tokens
             receives the tag depends on LanguageTool's default handling; confirm.
             Sample: អាន​ជា​មួយ​ក្រុម​អ្នក​សិល្បៈ​នោះ -->
        <pattern>
            <token>ជា</token>
            <token>មួយ</token>
        </pattern>
        <disambig postag="IN"/>
    </rule>
    <rule name="នៅជាមួយ" id="WITH_PREP2">
        <!-- Matches នៅ followed by ជា and មួយ and assigns the POS tag IN.
             NOTE(review): the pattern has no <marker>, so which of the matched tokens
             receives the tag depends on LanguageTool's default handling; confirm.
             Sample: នៅ​ជា​មួយ​ទ្រង់ -->
        <pattern>
            <token>នៅ</token>
            <token>ជា</token>
            <token>មួយ</token>
        </pattern>
        <disambig postag="IN"/>
    </rule>
    <rule name="PAST TENSE VERB បាន" id="PAST_TENSE_VERB">
        <!-- The token បាន is assigned the POS tag PAS when the next token is tagged VB.
             Sample: អ្នក​រាល់​គ្នា​បាន​សង្គ្រោះ -->
        <pattern>
            <marker><token>បាន</token></marker>
            <token postag="VB"/>
        </pattern>
        <disambig postag="PAS"/>
    </rule>
    <rule name="យ៉ាង as Adverb Marker" id="ADVERB_MARKER">
        <!-- The token យ៉ាង is assigned the POS tag RB when the next token is tagged
             JJ or RB.
             Sample: អនាង​បាន​ប្រលែង​នឹង​ប្អូន​យ៉ាង​ទន់ភ្លន់ -->
        <pattern>
            <marker><token>យ៉ាង</token></marker>
            <token postag="JJ|RB" postag_regexp="yes"/>
        </pattern>
        <disambig postag="RB"/>
    </rule>
    <rule name="មុន as Preposition" id="MON_PREP">
        <!-- The token មុន is assigned the POS tag IN when it is directly followed by ពេល.
             Sample: មុន​ពេល​គេង -->
        <pattern>
            <marker><token>មុន</token></marker>
            <token>ពេល</token>
        </pattern>
        <disambig postag="IN"/>
    </rule>
</rules>




© 2015 - 2025 Weber Informatics LLC | Privacy Policy