rules.fraktur.xml Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of webservices Show documentation
Show all versions of webservices Show documentation
Webservice API for Tēzaurs.lv and other ailab.lv Latvian computational linguistic tools
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Transliteration rule set with two groups, <exact> and <fuzzy>.
  Each <r> maps the string in its "target" attribute to one or more
  replacements, given either as the text of <r> or as <replace> children.
  Optional attributes seen in this file: position ("begin", "end", "exact")
  and sensitive ("1").
  NOTE(review): the precise meaning of "position" and "sensitive", and
  whether rule order matters, is defined by the consuming code. Confirm
  there before reordering or merging rules.
-->
<rules>
  <exact>

    <!-- 1. Transliteration of "ee" near the beginning of the token. -->

    <!-- 1.1. Prefix "ie" combined with a stem that starts with "ie". -->

    <!-- 1.1.1. Combined with the prefix "vis". -->
    <r target="visneeeee">
      <replace position="begin">visneieie</replace>
    </r>
    <r target="vişneeeee">
      <replace position="begin">visneieie</replace>
    </r>
    <r target="visjāeeee">
      <replace position="begin">visjāieie</replace>
    </r>
    <r target="vişjāeeee">
      <replace position="begin">visjāieie</replace>
    </r>
    <r target="visjaheeee">
      <replace position="begin">visjāieie</replace>
    </r>
    <r target="vişjaheeee">
      <replace position="begin">visjāieie</replace>
    </r>
    <r target="visneee">
      <replace position="begin">visneie</replace>
    </r>
    <r target="vişneee">
      <replace position="begin">visneie</replace>
    </r>
    <r target="visjāee">
      <replace position="begin">visjāie</replace>
    </r>
    <r target="vişjāee">
      <replace position="begin">visjāie</replace>
    </r>
    <r target="visjahee">
      <replace position="begin">visjāie</replace>
    </r>
    <r target="vişjahee">
      <replace position="begin">visjāie</replace>
    </r>
    <r target="viseeee">
      <replace position="begin">visieie</replace>
    </r>
    <r target="vişeeee">
      <replace position="begin">visieie</replace>
    </r>
    <r target="visee">
      <replace position="begin">visie</replace>
    </r>
    <r target="vişee">
      <replace position="begin">visie</replace>
    </r>

    <!-- 1.1.2. The same combinations without the prefix "vis". -->
    <r target="neeeee">
      <replace position="begin">neieie</replace>
    </r>
    <r target="jāeeee">
      <replace position="begin">jāieie</replace>
    </r>
    <r target="jaheeee">
      <replace position="begin">jāieie</replace>
    </r>
    <r target="neee">
      <replace position="begin">neie</replace>
    </r>
    <r target="jāee">
      <replace position="begin">jāie</replace>
    </r>
    <r target="jahee">
      <replace position="begin">jāie</replace>
    </r>
    <r target="eeee">
      <replace position="begin">ieie</replace>
    </r>
    <r target="ee">
      <replace position="begin">ie</replace>
    </r>

    <!-- 1.2. Other prefixes combined with the prefix "ie". -->

    <!-- 1.2.1. Combined with the prefix "vis". -->
    <r target="visaizee">
      <replace position="begin">visaizie</replace>
    </r>
    <r target="vişaizee">
      <replace position="begin">visaizie</replace>
    </r>
    <r target="visapee">
      <replace position="begin">visapie</replace>
    </r>
    <r target="vişapee">
      <replace position="begin">visapie</replace>
    </r>
    <r target="visatee">
      <replace position="begin">visatie</replace>
    </r>
    <r target="vişatee">
      <replace position="begin">visatie</replace>
    </r>
    <r target="visbezee">
      <replace position="begin">visbezie</replace>
    </r>
    <r target="vişbezee">
      <replace position="begin">visbezie</replace>
    </r>
    <r target="viseksee">
      <replace position="begin">viseksie</replace>
    </r>
    <r target="vişeksee">
      <replace position="begin">viseksie</replace>
    </r>
    <r target="visizee">
      <replace position="begin">visizie</replace>
    </r>
    <r target="vişizee">
      <replace position="begin">visizie</replace>
    </r>
    <r target="visnoee">
      <replace position="begin">visnoie</replace>
    </r>
    <r target="vişnoee">
      <replace position="begin">visnoie</replace>
    </r>
    <r target="vispaee">
      <replace position="begin">vispaie</replace>
    </r>
    <r target="vişpaee">
      <replace position="begin">vispaie</replace>
    </r>
    <r target="vispāree">
      <replace position="begin">vispārie</replace>
    </r>
    <r target="vişpāree">
      <replace position="begin">vispārie</replace>
    </r>
    <r target="vispahree">
      <replace position="begin">vispārie</replace>
    </r>
    <r target="vişpahree">
      <replace position="begin">vispārie</replace>
    </r>
    <r target="visparee">
      <replace position="begin">visparie</replace>
      <!-- Why was this commented out in periodika I? -->
    </r>
    <r target="vişparee">
      <replace position="begin">visparie</replace>
    </r>
    <r target="vispeeee">
      <replace position="begin">vispieie</replace>
    </r>
    <r target="vişpeeee">
      <replace position="begin">vispieie</replace>
    </r>
    <r target="vissaee">
      <replace position="begin">vissaie</replace>
    </r>
    <r target="vişşaee">
      <replace position="begin">vissaie</replace>
    </r>
    <r target="visuzee">
      <replace position="begin">visuzie</replace>
    </r>
    <r target="vişuzee">
      <replace position="begin">visuzie</replace>
    </r>

    <!-- 1.2.2. The same combinations without the prefix "vis". -->
    <r target="aizee">
      <replace position="begin">aizie</replace>
    </r>
    <r target="apee">
      <replace position="begin">apie</replace>
    </r>
    <r target="atee">
      <replace position="begin">atie</replace>
    </r>
    <r target="bezee">
      <replace position="begin">bezie</replace>
    </r>
    <r target="eksee">
      <replace position="begin">eksie</replace>
    </r>
    <r target="ekşee">
      <replace position="begin">eksie</replace>
    </r>
    <r target="izee">
      <replace position="begin">izie</replace>
    </r>
    <r target="noee">
      <replace position="begin">noie</replace>
    </r>
    <r target="paee">
      <replace position="begin">paie</replace>
    </r>
    <r target="pāree">
      <replace position="begin">pārie</replace>
    </r>
    <r target="pahree">
      <replace position="begin">pārie</replace>
    </r>
    <r target="paree">
      <replace position="begin">parie</replace>
      <!-- Why was this commented out in periodika I? -->
    </r>
    <r target="peeee">
      <replace position="begin">pieie</replace>
    </r>
    <r target="saee">
      <replace position="begin">saie</replace>
    </r>
    <r target="şaee">
      <replace position="begin">saie</replace>
    </r>
    <r target="uzee">
      <replace position="begin">uzie</replace>
    </r>

    <!-- 1.3. Other prefixes that contain "ie". -->
    <r target="vispee">
      <replace position="begin">vispie</replace>
    </r>
    <r target="vişpee">
      <replace position="begin">vispie</replace>
    </r>
    <r target="pee">
      <replace position="begin">pie</replace>
    </r>

    <!-- 2. Transliteration of the prefixes "iz", "uz", "aiz" in front of "s". -->

    <!-- 2.1. Combined with the prefix "vis". -->

    <!-- "iz" -->
    <r target="visneiss">
      <replace position="begin">visneizs</replace>
    </r>
    <r target="vişneiss">
      <replace position="begin">visneizs</replace>
    </r>
    <r target="visneişs">
      <replace position="begin">visneizs</replace>
    </r>
    <r target="visneisş">
      <replace position="begin">visneizs</replace>
    </r>
    <r target="vişneişs">
      <replace position="begin">visneizs</replace>
    </r>
    <r target="vişneisş">
      <replace position="begin">visneizs</replace>
    </r>
    <r target="visneişş">
      <replace position="begin">visneizs</replace>
    </r>
    <r target="vişneişş">
      <replace position="begin">visneizs</replace>
    </r>
    <r target="visjāiss">
      <replace position="begin">visjāizs</replace>
    </r>
    <r target="vişjāiss">
      <replace position="begin">visjāizs</replace>
    </r>
    <r target="visjāişs">
      <replace position="begin">visjāizs</replace>
    </r>
    <r target="visjāisş">
      <replace position="begin">visjāizs</replace>
    </r>
    <r target="vişjāişs">
      <replace position="begin">visjāizs</replace>
    </r>
    <r target="vişjāisş">
      <replace position="begin">visjāizs</replace>
    </r>
    <r target="visjāişş">
      <replace position="begin">visjāizs</replace>
    </r>
    <r target="vişjāişş">
      <replace position="begin">visjāizs</replace>
    </r>
    <r target="visjahiss">
      <replace position="begin">visjāizs</replace>
    </r>
    <r target="vişjahiss">
      <replace position="begin">visjāizs</replace>
    </r>
    <r target="visjahişs">
      <replace position="begin">visjāizs</replace>
    </r>
    <r target="visjahisş">
      <replace position="begin">visjāizs</replace>
    </r>
    <r target="vişjahişs">
      <replace position="begin">visjāizs</replace>
    </r>
    <r target="vişjahisş">
      <replace position="begin">visjāizs</replace>
    </r>
    <r target="visjahişş">
      <replace position="begin">visjāizs</replace>
    </r>
    <r target="vişjahişş">
      <replace position="begin">visjāizs</replace>
    </r>
    <r target="visiss">
      <replace position="begin">visizs</replace>
    </r>
    <r target="vişiss">
      <replace position="begin">visizs</replace>
    </r>
    <r target="visişs">
      <replace position="begin">visizs</replace>
    </r>
    <r target="visisş">
      <replace position="begin">visizs</replace>
    </r>
    <r target="vişişs">
      <replace position="begin">visizs</replace>
    </r>
    <r target="vişisş">
      <replace position="begin">visizs</replace>
    </r>
    <r target="visişş">
      <replace position="begin">visizs</replace>
    </r>
    <r target="vişişş">
      <replace position="begin">visizs</replace>
    </r>

    <!-- "uz" -->
    <r target="visneuss">
      <replace position="begin">visneuzs</replace>
    </r>
    <r target="vişneuss">
      <replace position="begin">visneuzs</replace>
    </r>
    <r target="visneuşs">
      <replace position="begin">visneuzs</replace>
    </r>
    <r target="visneusş">
      <replace position="begin">visneuzs</replace>
    </r>
    <r target="vişneuşs">
      <replace position="begin">visneuzs</replace>
    </r>
    <r target="vişneusş">
      <replace position="begin">visneuzs</replace>
    </r>
    <r target="visneuşş">
      <replace position="begin">visneuzs</replace>
    </r>
    <r target="vişneuşş">
      <replace position="begin">visneuzs</replace>
    </r>
    <r target="visjāuss">
      <replace position="begin">visjāuzs</replace>
    </r>
    <r target="vişjāuss">
      <replace position="begin">visjāuzs</replace>
    </r>
    <r target="visjāuşs">
      <replace position="begin">visjāuzs</replace>
    </r>
    <r target="visjāusş">
      <replace position="begin">visjāuzs</replace>
    </r>
    <r target="vişjāuşs">
      <replace position="begin">visjāuzs</replace>
    </r>
    <r target="vişjāusş">
      <replace position="begin">visjāuzs</replace>
    </r>
    <r target="visjāuşş">
      <replace position="begin">visjāuzs</replace>
    </r>
    <r target="vişjāuşş">
      <replace position="begin">visjāuzs</replace>
    </r>
    <r target="visjahuss">
      <replace position="begin">visjāuzs</replace>
    </r>
    <r target="vişjahuss">
      <replace position="begin">visjāuzs</replace>
    </r>
    <r target="visjahuşs">
      <replace position="begin">visjāuzs</replace>
    </r>
    <r target="visjahusş">
      <replace position="begin">visjāuzs</replace>
    </r>
    <r target="vişjahuşs">
      <replace position="begin">visjāuzs</replace>
    </r>
    <r target="vişjahusş">
      <replace position="begin">visjāuzs</replace>
    </r>
    <r target="visjahuşş">
      <replace position="begin">visjāuzs</replace>
    </r>
    <r target="vişjahuşş">
      <replace position="begin">visjāuzs</replace>
    </r>
    <r target="visuss">
      <replace position="begin">visuzs</replace>
    </r>
    <r target="vişuss">
      <replace position="begin">visuzs</replace>
    </r>
    <r target="visuşs">
      <replace position="begin">visuzs</replace>
    </r>
    <r target="visusş">
      <replace position="begin">visuzs</replace>
    </r>
    <r target="vişuşs">
      <replace position="begin">visuzs</replace>
    </r>
    <r target="vişusş">
      <replace position="begin">visuzs</replace>
    </r>
    <r target="visuşş">
      <replace position="begin">visuzs</replace>
    </r>
    <r target="vişuşş">
      <replace position="begin">visuzs</replace>
    </r>

    <!-- "aiz" -->
    <r target="visneaiss">
      <replace position="begin">visneaizs</replace>
    </r>
    <r target="vişneaiss">
      <replace position="begin">visneaizs</replace>
    </r>
    <r target="visneaişs">
      <replace position="begin">visneaizs</replace>
    </r>
    <r target="visneaisş">
      <replace position="begin">visneaizs</replace>
    </r>
    <r target="vişneaişs">
      <replace position="begin">visneaizs</replace>
    </r>
    <r target="vişneaisş">
      <replace position="begin">visneaizs</replace>
    </r>
    <r target="visneaişş">
      <replace position="begin">visneaizs</replace>
    </r>
    <r target="vişneaişş">
      <replace position="begin">visneaizs</replace>
    </r>
    <r target="visjāaiss">
      <replace position="begin">visjāaizs</replace>
    </r>
    <r target="vişjāaiss">
      <replace position="begin">visjāaizs</replace>
    </r>
    <r target="visjāaişs">
      <replace position="begin">visjāaizs</replace>
    </r>
    <r target="visjāaisş">
      <replace position="begin">visjāaizs</replace>
    </r>
    <r target="vişjāaişs">
      <replace position="begin">visjāaizs</replace>
    </r>
    <r target="vişjāaisş">
      <replace position="begin">visjāaizs</replace>
    </r>
    <r target="visjāaişş">
      <replace position="begin">visjāaizs</replace>
    </r>
    <r target="vişjāaişş">
      <replace position="begin">visjāaizs</replace>
    </r>
    <r target="visjahaiss">
      <replace position="begin">visjāaizs</replace>
    </r>
    <r target="vişjahaiss">
      <replace position="begin">visjāaizs</replace>
    </r>
    <r target="visjahaişs">
      <replace position="begin">visjāaizs</replace>
    </r>
    <r target="visjahaisş">
      <replace position="begin">visjāaizs</replace>
    </r>
    <r target="vişjahaişs">
      <replace position="begin">visjāaizs</replace>
    </r>
    <r target="vişjahaisş">
      <replace position="begin">visjāaizs</replace>
    </r>
    <r target="visjahaişş">
      <replace position="begin">visjāaizs</replace>
    </r>
    <r target="vişjahaişş">
      <replace position="begin">visjāaizs</replace>
    </r>
    <r target="visaiss">
      <replace position="begin">visaizs</replace>
    </r>
    <r target="vişaiss">
      <replace position="begin">visaizs</replace>
    </r>
    <r target="visaişs">
      <replace position="begin">visaizs</replace>
    </r>
    <r target="visaisş">
      <replace position="begin">visaizs</replace>
    </r>
    <r target="vişaişs">
      <replace position="begin">visaizs</replace>
    </r>
    <r target="vişaisş">
      <replace position="begin">visaizs</replace>
    </r>
    <r target="visaişş">
      <replace position="begin">visaizs</replace>
    </r>
    <r target="vişaişş">
      <replace position="begin">visaizs</replace>
    </r>

    <!-- 2.2. The same combinations without the prefix "vis". -->

    <!-- "iz" -->
    <r target="neiss">
      <replace position="begin">neizs</replace>
    </r>
    <r target="neişs">
      <replace position="begin">neizs</replace>
    </r>
    <r target="neisş">
      <replace position="begin">neizs</replace>
    </r>
    <r target="neişş">
      <replace position="begin">neizs</replace>
    </r>
    <r target="jāiss">
      <replace position="begin">jāizs</replace>
    </r>
    <r target="jāişs">
      <replace position="begin">jāizs</replace>
    </r>
    <r target="jāisş">
      <replace position="begin">jāizs</replace>
    </r>
    <r target="jāişş">
      <replace position="begin">jāizs</replace>
    </r>
    <r target="jahiss">
      <replace position="begin">jāizs</replace>
    </r>
    <r target="jahişs">
      <replace position="begin">jāizs</replace>
    </r>
    <r target="jahisş">
      <replace position="begin">jāizs</replace>
    </r>
    <r target="jahişş">
      <replace position="begin">jāizs</replace>
    </r>
    <r target="iss">
      <replace position="begin">izs</replace>
      <!-- In periodika I this was restricted to words longer than 4 characters. -->
    </r>
    <r target="işs">
      <replace position="begin">izs</replace>
      <!-- In periodika I this was restricted to words longer than 4 characters. -->
    </r>
    <r target="isş">
      <replace position="begin">izs</replace>
      <!-- In periodika I this was restricted to words longer than 4 characters. -->
    </r>
    <r target="işş">
      <replace position="begin">izs</replace>
      <!-- In periodika I this was restricted to words longer than 4 characters. -->
    </r>

    <!-- "uz" -->
    <r target="neuss">
      <replace position="begin">neuzs</replace>
    </r>
    <r target="neuşs">
      <replace position="begin">neuzs</replace>
    </r>
    <r target="neusş">
      <replace position="begin">neuzs</replace>
    </r>
    <r target="neuşş">
      <replace position="begin">neuzs</replace>
    </r>
    <r target="jāuss">
      <replace position="begin">jāuzs</replace>
    </r>
    <r target="jāuşs">
      <replace position="begin">jāuzs</replace>
    </r>
    <r target="jāusş">
      <replace position="begin">jāuzs</replace>
    </r>
    <r target="jāuşş">
      <replace position="begin">jāuzs</replace>
    </r>
    <r target="jahuss">
      <replace position="begin">jāuzs</replace>
    </r>
    <r target="jahuşs">
      <replace position="begin">jāuzs</replace>
    </r>
    <r target="jahusş">
      <replace position="begin">jāuzs</replace>
    </r>
    <r target="jahuşş">
      <replace position="begin">jāuzs</replace>
    </r>
    <r target="uss">
      <replace position="begin">uzs</replace>
      <!-- In periodika I this was restricted to words longer than 4 characters. -->
    </r>
    <r target="uşs">
      <replace position="begin">uzs</replace>
      <!-- In periodika I this was restricted to words longer than 4 characters. -->
    </r>
    <r target="usş">
      <replace position="begin">uzs</replace>
      <!-- In periodika I this was restricted to words longer than 4 characters. -->
    </r>
    <r target="uşş">
      <replace position="begin">uzs</replace>
      <!-- In periodika I this was restricted to words longer than 4 characters. -->
    </r>

    <!-- "aiz" -->
    <r target="neaiss">
      <replace position="begin">neaizs</replace>
    </r>
    <r target="neaişs">
      <replace position="begin">neaizs</replace>
    </r>
    <r target="neaisş">
      <replace position="begin">neaizs</replace>
    </r>
    <r target="neaişş">
      <replace position="begin">neaizs</replace>
    </r>
    <r target="jāaiss">
      <replace position="begin">jāaizs</replace>
    </r>
    <r target="jāaişs">
      <replace position="begin">jāaizs</replace>
    </r>
    <r target="jāaisş">
      <replace position="begin">jāaizs</replace>
    </r>
    <r target="jāaişş">
      <replace position="begin">jāaizs</replace>
    </r>
    <r target="jahaiss">
      <replace position="begin">jāaizs</replace>
    </r>
    <r target="jahaişs">
      <replace position="begin">jāaizs</replace>
    </r>
    <r target="jahaisş">
      <replace position="begin">jāaizs</replace>
    </r>
    <r target="jahaişş">
      <replace position="begin">jāaizs</replace>
    </r>
    <r target="aiss">
      <replace position="begin">aizs</replace>
    </r>
    <r target="aişs">
      <replace position="begin">aizs</replace>
    </r>
    <r target="aisş">
      <replace position="begin">aizs</replace>
    </r>
    <r target="aişş">
      <replace position="begin">aizs</replace>
    </r>

    <!-- 3. Transliteration of "ee" near the end of the token. -->
    <r target="ee">
      <replace position="end">ie</replace>
    </r>
    <r target="ees">
      <replace position="end">ies</replace>
    </r>
    <r target="eeş">
      <replace position="end">ies</replace>
    </r>
    <r target="eem">
      <replace position="end">iem</replace>
    </r>

    <!-- 4. Other endings. -->
    <r target="aî">
      <replace position="end">ai</replace>
    </r>

    <!-- 5. Transliterations that result in a single letter. -->

    <!-- 5.1. Consonants. -->
    <r target="tsch">č</r>
    <r target="tşch">č</r>
    <r target="zch">ž</r>
    <r target="sch">š</r>
    <r target="şch">š</r>
    <r target="ch">h</r>
    <r target="w">v</r>
    <r target="ŗ">r</r>
    <r target="ş">s</r>
    <r target="q">g</r>

    <!-- 5.2. Vowels. -->
    <r target="ah">ā</r>
    <r target="à">ā</r>
    <r target="á">ā</r>
    <r target="â">ā</r>
    <r target="ã">ā</r>
    <r target="ä">ā</r>
    <r target="eh">ē</r>
    <r target="è">ē</r>
    <r target="é">ē</r>
    <r target="ê">ē</r>
    <r target="ë">ē</r>
    <r target="ih">ī</r>
    <r target="ì">ī</r>
    <r target="í">ī</r>
    <r target="î">ī</r>
    <r target="ï">ī</r>
    <r target="ō">o</r>
    <r target="ò">o</r>
    <r target="ó">o</r>
    <r target="ô">o</r>
    <r target="õ">o</r>
    <r target="uh">ū</r>
    <r target="ù">ū</r>
    <r target="ú">ū</r>
    <r target="û">ū</r>
    <r target="ü">ū</r>

    <!-- 6. Rules for specific words. -->
    <r target="waj">
      <replace position="exact">vai</replace>
    </r>
    <r target="vaj">
      <replace position="exact">vai</replace>
    </r>
    <r target="woj">
      <replace position="exact">vai</replace>
    </r>
    <r target="nau">
      <replace position="exact">nav</replace>
    </r>
    <r target="jaw">
      <replace position="exact">jau</replace>
    </r>

  </exact>

  <fuzzy>

    <!-- Rules covering orthography differences. -->
    <r target="I" sensitive="1">J</r>
    <r target="J" sensitive="1">I</r>
    <r target="ee">ie</r>
    <r target="i">ī</r>
    <r target="ī">i</r>
    <r target="e">ē</r>
    <r target="ē">e</r>
    <r target="a">ā</r>
    <r target="ā">a</r>
    <r target="oh">o</r>

    <!-- Rules covering common OCR mistakes. -->
    <r target="tfch">č</r>
    <r target="f">
      <replace>z</replace>
      <replace>s</replace>
      <replace>t</replace>
    </r>
    <r target="z">
      <replace>c</replace>
      <replace>s</replace>
    </r>
    <r target="s">z</r>
    <r target="ş">z</r>
    <r target="fch">
      <replace>ž</replace>
      <replace>š</replace>
    </r>
    <r target="zch">š</r>
    <r target="sch">ž</r>
    <r target="şch">ž</r>
    <r target="tch">
      <replace>ž</replace>
      <replace>š</replace>
    </r>
    <r target="w">
      <replace>v</replace>
      <replace>m</replace>
    </r>
    <r target="m">v</r>
    <r target="j">i</r>
    <r target="ro">v</r>
    <r target="l" sensitive="1" position="begin">
      <replace>I</replace>
      <replace>J</replace>
    </r>
    <r target="ļ">
      <replace>l</replace>
      <replace>t</replace>
    </r>
    <r target="l">ļ</r>
    <r target="c">e</r>
    <r target="ce">ie</r>
    <r target="ec">ie</r>
    <r target="cc">ie</r>
    <r target="Ģ" sensitive="1">S</r>
    <r target="Ģch" sensitive="1">Š</r>
    <r target="ņ">n</r>
    <r target="v">p</r>
    <r target="rv">v</r>
    <r target="ì">l</r>

    <!-- Rules for specific words. -->
    <r target="uu">
      <replace position="exact">un</replace>
    </r>
    <r target="ori">
      <replace position="exact">arī</replace>
    </r>
    <r target="orī">
      <replace position="exact">arī</replace>
    </r>
    <r target="moj">
      <replace position="exact">vai</replace>
    </r>

  </fuzzy>
</rules>
© 2015 - 2024 Weber Informatics LLC | Privacy Policy