All downloads are FREE. Search and download functionality uses the official Maven repository.

rules.fraktur.xml Maven / Gradle / Ivy

Go to download

Webservice API for Tēzaurs.lv and other ailab.lv Latvian computational linguistic tools

There is a newer version: 2.5.7
Show newest version
<?xml version="1.0" encoding="UTF-8"?>
<rules>
	<exact>
		<!-- 1. "ee" transliteration near the beginning of the token: -->
		<!-- 1.1. prefix "ie" combined with stem starting with "ie" -->
		<!-- 1.1.1. in combination with prefix "vis" -->
		<!-- Every rule appears twice: once spelled "vis" and once spelled "viş";
		     both spellings share one replacement. Targets written with "jā"
		     and with "jah" both map to "jā". -->
		<!-- NOTE(review): longer targets are listed before shorter ones that they
		     start with (e.g. "viseeee" before "visee"); presumably the matcher
		     depends on this order. Confirm before reordering. -->
		<r target="visneeeee">
			<replace position="begin">visneieie</replace>
		</r>
		<r target="vişneeeee">
			<replace position="begin">visneieie</replace>
		</r>
		<r target="visjāeeee">
			<replace position="begin">visjāieie</replace>
		</r>
		<r target="vişjāeeee">
			<replace position="begin">visjāieie</replace>
		</r>
		<r target="visjaheeee">
			<replace position="begin">visjāieie</replace>
		</r>
		<r target="vişjaheeee">
			<replace position="begin">visjāieie</replace>
		</r>
		<r target="visneee">
			<replace position="begin">visneie</replace>
		</r>
		<r target="vişneee">
			<replace position="begin">visneie</replace>
		</r>
		<r target="visjāee">
			<replace position="begin">visjāie</replace>
		</r>
		<r target="vişjāee">
			<replace position="begin">visjāie</replace>
		</r>
		<r target="visjahee">
			<replace position="begin">visjāie</replace>
		</r>
		<r target="vişjahee">
			<replace position="begin">visjāie</replace>
		</r>
		<r target="viseeee">
			<replace position="begin">visieie</replace>
		</r>
		<r target="vişeeee">
			<replace position="begin">visieie</replace>
		</r>
		<r target="visee">
			<replace position="begin">visie</replace>
		</r>
		<r target="vişee">
			<replace position="begin">visie</replace>
		</r>
		
		<!-- 1.1.2. combinations without prefix "vis" -->
		<!-- Token-initial "ee" runs, optionally preceded by "ne", "jā" or "jah"
		     ("jah" is rewritten to "jā"); each "ee" becomes "ie". -->
		<!-- NOTE(review): longer targets precede the shorter ones they start with
		     ("eeee" before "ee"); presumably order matters to the matcher. -->
		<r target="neeeee">
			<replace position="begin">neieie</replace>
		</r>
		<r target="jāeeee">
			<replace position="begin">jāieie</replace>
		</r>
		<r target="jaheeee">
			<replace position="begin">jāieie</replace>
		</r>
		<r target="neee">
			<replace position="begin">neie</replace>
		</r>
		<r target="jāee">
			<replace position="begin">jāie</replace>
		</r>
		<r target="jahee">
			<replace position="begin">jāie</replace>
		</r>
		<r target="eeee">
			<replace position="begin">ieie</replace>
		</r>
		<r target="ee">
			<replace position="begin">ie</replace>
		</r>
		
		<!-- 1.2. other prefixes combined with prefix "ie" -->
		<!-- 1.2.1. in combination with prefix "vis" -->
		<!-- Each second prefix (aiz, ap, at, bez, eks, iz, no, pa, pār/pahr, par,
		     pie, sa, uz) is listed once with "vis" and once with "viş"; both
		     spellings share one replacement. -->
		<!-- NOTE(review): "vişşaee" changes both "s" letters at once; the mixed
		     spellings "visşaee" and "vişsaee" are not listed. Likewise the
		     "viseksee" pair has no "ekş" variant although section 1.2.2 lists
		     "ekşee". Confirm whether these omissions are intended. -->
		<r target="visaizee">
			<replace position="begin">visaizie</replace>
		</r>
		<r target="vişaizee">
			<replace position="begin">visaizie</replace>
		</r>
		<r target="visapee">
			<replace position="begin">visapie</replace>
		</r>
		<r target="vişapee">
			<replace position="begin">visapie</replace>
		</r>
		<r target="visatee">
			<replace position="begin">visatie</replace>
		</r>
		<r target="vişatee">
			<replace position="begin">visatie</replace>
		</r>
		<r target="visbezee">
			<replace position="begin">visbezie</replace>
		</r>
		<r target="vişbezee">
			<replace position="begin">visbezie</replace>
		</r>
		<r target="viseksee">
			<replace position="begin">viseksie</replace>
		</r>
		<r target="vişeksee">
			<replace position="begin">viseksie</replace>
		</r>
		<r target="visizee">
			<replace position="begin">visizie</replace>
		</r>
		<r target="vişizee">
			<replace position="begin">visizie</replace>
		</r>
		<r target="visnoee">
			<replace position="begin">visnoie</replace>
		</r>
		<r target="vişnoee">
			<replace position="begin">visnoie</replace>
		</r>
		<r target="vispaee">
			<replace position="begin">vispaie</replace>
		</r>
		<r target="vişpaee">
			<replace position="begin">vispaie</replace>
		</r>
		<r target="vispāree">
			<replace position="begin">vispārie</replace>
		</r>
		<r target="vişpāree">
			<replace position="begin">vispārie</replace>
		</r>
		<r target="vispahree">
			<replace position="begin">vispārie</replace>
		</r>
		<r target="vişpahree">
			<replace position="begin">vispārie</replace>
		</r>
		<r target="visparee">
			<replace position="begin">visparie</replace>
			<!-- Why was this commented out in periodika I? -->
		</r>
		<r target="vişparee">
			<replace position="begin">visparie</replace>
		</r>
		<r target="vispeeee">
			<replace position="begin">vispieie</replace>
		</r>
		<r target="vişpeeee">
			<replace position="begin">vispieie</replace>
		</r>
		<r target="vissaee">
			<replace position="begin">vissaie</replace>
		</r>
		<r target="vişşaee">
			<replace position="begin">vissaie</replace>
		</r>
		<r target="visuzee">
			<replace position="begin">visuzie</replace>
		</r>
		<r target="vişuzee">
			<replace position="begin">visuzie</replace>
		</r>
		
		<!-- 1.2.2. combinations without prefix "vis" -->
		<!-- A prefix followed by "ee" at the beginning of the token; "ee" becomes
		     "ie". The "eks" and "sa" prefixes are additionally listed in their
		     "ş" spellings ("ekşee", "şaee"); "pahr" is rewritten to "pār". -->
		<r target="aizee">
			<replace position="begin">aizie</replace>
		</r>
		<r target="apee">
			<replace position="begin">apie</replace>
		</r>
		<r target="atee">
			<replace position="begin">atie</replace>
		</r>
		<r target="bezee">
			<replace position="begin">bezie</replace>
		</r>
		<r target="eksee">
			<replace position="begin">eksie</replace>
		</r>
		<r target="ekşee">
			<replace position="begin">eksie</replace>
		</r>
		<r target="izee">
			<replace position="begin">izie</replace>
		</r>
		<r target="noee">
			<replace position="begin">noie</replace>
		</r>
		<r target="paee">
			<replace position="begin">paie</replace>
		</r>
		<r target="pāree">
			<replace position="begin">pārie</replace>
		</r>
		<r target="pahree">
			<replace position="begin">pārie</replace>
		</r>
		<r target="paree">
			<replace position="begin">parie</replace>
			<!-- Why was this commented out in periodika I? -->
		</r>
		<r target="peeee">
			<replace position="begin">pieie</replace>
		</r>
		<r target="saee">
			<replace position="begin">saie</replace>
		</r>
		<r target="şaee">
			<replace position="begin">saie</replace>
		</r>
		<r target="uzee">
			<replace position="begin">uzie</replace>
		</r>
		
		<!-- 1.3. other prefixes containing "ie" -->
		<!-- Token-initial "pee" becomes "pie", with and without a leading
		     "vis"/"viş" (both spellings yield "vis"). -->
		<r target="vispee">
			<replace position="begin">vispie</replace>
		</r>
		<r target="vişpee">
			<replace position="begin">vispie</replace>
		</r>
		<r target="pee">
			<replace position="begin">pie</replace>
		</r>
		
		<!-- 2. Transliteration for the prefixes "iz", "uz", "aiz" in front of s: -->
		<!-- 2.1. in combination with prefix "vis" -->
		<!-- Prefix "iz": each stem ("visne", "visjā", "visjah", "vis") is listed
		     with all eight combinations of "s"/"ş" in "vis" and in the following
		     "ss" pair. All eight map to one replacement ending in "izs";
		     "jah" is rewritten to "jā". -->
		<r target="visneiss">
			<replace position="begin">visneizs</replace>
		</r>
		<r target="vişneiss">
			<replace position="begin">visneizs</replace>
		</r>
		<r target="visneişs">
			<replace position="begin">visneizs</replace>
		</r>
		<r target="visneisş">
			<replace position="begin">visneizs</replace>
		</r>
		<r target="vişneişs">
			<replace position="begin">visneizs</replace>
		</r>
		<r target="vişneisş">
			<replace position="begin">visneizs</replace>
		</r>
		<r target="visneişş">
			<replace position="begin">visneizs</replace>
		</r>
		<r target="vişneişş">
			<replace position="begin">visneizs</replace>
		</r>
		<r target="visjāiss">
			<replace position="begin">visjāizs</replace>
		</r>
		<r target="vişjāiss">
			<replace position="begin">visjāizs</replace>
		</r>
		<r target="visjāişs">
			<replace position="begin">visjāizs</replace>
		</r>
		<r target="visjāisş">
			<replace position="begin">visjāizs</replace>
		</r>
		<r target="vişjāişs">
			<replace position="begin">visjāizs</replace>
		</r>
		<r target="vişjāisş">
			<replace position="begin">visjāizs</replace>
		</r>
		<r target="visjāişş">
			<replace position="begin">visjāizs</replace>
		</r>
		<r target="vişjāişş">
			<replace position="begin">visjāizs</replace>
		</r>
		<r target="visjahiss">
			<replace position="begin">visjāizs</replace>
		</r>
		<r target="vişjahiss">
			<replace position="begin">visjāizs</replace>
		</r>
		<r target="visjahişs">
			<replace position="begin">visjāizs</replace>
		</r>
		<r target="visjahisş">
			<replace position="begin">visjāizs</replace>
		</r>
		<r target="vişjahişs">
			<replace position="begin">visjāizs</replace>
		</r>
		<r target="vişjahisş">
			<replace position="begin">visjāizs</replace>
		</r>
		<r target="visjahişş">
			<replace position="begin">visjāizs</replace>
		</r>
		<r target="vişjahişş">
			<replace position="begin">visjāizs</replace>
		</r>
		<r target="visiss">
			<replace position="begin">visizs</replace>
		</r>
		<r target="vişiss">
			<replace position="begin">visizs</replace>
		</r>
		<r target="visişs">
			<replace position="begin">visizs</replace>
		</r>
		<r target="visisş">
			<replace position="begin">visizs</replace>
		</r>
		<r target="vişişs">
			<replace position="begin">visizs</replace>
		</r>
		<r target="vişisş">
			<replace position="begin">visizs</replace>
		</r>
		<r target="visişş">
			<replace position="begin">visizs</replace>
		</r>
		<r target="vişişş">
			<replace position="begin">visizs</replace>
		</r>
		
		<!-- 2.1 (continued). Prefix "uz" in front of s, combined with prefix "vis":
		     each stem ("visne", "visjā", "visjah", "vis") is listed with all
		     eight "s"/"ş" spellings; all map to one replacement ending in
		     "uzs", and "jah" is rewritten to "jā". -->
		<r target="visneuss">
			<replace position="begin">visneuzs</replace>
		</r>
		<r target="vişneuss">
			<replace position="begin">visneuzs</replace>
		</r>
		<r target="visneuşs">
			<replace position="begin">visneuzs</replace>
		</r>
		<r target="visneusş">
			<replace position="begin">visneuzs</replace>
		</r>
		<r target="vişneuşs">
			<replace position="begin">visneuzs</replace>
		</r>
		<r target="vişneusş">
			<replace position="begin">visneuzs</replace>
		</r>
		<r target="visneuşş">
			<replace position="begin">visneuzs</replace>
		</r>
		<r target="vişneuşş">
			<replace position="begin">visneuzs</replace>
		</r>
		<r target="visjāuss">
			<replace position="begin">visjāuzs</replace>
		</r>
		<r target="vişjāuss">
			<replace position="begin">visjāuzs</replace>
		</r>
		<r target="visjāuşs">
			<replace position="begin">visjāuzs</replace>
		</r>
		<r target="visjāusş">
			<replace position="begin">visjāuzs</replace>
		</r>
		<r target="vişjāuşs">
			<replace position="begin">visjāuzs</replace>
		</r>
		<r target="vişjāusş">
			<replace position="begin">visjāuzs</replace>
		</r>
		<r target="visjāuşş">
			<replace position="begin">visjāuzs</replace>
		</r>
		<r target="vişjāuşş">
			<replace position="begin">visjāuzs</replace>
		</r>
		<r target="visjahuss">
			<replace position="begin">visjāuzs</replace>
		</r>
		<r target="vişjahuss">
			<replace position="begin">visjāuzs</replace>
		</r>
		<r target="visjahuşs">
			<replace position="begin">visjāuzs</replace>
		</r>
		<r target="visjahusş">
			<replace position="begin">visjāuzs</replace>
		</r>
		<r target="vişjahuşs">
			<replace position="begin">visjāuzs</replace>
		</r>
		<r target="vişjahusş">
			<replace position="begin">visjāuzs</replace>
		</r>
		<r target="visjahuşş">
			<replace position="begin">visjāuzs</replace>
		</r>
		<r target="vişjahuşş">
			<replace position="begin">visjāuzs</replace>
		</r>
		<r target="visuss">
			<replace position="begin">visuzs</replace>
		</r>
		<r target="vişuss">
			<replace position="begin">visuzs</replace>
		</r>
		<r target="visuşs">
			<replace position="begin">visuzs</replace>
		</r>
		<r target="visusş">
			<replace position="begin">visuzs</replace>
		</r>
		<r target="vişuşs">
			<replace position="begin">visuzs</replace>
		</r>
		<r target="vişusş">
			<replace position="begin">visuzs</replace>
		</r>
		<r target="visuşş">
			<replace position="begin">visuzs</replace>
		</r>
		<r target="vişuşş">
			<replace position="begin">visuzs</replace>
		</r>
		
		<!-- 2.1 (continued). Prefix "aiz" in front of s, combined with prefix
		     "vis": each stem ("visne", "visjā", "visjah", "vis") is listed with
		     all eight "s"/"ş" spellings; all map to one replacement ending in
		     "aizs", and "jah" is rewritten to "jā". -->
		<r target="visneaiss">
			<replace position="begin">visneaizs</replace>
		</r>
		<r target="vişneaiss">
			<replace position="begin">visneaizs</replace>
		</r>
		<r target="visneaişs">
			<replace position="begin">visneaizs</replace>
		</r>
		<r target="visneaisş">
			<replace position="begin">visneaizs</replace>
		</r>
		<r target="vişneaişs">
			<replace position="begin">visneaizs</replace>
		</r>
		<r target="vişneaisş">
			<replace position="begin">visneaizs</replace>
		</r>
		<r target="visneaişş">
			<replace position="begin">visneaizs</replace>
		</r>
		<r target="vişneaişş">
			<replace position="begin">visneaizs</replace>
		</r>
		<r target="visjāaiss">
			<replace position="begin">visjāaizs</replace>
		</r>
		<r target="vişjāaiss">
			<replace position="begin">visjāaizs</replace>
		</r>
		<r target="visjāaişs">
			<replace position="begin">visjāaizs</replace>
		</r>
		<r target="visjāaisş">
			<replace position="begin">visjāaizs</replace>
		</r>
		<r target="vişjāaişs">
			<replace position="begin">visjāaizs</replace>
		</r>
		<r target="vişjāaisş">
			<replace position="begin">visjāaizs</replace>
		</r>
		<r target="visjāaişş">
			<replace position="begin">visjāaizs</replace>
		</r>
		<r target="vişjāaişş">
			<replace position="begin">visjāaizs</replace>
		</r>
		<r target="visjahaiss">
			<replace position="begin">visjāaizs</replace>
		</r>
		<r target="vişjahaiss">
			<replace position="begin">visjāaizs</replace>
		</r>
		<r target="visjahaişs">
			<replace position="begin">visjāaizs</replace>
		</r>
		<r target="visjahaisş">
			<replace position="begin">visjāaizs</replace>
		</r>
		<r target="vişjahaişs">
			<replace position="begin">visjāaizs</replace>
		</r>
		<r target="vişjahaisş">
			<replace position="begin">visjāaizs</replace>
		</r>
		<r target="visjahaişş">
			<replace position="begin">visjāaizs</replace>
		</r>
		<r target="vişjahaişş">
			<replace position="begin">visjāaizs</replace>
		</r>
		<r target="visaiss">
			<replace position="begin">visaizs</replace>
		</r>
		<r target="vişaiss">
			<replace position="begin">visaizs</replace>
		</r>
		<r target="visaişs">
			<replace position="begin">visaizs</replace>
		</r>
		<r target="visaisş">
			<replace position="begin">visaizs</replace>
		</r>
		<r target="vişaişs">
			<replace position="begin">visaizs</replace>
		</r>
		<r target="vişaisş">
			<replace position="begin">visaizs</replace>
		</r>
		<r target="visaişş">
			<replace position="begin">visaizs</replace>
		</r>
		<r target="vişaişş">
			<replace position="begin">visaizs</replace>
		</r>

		
		<!-- 2.2. combinations without prefix "vis" -->
		<!-- Prefix "iz" in front of s: each stem ("ne", "jā", "jah", none) is
		     listed with the four "s"/"ş" spellings of the "ss" pair; all map to
		     one replacement ending in "izs", and "jah" is rewritten to "jā". -->
		<r target="neiss">
			<replace position="begin">neizs</replace>
		</r>
		<r target="neişs">
			<replace position="begin">neizs</replace>
		</r>
		<r target="neisş">
			<replace position="begin">neizs</replace>
		</r>
		<r target="neişş">
			<replace position="begin">neizs</replace>
		</r>
		<r target="jāiss">
			<replace position="begin">jāizs</replace>
		</r>
		<r target="jāişs">
			<replace position="begin">jāizs</replace>
		</r>
		<r target="jāisş">
			<replace position="begin">jāizs</replace>
		</r>
		<r target="jāişş">
			<replace position="begin">jāizs</replace>
		</r>
		<r target="jahiss">
			<replace position="begin">jāizs</replace>
		</r>
		<r target="jahişs">
			<replace position="begin">jāizs</replace>
		</r>
		<r target="jahisş">
			<replace position="begin">jāizs</replace>
		</r>
		<r target="jahişş">
			<replace position="begin">jāizs</replace>
		</r>
		<r target="iss">
			<replace position="begin">izs</replace>
			<!-- In periodika I this rule was restricted to words longer than 4 characters. -->
		</r>
		<r target="işs">
			<replace position="begin">izs</replace>
			<!-- In periodika I this rule was restricted to words longer than 4 characters. -->
		</r>
		<r target="isş">
			<replace position="begin">izs</replace>
			<!-- In periodika I this rule was restricted to words longer than 4 characters. -->
		</r>
		<r target="işş">
			<replace position="begin">izs</replace>
			<!-- In periodika I this rule was restricted to words longer than 4 characters. -->
		</r>
		
		<!-- 2.2 (continued). Prefix "uz" in front of s, without prefix "vis":
		     each stem ("ne", "jā", "jah", none) is listed with the four "s"/"ş"
		     spellings of the "ss" pair; all map to one replacement ending in
		     "uzs", and "jah" is rewritten to "jā". -->
		<r target="neuss">
			<replace position="begin">neuzs</replace>
		</r>
		<r target="neuşs">
			<replace position="begin">neuzs</replace>
		</r>
		<r target="neusş">
			<replace position="begin">neuzs</replace>
		</r>
		<r target="neuşş">
			<replace position="begin">neuzs</replace>
		</r>
		<r target="jāuss">
			<replace position="begin">jāuzs</replace>
		</r>
		<r target="jāuşs">
			<replace position="begin">jāuzs</replace>
		</r>
		<r target="jāusş">
			<replace position="begin">jāuzs</replace>
		</r>
		<r target="jāuşş">
			<replace position="begin">jāuzs</replace>
		</r>
		<r target="jahuss">
			<replace position="begin">jāuzs</replace>
		</r>
		<r target="jahuşs">
			<replace position="begin">jāuzs</replace>
		</r>
		<r target="jahusş">
			<replace position="begin">jāuzs</replace>
		</r>
		<r target="jahuşş">
			<replace position="begin">jāuzs</replace>
		</r>
		<r target="uss">
			<replace position="begin">uzs</replace>
			<!-- In periodika I this rule was restricted to words longer than 4 characters. -->
		</r>
		<r target="uşs">
			<replace position="begin">uzs</replace>
			<!-- In periodika I this rule was restricted to words longer than 4 characters. -->
		</r>
		<r target="usş">
			<replace position="begin">uzs</replace>
			<!-- In periodika I this rule was restricted to words longer than 4 characters. -->
		</r>
		<r target="uşş">
			<replace position="begin">uzs</replace>
			<!-- In periodika I this rule was restricted to words longer than 4 characters. -->
		</r>

		<!-- 2.2 (continued). Prefix "aiz" in front of s, without prefix "vis":
		     each stem ("ne", "jā", "jah", none) is listed with the four "s"/"ş"
		     spellings of the "ss" pair; all map to one replacement ending in
		     "aizs", and "jah" is rewritten to "jā". -->
		<r target="neaiss">
			<replace position="begin">neaizs</replace>
		</r>
		<r target="neaişs">
			<replace position="begin">neaizs</replace>
		</r>
		<r target="neaisş">
			<replace position="begin">neaizs</replace>
		</r>
		<r target="neaişş">
			<replace position="begin">neaizs</replace>
		</r>
		<r target="jāaiss">
			<replace position="begin">jāaizs</replace>
		</r>
		<r target="jāaişs">
			<replace position="begin">jāaizs</replace>
		</r>
		<r target="jāaisş">
			<replace position="begin">jāaizs</replace>
		</r>
		<r target="jāaişş">
			<replace position="begin">jāaizs</replace>
		</r>
		<r target="jahaiss">
			<replace position="begin">jāaizs</replace>
		</r>
		<r target="jahaişs">
			<replace position="begin">jāaizs</replace>
		</r>
		<r target="jahaisş">
			<replace position="begin">jāaizs</replace>
		</r>
		<r target="jahaişş">
			<replace position="begin">jāaizs</replace>
		</r>
		<r target="aiss">
			<replace position="begin">aizs</replace>
		</r>
		<r target="aişs">
			<replace position="begin">aizs</replace>
		</r>
		<r target="aisş">
			<replace position="begin">aizs</replace>
		</r>
		<r target="aişş">
			<replace position="begin">aizs</replace>
		</r>

		<!-- 3. "ee" transliteration near the end of the token: -->
		<!-- Token-final "ee", "ees"/"eeş" and "eem" become "ie", "ies" and "iem". -->
		<!-- NOTE(review): target "ee" is also defined with position="begin" in
		     section 1.1.2; confirm the loader keeps both rules for one target. -->
		<r target="ee">
			<replace position="end">ie</replace>
		</r>
		<r target="ees">
			<replace position="end">ies</replace>
		</r>
		<r target="eeş">
			<replace position="end">ies</replace>
		</r>
		<r target="eem">
			<replace position="end">iem</replace>
		</r>
		
		<!-- 4. other endings: -->
		<!-- NOTE(review): "î" on its own is mapped to "ī" in section 5.2;
		     presumably this token-final rule must take precedence. Confirm. -->
		<r target="aî">
			<replace position="end">ai</replace>
		</r>
		
		<!-- 5. transliterations resulting in 1 letter: -->
		<!-- Rules in this section carry the replacement directly as element text
		     and have no position attribute; presumably they apply anywhere in
		     the token (TODO confirm against the rule loader). -->
		<!-- 5.1. consonants -->
		<!-- Longer targets are listed before the shorter ones they contain
		     ("tsch" before "sch" before "ch"); presumably order matters. -->
		<r target="tsch">č</r>
		<r target="tşch">č</r>
		<r target="zch">ž</r>
		<r target="sch">š</r>
		<r target="şch">š</r>
		<r target="ch">h</r>
		<r target="w">v</r>
		<r target="ŗ">r</r>
		<r target="ş">s</r>
		<r target="q">g</r>
		
		<!-- 5.2. vowels -->
		<!-- For a, e, i and u, the vowel + "h" digraph and the accented variants
		     map to the macron letter; the accented variants of o map to
		     plain "o". -->
		<r target="ah">ā</r>
		<r target="à">ā</r>
		<r target="á">ā</r>
		<r target="â">ā</r>
		<r target="ã">ā</r>
		<r target="ä">ā</r>

		<r target="eh">ē</r>
		<r target="è">ē</r>
		<r target="é">ē</r>
		<r target="ê">ē</r>
		<r target="ë">ē</r>
		
		<r target="ih">ī</r>
		<r target="ì">ī</r>
		<r target="í">ī</r>
		<r target="î">ī</r>
		<r target="ï">ī</r>
		
		<r target="ō">o</r>
		<r target="ò">o</r>
		<r target="ó">o</r>
		<r target="ô">o</r>
		<r target="õ">o</r>

		<r target="uh">ū</r>
		<r target="ù">ū</r>
		<r target="ú">ū</r>
		<r target="û">ū</r>
		<r target="ü">ū</r>
		
		<!-- 6. rules for specific words. -->
		<!-- position="exact" presumably restricts the rule to tokens that equal
		     the target as a whole (TODO confirm against the rule loader). -->
		<r target="waj">
			<replace position="exact">vai</replace>
		</r>
		<r target="vaj">
			<replace position="exact">vai</replace>
		</r>
		<r target="woj">
			<replace position="exact">vai</replace>
		</r>
		<r target="nau">
			<replace position="exact">nav</replace>
		</r>
		<r target="jaw">
			<replace position="exact">jau</replace>
		</r>
					
	</exact>
	<fuzzy>
		<!-- Orthography differences between spelling conventions. -->
		<!-- sensitive="1" presumably makes the match case-sensitive (TODO confirm). -->
		<r target="I" sensitive="1">J</r>
		<r target="J" sensitive="1">I</r>

		<r target="ee">ie</r>
		<r target="i">ī</r>
		<r target="ī">i</r>
		<r target="e">ē</r>
		<r target="ē">e</r>
		<r target="a">ā</r>
		<r target="ā">a</r>
		<r target="oh">o</r>

		<!-- Frequent OCR misreadings. -->
		<!-- A rule with several <replace> children lists alternative
		     replacements for one target. -->
		<r target="tfch">č</r>
		<r target="f">
			<replace>z</replace>
			<replace>s</replace>
			<replace>t</replace>
		</r>
		<r target="z">
			<replace>c</replace>
			<replace>s</replace>
		</r>
		<r target="s">z</r>
		<r target="ş">z</r>
		<r target="fch">
			<replace>ž</replace>
			<replace>š</replace>
		</r>
		<r target="zch">š</r>
		<r target="sch">ž</r>
		<r target="şch">ž</r>
		<r target="tch">
			<replace>ž</replace>
			<replace>š</replace>
		</r>

		<r target="w">
			<replace>v</replace>
			<replace>m</replace>
		</r>
		<r target="m">v</r>
		<r target="j">i</r>

		<r target="ro">v</r>
		<!-- Lower-case "l" at the beginning of a token read in place of "I" or "J". -->
		<r target="l" sensitive="1" position="begin">
			<replace>I</replace>
			<replace>J</replace>
		</r>
		<r target="ļ">
			<replace>l</replace>
			<replace>t</replace>
		</r>
		<r target="l">ļ</r>

		<r target="c">e</r>
		<r target="ce">ie</r>
		<r target="ec">ie</r>
		<r target="cc">ie</r>

		<r target="Ģ" sensitive="1">S</r>
		<r target="Ģch" sensitive="1">Š</r>

		<r target="ņ">n</r>
		<r target="v">p</r>
		<r target="rv">v</r>

		<r target="ì">l</r>

		<!-- Word-specific rules (position="exact"). -->
		<r target="uu">
			<replace position="exact">un</replace>
		</r>
		<r target="ori">
			<replace position="exact">arī</replace>
		</r>
		<r target="orī">
			<replace position="exact">arī</replace>
		</r>
		<r target="moj">
			<replace position="exact">vai</replace>
		</r>

	</fuzzy>
</rules>




© 2015 - 2024 Weber Informatics LLC | Privacy Policy