All downloads are free. The search and download features use the official Maven repository.

org.carrot2.text.linguistic.lucene.HindiStemmer Maven / Gradle / Ivy

Go to download

Carrot2 search results clustering framework. Minimal functional subset (core algorithms and infrastructure, no document sources).

There is a newer version: 3.16.3
Show newest version

/*
 * Carrot2 project.
 *
 * Copyright (C) 2002-2019, Dawid Weiss, Stanisław Osiński.
 * All rights reserved.
 *
 * Refer to the full license file "carrot2.LICENSE"
 * in the root folder of the repository checkout or at:
 * http://www.carrot2.org/carrot2.LICENSE
 */

package org.carrot2.text.linguistic.lucene;

/* 
 * Imported from Apache Lucene.
 * 
 * https://svn.apache.org/repos/asf/lucene/dev/trunk
 * svn rev.: 1534186
 */

import static org.carrot2.text.linguistic.lucene.StemmerUtil.*;

/**
 * Light Stemmer for Hindi.
 * 

* Implements the algorithm specified in: * A Lightweight Stemmer for Hindi * Ananthakrishnan Ramanathan and Durgesh D Rao. * http://computing.open.ac.uk/Sites/EACLSouthAsia/Papers/p6-Ramanathan.pdf *

*/ class HindiStemmer { public int stem(char buffer[], int len) { // 5 if ((len > 6) && (endsWith(buffer, len, "ाएंगी") || endsWith(buffer, len, "ाएंगे") || endsWith(buffer, len, "ाऊंगी") || endsWith(buffer, len, "ाऊंगा") || endsWith(buffer, len, "ाइयाँ") || endsWith(buffer, len, "ाइयों") || endsWith(buffer, len, "ाइयां") )) return len - 5; // 4 if ((len > 5) && (endsWith(buffer, len, "ाएगी") || endsWith(buffer, len, "ाएगा") || endsWith(buffer, len, "ाओगी") || endsWith(buffer, len, "ाओगे") || endsWith(buffer, len, "एंगी") || endsWith(buffer, len, "ेंगी") || endsWith(buffer, len, "एंगे") || endsWith(buffer, len, "ेंगे") || endsWith(buffer, len, "ूंगी") || endsWith(buffer, len, "ूंगा") || endsWith(buffer, len, "ातीं") || endsWith(buffer, len, "नाओं") || endsWith(buffer, len, "नाएं") || endsWith(buffer, len, "ताओं") || endsWith(buffer, len, "ताएं") || endsWith(buffer, len, "ियाँ") || endsWith(buffer, len, "ियों") || endsWith(buffer, len, "ियां") )) return len - 4; // 3 if ((len > 4) && (endsWith(buffer, len, "ाकर") || endsWith(buffer, len, "ाइए") || endsWith(buffer, len, "ाईं") || endsWith(buffer, len, "ाया") || endsWith(buffer, len, "ेगी") || endsWith(buffer, len, "ेगा") || endsWith(buffer, len, "ोगी") || endsWith(buffer, len, "ोगे") || endsWith(buffer, len, "ाने") || endsWith(buffer, len, "ाना") || endsWith(buffer, len, "ाते") || endsWith(buffer, len, "ाती") || endsWith(buffer, len, "ाता") || endsWith(buffer, len, "तीं") || endsWith(buffer, len, "ाओं") || endsWith(buffer, len, "ाएं") || endsWith(buffer, len, "ुओं") || endsWith(buffer, len, "ुएं") || endsWith(buffer, len, "ुआं") )) return len - 3; // 2 if ((len > 3) && (endsWith(buffer, len, "कर") || endsWith(buffer, len, "ाओ") || endsWith(buffer, len, "िए") || endsWith(buffer, len, "ाई") || endsWith(buffer, len, "ाए") || endsWith(buffer, len, "ने") || endsWith(buffer, len, "नी") || endsWith(buffer, len, "ना") || endsWith(buffer, len, "ते") || endsWith(buffer, len, "ीं") || endsWith(buffer, len, "ती") || 
endsWith(buffer, len, "ता") || endsWith(buffer, len, "ाँ") || endsWith(buffer, len, "ां") || endsWith(buffer, len, "ों") || endsWith(buffer, len, "ें") )) return len - 2; // 1 if ((len > 2) && (endsWith(buffer, len, "ो") || endsWith(buffer, len, "े") || endsWith(buffer, len, "ू") || endsWith(buffer, len, "ु") || endsWith(buffer, len, "ी") || endsWith(buffer, len, "ि") || endsWith(buffer, len, "ा") )) return len - 1; return len; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy