package com.dlj.ir.analysis.ru;

import java.util.Collections;
import java.util.Map;
import java.util.WeakHashMap;

/**
 * Light stemmer for Russian.
 *
 * <p>Removes case endings from nouns and adjectives and then applies a small
 * normalization step to the remaining stem.
 *
 * <p>Algorithm by Ljiljana Dolamic (University of Neuchatel,
 * www.unine.ch/info/clef/). Email: ljiljana.dolamic@unine.ch
 *
 * <p>Instances hold no mutable state and the shared cache is synchronized, so
 * {@link #stem(String)} may be called from several threads.
 *
 * @author Ljiljana Dolamic
 */
public class RussianStemmerLight {

    // All endings below are written with Cyrillic letters only. A Latin
    // look-alike (o, e, u, j) inside a literal would silently never match.

    /** Four-letter case endings, tried only on words longer than 6 characters. */
    private static final String[] CASE_ENDINGS_4 = {"иями", "оями"};

    /** Three-letter case endings, tried only on words longer than 5 characters. */
    private static final String[] CASE_ENDINGS_3 = {
        "иям", "иях", "оях", "ями", "оям", "оьв", "ами", "его", "ему", "ери",
        "ими", "ого", "ому", "ыми", "оев"
    };

    /** Two-letter case endings, tried only on words longer than 4 characters. */
    private static final String[] CASE_ENDINGS_2 = {
        "ая", "яя", "ях", "юю", "ах", "ею", "их", "ия", "ию", "ьв",
        "ою", "ую", "ям", "ых", "ея", "ам", "ее", "ей", "ем", "ев",
        "ий", "им", "ое", "ой", "ом", "ов", "ые", "ый", "ым", "ми"
    };

    /** One-letter case endings, tried only on words longer than 3 characters. */
    private static final String[] CASE_ENDINGS_1 = {
        "а", "е", "и", "о", "у", "й", "ы", "я", "ю", "ь"
    };

    /**
     * A cache of words and their stems, shared by all instances.
     *
     * <p>{@link WeakHashMap} is not synchronized, and this map is reachable
     * from every instance, so it is wrapped in a synchronized view.
     */
    private static final Map<String, String> cache =
            Collections.synchronizedMap(new WeakHashMap<String, String>());

    /**
     * Default constructor.
     */
    public RussianStemmerLight() {
    }

    /**
     * Returns the stem of the given word.
     *
     * <p>The word is expected to be a single token; it is not lower-cased or
     * otherwise pre-processed here.
     *
     * @param input the word to stem; may be {@code null}
     * @return the stem, or {@code null} when {@code input} is {@code null}
     */
    public String stem(String input) {
        if (input == null) {
            return null;
        }

        String cached = cache.get(input);
        if (cached != null) {
            return cached;
        }

        StringBuilder word = new StringBuilder(input);
        removeCase(word);
        normalize(word);

        // toString() always creates a new String, so the cached value never is
        // the key object itself; a value that strongly referenced its own key
        // would keep the weak-keyed entry alive forever.
        String result = word.toString();
        cache.put(input, result);
        return result;
    }

    /**
     * Drops one trailing character when a word longer than 3 characters ends
     * in "ь", "нн" or "и".
     *
     * @param word the word to normalize in place
     */
    private static void normalize(StringBuilder word) {
        int len = word.length();
        if (len <= 3) {
            return;
        }
        if (endsWith(word, "ь") || endsWith(word, "нн") || endsWith(word, "и")) {
            word.setLength(len - 1);
        }
    }

    /**
     * Removes at most one case ending from the word, preferring the longest
     * ending whose minimum word length is satisfied.
     *
     * @param word the word to strip in place
     */
    private static void removeCase(StringBuilder word) {
        // The thresholds are checked against the original length; at most one
        // ending is removed, so the length cannot change between checks.
        int len = word.length();
        if (len > 6 && stripAny(word, CASE_ENDINGS_4)) {
            return;
        }
        if (len > 5 && stripAny(word, CASE_ENDINGS_3)) {
            return;
        }
        if (len > 4 && stripAny(word, CASE_ENDINGS_2)) {
            return;
        }
        if (len > 3) {
            stripAny(word, CASE_ENDINGS_1);
        }
    }

    /**
     * Removes the first ending in {@code endings} that the word ends with.
     *
     * @param word    the word to strip in place
     * @param endings candidate endings
     * @return {@code true} if an ending was removed
     */
    private static boolean stripAny(StringBuilder word, String[] endings) {
        for (String ending : endings) {
            if (endsWith(word, ending)) {
                word.setLength(word.length() - ending.length());
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether the word ends with the given ending.
     *
     * @param word   the word to inspect
     * @param ending the ending to look for
     * @return {@code true} if {@code word} ends with {@code ending}
     */
    private static boolean endsWith(StringBuilder word, String ending) {
        int offset = word.length() - ending.length();
        if (offset < 0) {
            return false;
        }
        for (int i = 0; i < ending.length(); i++) {
            if (word.charAt(offset + i) != ending.charAt(i)) {
                return false;
            }
        }
        return true;
    }
}