/* Arabic stemmer (stem2) trying to remove common prefixes and suffixes */
/* more conservative than stem3 (see J. Savoy, Y. Rasolofo, TREC 2002) */

#include <string.h> /* strlen, memmove */

static char *removeArabicSuffix2(char *word);
static char *removeArabicPrefix2(char *word);
static void dropLeadingChars2(char *word, size_t length, size_t count);

/*
 * For the Arabic language.
 *
 * Stems `word` in place: at most one suffix is removed first, then at most
 * one prefix.  The rules compare against plain ASCII letters (A, l, w, Y, ...),
 * so the input is presumably a Latin transliteration of Arabic -- the exact
 * scheme is not visible here; confirm against the caller.
 *
 * word:    writable NUL-terminated string; it only ever gets shorter.
 * returns: `word` itself, or NULL when `word` is NULL.
 */
char *arabic_stemming2(char *word)
{
    if (word == NULL) {
        return NULL;
    }
    removeArabicSuffix2(word);
    removeArabicPrefix2(word);
    return word;
}

/*
 * Removes at most one suffix from `word`, in place.
 *
 * Two-letter suffixes are only considered for words of at least 6 characters,
 * one-letter suffixes for words of at least 5.  A long word that matches no
 * two-letter suffix is still tested against the one-letter rules.
 *
 * returns: `word`.
 */
static char *removeArabicSuffix2(char *word)
{
    size_t length = strlen(word);

    if (length >= 6) {
        char penultimate = word[length - 2];
        char last = word[length - 1];
        int hasTwoLetterSuffix =
            /* -A{nt} */
            (penultimate == 'A' && (last == 't' || last == 'n')) ||
            /* -hA */
            (penultimate == 'h' && last == 'A') ||
            /* -Y{phn} */
            (penultimate == 'Y' &&
             (last == 'p' || last == 'h' || last == 'n')) ||
            /* -wn */
            (penultimate == 'w' && last == 'n');

        if (hasTwoLetterSuffix) {
            word[length - 2] = '\0';
            return word;
        }
    }

    if (length >= 5) {
        char last = word[length - 1];

        /* -{Yph} -> - */
        if (last == 'Y' || last == 'p' || last == 'h') {
            word[length - 1] = '\0';
        }
    }
    return word;
}

/*
 * Removes at most one prefix from `word`, in place.
 *
 * The rules run from longest to shortest prefix, each with its own minimum
 * word length (7, 6 and 5 characters); the first rule that matches wins.
 *
 * returns: `word`.
 */
static char *removeArabicPrefix2(char *word)
{
    size_t length = strlen(word);

    /* {fXbw}Al-- -> -- */
    if (length >= 7 && word[1] == 'A' && word[2] == 'l' &&
        (word[0] == 'f' || word[0] == 'X' ||
         word[0] == 'b' || word[0] == 'w')) {
        dropLeadingChars2(word, length, 3);
        return word;
    }

    /* Al-- -> -- */
    if (length >= 6 && word[0] == 'A' && word[1] == 'l') {
        dropLeadingChars2(word, length, 2);
        return word;
    }

    /* {wY}-- -> -- */
    if (length >= 5 && (word[0] == 'w' || word[0] == 'Y')) {
        dropLeadingChars2(word, length, 1);
    }
    return word;
}

/*
 * Deletes the first `count` characters of `word` (whose current length is
 * `length`, with count <= length) by sliding the remainder, including the
 * terminating NUL, to the front.  memmove is required because the source
 * and destination ranges overlap.
 */
static void dropLeadingChars2(char *word, size_t length, size_t count)
{
    memmove(word, word + count, length - count + 1);
}