package gpl.pierrick.brihaye.aramorph.lucene;

import com.sun.tools.doclets.TagletManager;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;

/* loaded from: input_file:WEB-INF/lib/ArabicAnalyzer.jar:gpl/pierrick/brihaye/aramorph/lucene/ArabicGrammaticalFilter.class */
/**
 * Token filter that keeps or drops tokens according to their type
 * ({@link Token#type()}), which upstream is expected to carry a grammatical
 * category name.
 *
 * <ul>
 *   <li>Tokens whose type is in the "worthy" list are emitted.</li>
 *   <li>Tokens whose type is in the "unworthy" list are silently dropped.</li>
 *   <li>Tokens of any other type are emitted, after a warning is written to
 *       {@code System.err}.</li>
 * </ul>
 *
 * In addition, a given term text is emitted at most once between two tokens
 * that have a non-zero position increment: the set of already-emitted term
 * texts is reset each time such a token is read.
 *
 * <p>Instances hold mutable per-stream state and make no attempt at
 * synchronization.
 */
public class ArabicGrammaticalFilter extends TokenFilter {

    /** Token types that are passed through to the consumer. */
    private static final Set<String> WORTHY_CATEGORIES = new HashSet<String>(Arrays.asList(
            "ADJ", "ADV", "FUT_PART", "NOUN", "NOUN_PROP",
            "NSUFF_FEM_DU_ACCGEN", "NSUFF_FEM_DU_ACCGEN_POSS",
            "NSUFF_FEM_DU_NOM", "NSUFF_FEM_DU_NOM_POSS",
            "NSUFF_MASC_DU_ACCGEN", "NSUFF_MASC_DU_ACCGEN_POSS",
            "NSUFF_MASC_DU_NOM", "NSUFF_MASC_DU_NOM_POSS",
            "PART",
            "PVSUFF_SUBJ:1S", "PVSUFF_SUBJ:2FP", "PVSUFF_SUBJ:2FS",
            "PVSUFF_SUBJ:2MP", "PVSUFF_SUBJ:2MS", "PVSUFF_SUBJ:3FP",
            "PVSUFF_SUBJ:3FS", "PVSUFF_SUBJ:3MP", "PVSUFF_SUBJ:3MS",
            "VERB_IMPERATIVE", "VERB_IMPERFECT", "VERB_PERFECT",
            "NO_RESULT"));

    /** Token types that are dropped without any diagnostic. */
    private static final Set<String> UNWORTHY_CATEGORIES = new HashSet<String>(Arrays.asList(
            "ABBREV", "CONJ", "DET",
            "DEM_PRON_F", "DEM_PRON_FS", "DEM_PRON_FD",
            "DEM_PRON_MD", "DEM_PRON_MP", "DEM_PRON_MS",
            "FUNC_WORD", "INTERJ", "INTERROG", "INTERROG_PART",
            "IVSUFF_SUBJ:MP_MOOD:I", "NEG_PART", "NUMERIC_COMMA",
            "POSS_PRON_1S", "PREP",
            "PRON_1P", "PRON_1S", "PRON_2D", "PRON_2FP", "PRON_2FS",
            "PRON_2MP", "PRON_2MS", "PRON_3D", "PRON_3FP", "PRON_3FS",
            "PRON_3MP", "PRON_3MS",
            "REL_PRON", "REL_PRON+bayona", "REL_PRON+bayoni"));

    /** When true, every worthy token is traced on {@code System.out}. */
    private final boolean debug;

    /**
     * Term texts already emitted since the last token with a non-zero
     * position increment. Initialised eagerly so that a stream whose first
     * token has a zero increment does not hit a null reference.
     */
    private Set<String> tokenStems = new HashSet<String>();

    /**
     * Creates a filter with debug tracing disabled.
     *
     * @param tokenStream the stream to read tokens from
     */
    public ArabicGrammaticalFilter(TokenStream tokenStream) {
        this(tokenStream, false);
    }

    /**
     * Creates a filter.
     *
     * @param tokenStream the stream to read tokens from
     * @param z           {@code true} to trace each worthy token (term text,
     *                    type and offsets) on {@code System.out}
     */
    public ArabicGrammaticalFilter(TokenStream tokenStream, boolean z) {
        super(tokenStream);
        this.debug = z;
    }

    /**
     * Returns the next token that survives filtering.
     *
     * @return the next worthy (or unknown-category) token whose term text has
     *         not yet been emitted since the last non-zero position
     *         increment, or {@code null} once the input is exhausted
     * @throws IOException if the underlying stream fails
     */
    @Override // org.apache.lucene.analysis.TokenStream
    public final Token next() throws IOException {
        // A for loop guarantees the stream is advanced on every path,
        // including the "unworthy" one, which must not re-examine the same
        // token forever.
        for (Token token = this.input.next(); token != null; token = this.input.next()) {
            if (token.getPositionIncrement() != 0) {
                // Start a fresh de-duplication scope.
                this.tokenStems = new HashSet<String>();
            }

            String type = token.type();
            if (WORTHY_CATEGORIES.contains(type)) {
                if (this.debug) {
                    // "-" is spelled out literally; it must not depend on a
                    // constant from the JDK's internal doclet tooling.
                    System.out.println(token.termText() + "\t" + type + "\t"
                            + "[" + token.startOffset() + "-" + token.endOffset() + "]");
                }
            } else if (UNWORTHY_CATEGORIES.contains(type)) {
                continue;
            } else {
                // Unknown category: warn, but still let the token through.
                System.err.println("What to do with category : " + type + " ?");
            }

            // Set.add returns false when the term text was already emitted.
            if (this.tokenStems.add(token.termText())) {
                return token;
            }
        }
        return null;
    }
}
