/*
 * Decompiled with CFR 0.152.
 */
package org.elasticsearch.xpack.ml.inference.nlp.tokenizers;

import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.elasticsearch.xpack.ml.inference.nlp.tokenizers.ControlCharFilter;
import org.elasticsearch.xpack.ml.inference.nlp.tokenizers.WordPieceTokenFilter;

/**
 * Lucene {@link Analyzer} that feeds whitespace-separated tokens through a
 * {@link WordPieceTokenFilter}.
 *
 * <p>The filter created by the latest {@link #createComponents(String)} call is kept
 * in a field so that {@link #getTokens()} can expose what it produced. That field is
 * plain mutable state with no synchronization.
 */
public class WordPieceAnalyzer
extends Analyzer {
    /** Value given to the {@link WhitespaceTokenizer} constructor (its maximum token length). */
    private static final int MAX_TOKEN_LENGTH = 512;

    private final List<String> vocabulary;
    private final List<String> neverSplit;
    private final String unknownToken;
    private final boolean doLowerCase;
    private final boolean doTokenizeCjKChars;
    private final boolean doStripAccents;

    /** Filter from the most recent {@link #createComponents(String)} call; {@code null} until then. */
    private WordPieceTokenFilter innerTokenFilter;

    /**
     * Stores the settings that are later forwarded to {@code WordPieceTokenFilter.build}.
     *
     * @param vocabulary         vocabulary entries handed to the word piece filter
     * @param neverSplit         entries handed to the word piece filter as "never split"
     * @param doLowerCase        lower-casing flag forwarded to the filter
     * @param doTokenizeCjKChars CJK-character flag forwarded to the filter
     * @param doStripAccents     accent-stripping flag forwarded to the filter
     * @param unknownToken       unknown-token text forwarded to the filter
     */
    public WordPieceAnalyzer(List<String> vocabulary, List<String> neverSplit, boolean doLowerCase, boolean doTokenizeCjKChars, boolean doStripAccents, String unknownToken) {
        this.vocabulary = vocabulary;
        this.neverSplit = neverSplit;
        this.unknownToken = unknownToken;
        this.doLowerCase = doLowerCase;
        this.doTokenizeCjKChars = doTokenizeCjKChars;
        this.doStripAccents = doStripAccents;
    }

    /**
     * Creates the tokenizer that sits at the head of the analysis chain.
     *
     * @return a new {@link WhitespaceTokenizer}
     */
    protected Tokenizer createTokenizer() {
        return new WhitespaceTokenizer(MAX_TOKEN_LENGTH);
    }

    /**
     * Builds the tokenizer plus word piece filter chain and remembers the filter for
     * {@link #getTokens()}.
     *
     * @param fieldName field name supplied by the caller; not used here
     * @return components pairing the tokenizer with the word piece filter
     * @throws UncheckedIOException if building the filter raises an {@link IOException}
     */
    protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
        // NOTE(review): presumably the per-word character limit - confirm against WordPieceTokenFilter.build.
        final int buildLimit = 100;
        Tokenizer source = createTokenizer();
        try {
            innerTokenFilter = WordPieceTokenFilter.build(
                doLowerCase,
                doTokenizeCjKChars,
                doStripAccents,
                neverSplit,
                vocabulary,
                unknownToken,
                buildLimit,
                source
            );
        }
        catch (IOException ioFailure) {
            throw new UncheckedIOException(ioFailure);
        }
        return new Analyzer.TokenStreamComponents(source, innerTokenFilter);
    }

    /**
     * Returns the tokenized values held by the most recently built filter.
     *
     * @return the filter's tokenized values, or an empty list when no filter has been built yet
     */
    public List<WordPieceTokenFilter.WordPieceToken> getTokens() {
        if (innerTokenFilter == null) {
            return List.of();
        }
        return innerTokenFilter.getTokenizedValues();
    }

    /**
     * Wraps the incoming reader in a {@link ControlCharFilter} before tokenization.
     *
     * @param fieldName field name supplied by the caller; not used here
     * @param reader    raw character source
     * @return the wrapped reader
     */
    protected Reader initReader(String fieldName, Reader reader) {
        return wrapWithControlCharFilter(reader);
    }

    /**
     * Applies the same {@link ControlCharFilter} wrapping as {@link #initReader(String, Reader)}.
     *
     * @param fieldName field name supplied by the caller; not used here
     * @param reader    raw character source
     * @return the wrapped reader
     */
    protected Reader initReaderForNormalization(String fieldName, Reader reader) {
        return wrapWithControlCharFilter(reader);
    }

    /** Shared wrapping step used by both reader-initialisation hooks. */
    private static Reader wrapWithControlCharFilter(Reader reader) {
        return new ControlCharFilter(reader);
    }
}

