/*
 * Decompiled with CFR 0.152.
 */
package org.opensearch.search.aggregations.bucket.terms;

import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.Objects;
import java.util.function.LongConsumer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.opensearch.common.lease.Releasable;
import org.opensearch.common.lease.Releasables;
import org.opensearch.common.util.BigArrays;
import org.opensearch.common.util.BytesRefHash;
import org.opensearch.common.util.ObjectArray;
import org.opensearch.index.analysis.NamedAnalyzer;
import org.opensearch.index.mapper.MappedFieldType;
import org.opensearch.index.query.QueryBuilder;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.lucene.analysis.miscellaneous.DeDuplicatingTokenFilter;
import org.opensearch.lucene.analysis.miscellaneous.DuplicateByteSequenceSpotter;
import org.opensearch.search.DocValueFormat;
import org.opensearch.search.aggregations.Aggregator;
import org.opensearch.search.aggregations.AggregatorFactories;
import org.opensearch.search.aggregations.AggregatorFactory;
import org.opensearch.search.aggregations.CardinalityUpperBound;
import org.opensearch.search.aggregations.LeafBucketCollector;
import org.opensearch.search.aggregations.LeafBucketCollectorBase;
import org.opensearch.search.aggregations.bucket.BucketUtils;
import org.opensearch.search.aggregations.bucket.terms.IncludeExclude;
import org.opensearch.search.aggregations.bucket.terms.MapStringTermsAggregator;
import org.opensearch.search.aggregations.bucket.terms.SignificanceLookup;
import org.opensearch.search.aggregations.bucket.terms.SignificantTextAggregationBuilder;
import org.opensearch.search.aggregations.bucket.terms.TermsAggregator;
import org.opensearch.search.aggregations.bucket.terms.heuristic.SignificanceHeuristic;
import org.opensearch.search.internal.SearchContext;
import org.opensearch.search.lookup.SourceLookup;

public class SignificantTextAggregatorFactory
extends AggregatorFactory {
    private static final int MEMORY_GROWTH_REPORTING_INTERVAL_BYTES = 5000;
    private final IncludeExclude includeExclude;
    private final String indexedFieldName;
    private final MappedFieldType fieldType;
    private final String[] sourceFieldNames;
    private final QueryBuilder backgroundFilter;
    private final TermsAggregator.BucketCountThresholds bucketCountThresholds;
    private final SignificanceHeuristic significanceHeuristic;
    private final boolean filterDuplicateText;

    public SignificantTextAggregatorFactory(String name, IncludeExclude includeExclude, QueryBuilder backgroundFilter, TermsAggregator.BucketCountThresholds bucketCountThresholds, SignificanceHeuristic significanceHeuristic, QueryShardContext queryShardContext, AggregatorFactory parent, AggregatorFactories.Builder subFactoriesBuilder, String fieldName, String[] sourceFieldNames, boolean filterDuplicateText, Map<String, Object> metadata) throws IOException {
        super(name, queryShardContext, parent, subFactoriesBuilder, metadata);
        String[] stringArray;
        this.fieldType = queryShardContext.fieldMapper(fieldName);
        if (this.fieldType != null && this.fieldType.indexAnalyzer() == null) {
            throw new IllegalArgumentException("Field [" + this.fieldType.name() + "] has no analyzer, but SignificantText requires an analyzed field");
        }
        String string = this.indexedFieldName = this.fieldType != null ? this.fieldType.name() : fieldName;
        if (sourceFieldNames == null) {
            String[] stringArray2 = new String[1];
            stringArray = stringArray2;
            stringArray2[0] = this.indexedFieldName;
        } else {
            stringArray = sourceFieldNames;
        }
        this.sourceFieldNames = stringArray;
        this.includeExclude = includeExclude;
        this.backgroundFilter = backgroundFilter;
        this.filterDuplicateText = filterDuplicateText;
        this.bucketCountThresholds = bucketCountThresholds;
        this.significanceHeuristic = significanceHeuristic;
    }

    @Override
    protected Aggregator createInternal(SearchContext searchContext, Aggregator parent, CardinalityUpperBound cardinality, Map<String, Object> metadata) throws IOException {
        TermsAggregator.BucketCountThresholds bucketCountThresholds = new TermsAggregator.BucketCountThresholds(this.bucketCountThresholds);
        if (bucketCountThresholds.getShardSize() == SignificantTextAggregationBuilder.DEFAULT_BUCKET_COUNT_THRESHOLDS.getShardSize()) {
            bucketCountThresholds.setShardSize(2 * BucketUtils.suggestShardSideQueueSize(bucketCountThresholds.getRequiredSize()));
        }
        int maxRegexLength = searchContext.getQueryShardContext().getIndexSettings().getMaxRegexLength();
        IncludeExclude.StringFilter incExcFilter = this.includeExclude == null ? null : this.includeExclude.convertToStringFilter(DocValueFormat.RAW, maxRegexLength);
        SignificantTextCollectorSource collectorSource = new SignificantTextCollectorSource(this.queryShardContext.bigArrays(), this.fieldType, this.sourceFieldNames, this.filterDuplicateText);
        SignificanceLookup lookup = new SignificanceLookup(this.queryShardContext, this.fieldType, DocValueFormat.RAW, this.backgroundFilter);
        return new MapStringTermsAggregator(this.name, this.factories, collectorSource, a -> {
            MapStringTermsAggregator mapStringTermsAggregator = a;
            Objects.requireNonNull(mapStringTermsAggregator);
            return mapStringTermsAggregator.new MapStringTermsAggregator.SignificantTermsResults(lookup, this.significanceHeuristic, cardinality);
        }, null, DocValueFormat.RAW, bucketCountThresholds, incExcFilter, searchContext, parent, Aggregator.SubAggCollectionMode.BREADTH_FIRST, false, cardinality, metadata);
    }

    @Override
    protected boolean supportsConcurrentSegmentSearch() {
        return true;
    }

    private static class SignificantTextCollectorSource
    implements MapStringTermsAggregator.CollectorSource {
        private final SourceLookup sourceLookup = new SourceLookup();
        private final BigArrays bigArrays;
        private final MappedFieldType fieldType;
        private final String[] sourceFieldNames;
        private ObjectArray<DuplicateByteSequenceSpotter> dupSequenceSpotters;

        SignificantTextCollectorSource(BigArrays bigArrays, MappedFieldType fieldType, String[] sourceFieldNames, boolean filterDuplicateText) {
            this.bigArrays = bigArrays;
            this.fieldType = fieldType;
            this.sourceFieldNames = sourceFieldNames;
            this.dupSequenceSpotters = filterDuplicateText ? bigArrays.newObjectArray(1L) : null;
        }

        @Override
        public boolean needsScores() {
            return false;
        }

        @Override
        public LeafBucketCollector getLeafCollector(final IncludeExclude.StringFilter includeExclude, final LeafReaderContext ctx, final LeafBucketCollector sub, final LongConsumer addRequestCircuitBreakerBytes, final MapStringTermsAggregator.CollectConsumer consumer) throws IOException {
            return new LeafBucketCollectorBase(sub, null){
                private final BytesRefBuilder scratch;
                {
                    super(sub2, values);
                    this.scratch = new BytesRefBuilder();
                }

                @Override
                public void collect(int doc, long owningBucketOrd) throws IOException {
                    if (dupSequenceSpotters == null) {
                        this.collectFromSource(doc, owningBucketOrd, null);
                        return;
                    }
                    dupSequenceSpotters = bigArrays.grow(dupSequenceSpotters, owningBucketOrd + 1L);
                    DuplicateByteSequenceSpotter spotter = dupSequenceSpotters.get(owningBucketOrd);
                    if (spotter == null) {
                        spotter = new DuplicateByteSequenceSpotter();
                        dupSequenceSpotters.set(owningBucketOrd, spotter);
                    }
                    this.collectFromSource(doc, owningBucketOrd, spotter);
                    spotter.startNewSequence();
                }

                /*
                 * WARNING - Removed try catching itself - possible behaviour change.
                 */
                private void collectFromSource(int doc, long owningBucketOrd, DuplicateByteSequenceSpotter spotter) throws IOException {
                    sourceLookup.setSegmentAndDocument(ctx, doc);
                    BytesRefHash inDocTerms = new BytesRefHash(256L, bigArrays);
                    try {
                        for (String sourceField : sourceFieldNames) {
                            Iterator itr = sourceLookup.extractRawValues(sourceField).stream().map(obj -> {
                                if (obj == null) {
                                    return null;
                                }
                                if (obj instanceof BytesRef) {
                                    return fieldType.valueForDisplay(obj).toString();
                                }
                                return obj.toString();
                            }).iterator();
                            NamedAnalyzer analyzer = fieldType.indexAnalyzer();
                            while (itr.hasNext()) {
                                TokenStream ts = analyzer.tokenStream(fieldType.name(), (String)itr.next());
                                this.processTokenStream(doc, owningBucketOrd, ts, inDocTerms, spotter);
                            }
                        }
                    }
                    finally {
                        Releasables.close((Releasable)inDocTerms);
                    }
                }

                /*
                 * WARNING - Removed try catching itself - possible behaviour change.
                 */
                private void processTokenStream(int doc, long owningBucketOrd, TokenStream ts, BytesRefHash inDocTerms, DuplicateByteSequenceSpotter spotter) throws IOException {
                    long growth;
                    long lastTrieSize = 0L;
                    if (spotter != null) {
                        lastTrieSize = spotter.getEstimatedSizeInBytes();
                        ts = new DeDuplicatingTokenFilter((TokenStream)ts, spotter);
                    }
                    CharTermAttribute termAtt = (CharTermAttribute)ts.addAttribute(CharTermAttribute.class);
                    ts.reset();
                    try {
                        while (ts.incrementToken()) {
                            long newTrieSize;
                            long growth2;
                            if (spotter != null && (growth2 = (newTrieSize = spotter.getEstimatedSizeInBytes()) - lastTrieSize) > 5000L) {
                                addRequestCircuitBreakerBytes.accept(growth2);
                                lastTrieSize = newTrieSize;
                            }
                            this.scratch.clear();
                            this.scratch.copyChars((CharSequence)termAtt);
                            BytesRef bytes = this.scratch.get();
                            if (includeExclude != null && !includeExclude.accept(bytes) || inDocTerms.add(bytes) < 0L) continue;
                            consumer.accept(sub, doc, owningBucketOrd, bytes);
                        }
                    }
                    finally {
                        ts.close();
                    }
                    if (spotter != null && (growth = spotter.getEstimatedSizeInBytes() - lastTrieSize) > 0L) {
                        addRequestCircuitBreakerBytes.accept(growth);
                    }
                }
            };
        }

        public void close() {
            Releasables.close(this.dupSequenceSpotters);
        }
    }
}

