mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-06-28 01:22:26 -04:00
This change improves the performance of sparse vector statistics gathering by reading each sparse vector field's document count directly from its terms (`Terms.getDocCount()`), rather than looking up each field name as a term in the field-names field (`FieldNamesFieldMapper.NAME`) to compute stats. By avoiding per-term disk/network reads and instead leveraging statistics already loaded into leaf readers at index opening, we expect to significantly reduce overhead. Relates to #128583.
This commit is contained in:
parent
1b03d0956d
commit
bab6e835d4
2 changed files with 14 additions and 10 deletions
5
docs/changelog/128740.yaml
Normal file
5
docs/changelog/128740.yaml
Normal file
|
@@ -0,0 +1,5 @@
|
|||
pr: 128740
|
||||
summary: Optimize sparse vector stats collection
|
||||
area: Stats
|
||||
type: enhancement
|
||||
issues: []
|
|
@@ -25,7 +25,6 @@ import org.apache.lucene.index.SegmentCommitInfo;
|
|||
import org.apache.lucene.index.SegmentInfos;
|
||||
import org.apache.lucene.index.SegmentReader;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.QueryCache;
|
||||
import org.apache.lucene.search.QueryCachingPolicy;
|
||||
|
@@ -61,7 +60,6 @@ import org.elasticsearch.core.TimeValue;
|
|||
import org.elasticsearch.index.IndexVersion;
|
||||
import org.elasticsearch.index.VersionType;
|
||||
import org.elasticsearch.index.mapper.DocumentParser;
|
||||
import org.elasticsearch.index.mapper.FieldNamesFieldMapper;
|
||||
import org.elasticsearch.index.mapper.LuceneDocument;
|
||||
import org.elasticsearch.index.mapper.Mapper;
|
||||
import org.elasticsearch.index.mapper.Mapping;
|
||||
|
@@ -337,14 +335,15 @@ public abstract class Engine implements Closeable {
|
|||
|
||||
private long getSparseVectorValueCount(final LeafReader atomicReader, List<BytesRef> fields) throws IOException {
|
||||
long count = 0;
|
||||
Terms terms = atomicReader.terms(FieldNamesFieldMapper.NAME);
|
||||
if (terms == null) {
|
||||
return count;
|
||||
}
|
||||
TermsEnum termsEnum = terms.iterator();
|
||||
for (var fieldName : fields) {
|
||||
if (termsEnum.seekExact(fieldName)) {
|
||||
count += termsEnum.docFreq();
|
||||
for (var fieldNameBR : fields) {
|
||||
var fieldName = fieldNameBR.utf8ToString();
|
||||
var fi = atomicReader.getFieldInfos().fieldInfo(fieldName);
|
||||
if (fi == null) {
|
||||
continue;
|
||||
}
|
||||
Terms terms = atomicReader.terms(fieldName);
|
||||
if (terms != null) {
|
||||
count += terms.getDocCount();
|
||||
}
|
||||
}
|
||||
return count;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue