Cut over stored fields to ZSTD for compression. (#103374)

This cuts over stored fields to ZSTD: `index.codec: best_speed` (default) now uses ZSTD with level 0 and blocks of at most 128 documents or 14kB, and `index.codec: best_compression` now uses ZSTD with level 3 and blocks of at most 2,048 documents or 240kB.

Compared with the current codecs, this would yield similar indexing speed, much better space efficiency and similar retrieval speed. Benchmarks on the `elastic/logs` track suggest 10% better storage efficiency and slightly faster ingestion.

The Lucene codec infrastructure records the codec on a per-segment basis and ensures that this change is backward-compatible. Segments will get progressively migrated to ZSTD as they get merged in the background.

Bindings for ZSTD are provided by the Panama FFI API on JDK 21+ and by JNA on older JDKs.

ZSTD support is currently behind a feature flag, so it won't be enabled immediately when this change gets merged. Enabling it by default will need a follow-up change.

Co-authored-by: Mark Vieira <portugee@gmail.com>
Co-authored-by: Ryan Ernst <ryan@iernst.net>
This commit is contained in:
Adrien Grand 2024-04-09 09:18:58 +02:00 committed by GitHub
parent 911aaf8ef9
commit 49ffa045a6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 678 additions and 126 deletions

View file

@ -6,6 +6,7 @@
* Side Public License, v 1.
*/
import org.elasticsearch.index.codec.Elasticsearch814Codec;
import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat;
import org.elasticsearch.plugins.internal.RestExtension;
@ -243,6 +244,7 @@ module org.elasticsearch.server {
exports org.elasticsearch.index.codec;
exports org.elasticsearch.index.codec.tsdb;
exports org.elasticsearch.index.codec.bloomfilter;
exports org.elasticsearch.index.codec.zstd;
exports org.elasticsearch.index.engine;
exports org.elasticsearch.index.fielddata;
exports org.elasticsearch.index.fielddata.fieldcomparator;
@ -433,6 +435,7 @@ module org.elasticsearch.server {
with
org.elasticsearch.index.codec.vectors.ES813FlatVectorFormat,
org.elasticsearch.index.codec.vectors.ES813Int8FlatVectorFormat;
provides org.apache.lucene.codecs.Codec with Elasticsearch814Codec;
exports org.elasticsearch.cluster.routing.allocation.shards
to