Add more dense_vector details for cluster stats field stats (#113607)

This adds some more counts for dense_vector field mapping stats. This
allows for seeing the number of mappings with a given element type,
similarity, or index type.
This commit is contained in:
Benjamin Trent 2024-10-01 11:58:04 -04:00 committed by GitHub
parent ee73969bbb
commit 8ed0df42f6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 170 additions and 23 deletions

View file

@ -0,0 +1,5 @@
pr: 113607
summary: Add more `dense_vector` details for cluster stats field stats
area: Search
type: enhancement
issues: []

View file

@ -432,6 +432,15 @@ To get information on segment files, use the <<cluster-nodes-stats,node stats AP
`indexed_vector_dim_max`:: `indexed_vector_dim_max`::
(integer) For dense_vector field types, the maximum dimension of all indexed vector types in selected nodes. (integer) For dense_vector field types, the maximum dimension of all indexed vector types in selected nodes.
`vector_index_type_count`::
(object) For dense_vector field types, the number of vector fields by index type in selected nodes. Fields that are not indexed are counted under `not_indexed`.
`vector_similarity_type_count`::
(object) For dense_vector field types, the number of vector types by similarity type in selected nodes.
`vector_element_type_count`::
(object) For dense_vector field types, the number of vector types by element type in selected nodes.
`script_count`:: `script_count`::
(integer) Number of fields that declare a script. (integer) Number of fields that declare a script.

View file

@ -1,4 +1,7 @@
setup: setup:
- requires:
cluster_features: [ "gte_v8.4.0" ]
reason: "Cluster mappings stats for indexed dense vector was added in 8.4"
- skip: - skip:
features: headers features: headers
@ -15,11 +18,16 @@ setup:
dims: 768 dims: 768
index: true index: true
similarity: l2_norm similarity: l2_norm
element_type: byte
index_options:
type: hnsw
vector2: vector2:
type: dense_vector type: dense_vector
dims: 1024 dims: 1024
index: true index: true
similarity: dot_product similarity: dot_product
index_options:
type: int8_hnsw
vector3: vector3:
type: dense_vector type: dense_vector
dims: 100 dims: 100
@ -41,9 +49,6 @@ setup:
--- ---
"Field mapping stats": "Field mapping stats":
- requires:
cluster_features: ["gte_v8.4.0"]
reason: "Cluster mappings stats for indexed dense vector was added in 8.4"
- do: { cluster.stats: { } } - do: { cluster.stats: { } }
- length: { indices.mappings.field_types: 1 } - length: { indices.mappings.field_types: 1 }
- match: { indices.mappings.field_types.0.name: dense_vector } - match: { indices.mappings.field_types.0.name: dense_vector }
@ -52,3 +57,28 @@ setup:
- match: { indices.mappings.field_types.0.indexed_vector_count: 3 } - match: { indices.mappings.field_types.0.indexed_vector_count: 3 }
- match: { indices.mappings.field_types.0.indexed_vector_dim_min: 768 } - match: { indices.mappings.field_types.0.indexed_vector_dim_min: 768 }
- match: { indices.mappings.field_types.0.indexed_vector_dim_max: 1024 } - match: { indices.mappings.field_types.0.indexed_vector_dim_max: 1024 }
---
# Verifies that cluster stats report, for the dense_vector field type, the
# per-index-type, per-similarity and per-element-type mapping counts in
# addition to the existing count / dimension statistics.
# Only runs when GET /_cluster/stats advertises the
# "verbose-dense-vector-mapping-stats" capability.
"Field mapping stats with field details":
- requires:
test_runner_features: [ capabilities ]
capabilities:
- method: GET
path: /_cluster/stats
capabilities:
- "verbose-dense-vector-mapping-stats"
reason: "Capability required to run test"
- do: { cluster.stats: { } }
- length: { indices.mappings.field_types: 1 }
- match: { indices.mappings.field_types.0.name: dense_vector }
- match: { indices.mappings.field_types.0.count: 4 }
- match: { indices.mappings.field_types.0.index_count: 2 }
- match: { indices.mappings.field_types.0.indexed_vector_count: 3 }
- match: { indices.mappings.field_types.0.indexed_vector_dim_min: 768 }
- match: { indices.mappings.field_types.0.indexed_vector_dim_max: 1024 }
- match: { indices.mappings.field_types.0.vector_index_type_count.hnsw: 1 }
- match: { indices.mappings.field_types.0.vector_index_type_count.int8_hnsw: 2 }
- match: { indices.mappings.field_types.0.vector_index_type_count.not_indexed: 1 }
- match: { indices.mappings.field_types.0.vector_similarity_type_count.l2_norm: 2 }
- match: { indices.mappings.field_types.0.vector_similarity_type_count.dot_product: 1 }
- match: { indices.mappings.field_types.0.vector_element_type_count.float: 3 }
- match: { indices.mappings.field_types.0.vector_element_type_count.byte: 1 }

View file

@ -9,11 +9,12 @@
package org.elasticsearch.action.admin.cluster.stats; package org.elasticsearch.action.admin.cluster.stats;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentBuilder;
import java.io.IOException; import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects; import java.util.Objects;
/** /**
@ -21,7 +22,10 @@ import java.util.Objects;
*/ */
public final class DenseVectorFieldStats extends FieldStats { public final class DenseVectorFieldStats extends FieldStats {
static final int UNSET = -1; static final int UNSET = -1;
static final String NOT_INDEXED = "not_indexed";
Map<String, Integer> vectorIndexTypeCount; // count of mappings by index type
Map<String, Integer> vectorSimilarityTypeCount; // count of mappings by similarity
Map<String, Integer> vectorElementTypeCount; // count of mappings by element type
int indexedVectorCount; // number of times vectors with index:true are used in mappings of this cluster int indexedVectorCount; // number of times vectors with index:true are used in mappings of this cluster
int indexedVectorDimMin; // minimum dimension of indexed vectors in this cluster int indexedVectorDimMin; // minimum dimension of indexed vectors in this cluster
int indexedVectorDimMax; // maximum dimension of indexed vectors in this cluster int indexedVectorDimMax; // maximum dimension of indexed vectors in this cluster
@ -31,21 +35,14 @@ public final class DenseVectorFieldStats extends FieldStats {
indexedVectorCount = 0; indexedVectorCount = 0;
indexedVectorDimMin = UNSET; indexedVectorDimMin = UNSET;
indexedVectorDimMax = UNSET; indexedVectorDimMax = UNSET;
} vectorIndexTypeCount = new HashMap<>();
vectorSimilarityTypeCount = new HashMap<>();
DenseVectorFieldStats(StreamInput in) throws IOException { vectorElementTypeCount = new HashMap<>();
super(in);
indexedVectorCount = in.readVInt();
indexedVectorDimMin = in.readVInt();
indexedVectorDimMax = in.readVInt();
} }
@Override @Override
public void writeTo(StreamOutput out) throws IOException { public void writeTo(StreamOutput out) throws IOException {
super.writeTo(out); assert false : "writeTo should not be called on DenseVectorFieldStats";
out.writeVInt(indexedVectorCount);
out.writeVInt(indexedVectorDimMin);
out.writeVInt(indexedVectorDimMax);
} }
@Override @Override
@ -53,6 +50,21 @@ public final class DenseVectorFieldStats extends FieldStats {
builder.field("indexed_vector_count", indexedVectorCount); builder.field("indexed_vector_count", indexedVectorCount);
builder.field("indexed_vector_dim_min", indexedVectorDimMin); builder.field("indexed_vector_dim_min", indexedVectorDimMin);
builder.field("indexed_vector_dim_max", indexedVectorDimMax); builder.field("indexed_vector_dim_max", indexedVectorDimMax);
if (vectorIndexTypeCount.isEmpty() == false) {
builder.startObject("vector_index_type_count");
builder.mapContents(vectorIndexTypeCount);
builder.endObject();
}
if (vectorSimilarityTypeCount.isEmpty() == false) {
builder.startObject("vector_similarity_type_count");
builder.mapContents(vectorSimilarityTypeCount);
builder.endObject();
}
if (vectorElementTypeCount.isEmpty() == false) {
builder.startObject("vector_element_type_count");
builder.mapContents(vectorElementTypeCount);
builder.endObject();
}
} }
@Override @Override
@ -69,11 +81,53 @@ public final class DenseVectorFieldStats extends FieldStats {
DenseVectorFieldStats that = (DenseVectorFieldStats) o; DenseVectorFieldStats that = (DenseVectorFieldStats) o;
return indexedVectorCount == that.indexedVectorCount return indexedVectorCount == that.indexedVectorCount
&& indexedVectorDimMin == that.indexedVectorDimMin && indexedVectorDimMin == that.indexedVectorDimMin
&& indexedVectorDimMax == that.indexedVectorDimMax; && indexedVectorDimMax == that.indexedVectorDimMax
&& Objects.equals(vectorIndexTypeCount, that.vectorIndexTypeCount)
&& Objects.equals(vectorSimilarityTypeCount, that.vectorSimilarityTypeCount)
&& Objects.equals(vectorElementTypeCount, that.vectorElementTypeCount);
} }
@Override @Override
public int hashCode() { public int hashCode() {
return Objects.hash(super.hashCode(), indexedVectorCount, indexedVectorDimMin, indexedVectorDimMax); return Objects.hash(
super.hashCode(),
indexedVectorCount,
indexedVectorDimMin,
indexedVectorDimMax,
vectorIndexTypeCount,
vectorSimilarityTypeCount,
vectorElementTypeCount
);
}
@Override
public String toString() {
return "DenseVectorFieldStats{"
+ "vectorIndexTypeCount="
+ vectorIndexTypeCount
+ ", vectorSimilarityTypeCount="
+ vectorSimilarityTypeCount
+ ", vectorElementTypeCount="
+ vectorElementTypeCount
+ ", indexedVectorCount="
+ indexedVectorCount
+ ", indexedVectorDimMin="
+ indexedVectorDimMin
+ ", indexedVectorDimMax="
+ indexedVectorDimMax
+ ", scriptCount="
+ scriptCount
+ ", scriptLangs="
+ scriptLangs
+ ", fieldScriptStats="
+ fieldScriptStats
+ ", name='"
+ name
+ '\''
+ ", count="
+ count
+ ", indexCount="
+ indexCount
+ '}';
} }
} }

View file

@ -86,9 +86,30 @@ public final class MappingStats implements ToXContentFragment, Writeable {
FieldStats stats; FieldStats stats;
if (type.equals("dense_vector")) { if (type.equals("dense_vector")) {
stats = fieldTypes.computeIfAbsent(type, DenseVectorFieldStats::new); stats = fieldTypes.computeIfAbsent(type, DenseVectorFieldStats::new);
boolean indexed = fieldMapping.containsKey("index") ? (boolean) fieldMapping.get("index") : false;
if (indexed) {
DenseVectorFieldStats vStats = (DenseVectorFieldStats) stats; DenseVectorFieldStats vStats = (DenseVectorFieldStats) stats;
if (fieldMapping.containsKey("similarity")) {
Object similarity = fieldMapping.get("similarity");
vStats.vectorSimilarityTypeCount.compute(similarity.toString(), (t, c) -> c == null ? count : c + count);
}
String elementTypeStr = "float";
if (fieldMapping.containsKey("element_type")) {
Object elementType = fieldMapping.get("element_type");
elementTypeStr = elementType.toString();
}
vStats.vectorElementTypeCount.compute(elementTypeStr, (t, c) -> c == null ? count : c + count);
boolean indexed = fieldMapping.containsKey("index") && (boolean) fieldMapping.get("index");
if (indexed) {
Object indexOptions = fieldMapping.get("index_options");
// NOTE: although the default for `float` is now `int8_hnsw`, that default is written into the mapping explicitly.
// So if the index type is truly missing and the field is indexed, we default to `hnsw`.
String indexTypeStr = "hnsw";
if (indexOptions instanceof Map<?, ?> indexOptionsMap) {
Object indexType = indexOptionsMap.get("type");
if (indexType != null) {
indexTypeStr = indexType.toString();
}
}
vStats.vectorIndexTypeCount.compute(indexTypeStr, (t, c) -> c == null ? count : c + count);
vStats.indexedVectorCount += count; vStats.indexedVectorCount += count;
Object obj = fieldMapping.get("dims"); Object obj = fieldMapping.get("dims");
if (obj != null) { if (obj != null) {
@ -100,6 +121,8 @@ public final class MappingStats implements ToXContentFragment, Writeable {
vStats.indexedVectorDimMax = dims; vStats.indexedVectorDimMax = dims;
} }
} }
} else {
vStats.vectorIndexTypeCount.compute(DenseVectorFieldStats.NOT_INDEXED, (t, c) -> c == null ? 1 : c + 1);
} }
} else { } else {
stats = fieldTypes.computeIfAbsent(type, FieldStats::new); stats = fieldTypes.computeIfAbsent(type, FieldStats::new);

View file

@ -31,7 +31,10 @@ import static org.elasticsearch.rest.RestUtils.getTimeout;
@ServerlessScope(Scope.INTERNAL) @ServerlessScope(Scope.INTERNAL)
public class RestClusterStatsAction extends BaseRestHandler { public class RestClusterStatsAction extends BaseRestHandler {
private static final Set<String> SUPPORTED_CAPABILITIES = Set.of("human-readable-total-docs-size"); private static final Set<String> SUPPORTED_CAPABILITIES = Set.of(
"human-readable-total-docs-size",
"verbose-dense-vector-mapping-stats"
);
private static final Set<String> SUPPORTED_CAPABILITIES_CCS_STATS = Sets.union(SUPPORTED_CAPABILITIES, Set.of("ccs-stats")); private static final Set<String> SUPPORTED_CAPABILITIES_CCS_STATS = Sets.union(SUPPORTED_CAPABILITIES, Set.of("ccs-stats"));
public static final FeatureFlag CCS_TELEMETRY_FEATURE_FLAG = new FeatureFlag("ccs_telemetry"); public static final FeatureFlag CCS_TELEMETRY_FEATURE_FLAG = new FeatureFlag("ccs_telemetry");

View file

@ -114,7 +114,16 @@ public class MappingStatsTests extends AbstractWireSerializingTestCase<MappingSt
"index_count" : 2, "index_count" : 2,
"indexed_vector_count" : 2, "indexed_vector_count" : 2,
"indexed_vector_dim_min" : 100, "indexed_vector_dim_min" : 100,
"indexed_vector_dim_max" : 100 "indexed_vector_dim_max" : 100,
"vector_index_type_count" : {
"hnsw" : 2
},
"vector_similarity_type_count" : {
"dot_product" : 2
},
"vector_element_type_count" : {
"float" : 2
}
}, },
{ {
"name" : "keyword", "name" : "keyword",
@ -234,7 +243,16 @@ public class MappingStatsTests extends AbstractWireSerializingTestCase<MappingSt
"index_count" : 3, "index_count" : 3,
"indexed_vector_count" : 3, "indexed_vector_count" : 3,
"indexed_vector_dim_min" : 100, "indexed_vector_dim_min" : 100,
"indexed_vector_dim_max" : 100 "indexed_vector_dim_max" : 100,
"vector_index_type_count" : {
"hnsw" : 3
},
"vector_similarity_type_count" : {
"dot_product" : 3
},
"vector_element_type_count" : {
"float" : 3
}
}, },
{ {
"name" : "keyword", "name" : "keyword",
@ -460,6 +478,11 @@ public class MappingStatsTests extends AbstractWireSerializingTestCase<MappingSt
expectedStats.indexedVectorCount = 2 * indicesCount; expectedStats.indexedVectorCount = 2 * indicesCount;
expectedStats.indexedVectorDimMin = 768; expectedStats.indexedVectorDimMin = 768;
expectedStats.indexedVectorDimMax = 1024; expectedStats.indexedVectorDimMax = 1024;
expectedStats.vectorIndexTypeCount.put("hnsw", 2 * indicesCount);
expectedStats.vectorIndexTypeCount.put("not_indexed", 2);
expectedStats.vectorSimilarityTypeCount.put("dot_product", 3);
expectedStats.vectorSimilarityTypeCount.put("cosine", 3);
expectedStats.vectorElementTypeCount.put("float", 4 * indicesCount);
assertEquals(Collections.singletonList(expectedStats), mappingStats.getFieldTypeStats()); assertEquals(Collections.singletonList(expectedStats), mappingStats.getFieldTypeStats());
} }