Add more dense_vector details for cluster stats field stats (#113607)

This adds some more counts for dense_vector field mapping stats. This
allows for seeing the number of mappings with a given element type,
similarity, or index type.
This commit is contained in:
Benjamin Trent 2024-10-01 11:58:04 -04:00 committed by GitHub
parent ee73969bbb
commit 8ed0df42f6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 170 additions and 23 deletions

View file

@ -0,0 +1,5 @@
pr: 113607
summary: Add more `dense_vector` details for cluster stats field stats
area: Search
type: enhancement
issues: []

View file

@ -432,6 +432,15 @@ To get information on segment files, use the <<cluster-nodes-stats,node stats AP
`indexed_vector_dim_max`::
(integer) For dense_vector field types, the maximum dimension of all indexed vector types in selected nodes.
`vector_index_type_count`::
(object) For dense_vector field types, the number of vector fields by index type in selected nodes. Vector fields that are not indexed are counted under the `not_indexed` key.
`vector_similarity_type_count`::
(object) For dense_vector field types, the number of vector types by similarity type in selected nodes.
`vector_element_type_count`::
(object) For dense_vector field types, the number of vector types by element type in selected nodes.
`script_count`::
(integer) Number of fields that declare a script.

View file

@ -1,4 +1,7 @@
setup:
- requires:
cluster_features: [ "gte_v8.4.0" ]
reason: "Cluster mappings stats for indexed dense vector was added in 8.4"
- skip:
features: headers
@ -15,11 +18,16 @@ setup:
dims: 768
index: true
similarity: l2_norm
element_type: byte
index_options:
type: hnsw
vector2:
type: dense_vector
dims: 1024
index: true
similarity: dot_product
index_options:
type: int8_hnsw
vector3:
type: dense_vector
dims: 100
@ -41,9 +49,6 @@ setup:
---
"Field mapping stats":
- requires:
cluster_features: ["gte_v8.4.0"]
reason: "Cluster mappings stats for indexed dense vector was added in 8.4"
- do: { cluster.stats: { } }
- length: { indices.mappings.field_types: 1 }
- match: { indices.mappings.field_types.0.name: dense_vector }
@ -52,3 +57,28 @@ setup:
- match: { indices.mappings.field_types.0.indexed_vector_count: 3 }
- match: { indices.mappings.field_types.0.indexed_vector_dim_min: 768 }
- match: { indices.mappings.field_types.0.indexed_vector_dim_max: 1024 }
---
"Field mapping stats with field details":
- requires:
test_runner_features: [ capabilities ]
capabilities:
- method: GET
path: /_cluster/stats
capabilities:
- "verbose-dense-vector-mapping-stats"
reason: "Capability required to run test"
- do: { cluster.stats: { } }
- length: { indices.mappings.field_types: 1 }
- match: { indices.mappings.field_types.0.name: dense_vector }
- match: { indices.mappings.field_types.0.count: 4 }
- match: { indices.mappings.field_types.0.index_count: 2 }
- match: { indices.mappings.field_types.0.indexed_vector_count: 3 }
- match: { indices.mappings.field_types.0.indexed_vector_dim_min: 768 }
- match: { indices.mappings.field_types.0.indexed_vector_dim_max: 1024 }
- match: { indices.mappings.field_types.0.vector_index_type_count.hnsw: 1 }
- match: { indices.mappings.field_types.0.vector_index_type_count.int8_hnsw: 2 }
- match: { indices.mappings.field_types.0.vector_index_type_count.not_indexed: 1 }
- match: { indices.mappings.field_types.0.vector_similarity_type_count.l2_norm: 2 }
- match: { indices.mappings.field_types.0.vector_similarity_type_count.dot_product: 1 }
- match: { indices.mappings.field_types.0.vector_element_type_count.float: 3 }
- match: { indices.mappings.field_types.0.vector_element_type_count.byte: 1 }

View file

@ -9,11 +9,12 @@
package org.elasticsearch.action.admin.cluster.stats;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.xcontent.XContentBuilder;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
/**
@ -21,7 +22,10 @@ import java.util.Objects;
*/
public final class DenseVectorFieldStats extends FieldStats {
// Initial value assigned to indexedVectorDimMin and indexedVectorDimMax by the constructor.
static final int UNSET = -1;
// Key used in vectorIndexTypeCount for dense_vector mappings that are not indexed.
static final String NOT_INDEXED = "not_indexed";
Map<String, Integer> vectorIndexTypeCount; // count of mappings by index type
Map<String, Integer> vectorSimilarityTypeCount; // count of mappings by similarity
Map<String, Integer> vectorElementTypeCount; // count of mappings by element type
int indexedVectorCount; // number of times vectors with index:true are used in mappings of this cluster
int indexedVectorDimMin; // minimum dimension of indexed vectors in this cluster
int indexedVectorDimMax; // maximum dimension of indexed vectors in this cluster
@ -31,21 +35,14 @@ public final class DenseVectorFieldStats extends FieldStats {
indexedVectorCount = 0;
indexedVectorDimMin = UNSET;
indexedVectorDimMax = UNSET;
}
DenseVectorFieldStats(StreamInput in) throws IOException {
super(in);
indexedVectorCount = in.readVInt();
indexedVectorDimMin = in.readVInt();
indexedVectorDimMax = in.readVInt();
vectorIndexTypeCount = new HashMap<>();
vectorSimilarityTypeCount = new HashMap<>();
vectorElementTypeCount = new HashMap<>();
}
@Override
public void writeTo(StreamOutput out) throws IOException {
super.writeTo(out);
out.writeVInt(indexedVectorCount);
out.writeVInt(indexedVectorDimMin);
out.writeVInt(indexedVectorDimMax);
assert false : "writeTo should not be called on DenseVectorFieldStats";
}
@Override
@ -53,6 +50,21 @@ public final class DenseVectorFieldStats extends FieldStats {
builder.field("indexed_vector_count", indexedVectorCount);
builder.field("indexed_vector_dim_min", indexedVectorDimMin);
builder.field("indexed_vector_dim_max", indexedVectorDimMax);
if (vectorIndexTypeCount.isEmpty() == false) {
builder.startObject("vector_index_type_count");
builder.mapContents(vectorIndexTypeCount);
builder.endObject();
}
if (vectorSimilarityTypeCount.isEmpty() == false) {
builder.startObject("vector_similarity_type_count");
builder.mapContents(vectorSimilarityTypeCount);
builder.endObject();
}
if (vectorElementTypeCount.isEmpty() == false) {
builder.startObject("vector_element_type_count");
builder.mapContents(vectorElementTypeCount);
builder.endObject();
}
}
@Override
@ -69,11 +81,53 @@ public final class DenseVectorFieldStats extends FieldStats {
DenseVectorFieldStats that = (DenseVectorFieldStats) o;
return indexedVectorCount == that.indexedVectorCount
&& indexedVectorDimMin == that.indexedVectorDimMin
&& indexedVectorDimMax == that.indexedVectorDimMax;
&& indexedVectorDimMax == that.indexedVectorDimMax
&& Objects.equals(vectorIndexTypeCount, that.vectorIndexTypeCount)
&& Objects.equals(vectorSimilarityTypeCount, that.vectorSimilarityTypeCount)
&& Objects.equals(vectorElementTypeCount, that.vectorElementTypeCount);
}
@Override
public int hashCode() {
return Objects.hash(super.hashCode(), indexedVectorCount, indexedVectorDimMin, indexedVectorDimMax);
return Objects.hash(
super.hashCode(),
indexedVectorCount,
indexedVectorDimMin,
indexedVectorDimMax,
vectorIndexTypeCount,
vectorSimilarityTypeCount,
vectorElementTypeCount
);
}
@Override
public String toString() {
    // Render every field as "label=value" inside a "DenseVectorFieldStats{...}" wrapper;
    // only the name is wrapped in single quotes.
    final StringBuilder text = new StringBuilder("DenseVectorFieldStats{");
    text.append("vectorIndexTypeCount=").append(vectorIndexTypeCount);
    text.append(", vectorSimilarityTypeCount=").append(vectorSimilarityTypeCount);
    text.append(", vectorElementTypeCount=").append(vectorElementTypeCount);
    text.append(", indexedVectorCount=").append(indexedVectorCount);
    text.append(", indexedVectorDimMin=").append(indexedVectorDimMin);
    text.append(", indexedVectorDimMax=").append(indexedVectorDimMax);
    text.append(", scriptCount=").append(scriptCount);
    text.append(", scriptLangs=").append(scriptLangs);
    text.append(", fieldScriptStats=").append(fieldScriptStats);
    text.append(", name='").append(name).append('\'');
    text.append(", count=").append(count);
    text.append(", indexCount=").append(indexCount);
    return text.append('}').toString();
}
}

View file

@ -86,9 +86,30 @@ public final class MappingStats implements ToXContentFragment, Writeable {
FieldStats stats;
if (type.equals("dense_vector")) {
stats = fieldTypes.computeIfAbsent(type, DenseVectorFieldStats::new);
boolean indexed = fieldMapping.containsKey("index") ? (boolean) fieldMapping.get("index") : false;
DenseVectorFieldStats vStats = (DenseVectorFieldStats) stats;
// Only mappings that explicitly carry a "similarity" key contribute to the similarity counts.
if (fieldMapping.containsKey("similarity")) {
Object similarity = fieldMapping.get("similarity");
vStats.vectorSimilarityTypeCount.compute(similarity.toString(), (t, c) -> c == null ? count : c + count);
}
// A mapping without an "element_type" key is counted under "float".
String elementTypeStr = "float";
if (fieldMapping.containsKey("element_type")) {
Object elementType = fieldMapping.get("element_type");
elementTypeStr = elementType.toString();
}
vStats.vectorElementTypeCount.compute(elementTypeStr, (t, c) -> c == null ? count : c + count);
// A mapping without an "index" key is treated as not indexed.
boolean indexed = fieldMapping.containsKey("index") && (boolean) fieldMapping.get("index");
if (indexed) {
DenseVectorFieldStats vStats = (DenseVectorFieldStats) stats;
Object indexOptions = fieldMapping.get("index_options");
// NOTE: although the default index type for `float` vectors is now `int8_hnsw`, that default is actually
// added to the mapping. If the type is truly missing and the field is indexed, we fall back to `hnsw`.
String indexTypeStr = "hnsw";
if (indexOptions instanceof Map<?, ?> indexOptionsMap) {
Object indexType = indexOptionsMap.get("type");
if (indexType != null) {
indexTypeStr = indexType.toString();
}
}
vStats.vectorIndexTypeCount.compute(indexTypeStr, (t, c) -> c == null ? count : c + count);
vStats.indexedVectorCount += count;
Object obj = fieldMapping.get("dims");
if (obj != null) {
@ -100,6 +121,8 @@ public final class MappingStats implements ToXContentFragment, Writeable {
vStats.indexedVectorDimMax = dims;
}
}
} else {
// Non-indexed vectors are tallied under the NOT_INDEXED key of the index-type counts.
// NOTE(review): this increments by 1, whereas every other counter in this branch adds `count` --
// confirm whether this difference is intended.
vStats.vectorIndexTypeCount.compute(DenseVectorFieldStats.NOT_INDEXED, (t, c) -> c == null ? 1 : c + 1);
}
} else {
stats = fieldTypes.computeIfAbsent(type, FieldStats::new);

View file

@ -31,7 +31,10 @@ import static org.elasticsearch.rest.RestUtils.getTimeout;
@ServerlessScope(Scope.INTERNAL)
public class RestClusterStatsAction extends BaseRestHandler {
private static final Set<String> SUPPORTED_CAPABILITIES = Set.of("human-readable-total-docs-size");
private static final Set<String> SUPPORTED_CAPABILITIES = Set.of(
"human-readable-total-docs-size",
"verbose-dense-vector-mapping-stats"
);
private static final Set<String> SUPPORTED_CAPABILITIES_CCS_STATS = Sets.union(SUPPORTED_CAPABILITIES, Set.of("ccs-stats"));
public static final FeatureFlag CCS_TELEMETRY_FEATURE_FLAG = new FeatureFlag("ccs_telemetry");

View file

@ -114,7 +114,16 @@ public class MappingStatsTests extends AbstractWireSerializingTestCase<MappingSt
"index_count" : 2,
"indexed_vector_count" : 2,
"indexed_vector_dim_min" : 100,
"indexed_vector_dim_max" : 100
"indexed_vector_dim_max" : 100,
"vector_index_type_count" : {
"hnsw" : 2
},
"vector_similarity_type_count" : {
"dot_product" : 2
},
"vector_element_type_count" : {
"float" : 2
}
},
{
"name" : "keyword",
@ -234,7 +243,16 @@ public class MappingStatsTests extends AbstractWireSerializingTestCase<MappingSt
"index_count" : 3,
"indexed_vector_count" : 3,
"indexed_vector_dim_min" : 100,
"indexed_vector_dim_max" : 100
"indexed_vector_dim_max" : 100,
"vector_index_type_count" : {
"hnsw" : 3
},
"vector_similarity_type_count" : {
"dot_product" : 3
},
"vector_element_type_count" : {
"float" : 3
}
},
{
"name" : "keyword",
@ -460,6 +478,11 @@ public class MappingStatsTests extends AbstractWireSerializingTestCase<MappingSt
expectedStats.indexedVectorCount = 2 * indicesCount;
expectedStats.indexedVectorDimMin = 768;
expectedStats.indexedVectorDimMax = 1024;
expectedStats.vectorIndexTypeCount.put("hnsw", 2 * indicesCount);
expectedStats.vectorIndexTypeCount.put("not_indexed", 2);
expectedStats.vectorSimilarityTypeCount.put("dot_product", 3);
expectedStats.vectorSimilarityTypeCount.put("cosine", 3);
expectedStats.vectorElementTypeCount.put("float", 4 * indicesCount);
assertEquals(Collections.singletonList(expectedStats), mappingStats.getFieldTypeStats());
}