mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-06-28 09:28:55 -04:00
Add more dense_vector details for cluster stats field stats (#113607)
This adds some more counts for dense_vector field mapping stats. This allows for seeing the number of mappings with a given element type, similarity, or index type.
This commit is contained in:
parent
ee73969bbb
commit
8ed0df42f6
7 changed files with 170 additions and 23 deletions
5
docs/changelog/113607.yaml
Normal file
5
docs/changelog/113607.yaml
Normal file
|
@ -0,0 +1,5 @@
|
|||
pr: 113607
|
||||
summary: Add more `dense_vector` details for cluster stats field stats
|
||||
area: Search
|
||||
type: enhancement
|
||||
issues: []
|
|
@ -432,6 +432,15 @@ To get information on segment files, use the <<cluster-nodes-stats,node stats AP
|
|||
`indexed_vector_dim_max`::
|
||||
(integer) For dense_vector field types, the maximum dimension of all indexed vector types in selected nodes.
|
||||
|
||||
`vector_index_type_count`::
|
||||
(object) For dense_vector field types, the number of field mappings by index type (for example `hnsw` or `int8_hnsw`) in selected nodes. Fields that are not indexed are counted under `not_indexed`.
|
||||
|
||||
`vector_similarity_type_count`::
|
||||
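(object) For dense_vector field types, the number of field mappings by similarity type (for example `l2_norm` or `dot_product`) in selected nodes. Only fields whose mapping specifies a `similarity` are counted.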
|
||||
|
||||
`vector_element_type_count`::
|
||||
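(object) For dense_vector field types, the number of field mappings by element type (for example `float` or `byte`) in selected nodes. Fields whose mapping does not specify an `element_type` are counted as `float`.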
|
||||
|
||||
`script_count`::
|
||||
(integer) Number of fields that declare a script.
|
||||
|
||||
|
|
|
@ -1,4 +1,7 @@
|
|||
setup:
|
||||
- requires:
|
||||
cluster_features: [ "gte_v8.4.0" ]
|
||||
reason: "Cluster mappings stats for indexed dense vector was added in 8.4"
|
||||
- skip:
|
||||
features: headers
|
||||
|
||||
|
@ -15,11 +18,16 @@ setup:
|
|||
dims: 768
|
||||
index: true
|
||||
similarity: l2_norm
|
||||
element_type: byte
|
||||
index_options:
|
||||
type: hnsw
|
||||
vector2:
|
||||
type: dense_vector
|
||||
dims: 1024
|
||||
index: true
|
||||
similarity: dot_product
|
||||
index_options:
|
||||
type: int8_hnsw
|
||||
vector3:
|
||||
type: dense_vector
|
||||
dims: 100
|
||||
|
@ -41,9 +49,6 @@ setup:
|
|||
|
||||
---
|
||||
"Field mapping stats":
|
||||
- requires:
|
||||
cluster_features: ["gte_v8.4.0"]
|
||||
reason: "Cluster mappings stats for indexed dense vector was added in 8.4"
|
||||
- do: { cluster.stats: { } }
|
||||
- length: { indices.mappings.field_types: 1 }
|
||||
- match: { indices.mappings.field_types.0.name: dense_vector }
|
||||
|
@ -52,3 +57,28 @@ setup:
|
|||
- match: { indices.mappings.field_types.0.indexed_vector_count: 3 }
|
||||
- match: { indices.mappings.field_types.0.indexed_vector_dim_min: 768 }
|
||||
- match: { indices.mappings.field_types.0.indexed_vector_dim_max: 1024 }
|
||||
---
|
||||
"Field mapping stats with field details":
|
||||
- requires:
|
||||
test_runner_features: [ capabilities ]
|
||||
capabilities:
|
||||
- method: GET
|
||||
path: /_cluster/stats
|
||||
capabilities:
|
||||
- "verbose-dense-vector-mapping-stats"
|
||||
reason: "Capability required to run test"
|
||||
- do: { cluster.stats: { } }
|
||||
- length: { indices.mappings.field_types: 1 }
|
||||
- match: { indices.mappings.field_types.0.name: dense_vector }
|
||||
- match: { indices.mappings.field_types.0.count: 4 }
|
||||
- match: { indices.mappings.field_types.0.index_count: 2 }
|
||||
- match: { indices.mappings.field_types.0.indexed_vector_count: 3 }
|
||||
- match: { indices.mappings.field_types.0.indexed_vector_dim_min: 768 }
|
||||
- match: { indices.mappings.field_types.0.indexed_vector_dim_max: 1024 }
|
||||
- match: { indices.mappings.field_types.0.vector_index_type_count.hnsw: 1 }
|
||||
- match: { indices.mappings.field_types.0.vector_index_type_count.int8_hnsw: 2 }
|
||||
- match: { indices.mappings.field_types.0.vector_index_type_count.not_indexed: 1 }
|
||||
- match: { indices.mappings.field_types.0.vector_similarity_type_count.l2_norm: 2 }
|
||||
- match: { indices.mappings.field_types.0.vector_similarity_type_count.dot_product: 1 }
|
||||
- match: { indices.mappings.field_types.0.vector_element_type_count.float: 3 }
|
||||
- match: { indices.mappings.field_types.0.vector_element_type_count.byte: 1 }
|
||||
|
|
|
@ -9,11 +9,12 @@
|
|||
|
||||
package org.elasticsearch.action.admin.cluster.stats;
|
||||
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.xcontent.XContentBuilder;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
|
@ -21,7 +22,10 @@ import java.util.Objects;
|
|||
*/
|
||||
public final class DenseVectorFieldStats extends FieldStats {
|
||||
static final int UNSET = -1;
|
||||
|
||||
static final String NOT_INDEXED = "not_indexed";
|
||||
Map<String, Integer> vectorIndexTypeCount; // count of mappings by index type
|
||||
Map<String, Integer> vectorSimilarityTypeCount; // count of mappings by similarity
|
||||
Map<String, Integer> vectorElementTypeCount; // count of mappings by element type
|
||||
int indexedVectorCount; // number of times vectors with index:true are used in mappings of this cluster
|
||||
int indexedVectorDimMin; // minimum dimension of indexed vectors in this cluster
|
||||
int indexedVectorDimMax; // maximum dimension of indexed vectors in this cluster
|
||||
|
@ -31,21 +35,14 @@ public final class DenseVectorFieldStats extends FieldStats {
|
|||
indexedVectorCount = 0;
|
||||
indexedVectorDimMin = UNSET;
|
||||
indexedVectorDimMax = UNSET;
|
||||
}
|
||||
|
||||
DenseVectorFieldStats(StreamInput in) throws IOException {
|
||||
super(in);
|
||||
indexedVectorCount = in.readVInt();
|
||||
indexedVectorDimMin = in.readVInt();
|
||||
indexedVectorDimMax = in.readVInt();
|
||||
vectorIndexTypeCount = new HashMap<>();
|
||||
vectorSimilarityTypeCount = new HashMap<>();
|
||||
vectorElementTypeCount = new HashMap<>();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(StreamOutput out) throws IOException {
|
||||
super.writeTo(out);
|
||||
out.writeVInt(indexedVectorCount);
|
||||
out.writeVInt(indexedVectorDimMin);
|
||||
out.writeVInt(indexedVectorDimMax);
|
||||
assert false : "writeTo should not be called on DenseVectorFieldStats";
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -53,6 +50,21 @@ public final class DenseVectorFieldStats extends FieldStats {
|
|||
builder.field("indexed_vector_count", indexedVectorCount);
|
||||
builder.field("indexed_vector_dim_min", indexedVectorDimMin);
|
||||
builder.field("indexed_vector_dim_max", indexedVectorDimMax);
|
||||
if (vectorIndexTypeCount.isEmpty() == false) {
|
||||
builder.startObject("vector_index_type_count");
|
||||
builder.mapContents(vectorIndexTypeCount);
|
||||
builder.endObject();
|
||||
}
|
||||
if (vectorSimilarityTypeCount.isEmpty() == false) {
|
||||
builder.startObject("vector_similarity_type_count");
|
||||
builder.mapContents(vectorSimilarityTypeCount);
|
||||
builder.endObject();
|
||||
}
|
||||
if (vectorElementTypeCount.isEmpty() == false) {
|
||||
builder.startObject("vector_element_type_count");
|
||||
builder.mapContents(vectorElementTypeCount);
|
||||
builder.endObject();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -69,11 +81,53 @@ public final class DenseVectorFieldStats extends FieldStats {
|
|||
DenseVectorFieldStats that = (DenseVectorFieldStats) o;
|
||||
return indexedVectorCount == that.indexedVectorCount
|
||||
&& indexedVectorDimMin == that.indexedVectorDimMin
|
||||
&& indexedVectorDimMax == that.indexedVectorDimMax;
|
||||
&& indexedVectorDimMax == that.indexedVectorDimMax
|
||||
&& Objects.equals(vectorIndexTypeCount, that.vectorIndexTypeCount)
|
||||
&& Objects.equals(vectorSimilarityTypeCount, that.vectorSimilarityTypeCount)
|
||||
&& Objects.equals(vectorElementTypeCount, that.vectorElementTypeCount);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(super.hashCode(), indexedVectorCount, indexedVectorDimMin, indexedVectorDimMax);
|
||||
return Objects.hash(
|
||||
super.hashCode(),
|
||||
indexedVectorCount,
|
||||
indexedVectorDimMin,
|
||||
indexedVectorDimMax,
|
||||
vectorIndexTypeCount,
|
||||
vectorSimilarityTypeCount,
|
||||
vectorElementTypeCount
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "DenseVectorFieldStats{"
|
||||
+ "vectorIndexTypeCount="
|
||||
+ vectorIndexTypeCount
|
||||
+ ", vectorSimilarityTypeCount="
|
||||
+ vectorSimilarityTypeCount
|
||||
+ ", vectorElementTypeCount="
|
||||
+ vectorElementTypeCount
|
||||
+ ", indexedVectorCount="
|
||||
+ indexedVectorCount
|
||||
+ ", indexedVectorDimMin="
|
||||
+ indexedVectorDimMin
|
||||
+ ", indexedVectorDimMax="
|
||||
+ indexedVectorDimMax
|
||||
+ ", scriptCount="
|
||||
+ scriptCount
|
||||
+ ", scriptLangs="
|
||||
+ scriptLangs
|
||||
+ ", fieldScriptStats="
|
||||
+ fieldScriptStats
|
||||
+ ", name='"
|
||||
+ name
|
||||
+ '\''
|
||||
+ ", count="
|
||||
+ count
|
||||
+ ", indexCount="
|
||||
+ indexCount
|
||||
+ '}';
|
||||
}
|
||||
}
|
||||
|
|
|
@ -86,9 +86,30 @@ public final class MappingStats implements ToXContentFragment, Writeable {
|
|||
FieldStats stats;
|
||||
if (type.equals("dense_vector")) {
|
||||
stats = fieldTypes.computeIfAbsent(type, DenseVectorFieldStats::new);
|
||||
boolean indexed = fieldMapping.containsKey("index") ? (boolean) fieldMapping.get("index") : false;
|
||||
DenseVectorFieldStats vStats = (DenseVectorFieldStats) stats;
|
||||
if (fieldMapping.containsKey("similarity")) {
|
||||
Object similarity = fieldMapping.get("similarity");
|
||||
vStats.vectorSimilarityTypeCount.compute(similarity.toString(), (t, c) -> c == null ? count : c + count);
|
||||
}
|
||||
String elementTypeStr = "float";
|
||||
if (fieldMapping.containsKey("element_type")) {
|
||||
Object elementType = fieldMapping.get("element_type");
|
||||
elementTypeStr = elementType.toString();
|
||||
}
|
||||
vStats.vectorElementTypeCount.compute(elementTypeStr, (t, c) -> c == null ? count : c + count);
|
||||
boolean indexed = fieldMapping.containsKey("index") && (boolean) fieldMapping.get("index");
|
||||
if (indexed) {
|
||||
DenseVectorFieldStats vStats = (DenseVectorFieldStats) stats;
|
||||
Object indexOptions = fieldMapping.get("index_options");
|
||||
// NOTE, while the default for `float` is now `int8_hnsw`, that is actually added to the mapping
|
||||
// if the value is truly missing & we are indexed, we default to hnsw.
|
||||
String indexTypeStr = "hnsw";
|
||||
if (indexOptions instanceof Map<?, ?> indexOptionsMap) {
|
||||
Object indexType = indexOptionsMap.get("type");
|
||||
if (indexType != null) {
|
||||
indexTypeStr = indexType.toString();
|
||||
}
|
||||
}
|
||||
vStats.vectorIndexTypeCount.compute(indexTypeStr, (t, c) -> c == null ? count : c + count);
|
||||
vStats.indexedVectorCount += count;
|
||||
Object obj = fieldMapping.get("dims");
|
||||
if (obj != null) {
|
||||
|
@ -100,6 +121,8 @@ public final class MappingStats implements ToXContentFragment, Writeable {
|
|||
vStats.indexedVectorDimMax = dims;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
vStats.vectorIndexTypeCount.compute(DenseVectorFieldStats.NOT_INDEXED, (t, c) -> c == null ? 1 : c + 1);
|
||||
}
|
||||
} else {
|
||||
stats = fieldTypes.computeIfAbsent(type, FieldStats::new);
|
||||
|
|
|
@ -31,7 +31,10 @@ import static org.elasticsearch.rest.RestUtils.getTimeout;
|
|||
@ServerlessScope(Scope.INTERNAL)
|
||||
public class RestClusterStatsAction extends BaseRestHandler {
|
||||
|
||||
private static final Set<String> SUPPORTED_CAPABILITIES = Set.of("human-readable-total-docs-size");
|
||||
private static final Set<String> SUPPORTED_CAPABILITIES = Set.of(
|
||||
"human-readable-total-docs-size",
|
||||
"verbose-dense-vector-mapping-stats"
|
||||
);
|
||||
private static final Set<String> SUPPORTED_CAPABILITIES_CCS_STATS = Sets.union(SUPPORTED_CAPABILITIES, Set.of("ccs-stats"));
|
||||
public static final FeatureFlag CCS_TELEMETRY_FEATURE_FLAG = new FeatureFlag("ccs_telemetry");
|
||||
|
||||
|
|
|
@ -114,7 +114,16 @@ public class MappingStatsTests extends AbstractWireSerializingTestCase<MappingSt
|
|||
"index_count" : 2,
|
||||
"indexed_vector_count" : 2,
|
||||
"indexed_vector_dim_min" : 100,
|
||||
"indexed_vector_dim_max" : 100
|
||||
"indexed_vector_dim_max" : 100,
|
||||
"vector_index_type_count" : {
|
||||
"hnsw" : 2
|
||||
},
|
||||
"vector_similarity_type_count" : {
|
||||
"dot_product" : 2
|
||||
},
|
||||
"vector_element_type_count" : {
|
||||
"float" : 2
|
||||
}
|
||||
},
|
||||
{
|
||||
"name" : "keyword",
|
||||
|
@ -234,7 +243,16 @@ public class MappingStatsTests extends AbstractWireSerializingTestCase<MappingSt
|
|||
"index_count" : 3,
|
||||
"indexed_vector_count" : 3,
|
||||
"indexed_vector_dim_min" : 100,
|
||||
"indexed_vector_dim_max" : 100
|
||||
"indexed_vector_dim_max" : 100,
|
||||
"vector_index_type_count" : {
|
||||
"hnsw" : 3
|
||||
},
|
||||
"vector_similarity_type_count" : {
|
||||
"dot_product" : 3
|
||||
},
|
||||
"vector_element_type_count" : {
|
||||
"float" : 3
|
||||
}
|
||||
},
|
||||
{
|
||||
"name" : "keyword",
|
||||
|
@ -460,6 +478,11 @@ public class MappingStatsTests extends AbstractWireSerializingTestCase<MappingSt
|
|||
expectedStats.indexedVectorCount = 2 * indicesCount;
|
||||
expectedStats.indexedVectorDimMin = 768;
|
||||
expectedStats.indexedVectorDimMax = 1024;
|
||||
expectedStats.vectorIndexTypeCount.put("hnsw", 2 * indicesCount);
|
||||
expectedStats.vectorIndexTypeCount.put("not_indexed", 2);
|
||||
expectedStats.vectorSimilarityTypeCount.put("dot_product", 3);
|
||||
expectedStats.vectorSimilarityTypeCount.put("cosine", 3);
|
||||
expectedStats.vectorElementTypeCount.put("float", 4 * indicesCount);
|
||||
assertEquals(Collections.singletonList(expectedStats), mappingStats.getFieldTypeStats());
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue