mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-06-28 17:34:17 -04:00
Add more dense_vector details for cluster stats field stats (#113607)
This adds some more counts for dense_vector field mapping stats. This allows for seeing the number of mappings with a given element type, similarity, or index type.
This commit is contained in:
parent
ee73969bbb
commit
8ed0df42f6
7 changed files with 170 additions and 23 deletions
5
docs/changelog/113607.yaml
Normal file
5
docs/changelog/113607.yaml
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
pr: 113607
|
||||||
|
summary: Add more `dense_vector` details for cluster stats field stats
|
||||||
|
area: Search
|
||||||
|
type: enhancement
|
||||||
|
issues: []
|
|
@ -432,6 +432,15 @@ To get information on segment files, use the <<cluster-nodes-stats,node stats AP
|
||||||
`indexed_vector_dim_max`::
|
`indexed_vector_dim_max`::
|
||||||
(integer) For dense_vector field types, the maximum dimension of all indexed vector types in selected nodes.
|
(integer) For dense_vector field types, the maximum dimension of all indexed vector types in selected nodes.
|
||||||
|
|
||||||
|
`vector_index_type_count`::
|
||||||
|
(object) For dense_vector field types, the number of vector fields by index type in selected nodes. Fields that are not indexed are counted under `not_indexed`.
|
||||||
|
|
||||||
|
`vector_similarity_type_count`::
|
||||||
|
(object) For dense_vector field types, the number of vector fields by similarity type in selected nodes. Only fields whose mapping explicitly sets `similarity` are counted.
|
||||||
|
|
||||||
|
`vector_element_type_count`::
|
||||||
|
(object) For dense_vector field types, the number of vector fields by element type in selected nodes. Fields whose mapping does not set `element_type` are counted as `float`.
|
||||||
|
|
||||||
`script_count`::
|
`script_count`::
|
||||||
(integer) Number of fields that declare a script.
|
(integer) Number of fields that declare a script.
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,7 @@
|
||||||
setup:
|
setup:
|
||||||
|
- requires:
|
||||||
|
cluster_features: [ "gte_v8.4.0" ]
|
||||||
|
reason: "Cluster mappings stats for indexed dense vector was added in 8.4"
|
||||||
- skip:
|
- skip:
|
||||||
features: headers
|
features: headers
|
||||||
|
|
||||||
|
@ -15,11 +18,16 @@ setup:
|
||||||
dims: 768
|
dims: 768
|
||||||
index: true
|
index: true
|
||||||
similarity: l2_norm
|
similarity: l2_norm
|
||||||
|
element_type: byte
|
||||||
|
index_options:
|
||||||
|
type: hnsw
|
||||||
vector2:
|
vector2:
|
||||||
type: dense_vector
|
type: dense_vector
|
||||||
dims: 1024
|
dims: 1024
|
||||||
index: true
|
index: true
|
||||||
similarity: dot_product
|
similarity: dot_product
|
||||||
|
index_options:
|
||||||
|
type: int8_hnsw
|
||||||
vector3:
|
vector3:
|
||||||
type: dense_vector
|
type: dense_vector
|
||||||
dims: 100
|
dims: 100
|
||||||
|
@ -41,9 +49,6 @@ setup:
|
||||||
|
|
||||||
---
|
---
|
||||||
"Field mapping stats":
|
"Field mapping stats":
|
||||||
- requires:
|
|
||||||
cluster_features: ["gte_v8.4.0"]
|
|
||||||
reason: "Cluster mappings stats for indexed dense vector was added in 8.4"
|
|
||||||
- do: { cluster.stats: { } }
|
- do: { cluster.stats: { } }
|
||||||
- length: { indices.mappings.field_types: 1 }
|
- length: { indices.mappings.field_types: 1 }
|
||||||
- match: { indices.mappings.field_types.0.name: dense_vector }
|
- match: { indices.mappings.field_types.0.name: dense_vector }
|
||||||
|
@ -52,3 +57,28 @@ setup:
|
||||||
- match: { indices.mappings.field_types.0.indexed_vector_count: 3 }
|
- match: { indices.mappings.field_types.0.indexed_vector_count: 3 }
|
||||||
- match: { indices.mappings.field_types.0.indexed_vector_dim_min: 768 }
|
- match: { indices.mappings.field_types.0.indexed_vector_dim_min: 768 }
|
||||||
- match: { indices.mappings.field_types.0.indexed_vector_dim_max: 1024 }
|
- match: { indices.mappings.field_types.0.indexed_vector_dim_max: 1024 }
|
||||||
|
---
|
||||||
|
"Field mapping stats with field details":
|
||||||
|
- requires:
|
||||||
|
test_runner_features: [ capabilities ]
|
||||||
|
capabilities:
|
||||||
|
- method: GET
|
||||||
|
path: /_cluster/stats
|
||||||
|
capabilities:
|
||||||
|
- "verbose-dense-vector-mapping-stats"
|
||||||
|
reason: "Capability required to run test"
|
||||||
|
- do: { cluster.stats: { } }
|
||||||
|
- length: { indices.mappings.field_types: 1 }
|
||||||
|
- match: { indices.mappings.field_types.0.name: dense_vector }
|
||||||
|
- match: { indices.mappings.field_types.0.count: 4 }
|
||||||
|
- match: { indices.mappings.field_types.0.index_count: 2 }
|
||||||
|
- match: { indices.mappings.field_types.0.indexed_vector_count: 3 }
|
||||||
|
- match: { indices.mappings.field_types.0.indexed_vector_dim_min: 768 }
|
||||||
|
- match: { indices.mappings.field_types.0.indexed_vector_dim_max: 1024 }
|
||||||
|
- match: { indices.mappings.field_types.0.vector_index_type_count.hnsw: 1 }
|
||||||
|
- match: { indices.mappings.field_types.0.vector_index_type_count.int8_hnsw: 2 }
|
||||||
|
- match: { indices.mappings.field_types.0.vector_index_type_count.not_indexed: 1 }
|
||||||
|
- match: { indices.mappings.field_types.0.vector_similarity_type_count.l2_norm: 2 }
|
||||||
|
- match: { indices.mappings.field_types.0.vector_similarity_type_count.dot_product: 1 }
|
||||||
|
- match: { indices.mappings.field_types.0.vector_element_type_count.float: 3 }
|
||||||
|
- match: { indices.mappings.field_types.0.vector_element_type_count.byte: 1 }
|
||||||
|
|
|
@ -9,11 +9,12 @@
|
||||||
|
|
||||||
package org.elasticsearch.action.admin.cluster.stats;
|
package org.elasticsearch.action.admin.cluster.stats;
|
||||||
|
|
||||||
import org.elasticsearch.common.io.stream.StreamInput;
|
|
||||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||||
import org.elasticsearch.xcontent.XContentBuilder;
|
import org.elasticsearch.xcontent.XContentBuilder;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -21,7 +22,10 @@ import java.util.Objects;
|
||||||
*/
|
*/
|
||||||
public final class DenseVectorFieldStats extends FieldStats {
|
public final class DenseVectorFieldStats extends FieldStats {
|
||||||
static final int UNSET = -1;
|
static final int UNSET = -1;
|
||||||
|
static final String NOT_INDEXED = "not_indexed";
|
||||||
|
Map<String, Integer> vectorIndexTypeCount; // count of mappings by index type
|
||||||
|
Map<String, Integer> vectorSimilarityTypeCount; // count of mappings by similarity
|
||||||
|
Map<String, Integer> vectorElementTypeCount; // count of mappings by element type
|
||||||
int indexedVectorCount; // number of times vectors with index:true are used in mappings of this cluster
|
int indexedVectorCount; // number of times vectors with index:true are used in mappings of this cluster
|
||||||
int indexedVectorDimMin; // minimum dimension of indexed vectors in this cluster
|
int indexedVectorDimMin; // minimum dimension of indexed vectors in this cluster
|
||||||
int indexedVectorDimMax; // maximum dimension of indexed vectors in this cluster
|
int indexedVectorDimMax; // maximum dimension of indexed vectors in this cluster
|
||||||
|
@ -31,21 +35,14 @@ public final class DenseVectorFieldStats extends FieldStats {
|
||||||
indexedVectorCount = 0;
|
indexedVectorCount = 0;
|
||||||
indexedVectorDimMin = UNSET;
|
indexedVectorDimMin = UNSET;
|
||||||
indexedVectorDimMax = UNSET;
|
indexedVectorDimMax = UNSET;
|
||||||
}
|
vectorIndexTypeCount = new HashMap<>();
|
||||||
|
vectorSimilarityTypeCount = new HashMap<>();
|
||||||
DenseVectorFieldStats(StreamInput in) throws IOException {
|
vectorElementTypeCount = new HashMap<>();
|
||||||
super(in);
|
|
||||||
indexedVectorCount = in.readVInt();
|
|
||||||
indexedVectorDimMin = in.readVInt();
|
|
||||||
indexedVectorDimMax = in.readVInt();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void writeTo(StreamOutput out) throws IOException {
|
public void writeTo(StreamOutput out) throws IOException {
|
||||||
super.writeTo(out);
|
assert false : "writeTo should not be called on DenseVectorFieldStats";
|
||||||
out.writeVInt(indexedVectorCount);
|
|
||||||
out.writeVInt(indexedVectorDimMin);
|
|
||||||
out.writeVInt(indexedVectorDimMax);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -53,6 +50,21 @@ public final class DenseVectorFieldStats extends FieldStats {
|
||||||
builder.field("indexed_vector_count", indexedVectorCount);
|
builder.field("indexed_vector_count", indexedVectorCount);
|
||||||
builder.field("indexed_vector_dim_min", indexedVectorDimMin);
|
builder.field("indexed_vector_dim_min", indexedVectorDimMin);
|
||||||
builder.field("indexed_vector_dim_max", indexedVectorDimMax);
|
builder.field("indexed_vector_dim_max", indexedVectorDimMax);
|
||||||
|
if (vectorIndexTypeCount.isEmpty() == false) {
|
||||||
|
builder.startObject("vector_index_type_count");
|
||||||
|
builder.mapContents(vectorIndexTypeCount);
|
||||||
|
builder.endObject();
|
||||||
|
}
|
||||||
|
if (vectorSimilarityTypeCount.isEmpty() == false) {
|
||||||
|
builder.startObject("vector_similarity_type_count");
|
||||||
|
builder.mapContents(vectorSimilarityTypeCount);
|
||||||
|
builder.endObject();
|
||||||
|
}
|
||||||
|
if (vectorElementTypeCount.isEmpty() == false) {
|
||||||
|
builder.startObject("vector_element_type_count");
|
||||||
|
builder.mapContents(vectorElementTypeCount);
|
||||||
|
builder.endObject();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -69,11 +81,53 @@ public final class DenseVectorFieldStats extends FieldStats {
|
||||||
DenseVectorFieldStats that = (DenseVectorFieldStats) o;
|
DenseVectorFieldStats that = (DenseVectorFieldStats) o;
|
||||||
return indexedVectorCount == that.indexedVectorCount
|
return indexedVectorCount == that.indexedVectorCount
|
||||||
&& indexedVectorDimMin == that.indexedVectorDimMin
|
&& indexedVectorDimMin == that.indexedVectorDimMin
|
||||||
&& indexedVectorDimMax == that.indexedVectorDimMax;
|
&& indexedVectorDimMax == that.indexedVectorDimMax
|
||||||
|
&& Objects.equals(vectorIndexTypeCount, that.vectorIndexTypeCount)
|
||||||
|
&& Objects.equals(vectorSimilarityTypeCount, that.vectorSimilarityTypeCount)
|
||||||
|
&& Objects.equals(vectorElementTypeCount, that.vectorElementTypeCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return Objects.hash(super.hashCode(), indexedVectorCount, indexedVectorDimMin, indexedVectorDimMax);
|
return Objects.hash(
|
||||||
|
super.hashCode(),
|
||||||
|
indexedVectorCount,
|
||||||
|
indexedVectorDimMin,
|
||||||
|
indexedVectorDimMax,
|
||||||
|
vectorIndexTypeCount,
|
||||||
|
vectorSimilarityTypeCount,
|
||||||
|
vectorElementTypeCount
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "DenseVectorFieldStats{"
|
||||||
|
+ "vectorIndexTypeCount="
|
||||||
|
+ vectorIndexTypeCount
|
||||||
|
+ ", vectorSimilarityTypeCount="
|
||||||
|
+ vectorSimilarityTypeCount
|
||||||
|
+ ", vectorElementTypeCount="
|
||||||
|
+ vectorElementTypeCount
|
||||||
|
+ ", indexedVectorCount="
|
||||||
|
+ indexedVectorCount
|
||||||
|
+ ", indexedVectorDimMin="
|
||||||
|
+ indexedVectorDimMin
|
||||||
|
+ ", indexedVectorDimMax="
|
||||||
|
+ indexedVectorDimMax
|
||||||
|
+ ", scriptCount="
|
||||||
|
+ scriptCount
|
||||||
|
+ ", scriptLangs="
|
||||||
|
+ scriptLangs
|
||||||
|
+ ", fieldScriptStats="
|
||||||
|
+ fieldScriptStats
|
||||||
|
+ ", name='"
|
||||||
|
+ name
|
||||||
|
+ '\''
|
||||||
|
+ ", count="
|
||||||
|
+ count
|
||||||
|
+ ", indexCount="
|
||||||
|
+ indexCount
|
||||||
|
+ '}';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -86,9 +86,30 @@ public final class MappingStats implements ToXContentFragment, Writeable {
|
||||||
FieldStats stats;
|
FieldStats stats;
|
||||||
if (type.equals("dense_vector")) {
|
if (type.equals("dense_vector")) {
|
||||||
stats = fieldTypes.computeIfAbsent(type, DenseVectorFieldStats::new);
|
stats = fieldTypes.computeIfAbsent(type, DenseVectorFieldStats::new);
|
||||||
boolean indexed = fieldMapping.containsKey("index") ? (boolean) fieldMapping.get("index") : false;
|
DenseVectorFieldStats vStats = (DenseVectorFieldStats) stats;
|
||||||
|
if (fieldMapping.containsKey("similarity")) {
|
||||||
|
Object similarity = fieldMapping.get("similarity");
|
||||||
|
vStats.vectorSimilarityTypeCount.compute(similarity.toString(), (t, c) -> c == null ? count : c + count);
|
||||||
|
}
|
||||||
|
String elementTypeStr = "float";
|
||||||
|
if (fieldMapping.containsKey("element_type")) {
|
||||||
|
Object elementType = fieldMapping.get("element_type");
|
||||||
|
elementTypeStr = elementType.toString();
|
||||||
|
}
|
||||||
|
vStats.vectorElementTypeCount.compute(elementTypeStr, (t, c) -> c == null ? count : c + count);
|
||||||
|
boolean indexed = fieldMapping.containsKey("index") && (boolean) fieldMapping.get("index");
|
||||||
if (indexed) {
|
if (indexed) {
|
||||||
DenseVectorFieldStats vStats = (DenseVectorFieldStats) stats;
|
Object indexOptions = fieldMapping.get("index_options");
|
||||||
|
// NOTE, while the default for `float` is now `int8_hnsw`, that is actually added to the mapping
|
||||||
|
// if the value is truly missing & we are indexed, we default to hnsw.
|
||||||
|
String indexTypeStr = "hnsw";
|
||||||
|
if (indexOptions instanceof Map<?, ?> indexOptionsMap) {
|
||||||
|
Object indexType = indexOptionsMap.get("type");
|
||||||
|
if (indexType != null) {
|
||||||
|
indexTypeStr = indexType.toString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
vStats.vectorIndexTypeCount.compute(indexTypeStr, (t, c) -> c == null ? count : c + count);
|
||||||
vStats.indexedVectorCount += count;
|
vStats.indexedVectorCount += count;
|
||||||
Object obj = fieldMapping.get("dims");
|
Object obj = fieldMapping.get("dims");
|
||||||
if (obj != null) {
|
if (obj != null) {
|
||||||
|
@ -100,6 +121,8 @@ public final class MappingStats implements ToXContentFragment, Writeable {
|
||||||
vStats.indexedVectorDimMax = dims;
|
vStats.indexedVectorDimMax = dims;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
vStats.vectorIndexTypeCount.compute(DenseVectorFieldStats.NOT_INDEXED, (t, c) -> c == null ? 1 : c + 1);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
stats = fieldTypes.computeIfAbsent(type, FieldStats::new);
|
stats = fieldTypes.computeIfAbsent(type, FieldStats::new);
|
||||||
|
|
|
@ -31,7 +31,10 @@ import static org.elasticsearch.rest.RestUtils.getTimeout;
|
||||||
@ServerlessScope(Scope.INTERNAL)
|
@ServerlessScope(Scope.INTERNAL)
|
||||||
public class RestClusterStatsAction extends BaseRestHandler {
|
public class RestClusterStatsAction extends BaseRestHandler {
|
||||||
|
|
||||||
private static final Set<String> SUPPORTED_CAPABILITIES = Set.of("human-readable-total-docs-size");
|
private static final Set<String> SUPPORTED_CAPABILITIES = Set.of(
|
||||||
|
"human-readable-total-docs-size",
|
||||||
|
"verbose-dense-vector-mapping-stats"
|
||||||
|
);
|
||||||
private static final Set<String> SUPPORTED_CAPABILITIES_CCS_STATS = Sets.union(SUPPORTED_CAPABILITIES, Set.of("ccs-stats"));
|
private static final Set<String> SUPPORTED_CAPABILITIES_CCS_STATS = Sets.union(SUPPORTED_CAPABILITIES, Set.of("ccs-stats"));
|
||||||
public static final FeatureFlag CCS_TELEMETRY_FEATURE_FLAG = new FeatureFlag("ccs_telemetry");
|
public static final FeatureFlag CCS_TELEMETRY_FEATURE_FLAG = new FeatureFlag("ccs_telemetry");
|
||||||
|
|
||||||
|
|
|
@ -114,7 +114,16 @@ public class MappingStatsTests extends AbstractWireSerializingTestCase<MappingSt
|
||||||
"index_count" : 2,
|
"index_count" : 2,
|
||||||
"indexed_vector_count" : 2,
|
"indexed_vector_count" : 2,
|
||||||
"indexed_vector_dim_min" : 100,
|
"indexed_vector_dim_min" : 100,
|
||||||
"indexed_vector_dim_max" : 100
|
"indexed_vector_dim_max" : 100,
|
||||||
|
"vector_index_type_count" : {
|
||||||
|
"hnsw" : 2
|
||||||
|
},
|
||||||
|
"vector_similarity_type_count" : {
|
||||||
|
"dot_product" : 2
|
||||||
|
},
|
||||||
|
"vector_element_type_count" : {
|
||||||
|
"float" : 2
|
||||||
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name" : "keyword",
|
"name" : "keyword",
|
||||||
|
@ -234,7 +243,16 @@ public class MappingStatsTests extends AbstractWireSerializingTestCase<MappingSt
|
||||||
"index_count" : 3,
|
"index_count" : 3,
|
||||||
"indexed_vector_count" : 3,
|
"indexed_vector_count" : 3,
|
||||||
"indexed_vector_dim_min" : 100,
|
"indexed_vector_dim_min" : 100,
|
||||||
"indexed_vector_dim_max" : 100
|
"indexed_vector_dim_max" : 100,
|
||||||
|
"vector_index_type_count" : {
|
||||||
|
"hnsw" : 3
|
||||||
|
},
|
||||||
|
"vector_similarity_type_count" : {
|
||||||
|
"dot_product" : 3
|
||||||
|
},
|
||||||
|
"vector_element_type_count" : {
|
||||||
|
"float" : 3
|
||||||
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name" : "keyword",
|
"name" : "keyword",
|
||||||
|
@ -460,6 +478,11 @@ public class MappingStatsTests extends AbstractWireSerializingTestCase<MappingSt
|
||||||
expectedStats.indexedVectorCount = 2 * indicesCount;
|
expectedStats.indexedVectorCount = 2 * indicesCount;
|
||||||
expectedStats.indexedVectorDimMin = 768;
|
expectedStats.indexedVectorDimMin = 768;
|
||||||
expectedStats.indexedVectorDimMax = 1024;
|
expectedStats.indexedVectorDimMax = 1024;
|
||||||
|
expectedStats.vectorIndexTypeCount.put("hnsw", 2 * indicesCount);
|
||||||
|
expectedStats.vectorIndexTypeCount.put("not_indexed", 2);
|
||||||
|
expectedStats.vectorSimilarityTypeCount.put("dot_product", 3);
|
||||||
|
expectedStats.vectorSimilarityTypeCount.put("cosine", 3);
|
||||||
|
expectedStats.vectorElementTypeCount.put("float", 4 * indicesCount);
|
||||||
assertEquals(Collections.singletonList(expectedStats), mappingStats.getFieldTypeStats());
|
assertEquals(Collections.singletonList(expectedStats), mappingStats.getFieldTypeStats());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue