diff --git a/docs/reference/query-dsl/special-queries.asciidoc b/docs/reference/query-dsl/special-queries.asciidoc
index d46377f69835..8fb23ca4dbb6 100644
--- a/docs/reference/query-dsl/special-queries.asciidoc
+++ b/docs/reference/query-dsl/special-queries.asciidoc
@@ -60,3 +60,5 @@ include::wrapper-query.asciidoc[]
 include::pinned-query.asciidoc[]
 
 include::rule-query.asciidoc[]
+
+include::weighted-tokens-query.asciidoc[]
diff --git a/docs/reference/query-dsl/text-expansion-query.asciidoc b/docs/reference/query-dsl/text-expansion-query.asciidoc
index 46a9aafdd1af..cb0a7c6ea9c0 100644
--- a/docs/reference/query-dsl/text-expansion-query.asciidoc
+++ b/docs/reference/query-dsl/text-expansion-query.asciidoc
@@ -62,7 +62,7 @@ Default: Disabled.
 Parameters for `<pruning_config>` are:
 
 `tokens_freq_ratio_threshold`::
-(Optional, float)
+(Optional, integer)
 preview:[]
 Tokens whose frequency is more than `tokens_freq_ratio_threshold` times the average frequency of all tokens in the specified field are considered outliers and pruned.
 This value must between 1 and 100.
@@ -110,29 +110,96 @@ GET my-index/_search
 ----
 // TEST[skip: TBD]
 
-[discrete]
-[[text-expansion-query-with-pruning-config-example]]
-=== Example ELSER query with pruning configuration
+Multiple `text_expansion` queries can be combined with each other or other query types.
+This can be achieved by wrapping them in <<query-dsl-bool-query, boolean query clauses>> and using linear boosting:
 
-The following is an extension to the above example that adds a preview:[] pruning configuration to the `text_expansion` query.
-The pruning configuration identifies non-significant tokens to prune from the query in order to improve query performance.
 [source,console]
 ----
 GET my-index/_search
 {
-   "query":{
-      "text_expansion":{
-         "ml.tokens":{
-            "model_id":".elser_model_2",
-            "model_text":"How is the weather in Jamaica?"
-         },
-         "pruning_config": {
-             "tokens_freq_ratio_threshold": 5,
-             "tokens_weight_threshold": 0.4,
-             "only_score_pruned_tokens": false
-         }
+  "query": {
+    "bool": {
+      "should": [
+        {
+          "text_expansion": {
+            "ml.inference.title_expanded.predicted_value": {
+              "model_id": ".elser_model_2",
+              "model_text": "How is the weather in Jamaica?",
+              "boost": 1
+            }
+          }
+        },
+        {
+          "text_expansion": {
+            "ml.inference.description_expanded.predicted_value": {
+              "model_id": ".elser_model_2",
+              "model_text": "How is the weather in Jamaica?",
+              "boost": 1
+            }
+          }
+        },
+        {
+          "multi_match": {
+            "query": "How is the weather in Jamaica?",
+            "fields": [
+              "title",
+              "description"
+            ],
+            "boost": 4
+          }
+        }
+      ]
+    }
+  }
+}
+----
+// TEST[skip: TBD]
+
+This can also be achieved by using sub searches combined with <<rrf>>.
+
+[source,console]
+----
+GET my-index/_search
+{
+  "sub_searches": [
+    {
+      "query": {
+        "multi_match": {
+          "query": "How is the weather in Jamaica?",
+          "fields": [
+            "title",
+            "description"
+          ]
+        }
       }
-   }
+    },
+    {
+      "query": {
+        "text_expansion": {
+          "ml.inference.title_expanded.predicted_value": {
+            "model_id": ".elser_model_2",
+            "model_text": "How is the weather in Jamaica?"
+          }
+        }
+      }
+    },
+    {
+      "query": {
+        "text_expansion": {
+          "ml.inference.description_expanded.predicted_value": {
+            "model_id": ".elser_model_2",
+            "model_text": "How is the weather in Jamaica?"
+          }
+        }
+      }
+    }
+  ],
+  "rank": {
+    "rrf": {
+      "window_size": 10,
+      "rank_constant": 20
+    }
+  }
 }
 ----
 // TEST[skip: TBD]
@@ -141,9 +208,13 @@ GET my-index/_search
 [[text-expansion-query-with-pruning-config-and-rescore-example]]
 === Example ELSER query with pruning configuration and rescore
 
-The following is an extension to the above example that adds a <<rescore>> function on top of the preview:[] pruning configuration to the `text_expansion` query.
+The following is an extension to the above example that adds a preview:[] pruning configuration to the `text_expansion` query.
 The pruning configuration identifies non-significant tokens to prune from the query in order to improve query performance.
-Rescoring the query with the tokens that were originally pruned from the query may improve overall search relevance when using this pruning strategy.
+
+Token pruning happens at the shard level.
+While this should result in the same tokens being labeled as insignificant across shards, this is not guaranteed because it depends on the composition of each shard.
+Therefore, if you are running `text_expansion` with a `pruning_config` on a multi-shard index, we strongly recommend adding a <<rescore>> function with the tokens that were originally pruned from the query.
+This will help mitigate any shard-level inconsistency with pruned tokens and provide better relevance overall.
 
 [source,console]
 ----
@@ -188,30 +259,3 @@ GET my-index/_search
 ====
 Depending on your data, the text expansion query may be faster with `track_total_hits: false`.
 ====
-
-[discrete]
-[[weighted-tokens-query-example]]
-=== Example Weighted token query
-
-In order to quickly iterate during tests, we exposed a new preview:[] `weighted_tokens` query for evaluation of tokenized datasets.
-While this is not a query that is intended for production use, it can be used to quickly evaluate relevance using various pruning configurations.
-
-[source,console]
-----
-POST /docs/_search
-{
-  "query": {
-    "weighted_tokens": {
-      "query_expansion": {
-        "tokens": {"2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012},
-        "pruning_config": {
-          "tokens_freq_ratio_threshold": 5,
-          "tokens_weight_threshold": 0.4,
-          "only_score_pruned_tokens": false
-        }
-      }
-    }
-  }
-}
-----
-//TEST[skip: TBD]
diff --git a/docs/reference/query-dsl/weighted-tokens-query.asciidoc b/docs/reference/query-dsl/weighted-tokens-query.asciidoc
new file mode 100644
index 000000000000..cbd88eb3290d
--- /dev/null
+++ b/docs/reference/query-dsl/weighted-tokens-query.asciidoc
@@ -0,0 +1,122 @@
+[[query-dsl-weighted-tokens-query]]
+=== Weighted tokens query
+++++
+<titleabbrev>Weighted tokens</titleabbrev>
+++++
+
+preview::[]
+
+The weighted tokens query requires a list of token-weight pairs that are sent in with a query rather than calculated using a {nlp} model.
+These token pairs are then used in a query against a <<sparse-vector,sparse vector>> or <<rank-features,rank features>> field.
+
+Weighted tokens queries are useful when you want to use an external query expansion model, or quickly prototype changes without reindexing a new model.
+
+[discrete]
+[[weighted-tokens-query-ex-request]]
+==== Example request
+
+[source,console]
+----
+POST _search
+{
+  "query": {
+    "weighted_tokens": {
+      "query_expansion_field": {
+        "tokens": {"2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012},
+        "pruning_config": {
+          "tokens_freq_ratio_threshold": 5,
+          "tokens_weight_threshold": 0.4,
+          "only_score_pruned_tokens": false
+        }
+      }
+    }
+  }
+}
+----
+// TEST[skip: TBD]
+
+[discrete]
+[[weighted-token-query-params]]
+==== Top level parameters for `weighted_tokens`
+
+`<tokens>`:::
+(Required, dictionary)
+A dictionary of token-weight pairs.
+
+`pruning_config` ::::
+(Optional, object)
+Optional pruning configuration. If enabled, this will omit non-significant tokens from the query in order to improve query performance.
+Default: Disabled.
++
+--
+Parameters for `<pruning_config>` are:
+
+`tokens_freq_ratio_threshold`::
+(Optional, integer)
+Tokens whose frequency is more than `tokens_freq_ratio_threshold` times the average frequency of all tokens in the specified field are considered outliers and pruned.
+This value must be between 1 and 100.
+Default: `5`.
+
+`tokens_weight_threshold`::
+(Optional, float)
+Tokens whose weight is less than `tokens_weight_threshold` are considered nonsignificant and pruned.
+This value must be between 0 and 1.
+Default: `0.4`.
+
+`only_score_pruned_tokens`::
+(Optional, boolean)
+If `true` we only input pruned tokens into scoring, and discard non-pruned tokens.
+It is strongly recommended to set this to `false` for the main query, but this can be set to `true` for a rescore query to get more relevant results.
+Default: `false`.
+
+NOTE: The default values for `tokens_freq_ratio_threshold` and `tokens_weight_threshold` were chosen based on tests using ELSER that provided the best results.
+--
+
+[discrete]
+[[weighted-tokens-query-with-pruning-config-and-rescore-example]]
+==== Example weighted tokens query with pruning configuration and rescore
+
+The following example adds a pruning configuration to the `weighted_tokens` query.
+The pruning configuration identifies non-significant tokens to prune from the query in order to improve query performance.
+
+Token pruning happens at the shard level.
+While this should result in the same tokens being labeled as insignificant across shards, this is not guaranteed because it depends on the composition of each shard.
+Therefore, if you are running `weighted_tokens` with a `pruning_config` on a multi-shard index, we strongly recommend adding a <<rescore>> function with the tokens that were originally pruned from the query.
+This will help mitigate any shard-level inconsistency with pruned tokens and provide better relevance overall.
+
+[source,console]
+----
+GET my-index/_search
+{
+   "query":{
+      "weighted_tokens": {
+      "query_expansion_field": {
+        "tokens": {"2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012},
+        "pruning_config": {
+          "tokens_freq_ratio_threshold": 5,
+          "tokens_weight_threshold": 0.4,
+          "only_score_pruned_tokens": false
+        }
+      }
+    }
+   },
+   "rescore": {
+      "window_size": 100,
+      "query": {
+         "rescore_query": {
+            "weighted_tokens": {
+              "query_expansion_field": {
+                "tokens": {"2161": 0.4679, "2621": 0.307, "2782": 0.1299, "2851": 0.1056, "3088": 0.3041, "3376": 0.1038, "3467": 0.4873, "3684": 0.8958, "4380": 0.334, "4542": 0.4636, "4633": 2.2805, "4785": 1.2628, "4860": 1.0655, "5133": 1.0709, "7139": 1.0016, "7224": 0.2486, "7387": 0.0985, "7394": 0.0542, "8915": 0.369, "9156": 2.8947, "10505": 0.2771, "11464": 0.3996, "13525": 0.0088, "14178": 0.8161, "16893": 0.1376, "17851": 1.5348, "19939": 0.6012},
+                "pruning_config": {
+                  "tokens_freq_ratio_threshold": 5,
+                  "tokens_weight_threshold": 0.4,
+                  "only_score_pruned_tokens": true
+                }
+              }
+            }
+         }
+      }
+   }
+}
+----
+// TEST[skip: TBD]
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/queries/TokenPruningConfig.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/queries/TokenPruningConfig.java
index d789a645fd9c..90fb9291b3b8 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/queries/TokenPruningConfig.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/queries/TokenPruningConfig.java
@@ -47,8 +47,8 @@ public class TokenPruningConfig implements Writeable, ToXContentObject {
             throw new IllegalArgumentException(
                 "["
                     + TOKENS_FREQ_RATIO_THRESHOLD.getPreferredName()
-                    + "] must be between [1.0] and ["
-                    + String.format(Locale.ROOT, "%.1f", MAX_TOKENS_FREQ_RATIO_THRESHOLD)
+                    + "] must be between [1] and ["
+                    + String.format(Locale.ROOT, "%d", (int) MAX_TOKENS_FREQ_RATIO_THRESHOLD)
                     + "], got "
                     + tokensFreqRatioThreshold
             );
diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/queries/WeightedTokensQueryBuilderTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/queries/WeightedTokensQueryBuilderTests.java
index 4d91c66de4b9..59d6db2c2ea4 100644
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/queries/WeightedTokensQueryBuilderTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/queries/WeightedTokensQueryBuilderTests.java
@@ -231,7 +231,7 @@ public class WeightedTokensQueryBuilderTests extends AbstractQueryTestCase<Weigh
                 WeightedTokensQueryBuilder queryThatShouldBePruned = new WeightedTokensQueryBuilder(
                     RANK_FEATURES_FIELD,
                     inputTokens,
-                    new TokenPruningConfig(1.5f, 0.5f, false)
+                    new TokenPruningConfig(2, 0.5f, false)
                 );
                 query = queryThatShouldBePruned.doToQuery(context);
                 assertCorrectLuceneQuery("queryThatShouldBePruned", query, List.of("dog", "jumped", "on", "me"));
@@ -239,7 +239,7 @@ public class WeightedTokensQueryBuilderTests extends AbstractQueryTestCase<Weigh
                 WeightedTokensQueryBuilder onlyScorePrunedTokensQuery = new WeightedTokensQueryBuilder(
                     RANK_FEATURES_FIELD,
                     inputTokens,
-                    new TokenPruningConfig(1.5f, 0.5f, true)
+                    new TokenPruningConfig(2, 0.5f, true)
                 );
                 query = onlyScorePrunedTokensQuery.doToQuery(context);
                 assertCorrectLuceneQuery("onlyScorePrunedTokensQuery", query, List.of("the", "black"));
@@ -361,21 +361,21 @@ public class WeightedTokensQueryBuilderTests extends AbstractQueryTestCase<Weigh
         {
             IllegalArgumentException e = expectThrows(
                 IllegalArgumentException.class,
-                () -> new WeightedTokensQueryBuilder("field name", weightedTokens, new TokenPruningConfig(-1f, 0.0f, false))
+                () -> new WeightedTokensQueryBuilder("field name", weightedTokens, new TokenPruningConfig(-1, 0.0f, false))
             );
-            assertEquals("[tokens_freq_ratio_threshold] must be between [1.0] and [100.0], got -1.0", e.getMessage());
+            assertEquals("[tokens_freq_ratio_threshold] must be between [1] and [100], got -1.0", e.getMessage());
         }
         {
             IllegalArgumentException e = expectThrows(
                 IllegalArgumentException.class,
-                () -> new WeightedTokensQueryBuilder("field name", weightedTokens, new TokenPruningConfig(101f, 0.0f, false))
+                () -> new WeightedTokensQueryBuilder("field name", weightedTokens, new TokenPruningConfig(101, 0.0f, false))
             );
-            assertEquals("[tokens_freq_ratio_threshold] must be between [1.0] and [100.0], got 101.0", e.getMessage());
+            assertEquals("[tokens_freq_ratio_threshold] must be between [1] and [100], got 101.0", e.getMessage());
         }
         {
             IllegalArgumentException e = expectThrows(
                 IllegalArgumentException.class,
-                () -> new WeightedTokensQueryBuilder("field name", weightedTokens, new TokenPruningConfig(5f, 5f, false))
+                () -> new WeightedTokensQueryBuilder("field name", weightedTokens, new TokenPruningConfig(5, 5f, false))
             );
             assertEquals("[tokens_weight_threshold] must be between 0 and 1", e.getMessage());
         }