[8.12] [Obs AI Assistant] ES|QL improvements (#173303) (#173486)

# Backport

This will backport the following commits from `main` to `8.12`:
- [[Obs AI Assistant] ES|QL improvements
(#173303)](https://github.com/elastic/kibana/pull/173303)

<!--- Backport version: 8.9.7 -->

### Questions?
Please refer to the [Backport tool
documentation](https://github.com/sqren/backport)

<!--BACKPORT [{"author":{"name":"Dario
Gieselaar","email":"dario.gieselaar@elastic.co"},"sourceCommit":{"committedDate":"2023-12-17T21:23:50Z","message":"[Obs
AI Assistant] ES|QL improvements (#173303)\n\nUse a combination of
system prompt + recall for ES|QL to allow for:\r\n\r\n- faster
responses\r\n- wider support of functions (e.g. any documented function
is now\r\nsupported, in theory)\r\n\r\nPerformance for the current ES|QL
scenarios is similar.\r\n\r\nSome things I've noticed:\r\n- ~for
whatever reason it is much harder to get the LLM to not make\r\ncertain
mistakes. I've tried mentioning some mistakes close to the end\r\nof the
conversation, but will explore other options.~ I am again opting\r\nfor
repeating common mistakes at the start of the message, but this
time\r\nI'm allowing the LLM to pick them.\r\n- if we can prevent it
from making \"known\" mistakes, we can figure out a\r\nway to have the
LLM store known mistakes. This is also useful for
other\r\nfunctions\r\n- another thing I want to figure out is if more
examples improves for\r\ncommands and custom documentation improves
performance. but later.\r\n\r\n---------\r\n\r\nCo-authored-by:
kibanamachine
<42973632+kibanamachine@users.noreply.github.com>","sha":"d54109d8f8243cae00f6871c73d3bdc19f4f9905","branchLabelMapping":{"^v8.13.0$":"main","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:fix","v8.12.0","v8.12.1","v8.13.0"],"number":173303,"url":"https://github.com/elastic/kibana/pull/173303","mergeCommit":{"message":"[Obs
AI Assistant] ES|QL improvements (#173303)\n\nUse a combination of
system prompt + recall for ES|QL to allow for:\r\n\r\n- faster
responses\r\n- wider support of functions (e.g. any documented function
is now\r\nsupported, in theory)\r\n\r\nPerformance for the current ES|QL
scenarios is similar.\r\n\r\nSome things I've noticed:\r\n- ~for
whatever reason it is much harder to get the LLM to not make\r\ncertain
mistakes. I've tried mentioning some mistakes close to the end\r\nof the
conversation, but will explore other options.~ I am again opting\r\nfor
repeating common mistakes at the start of the message, but this
time\r\nI'm allowing the LLM to pick them.\r\n- if we can prevent it
from making \"known\" mistakes, we can figure out a\r\nway to have the
LLM store known mistakes. This is also useful for
other\r\nfunctions\r\n- another thing I want to figure out is if more
examples improves for\r\ncommands and custom documentation improves
performance. but later.\r\n\r\n---------\r\n\r\nCo-authored-by:
kibanamachine
<42973632+kibanamachine@users.noreply.github.com>","sha":"d54109d8f8243cae00f6871c73d3bdc19f4f9905"}},"sourceBranch":"main","suggestedTargetBranches":["8.12"],"targetPullRequestStates":[{"branch":"8.12","label":"v8.12.0","labelRegex":"^v(\\d+).(\\d+).\\d+$","isSourceBranch":false,"state":"NOT_CREATED"},{"branch":"main","label":"v8.13.0","labelRegex":"^v8.13.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/173303","number":173303,"mergeCommit":{"message":"[Obs
AI Assistant] ES|QL improvements (#173303)\n\nUse a combination of
system prompt + recall for ES|QL to allow for:\r\n\r\n- faster
responses\r\n- wider support of functions (e.g. any documented function
is now\r\nsupported, in theory)\r\n\r\nPerformance for the current ES|QL
scenarios is similar.\r\n\r\nSome things I've noticed:\r\n- ~for
whatever reason it is much harder to get the LLM to not make\r\ncertain
mistakes. I've tried mentioning some mistakes close to the end\r\nof the
conversation, but will explore other options.~ I am again opting\r\nfor
repeating common mistakes at the start of the message, but this
time\r\nI'm allowing the LLM to pick them.\r\n- if we can prevent it
from making \"known\" mistakes, we can figure out a\r\nway to have the
LLM store known mistakes. This is also useful for
other\r\nfunctions\r\n- another thing I want to figure out is if more
examples improves for\r\ncommands and custom documentation improves
performance. but later.\r\n\r\n---------\r\n\r\nCo-authored-by:
kibanamachine
<42973632+kibanamachine@users.noreply.github.com>","sha":"d54109d8f8243cae00f6871c73d3bdc19f4f9905"}}]}]
BACKPORT-->

Co-authored-by: Dario Gieselaar <dario.gieselaar@elastic.co>
Authored by Kibana Machine on 2023-12-17 17:38:47 -05:00, committed via GitHub
Parent: bd389402a2
Commit: dab8881b8f
103 changed files with 2432 additions and 575 deletions


@ -893,7 +893,7 @@
"canvg": "^3.0.9",
"cbor-x": "^1.3.3",
"chalk": "^4.1.0",
"cheerio": "^1.0.0-rc.10",
"cheerio": "^1.0.0-rc.12",
"chroma-js": "^2.1.0",
"classnames": "2.2.6",
"color": "^4.2.3",
@ -1334,6 +1334,7 @@
"@types/chroma-js": "^2.1.0",
"@types/chromedriver": "^81.0.5",
"@types/classnames": "^2.2.9",
"@types/cli-progress": "^3.11.5",
"@types/color": "^3.0.3",
"@types/cytoscape": "^3.14.0",
"@types/d3": "^3.5.43",
@ -1496,6 +1497,7 @@
"chance": "1.0.18",
"chromedriver": "^119.0.1",
"clean-webpack-plugin": "^3.0.0",
"cli-progress": "^3.12.0",
"cli-table3": "^0.6.1",
"copy-webpack-plugin": "^6.0.2",
"cpy": "^8.1.1",


@ -42,6 +42,7 @@ export function Application({
<KibanaContextProvider
services={{
...coreStart,
...pluginsStart,
plugins: {
start: pluginsStart,
},


@ -0,0 +1,41 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import $, { AnyNode, Cheerio } from 'cheerio';
export function extractSections(cheerio: Cheerio<AnyNode>) {
const sections: Array<{
title: string;
content: string;
}> = [];
cheerio.find('.section h3').each((index, element) => {
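// Grab everything between this header and the next h3; fall back to the
// siblings of the enclosing .titlepage when the header is nested.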
let untilNextHeader = $(element).nextUntil('h3');
if (untilNextHeader.length === 0) {
untilNextHeader = $(element).parents('.titlepage').nextUntil('h3');
}
if (untilNextHeader.length === 0) {
untilNextHeader = $(element).parents('.titlepage').nextAll();
}
const title = $(element).text().trim().replace('edit', '');
untilNextHeader.find('table').remove();
untilNextHeader.find('svg').remove();
const text = untilNextHeader.text();
const content = text.replaceAll(/([\n]\s*){2,}/g, '\n');
sections.push({
title: title === 'STATS ... BY' ? 'STATS' : title,
content: `${title}\n\n${content}`,
});
});
return sections;
}


@ -0,0 +1,10 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
require('@kbn/babel-register').install();
require('./load_esql_docs');


@ -0,0 +1,233 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { run } from '@kbn/dev-cli-runner';
import $, { load } from 'cheerio';
import { SingleBar } from 'cli-progress';
import FastGlob from 'fast-glob';
import Fs from 'fs/promises';
import { once, partition } from 'lodash';
import pLimit from 'p-limit';
import Path from 'path';
import git, { SimpleGitProgressEvent } from 'simple-git';
import yargs, { Argv } from 'yargs';
import { extractSections } from './extract_sections';
yargs(process.argv.slice(2))
.command(
'*',
'Extract ES|QL documentation for the Observability AI Assistant',
(y: Argv) =>
y.option('logLevel', {
describe: 'Log level',
string: true,
default: process.env.LOG_LEVEL || 'info',
choices: ['info', 'debug', 'silent', 'verbose'],
}),
(argv) => {
run(
async ({ log }) => {
const builtDocsDir = Path.join(__dirname, '../../../../../../built-docs');
log.debug(`Looking in ${builtDocsDir} for built-docs repository`);
// Fs.stat rejects if the directory does not exist; treat that as "not cloned yet".
const dirExists = await Fs.stat(builtDocsDir).catch(() => false);
const getProgressHandler = () => {
let stage: string = '';
let method: string = '';
const loader: SingleBar = new SingleBar({
barsize: 25,
format: `{phase} {bar} {percentage}%`,
});
const start = once(() => {
loader.start(100, 0, { phase: 'initializing' });
});
return {
progress: (event: SimpleGitProgressEvent) => {
start();
if (event.stage !== stage || event.method !== method) {
stage = event.stage;
method = event.method;
}
loader.update(event.progress, { phase: event.method + '/' + event.stage });
},
stop: () => loader.stop(),
};
};
if (!dirExists) {
log.info('Cloning built-docs repo. This will take a while.');
const { progress, stop } = getProgressHandler();
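// Shallow clone: the built-docs repository is large, and only the latest
// state of the docs is needed here.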
await git(Path.join(builtDocsDir, '..'), {
progress,
}).clone(`https://github.com/elastic/built-docs`, builtDocsDir, ['--depth', '1']);
stop();
}
const { progress, stop } = getProgressHandler();
const builtDocsGit = git(builtDocsDir, { progress });
log.debug('Initializing simple-git');
await builtDocsGit.init();
log.info('Making sure built-docs is up to date');
await builtDocsGit.pull();
const files = FastGlob.sync(
`${builtDocsDir}/html/en/elasticsearch/reference/master/esql*.html`
);
// FastGlob.sync always returns an array; check that it is not empty.
if (!files.length) {
throw new Error('No files found');
}
const limiter = pLimit(10);
stop();
log.info(`Processing ${files.length} files`);
const documents: Array<Array<{ title: string; content: string }>> = await Promise.all(
files.map((file) =>
limiter(async () => {
const fileContents = await Fs.readFile(file);
const $element = load(fileContents.toString())('*');
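// Strips navigation chrome, replaces <code> elements with backtick
// markers, then returns the text of the page's main section with runs
// of blank lines collapsed.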
function getSimpleText() {
$element.remove('.navfooter');
$element.remove('#sticky_content');
$element.find('code').each(function () {
$(this).replaceWith('`' + $(this).text() + '`');
});
return $element
.find('.section,section,.part')
.last()
.text()
.replaceAll(/([\n]\s*){2,}/g, '\n');
}
switch (Path.basename(file)) {
case 'esql-commands.html':
return extractSections($element);
case 'esql-limitations.html':
return [
{
title: 'Limitations',
content: getSimpleText(),
},
];
case 'esql-syntax.html':
return [
{
title: 'Syntax',
content: getSimpleText(),
},
];
case 'esql.html':
return [
{
title: 'Overview',
content: getSimpleText().replace(
/The ES\|QL documentation is organized in these sections(.*)$/,
''
),
},
];
case 'esql-functions-operators.html':
const sections = extractSections($element);
const searches = [
'Binary operators',
'Equality',
'Inequality',
'Less than',
'Greater than',
'Add +',
'Subtract -',
'Multiply *',
'Divide /',
'Modulus %',
'Unary operators',
'Logical operators',
'IS NULL',
];
const matches = [
'CIDR_MATCH',
'ENDS_WITH',
'IN',
'IS_FINITE',
'IS_INFINITE',
'IS_NAN',
'LIKE',
'RLIKE',
'STARTS_WITH',
];
const [operatorSections, allOtherSections] = partition(sections, (section) => {
return (
matches.includes(section.title) ||
searches.some((search) =>
section.title.toLowerCase().startsWith(search.toLowerCase())
)
);
});
return allOtherSections.concat({
title: 'Operators',
content: operatorSections
.map(({ title, content }) => `${title}\n${content}`)
.join('\n'),
});
default:
break;
}
return [];
})
)
);
const flattened = documents.flat().filter((doc) => {
return !doc.title.startsWith('ES|QL');
});
const outDir = Path.join(__dirname, '../../server/functions/esql/docs');
log.info(`Writing ${flattened.length} documents to disk to ${outDir}`);
log.debug(`Clearing ${outDir}`);
// force: true avoids an error when the output directory does not exist yet.
await Fs.rm(outDir, { recursive: true, force: true });
await Fs.mkdir(outDir);
await Promise.all(
flattened.map((doc) =>
limiter(async () => {
const fileName = Path.join(
outDir,
`esql-${doc.title.replaceAll(' ', '-').toLowerCase()}.txt`
);
await Fs.writeFile(fileName, doc.content);
})
)
);
},
{ log: { defaultLevel: argv.logLevel as any }, flags: { allowUnexpected: true } }
);
}
)
.parse();


@ -1,562 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import dedent from 'dedent';
import { Observable } from 'rxjs';
import type { FunctionRegistrationParameters } from '.';
import {
type CreateChatCompletionResponseChunk,
FunctionVisibility,
MessageRole,
} from '../../common/types';
import { processOpenAiStream } from '../../common/utils/process_openai_stream';
import { streamIntoObservable } from '../service/util/stream_into_observable';
export function registerEsqlFunction({
client,
registerFunction,
resources,
}: FunctionRegistrationParameters) {
registerFunction(
{
name: 'execute_query',
contexts: ['core'],
visibility: FunctionVisibility.User,
description: 'Execute an ES|QL query',
parameters: {
type: 'object',
additionalProperties: false,
properties: {
query: {
type: 'string',
},
},
required: ['query'],
} as const,
},
async ({ arguments: { query } }) => {
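// Run the ES|QL query via the _query endpoint on behalf of the current user.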
const response = await (
await resources.context.core
).elasticsearch.client.asCurrentUser.transport.request({
method: 'POST',
path: '_query',
body: {
query,
},
});
return { content: response };
}
);
registerFunction(
{
name: 'esql',
contexts: ['core'],
description: `This function answers ES|QL related questions including query generation and syntax/command questions.`,
visibility: FunctionVisibility.System,
parameters: {
type: 'object',
additionalProperties: false,
properties: {
switch: {
type: 'boolean',
},
},
} as const,
},
async ({ messages, connectorId }, signal) => {
const systemMessage = dedent(`You are a helpful assistant for Elastic ES|QL.
Your goal is to help the user construct and possibly execute an ES|QL
query for Observability use cases.
ES|QL is the Elasticsearch Query Language, which allows users of the
Elastic platform to iteratively explore data. An ES|QL query consists
of a series of commands, separated by pipes. Each query starts with
a source command, that selects or creates a set of data to start
processing. This source command is then followed by one or more
processing commands, which can transform the data returned by the
previous command.
ES|QL is not Elasticsearch SQL, nor is it anything like SQL. SQL
commands are not available in ES|QL. Its close equivalent is SPL
(Search Processing Language). Make sure you reply using only
the context of this conversation.
# Creating a query
First, very importantly, there are critical rules that override
everything that follows. Always repeat these rules, verbatim.
1. ES|QL is not Elasticsearch SQL. Do not apply Elasticsearch SQL
commands, functions and concepts. Only use information available
in the context of this conversation.
2. Use a WHERE clause as early and often as possible, because
it limits the number of documents that need to be evaluated.
3. Use EVAL to create new columns that require mathematical
operations or non-aggregation functions like CASE, ROUND or
DATE_EXTRACT. YOU MUST DO THIS before using these operations
in a STATS command.
4. DO NOT UNDER ANY CIRCUMSTANCES:
- wrap a data source in single or double quotes when using FROM
- use COUNT(*) or COUNT(). A single argument (field name) is
required, like COUNT(my.field.name).
- use the AS keyword. Create a new column by using the = operator.
this is wrong: STATS SUM(field) AS sum_field.
When constructing a query, break it down into the following steps.
Ask these questions out loud so the user can see your reasoning.
Remember, these rules are for you, not for the user.
- What are the critical rules I need to think of?
- What data source is the user requesting? What command should I
select for this data source? Don't use any quotes to wrap the
source.
- Does the data set need to be filtered? Use the WHERE clause for
this, as it improves performance.
- Do I need to add columns that use math or other non-aggregation
functions like CASE using the EVAL command before I run the STATS
BY command with aggregation functions?
- If I run a STATS command, what columns are available after the
command?
- What are the steps needed to get the result that the user needs?
Break each operation down into its own step. Reason about what data
is the outcome of each command or function.
- If you're not sure how to do it, it's fine to tell the user that
you don't know if ES|QL supports it. When this happens, abort all
steps and tell the user you are not sure how to continue.
Format ALL of your responses as follows, including the dashes.
ALWAYS start your message with two dashes and then the rules:
\`\`\`
--
Sure, let's remember the critical rules:
<rules>
--
Let's break down the query step-by-step:
<breakdown>
\`\`\`esql
<placeholder-for-final-query>
\`\`\`
\`\`\`
Always format a complete query as follows:
\`\`\`esql
...
\`\`\`
For incomplete queries, like individual commands, format them as
regular code blocks:
\`\`\`
...
\`\`\`
# Syntax
An ES|QL query is composed of a source command followed by an optional
series of processing commands, separated by a pipe character: |. For
example:
<source-command>
| <processing-command1>
| <processing-command2>
## Binary comparison operators
- equality: ==
- inequality: !=
- less than: <
- less than or equal: <=
- larger than: >
- larger than or equal: >=
## Boolean operators
- AND
- OR
- NOT
## Predicates
For NULL comparison use the IS NULL and IS NOT NULL predicates:
- \`| WHERE birth_date IS NULL\`
- \`| WHERE birth_date IS NOT NULL\`
## Timespan literal syntax
Datetime intervals and timespans can be expressed using timespan
literals. Timespan literals are a combination of a number and a
qualifier. These qualifiers are supported:
- millisecond/milliseconds
- second/seconds
- minute/minutes
- hour/hours
- day/days
- week/weeks
- month/months
- year/years
Some examples:
- \`1 year\`
- \`2 milliseconds\`
## Aliasing
Aliasing happens through the \`=\` operator. Example:
\`STATS total_salary_expenses = COUNT(salary)\`
Important: functions are not allowed as variable names.
# Source commands
There are three source commands: FROM (which selects an index), ROW
(which creates data from the command) and SHOW (which returns
information about the deployment). You do not support SHOW for now.
### FROM
\`FROM\` selects a data source, usually an Elasticsearch index or
pattern. You can also specify multiple indices. DO NOT UNDER ANY
CIRCUMSTANCES wrap an index or pattern in single or double quotes
as such: \`FROM "my_index.pattern-*"\`.
Some examples:
- \`FROM employees\`
- \`FROM employees.annual_salaries-*\`
- \`FROM employees*,my-alias,my-index.with-a-dot*\`
# Processing commands
Note that the following processing commands are available in ES|QL,
but not supported in this context:
ENRICH,GROK,MV_EXPAND,RENAME
### DISSECT
\`DISSECT\` enables you to extract structured data out of a string.
It matches the string against a delimiter-based pattern, and extracts
the specified keys as columns. It uses the same syntax as the
Elasticsearch Dissect Processor. DO NOT UNDER ANY CIRCUMSTANCES use
single quotes instead of double quotes. Some examples:
- \`ROW a = "foo bar" | DISSECT a "%{b} %{c}";\`
- \`ROW a = "foo bar baz" | DISSECT a "%{b} %{?c} %{d}";\`
### DROP
\`DROP\` removes columns. Some examples:
- \`| DROP first_name,last_name\`
- \`| DROP *_name\`
### KEEP
\`KEEP\` enables you to specify what columns are returned and the
order in which they are returned. Some examples:
- \`| KEEP first_name,last_name\`
- \`| KEEP *_name\`
### SORT
\`SORT\` sorts the documents by one or more fields or variables.
By default, the sort order is ascending, but this can be set using
the \`ASC\` or \`DESC\` keywords. Some examples:
- \`| SORT my_field\`
- \`| SORT height DESC\`
DO NOT UNDER ANY CIRCUMSTANCES use functions or math as part of the
sort statement. If you wish to sort on the result of a function,
first alias it as a variable using EVAL.
This is wrong: \`| SORT AVG(cpu)\`.
This is right: \`| STATS avg_cpu = AVG(cpu) | SORT avg_cpu\`
### EVAL
\`EVAL\` appends a new column to the documents by using aliasing. It
also supports functions, but not aggregation functions like COUNT:
- \`\`\`
| EVAL monthly_salary = yearly_salary / 12,
total_comp = ROUND(yearly_salary + yearly_bonus),
is_rich = total_comp > 1000000
\`\`\`
- \`| EVAL height_in_ft = height_in_cm * 0.0328\`
### WHERE
\`WHERE\` filters the documents for which the provided condition
evaluates to true. Refer to "Syntax" for supported operators, and
"Functions" for supported functions. When using WHERE, make sure
that the columns in your statement are still available. Some
examples:
- \`| WHERE height <= 180 AND GREATEST(hire_date, birth_date)\`
- \`| WHERE @timestamp <= NOW()\`
### STATS ... BY
\`STATS ... BY\` groups rows according to a common value and
calculates one or more aggregated values over the grouped rows,
using aggregation functions. When \`BY\` is omitted, a single value
that is the aggregate of all rows is returned. Every column but the
aggregated values and the optional grouping column is dropped.
Mention the retained columns when explaining the STATS command.
DO NOT UNDER ANY CIRCUMSTANCES use non-aggregation functions (like
CASE or DATE_EXTRACT) or mathematical operators in the STATS
command. YOU MUST USE an EVAL command before the STATS command
to append the new calculated column.
Some examples:
- \`| STATS count = COUNT(emp_no) BY languages\`
- \`| STATS salary = AVG(salary)\`
- \`| EVAL monthly_salary = salary / 12 | STATS avg_monthly_salary = AVG(monthly_salary) BY emp_country\`
### LIMIT
Limits the rows returned. Only supports a number as input. Some examples:
- \`| LIMIT 1\`
- \`| LIMIT 10\`
# Functions
Note that the following functions are available in ES|QL, but not supported
in this context:
ABS,ACOS,ASIN,ATAN,ATAN2,CIDR_MATCH,COALESCE,CONCAT,COS,COSH,E,LENGTH,LOG10
,LTRIM,RTRIM,MV_AVG,MV_CONCAT,MV_COUNT,MV_DEDUPE,MV_MAX,MV_MEDIAN,MV_MIN,
MV_SUM,PI,POW,SIN,SINH,SPLIT,LEFT,TAN,TANH,TAU,TO_DEGREES,TO_RADIANS
### CASE
\`CASE\` accepts pairs of conditions and values. The function returns
the value that belongs to the first condition that evaluates to true. If
the number of arguments is odd, the last argument is the default value which
is returned when no condition matches. Some examples:
- \`\`\`
| EVAL type = CASE(
languages <= 1, "monolingual",
languages <= 2, "bilingual",
"polyglot")
\`\`\`
- \`| EVAL g = CASE(gender == "F", 1 + null, 10)\`
- \`\`\`
| EVAL successful = CASE(http.response.status_code == 200, 1, 0), failed = CASE(http.response.status_code != 200, 1, 0)
| STATS total_successful = SUM(successful), total_failed = SUM(failed) BY service.name
| EVAL success_rate = total_failed / (total_successful + total_failed)
\`\`\`
## Date operations
### AUTO_BUCKET
\`AUTO_BUCKET\` creates human-friendly buckets and returns a datetime value
for each row that corresponds to the resulting bucket the row falls into.
Combine AUTO_BUCKET with STATS ... BY to create a date histogram.
You provide a target number of buckets, a start date, and an end date,
and it picks an appropriate bucket size to generate the target number of
buckets or fewer. If you don't have a start and end date, provide placeholder
values. Some examples:
- \`| EVAL bucket=AUTO_BUCKET(@timestamp, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")\`
- \`| EVAL bucket=AUTO_BUCKET(my_date_field, 100, <start-date>, <end-date>)\`
- \`| EVAL bucket=AUTO_BUCKET(@timestamp, 100, NOW() - 15 minutes, NOW())\`
### DATE_EXTRACT
\`DATE_EXTRACT\` extracts parts of a date, like year, month, day, hour. The supported
field types are those provided by java.time.temporal.ChronoField.
Some examples:
- \`| EVAL year = DATE_EXTRACT(date_field, "year")\`
- \`| EVAL year = DATE_EXTRACT(@timestamp, "month")\`
### DATE_FORMAT
\`DATE_FORMAT\` returns a string representation of a date in the provided
format. Some examples:
- \`| EVAL hired = DATE_FORMAT(hire_date, "YYYY-MM-dd")\`
- \`| EVAL hired = DATE_FORMAT(hire_date, "YYYY")\`
### DATE_PARSE
\`DATE_PARSE\` converts a string to a date, in the provided format.
- \`| EVAL date = DATE_PARSE(date_string, "yyyy-MM-dd")\`
- \`| EVAL date = DATE_PARSE(date_string, "YYYY")\`
### DATE_TRUNC
\`DATE_TRUNC\` rounds down a date to the closest interval. Intervals
can be expressed using the timespan literal syntax. Use this together
with STATS ... BY to group data into time buckets with a fixed interval.
Some examples:
- \`| EVAL year_hired = DATE_TRUNC(1 year, hire_date)\`
- \`| EVAL month_logged = DATE_TRUNC(1 month, @timestamp)\`
- \`| EVAL bucket = DATE_TRUNC(1 minute, @timestamp) | STATS avg_salary = AVG(salary) BY bucket\`
- \`| EVAL bucket = DATE_TRUNC(4 hours, @timestamp) | STATS max_salary = MAX(salary) BY bucket\`
### NOW
\`NOW\` returns current date and time. Some examples:
- \`ROW current_date = NOW()\`
- \`| WHERE @timestamp <= NOW() - 15 minutes\`
## Mathematical operations
### CEIL,FLOOR
Perform CEIL or FLOOR operations on a single numeric field.
Some examples:
- \`| EVAL ceiled = CEIL(my.number)\`
- \`| EVAL floored = FLOOR(my.other.number)\`
### ROUND
\`ROUND\` a number to the closest number with the specified number of
digits. Defaults to 0 digits if no number of digits is provided. If the
specified number of digits is negative, rounds to the number of digits
left of the decimal point. Some examples:
- \`| EVAL height_ft = ROUND(height * 3.281, 1)\`
- \`| EVAL percent = ROUND(0.84699, 2) * 100\`
### GREATEST,LEAST
Returns the greatest or least of two or more numbers. Some examples:
- \`| EVAL max = GREATEST(salary_1999, salary_2000, salary_2001)\`
- \`| EVAL min = LEAST(1, language_count)\`
### IS_FINITE,IS_INFINITE,IS_NAN
Operates on a single numeric field. Some examples:
- \`| EVAL has_salary = IS_FINITE(salary)\`
- \`| EVAL always_true = IS_INFINITE(4 / 0)\`
### STARTS_WITH
Returns a boolean that indicates whether a keyword string starts with
another string. Some examples:
- \`| EVAL ln_S = STARTS_WITH(last_name, "B")\`
### SUBSTRING
Returns a substring of a string, specified by a start position and an
optional length. Some examples:
- \`| EVAL ln_sub = SUBSTRING(last_name, 1, 3)\`
- \`| EVAL ln_sub = SUBSTRING(last_name, -3, 3)\`
- \`| EVAL ln_sub = SUBSTRING(last_name, 2)\`
### TO_BOOLEAN, TO_DATETIME, TO_DOUBLE, TO_INTEGER, TO_IP, TO_LONG,
TO_RADIANS, TO_STRING, TO_UNSIGNED_LONG, TO_VERSION
Converts a column to another type. Some examples:
- \`| EVAL version = TO_VERSION("1.2.3")\`
- \`| EVAL as_bool = TO_BOOLEAN(my_boolean_string)\`
- \`| EVAL percent = TO_DOUBLE(part) / TO_DOUBLE(total)\`
### TRIM
Trims leading and trailing whitespace. Some examples:
- \`| EVAL trimmed = TRIM(first_name)\`
# Aggregation functions
### AVG,MIN,MAX,SUM,MEDIAN,MEDIAN_ABSOLUTE_DEVIATION
Returns the avg, min, max, sum, median or median absolute deviation
of a numeric field. Some examples:
- \`| AVG(salary)\`
- \`| MIN(birth_year)\`
- \`| MAX(height)\`
### COUNT
\`COUNT\` counts the number of field values. It requires a single
argument, and does not support wildcards. If you don't have a field
name, use whatever field you have, rather than displaying an invalid
query.
Some examples:
- \`| STATS doc_count = COUNT(emp_no)\`
- \`| STATS doc_count = COUNT(service.name) BY service.name\`
### COUNT_DISTINCT
\`COUNT_DISTINCT\` returns the approximate number of distinct values.
Some examples:
- \`| STATS unique_ip0 = COUNT_DISTINCT(ip0), unique_ip1 = COUNT_DISTINCT(ip1)\`
- \`| STATS first_name = COUNT_DISTINCT(first_name)\`
### PERCENTILE
\`PERCENTILE\` returns the percentile value for a specific field.
Some examples:
- \`| STATS p50 = PERCENTILE(salary, 50)\`
- \`| STATS p99 = PERCENTILE(salary, 99)\`
`);
const source$ = streamIntoObservable(
await client.chat({
connectorId,
messages: [
{
'@timestamp': new Date().toISOString(),
message: { role: MessageRole.System, content: systemMessage },
},
...messages.slice(1),
],
signal,
stream: true,
})
).pipe(processOpenAiStream());
return new Observable<CreateChatCompletionResponseChunk>((subscriber) => {
let cachedContent: string = '';
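// The system prompt instructs the model to open with a "--"-delimited rules
// block; buffer chunks until the second divider so that block is not
// streamed back to the user.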
function includesDivider() {
const firstDividerIndex = cachedContent.indexOf('--');
return firstDividerIndex !== -1 && cachedContent.lastIndexOf('--') !== firstDividerIndex;
}
source$.subscribe({
next: (message) => {
if (includesDivider()) {
subscriber.next(message);
}
cachedContent += message.choices[0].delta.content || '';
},
complete: () => {
if (!includesDivider()) {
subscriber.next({
created: 0,
id: '',
model: '',
object: 'chat.completion.chunk',
choices: [
{
delta: {
content: cachedContent,
},
},
],
});
}
subscriber.complete();
},
error: (error) => {
subscriber.error(error);
},
});
});
}
);
}


@ -0,0 +1,8 @@
ABS
Returns the absolute value.
FROM employees
| KEEP first_name, last_name, height
| EVAL abs_height = ABS(0.0 - height)
Supported types:


@ -0,0 +1,11 @@
ACOS
Syntax
Parameters
n
Numeric expression. If null, the function returns null.
Description
Returns the arccosine of n as an angle, expressed in radians.
Supported types:
Example
ROW a=.9
| EVAL acos=ACOS(a)


@ -0,0 +1,7 @@
ASIN
Inverse sine trigonometric function.
ROW a=.9
| EVAL asin=ASIN(a)
Supported types:

View file

@ -0,0 +1,7 @@
ATAN
Inverse tangent trigonometric function.
ROW a=12.9
| EVAL atan=ATAN(a)
Supported types:


@ -0,0 +1,8 @@
ATAN2
The angle between the positive x-axis and the
ray from the origin to the point (x, y) in the Cartesian plane.
ROW y=12.9, x=.6
| EVAL atan2=ATAN2(y, x)
Supported types:


@ -0,0 +1,27 @@
AUTO_BUCKET
Creates human-friendly buckets and returns a datetime value for each row that
corresponds to the resulting bucket the row falls into. Combine AUTO_BUCKET
with STATS ... BY to create a date histogram.
You provide a target number of buckets, a start date, and an end date, and it
picks an appropriate bucket size to generate the target number of buckets or
fewer. For example, this asks for at most 20 buckets over a whole year, which
picks monthly buckets:
ROW date=TO_DATETIME("1985-07-09T00:00:00.000Z")
| EVAL bucket=AUTO_BUCKET(date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
The goal isn't to provide exactly the target number of buckets, it's to pick a
range that people are comfortable with that provides at most the target number
of buckets.
If you ask for more buckets, then AUTO_BUCKET can pick a smaller range. For
example, asking for at most 100 buckets in a year will get you week-long
buckets:
ROW date=TO_DATETIME("1985-07-09T00:00:00.000Z")
| EVAL bucket=AUTO_BUCKET(date, 100, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
AUTO_BUCKET does not filter any rows. It only uses the provided time range to
pick a good bucket size. For rows with a date outside of the range, it returns a
datetime that corresponds to a bucket outside the range. Combine AUTO_BUCKET
with WHERE to filter rows.
A more complete example might look like:
FROM employees
| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
| EVAL bucket = AUTO_BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
| STATS AVG(salary) BY bucket
| SORT bucket
AUTO_BUCKET does not create buckets that don't match any documents. That's
why the example above is missing 1985-03-01 and other dates.


@ -0,0 +1,6 @@
AVG
The average of a numeric field.
FROM employees
| STATS AVG(height)
The result is always a double no matter the input type.
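A grouped variant might look like this (the grouping field is illustrative):
FROM employees
| STATS avg_height = AVG(height) BY languages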


@ -0,0 +1,22 @@
CASE
Syntax
CASE(condition1, value1[, ..., conditionN, valueN][, default_value])
Parameters
conditionX
A condition.
valueX
The value that's returned when the corresponding condition is the first to
evaluate to true.
default_value
The default value that is returned when no condition matches.
Description
Accepts pairs of conditions and values. The function returns the value that
belongs to the first condition that evaluates to true.
If the number of arguments is odd, the last argument is the default value which
is returned when no condition matches. If the number of arguments is even, and
no condition matches, the function returns null.
Example
FROM employees
| EVAL type = CASE(
languages <= 1, "monolingual",
languages <= 2, "bilingual",
"polyglot")
| KEEP emp_no, languages, type


@ -0,0 +1,10 @@
CEIL
Round a number up to the nearest integer.
ROW a=1.8
| EVAL a=CEIL(a)
This is a noop for long (including unsigned) and integer.
For double this picks the closest double value to the integer, a la
Math.ceil.
Supported types:


@ -0,0 +1,5 @@
COALESCE
Returns the first non-null value.
ROW a=null, b="b"
| EVAL COALESCE(a, b)


@ -0,0 +1,6 @@
CONCAT
Concatenates two or more strings.
FROM employees
| KEEP first_name, last_name, height
| EVAL fullname = CONCAT(first_name, " ", last_name)


@ -0,0 +1,7 @@
COS
Cosine trigonometric function. Input expected in radians.
ROW a=1.8
| EVAL cos=COS(a)
Supported types:


@ -0,0 +1,7 @@
COSH
Cosine hyperbolic function.
ROW a=1.8
| EVAL cosh=COSH(a)
Supported types:


@ -0,0 +1,10 @@
COUNT
Counts field values.
FROM employees
| STATS COUNT(height)
Can take any field type as input and the result is always a long no matter
the input type.
To count the number of rows, use COUNT(*):
FROM employees
| STATS count = COUNT(*) BY languages
| SORT languages DESC


@ -0,0 +1,28 @@
COUNT_DISTINCT
The approximate number of distinct values.
FROM hosts
| STATS COUNT_DISTINCT(ip0), COUNT_DISTINCT(ip1)
Can take any field type as input and the result is always a long no matter
the input type.
Counts are approximate
Computing exact counts requires loading values into a set and returning its
size. This doesn't scale when working on high-cardinality sets and/or large
values as the required memory usage and the need to communicate those
per-shard sets between nodes would utilize too many resources of the cluster.
This COUNT_DISTINCT function is based on the HyperLogLog++ algorithm, which
counts based on the hashes of the values with some interesting properties:
configurable precision, which decides on how to trade memory for accuracy,
excellent accuracy on low-cardinality sets,
fixed memory usage: no matter if there are tens or billions of unique values,
memory usage only depends on the configured precision.
For a precision threshold of c, the implementation that we are using requires
about c * 8 bytes.
[The original documentation includes a chart of error vs. cardinality before
and after the threshold.] For all 3 thresholds, counts have been accurate up
to the configured threshold. Although not guaranteed, this is likely to be the
case. Accuracy in practice depends on the dataset in question. In general,
most datasets show consistently good accuracy. Also note that even with a
threshold as low as 100, the error remains very low (1-6%) even when counting
millions of items.
The HyperLogLog++ algorithm depends on the leading zeros of hashed values; the
exact distributions of hashes in a dataset can affect the accuracy of the
cardinality.
Precision is configurable
The COUNT_DISTINCT function takes an optional second parameter to configure the
precision discussed previously.
FROM hosts
| STATS COUNT_DISTINCT(ip0, 80000), COUNT_DISTINCT(ip1, 5)


@ -0,0 +1,6 @@
DATE_EXTRACT
Extracts parts of a date, like year, month, day, hour.
The supported field types are those provided by java.time.temporal.ChronoField.
ROW date = DATE_PARSE("yyyy-MM-dd", "2022-05-06")
| EVAL year = DATE_EXTRACT("year", date)


@ -0,0 +1,7 @@
DATE_FORMAT
Returns a string representation of a date in the provided format. If no format
is specified, the yyyy-MM-dd'T'HH:mm:ss.SSSZ format is used.
FROM employees
| KEEP first_name, last_name, hire_date
| EVAL hired = DATE_FORMAT("YYYY-MM-dd", hire_date)


@ -0,0 +1,16 @@
DATE_PARSE
Syntax
DATE_PARSE([format,] date_string)
Parameters
format
The date format. Refer to the
DateTimeFormatter
documentation for the syntax. If null, the function returns null.
date_string
Date expression as a string. If null or an empty string, the function returns
null.
Description
Returns a date by parsing the second argument using the format specified in the
first argument.
Example
ROW date_string = "2022-05-06"
| EVAL date = DATE_PARSE("yyyy-MM-dd", date_string)


@ -0,0 +1,8 @@
DATE_TRUNC
Rounds down a date to the closest interval. Intervals can be expressed using the
timespan literal syntax.
FROM employees
| EVAL year_hired = DATE_TRUNC(1 year, hire_date)
| STATS COUNT(emp_no) BY year_hired
| SORT year_hired


@ -0,0 +1,25 @@
DISSECT
Syntax
DISSECT input "pattern" [APPEND_SEPARATOR="<separator>"]
Parameters
input
The column that contains the string you want to structure. If the column has
multiple values, DISSECT will process each value.
pattern
A dissect pattern.
<separator>
A string used as the separator between appended values, when using the append modifier.
Description
DISSECT enables you to extract structured data out of a string. DISSECT
matches the string against a delimiter-based pattern, and extracts the
specified keys as columns.
Refer to Process data with DISSECT for the syntax of dissect patterns.
Examples
The following example parses a string that contains a timestamp, some text, and
an IP address:
ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1"
| DISSECT a "%{date} - %{msg} - %{ip}"
| KEEP date, msg, ip
By default, DISSECT outputs keyword string columns. To convert to another
type, use Type conversion functions:
ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1"
| DISSECT a "%{date} - %{msg} - %{ip}"
| KEEP date, msg, ip
| EVAL date = TO_DATETIME(date)


@ -0,0 +1,14 @@
DROP
Syntax
DROP columns
Parameters
columns
A comma-separated list of columns to remove. Supports wildcards.
Description
The DROP processing command removes one or more columns.
Examples
FROM employees
| DROP height
Rather than specify each column by name, you can use wildcards to drop all
columns with a name that matches a pattern:
FROM employees
| DROP height*


@ -0,0 +1,5 @@
E
Euler's number.
ROW E()


@ -0,0 +1,47 @@
ENRICH
Syntax
ENRICH policy [ON match_field] [WITH [new_name1 = ]field1, [new_name2 = ]field2, ...]
Parameters
policy
The name of the enrich policy. You need to create
and execute the enrich policy first.
match_field
The match field. ENRICH uses its value to look for records in the enrich
index. If not specified, the match will be performed on the column with the same
name as the match_field defined in the enrich policy.
fieldX
The enrich fields from the enrich index that are added to the result as new
columns. If a column with the same name as the enrich field already exists, the
existing column will be replaced by the new column. If not specified, each of
the enrich fields defined in the policy is added.
new_nameX
Enables you to change the name of the column that's added for each of the enrich
fields. Defaults to the enrich field name.
Description
ENRICH enables you to add data from existing indices as new columns using an
enrich policy. Refer to Data enrichment for information about setting up a
policy.
Before you can use ENRICH, you need to create
and execute an enrich policy.
Examples
The following example uses the languages_policy enrich policy to add a new
column for each enrich field defined in the policy. The match is performed using
the match_field defined in the enrich policy and
requires that the input table has a column with the same name (language_code
in this example). ENRICH will look for records in the
enrich index based on the match field value.
ROW language_code = "1"
| ENRICH languages_policy
To use a column with a different name than the match_field defined in the
policy as the match field, use ON <column-name>:
ROW a = "1"
| ENRICH languages_policy ON a
By default, each of the enrich fields defined in the policy is added as a
column. To explicitly select the enrich fields that are added, use
WITH <field1>, <field2>, ...:
ROW a = "1"
| ENRICH languages_policy ON a WITH language_name
You can rename the columns that are added using WITH new_name=<field1>:
ROW a = "1"
| ENRICH languages_policy ON a WITH name = language_name
In case of name collisions, the newly created columns will override existing
columns.


@ -0,0 +1,23 @@
EVAL
Syntax
EVAL column1 = value1[, ..., columnN = valueN]
Parameters
columnX
The column name.
valueX
The value for the column. Can be a literal, an expression, or a
function.
Description
The EVAL processing command enables you to append new columns with calculated
values. EVAL supports various functions for calculating values. Refer to
Functions for more information.
Examples
FROM employees
| SORT emp_no
| KEEP first_name, last_name, height
| EVAL height_feet = height * 3.281, height_cm = height * 100
If the specified column already exists, the existing column will be dropped, and
the new column will be appended to the table:
FROM employees
| SORT emp_no
| KEEP first_name, last_name, height
| EVAL height = height * 3.281


@ -0,0 +1,10 @@
FLOOR
Round a number down to the nearest integer.
ROW a=1.8
| EVAL a=FLOOR(a)
This is a noop for long (including unsigned) and integer.
For double this picks the closest double value to the integer, a la
Math.floor.
Supported types:


@ -0,0 +1,29 @@
FROM
Syntax
FROM index_pattern [METADATA fields]
Parameters
index_pattern
A list of indices, data streams or aliases. Supports wildcards and date math.
fields
A comma-separated list of metadata fields to retrieve.
Description
The FROM source command returns a table with data from a data stream, index,
or alias. Each row in the resulting table represents a document. Each column
corresponds to a field, and can be accessed by the name of that field.
By default, an ES|QL query without an explicit LIMIT uses an implicit
limit of 500. This applies to FROM too. A FROM command without LIMIT:
FROM employees
is executed as:
FROM employees
| LIMIT 500
Examples
FROM employees
You can use date math to refer to indices, aliases
and data streams. This can be useful for time series data, for example to access
today's index:
FROM <logs-{now/d}>
Use comma-separated lists or wildcards to query multiple data streams, indices,
or aliases:
FROM employees-00001,other-employees-*
Use the METADATA directive to enable metadata fields:
FROM employees [METADATA _id]


@ -0,0 +1,11 @@
GREATEST
Returns the maximum value from many columns. This is similar to MV_MAX
except it's intended to run on multiple columns at once.
ROW a = 10, b = 20
| EVAL g = GREATEST(a, b)
When run on keyword or text fields, this'll return the last string
in alphabetical order. When run on boolean columns this will return
true if any values are true.
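For instance, a sketch of the keyword behavior (values are illustrative):
ROW a = "apple", b = "banana"
| EVAL g = GREATEST(a, b)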
Supported types:


@ -0,0 +1,28 @@
GROK
Syntax
GROK input "pattern"
Parameters
input
The column that contains the string you want to structure. If the column has
multiple values, GROK will process each value.
pattern
A grok pattern.
Description
GROK enables you to extract structured data out of a string. GROK matches the
string against patterns, based on regular expressions, and extracts the
specified patterns as columns.
Refer to Process data with GROK for the syntax of grok patterns.
Examples
The following example parses a string that contains a timestamp, an IP address,
an email address, and a number:
ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42"
| GROK a "%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num}"
| KEEP date, ip, email, num
By default, GROK outputs keyword string columns. int and float types can
be converted by appending :type to the semantics in the pattern. For example
%{NUMBER:num:int}:
ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42"
| GROK a "%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num:int}"
| KEEP date, ip, email, num
For other type conversions, use Type conversion functions:
ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42"
| GROK a "%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num:int}"
| KEEP date, ip, email, num
| EVAL date = TO_DATETIME(date)


@ -0,0 +1,19 @@
KEEP
Syntax
KEEP columns
Parameters
columns
A comma-separated list of columns to keep. Supports wildcards.
Description
The KEEP processing command enables you to specify what columns are returned
and the order in which they are returned.
Examples
The columns are returned in the specified order:
FROM employees
| KEEP emp_no, first_name, last_name, height
Rather than specify each column by name, you can use wildcards to return all
columns with a name that matches a pattern:
FROM employees
| KEEP h*
The asterisk wildcard (*) by itself translates to all columns that do not
match the other arguments. This query will first return all columns with a name
that starts with h, followed by all other columns:
FROM employees
| KEEP h*, *


@ -0,0 +1,11 @@
LEAST
Returns the minimum value from many columns. This is similar to MV_MIN
except it's intended to run on multiple columns at once.
ROW a = 10, b = 20
| EVAL l = LEAST(a, b)
When run on keyword or text fields, this'll return the first string
in alphabetical order. When run on boolean columns this will return
false if any values are false.
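For instance, a sketch of the boolean behavior (values are illustrative):
ROW a = true, b = false
| EVAL l = LEAST(a, b)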
Supported types:


@ -0,0 +1,10 @@
LEFT
Return the substring that extracts length chars from the string starting from the left.
FROM employees
| KEEP last_name
| EVAL left = LEFT(last_name, 3)
| SORT last_name ASC
| LIMIT 5
Supported types:


@ -0,0 +1,6 @@
LENGTH
Returns the character length of a string.
FROM employees
| KEEP first_name, last_name, height
| EVAL fn_length = LENGTH(first_name)


@ -0,0 +1,24 @@
LIMIT
Syntax
LIMIT max_number_of_rows
Parameters
max_number_of_rows
The maximum number of rows to return.
Description
The LIMIT processing command enables you to limit the number of rows that are
returned.
Queries do not return more than 10,000 rows, regardless of the LIMIT command's
value. This limit only applies to the number of rows that are retrieved by the
query. Queries and aggregations run on the full data set.
To overcome this limitation:
Reduce the result set size by modifying the query to only return relevant
data. Use WHERE to select a smaller subset of the data.
Shift any post-query processing to the query itself. You can use the ES|QL
STATS ... BY command to aggregate data in the query.
The default and maximum limits can be changed using these dynamic cluster
settings:
esql.query.result_truncation_default_size
esql.query.result_truncation_max_size
Example
FROM employees
| SORT emp_no ASC
| LIMIT 5


@ -0,0 +1,149 @@
ES|QL limitations
Result set size limit
By default, an ES|QL query returns up to 500 rows. You can increase the number
of rows up to 10,000 using the `LIMIT` command.
Queries do not return more than 10,000 rows, regardless of the `LIMIT` command's
value.
This limit only applies to the number of rows that are retrieved by the query.
Queries and aggregations run on the full data set.
To overcome this limitation:
Reduce the result set size by modifying the query to only return relevant
data. Use `WHERE` to select a smaller subset of the data.
Shift any post-query processing to the query itself. You can use the ES|QL
`STATS ... BY` command to aggregate data in the query.
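For example, a sketch combining both workarounds (index and field names are
illustrative):
FROM my-index-*
| WHERE @timestamp >= NOW() - 1 day
| STATS doc_count = COUNT(event.id) BY event.category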
The default and maximum limits can be changed using these dynamic cluster
settings:
`esql.query.result_truncation_default_size`
`esql.query.result_truncation_max_size`
Field types
Supported types
ES|QL currently supports the following field types:
`alias`
`boolean`
`date`
`double` (`float`, `half_float`, `scaled_float` are represented as `double`)
`ip`
`keyword` family including `keyword`, `constant_keyword`, and `wildcard`
`int` (`short` and `byte` are represented as `int`)
`long`
`null`
`text`
`unsigned_long`
`version`
Spatial types
`geo_point`
`point`
Unsupported types
ES|QL does not yet support the following field types:
TSDB metrics
`counter`
`position`
`aggregate_metric_double`
Spatial types
`geo_shape`
`shape`
Date/time
`date_nanos`
`date_range`
Other types
`binary`
`completion`
`dense_vector`
`double_range`
`flattened`
`float_range`
`histogram`
`integer_range`
`ip_range`
`long_range`
`nested`
`rank_feature`
`rank_features`
`search_as_you_type`
Querying a column with an unsupported type returns an error. If a column with an
unsupported type is not explicitly used in a query, it is returned with `null`
values, with the exception of nested fields. Nested fields are not returned at
all.
Full-text search is not supported
Because of the way ES|QL treats `text` values,
full-text search is not yet supported. Queries on `text` fields are like queries
on `keyword` fields: they are case-sensitive and need to match the full string.
For example, after indexing a field of type `text` with the value `Elasticsearch
query language`, the following `WHERE` clause does not match because the `LIKE`
operator is case-sensitive:
| WHERE field LIKE "elasticsearch query language"
The following `WHERE` clause does not match either, because the `LIKE` operator
tries to match the whole string:
| WHERE field LIKE "Elasticsearch"
As a workaround, use wildcards and regular expressions. For example:
| WHERE field RLIKE "[Ee]lasticsearch.*"
`text` fields behave like `keyword` fields
While ES|QL supports `text` fields, ES|QL does not treat these fields
like the Search API does. ES|QL queries do not query or aggregate the
analyzed string. Instead, an ES|QL query will try to get a `text`
field's subfield of the keyword family type and query/aggregate
that. If it's not possible to retrieve a `keyword` subfield, ES|QL will get the
string from a document's `_source`. If the `_source` cannot be retrieved, for
example when using synthetic source, `null` is returned.
Note that ES|QL's retrieval of `keyword` subfields may have unexpected
consequences. An ES|QL query on a `text` field is case-sensitive. Furthermore,
a subfield may have been mapped with a normalizer, which can
transform the original string. Or it may have been mapped with `ignore_above`,
which can truncate the string. None of these mapping operations are applied to
an ES|QL query, which may lead to false positives or negatives.
To avoid these issues, a best practice is to be explicit about the field that
you query, and query `keyword` sub-fields instead of `text` fields.
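For example, assuming a `message` text field with a `message.keyword` subfield:
| WHERE message.keyword == "Elasticsearch query language"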
Time series data streams are not supported
ES|QL does not support querying time series data streams (TSDS).
Cross-cluster search is not supported
ES|QL does not support cross-cluster search.
Date math limitations
Date math expressions work well when the leftmost expression is a datetime, for
example:
now() + 1 year - 2hour + ...
But using parentheses or putting the datetime to the right is not always supported yet. For example, the following expressions fail:
1year + 2hour + now()
now() + (1year + 2hour)
Date math does not allow subtracting two datetimes, for example:
now() - 2023-10-26
Enrich limitations
The ES|QL `ENRICH` command only supports enrich policies of type `match`.
Furthermore, `ENRICH` only supports enriching on a column of type `keyword`.
Dissect limitations
The `DISSECT` command does not support reference keys.
Grok limitations
The `GROK` command does not support configuring custom
patterns, or multiple patterns. The `GROK` command is not
subject to Grok watchdog settings.
Multivalue limitations
ES|QL supports multivalued fields, but functions
return `null` when applied to a multivalued field, unless documented otherwise.
Work around this limitation by converting the field to single value with one of
the multivalue functions.
Timezone support
ES|QL only supports the UTC timezone.
Kibana limitations
The user interface to filter data is not enabled when Discover is in ES|QL
mode. To filter data, write a query that uses the `WHERE` command
instead.
In ES|QL mode, clicking a field in the field list in Discover does not show
quick statistics for that field.
Discover shows no more than 10,000 rows. This limit only applies to the number
of rows that are retrieved by the query and displayed in Discover. Queries and
aggregations run on the full data set.
Discover shows no more than 50 columns. If a query returns
more than 50 columns, Discover only shows the first 50.
Querying many indices at once without any filters can cause an error in
Kibana which looks like `[esql] > Unexpected error from Elasticsearch: The
content length (536885793) is bigger than the maximum allowed string
(536870888)`. The response from ES|QL is too long. Use `DROP` or
`KEEP` to limit the number of fields returned.


@ -0,0 +1,8 @@
LOG10
Returns the log base 10. The input can be any numeric value, the return value
is always a double.
Logs of negative numbers are NaN. Logs of infinites are infinite, as is the
log of 0.
ROW d = 1000.0
| EVAL s = LOG10(d)
Supported types:


@ -0,0 +1,10 @@
LTRIM
Removes leading whitespaces from strings.
ROW message = " some text ", color = " red "
| EVAL message = LTRIM(message)
| EVAL color = LTRIM(color)
| EVAL message = CONCAT("'", message, "'")
| EVAL color = CONCAT("'", color, "'")
Supported types:


@ -0,0 +1,5 @@
MAX
The maximum value of a numeric field.
FROM employees
| STATS MAX(languages)


@ -0,0 +1,9 @@
MEDIAN
The value that is greater than half of all values and less than half of
all values, also known as the 50% PERCENTILE.
FROM employees
| STATS MEDIAN(salary), PERCENTILE(salary, 50)
Like PERCENTILE, MEDIAN is usually approximate.
MEDIAN is also non-deterministic.
This means you can get slightly different results using the same data.


@ -0,0 +1,14 @@
MEDIAN_ABSOLUTE_DEVIATION
The median absolute deviation, a measure of variability. It is a robust
statistic, meaning that it is useful for describing data that may have outliers,
or may not be normally distributed. For such data it can be more descriptive
than standard deviation.
It is calculated as the median of each data point's deviation from the median
of the entire sample. That is, for a random variable X, the median absolute
deviation is median(|median(X) - Xi|).
FROM employees
| STATS MEDIAN(salary), MEDIAN_ABSOLUTE_DEVIATION(salary)
Like PERCENTILE, MEDIAN_ABSOLUTE_DEVIATION is
usually approximate.
MEDIAN_ABSOLUTE_DEVIATION is also non-deterministic.
This means you can get slightly different results using the same data.


@ -0,0 +1,5 @@
MIN
The minimum value of a numeric field.
FROM employees
| STATS MIN(languages)


@ -0,0 +1,7 @@
MV_AVG
Converts a multivalued field into a single valued field containing the average
of all of the values. For example:
ROW a=[3, 5, 1, 6]
| EVAL avg_a = MV_AVG(a)
The output type is always a double and the input type can be any number.


@ -0,0 +1,11 @@
MV_CONCAT
Converts a multivalued string field into a single valued field containing the
concatenation of all values separated by a delimiter:
ROW a=["foo", "zoo", "bar"]
| EVAL j = MV_CONCAT(a, ", ")
If you want to concat non-string fields call TO_STRING on them first:
ROW a=[10, 9, 8]
| EVAL j = MV_CONCAT(TO_STRING(a), ", ")
Supported types:


@ -0,0 +1,8 @@
MV_COUNT
Converts a multivalued field into a single valued field containing a count of the number
of values:
ROW a=["foo", "zoo", "bar"]
| EVAL count_a = MV_COUNT(a)
Supported types:


@ -0,0 +1,8 @@
MV_DEDUPE
Removes duplicates from a multivalued field. For example:
ROW a=["foo", "foo", "bar", "foo"]
| EVAL dedupe_a = MV_DEDUPE(a)
Supported types:
MV_DEDUPE may, but won't always, sort the values in the field.


@ -0,0 +1,11 @@
MV_EXPAND
Syntax
MV_EXPAND column
Parameters
column
The multivalued column to expand.
Description
The MV_EXPAND processing command expands multivalued columns into one row per
value, duplicating other columns.
Example
ROW a=[1,2,3], b="b", j=["a","b"]
| MV_EXPAND a


@ -0,0 +1,11 @@
MV_MAX
Converts a multivalued field into a single valued field containing the maximum value. For example:
ROW a=[3, 5, 1]
| EVAL max_a = MV_MAX(a)
It can be used by any field type, including keyword fields. In that case it
picks the last string, comparing their utf-8 representation byte by byte:
ROW a=["foo", "zoo", "bar"]
| EVAL max_a = MV_MAX(a)
Supported types:


@ -0,0 +1,10 @@
MV_MEDIAN
Converts a multivalued field into a single valued field containing the median value. For example:
ROW a=[3, 5, 1]
| EVAL median_a = MV_MEDIAN(a)
It can be used by any numeric field type and returns a value of the same type. If the
row has an even number of values for a column the result will be the average of the
middle two entries. If the field is not floating point then the average rounds down:
ROW a=[3, 7, 1, 6]
| EVAL median_a = MV_MEDIAN(a)

View file

@ -0,0 +1,11 @@
MV_MIN
Converts a multivalued field into a single valued field containing the minimum value. For example:
ROW a=[2, 1]
| EVAL min_a = MV_MIN(a)
It can be used by any field type, including keyword fields. In that case, it
picks the first string, comparing their utf-8 representation byte by byte:
ROW a=["foo", "bar"]
| EVAL min_a = MV_MIN(a)
Supported types:

View file

@ -0,0 +1,7 @@
MV_SUM
Converts a multivalued field into a single valued field containing the sum
of all of the values. For example:
ROW a=[3, 5, 6]
| EVAL sum_a = MV_SUM(a)
The input type can be any number and the output type is the same as the input type.

View file

@ -0,0 +1,4 @@
NOW
Returns current date and time.
ROW current_date = NOW()
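NOW() is commonly combined with a timespan literal to filter on a recent time
window. A minimal illustrative sketch (assuming a logs-* index with an
@timestamp field):
FROM logs-*
| WHERE @timestamp > NOW() - 1 hour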

View file

@ -0,0 +1,12 @@
Numeric fields
auto_bucket can also operate on numeric fields like this:
FROM employees
| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
| EVAL bs = AUTO_BUCKET(salary, 20, 25324, 74999)
| SORT hire_date, salary
| KEEP hire_date, salary, bs
Unlike the example above where you are intentionally filtering on a date range,
you rarely want to filter on a numeric range. So you have to find the min and max
separately. We don't yet have an easy way to do that automatically. Improvements
coming!
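One way to find those bounds today is a separate query first. An illustrative
sketch, reusing the employees index from the example above:
FROM employees
| STATS min_salary = MIN(salary), max_salary = MAX(salary)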

View file

@ -0,0 +1,181 @@
Binary operators
Equality ==
Supported types:
Inequality !=
Supported types:
Less than <
Supported types:
Less than or equal to <=
Supported types:
Greater than >
Supported types:
Greater than or equal to >=
Supported types:
Add +
Supported types:
Subtract -
Supported types:
Multiply *
Supported types:
Divide /
Supported types:
Modulus %
Supported types:
Unary operators
The only unary operator is negation (-).
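For example (an illustrative sketch):
ROW x = 5
| EVAL y = -x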
Supported types:
Logical operators
The following logical operators are supported:
AND
OR
NOT
IS NULL and IS NOT NULL predicates
For NULL comparison, use the IS NULL and IS NOT NULL predicates:
FROM employees
| WHERE birth_date IS NULL
| KEEP first_name, last_name
| SORT first_name
| LIMIT 3
FROM employees
| WHERE is_rehired IS NOT NULL
| STATS COUNT(emp_no)
CIDR_MATCH
Returns true if the provided IP is contained in one of the provided CIDR
blocks. CIDR_MATCH accepts two or more arguments. The first argument is the IP
address of type ip (both IPv4 and IPv6 are supported). Subsequent arguments
are the CIDR blocks to test the IP against.
FROM hosts
| WHERE CIDR_MATCH(ip, "127.0.0.2/32", "127.0.0.3/32")
ENDS_WITH
Returns a boolean that indicates whether a keyword string ends with another
string:
FROM employees
| KEEP last_name
| EVAL ln_E = ENDS_WITH(last_name, "d")
Supported types:
IN
The IN operator allows testing whether a field or expression equals
an element in a list of literals, fields or expressions:
ROW a = 1, b = 4, c = 3
| WHERE c-a IN (3, b / 2, a)
IS_FINITE
Returns a boolean that indicates whether its input is a finite number.
ROW d = 1.0
| EVAL s = IS_FINITE(d/0)
IS_INFINITE
Returns a boolean that indicates whether its input is infinite.
ROW d = 1.0
| EVAL s = IS_INFINITE(d/0)
IS_NAN
Returns a boolean that indicates whether its input is not a number.
ROW d = 1.0
| EVAL s = IS_NAN(d)
LIKE
Use LIKE to filter data based on string patterns using wildcards. LIKE
usually acts on a field placed on the left-hand side of the operator, but it can
also act on a constant (literal) expression. The right-hand side of the operator
represents the pattern. The following wildcard characters are supported:
* matches zero or more characters.
? matches one character.
FROM employees
| WHERE first_name LIKE "?b*"
| KEEP first_name, last_name
RLIKE
Use RLIKE to filter data based on string patterns using
regular expressions. RLIKE usually acts on a field placed on
the left-hand side of the operator, but it can also act on a constant (literal)
expression. The right-hand side of the operator represents the pattern.
FROM employees
| WHERE first_name RLIKE ".leja.*"
| KEEP first_name, last_name
STARTS_WITH
Returns a boolean that indicates whether a keyword string starts with another
string:
FROM employees
| KEEP last_name
| EVAL ln_S = STARTS_WITH(last_name, "B")
Supported types:

View file

@ -0,0 +1,45 @@
ES|QL
This functionality is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features.
The Elasticsearch Query Language (ES|QL) provides a powerful way to filter, transform,
and analyze data stored in Elasticsearch, and in the future in other runtimes. It is
designed to be easy to learn and use, by end users, SRE teams, application
developers, and administrators.
Users can author ES|QL queries to find specific events, perform statistical
analysis, and generate visualizations. It supports a wide range of commands and
functions that enable users to perform various data operations, such as
filtering, aggregation, time-series analysis, and more.
The Elasticsearch Query Language (ES|QL) makes use of "pipes" (|) to manipulate and
transform data in a step-by-step fashion. This approach allows users to compose
a series of operations, where the output of one operation becomes the input for
the next, enabling complex data transformations and analysis.
The ES|QL Compute Engine
ES|QL is more than a language: it represents a significant investment in new
compute capabilities within Elasticsearch. To achieve both the functional and performance
requirements for ES|QL, it was necessary to build an entirely new compute
architecture. ES|QL search, aggregation, and transformation functions are
directly executed within Elasticsearch itself. Query expressions are not
transpiled to Query DSL for execution. This approach allows ES|QL to be
extremely performant and versatile.
The new ES|QL execution engine was designed with performance in mind — it
operates on blocks at a time instead of per row, targets vectorization and cache
locality, and embraces specialization and multi-threading. It is a separate
component from the existing Elasticsearch aggregation framework with different
performance characteristics.
The ES|QL documentation is organized in these sections:
Getting started
A tutorial to help you get started with ES|QL.
Learning ES|QL
Reference documentation for the ES|QL syntax,
commands, and functions and
operators. Information about working with metadata
fields and multivalued fields. And guidance for
data processing with DISSECT and
GROK and data enrichment with ENRICH.
Using ES|QL
An overview of using the REST API, Using ES|QL in Kibana,
Using ES|QL in Elastic Security, and Task management.
Limitations
The current limitations of ES|QL.
Examples
A few examples of what you can do with ES|QL.

View file

@ -0,0 +1,32 @@
PERCENTILE
The value at which a certain percentage of observed values occur. For example,
the 95th percentile is the value which is greater than 95% of the observed values and
the 50th percentile is the MEDIAN.
FROM employees
| STATS p0 = PERCENTILE(salary, 0)
, p50 = PERCENTILE(salary, 50)
, p99 = PERCENTILE(salary, 99)
PERCENTILE is (usually) approximate
There are many different algorithms to calculate percentiles. The naive
implementation simply stores all the values in a sorted array. To find the 50th
percentile, you simply find the value that is at my_array[count(my_array) * 0.5].
Clearly, the naive implementation does not scale: the sorted array grows
linearly with the number of values in your dataset. To calculate percentiles
across potentially billions of values in an Elasticsearch cluster, approximate
percentiles are calculated.
The algorithm used by the percentile metric is called TDigest (introduced by
Ted Dunning in Computing Accurate Quantiles using T-Digests).
When using this metric, there are a few guidelines to keep in mind:
Accuracy is proportional to q(1-q). This means that extreme percentiles (e.g. 99%)
are more accurate than less extreme percentiles, such as the median
For small sets of values, percentiles are highly accurate (and potentially
100% accurate if the data is small enough).
As the quantity of values in a bucket grows, the algorithm begins to approximate
the percentiles. It is effectively trading accuracy for memory savings. The
exact level of inaccuracy is difficult to generalize, since it depends on your
data distribution and volume of data being aggregated
The following chart (not reproduced here) shows the relative error on a uniform
distribution depending on the number of collected values and the requested
percentile. It shows how precision is better for extreme percentiles. The reason
why error diminishes for large numbers of values is that the law of large
numbers makes the distribution of
values more and more uniform and the t-digest tree can do a better job at summarizing
it. It would not be the case on more skewed distributions.
PERCENTILE is also non-deterministic.
This means you can get slightly different results using the same data.

View file

@ -0,0 +1,5 @@
PI
The ratio of a circle's circumference to its diameter.
ROW PI()

View file

@ -0,0 +1,13 @@
POW
Returns the value of a base (first argument) raised to the power of an exponent (second argument).
Both arguments must be numeric. The output is always a double. Note that it is still possible to overflow
a double result here; in that case, null will be returned.
ROW base = 2.0, exponent = 2
| EVAL result = POW(base, exponent)
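As a sketch of the overflow behavior mentioned above (illustrative values;
2^1024 does not fit in a double, so the result is null):
ROW base = 2.0, exponent = 1024
| EVAL result = POW(base, exponent)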
Fractional exponents
The exponent can be a fraction, which is similar to performing a root.
For example, the exponent of 0.5 will give the square root of the base:
ROW base = 4, exponent = 0.5
| EVAL s = POW(base, exponent)
Table of supported input and output types
For clarity, the following table describes the output result type for all
combinations of numeric input types:

View file

@ -0,0 +1,17 @@
Processing commands
ES|QL processing commands change an input table by adding, removing, or changing
rows and columns.
ES|QL supports these processing commands:
DISSECT
DROP
ENRICH
EVAL
GROK
KEEP
LIMIT
MV_EXPAND
RENAME
SORT
STATS ... BY
WHERE

View file

@ -0,0 +1,18 @@
RENAME
Syntax
RENAME old_name1 AS new_name1[, ..., old_nameN AS new_nameN]
Parameters
old_nameX
The name of a column you want to rename.
new_nameX
The new name of the column.
Description
The RENAME processing command renames one or more columns. If a column with
the new name already exists, it will be replaced by the new column.
Examples
FROM employees
| KEEP first_name, last_name, still_hired
| RENAME still_hired AS employed
Multiple columns can be renamed with a single RENAME command:
FROM employees
| KEEP first_name, last_name
| RENAME first_name AS fn, last_name AS ln

View file

@ -0,0 +1,7 @@
REPLACE
The function substitutes in the string (1st argument) any match of the regular
expression (2nd argument) with the replacement string (3rd argument). If any of
the arguments are NULL, the result is NULL.
This example replaces an occurrence of the word "World" with the word "Universe":
ROW str = "Hello World"
| EVAL str = REPLACE(str, "World", "Universe")
| KEEP str
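Since the second argument is a regular expression, patterns also work. An
illustrative sketch:
ROW str = "Hello World"
| EVAL str = REPLACE(str, "l+", "L")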

View file

@ -0,0 +1,10 @@
RIGHT
Returns the substring that extracts length chars from the string, starting from the right.
FROM employees
| KEEP last_name
| EVAL right = RIGHT(last_name, 3)
| SORT last_name ASC
| LIMIT 5
Supported types:

View file

@ -0,0 +1,8 @@
ROUND
Rounds a number to the closest number with the specified number of digits.
Defaults to 0 digits if no number of digits is provided. If the specified number
of digits is negative, rounds to the number of digits left of the decimal point.
FROM employees
| KEEP first_name, last_name, height
| EVAL height_ft = ROUND(height * 3.281, 1)
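A sketch of the negative-digits case (illustrative values; this rounds to the
nearest hundred):
ROW d = 1234.56
| EVAL d_hundreds = ROUND(d, -2)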

View file

@ -0,0 +1,17 @@
ROW
Syntax
ROW column1 = value1[, ..., columnN = valueN]
Parameters
columnX
The column name.
valueX
The value for the column. Can be a literal, an expression, or a
function.
Description
The ROW source command produces a row with one or more columns with values
that you specify. This can be useful for testing.
Examples
ROW a = 1, b = "two", c = null
Use square brackets to create multi-value columns:
ROW a = [2, 1]
ROW supports the use of functions:
ROW a = ROUND(1.23, 0)

View file

@ -0,0 +1,10 @@
RTRIM
Removes trailing whitespaces from strings.
ROW message = " some text ", color = " red "
| EVAL message = RTRIM(message)
| EVAL color = RTRIM(color)
| EVAL message = CONCAT("'", message, "'")
| EVAL color = CONCAT("'", color, "'")
Supported types:

View file

@ -0,0 +1,15 @@
SHOW
Syntax
SHOW item
Parameters
item
Can be INFO or FUNCTIONS.
Description
The SHOW source command returns information about the deployment and
its capabilities:
Use SHOW INFO to return the deployment's version, build date and hash.
Use SHOW FUNCTIONS to return a list of all supported functions and a
synopsis of each function.
Examples
SHOW functions
| WHERE STARTS_WITH(name, "is_")

View file

@ -0,0 +1,7 @@
SIN
Sine trigonometric function. Input expected in radians.
ROW a=1.8
| EVAL sin=SIN(a)
Supported types:

View file

@ -0,0 +1,7 @@
SINH
Sine hyperbolic function.
ROW a=1.8
| EVAL sinh=SINH(a)
Supported types:

View file

@ -0,0 +1,29 @@
SORT
Syntax
SORT column1 [ASC/DESC][NULLS FIRST/NULLS LAST][, ..., columnN [ASC/DESC][NULLS FIRST/NULLS LAST]]
Parameters
columnX
The column to sort on.
Description
The SORT processing command sorts a table on one or more columns.
The default sort order is ascending. Use ASC or DESC to specify an explicit
sort order.
Two rows with the same sort key are considered equal. You can provide additional
sort expressions to act as tie breakers.
Sorting on multivalued columns uses the lowest value when sorting ascending and
the highest value when sorting descending.
By default, null values are treated as being larger than any other value. With
an ascending sort order, null values are sorted last, and with a descending
sort order, null values are sorted first. You can change that by providing
NULLS FIRST or NULLS LAST.
Examples
FROM employees
| KEEP first_name, last_name, height
| SORT height
Explicitly sorting in descending order with DESC:
FROM employees
| KEEP first_name, last_name, height
| SORT height DESC
Providing additional sort expressions to act as tie breakers:
FROM employees
| KEEP first_name, last_name, height
| SORT height DESC, first_name ASC
Sorting null values first using NULLS FIRST:
FROM employees
| KEEP first_name, last_name, height
| SORT first_name ASC NULLS FIRST

View file

@ -0,0 +1,7 @@
Source commands
An ES|QL source command produces a table, typically with data from Elasticsearch. An ES|QL query must start with a source command.
ES|QL supports these source commands:
FROM
ROW
SHOW

View file

@ -0,0 +1,7 @@
SPLIT
Split a single valued string into multiple strings. For example:
ROW words="foo;bar;baz;qux;quux;corge"
| EVAL word = SPLIT(words, ";")
Which splits "foo;bar;baz;qux;quux;corge" on ; and returns an array.
Only single byte delimiters are currently supported.

View file

@ -0,0 +1,8 @@
SQRT
Returns the square root of a number. The input can be any numeric value; the
return value is always a double.
Square roots of negative numbers are NaN. Square roots of infinities are infinite.
ROW d = 100.0
| EVAL s = SQRT(d)
Supported types:

View file

@ -0,0 +1,50 @@
STATS ... BY
Syntax
STATS [column1 =] expression1[, ..., [columnN =] expressionN] [BY grouping_column1[, ..., grouping_columnN]]
Parameters
columnX
The name by which the aggregated value is returned. If omitted, the name is
equal to the corresponding expression (expressionX).
expressionX
An expression that computes an aggregated value.
grouping_columnX
The column containing the values to group by.
Description
The STATS ... BY processing command groups rows according to a common value
and calculates one or more aggregated values over the grouped rows. If BY is
omitted, the output table contains exactly one row with the aggregations applied
over the entire dataset.
The following aggregation functions are supported:
AVG
COUNT
COUNT_DISTINCT
MAX
MEDIAN
MEDIAN_ABSOLUTE_DEVIATION
MIN
PERCENTILE
SUM
STATS without any groups is much, much faster than adding a group.
Grouping on a single column is currently much more optimized than grouping
on many columns. In some tests we have seen grouping on a single keyword
column to be five times faster than grouping on two keyword columns. Do
not try to work around this by combining the two columns together with
something like CONCAT and then grouping - that is not going to be
faster.
Examples
Calculating a statistic and grouping by the values of another column:
FROM employees
| STATS count = COUNT(emp_no) BY languages
| SORT languages
Omitting BY returns one row with the aggregations applied over the entire
dataset:
FROM employees
| STATS avg_lang = AVG(languages)
It's possible to calculate multiple values:
FROM employees
| STATS avg_lang = AVG(languages), max_lang = MAX(languages)
It's also possible to group by multiple values (only supported for long and
keyword family fields):
FROM employees
| EVAL hired = DATE_FORMAT("YYYY", hire_date)
| STATS avg_salary = AVG(salary) BY hired, languages.long
| EVAL avg_salary = ROUND(avg_salary)
| SORT hired, languages.long

View file

@ -0,0 +1,17 @@
SUBSTRING
Returns a substring of a string, specified by a start position and an optional
length. This example returns the first three characters of every last name:
FROM employees
| KEEP last_name
| EVAL ln_sub = SUBSTRING(last_name, 1, 3)
A negative start position is interpreted as being relative to the end of the
string. This example returns the last three characters of every last name:
FROM employees
| KEEP last_name
| EVAL ln_sub = SUBSTRING(last_name, -3, 3)
If length is omitted, substring returns the remainder of the string. This
example returns all characters except for the first:
FROM employees
| KEEP last_name
| EVAL ln_sub = SUBSTRING(last_name, 2)

View file

@ -0,0 +1,5 @@
SUM
The sum of a numeric field.
FROM employees
| STATS SUM(languages)

View file

@ -0,0 +1,94 @@
ES|QL syntax reference
Basic syntax
An ES|QL query is composed of a source command followed
by an optional series of processing commands,
separated by a pipe character: `|`. For example:
source-command
| processing-command1
| processing-command2
The result of a query is the table produced by the final processing command.
For an overview of all supported commands, functions, and operators, refer to Commands and Functions and operators.
For readability, this documentation puts each processing command on a new
line. However, you can write an ES|QL query as a single line. The following
query is identical to the previous one:
source-command | processing-command1 | processing-command2
Identifiers
The identifiers can be used as they are and don't require quoting, unless
containing special characters, in which case they must be quoted with
backticks (```). What "special characters" means is command dependent.
For FROM, KEEP, DROP,
RENAME, MV_EXPAND and
ENRICH these are: `=`, ```, `,`, ` ` (space), `|` ,
`[`, `]`, `\t` (TAB), `\r` (CR), `\n` (LF); one `/` is allowed unquoted, but
a sequence of two or more require quoting.
The rest of the commands - those allowing identifiers to be used in
expressions - require quoting if the identifier contains characters other than
letters, numbers and `_` and doesn't start with a letter, `_` or `@`.
For instance:
// Retain just one field
FROM index
| KEEP 1.field
is legal. However, if the same field is to be used with an EVAL,
it'd have to be quoted:
// Copy one field
FROM index
| EVAL my_field = `1.field`
Literals
ES|QL currently supports numeric and string literals.
String literals
A string literal is a sequence of unicode characters delimited by double
quotes (`"`).
// Filter by a string value
FROM index
| WHERE first_name == "Georgi"
If the literal string itself contains quotes, these need to be escaped (`\\"`).
ES|QL also supports the triple-quotes (`"""`) delimiter, for convenience:
ROW name = """Indiana "Indy" Jones"""
The special characters CR, LF and TAB can be provided with the usual escaping:
`\r`, `\n`, `\t`, respectively.
Numerical literals
The numeric literals are accepted in decimal and in the scientific notation
with the exponent marker (`e` or `E`), starting either with a digit, decimal
point `.` or the negative sign `-`:
1969 -- integer notation
3.14 -- decimal notation
.1234 -- decimal notation starting with decimal point
4E5 -- scientific notation (with exponent marker)
1.2e-3 -- scientific notation with decimal point
-.1e2 -- scientific notation starting with the negative sign
The integer numeric literals are implicitly converted to the `integer`, `long`
or the `double` type, whichever can first accommodate the literal's value.
The floating point literals are implicitly converted to the `double` type.
To obtain constant values of different types, use one of the numeric
conversion functions.
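For example, an illustrative sketch forcing constants to specific types:
ROW i = TO_LONG(1), d = TO_DOUBLE(1)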
Comments
ES|QL uses C++ style comments:
double slash `//` for single line comments
`/*` and `*/` for block comments
// Query the employees index
FROM employees
| WHERE height > 2
FROM /* Query the employees index */ employees
| WHERE height > 2
FROM employees
/* Query the
* employees
* index */
| WHERE height > 2
Timespan literals
Datetime intervals and timespans can be expressed using timespan literals.
Timespan literals are a combination of a number and a qualifier. These
qualifiers are supported:
`millisecond`/`milliseconds`
`second`/`seconds`
`minute`/`minutes`
`hour`/`hours`
`day`/`days`
`week`/`weeks`
`month`/`months`
`year`/`years`
Timespan literals are not whitespace sensitive. These expressions are all valid:
`1day`
`1 day`
`1        day`
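Timespan literals are typically used in date arithmetic and truncation, for
example (an illustrative sketch assuming a logs-* index):
FROM logs-*
| EVAL bucket = DATE_TRUNC(1 hour, @timestamp)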

View file

@ -0,0 +1,7 @@
TAN
Tangent trigonometric function. Input expected in radians.
ROW a=1.8
| EVAL tan=TAN(a)
Supported types:

View file

@ -0,0 +1,7 @@
TANH
Tangent hyperbolic function.
ROW a=1.8
| EVAL tanh=TANH(a)
Supported types:

View file

@ -0,0 +1,5 @@
TAU
The ratio of a circle's circumference to its radius.
ROW TAU()

View file

@ -0,0 +1,10 @@
TO_BOOLEAN
Converts an input value to a boolean value.
The input can be a single- or multi-valued field or an expression. The input
type must be of a string or numeric type.
A string value of "true" will be converted, case-insensitively, to the Boolean
true. For anything else, including the empty string, the function will
return false. For example:
ROW str = ["true", "TRuE", "false", "", "yes", "1"]
| EVAL bool = TO_BOOLEAN(str)
The numerical value of 0 will be converted to false, anything else will be
converted to true.
Alias: TO_BOOL

View file

@ -0,0 +1,8 @@
TO_CARTESIANPOINT
Converts an input value to a point value.
The input can be a single- or multi-valued field or an expression.
The input type must be a string or a cartesian point.
A string will only be successfully converted if it respects the
WKT Point format:
row wkt = ["POINT(4297.11 -1475.53)", "POINT(7580.93 2272.77)"]
| mv_expand wkt
| eval pt = to_cartesianpoint(wkt)

View file

@ -0,0 +1,17 @@
TO_DATETIME
Converts an input value to a date value.
The input can be a single- or multi-valued field or an expression. The input
type must be of a string or numeric type.
A string will only be successfully converted if it respects the format
yyyy-MM-dd'T'HH:mm:ss.SSS'Z' (to convert dates in other formats, use DATE_PARSE). For example:
ROW string = ["1953-09-02T00:00:00.000Z", "1964-06-02T00:00:00.000Z", "1964-06-02 00:00:00"]
| EVAL datetime = TO_DATETIME(string)
Note that in this example, the last value in the source multi-valued
field has not been converted. The reason being that if the date format is not
respected, the conversion will result in a null value. When this happens a
Warning header is added to the response. The header will provide information
on the source of the failure:
"Line 1:112: evaluation of [TO_DATETIME(string)] failed, treating result as null. Only first 20 failures recorded."
A following header will contain the failure reason and the offending value:
"java.lang.IllegalArgumentException: failed to parse date field [1964-06-02 00:00:00] with format [yyyy-MM-dd'T'HH:mm:ss.SSS'Z']"
If the input parameter is of a numeric type, its value will be interpreted as
milliseconds since the Unix epoch.
For example:
ROW int = [0, 1]
| EVAL dt = TO_DATETIME(int)
Alias: TO_DT

View file

@ -0,0 +1,7 @@
TO_DEGREES
Converts a number in radians to degrees.
The input can be a single- or multi-valued field or an expression. The input
type must be of a numeric type and the result is always double.
Example:
ROW rad = [1.57, 3.14, 4.71]
| EVAL deg = TO_DEGREES(rad)

View file

@ -0,0 +1,12 @@
TO_DOUBLE
Converts an input value to a double value.
The input can be a single- or multi-valued field or an expression. The input
type must be of a boolean, date, string or numeric type.
Example:
ROW str1 = "5.20128E11", str2 = "foo"
| EVAL dbl = TO_DOUBLE("520128000000"), dbl1 = TO_DOUBLE(str1), dbl2 = TO_DOUBLE(str2)
Note that in this example, the last conversion of the string isn't
possible. When this happens, the result is a null value. In this case a
Warning header is added to the response. The header will provide information
on the source of the failure:
"Line 1:115: evaluation of [TO_DOUBLE(str2)] failed, treating result as null. Only first 20 failures recorded."
A following header will contain the failure reason and the offending value:
"java.lang.NumberFormatException: For input string: \"foo\""
If the input parameter is of a date type, its value will be interpreted as
milliseconds since the Unix epoch, converted to double.
Boolean true will be converted to double 1.0, false to 0.0.
Alias: TO_DBL

View file

@ -0,0 +1,7 @@
TO_GEOPOINT
Converts an input value to a geo_point value.
The input can be a single- or multi-valued field or an expression.
The input type must be a string or a geo_point.
A string will only be successfully converted if it respects the
WKT Point format:
row wkt = "POINT(42.97109630194 14.7552534413725)"
| eval pt = to_geopoint(wkt)

View file

@ -0,0 +1,12 @@
TO_INTEGER
Converts an input value to an integer value.
The input can be a single- or multi-valued field or an expression. The input
type must be of a boolean, date, string or numeric type.
Example:
ROW long = [5013792, 2147483647, 501379200000]
| EVAL int = TO_INTEGER(long)
Note that in this example, the last value of the multi-valued field cannot
be converted to an integer. When this happens, the result is a null value.
In this case a Warning header is added to the response. The header will
provide information on the source of the failure:
"Line 1:61: evaluation of [TO_INTEGER(long)] failed, treating result as null. Only first 20 failures recorded."
A following header will contain the failure reason and the offending value:
"org.elasticsearch.xpack.ql.QlIllegalArgumentException: [501379200000] out of [integer] range"
If the input parameter is of a date type, its value will be interpreted as
milliseconds since the Unix epoch, converted to integer.
Boolean true will be converted to integer 1, false to 0.
Alias: TO_INT

View file

@ -0,0 +1,10 @@
TO_IP
Converts an input string to an IP value.
The input can be a single- or multi-valued field or an expression.
Example:
ROW str1 = "1.1.1.1", str2 = "foo"
| EVAL ip1 = TO_IP(str1), ip2 = TO_IP(str2)
| WHERE CIDR_MATCH(ip1, "1.0.0.0/8")
Note that in the example above the last conversion of the string isn't
possible. When this happens, the result is a null value. In this case a
Warning header is added to the response. The header will provide information
on the source of the failure:
"Line 1:68: evaluation of [TO_IP(str2)] failed, treating result as null. Only first 20 failures recorded."
A following header will contain the failure reason and the offending value:
"java.lang.IllegalArgumentException: 'foo' is not an IP string literal."

View file

@ -0,0 +1,12 @@
TO_LONG
Converts an input value to a long value.
The input can be a single- or multi-valued field or an expression. The input
type must be of a boolean, date, string or numeric type.
Example:
ROW str1 = "2147483648", str2 = "2147483648.2", str3 = "foo"
| EVAL long1 = TO_LONG(str1), long2 = TO_LONG(str2), long3 = TO_LONG(str3)
Note that in this example, the last conversion of the string isn't
possible. When this happens, the result is a null value. In this case a
Warning header is added to the response. The header will provide information
on the source of the failure:
"Line 1:113: evaluation of [TO_LONG(str3)] failed, treating result as null. Only first 20 failures recorded."
A following header will contain the failure reason and the offending value:
"java.lang.NumberFormatException: For input string: \"foo\""
If the input parameter is of a date type, its value will be interpreted as
milliseconds since the Unix epoch, converted to long.
Boolean true will be converted to long 1, false to 0.

View file

@ -0,0 +1,7 @@
TO_RADIANS
Converts a number in degrees to radians.
The input can be a single- or multi-valued field or an expression. The input
type must be of a numeric type and the result is always double.
Example:
ROW deg = [90.0, 180.0, 270.0]
| EVAL rad = TO_RADIANS(deg)

View file

@ -0,0 +1,10 @@
TO_STRING
Converts a field into a string. For example:
ROW a=10
| EVAL j = TO_STRING(a)
It also works fine on multivalued fields:
ROW a=[10, 9, 8]
| EVAL j = TO_STRING(a)
Alias: TO_STR
Supported types:

View file

@ -0,0 +1,12 @@
TO_UNSIGNED_LONG
Converts an input value to an unsigned long value.
The input can be a single- or multi-valued field or an expression. The input
type must be of a boolean, date, string or numeric type.
Example:
ROW str1 = "2147483648", str2 = "2147483648.2", str3 = "foo"
| EVAL long1 = TO_UNSIGNED_LONG(str1), long2 = TO_ULONG(str2), long3 = TO_UL(str3)
Note that in this example, the last conversion of the string isn't
possible. When this happens, the result is a null value. In this case a
Warning header is added to the response. The header will provide information
on the source of the failure:
"Line 1:133: evaluation of [TO_UL(str3)] failed, treating result as null. Only first 20 failures recorded."
A following header will contain the failure reason and the offending value:
"java.lang.NumberFormatException: Character f is neither a decimal digit number, decimal point, nor \"e\" notation exponential mark."
If the input parameter is of a date type, its value will be interpreted as
milliseconds since the Unix epoch, converted to unsigned long.
Boolean true will be converted to unsigned long 1, false to 0.
Aliases: TO_ULONG, TO_UL

View file

@ -0,0 +1,6 @@
TO_VERSION
Converts an input string to a version value. For example:
ROW v = TO_VERSION("1.2.3")
The input can be a single- or multi-valued field or an expression.
Alias: TO_VER
Supported types:

View file

@ -0,0 +1,8 @@
TRIM
Removes leading and trailing whitespaces from strings.
ROW message = " some text ", color = " red "
| EVAL message = TRIM(message)
| EVAL color = TRIM(color)
Supported types:

View file

@ -0,0 +1,51 @@
WHERE
Syntax
WHERE expression
Parameters
expression
A boolean expression.
Description
The WHERE processing command produces a table that contains all the rows from
the input table for which the provided condition evaluates to true.
Examples
FROM employees
| KEEP first_name, last_name, still_hired
| WHERE still_hired == true
Which, if still_hired is a boolean field, can be simplified to:
FROM employees
| KEEP first_name, last_name, still_hired
| WHERE still_hired
WHERE supports various functions. For example, the
LENGTH function:
FROM employees
| KEEP first_name, last_name, height
| WHERE LENGTH(first_name) < 4
For a complete list of all functions, refer to Functions and operators.
For NULL comparison, use the IS NULL and IS NOT NULL predicates:
FROM employees
| WHERE birth_date IS NULL
| KEEP first_name, last_name
| SORT first_name
| LIMIT 3
FROM employees
| WHERE is_rehired IS NOT NULL
| STATS COUNT(emp_no)
Use LIKE to filter data based on string patterns using wildcards. LIKE
usually acts on a field placed on the left-hand side of the operator, but it can
also act on a constant (literal) expression. The right-hand side of the operator
represents the pattern. The following wildcard characters are supported:
* matches zero or more characters.
? matches one character.
FROM employees
| WHERE first_name LIKE "?b*"
| KEEP first_name, last_name
Use RLIKE to filter data based on string patterns using
regular expressions. RLIKE usually acts on a field placed on
the left-hand side of the operator, but it can also act on a constant (literal)
expression. The right-hand side of the operator represents the pattern.
FROM employees
| WHERE first_name RLIKE ".leja.*"
| KEEP first_name, last_name
The IN operator allows testing whether a field or expression equals
an element in a list of literals, fields or expressions:
ROW a = 1, b = 4, c = 3
| WHERE c-a IN (3, b / 2, a)
For a complete list of all operators, refer to Operators.

View file

@ -0,0 +1,305 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import Fs from 'fs';
import { keyBy, mapValues, once, pick } from 'lodash';
import pLimit from 'p-limit';
import Path from 'path';
import { lastValueFrom, Observable } from 'rxjs';
import { promisify } from 'util';
import type { FunctionRegistrationParameters } from '..';
import {
CreateChatCompletionResponseChunk,
FunctionVisibility,
MessageRole,
} from '../../../common/types';
import { concatenateOpenAiChunks } from '../../../common/utils/concatenate_openai_chunks';
import { processOpenAiStream } from '../../../common/utils/process_openai_stream';
import { streamIntoObservable } from '../../service/util/stream_into_observable';
const readFile = promisify(Fs.readFile);
const readdir = promisify(Fs.readdir);
const loadSystemMessage = once(async () => {
const data = await readFile(Path.join(__dirname, './system_message.txt'));
return data.toString('utf-8');
});
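// Load every ES|QL doc file from ./docs once, keyed by the upper-cased
// command/function name derived from the filename (e.g. esql-stats-by.txt -> STATS).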
const loadEsqlDocs = once(async () => {
const dir = Path.join(__dirname, './docs');
const files = (await readdir(dir)).filter((file) => Path.extname(file) === '.txt');
if (!files.length) {
return {};
}
const limiter = pLimit(10);
return keyBy(
await Promise.all(
files.map((file) =>
limiter(async () => {
const data = (await readFile(Path.join(dir, file))).toString('utf-8');
const filename = Path.basename(file, '.txt');
const keyword = filename
.replace('esql-', '')
.replace('agg-', '')
.replaceAll('-', '_')
.toUpperCase();
return {
keyword: keyword === 'STATS_BY' ? 'STATS' : keyword,
data,
};
})
)
),
'keyword'
);
});
export function registerEsqlFunction({
client,
registerFunction,
resources,
}: FunctionRegistrationParameters) {
registerFunction(
{
name: 'execute_query',
contexts: ['core'],
visibility: FunctionVisibility.User,
description: 'Execute an ES|QL query',
parameters: {
type: 'object',
additionalProperties: false,
properties: {
query: {
type: 'string',
},
},
required: ['query'],
} as const,
},
async ({ arguments: { query } }) => {
const response = await (
await resources.context.core
).elasticsearch.client.asCurrentUser.transport.request({
method: 'POST',
path: '_query',
body: {
query,
},
});
return { content: response };
}
);
registerFunction(
{
name: 'esql',
contexts: ['core'],
description: `This function answers ES|QL related questions including query generation and syntax/command questions.`,
visibility: FunctionVisibility.System,
parameters: {
type: 'object',
additionalProperties: false,
properties: {
switch: {
type: 'boolean',
},
},
} as const,
},
async ({ messages, connectorId }, signal) => {
const [systemMessage, esqlDocs] = await Promise.all([loadSystemMessage(), loadEsqlDocs()]);
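// Replace the conversation's original system message with the ES|QL system
// prompt, optionally appending extra instructions for a specific chat call.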
const withEsqlSystemMessage = (message?: string) => [
{
'@timestamp': new Date().toISOString(),
message: { role: MessageRole.System, content: `${systemMessage}\n${message ?? ''}` },
},
...messages.slice(1),
];
const source$ = streamIntoObservable(
await client.chat({
connectorId,
messages: withEsqlSystemMessage(),
signal,
stream: true,
functions: [
{
name: 'get_esql_info',
description:
'Use this function to get more information about syntax, commands and examples. Take a deep breath and reason about what commands and functions you expect to use. Do you need to group data? Request `STATS`. Extract data? Request `DISSECT` AND `GROK`. Convert a column based on a set of conditionals? Request `EVAL` and `CASE`.',
parameters: {
type: 'object',
properties: {
commands: {
type: 'array',
items: {
type: 'string',
},
description: 'A list of processing or source commands',
},
functions: {
type: 'array',
items: {
type: 'string',
},
description: 'A list of functions.',
},
},
required: ['commands', 'functions'],
},
},
],
functionCall: 'get_esql_info',
})
).pipe(processOpenAiStream(), concatenateOpenAiChunks());
const response = await lastValueFrom(source$);
const args = JSON.parse(response.message.function_call.arguments) as {
commands: string[];
functions: string[];
};
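// Always include the SYNTAX and OVERVIEW docs in addition to whatever the model requested.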
const keywords = args.commands.concat(args.functions).concat('SYNTAX').concat('OVERVIEW');
const messagesToInclude = mapValues(pick(esqlDocs, keywords), ({ data }) => data);
const esqlResponse$: Observable<CreateChatCompletionResponseChunk> = streamIntoObservable(
await client.chat({
messages: [
...withEsqlSystemMessage(
`Format every ES|QL query as Markdown:
\`\`\`esql
<query>
\`\`\`
Prefer to use commands and functions for which you have documentation.
Pay special attention to these instructions. Not following these instructions to the tee
will lead to excruciating consequences for the user.
#1
Directive: ONLY use aggregation functions in STATS commands, and use ONLY aggregation functions in stats commands, NOT in SORT or EVAL.
Rationale: Only aggregation functions are supported in STATS commands, and aggregation functions are only supported in STATS commands.
Action: Create new columns using EVAL first and then aggregate over them in STATS commands. Do not use aggregation functions anywhere else, such as SORT or EVAL.
Example: EVAL is_failure_as_number = CASE(event.outcome == "failure", 1, 0) | STATS total_failures = SUM(is_failure_as_number) BY my_grouping_name
#2
Directive: Use the \`=\` operator to create new columns in STATS and EVAL, DO NOT UNDER ANY CIRCUMSTANCES use \`AS\`.
Rationale: The \`=\` operator is used for aliasing. Using \`AS\` leads to syntax errors.
Action: When creating a new column in a command, use the = operator.
Example: STATS total_requests = COUNT(*)
#3
Directive: Use placeholder values for information that is missing.
Rationale: It is critical to generate a syntactically valid query.
Action: When you don't know the arguments to a function because information is missing, use placeholder values.
Example: "Here's an ES|QL query that generates a timeseries of 50 buckets calculating the average duration. I've used
"2023-01-23T12:15:00.000Z" and "2023-01-23T12:30:00.000Z" as placeholder values. Replace them with the start
and end date that work for your use case."
#4
Directive: Wrap string literals in double quotes.
Rationale: It is critical to generate a syntactically valid query.
Action: When using string literals in function calls, wrap them in double quotes, not single quotes.
Example: DATE_EXTRACT("year", @timestamp)
At the start of every message, YOU MUST, for every instruction that is relevant to the query you want to construct,
repeat its directives, verbatim, at the start of every message. Exclude the rationales, actions, and examples. Follow
it up by using a delimiter: --
Example:
#1: <directive>
#2: <directive>
#3: <directive>
--
Here is an ES|QL query that you can use:
<query>`
),
{
'@timestamp': new Date().toISOString(),
message: {
role: MessageRole.Assistant,
content: '',
function_call: {
name: 'get_esql_info',
arguments: JSON.stringify(args),
trigger: MessageRole.Assistant as const,
},
},
},
{
'@timestamp': new Date().toISOString(),
message: {
role: MessageRole.User,
name: 'get_esql_info',
content: JSON.stringify({
documentation: messagesToInclude,
}),
},
},
],
connectorId,
functions: [],
signal,
stream: true,
})
).pipe(processOpenAiStream());
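// Hold back streamed chunks until the "--" delimiter requested in the system
// prompt has been seen, so the repeated directives are not shown to the user;
// if the delimiter never arrives, flush the buffered content on completion.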
return esqlResponse$.pipe((source) => {
return new Observable<CreateChatCompletionResponseChunk>((subscriber) => {
let cachedContent: string = '';
function includesDivider() {
const firstDividerIndex = cachedContent.indexOf('--');
return firstDividerIndex !== -1;
}
source.subscribe({
next: (message) => {
if (includesDivider()) {
subscriber.next(message);
}
cachedContent += message.choices[0].delta.content || '';
},
complete: () => {
if (!includesDivider()) {
subscriber.next({
created: 0,
id: '',
model: '',
object: 'chat.completion.chunk',
choices: [
{
delta: {
content: cachedContent,
},
},
],
});
}
subscriber.complete();
},
error: (error) => {
subscriber.error(error);
},
});
});
});
}
);
}

View file

@ -0,0 +1,220 @@
You are a helpful assistant for generating and executing ES|QL queries.
Your goal is to help the user construct and possibly execute an ES|QL
query for the Observability use cases, which often involve metrics, logs
and traces.
ES|QL is the Elasticsearch Query Language, which allows users of the
Elastic platform to iteratively explore data. An ES|QL query consists
of a series of commands, separated by pipes. Each query starts with
a source command, that selects or creates a set of data to start
processing. This source command is then followed by one or more
processing commands, which can transform the data returned by the
previous command.
ES|QL is not Elasticsearch SQL, nor is it anything like SQL. SQL
commands are not available in ES|QL. Make sure you write a query
using ONLY commands specified in this conversation.
# Syntax
An ES|QL query is composed of a source command followed by an optional
series of processing commands, separated by a pipe character: |. For
example:
<source-command>
| <processing-command1>
| <processing-command2>
Binary operators: ==, !=, <, <=, >, >=.
Logical operators are supported: AND, OR, NOT
Predicates: IS NULL, IS NOT NULL
Timestamp literal syntax: NOW() - 15 days, 24 hours, 1 week
## Source commands
Source commands select a data source. There are three source commands:
FROM (which selects an index), ROW (which creates data from the command)
and SHOW (which returns information about the deployment).
## Processing commands
ES|QL processing commands change an input table by adding, removing, or
changing rows and columns. The following commands are available:
- DISSECT: extracts structured data out of a string, using a dissect
pattern.
- DROP: drops one or more columns
- ENRICH: adds data from existing indices as new columns
- EVAL: adds a new column with calculated values. Supported functions for
EVAL are:
- Mathematical functions
- String functions
- Date-time functions
- Type conversion functions
- Conditional functions and expressions
- Multi-value functions
Aggregation functions are not supported for EVAL.
- GROK: extracts structured data out of a string, using a grok pattern
- KEEP: keeps one or more columns, drops the ones that are not kept
- LIMIT: returns the first n number of rows. The maximum value for this
is 10000.
- MV_EXPAND: expands multi-value columns into a single row per value
- RENAME: renames a column
- SORT: sorts the rows in a table
- STATS ... BY: groups rows according to a common value and calculates
one or more aggregated values over the grouped rows. This command only
supports aggregation functions, and no other functions or operators.
- WHERE: produces a table that contains all the rows from the input table
for which the provided condition returns true. WHERE supports the same
functions as EVAL.
## Functions and operators
### Aggregation functions
- AVG
- COUNT
- COUNT_DISTINCT
- MAX
- MEDIAN
- MEDIAN_ABSOLUTE_DEVIATION
- MIN
- PERCENTILE
- SUM
### Mathematical functions
- ABS
- ACOS
- ASIN
- ATAN
- ATAN2
- CEIL
- COS
- COSH
- E
- FLOOR
- LOG10
- PI
- POW
- ROUND
- SIN
- SINH
- SQRT
- TAN
- TANH
- TAU
### String functions
- CONCAT
- LEFT
- LENGTH
- LTRIM
- REPLACE
- RIGHT
- RTRIM
- SPLIT
- SUBSTRING
- TRIM
### Date-time functions
- AUTO_BUCKET
- DATE_EXTRACT
- DATE_FORMAT
- DATE_PARSE
- DATE_TRUNC
- NOW
### Type conversion functions
- TO_BOOLEAN
- TO_DATETIME
- TO_DEGREES
- TO_DOUBLE
- TO_INTEGER
- TO_IP
- TO_LONG
- TO_RADIANS
- TO_STRING
- TO_UNSIGNED_LONG
- TO_VERSION
### Conditional functions and expressions
- CASE
- COALESCE
- GREATEST
- LEAST
### Multivalue functions
- MV_AVG
- MV_CONCAT
- MV_COUNT
- MV_DEDUPE
- MV_MAX
- MV_MEDIAN
- MV_MIN
- MV_SUM
### Operators
- Binary operators
- Logical operators
- IS NULL and IS NOT NULL predicates
- CIDR_MATCH
- ENDS_WITH
- IN
- IS_FINITE
- IS_INFINITE
- IS_NAN
- LIKE
- RLIKE
- STARTS_WITH
Here are some example queries:
FROM employees
| WHERE still_hired == true
| EVAL hired = DATE_FORMAT("YYYY", hire_date)
| STATS avg_salary = AVG(salary) BY languages
| EVAL avg_salary = ROUND(avg_salary)
| EVAL lang_code = TO_STRING(languages)
| ENRICH languages_policy ON lang_code WITH lang = language_name
| WHERE lang IS NOT NULL
| KEEP avg_salary, lang
| SORT avg_salary ASC
| LIMIT 3
FROM employees
| EVAL trunk_worked_seconds = avg_worked_seconds / 100000000 * 100000000
| STATS c = count(languages.long) BY languages.long, trunk_worked_seconds
| SORT c desc, languages.long, trunk_worked_seconds
ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1"
| DISSECT a "%{date} - %{msg} - %{ip}"
| KEEP date, msg, ip
| EVAL date = TO_DATETIME(date)
FROM employees
| WHERE first_name LIKE "?b*"
| KEEP first_name, last_name
FROM employees
| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
| EVAL bucket = AUTO_BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
| STATS AVG(salary) BY bucket
| SORT bucket
ROW a = 1, b = "two", c = null
FROM employees
| EVAL is_recent_hire = CASE(hire_date <= "2023-01-01T00:00:00Z", 1, 0)
| STATS total_recent_hires = SUM(is_recent_hire), total_hires = COUNT(*) BY country
| EVAL recent_hiring_rate = total_recent_hires / total_hires
FROM logs-*
| WHERE @timestamp <= NOW() - 24 hours
// divide data in 1 hour buckets
| EVAL bucket = DATE_TRUNC(1 hour, @timestamp)
// convert a keyword field into a numeric field to aggregate over it
| EVAL is_5xx = CASE(http.response.status_code >= 500, 1, 0)
// count total events and failed events to calculate a rate
| STATS total_events = COUNT(*), total_failures = SUM(is_5xx) BY host.hostname, bucket
| EVAL failure_rate_per_host = total_failures / total_events
| DROP total_events, total_failures
