[inference] NL-to-ESQL: improve doc generation (#192378)

## Summary

Follow-up of https://github.com/elastic/kibana/pull/190433

Fix [#192762](https://github.com/elastic/kibana/issues/192762)

- Cleanup and refactor the documentation generation script
- Make some tweaks to the documentation to improve efficiency and make
better use of tokens
- Perform human review of the generated content to make sure everything
is accurate

---------

Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
Pierre Gayvallet 2024-09-13 09:29:29 +02:00 committed by GitHub
parent f2f5096c76
commit 3226eb691a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
133 changed files with 3599 additions and 1532 deletions

View file

@ -192,7 +192,7 @@ const buildTestDefinitions = (): Section[] => {
{
title: 'Generates a query to show employees filtered by name and grouped by hire_date',
question: `From the employees index, I want to see how many employees with a "B" in their first name
where hired each month over the past 2 years.
were hired each month over the past 2 years.
Assume the following fields:
- hire_date
- first_name
@ -208,10 +208,10 @@ const buildTestDefinitions = (): Section[] => {
(which can be read the same backward and forward), and then return their last name and first name
- last_name
- first_name`,
expected: `FROM employees
| EVAL reversed_last_name = REVERSE(last_name)
| WHERE TO_LOWER(last_name) == TO_LOWER(reversed_last_name)
| KEEP last_name, first_name`,
criteria: [
`The assistant should not provide an ES|QL query, and explicitly mention that there is no
way to check for palindromes using ES|QL.`,
],
},
{
title: 'Generates a query to show the top 10 domains by doc count',

View file

@ -0,0 +1,288 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import Fs from 'fs/promises';
import Path from 'path';
import fastGlob from 'fast-glob';
import $, { load, Cheerio, AnyNode } from 'cheerio';
import { partition } from 'lodash';
import { ToolingLog } from '@kbn/tooling-log';
import pLimit from 'p-limit';
import { ScriptInferenceClient } from '../util/kibana_client';
import { convertToMarkdownPrompt } from './prompts/convert_to_markdown';
import { bindOutput, PromptCaller } from './utils/output_executor';
/**
* The pages that will be extracted but only used as context
* for the LLM for the enhancement tasks of the documentation entries.
*/
const contextArticles = [
'esql.html',
'esql-syntax.html',
'esql-kibana.html',
'esql-query-api.html',
'esql-limitations.html',
'esql-cross-clusters.html',
'esql-examples.html',
'esql-metadata-fields.html',
'esql-multi-index.html',
];
/** A single documentation page extracted from the source HTML files, kept as plain text. */
interface ExtractedPage {
  /** Basename of the HTML file the page was extracted from. */
  sourceFile: string;
  /** Short page identifier, e.g. 'overview' for esql.html (see processFile). */
  name: string;
  /** Plain-text content of the page. */
  content: string;
}
/** A single ES|QL command or function entry converted to Markdown. */
export interface ExtractedCommandOrFunc {
  /** Name of the command or function, e.g. 'STATS' or 'BUCKET'. */
  name: string;
  /** Markdown rendition of the entry's documentation. */
  markdownContent: string;
  /** True when the entry is a command, false for functions/operators. */
  command: boolean;
}
/** Aggregated result of the documentation extraction pass. */
export interface ExtractionOutput {
  /** Extracted ES|QL commands. */
  commands: ExtractedCommandOrFunc[];
  /** Extracted ES|QL functions. */
  functions: ExtractedCommandOrFunc[];
  /** Context pages kept as plain text. */
  pages: ExtractedPage[];
  /** Basenames of files that were found but not processed. */
  skippedFile: string[];
}
/**
 * Walks the built-docs ES|QL html files and extracts commands, functions
 * and context pages into a single {@link ExtractionOutput} structure.
 *
 * Throws when no esql*.html files are found under the built-docs directory.
 */
export async function extractDocEntries({
  builtDocsDir,
  log,
  inferenceClient,
}: {
  builtDocsDir: string;
  log: ToolingLog;
  inferenceClient: ScriptInferenceClient;
}): Promise<ExtractionOutput> {
  const htmlFiles = await fastGlob(
    `${builtDocsDir}/html/en/elasticsearch/reference/master/esql*.html`
  );
  if (htmlFiles.length === 0) {
    throw new Error('No files found');
  }

  const results: ExtractionOutput = {
    commands: [],
    functions: [],
    pages: [],
    skippedFile: [],
  };

  // Prompt executor bound to the selected connector, shared by all files.
  const executePrompt = bindOutput({
    output: inferenceClient.output,
    connectorId: inferenceClient.getConnectorId(),
  });
  // Cap the number of concurrent LLM calls across all files.
  const limiter = pLimit(10);

  await Promise.all(
    htmlFiles.map((file) =>
      processFile({ file, log, executePrompt, output: results, limiter })
    )
  );

  return results;
}
/**
 * Routes a single html file to the right extraction strategy based on its
 * basename: commands page, functions/operators page, context article, or skip.
 */
async function processFile({
  file: fileFullPath,
  output,
  executePrompt,
  log,
  limiter,
}: {
  file: string;
  output: ExtractionOutput;
  executePrompt: PromptCaller;
  log: ToolingLog;
  limiter: pLimit.Limit;
}) {
  const basename = Path.basename(fileFullPath);
  const fileContent = (await Fs.readFile(fileFullPath)).toString('utf-8');

  switch (basename) {
    case 'esql-commands.html':
      // One markdown entry per command.
      await processCommands({ fileContent, log, output, limiter, executePrompt });
      break;
    case 'esql-functions-operators.html':
      // Functions and operators share one page; split them apart.
      await processFunctionsAndOperators({ fileContent, log, output, limiter, executePrompt });
      break;
    default:
      if (contextArticles.includes(basename)) {
        // Context articles are kept as plain text for use as LLM context.
        const $element = load(fileContent)('*');
        output.pages.push({
          sourceFile: basename,
          // 'esql-foo.html' -> 'foo'; the root 'esql.html' page becomes 'overview'.
          name:
            basename === 'esql.html' ? 'overview' : basename.substring(5, basename.length - 5),
          content: getSimpleText($element),
        });
      } else {
        output.skippedFile.push(basename);
      }
  }
}
/**
 * Splits the functions/operators page into per-function markdown entries
 * (converted by the LLM) and a single aggregated "operators" context page.
 */
async function processFunctionsAndOperators({
  fileContent,
  output,
  executePrompt,
  log,
  limiter,
}: {
  fileContent: string;
  output: ExtractionOutput;
  executePrompt: PromptCaller;
  log: ToolingLog;
  limiter: pLimit.Limit;
}) {
  const $element = load(fileContent.toString())('*');
  const allSections = extractSections($element);

  // Section-title prefixes (case-insensitive) that identify operator sections.
  const operatorTitlePrefixes = [
    'Binary operators',
    'Equality',
    'Inequality',
    'Less than',
    'Less than or equal to',
    'Greater than',
    'Greater than or equal to',
    'Add +',
    'Subtract -',
    'Multiply *',
    'Divide /',
    'Modulus %',
    'Unary operators',
    'Logical operators',
    'IS NULL and IS NOT NULL',
    'Cast (::)',
  ];
  // Operator sections whose titles must match exactly.
  const exactOperatorTitles = ['IN', 'LIKE', 'RLIKE'];

  const isOperatorSection = (title: string) =>
    exactOperatorTitles.includes(title) ||
    operatorTitlePrefixes.some((prefix) => title.toLowerCase().startsWith(prefix.toLowerCase()));

  const [operatorSections, otherSections] = partition(allSections, (section) =>
    isOperatorSection(section.title)
  );

  // Functions are identified by fully-uppercase section titles (e.g. BUCKET).
  const functionSections = otherSections.filter(({ title }) => !!title.match(/^[A-Z_]+$/));

  // Convert each function section to markdown, capped by the shared limiter.
  const converted = await Promise.all(
    functionSections.map((section) =>
      limiter(async () => ({
        name: section.title,
        markdownContent: await executePrompt(
          convertToMarkdownPrompt({ htmlContent: section.content })
        ),
        command: false,
      }))
    )
  );

  output.functions.push(...converted);
  // Operators are aggregated into a single context page rather than split.
  output.pages.push({
    sourceFile: 'esql-functions-operators.html',
    name: 'operators',
    content: operatorSections.map(({ title, content }) => `${title}\n${content}`).join('\n'),
  });
}
/**
 * Extracts every command section from the commands page and converts each
 * one to a markdown entry via the LLM.
 */
async function processCommands({
  fileContent,
  output,
  executePrompt,
  log,
  limiter,
}: {
  fileContent: string;
  output: ExtractionOutput;
  executePrompt: PromptCaller;
  log: ToolingLog;
  limiter: pLimit.Limit;
}) {
  const $element = load(fileContent.toString())('*');
  // Commands are identified by fully-uppercase section titles (e.g. STATS).
  const commandSections = extractSections($element).filter(
    ({ title }) => !!title.match(/^[A-Z_]+$/)
  );

  const converted = await Promise.all(
    commandSections.map((section) =>
      limiter(async () => ({
        name: section.title,
        markdownContent: await executePrompt(
          convertToMarkdownPrompt({ htmlContent: section.content })
        ),
        command: true,
      }))
    )
  );

  output.commands.push(...converted);
}
/**
 * Returns the plain-text content of a page's main section, with navigation
 * chrome stripped, inline <code> rendered as backtick spans, and runs of
 * blank lines collapsed.
 */
function getSimpleText($element: Cheerio<AnyNode>) {
  // Strip chrome elements that carry no documentation content.
  for (const selector of ['.navfooter', '#sticky_content', '.edit_me']) {
    $element.remove(selector);
  }
  // Render inline code fragments as markdown backtick spans.
  $element.find('code').each(function () {
    const codeText = $(this).text();
    $(this).replaceWith('`' + codeText + '`');
  });
  const mainSection = $element.find('.section,section,.part').last();
  // Collapse consecutive blank lines into a single newline.
  return mainSection.text().replaceAll(/([\n]\s*){2,}/g, '\n');
}
/**
 * Splits a documentation page into titled html sections.
 *
 * Section headers are the `.position-relative` nodes inside `.section`; a
 * section's body is every sibling node up to the next header. Non-content
 * nodes (svg defs, copy buttons, image blocks, tables) are stripped before
 * the body html is serialized.
 */
export function extractSections(cheerio: Cheerio<AnyNode>) {
  const sections: Array<{
    title: string;
    content: string;
  }> = [];
  cheerio.find('.section .position-relative').each((index, element) => {
    // Section body: everything until the next header node.
    const untilNextHeader = $(element).nextUntil('.position-relative');
    // Header text carries an 'edit' link label; drop its first occurrence.
    const title = $(element).text().trim().replace('edit', '');
    untilNextHeader.find('svg defs').remove();
    untilNextHeader.find('.console_code_copy').remove();
    untilNextHeader.find('.imageblock').remove();
    untilNextHeader.find('table').remove();
    // Re-serialize the remaining body nodes as a single html string.
    const htmlContent = untilNextHeader
      .map((i, node) => $(node).prop('outerHTML'))
      .toArray()
      .join('');
    sections.push({
      // Normalize the 'STATS ... BY' header to the plain command name.
      title: title === 'STATS ... BY' ? 'STATS' : title,
      content: `<div><h1>${title}</h1> ${htmlContent}</div>`,
    });
  });
  return sections;
}

View file

@ -1,41 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import $, { AnyNode, Cheerio } from 'cheerio';
/**
 * Splits a documentation page into titled plain-text sections, using the
 * `h3` headers found inside `.section` elements as section boundaries.
 */
export function extractSections(cheerio: Cheerio<AnyNode>) {
  const sections: Array<{
    title: string;
    content: string;
  }> = [];
  cheerio.find('.section h3').each((index, element) => {
    // Section body: siblings until the next h3 header.
    let untilNextHeader = $(element).nextUntil('h3');
    // Fallbacks for pages where the header is wrapped in a .titlepage element.
    if (untilNextHeader.length === 0) {
      untilNextHeader = $(element).parents('.titlepage').nextUntil('h3');
    }
    if (untilNextHeader.length === 0) {
      untilNextHeader = $(element).parents('.titlepage').nextAll();
    }
    // Header text carries an 'edit' link label; drop its first occurrence.
    const title = $(element).text().trim().replace('edit', '');
    untilNextHeader.find('table').remove();
    untilNextHeader.find('svg').remove();
    const text = untilNextHeader.text();
    // Collapse consecutive blank lines into a single newline.
    const content = text.replaceAll(/([\n]\s*){2,}/g, '\n');
    sections.push({
      // Normalize the 'STATS ... BY' header to the plain command name.
      title: title === 'STATS ... BY' ? 'STATS' : title,
      content: `${title}\n\n${content}`,
    });
  });
  return sections;
}

View file

@ -0,0 +1,146 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import pLimit from 'p-limit';
import { ToolingLog } from '@kbn/tooling-log';
import { ScriptInferenceClient } from '../util/kibana_client';
import type { ExtractionOutput } from './extract_doc_entries';
import { createDocumentationPagePrompt, rewriteFunctionPagePrompt } from './prompts';
import { bindOutput } from './utils/output_executor';
/** A generated documentation file, ready to be written to disk by the caller. */
export interface FileToWrite {
  /** File name (not a full path), e.g. 'esql-syntax.txt'. */
  name: string;
  /** Full text content of the file. */
  content: string;
}
/** Describes how to generate one synthetic documentation page. */
interface PageGeneration {
  /** Name of the file the generated page will be written to. */
  outputFileName: string;
  /** Name of the extracted source page used as the main input. */
  sourceFile: string;
  /** Page-specific instructions passed to the LLM. */
  instructions: string;
}
/**
 * Generates the documentation files from the extraction output.
 *
 * Produces two kinds of files:
 * - one file per command/function, rewritten by the LLM from the extracted markdown
 * - a few synthetic pages (syntax, overview, operators) generated from the
 *   extracted context pages with page-specific instructions
 *
 * Returns the list of files to write; nothing is written to disk here.
 */
export const generateDoc = async ({
extraction,
inferenceClient,
}: {
extraction: ExtractionOutput;
inferenceClient: ScriptInferenceClient;
log: ToolingLog;
}) => {
const filesToWrite: FileToWrite[] = [];
// Cap the number of concurrent LLM calls.
const limiter = pLimit(10);
const callOutput = bindOutput({
connectorId: inferenceClient.getConnectorId(),
output: inferenceClient.output,
});
// Shared documentation context injected into every rewrite prompt.
const documentation = documentationForFunctionRewrite(extraction);
// Rewrite each command and function page individually.
await Promise.all(
[...extraction.commands, ...extraction.functions].map(async (func) => {
return limiter(async () => {
const rewrittenContent = await callOutput(
rewriteFunctionPagePrompt({
content: func.markdownContent,
documentation,
command: func.command,
})
);
filesToWrite.push({
name: fileNameForFunc(func.name),
content: rewrittenContent,
});
});
})
);
// NOTE(review): the non-null assertion throws an unhelpful TypeError if the
// page is missing — assumes extraction always contains every referenced page.
const pageContentByName = (pageName: string) =>
extraction.pages.find((page) => page.name === pageName)!.content;
// Synthetic pages and their LLM generation instructions.
const pages: PageGeneration[] = [
{
sourceFile: 'syntax',
outputFileName: 'esql-syntax.txt',
instructions: `
Generate a description of Elastic ES|QL syntax. Make sure to reuse as much as possible the provided content of file and be as complete as possible.
For timespan literals, generate at least five examples of full ES|QL queries, using a mix commands and functions, using different intervals and units.
**Make sure you use timespan literals, such as \`1 day\` or \`24h\` or \`7 weeks\` in these examples**.
Combine ISO timestamps with time span literals and NOW().
Make sure the example queries are using different combinations of syntax, commands and functions for each, and use BUCKET at least twice
When using DATE_TRUNC, make sure you DO NOT wrap the timespan in single or double quotes.
Do not use the Cast operator. In your examples, make sure to only use commands and functions that exist in the provided documentation.
`,
},
{
sourceFile: 'overview',
outputFileName: 'esql-overview.txt',
instructions: `Generate a description of ES|QL as a language. Ignore links to other documents.
From Limitations, include the known limitations, but ignore limitations that are specific to a command.
Include a summary of what is mentioned in the CROSS_CLUSTER, Kibana and API sections.
Explain how to use the REST API with an example and mention important information for Kibana usage and cross cluster querying.`,
},
{
sourceFile: 'operators',
outputFileName: 'esql-operators.txt',
instructions: `
Generate a document describing the operators.
For each type of operator (binary, unary, logical, and the remaining), generate a section.
For each operator, generate at least one full ES|QL query as an example of its usage.
Keep it short, e.g. only a \`\`\`esql\\nFROM ...\\n| WHERE ... \`\`\`
`,
},
];
// Generate each synthetic page, reusing the same concurrency limiter.
await Promise.all(
pages.map(async (page) => {
return limiter(async () => {
const pageContent = await callOutput(
createDocumentationPagePrompt({
documentation,
content: pageContentByName(page.sourceFile),
specificInstructions: page.instructions,
})
);
filesToWrite.push({
name: page.outputFileName,
content: pageContent,
});
});
})
);
return filesToWrite;
};
/**
 * Builds the output file name for a command or function doc page,
 * e.g. `IS NULL` -> `esql-is-null.txt`.
 */
const fileNameForFunc = (funcName: string) => {
  const slug = funcName.toLowerCase().replaceAll(' ', '-');
  return `esql-${slug}.txt`;
};
/**
 * Serializes the subset of the extracted documentation that is useful as
 * context when rewriting a function or command page.
 */
const documentationForFunctionRewrite = (extraction: ExtractionOutput) => {
  // Pages that add no value as rewrite context.
  const excludedPages = ['query-api', 'cross-clusters'];
  // Only a representative subset of functions is included to save tokens.
  const includedFunctions = [
    'BUCKET',
    'COUNT',
    'COUNT_DISTINCT',
    'CASE',
    'DATE_EXTRACT',
    'DATE_DIFF',
    'DATE_TRUNC',
  ];
  const context = {
    pages: extraction.pages.filter((page) => !excludedPages.includes(page.name)),
    commands: extraction.commands,
    functions: extraction.functions.filter((func) => includedFunctions.includes(func.name)),
  };
  return JSON.stringify(context, undefined, 2);
};

View file

@ -4,19 +4,13 @@
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { run } from '@kbn/dev-cli-runner';
import { ESQLMessage, EditorError, getAstAndSyntaxErrors } from '@kbn/esql-ast';
import { validateQuery } from '@kbn/esql-validation-autocomplete';
import $, { load } from 'cheerio';
import { SingleBar } from 'cli-progress';
import FastGlob from 'fast-glob';
import Fs from 'fs/promises';
import { compact, once, partition } from 'lodash';
import pLimit from 'p-limit';
import Path from 'path';
import git, { SimpleGitProgressEvent } from 'simple-git';
import yargs, { Argv } from 'yargs';
import { lastValueFrom } from 'rxjs';
import { REPO_ROOT } from '@kbn/repo-info';
import { INLINE_ESQL_QUERY_REGEX } from '../../common/tasks/nl_to_esql/constants';
import { correctCommonEsqlMistakes } from '../../common/tasks/nl_to_esql/correct_common_esql_mistakes';
@ -24,7 +18,9 @@ import { connectorIdOption, elasticsearchOption, kibanaOption } from '../util/cl
import { getServiceUrls } from '../util/get_service_urls';
import { KibanaClient } from '../util/kibana_client';
import { selectConnector } from '../util/select_connector';
import { extractSections } from './extract_sections';
import { syncBuiltDocs } from './sync_built_docs_repo';
import { extractDocEntries } from './extract_doc_entries';
import { generateDoc, FileToWrite } from './generate_doc';
yargs(process.argv.slice(2))
.command(
@ -38,16 +34,16 @@ yargs(process.argv.slice(2))
default: process.env.LOG_LEVEL || 'info',
choices: ['info', 'debug', 'silent', 'verbose'],
})
.option('only', {
describe: 'Only regenerate these files',
string: true,
array: true,
})
.option('dryRun', {
describe: 'Do not write or delete any files',
boolean: true,
default: false,
})
.option('syncDocs', {
describe: 'Sync doc repository before generation',
boolean: true,
default: true,
})
.option('kibana', kibanaOption)
.option('elasticsearch', elasticsearchOption)
.option('connectorId', connectorIdOption),
@ -63,431 +59,83 @@ yargs(process.argv.slice(2))
const kibanaClient = new KibanaClient(log, serviceUrls.kibanaUrl);
const connectors = await kibanaClient.getConnectors();
if (!connectors.length) {
throw new Error('No connectors found');
}
const connector = await selectConnector({
connectors,
preferredId: argv.connectorId,
log,
});
log.info(`Using connector ${connector.connectorId}`);
const chatClient = kibanaClient.createInferenceClient({
connectorId: connector.connectorId,
});
log.info(`Using connector ${connector.connectorId}`);
const builtDocsDir = Path.join(REPO_ROOT, '../built-docs');
log.info(`Looking in ${builtDocsDir} for built-docs repository`);
log.debug(`Looking in ${builtDocsDir} for built-docs repository`);
const dirExists = await Fs.stat(builtDocsDir);
const getProgressHandler = () => {
let stage: string = '';
let method: string = '';
const loader: SingleBar = new SingleBar({
barsize: 25,
format: `{phase} {bar} {percentage}%`,
});
const start = once(() => {
loader.start(100, 0, { phase: 'initializing' });
});
return {
progress: (event: SimpleGitProgressEvent) => {
start();
if (event.stage !== stage || event.method !== method) {
stage = event.stage;
method = event.method;
}
loader.update(event.progress, { phase: event.method + '/' + event.stage });
},
stop: () => loader.stop(),
};
};
if (!dirExists) {
log.info('Cloning built-docs repo. This will take a while.');
const { progress, stop } = getProgressHandler();
await git(Path.join(builtDocsDir, '..'), {
progress,
}).clone(`https://github.com/elastic/built-docs`, builtDocsDir, ['--depth', '1']);
stop();
if (argv.syncDocs) {
log.info(`Running sync for built-docs repository in ${builtDocsDir}...`);
await syncBuiltDocs({ builtDocsDir, log });
}
const { progress, stop } = getProgressHandler();
log.info(`Retrieving and converting documentation from ${builtDocsDir}...`);
const extraction = await extractDocEntries({
builtDocsDir,
inferenceClient: chatClient,
log,
});
const builtDocsGit = git(builtDocsDir, { progress });
log.info(`Rewriting documentation...`);
const docFiles = await generateDoc({
extraction,
inferenceClient: chatClient,
log,
});
log.debug('Initializing simple-git');
await builtDocsGit.init();
log.info('Making sure built-docs is up to date');
await builtDocsGit.pull();
const files = FastGlob.sync(
`${builtDocsDir}/html/en/elasticsearch/reference/master/esql*.html`
);
if (!files) {
throw new Error('No files found');
}
const fsLimiter = pLimit(10);
stop();
log.info(`Processing ${files.length} files`);
async function extractContents(
file: string
): Promise<
Array<{ title: string; content: string; instructions?: string; skip?: boolean }>
> {
const fileContents = await Fs.readFile(file);
const $element = load(fileContents.toString())('*');
function getSimpleText() {
$element.remove('.navfooter');
$element.remove('#sticky_content');
$element.find('code').each(function () {
$(this).replaceWith('`' + $(this).text() + '`');
});
return $element
.find('.section,section,.part')
.last()
.text()
.replaceAll(/([\n]\s*){2,}/g, '\n');
}
switch (Path.basename(file)) {
case 'esql-commands.html':
return extractSections($element)
.filter(({ title }) => !!title.match(/^[A-Z_]+$/))
.map((doc) => ({
...doc,
instructions: `For this command, generate a Markdown document containing the following sections:
## {Title}
{What this command does, the use cases, and any limitations from this document or esql-limitations.txt}
### Examples
{example ES|QL queries using this command. prefer to copy mentioned queries, but make sure there are at least three different examples, focusing on different usages of this command}`,
}));
case 'esql-limitations.html':
return [
{
title: 'Limitations',
content: getSimpleText(),
skip: true,
},
];
case 'esql-syntax.html':
return [
{
title: 'Syntax',
content: getSimpleText(),
instructions: `Generate a description of ES|QL syntax. Be as complete as possible.
For timespan literals, generate at least five examples of full ES|QL queries, using a mix commands and functions, using different intervals and units.
**Make sure you use timespan literals, such as \`1 day\` or \`24h\` or \`7 weeks\` in these examples**.
Combine ISO timestamps with time span literals and NOW().
Make sure the example queries are using different combinations of syntax, commands and functions for each.
When using DATE_TRUNC, make sure you DO NOT wrap the timespan in single or double quotes.
Do not use the Cast operator.
`,
},
];
case 'esql.html':
return [
{
title: 'Overview',
content: getSimpleText().replace(
/The ES\|QL documentation is organized in these sections(.*)$/,
''
),
instructions: `Generate a description of ES|QL as a language. Ignore links to other documents. From Limitations, include the known limitations, but ignore limitations that are specific to a command.
Include a summary of what is mentioned in the CROSS_CLUSTER, Kibana and API sections. Explain how to use the REST API with an example and mention important information for Kibana usage and cross cluster querying.`,
},
];
case 'esql-cross-clusters.html':
return [
{
title: 'CROSS_CLUSTER',
content: getSimpleText(),
skip: true,
},
];
case 'esql-query-api.html':
return [
{
title: 'API',
content: getSimpleText(),
skip: true,
},
];
case 'esql-kibana.html':
return [
{
title: 'Kibana',
content: getSimpleText(),
skip: true,
},
];
case 'esql-functions-operators.html':
const sections = extractSections($element);
const searches = [
'Binary operators',
'Equality',
'Inequality',
'Less than',
'Greater than',
'Add +',
'Subtract -',
'Multiply *',
'Divide /',
'Modulus %',
'Unary operators',
'Logical operators',
'IS NULL',
'IS NOT NULL',
'Cast (::)',
];
const matches = ['IN', 'LIKE', 'RLIKE'];
const [operatorSections, allOtherSections] = partition(sections, (section) => {
return (
matches.includes(section.title) ||
searches.some((search) =>
section.title.toLowerCase().startsWith(search.toLowerCase())
)
log.info(`Correcting common ESQL mistakes...`);
docFiles.forEach((docFile) => {
docFile.content = docFile.content.replaceAll(
INLINE_ESQL_QUERY_REGEX,
(match, query) => {
const correctionResult = correctCommonEsqlMistakes(query);
if (correctionResult.isCorrection) {
log.info(
`Corrected ES|QL, from:\n${correctionResult.input}\nto:\n${correctionResult.output}`
);
});
return allOtherSections
.map((section) => ({
...section,
instructions: `For each function, use the following template:
## {Title}
{description of what this function does}
### Examples
{at least two examples of full ES|QL queries. prefer the ones in the document verbatim}
`,
}))
.concat({
title: 'Operators',
content: operatorSections
.map(({ title, content }) => `${title}\n${content}`)
.join('\n'),
instructions:
'Generate a document describing the operators. For each type of operator (binary, unary, logical, and the remaining), generate a section. For each operator, generate at least one full ES|QL query as an example of its usage. Keep it short, e.g. only a ```esql\nFROM ...\n| WHERE ... ```',
});
default:
log.debug('Dropping file', file);
break;
}
return [];
}
const documents = await Promise.all(
files.map((file) => fsLimiter(() => extractContents(file)))
);
const flattened = documents.flat().filter((doc) => {
// ES|QL aggregate functions, ES|QL mathematical functions, ES|QL string functions etc
const isOverviewArticle =
doc.title.startsWith('ES|QL') ||
doc.title === 'Functions overview' ||
doc.title === 'Operators overview';
if (isOverviewArticle) {
log.debug('Dropping overview article', doc.title);
}
return !isOverviewArticle;
}
return '```esql\n' + correctionResult.output + '\n```';
}
);
});
const outDir = Path.join(__dirname, '../../server/tasks/nl_to_esql/esql_docs');
if (!argv.dryRun) {
log.info(`Writing ${flattened.length} documents to disk to ${outDir}`);
}
log.info(`Writing ${docFiles.length} documents to disk to ${outDir}`);
if (!argv.only && !argv.dryRun) {
log.debug(`Clearing ${outDir}`);
await Fs.readdir(outDir, { recursive: true })
.then((filesInDir) => {
const limiter = pLimit(10);
return Promise.all(filesInDir.map((file) => limiter(() => Fs.unlink(file))));
})
.catch((error) => (error.code === 'ENOENT' ? Promise.resolve() : error));
}
if (!argv.dryRun) {
await Fs.mkdir(outDir).catch((error) =>
error.code === 'EEXIST' ? Promise.resolve() : error
);
}
const chatLimiter = pLimit(10);
const allContent = flattened
.map((doc) => `## ${doc.title}\n\n${doc.content}\n\(end of ${doc.title})`)
.join('\n\n');
const allErrors: Array<{
title: string;
fileName: string;
errors: Array<{ query: string; errors: Array<ESQLMessage | EditorError> }>;
}> = [];
async function writeFile(doc: { title: string; content: string }) {
const fileName = Path.join(
outDir,
`esql-${doc.title.replaceAll(' ', '-').toLowerCase()}.txt`
await Promise.all(
docFiles.map(async (file) => {
const fileName = Path.join(outDir, file.name);
await Fs.writeFile(fileName, file.content);
})
);
doc.content = doc.content.replaceAll(INLINE_ESQL_QUERY_REGEX, (match, query) => {
const correctionResult = correctCommonEsqlMistakes(query);
if (correctionResult.isCorrection) {
log.info(
`Corrected ES|QL, from:\n${correctionResult.input}\nto:\n${correctionResult.output}`
);
}
return '```esql\n' + correctionResult.output + '\n```';
});
const queriesWithSyntaxErrors = compact(
await Promise.all(
Array.from(doc.content.matchAll(INLINE_ESQL_QUERY_REGEX)).map(
async ([match, query]) => {
const { errors, warnings } = await validateQuery(query, getAstAndSyntaxErrors, {
// setting this to true, we don't want to validate the index / fields existence
ignoreOnMissingCallbacks: true,
});
const all = [...errors, ...warnings];
if (all.length) {
log.warning(
`Error in ${fileName}:\n${JSON.stringify({ errors, warnings }, null, 2)}`
);
return {
errors: all,
query,
};
}
}
)
)
);
if (queriesWithSyntaxErrors.length) {
allErrors.push({
title: doc.title,
fileName,
errors: queriesWithSyntaxErrors,
});
}
if (!argv.dryRun) {
await Fs.writeFile(fileName, doc.content);
}
}
await Promise.all(
flattened.map(async (doc) => {
if (doc.skip || (argv.only && !argv.only.includes(doc.title))) {
return undefined;
}
if (!doc.instructions) {
return fsLimiter(() => writeFile(doc));
}
return chatLimiter(async () => {
try {
const response = await lastValueFrom(
chatClient.output('generate_markdown', {
connectorId: chatClient.getConnectorId(),
system: `## System instructions
Your job is to generate Markdown documentation off of content that is scraped from the Elasticsearch website.
The documentation is about ES|QL, or the Elasticsearch Query Language, which is a new piped language that can be
used for loading, extracting and transforming data stored in Elasticsearch. The audience for the documentation
you generate, is intended for an LLM, to be able to answer questions about ES|QL or generate and execute ES|QL
queries.
If you need to generate example queries, make sure they are different, in that they use different commands, and arguments,
to show case how a command, function or operator can be used in different ways.
When you generate a complete ES|QL query, always wrap it in code blocks with the language being \`esql\`.. Here's an example:
\`\`\`esql
FROM logs-*
| WHERE @timestamp <= NOW()
\`\`\`
**If you are describing the syntax of a command, only wrap it in SINGLE backticks.
Leave out the esql part**. Eg:
### Syntax:
\`DISSECT input "pattern" [APPEND_SEPARATOR="<separator>"]\`
#### Context
These is the entire documentation, use it as context for answering questions
${allContent}
`,
input: `Generate Markdown for the following document:
## ${doc.title}
### Instructions
${doc.instructions}
### Content of file
${doc.content}`,
})
);
return fsLimiter(() =>
writeFile({ title: doc.title, content: response.content! })
);
} catch (error) {
log.error(`Error processing ${doc.title}: ${error.message}`);
}
});
})
);
log.info(`Checking syntax...`);
const syntaxErrors = (
await Promise.all(docFiles.map(async (file) => await findEsqlSyntaxError(file)))
).flat();
log.warning(
`Please verify the following queries that had syntax errors\n${JSON.stringify(
allErrors,
syntaxErrors,
null,
2
)}`
@ -498,3 +146,31 @@ yargs(process.argv.slice(2))
}
)
.parse();
interface SyntaxError {
query: string;
errors: Array<ESQLMessage | EditorError>;
}
const findEsqlSyntaxError = async (doc: FileToWrite): Promise<SyntaxError[]> => {
return Array.from(doc.content.matchAll(INLINE_ESQL_QUERY_REGEX)).reduce(
async (listP, [match, query]) => {
const list = await listP;
const { errors, warnings } = await validateQuery(query, getAstAndSyntaxErrors, {
// setting this to true, we don't want to validate the index / fields existence
ignoreOnMissingCallbacks: true,
});
const all = [...errors, ...warnings];
if (all.length) {
list.push({
errors: all,
query,
});
}
return list;
},
Promise.resolve([] as SyntaxError[])
);
};

View file

@ -0,0 +1,46 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { PromptTemplate } from '../utils/output_executor';
/**
 * Prompt used to ask the LLM to convert a raw html fragment (one extracted
 * doc section) into an equivalent Markdown document.
 *
 * The system message constrains the output format (heading levels, no links,
 * no blockquotes, `esql` code fences) so generated entries stay uniform.
 */
export const convertToMarkdownPrompt: PromptTemplate<{
htmlContent: string;
}> = ({ htmlContent }) => {
return {
system: `
You are a helpful assistant specialized
in converting html fragment extracted from online documentation into equivalent Markdown documents.
Please respond exclusively with the requested Markdown document, without
adding your thoughts or any non-markdown reply.
- Ignore all links (just use their text content when relevant)
- Blockquotes (>) are not wanted, so don't generate any
- Use title2 (##) for the main title of the document
- Use title3 (###) for the section titles, such as "Syntax", "Parameters", "Examples" and so on.
- Use title4 (####) for subsections, such as parameter names or example titles
- HTML tables that are below code snippets are example of results. Please convert them to Markdown table
- for <svg> elements, only keep the text content of the underlying <text> elements
All the code snippets are for ESQL, so please use the following format for all snippets:
\`\`\`esql
<code example>
\`\`\`
`,
input: `
Here is the html documentation to convert to markdown:
\`\`\`html
${htmlContent}
\`\`\`
`,
};
};

View file

@ -0,0 +1,60 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { PromptTemplate } from '../utils/output_executor';
/**
 * Prompt used to ask the LLM to create a synthetic documentation page
 * (e.g. syntax, overview, operators) from extracted page content.
 *
 * The full documentation is injected as JSON context, and each page supplies
 * its own `specificInstructions` describing what to generate.
 */
export const createDocumentationPagePrompt: PromptTemplate<{
content: string;
documentation: string;
specificInstructions: string;
}> = ({ content, documentation, specificInstructions }) => {
return {
system: `
You are a helpful assistant specialized in checking and improving technical documentation
about ES|QL, the new Query language from Elasticsearch written in Markdown format.
Your job is to generate technical documentation in Markdown format based on content that is scraped from the Elasticsearch website.
The documentation is about ES|QL, or the Elasticsearch Query Language, which is a new piped language that can be
used for loading, extracting and transforming data stored in Elasticsearch. The audience for the documentation
you generate, is intended for an LLM, to be able to answer questions about ES|QL or generate and execute ES|QL
queries.
If you need to generate example queries, make sure they are different, in that they use different commands, and arguments,
to show case how a command, function or operator can be used in different ways.
When you generate a complete ES|QL query, always wrap it in code blocks with the language being \`esql\`.. Here's an example:
\`\`\`esql
FROM logs-*
| WHERE @timestamp <= NOW()
\`\`\`
#### Context
This is the entire documentation, in JSON format. Use it as context for answering questions
\`\`\`json
${documentation}
\`\`\`
`,
input: `
${specificInstructions}
Use this document as main source to generate your markdown document:
\`\`\`markdown
${content}
\`\`\`
But also add relevant content from the documentation you have access to.
`,
};
};

View file

@ -0,0 +1,10 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
// Barrel file re-exporting the prompt templates used by the documentation
// generation script, so consumers can import them from a single entry point.
export { createDocumentationPagePrompt } from './create_documentation_page';
export { rewriteFunctionPagePrompt } from './rewrite_function_page';
export { convertToMarkdownPrompt } from './convert_to_markdown';

View file

@ -0,0 +1,238 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { PromptTemplate } from '../utils/output_executor';
/**
 * Prompt template used to ask the LLM to rewrite the documentation page of an
 * ES|QL command or function, following a fixed markdown template and using a
 * one-shot example (the DISSECT page below) to anchor the expected output.
 *
 * @param content - the markdown source page to rewrite
 * @param documentation - the full ES|QL documentation serialized as JSON, used
 *   by the model to look up limitations and additional examples
 * @param command - true when the page describes a command, false for a function
 */
export const rewriteFunctionPagePrompt: PromptTemplate<{
  content: string;
  documentation: string;
  command: boolean;
}> = ({ content, documentation, command: isCommand }) => {
  // wording of the prompt differs slightly depending on the entity type
  const entityName = isCommand ? 'command' : 'function';
  return {
    system: `
You are a helpful assistant specialized in rewriting technical documentation articles
about ES|QL, the new Query language from Elasticsearch written in Markdown format.
An ES|QL query is composed of a source command followed by an optional
series of processing commands, separated by a pipe character: |. For
example:
<source-command>
| <processing-command1>
| <processing-command2>
An example of what an ES|QL query looks like:
\`\`\`esql
FROM employees
| WHERE still_hired == true
| EVAL hired = DATE_FORMAT("YYYY", hire_date)
| STATS avg_salary = AVG(salary) BY languages
| EVAL avg_salary = ROUND(avg_salary)
| EVAL lang_code = TO_STRING(languages)
| ENRICH languages_policy ON lang_code WITH lang = language_name
| WHERE lang IS NOT NULL
| KEEP avg_salary, lang
| SORT avg_salary ASC
| LIMIT 3
\`\`\`
You will be given a technical documentation article about a specific ES|QL ${entityName},
please rewrite it using the following template:
\`\`\`markdown
# {title of the ${entityName}}
{short description of what the ${entityName} does}
## Syntax
{syntax used for the ${entityName}. Just re-use the content from the original article}
### Parameters
{foreach parameters}
#### {parameter name}
{if the parameter is optional, mention it. otherwise don't mention it's not optional}
{short explanation of what the parameter does}
{end foreach argument}
## Examples
{list of examples from the source doc}
\`\`\`
Additional instructions:
- Follow the template, and DO NOT add any section, unless explicitly asked for in the instructions.
- DO NOT modify the main title of the page, it must only be the command name, e.g. "# AVG"
- Do NOT mention "ES|QL" in the description
- GOOD: "The AVG ${entityName} calculates [...]"
- BAD: "The AVG ${entityName} in ES|QL calculates [...]"
- Move the description section at the beginning of the file (but remove the title).
- This means there is no longer a "Description" section after the "Parameters" one
- For the "Syntax" section, if you need to escape code blocks, use single ticks and not triple ticks
- GOOD: \`AVG(number)\`
- BAD: \`\`\`AVG(number)\`\`\`
- For the "Parameters" section
- if there is a description of the parameter in the source document, re-use it. Else, use your own words.
- For the "Examples" section:
- Re-use as much as possible examples from the source document
- DO NOT modify the syntax of the examples. The syntax is correct, don't try to fix it.
- For each example, add a short, entity-dense sentence explaining what the example does.
- GOOD: "Calculate the average salary change"
- BAD: "Calculate the average salary change. This example uses the \`MV_AVG\` function to first average the multiple values per employee, and then uses the result with the \`AVG\` function:"
- If any limitations impacting this ${entityName} are mentioned in this document or other ones, such
as the "esql-limitations.html" file, please add a "Limitations" section at the bottom of the file
and mention them. Otherwise, don't say or mention that there are no limitations.
- When you generate a complete ES|QL query for the examples, always wrap it in code blocks
with the language being \`esql\`.
An example of rewrite would be:
Source:
/////
${source}
/////
Output:
/////
${output}
/////
Please answer exclusively with the content of the output document, without any additional messages,
information, thought or reasoning. DO NOT wrap the output with \`\`\`markdown.
The full documentation, in JSON format:
\`\`\`json
${documentation}
\`\`\`
Please use it to search for limitations or additional information or examples when rewriting the article.
`,
    input: `
This is the technical document page you need to rewrite:
\`\`\`markdown
${content}
\`\`\`
`,
  };
};
/** One-shot example: the raw DISSECT page as scraped, referenced by the prompt above. */
const source = `
## DISSECT
DISSECT enables you to extract structured data out of a string.
### Syntax
\`\`\`esql
DISSECT input \"pattern\" [APPEND_SEPARATOR=\"<separator>\"]
\`\`\`
### Parameters
#### input
The column that contains the string you want to structure. If the column has multiple values, DISSECT will process each value.
#### pattern
A dissect pattern. If a field name conflicts with an existing column, the existing column is dropped. If a field name is used more than once, only the rightmost duplicate creates a column.
#### <separator>
A string used as the separator between appended values, when using the append modifier.
### Description
DISSECT enables you to extract structured data out of a string. DISSECT matches the string against a delimiter-based pattern, and extracts the specified keys as columns.
Refer to Process data with DISSECT for the syntax of dissect patterns.
### Examples
The following example parses a string that contains a timestamp, some text, and an IP address:
\`\`\`esql
ROW a = \"2023-01-23T12:15:00.000Z - some text - 127.0.0.1\"
| DISSECT a \"%{date} - %{msg} - %{ip}\"
| KEEP date, msg, ip
\`\`\`
By default, DISSECT outputs keyword string columns. To convert to another type, use Type conversion functions:
\`\`\`esql
ROW a = \"2023-01-23T12:15:00.000Z - some text - 127.0.0.1\"
| DISSECT a \"%{date} - %{msg} - %{ip}\"
| KEEP date, msg, ip
| EVAL date = TO_DATETIME(date)
\`\`\`
`;
/** One-shot example: the expected rewrite of `source`, matching the template above. */
const output = `
# DISSECT
The DISSECT command is used to extract structured data from a string.
It matches the string against a delimiter-based pattern and extracts the specified keys as columns.
## Syntax
\`DISSECT input "pattern" [APPEND_SEPARATOR="<separator>"]\`
### Parameters
#### input
The column containing the string you want to structure. If the column has multiple values, DISSECT will process each value.
#### pattern
A dissect pattern. If a field name conflicts with an existing column, the existing column is dropped. If a field name is used more than once, only the rightmost duplicate creates a column.
#### <separator>
A string used as the separator between appended values, when using the append modifier.
## Examples
The following example parses a string that contains a timestamp, some text, and an IP address:
\`\`\`esql
ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1"
| DISSECT a "%{date} - %{msg} - %{ip}"
| KEEP date, msg, ip
\`\`\`
By default, DISSECT outputs keyword string columns. To convert to another type, use Type conversion functions:
\`\`\`esql
ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1"
| DISSECT a "%{date} - %{msg} - %{ip}"
| KEEP date, msg, ip
| EVAL date = TO_DATETIME(date)
\`\`\`
`;

View file

@ -0,0 +1,85 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import Path from 'path';
import Fs from 'fs/promises';
import git, { SimpleGitProgressEvent } from 'simple-git';
import { SingleBar } from 'cli-progress';
import { once } from 'lodash';
import { ToolingLog } from '@kbn/tooling-log';
/**
 * Ensures a local checkout of the `elastic/built-docs` repository is present
 * and up to date at `builtDocsDir`.
 *
 * When the directory does not exist yet, performs a shallow clone (the repo is
 * large, hence the `--depth 1` and the progress bar). Then runs init + pull to
 * bring the checkout up to date.
 */
export const syncBuiltDocs = async ({
  builtDocsDir,
  log,
}: {
  builtDocsDir: string;
  log: ToolingLog;
}) => {
  if (!(await exists(builtDocsDir))) {
    log.info('Cloning built-docs repo. This will take a while.');
    const cloneProgress = getProgressHandler();
    const parentDir = Path.join(builtDocsDir, '..');
    await git(parentDir, { progress: cloneProgress.progress }).clone(
      `https://github.com/elastic/built-docs`,
      builtDocsDir,
      ['--depth', '1']
    );
    cloneProgress.stop();
  }
  const pullProgress = getProgressHandler();
  const repo = git(builtDocsDir, { progress: pullProgress.progress });
  log.debug('Initializing simple-git');
  await repo.init();
  log.info('Making sure built-docs is up to date');
  await repo.pull();
  pullProgress.stop();
};
/**
 * Checks whether the given filesystem path exists.
 * A missing path (ENOENT) resolves to false; any other stat error
 * (e.g. permission issues) is rethrown to the caller.
 */
const exists = async (path: string): Promise<boolean> => {
  try {
    await Fs.stat(path);
    return true;
  } catch (err) {
    if (err.code === 'ENOENT') {
      return false;
    }
    throw err;
  }
};
/**
 * Creates a cli-progress single bar wired to simple-git's progress callback.
 *
 * Returns a `progress` handler to pass to simple-git and a `stop` callback that
 * must be invoked once the git operation completes to release the bar.
 */
const getProgressHandler = () => {
  // last seen stage/method from simple-git progress events
  let stage: string = '';
  let method: string = '';
  const loader: SingleBar = new SingleBar({
    barsize: 25,
    format: `{phase} {bar} {percentage}%`,
  });
  // lazily start the bar on the first progress event only
  const start = once(() => {
    loader.start(100, 0, { phase: 'initializing' });
  });
  return {
    progress: (event: SimpleGitProgressEvent) => {
      start();
      // NOTE(review): stage/method are recorded on phase change but the branch
      // has no further effect — the phase label below is rebuilt from the event
      // directly. Looks like leftover logic; confirm intent.
      if (event.stage !== stage || event.method !== method) {
        stage = event.stage;
        method = event.method;
      }
      loader.update(event.progress, { phase: event.method + '/' + event.stage });
    },
    stop: () => loader.stop(),
  };
};

View file

@ -0,0 +1,39 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { lastValueFrom } from 'rxjs';
import type { OutputAPI } from '../../../common/output';
/** A prompt ready to be sent to the LLM: an optional system message plus the user input. */
export interface Prompt {
  // optional system message establishing the assistant's role/context
  system?: string;
  // the user input message
  input: string;
}
/** Factory producing a {@link Prompt} from a typed input. */
export type PromptTemplate<Input> = (input: Input) => Prompt;
/** Executes a prompt and resolves with the generated text. */
export type PromptCaller = (prompt: Prompt) => Promise<string>;
/** Builds a {@link PromptCaller} bound to a given connector and output API. */
export type PromptCallerFactory = ({
  connectorId,
  output,
}: {
  connectorId: string;
  output: OutputAPI;
}) => PromptCaller;
/**
 * Binds the output API to a connector, returning a {@link PromptCaller} that
 * runs a single prompt to completion and resolves with the full response
 * content (empty string when the response carries no content).
 */
export const bindOutput: PromptCallerFactory = ({ connectorId, output }) => {
  return async ({ input, system }) => {
    // wait for the final emission of the output observable (the complete response)
    const response = await lastValueFrom(
      output('', {
        connectorId,
        input,
        system,
      })
    );
    return response.content ?? '';
  };
};

View file

@ -1,14 +1,28 @@
## ABS
# ABS
The `ABS` function returns the absolute value of a numeric expression. If the input is null, the function returns null.
The ABS function returns the absolute value of a given number.
### Examples
## Syntax
`ABS(number)`
### Parameters
#### number
A numeric expression. If the parameter is `null`, the function will also return `null`.
## Examples
In this example, the ABS function is used to calculate the absolute value of -1.0:
```esql
ROW number = -1.0
| EVAL abs_number = ABS(number)
```
In the following example, the ABS function is used to calculate the absolute value of the height of employees:
```esql
FROM employees
| KEEP first_name, last_name, height

View file

@ -1,15 +1,27 @@
## ACOS
# ACOS
The `ACOS` function returns the arccosine of a number as an angle, expressed in radians. The input number must be between -1 and 1. If the input is null, the function returns null.
The ACOS function returns the arccosine of a given number, expressed in radians.
### Examples
## Syntax
`ACOS(number)`
### Parameters
#### number
This is a number between -1 and 1. If the parameter is `null`, the function will also return `null`.
## Examples
In this example, the ACOS function calculates the arccosine of 0.9.
```esql
ROW a = .9
| EVAL acos = ACOS(a)
ROW a=.9
| EVAL acos=ACOS(a)
```
```esql
ROW b = -0.5
| EVAL acos_b = ACOS(b)
```
```

View file

@ -1,15 +1,29 @@
## ASIN
# ASIN
The `ASIN` function returns the arcsine of the input numeric expression as an angle, expressed in radians.
The ASIN function returns the arcsine of a given numeric expression as an angle, expressed in radians.
### Examples
## Syntax
`ASIN(number)`
### Parameters
#### number
This is a numeric value ranging between -1 and 1. If the parameter is `null`, the function will also return `null`.
## Examples
In this example, the ASIN function calculates the arcsine of 0.9:
```esql
ROW a = .9
| EVAL asin = ASIN(a)
ROW a=.9
| EVAL asin=ASIN(a)
```
In this example, the ASIN function calculates the arcsine of -0.5:
```esql
ROW a = -.5
| EVAL asin = ASIN(a)
```
```

View file

@ -1,8 +1,18 @@
## ATAN
# ATAN
The `ATAN` function returns the arctangent of the input numeric expression as an angle, expressed in radians.
The ATAN function returns the arctangent of a given numeric expression, expressed in radians.
### Examples
## Syntax
`ATAN(number)`
### Parameters
#### number
This is a numeric expression. If the parameter is `null`, the function will also return `null`.
## Examples
```esql
ROW a=12.9
@ -12,4 +22,4 @@ ROW a=12.9
```esql
ROW x=5.0, y=3.0
| EVAL atan_yx = ATAN(y / x)
```
```

View file

@ -1,15 +1,29 @@
## AVG
# AVG
The `AVG` function calculates the average of a numeric field.
The AVG function calculates the average of a numeric field.
### Examples
## Syntax
`AVG(number)`
### Parameters
#### number
The numeric field for which the average is calculated.
## Examples
Calculate the average height of employees:
```esql
FROM employees
| STATS AVG(height)
```
The AVG function can be used with inline functions. For example:
```esql
FROM employees
| STATS avg_salary_change = ROUND(AVG(MV_AVG(salary_change)), 10)
```
```

View file

@ -1,8 +1,34 @@
## BUCKET
# BUCKET
The `BUCKET` function creates groups of values—buckets—out of a datetime or numeric input. The size of the buckets can either be provided directly or chosen based on a recommended count and values range.
The BUCKET function allows you to create groups of values, known as buckets, from a datetime or numeric input. The size of the buckets can be specified directly or determined based on a recommended count and values range.
### Examples
## Syntax
`BUCKET(field, buckets, from, to)`
### Parameters
#### field
A numeric or date expression from which to derive buckets.
#### buckets
The target number of buckets, or the desired bucket size if `from` and `to` parameters are omitted.
#### from
The start of the range. This can be a number, a date, or a date expressed as a string.
#### to
The end of the range. This can be a number, a date, or a date expressed as a string.
## Examples
BUCKET can operate in two modes: one where the bucket size is computed based on a bucket count recommendation and a range, and another where the bucket size is provided directly.
For instance, asking for at most 20 buckets over a year results in monthly buckets:
```esql
FROM employees
@ -11,19 +37,7 @@ FROM employees
| SORT hire_date
```
```esql
FROM employees
| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
| STATS hires_per_month = COUNT(*) BY month = BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
| SORT month
```
```esql
FROM employees
| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
| STATS hires_per_week = COUNT(*) BY week = BUCKET(hire_date, 100, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
| SORT week
```
If the desired bucket size is known in advance, simply provide it as the second argument, leaving the range out:
```esql
FROM employees
@ -32,12 +46,25 @@ FROM employees
| SORT week
```
BUCKET can also operate on numeric fields. For example, to create a salary histogram:
```esql
FROM employees
| STATS COUNT(*) BY bs = BUCKET(salary, 20, 25324, 74999)
| SORT bs
```
BUCKET may be used in both the aggregating and grouping part of the STATS ... BY ... command provided that in the aggregating part the function is referenced by an alias defined in the grouping part, or that it is invoked with the exact same expression:
```esql
FROM employees
| STATS s1 = b1 + 1, s2 = BUCKET(salary / 1000 + 999, 50.) + 2 BY b1 = BUCKET(salary / 100 + 99, 50.), b2 = BUCKET(salary / 1000 + 999, 50.)
| SORT b1, b2
| KEEP s1, b1, s2, b2
```
More examples:
```esql
FROM employees
| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"

View file

@ -1,8 +1,22 @@
## CASE
# CASE
The `CASE` function accepts pairs of conditions and values. The function returns the value that belongs to the first condition that evaluates to true. If the number of arguments is odd, the last argument is the default value which is returned when no condition matches. If the number of arguments is even, and no condition matches, the function returns null.
The CASE function accepts pairs of conditions and values. It returns the value that corresponds to the first condition that evaluates to `true`. If no condition matches, the function returns a default value or `null` if the number of arguments is even.
### Examples
## Syntax
`CASE(condition, trueValue)`
### Parameters
#### condition
A condition to evaluate.
#### trueValue
The value that is returned when the corresponding condition is the first to evaluate to `true`. If no condition matches, the default value is returned.
## Examples
Determine whether employees are monolingual, bilingual, or polyglot:
@ -32,6 +46,6 @@ Calculate an hourly error rate as a percentage of the total number of log messag
FROM sample_data
| EVAL error = CASE(message LIKE "*error*", 1, 0)
| EVAL hour = DATE_TRUNC(1 hour, @timestamp)
| STATS error_rate = AVG(error) BY hour
| STATS error_rate = AVG(error) by hour
| SORT hour
```

View file

@ -1,15 +1,20 @@
## CBRT
# CBRT
The `CBRT` function returns the cube root of a number. The input can be any numeric value, and the return value is always a double. Cube roots of infinities are null.
The CBRT function calculates the cube root of a given number.
### Examples
## Syntax
`CBRT(number)`
### Parameters
#### number
This is a numeric expression. If the parameter is `null`, the function will also return `null`.
## Examples
```esql
ROW d = 1000.0
| EVAL c = CBRT(d)
| EVAL c = cbrt(d)
```
```esql
ROW value = 27.0
| EVAL cube_root = CBRT(value)
```

View file

@ -1,16 +1,24 @@
## CEIL
# CEIL
The `CEIL` function rounds a number up to the nearest integer. This operation is a no-op for long (including unsigned) and integer types. For double types, it picks the closest double value to the integer, similar to `Math.ceil`.
The CEIL function rounds a number up to the nearest integer.
### Examples
## Syntax
`CEIL(number)`
### Parameters
#### number
This is a numeric expression. If the parameter is `null`, the function will also return `null`.
## Examples
```esql
ROW a=1.8
| EVAL a = CEIL(a)
| EVAL a=CEIL(a)
```
```esql
FROM employees
| KEEP first_name, last_name, height
| EVAL height_ceil = CEIL(height)
```
## Limitations
- the CEIL function does not perform any operation for `long` (including unsigned) and `integer` types. For `double` type, it picks the closest `double` value to the integer, similar to the Math.ceil function in other programming languages.

View file

@ -1,17 +1,35 @@
## CIDR_MATCH
# CIDR_MATCH
The `CIDR_MATCH` function returns true if the provided IP is contained in one of the provided CIDR blocks.
The CIDR_MATCH function checks if a given IP address falls within one or more specified CIDR blocks.
### Examples
## Syntax
`CIDR_MATCH(ip, blockX)`
### Parameters
#### ip
The IP address to be checked. This function supports both IPv4 and IPv6 addresses.
#### blockX
The CIDR block(s) against which the IP address is to be checked.
## Examples
The following example checks if the IP address 'ip1' falls within the CIDR blocks "127.0.0.2/32":
```esql
FROM hosts
| WHERE CIDR_MATCH(ip1, "127.0.0.2/32", "127.0.0.3/32")
| WHERE CIDR_MATCH(ip1, "127.0.0.2/32")
| KEEP card, host, ip0, ip1
```
The function also supports passing multiple blockX:
```esql
FROM network_logs
| WHERE CIDR_MATCH(source_ip, "192.168.1.0/24", "10.0.0.0/8")
| KEEP timestamp, source_ip, destination_ip, action
```
```

View file

@ -1,15 +1,37 @@
## COALESCE
# COALESCE
The `COALESCE` function returns the first of its arguments that is not null. If all arguments are null, it returns null.
The COALESCE function returns the first non-null argument from the list of provided arguments.
### Examples
## Syntax
`COALESCE(first, rest)`
### Parameters
#### first
The first expression to evaluate.
#### rest
The subsequent expressions to evaluate.
### Description
The COALESCE function evaluates the provided expressions in order and returns the first non-null value it encounters. If all the expressions evaluate to null, the function returns null.
## Examples
In the following example, the COALESCE function evaluates the expressions 'a' and 'b'. Since 'a' is null, the function returns the value of 'b'.
```esql
ROW a=null, b="b"
| EVAL COALESCE(a, b)
```
COALESCE supports any number of rest parameters:
```esql
ROW x=null, y=null, z="z"
| EVAL first_non_null = COALESCE(x, y, z)
```
```

View file

@ -1,16 +1,36 @@
## CONCAT
# CONCAT
The `CONCAT` function concatenates two or more strings.
The CONCAT function combines two or more strings into one.
### Examples
## Syntax
`CONCAT(string1, string2, [...stringN])`
### Parameters
#### string1
The first string to concatenate.
#### string2
The second string to concatenate.
## Examples
The following example concatenates the `street_1` and `street_2` fields:
```esql
FROM address
| KEEP street_1, street_2
| EVAL fullstreet = CONCAT(street_1, street_2)
```
CONCAT supports any number of string parameters. The following example concatenates the `first_name` and `last_name` fields with a space in between:
```esql
FROM employees
| KEEP first_name, last_name
| EVAL fullname = CONCAT(first_name, " ", last_name)
```
```esql
ROW part1 = "Hello", part2 = "World"
| EVAL greeting = CONCAT(part1, " ", part2)
```

View file

@ -1,15 +1,25 @@
## COS
# COS
The `COS` function returns the cosine of an angle, expressed in radians. If the input angle is null, the function returns null.
The COS function calculates the cosine of a given angle.
### Examples
## Syntax
`COS(angle)`
### Parameters
#### angle
The angle for which the cosine is to be calculated, expressed in radians. If the parameter is `null`, the function will return `null`.
## Examples
```esql
ROW a=1.8
| EVAL cos = COS(a)
| EVAL cos=COS(a)
```
```esql
ROW angle=0.5
| EVAL cosine_value = COS(angle)
```
```

View file

@ -1,15 +1,25 @@
## COSH
# COSH
Returns the hyperbolic cosine of an angle.
The COSH function calculates the hyperbolic cosine of a given angle.
### Examples
## Syntax
`COSH(angle)`
### Parameters
#### angle
The angle in radians for which the hyperbolic cosine is to be calculated. If the angle is null, the function will return null.
## Examples
```esql
ROW a=1.8
| EVAL cosh = COSH(a)
| EVAL cosh=COSH(a)
```
```esql
ROW angle=0.5
| EVAL hyperbolic_cosine = COSH(angle)
```
```

View file

@ -1,31 +1,51 @@
## COUNT
# COUNT
The `COUNT` function returns the total number (count) of input values. If the `field` parameter is omitted, it is equivalent to `COUNT(*)`, which counts the number of rows.
The COUNT function returns the total number of input values.
### Examples
## Syntax
`COUNT(field)`
### Parameters
#### field
This is an expression that outputs values to be counted. If it's omitted, it's equivalent to `COUNT(*)`, which counts the number of rows.
## Examples
Count the number of specific field values:
```esql
FROM employees
| STATS COUNT(height)
```
Count the number of rows using `COUNT()` or `COUNT(*)`:
```esql
FROM employees
| STATS count = COUNT(*) BY languages
| SORT languages DESC
```
The expression can use inline functions. In this example, a string is split into multiple values using the `SPLIT` function, and the values are counted:
```esql
ROW words="foo;bar;baz;qux;quux;foo"
| STATS word_count = COUNT(SPLIT(words, ";"))
```
To count the number of times an expression returns `TRUE`, use a `WHERE` command to remove rows that shouldn't be included:
```esql
ROW n=1
| WHERE n < 0
| STATS COUNT(n)
```
To count the same stream of data based on two different expressions, use the pattern `COUNT(<expression> OR NULL)`:
```esql
ROW n=1
| STATS COUNT(n > 0 OR NULL), COUNT(n < 0 OR NULL)

View file

@ -1,31 +1,46 @@
## COUNT_DISTINCT
# COUNT_DISTINCT
The `COUNT_DISTINCT` function returns the approximate number of distinct values in a column or literal. It uses the HyperLogLog++ algorithm to count based on the hashes of the values, providing configurable precision to trade memory for accuracy. This function is particularly useful for high-cardinality sets and large values, as it maintains fixed memory usage regardless of the number of unique values.
The COUNT_DISTINCT function calculates the approximate number of distinct values in a specified field.
### Examples
## Syntax
`COUNT_DISTINCT(field, precision)`
### Parameters
#### field
The column or literal for which to count the number of distinct values.
#### precision
(Optional) The precision threshold. The counts are approximate. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000.
## Examples
The following example calculates the number of distinct values in the `ip0` and `ip1` fields:
```esql
FROM hosts
| STATS COUNT_DISTINCT(ip0), COUNT_DISTINCT(ip1)
```
You can also specify a precision threshold. In the following example, the precision threshold for `ip0` is set to 80000 and for `ip1` to 5:
```esql
FROM hosts
| STATS COUNT_DISTINCT(ip0, 80000), COUNT_DISTINCT(ip1, 5)
```
The COUNT_DISTINCT function can also be used with inline functions. This example splits a string into multiple values using the `SPLIT` function and counts the unique values:
```esql
ROW words="foo;bar;baz;qux;quux;foo"
| STATS distinct_word_count = COUNT_DISTINCT(SPLIT(words, ";"))
```
### Additional Information
- **Precision Threshold**: The `COUNT_DISTINCT` function takes an optional second parameter to configure the precision threshold. The maximum supported value is 40000, and the default value is 3000. This threshold allows you to trade memory for accuracy, defining a unique count below which counts are expected to be close to accurate. Above this value, counts might become a bit more fuzzy.
- **Algorithm**: The function is based on the HyperLogLog++ algorithm, which provides excellent accuracy on low-cardinality sets and fixed memory usage. The memory usage depends on the configured precision, requiring about `c * 8` bytes for a precision threshold of `c`.
### Notes
- Computing exact counts requires loading values into a set and returning its size, which doesn't scale well for high-cardinality sets or large values due to memory usage and communication overhead.
- The HyperLogLog++ algorithm's accuracy depends on the leading zeros of hashed values, and the exact distributions of hashes in a dataset can affect the accuracy of the cardinality.
- Even with a low threshold, the error remains very low (1-6%) even when counting millions of items.
- Even with a low threshold, the error remains very low (1-6%) even when counting millions of items.

View file

@ -1,8 +1,28 @@
## DATE_DIFF
# DATE_DIFF
The `DATE_DIFF` function subtracts the `startTimestamp` from the `endTimestamp` and returns the difference in multiples of the specified unit. If `startTimestamp` is later than the `endTimestamp`, negative values are returned. Note that while there is an overlap between the function's supported units and ES|QL's supported time span literals, these sets are distinct and not interchangeable. Similarly, the supported abbreviations are conveniently shared with implementations of this function in other established products and not necessarily common with the date-time nomenclature used by Elasticsearch.
The DATE_DIFF function calculates the difference between two timestamps and returns the difference in multiples of the specified `unit`.
### Examples
## Syntax
`DATE_DIFF(unit, startTimestamp, endTimestamp)`
### Parameters
#### unit
The unit of time in which the difference will be calculated.
#### startTimestamp
The starting timestamp for the calculation.
#### endTimestamp
The ending timestamp for the calculation.
## Examples
The following example demonstrates how to use the DATE_DIFF function to calculate the difference between two timestamps in microseconds:
```esql
ROW date1 = TO_DATETIME("2023-12-02T11:00:00.000Z"), date2 = TO_DATETIME("2023-12-02T11:00:00.001Z")
@ -12,4 +32,10 @@ ROW date1 = TO_DATETIME("2023-12-02T11:00:00.000Z"), date2 = TO_DATETIME("2023-1
```esql
ROW date1 = TO_DATETIME("2023-01-01T00:00:00.000Z"), date2 = TO_DATETIME("2023-12-31T23:59:59.999Z")
| EVAL dd_days = DATE_DIFF("days", date1, date2)
```
```
## Notes
- If the `startTimestamp` is later than the `endTimestamp`, the function will return a negative value.
- It's important to note that while there is some overlap between the units supported by this function and ES|QL's time span literals, these sets are not interchangeable. Also, the abbreviations supported by this function are shared with other established products and may not align with the date-time nomenclature used by Elasticsearch.

View file

@ -1,15 +1,33 @@
## DATE_EXTRACT
# DATE_EXTRACT
The `DATE_EXTRACT` function extracts specific parts of a date, such as the year, month, day, or hour. It can be used to retrieve various components of a date based on the specified `datePart`.
The DATE_EXTRACT function is used to extract specific parts of a date.
### Examples
## Syntax
`DATE_EXTRACT(datePart, date)`
### Parameters
#### datePart
This is the part of the date you want to extract, such as "year", "month" or "hour_of_day".
#### date
This is the date expression.
## Examples
To extract the year from a date:
```esql
ROW date = DATE_PARSE("yyyy-MM-dd", "2022-05-06")
| EVAL year = DATE_EXTRACT("year", date)
```
To find all events that occurred outside of business hours (before 9 AM or after 5PM), on any given date:
```esql
FROM sample_data
| WHERE DATE_EXTRACT("hour_of_day", @timestamp) < 9 OR DATE_EXTRACT("hour_of_day", @timestamp) >= 17
```
```

View file

@ -1,17 +1,28 @@
## DATE_FORMAT
# DATE_FORMAT
The `DATE_FORMAT` function returns a string representation of a date in the provided format. If no format is specified, the default format `yyyy-MM-dd'T'HH:mm:ss.SSSZ` is used. If the date expression is null, the function returns null.
The DATE_FORMAT function returns a string representation of a date, formatted according to the provided format.
### Examples
## Syntax
`DATE_FORMAT(dateFormat, date)`
### Parameters
#### dateFormat
This is an optional parameter that specifies the desired date format.
If no format is provided, the function defaults to the `yyyy-MM-dd'T'HH:mm:ss.SSSZ` format.
#### date
This is the date expression that you want to format.
## Examples
In this example, the `hire_date` field is formatted according to the "YYYY-MM-dd" format, and the result is stored in the `hired` field:
```esql
FROM employees
| KEEP first_name, last_name, hire_date
| EVAL hired = DATE_FORMAT("YYYY-MM-dd", hire_date)
```
```esql
FROM employees
| KEEP first_name, last_name, hire_date
| EVAL hired = DATE_FORMAT("yyyy/MM/dd", hire_date)
```

View file

@ -1,15 +1,28 @@
## DATE_PARSE
# DATE_PARSE
The `DATE_PARSE` function returns a date by parsing the second argument using the format specified in the first argument.
The DATE_PARSE function is used to convert a date string into a date format based on the provided pattern.
### Examples
## Syntax
`DATE_PARSE(datePattern, dateString)`
### Parameters
#### datePattern
This is the format of the date. If `null` is provided, the function will return `null`.
#### dateString
This is the date expression in string format.
## Examples
```esql
ROW date_string = "2022-05-06"
| EVAL date = DATE_PARSE("yyyy-MM-dd", date_string)
```
```esql
ROW date_string = "2023-12-25"
| EVAL date = DATE_PARSE("yyyy-MM-dd", date_string)
```
```

View file

@ -1,8 +1,24 @@
## DATE_TRUNC
# DATE_TRUNC
The `DATE_TRUNC` function rounds down a date to the closest interval.
The DATE_TRUNC function rounds down a date to the nearest specified interval.
### Examples
## Syntax
`DATE_TRUNC(interval, date)`
### Parameters
#### interval
This is the interval to which the date will be rounded down. It is expressed using the timespan literal syntax.
#### date
This is the date expression that will be rounded down.
## Examples
The following example rounds down the hire_date to the nearest year:
```esql
FROM employees
@ -10,7 +26,7 @@ FROM employees
| EVAL year_hired = DATE_TRUNC(1 year, hire_date)
```
Combine `DATE_TRUNC` with `STATS ... BY` to create date histograms. For example, the number of hires per year:
You can combine DATE_TRUNC with STATS ... BY to create date histograms. For example, the number of hires per year:
```esql
FROM employees
@ -19,7 +35,7 @@ FROM employees
| SORT year
```
Or an hourly error rate:
Or, you can calculate an hourly error rate:
```esql
FROM sample_data
@ -27,4 +43,4 @@ FROM sample_data
| EVAL hour = DATE_TRUNC(1 hour, @timestamp)
| STATS error_rate = AVG(error) BY hour
| SORT hour
```
```

View file

@ -1,29 +1,32 @@
## DISSECT
# DISSECT
DISSECT enables you to extract structured data out of a string. It matches the string against a delimiter-based pattern and extracts the specified keys as columns. This command is particularly useful for parsing log files, structured text, or any other string data where fields are separated by specific delimiters.
The DISSECT command is used to extract structured data from a string. It matches the string against a delimiter-based pattern and extracts the specified keys as columns.
### Use Cases
- **Log Parsing**: Extracting timestamps, log levels, and messages from log entries.
- **Data Transformation**: Converting unstructured text data into structured columns for further analysis.
- **Data Cleaning**: Removing or reformatting specific parts of a string to make the data more usable.
### Limitations
- If a field name conflicts with an existing column, the existing column is dropped.
- If a field name is used more than once, only the rightmost duplicate creates a column.
- DISSECT does not support reference keys.
### Syntax
## Syntax
`DISSECT input "pattern" [APPEND_SEPARATOR="<separator>"]`
### Parameters
- **input**: The column that contains the string you want to structure. If the column has multiple values, DISSECT will process each value.
- **pattern**: A dissect pattern.
- **<separator>**: A string used as the separator between appended values, when using the append modifier.
### Examples
#### input
The column containing the string you want to structure. If the column has multiple values, DISSECT will process each value.
#### pattern
A dissect pattern. If a field name conflicts with an existing column, the existing column is dropped. If a field name is used more than once, only the rightmost duplicate creates a column.
#### <separator>
A string used as the separator between appended values, when using the append modifier.
## Examples
#### Example 1: Basic Usage
The following example parses a string that contains a timestamp, some text, and an IP address:
```esql
@ -32,7 +35,6 @@ ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1"
| KEEP date, msg, ip
```
#### Example 2: Type Conversion
By default, DISSECT outputs keyword string columns. To convert to another type, use Type conversion functions:
```esql
@ -42,7 +44,6 @@ ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1"
| EVAL date = TO_DATETIME(date)
```
#### Example 3: Using Append Separator
In this example, we use the `APPEND_SEPARATOR` to concatenate values with a custom separator:
```esql
@ -51,4 +52,7 @@ ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1"
| KEEP date, msg, ip
```
These examples showcase different ways to use the DISSECT command to parse and transform string data in Elasticsearch.
### Limitations
- If a field name conflicts with an existing column, the existing column is dropped.
- If a field name is used more than once, only the rightmost duplicate creates a column.
- DISSECT does not support reference keys.

View file

@ -1,35 +1,33 @@
## DROP
# DROP
The `DROP` processing command in ES|QL is used to remove one or more columns from the result set. This command is particularly useful when you want to exclude certain fields from your query results, either to simplify the output or to reduce the amount of data being processed and transferred. The `DROP` command supports the use of wildcards, allowing you to remove multiple columns that match a specific pattern.
The DROP command is used to eliminate one or more columns from the data.
### Use Cases
- **Simplifying Output:** Remove unnecessary columns to make the result set easier to read and analyze.
- **Data Reduction:** Exclude large or irrelevant fields to reduce the amount of data processed and transferred.
- **Pattern Matching:** Use wildcards to efficiently drop multiple columns that share a common naming pattern.
## Syntax
### Limitations
- The `DROP` command does not support nested fields.
- It cannot be used to drop columns of unsupported types as specified in the ES|QL limitations.
`DROP columns`
### Examples
### Parameters
#### Example 1: Dropping a Single Column
This example demonstrates how to drop a single column named `height` from the `employees` index.
#### columns
This is a list of columns, separated by commas, that you want to remove. Wildcards are supported.
## Examples
In the following example, the 'height' column is removed from the data:
```esql
FROM employees
| DROP height
```
#### Example 2: Dropping Multiple Columns Using Wildcards
This example shows how to use wildcards to drop all columns that start with `height`.
You can also use wildcards to remove all columns that match a certain pattern. In the following example, all columns that start with 'height' are removed:
```esql
FROM employees
| DROP height*
```
#### Example 3: Dropping Multiple Specific Columns
This example demonstrates how to drop multiple specific columns by listing them in a comma-separated format.
```esql
@ -37,7 +35,6 @@ FROM employees
| DROP height, weight, age
```
#### Example 4: Dropping Columns with Complex Patterns
This example shows how to drop columns that match a more complex pattern using wildcards.
```esql
@ -45,7 +42,6 @@ FROM employees
| DROP emp_*
```
#### Example 5: Combining DROP with Other Commands
This example demonstrates how to use the `DROP` command in conjunction with other commands like `KEEP` and `SORT`.
```esql
@ -55,4 +51,6 @@ FROM employees
| SORT height DESC
```
By using the `DROP` command, you can effectively manage the columns in your result set, making your ES|QL queries more efficient and easier to work with.
### Limitations
- The `DROP` command does not support nested fields.
- It cannot be used to drop columns of unsupported types as specified in the ES|QL limitations.

View file

@ -1,8 +1,16 @@
## E
# E
The `E` function returns Euler's number, which is a mathematical constant approximately equal to 2.71828. It is the base of the natural logarithm.
The E function returns Euler's number.
### Examples
## Syntax
`E()`
### Parameters
This function does not require any parameters.
## Examples
```esql
ROW E()
@ -12,4 +20,4 @@ ROW E()
FROM employees
| EVAL euler_number = E()
| KEEP euler_number
```
```

View file

@ -1,8 +1,23 @@
## ENDS_WITH
# ENDS_WITH
The `ENDS_WITH` function returns a boolean that indicates whether a keyword string ends with another string.
The ENDS_WITH function checks if a given string ends with a specified suffix.
## Syntax
`ENDS_WITH(str, suffix)`
### Parameters
#### str
This is the string expression that you want to check.
#### suffix
The string expression that will be checked if it is the ending of the first string.
## Examples
### Examples
```esql
FROM employees
@ -14,4 +29,4 @@ FROM employees
FROM employees
| KEEP first_name
| EVAL fn_E = ENDS_WITH(first_name, "a")
```
```

View file

@ -1,48 +1,60 @@
## ENRICH
# ENRICH
ENRICH enables you to add data from existing indices as new columns using an enrich policy. This command is useful for enriching your dataset with additional information from other indices, which can be particularly beneficial for data analysis and reporting. Before using the ENRICH command, you need to create and execute an enrich policy.
The ENRICH command allows you to add data from existing indices as new columns using an enrich policy.
### Use Cases
- **Data Enrichment**: Add supplementary data to your existing dataset for more comprehensive analysis.
- **Cross-Cluster Enrichment**: Enrich data across multiple clusters using the `mode` parameter.
- **Custom Column Names**: Rename columns to avoid conflicts or for better readability.
## Syntax
### Limitations
- The ENRICH command only supports enrich policies of type `match`.
- ENRICH only supports enriching on a column of type `keyword`.
`ENRICH policy [ON match_field] [WITH [new_name1 = ]field1, [new_name2 = ]field2, ...]`
### Examples
### Parameters
#### Example 1: Basic Enrichment
The following example uses the `languages_policy` enrich policy to add a new column for each enrich field defined in the policy. The match is performed using the `match_field` defined in the enrich policy and requires that the input table has a column with the same name (`language_code` in this example).
#### policy
The name of the enrich policy. You need to create and execute the enrich policy first.
#### match_field
The match field. ENRICH uses its value to look for records in the enrich index. If not specified, the match will be performed on the column with the same name as the `match_field` defined in the enrich policy.
#### new_nameX
Allows you to change the name of the column that is added for each of the enrich fields. Defaults to the enrich field name. If a column has the same name as the new name, it will be discarded. If a name (new or original) occurs more than once, only the rightmost duplicate creates a new column.
#### fieldX
The enrich fields from the enrich index that are added to the result as new columns. If a column with the same name as the enrich field already exists, the existing column will be replaced by the new column. If not specified, each of the enrich fields defined in the policy is added. A column with the same name as the enrich field will be dropped unless the enrich field is renamed.
## Examples
The following example uses the `languages_policy` enrich policy to add a new column for each enrich field defined in the policy. The match is performed using the `match_field` defined in the enrich policy and requires that the input table has a column with the same name (`language_code` in this example). ENRICH will look for records in the enrich index based on the match field value.
```esql
ROW language_code = "1"
| ENRICH languages_policy
```
#### Example 2: Using a Different Match Field
To use a column with a different name than the `match_field` defined in the policy as the match field, use the `ON` parameter.
To use a column with a different name than the `match_field` defined in the policy as the match field, use `ON <column-name>`:
```esql
ROW a = "1"
| ENRICH languages_policy ON a
```
#### Example 3: Selecting Specific Enrich Fields
By default, each of the enrich fields defined in the policy is added as a column. To explicitly select the enrich fields that are added, use the `WITH` parameter.
By default, each of the enrich fields defined in the policy is added as a column. To explicitly select the enrich fields that are added, use `WITH <field1>, <field2>, ...`:
```esql
ROW a = "1"
| ENRICH languages_policy ON a WITH language_name
```
#### Example 4: Renaming Enrich Fields
You can rename the columns that are added using the `WITH new_name=<field1>` syntax.
You can rename the columns that are added using `WITH new_name=<field1>`:
```esql
ROW a = "1"
| ENRICH languages_policy ON a WITH name = language_name
```
In case of name collisions, the newly created columns will override existing columns.
### Limitations
- In case of name collisions, the newly created columns will override existing columns.
- The ENRICH command only supports enrich policies of type `match`.
- ENRICH only supports enriching on a column of type `keyword`.

View file

@ -1,20 +1,36 @@
## EVAL
# EVAL
The `EVAL` processing command enables you to append new columns with calculated values. This command is useful for creating new data points derived from existing columns, such as performing arithmetic operations, applying functions, or using expressions.
The EVAL command allows you to append new columns with calculated values to your data.
### Use Cases
- **Data Transformation**: Create new columns based on existing data, such as converting units or calculating derived metrics.
- **Data Enrichment**: Add additional context to your data by computing new values.
- **Data Cleaning**: Standardize or normalize data by applying transformations.
## Syntax
### Limitations
- If a column with the same name already exists, the existing column is dropped.
- If a column name is used more than once, only the rightmost duplicate creates a column.
`EVAL [column1 =] value1[, ..., [columnN =] valueN]`
### Examples
### Parameters
#### Example 1: Converting Height to Different Units
This example demonstrates how to convert the height from meters to feet and centimeters.
#### {columnX}
This is the name of the column. If a column with the same name already exists, it will be replaced. If a column name is used more than once, only the rightmost duplicate will create a column.
#### {valueX}
This is the value for the column. It can be a literal, an expression, or a function. Columns defined to the left of this one can be used.
## Notes
EVAL supports the following types of functions:
- Mathematical functions
- String functions
- Date-time functions
- Type conversion functions
- Conditional functions and expressions
- Multi-value functions
Aggregation functions are NOT supported for EVAL.
## Examples
The following example multiplies the `height` column by 3.281 and 100 to create new columns `height_feet` and `height_cm`:
```esql
FROM employees
@ -23,8 +39,7 @@ FROM employees
| EVAL height_feet = height * 3.281, height_cm = height * 100
```
#### Example 2: Overwriting an Existing Column
In this example, the `height` column is overwritten with its value in feet.
If the specified column already exists, the existing column will be replaced, and the new column will be appended to the table:
```esql
FROM employees
@ -33,8 +48,7 @@ FROM employees
| EVAL height = height * 3.281
```
#### Example 3: Using an Expression as Column Name
Here, a new column is created with a name equal to the expression used to calculate its value.
Specifying the output column name is optional. If not specified, the new column name is equal to the expression. The following query adds a column named `height*3.281`:
```esql
FROM employees
@ -43,8 +57,7 @@ FROM employees
| EVAL height * 3.281
```
#### Example 4: Using Special Characters in Column Names
This example shows how to handle special characters in column names by quoting them with backticks.
Because this name contains special characters, it needs to be quoted with backticks (`) when using it in subsequent commands:
```esql
FROM employees
@ -52,4 +65,6 @@ FROM employees
| STATS avg_height_feet = AVG(`height * 3.281`)
```
These examples illustrate the versatility of the `EVAL` command in transforming and enriching your data within Elasticsearch.
### Limitations
- If a column with the same name already exists, the existing column is dropped.
- If a column name is used more than once, only the rightmost duplicate creates a column.

View file

@ -1,8 +1,19 @@
## EXP
# EXP
The `EXP` function returns the value of Euler's number (e) raised to the power of the given numeric expression. If the input is null, the function returns null.
The EXP function calculates the value of Euler's number (e) raised to the power of a given number.
## Syntax
`EXP(number)`
### Parameters
#### number
A numeric expression. If the parameter is `null`, the function will also return `null`.
## Examples
### Examples
```esql
ROW d = 5.0
@ -12,4 +23,4 @@ ROW d = 5.0
```esql
ROW value = 2.0
| EVAL result = EXP(value)
```
```

View file

@ -1,16 +1,30 @@
## FLOOR
# FLOOR
The `FLOOR` function rounds a number down to the nearest integer. This operation is a no-op for long (including unsigned) and integer types. For double types, it picks the closest double value to the integer, similar to `Math.floor`.
The FLOOR function rounds a number down to the nearest integer.
### Examples
## Syntax
`FLOOR(number)`
### Parameters
#### number
This is a numeric expression. If the parameter is `null`, the function will return `null`.
## Examples
```esql
ROW a=1.8
| EVAL a = FLOOR(a)
| EVAL a=FLOOR(a)
```
```esql
FROM employees
| KEEP first_name, last_name, height
| EVAL height_floor = FLOOR(height)
```
```
## Notes
- The FLOOR function is a no-operation for `long` (including unsigned) and `integer` types. For `double` type, this function picks the closest `double` value to the integer, similar to the Math.floor method in programming languages.

View file

@ -1,21 +1,24 @@
## FROM
# FROM
The `FROM` source command returns a table with data from a data stream, index, or alias. Each row in the resulting table represents a document, and each column corresponds to a field that can be accessed by the name of that field. This command is fundamental for querying data in Elasticsearch using ES|QL.
The `FROM` command retrieves a table of data from a specified data stream, index, or alias.
### Use Cases
## Syntax
- **Basic Data Retrieval**: Fetch data from a specific index or data stream.
- **Time Series Data**: Use date math to access indices relevant to specific time periods.
- **Multiple Indices**: Query multiple data streams, indices, or aliases using comma-separated lists or wildcards.
- **Remote Clusters**: Query data streams and indices on remote clusters.
- **Metadata Retrieval**: Retrieve specific metadata fields using the `METADATA` directive.
`FROM index_pattern [METADATA fields]`
### Limitations
### Parameters
- By default, an ES|QL query without an explicit `LIMIT` uses an implicit limit of 1000 rows. This applies to the `FROM` command as well.
- Queries do not return more than 10,000 rows, regardless of the `LIMIT` command's value.
#### index_pattern
### Examples
This parameter represents a list of indices, data streams, or aliases. It supports the use of wildcards and date math.
#### fields
This is a comma-separated list of metadata fields to be retrieved.
## Description
The `FROM` command retrieves a table of data from a specified data stream, index, or alias. Each row in the resulting table represents a document, and each column corresponds to a field. The field can be accessed using its name.
#### Basic Data Retrieval
```esql
@ -50,4 +53,9 @@ FROM employees METADATA _id
Use enclosing double quotes (") or three enclosing double quotes (""") to escape index names that contain special characters:
```esql
FROM "this=that","""this[that"""
```
```
### Limitations
- By default, an ES|QL query without an explicit `LIMIT` uses an implicit limit of 1000 rows. This applies to the `FROM` command as well.
- Queries do not return more than 10,000 rows, regardless of the `LIMIT` command's value.

View file

@ -1,8 +1,22 @@
## GREATEST
# GREATEST
The `GREATEST` function returns the maximum value from multiple columns. This is similar to `MV_MAX` except it is intended to run on multiple columns at once. When run on keyword or text fields, this function returns the last string in alphabetical order. When run on boolean columns, it will return `true` if any values are `true`.
The GREATEST function returns the maximum value from multiple columns.
### Examples
## Syntax
`GREATEST(first, rest)`
### Parameters
#### first
The first column to evaluate.
#### rest
The remaining columns to evaluate.
## Examples
```esql
ROW a = 10, b = 20
@ -12,4 +26,9 @@ ROW a = 10, b = 20
```esql
ROW x = "apple", y = "banana", z = "cherry"
| EVAL max_fruit = GREATEST(x, y, z)
```
```
## Notes
- When applied to `keyword` or `text` fields, the GREATEST function returns the last string in alphabetical order.
- When applied to `boolean` columns, it returns `true` if any values are `true`.

View file

@ -1,22 +1,24 @@
## GROK
# GROK
GROK enables you to extract structured data out of a string. It matches the string against patterns based on regular expressions and extracts the specified patterns as columns. This command is useful for parsing logs, extracting fields from text, and structuring unstructured data.
The GROK command is used to extract structured data from a string. It matches the string against patterns based on regular expressions and extracts the specified patterns as columns.
### Use Cases
- **Log Parsing**: Extracting timestamps, IP addresses, and other fields from log entries.
- **Data Structuring**: Converting unstructured text data into structured columns.
- **Field Extraction**: Extracting specific fields from a string for further analysis.
## Syntax
### Limitations
- If a field name conflicts with an existing column, the existing column is discarded.
- If a field name is used more than once, a multi-valued column will be created with one value per each occurrence of the field name.
- The `GROK` command does not support configuring custom patterns or multiple patterns.
- The `GROK` command is not subject to Grok watchdog settings.
`GROK input "pattern"`
### Examples
### Parameters
#### Example 1: Basic GROK Usage
This example parses a string that contains a timestamp, an IP address, an email address, and a number.
#### input
The column containing the string you want to structure. If the column has multiple values, GROK will process each value.
#### pattern
A grok pattern. If a field name conflicts with an existing column, the existing column is dropped. If a field name is used more than once, a multi-valued column is created with one value per each occurrence of the field name.
## Examples
The following example parses a string that contains a timestamp, an IP address, an email address, and a number:
```esql
ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42"
@ -24,8 +26,7 @@ ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42"
| KEEP date, ip, email, num
```
#### Example 2: Type Conversion with GROK
By default, GROK outputs keyword string columns. To convert to other types, append `:type` to the semantics in the pattern.
By default, GROK outputs keyword string columns. `int` and `float` types can be converted by appending `:type` to the semantics in the pattern. For example `{NUMBER:num:int}`:
```esql
ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42"
@ -33,8 +34,7 @@ ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42"
| KEEP date, ip, email, num
```
#### Example 3: Using Type Conversion Functions
For other type conversions, use Type conversion functions.
For other type conversions, use Type conversion functions:
```esql
ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42"
@ -43,8 +43,7 @@ ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42"
| EVAL date = TO_DATETIME(date)
```
#### Example 4: Handling Multi-Valued Columns
If a field name is used more than once, GROK creates a multi-valued column.
If a field name is used more than once, GROK creates a multi-valued column:
```esql
FROM addresses
@ -52,4 +51,9 @@ FROM addresses
| GROK zip_code "%{WORD:zip_parts} %{WORD:zip_parts}"
```
These examples showcase different usages of the GROK command, from basic extraction to handling type conversions and multi-valued columns.
### Limitations
- If a field name conflicts with an existing column, the existing column is discarded.
- If a field name is used more than once, a multi-valued column will be created with one value per each occurrence of the field name.
- The `GROK` command does not support configuring custom patterns or multiple patterns.
- The `GROK` command is not subject to Grok watchdog settings.

View file

@ -1,8 +1,26 @@
## IP_PREFIX
# IP_PREFIX
The `IP_PREFIX` function truncates an IP address to a given prefix length. It supports both IPv4 and IPv6 addresses.
The IP_PREFIX function truncates an IP address to a specified prefix length.
### Examples
## Syntax
`IP_PREFIX(ip, prefixLengthV4, prefixLengthV6)`
### Parameters
#### ip
The IP address that you want to truncate. This function supports both IPv4 and IPv6 addresses.
#### prefixLengthV4
The prefix length for IPv4 addresses.
#### prefixLengthV6
The prefix length for IPv6 addresses.
## Examples
```esql
ROW ip4 = TO_IP("1.2.3.4"), ip6 = TO_IP("fe80::cae2:65ff:fece:feb9")
@ -13,4 +31,4 @@ ROW ip4 = TO_IP("1.2.3.4"), ip6 = TO_IP("fe80::cae2:65ff:fece:feb9")
FROM network_logs
| EVAL truncated_ip = IP_PREFIX(ip_address, 16, 0)
| KEEP ip_address, truncated_ip
```
```

View file

@ -1,17 +1,32 @@
## KEEP
# KEEP
The `KEEP` processing command in ES|QL enables you to specify which columns are returned and the order in which they are returned. This command is particularly useful when you want to focus on specific fields in your dataset, either by explicitly naming them or by using wildcard patterns. The `KEEP` command supports a variety of use cases, such as filtering out unnecessary columns, reordering columns for better readability, and ensuring that only relevant data is processed in subsequent commands.
The KEEP command allows you to specify which columns to return and in what order.
### Use Cases
- **Selective Column Retrieval**: Retrieve only the columns you need for analysis, reducing the amount of data processed.
- **Column Reordering**: Specify the order in which columns should appear in the result set.
- **Wildcard Support**: Use wildcards to include multiple columns that match a pattern, simplifying queries when dealing with numerous fields.
## Syntax
### Limitations
- **Precedence Rules**: When a field name matches multiple expressions, precedence rules are applied. Complete field names take the highest precedence, followed by partial wildcard expressions, and finally, the wildcard `*`.
- **Column Conflicts**: If a field matches two expressions with the same precedence, the rightmost expression wins.
`KEEP columns`
### Examples
### Parameters
#### columns
A comma-separated list of columns to retain. Wildcards are supported. If an existing column matches multiple provided wildcards or column names, certain rules apply.
## Note
The KEEP command is used to specify which columns to return and their order.
When a field name matches multiple expressions, precedence rules are applied. Fields are added in the order they appear. If one field matches multiple expressions, the following precedence rules apply (from highest to lowest priority):
1. Complete field name (no wildcards)
2. Partial wildcard expressions (for example: `fieldNam*`)
3. Wildcard only (`*`)
If a field matches two expressions with the same precedence, the rightmost expression wins.
Important: only the columns in the KEEP command can be used after a KEEP command.
## Examples
#### Example 1: Specifying Columns Explicitly
This example demonstrates how to explicitly specify the columns to be returned.
@ -60,5 +75,3 @@ This example illustrates how the simple wildcard expression `*` has the lowest p
FROM employees
| KEEP *, first_name
```
These examples showcase the versatility and utility of the `KEEP` command in various scenarios, making it a powerful tool for data manipulation in ES|QL.

View file

@ -1,8 +1,22 @@
## LEAST
# LEAST
Returns the minimum value from multiple columns. This is similar to `MV_MIN` except it is intended to run on multiple columns at once.
The LEAST function returns the smallest value from multiple columns.
### Examples
## Syntax
`LEAST(first, rest)`
### Parameters
#### first
The first column to evaluate.
#### rest
The remaining columns to evaluate.
## Examples
```esql
ROW a = 10, b = 20
@ -12,4 +26,4 @@ ROW a = 10, b = 20
```esql
ROW x = 5, y = 15, z = 10
| EVAL min_value = LEAST(x, y, z)
```
```

View file

@ -1,8 +1,24 @@
## LEFT
# LEFT
The `LEFT` function returns the substring that extracts a specified number of characters from a string, starting from the left.
The LEFT function returns a substring from the beginning of a specified string.
### Examples
## Syntax
`LEFT(string, length)`
### Parameters
#### string
The string from which a substring will be extracted.
#### length
The number of characters to extract from the string.
## Examples
The following example extracts the first three characters from the `last_name` field:
```esql
FROM employees
@ -16,4 +32,4 @@ FROM employees
ROW full_name = "John Doe"
| EVAL first_name = LEFT(full_name, 4)
| KEEP first_name
```
```

View file

@ -1,8 +1,20 @@
## LENGTH
# LENGTH
The `LENGTH` function returns the character length of a string. If the input string is null, the function returns null.
The LENGTH function calculates the character length of a given string.
### Examples
## Syntax
`LENGTH(string)`
### Parameters
#### string
The string expression for which the length is to be calculated.
## Examples
The following example calculates the character length of the `first_name` field:
```esql
FROM employees
@ -13,4 +25,4 @@ FROM employees
```esql
ROW message = "Hello, World!"
| EVAL message_length = LENGTH(message)
```
```

View file

@ -1,24 +1,19 @@
## LIMIT
# LIMIT
The `LIMIT` processing command in ES|QL is used to restrict the number of rows returned by a query. This is particularly useful when you want to control the volume of data retrieved, either for performance reasons or to focus on a specific subset of the data.
The LIMIT command is used to restrict the number of rows returned by a query.
### Use Cases
- **Performance Optimization**: By limiting the number of rows returned, you can improve query performance and reduce the load on the Elasticsearch cluster.
- **Data Sampling**: Useful for retrieving a sample of data for analysis or debugging.
- **Pagination**: Helps in implementing pagination by limiting the number of rows per page.
## Syntax
### Limitations
- **Maximum Rows**: Queries do not return more than 10,000 rows, regardless of the `LIMIT` commands value. This limit only applies to the number of rows that are retrieved by the query. Queries and aggregations run on the full data set.
- **Overcoming Limitations**: To overcome this limitation, you can:
- Reduce the result set size by modifying the query to only return relevant data using the `WHERE` command.
- Shift any post-query processing to the query itself using the `STATS ... BY` command to aggregate data in the query.
- **Dynamic Cluster Settings**: The default and maximum limits can be changed using these dynamic cluster settings:
- `esql.query.result_truncation_default_size`
- `esql.query.result_truncation_max_size`
`LIMIT max_number_of_rows`
### Examples
### Parameters
#### max_number_of_rows
This parameter specifies the maximum number of rows to be returned.
## Examples
#### Example 1: Basic Usage
This example demonstrates how to limit the number of rows returned to 5.
```esql
@ -27,8 +22,7 @@ FROM employees
| LIMIT 5
```
#### Example 2: Limiting Rows After Filtering
This example shows how to limit the number of rows after applying a filter.
This example shows how to limit the number of rows after applying a filter:
```esql
FROM employees
@ -36,8 +30,7 @@ FROM employees
| LIMIT 10
```
#### Example 3: Limiting Rows with Aggregation
This example demonstrates limiting the number of rows after performing an aggregation.
This example demonstrates limiting the number of rows after performing an aggregation:
```esql
FROM employees
@ -45,8 +38,7 @@ FROM employees
| LIMIT 3
```
#### Example 4: Limiting Rows with Sorting
This example shows how to limit the number of rows after sorting the data.
This example shows how to limit the number of rows after sorting the data:
```esql
FROM employees
@ -54,8 +46,7 @@ FROM employees
| LIMIT 7
```
#### Example 5: Limiting Rows with Multiple Commands
This example demonstrates the use of `LIMIT` in conjunction with multiple other commands.
This example demonstrates the use of `LIMIT` in conjunction with multiple other commands:
```esql
FROM employees
@ -65,4 +56,20 @@ FROM employees
| LIMIT 5
```
By using the `LIMIT` command, you can effectively manage the volume of data returned by your ES|QL queries, ensuring better performance and more focused results.
## Limitations
There is no way to achieve pagination with LIMIT, as there is no offset parameter.
A query will never return more than 10,000 rows. This limitation only applies to the number of rows retrieved by the query. The query and any aggregations will still run on the full dataset.
To work around this limitation:
- Reduce the size of the result set by modifying the query to only return relevant data. This can be achieved by using the WHERE command to select a smaller subset of the data.
- Shift any post-query processing to the query itself. The ES|QL STATS ... BY command can be used to aggregate data within the query.
## Notes
The default and maximum limits can be adjusted using the following dynamic cluster settings:
- `esql.query.result_truncation_default_size`
- `esql.query.result_truncation_max_size`

View file

@ -1,18 +1,26 @@
## LOCATE
# LOCATE
The `LOCATE` function returns an integer that indicates the position of a keyword substring within another string.
The LOCATE function returns the position of a specified substring within a string.
### Syntax
## Syntax
`LOCATE(string, substring, start)`
### Parameters
- `string`: An input string.
- `substring`: A substring to locate in the input string.
- `start`: The start index.
#### string
### Examples
The string in which you want to search for the substring.
#### substring
The substring you want to find in the string.
#### start
The starting index for the search.
## Examples
```esql
ROW a = "hello"
@ -22,4 +30,9 @@ ROW a = "hello"
```esql
ROW phrase = "Elasticsearch is powerful"
| EVAL position = LOCATE(phrase, "powerful")
```
```
## Notes
- String positions start from `1`.
- If the substring cannot be found, the function returns `0`.

View file

@ -1,8 +1,22 @@
## LOG
# LOG
The `LOG` function returns the logarithm of a value to a specified base. The input can be any numeric value, and the return value is always a double. Logs of zero, negative numbers, and base of one return null as well as a warning.
The LOG function calculates the logarithm of a given value to a specified base.
### Examples
## Syntax
`LOG(base, number)`
### Parameters
#### base
The base of the logarithm. If the base is `null`, the function will return `null`. If the base is not provided, the function will return the natural logarithm (base e) of the value.
#### number
The numeric value for which the logarithm is to be calculated. If the number is `null`, the function will return `null`.
## Examples
```esql
ROW base = 2.0, value = 8.0
@ -12,4 +26,4 @@ ROW base = 2.0, value = 8.0
```esql
ROW value = 100
| EVAL s = LOG(value)
```
```

View file

@ -1,10 +1,22 @@
## LOOKUP
# LOOKUP
The `LOOKUP` command in ES|QL is highly experimental and only available in SNAPSHOT versions. It matches values from the input against a table provided in the request, adding the other fields from the table to the output. This command is useful for enriching your dataset with additional information from a predefined table. However, it is important to note that if the table's column names conflict with existing columns, the existing columns will be dropped.
The LOOKUP command is a highly experimental feature currently only available in SNAPSHOT versions. It matches values from the input against a provided table, appending the other fields from the table to the output.
### Examples
## Syntax
Here are some example ES|QL queries using the `LOOKUP` command:
`LOOKUP table ON match_field1[, match_field2, ...]`
### Parameters
#### table
The name of the table provided in the request to match against. If the table's column names conflict with existing columns, the existing columns will be dropped.
#### match_field
The fields in the input to match against the table.
## Examples
1. **Basic Lookup Example:**
```esql
@ -98,4 +110,4 @@ A Fire Upon the Deep|Vernor Vinge |Diamond
Dune |Frank Herbert |The New Wave
Revelation Space |Alastair Reynolds|Diamond
Leviathan Wakes |James S.A. Corey |Hadron
```
```

View file

@ -1,8 +1,18 @@
## LTRIM
# LTRIM
Removes leading whitespaces from a string.
The LTRIM function is used to remove leading whitespaces from a string.
### Examples
## Syntax
`LTRIM(string)`
### Parameters
#### string
This is the string expression from which you want to remove leading whitespaces. If the string is `null`, the function will return `null`.
## Examples
```esql
ROW message = " some text ", color = " red "
@ -16,4 +26,4 @@ ROW message = " some text ", color = " red "
ROW text = " example text "
| EVAL trimmed_text = LTRIM(text)
| EVAL formatted_text = CONCAT("Trimmed: '", trimmed_text, "'")
```
```

View file

@ -1,15 +1,29 @@
## MAX
# MAX
The `MAX` function returns the maximum value of a specified field.
The MAX function calculates the maximum value of a specified field.
### Examples
## Syntax
`MAX(field)`
### Parameters
#### field
The field for which the maximum value is to be calculated.
## Examples
Calculate the maximum number of languages known by employees:
```esql
FROM employees
| STATS MAX(languages)
```
The MAX function can be used with inline functions:
```esql
FROM employees
| STATS max_avg_salary_change = MAX(MV_AVG(salary_change))
```
```

View file

@ -1,15 +1,33 @@
## MEDIAN
# MEDIAN
The `MEDIAN` function returns the value that is greater than half of all values and less than half of all values, also known as the 50% PERCENTILE. Like `PERCENTILE`, `MEDIAN` is usually approximate. It is also non-deterministic, meaning you can get slightly different results using the same data.
The MEDIAN function calculates the median value of a numeric field. The median is the value that is greater than half of all values and less than half of all values, also known as the 50% percentile.
### Examples
## Syntax
`MEDIAN(number)`
### Parameters
#### number
The numeric field for which the median is calculated.
## Examples
Calculate the median salary:
```esql
FROM employees
| STATS MEDIAN(salary), PERCENTILE(salary, 50)
| STATS MEDIAN(salary)
```
Calculate the median of the maximum values of a multivalued column:
```esql
FROM employees
| STATS median_max_salary_change = MEDIAN(MV_MAX(salary_change))
```
```
## Limitations
- The MEDIAN function is usually approximate and non-deterministic. This means you can get slightly different results using the same data.

View file

@ -1,15 +1,34 @@
## MEDIAN_ABSOLUTE_DEVIATION
# MEDIAN_ABSOLUTE_DEVIATION
The `MEDIAN_ABSOLUTE_DEVIATION` function returns the median absolute deviation, a measure of variability. It is a robust statistic, meaning that it is useful for describing data that may have outliers, or may not be normally distributed. For such data, it can be more descriptive than standard deviation. It is calculated as the median of each data point's deviation from the median of the entire sample. That is, for a random variable X, the median absolute deviation is median(|median(X) - X|). Like `PERCENTILE`, `MEDIAN_ABSOLUTE_DEVIATION` is usually approximate.
The MEDIAN_ABSOLUTE_DEVIATION function calculates the median absolute deviation, a measure of variability. It is particularly useful for describing data that may have outliers or may not follow a normal distribution. In such cases, it can be more descriptive than standard deviation. The function computes the median of each data point's deviation from the median of the entire sample.
### Examples
## Syntax
`MEDIAN_ABSOLUTE_DEVIATION(number)`
### Parameters
#### number
The numeric expression for which the median absolute deviation is to be calculated.
## Examples
Calculate the median salary and the median absolute deviation of salaries:
```esql
FROM employees
| STATS MEDIAN(salary), MEDIAN_ABSOLUTE_DEVIATION(salary)
```
Calculate the median absolute deviation of the maximum values of a multivalued column:
```esql
FROM employees
| STATS m_a_d_max_salary_change = MEDIAN_ABSOLUTE_DEVIATION(MV_MAX(salary_change))
```
```
## Limitations
- The `MEDIAN_ABSOLUTE_DEVIATION` function is non-deterministic, which means you can get slightly different results using the same data.
- The `MEDIAN_ABSOLUTE_DEVIATION` function is usually approximate, which means the results may not be exact.

View file

@ -1,15 +1,29 @@
## MIN
# MIN
The `MIN` function returns the minimum value of a specified field.
The MIN function calculates the minimum value of a specified field.
### Examples
## Syntax
`MIN(field)`
### Parameters
#### field
The field for which the minimum value is to be calculated.
## Examples
Calculate the minimum number of languages spoken by employees:
```esql
FROM employees
| STATS MIN(languages)
```
The MIN function can be used with inline functions:
```esql
FROM employees
| STATS min_avg_salary_change = MIN(MV_AVG(salary_change))
```
```

View file

@ -1,8 +1,16 @@
## MV_APPEND
# MV_APPEND
The `MV_APPEND` function concatenates values of two multi-value fields.
MV_APPEND is a function that concatenates the values of two multi-value fields.
### Examples
## Syntax
`MV_APPEND(field1, field2)`
### Parameters
#### field1
The first multi-value field to be concatenated.
```esql
ROW a = ["foo", "bar"], b = ["baz", "qux"]
@ -14,4 +22,4 @@ ROW a = ["foo", "bar"], b = ["baz", "qux"]
ROW x = [1, 2, 3], y = [4, 5, 6]
| EVAL z = MV_APPEND(x, y)
| KEEP x, y, z
```
```

View file

@ -1,8 +1,18 @@
## MV_AVG
# MV_AVG
The `MV_AVG` function converts a multivalued field into a single-valued field containing the average of all the values.
The MV_AVG function calculates the average of all values in a multivalued field and returns a single value.
### Examples
## Syntax
`MV_AVG(number)`
### Parameters
#### number
A multivalued expression.
## Examples
```esql
ROW a=[3, 5, 1, 6]
@ -12,4 +22,4 @@ ROW a=[3, 5, 1, 6]
```esql
ROW scores=[10, 20, 30, 40]
| EVAL average_score = MV_AVG(scores)
```
```

View file

@ -1,15 +1,33 @@
## MV_CONCAT
# MV_CONCAT
Converts a multivalued string expression into a single valued column containing the concatenation of all values separated by a delimiter.
MV_CONCAT is a function that transforms a multivalued string expression into a single valued column. It concatenates all values and separates them with a specified delimiter.
### Examples
## Syntax
`MV_CONCAT(string, delim)`
### Parameters
#### string
A multivalue expression.
#### delim
This is the delimiter that separates the concatenated values.
## Examples
The following example concatenates the values in the array ["foo", "zoo", "bar"] with a comma and a space as the delimiter:
```esql
ROW a=["foo", "zoo", "bar"]
| EVAL j = MV_CONCAT(a, ", ")
```
If you want to concatenate non-string columns, you need to convert them to strings first using the `TO_STRING` function:
```esql
ROW a=[10, 9, 8]
| EVAL j = MV_CONCAT(TO_STRING(a), ", ")
```
```

View file

@ -1,8 +1,18 @@
## MV_COUNT
# MV_COUNT
The `MV_COUNT` function converts a multivalued expression into a single-valued column containing a count of the number of values.
The MV_COUNT function calculates the total number of values in a multivalued expression.
### Examples
## Syntax
`MV_COUNT(field)`
### Parameters
#### field
A multivalued expression.
## Examples
```esql
ROW a=["foo", "zoo", "bar"]
@ -12,4 +22,4 @@ ROW a=["foo", "zoo", "bar"]
```esql
ROW b=["apple", "banana", "cherry", "date"]
| EVAL count_b = MV_COUNT(b)
```
```

View file

@ -1,8 +1,18 @@
## MV_DEDUPE
# MV_DEDUPE
Removes duplicate values from a multivalued field. `MV_DEDUPE` may, but won't always, sort the values in the column.
The MV_DEDUPE function is used to eliminate duplicate values from a multivalued field.
### Examples
## Syntax
`MV_DEDUPE(field)`
### Parameters
#### field
This is a multivalue expression.
## Examples
```esql
ROW a=["foo", "foo", "bar", "foo"]
@ -12,4 +22,8 @@ ROW a=["foo", "foo", "bar", "foo"]
```esql
ROW b=["apple", "apple", "banana", "apple", "banana"]
| EVAL dedupe_b = MV_DEDUPE(b)
```
```
## Notes
While MV_DEDUPE may sort the values in the column, it's not guaranteed to always do so.

View file

@ -1,27 +1,27 @@
## MV_EXPAND
# MV_EXPAND
The `MV_EXPAND` processing command expands multivalued columns into one row per value, duplicating other columns. This command is useful when you need to normalize data that contains multivalued fields, making it easier to perform operations on each individual value.
The MV_EXPAND command is used to expand multivalued columns into individual rows, replicating the other columns for each new row.
### Use Cases
- **Normalization**: Transform multivalued fields into single-valued rows for easier analysis and processing.
- **Data Transformation**: Prepare data for further operations like sorting, filtering, or aggregating by expanding multivalued fields.
- **Data Cleaning**: Simplify complex data structures by breaking down multivalued fields into individual rows.
## Syntax
### Limitations
- This functionality is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features.
`MV_EXPAND column`
### Examples
### Parameters
#### Example 1: Basic Expansion
Expanding a multivalued column `a` into individual rows.
#### column
This is the multivalued column that you want to expand.
## Examples
Expanding a multivalued column `a` into individual rows:
```esql
ROW a=[1,2,3], b="b", j=["a","b"]
| MV_EXPAND a
```
#### Example 2: Expanding Multiple Columns
Expanding two multivalued columns `a` and `j` into individual rows.
Expanding two multivalued columns `a` and `j` into individual rows:
```esql
ROW a=[1,2,3], b="b", j=["a","b"]
@ -29,8 +29,7 @@ ROW a=[1,2,3], b="b", j=["a","b"]
| MV_EXPAND j
```
#### Example 3: Combining with Other Commands
Expanding a multivalued column and then filtering the results.
Expanding a multivalued column and then filtering the results:
```esql
ROW a=[1,2,3,4,5], b="b"
@ -38,4 +37,6 @@ ROW a=[1,2,3,4,5], b="b"
| WHERE a > 2
```
These examples demonstrate different ways to use the `MV_EXPAND` command to transform and analyze data with multivalued fields.
## Notes
This feature is currently in technical preview and may be subject to changes or removal in future releases.

View file

@ -1,8 +1,18 @@
## MV_FIRST
# MV_FIRST
The `MV_FIRST` function converts a multivalued expression into a single-valued column containing the first value. This is most useful when reading from a function that emits multivalued columns in a known order like `SPLIT`. The order that multivalued fields are read from underlying storage is not guaranteed. It is frequently ascending, but don't rely on that. If you need the minimum value, use `MV_MIN` instead of `MV_FIRST`. `MV_MIN` has optimizations for sorted values so there isn't a performance benefit to `MV_FIRST`.
The MV_FIRST function converts a multivalued expression into a single valued column containing the first value.
### Examples
## Syntax
`MV_FIRST(field)`
### Parameters
#### field
A multivalue expression.
## Examples
```esql
ROW a="foo;bar;baz"
@ -12,4 +22,8 @@ ROW a="foo;bar;baz"
```esql
ROW b="apple;banana;cherry"
| EVAL first_b = MV_FIRST(SPLIT(b, ";"))
```
```
## Notes
The MV_FIRST function is particularly useful when reading from a function that emits multivalued columns in a known order, such as SPLIT. However, it's important to note that the order in which multivalued fields are read from underlying storage is not guaranteed. While it's often ascending, this should not be relied upon. If you need the minimum value, use the MV_MIN function instead of MV_FIRST. MV_MIN has optimizations for sorted values, so there isn't a performance benefit to MV_FIRST.

View file

@ -1,8 +1,20 @@
## MV_LAST
# MV_LAST
The `MV_LAST` function converts a multivalue expression into a single valued column containing the last value. This is most useful when reading from a function that emits multivalued columns in a known order like `SPLIT`. The order that multivalued fields are read from underlying storage is not guaranteed. It is frequently ascending, but don't rely on that. If you need the maximum value, use `MV_MAX` instead of `MV_LAST`. `MV_MAX` has optimizations for sorted values so there isn't a performance benefit to `MV_LAST`.
The MV_LAST function converts a multivalued expression into a single valued column containing the last value.
### Examples
## Syntax
`MV_LAST(field)`
### Parameters
#### field
A multivalue expression.
## Examples
```esql
ROW a="foo;bar;baz"
@ -12,4 +24,8 @@ ROW a="foo;bar;baz"
```esql
ROW a="apple;banana;cherry"
| EVAL last_fruit = MV_LAST(SPLIT(a, ";"))
```
```
## Notes
The MV_LAST function is particularly useful when reading from a function that emits multivalued columns in a known order, such as SPLIT. However, the order in which multivalued fields are read from underlying storage is not guaranteed. It is often ascending, but this should not be relied upon. If you need the maximum value, use the MV_MAX function instead of MV_LAST. MV_MAX has optimizations for sorted values, so there is no performance benefit to using MV_LAST.

View file

@ -1,15 +1,29 @@
## MV_MAX
# MV_MAX
The `MV_MAX` function converts a multivalued expression into a single valued column containing the maximum value.
MV_MAX function converts a multivalued expression into a single valued column containing the maximum value.
### Examples
## Syntax
`MV_MAX(field)`
### Parameters
#### field
A multivalue expression.
## Examples
The following example demonstrates the use of MV_MAX function:
```esql
ROW a=[3, 5, 1]
| EVAL max_a = MV_MAX(a)
```
MV_MAX function can be used with any column type, including `keyword` columns. In such cases, it selects the last string, comparing their utf-8 representation byte by byte:
```esql
ROW a=["foo", "zoo", "bar"]
| EVAL max_a = MV_MAX(a)
```
```

View file

@ -1,17 +1,27 @@
## MV_MEDIAN
# MV_MEDIAN
The `MV_MEDIAN` function converts a multivalued field into a single valued field containing the median value.
The MV_MEDIAN function converts a multivalued field into a single valued field containing the median value.
### Examples
## Syntax
`MV_MEDIAN(number)`
### Parameters
#### number
A multivalue expression.
## Examples
```esql
ROW a=[3, 5, 1]
| EVAL median_a = MV_MEDIAN(a)
```
If the row has an even number of values for a column, the result will be the average of the middle two entries. If the column is not floating point, the average rounds down:
If the row has an even number of values for a column, the result will be the average of the middle two entries. If the column is not floating point, the average rounds **down**:
```esql
ROW a=[3, 7, 1, 6]
| EVAL median_a = MV_MEDIAN(a)
```
```

View file

@ -1,8 +1,18 @@
## MV_MIN
# MV_MIN
The `MV_MIN` function converts a multivalued expression into a single valued column containing the minimum value.
The MV_MIN function converts a multivalued expression into a single valued column containing the minimum value.
### Examples
## Syntax
`MV_MIN(field)`
### Parameters
#### field
This is a multivalue expression.
## Examples
```esql
ROW a=[2, 1]
@ -12,4 +22,4 @@ ROW a=[2, 1]
```esql
ROW a=["foo", "bar"]
| EVAL min_a = MV_MIN(a)
```
```

View file

@ -1,8 +1,22 @@
## MV_PSERIES_WEIGHTED_SUM
# MV_PSERIES_WEIGHTED_SUM
Converts a multivalued expression into a single-valued column by multiplying every element on the input list by its corresponding term in P-Series and computing the sum.
The MV_PSERIES_WEIGHTED_SUM function transforms a multivalued expression into a single-valued column. It does this by multiplying each element in the input list by its corresponding term in a P-Series and then calculating the sum.
### Examples
## Syntax
`MV_PSERIES_WEIGHTED_SUM(number, p)`
### Parameters
#### number
This is a multivalue expression.
#### p
A number that represents the *p* parameter in the P-Series. It influences the contribution of each element to the weighted sum.
## Examples
```esql
ROW a = [70.0, 45.0, 21.0, 21.0, 21.0]
@ -14,4 +28,4 @@ ROW a = [70.0, 45.0, 21.0, 21.0, 21.0]
ROW b = [10.0, 20.0, 30.0, 40.0, 50.0]
| EVAL weighted_sum = MV_PSERIES_WEIGHTED_SUM(b, 2.0)
| KEEP weighted_sum
```
```

View file

@ -1,8 +1,26 @@
## MV_SLICE
# MV_SLICE
The `MV_SLICE` function returns a subset of the multivalued field using the start and end index values.
The MV_SLICE function is used to extract a subset of a multivalued field using specified start and end index values.
### Examples
## Syntax
`MV_SLICE(field, start, end)`
### Parameters
#### field
This is a multivalue expression. If `null`, the function will return `null`.
#### start
This is the start position. If `null`, the function will return `null`. The start argument can be negative, where an index of -1 is used to specify the last value in the list.
#### end
This is the end position (included). This parameter is optional; if omitted, the position at `start` is returned. The end argument can be negative, where an index of -1 is used to specify the last value in the list.
## Examples
```esql
ROW a = [1, 2, 2, 3]
@ -12,4 +30,4 @@ ROW a = [1, 2, 2, 3]
```esql
ROW a = [1, 2, 2, 3]
| EVAL a1 = MV_SLICE(a, -2), a2 = MV_SLICE(a, -3, -1)
```
```

View file

@ -1,15 +1,35 @@
## MV_SORT
# MV_SORT
The `MV_SORT` function sorts a multivalued field in lexicographical order. The valid options for the sort order are `ASC` (ascending) and `DESC` (descending), with the default being `ASC`.
The MV_SORT function sorts a multivalued field in lexicographical order.
### Examples
## Syntax
`MV_SORT(field, order)`
### Parameters
#### field
This is a multivalue expression. If the value is `null`, the function will return `null`.
#### order
This parameter determines the sort order. The valid options are `ASC` and `DESC`. If not specified, the default is `ASC`.
## Examples
Without order parameter
```esql
ROW names = ["Alice", "Bob", "Charlie"]
| EVAL sorted_names = mv_sort(names)
```
With order parameter
```esql
ROW a = [4, 2, -3, 2]
| EVAL sa = mv_sort(a), sd = mv_sort(a, "DESC")
```
```esql
ROW names = ["Alice", "Bob", "Charlie"]
| EVAL sorted_names = mv_sort(names)
```

View file

@ -1,8 +1,18 @@
## MV_SUM
# MV_SUM
The `MV_SUM` function converts a multivalued field into a single valued field containing the sum of all of the values.
The MV_SUM function converts a multivalued field into a single valued field containing the sum of all the values.
### Examples
## Syntax
`MV_SUM(number)`
### Parameters
#### number
This is a multivalue expression.
## Examples
```esql
ROW a=[3, 5, 6]
@ -12,4 +22,4 @@ ROW a=[3, 5, 6]
```esql
ROW numbers=[1, 2, 3, 4, 5]
| EVAL total_sum = MV_SUM(numbers)
```
```

View file

@ -1,17 +1,37 @@
## MV_ZIP
# MV_ZIP
The `MV_ZIP` function combines the values from two multivalued fields with a delimiter that joins them together.
The MV_ZIP function combines the values from two multivalued fields with a specified delimiter.
### Examples
## Syntax
`MV_ZIP(string1, string2, delim)`
### Parameters
#### string1
A multivalue expression.
#### string2
A multivalue expression.
#### delim
An optional parameter that specifies the delimiter used to join the values. If omitted, a comma (`,`) is used as the default delimiter.
## Examples
The following example demonstrates how to use the MV_ZIP function:
```esql
ROW a = ["x", "y", "z"], b = ["1", "2"]
| EVAL c = mv_zip(a, b, "-")
| EVAL c = MV_ZIP(a, b, "-")
| KEEP a, b, c
```
```esql
ROW names = ["Alice", "Bob", "Charlie"], ids = ["001", "002", "003"]
| EVAL combined = mv_zip(names, ids, ":")
| EVAL combined = MV_ZIP(names, ids, ":")
| KEEP names, ids, combined
```
```

View file

@ -1,8 +1,16 @@
## NOW
# NOW
The `NOW` function returns the current date and time.
The NOW function returns the current date and time.
### Examples
## Syntax
`NOW()`
### Parameters
This function does not require any parameters.
## Examples
```esql
ROW current_date = NOW()
@ -11,4 +19,4 @@ ROW current_date = NOW()
```esql
FROM sample_data
| WHERE @timestamp > NOW() - 1 hour
```
```

View file

@ -1,204 +1,224 @@
# ES|QL Operators
This document provides an overview of the operators supported by ES|QL.
## Binary Operators
### Equality (`==`)
Check if two fields are equal. If either field is multivalued, the result is null. This is pushed to the underlying search index if one side of the comparison is constant and the other side is a field in the index that has both an index and doc_values.
### Equality `==`
The equality operator checks if the values of two operands are equal or not.
Example:
#### Example:
```esql
FROM employees
| WHERE first_name == "John"
| KEEP first_name, last_name
| WHERE emp_no == 10001
```
### Inequality (`!=`)
Check if two fields are unequal. If either field is multivalued, the result is null. This is pushed to the underlying search index if one side of the comparison is constant and the other side is a field in the index that has both an index and doc_values.
### Inequality `!=`
The inequality operator checks if the values of two operands are not equal.
Example:
#### Example:
```esql
FROM employees
| WHERE first_name != "John"
| KEEP first_name, last_name
| WHERE emp_no != 10001
```
### Less than (`<`)
Check if one field is less than another. If either field is multivalued, the result is null. This is pushed to the underlying search index if one side of the comparison is constant and the other side is a field in the index that has both an index and doc_values.
### Less Than `<`
The less than operator checks if the value of the left operand is less than the value of the right operand.
Example:
#### Example:
```esql
FROM employees
| WHERE age < 30
| KEEP first_name, last_name, age
| WHERE salary < 50000
```
### Less than or equal to (`<=`)
Check if one field is less than or equal to another. If either field is multivalued, the result is null. This is pushed to the underlying search index if one side of the comparison is constant and the other side is a field in the index that has both an index and doc_values.
### Less Than or Equal To `<=`
This operator checks if the value of the left operand is less than or equal to the value of the right operand.
Example:
#### Example:
```esql
FROM employees
| WHERE age <= 30
| KEEP first_name, last_name, age
| WHERE salary <= 50000
```
### Greater than (`>`)
Check if one field is greater than another. If either field is multivalued, the result is null. This is pushed to the underlying search index if one side of the comparison is constant and the other side is a field in the index that has both an index and doc_values.
### Greater Than `>`
The greater than operator checks if the value of the left operand is greater than the value of the right operand.
Example:
#### Example:
```esql
FROM employees
| WHERE age > 30
| KEEP first_name, last_name, age
| WHERE salary > 50000
```
### Greater than or equal to (`>=`)
Check if one field is greater than or equal to another. If either field is multivalued, the result is null. This is pushed to the underlying search index if one side of the comparison is constant and the other side is a field in the index that has both an index and doc_values.
### Greater Than or Equal To `>=`
This operator checks if the value of the left operand is greater than or equal to the value of the right operand.
Example:
#### Example:
```esql
FROM employees
| WHERE age >= 30
| KEEP first_name, last_name, age
| WHERE salary >= 50000
```
### Add (`+`)
Add two numbers together. If either field is multivalued, the result is null.
### Add `+`
The add operator adds the values of the operands.
Example:
#### Example:
```esql
FROM employees
| EVAL total_salary = base_salary + bonus
| KEEP first_name, last_name, total_salary
| EVAL total_compensation = salary + bonus
```
### Subtract (`-`)
Subtract one number from another. If either field is multivalued, the result is null.
### Subtract `-`
The subtract operator subtracts the right-hand operand from the left-hand operand.
Example:
#### Example:
```esql
FROM employees
| EVAL net_salary = gross_salary - tax
| KEEP first_name, last_name, net_salary
| EVAL remaining_salary = salary - tax
```
### Multiply (`*`)
Multiply two numbers together. If either field is multivalued, the result is null.
### Multiply `*`
The multiply operator multiplies the values of the operands.
Example:
#### Example:
```esql
FROM employees
| EVAL annual_salary = monthly_salary * 12
| KEEP first_name, last_name, annual_salary
| EVAL yearly_salary = salary * 12
```
### Divide (`/`)
Divide one number by another. If either field is multivalued, the result is null. Division of two integer types will yield an integer result, rounding towards 0. If you need floating point division, cast one of the arguments to a `DOUBLE`.
### Divide `/`
The divide operator divides the left-hand operand by the right-hand operand.
Example:
#### Example:
```esql
FROM employees
| EVAL average_salary = total_salary / months_worked
| KEEP first_name, last_name, average_salary
| EVAL monthly_salary = salary / 12
```
### Modulus (`%`)
Divide one number by another and return the remainder. If either field is multivalued, the result is null.
### Modulus `%`
The modulus operator returns the remainder of the division of the left operand by the right operand.
Example:
#### Example:
```esql
FROM employees
| EVAL remainder = total_days % 7
| KEEP first_name, last_name, remainder
| EVAL remainder = salary % 12
```
## Unary Operators
### Negation (`-`)
The only unary operator is negation.
#### Example:
Example:
```esql
FROM employees
| EVAL negative_salary = -salary
| KEEP first_name, last_name, negative_salary
```
## Logical Operators
### AND
Logical AND operator.
#### Example:
Example:
```esql
FROM employees
| WHERE age > 30 AND department == "Engineering"
| KEEP first_name, last_name, age, department
| WHERE salary > 50000 AND bonus > 10000
```
### OR
Logical OR operator.
#### Example:
Example:
```esql
FROM employees
| WHERE age > 30 OR department == "Engineering"
| KEEP first_name, last_name, age, department
| WHERE salary > 50000 OR bonus > 10000
```
### NOT
Logical NOT operator.
#### Example:
Example:
```esql
FROM employees
| WHERE NOT (age > 30)
| KEEP first_name, last_name, age
| WHERE NOT (salary > 50000)
```
## Other Operators
### IS NULL and IS NOT NULL
For NULL comparison, use the `IS NULL` and `IS NOT NULL` predicates.
#### Example:
```esql
FROM employees
| WHERE birth_date IS NULL
| KEEP first_name, last_name
| SORT first_name
| LIMIT 3
```
The `IS NULL` operator returns true if the value is null.
Example:
```esql
FROM employees
| WHERE is_rehired IS NOT NULL
| STATS COUNT(emp_no)
| WHERE manager IS NULL
```
### Cast (`::`)
The `::` operator provides a convenient alternative syntax to the `TO_<type>` conversion functions.
The `IS NOT NULL` operator returns true if the value is not null.
Example:
#### Example:
```esql
ROW ver = CONCAT(("0"::INT + 1)::STRING, ".2.3")::VERSION
FROM employees
| WHERE manager IS NOT NULL
```
### IN
The `IN` operator allows testing whether a field or expression equals an element in a list of literals, fields, or expressions.
#### Example:
The `IN` operator checks if a value is within a set of values (literals, fields or expressions).
Example:
```esql
FROM employees
| WHERE department IN ("Sales", "Marketing", "HR")
```
```esql
ROW a = 1, b = 4, c = 3
| WHERE c-a IN (3, b / 2, a)
```
### LIKE
Use `LIKE` to filter data based on string patterns using wildcards. The following wildcard characters are supported:
Use `LIKE` to filter data based on string patterns using wildcards.
The following wildcard characters are supported:
- `*` matches zero or more characters.
- `?` matches one character.
#### Example:
Example:
```esql
FROM employees
| WHERE first_name LIKE "?b*"
@ -206,11 +226,24 @@ FROM employees
```
### RLIKE
Use `RLIKE` to filter data based on string patterns using regular expressions.
#### Example:
Example:
```esql
FROM employees
| WHERE first_name RLIKE ".leja.*"
| KEEP first_name, last_name
```
```
### Cast `::`
The `::` operator provides a convenient alternative syntax to the `TO_<type>` conversion functions.
Example:
```esql
FROM employees
| EVAL salary = salary::double
```

View file

@ -1,19 +1,13 @@
## Overview
## ES|QL Overview
### ES|QL
The Elasticsearch Query Language (ES|QL) provides a powerful way to filter, transform, and analyze data stored in Elasticsearch, and in the future in other runtimes. It is designed to be easy to learn and use by end users, SRE teams, application developers, and administrators.
The Elasticsearch Query Language (ES|QL) provides a powerful way to filter, transform, and analyze data stored in Elasticsearch. It is designed to be easy to learn and use by all types of end users.
Users can author ES|QL queries to find specific events, perform statistical analysis, and generate visualizations. It supports a wide range of commands and functions that enable users to perform various data operations, such as filtering, aggregation, time-series analysis, and more.
ES|QL makes use of "pipes" (`|`) to manipulate and transform data in a step-by-step fashion. This approach allows users to compose a series of operations, where the output of one operation becomes the input for the next, enabling complex data transformations and analysis.
### The ES|QL Compute Engine
ES|QL is more than a language: it represents a significant investment in new compute capabilities within Elasticsearch. To achieve both the functional and performance requirements for ES|QL, it was necessary to build an entirely new compute architecture. ES|QL search, aggregation, and transformation functions are directly executed within Elasticsearch itself. Query expressions are not transpiled to Query DSL for execution. This approach allows ES|QL to be extremely performant and versatile.
The new ES|QL execution engine was designed with performance in mind — it operates on blocks at a time instead of per row, targets vectorization and cache locality, and embraces specialization and multi-threading. It is a separate component from the existing Elasticsearch aggregation framework with different performance characteristics.
### Known Limitations
#### Result Set Size Limit
@ -79,7 +73,7 @@ ES|QL only supports the UTC timezone.
### Cross-Cluster Querying
Using ES|QL across clusters allows you to execute a single query across multiple clusters. This feature is in technical preview and may be changed or removed in a future release.
Using ES|QL across clusters allows you to execute a single query across multiple clusters. This feature is in technical preview and may be changed or removed in a future release.
#### Prerequisites
@ -98,7 +92,7 @@ FROM cluster_one:my-index-000001
### Using ES|QL in Kibana
ES|QL can be used in Kibana to query and aggregate data, create visualizations, and set up alerts.
ES|QL can be used in Kibana to query and aggregate data, create visualizations, and set up alerts.
#### Important Information
@ -106,39 +100,3 @@ ES|QL can be used in Kibana to query and aggregate data, create visualizations,
- The query bar in Discover allows you to write and execute ES|QL queries.
- The results table shows up to 10,000 rows, and Discover shows no more than 50 columns.
- You can create visualizations and alerts based on ES|QL queries.
### Using the REST API
The ES|QL query API allows you to execute ES|QL queries via REST API.
#### Example
```javascript
const response = await client.esql.query({
query: `
FROM library
| EVAL year = DATE_TRUNC(1 YEARS, release_date)
| STATS MAX(page_count) BY year
| SORT year
| LIMIT 5
`,
});
console.log(response);
```
#### Request
`POST /_query`
#### Request Body
- `query` (Required): The ES|QL query to run.
- `format` (Optional): Format for the response.
- `params` (Optional): Values for parameters in the query.
- `profile` (Optional): If `true`, includes a `profile` object with information about query execution.
#### Response
- `columns`: Column `name` and `type` for each column returned in `values`.
- `rows`: Values for the search results.
- `profile`: Profile describing the execution of the query (if `profile` was sent in the request).

View file

@ -1,8 +1,22 @@
## PERCENTILE
# PERCENTILE
The `PERCENTILE` function returns the value at which a certain percentage of observed values occur. For example, the 95th percentile is the value which is greater than 95% of the observed values and the 50th percentile is the MEDIAN.
The PERCENTILE function calculates the value at a specified percentile of observed values.
### Examples
## Syntax
`PERCENTILE(number, percentile)`
### Parameters
#### number
The numeric expression that represents the set of values to be analyzed.
#### percentile
The percentile to compute. The value should be between 0 and 100.
## Examples
```esql
FROM employees
@ -14,13 +28,8 @@ FROM employees
| STATS p80_max_salary_change = PERCENTILE(MV_MAX(salary_change), 80)
```
PERCENTILE is usually approximate. There are many different algorithms to calculate percentiles. The naive implementation simply stores all the values in a sorted array. To find the 50th percentile, you simply find the value that is at `my_array[count(my_array) * 0.5]`. Clearly, the naive implementation does not scale — the sorted array grows linearly with the number of values in your dataset. To calculate percentiles across potentially billions of values in an Elasticsearch cluster, approximate percentiles are calculated. The algorithm used by the percentile metric is called TDigest (introduced by Ted Dunning in Computing Accurate Quantiles using T-Digests).
## Notes
When using this metric, there are a few guidelines to keep in mind:
- Accuracy is proportional to q(1-q). This means that extreme percentiles (e.g. 99%) are more accurate than less extreme percentiles, such as the median.
- For small sets of values, percentiles are highly accurate (and potentially 100% accurate if the data is small enough).
- As the quantity of values in a bucket grows, the algorithm begins to approximate the percentiles. It is effectively trading accuracy for memory savings. The exact level of inaccuracy is difficult to generalize, since it depends on your data distribution and volume of data being aggregated.
- PERCENTILE is usually approximate.
The following chart shows the relative error on a uniform distribution depending on the number of collected values and the requested percentile. It shows how precision is better for extreme percentiles. The reason why error diminishes for a large number of values is that the law of large numbers makes the distribution of values more and more uniform and the t-digest tree can do a better job at summarizing it. It would not be the case on more skewed distributions.
PERCENTILE is also non-deterministic. This means you can get slightly different results using the same data.
- PERCENTILE is also non-deterministic. This means you can get slightly different results using the same data.

View file

@ -1,8 +1,16 @@
## PI
# PI
The `PI` function returns Pi, the ratio of a circle's circumference to its diameter.
The PI function returns the mathematical constant Pi, which is the ratio of a circle's circumference to its diameter.
### Examples
## Syntax
`PI()`
### Parameters
This function does not require any parameters.
## Examples
```esql
ROW PI()
@ -12,4 +20,4 @@ ROW PI()
FROM employees
| EVAL pi_value = PI()
| KEEP pi_value
```
```

View file

@ -1,8 +1,22 @@
## POW
# POW
The `POW` function returns the value of a base raised to the power of an exponent. It is still possible to overflow a double result here; in that case, null will be returned.
The POW function calculates the value of a base number raised to the power of an exponent number.
### Examples
## Syntax
`POW(base, exponent)`
### Parameters
#### base
This is a numeric expression for the base.
#### exponent
This is a numeric expression for the exponent.
## Examples
```esql
ROW base = 2.0, exponent = 2
@ -12,4 +26,4 @@ ROW base = 2.0, exponent = 2
```esql
ROW base = 4, exponent = 0.5
| EVAL s = POW(base, exponent)
```
```

View file

@ -1,37 +1,35 @@
## RENAME
# RENAME
The `RENAME` processing command in ES|QL is used to rename one or more columns in a dataset. This command is particularly useful when you need to standardize column names, make them more readable, or avoid conflicts with existing column names. If a column with the new name already exists, it will be replaced by the new column. If multiple columns are renamed to the same name, all but the rightmost column with the same new name are dropped.
The RENAME command is used to change the names of one or more columns in a table.
### Examples
Here are some example ES|QL queries using the `RENAME` command:
1. **Renaming a single column:**
```esql
FROM employees
| KEEP first_name, last_name, still_hired
| RENAME still_hired AS employed
```
2. **Renaming multiple columns in a single command:**
```esql
FROM employees
| KEEP first_name, last_name
| RENAME first_name AS fn, last_name AS ln
```
### Syntax
## Syntax
`RENAME old_name1 AS new_name1[, ..., old_nameN AS new_nameN]`
### Parameters
- **old_nameX**: The name of a column you want to rename.
- **new_nameX**: The new name of the column. If it conflicts with an existing column name, the existing column is dropped. If multiple columns are renamed to the same name, all but the rightmost column with the same new name are dropped.
#### old_nameX
### Limitations
This is the current name of the column that you want to rename.
- If a column with the new name already exists, it will be replaced by the new column.
- If multiple columns are renamed to the same name, all but the rightmost column with the same new name are dropped.
#### new_nameX
This is the new name that you want to assign to the column. If a column with the new name already exists, the existing column will be replaced. If multiple columns are renamed to the same name, all but the rightmost column with the same new name will be dropped.
## Examples
The following example renames the column "still_hired" to "employed":
```esql
FROM employees
| KEEP first_name, last_name, still_hired
| RENAME still_hired AS employed
```
You can rename multiple columns with a single RENAME command:
```esql
FROM employees
| KEEP first_name, last_name
| RENAME first_name AS fn, last_name AS ln
```

View file

@ -1,8 +1,22 @@
## REPEAT
# REPEAT
The `REPEAT` function returns a string constructed by concatenating the input string with itself the specified number of times.
The REPEAT function generates a string by repeating a specified string a certain number of times.
### Examples
## Syntax
`REPEAT(string, number)`
### Parameters
#### string
The string that you want to repeat.
#### number
The number of times you want to repeat the string.
## Examples
```esql
ROW a = "Hello!"
@ -12,4 +26,4 @@ ROW a = "Hello!"
```esql
ROW greeting = "Hi"
| EVAL repeated_greeting = REPEAT(greeting, 5)
```
```

View file

@ -1,8 +1,28 @@
## REPLACE
# REPLACE
The `REPLACE` function substitutes in the string `str` any match of the regular expression `regex` with the replacement string `newStr`.
The REPLACE function substitutes any match of a regular expression within a string with a replacement string.
### Examples
## Syntax
`REPLACE(string, regex, newString)`
### Parameters
#### string
The string expression where the replacement will occur.
#### regex
The regular expression that will be matched in the string.
#### newString
The string that will replace the matched regular expression in the string.
## Examples
The following example replaces any occurrence of the word "World" with the word "Universe":
```esql
ROW str = "Hello World"
@ -16,4 +36,4 @@ Another example could be replacing digits in a string with a specific character:
ROW str = "User123"
| EVAL str = REPLACE(str, "\\d", "*")
| KEEP str
```
```

View file

@ -1,8 +1,24 @@
## RIGHT
# RIGHT
The `RIGHT` function returns a substring that extracts a specified number of characters from a string, starting from the right.
The RIGHT function extracts a specified number of characters from the end of a string.
### Examples
## Syntax
`RIGHT(string, length)`
### Parameters
#### string
The string from which a substring is to be returned.
#### length
The number of characters to return from the end of the string.
## Examples
The following example extracts the last three characters from the `last_name` field:
```esql
FROM employees
@ -16,4 +32,4 @@ FROM employees
ROW full_name = "John Doe"
| EVAL last_part = RIGHT(full_name, 4)
| KEEP last_part
```
```

View file

@ -1,8 +1,24 @@
## ROUND
# ROUND
The `ROUND` function rounds a number to the specified number of decimal places. By default, it rounds to 0 decimal places, which returns the nearest integer. If the precision is a negative number, it rounds to the number of digits left of the decimal point. If the input value is null, the function returns null.
The ROUND function rounds a numeric value to a specified number of decimal places.
### Examples
## Syntax
`ROUND(number, decimals)`
### Parameters
#### number
The numeric value to be rounded.
#### decimals
The number of decimal places to which the number should be rounded. The default value is 0.
## Examples
The following example rounds the height of employees to one decimal place after converting it from meters to feet:
```esql
FROM employees
@ -14,4 +30,8 @@ FROM employees
FROM sales
| KEEP product_name, revenue
| EVAL rounded_revenue = ROUND(revenue, -2)
```
```
## Notes
If "decimals" is a negative number, the ROUND function rounds to the number of digits left of the decimal point.

View file

@ -1,10 +1,22 @@
## ROW
# ROW
The `ROW` source command produces a row with one or more columns with values that you specify. This can be useful for testing. The command allows you to create a row with specified column names and values, which can be literals, expressions, or functions. In case of duplicate column names, only the rightmost duplicate creates a column.
The ROW command is used to generate a row with one or more columns with specified values. This can be particularly useful for testing purposes.
### Examples
## Syntax
Here are some example ES|QL queries using the `ROW` command:
`ROW column1 = value1[, ..., columnN = valueN]`
### Parameters
#### {column name}
This is the name of the column. If there are duplicate column names, only the rightmost duplicate will create a column.
#### {value}
This is the value for the column. It can be a literal, an expression, or a function.
## Examples
1. Creating a row with simple literal values:
```esql
@ -29,4 +41,4 @@ ROW x = 5, y = [3, 4], z = TO_STRING(123)
5. Using nested functions within a row:
```esql
ROW a = ABS(-10), b = CONCAT("Hello", " ", "World"), c = TO_BOOLEAN("true")
```
```

View file

@ -1,8 +1,20 @@
## RTRIM
# RTRIM
Removes trailing whitespaces from a string.
The RTRIM function is used to remove trailing whitespaces from a string.
### Examples
## Syntax
`RTRIM(string)`
### Parameters
#### string
This is the string expression from which trailing whitespaces will be removed.
## Examples
The following example demonstrates how to use the RTRIM function:
```esql
ROW message = " some text ", color = " red "
@ -10,4 +22,4 @@ ROW message = " some text ", color = " red "
| EVAL color = RTRIM(color)
| EVAL message = CONCAT("'", message, "'")
| EVAL color = CONCAT("'", color, "'")
```
```

View file

@ -1,24 +1,21 @@
## SHOW
# SHOW
The `SHOW` source command returns information about the deployment and its capabilities. This command is useful for retrieving metadata about the Elasticsearch deployment, such as the version, build date, and hash. It is particularly helpful for administrators and developers who need to verify the deployment details or troubleshoot issues. The `SHOW` command has a limitation in that it can only be used with the `INFO` item.
The SHOW command retrieves details about the deployment and its capabilities.
### Examples
## Syntax
Here are some example ES|QL queries using the `SHOW` command:
`SHOW item`
1. Retrieve the deployment's version, build date, and hash:
```esql
### Parameters
#### item
The only acceptable value is `INFO`.
## Examples
Retrieve the deployment's version, build date, and hash:
```esql
SHOW INFO
```
2. Use the `SHOW` command in a multi-line query for better readability:
```esql
SHOW INFO
```
3. Another example of using the `SHOW` command to get deployment information:
```esql
SHOW INFO
```
These examples demonstrate the primary usage of the `SHOW` command to retrieve deployment information.

View file

@ -1,8 +1,18 @@
## SIGNUM
# SIGNUM
The `SIGNUM` function returns the sign of the given number. It returns -1 for negative numbers, 0 for 0, and 1 for positive numbers.
The SIGNUM function returns the sign of a given number. It outputs `-1` for negative numbers, `0` for `0`, and `1` for positive numbers.
### Examples
## Syntax
`SIGNUM(number)`
### Parameters
#### number
A numeric expression.
## Examples
```esql
ROW d = 100.0
@ -12,4 +22,4 @@ ROW d = 100.0
```esql
ROW d = -50.0
| EVAL s = SIGNUM(d)
```
```

View file

@ -1,8 +1,18 @@
## SIN
# SIN
The `SIN` function returns the sine trigonometric function of an angle, expressed in radians. If the input angle is null, the function returns null.
The SIN function calculates the sine of a given angle.
### Examples
## Syntax
`SIN(angle)`
### Parameters
#### angle
The angle for which the sine value is to be calculated. The angle should be in radians.
## Examples
```esql
ROW a=1.8
@ -12,4 +22,4 @@ ROW a=1.8
```esql
ROW angle=0.5
| EVAL sine_value = SIN(angle)
```
```

View file

@ -1,15 +1,25 @@
## SINH
# SINH
The `SINH` function returns the hyperbolic sine of an angle.
The SINH function calculates the hyperbolic sine of a given angle.
### Examples
## Syntax
`SINH(angle)`
### Parameters
#### angle
The angle in radians for which the hyperbolic sine is to be calculated. If the parameter is null, the function will return null.
## Examples
```esql
ROW a=1.8
| EVAL sinh = SINH(a)
| EVAL sinh=SINH(a)
```
```esql
ROW angle=0.5
| EVAL hyperbolic_sine = SINH(angle)
```
```

View file

@ -1,21 +1,20 @@
## SORT
# SORT
The `SORT` processing command in ES|QL is used to sort a table based on one or more columns. This command is essential for organizing data in a specific order, which can be particularly useful for reporting, data analysis, and visualization. The default sort order is ascending, but you can specify descending order using `DESC`. Additionally, you can handle null values explicitly by using `NULLS FIRST` or `NULLS LAST`.
The SORT command is used to arrange a table based on one or more columns.
### Use Cases
- **Organizing Data**: Sort data to make it easier to read and analyze.
- **Reporting**: Generate reports where data needs to be presented in a specific order.
- **Data Analysis**: Facilitate data analysis by sorting data based on key metrics.
- **Visualization**: Prepare data for visualizations that require sorted input.
## Syntax
### Limitations
- **Multivalued Columns**: When sorting on multivalued columns, the lowest value is used for ascending order and the highest value for descending order.
- **Null Values**: By default, null values are treated as larger than any other value. This can be changed using `NULLS FIRST` or `NULLS LAST`.
`SORT column1 [ASC/DESC][NULLS FIRST/NULLS LAST][, ..., columnN [ASC/DESC][NULLS FIRST/NULLS LAST]]`
### Examples
### Parameters
#### Basic Sorting
Sort the `employees` table by the `height` column in ascending order:
#### columnX
The column on which the sorting is to be performed.
## Examples
Sort a table based on the 'height' column:
```esql
FROM employees
@ -23,8 +22,7 @@ FROM employees
| SORT height
```
#### Explicit Ascending Order
Sort the `employees` table by the `height` column in descending order:
Explicitly sort in ascending order with `ASC`:
```esql
FROM employees
@ -32,8 +30,7 @@ FROM employees
| SORT height DESC
```
#### Multiple Sort Expressions
Sort the `employees` table by the `height` column in descending order and use `first_name` as a tie breaker in ascending order:
Provide additional sort expressions to act as tie breakers:
```esql
FROM employees
@ -41,8 +38,7 @@ FROM employees
| SORT height DESC, first_name ASC
```
#### Sorting Null Values First
Sort the `employees` table by the `first_name` column in ascending order, placing null values first:
Sort `null` values first using `NULLS FIRST`:
```esql
FROM employees
@ -50,4 +46,21 @@ FROM employees
| SORT first_name ASC NULLS FIRST
```
These examples demonstrate the versatility of the `SORT` command in organizing data for various analytical and reporting needs.
## Notes
If SORT is used right after a KEEP command, make sure it only uses column names in KEEP,
or move the SORT before the KEEP, e.g.:
- not correct: KEEP date | SORT @timestamp
- correct: SORT @timestamp | KEEP date
By default, the sorting order is ascending. You can specify an explicit sort order by using `ASC` for ascending or `DESC` for descending.
If two rows have the same sort key, they are considered equal. You can provide additional sort expressions to act as tie breakers.
When sorting on multivalued columns, the lowest value is used when sorting in ascending order and the highest value is used when sorting in descending order.
By default, `null` values are treated as being larger than any other value. This means that with an ascending sort order, `null` values are sorted last, and with a descending sort order, `null` values are sorted first. You can change this by providing `NULLS FIRST` or `NULLS LAST`.
## Limitations
- **Multivalued Columns**: When sorting on multivalued columns, the lowest value is used for ascending order and the highest value for descending order.
- **Null Values**: By default, null values are treated as larger than any other value. This can be changed using `NULLS FIRST` or `NULLS LAST`.

View file

@ -1,8 +1,22 @@
## SPLIT
# SPLIT
The `SPLIT` function splits a single-valued string into multiple strings based on a specified delimiter.
The SPLIT function is used to divide a single string into multiple strings.
### Examples
## Syntax
`SPLIT(string, delim)`
### Parameters
#### string
This is the string expression that you want to split.
#### delim
This is the delimiter used to split the string. Currently, only single byte delimiters are supported.
## Examples
```esql
ROW words="foo;bar;baz;qux;quux;corge"
@ -12,4 +26,4 @@ ROW words="foo;bar;baz;qux;quux;corge"
```esql
ROW sentence="hello world;this is ES|QL"
| EVAL words = SPLIT(sentence, " ")
```
```

View file

@ -1,8 +1,19 @@
## SQRT
# SQRT
The `SQRT` function returns the square root of a number. The input can be any numeric value, and the return value is always a double. Square roots of negative numbers and infinities are null.
The SQRT function calculates the square root of a given number.
## Syntax
`SQRT(number)`
### Parameters
#### number
This is a numeric expression.
## Examples
### Examples
```esql
ROW d = 100.0
@ -13,4 +24,4 @@ ROW d = 100.0
FROM employees
| KEEP first_name, last_name, height
| EVAL sqrt_height = SQRT(height)
```
```

View file

@ -1,8 +1,20 @@
## ST_CENTROID_AGG
# ST_CENTROID_AGG
The `ST_CENTROID_AGG` function calculates the spatial centroid over a field with spatial point geometry type.
The ST_CENTROID_AGG function calculates the spatial centroid over a field with spatial point geometry type.
### Examples
## Syntax
`ST_CENTROID_AGG(field)`
### Parameters
#### field
The field parameter represents the column that contains the spatial point geometry data.
## Examples
Here is an example of how to use the ST_CENTROID_AGG function:
```esql
FROM airports
@ -12,4 +24,4 @@ FROM airports
```esql
FROM city_boundaries
| STATS city_centroid = ST_CENTROID_AGG(boundary)
```
```

View file

@ -1,8 +1,22 @@
## ST_CONTAINS
# ST_CONTAINS
Returns whether the first geometry contains the second geometry. This is the inverse of the `ST_WITHIN` function.
The ST_CONTAINS function determines if the first specified geometry encompasses the second one. This function is the inverse of the ST_WITHIN function.
### Examples
## Syntax
`ST_CONTAINS(geomA, geomB)`
### Parameters
#### geomA
This is an expression of type `geo_point`, `cartesian_point`, `geo_shape`, or `cartesian_shape`.
#### geomB
This is an expression of type `geo_point`, `cartesian_point`, `geo_shape`, or `cartesian_shape`.
## Examples
```esql
FROM airport_city_boundaries
@ -14,4 +28,8 @@ FROM airport_city_boundaries
FROM regions
| WHERE ST_CONTAINS(region_boundary, TO_GEOSHAPE("POLYGON((30 10, 40 40, 20 40, 10 20, 30 10))"))
| KEEP region_name, region_code, region_boundary
```
```
## Limitations
It's important to note that the second parameter must have the same coordinate system as the first. Therefore, it's not possible to combine `geo_*` and `cartesian_*` parameters.

View file

@ -1,8 +1,22 @@
## ST_DISJOINT
# ST_DISJOINT
The `ST_DISJOINT` function returns whether two geometries or geometry columns are disjoint. This is the inverse of the `ST_INTERSECTS` function. In mathematical terms: `ST_Disjoint(A, B) ⇔ A ⋂ B = ∅`.
The ST_DISJOINT function checks if two geometries or geometry columns are disjoint, meaning they do not intersect. This function is the inverse of the ST_INTERSECTS function. In mathematical terms, if A and B are two geometries, ST_Disjoint(A, B) is true if and only if the intersection of A and B is empty.
### Examples
## Syntax
`ST_DISJOINT(geomA, geomB)`
### Parameters
#### geomA
This is an expression of type `geo_point`, `cartesian_point`, `geo_shape`, or `cartesian_shape`.
#### geomB
This is an expression of type `geo_point`, `cartesian_point`, `geo_shape`, or `cartesian_shape`.
## Examples
```esql
FROM airport_city_boundaries
@ -14,4 +28,8 @@ FROM airport_city_boundaries
FROM airport_city_boundaries
| WHERE ST_DISJOINT(city_boundary, TO_GEOSHAPE("POLYGON((30 10, 40 40, 20 40, 10 20, 30 10))"))
| KEEP abbrev, airport, region, city, city_location
```
```
## Limitations
It's important to note that the second parameter must have the same coordinate system as the first. This means you cannot combine `geo_*` and `cartesian_*` parameters.

View file

@ -1,8 +1,22 @@
## ST_DISTANCE
# ST_DISTANCE
The `ST_DISTANCE` function computes the distance between two points. For cartesian geometries, this is the pythagorean distance in the same units as the original coordinates. For geographic geometries, this is the circular distance along the great circle in meters.
The ST_DISTANCE function calculates the distance between two points.
### Examples
## Syntax
`ST_DISTANCE(geomA, geomB)`
### Parameters
#### geomA
This is an expression of type `geo_point` or `cartesian_point`.
#### geomB
This is an expression of type `geo_point` or `cartesian_point`.
## Examples
```esql
FROM airports
@ -16,4 +30,8 @@ FROM airports
| WHERE abbrev == "JFK"
| EVAL distance = ST_DISTANCE(location, city_location)
| KEEP abbrev, name, location, city_location, distance
```
```
## Limitations
- It's important to note that the second parameter must have the same coordinate system as the first. Therefore, it's not possible to combine `geo_point` and `cartesian_point` parameters.

View file

@ -1,8 +1,22 @@
## ST_INTERSECTS
# ST_INTERSECTS
The `ST_INTERSECTS` function returns true if two geometries intersect. They intersect if they have any point in common, including their interior points (points along lines or within polygons). This is the inverse of the `ST_DISJOINT` function. In mathematical terms: `ST_Intersects(A, B) ⇔ A ⋂ B ≠ ∅`.
The ST_INTERSECTS function checks if two geometries intersect. They intersect if they share any point, including points within their interiors (points along lines or within polygons). This function is the inverse of the ST_DISJOINT function. In mathematical terms, ST_Intersects(A, B) is true if the intersection of A and B is not empty.
### Examples
## Syntax
`ST_INTERSECTS(geomA, geomB)`
### Parameters
#### geomA
This is an expression of type `geo_point`, `cartesian_point`, `geo_shape`, or `cartesian_shape`. If `null`, the function returns `null`.
#### geomB
This is an expression of type `geo_point`, `cartesian_point`, `geo_shape`, or `cartesian_shape`. If `null`, the function returns `null`. The second parameter must also have the same coordinate system as the first. This means it is not possible to combine `geo_*` and `cartesian_*` parameters.
## Examples
```esql
FROM airports
@ -13,4 +27,4 @@ FROM airports
FROM city_boundaries
| WHERE ST_INTERSECTS(boundary, TO_GEOSHAPE("POLYGON((10 10, 20 10, 20 20, 10 20, 10 10))"))
| KEEP city_name, boundary
```
```

View file

@ -1,8 +1,22 @@
## ST_WITHIN
# ST_WITHIN
The `ST_WITHIN` function returns whether the first geometry is within the second geometry. This is the inverse of the `ST_CONTAINS` function.
The ST_WITHIN function checks if the first geometry is located within the second geometry.
### Examples
## Syntax
`ST_WITHIN(geomA, geomB)`
### Parameters
#### geomA
This is an expression of type `geo_point`, `cartesian_point`, `geo_shape`, or `cartesian_shape`. If the value is `null`, the function will return `null`.
#### geomB
This is an expression of type `geo_point`, `cartesian_point`, `geo_shape`, or `cartesian_shape`. If the value is `null`, the function will return `null`. It's important to note that the second parameter must have the same coordinate system as the first. This means you cannot combine `geo_*` and `cartesian_*` parameters.
## Examples
```esql
FROM airport_city_boundaries
@ -14,4 +28,4 @@ FROM airport_city_boundaries
FROM parks
| WHERE ST_WITHIN(park_boundary, TO_GEOSHAPE("POLYGON((40.7128 -74.0060, 40.7128 -73.9352, 40.7306 -73.9352, 40.7306 -74.0060, 40.7128 -74.0060))"))
| KEEP park_name, park_boundary
```
```

View file

@ -1,8 +1,20 @@
## ST_X
# ST_X
The `ST_X` function extracts the x coordinate from the supplied point. If the point is of type `geo_point`, this is equivalent to extracting the longitude value.
The ST_X function extracts the `x` coordinate from a given point.
### Examples
## Syntax
`ST_X(point)`
### Parameters
#### point
This is an expression of type `geo_point` or `cartesian_point`.
## Examples
Here is an example of how to use the ST_X function:
```esql
ROW point = TO_GEOPOINT("POINT(42.97109629958868 14.7552534006536)")
@ -12,4 +24,4 @@ ROW point = TO_GEOPOINT("POINT(42.97109629958868 14.7552534006536)")
```esql
ROW point = TO_CARTESIANPOINT("POINT(100.0 200.0)")
| EVAL x = ST_X(point), y = ST_Y(point)
```
```

Some files were not shown because too many files have changed in this diff Show more