[eem] metadata as keyword (#202611)

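Cast identity fields and metadata fields to `keyword` to prevent the
ambiguous mappings error raised when the same field is mapped with
different types (e.g. `keyword` and `text`) across the queried indices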

### Testing

- setup data
```
PUT service-name-as-keyword
{
    "mappings": {
        "dynamic": false,
        "properties": {
            "service.name": {
                "type": "keyword"
            }
        }
    }
}

POST service-name-as-keyword/_doc
{
  "service.name": "as-keyword"
}

PUT service-name-as-text
{
    "mappings": {
        "dynamic": false,
        "properties": {
            "service.name": {
                "type": "text"
            }
        }
    }
}


POST service-name-as-text/_doc
{
  "service.name": "as-text"
}
```

- data loads successfully in `/app/entity_manager`
![Screenshot 2024-12-03 at 11 50 10](https://github.com/user-attachments/assets/12d6cbd8-c075-475f-b140-9158e93158ff)

_new query_
```
POST _query
{
    "query": """FROM service-name-as* | WHERE service.name::keyword IS NOT NULL | STATS  BY service.name::keyword | RENAME `service.name::keyword` AS service.name | EVAL entity.type = "service", entity.id = service.name, entity.display_name = entity.id | SORT entity.id ASC | LIMIT 10"""
}
```

- previous query fails with ambiguous mappings error
```
POST _query
{
    "query": """FROM service-name-as* | WHERE service.name IS NOT NULL | STATS  BY service.name | EVAL entity.type = "service", entity.id = service.name, entity.display_name = entity.id | SORT entity.id ASC | LIMIT 10"""
}
```
This commit is contained in:
Kevin Lacabane 2024-12-05 12:57:12 +01:00 committed by GitHub
parent bd576e6cef
commit 58f51fdac7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 18 additions and 6 deletions

View file

@ -29,9 +29,10 @@ describe('getEntityInstancesQuery', () => {
expect(query).toEqual(
'FROM logs-*, metrics-* | ' +
'WHERE service.name IS NOT NULL | ' +
'WHERE service.name::keyword IS NOT NULL | ' +
'WHERE custom_timestamp_field >= "2024-11-20T19:00:00.000Z" AND custom_timestamp_field <= "2024-11-20T20:00:00.000Z" | ' +
'STATS host.name = VALUES(host.name), entity.last_seen_timestamp = MAX(custom_timestamp_field), service.id = MAX(service.id) BY service.name | ' +
'STATS host.name = VALUES(host.name::keyword), entity.last_seen_timestamp = MAX(custom_timestamp_field), service.id = MAX(service.id::keyword) BY service.name::keyword | ' +
'RENAME `service.name::keyword` AS service.name | ' +
'EVAL entity.type = "service", entity.id = service.name, entity.display_name = COALESCE(service.id, entity.id) | ' +
'SORT entity.id DESC | ' +
'LIMIT 5'

View file

@ -5,6 +5,7 @@
* 2.0.
*/
import { asKeyword } from './utils';
import { EntitySourceDefinition, SortBy } from '../types';
const sourceCommand = ({ source }: { source: EntitySourceDefinition }) => {
@ -30,7 +31,7 @@ const whereCommand = ({
end: string;
}) => {
const filters = [
source.identity_fields.map((field) => `${field} IS NOT NULL`).join(' AND '),
source.identity_fields.map((field) => `${asKeyword(field)} IS NOT NULL`).join(' AND '),
...source.filters,
];
@ -46,7 +47,7 @@ const whereCommand = ({
const statsCommand = ({ source }: { source: EntitySourceDefinition }) => {
const aggs = source.metadata_fields
.filter((field) => !source.identity_fields.some((idField) => idField === field))
.map((field) => `${field} = VALUES(${field})`);
.map((field) => `${field} = VALUES(${asKeyword(field)})`);
if (source.timestamp_field) {
aggs.push(`entity.last_seen_timestamp = MAX(${source.timestamp_field})`);
@ -55,10 +56,15 @@ const statsCommand = ({ source }: { source: EntitySourceDefinition }) => {
if (source.display_name) {
// ideally we want the latest value but there's no command yet
// so we use MAX for now
aggs.push(`${source.display_name} = MAX(${source.display_name})`);
aggs.push(`${source.display_name} = MAX(${asKeyword(source.display_name)})`);
}
return `STATS ${aggs.join(', ')} BY ${source.identity_fields.join(', ')}`;
return `STATS ${aggs.join(', ')} BY ${source.identity_fields.map(asKeyword).join(', ')}`;
};
/**
 * Builds the RENAME command of the query.
 *
 * For every identity field, emits a "`<field cast via asKeyword>` AS <field>"
 * operation, so that the column produced under the casted name is exposed
 * again under the plain identity field name.
 *
 * @param source entity source definition whose `identity_fields` are renamed
 * @returns the `RENAME ...` command with operations separated by ", "
 */
const renameCommand = ({ source }: { source: EntitySourceDefinition }) => {
  const renames: string[] = [];
  for (const identityField of source.identity_fields) {
    // The casted name contains "::", hence the backtick quoting around it.
    renames.push('`' + asKeyword(identityField) + '` AS ' + identityField);
  }
  return 'RENAME ' + renames.join(', ');
};
const evalCommand = ({ source }: { source: EntitySourceDefinition }) => {
@ -107,6 +113,7 @@ export function getEntityInstancesQuery({
sourceCommand({ source }),
whereCommand({ source, start, end }),
statsCommand({ source }),
renameCommand({ source }),
evalCommand({ source }),
sortCommand({ source, sort }),
`LIMIT ${limit}`,

View file

@ -65,3 +65,7 @@ export function mergeEntitiesList(
return Object.values(instances);
}
/**
 * Appends the `::keyword` cast suffix to a field name.
 *
 * @param field name of the field to cast
 * @returns the field name followed by `::keyword`
 */
export function asKeyword(field: string) {
  const castSuffix = '::keyword';
  return field + castSuffix;
}