[Fleet] Adjust background File cleanup task to also process to-host indexes (#161138)

## Summary

- Updates the `fleet:check-deleted-files-task` to include the indexes
that store files for delivery to the Host (currently used only by
Endpoint integration)
This commit is contained in:
Paul Tavares 2023-07-06 10:46:55 -04:00 committed by GitHub
parent 0c03f1010e
commit 9e5d6b3e0f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 198 additions and 75 deletions

View file

@ -5,12 +5,8 @@
* 2.0.
*/
import { FILE_STORAGE_METADATA_INDEX_PATTERN } from '../constants';
import { getFileDataIndexName, getFileMetadataIndexName } from '..';
import { getIntegrationNameFromIndexName } from './file_storage';
describe('File Storage services', () => {
describe('File Index Names', () => {
it('should generate file metadata index name for files received from host', () => {
@ -29,17 +25,4 @@ describe('File Storage services', () => {
expect(getFileDataIndexName('foo', true)).toEqual('.fleet-fileds-tohost-data-foo');
});
});
describe('getIntegrationNameFromIndexName()', () => {
it.each([
['regular index names', '.fleet-fileds-fromhost-meta-agent'],
['datastream index names', '.ds-.fleet-fileds-fromhost-data-agent-2023.06.30-00001'],
])('should handle %s', (_, index) => {
expect(getIntegrationNameFromIndexName(index, FILE_STORAGE_METADATA_INDEX_PATTERN)).toEqual(
'agent'
);
});
it.todo('should error if index pattern does not include `*`');
});
});

View file

@ -55,41 +55,3 @@ export const getFileDataIndexName = (
`Unable to define integration file data index. No '*' in index pattern: ${dataIndex}`
);
};
/**
* Returns back the integration name for a given File Data (chunks) index name.
*
* @example
* // Given a File data index pattern of `.fleet-fileds-fromhost-data-*`:
*
* getIntegrationNameFromFileDataIndexName('.fleet-fileds-fromhost-data-agent');
* // return 'agent'
*
* getIntegrationNameFromFileDataIndexName('.ds-.fleet-fileds-fromhost-data-agent');
* // return 'agent'
*
* getIntegrationNameFromFileDataIndexName('.ds-.fleet-fileds-fromhost-data-agent-2023.06.30-00001');
* // return 'agent'
*/
export const getIntegrationNameFromFileDataIndexName = (indexName: string): string => {
return getIntegrationNameFromIndexName(indexName, FILE_STORAGE_DATA_INDEX_PATTERN);
};
export const getIntegrationNameFromIndexName = (
indexName: string,
indexPattern: string
): string => {
const integrationNameIndexPosition = indexPattern.split('-').indexOf('*');
if (integrationNameIndexPosition === -1) {
throw new Error(`Unable to parse index name. No '*' in index pattern: ${indexPattern}`);
}
const indexPieces = indexName.replace(/^\.ds-/, '').split('-');
if (indexPieces[integrationNameIndexPosition]) {
return indexPieces[integrationNameIndexPosition];
}
throw new Error(`Index name ${indexName} does not seem to be a File storage index`);
};

View file

@ -11,6 +11,8 @@ import { elasticsearchServiceMock } from '@kbn/core/server/mocks';
import {
FILE_STORAGE_DATA_INDEX_PATTERN,
FILE_STORAGE_METADATA_INDEX_PATTERN,
FILE_STORAGE_TO_HOST_DATA_INDEX_PATTERN,
FILE_STORAGE_TO_HOST_METADATA_INDEX_PATTERN,
} from '../../../common/constants/file_storage';
import { getFileDataIndexName, getFileMetadataIndexName } from '../../../common/services';
@ -67,7 +69,7 @@ describe('files service', () => {
expect(esClientMock.search).toBeCalledWith(
{
index: FILE_STORAGE_METADATA_INDEX_PATTERN,
index: [FILE_STORAGE_METADATA_INDEX_PATTERN, FILE_STORAGE_TO_HOST_METADATA_INDEX_PATTERN],
body: {
size: ES_SEARCH_LIMIT,
query: {
@ -130,7 +132,8 @@ describe('files service', () => {
expect(esClientMock.search).toBeCalledWith(
{
index: FILE_STORAGE_DATA_INDEX_PATTERN,
ignore_unavailable: true,
index: [FILE_STORAGE_DATA_INDEX_PATTERN, FILE_STORAGE_TO_HOST_DATA_INDEX_PATTERN],
body: {
size: ES_SEARCH_LIMIT,
query: {

View file

@ -12,18 +12,19 @@ import type { FileStatus } from '@kbn/files-plugin/common/types';
import {
FILE_STORAGE_DATA_INDEX_PATTERN,
FILE_STORAGE_METADATA_INDEX_PATTERN,
FILE_STORAGE_TO_HOST_DATA_INDEX_PATTERN,
FILE_STORAGE_TO_HOST_METADATA_INDEX_PATTERN,
} from '../../../common/constants';
import {
getFileMetadataIndexName,
getIntegrationNameFromFileDataIndexName,
getIntegrationNameFromIndexName,
} from '../../../common/services';
import { getFileMetadataIndexName } from '../../../common/services';
import { ES_SEARCH_LIMIT } from '../../../common/constants';
import { parseFileStorageIndex } from './utils';
/**
* Gets files with given status
* Gets files with given status from the files metadata index. Includes both files
* `tohost` and files `fromhost`
*
* @param esClient
* @param abortController
@ -37,7 +38,7 @@ export async function getFilesByStatus(
const result = await esClient
.search(
{
index: FILE_STORAGE_METADATA_INDEX_PATTERN,
index: [FILE_STORAGE_METADATA_INDEX_PATTERN, FILE_STORAGE_TO_HOST_METADATA_INDEX_PATTERN],
body: {
size: ES_SEARCH_LIMIT,
query: {
@ -79,20 +80,18 @@ export async function fileIdsWithoutChunksByIndex(
const noChunkFileIdsByIndex = files.reduce((acc, file) => {
allFileIds.add(file._id);
const integration = getIntegrationNameFromIndexName(
file._index,
FILE_STORAGE_METADATA_INDEX_PATTERN
);
const metadataIndex = getFileMetadataIndexName(integration);
const { index: metadataIndex } = parseFileStorageIndex(file._index);
const fileIds = acc[metadataIndex];
acc[metadataIndex] = fileIds ? fileIds.add(file._id) : new Set([file._id]);
return acc;
}, {} as FileIdsByIndex);
const chunks = await esClient
.search<{ bid: string }>(
{
index: FILE_STORAGE_DATA_INDEX_PATTERN,
index: [FILE_STORAGE_DATA_INDEX_PATTERN, FILE_STORAGE_TO_HOST_DATA_INDEX_PATTERN],
body: {
size: ES_SEARCH_LIMIT,
query: {
@ -113,6 +112,7 @@ export async function fileIdsWithoutChunksByIndex(
},
_source: ['bid'],
},
ignore_unavailable: true,
},
{ signal: abortController.signal }
)
@ -123,9 +123,12 @@ export async function fileIdsWithoutChunksByIndex(
chunks.hits.hits.forEach((hit) => {
const fileId = hit._source?.bid;
if (!fileId) return;
const integration = getIntegrationNameFromFileDataIndexName(hit._index);
const metadataIndex = getFileMetadataIndexName(integration);
const { integration, direction } = parseFileStorageIndex(hit._index);
const metadataIndex = getFileMetadataIndexName(integration, direction === 'to-host');
if (noChunkFileIdsByIndex[metadataIndex]?.delete(fileId)) {
allFileIds.delete(fileId);
}

View file

@ -0,0 +1,66 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { getFileDataIndexName, getFileMetadataIndexName } from '../../../common';
import { parseFileStorageIndex } from './utils';
describe('Files service utils', () => {
describe('parseFileStorageIndex()', () => {
it.each([
[
'tohost meta',
'.ds-.fleet-fileds-tohost-meta-endpoint-2023.07.03-000001',
{
index: getFileMetadataIndexName('endpoint', true),
integration: 'endpoint',
direction: 'to-host',
type: 'meta',
},
],
[
'tohost data',
'.ds-.fleet-fileds-tohost-data-agent-2023.07.03-000001',
{
index: getFileDataIndexName('agent', true),
integration: 'agent',
direction: 'to-host',
type: 'data',
},
],
[
'fromhost meta',
'.ds-.fleet-fileds-fromhost-meta-agent-2023.07.03-000001',
{
index: getFileMetadataIndexName('agent'),
integration: 'agent',
direction: 'from-host',
type: 'meta',
},
],
[
'fromhost data',
'.ds-.fleet-fileds-fromhost-data-endpoint-2023.07.03-000001',
{
index: getFileDataIndexName('endpoint'),
integration: 'endpoint',
direction: 'from-host',
type: 'data',
},
],
])('should parse index %s', (_, index, result) => {
expect(parseFileStorageIndex(index)).toEqual(result);
});
it('should error if index does not match a known pattern', () => {
expect(() => parseFileStorageIndex('foo')).toThrow(
'Unable to parse index [foo]. Does not match a known index pattern: [.fleet-fileds-fromhost-meta-* | ' +
'.fleet-fileds-fromhost-data-* | .fleet-fileds-tohost-meta-* | .fleet-fileds-tohost-data-*]'
);
});
});
});

View file

@ -0,0 +1,77 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { getFileDataIndexName, getFileMetadataIndexName } from '../../../common/services';
import {
FILE_STORAGE_DATA_INDEX_PATTERN,
FILE_STORAGE_METADATA_INDEX_PATTERN,
FILE_STORAGE_TO_HOST_DATA_INDEX_PATTERN,
FILE_STORAGE_TO_HOST_METADATA_INDEX_PATTERN,
} from '../../../common/constants';
interface ParsedFileStorageIndex {
index: string;
integration: string;
type: 'meta' | 'data';
direction: 'to-host' | 'from-host';
}
/**
* Given a document index (from either a file's metadata doc or a file's chunk doc), utility will
* parse it and return information about that index
* @param index
*/
export const parseFileStorageIndex = (index: string): ParsedFileStorageIndex => {
const response: ParsedFileStorageIndex = {
index: '',
integration: '',
type: 'meta',
direction: 'from-host',
};
const fileStorageIndexPatterns = [
FILE_STORAGE_METADATA_INDEX_PATTERN,
FILE_STORAGE_DATA_INDEX_PATTERN,
FILE_STORAGE_TO_HOST_METADATA_INDEX_PATTERN,
FILE_STORAGE_TO_HOST_DATA_INDEX_PATTERN,
];
for (const indexPattern of fileStorageIndexPatterns) {
const indexPrefix = indexPattern.substring(0, indexPattern.indexOf('*'));
if (index.includes(indexPrefix)) {
const isDeliveryToHost = index.includes('-tohost-');
const isDataIndex = index.includes('host-data-');
const integrationPosition = indexPattern.split('-').indexOf('*');
const integration = index
.replace(/^\.ds-/, '')
.split('-')
.at(integrationPosition);
if (!integration) {
throw new Error(`Index name ${index} does not seem to be a File storage index`);
}
response.direction = isDeliveryToHost ? 'to-host' : 'from-host';
response.type = isDataIndex ? 'data' : 'meta';
response.integration = integration;
response.index = isDataIndex
? getFileDataIndexName(response.integration, isDeliveryToHost)
: getFileMetadataIndexName(response.integration, isDeliveryToHost);
return response;
}
}
throw new Error(
`Unable to parse index [${index}]. Does not match a known index pattern: [${fileStorageIndexPatterns.join(
' | '
)}]`
);
};

View file

@ -21,8 +21,8 @@ import { appContextService } from '../services';
import { CheckDeletedFilesTask, TYPE, VERSION } from './check_deleted_files_task';
const MOCK_FILE_METADATA_INDEX = getFileMetadataIndexName('mock');
const MOCK_FILE_DATA_INDEX = getFileDataIndexName('mock');
const MOCK_FILE_METADATA_INDEX = '.ds-' + getFileMetadataIndexName('mock');
const MOCK_FILE_DATA_INDEX = '.ds-' + getFileDataIndexName('mock');
const MOCK_TASK_INSTANCE = {
id: `${TYPE}:${VERSION}`,
@ -100,6 +100,35 @@ describe('check deleted files task', () => {
return taskRunner.run();
};
it('should search both metadata indexes', async () => {
esClient.search.mockResolvedValue({
took: 5,
timed_out: false,
_shards: {
total: 1,
successful: 1,
skipped: 0,
failed: 0,
},
hits: {
total: {
value: 0,
relation: 'eq',
},
hits: [],
},
});
await runTask();
expect(esClient.search).toHaveBeenCalledWith(
expect.objectContaining({
index: ['.fleet-fileds-fromhost-meta-*', '.fleet-fileds-tohost-meta-*'],
}),
expect.anything()
);
});
it('should attempt to update deleted files', async () => {
// mock getReadyFiles search
esClient.search
@ -162,7 +191,7 @@ describe('check deleted files task', () => {
expect(esClient.updateByQuery).toHaveBeenCalledWith(
{
index: MOCK_FILE_METADATA_INDEX,
index: MOCK_FILE_METADATA_INDEX.replace('.ds-', ''),
query: {
ids: {
values: ['metadata-testid2'],