mirror of
https://github.com/elastic/kibana.git
synced 2025-04-24 17:59:23 -04:00
[Code] better limit the size of indexed files (#38327)
* [Code] better limit the size of indexed files * [Code] separate the batch index helper for document and symbols
This commit is contained in:
parent
dced039b68
commit
7f334c011d
7 changed files with 53 additions and 27 deletions
|
@ -41,7 +41,7 @@ export const code = (kibana: any) =>
|
||||||
enabled: Joi.boolean().default(true),
|
enabled: Joi.boolean().default(true),
|
||||||
queueIndex: Joi.string().default('.code_internal-worker-queue'),
|
queueIndex: Joi.string().default('.code_internal-worker-queue'),
|
||||||
// 1 hour by default.
|
// 1 hour by default.
|
||||||
queueTimeout: Joi.number().default(moment.duration(1, 'hour').asMilliseconds()),
|
queueTimeoutMs: Joi.number().default(moment.duration(1, 'hour').asMilliseconds()),
|
||||||
// The frequency which update scheduler executes. 5 minutes by default.
|
// The frequency which update scheduler executes. 5 minutes by default.
|
||||||
updateFrequencyMs: Joi.number().default(moment.duration(5, 'minute').asMilliseconds()),
|
updateFrequencyMs: Joi.number().default(moment.duration(5, 'minute').asMilliseconds()),
|
||||||
// The frequency which index scheduler executes. 1 day by default.
|
// The frequency which index scheduler executes. 1 day by default.
|
||||||
|
|
|
@ -195,8 +195,13 @@ describe('lsp_incremental_indexer unit tests', () => {
|
||||||
// There are 3 MODIFIED items. 1 file + 1 symbol + 1 reference = 3 objects to
|
// There are 3 MODIFIED items. 1 file + 1 symbol + 1 reference = 3 objects to
|
||||||
// index for each item. Total doc indexed should be 3 * 3 = 9, which can be
|
// index for each item. Total doc indexed should be 3 * 3 = 9, which can be
|
||||||
// fitted into a single batch index.
|
// fitted into a single batch index.
|
||||||
assert.ok(bulkSpy.calledOnce);
|
assert.strictEqual(bulkSpy.callCount, 2);
|
||||||
assert.strictEqual(bulkSpy.getCall(0).args[0].body.length, 9 * 2);
|
let total = 0;
|
||||||
|
for (let i = 0; i < bulkSpy.callCount; i++) {
|
||||||
|
total += bulkSpy.getCall(i).args[0].body.length;
|
||||||
|
}
|
||||||
|
assert.strictEqual(total, 9 * 2);
|
||||||
|
|
||||||
// @ts-ignore
|
// @ts-ignore
|
||||||
}).timeout(20000);
|
}).timeout(20000);
|
||||||
|
|
||||||
|
@ -294,8 +299,12 @@ describe('lsp_incremental_indexer unit tests', () => {
|
||||||
// There are 3 MODIFIED items, but 1 item after the checkpoint. 1 file
|
// There are 3 MODIFIED items, but 1 item after the checkpoint. 1 file
|
||||||
// + 1 symbol + 1 ref = 3 objects to be indexed for each item. Total doc
|
// + 1 symbol + 1 ref = 3 objects to be indexed for each item. Total doc
|
||||||
// indexed should be 3 * 1 = 3, which can be fitted into a single batch index.
|
// indexed should be 3 * 1 = 3, which can be fitted into a single batch index.
|
||||||
assert.ok(bulkSpy.calledOnce);
|
assert.strictEqual(bulkSpy.callCount, 2);
|
||||||
assert.strictEqual(bulkSpy.getCall(0).args[0].body.length, 3 * 2);
|
let total = 0;
|
||||||
|
for (let i = 0; i < bulkSpy.callCount; i++) {
|
||||||
|
total += bulkSpy.getCall(i).args[0].body.length;
|
||||||
|
}
|
||||||
|
assert.strictEqual(total, 3 * 2);
|
||||||
assert.strictEqual(deleteByQuerySpy.callCount, 2);
|
assert.strictEqual(deleteByQuerySpy.callCount, 2);
|
||||||
// @ts-ignore
|
// @ts-ignore
|
||||||
}).timeout(20000);
|
}).timeout(20000);
|
||||||
|
|
|
@ -205,9 +205,13 @@ describe('lsp_indexer unit tests', function(this: any) {
|
||||||
// The rest 158 files will only be indexed for document.
|
// The rest 158 files will only be indexed for document.
|
||||||
// There are also 10 binary files to be excluded.
|
// There are also 10 binary files to be excluded.
|
||||||
// So the total number of index requests will be 66 + 158 - 10 = 214.
|
// So the total number of index requests will be 66 + 158 - 10 = 214.
|
||||||
assert.ok(bulkSpy.calledOnce);
|
assert.strictEqual(bulkSpy.callCount, 5);
|
||||||
assert.strictEqual(lspSendRequestSpy.callCount, 22);
|
assert.strictEqual(lspSendRequestSpy.callCount, 22);
|
||||||
assert.strictEqual(bulkSpy.getCall(0).args[0].body.length, 214 * 2);
|
let total = 0;
|
||||||
|
for (let i = 0; i < bulkSpy.callCount; i++) {
|
||||||
|
total += bulkSpy.getCall(i).args[0].body.length;
|
||||||
|
}
|
||||||
|
assert.strictEqual(total, 214 * 2);
|
||||||
// @ts-ignore
|
// @ts-ignore
|
||||||
}).timeout(20000);
|
}).timeout(20000);
|
||||||
|
|
||||||
|
@ -319,9 +323,13 @@ describe('lsp_indexer unit tests', function(this: any) {
|
||||||
// 3 * 11 = 33. Also there are 15 files without supported language. Only one
|
// 3 * 11 = 33. Also there are 15 files without supported language. Only one
|
||||||
// document will be indexed for these files. So total index requests would be
|
// document will be indexed for these files. So total index requests would be
|
||||||
// 33 + 15 = 48.
|
// 33 + 15 = 48.
|
||||||
assert.ok(bulkSpy.calledOnce);
|
assert.strictEqual(bulkSpy.callCount, 2);
|
||||||
assert.strictEqual(lspSendRequestSpy.callCount, 11);
|
assert.strictEqual(lspSendRequestSpy.callCount, 11);
|
||||||
assert.strictEqual(bulkSpy.getCall(0).args[0].body.length, 48 * 2);
|
let total = 0;
|
||||||
|
for (let i = 0; i < bulkSpy.callCount; i++) {
|
||||||
|
total += bulkSpy.getCall(i).args[0].body.length;
|
||||||
|
}
|
||||||
|
assert.strictEqual(total, 48 * 2);
|
||||||
// @ts-ignore
|
// @ts-ignore
|
||||||
}).timeout(20000);
|
}).timeout(20000);
|
||||||
});
|
});
|
||||||
|
|
|
@ -182,13 +182,13 @@ export class LspIncrementalIndexer extends LspIndexer {
|
||||||
if (response && response.result.length > 0) {
|
if (response && response.result.length > 0) {
|
||||||
const { symbols, references } = response.result[0];
|
const { symbols, references } = response.result[0];
|
||||||
for (const symbol of symbols) {
|
for (const symbol of symbols) {
|
||||||
await this.batchIndexHelper.index(SymbolIndexName(repoUri), symbol);
|
await this.lspBatchIndexHelper.index(SymbolIndexName(repoUri), symbol);
|
||||||
symbolNames.add(symbol.symbolInformation.name);
|
symbolNames.add(symbol.symbolInformation.name);
|
||||||
}
|
}
|
||||||
stats.set(IndexStatsKey.Symbol, symbols.length);
|
stats.set(IndexStatsKey.Symbol, symbols.length);
|
||||||
|
|
||||||
for (const ref of references) {
|
for (const ref of references) {
|
||||||
await this.batchIndexHelper.index(ReferenceIndexName(repoUri), ref);
|
await this.lspBatchIndexHelper.index(ReferenceIndexName(repoUri), ref);
|
||||||
}
|
}
|
||||||
stats.set(IndexStatsKey.Reference, references.length);
|
stats.set(IndexStatsKey.Reference, references.length);
|
||||||
} else {
|
} else {
|
||||||
|
@ -217,7 +217,7 @@ export class LspIncrementalIndexer extends LspIndexer {
|
||||||
language,
|
language,
|
||||||
qnames: Array.from(symbolNames),
|
qnames: Array.from(symbolNames),
|
||||||
};
|
};
|
||||||
await this.batchIndexHelper.index(DocumentIndexName(repoUri), body);
|
await this.docBatchIndexHelper.index(DocumentIndexName(repoUri), body);
|
||||||
stats.set(IndexStatsKey.File, 1);
|
stats.set(IndexStatsKey.File, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -31,7 +31,13 @@ import { ALL_RESERVED, DocumentIndexName, ReferenceIndexName, SymbolIndexName }
|
||||||
|
|
||||||
export class LspIndexer extends AbstractIndexer {
|
export class LspIndexer extends AbstractIndexer {
|
||||||
protected type: string = 'lsp';
|
protected type: string = 'lsp';
|
||||||
protected batchIndexHelper: BatchIndexHelper;
|
// Batch index helper for symbols/references
|
||||||
|
protected lspBatchIndexHelper: BatchIndexHelper;
|
||||||
|
// Batch index helper for documents
|
||||||
|
protected docBatchIndexHelper: BatchIndexHelper;
|
||||||
|
|
||||||
|
private LSP_BATCH_INDEX_SIZE = 1000;
|
||||||
|
private DOC_BATCH_INDEX_SIZE = 50;
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
protected readonly repoUri: RepositoryUri,
|
protected readonly repoUri: RepositoryUri,
|
||||||
|
@ -43,7 +49,8 @@ export class LspIndexer extends AbstractIndexer {
|
||||||
) {
|
) {
|
||||||
super(repoUri, revision, client, log);
|
super(repoUri, revision, client, log);
|
||||||
|
|
||||||
this.batchIndexHelper = new BatchIndexHelper(client, log);
|
this.lspBatchIndexHelper = new BatchIndexHelper(client, log, this.LSP_BATCH_INDEX_SIZE);
|
||||||
|
this.docBatchIndexHelper = new BatchIndexHelper(client, log, this.DOC_BATCH_INDEX_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
public async start(progressReporter?: ProgressReporter, checkpointReq?: LspIndexRequest) {
|
public async start(progressReporter?: ProgressReporter, checkpointReq?: LspIndexRequest) {
|
||||||
|
@ -52,13 +59,15 @@ export class LspIndexer extends AbstractIndexer {
|
||||||
} finally {
|
} finally {
|
||||||
if (!this.isCancelled()) {
|
if (!this.isCancelled()) {
|
||||||
// Flush all the index request still in the cache for bulk index.
|
// Flush all the index request still in the cache for bulk index.
|
||||||
this.batchIndexHelper.flush();
|
this.lspBatchIndexHelper.flush();
|
||||||
|
this.docBatchIndexHelper.flush();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public cancel() {
|
public cancel() {
|
||||||
this.batchIndexHelper.cancel();
|
this.lspBatchIndexHelper.cancel();
|
||||||
|
this.docBatchIndexHelper.cancel();
|
||||||
super.cancel();
|
super.cancel();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -193,17 +202,17 @@ export class LspIndexer extends AbstractIndexer {
|
||||||
const lstat = util.promisify(fs.lstat);
|
const lstat = util.promisify(fs.lstat);
|
||||||
const stat = await lstat(localFilePath);
|
const stat = await lstat(localFilePath);
|
||||||
|
|
||||||
|
if (stat.size > TEXT_FILE_LIMIT) {
|
||||||
|
this.log.debug(`File size exceeds limit. Skip index.`);
|
||||||
|
return stats;
|
||||||
|
}
|
||||||
|
|
||||||
const readLink = util.promisify(fs.readlink);
|
const readLink = util.promisify(fs.readlink);
|
||||||
const readFile = util.promisify(fs.readFile);
|
const readFile = util.promisify(fs.readFile);
|
||||||
const content = stat.isSymbolicLink()
|
const content = stat.isSymbolicLink()
|
||||||
? await readLink(localFilePath, 'utf8')
|
? await readLink(localFilePath, 'utf8')
|
||||||
: await readFile(localFilePath, 'utf8');
|
: await readFile(localFilePath, 'utf8');
|
||||||
|
|
||||||
if (content.length > TEXT_FILE_LIMIT) {
|
|
||||||
this.log.debug(`File size exceeds limit. Skip index.`);
|
|
||||||
return stats;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const lang = detectLanguageByFilename(filePath);
|
const lang = detectLanguageByFilename(filePath);
|
||||||
// filter file by language
|
// filter file by language
|
||||||
|
@ -218,13 +227,13 @@ export class LspIndexer extends AbstractIndexer {
|
||||||
if (response && response.result && response.result.length > 0 && response.result[0]) {
|
if (response && response.result && response.result.length > 0 && response.result[0]) {
|
||||||
const { symbols, references } = response.result[0];
|
const { symbols, references } = response.result[0];
|
||||||
for (const symbol of symbols) {
|
for (const symbol of symbols) {
|
||||||
await this.batchIndexHelper.index(SymbolIndexName(repoUri), symbol);
|
await this.lspBatchIndexHelper.index(SymbolIndexName(repoUri), symbol);
|
||||||
symbolNames.add(symbol.symbolInformation.name);
|
symbolNames.add(symbol.symbolInformation.name);
|
||||||
}
|
}
|
||||||
stats.set(IndexStatsKey.Symbol, symbols.length);
|
stats.set(IndexStatsKey.Symbol, symbols.length);
|
||||||
|
|
||||||
for (const ref of references) {
|
for (const ref of references) {
|
||||||
await this.batchIndexHelper.index(ReferenceIndexName(repoUri), ref);
|
await this.lspBatchIndexHelper.index(ReferenceIndexName(repoUri), ref);
|
||||||
}
|
}
|
||||||
stats.set(IndexStatsKey.Reference, references.length);
|
stats.set(IndexStatsKey.Reference, references.length);
|
||||||
} else {
|
} else {
|
||||||
|
@ -251,7 +260,7 @@ export class LspIndexer extends AbstractIndexer {
|
||||||
language,
|
language,
|
||||||
qnames: Array.from(symbolNames),
|
qnames: Array.from(symbolNames),
|
||||||
};
|
};
|
||||||
await this.batchIndexHelper.index(DocumentIndexName(repoUri), body);
|
await this.docBatchIndexHelper.index(DocumentIndexName(repoUri), body);
|
||||||
stats.set(IndexStatsKey.File, 1);
|
stats.set(IndexStatsKey.File, 1);
|
||||||
return stats;
|
return stats;
|
||||||
}
|
}
|
||||||
|
|
|
@ -155,7 +155,7 @@ async function initCodeNode(server: Server, serverOptions: ServerOptions, log: L
|
||||||
|
|
||||||
log.info('Initializing Code plugin as code-node.');
|
log.info('Initializing Code plugin as code-node.');
|
||||||
const queueIndex: string = server.config().get('xpack.code.queueIndex');
|
const queueIndex: string = server.config().get('xpack.code.queueIndex');
|
||||||
const queueTimeout: number = server.config().get('xpack.code.queueTimeout');
|
const queueTimeoutMs: number = server.config().get('xpack.code.queueTimeoutMs');
|
||||||
const devMode: boolean = server.config().get('env.dev');
|
const devMode: boolean = server.config().get('env.dev');
|
||||||
|
|
||||||
const esClient: EsClient = new EsClientWithInternalRequest(server);
|
const esClient: EsClient = new EsClientWithInternalRequest(server);
|
||||||
|
@ -198,7 +198,7 @@ async function initCodeNode(server: Server, serverOptions: ServerOptions, log: L
|
||||||
// Initialize queue.
|
// Initialize queue.
|
||||||
const queue = new Esqueue(queueIndex, {
|
const queue = new Esqueue(queueIndex, {
|
||||||
client: esClient,
|
client: esClient,
|
||||||
timeout: queueTimeout,
|
timeout: queueTimeoutMs,
|
||||||
});
|
});
|
||||||
const indexWorker = new IndexWorker(
|
const indexWorker = new IndexWorker(
|
||||||
queue,
|
queue,
|
||||||
|
|
|
@ -20,7 +20,7 @@ export const emptyAsyncFunc = async (_: AnyObject): Promise<any> => {
|
||||||
const TEST_OPTIONS = {
|
const TEST_OPTIONS = {
|
||||||
enabled: true,
|
enabled: true,
|
||||||
queueIndex: '.code_internal-worker-queue',
|
queueIndex: '.code_internal-worker-queue',
|
||||||
queueTimeout: 60 * 60 * 1000, // 1 hour by default
|
queueTimeoutMs: 60 * 60 * 1000, // 1 hour by default
|
||||||
updateFreqencyMs: 5 * 60 * 1000, // 5 minutes by default
|
updateFreqencyMs: 5 * 60 * 1000, // 5 minutes by default
|
||||||
indexFrequencyMs: 24 * 60 * 60 * 1000, // 1 day by default
|
indexFrequencyMs: 24 * 60 * 60 * 1000, // 1 day by default
|
||||||
lsp: {
|
lsp: {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue