[Code] better limit the size of indexed files (#38327)

* [Code] better limit the size of indexed files

* [Code] separate the batch index helper for document and symbols
This commit is contained in:
Mengwei Ding 2019-06-07 12:26:19 -07:00 committed by GitHub
parent dced039b68
commit 7f334c011d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 53 additions and 27 deletions

View file

@ -41,7 +41,7 @@ export const code = (kibana: any) =>
enabled: Joi.boolean().default(true), enabled: Joi.boolean().default(true),
queueIndex: Joi.string().default('.code_internal-worker-queue'), queueIndex: Joi.string().default('.code_internal-worker-queue'),
// 1 hour by default. // 1 hour by default.
queueTimeout: Joi.number().default(moment.duration(1, 'hour').asMilliseconds()), queueTimeoutMs: Joi.number().default(moment.duration(1, 'hour').asMilliseconds()),
// The frequency at which the update scheduler executes. 5 minutes by default. // The frequency at which the update scheduler executes. 5 minutes by default.
updateFrequencyMs: Joi.number().default(moment.duration(5, 'minute').asMilliseconds()), updateFrequencyMs: Joi.number().default(moment.duration(5, 'minute').asMilliseconds()),
// The frequency at which the index scheduler executes. 1 day by default. // The frequency at which the index scheduler executes. 1 day by default.

View file

@ -195,8 +195,13 @@ describe('lsp_incremental_indexer unit tests', () => {
// There are 3 MODIFIED items. 1 file + 1 symbol + 1 reference = 3 objects to // There are 3 MODIFIED items. 1 file + 1 symbol + 1 reference = 3 objects to
// index for each item. Total doc indexed should be 3 * 3 = 9, which can be // index for each item. Total doc indexed should be 3 * 3 = 9, which can be
// fitted into a single batch index. // fitted into a single batch index.
assert.ok(bulkSpy.calledOnce); assert.strictEqual(bulkSpy.callCount, 2);
assert.strictEqual(bulkSpy.getCall(0).args[0].body.length, 9 * 2); let total = 0;
for (let i = 0; i < bulkSpy.callCount; i++) {
total += bulkSpy.getCall(i).args[0].body.length;
}
assert.strictEqual(total, 9 * 2);
// @ts-ignore // @ts-ignore
}).timeout(20000); }).timeout(20000);
@ -294,8 +299,12 @@ describe('lsp_incremental_indexer unit tests', () => {
// There are 3 MODIFIED items, but 1 item after the checkpoint. 1 file // There are 3 MODIFIED items, but 1 item after the checkpoint. 1 file
// + 1 symbol + 1 ref = 3 objects to be indexed for each item. Total doc // + 1 symbol + 1 ref = 3 objects to be indexed for each item. Total doc
// indexed should be 3 * 1 = 3, which can be fitted into a single batch index. // indexed should be 3 * 1 = 3, which can be fitted into a single batch index.
assert.ok(bulkSpy.calledOnce); assert.strictEqual(bulkSpy.callCount, 2);
assert.strictEqual(bulkSpy.getCall(0).args[0].body.length, 3 * 2); let total = 0;
for (let i = 0; i < bulkSpy.callCount; i++) {
total += bulkSpy.getCall(i).args[0].body.length;
}
assert.strictEqual(total, 3 * 2);
assert.strictEqual(deleteByQuerySpy.callCount, 2); assert.strictEqual(deleteByQuerySpy.callCount, 2);
// @ts-ignore // @ts-ignore
}).timeout(20000); }).timeout(20000);

View file

@ -205,9 +205,13 @@ describe('lsp_indexer unit tests', function(this: any) {
// The remaining 158 files will only be indexed as documents. // The remaining 158 files will only be indexed as documents.
// There are also 10 binary files to be excluded. // There are also 10 binary files to be excluded.
// So the total number of index requests will be 66 + 158 - 10 = 214. // So the total number of index requests will be 66 + 158 - 10 = 214.
assert.ok(bulkSpy.calledOnce); assert.strictEqual(bulkSpy.callCount, 5);
assert.strictEqual(lspSendRequestSpy.callCount, 22); assert.strictEqual(lspSendRequestSpy.callCount, 22);
assert.strictEqual(bulkSpy.getCall(0).args[0].body.length, 214 * 2); let total = 0;
for (let i = 0; i < bulkSpy.callCount; i++) {
total += bulkSpy.getCall(i).args[0].body.length;
}
assert.strictEqual(total, 214 * 2);
// @ts-ignore // @ts-ignore
}).timeout(20000); }).timeout(20000);
@ -319,9 +323,13 @@ describe('lsp_indexer unit tests', function(this: any) {
// 3 * 11 = 33. Also there are 15 files without supported language. Only one // 3 * 11 = 33. Also there are 15 files without supported language. Only one
// document will be indexed for these files. So total index requests would be // document will be indexed for these files. So total index requests would be
// 33 + 15 = 48. // 33 + 15 = 48.
assert.ok(bulkSpy.calledOnce); assert.strictEqual(bulkSpy.callCount, 2);
assert.strictEqual(lspSendRequestSpy.callCount, 11); assert.strictEqual(lspSendRequestSpy.callCount, 11);
assert.strictEqual(bulkSpy.getCall(0).args[0].body.length, 48 * 2); let total = 0;
for (let i = 0; i < bulkSpy.callCount; i++) {
total += bulkSpy.getCall(i).args[0].body.length;
}
assert.strictEqual(total, 48 * 2);
// @ts-ignore // @ts-ignore
}).timeout(20000); }).timeout(20000);
}); });

View file

@ -182,13 +182,13 @@ export class LspIncrementalIndexer extends LspIndexer {
if (response && response.result.length > 0) { if (response && response.result.length > 0) {
const { symbols, references } = response.result[0]; const { symbols, references } = response.result[0];
for (const symbol of symbols) { for (const symbol of symbols) {
await this.batchIndexHelper.index(SymbolIndexName(repoUri), symbol); await this.lspBatchIndexHelper.index(SymbolIndexName(repoUri), symbol);
symbolNames.add(symbol.symbolInformation.name); symbolNames.add(symbol.symbolInformation.name);
} }
stats.set(IndexStatsKey.Symbol, symbols.length); stats.set(IndexStatsKey.Symbol, symbols.length);
for (const ref of references) { for (const ref of references) {
await this.batchIndexHelper.index(ReferenceIndexName(repoUri), ref); await this.lspBatchIndexHelper.index(ReferenceIndexName(repoUri), ref);
} }
stats.set(IndexStatsKey.Reference, references.length); stats.set(IndexStatsKey.Reference, references.length);
} else { } else {
@ -217,7 +217,7 @@ export class LspIncrementalIndexer extends LspIndexer {
language, language,
qnames: Array.from(symbolNames), qnames: Array.from(symbolNames),
}; };
await this.batchIndexHelper.index(DocumentIndexName(repoUri), body); await this.docBatchIndexHelper.index(DocumentIndexName(repoUri), body);
stats.set(IndexStatsKey.File, 1); stats.set(IndexStatsKey.File, 1);
} }

View file

@ -31,7 +31,13 @@ import { ALL_RESERVED, DocumentIndexName, ReferenceIndexName, SymbolIndexName }
export class LspIndexer extends AbstractIndexer { export class LspIndexer extends AbstractIndexer {
protected type: string = 'lsp'; protected type: string = 'lsp';
protected batchIndexHelper: BatchIndexHelper; // Batch index helper for symbols/references
protected lspBatchIndexHelper: BatchIndexHelper;
// Batch index helper for documents
protected docBatchIndexHelper: BatchIndexHelper;
private LSP_BATCH_INDEX_SIZE = 1000;
private DOC_BATCH_INDEX_SIZE = 50;
constructor( constructor(
protected readonly repoUri: RepositoryUri, protected readonly repoUri: RepositoryUri,
@ -43,7 +49,8 @@ export class LspIndexer extends AbstractIndexer {
) { ) {
super(repoUri, revision, client, log); super(repoUri, revision, client, log);
this.batchIndexHelper = new BatchIndexHelper(client, log); this.lspBatchIndexHelper = new BatchIndexHelper(client, log, this.LSP_BATCH_INDEX_SIZE);
this.docBatchIndexHelper = new BatchIndexHelper(client, log, this.DOC_BATCH_INDEX_SIZE);
} }
public async start(progressReporter?: ProgressReporter, checkpointReq?: LspIndexRequest) { public async start(progressReporter?: ProgressReporter, checkpointReq?: LspIndexRequest) {
@ -52,13 +59,15 @@ export class LspIndexer extends AbstractIndexer {
} finally { } finally {
if (!this.isCancelled()) { if (!this.isCancelled()) {
// Flush all the index requests still in the cache for bulk indexing. // Flush all the index requests still in the cache for bulk indexing.
this.batchIndexHelper.flush(); this.lspBatchIndexHelper.flush();
this.docBatchIndexHelper.flush();
} }
} }
} }
public cancel() { public cancel() {
this.batchIndexHelper.cancel(); this.lspBatchIndexHelper.cancel();
this.docBatchIndexHelper.cancel();
super.cancel(); super.cancel();
} }
@ -193,17 +202,17 @@ export class LspIndexer extends AbstractIndexer {
const lstat = util.promisify(fs.lstat); const lstat = util.promisify(fs.lstat);
const stat = await lstat(localFilePath); const stat = await lstat(localFilePath);
if (stat.size > TEXT_FILE_LIMIT) {
this.log.debug(`File size exceeds limit. Skip index.`);
return stats;
}
const readLink = util.promisify(fs.readlink); const readLink = util.promisify(fs.readlink);
const readFile = util.promisify(fs.readFile); const readFile = util.promisify(fs.readFile);
const content = stat.isSymbolicLink() const content = stat.isSymbolicLink()
? await readLink(localFilePath, 'utf8') ? await readLink(localFilePath, 'utf8')
: await readFile(localFilePath, 'utf8'); : await readFile(localFilePath, 'utf8');
if (content.length > TEXT_FILE_LIMIT) {
this.log.debug(`File size exceeds limit. Skip index.`);
return stats;
}
try { try {
const lang = detectLanguageByFilename(filePath); const lang = detectLanguageByFilename(filePath);
// filter file by language // filter file by language
@ -218,13 +227,13 @@ export class LspIndexer extends AbstractIndexer {
if (response && response.result && response.result.length > 0 && response.result[0]) { if (response && response.result && response.result.length > 0 && response.result[0]) {
const { symbols, references } = response.result[0]; const { symbols, references } = response.result[0];
for (const symbol of symbols) { for (const symbol of symbols) {
await this.batchIndexHelper.index(SymbolIndexName(repoUri), symbol); await this.lspBatchIndexHelper.index(SymbolIndexName(repoUri), symbol);
symbolNames.add(symbol.symbolInformation.name); symbolNames.add(symbol.symbolInformation.name);
} }
stats.set(IndexStatsKey.Symbol, symbols.length); stats.set(IndexStatsKey.Symbol, symbols.length);
for (const ref of references) { for (const ref of references) {
await this.batchIndexHelper.index(ReferenceIndexName(repoUri), ref); await this.lspBatchIndexHelper.index(ReferenceIndexName(repoUri), ref);
} }
stats.set(IndexStatsKey.Reference, references.length); stats.set(IndexStatsKey.Reference, references.length);
} else { } else {
@ -251,7 +260,7 @@ export class LspIndexer extends AbstractIndexer {
language, language,
qnames: Array.from(symbolNames), qnames: Array.from(symbolNames),
}; };
await this.batchIndexHelper.index(DocumentIndexName(repoUri), body); await this.docBatchIndexHelper.index(DocumentIndexName(repoUri), body);
stats.set(IndexStatsKey.File, 1); stats.set(IndexStatsKey.File, 1);
return stats; return stats;
} }

View file

@ -155,7 +155,7 @@ async function initCodeNode(server: Server, serverOptions: ServerOptions, log: L
log.info('Initializing Code plugin as code-node.'); log.info('Initializing Code plugin as code-node.');
const queueIndex: string = server.config().get('xpack.code.queueIndex'); const queueIndex: string = server.config().get('xpack.code.queueIndex');
const queueTimeout: number = server.config().get('xpack.code.queueTimeout'); const queueTimeoutMs: number = server.config().get('xpack.code.queueTimeoutMs');
const devMode: boolean = server.config().get('env.dev'); const devMode: boolean = server.config().get('env.dev');
const esClient: EsClient = new EsClientWithInternalRequest(server); const esClient: EsClient = new EsClientWithInternalRequest(server);
@ -198,7 +198,7 @@ async function initCodeNode(server: Server, serverOptions: ServerOptions, log: L
// Initialize queue. // Initialize queue.
const queue = new Esqueue(queueIndex, { const queue = new Esqueue(queueIndex, {
client: esClient, client: esClient,
timeout: queueTimeout, timeout: queueTimeoutMs,
}); });
const indexWorker = new IndexWorker( const indexWorker = new IndexWorker(
queue, queue,

View file

@ -20,7 +20,7 @@ export const emptyAsyncFunc = async (_: AnyObject): Promise<any> => {
const TEST_OPTIONS = { const TEST_OPTIONS = {
enabled: true, enabled: true,
queueIndex: '.code_internal-worker-queue', queueIndex: '.code_internal-worker-queue',
queueTimeout: 60 * 60 * 1000, // 1 hour by default queueTimeoutMs: 60 * 60 * 1000, // 1 hour by default
updateFreqencyMs: 5 * 60 * 1000, // 5 minutes by default updateFreqencyMs: 5 * 60 * 1000, // 5 minutes by default
indexFrequencyMs: 24 * 60 * 60 * 1000, // 1 day by default indexFrequencyMs: 24 * 60 * 60 * 1000, // 1 day by default
lsp: { lsp: {