[Code] better limit the size of indexed files (#38327)

* [Code] better limit the size of indexed files

* [Code] separate the batch index helper for document and symbols
This commit is contained in:
Mengwei Ding 2019-06-07 12:26:19 -07:00 committed by GitHub
parent dced039b68
commit 7f334c011d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 53 additions and 27 deletions

View file

@ -41,7 +41,7 @@ export const code = (kibana: any) =>
enabled: Joi.boolean().default(true),
queueIndex: Joi.string().default('.code_internal-worker-queue'),
// 1 hour by default.
queueTimeout: Joi.number().default(moment.duration(1, 'hour').asMilliseconds()),
queueTimeoutMs: Joi.number().default(moment.duration(1, 'hour').asMilliseconds()),
// The frequency at which the update scheduler executes. 5 minutes by default.
updateFrequencyMs: Joi.number().default(moment.duration(5, 'minute').asMilliseconds()),
// The frequency at which the index scheduler executes. 1 day by default.

View file

@ -195,8 +195,13 @@ describe('lsp_incremental_indexer unit tests', () => {
// There are 3 MODIFIED items. 1 file + 1 symbol + 1 reference = 3 objects to
// index for each item. Total docs indexed should be 3 * 3 = 9. Documents and
// symbols/references go through separate batch index helpers, so the 9 objects
// are expected to arrive in 2 bulk requests.
assert.ok(bulkSpy.calledOnce);
assert.strictEqual(bulkSpy.getCall(0).args[0].body.length, 9 * 2);
assert.strictEqual(bulkSpy.callCount, 2);
let total = 0;
for (let i = 0; i < bulkSpy.callCount; i++) {
total += bulkSpy.getCall(i).args[0].body.length;
}
assert.strictEqual(total, 9 * 2);
// @ts-ignore
}).timeout(20000);
@ -294,8 +299,12 @@ describe('lsp_incremental_indexer unit tests', () => {
// There are 3 MODIFIED items, but only 1 item after the checkpoint. 1 file
// + 1 symbol + 1 ref = 3 objects to be indexed for each item. Total docs
// indexed should be 3 * 1 = 3. Documents and symbols/references go through
// separate batch index helpers, so they are expected to arrive in 2 bulk requests.
assert.ok(bulkSpy.calledOnce);
assert.strictEqual(bulkSpy.getCall(0).args[0].body.length, 3 * 2);
assert.strictEqual(bulkSpy.callCount, 2);
let total = 0;
for (let i = 0; i < bulkSpy.callCount; i++) {
total += bulkSpy.getCall(i).args[0].body.length;
}
assert.strictEqual(total, 3 * 2);
assert.strictEqual(deleteByQuerySpy.callCount, 2);
// @ts-ignore
}).timeout(20000);

View file

@ -205,9 +205,13 @@ describe('lsp_indexer unit tests', function(this: any) {
// The remaining 158 files will only be indexed as documents.
// There are also 10 binary files to be excluded.
// So the total number of index requests will be 66 + 158 - 10 = 214.
assert.ok(bulkSpy.calledOnce);
assert.strictEqual(bulkSpy.callCount, 5);
assert.strictEqual(lspSendRequestSpy.callCount, 22);
assert.strictEqual(bulkSpy.getCall(0).args[0].body.length, 214 * 2);
let total = 0;
for (let i = 0; i < bulkSpy.callCount; i++) {
total += bulkSpy.getCall(i).args[0].body.length;
}
assert.strictEqual(total, 214 * 2);
// @ts-ignore
}).timeout(20000);
@ -319,9 +323,13 @@ describe('lsp_indexer unit tests', function(this: any) {
// 3 * 11 = 33. There are also 15 files without a supported language. Only one
// document will be indexed for each of these files. So the total number of
// index requests would be 33 + 15 = 48.
assert.ok(bulkSpy.calledOnce);
assert.strictEqual(bulkSpy.callCount, 2);
assert.strictEqual(lspSendRequestSpy.callCount, 11);
assert.strictEqual(bulkSpy.getCall(0).args[0].body.length, 48 * 2);
let total = 0;
for (let i = 0; i < bulkSpy.callCount; i++) {
total += bulkSpy.getCall(i).args[0].body.length;
}
assert.strictEqual(total, 48 * 2);
// @ts-ignore
}).timeout(20000);
});

View file

@ -182,13 +182,13 @@ export class LspIncrementalIndexer extends LspIndexer {
if (response && response.result.length > 0) {
const { symbols, references } = response.result[0];
for (const symbol of symbols) {
await this.batchIndexHelper.index(SymbolIndexName(repoUri), symbol);
await this.lspBatchIndexHelper.index(SymbolIndexName(repoUri), symbol);
symbolNames.add(symbol.symbolInformation.name);
}
stats.set(IndexStatsKey.Symbol, symbols.length);
for (const ref of references) {
await this.batchIndexHelper.index(ReferenceIndexName(repoUri), ref);
await this.lspBatchIndexHelper.index(ReferenceIndexName(repoUri), ref);
}
stats.set(IndexStatsKey.Reference, references.length);
} else {
@ -217,7 +217,7 @@ export class LspIncrementalIndexer extends LspIndexer {
language,
qnames: Array.from(symbolNames),
};
await this.batchIndexHelper.index(DocumentIndexName(repoUri), body);
await this.docBatchIndexHelper.index(DocumentIndexName(repoUri), body);
stats.set(IndexStatsKey.File, 1);
}

View file

@ -31,7 +31,13 @@ import { ALL_RESERVED, DocumentIndexName, ReferenceIndexName, SymbolIndexName }
export class LspIndexer extends AbstractIndexer {
protected type: string = 'lsp';
protected batchIndexHelper: BatchIndexHelper;
// Batch index helper for symbols/references
protected lspBatchIndexHelper: BatchIndexHelper;
// Batch index helper for documents
protected docBatchIndexHelper: BatchIndexHelper;
private LSP_BATCH_INDEX_SIZE = 1000;
private DOC_BATCH_INDEX_SIZE = 50;
constructor(
protected readonly repoUri: RepositoryUri,
@ -43,7 +49,8 @@ export class LspIndexer extends AbstractIndexer {
) {
super(repoUri, revision, client, log);
this.batchIndexHelper = new BatchIndexHelper(client, log);
this.lspBatchIndexHelper = new BatchIndexHelper(client, log, this.LSP_BATCH_INDEX_SIZE);
this.docBatchIndexHelper = new BatchIndexHelper(client, log, this.DOC_BATCH_INDEX_SIZE);
}
public async start(progressReporter?: ProgressReporter, checkpointReq?: LspIndexRequest) {
@ -52,13 +59,15 @@ export class LspIndexer extends AbstractIndexer {
} finally {
if (!this.isCancelled()) {
// Flush all the index requests still in the cache for bulk indexing.
this.batchIndexHelper.flush();
this.lspBatchIndexHelper.flush();
this.docBatchIndexHelper.flush();
}
}
}
public cancel() {
this.batchIndexHelper.cancel();
this.lspBatchIndexHelper.cancel();
this.docBatchIndexHelper.cancel();
super.cancel();
}
@ -193,17 +202,17 @@ export class LspIndexer extends AbstractIndexer {
const lstat = util.promisify(fs.lstat);
const stat = await lstat(localFilePath);
if (stat.size > TEXT_FILE_LIMIT) {
this.log.debug(`File size exceeds limit. Skip index.`);
return stats;
}
const readLink = util.promisify(fs.readlink);
const readFile = util.promisify(fs.readFile);
const content = stat.isSymbolicLink()
? await readLink(localFilePath, 'utf8')
: await readFile(localFilePath, 'utf8');
if (content.length > TEXT_FILE_LIMIT) {
this.log.debug(`File size exceeds limit. Skip index.`);
return stats;
}
try {
const lang = detectLanguageByFilename(filePath);
// filter file by language
@ -218,13 +227,13 @@ export class LspIndexer extends AbstractIndexer {
if (response && response.result && response.result.length > 0 && response.result[0]) {
const { symbols, references } = response.result[0];
for (const symbol of symbols) {
await this.batchIndexHelper.index(SymbolIndexName(repoUri), symbol);
await this.lspBatchIndexHelper.index(SymbolIndexName(repoUri), symbol);
symbolNames.add(symbol.symbolInformation.name);
}
stats.set(IndexStatsKey.Symbol, symbols.length);
for (const ref of references) {
await this.batchIndexHelper.index(ReferenceIndexName(repoUri), ref);
await this.lspBatchIndexHelper.index(ReferenceIndexName(repoUri), ref);
}
stats.set(IndexStatsKey.Reference, references.length);
} else {
@ -251,7 +260,7 @@ export class LspIndexer extends AbstractIndexer {
language,
qnames: Array.from(symbolNames),
};
await this.batchIndexHelper.index(DocumentIndexName(repoUri), body);
await this.docBatchIndexHelper.index(DocumentIndexName(repoUri), body);
stats.set(IndexStatsKey.File, 1);
return stats;
}

View file

@ -155,7 +155,7 @@ async function initCodeNode(server: Server, serverOptions: ServerOptions, log: L
log.info('Initializing Code plugin as code-node.');
const queueIndex: string = server.config().get('xpack.code.queueIndex');
const queueTimeout: number = server.config().get('xpack.code.queueTimeout');
const queueTimeoutMs: number = server.config().get('xpack.code.queueTimeoutMs');
const devMode: boolean = server.config().get('env.dev');
const esClient: EsClient = new EsClientWithInternalRequest(server);
@ -198,7 +198,7 @@ async function initCodeNode(server: Server, serverOptions: ServerOptions, log: L
// Initialize queue.
const queue = new Esqueue(queueIndex, {
client: esClient,
timeout: queueTimeout,
timeout: queueTimeoutMs,
});
const indexWorker = new IndexWorker(
queue,

View file

@ -20,7 +20,7 @@ export const emptyAsyncFunc = async (_: AnyObject): Promise<any> => {
const TEST_OPTIONS = {
enabled: true,
queueIndex: '.code_internal-worker-queue',
queueTimeout: 60 * 60 * 1000, // 1 hour by default
queueTimeoutMs: 60 * 60 * 1000, // 1 hour by default
updateFreqencyMs: 5 * 60 * 1000, // 5 minutes by default
indexFrequencyMs: 24 * 60 * 60 * 1000, // 1 day by default
lsp: {