[kbn/babel-register] improve cache performance (#150261)

After https://github.com/elastic/kibana/pull/146212 it feels like the
babel-register cache is getting invalidated more frequently for some
reason. The current version of the cache stores only a single cache
entry per file path, which shouldn't be too big a problem on its own,
but with these changes several versions of a file will be cached. The
performance seems about equal, but because the cache now contains
multiple versions of a single file, we should spend less time
transpiling files when switching branches frequently.

---------

Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
Spencer 2023-02-06 19:50:42 -07:00 committed by GitHub
parent 55b66e20fe
commit 1ee97e1657
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 127 additions and 210 deletions

View file

@@ -1051,6 +1051,7 @@
"cypress-react-selector": "^3.0.0", "cypress-react-selector": "^3.0.0",
"cypress-real-events": "^1.7.6", "cypress-real-events": "^1.7.6",
"cypress-recurse": "^1.26.0", "cypress-recurse": "^1.26.0",
"date-fns": "^2.29.3",
"debug": "^2.6.9", "debug": "^2.6.9",
"delete-empty": "^2.0.0", "delete-empty": "^2.0.0",
"dependency-check": "^4.1.0", "dependency-check": "^4.1.0",

View file

@@ -35,6 +35,7 @@ BUNDLER_DEPS = [
"@npm//chalk", "@npm//chalk",
"@npm//pirates", "@npm//pirates",
"@npm//lmdb", "@npm//lmdb",
"@npm//date-fns",
"@npm//source-map-support", "@npm//source-map-support",
"//packages/kbn-repo-packages", "//packages/kbn-repo-packages",
"//packages/kbn-repo-info", "//packages/kbn-repo-info",

View file

@@ -10,7 +10,6 @@ const Fs = require('fs');
const Path = require('path'); const Path = require('path');
const Crypto = require('crypto'); const Crypto = require('crypto');
const { readHashOfPackageMap } = require('@kbn/repo-packages');
const babel = require('@babel/core'); const babel = require('@babel/core');
const peggy = require('@kbn/peggy'); const peggy = require('@kbn/peggy');
const { REPO_ROOT, UPSTREAM_BRANCH } = require('@kbn/repo-info'); const { REPO_ROOT, UPSTREAM_BRANCH } = require('@kbn/repo-info');
@@ -25,7 +24,6 @@ const { getBabelOptions } = require('@kbn/babel-transform');
*/ */
function determineCachePrefix() { function determineCachePrefix() {
const json = JSON.stringify({ const json = JSON.stringify({
synthPkgMapHash: readHashOfPackageMap(),
babelVersion: babel.version, babelVersion: babel.version,
peggyVersion: peggy.version, peggyVersion: peggy.version,
// get a config for a fake js, ts, and tsx file to make sure we // get a config for a fake js, ts, and tsx file to make sure we
@@ -63,8 +61,7 @@ function getCache() {
if (lmdbAvailable()) { if (lmdbAvailable()) {
log?.write('lmdb is available, using lmdb cache\n'); log?.write('lmdb is available, using lmdb cache\n');
return new (require('./lmdb_cache').LmdbCache)({ return new (require('./lmdb_cache').LmdbCache)({
pathRoot: REPO_ROOT, dir: Path.resolve(REPO_ROOT, 'data/babel_register_cache', UPSTREAM_BRANCH),
dir: Path.resolve(REPO_ROOT, 'data/babel_register_cache_v1', UPSTREAM_BRANCH),
prefix: determineCachePrefix(), prefix: determineCachePrefix(),
log, log,
}); });

View file

@@ -7,17 +7,21 @@
*/ */
const Path = require('path'); const Path = require('path');
const Crypto = require('crypto');
const startOfDay = /** @type {import('date-fns/startOfDay').default} */ (
/** @type {unknown} */ (require('date-fns/startOfDay'))
);
const chalk = require('chalk'); const chalk = require('chalk');
const LmdbStore = require('lmdb'); const LmdbStore = require('lmdb');
const GLOBAL_ATIME = `${Date.now()}`; const GLOBAL_ATIME = startOfDay(new Date()).valueOf();
const MINUTE = 1000 * 60; const MINUTE = 1000 * 60;
const HOUR = MINUTE * 60; const HOUR = MINUTE * 60;
const DAY = HOUR * 24; const DAY = HOUR * 24;
/** @typedef {import('./types').Cache} CacheInterface */ /** @typedef {import('./types').Cache} CacheInterface */
/** @typedef {import('lmdb').Database<string, string>} Db */ /** @typedef {import('lmdb').Database<import('./types').CacheEntry, string>} Db */
/** /**
* @param {Db} db * @param {Db} db
@@ -31,63 +35,29 @@ const dbName = (db) =>
* @implements {CacheInterface} * @implements {CacheInterface}
*/ */
class LmdbCache { class LmdbCache {
/** @type {import('lmdb').RootDatabase<string, string>} */ /** @type {import('lmdb').RootDatabase<import('./types').CacheEntry, string>} */
#codes; #db;
/** @type {Db} */
#atimes;
/** @type {Db} */
#mtimes;
/** @type {Db} */
#sourceMaps;
/** @type {string} */
#pathRoot;
/** @type {string} */
#prefix;
/** @type {import('stream').Writable | undefined} */ /** @type {import('stream').Writable | undefined} */
#log; #log;
/** @type {ReturnType<typeof setTimeout>} */ /** @type {string} */
#timer; #prefix;
/** /**
* @param {import('./types').CacheConfig} config * @param {import('./types').CacheConfig} config
*/ */
constructor(config) { constructor(config) {
if (!Path.isAbsolute(config.pathRoot)) {
throw new Error('cache requires an absolute path to resolve paths relative to');
}
this.#pathRoot = config.pathRoot;
this.#prefix = config.prefix;
this.#log = config.log; this.#log = config.log;
this.#prefix = config.prefix;
this.#codes = LmdbStore.open(config.dir, { this.#db = LmdbStore.open(Path.resolve(config.dir, 'v5'), {
name: 'codes', name: 'db',
encoding: 'string', encoding: 'json',
maxReaders: 500,
}); });
// TODO: redundant 'name' syntax is necessary because of a bug that I have yet to fix const lastClean = this.#db.get('@last clean');
this.#atimes = this.#codes.openDB('atimes', { if (!lastClean || lastClean[0] < GLOBAL_ATIME - 7 * DAY) {
name: 'atimes', try {
encoding: 'string', this.#pruneOldKeys();
}); } catch (error) {
this.#mtimes = this.#codes.openDB('mtimes', {
name: 'mtimes',
encoding: 'string',
});
this.#sourceMaps = this.#codes.openDB('sourceMaps', {
name: 'sourceMaps',
encoding: 'string',
});
// after the process has been running for 30 minutes prune the
// keys which haven't been used in 30 days. We use `unref()` to
// make sure this timer doesn't hold other processes open
// unexpectedly
this.#timer = setTimeout(() => {
this.#pruneOldKeys().catch((error) => {
process.stderr.write(` process.stderr.write(`
Failed to cleanup @kbn/babel-register cache: Failed to cleanup @kbn/babel-register cache:
@@ -95,83 +65,60 @@ Failed to cleanup @kbn/babel-register cache:
To eliminate this problem you may want to delete the "${Path.relative(process.cwd(), config.dir)}" To eliminate this problem you may want to delete the "${Path.relative(process.cwd(), config.dir)}"
directory and report this error to the Operations team.\n`); directory and report this error to the Operations team.\n`);
}); } finally {
}, 30 * MINUTE); this.#db.putSync('@last clean', [GLOBAL_ATIME, '', {}]);
}
// timer.unref is not defined in jest which emulates the dom by default
if (typeof this.#timer.unref === 'function') {
this.#timer.unref();
} }
} }
/** /**
* Get the cache key of the path and source from disk of a file
* @param {string} path * @param {string} path
* @param {string} source
* @returns {string}
*/ */
getMtime(path) { getKey(path, source) {
return this.#safeGet(this.#mtimes, this.#getKey(path)); return `${this.#prefix}:${Crypto.createHash('sha1').update(path).update(source).digest('hex')}`;
} }
/** /**
* @param {string} path * @param {string} key
* @returns {string|undefined}
*/ */
getCode(path) { getCode(key) {
const key = this.#getKey(path); const entry = this.#safeGet(this.#db, key);
const code = this.#safeGet(this.#codes, key);
if (code !== undefined) { if (entry !== undefined && entry[0] !== GLOBAL_ATIME) {
// when we use a file from the cache set the "atime" of that cache entry // when we use a file from the cache set the "atime" of that cache entry
// so that we know which cache items we use and which haven't been // so that we know which cache items we use and which haven't been
// touched in a long time (currently 30 days) // used in a long time (currently 30 days)
this.#safePut(this.#atimes, key, GLOBAL_ATIME); this.#safePut(this.#db, key, [GLOBAL_ATIME, entry[1], entry[2]]);
} }
return code; return entry?.[1];
} }
/** /**
* @param {string} path * @param {string} key
* @returns {object|undefined}
*/ */
getSourceMap(path) { getSourceMap(key) {
const map = this.#safeGet(this.#sourceMaps, this.#getKey(path)); const entry = this.#safeGet(this.#db, key);
if (typeof map === 'string') { if (entry) {
return JSON.parse(map); return entry[2];
} }
} }
close() {
clearTimeout(this.#timer);
}
/** /**
* @param {string} path * @param {string} key
* @param {{ mtime: string; code: string; map?: any }} file * @param {{ code: string, map: object }} entry
*/ */
async update(path, file) { async update(key, entry) {
const key = this.#getKey(path); this.#safePut(this.#db, key, [GLOBAL_ATIME, entry.code, entry.map]);
this.#safePut(this.#atimes, key, GLOBAL_ATIME);
this.#safePut(this.#mtimes, key, file.mtime);
this.#safePut(this.#codes, key, file.code);
if (file.map) {
this.#safePut(this.#sourceMaps, key, JSON.stringify(file.map));
}
} }
/** /**
* @param {string} path * @param {Db} db
*/
#getKey(path) {
const normalizedPath =
Path.sep !== '/'
? Path.relative(this.#pathRoot, path).split(Path.sep).join('/')
: Path.relative(this.#pathRoot, path);
return `${this.#prefix}:${normalizedPath}`;
}
/**
* @param {LmdbStore.Database<string, string>} db
* @param {string} key * @param {string} key
*/ */
#safeGet(db, key) { #safeGet(db, key) {
@@ -190,9 +137,9 @@ directory and report this error to the Operations team.\n`);
} }
/** /**
* @param {LmdbStore.Database<string, string>} db * @param {Db} db
* @param {string} key * @param {string} key
* @param {string} value * @param {import('./types').CacheEntry} value
*/ */
#safePut(db, key, value) { #safePut(db, key, value) {
try { try {
@@ -205,7 +152,7 @@ directory and report this error to the Operations team.\n`);
/** /**
* @param {string} type * @param {string} type
* @param {LmdbStore.Database<string, string>} db * @param {Db} db
* @param {string} key * @param {string} key
*/ */
#debug(type, db, key) { #debug(type, db, key) {
@@ -214,7 +161,7 @@ directory and report this error to the Operations team.\n`);
/** /**
* @param {'GET' | 'PUT'} type * @param {'GET' | 'PUT'} type
* @param {LmdbStore.Database<string, string>} db * @param {Db} db
* @param {string} key * @param {string} key
* @param {Error} error * @param {Error} error
*/ */
@@ -227,51 +174,36 @@ directory and report this error to the Operations team.\n`);
); );
} }
async #pruneOldKeys() { #pruneOldKeys() {
try {
const ATIME_LIMIT = Date.now() - 30 * DAY; const ATIME_LIMIT = Date.now() - 30 * DAY;
const BATCH_SIZE = 1000;
/** @type {string[]} */ /** @type {string[]} */
const validKeys = []; const toDelete = [];
/** @type {string[]} */ const flushDeletes = () => {
const invalidKeys = []; if (!toDelete.length) {
return;
for (const { key, value } of this.#atimes.getRange()) {
const atime = parseInt(`${value}`, 10);
if (Number.isNaN(atime) || atime < ATIME_LIMIT) {
invalidKeys.push(key);
} else {
validKeys.push(key);
} }
if (validKeys.length + invalidKeys.length >= BATCH_SIZE) { this.#db.transactionSync(() => {
const promises = new Set(); for (const key of toDelete) {
this.#db.removeSync(key);
if (invalidKeys.length) { }
for (const k of invalidKeys) { });
// all these promises are the same currently, so Set() will };
// optimise this to a single promise, but I wouldn't be shocked
// if a future version starts returning independent promises so for (const { key, value } of this.#db.getRange()) {
// this is just for some future-proofing if (Number.isNaN(value[0]) || value[0] < ATIME_LIMIT) {
promises.add(this.#atimes.remove(k)); toDelete.push(key);
promises.add(this.#mtimes.remove(k));
promises.add(this.#codes.remove(k)); // flush deletes early if there are many deleted
promises.add(this.#sourceMaps.remove(k)); if (toDelete.length > 10_000) {
flushDeletes();
}
} }
} else {
// delay a smidge to allow other things to happen before the next batch of checks
promises.add(new Promise((resolve) => setTimeout(resolve, 1)));
} }
invalidKeys.length = 0; // delete all the old keys
validKeys.length = 0; flushDeletes();
await Promise.all(Array.from(promises));
}
}
} catch {
// ignore errors, the cache is totally disposable and will rebuild if there is some sort of corruption
}
} }
} }

View file

@@ -18,7 +18,7 @@ const DIR = Path.resolve(__dirname, '../__tmp__/cache');
const makeTestLog = () => { const makeTestLog = () => {
const log = Object.assign( const log = Object.assign(
new Writable({ new Writable({
write(chunk, enc, cb) { write(chunk, _, cb) {
log.output += chunk; log.output += chunk;
cb(); cb();
}, },
@@ -39,50 +39,35 @@ const makeCache = (...options: ConstructorParameters<typeof LmdbCache>) => {
}; };
beforeEach(async () => await del(DIR)); beforeEach(async () => await del(DIR));
afterEach(async () => { afterEach(async () => await del(DIR));
await del(DIR);
for (const instance of instances) {
instance.close();
}
instances.length = 0;
});
it('returns undefined until values are set', async () => { it('returns undefined until values are set', async () => {
const path = '/foo/bar.js'; const path = '/foo/bar.js';
const mtime = new Date().toJSON(); const source = `console.log("hi, hello")`;
const log = makeTestLog(); const log = makeTestLog();
const cache = makeCache({ const cache = makeCache({
dir: DIR, dir: DIR,
prefix: 'prefix', prefix: 'prefix',
log, log,
pathRoot: '/foo/',
}); });
expect(cache.getMtime(path)).toBe(undefined); const key = cache.getKey(path, source);
expect(cache.getCode(path)).toBe(undefined); expect(cache.getCode(key)).toBe(undefined);
expect(cache.getSourceMap(path)).toBe(undefined); expect(cache.getSourceMap(key)).toBe(undefined);
await cache.update(path, { await cache.update(key, {
mtime,
code: 'var x = 1', code: 'var x = 1',
map: { foo: 'bar' }, map: { foo: 'bar' },
}); });
expect(cache.getMtime(path)).toBe(mtime); expect(cache.getCode(key)).toBe('var x = 1');
expect(cache.getCode(path)).toBe('var x = 1'); expect(cache.getSourceMap(key)).toEqual({ foo: 'bar' });
expect(cache.getSourceMap(path)).toEqual({ foo: 'bar' });
expect(log.output).toMatchInlineSnapshot(` expect(log.output).toMatchInlineSnapshot(`
"MISS [mtimes] prefix:bar.js "MISS [db] prefix:05a4b8198c4ec215d54d94681ef00ca9ecb45931
MISS [codes] prefix:bar.js MISS [db] prefix:05a4b8198c4ec215d54d94681ef00ca9ecb45931
MISS [sourceMaps] prefix:bar.js PUT [db] prefix:05a4b8198c4ec215d54d94681ef00ca9ecb45931
PUT [atimes] prefix:bar.js HIT [db] prefix:05a4b8198c4ec215d54d94681ef00ca9ecb45931
PUT [mtimes] prefix:bar.js HIT [db] prefix:05a4b8198c4ec215d54d94681ef00ca9ecb45931
PUT [codes] prefix:bar.js
PUT [sourceMaps] prefix:bar.js
HIT [mtimes] prefix:bar.js
HIT [codes] prefix:bar.js
PUT [atimes] prefix:bar.js
HIT [sourceMaps] prefix:bar.js
" "
`); `);
}); });

View file

@@ -12,6 +12,10 @@
* @implements {CacheInterface} * @implements {CacheInterface}
*/ */
class NoCacheCache { class NoCacheCache {
getKey() {
return '';
}
getCode() { getCode() {
return undefined; return undefined;
} }

View file

@@ -9,16 +9,16 @@
import { Writable } from 'stream'; import { Writable } from 'stream';
export interface CacheConfig { export interface CacheConfig {
pathRoot: string;
dir: string; dir: string;
prefix: string; prefix: string;
log?: Writable; log?: Writable;
} }
export interface Cache { export interface Cache {
getMtime(path: string): string | undefined; getKey(path: string, source: string): string;
getCode(path: string): string | undefined; getCode(key: string): string | undefined;
getSourceMap(path: string): object | undefined; getSourceMap(key: string): object | undefined;
update(path: string, opts: { mtime: string; code: string; map?: any }): Promise<void>; update(key: string, entry: { code: string; map?: object | null }): Promise<void>;
close(): void;
} }
export type CacheEntry = [atime: number, code: string, sourceMap: object];

View file

@@ -41,6 +41,7 @@
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
const Fs = require('fs');
const Path = require('path'); const Path = require('path');
const { addHook } = require('pirates'); const { addHook } = require('pirates');
@@ -105,7 +106,18 @@ function install(options = undefined) {
environment: 'node', environment: 'node',
// @ts-expect-error bad source-map-support types // @ts-expect-error bad source-map-support types
retrieveSourceMap(path) { retrieveSourceMap(path) {
const map = cache.getSourceMap(path); if (!Path.isAbsolute(path)) {
return null;
}
let source;
try {
source = Fs.readFileSync(path, 'utf8');
} catch {
return null;
}
const map = cache.getSourceMap(cache.getKey(path, source));
return map ? { map, url: null } : null; return map ? { map, url: null } : null;
}, },
}); });

View file

@@ -6,25 +6,18 @@
* Side Public License, v 1. * Side Public License, v 1.
*/ */
const Fs = require('fs');
const { transformCode } = require('@kbn/babel-transform'); const { transformCode } = require('@kbn/babel-transform');
/** @type {import('./types').Transform} */ /** @type {import('./types').Transform} */
const babelTransform = (path, source, cache) => { const babelTransform = (path, source, cache) => {
const mtime = `${Fs.statSync(path).mtimeMs}`; const key = cache.getKey(path, source);
const cached = cache.getCode(key);
if (cache.getMtime(path) === mtime) { if (cached) {
const code = cache.getCode(path); return cached;
if (code) {
return code;
}
} }
const result = transformCode(path, source); const result = transformCode(path, source);
cache.update(key, {
cache.update(path, {
mtime,
code: result.code, code: result.code,
map: result.map, map: result.map,
}); });

View file

@@ -6,27 +6,16 @@
* Side Public License, v 1. * Side Public License, v 1.
*/ */
const Fs = require('fs');
const Crypto = require('crypto');
const Peggy = require('@kbn/peggy'); const Peggy = require('@kbn/peggy');
/** @type {import('./types').Transform} */ /** @type {import('./types').Transform} */
const peggyTransform = (path, source, cache) => { const peggyTransform = (path, source, cache) => {
const config = Peggy.findConfigFile(path); const config = Peggy.findConfigFile(path);
const mtime = `${Fs.statSync(path).mtimeMs}`; const key = cache.getKey(path, source);
const key = !config
? path
: `${path}.config.${Crypto.createHash('sha256')
.update(config.source)
.digest('hex')
.slice(0, 8)}`;
if (cache.getMtime(key) === mtime) { const cached = cache.getCode(key);
const code = cache.getCode(key); if (cached) {
if (code) { return cached;
return code;
}
} }
const code = Peggy.getJsSourceSync({ const code = Peggy.getJsSourceSync({
@@ -40,7 +29,6 @@ const peggyTransform = (path, source, cache) => {
cache.update(key, { cache.update(key, {
code, code,
mtime,
}); });
return code; return code;

View file

@@ -16,7 +16,6 @@
"@kbn/repo-info", "@kbn/repo-info",
"@kbn/babel-transform", "@kbn/babel-transform",
"@kbn/peggy", "@kbn/peggy",
"@kbn/repo-packages",
], ],
"exclude": [ "exclude": [
"target/**/*", "target/**/*",

View file

@@ -77,7 +77,7 @@ export class Watcher {
// ignore changes in any devOnly package, these can't power the server so we can ignore them // ignore changes in any devOnly package, these can't power the server so we can ignore them
if (pkg?.devOnly) { if (pkg?.devOnly) {
return; return pkg.id === '@kbn/babel-register';
} }
const result = this.classifier.classify(event.path); const result = this.classifier.classify(event.path);

View file

@@ -12314,6 +12314,11 @@ date-fns@^1.27.2, date-fns@^1.30.1:
resolved "https://registry.yarnpkg.com/date-fns/-/date-fns-1.30.1.tgz#2e71bf0b119153dbb4cc4e88d9ea5acfb50dc05c" resolved "https://registry.yarnpkg.com/date-fns/-/date-fns-1.30.1.tgz#2e71bf0b119153dbb4cc4e88d9ea5acfb50dc05c"
integrity sha512-hBSVCvSmWC+QypYObzwGOd9wqdDpOt+0wl0KbU+R+uuZBS1jN8VsD1ss3irQDknRj5NvxiTF6oj/nDRnN/UQNw== integrity sha512-hBSVCvSmWC+QypYObzwGOd9wqdDpOt+0wl0KbU+R+uuZBS1jN8VsD1ss3irQDknRj5NvxiTF6oj/nDRnN/UQNw==
date-fns@^2.29.3:
version "2.29.3"
resolved "https://registry.yarnpkg.com/date-fns/-/date-fns-2.29.3.tgz#27402d2fc67eb442b511b70bbdf98e6411cd68a8"
integrity sha512-dDCnyH2WnnKusqvZZ6+jA1O51Ibt8ZMRNkDZdyAyK4YfbDwa/cEmuztzG5pk6hqlp9aSBPYcjOlktquahGwGeA==
date-now@^0.1.4: date-now@^0.1.4:
version "0.1.4" version "0.1.4"
resolved "https://registry.yarnpkg.com/date-now/-/date-now-0.1.4.tgz#eaf439fd4d4848ad74e5cc7dbef200672b9e345b" resolved "https://registry.yarnpkg.com/date-now/-/date-now-0.1.4.tgz#eaf439fd4d4848ad74e5cc7dbef200672b9e345b"