[kbn/babel-register] improve cache performance (#150261)

After https://github.com/elastic/kibana/pull/146212 it seems like the
babel-register cache is getting invalidated more frequently, for reasons
that aren't entirely clear. The current version of the cache stores only
a single entry per file path, which shouldn't be a big problem on its
own, but with these changes several versions of a file can be cached at
once. Raw performance seems about equal, but because the cache retains
multiple versions of a single file we should spend less time transpiling
when switching branches frequently.
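
Concretely, cache keys are now derived from both a file's path and its current source, rather than from the path alone, so different versions of the same file occupy different slots. A minimal sketch of the keying scheme (mirroring the getKey() implementation in lmdb_cache.js; the helper name here is illustrative):

const Crypto = require('crypto');

// Content-addressed key: two versions of the same file hash to two
// different keys, so both transpiled outputs can live in the cache and
// switching branches re-hits old entries instead of retranspiling.
function cacheKey(prefix, path, source) {
  return `${prefix}:${Crypto.createHash('sha1').update(path).update(source).digest('hex')}`;
}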

---------

Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
Spencer, 2023-02-06 19:50:42 -07:00, committed by GitHub
commit 1ee97e1657 (parent 55b66e20fe)
13 changed files with 127 additions and 210 deletions


@@ -1051,6 +1051,7 @@
"cypress-react-selector": "^3.0.0",
"cypress-real-events": "^1.7.6",
"cypress-recurse": "^1.26.0",
"date-fns": "^2.29.3",
"debug": "^2.6.9",
"delete-empty": "^2.0.0",
"dependency-check": "^4.1.0",


@@ -35,6 +35,7 @@ BUNDLER_DEPS = [
"@npm//chalk",
"@npm//pirates",
"@npm//lmdb",
"@npm//date-fns",
"@npm//source-map-support",
"//packages/kbn-repo-packages",
"//packages/kbn-repo-info",


@@ -10,7 +10,6 @@ const Fs = require('fs');
const Path = require('path');
const Crypto = require('crypto');
const { readHashOfPackageMap } = require('@kbn/repo-packages');
const babel = require('@babel/core');
const peggy = require('@kbn/peggy');
const { REPO_ROOT, UPSTREAM_BRANCH } = require('@kbn/repo-info');
@@ -25,7 +24,6 @@ const { getBabelOptions } = require('@kbn/babel-transform');
*/
function determineCachePrefix() {
const json = JSON.stringify({
synthPkgMapHash: readHashOfPackageMap(),
babelVersion: babel.version,
peggyVersion: peggy.version,
// get a config for a fake js, ts, and tsx file to make sure we
@@ -63,8 +61,7 @@ function getCache() {
if (lmdbAvailable()) {
log?.write('lmdb is available, using lmdb cache\n');
return new (require('./lmdb_cache').LmdbCache)({
pathRoot: REPO_ROOT,
dir: Path.resolve(REPO_ROOT, 'data/babel_register_cache_v1', UPSTREAM_BRANCH),
dir: Path.resolve(REPO_ROOT, 'data/babel_register_cache', UPSTREAM_BRANCH),
prefix: determineCachePrefix(),
log,
});
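
For context, the prefix computed by determineCachePrefix() folds the relevant tool versions (and the resolved babel options) into every key, so bumping Babel or Peggy naturally invalidates old entries without any explicit cleanup. A rough sketch of the idea, with assumed hashing and truncation details (the tail of that function is cut off in the hunk above):

const Crypto = require('crypto');
const babel = require('@babel/core');
const peggy = require('@kbn/peggy');

// Sketch: any version bump yields a new prefix, and therefore a
// disjoint keyspace; stale entries are never read again and simply
// age out of the cache.
function determinePrefix() {
  const json = JSON.stringify({
    babelVersion: babel.version,
    peggyVersion: peggy.version,
  });
  return Crypto.createHash('sha256').update(json).digest('hex').slice(0, 8);
}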


@@ -7,17 +7,21 @@
*/
const Path = require('path');
const Crypto = require('crypto');
const startOfDay = /** @type {import('date-fns/startOfDay').default} */ (
/** @type {unknown} */ (require('date-fns/startOfDay'))
);
const chalk = require('chalk');
const LmdbStore = require('lmdb');
const GLOBAL_ATIME = `${Date.now()}`;
const GLOBAL_ATIME = startOfDay(new Date()).valueOf();
const MINUTE = 1000 * 60;
const HOUR = MINUTE * 60;
const DAY = HOUR * 24;
/** @typedef {import('./types').Cache} CacheInterface */
/** @typedef {import('lmdb').Database<string, string>} Db */
/** @typedef {import('lmdb').Database<import('./types').CacheEntry, string>} Db */
/**
* @param {Db} db
@@ -31,63 +35,29 @@ const dbName = (db) =>
* @implements {CacheInterface}
*/
class LmdbCache {
/** @type {import('lmdb').RootDatabase<string, string>} */
#codes;
/** @type {Db} */
#atimes;
/** @type {Db} */
#mtimes;
/** @type {Db} */
#sourceMaps;
/** @type {string} */
#pathRoot;
/** @type {string} */
#prefix;
/** @type {import('lmdb').RootDatabase<import('./types').CacheEntry, string>} */
#db;
/** @type {import('stream').Writable | undefined} */
#log;
/** @type {ReturnType<typeof setTimeout>} */
#timer;
/** @type {string} */
#prefix;
/**
* @param {import('./types').CacheConfig} config
*/
constructor(config) {
if (!Path.isAbsolute(config.pathRoot)) {
throw new Error('cache requires an absolute path to resolve paths relative to');
}
this.#pathRoot = config.pathRoot;
this.#prefix = config.prefix;
this.#log = config.log;
this.#codes = LmdbStore.open(config.dir, {
name: 'codes',
encoding: 'string',
maxReaders: 500,
this.#prefix = config.prefix;
this.#db = LmdbStore.open(Path.resolve(config.dir, 'v5'), {
name: 'db',
encoding: 'json',
});
// TODO: redundant 'name' syntax is necessary because of a bug that I have yet to fix
this.#atimes = this.#codes.openDB('atimes', {
name: 'atimes',
encoding: 'string',
});
this.#mtimes = this.#codes.openDB('mtimes', {
name: 'mtimes',
encoding: 'string',
});
this.#sourceMaps = this.#codes.openDB('sourceMaps', {
name: 'sourceMaps',
encoding: 'string',
});
// after the process has been running for 30 minutes prune the
// keys which haven't been used in 30 days. We use `unref()` to
// make sure this timer doesn't hold other processes open
// unexpectedly
this.#timer = setTimeout(() => {
this.#pruneOldKeys().catch((error) => {
const lastClean = this.#db.get('@last clean');
if (!lastClean || lastClean[0] < GLOBAL_ATIME - 7 * DAY) {
try {
this.#pruneOldKeys();
} catch (error) {
process.stderr.write(`
Failed to cleanup @kbn/babel-register cache:
@@ -95,83 +65,60 @@ Failed to cleanup @kbn/babel-register cache:
To eliminate this problem you may want to delete the "${Path.relative(process.cwd(), config.dir)}"
directory and report this error to the Operations team.\n`);
});
}, 30 * MINUTE);
// timer.unref is not defined in jest which emulates the dom by default
if (typeof this.#timer.unref === 'function') {
this.#timer.unref();
} finally {
this.#db.putSync('@last clean', [GLOBAL_ATIME, '', {}]);
}
}
}
/**
* Get the cache key of the path and source from disk of a file
* @param {string} path
* @param {string} source
* @returns {string}
*/
getMtime(path) {
return this.#safeGet(this.#mtimes, this.#getKey(path));
getKey(path, source) {
return `${this.#prefix}:${Crypto.createHash('sha1').update(path).update(source).digest('hex')}`;
}
/**
* @param {string} path
* @param {string} key
* @returns {string|undefined}
*/
getCode(path) {
const key = this.#getKey(path);
const code = this.#safeGet(this.#codes, key);
getCode(key) {
const entry = this.#safeGet(this.#db, key);
if (code !== undefined) {
if (entry !== undefined && entry[0] !== GLOBAL_ATIME) {
// when we use a file from the cache set the "atime" of that cache entry
// so that we know which cache items we use and which haven't been
// touched in a long time (currently 30 days)
this.#safePut(this.#atimes, key, GLOBAL_ATIME);
// used in a long time (currently 30 days)
this.#safePut(this.#db, key, [GLOBAL_ATIME, entry[1], entry[2]]);
}
return code;
return entry?.[1];
}
/**
* @param {string} path
* @param {string} key
* @returns {object|undefined}
*/
getSourceMap(path) {
const map = this.#safeGet(this.#sourceMaps, this.#getKey(path));
if (typeof map === 'string') {
return JSON.parse(map);
getSourceMap(key) {
const entry = this.#safeGet(this.#db, key);
if (entry) {
return entry[2];
}
}
close() {
clearTimeout(this.#timer);
}
/**
* @param {string} path
* @param {{ mtime: string; code: string; map?: any }} file
* @param {string} key
* @param {{ code: string, map: object }} entry
*/
async update(path, file) {
const key = this.#getKey(path);
this.#safePut(this.#atimes, key, GLOBAL_ATIME);
this.#safePut(this.#mtimes, key, file.mtime);
this.#safePut(this.#codes, key, file.code);
if (file.map) {
this.#safePut(this.#sourceMaps, key, JSON.stringify(file.map));
}
async update(key, entry) {
this.#safePut(this.#db, key, [GLOBAL_ATIME, entry.code, entry.map]);
}
/**
* @param {string} path
*/
#getKey(path) {
const normalizedPath =
Path.sep !== '/'
? Path.relative(this.#pathRoot, path).split(Path.sep).join('/')
: Path.relative(this.#pathRoot, path);
return `${this.#prefix}:${normalizedPath}`;
}
/**
* @param {LmdbStore.Database<string, string>} db
* @param {Db} db
* @param {string} key
*/
#safeGet(db, key) {
@@ -190,9 +137,9 @@ directory and report this error to the Operations team.\n`);
}
/**
* @param {LmdbStore.Database<string, string>} db
* @param {Db} db
* @param {string} key
* @param {string} value
* @param {import('./types').CacheEntry} value
*/
#safePut(db, key, value) {
try {
@@ -205,7 +152,7 @@ directory and report this error to the Operations team.\n`);
/**
* @param {string} type
* @param {LmdbStore.Database<string, string>} db
* @param {Db} db
* @param {string} key
*/
#debug(type, db, key) {
@@ -214,7 +161,7 @@ directory and report this error to the Operations team.\n`);
/**
* @param {'GET' | 'PUT'} type
* @param {LmdbStore.Database<string, string>} db
* @param {Db} db
* @param {string} key
* @param {Error} error
*/
@@ -227,51 +174,36 @@ directory and report this error to the Operations team.\n`);
);
}
async #pruneOldKeys() {
try {
#pruneOldKeys() {
const ATIME_LIMIT = Date.now() - 30 * DAY;
const BATCH_SIZE = 1000;
/** @type {string[]} */
const validKeys = [];
/** @type {string[]} */
const invalidKeys = [];
for (const { key, value } of this.#atimes.getRange()) {
const atime = parseInt(`${value}`, 10);
if (Number.isNaN(atime) || atime < ATIME_LIMIT) {
invalidKeys.push(key);
} else {
validKeys.push(key);
const toDelete = [];
const flushDeletes = () => {
if (!toDelete.length) {
return;
}
if (validKeys.length + invalidKeys.length >= BATCH_SIZE) {
const promises = new Set();
if (invalidKeys.length) {
for (const k of invalidKeys) {
// all these promises are the same currently, so Set() will
// optimise this to a single promise, but I wouldn't be shocked
// if a future version starts returning independent promises so
// this is just for some future-proofing
promises.add(this.#atimes.remove(k));
promises.add(this.#mtimes.remove(k));
promises.add(this.#codes.remove(k));
promises.add(this.#sourceMaps.remove(k));
this.#db.transactionSync(() => {
for (const key of toDelete) {
this.#db.removeSync(key);
}
});
};
for (const { key, value } of this.#db.getRange()) {
if (Number.isNaN(value[0]) || value[0] < ATIME_LIMIT) {
toDelete.push(key);
// flush deletes early if there are many deleted
if (toDelete.length > 10_000) {
flushDeletes();
}
}
} else {
// delay a smidge to allow other things to happen before the next batch of checks
promises.add(new Promise((resolve) => setTimeout(resolve, 1)));
}
invalidKeys.length = 0;
validKeys.length = 0;
await Promise.all(Array.from(promises));
}
}
} catch {
// ignore errors, the cache is totally disposable and will rebuild if there is some sort of corruption
}
// delete all the old keys
flushDeletes();
}
}
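
One subtlety in the new single-db layout: GLOBAL_ATIME is the start of the current day rather than process start time, so a cache hit only rewrites an entry when its stored atime is from an earlier day, costing at most one extra write per key per day while still giving #pruneOldKeys() enough resolution for its 30-day cutoff. A condensed model of that read path, assuming the [atime, code, sourceMap] CacheEntry tuple from types.ts and an lmdb-style database handle:

// Sketch of the read path with lazy, day-granular atime updates.
function getCode(db, key, globalAtime) {
  const entry = db.get(key); // [atime, code, sourceMap] | undefined
  if (entry !== undefined && entry[0] !== globalAtime) {
    // Only rewrite when the stored atime is stale; repeated hits
    // within a single day cost zero extra writes.
    db.putSync(key, [globalAtime, entry[1], entry[2]]);
  }
  return entry?.[1];
}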


@@ -18,7 +18,7 @@ const DIR = Path.resolve(__dirname, '../__tmp__/cache');
const makeTestLog = () => {
const log = Object.assign(
new Writable({
write(chunk, enc, cb) {
write(chunk, _, cb) {
log.output += chunk;
cb();
},
@@ -39,50 +39,35 @@ const makeCache = (...options: ConstructorParameters<typeof LmdbCache>) => {
};
beforeEach(async () => await del(DIR));
afterEach(async () => {
await del(DIR);
for (const instance of instances) {
instance.close();
}
instances.length = 0;
});
afterEach(async () => await del(DIR));
it('returns undefined until values are set', async () => {
const path = '/foo/bar.js';
const mtime = new Date().toJSON();
const source = `console.log("hi, hello")`;
const log = makeTestLog();
const cache = makeCache({
dir: DIR,
prefix: 'prefix',
log,
pathRoot: '/foo/',
});
expect(cache.getMtime(path)).toBe(undefined);
expect(cache.getCode(path)).toBe(undefined);
expect(cache.getSourceMap(path)).toBe(undefined);
const key = cache.getKey(path, source);
expect(cache.getCode(key)).toBe(undefined);
expect(cache.getSourceMap(key)).toBe(undefined);
await cache.update(path, {
mtime,
await cache.update(key, {
code: 'var x = 1',
map: { foo: 'bar' },
});
expect(cache.getMtime(path)).toBe(mtime);
expect(cache.getCode(path)).toBe('var x = 1');
expect(cache.getSourceMap(path)).toEqual({ foo: 'bar' });
expect(cache.getCode(key)).toBe('var x = 1');
expect(cache.getSourceMap(key)).toEqual({ foo: 'bar' });
expect(log.output).toMatchInlineSnapshot(`
"MISS [mtimes] prefix:bar.js
MISS [codes] prefix:bar.js
MISS [sourceMaps] prefix:bar.js
PUT [atimes] prefix:bar.js
PUT [mtimes] prefix:bar.js
PUT [codes] prefix:bar.js
PUT [sourceMaps] prefix:bar.js
HIT [mtimes] prefix:bar.js
HIT [codes] prefix:bar.js
PUT [atimes] prefix:bar.js
HIT [sourceMaps] prefix:bar.js
"MISS [db] prefix:05a4b8198c4ec215d54d94681ef00ca9ecb45931
MISS [db] prefix:05a4b8198c4ec215d54d94681ef00ca9ecb45931
PUT [db] prefix:05a4b8198c4ec215d54d94681ef00ca9ecb45931
HIT [db] prefix:05a4b8198c4ec215d54d94681ef00ca9ecb45931
HIT [db] prefix:05a4b8198c4ec215d54d94681ef00ca9ecb45931
"
`);
});


@@ -12,6 +12,10 @@
* @implements {CacheInterface}
*/
class NoCacheCache {
getKey() {
return '';
}
getCode() {
return undefined;
}


@@ -9,16 +9,16 @@
import { Writable } from 'stream';
export interface CacheConfig {
pathRoot: string;
dir: string;
prefix: string;
log?: Writable;
}
export interface Cache {
getMtime(path: string): string | undefined;
getCode(path: string): string | undefined;
getSourceMap(path: string): object | undefined;
update(path: string, opts: { mtime: string; code: string; map?: any }): Promise<void>;
close(): void;
getKey(path: string, source: string): string;
getCode(key: string): string | undefined;
getSourceMap(key: string): object | undefined;
update(key: string, entry: { code: string; map?: object | null }): Promise<void>;
}
export type CacheEntry = [atime: number, code: string, sourceMap: object];
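
For a sense of how small the new surface area is, here is a minimal in-memory implementation of this Cache interface (a sketch for illustration, not part of the commit):

const Crypto = require('crypto');

// The smallest conforming cache: a Map from key to CacheEntry tuple.
class MapCache {
  #entries = new Map(); // key -> [atime, code, sourceMap]

  getKey(path, source) {
    return Crypto.createHash('sha1').update(path).update(source).digest('hex');
  }
  getCode(key) {
    return this.#entries.get(key)?.[1];
  }
  getSourceMap(key) {
    return this.#entries.get(key)?.[2];
  }
  async update(key, entry) {
    this.#entries.set(key, [Date.now(), entry.code, entry.map ?? {}]);
  }
}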


@@ -41,6 +41,7 @@
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
const Fs = require('fs');
const Path = require('path');
const { addHook } = require('pirates');
@@ -105,7 +106,18 @@ function install(options = undefined) {
environment: 'node',
// @ts-expect-error bad source-map-support types
retrieveSourceMap(path) {
const map = cache.getSourceMap(path);
if (!Path.isAbsolute(path)) {
return null;
}
let source;
try {
source = Fs.readFileSync(path, 'utf8');
} catch {
return null;
}
const map = cache.getSourceMap(cache.getKey(path, source));
return map ? { map, url: null } : null;
},
});


@@ -6,25 +6,18 @@
* Side Public License, v 1.
*/
const Fs = require('fs');
const { transformCode } = require('@kbn/babel-transform');
/** @type {import('./types').Transform} */
const babelTransform = (path, source, cache) => {
const mtime = `${Fs.statSync(path).mtimeMs}`;
if (cache.getMtime(path) === mtime) {
const code = cache.getCode(path);
if (code) {
return code;
}
const key = cache.getKey(path, source);
const cached = cache.getCode(key);
if (cached) {
return cached;
}
const result = transformCode(path, source);
cache.update(path, {
mtime,
cache.update(key, {
code: result.code,
map: result.map,
});


@@ -6,27 +6,16 @@
* Side Public License, v 1.
*/
const Fs = require('fs');
const Crypto = require('crypto');
const Peggy = require('@kbn/peggy');
/** @type {import('./types').Transform} */
const peggyTransform = (path, source, cache) => {
const config = Peggy.findConfigFile(path);
const mtime = `${Fs.statSync(path).mtimeMs}`;
const key = !config
? path
: `${path}.config.${Crypto.createHash('sha256')
.update(config.source)
.digest('hex')
.slice(0, 8)}`;
const key = cache.getKey(path, source);
if (cache.getMtime(key) === mtime) {
const code = cache.getCode(key);
if (code) {
return code;
}
const cached = cache.getCode(key);
if (cached) {
return cached;
}
const code = Peggy.getJsSourceSync({
@@ -40,7 +29,6 @@ const peggyTransform = (path, source, cache) => {
cache.update(key, {
code,
mtime,
});
return code;


@@ -16,7 +16,6 @@
"@kbn/repo-info",
"@kbn/babel-transform",
"@kbn/peggy",
"@kbn/repo-packages",
],
"exclude": [
"target/**/*",


@@ -77,7 +77,7 @@ export class Watcher {
// ignore changes in any devOnly package, these can't power the server so we can ignore them
if (pkg?.devOnly) {
return;
return pkg.id === '@kbn/babel-register';
}
const result = this.classifier.classify(event.path);


@@ -12314,6 +12314,11 @@ date-fns@^1.27.2, date-fns@^1.30.1:
resolved "https://registry.yarnpkg.com/date-fns/-/date-fns-1.30.1.tgz#2e71bf0b119153dbb4cc4e88d9ea5acfb50dc05c"
integrity sha512-hBSVCvSmWC+QypYObzwGOd9wqdDpOt+0wl0KbU+R+uuZBS1jN8VsD1ss3irQDknRj5NvxiTF6oj/nDRnN/UQNw==
date-fns@^2.29.3:
version "2.29.3"
resolved "https://registry.yarnpkg.com/date-fns/-/date-fns-2.29.3.tgz#27402d2fc67eb442b511b70bbdf98e6411cd68a8"
integrity sha512-dDCnyH2WnnKusqvZZ6+jA1O51Ibt8ZMRNkDZdyAyK4YfbDwa/cEmuztzG5pk6hqlp9aSBPYcjOlktquahGwGeA==
date-now@^0.1.4:
version "0.1.4"
resolved "https://registry.yarnpkg.com/date-now/-/date-now-0.1.4.tgz#eaf439fd4d4848ad74e5cc7dbef200672b9e345b"