mirror of
https://github.com/elastic/kibana.git
synced 2025-06-27 10:40:07 -04:00
Load huggingface content datasets (#224543)

Implements a HuggingFace dataset loader for RAG evals - see [x-pack/platform/packages/shared/kbn-ai-tools-cli/src/hf_dataset_loader/README.md](https://github.com/dgieselaar/kibana/blob/hf-dataset-loader/x-pack/platform/packages/shared/kbn-ai-tools-cli/src/hf_dataset_loader/README.md).

Additionally, a `@kbn/cache-cli` tool was added that allows tooling authors to cache to disk (and possibly to remote storage later). o3 was used for finding datasets on HuggingFace and for an initial pass on a line-by-line dataset processor ([see conversation](https://chatgpt.com/share/6853e49a-e870-8000-9c65-f7a5a3a72af0)).

Libraries added:
- `cache-manager`, `cache-manager-fs-hash`, `keyv`, `@types/cache-manager-fs-hash`: caching libraries and plugins. Could not find any existing caching libraries in the repo.
- `@huggingface/hub`: API client for the HuggingFace Hub.

---------

Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
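For orientation, a minimal sketch (not part of this commit) of how the exported API can be driven programmatically; the `esClient` and `logger` values are assumed to be provided by the calling dev script (the shipped CLI gets them from `@kbn/kibana-api-cli` and `@kbn/dev-cli-runner`):

```ts
import { loadHuggingFaceDatasets, ALL_HUGGING_FACE_DATASETS } from '@kbn/ai-tools-cli';
import type { ElasticsearchClient, Logger } from '@kbn/core/server';

// hypothetical caller-provided dependencies, purely for illustration
declare const esClient: ElasticsearchClient;
declare const logger: Logger;

export async function loadEvalCorpora() {
  await loadHuggingFaceDatasets({
    esClient,
    logger,
    accessToken: process.env.HUGGING_FACE_ACCESS_TOKEN!,
    // load only the BeIR TREC-COVID corpus, 500 rows, recreating the index first
    datasets: ALL_HUGGING_FACE_DATASETS.filter((d) => d.name === 'beir-trec-covid'),
    limit: 500,
    clear: true,
  });
}
```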
This commit is contained in:
parent
e401aa4c07
commit
7d20301289
32 changed files with 1047 additions and 3 deletions
2 .github/CODEOWNERS (vendored)
@@ -426,6 +426,7 @@ src/platform/packages/shared/kbn-apm-utils @elastic/obs-ux-infra_services-team
 src/platform/packages/shared/kbn-avc-banner @elastic/security-defend-workflows
 src/platform/packages/shared/kbn-axe-config @elastic/appex-qa
 src/platform/packages/shared/kbn-babel-register @elastic/kibana-operations
+src/platform/packages/shared/kbn-cache-cli @elastic/kibana-operations
 src/platform/packages/shared/kbn-calculate-auto @elastic/obs-ux-management-team
 src/platform/packages/shared/kbn-calculate-width-from-char-count @elastic/kibana-visualizations
 src/platform/packages/shared/kbn-cases-components @elastic/response-ops
@@ -839,6 +840,7 @@ x-pack/platform/packages/shared/file-upload-common @elastic/ml-ui
 x-pack/platform/packages/shared/index-lifecycle-management/index_lifecycle_management_common_shared @elastic/kibana-management
 x-pack/platform/packages/shared/index-management/index_management_shared_types @elastic/kibana-management
 x-pack/platform/packages/shared/kbn-ai-assistant @elastic/search-kibana @elastic/obs-ai-assistant
+x-pack/platform/packages/shared/kbn-ai-tools-cli @elastic/appex-ai-infra
 x-pack/platform/packages/shared/kbn-alerting-comparators @elastic/response-ops
 x-pack/platform/packages/shared/kbn-apm-types @elastic/obs-ux-infra_services-team
 x-pack/platform/packages/shared/kbn-cloud-security-posture/common @elastic/kibana-cloud-security-posture
3 .github/codeql/codeql-config.yml (vendored)
@@ -102,6 +102,7 @@ paths-ignore:
   - src/platform/packages/private/kbn-telemetry-tools
   - src/platform/packages/shared/kbn-apm-synthtrace
   - src/platform/packages/shared/kbn-axe-config
+  - src/platform/packages/shared/kbn-cache-cli
   - src/platform/packages/shared/kbn-dev-cli-errors
   - src/platform/packages/shared/kbn-dev-cli-runner
   - src/platform/packages/shared/kbn-dev-proc-runner
@@ -119,7 +120,9 @@ paths-ignore:
   - x-pack/examples
   - x-pack/packages/ai-infra/product-doc-artifact-builder
   - x-pack/packages/kbn-synthetics-private-location
+  - x-pack/platform/packages/shared/kbn-ai-tools-cli
   - x-pack/platform/packages/shared/kbn-inference-cli
+  - x-pack/platform/packages/shared/kbn-kibana-api-cli
   - x-pack/platform/packages/shared/kbn-sample-parser
   - x-pack/platform/plugins/private/cloud_integrations/cloud_full_story/public/assets/**
   - x-pack/platform/test
package.json
@@ -1420,11 +1420,13 @@
     "@emotion/jest": "^11.11.0",
     "@fast-check/jest": "^2.1.0",
     "@frsource/cypress-plugin-visual-regression-diff": "^3.3.10",
+    "@huggingface/hub": "^2.2.0",
     "@jest/console": "^29.7.0",
     "@jest/reporters": "^29.7.0",
     "@jest/transform": "^29.6.1",
     "@jest/types": "^29.6.3",
     "@kayahr/text-encoding": "^1.3.0",
+    "@kbn/ai-tools-cli": "link:x-pack/platform/packages/shared/kbn-ai-tools-cli",
     "@kbn/alerting-api-integration-helpers": "link:x-pack/platform/test/alerting_api_integration/packages/helpers",
     "@kbn/ambient-common-types": "link:src/platform/packages/private/kbn-ambient-common-types",
     "@kbn/ambient-ftr-types": "link:src/platform/packages/private/kbn-ambient-ftr-types",
@@ -1438,6 +1440,7 @@
     "@kbn/babel-register": "link:src/platform/packages/shared/kbn-babel-register",
     "@kbn/babel-transform": "link:src/platform/packages/private/kbn-babel-transform",
     "@kbn/bazel-runner": "link:packages/kbn-bazel-runner",
+    "@kbn/cache-cli": "link:src/platform/packages/shared/kbn-cache-cli",
     "@kbn/capture-oas-snapshot-cli": "link:packages/kbn-capture-oas-snapshot-cli",
     "@kbn/check-mappings-update-cli": "link:packages/kbn-check-mappings-update-cli",
     "@kbn/check-prod-native-modules-cli": "link:packages/kbn-check-prod-native-modules-cli",
@@ -1657,6 +1660,7 @@
     "@types/aws4": "^1.5.0",
     "@types/base64-js": "^1.5.0",
     "@types/byte-size": "^8.1.2",
+    "@types/cache-manager-fs-hash": "^0.0.5",
     "@types/chance": "^1.0.0",
     "@types/chroma-js": "^2.1.0",
     "@types/chrome-remote-interface": "^0.31.14",
@@ -1793,6 +1797,8 @@
     "backport": "^10.0.1",
     "blob-polyfill": "^9.0.20240710",
     "buildkite-test-collector": "^1.8.1",
+    "cache-manager": "^7.0.0",
+    "cache-manager-fs-hash": "^2.0.0",
     "callsites": "^3.1.0",
     "chance": "1.0.18",
     "chromedriver": "^137.0.0",
@@ -1875,6 +1881,7 @@
     "json-schema-typed": "^8.0.1",
     "json5": "^2.2.3",
     "jsondiffpatch": "0.4.1",
+    "keyv": "^5.3.4",
     "license-checker": "^25.0.1",
     "lighthouse": "^12.6.1",
     "listr2": "^8.2.5",
renovate.json
@@ -191,7 +191,12 @@
       "langsmith",
       "openai",
       "@types/json-schema",
-      "table"
+      "table",
+      "@huggingface/hub",
+      "cache-manager",
+      "cache-manager-fs-hash",
+      "keyv",
+      "@types/cache-manager-fs-hash"
     ],
     "reviewers": [
       "team:appex-ai-infra"
49 src/platform/packages/shared/kbn-cache-cli/README.md Normal file
@@ -0,0 +1,49 @@

# @kbn/cache-cli

Centralised caching helpers for scripts and CLIs in the Kibana repo.

The goal is to make it easy for engineers to cache computationally or I/O expensive operations on disk or, in the future, possibly in remote storage.

---

## Quick start

```ts
import { fromCache, createLocalDirDiskCacheStore } from '@kbn/cache-cli';
import { createCache } from 'cache-manager';

const DOC_CACHE = createCache({
  stores: [createLocalDirDiskCacheStore({ dir: 'my_docs', ttl: 60 * 60 /* 1h */ })],
});

const docs = await fromCache('docs', DOC_CACHE, async () => fetchDocs());
```

`fromCache(key, cache, cb, validator?)` semantics:

1. Tries `cache.get(key)` (skipped when `process.env.DISABLE_KBN_CLI_CACHE` is truthy).
2. Runs the optional `validator(cached)` – return `false` to force a refresh.
3. Calls `cb()` on a cache miss or when the cached value is invalid.
4. Persists the fresh value via `cache.set(key, value)` and returns it.
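A minimal sketch of the validator path described above (the `fetchDocs()` helper and the freshness check are illustrative assumptions, not part of this package):

```ts
import { fromCache, createLocalDirDiskCacheStore } from '@kbn/cache-cli';
import { createCache } from 'cache-manager';

const DOC_CACHE = createCache({
  stores: [createLocalDirDiskCacheStore({ dir: 'my_docs', ttl: 60 * 60 })],
});

interface DocsPayload {
  fetchedAt: number;
  docs: unknown[];
}

// hypothetical fetcher used purely for illustration
declare function fetchDocs(): Promise<unknown[]>;

const payload = await fromCache<DocsPayload>(
  'docs',
  DOC_CACHE,
  async () => ({ fetchedAt: Date.now(), docs: await fetchDocs() }),
  // validator: treat anything older than a day as stale, forcing cb() to run again
  (cached) => Date.now() - cached.fetchedAt < 24 * 60 * 60 * 1000
);
```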

---

## Available cache stores

`@kbn/cache-cli` wraps [`cache-manager`](https://github.com/node-cache-manager/node-cache-manager) so any **Keyv compatible** store works. The helpers below ship out-of-the-box:

| Helper                                        | Backing store                                        | Typical use-case                                                   |
| --------------------------------------------- | ---------------------------------------------------- | ------------------------------------------------------------------ |
| `createLocalDirDiskCacheStore({ dir, ttl? })` | `cache-manager-fs-hash` on `<REPO_ROOT>/data/{dir}`  | Persist in `./data` with an optional TTL                            |
| `createTmpDirDiskCacheStore({ dir, ttl? })`   | `cache-manager-fs-hash` on `<OS_TMP_DIR>/{dir}`      | Persist in the OS tmp dir, which might be cleared across restarts   |

---

## Cache invalidation strategies

1. **Manual bypass** – set `DISABLE_KBN_CLI_CACHE=true` to force fresh data (useful in CI workflows).
2. **Time-to-live (TTL)** – pass `ttl` when creating a store to let the backend expire entries automatically.
3. **Programmatic validation** – supply the `cacheValidator` callback to `fromCache()`; it receives the cached value and should return `true` when it is still valid.
4. **Clear on disk** – delete the relevant directory under `data/` if you need a hard reset.

Choose whichever fits your script. They can be combined (e.g. a TTL plus a validator).
13 src/platform/packages/shared/kbn-cache-cli/index.ts Normal file
@@ -0,0 +1,13 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the "Elastic License
 * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
 * Public License v 1"; you may not use this file except in compliance with, at
 * your election, the "Elastic License 2.0", the "GNU Affero General Public
 * License v3.0 only", or the "Server Side Public License, v 1".
 */

export { createLocalDirDiskCacheStore } from './src/stores/create_local_disk_cache_store';
export { createTmpDirDiskCacheStore } from './src/stores/create_tmp_dir_disk_cache_store';

export { fromCache } from './src/from_cache';
14 src/platform/packages/shared/kbn-cache-cli/jest.config.js Normal file
@@ -0,0 +1,14 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the "Elastic License
 * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
 * Public License v 1"; you may not use this file except in compliance with, at
 * your election, the "Elastic License 2.0", the "GNU Affero General Public
 * License v3.0 only", or the "Server Side Public License, v 1".
 */

module.exports = {
  preset: '@kbn/test/jest_node',
  rootDir: '../../../../..',
  roots: ['<rootDir>/src/platform/packages/shared/kbn-cache-cli'],
};
8 src/platform/packages/shared/kbn-cache-cli/kibana.jsonc Normal file
@@ -0,0 +1,8 @@
{
  "type": "shared-common",
  "id": "@kbn/cache-cli",
  "owner": "@elastic/kibana-operations",
  "group": "platform",
  "visibility": "shared",
  "devOnly": true
}
6 src/platform/packages/shared/kbn-cache-cli/package.json Normal file
@@ -0,0 +1,6 @@
{
  "name": "@kbn/cache-cli",
  "private": true,
  "version": "1.0.0",
  "license": "Elastic License 2.0 OR AGPL-3.0-only OR SSPL-1.0"
}
src/platform/packages/shared/kbn-cache-cli/src/from_cache.test.ts
@@ -0,0 +1,87 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the "Elastic License
 * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
 * Public License v 1"; you may not use this file except in compliance with, at
 * your election, the "Elastic License 2.0", the "GNU Affero General Public
 * License v3.0 only", or the "Server Side Public License, v 1".
 */

import { fromCache } from './from_cache';
import type { Cache } from 'cache-manager';

function createMockCache(): { store: Map<string, unknown>; cache: Cache } {
  const backing = new Map<string, unknown>();
  const cache = {
    get: jest.fn(async (key: string) => backing.get(key)),
    set: jest.fn(async (key: string, value: unknown) => {
      backing.set(key, value);
    }),
  } as Partial<Cache>;
  return { store: backing, cache: cache as Cache };
}

describe('fromCache', () => {
  const KEY = 'test-key';
  const NEW_VAL = 'fresh-value';

  afterEach(() => {
    jest.clearAllMocks();
    delete process.env.DISABLE_KBN_CLI_CACHE;
  });

  it('returns the cached value when present', async () => {
    const { cache, store } = createMockCache();
    store.set(KEY, 'cached-value');

    const cb = jest.fn().mockResolvedValue(NEW_VAL);
    const result = await fromCache(KEY, cache, cb);

    expect(result).toBe('cached-value');
    expect(cb).not.toHaveBeenCalled();
    expect(cache.get).toHaveBeenCalledWith(KEY);
    // the cached value is re-persisted as-is, not overwritten with a new value
    expect(cache.set).toHaveBeenCalledWith(KEY, 'cached-value');
  });

  it('bypasses cache when DISABLE_KBN_CLI_CACHE env var is set', async () => {
    process.env.DISABLE_KBN_CLI_CACHE = 'true';
    const { cache } = createMockCache();
    const cb = jest.fn().mockResolvedValue(NEW_VAL);

    const result = await fromCache(KEY, cache, cb);

    expect(cb).toHaveBeenCalledTimes(1);
    expect(result).toBe(NEW_VAL);

    // still updates the cache with the new value
    expect(cache.set).toHaveBeenCalledWith(KEY, NEW_VAL);
  });

  it('validates cached value with cacheValidator and recomputes when invalid', async () => {
    const { cache, store } = createMockCache();
    store.set(KEY, 'stale');

    const cb = jest.fn().mockResolvedValue(NEW_VAL);

    const validator = jest.fn((val: string) => val === 'fresh-value');

    const result = await fromCache(KEY, cache, cb, validator);

    expect(validator).toHaveBeenCalledWith('stale');
    expect(cb).toHaveBeenCalledTimes(1);
    expect(result).toBe(NEW_VAL);
    expect(cache.set).toHaveBeenCalledWith(KEY, NEW_VAL);
  });

  it('stores newly computed value in cache when no cached value exists', async () => {
    const { cache } = createMockCache();
    const cb = jest.fn().mockResolvedValue(NEW_VAL);

    const result = await fromCache(KEY, cache, cb);

    expect(result).toBe(NEW_VAL);
    expect(cb).toHaveBeenCalledTimes(1);
    expect(cache.set).toHaveBeenCalledWith(KEY, NEW_VAL);
  });
});
29 src/platform/packages/shared/kbn-cache-cli/src/from_cache.ts Normal file
@@ -0,0 +1,29 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the "Elastic License
 * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
 * Public License v 1"; you may not use this file except in compliance with, at
 * your election, the "Elastic License 2.0", the "GNU Affero General Public
 * License v3.0 only", or the "Server Side Public License, v 1".
 */
import { Cache } from 'cache-manager';

export async function fromCache<T>(
  key: string,
  store: Cache,
  cb: () => Promise<T>,
  cacheValidator?: (val: T) => boolean
): Promise<T> {
  let val = process.env.DISABLE_KBN_CLI_CACHE ? undefined : await store.get<T>(key);

  if (val !== undefined && cacheValidator) {
    val = cacheValidator(val) ? val : undefined;
  }

  if (val === undefined) {
    val = await cb();
  }

  store.set(key, val);
  return val;
}
src/platform/packages/shared/kbn-cache-cli/src/stores/create_local_disk_cache_store.ts
@@ -0,0 +1,30 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the "Elastic License
 * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
 * Public License v 1"; you may not use this file except in compliance with, at
 * your election, the "Elastic License 2.0", the "GNU Affero General Public
 * License v3.0 only", or the "Server Side Public License, v 1".
 */

import DiskStore from 'cache-manager-fs-hash';
import { KeyvAdapter } from 'cache-manager';
import Path from 'path';
import { REPO_ROOT } from '@kbn/repo-info';
import { Keyv } from 'keyv';

export interface LocalDiskCacheOptions {
  dir: string;
  ttl?: number;
}

export function createLocalDirDiskCacheStore(opts: LocalDiskCacheOptions): Keyv {
  const adapter = new KeyvAdapter(
    DiskStore.create({
      store: DiskStore,
      options: { path: Path.join(REPO_ROOT, 'data', opts.dir), ttl: opts.ttl },
    })
  );

  return new Keyv({ store: adapter });
}
src/platform/packages/shared/kbn-cache-cli/src/stores/create_tmp_dir_disk_cache_store.ts
@@ -0,0 +1,30 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the "Elastic License
 * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
 * Public License v 1"; you may not use this file except in compliance with, at
 * your election, the "Elastic License 2.0", the "GNU Affero General Public
 * License v3.0 only", or the "Server Side Public License, v 1".
 */

import DiskStore from 'cache-manager-fs-hash';
import { KeyvAdapter } from 'cache-manager';
import Os from 'os';
import Path from 'path';
import { Keyv } from 'keyv';

export interface TmpDirDiskCacheOptions {
  dir: string;
  ttl?: number;
}

export function createTmpDirDiskCacheStore(opts: TmpDirDiskCacheOptions): Keyv {
  const adapter = new KeyvAdapter(
    DiskStore.create({
      store: DiskStore,
      options: { path: Path.join(Os.tmpdir(), opts.dir), ttl: opts.ttl },
    })
  );

  return new Keyv(adapter);
}
19 src/platform/packages/shared/kbn-cache-cli/tsconfig.json Normal file
@@ -0,0 +1,19 @@
{
  "extends": "../../../../../tsconfig.base.json",
  "compilerOptions": {
    "outDir": "target/types",
    "types": [
      "jest",
      "node"
    ]
  },
  "include": [
    "**/*.ts",
  ],
  "exclude": [
    "target/**/*"
  ],
  "kbn_references": [
    "@kbn/repo-info",
  ]
}
tsconfig.base.json
@@ -26,6 +26,8 @@
     "@kbn/ai-assistant-management-plugin/*": ["src/platform/plugins/shared/ai_assistant_management/selection/*"],
     "@kbn/ai-security-labs-content": ["x-pack/solutions/security/packages/ai-security-labs-content"],
     "@kbn/ai-security-labs-content/*": ["x-pack/solutions/security/packages/ai-security-labs-content/*"],
+    "@kbn/ai-tools-cli": ["x-pack/platform/packages/shared/kbn-ai-tools-cli"],
+    "@kbn/ai-tools-cli/*": ["x-pack/platform/packages/shared/kbn-ai-tools-cli/*"],
     "@kbn/aiops-change-point-detection": ["x-pack/platform/packages/private/ml/aiops_change_point_detection"],
     "@kbn/aiops-change-point-detection/*": ["x-pack/platform/packages/private/ml/aiops_change_point_detection/*"],
     "@kbn/aiops-common": ["x-pack/platform/packages/shared/ml/aiops_common"],
@@ -130,6 +132,8 @@
     "@kbn/banners-plugin/*": ["x-pack/platform/plugins/private/banners/*"],
     "@kbn/bazel-runner": ["packages/kbn-bazel-runner"],
     "@kbn/bazel-runner/*": ["packages/kbn-bazel-runner/*"],
+    "@kbn/cache-cli": ["src/platform/packages/shared/kbn-cache-cli"],
+    "@kbn/cache-cli/*": ["src/platform/packages/shared/kbn-cache-cli/*"],
     "@kbn/calculate-auto": ["src/platform/packages/shared/kbn-calculate-auto"],
     "@kbn/calculate-auto/*": ["src/platform/packages/shared/kbn-calculate-auto/*"],
     "@kbn/calculate-width-from-char-count": ["src/platform/packages/shared/kbn-calculate-width-from-char-count"],
x-pack/platform/packages/shared/kbn-ai-tools-cli/README.md
@@ -0,0 +1,3 @@
# @kbn/ai-tools-cli

Empty package generated by @kbn/generate
10 x-pack/platform/packages/shared/kbn-ai-tools-cli/index.ts Normal file
@@ -0,0 +1,10 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

export { loadHuggingFaceDatasets } from './src/hf_dataset_loader/load_hugging_face_datasets';
export type { HuggingFaceDatasetSpec } from './src/hf_dataset_loader/types';
export { ALL_HUGGING_FACE_DATASETS } from './src/hf_dataset_loader/config';
x-pack/platform/packages/shared/kbn-ai-tools-cli/jest.config.js
@@ -0,0 +1,12 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

module.exports = {
  preset: '@kbn/test/jest_node',
  rootDir: '../../../../..',
  roots: ['<rootDir>/x-pack/platform/packages/shared/kbn-ai-tools-cli'],
};
x-pack/platform/packages/shared/kbn-ai-tools-cli/kibana.jsonc
@@ -0,0 +1,8 @@
{
  "type": "shared-server",
  "id": "@kbn/ai-tools-cli",
  "owner": "@elastic/appex-ai-infra",
  "group": "platform",
  "visibility": "shared",
  "devOnly": true
}
x-pack/platform/packages/shared/kbn-ai-tools-cli/package.json
@@ -0,0 +1,6 @@
{
  "name": "@kbn/ai-tools-cli",
  "private": true,
  "version": "1.0.0",
  "license": "Elastic License 2.0"
}
x-pack/platform/packages/shared/kbn-ai-tools-cli/scripts/hf_dataset_loader.ts
@@ -0,0 +1,91 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { run } from '@kbn/dev-cli-runner';
import { createKibanaClient, toolingLogToLogger } from '@kbn/kibana-api-cli';
import { castArray, keyBy } from 'lodash';
import { loadHuggingFaceDatasets } from '../src/hf_dataset_loader/load_hugging_face_datasets';
import { ALL_HUGGING_FACE_DATASETS } from '../src/hf_dataset_loader/config';

interface Flags {
  // the number of rows per dataset to load into ES
  limit?: string;
  // the names of the datasets to load
  datasets?: string | string[];
  // whether the specified datasets' indices should be cleared before loading
  clear?: boolean;
}

run(
  async ({ log, flags }) => {
    const signal = new AbortController().signal;

    const accessToken = process.env.HUGGING_FACE_ACCESS_TOKEN;

    if (!accessToken) {
      throw new Error(
        `process.env.HUGGING_FACE_ACCESS_TOKEN not set - this is required for API access`
      );
    }

    const kibanaClient = await createKibanaClient({
      log,
      signal,
    });

    // destructure and normalize CLI flags
    const { limit, datasets, clear } = flags as Flags;

    const datasetNames = !!datasets
      ? castArray(datasets)
          .flatMap((set) => set.split(','))
          .map((set) => set.trim())
          .filter(Boolean)
      : undefined;

    const specsByName = keyBy(ALL_HUGGING_FACE_DATASETS, (val) => val.name);

    const specs =
      datasetNames?.map((name) => {
        if (!specsByName[name]) {
          throw new Error(`Dataset spec for ${name} not found`);
        }
        return specsByName[name];
      }) ?? ALL_HUGGING_FACE_DATASETS;

    if (!specs.length) {
      throw new Error(`No datasets to load`);
    }

    await loadHuggingFaceDatasets({
      esClient: kibanaClient.es,
      logger: toolingLogToLogger({ flags, log }),
      clear: Boolean(clear),
      limit: !!limit ? Number(limit) : undefined,
      datasets: specs,
      accessToken,
    });
  },
  {
    description: `Loads HuggingFace datasets into an Elasticsearch cluster`,
    flags: {
      string: ['limit', 'datasets'],
      boolean: ['clear'],
      help: `
        Usage: node --require ./src/setup_node_env/index.js x-pack/platform/packages/shared/kbn-ai-tools-cli/scripts/hf_dataset_loader.ts [options]

        --datasets        Comma-separated list of HuggingFace dataset names to load
        --limit           Number of rows per dataset to load into Elasticsearch
        --clear           Clear the existing indices for the specified datasets before loading
      `,
      default: {
        clear: false,
      },
      allowUnexpected: false,
    },
  }
);
x-pack/platform/packages/shared/kbn-ai-tools-cli/src/hf_dataset_loader/README.md
@@ -0,0 +1,40 @@

# HuggingFace Dataset Loader

`loadHuggingFaceDatasets()` loads publicly available HuggingFace datasets into Elasticsearch.

A small Kibana Dev CLI script that ingests one or more public HuggingFace datasets into the **Elasticsearch instance discovered from your local Kibana**. It uses the default ELSER v2 endpoint to generate embeddings and index them into your cluster. You can then use these indices for evaluating RAG-based workflows and features.

## Prerequisites

- A running **Kibana** + **Elasticsearch** (the script will auto-discover the base URL using `@kbn/kibana-api-cli`)
- Internet connection – the datasets are downloaded straight from the HF Hub and cached on disk (`./data`) unless `DISABLE_KBN_CLI_CACHE=1` is set.
- [A HuggingFace access token](https://huggingface.co/docs/hub/en/security-tokens) - this can be acquired by signing up to HF (free).

## Usage

```bash
HUGGING_FACE_ACCESS_TOKEN=<SNIP> \
node --require ./src/setup_node_env/index.js \
  x-pack/platform/packages/shared/kbn-ai-tools-cli/scripts/hf_dataset_loader.ts \
  --datasets beir-trec-covid,beir-msmarco \
  --limit 1000 \
  --clear
```

### CLI flags

| Flag         | Type      | Description                                                                                            |
| ------------ | --------- | ------------------------------------------------------------------------------------------------------ |
| `--datasets` | `string`  | Comma-separated list of dataset **names** to load. Omit the flag to load **all** predefined datasets.   |
| `--limit`    | `number`  | Max docs per dataset (handy while testing). Defaults to 1k.                                             |
| `--clear`    | `boolean` | Delete the target index **before** indexing. Defaults to `false`.                                       |

## Built-in dataset specs

The script ships with ready-made specifications located in `config.ts`.

Feel free to extend or tweak these specs in `src/hf_dataset_loader/config.ts`, as sketched below.
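For reference, a hedged sketch of what an additional spec could look like (the repository, file and field names below are placeholders for illustration, not a dataset shipped with this package):

```ts
import type { HuggingFaceDatasetSpec } from './types';

// hypothetical spec – repo/file/field names are made up
export const MY_DATASET: HuggingFaceDatasetSpec = {
  name: 'my-dataset',
  repo: 'my-org/my-dataset',
  file: 'corpus.jsonl.gz',
  revision: 'main',
  index: 'my-dataset',
  mapDocument: (raw) => ({
    _id: raw.id,
    title: raw.title,
    content: raw.body,
  }),
  mapping: {
    properties: {
      title: { type: 'semantic_text', inference_id: '.elser-2-elasticsearch' },
      content: { type: 'semantic_text', inference_id: '.elser-2-elasticsearch' },
    },
  },
};
```

A new spec then needs to be added to `ALL_HUGGING_FACE_DATASETS` in `config.ts` so the CLI can select it by name via `--datasets`.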

## Disabling local cache

Set the environment variable `DISABLE_KBN_CLI_CACHE=1` to force fresh downloads instead of using the on-disk cache.
x-pack/platform/packages/shared/kbn-ai-tools-cli/src/hf_dataset_loader/config.ts
@@ -0,0 +1,87 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import type { HuggingFaceDatasetSpec } from './types';

const BEIR_NAMES = [
  'trec-covid',
  'msmarco',
  'nq',
  'hotpotqa',
  'fiqa',
  'dbpedia-entity',
  'robust04',
  'touche-2020',
  'arguana',
  'climate-fever',
  'scifact',
  'scidocs',
  'quora',
];

const INFERENCE_ENDPOINT = `.elser-2-elasticsearch`;

const SEMANTIC_TEXT = {
  type: 'semantic_text' as const,
  inference_id: INFERENCE_ENDPOINT,
};

const BEIR_DATASETS: HuggingFaceDatasetSpec[] = BEIR_NAMES.map((name) => ({
  name: `beir-${name}`,
  repo: `BeIR/${name}`,
  file: 'corpus.jsonl.gz',
  revision: 'main',
  index: `beir-${name}`,
  mapDocument: (r) => ({
    _id: r._id,
    title: r.title,
    content: r.text,
  }),
  mapping: {
    properties: {
      title: SEMANTIC_TEXT,
      content: SEMANTIC_TEXT,
    },
  },
}));

const EXTRA_DATASETS: HuggingFaceDatasetSpec[] = [
  {
    name: 'huffpost',
    repo: 'khalidalt/HuffPost',
    file: 'News_Category_Dataset_v2.json',
    index: 'huffpost',
    mapDocument: (r) => ({
      _id: r.link,
      title: r.headline,
      content: r.short_description,
      date: r.date,
      author: r.authors,
      category: r.category,
    }),
    mapping: {
      properties: {
        title: SEMANTIC_TEXT,
        content: SEMANTIC_TEXT,
        author: {
          type: 'keyword',
        },
        category: {
          type: 'keyword',
        },
        date: {
          type: 'date',
        },
      },
    },
  },
];

export const ALL_HUGGING_FACE_DATASETS: HuggingFaceDatasetSpec[] = [
  ...BEIR_DATASETS,
  ...EXTRA_DATASETS,
];
x-pack/platform/packages/shared/kbn-ai-tools-cli/src/hf_dataset_loader/ensure_dataset_index_exists.ts
@@ -0,0 +1,46 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { ElasticsearchClient } from '@kbn/core/server';
import { errors } from '@elastic/elasticsearch';
import { HuggingFaceDatasetSpec } from './types';

export async function ensureDatasetIndexExists({
  esClient,
  dataset,
  clear,
}: {
  esClient: ElasticsearchClient;
  dataset: HuggingFaceDatasetSpec;
  clear?: boolean;
}) {
  const { index, mapping } = dataset;

  let exists = await esClient.indices.exists({ index, allow_no_indices: true }).catch((error) => {
    if (error instanceof errors.ResponseError && error.statusCode === 404) {
      return false;
    }
    throw error;
  });

  if (clear && exists) {
    await esClient.indices.delete({ index, allow_no_indices: true });
    exists = false;
  }

  if (exists) {
    await esClient.indices.putMapping({
      ...mapping,
      index,
    });
  } else {
    await esClient.indices.create({
      index,
      mappings: mapping,
    });
  }
}
x-pack/platform/packages/shared/kbn-ai-tools-cli/src/hf_dataset_loader/fetch_rows_from_dataset.ts
@@ -0,0 +1,106 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { fileDownloadInfo } from '@huggingface/hub';
import { Logger } from '@kbn/core/server';
import streamWeb from 'stream/web';
import { Readable } from 'stream';
import { createGunzip } from 'zlib';
import * as readline from 'node:readline';
import { pickBy } from 'lodash';
import { format } from 'util';
import { HuggingFaceDatasetSpec } from './types';

function toMb(bytes: number): string {
  return (bytes / 1024 / 1024).toFixed(1) + 'mb';
}

export async function fetchRowsFromDataset({
  dataset,
  logger,
  limit = 1000,
  accessToken,
}: {
  dataset: HuggingFaceDatasetSpec;
  logger: Logger;
  limit?: number;
  accessToken: string;
}): Promise<Array<Record<string, unknown>>> {
  const options = {
    repo: dataset.repo,
    path: dataset.file,
    revision: dataset.revision ?? 'main',
    hubUrl: `https://huggingface.co/datasets`,
    accessToken,
  };

  const fileInfo = await fileDownloadInfo(options);

  if (!fileInfo) {
    throw new Error(
      `Cannot fetch files for dataset (${dataset.repo}/${dataset.file}@${options.revision})`
    );
  }

  const { url, size } = fileInfo;

  const res = await fetch(url);
  if (!res.ok || !res.body) {
    throw new Error(`HTTP ${res.status} while fetching ${url}`);
  }

  const inputStream = Readable.fromWeb(res.body as unknown as streamWeb.ReadableStream<any>);

  const isGzip = new URL(url).searchParams.get('response-content-type') === 'application/gzip';

  const totalMb = toMb(size);

  let downloadedBytes = 0;

  let lastDownloadLog = Date.now();

  inputStream.on('data', (chunk: Buffer) => {
    downloadedBytes += chunk.length;
    const now = Date.now();
    if (now - lastDownloadLog >= 10_000) {
      lastDownloadLog = now;
      const downloadedMb = toMb(downloadedBytes);
      logger.info(`Downloading ${dataset.name}: ${downloadedMb} out of ${totalMb} so far`);
    }
  });

  inputStream.on('end', () => {
    logger.debug('Completed download');
  });

  inputStream.on('error', (err) => {
    logger.debug(`Ended download prematurely: ${format(err)}`);
  });

  const decompressed: Readable = isGzip ? inputStream.pipe(createGunzip()) : inputStream;

  const rl = readline.createInterface({ input: decompressed, crlfDelay: Infinity });

  const docs: Array<Record<string, unknown>> = [];
  for await (const line of rl) {
    if (!line) continue;
    const raw = JSON.parse(line);
    const doc = dataset.mapDocument(raw);
    docs.push(pickBy(doc, (val) => val !== undefined && val !== null && val !== ''));

    if (docs.length === limit) {
      break;
    }
  }

  inputStream.destroy();

  logger.debug(`Fetched ${docs.length} rows for ${dataset.name}`);

  return docs;
}
x-pack/platform/packages/shared/kbn-ai-tools-cli/src/hf_dataset_loader/get_embeddings.ts
@@ -0,0 +1,54 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { ElasticsearchClient, Logger } from '@kbn/core/server';
import { indexDocuments } from './index_documents';
import { HuggingFaceDatasetSpec } from './types';

export async function getEmbeddings({
  esClient,
  documents,
  dataset,
  logger,
}: {
  esClient: ElasticsearchClient;
  documents: Array<Record<string, unknown>>;
  dataset: HuggingFaceDatasetSpec;
  logger: Logger;
}): Promise<Array<Record<string, unknown>>> {
  const indexName = dataset.index + '_tmp';

  await indexDocuments({
    documents,
    dataset: {
      ...dataset,
      index: indexName,
    },
    esClient,
    logger,
  });

  const docsWithEmbeddings = await esClient
    .search<Record<string, any>>({
      index: indexName,
      size: documents.length,
      fields: ['_inference_fields'],
    })
    .then((response) =>
      response.hits.hits.map((hit) => {
        const source = hit._source!;
        Object.entries(source._inference_fields ?? {}).forEach(([fieldName, config]) => {
          delete (config as Record<string, any>).inference.model_settings.service;
        });
        return source;
      })
    );

  await esClient.indices.delete({ index: indexName });

  return docsWithEmbeddings;
}
x-pack/platform/packages/shared/kbn-ai-tools-cli/src/hf_dataset_loader/index_documents.ts
@@ -0,0 +1,49 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { ElasticsearchClient, Logger } from '@kbn/core/server';
import { Readable } from 'stream';
import { inspect } from 'util';
import { HuggingFaceDatasetSpec } from './types';
import { ensureDatasetIndexExists } from './ensure_dataset_index_exists';

export async function indexDocuments({
  esClient,
  documents,
  dataset,
  logger,
}: {
  esClient: ElasticsearchClient;
  documents: Array<Record<string, unknown>>;
  dataset: HuggingFaceDatasetSpec;
  logger: Logger;
}): Promise<void> {
  const indexName = dataset.index;

  await ensureDatasetIndexExists({
    dataset,
    esClient,
  });

  logger.debug(`Indexing ${documents.length} into ${indexName}`);

  await esClient.helpers.bulk<Record<string, unknown>>({
    datasource: Readable.from(documents),
    index: indexName,
    retries: 2,
    concurrency: 1,
    flushBytes: 1024 * 128,
    onDocument: (document) => {
      const { _id, ...doc } = document;
      return [{ index: { _id: String(_id) } }, doc];
    },
    onDrop: (doc) => {
      logger.warn(`Dropped document: ${doc.status} (${inspect(doc.error, { depth: 5 })})`);
    },
    refresh: 'wait_for',
  });
}
x-pack/platform/packages/shared/kbn-ai-tools-cli/src/hf_dataset_loader/load_hugging_face_datasets.ts
@@ -0,0 +1,103 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { ElasticsearchClient, Logger } from '@kbn/core/server';
import { createLocalDirDiskCacheStore, fromCache } from '@kbn/cache-cli';
import { createCache } from 'cache-manager';
import { errors } from '@elastic/elasticsearch';
import { ALL_HUGGING_FACE_DATASETS } from './config';
import { HuggingFaceDatasetSpec } from './types';
import { ensureDatasetIndexExists } from './ensure_dataset_index_exists';
import { fetchRowsFromDataset } from './fetch_rows_from_dataset';
import { indexDocuments } from './index_documents';
import { getEmbeddings } from './get_embeddings';

const DATASET_ROWS_CACHE = createCache({
  stores: [
    createLocalDirDiskCacheStore({
      dir: `hugging_face_dataset_rows`,
    }),
  ],
});

const DATASET_EMBEDDINGS_CACHE = createCache({
  stores: [
    createLocalDirDiskCacheStore({
      dir: `hugging_face_dataset_embeddings`,
    }),
  ],
});

export async function loadHuggingFaceDatasets({
  esClient,
  logger,
  accessToken,
  datasets = ALL_HUGGING_FACE_DATASETS,
  limit = 1000,
  clear = false,
}: {
  esClient: ElasticsearchClient;
  logger: Logger;
  accessToken: string;
  datasets?: HuggingFaceDatasetSpec[];
  limit?: number;
  clear?: boolean;
}) {
  if (clear) {
    await esClient.indices
      .delete({
        index: datasets.map((dataset) => dataset.index),
        allow_no_indices: true,
      })
      .catch((error) => {
        if (error instanceof errors.ResponseError && error.statusCode === 404) {
          return;
        }
        throw error;
      });
  }

  for (const dataset of datasets) {
    logger.info(`Indexing dataset ${dataset.name}`);

    await ensureDatasetIndexExists({
      esClient,
      dataset,
    });

    const documents = await fromCache(dataset.name, DATASET_ROWS_CACHE, () =>
      fetchRowsFromDataset({
        dataset,
        logger,
        limit,
        accessToken,
      })
    );

    logger.debug('Generating embeddings');

    const docsWithEmbeddings = await fromCache(dataset.name, DATASET_EMBEDDINGS_CACHE, () =>
      getEmbeddings({
        esClient,
        documents,
        dataset,
        logger,
      })
    );

    logger.debug(`Indexing documents with embeddings`);

    await indexDocuments({
      esClient,
      documents: docsWithEmbeddings,
      dataset,
      logger,
    });

    logger.debug(`Indexed dataset`);
  }
}
x-pack/platform/packages/shared/kbn-ai-tools-cli/src/hf_dataset_loader/types.ts
@@ -0,0 +1,29 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { IndicesPutMappingRequest } from '@elastic/elasticsearch/lib/api/types';

/** One dataset to import. */
export interface HuggingFaceDatasetSpec {
  /** Human-readable name (purely for logging). */
  name: string;
  /** HuggingFace Hub repository id, e.g. "BeIR/msmarco". */
  repo: string;
  /** File path inside the repo, e.g. "corpus.jsonl.gz". */
  file: string;
  /** Optional revision (tag/branch/commit). Defaults to "main" when not provided. */
  revision?: string;
  /** Target Elasticsearch index. */
  index: string;
  mapping: Omit<IndicesPutMappingRequest, 'index'>;
  /**
   * Convert raw JSON objects into whatever you want stored.
   * Return value **must** include the doc's unique identifier
   * under `_id` (or change the indexing code accordingly).
   */
  mapDocument: (raw: any) => Record<string, unknown>;
}
x-pack/platform/packages/shared/kbn-ai-tools-cli/tsconfig.json
@@ -0,0 +1,22 @@
{
  "extends": "../../../../../tsconfig.base.json",
  "compilerOptions": {
    "outDir": "target/types",
    "types": [
      "jest",
      "node"
    ]
  },
  "include": [
    "**/*.ts",
  ],
  "exclude": [
    "target/**/*"
  ],
  "kbn_references": [
    "@kbn/core",
    "@kbn/cache-cli",
    "@kbn/dev-cli-runner",
    "@kbn/kibana-api-cli"
  ]
}
@@ -26,7 +26,12 @@ export function createProxyTransport({
 }): typeof Transport {
   return class ProxyTransport extends Transport {
     constructor(options: TransportOptions) {
-      super(options);
+      super({
+        ...options,
+        // the elastic-x-product headers cause issues w/ the proxy transport,
+        // as the returned headers are from the proxy endpoint and not ES
+        productCheck: undefined,
+      });
     }

     request<TResponse = unknown>(
@@ -84,7 +89,7 @@ export function createProxyTransport({

       if (statusCode >= 400) {
         throw new errors.ResponseError({
-          statusCode: response.statusCode,
+          statusCode,
           body: response.body,
           meta: response.meta,
           warnings: response.warnings,
67 yarn.lock
@@ -3386,6 +3386,18 @@
     react-redux "^9.2.0"
     redux "^5.0.1"

+"@huggingface/hub@^2.2.0":
+  version "2.2.0"
+  resolved "https://registry.yarnpkg.com/@huggingface/hub/-/hub-2.2.0.tgz#0fbe96d09341e68e927315b860b866aee2b1c85d"
+  integrity sha512-G+VS1eMp80KovIHBlsiEigS6I6qmI4j+VQ1UZ8CaXT+pw2A7tj6e/crfxFdKNE2uOK5oQkRFiCBJykMwrWQ8OA==
+  dependencies:
+    "@huggingface/tasks" "^0.19.11"
+
+"@huggingface/tasks@^0.19.11":
+  version "0.19.16"
+  resolved "https://registry.yarnpkg.com/@huggingface/tasks/-/tasks-0.19.16.tgz#f3a83a04b0c06cdd022718504dae9eceb1c87d3b"
+  integrity sha512-8PfeMpvHqax9biZaw9/u1Ut9xWIJ1qIUAd8jAKkv6bKvIAAyTRCZgnwHJmzPffDE6B0VkLlPl5it6GaSg/kxlw==
+
 "@humanwhocodes/config-array@^0.11.14":
   version "0.11.14"
   resolved "https://registry.yarnpkg.com/@humanwhocodes/config-array/-/config-array-0.11.14.tgz#d78e481a039f7566ecc9660b4ea7fe6b1fec442b"
@@ -3821,6 +3833,10 @@
   version "0.0.0"
   uid ""

+"@kbn/ai-tools-cli@link:x-pack/platform/packages/shared/kbn-ai-tools-cli":
+  version "0.0.0"
+  uid ""
+
 "@kbn/aiops-change-point-detection@link:x-pack/platform/packages/private/ml/aiops_change_point_detection":
   version "0.0.0"
   uid ""
@@ -4029,6 +4045,10 @@
   version "0.0.0"
   uid ""

+"@kbn/cache-cli@link:src/platform/packages/shared/kbn-cache-cli":
+  version "0.0.0"
+  uid ""
+
 "@kbn/calculate-auto@link:src/platform/packages/shared/kbn-calculate-auto":
   version "0.0.0"
   uid ""
@@ -8229,6 +8249,13 @@
   version "0.0.0"
   uid ""

+"@keyv/serialize@^1.0.3":
+  version "1.0.3"
+  resolved "https://registry.yarnpkg.com/@keyv/serialize/-/serialize-1.0.3.tgz#e0fe3710e2a379cb0490cd41e5a5ffa2bab58bf6"
+  integrity sha512-qnEovoOp5Np2JDGonIDL6Ayihw0RhnRh6vxPuHo4RDn1UOzwEo4AeIfpL6UGIrsceWrCMiVPgwRjbHu4vYFc3g==
+  dependencies:
+    buffer "^6.0.3"
+
 "@kwsites/file-exists@^1.1.1":
   version "1.1.1"
   resolved "https://registry.yarnpkg.com/@kwsites/file-exists/-/file-exists-1.1.1.tgz#ad1efcac13e1987d8dbaf235ef3be5b0d96faa99"
@@ -11388,6 +11415,18 @@
   resolved "https://registry.yarnpkg.com/@types/byte-size/-/byte-size-8.1.2.tgz#abb3d70ab62c400d8753bed1ff2aa315ef9ff7f5"
   integrity sha512-jGyVzYu6avI8yuqQCNTZd65tzI8HZrLjKX9sdMqZrGWVlNChu0rf6p368oVEDCYJe5BMx2Ov04tD1wqtgTwGSA==

+"@types/cache-manager-fs-hash@^0.0.5":
+  version "0.0.5"
+  resolved "https://registry.yarnpkg.com/@types/cache-manager-fs-hash/-/cache-manager-fs-hash-0.0.5.tgz#39c2c93b0a6a873dfefbbd4a926b34216cda66c8"
+  integrity sha512-mSqk9YisfK/NkB/R5SzGeuSIVtwHhM5m6MLB0VrrFteTphKiQ2Fyz88IRtiX+SYEX6Nw2H3kB9qtpfnVSE/mSQ==
+  dependencies:
+    "@types/cache-manager" "<4"
+
+"@types/cache-manager@<4":
+  version "3.4.3"
+  resolved "https://registry.yarnpkg.com/@types/cache-manager/-/cache-manager-3.4.3.tgz#eba99bf795b997ad0c309658101398c34d7faecb"
+  integrity sha512-71aBXoFYXZW4TnDHHH8gExw2lS28BZaWeKefgsiJI7QYZeJfUEbMKw6CQtzGjlYQcGIWwB76hcCrkVA3YHSvsw==
+
 "@types/cacheable-request@^6.0.1":
   version "6.0.1"
   resolved "https://registry.yarnpkg.com/@types/cacheable-request/-/cacheable-request-6.0.1.tgz#5d22f3dded1fd3a84c0bbeb5039a7419c2c91976"
@@ -14843,6 +14882,20 @@ cache-base@^1.0.1:
     union-value "^1.0.0"
     unset-value "^1.0.0"

+cache-manager-fs-hash@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/cache-manager-fs-hash/-/cache-manager-fs-hash-2.0.0.tgz#daa422ffe689ef16d15c8e3fd47932c8afd14171"
+  integrity sha512-w03tp8mvfglRUFtItCdC114rFyzk0umu5LnnRM5spnu2+Mj8/2PrDHCnaoPltto/2fK94fC/Kw2rHqBXqIEgTQ==
+  dependencies:
+    lockfile "^1.0.4"
+
+cache-manager@^7.0.0:
+  version "7.0.0"
+  resolved "https://registry.yarnpkg.com/cache-manager/-/cache-manager-7.0.0.tgz#3a591187372bcfa32e9cb479764a411a0a0d6a74"
+  integrity sha512-5HLGorfU4g2GyLTXd+bbq8RhZPwLRlVm7hfS1EssJx4Ujq1FjyQAjHND93sI6ByQTlUlCQ0jrHZqLI0qtBFyHA==
+  dependencies:
+    keyv "^5.3.3"
+
 cacheable-lookup@6:
   version "6.1.0"
   resolved "https://registry.yarnpkg.com/cacheable-lookup/-/cacheable-lookup-6.1.0.tgz#0330a543471c61faa4e9035db583aad753b36385"
@@ -22823,6 +22876,13 @@ keyv@^4.0.0:
   dependencies:
     json-buffer "3.0.1"

+keyv@^5.3.3, keyv@^5.3.4:
+  version "5.3.4"
+  resolved "https://registry.yarnpkg.com/keyv/-/keyv-5.3.4.tgz#e0548d9449c51fc332abdd637c2b3bb2d24c9bc9"
+  integrity sha512-ypEvQvInNpUe+u+w8BIcPkQvEqXquyyibWE/1NB5T2BTzIpS5cGEV1LZskDzPSTvNAaT4+5FutvzlvnkxOSKlw==
+  dependencies:
+    "@keyv/serialize" "^1.0.3"
+
 kind-of@^3.0.2, kind-of@^3.0.3, kind-of@^3.2.0:
   version "3.2.2"
   resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-3.2.2.tgz#31ea21a734bab9bbb0f32466d893aea51e4a3c64"
@@ -23211,6 +23271,13 @@ locate-path@^7.1.0:
   dependencies:
     p-locate "^6.0.0"

+lockfile@^1.0.4:
+  version "1.0.4"
+  resolved "https://registry.yarnpkg.com/lockfile/-/lockfile-1.0.4.tgz#07f819d25ae48f87e538e6578b6964a4981a5609"
+  integrity sha512-cvbTwETRfsFh4nHsL1eGWapU1XFi5Ot9E85sWAwia7Y7EgB7vfqcZhTKZ+l7hCGxSPoushMv5GKhT5PdLv03WA==
+  dependencies:
+    signal-exit "^3.0.2"
+
 lodash-es@^4.17.21:
   version "4.17.21"
   resolved "https://registry.yarnpkg.com/lodash-es/-/lodash-es-4.17.21.tgz#43e626c46e6591b7750beb2b50117390c609e3ee"