# Backport

This will backport the following commits from `main` to `8.x`:

- [[product doc] adapt for new format of semantic_text field (#206051)](https://github.com/elastic/kibana/pull/206051)

### Questions?

Please refer to the [Backport tool documentation](https://github.com/sqren/backport).

## Summary

Fixes https://github.com/elastic/kibana/issues/205908

Adapt the product documentation's usages of `semantic_text` to the breaking changes that will be introduced in 8.18 and 9.0.

This PR introduces a new format version (`2.0.0`) for the product documentation, with the changes required by the incoming `semantic_text` breaking change:

- include the `_inference_fields` meta field when bundling the doc artifacts
- set the `index.mapping.semantic_text.use_legacy_format` index setting to `false` to force the new format
- change how we internally override the `inference_id` when ingesting the data
- adapt the `search` logic to retrieve the data from the right place

Doing this with a new format version also makes the transition invisible, as the system simply adapts to the format version declared in the artifact's manifest.

### How to test

**1. Test that the behavior is not broken for current artifacts**

Run the branch, install the product doc from the prod repository, make sure that the 8.17 artifacts are installed, then check that the feature still works using the o11y assistant.

**2. Test that the behavior works with the new artifacts**

**Keeping your ES instance up**, configure your local Kibana to use the dev repository (where the 8.18 artifacts with the new format are present):

```yaml
xpack.productDocBase.artifactRepositoryUrl: "https://storage.googleapis.com/kibana-ai-assistant-kb-artifacts-dev"
```

Then restart Kibana, confirm the artifacts get updated to 8.18 automatically, and test that the feature still works as expected using the o11y assistant.

Co-authored-by: Pierre Gayvallet <pierre.gayvallet@elastic.co>
Parent: 4bd76d573e
Commit: 55e58ece0c

20 changed files with 240 additions and 17 deletions
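For context, the source-format change this backport accommodates looks roughly as follows. This is a hand-written sketch based on the test fixtures in the diff below, not an excerpt from a real artifact:

```typescript
// Sketch: the same document in both semantic_text source formats.
// Field names mirror the PR's test fixtures; values are illustrative.

// Legacy format (manifest formatVersion 1.0.0): the inference metadata is
// inlined in the field value itself.
const legacyDoc = {
  semantic: { text: 'foo', inference: { inference_id: '.some-inference' } },
};

// New format (manifest formatVersion 2.0.0): the field holds the plain text,
// and the inference metadata moves to the `_inference_fields` meta field.
const newDoc = {
  semantic: 'foo',
  _inference_fields: {
    semantic: { inference: { inference_id: '.some-inference' } },
  },
};

console.log(legacyDoc, newDoc);
```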
```diff
@@ -10,12 +10,14 @@ import type { ArtifactManifest, ProductName } from '@kbn/product-doc-common';
 export const getArtifactManifest = ({
   productName,
   stackVersion,
+  formatVersion,
 }: {
   productName: ProductName;
   stackVersion: string;
+  formatVersion: string;
 }): ArtifactManifest => {
   return {
-    formatVersion: '1.0.0',
+    formatVersion,
     productName,
     productVersion: stackVersion,
   };
```
```diff
@@ -8,9 +8,14 @@
 import Path from 'path';
 import AdmZip from 'adm-zip';
 import type { ToolingLog } from '@kbn/tooling-log';
-import { getArtifactName, type ProductName } from '@kbn/product-doc-common';
+import {
+  LATEST_MANIFEST_FORMAT_VERSION,
+  getArtifactName,
+  type ProductName,
+} from '@kbn/product-doc-common';
 import { getArtifactMappings } from '../artifact/mappings';
 import { getArtifactManifest } from '../artifact/manifest';
+import { DEFAULT_ELSER } from './create_index';
 
 export const createArtifact = async ({
   productName,
@@ -31,11 +36,15 @@ export const createArtifact = async ({
 
   const zip = new AdmZip();
 
-  const mappings = getArtifactMappings('.default-elser');
+  const mappings = getArtifactMappings(DEFAULT_ELSER);
   const mappingFileContent = JSON.stringify(mappings, undefined, 2);
   zip.addFile('mappings.json', Buffer.from(mappingFileContent, 'utf-8'));
 
-  const manifest = getArtifactManifest({ productName, stackVersion });
+  const manifest = getArtifactManifest({
+    productName,
+    stackVersion,
+    formatVersion: LATEST_MANIFEST_FORMAT_VERSION,
+  });
   const manifestFileContent = JSON.stringify(manifest, undefined, 2);
   zip.addFile('manifest.json', Buffer.from(manifestFileContent, 'utf-8'));
 
```
```diff
@@ -30,6 +30,8 @@ export const createChunkFiles = async ({
   const searchRes = await client.search({
     index,
     size: 10000,
+    // includes inference field meta info in source
+    fields: ['_inference_fields'],
     query: {
       bool: {
         must: [{ term: { product_name: productName } }],
```
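The `fields: ['_inference_fields']` option is what carries the inference metadata into the bundled artifacts. A minimal standalone sketch of such a request, assuming a local cluster; the index name here is hypothetical:

```typescript
import { Client } from '@elastic/elasticsearch';

const client = new Client({ node: 'http://localhost:9200' }); // assumed local cluster

const run = async () => {
  // Ask ES to return the `_inference_fields` meta field alongside `_source`,
  // mirroring the bundling change above.
  const searchRes = await client.search({
    index: 'product-doc-kibana', // hypothetical index name
    size: 10000,
    fields: ['_inference_fields'],
    query: {
      bool: { must: [{ term: { product_name: 'kibana' } }] },
    },
  });
  console.log(searchRes.hits.hits.length);
};

void run();
```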
```diff
@@ -8,7 +8,7 @@
 import type { Client } from '@elastic/elasticsearch';
 import type { MappingTypeMapping } from '@elastic/elasticsearch/lib/api/types';
 
-const DEFAULT_ELSER = '.elser-2-elasticsearch';
+export const DEFAULT_ELSER = '.elser-2-elasticsearch';
 
 const mappings: MappingTypeMapping = {
   dynamic: 'strict',
@@ -46,5 +46,8 @@ export const createTargetIndex = async ({
   await client.indices.create({
     index: indexName,
     mappings,
+    settings: {
+      'index.mapping.semantic_text.use_legacy_format': false,
+    },
   });
 };
```
```diff
@@ -56,12 +56,23 @@ const processDocument = (document: ExtractedDocument) => {
     })
     // remove edit links
     .replaceAll(/\[\s*edit\s*\]\(\s*[^)]+\s*\)/g, '')
     // remove empty links
     .replaceAll('[]()', '')
+    // remove image links
+    .replaceAll(/\[\]\(\s*[^)]+\s*\)/g, '')
     // limit to 2 consecutive carriage return
     .replaceAll(/\n\n+/g, '\n\n');
 
   document.content_title = document.content_title.split('|')[0].trim();
 
+  // specific to security: remove rule query section as it's usually large without much value for the LLM
+  if (document.product_name === 'security') {
+    const ruleQueryTitle = '### Rule query';
+    const ruleQueryPos = document.content_body.indexOf(ruleQueryTitle);
+    if (ruleQueryPos > -1) {
+      document.content_body = document.content_body.substring(0, ruleQueryPos);
+    }
+  }
+
   return document;
 };
```
```diff
@@ -6,7 +6,7 @@
  */
 
 export { getArtifactName, parseArtifactName } from './src/artifact';
-export { type ArtifactManifest } from './src/manifest';
+export { LATEST_MANIFEST_FORMAT_VERSION, type ArtifactManifest } from './src/manifest';
 export { DocumentationProduct, type ProductName } from './src/product';
 export { isArtifactContentFilePath } from './src/artifact_content';
 export {
```
```diff
@@ -18,14 +18,15 @@ interface SemanticTextArrayField {
 
 export interface ProductDocumentationAttributes {
   content_title: string;
-  content_body: SemanticTextField;
+  // backward compatibility for the legacy semantic_text mode
+  content_body: string | SemanticTextField;
   product_name: ProductName;
   root_type: string;
   slug: string;
   url: string;
   version: string;
   ai_subtitle: string;
-  ai_summary: SemanticTextField;
-  ai_questions_answered: SemanticTextArrayField;
+  ai_summary: string | SemanticTextField;
+  ai_questions_answered: string[] | SemanticTextArrayField;
   ai_tags: string[];
 }
```
```diff
@@ -7,6 +7,8 @@
 
 import type { ProductName } from './product';
 
+export const LATEST_MANIFEST_FORMAT_VERSION = '2.0.0';
+
 export interface ArtifactManifest {
   formatVersion: string;
   productName: ProductName;
```
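With the new constant exported from `@kbn/product-doc-common`, a manifest produced under the latest format would look roughly like this inside the Kibana repo. A sketch only: the field values are illustrative and mirror the test mocks further down, and `ArtifactManifest` may carry more fields than shown here:

```typescript
import { LATEST_MANIFEST_FORMAT_VERSION, type ArtifactManifest } from '@kbn/product-doc-common';

// Illustrative manifest under the new format.
const manifest: ArtifactManifest = {
  formatVersion: LATEST_MANIFEST_FORMAT_VERSION, // '2.0.0'
  productName: 'kibana',
  productVersion: '8.17',
};

console.log(manifest);
```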
```diff
@@ -24,6 +24,7 @@ jest.doMock('./steps', () => {
 export const downloadToDiskMock = jest.fn();
 export const openZipArchiveMock = jest.fn();
 export const loadMappingFileMock = jest.fn();
+export const loadManifestFileMock = jest.fn();
 export const ensureDefaultElserDeployedMock = jest.fn();
 
 jest.doMock('./utils', () => {
@@ -33,6 +34,7 @@ jest.doMock('./utils', () => {
     downloadToDisk: downloadToDiskMock,
     openZipArchive: openZipArchiveMock,
     loadMappingFile: loadMappingFileMock,
+    loadManifestFile: loadManifestFileMock,
     ensureDefaultElserDeployed: ensureDefaultElserDeployedMock,
   };
 });
```
```diff
@@ -10,6 +10,7 @@ import {
   createIndexMock,
   populateIndexMock,
   loadMappingFileMock,
+  loadManifestFileMock,
   openZipArchiveMock,
   validateArtifactArchiveMock,
   fetchArtifactVersionsMock,
@@ -36,6 +37,8 @@ const callOrder = (fn: { mock: { invocationCallOrder: number[] } }): number => {
   return fn.mock.invocationCallOrder[0];
 };
 
+const TEST_FORMAT_VERSION = '2.0.0';
+
 describe('PackageInstaller', () => {
   let logger: MockedLogger;
   let esClient: ReturnType<typeof elasticsearchServiceMock.createElasticsearchClient>;
@@ -55,6 +58,12 @@ describe('PackageInstaller', () => {
       artifactRepositoryUrl,
       kibanaVersion,
     });
+
+    loadManifestFileMock.mockResolvedValue({
+      formatVersion: TEST_FORMAT_VERSION,
+      productName: 'kibana',
+      productVersion: '8.17',
+    });
   });
 
   afterEach(() => {
@@ -62,6 +71,7 @@ describe('PackageInstaller', () => {
     createIndexMock.mockReset();
     populateIndexMock.mockReset();
     loadMappingFileMock.mockReset();
+    loadManifestFileMock.mockReset();
     openZipArchiveMock.mockReset();
     validateArtifactArchiveMock.mockReset();
    fetchArtifactVersionsMock.mockReset();
@@ -99,10 +109,14 @@ describe('PackageInstaller', () => {
     expect(loadMappingFileMock).toHaveBeenCalledTimes(1);
     expect(loadMappingFileMock).toHaveBeenCalledWith(zipArchive);
 
+    expect(loadManifestFileMock).toHaveBeenCalledTimes(1);
+    expect(loadManifestFileMock).toHaveBeenCalledWith(zipArchive);
+
     expect(createIndexMock).toHaveBeenCalledTimes(1);
     expect(createIndexMock).toHaveBeenCalledWith({
       indexName,
       mappings,
+      manifestVersion: TEST_FORMAT_VERSION,
       esClient,
       log: logger,
     });
@@ -111,6 +125,7 @@ describe('PackageInstaller', () => {
     expect(populateIndexMock).toHaveBeenCalledWith({
       indexName,
       archive: zipArchive,
+      manifestVersion: TEST_FORMAT_VERSION,
       esClient,
       log: logger,
     });
@@ -130,6 +145,7 @@ describe('PackageInstaller', () => {
     expect(callOrder(downloadToDiskMock)).toBeLessThan(callOrder(openZipArchiveMock));
     expect(callOrder(openZipArchiveMock)).toBeLessThan(callOrder(loadMappingFileMock));
     expect(callOrder(loadMappingFileMock)).toBeLessThan(callOrder(createIndexMock));
+    expect(callOrder(loadManifestFileMock)).toBeLessThan(callOrder(createIndexMock));
     expect(callOrder(createIndexMock)).toBeLessThan(callOrder(populateIndexMock));
     expect(callOrder(populateIndexMock)).toBeLessThan(
       callOrder(productDocClient.setInstallationSuccessful)
```
```diff
@@ -18,6 +18,7 @@ import {
   downloadToDisk,
   openZipArchive,
   loadMappingFile,
+  loadManifestFile,
   ensureDefaultElserDeployed,
   type ZipArchive,
 } from './utils';
@@ -158,19 +159,25 @@ export class PackageInstaller {
 
     validateArtifactArchive(zipArchive);
 
-    const mappings = await loadMappingFile(zipArchive);
+    const [manifest, mappings] = await Promise.all([
+      loadManifestFile(zipArchive),
+      loadMappingFile(zipArchive),
+    ]);
+    const manifestVersion = manifest.formatVersion;
     const indexName = getProductDocIndexName(productName);
 
     await createIndex({
       indexName,
       mappings,
+      manifestVersion,
       esClient: this.esClient,
       log: this.log,
     });
 
     await populateIndex({
       indexName,
+      manifestVersion,
       archive: zipArchive,
       esClient: this.esClient,
       log: this.log,
     });
```
```diff
@@ -9,9 +9,12 @@ import type { MappingTypeMapping } from '@elastic/elasticsearch/lib/api/types';
 import { loggerMock, type MockedLogger } from '@kbn/logging-mocks';
 import type { ElasticsearchClient } from '@kbn/core/server';
 import { elasticsearchServiceMock } from '@kbn/core/server/mocks';
+import { LATEST_MANIFEST_FORMAT_VERSION } from '@kbn/product-doc-common';
 import { createIndex } from './create_index';
 import { internalElserInferenceId } from '../../../../common/consts';
 
+const LEGACY_SEMANTIC_TEXT_VERSION = '1.0.0';
+
 describe('createIndex', () => {
   let log: MockedLogger;
   let esClient: ElasticsearchClient;
@@ -21,7 +24,7 @@ describe('createIndex', () => {
     esClient = elasticsearchServiceMock.createElasticsearchClient();
   });
 
-  it('calls esClient.indices.create with the right parameters', async () => {
+  it('calls esClient.indices.create with the right parameters for the current manifest version', async () => {
     const mappings: MappingTypeMapping = {
       properties: {},
     };
@@ -30,6 +33,7 @@ describe('createIndex', () => {
     await createIndex({
       indexName,
       mappings,
+      manifestVersion: LATEST_MANIFEST_FORMAT_VERSION,
       log,
       esClient,
     });
@@ -41,6 +45,33 @@ describe('createIndex', () => {
       settings: {
         number_of_shards: 1,
         auto_expand_replicas: '0-1',
+        'index.mapping.semantic_text.use_legacy_format': false,
       },
     });
   });
+
+  it('calls esClient.indices.create with the right parameters for the manifest version 1.0.0', async () => {
+    const mappings: MappingTypeMapping = {
+      properties: {},
+    };
+    const indexName = '.some-index';
+
+    await createIndex({
+      indexName,
+      mappings,
+      manifestVersion: LEGACY_SEMANTIC_TEXT_VERSION,
+      log,
+      esClient,
+    });
+
+    expect(esClient.indices.create).toHaveBeenCalledTimes(1);
+    expect(esClient.indices.create).toHaveBeenCalledWith({
+      index: indexName,
+      mappings,
+      settings: {
+        number_of_shards: 1,
+        auto_expand_replicas: '0-1',
+        'index.mapping.semantic_text.use_legacy_format': true,
+      },
+    });
+  });
@@ -61,6 +92,7 @@ describe('createIndex', () => {
     await createIndex({
       indexName: '.some-index',
       mappings,
+      manifestVersion: LEGACY_SEMANTIC_TEXT_VERSION,
       log,
       esClient,
     });
```
```diff
@@ -9,20 +9,25 @@ import type { Logger } from '@kbn/logging';
 import type { ElasticsearchClient } from '@kbn/core/server';
 import type { MappingTypeMapping, MappingProperty } from '@elastic/elasticsearch/lib/api/types';
 import { internalElserInferenceId } from '../../../../common/consts';
+import { isLegacySemanticTextVersion } from '../utils';
 
 export const createIndex = async ({
   esClient,
   indexName,
+  manifestVersion,
   mappings,
   log,
 }: {
   esClient: ElasticsearchClient;
   indexName: string;
+  manifestVersion: string;
   mappings: MappingTypeMapping;
   log: Logger;
 }) => {
   log.debug(`Creating index ${indexName}`);
 
+  const legacySemanticText = isLegacySemanticTextVersion(manifestVersion);
+
   overrideInferenceId(mappings, internalElserInferenceId);
 
   await esClient.indices.create({
@@ -31,6 +36,7 @@ export const createIndex = async ({
     settings: {
       number_of_shards: 1,
       auto_expand_replicas: '0-1',
+      'index.mapping.semantic_text.use_legacy_format': legacySemanticText,
     },
   });
 };
```
```diff
@@ -8,10 +8,13 @@
 import { times } from 'lodash';
 import { loggerMock, type MockedLogger } from '@kbn/logging-mocks';
 import { elasticsearchServiceMock } from '@kbn/core/server/mocks';
+import { LATEST_MANIFEST_FORMAT_VERSION } from '@kbn/product-doc-common';
 import { internalElserInferenceId } from '../../../../common/consts';
 import type { ZipArchive } from '../utils/zip_archive';
 import { populateIndex } from './populate_index';
 
+const LEGACY_SEMANTIC_TEXT_VERSION = '1.0.0';
+
 const createMockArchive = (entries: Record<string, string>): ZipArchive => {
   return {
     hasEntry: (entryPath) => Object.keys(entries).includes(entryPath),
@@ -44,6 +47,7 @@ describe('populateIndex', () => {
 
     await populateIndex({
       indexName: '.foo',
+      manifestVersion: LATEST_MANIFEST_FORMAT_VERSION,
       archive,
       log,
       esClient,
@@ -59,6 +63,7 @@ describe('populateIndex', () => {
 
     await populateIndex({
       indexName: '.foo',
+      manifestVersion: LATEST_MANIFEST_FORMAT_VERSION,
       archive,
       log,
       esClient,
@@ -77,6 +82,47 @@ describe('populateIndex', () => {
     });
   });
 
+  it('rewrites the inference_id of semantic fields', async () => {
+    const archive = createMockArchive({
+      'content/content-0.ndjson': JSON.stringify({
+        semantic: 'foo',
+        _inference_fields: {
+          semantic: {
+            inference: {
+              inference_id: '.some-inference',
+            },
+          },
+        },
+      }),
+    });
+
+    await populateIndex({
+      indexName: '.foo',
+      manifestVersion: LATEST_MANIFEST_FORMAT_VERSION,
+      archive,
+      log,
+      esClient,
+    });
+
+    expect(esClient.bulk).toHaveBeenCalledTimes(1);
+    expect(esClient.bulk).toHaveBeenCalledWith({
+      refresh: false,
+      operations: [
+        { index: { _index: '.foo' } },
+        {
+          semantic: 'foo',
+          _inference_fields: {
+            semantic: {
+              inference: {
+                inference_id: internalElserInferenceId,
+              },
+            },
+          },
+        },
+      ],
+    });
+  });
+
-  it('rewrites the inference_id of semantic fields', async () => {
+  it('rewrites the inference_id of semantic fields for legacy semantic_field', async () => {
     const archive = createMockArchive({
       'content/content-0.ndjson': JSON.stringify({
         semantic: { text: 'foo', inference: { inference_id: '.some-inference' } },
@@ -85,6 +131,7 @@ describe('populateIndex', () => {
 
     await populateIndex({
       indexName: '.foo',
+      manifestVersion: LEGACY_SEMANTIC_TEXT_VERSION,
       archive,
       log,
       esClient,
```
```diff
@@ -11,27 +11,32 @@ import type { ElasticsearchClient } from '@kbn/core/server';
 import { isArtifactContentFilePath } from '@kbn/product-doc-common';
 import { internalElserInferenceId } from '../../../../common/consts';
 import type { ZipArchive } from '../utils/zip_archive';
+import { isLegacySemanticTextVersion } from '../utils';
 
 export const populateIndex = async ({
   esClient,
   indexName,
+  manifestVersion,
   archive,
   log,
 }: {
   esClient: ElasticsearchClient;
   indexName: string;
+  manifestVersion: string;
   archive: ZipArchive;
   log: Logger;
 }) => {
   log.debug(`Starting populating index ${indexName}`);
 
+  const legacySemanticText = isLegacySemanticTextVersion(manifestVersion);
+
   const contentEntries = archive.getEntryPaths().filter(isArtifactContentFilePath);
 
   for (let i = 0; i < contentEntries.length; i++) {
     const entryPath = contentEntries[i];
     log.debug(`Indexing content for entry ${entryPath}`);
     const contentBuffer = await archive.getEntryContent(entryPath);
-    await indexContentFile({ indexName, esClient, contentBuffer });
+    await indexContentFile({ indexName, esClient, contentBuffer, legacySemanticText });
   }
 
   log.debug(`Done populating index ${indexName}`);
@@ -41,10 +46,12 @@ const indexContentFile = async ({
   indexName,
   contentBuffer,
   esClient,
+  legacySemanticText,
 }: {
   indexName: string;
   contentBuffer: Buffer;
   esClient: ElasticsearchClient;
+  legacySemanticText: boolean;
 }) => {
   const fileContent = contentBuffer.toString('utf-8');
   const lines = fileContent.split('\n');
@@ -55,7 +62,13 @@ const indexContentFile = async ({
     .map((line) => {
       return JSON.parse(line);
     })
-    .map((doc) => rewriteInferenceId(doc, internalElserInferenceId));
+    .map((doc) =>
+      rewriteInferenceId({
+        document: doc,
+        inferenceId: internalElserInferenceId,
+        legacySemanticText,
+      })
+    );
 
   const operations = documents.reduce((ops, document) => {
     ops!.push(...[{ index: { _index: indexName } }, document]);
@@ -73,9 +86,18 @@ const indexContentFile = async ({
   }
 };
 
-const rewriteInferenceId = (document: Record<string, any>, inferenceId: string) => {
+const rewriteInferenceId = ({
+  document,
+  inferenceId,
+  legacySemanticText,
+}: {
+  document: Record<string, any>;
+  inferenceId: string;
+  legacySemanticText: boolean;
+}) => {
+  const semanticFieldsRoot = legacySemanticText ? document : document._inference_fields;
   // we don't need to handle nested fields, we don't have any and won't.
-  Object.values(document).forEach((field) => {
+  Object.values(semanticFieldsRoot ?? {}).forEach((field: any) => {
     if (field.inference) {
       field.inference.inference_id = inferenceId;
     }
```
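A standalone sketch of the gated rewrite above, mirroring `rewriteInferenceId`; the inference id here is a stand-in string, not the real `internalElserInferenceId` value:

```typescript
// Condensed, self-contained version of the rewrite: pick the root that holds
// the semantic-field metadata depending on the artifact format, then swap ids.
const rewrite = (document: Record<string, any>, inferenceId: string, legacy: boolean) => {
  // Legacy docs keep inference metadata inline; new docs keep it under `_inference_fields`.
  const semanticFieldsRoot = legacy ? document : document._inference_fields;
  Object.values(semanticFieldsRoot ?? {}).forEach((field: any) => {
    if (field.inference) {
      field.inference.inference_id = inferenceId;
    }
  });
  return document;
};

// Legacy (1.0.0) document:
console.log(
  rewrite({ semantic: { text: 'foo', inference: { inference_id: '.some-inference' } } }, 'internal-elser', true)
);
// New (2.0.0) document:
console.log(
  rewrite(
    { semantic: 'foo', _inference_fields: { semantic: { inference: { inference_id: '.some-inference' } } } },
    'internal-elser',
    false
  )
);
```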
```diff
@@ -9,3 +9,4 @@ export { downloadToDisk } from './download';
 export { openZipArchive, type ZipArchive } from './zip_archive';
 export { loadManifestFile, loadMappingFile } from './archive_accessors';
 export { ensureDefaultElserDeployed } from './ensure_default_elser_deployed';
+export { isLegacySemanticTextVersion } from './manifest_versions';
```
```diff
@@ -0,0 +1,19 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { isLegacySemanticTextVersion } from './manifest_versions';
+
+describe('isLegacySemanticTextVersion', () => {
+  it('returns true for version 1.0.0', () => {
+    expect(isLegacySemanticTextVersion('1.0.0')).toBe(true);
+  });
+
+  it('returns false for version 2.0.0 and higher', () => {
+    expect(isLegacySemanticTextVersion('2.0.0')).toBe(false);
+    expect(isLegacySemanticTextVersion('4.92.3')).toBe(false);
+  });
+});
```
```diff
@@ -0,0 +1,15 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import Semver from 'semver';
+
+/**
+ * checks if the provided manifest version was a version where legacy semantic_text behavior was being used
+ */
+export const isLegacySemanticTextVersion = (manifestVersion: string): boolean => {
+  return Semver.lte(manifestVersion, '1.0.0');
+};
```
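Because the gate uses `Semver.lte`, every manifest version at or below `1.0.0` is treated as legacy. A quick check of the boundary (a sketch; the `0.9.0` version is hypothetical):

```typescript
import Semver from 'semver';

// Anything <= 1.0.0 means legacy semantic_text behavior.
console.log(Semver.lte('1.0.0', '1.0.0')); // true  -> legacy format
console.log(Semver.lte('0.9.0', '1.0.0')); // true  -> legacy format (hypothetical version)
console.log(Semver.lte('2.0.0', '1.0.0')); // false -> new format
```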
```diff
@@ -20,6 +20,31 @@ const createHit = (
 
 describe('mapResult', () => {
+  it('returns the expected shape', () => {
+    const input = createHit({
+      content_title: 'content_title',
+      content_body: 'content_body',
+      product_name: 'kibana',
+      root_type: 'documentation',
+      slug: 'foo.html',
+      url: 'http://lost.com/foo.html',
+      version: '8.16',
+      ai_subtitle: 'ai_subtitle',
+      ai_summary: 'ai_summary',
+      ai_questions_answered: ['question A'],
+      ai_tags: ['foo', 'bar', 'test'],
+    });
+
+    const output = mapResult(input);
+
+    expect(output).toEqual({
+      content: 'content_body',
+      productName: 'kibana',
+      title: 'content_title',
+      url: 'http://lost.com/foo.html',
+    });
+  });
+
-  it('returns the expected shape', () => {
+  it('returns the expected shape for legacy semantic_text fields', () => {
     const input = createHit({
       content_title: 'content_title',
       content_body: { text: 'content_body' },
```
```diff
@@ -10,9 +10,10 @@ import type { ProductDocumentationAttributes } from '@kbn/product-doc-common';
 import type { DocSearchResult } from '../types';
 
 export const mapResult = (docHit: SearchHit<ProductDocumentationAttributes>): DocSearchResult => {
+  const content = docHit._source!.content_body;
   return {
     title: docHit._source!.content_title,
-    content: docHit._source!.content_body.text,
+    content: typeof content === 'string' ? content : content.text,
     url: docHit._source!.url,
     productName: docHit._source!.product_name,
   };
```
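The same narrowing, isolated as a sketch; `SemanticTextField` is reduced here to the one property `mapResult` reads:

```typescript
type SemanticTextField = { text: string };

// content_body may be a plain string (new format) or an object (legacy format);
// a typeof check recovers the text either way.
const getContent = (body: string | SemanticTextField): string =>
  typeof body === 'string' ? body : body.text;

console.log(getContent('plain text'));       // new format source
console.log(getContent({ text: 'legacy' })); // legacy format source
```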