[ML] File upload: Adds support for PDF files (#186956)

Also txt, rtf, doc, docx, xls, xlsx, ppt, pptx, odt, ods, and odp.

Adds the ability to automatically add a semantic text field to the
mappings and a `set` processor (using `copy_from`) to duplicate the
field. This is needed for the mappings generated for the attachment
processor, which adds a nested `attachment.content` field that cannot be
used as a semantic text field.

After a successful import, a link to Search's Playground app is shown.
Navigating there lets the user instantly query the newly uploaded file.


https://github.com/user-attachments/assets/09b20a5f-0e02-47fa-885e-0ed21374cc60

---------

Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
Co-authored-by: Liam Thompson <32779855+leemthompo@users.noreply.github.com>
This commit is contained in:
James Gowdy 2024-08-22 11:14:59 +01:00 committed by GitHub
parent c5b38e487a
commit 3177b037d7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
50 changed files with 1658 additions and 507 deletions

View file

@ -28,7 +28,7 @@ export const FILE_FORMATS = {
DELIMITED: 'delimited',
NDJSON: 'ndjson',
SEMI_STRUCTURED_TEXT: 'semi_structured_text',
// XML: 'xml',
TIKA: 'tika',
};
export const SUPPORTED_FIELD_TYPES = {

View file

@ -0,0 +1,90 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { i18n } from '@kbn/i18n';
/**
 * Reports whether the supplied MIME type is recognised by
 * `getTikaDisplayType`.
 *
 * @param type MIME type string to check.
 * @returns the `isTikaType` flag reported by `getTikaDisplayType` for `type`.
 */
export function isTikaType(type: string) {
  const { isTikaType: recognised } = getTikaDisplayType(type);
  return recognised;
}
/**
 * MIME types grouped by the display label they share.
 *
 * Each label is produced by a function so that the translation is looked up
 * when the label is requested rather than when this module is loaded.
 */
const TIKA_TYPE_GROUPS: ReadonlyArray<{
  mimeTypes: readonly string[];
  getLabel: () => string;
}> = [
  {
    mimeTypes: [
      'application/doc',
      'application/ms-doc',
      'application/msword',
      'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    ],
    getLabel: () =>
      i18n.translate('xpack.dataVisualizer.file.tikaTypes.word', {
        defaultMessage: 'Microsoft Office Word document',
      }),
  },
  {
    mimeTypes: [
      'application/excel',
      'application/vnd.ms-excel',
      'application/x-excel',
      'application/x-msexcel',
      'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    ],
    getLabel: () =>
      i18n.translate('xpack.dataVisualizer.file.tikaTypes.excel', {
        defaultMessage: 'Microsoft Office Excel document',
      }),
  },
  {
    mimeTypes: [
      'application/mspowerpoint',
      'application/powerpoint',
      'application/vnd.ms-powerpoint',
      'application/x-mspowerpoint',
      'application/vnd.openxmlformats-officedocument.presentationml.presentation',
    ],
    getLabel: () =>
      i18n.translate('xpack.dataVisualizer.file.tikaTypes.powerPoint', {
        defaultMessage: 'Microsoft Office Power Point document',
      }),
  },
  {
    mimeTypes: [
      'application/vnd.oasis.opendocument.presentation',
      'application/vnd.oasis.opendocument.spreadsheet',
      'application/vnd.oasis.opendocument.text',
    ],
    getLabel: () =>
      i18n.translate('xpack.dataVisualizer.file.tikaTypes.openDoc', {
        defaultMessage: 'Open Document Format',
      }),
  },
  {
    mimeTypes: ['text/rtf', 'application/rtf'],
    getLabel: () =>
      i18n.translate('xpack.dataVisualizer.file.tikaTypes.richText', {
        defaultMessage: 'Rich Text Format',
      }),
  },
  {
    mimeTypes: ['application/pdf'],
    getLabel: () =>
      i18n.translate('xpack.dataVisualizer.file.tikaTypes.pdf', {
        defaultMessage: 'PDF',
      }),
  },
  {
    mimeTypes: ['text/plain', 'text/plain; charset=UTF-8'],
    getLabel: () =>
      i18n.translate('xpack.dataVisualizer.file.tikaTypes.plainText', {
        defaultMessage: 'Plain text',
      }),
  },
];

/**
 * Maps a MIME type to a human readable label.
 *
 * The match is exact (case and parameters included). Unrecognised types are
 * reported with `isTikaType: false` and the raw MIME type as the label.
 *
 * @param type MIME type string to look up.
 * @returns whether the type is a recognised document type, and its label.
 */
export const getTikaDisplayType = (type: string): { isTikaType: boolean; label: string } => {
  const group = TIKA_TYPE_GROUPS.find(({ mimeTypes }) => mimeTypes.includes(type));
  if (group === undefined) {
    return { isTikaType: false, label: type };
  }
  return { isTikaType: true, label: group.getLabel() };
};

View file

@ -8,6 +8,7 @@
import React from 'react';
import { EuiText } from '@elastic/eui';
import { ES_FIELD_TYPES } from '@kbn/field-types';
import type { CombinedField } from './types';
export function CombinedFieldLabel({ combinedField }: { combinedField: CombinedField }) {
@ -15,7 +16,11 @@ export function CombinedFieldLabel({ combinedField }: { combinedField: CombinedF
}
function getCombinedFieldLabel(combinedField: CombinedField) {
return `${combinedField.fieldNames.join(combinedField.delimiter)} => ${
combinedField.combinedFieldName
} (${combinedField.mappingType})`;
if (combinedField.mappingType === ES_FIELD_TYPES.GEO_POINT) {
return `${combinedField.fieldNames.join(combinedField.delimiter)} => ${
combinedField.combinedFieldName
} (${combinedField.mappingType})`;
}
return combinedField.combinedFieldName;
}

View file

@ -19,17 +19,13 @@ import {
EuiFlexItem,
} from '@elastic/eui';
import type { FindFileStructureResponse } from '@kbn/file-upload-plugin/common';
import type { FindFileStructureResponse, IngestPipeline } from '@kbn/file-upload-plugin/common';
import type { MappingTypeMapping } from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import type { CombinedField } from './types';
import { GeoPointForm } from './geo_point';
import { SemanticTextForm } from './semantic_text';
import { CombinedFieldLabel } from './combined_field_label';
import {
addCombinedFieldsToMappings,
addCombinedFieldsToPipeline,
getNameCollisionMsg,
removeCombinedFieldsFromMappings,
removeCombinedFieldsFromPipeline,
} from './utils';
import { removeCombinedFieldsFromMappings, removeCombinedFieldsFromPipeline } from './utils';
interface Props {
mappingsString: string;
@ -46,6 +42,12 @@ interface State {
isPopoverOpen: boolean;
}
/**
 * Signature of the callback a form uses to register a new combined field.
 *
 * @param combinedField description of the field being added.
 * @param addToMappings receives the mappings and returns the updated mappings.
 * @param addToPipeline receives the ingest pipeline and returns the updated pipeline.
 */
export type AddCombinedField = (
combinedField: CombinedField,
addToMappings: (mappings: MappingTypeMapping) => MappingTypeMapping,
addToPipeline: (pipeline: IngestPipeline) => IngestPipeline
) => void;
export class CombinedFieldsForm extends Component<Props, State> {
state: State = {
isPopoverOpen: false,
@ -63,20 +65,20 @@ export class CombinedFieldsForm extends Component<Props, State> {
});
};
addCombinedField = (combinedField: CombinedField) => {
if (this.hasNameCollision(combinedField.combinedFieldName)) {
throw new Error(getNameCollisionMsg(combinedField.combinedFieldName));
}
addCombinedField = (
combinedField: CombinedField,
addToMappings: (mappings: MappingTypeMapping) => {},
addToPipeline: (pipeline: IngestPipeline) => {}
) => {
const mappings = this.parseMappings();
const pipeline = this.parsePipeline();
this.props.onMappingsStringChange(
JSON.stringify(addCombinedFieldsToMappings(mappings, [combinedField]), null, 2)
);
this.props.onPipelineStringChange(
JSON.stringify(addCombinedFieldsToPipeline(pipeline, [combinedField]), null, 2)
);
const newMappings = addToMappings(mappings);
const newPipeline = addToPipeline(pipeline);
this.props.onMappingsStringChange(JSON.stringify(newMappings, null, 2));
this.props.onPipelineStringChange(JSON.stringify(newPipeline, null, 2));
this.props.onCombinedFieldsChange([...this.props.combinedFields, combinedField]);
this.closePopover();
@ -155,6 +157,13 @@ export class CombinedFieldsForm extends Component<Props, State> {
defaultMessage: 'Add geo point field',
}
);
const semanticTextLabel = i18n.translate(
'xpack.dataVisualizer.file.semanticTextForm.combinedFieldLabel',
{
defaultMessage: 'Add semantic text field',
}
);
const panels = [
{
id: 0,
@ -163,6 +172,10 @@ export class CombinedFieldsForm extends Component<Props, State> {
name: geoPointLabel,
panel: 1,
},
{
name: semanticTextLabel,
panel: 2,
},
],
},
{
@ -176,11 +189,22 @@ export class CombinedFieldsForm extends Component<Props, State> {
/>
),
},
{
id: 2,
title: semanticTextLabel,
content: (
<SemanticTextForm
addCombinedField={this.addCombinedField}
hasNameCollision={this.hasNameCollision}
results={this.props.results}
/>
),
},
];
return (
<EuiFormRow
label={i18n.translate('xpack.dataVisualizer.combinedFieldsLabel', {
defaultMessage: 'Combined fields',
defaultMessage: 'Automatically created fields',
})}
>
<div>
@ -217,7 +241,7 @@ export class CombinedFieldsForm extends Component<Props, State> {
>
<FormattedMessage
id="xpack.dataVisualizer.addCombinedFieldsLabel"
defaultMessage="Add combined field"
defaultMessage="Add additional field"
/>
</EuiButtonEmpty>
}

View file

@ -23,17 +23,19 @@ import {
} from '@elastic/eui';
import type { FindFileStructureResponse } from '@kbn/file-upload-plugin/common';
import type { CombinedField } from './types';
import {
createGeoPointCombinedField,
isWithinLatRange,
isWithinLonRange,
getFieldNames,
getNameCollisionMsg,
addCombinedFieldsToMappings,
addCombinedFieldsToPipeline,
} from './utils';
import type { AddCombinedField } from './combined_fields_form';
interface Props {
addCombinedField: (combinedField: CombinedField) => void;
addCombinedField: AddCombinedField;
hasNameCollision: (name: string) => boolean;
results: FindFileStructureResponse;
}
@ -99,13 +101,18 @@ export class GeoPointForm extends Component<Props, State> {
onSubmit = () => {
try {
this.props.addCombinedField(
createGeoPointCombinedField(
this.state.latField,
this.state.lonField,
this.state.geoPointField
)
const combinedField = createGeoPointCombinedField(
this.state.latField,
this.state.lonField,
this.state.geoPointField
);
this.props.addCombinedField(
combinedField,
(mappings) => addCombinedFieldsToMappings(mappings, [combinedField]),
(pipeline) => addCombinedFieldsToPipeline(pipeline, [combinedField])
);
this.setState({ submitError: '' });
} catch (error) {
this.setState({ submitError: error.message });

View file

@ -0,0 +1,210 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import React, { useState, useEffect, useMemo } from 'react';
import type { FC } from 'react';
import type {
FindFileStructureResponse,
IngestPipeline,
} from '@kbn/file-upload-plugin/common/types';
import type { EuiSelectOption } from '@elastic/eui';
import {
EuiButton,
EuiFormRow,
EuiSelect,
EuiSpacer,
EuiTextAlign,
EuiFieldText,
} from '@elastic/eui';
import { i18n } from '@kbn/i18n';
import { FormattedMessage } from '@kbn/i18n-react';
import { cloneDeep } from 'lodash';
import useDebounce from 'react-use/lib/useDebounce';
import type {
InferenceModelConfigContainer,
MappingTypeMapping,
} from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { createSemanticTextCombinedField, getFieldNames, getNameCollisionMsg } from './utils';
import { useDataVisualizerKibana } from '../../../kibana_context';
import type { AddCombinedField } from './combined_fields_form';
interface Props {
addCombinedField: AddCombinedField;
hasNameCollision: (name: string) => boolean;
results: FindFileStructureResponse;
}
/**
 * Form for adding a `semantic_text` field that is populated by copying an
 * existing field.
 *
 * The user picks a source field, a name for the new field and an inference
 * service. On submit the new field is registered through `addCombinedField`,
 * together with one callback that adds the `semantic_text` mapping and one
 * that appends a `set` processor (with `copy_from`) to the ingest pipeline.
 *
 * @param addCombinedField callback used to register the new field.
 * @param hasNameCollision returns true when the given field name is already taken.
 * @param results file structure results from which the field names are read.
 */
export const SemanticTextForm: FC<Props> = ({ addCombinedField, hasNameCollision, results }) => {
  const {
    services: { http },
  } = useDataVisualizerKibana();
  const [inferenceServices, setInferenceServices] = useState<EuiSelectOption[]>([]);
  const [selectedInference, setSelectedInference] = useState<string | undefined>();
  const [selectedFieldOption, setSelectedFieldOption] = useState<string | undefined>();
  const [renameToFieldOption, setRenameToFieldOption] = useState<string>('');
  const [fieldError, setFieldError] = useState<string | undefined>();

  const fieldOptions = useMemo(
    () =>
      getFieldNames(results).map((columnName: string) => {
        return { value: columnName, text: columnName };
      }),
    [results]
  );

  useEffect(() => {
    // Optional chaining keeps this safe when no field names are available.
    setSelectedFieldOption(fieldOptions[0]?.value);
  }, [fieldOptions]);

  useEffect(() => {
    http
      .fetch<InferenceModelConfigContainer[]>('/internal/data_visualizer/inference_services', {
        method: 'GET',
        version: '1',
      })
      .then((response) => {
        const inferenceServiceOptions = response.map((service) => ({
          value: service.model_id,
          text: service.model_id,
        }));
        setInferenceServices(inferenceServiceOptions);
        setSelectedInference(inferenceServiceOptions[0]?.value);
      });
  }, [http]);

  useEffect(() => {
    if (selectedFieldOption === undefined) {
      // Nothing selected yet: avoid suggesting the literal name "undefined_semantic".
      setRenameToFieldOption('');
      return;
    }
    // For a dotted (nested) field suggest its last path segment, otherwise
    // suggest the field name with a "_semantic" suffix.
    const lastDot = selectedFieldOption.lastIndexOf('.');
    setRenameToFieldOption(
      lastDot === -1 ? `${selectedFieldOption}_semantic` : selectedFieldOption.slice(lastDot + 1)
    );
  }, [selectedFieldOption]);

  const onSubmit = () => {
    if (
      renameToFieldOption === '' ||
      selectedFieldOption === undefined ||
      selectedInference === undefined
    ) {
      return;
    }
    addCombinedField(
      createSemanticTextCombinedField(renameToFieldOption, selectedFieldOption),
      (mappings: MappingTypeMapping) => {
        const newMappings = cloneDeep(mappings);
        // Mappings may not have any properties yet; create the container if needed.
        newMappings.properties = newMappings.properties ?? {};
        newMappings.properties[renameToFieldOption] = {
          // @ts-ignore types are missing semantic_text
          type: 'semantic_text',
          inference_id: selectedInference,
        };
        return newMappings;
      },
      (pipeline: IngestPipeline) => {
        const newPipeline = cloneDeep(pipeline);
        newPipeline.processors.push({
          set: {
            field: renameToFieldOption,
            copy_from: selectedFieldOption,
          },
        });
        return newPipeline;
      }
    );
  };

  // Validate the new field name shortly after the user stops typing.
  useDebounce(
    () => {
      const error = hasNameCollision(renameToFieldOption)
        ? getNameCollisionMsg(renameToFieldOption)
        : undefined;
      setFieldError(error);
    },
    250,
    [renameToFieldOption]
  );

  const isInvalid = useMemo(() => {
    return (
      !selectedInference ||
      !selectedFieldOption ||
      renameToFieldOption === '' ||
      fieldError !== undefined
    );
  }, [selectedInference, selectedFieldOption, renameToFieldOption, fieldError]);

  return (
    <>
      <EuiSpacer size="s" />
      <EuiFormRow
        label={i18n.translate('xpack.dataVisualizer.file.semanticTextForm.fieldLabel', {
          defaultMessage: 'Field',
        })}
      >
        <EuiSelect
          options={fieldOptions}
          value={selectedFieldOption}
          onChange={(e) => setSelectedFieldOption(e.target.value)}
        />
      </EuiFormRow>
      <EuiFormRow
        label={i18n.translate('xpack.dataVisualizer.file.semanticTextForm.copyFieldLabel', {
          defaultMessage: 'Copy to field',
        })}
        isInvalid={fieldError !== undefined}
        error={[fieldError]}
      >
        <EuiFieldText
          placeholder={i18n.translate(
            'xpack.dataVisualizer.file.semanticTextForm.copyFieldLabel.placeholder',
            {
              defaultMessage: 'Field name',
            }
          )}
          value={renameToFieldOption}
          onChange={(e) => setRenameToFieldOption(e.target.value)}
          aria-label="field name"
        />
      </EuiFormRow>
      <EuiFormRow
        label={i18n.translate('xpack.dataVisualizer.file.semanticTextForm.inferenceLabel', {
          defaultMessage: 'Inference service',
        })}
      >
        <EuiSelect
          options={inferenceServices}
          value={selectedInference}
          onChange={(e) => setSelectedInference(e.target.value)}
        />
      </EuiFormRow>
      <EuiSpacer size="s" />
      <EuiTextAlign textAlign="right">
        <EuiButton size="s" fill disabled={isInvalid} onClick={onSubmit}>
          <FormattedMessage
            id="xpack.dataVisualizer.file.geoPointForm.submitButtonLabel"
            defaultMessage="Add"
          />
        </EuiButton>
      </EuiTextAlign>
    </>
  );
};

View file

@ -7,7 +7,7 @@
export interface CombinedField {
mappingType: string;
delimiter: string;
delimiter?: string;
combinedFieldName: string;
fieldNames: string[];
}

View file

@ -123,6 +123,17 @@ export function createGeoPointCombinedField(
};
}
/**
 * Builds the combined-field description for a `semantic_text` field that is
 * derived from a single existing field.
 *
 * @param semanticTextField name of the new semantic text field.
 * @param originalField name of the field the new field is derived from.
 * @returns a combined field of mapping type `semantic_text`; no delimiter is set.
 */
export function createSemanticTextCombinedField(
  semanticTextField: string,
  originalField: string
): CombinedField {
  return {
    mappingType: 'semantic_text',
    combinedFieldName: semanticTextField,
    fieldNames: [originalField],
  };
}
export function getNameCollisionMsg(name: string) {
return i18n.translate('xpack.dataVisualizer.nameCollisionMsg', {
defaultMessage: '"{name}" already exists, please provide a unique name',

View file

@ -11,7 +11,7 @@ import type { FindFileStructureResponse } from '@kbn/file-upload-plugin/common';
export function createFilebeatConfig(
index: string,
results: FindFileStructureResponse,
ingestPipelineId: string,
pipelineId: string,
username: string | null
) {
return [
@ -27,7 +27,7 @@ export function createFilebeatConfig(
' hosts: ["<es_url>"]',
...getUserDetails(username),
` index: "${index}"`,
` pipeline: "${ingestPipelineId}"`,
` pipeline: "${pipelineId}"`,
'',
'setup:',
' template.enabled: false',

View file

@ -34,15 +34,10 @@ export enum EDITOR_MODE {
interface Props {
index: string;
results: FindFileStructureResponse;
ingestPipelineId: string;
pipelineId: string;
closeFlyout(): void;
}
export const FilebeatConfigFlyout: FC<Props> = ({
index,
results,
ingestPipelineId,
closeFlyout,
}) => {
export const FilebeatConfigFlyout: FC<Props> = ({ index, results, pipelineId, closeFlyout }) => {
const [fileBeatConfig, setFileBeatConfig] = useState('');
const [username, setUsername] = useState<string | null>(null);
const {
@ -56,9 +51,9 @@ export const FilebeatConfigFlyout: FC<Props> = ({
}, [security]);
useEffect(() => {
const config = createFilebeatConfig(index, results, ingestPipelineId, username);
const config = createFilebeatConfig(index, results, pipelineId, username);
setFileBeatConfig(config);
}, [username, index, ingestPipelineId, results]);
}, [username, index, pipelineId, results]);
return (
<EuiFlyout onClose={closeFlyout} hideCloseButton size={'m'} ownFocus={false}>

View file

@ -19,6 +19,7 @@ import { isDefined } from '@kbn/ml-is-defined';
import type { ResultLinks } from '../../../../../common/app';
import type { LinkCardProps } from '../link_card/link_card';
import { useDataVisualizerKibana } from '../../../kibana_context';
import type { CombinedField } from '../combined_fields/types';
type LinkType = 'file' | 'index';
@ -44,7 +45,7 @@ export interface ResultLink {
}
interface Props {
fieldStats: FindFileStructureResponse['field_stats'];
results: FindFileStructureResponse;
index: string;
dataViewId: string;
timeFieldName?: string;
@ -52,6 +53,7 @@ interface Props {
showFilebeatFlyout(): void;
getAdditionalLinks?: GetAdditionalLinks;
resultLinks?: ResultLinks;
combinedFields: CombinedField[];
}
interface GlobalState {
@ -62,7 +64,7 @@ interface GlobalState {
const RECHECK_DELAY_MS = 3000;
export const ResultsLinks: FC<Props> = ({
fieldStats,
results,
index,
dataViewId,
timeFieldName,
@ -70,6 +72,7 @@ export const ResultsLinks: FC<Props> = ({
showFilebeatFlyout,
getAdditionalLinks,
resultLinks,
combinedFields,
}) => {
const {
services: {
@ -78,7 +81,7 @@ export const ResultsLinks: FC<Props> = ({
application: { getUrlForApp, capabilities },
},
} = useDataVisualizerKibana();
const fieldStats = results.field_stats;
const [duration, setDuration] = useState({
from: 'now-30m',
to: 'now',
@ -88,6 +91,7 @@ export const ResultsLinks: FC<Props> = ({
const [discoverLink, setDiscoverLink] = useState('');
const [indexManagementLink, setIndexManagementLink] = useState('');
const [dataViewsManagementLink, setDataViewsManagementLink] = useState('');
const [playgroundLink, setPlaygroundLink] = useState('');
const [asyncHrefCards, setAsyncHrefCards] = useState<LinkCardProps[]>();
useEffect(() => {
@ -96,7 +100,7 @@ export const ResultsLinks: FC<Props> = ({
const getDiscoverUrl = async (): Promise<void> => {
const isDiscoverAvailable = capabilities.discover?.show ?? false;
if (!isDiscoverAvailable) return;
const discoverLocator = url?.locators.get('DISCOVER_APP_LOCATOR');
const discoverLocator = url.locators.get('DISCOVER_APP_LOCATOR');
if (!discoverLocator) {
// eslint-disable-next-line no-console
@ -116,13 +120,13 @@ export const ResultsLinks: FC<Props> = ({
if (Array.isArray(getAdditionalLinks)) {
Promise.all(
getAdditionalLinks.map(async (asyncCardGetter) => {
const results = await asyncCardGetter({
const cardResults = await asyncCardGetter({
dataViewId,
globalState,
});
if (Array.isArray(results)) {
if (Array.isArray(cardResults)) {
return await Promise.all(
results.map(async (c) => ({
cardResults.map(async (c) => ({
...c,
canDisplay: await c.canDisplay(),
href: await c.getUrl(),
@ -140,6 +144,12 @@ export const ResultsLinks: FC<Props> = ({
}
if (!unmounted) {
const playgroundLocator = url.locators.get('PLAYGROUND_LOCATOR_ID');
if (playgroundLocator !== undefined) {
playgroundLocator.getUrl({ 'default-index': index }).then(setPlaygroundLink);
}
setIndexManagementLink(
getUrlForApp('management', { path: '/data/index_management/indices' })
);
@ -228,7 +238,6 @@ export const ResultsLinks: FC<Props> = ({
/>
</EuiFlexItem>
)}
{indexManagementLink && (
<EuiFlexItem>
<EuiCard
@ -245,7 +254,6 @@ export const ResultsLinks: FC<Props> = ({
/>
</EuiFlexItem>
)}
{dataViewsManagementLink && (
<EuiFlexItem>
<EuiCard
@ -262,7 +270,6 @@ export const ResultsLinks: FC<Props> = ({
/>
</EuiFlexItem>
)}
{resultLinks?.fileBeat?.enabled === false ? null : (
<EuiFlexItem>
<EuiCard
@ -281,6 +288,24 @@ export const ResultsLinks: FC<Props> = ({
</EuiFlexItem>
)}
{playgroundLink ? (
<EuiFlexItem>
<EuiCard
hasBorder
icon={<EuiIcon size="xxl" type={`logoEnterpriseSearch`} />}
data-test-subj="fileDataVisFilebeatConfigLink"
title={
<FormattedMessage
id="xpack.dataVisualizer.file.resultsLinks.playground"
defaultMessage="Playground"
/>
}
description=""
href={playgroundLink}
/>
</EuiFlexItem>
) : null}
{Array.isArray(asyncHrefCards) &&
asyncHrefCards.map((link) => (
<EuiFlexItem key={link.title}>

View file

@ -26,7 +26,7 @@ const overrideDefaults = {
linesToSample: undefined,
};
export function readFile(file: File) {
export function readFile(file: File): Promise<{ fileContents: string; data: ArrayBuffer }> {
return new Promise((resolve, reject) => {
if (file && file.size) {
const reader = new FileReader();

View file

@ -34,10 +34,11 @@ interface Props {
export const WelcomeContent: FC<Props> = ({ hasPermissionToImport }) => {
const {
services: {
fileUpload: { getMaxBytesFormatted },
fileUpload: { getMaxBytesFormatted, getMaxTikaBytesFormatted },
},
} = useDataVisualizerKibana();
const maxFileSize = getMaxBytesFormatted();
const maxTikaFileSize = getMaxTikaBytesFormatted();
return (
<EuiFlexGroup gutterSize="xl" alignItems="center">
@ -57,10 +58,17 @@ export const WelcomeContent: FC<Props> = ({ hasPermissionToImport }) => {
<EuiText>
<p>
{hasPermissionToImport ? (
<FormattedMessage
id="xpack.dataVisualizer.file.welcomeContent.visualizeAndImportDataFromLogFileDescription"
defaultMessage="Upload your file, analyze its data, and optionally import the data into an Elasticsearch index."
/>
<>
<FormattedMessage
id="xpack.dataVisualizer.file.welcomeContent.visualizeAndImportDataFromLogFileDescription"
defaultMessage="Upload your file, analyze its data, and optionally import the data into an index."
/>
<br />
<FormattedMessage
id="xpack.dataVisualizer.file.welcomeContent.visualizeAndImportDataFromLogFileDescription2"
defaultMessage="The data can also be automatically vectorized using semantic text."
/>
</>
) : (
<FormattedMessage
id="xpack.dataVisualizer.file.welcomeContent.visualizeDataFromLogFileDescription"
@ -73,12 +81,89 @@ export const WelcomeContent: FC<Props> = ({ hasPermissionToImport }) => {
<EuiText>
<p>
<FormattedMessage
id="xpack.dataVisualizer.file.welcomeContent.supportedFileFormatDescription"
defaultMessage="The following file formats are supported:"
id="xpack.dataVisualizer.file.welcomeContent.supportedTikaFileFormatDescription"
defaultMessage="The following file formats are supported up to {maxTikaFileSize}:"
values={{ maxTikaFileSize }}
/>
</p>
</EuiText>
<EuiSpacer size="s" />
<EuiFlexGroup gutterSize="m">
<EuiFlexItem grow={false} css={docIconStyle}>
<EuiIcon size="m" type="document" />
</EuiFlexItem>
<EuiFlexItem>
<EuiText>
<p>
<FormattedMessage
id="xpack.dataVisualizer.file.welcomeContent.pdfDescription"
defaultMessage="PDF"
/>
</p>
</EuiText>
</EuiFlexItem>
</EuiFlexGroup>
<EuiSpacer size="s" />
<EuiFlexGroup gutterSize="m">
<EuiFlexItem grow={false} css={docIconStyle}>
<EuiIcon size="m" type="document" />
</EuiFlexItem>
<EuiFlexItem>
<EuiText>
<p>
<FormattedMessage
id="xpack.dataVisualizer.file.welcomeContent.msFilesDescription"
defaultMessage="Microsoft Office Word, Excel, and PowerPoint"
/>
</p>
</EuiText>
</EuiFlexItem>
</EuiFlexGroup>
<EuiSpacer size="s" />
<EuiFlexGroup gutterSize="m">
<EuiFlexItem grow={false} css={docIconStyle}>
<EuiIcon size="m" type="document" />
</EuiFlexItem>
<EuiFlexItem>
<EuiText>
<p>
<FormattedMessage
id="xpack.dataVisualizer.file.welcomeContent.plainAndRichTextFilesDescription"
defaultMessage="Plain text and Rich Text Format"
/>
</p>
</EuiText>
</EuiFlexItem>
</EuiFlexGroup>
<EuiSpacer size="s" />
<EuiFlexGroup gutterSize="m">
<EuiFlexItem grow={false} css={docIconStyle}>
<EuiIcon size="m" type="document" />
</EuiFlexItem>
<EuiFlexItem>
<EuiText>
<p>
<FormattedMessage
id="xpack.dataVisualizer.file.welcomeContent.openDocFilesDescription"
defaultMessage="Open Document Format"
/>
</p>
</EuiText>
</EuiFlexItem>
</EuiFlexGroup>
<EuiSpacer size="m" />
<EuiText>
<p>
<FormattedMessage
id="xpack.dataVisualizer.file.welcomeContent.supportedFileFormatDescription"
defaultMessage="The following file formats are supported up to {maxFileSize}:"
values={{ maxFileSize }}
/>
</p>
</EuiText>
<EuiSpacer size="s" />
<EuiFlexGroup gutterSize="m">
<EuiFlexItem grow={false} css={docIconStyle}>
<EuiIcon size="m" type="document" />
@ -126,16 +211,6 @@ export const WelcomeContent: FC<Props> = ({ hasPermissionToImport }) => {
</EuiText>
</EuiFlexItem>
</EuiFlexGroup>
<EuiSpacer size="m" />
<EuiText>
<p>
<FormattedMessage
id="xpack.dataVisualizer.file.welcomeContent.uploadedFilesAllowedSizeDescription"
defaultMessage="You can upload files up to {maxFileSize}."
values={{ maxFileSize }}
/>
</p>
</EuiText>
</EuiFlexItem>
</EuiFlexGroup>
);

View file

@ -11,6 +11,7 @@ import React from 'react';
import { EuiTitle, EuiSpacer, EuiDescriptionList } from '@elastic/eui';
import type { FindFileStructureResponse } from '@kbn/file-upload-plugin/common';
import { getTikaDisplayType } from '../../../../../common/utils/tika_utils';
import { FILE_FORMATS } from '../../../../../common/constants';
export const AnalysisSummary: FC<{ results: FindFileStructureResponse }> = ({ results }) => {
@ -64,7 +65,7 @@ function createDisplayItems(results: FindFileStructureResponse) {
defaultMessage="Format"
/>
),
description: results.format,
description: getFormatLabel(results),
});
if (results.format === FILE_FORMATS.DELIMITED) {
@ -131,3 +132,9 @@ function createDisplayItems(results: FindFileStructureResponse) {
return items;
}
/**
 * Picks the text shown for the file format.
 *
 * When the format is `FILE_FORMATS.TIKA` and a document type is present, the
 * label from `getTikaDisplayType` is returned. Otherwise the raw format value
 * is returned.
 */
function getFormatLabel(results: FindFileStructureResponse) {
  if (results.format !== FILE_FORMATS.TIKA || results.document_type === undefined) {
    return results.format;
  }
  return getTikaDisplayType(results.document_type).label;
}

View file

@ -26,7 +26,7 @@ import { useGrokHighlighter } from './use_text_parser';
import { LINE_LIMIT } from './grok_highlighter';
interface Props {
data: string;
fileContents: string;
format: string;
numberOfLines: number;
semiStructureTextData: SemiStructureTextData | null;
@ -51,7 +51,12 @@ function semiStructureTextDataGuard(
);
}
export const FileContents: FC<Props> = ({ data, format, numberOfLines, semiStructureTextData }) => {
export const FileContents: FC<Props> = ({
fileContents,
format,
numberOfLines,
semiStructureTextData,
}) => {
let mode = EDITOR_MODE.TEXT;
if (format === EDITOR_MODE.JSON) {
mode = EDITOR_MODE.JSON;
@ -63,8 +68,8 @@ export const FileContents: FC<Props> = ({ data, format, numberOfLines, semiStruc
semiStructureTextDataGuard(semiStructureTextData)
);
const formattedData = useMemo(
() => limitByNumberOfLines(data, numberOfLines),
[data, numberOfLines]
() => limitByNumberOfLines(fileContents, numberOfLines),
[fileContents, numberOfLines]
);
const [highlightedLines, setHighlightedLines] = useState<JSX.Element[] | null>(null);
@ -78,7 +83,7 @@ export const FileContents: FC<Props> = ({ data, format, numberOfLines, semiStruc
semiStructureTextData!;
grokHighlighter(
data,
fileContents,
grokPattern!,
mappings,
ecsCompatibility,
@ -96,7 +101,7 @@ export const FileContents: FC<Props> = ({ data, format, numberOfLines, semiStruc
setIsSemiStructureTextData(false);
}
});
}, [data, semiStructureTextData, grokHighlighter, isSemiStructureTextData, isMounted]);
}, [fileContents, semiStructureTextData, grokHighlighter, isSemiStructureTextData, isMounted]);
return (
<>

View file

@ -0,0 +1,30 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { HttpSetup } from '@kbn/core-http-browser';
const PREVIEW_PDF_CONTENTS_URL = '/internal/file_upload/preview_pdf_contents';

/**
 * Posts the given binary data, base64 encoded, to the preview endpoint and
 * returns the `preview` property of the response.
 *
 * @param http HTTP client used to perform the request.
 * @param data raw bytes of the file.
 * @returns the `preview` value from the response.
 */
export async function previewPDF(http: HttpSetup, data: ArrayBuffer) {
  // btoa only accepts strings, so build a binary string with one character per byte.
  let binary = '';
  for (const byte of new Uint8Array(data)) {
    binary += String.fromCharCode(byte);
  }

  const requestBody = JSON.stringify({
    pdfBase64: btoa(binary),
  });

  const response = await http.fetch<any>(PREVIEW_PDF_CONTENTS_URL, {
    method: 'POST',
    version: '1',
    body: requestBody,
  });
  return response.preview;
}

View file

@ -28,8 +28,11 @@ import {
createUrlOverrides,
processResults,
} from '../../../common/components/utils';
import { analyzeTikaFile } from './tika_analyzer';
import { MODE } from './constants';
import { FileSizeChecker } from './file_size_check';
import { isTikaType } from '../../../../../common/utils/tika_utils';
export class FileDataVisualizerView extends Component {
constructor(props) {
@ -40,7 +43,7 @@ export class FileDataVisualizerView extends Component {
fileName: '',
fileContents: '',
data: [],
fileSize: 0,
base64Data: '',
fileTooLarge: false,
fileCouldNotBeRead: false,
serverError: null,
@ -60,8 +63,6 @@ export class FileDataVisualizerView extends Component {
this.originalSettings = {
linesToSample: DEFAULT_LINES_TO_SAMPLE,
};
this.maxFileUploadBytes = props.fileUpload.getMaxBytes();
}
async componentDidMount() {
@ -85,7 +86,6 @@ export class FileDataVisualizerView extends Component {
fileName: '',
fileContents: '',
data: [],
fileSize: 0,
fileTooLarge: false,
fileCouldNotBeRead: false,
fileCouldNotBeReadPermissionError: false,
@ -102,17 +102,25 @@ export class FileDataVisualizerView extends Component {
};
async loadFile(file) {
if (file.size <= this.maxFileUploadBytes) {
this.fileSizeChecker = new FileSizeChecker(this.props.fileUpload, file);
if (this.fileSizeChecker.check()) {
try {
const { data, fileContents } = await readFile(file);
this.setState({
data,
fileContents,
fileName: file.name,
fileSize: file.size,
});
if (isTikaType(file.type)) {
this.setState({
data,
fileName: file.name,
});
await this.analyzeFile(fileContents);
await this.analyzeTika(data);
} else {
this.setState({
data,
fileContents,
fileName: file.name,
});
await this.analyzeFile(fileContents);
}
} catch (error) {
this.setState({
loaded: false,
@ -126,7 +134,6 @@ export class FileDataVisualizerView extends Component {
loading: false,
fileTooLarge: true,
fileName: file.name,
fileSize: file.size,
});
}
}
@ -206,6 +213,21 @@ export class FileDataVisualizerView extends Component {
}
}
async analyzeTika(data, isRetry = false) {
const { tikaResults, standardResults } = await analyzeTikaFile(data, this.props.fileUpload);
const serverSettings = processResults(standardResults);
this.originalSettings = serverSettings;
this.setState({
fileContents: tikaResults.content,
results: standardResults.results,
explanation: standardResults.explanation,
loaded: true,
loading: false,
fileCouldNotBeRead: isRetry,
});
}
closeEditFlyout = () => {
this.setState({ isEditFlyoutVisible: false });
};
@ -258,7 +280,6 @@ export class FileDataVisualizerView extends Component {
fileContents,
data,
fileName,
fileSize,
fileTooLarge,
fileCouldNotBeRead,
serverError,
@ -287,9 +308,7 @@ export class FileDataVisualizerView extends Component {
{loading && <LoadingPanel />}
{fileTooLarge && (
<FileTooLarge fileSize={fileSize} maxFileSize={this.maxFileUploadBytes} />
)}
{fileTooLarge && <FileTooLarge fileSizeChecker={this.fileSizeChecker} />}
{fileCouldNotBeRead && loading === false && (
<>
@ -311,7 +330,7 @@ export class FileDataVisualizerView extends Component {
results={results}
explanation={explanation}
fileName={fileName}
data={fileContents}
fileContents={fileContents}
showEditFlyout={this.showEditFlyout}
showExplanationFlyout={this.showExplanationFlyout}
disableButtons={isEditFlyoutVisible || isExplanationFlyoutVisible}

View file

@ -11,18 +11,16 @@ import React from 'react';
import { EuiCallOut, EuiSpacer, EuiButtonEmpty, EuiHorizontalRule } from '@elastic/eui';
import numeral from '@elastic/numeral';
import type { FindFileStructureErrorResponse } from '@kbn/file-upload-plugin/common';
import { FILE_SIZE_DISPLAY_FORMAT } from '../../../../../common/constants';
import type { FileSizeChecker } from './file_size_check';
interface FileTooLargeProps {
fileSize: number;
maxFileSize: number;
fileSizeChecker: FileSizeChecker;
}
export const FileTooLarge: FC<FileTooLargeProps> = ({ fileSize, maxFileSize }) => {
const fileSizeFormatted = numeral(fileSize).format(FILE_SIZE_DISPLAY_FORMAT);
const maxFileSizeFormatted = numeral(maxFileSize).format(FILE_SIZE_DISPLAY_FORMAT);
export const FileTooLarge: FC<FileTooLargeProps> = ({ fileSizeChecker }) => {
const fileSizeFormatted = fileSizeChecker.fileSizeFormatted();
const maxFileSizeFormatted = fileSizeChecker.maxFileSizeFormatted();
// Format the byte values, using the second format if the difference between
// the file size and the max is so small that the formatted values are identical
@ -43,7 +41,7 @@ export const FileTooLarge: FC<FileTooLargeProps> = ({ fileSize, maxFileSize }) =
</p>
);
} else {
const diffFormatted = numeral(fileSize - maxFileSize).format(FILE_SIZE_DISPLAY_FORMAT);
const diffFormatted = fileSizeChecker.fileSizeDiffFormatted();
errorText = (
<p>
<FormattedMessage

View file

@ -0,0 +1,40 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { FileUploadStartApi } from '@kbn/file-upload-plugin/public/api';
import numeral from '@elastic/numeral';
import { FILE_SIZE_DISPLAY_FORMAT } from '../../../../../common/constants';
import { isTikaType } from '../../../../../common/utils/tika_utils';
/**
 * Compares a file's size against the upload limit that applies to its type.
 *
 * Files whose MIME type is recognised by `isTikaType` are held to the Tika
 * limit; every other file uses the standard limit.
 */
export class FileSizeChecker {
  private _maxBytes: number;
  private _fileSize: number;

  constructor(fileUpload: FileUploadStartApi, file: File) {
    const { size, type } = file;
    this._fileSize = size;
    if (isTikaType(type)) {
      this._maxBytes = fileUpload.getMaxTikaBytes();
    } else {
      this._maxBytes = fileUpload.getMaxBytes();
    }
  }

  /** True when the file is no larger than the applicable limit. */
  public check(): boolean {
    return this._maxBytes >= this._fileSize;
  }

  /** The limit, in bytes, selected for this file. */
  public maxBytes(): number {
    return this._maxBytes;
  }

  /** The file's size rendered with the shared display format. */
  public fileSizeFormatted(): string {
    return this.formatBytes(this._fileSize);
  }

  /** The limit rendered with the shared display format. */
  public maxFileSizeFormatted(): string {
    return this.formatBytes(this._maxBytes);
  }

  /** The amount by which the file size differs from the limit, formatted. */
  public fileSizeDiffFormatted(): string {
    const difference = this._fileSize - this._maxBytes;
    return this.formatBytes(difference);
  }

  /** Single place where byte counts are turned into display strings. */
  private formatBytes(bytes: number): string {
    return numeral(bytes).format(FILE_SIZE_DISPLAY_FORMAT);
  }
}

View file

@ -0,0 +1,108 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { AnalysisResult, PreviewTikaResponse } from '@kbn/file-upload-plugin/common/types';
import type { FileUploadStartApi } from '@kbn/file-upload-plugin/public/api';
import { FILE_FORMATS } from '../../../../../common/constants';
/**
 * Requests a Tika preview of a binary document and wraps the answer in the
 * same result shape the rest of the visualizer uses.
 *
 * The mappings and ingest pipeline are fixed: they describe the `attachment`
 * object and run the attachment processor over the `data` field.
 *
 * @param data raw bytes of the uploaded file
 * @param fileUpload file upload API used to request the preview
 * @returns the raw preview response plus the synthesised analysis result
 */
export async function analyzeTikaFile(
  data: ArrayBuffer,
  fileUpload: FileUploadStartApi
): Promise<{ standardResults: AnalysisResult; tikaResults: PreviewTikaResponse }> {
  const tikaResults = await fileUpload.previewTikaFile(data);

  // Splitting on "\n" yields one more piece than there are newline characters.
  const lineCount = tikaResults.content.split('\n').length;

  // Builds a new mapping object on every call so no two fields share an instance.
  const textWithKeyword = () => ({
    type: 'text',
    fields: {
      keyword: {
        type: 'keyword',
        ignore_above: 256,
      },
    },
  });

  return {
    tikaResults,
    standardResults: {
      results: {
        format: FILE_FORMATS.TIKA,
        document_type: tikaResults.content_type,
        charset: 'utf-8',
        has_header_row: false,
        has_byte_order_marker: false,
        sample_start: '',
        quote: '',
        delimiter: '',
        need_client_timezone: false,
        num_lines_analyzed: lineCount,
        num_messages_analyzed: 0,
        field_stats: {
          // @ts-expect-error semantic_text not supported
          'attachment.content': {},
          // @ts-expect-error semantic_text not supported
          'attachment.content_length': {},
          // @ts-expect-error semantic_text not supported
          'attachment.content_type': {},
          // @ts-expect-error semantic_text not supported
          'attachment.format': {},
          // @ts-expect-error semantic_text not supported
          'attachment.language': {},
        },
        mappings: {
          properties: {
            attachment: {
              // @ts-expect-error semantic_text not supported
              properties: {
                content: textWithKeyword(),
                content_length: {
                  type: 'long',
                },
                content_type: textWithKeyword(),
                format: textWithKeyword(),
                language: textWithKeyword(),
              },
            },
          },
        },
        ingest_pipeline: {
          description: 'Ingest pipeline created by file data visualizer',
          processors: [
            {
              attachment: {
                field: 'data',
                remove_binary: true,
              },
            },
          ],
        },
      },
    },
  };
}

View file

@ -20,17 +20,18 @@ import {
} from '@elastic/eui';
import type { FindFileStructureResponse } from '@kbn/file-upload-plugin/common';
import type { CombinedField } from '../../../common/components/combined_fields';
import { CombinedFieldsForm } from '../../../common/components/combined_fields';
import { JsonEditor, EDITOR_MODE } from '../json_editor';
import { CreateDataViewToolTip } from './create_data_view_tooltip';
const EDITOR_HEIGHT = '300px';
import type { CombinedField } from '../../../../common/components/combined_fields';
import { CombinedFieldsForm } from '../../../../common/components/combined_fields';
import { CreateDataViewToolTip } from '../create_data_view_tooltip';
import { IndexSettings, IngestPipeline, Mappings } from './inputs';
import { SemanticTextInfo } from '../semantic_text_info';
interface Props {
index: string;
dataView: string;
initialized: boolean;
onIndexChange(): void;
onIndexChange(index: string): void;
createDataView: boolean;
onCreateDataViewChange(): void;
onDataViewChange(): void;
@ -70,7 +71,7 @@ export const AdvancedSettings: FC<Props> = ({
canCreateDataView,
}) => {
return (
<React.Fragment>
<>
<EuiFormRow
label={
<FormattedMessage
@ -90,7 +91,7 @@ export const AdvancedSettings: FC<Props> = ({
)}
value={index}
disabled={initialized === true}
onChange={onIndexChange}
onChange={(e) => onIndexChange(e.target.value)}
isInvalid={indexNameError !== ''}
aria-label={i18n.translate(
'xpack.dataVisualizer.file.advancedImportSettings.indexNameAriaLabel',
@ -139,6 +140,8 @@ export const AdvancedSettings: FC<Props> = ({
/>
</EuiFormRow>
<SemanticTextInfo results={results} />
<CombinedFieldsForm
mappingsString={mappingsString}
pipelineString={pipelineString}
@ -150,6 +153,8 @@ export const AdvancedSettings: FC<Props> = ({
isDisabled={initialized === true}
/>
<EuiSpacer size="s" />
<EuiFlexGroup>
<EuiFlexItem>
<IndexSettings
@ -175,84 +180,6 @@ export const AdvancedSettings: FC<Props> = ({
/>
</EuiFlexItem>
</EuiFlexGroup>
</React.Fragment>
);
};
interface JsonEditorProps {
initialized: boolean;
data: string;
onChange(value: string): void;
}
const IndexSettings: FC<JsonEditorProps> = ({ initialized, data, onChange }) => {
return (
<React.Fragment>
<EuiFormRow
label={
<FormattedMessage
id="xpack.dataVisualizer.file.advancedImportSettings.indexSettingsLabel"
defaultMessage="Index settings"
/>
}
fullWidth
>
<JsonEditor
mode={EDITOR_MODE.JSON}
readOnly={initialized === true}
value={data}
height={EDITOR_HEIGHT}
onChange={onChange}
/>
</EuiFormRow>
</React.Fragment>
);
};
const Mappings: FC<JsonEditorProps> = ({ initialized, data, onChange }) => {
return (
<React.Fragment>
<EuiFormRow
label={
<FormattedMessage
id="xpack.dataVisualizer.file.advancedImportSettings.mappingsLabel"
defaultMessage="Mappings"
/>
}
fullWidth
>
<JsonEditor
mode={EDITOR_MODE.JSON}
readOnly={initialized === true}
value={data}
height={EDITOR_HEIGHT}
onChange={onChange}
/>
</EuiFormRow>
</React.Fragment>
);
};
const IngestPipeline: FC<JsonEditorProps> = ({ initialized, data, onChange }) => {
return (
<React.Fragment>
<EuiFormRow
label={
<FormattedMessage
id="xpack.dataVisualizer.file.advancedImportSettings.ingestPipelineLabel"
defaultMessage="Ingest pipeline"
/>
}
fullWidth
>
<JsonEditor
mode={EDITOR_MODE.JSON}
readOnly={initialized === true}
value={data}
height={EDITOR_HEIGHT}
onChange={onChange}
/>
</EuiFormRow>
</React.Fragment>
</>
);
};

View file

@ -0,0 +1,8 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
export { AdvancedSettings } from './advanced';

View file

@ -0,0 +1,96 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { FormattedMessage } from '@kbn/i18n-react';
import type { FC } from 'react';
import React from 'react';
import { EuiFormRow } from '@elastic/eui';
import { JsonEditor, EDITOR_MODE } from '../../json_editor';
// Height passed to every JSON editor rendered by the components in this file.
const EDITOR_HEIGHT = '300px';
/** Props shared by the index settings, mappings and ingest pipeline editors. */
interface JsonEditorProps {
/** When true, the editor is rendered read-only. */
initialized: boolean;
/** Text passed to the editor as its value. */
data: string;
/** Handed straight through to the editor's `onChange`. */
onChange(value: string): void;
/** Optional index name; when set, `Mappings` includes it in its label. */
indexName?: string;
}
/**
 * Labelled JSON editor for the index settings.
 * The editor becomes read-only when `initialized` is true.
 */
export const IndexSettings: FC<JsonEditorProps> = (props) => {
  const { initialized, data, onChange } = props;

  const label = (
    <FormattedMessage
      id="xpack.dataVisualizer.file.advancedImportSettings.indexSettingsLabel"
      defaultMessage="Index settings"
    />
  );

  return (
    <EuiFormRow label={label} fullWidth>
      <JsonEditor
        mode={EDITOR_MODE.JSON}
        readOnly={initialized === true}
        value={data}
        height={EDITOR_HEIGHT}
        onChange={onChange}
      />
    </EuiFormRow>
  );
};
/**
 * Labelled JSON editor for the index mappings.
 * When `indexName` is truthy the label names that index; otherwise the
 * generic "Mappings" label is shown. Read-only when `initialized` is true.
 */
export const Mappings: FC<JsonEditorProps> = (props) => {
  const { initialized, data, onChange, indexName } = props;

  const label = indexName ? (
    <FormattedMessage
      id="xpack.dataVisualizer.file.advancedImportSettings.mappingsOfIndexLabel"
      defaultMessage="Mappings of index {indexName}"
      values={{ indexName }}
    />
  ) : (
    <FormattedMessage
      id="xpack.dataVisualizer.file.advancedImportSettings.mappingsLabel"
      defaultMessage="Mappings"
    />
  );

  return (
    <EuiFormRow label={label} fullWidth>
      <JsonEditor
        mode={EDITOR_MODE.JSON}
        readOnly={initialized === true}
        value={data}
        height={EDITOR_HEIGHT}
        onChange={onChange}
      />
    </EuiFormRow>
  );
};
/**
 * Labelled JSON editor for the ingest pipeline definition.
 * The editor becomes read-only when `initialized` is true.
 */
export const IngestPipeline: FC<JsonEditorProps> = (props) => {
  const { initialized, data, onChange } = props;

  const label = (
    <FormattedMessage
      id="xpack.dataVisualizer.file.advancedImportSettings.ingestPipelineLabel"
      defaultMessage="Ingest pipeline"
    />
  );

  return (
    <EuiFormRow label={label} fullWidth>
      <JsonEditor
        mode={EDITOR_MODE.JSON}
        readOnly={initialized === true}
        value={data}
        height={EDITOR_HEIGHT}
        onChange={onChange}
      />
    </EuiFormRow>
  );
};

View file

@ -0,0 +1,58 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { useCallback, useState, useEffect } from 'react';
import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { useDataVisualizerKibana } from '../../../../kibana_context';
/**
 * Shape of one entry read from the `/api/index_management/indices` response.
 * Entries with `hidden` or `frozen` set are filtered out by the hook below.
 */
interface EsIndex {
name: string;
hidden: boolean;
frozen: boolean;
}
/** An ingest pipeline configuration plus the `name` used to sort the list. */
type Pipeline = estypes.IngestPipelineConfig & {
name: string;
};
/**
 * Loads the existing indices and ingest pipelines once on mount and exposes a
 * helper for fetching the mappings of a single index.
 *
 * @returns `indices` (hidden and frozen entries removed), `pipelines` (sorted
 * by name) and `getMapping(indexName)`, which resolves to the `mappings`
 * property of the mapping response.
 */
export function useExistingIndices() {
  const {
    services: { http },
  } = useDataVisualizerKibana();

  const [indices, setIndices] = useState<EsIndex[]>([]);
  const [pipelines, setPipelines] = useState<Pipeline[]>([]);

  const loadIndices = useCallback(() => {
    http.get<EsIndex[]>('/api/index_management/indices').then((resp) => {
      setIndices(resp.filter((i) => !(i.hidden || i.frozen)));
    });
  }, [http]);

  const loadPipelines = useCallback(() => {
    http.get<Pipeline[]>('/api/ingest_pipelines').then((resp) => {
      // Sort a copy: Array.prototype.sort reorders in place and would
      // otherwise mutate the response object.
      setPipelines([...resp].sort((a, b) => a.name.localeCompare(b.name)));
    });
  }, [http]);

  useEffect(() => {
    loadIndices();
    loadPipelines();
  }, [loadIndices, loadPipelines]);

  const getMapping = useCallback(
    async (indexName: string) => {
      // The name becomes a URL path segment, so characters such as "%", "<"
      // or "/" (e.g. date-math names) must be escaped to reach the route intact.
      const resp = await http.get<estypes.IndicesGetFieldMappingTypeFieldMappings>(
        `/api/index_management/mapping/${encodeURIComponent(indexName)}`
      );

      return resp.mappings;
    },
    [http]
  );

  return { indices, pipelines, getMapping };
}

View file

@ -21,7 +21,7 @@ interface Props {
index: string;
dataView: string;
initialized: boolean;
onIndexChange(): void;
onIndexChange(index: string): void;
createDataView: boolean;
onCreateDataViewChange(): void;
onDataViewChange(): void;
@ -74,7 +74,7 @@ export const ImportSettings: FC<Props> = ({
defaultMessage: 'Simple',
}),
content: (
<React.Fragment>
<>
<EuiSpacer size="m" />
<SimpleSettings
@ -86,8 +86,9 @@ export const ImportSettings: FC<Props> = ({
indexNameError={indexNameError}
combinedFields={combinedFields}
canCreateDataView={canCreateDataView}
results={results}
/>
</React.Fragment>
</>
),
},
{
@ -96,7 +97,7 @@ export const ImportSettings: FC<Props> = ({
defaultMessage: 'Advanced',
}),
content: (
<React.Fragment>
<>
<EuiSpacer size="m" />
<AdvancedSettings
@ -120,13 +121,13 @@ export const ImportSettings: FC<Props> = ({
results={results}
canCreateDataView={canCreateDataView}
/>
</React.Fragment>
</>
),
},
];
return (
<React.Fragment>
<>
<EuiTabbedContent tabs={tabs} initialSelectedTab={tabs[0]} onTabClick={() => {}} />
</React.Fragment>
</>
);
};

View file

@ -0,0 +1,60 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { FC } from 'react';
import React from 'react';
import type { FindFileStructureResponse } from '@kbn/file-upload-plugin/common';
import { EuiCallOut, EuiLink, EuiSpacer } from '@elastic/eui';
import { FormattedMessage } from '@kbn/i18n-react';
import { FILE_FORMATS } from '../../../../../common/constants';
/** Props for `SemanticTextInfo`. */
interface Props {
/** Analysis results; only `format` is read, to decide whether to render. */
results: FindFileStructureResponse;
}
/**
 * Informational callout pointing users at the `semantic_text` field type.
 * Rendered only when the analysed file format is Tika; otherwise renders
 * nothing.
 */
export const SemanticTextInfo: FC<Props> = ({ results }) => {
  if (results.format !== FILE_FORMATS.TIKA) {
    return null;
  }

  const calloutTitle = (
    <FormattedMessage
      id="xpack.dataVisualizer.semanticTextInfo.title"
      defaultMessage="Semantic text field type now available"
    />
  );

  const semanticTextLink = (
    <EuiLink
      href="https://www.elastic.co/guide/en/elasticsearch/reference/current/semantic-text.html"
      target="_blank"
      external
    >
      <code css={{ fontWeight: 'bold' }}>semantic_text</code>
    </EuiLink>
  );

  return (
    <>
      <EuiSpacer size="m" />
      <EuiCallOut title={calloutTitle} color="primary" iconType="iInCircle">
        <FormattedMessage
          id="xpack.dataVisualizer.semanticTextInfo.body"
          defaultMessage="You can add a {semanticText} field when importing this file to easily enable semantic search on the content."
          values={{
            semanticText: semanticTextLink,
          }}
        />
        <br />
        <FormattedMessage
          id="xpack.dataVisualizer.semanticTextInfo.body2"
          defaultMessage="In the Advanced tab, click 'Add additional field' and choose 'Add semantic text field'."
        />
      </EuiCallOut>
      <EuiSpacer size="m" />
    </>
  );
};

View file

@ -11,19 +11,22 @@ import type { FC } from 'react';
import React from 'react';
import { EuiFieldText, EuiFormRow, EuiCheckbox, EuiSpacer } from '@elastic/eui';
import type { FindFileStructureResponse } from '@kbn/file-upload-plugin/common';
import type { CombinedField } from '../../../common/components/combined_fields';
import { CombinedFieldsReadOnlyForm } from '../../../common/components/combined_fields';
import { CreateDataViewToolTip } from './create_data_view_tooltip';
import { SemanticTextInfo } from './semantic_text_info';
interface Props {
index: string;
initialized: boolean;
onIndexChange(): void;
onIndexChange(i: string): void;
createDataView: boolean;
onCreateDataViewChange(): void;
indexNameError: string;
combinedFields: CombinedField[];
canCreateDataView: boolean;
results: FindFileStructureResponse;
}
export const SimpleSettings: FC<Props> = ({
@ -35,6 +38,7 @@ export const SimpleSettings: FC<Props> = ({
indexNameError,
combinedFields,
canCreateDataView,
results,
}) => {
return (
<React.Fragment>
@ -57,7 +61,7 @@ export const SimpleSettings: FC<Props> = ({
)}
value={index}
disabled={initialized === true}
onChange={onIndexChange}
onChange={(e) => onIndexChange(e.target.value)}
isInvalid={indexNameError !== ''}
aria-label={i18n.translate(
'xpack.dataVisualizer.file.simpleImportSettings.indexNameAriaLabel',
@ -87,7 +91,7 @@ export const SimpleSettings: FC<Props> = ({
/>
</CreateDataViewToolTip>
<EuiSpacer size="m" />
<SemanticTextInfo results={results} />
<CombinedFieldsReadOnlyForm combinedFields={combinedFields} />
</React.Fragment>

View file

@ -16,7 +16,7 @@ import { Failures } from './failures';
interface Props {
index: string;
dataView: string;
ingestPipelineId: string;
pipelineId: string;
docCount: number;
importFailures: DocFailure[];
createDataView: boolean;
@ -26,7 +26,7 @@ interface Props {
export const ImportSummary: FC<Props> = ({
index,
dataView,
ingestPipelineId,
pipelineId,
docCount,
importFailures,
createDataView,
@ -35,7 +35,7 @@ export const ImportSummary: FC<Props> = ({
const items = createDisplayItems(
index,
dataView,
ingestPipelineId,
pipelineId,
docCount,
importFailures,
createDataView,
@ -99,7 +99,7 @@ export const ImportSummary: FC<Props> = ({
function createDisplayItems(
index: string,
dataView: string,
ingestPipelineId: string,
pipelineId: string,
docCount: number,
importFailures: DocFailure[],
createDataView: boolean,
@ -134,7 +134,7 @@ function createDisplayItems(
defaultMessage="Ingest pipeline"
/>
),
description: ingestPipelineId,
description: pipelineId,
});
}

View file

@ -0,0 +1,242 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { DataViewsServicePublic } from '@kbn/data-views-plugin/public/types';
import type {
FindFileStructureResponse,
IngestPipeline,
} from '@kbn/file-upload-plugin/common/types';
import type { FileUploadStartApi } from '@kbn/file-upload-plugin/public/api';
import { i18n } from '@kbn/i18n';
import { IMPORT_STATUS } from '../import_progress/import_progress';
/** Inputs to `importData` that come from the component's props. */
interface Props {
/** File bytes handed to the importer's `read`. */
data: ArrayBuffer;
/** Analysis results; supplies the format and the line patterns for the importer. */
results: FindFileStructureResponse;
/** Used to create and save the data view after a successful upload. */
dataViewsContract: DataViewsServicePublic;
/** Used for the permission check and to create the importer. */
fileUpload: FileUploadStartApi;
}
/** User-editable import settings consumed by `importData`. */
interface Config {
/** Target index name; an empty string makes `importData` return immediately. */
index: string;
/** Data view name; when empty the index name is used instead. */
dataView: string;
/** Whether a data view is created after the upload succeeds. */
createDataView: boolean;
/** JSON text parsed into the index settings. */
indexSettingsString: string;
/** JSON text parsed into the index mappings. */
mappingsString: string;
/** JSON text parsed into the ingest pipeline definition. */
pipelineString: string;
/** When not null, used for the upload instead of the pipeline id returned by initialisation. */
pipelineId: string | null;
}
/** Extracts a printable message from a caught value of unknown type. */
function toErrorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Runs the whole import flow: permission check, JSON parsing of the settings,
 * reading the file, creating the index and pipeline, uploading the documents
 * and optionally creating a data view. Progress is reported through
 * `setState` after each step.
 *
 * Every failure path finishes by publishing `importing: false`,
 * `imported: false` and the collected `errors`, so the caller never stays in
 * the importing state and every recorded error reaches the UI.
 *
 * @param props file bytes, analysis results and the services needed
 * @param config user-editable import settings
 * @param setState receives partial state updates as the import progresses
 */
export async function importData(props: Props, config: Config, setState: (state: unknown) => void) {
  const { data, results, dataViewsContract, fileUpload } = props;
  const {
    index,
    dataView,
    createDataView,
    indexSettingsString,
    mappingsString,
    pipelineString,
    pipelineId,
  } = config;
  const { format } = results;

  const errors: unknown[] = [];

  if (index === '') {
    return;
  }

  // Shared exit for failed steps: without it the errors gathered so far are
  // lost and `importing` is left set to true.
  const reportFailure = () => {
    setState({
      importing: false,
      imported: false,
      errors,
    });
  };

  if (
    (await fileUpload.hasImportPermission({
      checkCreateDataView: createDataView,
      checkHasManagePipeline: true,
      indexName: index,
    })) === false
  ) {
    errors.push(
      i18n.translate('xpack.dataVisualizer.file.importView.importPermissionError', {
        defaultMessage: 'You do not have permission to create or import data into index {index}.',
        values: {
          index,
        },
      })
    );
    setState({
      permissionCheckStatus: IMPORT_STATUS.FAILED,
      importing: false,
      imported: false,
      errors,
    });
    return;
  }

  setState({
    importing: true,
    imported: false,
    reading: true,
    initialized: true,
    permissionCheckStatus: IMPORT_STATUS.COMPLETE,
  });

  let success = true;

  let settings = {};
  let mappings = {};
  let pipeline = {};

  // All three strings are parsed even if an earlier one fails, so the user
  // sees every parse error at once.
  try {
    settings = JSON.parse(indexSettingsString);
  } catch (error) {
    success = false;
    const parseError = i18n.translate('xpack.dataVisualizer.file.importView.parseSettingsError', {
      defaultMessage: 'Error parsing settings:',
    });
    errors.push(`${parseError} ${toErrorMessage(error)}`);
  }

  try {
    mappings = JSON.parse(mappingsString);
  } catch (error) {
    success = false;
    const parseError = i18n.translate('xpack.dataVisualizer.file.importView.parseMappingsError', {
      defaultMessage: 'Error parsing mappings:',
    });
    errors.push(`${parseError} ${toErrorMessage(error)}`);
  }

  try {
    pipeline = JSON.parse(pipelineString);
  } catch (error) {
    success = false;
    const parseError = i18n.translate('xpack.dataVisualizer.file.importView.parsePipelineError', {
      defaultMessage: 'Error parsing ingest pipeline:',
    });
    errors.push(`${parseError} ${toErrorMessage(error)}`);
  }

  setState({
    parseJSONStatus: getSuccess(success),
  });

  if (success === false) {
    reportFailure();
    return;
  }

  const importer = await fileUpload.importerFactory(format, {
    excludeLinesPattern: results.exclude_lines_pattern,
    multilineStartPattern: results.multiline_start_pattern,
  });

  const readResp = importer.read(data);
  success = readResp.success;
  setState({
    readStatus: getSuccess(success),
    reading: false,
    importer,
  });

  if (success === false) {
    // Surface the reader's own error when it supplied one.
    if ('error' in readResp && readResp.error !== undefined) {
      errors.push(readResp.error);
    }
    reportFailure();
    return;
  }

  const initializeImportResp = await importer.initializeImport(
    index,
    settings,
    mappings,
    pipeline as IngestPipeline
  );

  const timeFieldName = importer.getTimeField();
  setState({ timeFieldName });

  const indexCreated = initializeImportResp.index !== undefined;
  setState({
    indexCreatedStatus: getSuccess(indexCreated),
  });

  const pipelineCreated = initializeImportResp.pipelineId !== undefined;
  if (indexCreated) {
    setState({
      ingestPipelineCreatedStatus: pipelineCreated ? IMPORT_STATUS.COMPLETE : IMPORT_STATUS.FAILED,
      pipelineId: pipelineCreated ? initializeImportResp.pipelineId : '',
    });
  }

  success = indexCreated && pipelineCreated;

  if (success === false) {
    errors.push(initializeImportResp.error);
    reportFailure();
    return;
  }

  const importResp = await importer.import(
    initializeImportResp.id,
    index,
    // An explicitly configured pipeline id wins over the one just created.
    pipelineId ?? initializeImportResp.pipelineId,
    (progress: number) => {
      setState({
        uploadProgress: progress,
      });
    }
  );
  success = importResp.success;
  setState({
    uploadStatus: getSuccess(importResp.success),
    importFailures: importResp.failures,
    docCount: importResp.docCount,
  });

  if (success === false) {
    errors.push(importResp.error);
    reportFailure();
    return;
  }

  if (createDataView) {
    const dataViewName = dataView === '' ? index : dataView;
    const dataViewResp = await createKibanaDataView(dataViewName, dataViewsContract, timeFieldName);
    success = dataViewResp.success;
    setState({
      dataViewCreatedStatus: dataViewResp.success ? IMPORT_STATUS.COMPLETE : IMPORT_STATUS.FAILED,
      dataViewId: dataViewResp.id,
    });
    if (success === false) {
      errors.push(dataViewResp.error);
    }
  }

  setState({
    importing: false,
    imported: success,
    errors,
  });
}
/**
 * Creates and saves a data view with the given title and optional time field.
 *
 * Never rejects: a failure is reported as `{ success: false, error }`, a
 * success as `{ success: true, id }`.
 */
async function createKibanaDataView(
  dataViewName: string,
  dataViewsContract: DataViewsServicePublic,
  timeFieldName?: string
) {
  try {
    const { id } = await dataViewsContract.createAndSave({
      title: dataViewName,
      timeFieldName,
    });
    return { success: true, id };
  } catch (error) {
    return { success: false, error };
  }
}
/** Maps a boolean outcome onto the matching import status constant. */
function getSuccess(success: boolean) {
  if (success) {
    return IMPORT_STATUS.COMPLETE;
  }
  return IMPORT_STATUS.FAILED;
}

View file

@ -20,7 +20,6 @@ import {
EuiButtonEmpty,
} from '@elastic/eui';
import { i18n } from '@kbn/i18n';
import { debounce } from 'lodash';
import { ResultsLinks } from '../../../common/components/results_links';
import { FilebeatConfigFlyout } from '../../../common/components/filebeat_config_flyout';
@ -35,6 +34,8 @@ import {
getDefaultCombinedFields,
} from '../../../common/components/combined_fields';
import { MODE as DATAVISUALIZER_MODE } from '../file_data_visualizer_view/constants';
import { importData } from './import';
import { FILE_FORMATS } from '../../../../../common/constants';
const DEFAULT_INDEX_SETTINGS = {};
const CONFIG_MODE = { SIMPLE: 0, ADVANCED: 1 };
@ -57,7 +58,7 @@ const DEFAULT_STATE = {
createDataView: true,
dataView: '',
dataViewId: '',
ingestPipelineId: '',
pipelineId: null,
errors: [],
importFailures: [],
docCount: 0,
@ -74,6 +75,7 @@ const DEFAULT_STATE = {
checkingValidIndex: false,
combinedFields: [],
importer: undefined,
createPipeline: true,
};
export class ImportView extends Component {
@ -96,228 +98,31 @@ export class ImportView extends Component {
};
clickImport = () => {
this.import();
};
// TODO - sort this function out. it's a mess
async import() {
const { data, results, dataViewsContract, fileUpload } = this.props;
const {
index,
dataView,
createDataView,
indexSettingsString,
mappingsString,
pipelineString,
pipelineId,
} = this.state;
const { format } = results;
let { timeFieldName } = this.state;
const { index, dataView, createDataView, indexSettingsString, mappingsString, pipelineString } =
this.state;
const errors = [];
if (index !== '') {
this.setState(
{
importing: true,
errors,
},
async () => {
// check to see if the user has permission to create and ingest data into the specified index
if (
(await fileUpload.hasImportPermission({
checkCreateDataView: createDataView,
checkHasManagePipeline: true,
indexName: index,
})) === false
) {
errors.push(
i18n.translate('xpack.dataVisualizer.file.importView.importPermissionError', {
defaultMessage:
'You do not have permission to create or import data into index {index}.',
values: {
index,
},
})
);
this.setState({
permissionCheckStatus: IMPORT_STATUS.FAILED,
importing: false,
imported: false,
errors,
});
return;
}
this.setState(
{
importing: true,
imported: false,
reading: true,
initialized: true,
permissionCheckStatus: IMPORT_STATUS.COMPLETE,
},
() => {
setTimeout(async () => {
let success = true;
const createPipeline = pipelineString !== '';
let settings = {};
let mappings = {};
let pipeline = {};
try {
settings = JSON.parse(indexSettingsString);
} catch (error) {
success = false;
const parseError = i18n.translate(
'xpack.dataVisualizer.file.importView.parseSettingsError',
{
defaultMessage: 'Error parsing settings:',
}
);
errors.push(`${parseError} ${error.message}`);
}
try {
mappings = JSON.parse(mappingsString);
} catch (error) {
success = false;
const parseError = i18n.translate(
'xpack.dataVisualizer.file.importView.parseMappingsError',
{
defaultMessage: 'Error parsing mappings:',
}
);
errors.push(`${parseError} ${error.message}`);
}
try {
if (createPipeline) {
pipeline = JSON.parse(pipelineString);
}
} catch (error) {
success = false;
const parseError = i18n.translate(
'xpack.dataVisualizer.file.importView.parsePipelineError',
{
defaultMessage: 'Error parsing ingest pipeline:',
}
);
errors.push(`${parseError} ${error.message}`);
}
this.setState({
parseJSONStatus: success ? IMPORT_STATUS.COMPLETE : IMPORT_STATUS.FAILED,
});
if (success) {
const importer = await fileUpload.importerFactory(format, {
excludeLinesPattern: results.exclude_lines_pattern,
multilineStartPattern: results.multiline_start_pattern,
});
if (importer !== undefined) {
const readResp = importer.read(data, this.setReadProgress);
success = readResp.success;
this.setState({
readStatus: success ? IMPORT_STATUS.COMPLETE : IMPORT_STATUS.FAILED,
reading: false,
importer,
});
if (readResp.success === false) {
console.error(readResp.error);
errors.push(readResp.error);
}
if (success) {
const initializeImportResp = await importer.initializeImport(
index,
settings,
mappings,
pipeline
);
timeFieldName = importer.getTimeField();
this.setState({ timeFieldName });
const indexCreated = initializeImportResp.index !== undefined;
this.setState({
indexCreatedStatus: indexCreated
? IMPORT_STATUS.COMPLETE
: IMPORT_STATUS.FAILED,
});
if (createPipeline) {
const pipelineCreated = initializeImportResp.pipelineId !== undefined;
if (indexCreated) {
this.setState({
ingestPipelineCreatedStatus: pipelineCreated
? IMPORT_STATUS.COMPLETE
: IMPORT_STATUS.FAILED,
ingestPipelineId: pipelineCreated
? initializeImportResp.pipelineId
: '',
});
}
success = indexCreated && pipelineCreated;
} else {
success = indexCreated;
}
if (success) {
const importId = initializeImportResp.id;
const pipelineId = initializeImportResp.pipelineId;
const importResp = await importer.import(
importId,
index,
pipelineId,
this.setImportProgress
);
success = importResp.success;
this.setState({
uploadStatus: importResp.success
? IMPORT_STATUS.COMPLETE
: IMPORT_STATUS.FAILED,
importFailures: importResp.failures,
docCount: importResp.docCount,
});
if (success) {
if (createDataView) {
const dataViewName = dataView === '' ? index : dataView;
const dataViewResp = await createKibanaDataView(
dataViewName,
dataViewsContract,
timeFieldName
);
success = dataViewResp.success;
this.setState({
dataViewCreatedStatus: dataViewResp.success
? IMPORT_STATUS.COMPLETE
: IMPORT_STATUS.FAILED,
dataViewId: dataViewResp.id,
});
if (dataViewResp.success === false) {
errors.push(dataViewResp.error);
}
}
} else {
errors.push(importResp.error);
}
} else {
errors.push(initializeImportResp.error);
}
}
}
}
this.setState({
importing: false,
imported: success,
errors,
});
}, 500);
}
);
}
);
}
}
importData(
{ data, results, dataViewsContract, fileUpload },
{
index,
dataView,
createDataView,
indexSettingsString,
mappingsString,
pipelineString,
pipelineId,
},
(state) => this.setState(state)
);
};
onConfigModeChange = (configMode) => {
this.setState({
@ -325,8 +130,7 @@ export class ImportView extends Component {
});
};
onIndexChange = (e) => {
const index = e.target.value;
onIndexChange = (index) => {
this.setState({
index,
checkingValidIndex: true,
@ -385,16 +189,22 @@ export class ImportView extends Component {
});
};
onCombinedFieldsChange = (combinedFields) => {
this.setState({ combinedFields });
onPipelineIdChange = (text) => {
this.setState({
pipelineId: text,
});
};
setImportProgress = (progress) => {
onCreatePipelineChange = (b) => {
this.setState({
uploadProgress: progress,
createPipeline: b,
});
};
onCombinedFieldsChange = (combinedFields) => {
this.setState({ combinedFields });
};
setReadProgress = (progress) => {
this.setState({
readProgress: progress,
@ -409,6 +219,10 @@ export class ImportView extends Component {
this.setState({ isFilebeatFlyoutVisible: false });
};
closeFilebeatFlyout = () => {
this.setState({ isFilebeatFlyoutVisible: false });
};
async loadDataViewNames() {
try {
const dataViewNames = await this.dataViewsContract.getTitles();
@ -423,7 +237,7 @@ export class ImportView extends Component {
index,
dataView,
dataViewId,
ingestPipelineId,
pipelineId,
importing,
imported,
reading,
@ -450,10 +264,9 @@ export class ImportView extends Component {
checkingValidIndex,
combinedFields,
importer,
createPipeline,
} = this.state;
const createPipeline = pipelineString !== '';
const statuses = {
reading,
readStatus,
@ -567,13 +380,15 @@ export class ImportView extends Component {
<EuiPanel hasShadow={false} hasBorder>
<ImportProgress statuses={statuses} />
{importer !== undefined && importer.initialized() && (
<DocCountChart
statuses={statuses}
dataStart={this.props.dataStart}
importer={importer}
/>
)}
{importer !== undefined &&
importer.initialized() &&
this.props.results.format !== FILE_FORMATS.TIKA && (
<DocCountChart
statuses={statuses}
dataStart={this.props.dataStart}
importer={importer}
/>
)}
{imported === true && (
<React.Fragment>
@ -582,7 +397,7 @@ export class ImportView extends Component {
<ImportSummary
index={index}
dataView={dataView === '' ? index : dataView}
ingestPipelineId={ingestPipelineId}
pipelineId={pipelineId}
docCount={docCount}
importFailures={importFailures}
createDataView={createDataView}
@ -613,7 +428,7 @@ export class ImportView extends Component {
<EuiSpacer size="l" />
<ResultsLinks
fieldStats={this.props.results?.field_stats}
results={this.props.results}
index={index}
dataViewId={dataViewId}
timeFieldName={timeFieldName}
@ -621,13 +436,14 @@ export class ImportView extends Component {
showFilebeatFlyout={this.showFilebeatFlyout}
getAdditionalLinks={this.props.getAdditionalLinks ?? []}
resultLinks={this.props.resultLinks}
combinedFields={combinedFields}
/>
{isFilebeatFlyoutVisible && (
<FilebeatConfigFlyout
index={index}
results={this.props.results}
ingestPipelineId={ingestPipelineId}
pipelineId={pipelineId}
closeFlyout={this.closeFilebeatFlyout}
/>
)}
@ -648,25 +464,6 @@ export class ImportView extends Component {
}
}
/**
 * Creates and saves a data view through the supplied data views contract.
 *
 * Never rejects: any failure raised while creating the data view (or while
 * reading the id off the result) is reported in the returned object instead.
 *
 * @param dataViewName title given to the new data view
 * @param dataViewsContract object exposing `createAndSave`
 * @param timeFieldName time field passed straight through to `createAndSave`
 * @returns `{ success: true, id }` on success, `{ success: false, error }` otherwise
 */
async function createKibanaDataView(dataViewName, dataViewsContract, timeFieldName) {
  const spec = { title: dataViewName, timeFieldName };
  try {
    const { id } = await dataViewsContract.createAndSave(spec);
    return { success: true, id };
  } catch (error) {
    return { success: false, error };
  }
}
function getDefaultState(state, results, capabilities) {
const indexSettingsString =
state.indexSettingsString === ''

View file

@ -28,7 +28,8 @@ import { FieldsStatsGrid } from '../../../common/components/fields_stats_grid';
import { MODE as DATAVISUALIZER_MODE } from '../file_data_visualizer_view/constants';
interface Props {
data: string;
fileContents: string;
data: ArrayBuffer;
fileName: string;
results: FindFileStructureResponse;
showEditFlyout(): void;
@ -40,7 +41,7 @@ interface Props {
}
export const ResultsView: FC<Props> = ({
data,
fileContents,
fileName,
results,
showEditFlyout,
@ -87,7 +88,7 @@ export const ResultsView: FC<Props> = ({
<div>
<EuiPanel data-test-subj="dataVisualizerFileFileContentPanel" hasShadow={false} hasBorder>
<FileContents
data={data}
fileContents={fileContents}
format={results.format}
numberOfLines={results.num_lines_analyzed}
semiStructureTextData={semiStructureTextData}
@ -124,30 +125,36 @@ export const ResultsView: FC<Props> = ({
</EuiButton>
</EuiFlexItem>
<EuiFlexItem grow={false}>
<EuiButtonEmpty onClick={() => showExplanationFlyout()} disabled={disableButtons}>
<FormattedMessage
id="xpack.dataVisualizer.file.resultsView.analysisExplanationButtonLabel"
defaultMessage="Analysis explanation"
/>
</EuiButtonEmpty>
{results.format !== FILE_FORMATS.TIKA ? (
<EuiButtonEmpty onClick={() => showExplanationFlyout()} disabled={disableButtons}>
<FormattedMessage
id="xpack.dataVisualizer.file.resultsView.analysisExplanationButtonLabel"
defaultMessage="Analysis explanation"
/>
</EuiButtonEmpty>
) : null}
</EuiFlexItem>
</EuiFlexGroup>
</EuiPanel>
<EuiSpacer size="m" />
{results.format !== FILE_FORMATS.TIKA ? (
<>
<EuiSpacer size="m" />
<EuiPanel data-test-subj="dataVisualizerFileFileStatsPanel" hasShadow={false} hasBorder>
<EuiTitle size="s">
<h2 data-test-subj="dataVisualizerFileStatsTitle">
<FormattedMessage
id="xpack.dataVisualizer.file.resultsView.fileStatsName"
defaultMessage="File stats"
/>
</h2>
</EuiTitle>
<EuiPanel data-test-subj="dataVisualizerFileFileStatsPanel" hasShadow={false} hasBorder>
<EuiTitle size="s">
<h2 data-test-subj="dataVisualizerFileStatsTitle">
<FormattedMessage
id="xpack.dataVisualizer.file.resultsView.fileStatsName"
defaultMessage="File stats"
/>
</h2>
</EuiTitle>
<FieldsStatsGrid results={results} />
</EuiPanel>
<FieldsStatsGrid results={results} />
</EuiPanel>
</>
) : null}
</div>
</EuiPageBody>
);

View file

@ -66,4 +66,32 @@ export function routes(coreSetup: CoreSetup<StartDeps, unknown>, logger: Logger)
}
}
);
// Internal GET route that returns the inference endpoints reported by Elasticsearch.
// It is tagged `access:fileUpload:analyzeFile` and takes no request input (validate: false).
router.versioned
.get({
path: '/internal/data_visualizer/inference_services',
access: 'internal',
options: {
tags: ['access:fileUpload:analyzeFile'],
},
})
.addVersion(
{
version: '1',
validate: false,
},
async (context, request, response) => {
try {
const esClient = (await context.core).elasticsearch.client;
// The lookup runs as the current user. `_all` is passed as the inference id,
// presumably to list every endpoint (confirm against the ES inference API).
// The suppression comment below must stay directly above the call it silences.
// @ts-expect-error types are wrong
const { endpoints } = await esClient.asCurrentUser.inference.getModel({
inference_id: '_all',
});
// Only the `endpoints` array is sent back to the browser.
return response.ok({ body: endpoints });
} catch (e) {
// Any failure is converted with wrapError and returned as a custom error response.
return response.customError(wrapError(e));
}
}
);
}

View file

@ -83,6 +83,7 @@
"@kbn/shared-ux-utility",
"@kbn/search-types",
"@kbn/unified-field-list",
"@kbn/core-http-browser",
"@kbn/content-management-utils",
"@kbn/core-lifecycle-browser",
"@kbn/presentation-containers",

View file

@ -8,9 +8,10 @@
export const UI_SETTING_MAX_FILE_SIZE = 'fileUpload:maxFileSize';
export const MB = Math.pow(2, 20);
export const MAX_FILE_SIZE = '100MB';
export const MAX_FILE_SIZE_BYTES = 104857600; // 100MB
export const MAX_FILE_SIZE_BYTES = 524288000; // 500MB
// NOTE(review): 1GiB is 2^30 = 1073741824; the value below is 550 bytes lower, which looks like
// transposed digits rather than a deliberate margin. Confirm before treating it as exactly 1GB.
export const ABSOLUTE_MAX_FILE_SIZE_BYTES = 1073741274; // 1GB
export const FILE_SIZE_DISPLAY_FORMAT = '0,0.[0] b';
export const MAX_TIKA_FILE_SIZE_BYTES = 62914560; // 60MB
// Value to use in the Elasticsearch index mapping meta data to identify the
// index as having been created by the ML File Data Visualizer.
@ -20,5 +21,5 @@ export const FILE_FORMATS = {
DELIMITED: 'delimited',
NDJSON: 'ndjson',
SEMI_STRUCTURED_TEXT: 'semi_structured_text',
// XML: 'xml',
TIKA: 'tika',
};

View file

@ -28,6 +28,7 @@ export interface FindFileStructureResponse {
has_header_row: boolean;
has_byte_order_marker: boolean;
format: string;
document_type?: string;
field_stats: {
[fieldName: string]: {
count: number;
@ -56,6 +57,7 @@ export interface FindFileStructureResponse {
};
};
};
ingest_pipeline: IngestPipeline;
quote: string;
delimiter: string;
need_client_timezone: boolean;
@ -123,14 +125,32 @@ export interface ImportDocMessage {
message: string;
}
export type ImportDoc = ImportDocMessage | string | object;
/**
 * Document shape produced by the Tika importer: a single `data` field
 * carrying the file contents as a base64-encoded string.
 */
export interface ImportDocTika {
data: string;
}
export type ImportDoc = ImportDocMessage | ImportDocTika | string | object;
export interface IngestPipelineWrapper {
id: string;
pipeline: IngestPipeline;
pipeline?: IngestPipeline;
}
export interface IngestPipeline {
description: string;
processors: any[];
isManaged?: boolean;
name?: string;
}
/**
 * Response of the `/internal/file_upload/preview_tika_contents` endpoint.
 *
 * This is the `attachment` object taken from the simulated ingest document,
 * i.e. whatever the attachment ingest processor extracted from the file.
 * Fields marked optional are not guaranteed to be present.
 */
export interface PreviewTikaResponse {
date?: string;
content_type: string;
author?: string;
format: string;
modified: string;
language: string;
creator_tool?: string;
// Text extracted from the uploaded file.
content: string;
content_length: number;
}

View file

@ -5,10 +5,20 @@
* 2.0.
*/
import { fromByteArray } from 'base64-js';
import { lazyLoadModules } from '../lazy_load_bundle';
import type { IImporter, ImportFactoryOptions } from '../importer';
import type { HasImportPermission, FindFileStructureResponse } from '../../common/types';
import type { getMaxBytes, getMaxBytesFormatted } from '../importer/get_max_bytes';
import type {
HasImportPermission,
FindFileStructureResponse,
PreviewTikaResponse,
} from '../../common/types';
import type {
getMaxBytes,
getMaxBytesFormatted,
getMaxTikaBytes,
getMaxTikaBytesFormatted,
} from '../importer/get_max_bytes';
import { GeoUploadWizardAsyncWrapper } from './geo_upload_wizard_async_wrapper';
import { IndexNameFormAsyncWrapper } from './index_name_form_async_wrapper';
@ -18,10 +28,13 @@ export interface FileUploadStartApi {
importerFactory: typeof importerFactory;
getMaxBytes: typeof getMaxBytes;
getMaxBytesFormatted: typeof getMaxBytesFormatted;
getMaxTikaBytes: typeof getMaxTikaBytes;
getMaxTikaBytesFormatted: typeof getMaxTikaBytesFormatted;
hasImportPermission: typeof hasImportPermission;
checkIndexExists: typeof checkIndexExists;
getTimeFieldRange: typeof getTimeFieldRange;
analyzeFile: typeof analyzeFile;
previewTikaFile: typeof previewTikaFile;
}
export interface GetTimeFieldRangeResponse {
@ -36,7 +49,7 @@ export const IndexNameFormComponent = IndexNameFormAsyncWrapper;
export async function importerFactory(
format: string,
options: ImportFactoryOptions
): Promise<IImporter | undefined> {
): Promise<IImporter> {
const fileUploadModules = await lazyLoadModules();
return fileUploadModules.importerFactory(format, options);
}
@ -62,6 +75,24 @@ export async function analyzeFile(
});
}
/**
 * Asks the server for a preview of the contents of a binary file.
 *
 * The raw bytes are base64-encoded and POSTed as JSON (`{ base64File }`) to
 * the internal `preview_tika_contents` route, version 1.
 *
 * @param data raw file bytes
 * @param params extra values sent as the request's query string; defaults to none
 * @returns the preview response resolved by the HTTP fetch
 */
export async function previewTikaFile(
  data: ArrayBuffer,
  params: Record<string, string> = {}
): Promise<PreviewTikaResponse> {
  const { getHttp } = await lazyLoadModules();

  const fileBytes = new Uint8Array(data);
  const payload = { base64File: fromByteArray(fileBytes) };
  const body = JSON.stringify(payload);

  const preview = await getHttp().fetch<PreviewTikaResponse>({
    path: `/internal/file_upload/preview_tika_contents`,
    method: 'POST',
    version: '1',
    body,
    query: params,
  });
  return preview;
}
export async function hasImportPermission(params: HasImportPermissionParams): Promise<boolean> {
const fileUploadModules = await lazyLoadModules();
try {

View file

@ -12,6 +12,7 @@ import {
MAX_FILE_SIZE,
MAX_FILE_SIZE_BYTES,
UI_SETTING_MAX_FILE_SIZE,
MAX_TIKA_FILE_SIZE_BYTES,
} from '../../common/constants';
import { getUiSettings } from '../kibana_services';
@ -28,3 +29,11 @@ export function getMaxBytes() {
/**
 * Returns the result of getMaxBytes() rendered as a display string using
 * FILE_SIZE_DISPLAY_FORMAT.
 */
export function getMaxBytesFormatted() {
  const maxBytes = getMaxBytes();
  return numeral(maxBytes).format(FILE_SIZE_DISPLAY_FORMAT);
}
/**
 * Returns the size limit, in bytes, for Tika files.
 * This is always the MAX_TIKA_FILE_SIZE_BYTES constant.
 */
export function getMaxTikaBytes() {
return MAX_TIKA_FILE_SIZE_BYTES;
}
/**
 * Returns the result of getMaxTikaBytes() rendered as a display string using
 * FILE_SIZE_DISPLAY_FORMAT.
 */
export function getMaxTikaBytesFormatted() {
  const maxTikaBytes = getMaxTikaBytes();
  return numeral(maxTikaBytes).format(FILE_SIZE_DISPLAY_FORMAT);
}

View file

@ -27,7 +27,7 @@ const DEFAULT_TIME_FIELD = '@timestamp';
export abstract class Importer implements IImporter {
protected _docArray: ImportDoc[] = [];
private _chunkSize = CHUNK_SIZE;
protected _chunkSize = CHUNK_SIZE;
private _index: string | undefined;
private _pipeline: IngestPipeline | undefined;
private _timeFieldName: string | undefined;
@ -282,6 +282,10 @@ function updatePipelineTimezone(ingestPipeline: IngestPipeline) {
}
function createDocumentChunks(docArray: ImportDoc[], chunkSize: number) {
if (chunkSize === 0) {
return [docArray];
}
const chunks: ImportDoc[][] = [];
// chop docArray into chunks
const tempChunks = chunk(docArray, chunkSize);

View file

@ -7,6 +7,7 @@
import { MessageImporter } from './message_importer';
import { NdjsonImporter } from './ndjson_importer';
import { TikaImporter } from './tika_importer';
import { ImportFactoryOptions } from './types';
import { FILE_FORMATS } from '../../common/constants';
@ -21,7 +22,9 @@ export function importerFactory(format: string, options: ImportFactoryOptions) {
return new MessageImporter(options);
case FILE_FORMATS.NDJSON:
return new NdjsonImporter();
case FILE_FORMATS.TIKA:
return new TikaImporter();
default:
return;
throw new Error('Importer not found for format');
}
}

View file

@ -0,0 +1,48 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { fromByteArray } from 'base64-js';
import { ImportDocTika } from '../../common/types';
import { Importer } from './importer';
import { CreateDocsResponse } from './types';
/**
 * Importer for files that are uploaded whole rather than parsed in the browser.
 *
 * The entire file is base64-encoded and wrapped in one `{ data }` document.
 */
export class TikaImporter extends Importer {
  constructor() {
    super();
  }

  /**
   * Encodes the file and appends the resulting document to the importer's doc array.
   *
   * @param data raw file bytes
   * @returns `{ success: true }` when the document was queued, `{ success: false }` otherwise
   */
  public read(data: ArrayBuffer) {
    // NOTE(review): a chunk size of 0 presumably makes the base importer send
    // all docs as a single chunk. Confirm against Importer.
    this._chunkSize = 0;

    const encodedFile = fromByteArray(new Uint8Array(data));
    const result = this._createDocs(encodedFile);
    if (!result.success) {
      return { success: false };
    }

    this._docArray = this._docArray.concat(result.docs);
    return { success: true };
  }

  /**
   * Wraps the base64 string in a single-element doc list.
   * Nothing is ever left over, so `remainder` is always 0.
   *
   * @param base64String base64-encoded file contents
   */
  protected _createDocs(base64String: string): CreateDocsResponse<ImportDocTika> {
    const remainder = 0;
    try {
      return { success: true, docs: [{ data: base64String }], remainder };
    } catch (error) {
      return { success: false, docs: [], remainder, error };
    }
  }
}

View file

@ -35,7 +35,7 @@ let loadModulesPromise: Promise<LazyLoadedFileUploadModules>;
export interface LazyLoadedFileUploadModules {
GeoUploadWizard: React.ComponentType<FileUploadComponentProps>;
IndexNameForm: React.ComponentType<IndexNameFormProps>;
importerFactory: (format: string, options: ImportFactoryOptions) => IImporter | undefined;
importerFactory: (format: string, options: ImportFactoryOptions) => IImporter;
getHttp: () => HttpStart;
}

View file

@ -16,9 +16,15 @@ import {
checkIndexExists,
getTimeFieldRange,
analyzeFile,
previewTikaFile,
} from './api';
import { setStartServices } from './kibana_services';
import { getMaxBytes, getMaxBytesFormatted } from './importer/get_max_bytes';
import {
getMaxBytes,
getMaxBytesFormatted,
getMaxTikaBytes,
getMaxTikaBytesFormatted,
} from './importer/get_max_bytes';
// eslint-disable-next-line @typescript-eslint/no-empty-interface
export interface FileUploadSetupDependencies {}
@ -48,10 +54,13 @@ export class FileUploadPlugin
importerFactory,
getMaxBytes,
getMaxBytesFormatted,
getMaxTikaBytes,
getMaxTikaBytesFormatted,
hasImportPermission,
checkIndexExists,
getTimeFieldRange,
analyzeFile,
previewTikaFile,
};
}
}

View file

@ -0,0 +1,50 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { IScopedClusterClient } from '@kbn/core/server';
import type { PreviewTikaResponse } from '../common/types';
/**
 * Returns the contents of a file using the attachment ingest processor.
 *
 * Nothing is indexed: the file is run through a throwaway pipeline via
 * `ingest.simulate`, and the resulting `attachment` object is returned.
 *
 * @param client IScopedClusterClient; the simulate call is made as the internal user
 * @param base64File base64 encoded file
 * @returns the `attachment` object produced by the attachment processor
 * @throws Error when the simulate response contains no attachment
 */
export async function previewTikaContents(
  client: IScopedClusterClient,
  base64File: string
): Promise<PreviewTikaResponse> {
  const pipeline = {
    description: '',
    processors: [
      {
        attachment: {
          field: 'data',
          // drop the base64 payload from the simulated doc once it has been processed
          remove_binary: true,
        },
      },
    ],
  };

  const resp = await client.asInternalUser.ingest.simulate({
    pipeline,
    docs: [
      {
        // placeholder index and id; the document is only simulated
        _index: 'index',
        _id: 'id',
        _source: {
          data: base64File,
        },
      },
    ],
  });

  // Every level is optional-chained so that an empty `docs` array, a result without
  // `doc`, or a doc without `_source` all raise the same descriptive error below
  // rather than a raw TypeError.
  const attachment = resp.docs?.[0]?.doc?._source?.attachment;
  if (!attachment) {
    throw new Error('Failed to extract text from file.');
  }

  return attachment;
}

View file

@ -12,7 +12,7 @@ import type {
IndicesIndexSettings,
MappingTypeMapping,
} from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { MAX_FILE_SIZE_BYTES } from '../common/constants';
import { MAX_FILE_SIZE_BYTES, MAX_TIKA_FILE_SIZE_BYTES } from '../common/constants';
import type { IngestPipelineWrapper, InputData } from '../common/types';
import { wrapError } from './error_wrapper';
import { importDataProvider } from './import_data';
@ -29,6 +29,7 @@ import {
import type { StartDeps } from './types';
import { checkFileUploadPrivileges } from './check_privileges';
import { previewIndexTimeRange } from './preview_index_time_range';
import { previewTikaContents } from './preview_tika_contents';
function importData(
client: IScopedClusterClient,
@ -314,6 +315,51 @@ export function fileUploadRoutes(coreSetup: CoreSetup<StartDeps, unknown>, logge
const esClient = (await context.core).elasticsearch.client;
const resp = await previewIndexTimeRange(esClient, timeField, pipeline, docs);
return response.ok({
body: resp,
});
} catch (e) {
return response.customError(wrapError(e));
}
}
);
/**
* @apiGroup FileDataVisualizer
*
* @api {post} /internal/file_upload/preview_tika_contents Returns the contents of a file using the attachment ingest processor
* @apiName PreviewTikaContents
* @apiDescription Preview the contents of a file using the attachment ingest processor
*/
router.versioned
.post({
path: '/internal/file_upload/preview_tika_contents',
access: 'internal',
options: {
tags: ['access:fileUpload:analyzeFile'],
body: {
accepts: ['application/json'],
maxBytes: MAX_TIKA_FILE_SIZE_BYTES,
},
},
})
.addVersion(
{
version: '1',
validate: {
request: {
body: schema.object({
base64File: schema.string(),
}),
},
},
},
async (context, request, response) => {
try {
const { base64File } = request.body;
const esClient = (await context.core).elasticsearch.client;
const resp = await previewTikaContents(esClient, base64File);
return response.ok({
body: resp,
});

View file

@ -14992,8 +14992,6 @@
"xpack.dataVisualizer.file.welcomeContent.delimitedTextFilesDescription": "Fichiers texte délimités, tels que CSV et TSV",
"xpack.dataVisualizer.file.welcomeContent.logFilesWithCommonFormatDescription": "Fichiers log avec un format d'horodatage courant",
"xpack.dataVisualizer.file.welcomeContent.newlineDelimitedJsonDescription": "JSON délimité par une nouvelle ligne",
"xpack.dataVisualizer.file.welcomeContent.supportedFileFormatDescription": "Les formats de fichier suivants sont pris en charge :",
"xpack.dataVisualizer.file.welcomeContent.uploadedFilesAllowedSizeDescription": "Vous pouvez charger des fichiers d'une taille allant jusqu'à {maxFileSize}.",
"xpack.dataVisualizer.file.welcomeContent.visualizeAndImportDataFromLogFileDescription": "Chargez votre fichier, analysez ses données et, si vous le souhaitez, importez les données dans un index Elasticsearch.",
"xpack.dataVisualizer.file.welcomeContent.visualizeDataFromLogFileDescription": "Téléchargez votre fichier et analysez ses données.",
"xpack.dataVisualizer.file.welcomeContent.visualizeDataFromLogFileTitle": "Charger les données à partir d'un fichier",

View file

@ -14978,8 +14978,6 @@
"xpack.dataVisualizer.file.welcomeContent.delimitedTextFilesDescription": "CSV や TSV などの区切られたテキストファイル",
"xpack.dataVisualizer.file.welcomeContent.logFilesWithCommonFormatDescription": "タイムスタンプの一般的フォーマットのログファイル",
"xpack.dataVisualizer.file.welcomeContent.newlineDelimitedJsonDescription": "改行区切りの JSON",
"xpack.dataVisualizer.file.welcomeContent.supportedFileFormatDescription": "次のファイル形式がサポートされます。",
"xpack.dataVisualizer.file.welcomeContent.uploadedFilesAllowedSizeDescription": "最大{maxFileSize}のファイルをアップロードできます。",
"xpack.dataVisualizer.file.welcomeContent.visualizeAndImportDataFromLogFileDescription": "ファイルをアップロードして、データを分析し、任意でデータをElasticsearchインデックスにインポートできます。",
"xpack.dataVisualizer.file.welcomeContent.visualizeDataFromLogFileDescription": "ファイルをアップロードし、データを分析します。",
"xpack.dataVisualizer.file.welcomeContent.visualizeDataFromLogFileTitle": "ファイルからデータをアップロード",

View file

@ -15003,8 +15003,6 @@
"xpack.dataVisualizer.file.welcomeContent.delimitedTextFilesDescription": "分隔的文本文件,例如 CSV 和 TSV",
"xpack.dataVisualizer.file.welcomeContent.logFilesWithCommonFormatDescription": "具有时间戳通用格式的日志文件",
"xpack.dataVisualizer.file.welcomeContent.newlineDelimitedJsonDescription": "换行符分隔的 JSON",
"xpack.dataVisualizer.file.welcomeContent.supportedFileFormatDescription": "支持以下文件格式:",
"xpack.dataVisualizer.file.welcomeContent.uploadedFilesAllowedSizeDescription": "您可以上传不超过 {maxFileSize} 的文件。",
"xpack.dataVisualizer.file.welcomeContent.visualizeAndImportDataFromLogFileDescription": "上传文件、分析文件数据,然后根据需要将数据导入 Elasticsearch 索引。",
"xpack.dataVisualizer.file.welcomeContent.visualizeDataFromLogFileDescription": "上传您的文件并分析其数据。",
"xpack.dataVisualizer.file.welcomeContent.visualizeDataFromLogFileTitle": "从文件上传数据",

View file

@ -12,5 +12,6 @@ export default function ({ loadTestFile }: FtrProviderContext) {
loadTestFile(require.resolve('./has_import_permission'));
loadTestFile(require.resolve('./index_exists'));
loadTestFile(require.resolve('./preview_index_time_range'));
loadTestFile(require.resolve('./preview_tika_contents'));
});
}

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,49 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { ELASTIC_HTTP_VERSION_HEADER } from '@kbn/core-http-common';
import expect from '@kbn/expect';
import { FtrProviderContext } from '../../ftr_provider_context';
import { pdfBase64 } from './pdf_base64';
/**
 * API integration tests for POST /internal/file_upload/preview_tika_contents.
 */
export default ({ getService }: FtrProviderContext) => {
  const supertest = getService('supertest');

  /**
   * POSTs `base64File` to the preview endpoint (API version 1) and returns the
   * response body, asserting on the HTTP status along the way.
   */
  async function runRequest(base64File: string, expectedResponseCode: number = 200) {
    const { body } = await supertest
      .post(`/internal/file_upload/preview_tika_contents`)
      .set('kbn-xsrf', 'kibana')
      .set(ELASTIC_HTTP_VERSION_HEADER, '1')
      .send({ base64File })
      .expect(expectedResponseCode);

    return body;
  }

  // Response expected for the `pdfBase64` fixture.
  const expectedResponse = {
    date: '2010-12-01T13:33:24Z',
    content_type: 'application/pdf',
    author: 'John',
    format: 'application/pdf; version=1.5',
    modified: '2010-12-01T13:33:24Z',
    language: 'en',
    creator_tool: 'Microsoft® Word 2010',
    content: 'This is a test PDF file',
    content_length: 28,
  };

  // The title now matches the route actually exercised (`..._contents`, with the trailing "s").
  describe('POST /internal/file_upload/preview_tika_contents', () => {
    it('should return the text content from the file', async () => {
      const resp = await runRequest(pdfBase64);

      expect(resp).to.eql(expectedResponse);
    });

    it('should fail to return text when bad data is sent', async () => {
      await runRequest('bad data', 500);
    });
  });
};