mirror of https://github.com/elastic/kibana.git
Don't submit empty seed_urls or sitemap_urls when making a partial crawl request (#126972)
Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
parent de3ae9bfec
commit a24b1fc957

3 changed files with 37 additions and 9 deletions
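The substance of the change: an empty seed_urls: [] or sitemap_urls: [] previously went out with every partial crawl request, where the server can read it as an explicit empty override rather than as "use the crawler's configured defaults" (that server-side reading is inferred from the commit title, not shown in this diff). A minimal before/after sketch of the payload difference, using a hypothetical buildOverrides helper that is not part of this PR:

// Hypothetical helper, for illustration only.
const buildOverrides = (seedUrls: string[]): Record<string, unknown> => {
  const overrides: Record<string, unknown> = { max_crawl_depth: 5 };
  if (seedUrls.length > 0) {
    overrides.seed_urls = seedUrls; // the key is absent entirely when the list is empty
  }
  return overrides;
};

console.log(JSON.stringify({ max_crawl_depth: 5, seed_urls: [] }));
// before the fix: {"max_crawl_depth":5,"seed_urls":[]}
console.log(JSON.stringify(buildOverrides([])));
// after the fix:  {"max_crawl_depth":5}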
@@ -297,7 +297,25 @@ describe('CrawlCustomSettingsFlyoutLogic', () => {
       });
     });
     describe('startCustomCrawl', () => {
-      it('starts a custom crawl with the user set values', async () => {
+      it('can start a custom crawl for selected domains', async () => {
+        mount({
+          includeSitemapsInRobotsTxt: true,
+          maxCrawlDepth: 5,
+          selectedDomainUrls: ['https://www.elastic.co', 'https://swiftype.com'],
+        });
+        jest.spyOn(CrawlerLogic.actions, 'startCrawl');
+
+        CrawlCustomSettingsFlyoutLogic.actions.startCustomCrawl();
+        await nextTick();
+
+        expect(CrawlerLogic.actions.startCrawl).toHaveBeenCalledWith({
+          domain_allowlist: ['https://www.elastic.co', 'https://swiftype.com'],
+          max_crawl_depth: 5,
+          sitemap_discovery_disabled: false,
+        });
+      });
+
+      it('can start a custom crawl selected domains, sitemaps, and seed urls', async () => {
         mount({
           includeSitemapsInRobotsTxt: true,
           maxCrawlDepth: 5,
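A note on the expectation above: toHaveBeenCalledWith performs a deep-equality check against the whole argument, so expecting an object without seed_urls or sitemap_urls keys is itself the regression assertion for this change. An illustrative sketch, not the PR's code:

// Deep equality means extra keys fail the match.
const fn = jest.fn();
fn({ max_crawl_depth: 5, seed_urls: [] });
expect(fn).toHaveBeenCalledWith({ max_crawl_depth: 5 }); // fails: unexpected seed_urls key
fn({ max_crawl_depth: 5 });
expect(fn).toHaveBeenCalledWith({ max_crawl_depth: 5 }); // passes once the key is omitted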
@@ -11,7 +11,7 @@ import { flashAPIErrors } from '../../../../../shared/flash_messages';
 import { HttpLogic } from '../../../../../shared/http';
 import { EngineLogic } from '../../../engine';
 
-import { CrawlerLogic } from '../../crawler_logic';
+import { CrawlerLogic, CrawlRequestOverrides } from '../../crawler_logic';
 import { DomainConfig, DomainConfigFromServer } from '../../types';
 import { domainConfigServerToClient } from '../../utils';
 import { extractDomainAndEntryPointFromUrl } from '../add_domain/utils';
@ -213,13 +213,23 @@ export const CrawlCustomSettingsFlyoutLogic = kea<
|
||||||
actions.fetchDomainConfigData();
|
actions.fetchDomainConfigData();
|
||||||
},
|
},
|
||||||
startCustomCrawl: () => {
|
startCustomCrawl: () => {
|
||||||
CrawlerLogic.actions.startCrawl({
|
const overrides: CrawlRequestOverrides = {
|
||||||
domain_allowlist: values.selectedDomainUrls,
|
|
||||||
max_crawl_depth: values.maxCrawlDepth,
|
|
||||||
seed_urls: [...values.selectedEntryPointUrls, ...values.customEntryPointUrls],
|
|
||||||
sitemap_urls: [...values.selectedSitemapUrls, ...values.customSitemapUrls],
|
|
||||||
sitemap_discovery_disabled: !values.includeSitemapsInRobotsTxt,
|
sitemap_discovery_disabled: !values.includeSitemapsInRobotsTxt,
|
||||||
});
|
max_crawl_depth: values.maxCrawlDepth,
|
||||||
|
domain_allowlist: values.selectedDomainUrls,
|
||||||
|
};
|
||||||
|
|
||||||
|
const seedUrls = [...values.selectedEntryPointUrls, ...values.customEntryPointUrls];
|
||||||
|
if (seedUrls.length > 0) {
|
||||||
|
overrides.seed_urls = seedUrls;
|
||||||
|
}
|
||||||
|
|
||||||
|
const sitemapUrls = [...values.selectedSitemapUrls, ...values.customSitemapUrls];
|
||||||
|
if (sitemapUrls.length > 0) {
|
||||||
|
overrides.sitemap_urls = sitemapUrls;
|
||||||
|
}
|
||||||
|
|
||||||
|
CrawlerLogic.actions.startCrawl(overrides);
|
||||||
},
|
},
|
||||||
}),
|
}),
|
||||||
});
|
});
|
||||||
|
|
|
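The if-guards keep the optional keys out of the overrides object altogether rather than assigning undefined, so 'seed_urls' in overrides is false and the key never reaches the request at all. The same omission can be written with conditional spreads; a sketch against the same locals, not the PR's code:

const overrides: CrawlRequestOverrides = {
  domain_allowlist: values.selectedDomainUrls,
  max_crawl_depth: values.maxCrawlDepth,
  sitemap_discovery_disabled: !values.includeSitemapsInRobotsTxt,
  // Spreading false is a no-op, so each key appears only when its list is non-empty.
  ...(seedUrls.length > 0 && { seed_urls: seedUrls }),
  ...(sitemapUrls.length > 0 && { sitemap_urls: sitemapUrls }),
};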
@ -33,7 +33,7 @@ const ACTIVE_STATUSES = [
|
||||||
CrawlerStatus.Canceling,
|
CrawlerStatus.Canceling,
|
||||||
];
|
];
|
||||||
|
|
||||||
interface CrawlRequestOverrides {
|
export interface CrawlRequestOverrides {
|
||||||
domain_allowlist?: string[];
|
domain_allowlist?: string[];
|
||||||
max_crawl_depth?: number;
|
max_crawl_depth?: number;
|
||||||
seed_urls?: string[];
|
seed_urls?: string[];
|
||||||
|
|
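The only change in this last hunk is the export keyword, which lets the flyout logic above import and reuse the type. The context window cuts the interface off after seed_urls; the assignments in the logic hunk imply at least two more optional fields, reconstructed here as a sketch (anything past seed_urls is inferred, not shown in the diff):

export interface CrawlRequestOverrides {
  domain_allowlist?: string[];
  max_crawl_depth?: number;
  seed_urls?: string[];
  sitemap_urls?: string[]; // implied by overrides.sitemap_urls above
  sitemap_discovery_disabled?: boolean; // implied by the sitemap_discovery_disabled override
}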