mirror of
https://github.com/elastic/kibana.git
synced 2025-04-24 09:48:58 -04:00
Don't submit empty seed_urls or sitemap_urls when making a partial crawl request (#126972)
Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
parent
de3ae9bfec
commit
a24b1fc957
3 changed files with 37 additions and 9 deletions
|
@ -297,7 +297,25 @@ describe('CrawlCustomSettingsFlyoutLogic', () => {
|
|||
});
|
||||
|
||||
describe('startCustomCrawl', () => {
|
||||
it('starts a custom crawl with the user set values', async () => {
|
||||
it('can start a custom crawl for selected domains', async () => {
|
||||
mount({
|
||||
includeSitemapsInRobotsTxt: true,
|
||||
maxCrawlDepth: 5,
|
||||
selectedDomainUrls: ['https://www.elastic.co', 'https://swiftype.com'],
|
||||
});
|
||||
jest.spyOn(CrawlerLogic.actions, 'startCrawl');
|
||||
|
||||
CrawlCustomSettingsFlyoutLogic.actions.startCustomCrawl();
|
||||
await nextTick();
|
||||
|
||||
expect(CrawlerLogic.actions.startCrawl).toHaveBeenCalledWith({
|
||||
domain_allowlist: ['https://www.elastic.co', 'https://swiftype.com'],
|
||||
max_crawl_depth: 5,
|
||||
sitemap_discovery_disabled: false,
|
||||
});
|
||||
});
|
||||
|
||||
it('can start a custom crawl selected domains, sitemaps, and seed urls', async () => {
|
||||
mount({
|
||||
includeSitemapsInRobotsTxt: true,
|
||||
maxCrawlDepth: 5,
|
||||
|
|
|
@ -11,7 +11,7 @@ import { flashAPIErrors } from '../../../../../shared/flash_messages';
|
|||
import { HttpLogic } from '../../../../../shared/http';
|
||||
import { EngineLogic } from '../../../engine';
|
||||
|
||||
import { CrawlerLogic } from '../../crawler_logic';
|
||||
import { CrawlerLogic, CrawlRequestOverrides } from '../../crawler_logic';
|
||||
import { DomainConfig, DomainConfigFromServer } from '../../types';
|
||||
import { domainConfigServerToClient } from '../../utils';
|
||||
import { extractDomainAndEntryPointFromUrl } from '../add_domain/utils';
|
||||
|
@ -213,13 +213,23 @@ export const CrawlCustomSettingsFlyoutLogic = kea<
|
|||
actions.fetchDomainConfigData();
|
||||
},
|
||||
startCustomCrawl: () => {
|
||||
CrawlerLogic.actions.startCrawl({
|
||||
domain_allowlist: values.selectedDomainUrls,
|
||||
max_crawl_depth: values.maxCrawlDepth,
|
||||
seed_urls: [...values.selectedEntryPointUrls, ...values.customEntryPointUrls],
|
||||
sitemap_urls: [...values.selectedSitemapUrls, ...values.customSitemapUrls],
|
||||
const overrides: CrawlRequestOverrides = {
|
||||
sitemap_discovery_disabled: !values.includeSitemapsInRobotsTxt,
|
||||
});
|
||||
max_crawl_depth: values.maxCrawlDepth,
|
||||
domain_allowlist: values.selectedDomainUrls,
|
||||
};
|
||||
|
||||
const seedUrls = [...values.selectedEntryPointUrls, ...values.customEntryPointUrls];
|
||||
if (seedUrls.length > 0) {
|
||||
overrides.seed_urls = seedUrls;
|
||||
}
|
||||
|
||||
const sitemapUrls = [...values.selectedSitemapUrls, ...values.customSitemapUrls];
|
||||
if (sitemapUrls.length > 0) {
|
||||
overrides.sitemap_urls = sitemapUrls;
|
||||
}
|
||||
|
||||
CrawlerLogic.actions.startCrawl(overrides);
|
||||
},
|
||||
}),
|
||||
});
|
||||
|
|
|
@ -33,7 +33,7 @@ const ACTIVE_STATUSES = [
|
|||
CrawlerStatus.Canceling,
|
||||
];
|
||||
|
||||
interface CrawlRequestOverrides {
|
||||
export interface CrawlRequestOverrides {
|
||||
domain_allowlist?: string[];
|
||||
max_crawl_depth?: number;
|
||||
seed_urls?: string[];
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue