Don't submit empty seed_urls or sitemap_urls when making a partial crawl request (#126972)

Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
Byron Hulcher 2022-03-07 15:13:04 -05:00 committed by GitHub
parent de3ae9bfec
commit a24b1fc957
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 37 additions and 9 deletions

View file

@ -297,7 +297,25 @@ describe('CrawlCustomSettingsFlyoutLogic', () => {
});
describe('startCustomCrawl', () => {
it('starts a custom crawl with the user set values', async () => {
// Domains-only case: mount() is given selected domains but no entry point or sitemap values,
// and the expected startCrawl argument below lists no seed_urls or sitemap_urls keys.
it('can start a custom crawl for selected domains', async () => {
mount({
includeSitemapsInRobotsTxt: true,
maxCrawlDepth: 5,
selectedDomainUrls: ['https://www.elastic.co', 'https://swiftype.com'],
});
// Spy on the downstream action so the payload handed to it can be inspected.
jest.spyOn(CrawlerLogic.actions, 'startCrawl');
CrawlCustomSettingsFlyoutLogic.actions.startCustomCrawl();
// NOTE(review): presumably waits for the listener to run before asserting; confirm against nextTick's definition.
await nextTick();
expect(CrawlerLogic.actions.startCrawl).toHaveBeenCalledWith({
domain_allowlist: ['https://www.elastic.co', 'https://swiftype.com'],
max_crawl_depth: 5,
// includeSitemapsInRobotsTxt: true above is expected to surface as the inverted flag here.
sitemap_discovery_disabled: false,
});
});
it('can start a custom crawl selected domains, sitemaps, and seed urls', async () => {
mount({
includeSitemapsInRobotsTxt: true,
maxCrawlDepth: 5,

View file

@ -11,7 +11,7 @@ import { flashAPIErrors } from '../../../../../shared/flash_messages';
import { HttpLogic } from '../../../../../shared/http';
import { EngineLogic } from '../../../engine';
import { CrawlerLogic } from '../../crawler_logic';
import { CrawlerLogic, CrawlRequestOverrides } from '../../crawler_logic';
import { DomainConfig, DomainConfigFromServer } from '../../types';
import { domainConfigServerToClient } from '../../utils';
import { extractDomainAndEntryPointFromUrl } from '../add_domain/utils';
@ -213,13 +213,23 @@ export const CrawlCustomSettingsFlyoutLogic = kea<
actions.fetchDomainConfigData();
},
// Builds the crawl request overrides from the flyout's current values and passes them to
// CrawlerLogic.actions.startCrawl.
// NOTE(review): this hunk is a diff rendered without +/- markers, so the earlier inline
// startCrawl({...}) call and the newer `overrides`-based version appear interleaved below.
startCustomCrawl: () => {
CrawlerLogic.actions.startCrawl({
domain_allowlist: values.selectedDomainUrls,
max_crawl_depth: values.maxCrawlDepth,
seed_urls: [...values.selectedEntryPointUrls, ...values.customEntryPointUrls],
sitemap_urls: [...values.selectedSitemapUrls, ...values.customSitemapUrls],
const overrides: CrawlRequestOverrides = {
// The request flag is the negation of the flyout's "include sitemaps in robots.txt" value.
sitemap_discovery_disabled: !values.includeSitemapsInRobotsTxt,
});
max_crawl_depth: values.maxCrawlDepth,
domain_allowlist: values.selectedDomainUrls,
};
// Selected and custom entry points are merged; seed_urls is only set when the merged list is
// non-empty, so an empty array is never submitted.
const seedUrls = [...values.selectedEntryPointUrls, ...values.customEntryPointUrls];
if (seedUrls.length > 0) {
overrides.seed_urls = seedUrls;
}
// Same rule for sitemaps: the key is left off entirely when there is nothing to send.
const sitemapUrls = [...values.selectedSitemapUrls, ...values.customSitemapUrls];
if (sitemapUrls.length > 0) {
overrides.sitemap_urls = sitemapUrls;
}
CrawlerLogic.actions.startCrawl(overrides);
},
}),
});

View file

@ -33,7 +33,7 @@ const ACTIVE_STATUSES = [
CrawlerStatus.Canceling,
];
interface CrawlRequestOverrides {
export interface CrawlRequestOverrides {
domain_allowlist?: string[];
max_crawl_depth?: number;
seed_urls?: string[];