From cc6597df23c9369f2d8df16ee3616a85ca2e82b5 Mon Sep 17 00:00:00 2001 From: Joe Gallo Date: Tue, 14 May 2024 17:49:05 -0400 Subject: [PATCH] Add support for the 'Domain' database to the geoip processor (#108639) --- docs/changelog/108639.yaml | 28 +++++++++ .../ingest/processors/geoip.asciidoc | 1 + .../elasticsearch/ingest/geoip/Database.java | 10 +++- .../geoip/DatabaseReaderLazyLoader.java | 7 +++ .../ingest/geoip/GeoIpDatabase.java | 4 ++ .../ingest/geoip/GeoIpProcessor.java | 31 ++++++++++ .../ingest/geoip/GeoIpProcessorTests.java | 55 ++++++++++++++---- .../ingest/geoip/MaxMindSupportTests.java | 12 +++- .../test/resources/GeoIP2-Domain-Test.mmdb | Bin 0 -> 6449 bytes 9 files changed, 132 insertions(+), 16 deletions(-) create mode 100644 docs/changelog/108639.yaml create mode 100644 modules/ingest-geoip/src/test/resources/GeoIP2-Domain-Test.mmdb diff --git a/docs/changelog/108639.yaml b/docs/changelog/108639.yaml new file mode 100644 index 000000000000..586270c3c761 --- /dev/null +++ b/docs/changelog/108639.yaml @@ -0,0 +1,28 @@ +pr: 108639 +summary: Add support for the 'Domain' database to the geoip processor +area: Ingest Node +type: enhancement +issues: [] +highlight: + title: Add support for the 'Domain' database to the geoip processor + body: |- + Follow on to #107287 and #107377 + + Adds support for the ['GeoIP2 + Domain'](https://dev.maxmind.com/geoip/docs/databases/domain) database + from MaxMind to the `geoip` processor. + + The `geoip` processor will automatically download the [various + 'GeoLite2' + databases](https://dev.maxmind.com/geoip/geolite2-free-geolocation-data), + but the 'GeoIP2 Domain' database is not a 'GeoLite2' database -- it's a + commercial database available to those with a suitable license from + MaxMind. 
+ + The support that is being added for it in this PR is in line with the + support that we already have for MaxMind's 'GeoIP2 City' and 'GeoIP2 + Country' databases -- that is, one would need to arrange their own + download management via some custom endpoint or otherwise arrange for + the relevant file(s) to be in the `$ES_CONFIG/ingest-geoip` directory on + the nodes of the cluster. + notable: true diff --git a/docs/reference/ingest/processors/geoip.asciidoc b/docs/reference/ingest/processors/geoip.asciidoc index 12e7a5f10135..3348ae9cbfee 100644 --- a/docs/reference/ingest/processors/geoip.asciidoc +++ b/docs/reference/ingest/processors/geoip.asciidoc @@ -59,6 +59,7 @@ in `properties`. * If the GeoIP2 Anonymous IP database is used, then the following fields may be added under the `target_field`: `ip`, `hosting_provider`, `tor_exit_node`, `anonymous_vpn`, `anonymous`, `public_proxy`, and `residential_proxy`. The fields actually added depend on what has been found and which properties were configured in `properties`. +* If the GeoIP2 Domain database is used, then the following fields may be added under the `target_field`: `ip` and `domain`. The fields actually added depend on what has been found and which properties were configured in `properties`. * If the GeoIP2 Enterprise database is used, then the following fields may be added under the `target_field`: `ip`, `country_iso_code`, `country_name`, `continent_name`, `region_iso_code`, `region_name`, `city_name`, `timezone`, `location`, `asn`, `organization_name`, `network`, `hosting_provider`, `tor_exit_node`, `anonymous_vpn`, `anonymous`, `public_proxy`, and `residential_proxy`. 
diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/Database.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/Database.java index 889b4c490d23..5a9b00dde58c 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/Database.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/Database.java @@ -75,6 +75,7 @@ enum Database { Property.RESIDENTIAL_PROXY ) ), + Domain(Set.of(Property.IP, Property.DOMAIN), Set.of(Property.DOMAIN)), Enterprise( Set.of( Property.IP, @@ -94,7 +95,8 @@ enum Database { Property.ANONYMOUS_VPN, Property.ANONYMOUS, Property.PUBLIC_PROXY, - Property.RESIDENTIAL_PROXY + Property.RESIDENTIAL_PROXY, + Property.DOMAIN ), Set.of( Property.COUNTRY_ISO_CODE, @@ -111,6 +113,7 @@ enum Database { private static final String COUNTRY_DB_SUFFIX = "-Country"; private static final String ASN_DB_SUFFIX = "-ASN"; private static final String ANONYMOUS_IP_DB_SUFFIX = "-Anonymous-IP"; + private static final String DOMAIN_DB_SUFFIX = "-Domain"; private static final String ENTERPRISE_DB_SUFFIX = "-Enterprise"; /** @@ -133,6 +136,8 @@ enum Database { database = Database.Asn; } else if (databaseType.endsWith(Database.ANONYMOUS_IP_DB_SUFFIX)) { database = Database.AnonymousIp; + } else if (databaseType.endsWith(Database.DOMAIN_DB_SUFFIX)) { + database = Database.Domain; } else if (databaseType.endsWith(Database.ENTERPRISE_DB_SUFFIX)) { database = Database.Enterprise; } @@ -209,7 +214,8 @@ enum Database { ANONYMOUS_VPN, ANONYMOUS, PUBLIC_PROXY, - RESIDENTIAL_PROXY; + RESIDENTIAL_PROXY, + DOMAIN; /** * Parses a string representation of a property into an actual Property instance. 
Not all properties that exist are diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseReaderLazyLoader.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseReaderLazyLoader.java index 12f6a299e123..97b90f612ea9 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseReaderLazyLoader.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseReaderLazyLoader.java @@ -16,6 +16,7 @@ import com.maxmind.geoip2.model.AnonymousIpResponse; import com.maxmind.geoip2.model.AsnResponse; import com.maxmind.geoip2.model.CityResponse; import com.maxmind.geoip2.model.CountryResponse; +import com.maxmind.geoip2.model.DomainResponse; import com.maxmind.geoip2.model.EnterpriseResponse; import org.apache.logging.log4j.LogManager; @@ -177,6 +178,12 @@ class DatabaseReaderLazyLoader implements GeoIpDatabase, Closeable { return getResponse(ipAddress, DatabaseReader::tryAnonymousIp); } + @Nullable + @Override + public DomainResponse getDomain(InetAddress ipAddress) { + return getResponse(ipAddress, DatabaseReader::tryDomain); + } + @Nullable @Override public EnterpriseResponse getEnterprise(InetAddress ipAddress) { diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpDatabase.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpDatabase.java index 088fa2b0d1fa..7cbd423a5f2e 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpDatabase.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpDatabase.java @@ -12,6 +12,7 @@ import com.maxmind.geoip2.model.AnonymousIpResponse; import com.maxmind.geoip2.model.AsnResponse; import com.maxmind.geoip2.model.CityResponse; import com.maxmind.geoip2.model.CountryResponse; +import com.maxmind.geoip2.model.DomainResponse; import com.maxmind.geoip2.model.EnterpriseResponse; import org.elasticsearch.core.Nullable; @@ -58,6 
+59,9 @@ public interface GeoIpDatabase { @Nullable AnonymousIpResponse getAnonymousIp(InetAddress ipAddress); + @Nullable + DomainResponse getDomain(InetAddress ipAddress); + @Nullable EnterpriseResponse getEnterprise(InetAddress ipAddress); diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java index 6898e4433579..16485987176b 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java @@ -13,6 +13,7 @@ import com.maxmind.geoip2.model.AnonymousIpResponse; import com.maxmind.geoip2.model.AsnResponse; import com.maxmind.geoip2.model.CityResponse; import com.maxmind.geoip2.model.CountryResponse; +import com.maxmind.geoip2.model.DomainResponse; import com.maxmind.geoip2.model.EnterpriseResponse; import com.maxmind.geoip2.record.City; import com.maxmind.geoip2.record.Continent; @@ -175,6 +176,7 @@ public final class GeoIpProcessor extends AbstractProcessor { case Country -> retrieveCountryGeoData(geoIpDatabase, ipAddress); case Asn -> retrieveAsnGeoData(geoIpDatabase, ipAddress); case AnonymousIp -> retrieveAnonymousIpGeoData(geoIpDatabase, ipAddress); + case Domain -> retrieveDomainGeoData(geoIpDatabase, ipAddress); case Enterprise -> retrieveEnterpriseGeoData(geoIpDatabase, ipAddress); }; } @@ -384,6 +386,28 @@ public final class GeoIpProcessor extends AbstractProcessor { return geoData; } + private Map retrieveDomainGeoData(GeoIpDatabase geoIpDatabase, InetAddress ipAddress) { + DomainResponse response = geoIpDatabase.getDomain(ipAddress); + if (response == null) { + return Map.of(); + } + + String domain = response.getDomain(); + + Map geoData = new HashMap<>(); + for (Property property : this.properties) { + switch (property) { + case IP -> geoData.put("ip", NetworkAddress.format(ipAddress)); + case 
DOMAIN -> { + if (domain != null) { + geoData.put("domain", domain); + } + } + } + } + return geoData; + } + private Map retrieveEnterpriseGeoData(GeoIpDatabase geoIpDatabase, InetAddress ipAddress) { EnterpriseResponse response = geoIpDatabase.getEnterprise(ipAddress); if (response == null) { @@ -407,6 +431,8 @@ public final class GeoIpProcessor extends AbstractProcessor { boolean isPublicProxy = response.getTraits().isPublicProxy(); boolean isResidentialProxy = response.getTraits().isResidentialProxy(); + String domain = response.getTraits().getDomain(); + Map geoData = new HashMap<>(); for (Property property : this.properties) { switch (property) { @@ -500,6 +526,11 @@ public final class GeoIpProcessor extends AbstractProcessor { case RESIDENTIAL_PROXY -> { geoData.put("residential_proxy", isResidentialProxy); } + case DOMAIN -> { + if (domain != null) { + geoData.put("domain", domain); + } + } } } return geoData; diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorTests.java index ec77cacbdb6b..cd6737cced30 100644 --- a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorTests.java +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorTests.java @@ -336,8 +336,36 @@ public class GeoIpProcessorTests extends ESTestCase { assertThat(geoData.get("residential_proxy"), equalTo(true)); } + public void testDomain() throws Exception { + String ip = "69.219.64.2"; + GeoIpProcessor processor = new GeoIpProcessor( + randomAlphaOfLength(10), + null, + "source_field", + loader("/GeoIP2-Domain-Test.mmdb"), + () -> true, + "target_field", + ALL_PROPERTIES, + false, + false, + "filename" + ); + + Map document = new HashMap<>(); + document.put("source_field", ip); + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); + 
processor.execute(ingestDocument); + + assertThat(ingestDocument.getSourceAndMetadata().get("source_field"), equalTo(ip)); + @SuppressWarnings("unchecked") + Map geoData = (Map) ingestDocument.getSourceAndMetadata().get("target_field"); + assertThat(geoData.size(), equalTo(2)); + assertThat(geoData.get("ip"), equalTo(ip)); + assertThat(geoData.get("domain"), equalTo("ameritech.net")); + } + public void testEnterprise() throws Exception { - String ip = "2.125.160.216"; + String ip = "74.209.24.4"; GeoIpProcessor processor = new GeoIpProcessor( randomAlphaOfLength(10), null, @@ -359,26 +387,29 @@ public class GeoIpProcessorTests extends ESTestCase { assertThat(ingestDocument.getSourceAndMetadata().get("source_field"), equalTo(ip)); @SuppressWarnings("unchecked") Map geoData = (Map) ingestDocument.getSourceAndMetadata().get("target_field"); - assertThat(geoData.size(), equalTo(16)); + assertThat(geoData.size(), equalTo(19)); assertThat(geoData.get("ip"), equalTo(ip)); - assertThat(geoData.get("country_iso_code"), equalTo("GB")); - assertThat(geoData.get("country_name"), equalTo("United Kingdom")); - assertThat(geoData.get("continent_name"), equalTo("Europe")); - assertThat(geoData.get("region_iso_code"), equalTo("GB-WBK")); - assertThat(geoData.get("region_name"), equalTo("West Berkshire")); - assertThat(geoData.get("city_name"), equalTo("Boxford")); - assertThat(geoData.get("timezone"), equalTo("Europe/London")); + assertThat(geoData.get("country_iso_code"), equalTo("US")); + assertThat(geoData.get("country_name"), equalTo("United States")); + assertThat(geoData.get("continent_name"), equalTo("North America")); + assertThat(geoData.get("region_iso_code"), equalTo("US-NY")); + assertThat(geoData.get("region_name"), equalTo("New York")); + assertThat(geoData.get("city_name"), equalTo("Chatham")); + assertThat(geoData.get("timezone"), equalTo("America/New_York")); Map location = new HashMap<>(); - location.put("lat", 51.75); - location.put("lon", -1.25); + 
location.put("lat", 42.3478); + location.put("lon", -73.5549); assertThat(geoData.get("location"), equalTo(location)); - assertThat(geoData.get("network"), equalTo("2.125.160.216/29")); + assertThat(geoData.get("asn"), equalTo(14671L)); + assertThat(geoData.get("organization_name"), equalTo("FairPoint Communications")); + assertThat(geoData.get("network"), equalTo("74.209.16.0/20")); assertThat(geoData.get("hosting_provider"), equalTo(false)); assertThat(geoData.get("tor_exit_node"), equalTo(false)); assertThat(geoData.get("anonymous_vpn"), equalTo(false)); assertThat(geoData.get("anonymous"), equalTo(false)); assertThat(geoData.get("public_proxy"), equalTo(false)); assertThat(geoData.get("residential_proxy"), equalTo(false)); + assertThat(geoData.get("domain"), equalTo("frpt.net")); } public void testAddressIsNotInTheDatabase() throws Exception { diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/MaxMindSupportTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/MaxMindSupportTests.java index 4e6e1d11c0fd..07ea7f59eb52 100644 --- a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/MaxMindSupportTests.java +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/MaxMindSupportTests.java @@ -201,6 +201,9 @@ public class MaxMindSupportTests extends ESTestCase { "traits.userType" ); + private static final Set DOMAIN_SUPPORTED_FIELDS = Set.of("domain"); + private static final Set DOMAIN_UNSUPPORTED_FIELDS = Set.of("ipAddress", "network"); + private static final Set ENTERPRISE_SUPPORTED_FIELDS = Set.of( "city.name", "continent.name", @@ -215,6 +218,7 @@ public class MaxMindSupportTests extends ESTestCase { "traits.anonymousVpn", "traits.autonomousSystemNumber", "traits.autonomousSystemOrganization", + "traits.domain", "traits.hostingProvider", "traits.network", "traits.publicProxy", @@ -268,7 +272,6 @@ public class MaxMindSupportTests extends ESTestCase { "traits.anonymousProxy", 
"traits.anycast", "traits.connectionType", - "traits.domain", "traits.ipAddress", "traits.isp", "traits.legitimateProxy", @@ -290,6 +293,8 @@ public class MaxMindSupportTests extends ESTestCase { CITY_SUPPORTED_FIELDS, Database.Country, COUNTRY_SUPPORTED_FIELDS, + Database.Domain, + DOMAIN_SUPPORTED_FIELDS, Database.Enterprise, ENTERPRISE_SUPPORTED_FIELDS ); @@ -302,6 +307,8 @@ public class MaxMindSupportTests extends ESTestCase { CITY_UNSUPPORTED_FIELDS, Database.Country, COUNTRY_UNSUPPORTED_FIELDS, + Database.Domain, + DOMAIN_UNSUPPORTED_FIELDS, Database.Enterprise, ENTERPRISE_UNSUPPORTED_FIELDS ); @@ -314,13 +321,14 @@ public class MaxMindSupportTests extends ESTestCase { CityResponse.class, Database.Country, CountryResponse.class, + Database.Domain, + DomainResponse.class, Database.Enterprise, EnterpriseResponse.class ); private static final Set> KNOWN_UNSUPPORTED_RESPONSE_CLASSES = Set.of( ConnectionTypeResponse.class, - DomainResponse.class, IspResponse.class, IpRiskResponse.class ); diff --git a/modules/ingest-geoip/src/test/resources/GeoIP2-Domain-Test.mmdb b/modules/ingest-geoip/src/test/resources/GeoIP2-Domain-Test.mmdb new file mode 100644 index 0000000000000000000000000000000000000000..d21c2a93df7d475a043112d9d8617ff545c9c790 GIT binary patch literal 6449 zcmZYC2XvIh7Qpd4vk^p)rt}t2DFTrqMd^xQfB*_s#IV`@5*Cu(Wp@L?ir9Pas9;y@ zy(`$CV(%Th&w{;|cW3^~&Ux=~4(GS`&Ykj=>`g?BNNg7otL&!#_0!cNWkg#*GiVN5 zYDrq81<_W}5?aC5&>Gr6TWAOEVH?;MIzUJ01f8J^bcJrv9eO}d=mq7_8~VU@us!Sm zeW4%phXJr741_^27%E^V7y?6K7z~GAU1egeuz&$(t zzdKBZJzxqx=4+P9>5SBhvlRDwBu-M@uM}+7|Cai#?L{^5e zquI8Wv18#lk>h=doS;jeM)E{B2~LJn;8agc@0s2`cMWI2nQ#`I4d=kQ9ydqM6FI*W zx#<^b@@A2XNNPeZ7FiXdOW5{OxJ+cVACom&d9BFh8m(aL3Xv;)Bv*yx)r?)^ag{4{ z-gR(2tc4rkMv)WA-Y54&Jf+jes{w?BKI==zEE;MV-LWC zA`dZEyn~PE4sS5F4%Wk?B9Dbt9w+&P$de)Yl*rRM{RPJU13FE&YzjyCHgyv%C)pRjr75N z4?n<<@DuzDzj)jZZo+Sh?x&yRclbl(&yf5}NwyKwR-?XR+Oc$d*ao(R4$u)g>4Y9)I&1WTm@XcF zmabyDg{V9GX-Q8%Pm9I$5>p8Ty~!b|9BB 
zNX+0+Ss`X8jrg)ML&OXX(J(Q?OVQ?%5n@LA>1Gs+hFv^dvt6n^Ympmd%9vVl`zfYZh`hclMqwoE6q%e4&*Ge z#LOnS7wiplU@q+A@!RezWVvmq;jDc`k&uu5y{0+FJ<})n+8Zj2C}dOa$@pqRPeZYZug~O+ht;!NG=EZ zpE;E1FgP5JfFnI+`s$k%;yDUd!qFbL(Xm=`vKZ}s8u?2(Ud#!saw42m8rxh#|1@nDfHe`6Mq8b74qcMDk)Wt9;U262|C%<}ylF zi&;a-<+|-vVy@8Nt+y9*rN>>7i>~%1`bo?+E+OVxk304C8qtBwS}`|hbeEVL!+tlh z(amrR+zPkB?QjR&>2XWn&B^Y8dx8FE?l0}2mF}G8K`{@NlICGCk1$~!tcOQK$z#l; z|CuLZxg)(`d#2|$b0ZUd;lBaL-+_jhEH_qFU8RR%x5G&2ko_P zP3?WfD+-v<|I9aB<|Z-UGT}S;9)2iIH$PJOlbD|w`vratCBOL+x9uMx`KOq_LiD$o ze|%)6lq{aiTCp+4%3up7Fiju1OCjE!P!wAfu( zY*+nCm`-wx*s(sc<4VcRV-v(qWWuDdn4WWYl9NM{{%5DK=2WqJQc@X8wEwwVP~3I~ zQI(X{gpye#XN#r(`3ZZIoC9;k(*NwfVcvd>?JxF#kUUWAJRjMEV15`oSjrkhw1DaK zKU+hTfFwk~FCey#Xd$G4{%7fbww`2zM;3_9aEL4{fgI$4{%1LvT}pBpG{JJ9|Jg%{ z4uivi{%7fbb_L0!U?m(4$AI=fz0vkKI37-b6M_C`PbNAAPKDFpbkP3i7CRHpg0tZq zI2X=?^Wg%x5H5m?VHI2gm%?SR8rHz&a0OfmS9#pKN zBisZx!!2+t+@`BM=cbCiUF;ns?}WSHZn#HF9u<2p(S2|~JOB^EL+~&>0_$MCM_(tm zr{DH5v5$v{{%4;Q`&3BM|1AB_J|p&7KV~U1zv^M;B|Pz z{Rw};-|&woCSbsVW_6_){VzuU zi)sIJ$>y*nwD6RzBx(t*U~6a%`UqofB-WOw9khpSz?Ez(u?|EXp%Zk5F3=U+gl-b+ zPShhLdrGVqQ91MuV}1Oiis|2fZaV!hrv0yPDCsA${zL;paz}{`4ACH_50+R3$(>*b z425AZ9Cn5gFcL<=XiwQ0MDAq#{TCaN9dtwmzPSHeci%z1BoLU#Mx!r_xbAUo4xIFT`{8sr2GhKD+slbs2a3ibYx7d0M_M zQz)b=GPyc;j(ly-Rju8ysVd%7RlJ2?%&SSIy_tIne-a_ zzoV&i7+pQRrZH8Ytd6po#KJNgt?HPpO2!NEns`2{E-cSRRa2u(U`>uXifM3 E0HuCr2><{9 literal 0 HcmV?d00001