Deprecating regexp field for LOGSTASH-756

- Deprecating fields to columns
- Deprecating regexp fields to source
- Adding target for sub field specifcation
- Added and modified rspec checks
This commit is contained in:
Richard Pijnenburg 2013-01-12 11:52:32 +01:00
parent 834fb6abc0
commit b489155ca6
2 changed files with 177 additions and 34 deletions

View file

@ -14,32 +14,65 @@ class LogStash::Filters::CSV < LogStash::Filters::Base
# The CSV data in the value of the source field will be expanded into a
# datastructure in the "dest" field. Note: if the "dest" field
# already exists, it will be overridden.
config /[A-Za-z0-9_-]+/, :validate => :string
config /[A-Za-z0-9_-]+/, :validate => :string, :deprecated => true
# Define a list of field names (in the order they appear in the CSV,
# as if it were a header line). If this is not specified or there
# are not enough fields specified, the default field name is "fieldN"
# (where N is the field number, starting from 1).
# Optional.
config :fields, :validate => :array, :default => []
config :fields, :validate => :array, :deprecated => true
# The CSV data in the value of the source field will be expanded into a
# datastructure.
# This deprecates the regexp '[A-Za-z0-9_-]' variable.
config :source, :validate => :string
# Define a list of field names (in the order they appear in the CSV,
# as if it were a header line). If this is not specified or there
# are not enough fields specified, the default field name is "fieldN"
# (where N is the field number, starting from 1).
# This deprecates the 'fields' variable.
# Optional.
config :columns, :validate => :array, :default => []
# Define the column separator value. If this is not specified the default
# is a comma ','
# Optional.
config :separator, :validate => :string, :default => ","
# Define target for placing the data
# Defaults to @fields
# Optional
config :target, :validate => :string, :default => "@fields"
public
def register
@csv = {}
#TODO(electrical): At some point 'fields' and the regexp parts need to be removed.
if @fields
if @columns
@logger.error("'fields' and 'columns' are the same setting, but 'fields' is deprecated. Please use only 'columns'")
end
@columns = @fields
end
@csv = {}
#TODO(electrical): At some point this can be removed
@config.each do |field, dest|
next if (RESERVED + ["fields", "separator"]).member?(field)
next if (RESERVED + ["fields", "separator", "source", "columns", "target"]).member?(field)
@csv[field] = dest
end
# Default to parsing @message and dumping into @fields
#TODO(electrical): Will we make @source required or not?
if @source
#Add the source field to the list.
@csv[@source] = @target
end
# Default to parsing @message and dumping into @target
if @csv.empty?
@csv["@message"] = "@fields"
@csv["@message"] = @target
end
end # def register
@ -50,6 +83,7 @@ class LogStash::Filters::CSV < LogStash::Filters::Base
@logger.debug("Running csv filter", :event => event)
matches = 0
#TODO(electrical): When old stuff can be removed. this block will need to be changed also
@csv.each do |key, dest|
if event[key]
if event[key].is_a?(String)
@ -68,7 +102,7 @@ class LogStash::Filters::CSV < LogStash::Filters::Base
values = CSV.parse_line(raw, {:col_sep => @separator})
data = {}
values.each_index do |i|
field_name = @fields[i] || "field#{i+1}"
field_name = @columns[i] || "column#{i+1}"
data[field_name] = values[i]
end

View file

@ -14,13 +14,28 @@ describe LogStash::Filters::CSV do
CONFIG
sample "big,bird,sesame street" do
insist { subject["field1"] } == "big"
insist { subject["field2"] } == "bird"
insist { subject["field3"] } == "sesame street"
insist { subject["column1"] } == "big"
insist { subject["column2"] } == "bird"
insist { subject["column3"] } == "sesame street"
end
end
describe "given fields" do
describe "custom separator" do
config <<-CONFIG
filter {
csv {
separator => ";"
}
}
CONFIG
sample "big,bird;sesame street" do
insist { subject["column1"] } == "big,bird"
insist { subject["column2"] } == "sesame street"
end
end
describe "given fields ( deprecated test )" do
# The logstash config goes here.
# At this time, only filters are supported.
config <<-CONFIG
@ -38,22 +53,7 @@ describe LogStash::Filters::CSV do
end
end
describe "custom separator" do
config <<-CONFIG
filter {
csv {
separator => ";"
}
}
CONFIG
sample "big,bird;sesame street" do
insist { subject["field1"] } == "big,bird"
insist { subject["field2"] } == "sesame street"
end
end
describe "parse csv with more data than defined field names" do
describe "parse csv with more data than defined field names ( deprecated test )" do
config <<-CONFIG
filter {
csv {
@ -65,11 +65,11 @@ describe LogStash::Filters::CSV do
sample "val1,val2,val3" do
insist { subject["custom1"] } == "val1"
insist { subject["custom2"] } == "val2"
insist { subject["field3"] } == "val3"
insist { subject["column3"] } == "val3"
end
end
describe "parse csv from a given field without field names" do
describe "parse csv from a given field without field names ( deprecated test )" do
config <<-CONFIG
filter {
csv {
@ -79,13 +79,13 @@ describe LogStash::Filters::CSV do
CONFIG
sample({"@fields" => {"raw" => "val1,val2,val3"}}) do
insist { subject["data"]["field1"] } == "val1"
insist { subject["data"]["field2"] } == "val2"
insist { subject["data"]["field3"] } == "val3"
insist { subject["data"]["column1"] } == "val1"
insist { subject["data"]["column2"] } == "val2"
insist { subject["data"]["column3"] } == "val3"
end
end
describe "parse csv from a given field with field names" do
describe "parse csv from a given field with field names ( deprecated test )" do
config <<-CONFIG
filter {
csv {
@ -102,7 +102,7 @@ describe LogStash::Filters::CSV do
end
end
describe "fail to parse any data in a multi-value field" do
describe "fail to parse any data in a multi-value field ( deprecated test )" do
config <<-CONFIG
filter {
csv {
@ -115,4 +115,113 @@ describe LogStash::Filters::CSV do
insist { subject["data"] } == nil
end
end
# New tests
describe "fail to parse any data in a multi-value field ( deprecated test )" do
config <<-CONFIG
filter {
csv {
source => "datain"
target => "data"
}
}
CONFIG
sample({"@fields" => {"datain" => ["val1,val2,val3", "val1,val2,val3"]}}) do
insist { subject["data"] } == nil
end
end
describe "given columns" do
# The logstash config goes here.
# At this time, only filters are supported.
config <<-CONFIG
filter {
csv {
columns => ["first", "last", "address" ]
}
}
CONFIG
sample "big,bird,sesame street" do
insist { subject["first"] } == "big"
insist { subject["last"] } == "bird"
insist { subject["address"] } == "sesame street"
end
end
describe "parse csv with more data than defined column names" do
config <<-CONFIG
filter {
csv {
columns => ["custom1", "custom2"]
}
}
CONFIG
sample "val1,val2,val3" do
insist { subject["custom1"] } == "val1"
insist { subject["custom2"] } == "val2"
insist { subject["column3"] } == "val3"
end
end
describe "parse csv from a given source with column names" do
config <<-CONFIG
filter {
csv {
source => "datafield"
columns => ["custom1", "custom2", "custom3"]
}
}
CONFIG
sample({"@fields" => {"datafield" => "val1,val2,val3"}}) do
insist { subject["custom1"] } == "val1"
insist { subject["custom2"] } == "val2"
insist { subject["custom3"] } == "val3"
end
end
describe "given target" do
# The logstash config goes here.
# At this time, only filters are supported.
config <<-CONFIG
filter {
csv {
target => "data"
}
}
CONFIG
sample "big,bird,sesame street" do
insist { subject["data"]["column1"] } == "big"
insist { subject["data"]["column2"] } == "bird"
insist { subject["data"]["column3"] } == "sesame street"
end
end
describe "given target and source" do
# The logstash config goes here.
# At this time, only filters are supported.
config <<-CONFIG
filter {
csv {
source => "datain"
target => "data"
}
}
CONFIG
sample({"@fields" => {"datain" => "big,bird,sesame street"}}) do
insist { subject["data"]["column1"] } == "big"
insist { subject["data"]["column2"] } == "bird"
insist { subject["data"]["column3"] } == "sesame street"
end
end
end