Change how PeriodicPollers log their exception

In a busy logstash install and the way we interact with core stats from
the jvm or the os, the poller can timeout. This exception is logged as
an error. but this shouldn't impact normal operation.

This PR changes the following:

- Change the interval for 5s instead of 1s
- Make the timeout bigger 160s instead of 60
- Concurrent::TimeoutError will be logged using debug instead of error
- Any other exception will use the error level.
- add tests

Fixes: #6160
Fixes: #6158

Fixes #6169
This commit is contained in:
Pier-Hugues Pellerin 2016-11-02 10:36:33 -04:00
parent de5c23cb0a
commit 21b7a49148
2 changed files with 54 additions and 8 deletions

View file

@ -8,8 +8,8 @@ module LogStash module Instrument module PeriodicPoller
include LogStash::Util::Loggable
DEFAULT_OPTIONS = {
:polling_interval => 1,
:polling_timeout => 60
:polling_interval => 5,
:polling_timeout => 120
}
public
@ -22,11 +22,25 @@ module LogStash module Instrument module PeriodicPoller
def update(time, result, exception)
return unless exception
logger.error("PeriodicPoller: exception",
:poller => self,
:result => result,
:exception => exception,
:executed_at => time)
if exception.is_a?(Concurrent::TimeoutError)
# On a busy system this can happen, we just log it as a debug
# event instead of an error, Some of the JVM calls can take a long time or block.
logger.debug("PeriodicPoller: Timeout exception",
:poller => self,
:result => result,
:polling_timeout => @options[:polling_timeout],
:polling_interval => @options[:polling_interval],
:exception => exception.class,
:executed_at => time)
else
logger.error("PeriodicPoller: exception",
:poller => self,
:result => result,
:exception => exception.class,
:polling_timeout => @options[:polling_timeout],
:polling_interval => @options[:polling_interval],
:executed_at => time)
end
end
def collect
@ -37,7 +51,7 @@ module LogStash module Instrument module PeriodicPoller
logger.debug("PeriodicPoller: Starting",
:polling_interval => @options[:polling_interval],
:polling_timeout => @options[:polling_timeout]) if logger.debug?
collect # Collect data right away if possible
@task.execute
end

View file

@ -0,0 +1,32 @@
# encoding: utf-8
require "logstash/instrument/periodic_poller/base"
require "logstash/instrument/metric"
require "logstash/instrument/collector"
describe LogStash::Instrument::PeriodicPoller::Base do
let(:metric) { LogStash::Instrument::Metric.new(LogStash::Instrument::Collector.new) }
let(:options) { {} }
subject { described_class.new(metric, options) }
describe "#update" do
it "logs an timeout exception to debug level" do
exception = Concurrent::TimeoutError.new
expect(subject.logger).to receive(:debug).with(anything, hash_including(:exception => exception.class))
subject.update(Time.now, "hola", exception)
end
it "logs any other exception to error level" do
exception = Class.new
expect(subject.logger).to receive(:error).with(anything, hash_including(:exception => exception.class))
subject.update(Time.now, "hola", exception)
end
it "doesnt log anything when no exception is received" do
exception = Concurrent::TimeoutError.new
expect(subject.logger).not_to receive(:debug).with(anything)
expect(subject.logger).not_to receive(:error).with(anything)
subject.update(Time.now, "hola", exception)
end
end
end