Add more node stats + refactor stats internals

Add node stats mem endpoint

Fixes #5381
This commit is contained in:
Andrew Cholakian 2016-05-27 15:42:07 -05:00
parent 829af589cd
commit 86f776fafa
13 changed files with 324 additions and 100 deletions

View file

@ -46,7 +46,6 @@ class LogStash::Agent
@collect_metric = setting("metric.collect")
# Create the collectors and configure them with the library
configure_metrics_collectors
end

View file

@ -3,6 +3,7 @@ require "logstash/api/service"
require "logstash/api/commands/system/basicinfo_command"
require "logstash/api/commands/system/plugins_command"
require "logstash/api/commands/stats"
require "logstash/api/commands/node"
module LogStash
@ -15,7 +16,8 @@ module LogStash
@factory = {
:system_basic_info => ::LogStash::Api::Commands::System::BasicInfo,
:plugins_command => ::LogStash::Api::Commands::System::Plugins,
:stats => ::LogStash::Api::Commands::Stats
:stats => ::LogStash::Api::Commands::Stats,
:node => ::LogStash::Api::Commands::Node
}
end

View file

@ -19,6 +19,10 @@ module LogStash
def started_at
(LogStash::Agent::STARTED_AT.to_f * 1000.0).to_i
end
def extract_metrics(path, *keys)
service.extract_metrics(path, *keys)
end
end
end
end

View file

@ -0,0 +1,117 @@
require "logstash/api/commands/base"

module LogStash
  module Api
    module Commands
      # Reports mostly-static information about this Logstash node:
      # pipeline configuration gauges, OS details, JVM details, and an
      # on-demand hot-threads report.
      class Node < Commands::Base

        # Aggregate of every sub-report exposed by this command.
        def all
          {
            :pipeline => pipeline,
            :os => os,
            :jvm => jvm,
            :hot_threads => hot_threads
          }
        end

        # Pipeline configuration gauges recorded at pipeline startup
        # under the :stats/:pipelines/:main/:config metrics namespace.
        def pipeline
          extract_metrics(
            [:stats, :pipelines, :main, :config],
            :workers, :batch_size, :batch_delay
          )
        end

        # Operating system details as seen by the JVM.
        def os
          {
            :name => java.lang.System.getProperty("os.name"),
            :arch => java.lang.System.getProperty("os.arch"),
            :version => java.lang.System.getProperty("os.version"),
            :available_processors => java.lang.Runtime.getRuntime().availableProcessors()
          }
        end

        # JVM process details plus configured heap/non-heap sizes.
        # Note: the original hash listed :vm_name twice (duplicate key);
        # the duplicate has been removed — the resulting hash is unchanged.
        def jvm
          memory_bean = ManagementFactory.getMemoryMXBean()
          heap = memory_bean.getHeapMemoryUsage()
          non_heap = memory_bean.getNonHeapMemoryUsage()
          {
            # RuntimeMXBean#getName conventionally returns "pid@hostname"
            :pid => ManagementFactory.getRuntimeMXBean().getName().split("@").first.to_i,
            :version => java.lang.System.getProperty("java.version"),
            :vm_name => java.lang.System.getProperty("java.vm.name"),
            # NOTE(review): reads "java.version", not "java.vm.version" —
            # kept as-is to preserve behavior; confirm which is intended.
            :vm_version => java.lang.System.getProperty("java.version"),
            :vm_vendor => java.lang.System.getProperty("java.vendor"),
            :start_time_in_millis => started_at,
            :mem => {
              # MemoryUsage getters return -1 when undefined; clamp to 0
              :heap_init_in_bytes => non_negative(heap.getInit()),
              :heap_max_in_bytes => non_negative(heap.getMax()),
              :non_heap_init_in_bytes => non_negative(non_heap.getInit()),
              :non_heap_max_in_bytes => non_negative(non_heap.getMax())
            }
          }
        end

        def hot_threads(options={})
          HotThreadsReport.new(self, options)
        end

        # Wraps a JRuby thread dump and renders it either as
        # human-readable text (#to_s) or as a structured hash (#to_hash).
        class HotThreadsReport
          HOT_THREADS_STACK_TRACES_SIZE_DEFAULT = 10

          def initialize(cmd, options)
            @cmd = cmd
            filter = { :stacktrace_size => options.fetch(:stacktrace_size, HOT_THREADS_STACK_TRACES_SIZE_DEFAULT) }
            jr_dump = JRMonitor.threads.generate(filter)
            @thread_dump = ::LogStash::Util::ThreadDump.new(options.merge(:dump => jr_dump))
          end

          # Plain-text report, one '-'-separated section per busy thread.
          def to_s
            hash = to_hash
            report = "#{I18n.t("logstash.web_api.hot_threads.title", :hostname => hash[:hostname], :time => hash[:time], :top_count => @thread_dump.top_count )} \n"
            report << '=' * 80
            report << "\n"
            hash[:threads].each do |thread|
              # The two dead assignments that preceded this line (including a
              # broken multi-line I18n key) were removed; the "of of" typo in
              # the live string is fixed.
              thread_report = "#{thread[:percent_of_cpu_time]} % of cpu usage by #{thread[:state]} thread named '#{thread[:name]}'\n"
              thread_report << "#{thread[:path]}\n" if thread[:path]
              # to_hash omits :traces when empty, so guard against nil here.
              (thread[:traces] || []).each do |trace|
                thread_report << "\t#{trace}\n"
              end
              report << thread_report
              report << '-' * 80
              report << "\n"
            end
            report
          end

          # Structured form of the report.
          def to_hash
            hash = { :hostname => @cmd.hostname, :time => Time.now.iso8601, :busiest_threads => @thread_dump.top_count, :threads => [] }
            @thread_dump.each do |thread_name, _hash|
              # "thread.name" may carry a path suffix: "<name>: <path>"
              thread_name, thread_path = _hash["thread.name"].split(": ")
              thread = { :name => thread_name,
                         :percent_of_cpu_time => cpu_time_as_percent(_hash),
                         :state => _hash["thread.state"]
              }
              thread[:path] = thread_path if thread_path
              traces = []
              _hash["thread.stacktrace"].each do |trace|
                traces << trace
              end
              thread[:traces] = traces unless traces.empty?
              hash[:threads] << thread
            end
            hash
          end

          # CPU time used by this thread as a percent of node uptime,
          # truncated to two decimal places.
          def cpu_time_as_percent(hash)
            (((cpu_time(hash) / @cmd.uptime * 1.0)*10000).to_i)/100.0
          end

          # "cpu.time" is presumably in nanoseconds; /1e6 yields
          # milliseconds to match @cmd.uptime — TODO confirm units.
          def cpu_time(hash)
            hash["cpu.time"] / 1000000.0
          end
        end

        private

        # MemoryUsage getters report -1 for "undefined"; normalize to 0.
        def non_negative(value)
          value < 0 ? 0 : value
        end
      end
    end
  end
end

View file

@ -5,99 +5,50 @@ module LogStash
module Api
module Commands
class Stats < Commands::Base
def jvm
{:threads => service.get_shallow(:jvm, :threads)}
{
:threads => extract_metrics(
[:jvm, :threads],
:count,
:peak_count
)
}
end
def process
service.get_shallow(:jvm, :process)
extract_metrics(
[:jvm, :process],
:open_file_descriptors,
:peak_open_file_descriptors,
:max_file_descriptors,
[:mem, [:total_virtual_in_bytes]],
[:cpu, [:total_in_millis, :percent]]
)
end
def events
service.get_shallow(:stats, :events)
extract_metrics(
[:stats, :events],
:in, :filtered, :out
)
end
def memory
memory = LogStash::Json.load(service.get(:jvm_memory_stats))
memory = service.get_shallow(:jvm, :memory)
{
:heap_used_in_bytes => memory["heap"]["used_in_bytes"],
:heap_used_percent => memory["heap"]["used_percent"],
:heap_committed_in_bytes => memory["heap"]["committed_in_bytes"],
:heap_max_in_bytes => memory["heap"]["max_in_bytes"],
:heap_used_in_bytes => memory["heap"]["used_in_bytes"],
:non_heap_used_in_bytes => memory["non_heap"]["used_in_bytes"],
:non_heap_committed_in_bytes => memory["non_heap"]["committed_in_bytes"],
:pools => memory["pools"].inject({}) do |acc, (type, hash)|
:heap_used_in_bytes => memory[:heap][:used_in_bytes],
:heap_used_percent => memory[:heap][:used_percent],
:heap_committed_in_bytes => memory[:heap][:committed_in_bytes],
:heap_max_in_bytes => memory[:heap][:max_in_bytes],
:heap_used_in_bytes => memory[:heap][:used_in_bytes],
:non_heap_used_in_bytes => memory[:non_heap][:used_in_bytes],
:non_heap_committed_in_bytes => memory[:non_heap][:committed_in_bytes],
:pools => memory[:pools].inject({}) do |acc, (type, hash)|
hash.delete("committed_in_bytes")
acc[type] = hash
acc
end
}
end
def hot_threads(options={})
HotThreadsReport.new(self, options)
end
class HotThreadsReport
HOT_THREADS_STACK_TRACES_SIZE_DEFAULT = 10.freeze
def initialize(cmd, options)
@cmd = cmd
filter = { :stacktrace_size => options.fetch(:stacktrace_size, HOT_THREADS_STACK_TRACES_SIZE_DEFAULT) }
jr_dump = JRMonitor.threads.generate(filter)
@thread_dump = ::LogStash::Util::ThreadDump.new(options.merge(:dump => jr_dump))
end
def to_s
hash = to_hash
report = "#{I18n.t("logstash.web_api.hot_threads.title", :hostname => hash[:hostname], :time => hash[:time], :top_count => @thread_dump.top_count )} \n"
report << '=' * 80
report << "\n"
hash[:threads].each do |thread|
thread_report = ""
thread_report = "#{I18n.t("logstash.web_api.
hot_threads.thread_title", :percent_of_cpu_time => thread[:percent_of_cpu_time], :thread_state => thread[:state], :thread_name => thread[:name])} \n"
thread_report = "#{thread[:percent_of_cpu_time]} % of of cpu usage by #{thread[:state]} thread named '#{thread[:name]}'\n"
thread_report << "#{thread[:path]}\n" if thread[:path]
thread[:traces].each do |trace|
thread_report << "\t#{trace}\n"
end
report << thread_report
report << '-' * 80
report << "\n"
end
report
end
def to_hash
hash = { :hostname => @cmd.hostname, :time => Time.now.iso8601, :busiest_threads => @thread_dump.top_count, :threads => [] }
@thread_dump.each do |thread_name, _hash|
thread_name, thread_path = _hash["thread.name"].split(": ")
thread = { :name => thread_name,
:percent_of_cpu_time => cpu_time_as_percent(_hash),
:state => _hash["thread.state"]
}
thread[:path] = thread_path if thread_path
traces = []
_hash["thread.stacktrace"].each do |trace|
traces << trace
end
thread[:traces] = traces unless traces.empty?
hash[:threads] << thread
end
hash
end
def cpu_time_as_percent(hash)
(((cpu_time(hash) / @cmd.uptime * 1.0)*10000).to_i)/100.0
end
def cpu_time(hash)
hash["cpu.time"] / 1000000.0
end
end
end
end

View file

@ -5,7 +5,26 @@ module LogStash
module Api
module Modules
class Node < ::LogStash::Api::Modules::Base
# return hot threads information
def node
factory.build(:node)
end
get "/" do
respond_with node.all
end
get "/os" do
respond_with :os => node.os
end
get "/jvm" do
respond_with :jvm => node.jvm
end
get "/pipeline" do
respond_with :pipeline => node.pipeline
end
get "/hot_threads" do
ignore_idle_threads = params["ignore_idle_threads"] || true
@ -15,10 +34,9 @@ module LogStash
}
options[:threads] = params["threads"].to_i if params.has_key?("threads")
stats = factory.build(:stats)
as = options[:human] ? :string : :json
respond_with(stats.hot_threads(options), {:as => as})
end
as = options[:human] ? :string : :json
respond_with({:hot_threads => node.hot_threads(options)}, {:as => as})
end
end
end
end

View file

@ -3,6 +3,11 @@ module LogStash
module Api
module Modules
class NodeStats < ::LogStash::Api::Modules::Base
#set :environment, :test
#set :dump_errors, true
#set :raise_errors, true
#set :logging, Logger.new(STDERR)
before do
@stats = factory.build(:stats)
@ -14,7 +19,8 @@ module LogStash
payload = {
:events => events_payload,
:jvm => jvm_payload,
:process => process_payload
:process => process_payload,
:mem => mem_payload
}
respond_with payload
@ -40,6 +46,10 @@ module LogStash
respond_with :process => process_payload
end
get "/mem" do
respond_with :mem => mem_payload
end
private
def events_payload
@ -53,6 +63,10 @@ module LogStash
def process_payload
@stats.process
end
def mem_payload
@stats.memory
end
end
end
end

View file

@ -26,14 +26,8 @@ module LogStash
snapshot.metric_store.get_shallow(*path)
end
def get(key)
metric_store = @snapshot_rotation_mutex.synchronize { @snapshot.metric_store }
if key == :jvm_memory_stats
data = metric_store.get_shallow(:jvm, :memory)
else
data = metric_store.get_with_path("stats/events")
end
LogStash::Json.dump(data)
# Fetch the values for +keys+ under the metric namespace +path+ by
# delegating to the current snapshot's metric store.
def extract_metrics(path, *keys)
  store = snapshot.metric_store
  store.extract_metrics(path, *keys)
end
end
end

View file

@ -110,6 +110,54 @@ module LogStash module Instrument
key_paths.reduce(get(*key_paths)) {|acc, p| acc[p]}
end
# Return a hash of the values for the given keys in the metric
# namespace at +path+.
#
# Example Usage:
# extract_metrics(
#   [:jvm, :process],
#   :open_file_descriptors,
#   [:cpu, [:total_in_millis, :percent]]
# )
#
# Returns:
# # From the jvm.process metrics namespace
# {
#   :open_file_descriptors => 123
#   :cpu => { :total_in_millis => 456, :percent => 789 }
# }
def extract_metrics(path, *keys)
  metrics = get_shallow(*path)
  keys.reduce({}) do |acc, key|
    if key.is_a?(Array)
      # Nested spec like [:cpu, [:total_in_millis, :percent]]: recurse one
      # namespace level down, and file the result under the head key only —
      # using the full array as the key would make no sense to callers.
      acc[key.first] = extract_metrics(path + key[0..-2], *key.last)
    else
      value = metrics[key]
      acc[key] =
        if value.is_a?(Hash)
          # Already a plain nested structure; pass it through untouched.
          value
        else
          # A Metric object carrying its payload in #value. A missing key
          # yields nil, but the key must still appear in the result.
          value ? value.value : nil
        end
    end
    acc
  end
end
# Return all the individuals Metric,
# This call mimic a Enum's each if a block is provided
#

View file

@ -188,6 +188,12 @@ module LogStash; class Pipeline
batch_size = @settings.get("pipeline.batch.size")
batch_delay = @settings.get("pipeline.batch.delay")
max_inflight = batch_size * pipeline_workers
config_metric = metric.namespace([:stats, :pipelines, pipeline_id.to_s.to_sym, :config])
config_metric.gauge(:workers, pipeline_workers)
config_metric.gauge(:batch_size, batch_size)
config_metric.gauge(:batch_delay, batch_delay)
@logger.info("Starting pipeline",
"id" => self.pipeline_id,
"pipeline.workers" => pipeline_workers,

View file

@ -34,7 +34,7 @@ describe LogStash::Api::Modules::Node do
end
it "should return information for <= # requested threads" do
expect(payload["threads"].count).to be <= 5
expect(payload["hot_threads"]["threads"].count).to be <= 5
end
end
@ -55,5 +55,44 @@ describe LogStash::Api::Modules::Node do
end
end
describe "Generic JSON testing" do
extend ResourceDSLMethods
root_structure = {
"pipeline" => {
"workers" => Numeric,
"batch_size" => Numeric,
"batch_delay" => Numeric
},
"os" => {
"name" => String,
"arch" => String,
"version" => String,
"available_processors" => Numeric
},
"jvm" => {
"pid" => Numeric,
"version" => String,
"vm_name" => String,
"vm_version" => String,
"vm_vendor" => String,
"start_time_in_millis" => Numeric,
"mem" => {
"heap_init_in_bytes" => Numeric,
"heap_max_in_bytes" => Numeric,
"non_heap_init_in_bytes" => Numeric,
"non_heap_max_in_bytes" => Numeric
}
},
"hot_threads"=> {
"hostname" => String,
"time" => String,
"busiest_threads" => Numeric,
"threads" => Array
}
}
test_api_and_resources(root_structure)
end
end
end

View file

@ -33,8 +33,36 @@ describe LogStash::Api::Modules::NodeStats do
"total_in_millis"=>Numeric,
"percent"=>Numeric
}
},
"mem" => {
"heap_used_in_bytes" => Numeric,
"heap_used_percent" => Numeric,
"heap_committed_in_bytes" => Numeric,
"heap_max_in_bytes" => Numeric,
"non_heap_used_in_bytes" => Numeric,
"non_heap_committed_in_bytes" => Numeric,
"pools" => {
"survivor" => {
"peak_used_in_bytes" => Numeric,
"used_in_bytes" => Numeric,
"peak_max_in_bytes" => Numeric,
"max_in_bytes" => Numeric
},
"old" => {
"peak_used_in_bytes" => Numeric,
"used_in_bytes" => Numeric,
"peak_max_in_bytes" => Numeric,
"max_in_bytes" => Numeric
},
"young" => {
"peak_used_in_bytes" => Numeric,
"used_in_bytes" => Numeric,
"peak_max_in_bytes" => Numeric,
"max_in_bytes" => Numeric
}
}
}
}
test_api_and_resources(root_structure)
end

View file

@ -24,12 +24,16 @@ module ResourceDSLMethods
expect(last_response).to be_ok
end
hash_to_mapping(expected).each do |path,klass|
dotted = path.join(".")
hash_to_mapping(expected).each do |resource_path,klass|
dotted = resource_path.join(".")
it "should set '#{dotted}' to be a '#{klass}'" do
path_value = path.reduce(payload) {|acc,v| acc[v]}
expect(path_value).to be_a(klass), "could not find '#{dotted}' in #{payload}"
it "should set '#{dotted}' at '#{path}' to be a '#{klass}'" do
expect(last_response).to be_ok # fail early if need be
resource_path_value = resource_path.reduce(payload) do |acc,v|
expect(acc.has_key?(v)).to eql(true), "Expected to find value '#{v}' in structure '#{acc}', but could not. Payload was '#{payload}'"
acc[v]
end
expect(resource_path_value).to be_a(klass), "could not find '#{dotted}' in #{payload}"
end
end
end