Closes #205 - Refactor split brain logic

Closes #223
This commit is contained in:
Chris Cowan 2014-05-15 15:27:12 -07:00 committed by Chris Cowan
parent 5416f0324e
commit c5a1b61a1c
20 changed files with 425 additions and 40 deletions

View file

@ -0,0 +1,115 @@
define(function (require) {
  'use strict';
  var _ = require('lodash');

  // Two periods count as consecutive only when their timestamps are exactly
  // this many milliseconds (one minute) apart.
  var PERIOD_INTERVAL = 60000;

  // Pulls the node id that was attached to an event during aggregation.
  var extractNodes = function (row) {
    return row.id;
  };

  /**
   * Checks whether two periods look like a split: both periods hold more
   * than one event, they are consecutive, and more than one node reported
   * in both of them.
   *
   * @param {Object} series map of timestamp -> array of events
   * @param {String} from key of the earlier period
   * @param {String} to key of the later period
   * @returns {Array|null} ids of the nodes present in both periods, or null
   *   when the two periods do not qualify
   */
  var findOverlap = function (series, from, to) {
    var first = series[from];
    var second = series[to];
    if (first.length < 2 || second.length < 2) {
      return null;
    }
    // Keys are strings; the subtraction coerces them to numbers.
    if (to - from !== PERIOD_INTERVAL) {
      return null;
    }
    var involvedNodes = _.intersection(_.map(first, extractNodes), _.map(second, extractNodes));
    return involvedNodes.length > 1 ? involvedNodes : null;
  };

  /**
   * Builds a split brain report from a facet response.
   *
   * @param {Object} rawdata response whose `facets` maps a node id to an
   *   object with an `entries` array; each entry needs a `time` and a `max`
   * @returns {Object} report with the attributes:
   *   status - 'red' (the last two periods overlap), 'yellow' (an earlier
   *            pair of periods overlaps) or 'green'
   *   red    - list of { from, to, nodes } events for the last two periods
   *   yellow - list of { from, to, nodes } events for earlier periods
   *   master - array of node ids considered master; empty when rawdata
   *            holds no entries at all
   *   series - map of timestamp -> array of events (entry copies + `id`)
   */
  return function (rawdata) {
    var results = { master: null, status: 'green', yellow: [], red: [], series: {} };
    var facets = (rawdata && rawdata.facets) || {};

    // Aggregate the series into a single object where the key
    // is the timestamp and the value is an array of the events.
    _.each(facets, function (facet, node) {
      _.each(facet.entries, function (entry) {
        var event = _.clone(entry);
        event.id = node;
        if (!_.isArray(results.series[entry.time])) {
          results.series[entry.time] = [];
        }
        results.series[entry.time].push(event);
      });
    });

    // Sort numerically; a plain sort() would compare the keys as strings.
    var times = _.keys(results.series).sort(function (a, b) {
      return a - b;
    });

    // Nothing reported at all: there is no master to pick and no split to
    // detect, so bail out before touching periods that do not exist.
    if (times.length === 0) {
      results.master = [];
      return results;
    }

    var lastIndex = times.length - 1;

    // Walk every pair of neighbouring periods. An overlap in the last pair
    // is happening right now (red); an overlap anywhere else is a split
    // that happened in the past (yellow).
    _.each(times, function (currentTime, index) {
      if (index === 0) {
        return;
      }
      var previousTime = times[index - 1];
      var involvedNodes = findOverlap(results.series, previousTime, currentTime);
      if (!involvedNodes) {
        return;
      }
      if (index === lastIndex) {
        results.red.push({
          from: previousTime,
          to: currentTime,
          nodes: involvedNodes
        });
      } else {
        // NOTE(review): unlike red events this lists every node reporting
        // in the later period, not only the overlapping ones. Kept as-is
        // for compatibility - confirm whether that is intended.
        results.yellow.push({
          from: previousTime,
          to: currentTime,
          nodes: _.map(results.series[currentTime], extractNodes)
        });
      }
    });

    // Set the status color
    if (results.red.length !== 0) {
      results.status = 'red';
    } else if (results.yellow.length !== 0) {
      results.status = 'yellow';
    }

    var lastSeries = results.series[times[lastIndex]];
    if (results.status === 'red') {
      // Mark every node involved in the ongoing split as master
      results.master = results.red[0].nodes;
    } else if (lastSeries.length > 1) {
      // Several nodes reported in the last period: the one with the
      // greatest `max` was the last one to report, so it is the master.
      results.master = [_.max(lastSeries, function (row) {
        return row.max;
      }).id];
    } else {
      // There shall only be one!
      results.master = [lastSeries[0].id];
    }

    return results;
  };
});

View file

@ -0,0 +1,56 @@
define(function (require) {
  'use strict';
  var _ = require('lodash');
  var config = require('config');

  /**
   * Creates a function that requests, per node, a per-minute date histogram
   * of the documents in which that node reported itself as master.
   *
   * @param {Object} $http service exposing post(url, body) -> promise
   * @param {Object} dashboard object whose `indices` array names the indices to search
   * @param {String} [persistent_field='node.ip_port'] field used to match a node
   * @returns {Function} function (nodes, to) resolving to the response data
   */
  return function ($http, dashboard, persistent_field) {
    if (_.isUndefined(persistent_field)) {
      persistent_field = 'node.ip_port';
    }

    // Builds the facet definition for a single node.
    var buildFacet = function (node) {
      // The field name is dynamic, so the term filter has to be filled in
      // with bracket notation instead of an object literal.
      var nodeTerm = {};
      nodeTerm[persistent_field] = node;

      return {
        date_histogram: {
          key_field: '@timestamp',
          value_field: '@timestamp',
          interval: 'minute'
        },
        facet_filter: {
          bool: {
            must: [
              { term: nodeTerm },
              { term: { "node.master": true } }
            ]
          }
        }
      };
    };

    return function (nodes, to) {
      // Look at the ten minutes leading up to `to`, rounded to the minute
      var timeRange = {
        from: to+'-10m/m',
        to: to+'/m'
      };

      var facets = {};
      _.each(nodes, function (node) {
        facets[node] = buildFacet(node);
      });

      var body = {
        query: {
          filtered: {
            filter: {
              range: { '@timestamp': timeRange }
            }
          }
        },
        facets: facets
      };

      var indexList = dashboard.indices.join(',');
      var url = config.elasticsearch+'/'+indexList+'/node_stats/_search?search_type=count';

      return $http.post(url, body).then(function (resp) {
        return resp.data;
      });
    };
  };
});

View file

@ -5,11 +5,13 @@ define([
'lodash',
'jquery',
'numeral',
'./lib/detectSplitBrain',
'./lib/splitBrainHistogram',
'jquery.flot',
'jquery.flot.time',
'services/marvel/index'
],
function (angular, app, kbn, _, $, numeral) {
function (angular, app, kbn, _, $, numeral, detectSplitBrain, splitBrainHistogram) {
'use strict';
var module = angular.module('kibana.panels.marvel.stats_table', []);
@ -62,7 +64,7 @@ define([
}
module.controller('marvel.stats_table', function ($scope, dashboard, filterSrv, esVersion, $clusterState, $filter,
alertSrv) {
alertSrv, $http) {
$scope.panelMeta = {
modals: [],
editorTabs: [],
@ -362,7 +364,8 @@ define([
var
request,
filter,
results;
results,
master_nodes;
filter = filterSrv.getBoolFilter(filterSrv.ids);
@ -386,11 +389,10 @@ define([
// master node detection
if ($scope.panel.mode === "nodes") {
request.facet($scope.ejs.TermStatsFacet("master_periods")
.keyField($scope.panel.persistent_field).valueField("@timestamp")
.order('term').facetFilter($scope.ejs.TermFilter("node.master", "true"))
request.facet($scope.ejs.TermsFacet("master_nodes")
.field("node.ip_port.raw")
.facetFilter($scope.ejs.TermFilter("node.master", "true"))
.size(2000));
}
_.each($scope.panel.metrics, function (m) {
@ -439,34 +441,10 @@ define([
};
});
if (r.facets['master_periods']) {
var most_recent_master = _.max(r.facets['master_periods'].terms, function (f) {
return f.max;
});
newData[most_recent_master.term].master = true;
// now check we have other active master within the same time frame
var other_masters = _.filter(r.facets['master_periods'].terms, function (t) {
if (t.term === most_recent_master.term) {
return false;
}
if (maxFilterTime - t.max > $scope.staleIntervalCount * newData[t.term].reporting_interval * 1000) {
// stale master info, we don't care.
return false;
}
// enough of overlap to not be a master swap
return (t.max - most_recent_master.min >
Math.min(300 * 1000, $scope.staleIntervalCount * newData[t.term].reporting_interval * 1000));
});
_.each(other_masters, function (t) {
newData[t.term].master = true;
});
if (other_masters.length > 0) {
// mark all master nodes as alerting
_.each(newData, function (n) {
if (n.master) {
n.alert_level = n.id_alert_level = 2;
}
});
if (r.facets['master_nodes']) {
master_nodes = r.facets['master_nodes'];
if (master_nodes.terms.length === 1) {
newData[master_nodes.terms[0].term].master = true;
}
}
@ -604,8 +582,66 @@ define([
}
(newData[persistent_name] || {}).display_name = display_name;
});
$scope._register_data_end();
$scope.select_display_data_and_enrich(newData);
// Ends the refresh cycle and renders whatever newData holds. Every path
// below has to go through this, otherwise the panel never finishes loading.
var finishRefresh = function () {
$scope._register_data_end();
$scope.select_display_data_and_enrich(newData);
};
// master_nodes is only assigned when the response carried a "master_nodes"
// facet, so it can still be undefined here. With fewer than two master
// candidates there is nothing to disambiguate: a single master was already
// flagged above and zero candidates would produce a request without facets.
if (!master_nodes || master_nodes.terms.length < 2) {
finishRefresh();
} else {
var nodes = _.map(master_nodes.terms, function (term) {
return term.term;
});
var req = splitBrainHistogram($http, dashboard, $scope.panel.persistent_field);
var success = function (data) {
// Get the splitBrainReport. This will return an object with
// the following attributes:
//
// status: The status of the report:
// red = immediate split
// yellow = was a split
// green = all clear
//
// red: a list of the red events. Each event will have
// the following attributes: to, from, nodes
//
// yellow: Same as the red events except these events
// are events that happened in the past
//
// series: an object of the raw events used to calculate
// the different status.
//
// As you can see there is a lot more we can do with this report
// for now we just use it to highlight the correct nodes that will
// trigger an error message for split brain.
try {
var splitBrainReport = detectSplitBrain(data);
// Highlight each master. A reported node may have no row in newData,
// so skip those instead of throwing.
_.each(splitBrainReport.master, function (node) {
if (newData[node]) {
newData[node].master = true;
}
});
// Set alert level for red nodes
_.each(splitBrainReport.red, function (event) {
_.each(event.nodes, function (node) {
if (newData[node]) {
newData[node].alert_level = newData[node].id_alert_level = 2;
}
});
});
} finally {
// Render even when the report could not be built; the exception still
// propagates to the promise after the panel has been released.
finishRefresh();
}
};
var error = function (resp) {
// A failed request may come back without a body
$scope.error = resp && resp.data && resp.data.error;
finishRefresh();
};
req(nodes, to).then(success, error);
}
});
}, $scope._register_data_end);
}, $scope._register_data_end);

View file

@ -7,7 +7,8 @@ module.exports = function (grunt) {
test: {
options: {
data: {
tests: JSON.stringify(tests)
tests: JSON.stringify(tests),
host: '<%= kibanaHost %>'
},
client: false
},

12
test/fixtures/createSplitBrainEvent.js vendored Normal file
View file

@ -0,0 +1,12 @@
define(function (require) {
  'use strict';

  // 2014-01-01T00:00:00Z as epoch milliseconds. Computed with Date so this
  // module does not depend on a `moment` global that it never requires.
  var BASE_TIME = Date.UTC(2014, 0, 1);
  var MINUTE = 60000;

  /**
   * Creates a histogram entry located `num` minutes before
   * 2014-01-01T00:00:00Z.
   *
   * @param {Number} num how many minutes before the base time the entry sits
   * @param {Number} [maxBy] milliseconds added to the entry time for `max`
   * @param {Number} [minBy] milliseconds added to the entry time for `min`
   * @returns {Object} entry with `time`, `count` (always 6), `max` and `min`
   */
  return function (num, maxBy, minBy) {
    var time = BASE_TIME - (num * MINUTE);
    return {
      time: time,
      count: 6,
      // A missing (or zero) offset leaves max/min at the entry time
      max: time + (maxBy || 0),
      min: time + (minBy || 0)
    };
  };
});

View file

@ -2,6 +2,7 @@ define(function () {
'use strict';
return function () {
return {
_id: new Date().getTime(),
status: 'green',
nodes: {
'node-1': {

32
test/fixtures/masterSwap.js vendored Normal file
View file

@ -0,0 +1,32 @@
define(function (require) {
  'use strict';
  // NOTE(review): moment is not referenced in this module. The require is
  // kept because dropping it changes which modules get loaded - confirm
  // nothing relies on that before removing it.
  var moment = require('moment');
  var createEvent = require('./createSplitBrainEvent.js');

  // Wrapper so Array#map's extra (index, array) arguments are not passed on
  // to createEvent as its offsets.
  var eventAt = function (minutesAgo) {
    return createEvent(minutesAgo);
  };

  // Wraps a list of entries into a date_histogram facet.
  var histogram = function (entries) {
    return {
      _type: 'date_histogram',
      entries: entries
    };
  };

  // Fixture: the first node reports for ten minutes in a row and the second
  // node shows up in the final minute with a later max.
  return {
    facets: {
      "127.0.0.1:9300": histogram([10, 9, 8, 7, 6, 5, 4, 3, 2, 1].map(eventAt)),
      '127.0.0.1:9301': histogram([createEvent(1, 20)])
    }
  };
});

40
test/fixtures/redSplit.js vendored Normal file
View file

@ -0,0 +1,40 @@
define(function (require) {
  'use strict';
  // NOTE(review): moment is not referenced in this module. The require is
  // kept because dropping it changes which modules get loaded - confirm
  // nothing relies on that before removing it.
  var moment = require('moment');
  var createEvent = require('./createSplitBrainEvent.js');

  // Wrapper so Array#map's extra (index, array) arguments are not passed on
  // to createEvent as its offsets.
  var eventAt = function (minutesAgo) {
    return createEvent(minutesAgo);
  };

  // Builds a date_histogram facet with one entry per listed minute.
  var histogram = function (minutes) {
    return {
      _type: 'date_histogram',
      entries: minutes.map(eventAt)
    };
  };

  // Fixture: two nodes report during both of the last two minutes and a
  // third node joins in the final minute.
  return {
    facets: {
      "127.0.0.1:9300": histogram([10, 9, 8, 7, 6, 5, 4, 3, 2, 1]),
      '127.0.0.1:9301': histogram([2, 1]),
      '127.0.0.1:9302': histogram([1])
    }
  };
});

View file

@ -2,6 +2,7 @@ define(function () {
'use strict';
return function () {
return {
_id: new Date().getTime(),
status: 'red',
nodes: {
'node-1': {

View file

@ -2,6 +2,7 @@ define(function () {
'use strict';
return function () {
return {
_id: new Date().getTime(),
status: 'red',
nodes: {
'node-1': {

View file

@ -2,6 +2,7 @@ define(function () {
'use strict';
return function () {
return {
_id: new Date().getTime(),
status: 'red',
nodes: {
'node-1': {

View file

@ -2,6 +2,7 @@ define(function () {
'use strict';
return function () {
return {
_id: new Date().getTime(),
status: 'red',
nodes: {
'node-1': {

View file

@ -2,6 +2,7 @@ define(function () {
'use strict';
return function () {
return {
_id: new Date().getTime(),
status: 'green',
nodes: {
'node-1': {

31
test/fixtures/yellowSplit.js vendored Normal file
View file

@ -0,0 +1,31 @@
define(function (require) {
  'use strict';
  // NOTE(review): moment is not referenced in this module. The require is
  // kept because dropping it changes which modules get loaded - confirm
  // nothing relies on that before removing it.
  var moment = require('moment');
  var createEvent = require('./createSplitBrainEvent.js');

  // Wrapper so Array#map's extra (index, array) arguments are not passed on
  // to createEvent as its offsets.
  var eventAt = function (minutesAgo) {
    return createEvent(minutesAgo);
  };

  // Wraps a list of entries into a date_histogram facet.
  var histogram = function (entries) {
    return {
      _type: 'date_histogram',
      entries: entries
    };
  };

  var firstNodeEntries = [10, 8, 5, 4, 3].map(eventAt);
  // The final entry of the second node carries a later max than its time
  var secondNodeEntries = [4, 3, 2].map(eventAt).concat([createEvent(1, 20)]);

  // Fixture: both nodes report during minutes 4 and 3, after which only the
  // second node keeps reporting.
  return {
    facets: {
      "127.0.0.1:9300": histogram(firstNodeEntries),
      '127.0.0.1:9301': histogram(secondNodeEntries)
    }
  };
});

View file

@ -2,6 +2,7 @@ define(function () {
'use strict';
return function () {
return {
_id: new Date().getTime(),
status: 'yellow',
nodes: {
'node-1': {

View file

@ -2,6 +2,7 @@ define(function () {
'use strict';
return function () {
return {
_id: new Date().getTime(),
status: 'yellow',
nodes: {
'node-1': {

View file

@ -2,6 +2,7 @@ define(function () {
'use strict';
return function () {
return {
_id: new Date().getTime(),
status: 'yellow',
nodes: {
'node-1': {

View file

@ -4,7 +4,7 @@
</script><script type="text/javascript">require.config({
baseUrl: '../kibana/app'
});
require(["/test/unit/lib/ClusterState/explainStatus.js","/test/unit/lib/ClusterState/filterShards.js","/test/unit/lib/ClusterState/getIndices.js","/test/unit/lib/ClusterState/getState.js","/test/unit/lib/ClusterState/groupIndicesByState.js","/test/unit/lib/ClusterState/incrementIndexShardStatusCount.js","/test/unit/lib/ClusterState/popFirstIndexAndReturnEndpoint.js","/test/unit/lib/ClusterState/refreshState.js","/test/unit/shard_allocation/calculateClass.js","/test/unit/shard_allocation/countChildren.js","/test/unit/shard_allocation/extractIp.js","/test/unit/shard_allocation/extractMarkers.js","/test/unit/shard_allocation/extractShards.js","/test/unit/shard_allocation/filterByName.js","/test/unit/shard_allocation/filterHiddenIndices.js","/test/unit/shard_allocation/generateQueryAndLink.js","/test/unit/shard_allocation/getStateSource.js","/test/unit/shard_allocation/getTimelineData.js","/test/unit/shard_allocation/hasUnassigned.js","/test/unit/shard_allocation/hasUnassignedPrimaries.js","/test/unit/shard_allocation/updateColors.js","/test/unit/shard_allocation/vents.js"], function () {
require(["/test/unit/lib/ClusterState/explainStatus.js","/test/unit/lib/ClusterState/filterShards.js","/test/unit/lib/ClusterState/getIndices.js","/test/unit/lib/ClusterState/getState.js","/test/unit/lib/ClusterState/groupIndicesByState.js","/test/unit/lib/ClusterState/incrementIndexShardStatusCount.js","/test/unit/lib/ClusterState/popFirstIndexAndReturnEndpoint.js","/test/unit/lib/ClusterState/refreshState.js","/test/unit/shard_allocation/calculateClass.js","/test/unit/shard_allocation/countChildren.js","/test/unit/shard_allocation/extractIp.js","/test/unit/shard_allocation/extractMarkers.js","/test/unit/shard_allocation/extractShards.js","/test/unit/shard_allocation/filterByName.js","/test/unit/shard_allocation/filterHiddenIndices.js","/test/unit/shard_allocation/generateQueryAndLink.js","/test/unit/shard_allocation/getStateSource.js","/test/unit/shard_allocation/hasUnassigned.js","/test/unit/shard_allocation/hasUnassignedPrimaries.js","/test/unit/shard_allocation/updateColors.js","/test/unit/shard_allocation/vents.js","/test/unit/stats_table/lib/detectSplitBrain.js"], function () {
setTimeout(function () {
window.mochaRunner = mocha.run();
if (window.mochaRunner) {

View file

@ -36,4 +36,4 @@ html
}
}, 100);
});
script(src="//localhost:35729/livereload.js")
script(src="//#{host}:35729/livereload.js")

View file

@ -0,0 +1,53 @@
define(function (require) {
  'use strict';
  var detectSplitBrain = require('panels/marvel/stats_table/lib/detectSplitBrain');
  var masterSwap = require('/test/fixtures/masterSwap.js');
  var redSplit = require('/test/fixtures/redSplit.js');
  var yellowSplit = require('/test/fixtures/yellowSplit.js');

  // Asserts the overall status color of a report.
  var expectStatus = function (report, color) {
    expect(report).to.have.property('status', color);
  };

  // Asserts that report[name] is an Array holding exactly `size` items.
  var expectList = function (report, name, size) {
    expect(report).to.have.property(name)
      .to.be.instanceOf(Array)
      .to.have.length(size);
  };

  describe('stats_table', function () {
    describe('lib/detectSplitBrain.js', function() {

      it('should detect a master swap and set status to green', function () {
        var report = detectSplitBrain(masterSwap);
        expectStatus(report, 'green');
        expectList(report, 'master', 1);
        // The node that reported last takes over as master
        expect(report.master[0]).to.equal('127.0.0.1:9301');
      });

      it('should detect split brain and set status to red', function () {
        var report = detectSplitBrain(redSplit);
        expectStatus(report, 'red');
        expectList(report, 'master', 2);
        expectList(report, 'red', 1);
        expectList(report, 'yellow', 0);
        // Both nodes involved in the split are flagged as master
        expect(report.master[0]).to.equal('127.0.0.1:9300');
        expect(report.master[1]).to.equal('127.0.0.1:9301');
      });

      it('should detect split in the past and set status to yellow', function () {
        var report = detectSplitBrain(yellowSplit);
        expectStatus(report, 'yellow');
        expectList(report, 'master', 1);
        expectList(report, 'yellow', 1);
        expectList(report, 'red', 0);
        expect(report.master[0]).to.equal('127.0.0.1:9301');
      });

    });
  });
});