[8.9] [ftr] Improve FTR error handling for NoSuchSessionError (#161025) (#161207)

# Backport

This will backport the following commits from `main` to `8.9`:
- [[ftr] Improve FTR error handling for NoSuchSessionError
(#161025)](https://github.com/elastic/kibana/pull/161025)

<!--- Backport version: 8.9.7 -->

### Questions ?
Please refer to the [Backport tool
documentation](https://github.com/sqren/backport)

<!--BACKPORT [{"author":{"name":"Dzmitry
Lemechko","email":"dzmitry.lemechko@elastic.co"},"sourceCommit":{"committedDate":"2023-07-04T16:26:57Z","message":"[ftr]
Improve FTR error handling for NoSuchSessionError (#161025)\n\n##
Summary\r\n\r\nSometimes browser/driver process dies during test run on
CI and FTR\r\nfails with errors cascade, good example
is\r\n[here](https://buildkite.com/elastic/kibana-pull-request/builds/138535#0188fd74-9adf-4011-8168-1bdc6e3d0f17)\r\n\r\n\r\n\r\nCurrent
behaviour on `main`: FTR lifecycle hooks, defined
in\r\n[remote](57aea91fae/test/functional/services/remote/remote.ts)\r\nservice,
has no access to the information about test suite run and\r\nparticular
test failure. These hooks are related to WebDriver (browser)\r\nstate
management and suppose to reset it to default state.\r\nCurrently we
silently fail screenshot taking which means tests execution\r\nis
continued even if `--bail` flag is passed. It ends with cascade
of\r\nfailures with the same error `NoSuchSessionError: invalid session
id`\r\n\r\n<details>\r\n\r\n<summary>FTR output on
failure</summary>\r\n\r\n```\r\n └- ✖ fail: discover/group1 discover
test query should show correct time range string by timepicker\r\n │
Error: expected 'Sep 19, 2015 @ 06:31:44.000' to equal 'Sep 23, 2015 @
18:31:44.000'\r\n │ at Assertion.assert (expect.js💯11)\r\n │ at
Assertion.apply (expect.js:227:8)\r\n │ at Assertion.be
(expect.js:69:22)\r\n │ at Context.<anonymous> (_discover.ts:53:31)\r\n
│ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Object.apply
(wrap_function.js:73:16)\r\n │\r\n │\r\n └-> \"after all\" hook:
afterTestSuite.trigger for \"should reload the saved search with
persisted query to show the initial hit count\"\r\n └- ✖ fail:
discover/group1 discover test query \"after all\" hook:
afterTestSuite.trigger for \"should reload the saved search with
persisted query to show the initial hit count\"\r\n │
NoSuchSessionError: invalid session id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n │
at Task.exec (prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n └->
\"after all\" hook in \"discover test\"\r\n │ debg Cleaning all saved
objects { space: undefined }\r\n │ succ deleted 2 objects\r\n └->
\"after all\" hook: afterTestSuite.trigger in \"discover test\"\r\n └- ✖
fail: discover/group1 discover test \"after all\" hook:
afterTestSuite.trigger in \"discover test\"\r\n │ NoSuchSessionError:
invalid session id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n │
at Task.exec (prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n └->
\"after all\" hook: unloadMakelogs in \"discover/group1\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Unloading
indices from \"mappings.json\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.22\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.20\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.21\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Unloading
indices from \"data.json.gz\"\r\n └-> \"after all\" hook:
afterTestSuite.trigger in \"discover/group1\"\r\n └- ✖ fail:
discover/group1 \"after all\" hook: afterTestSuite.trigger in
\"discover/group1\"\r\n │ NoSuchSessionError: invalid session id\r\n │
at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
runMicrotasks (<anonymous>)\r\n │ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Task.exec
(prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n │\r\n │0 passing
(15.7s)\r\n │4 failing\r\n │\r\n │1) discover/group1\r\n │ discover
test\r\n │ query\r\n │ should show correct time range string by
timepicker:\r\n │\r\n │ Error: expected 'Sep 19, 2015 @ 06:31:44.000' to
equal 'Sep 23, 2015 @ 18:31:44.000'\r\n │ at Assertion.assert
(expect.js💯11)\r\n │ at Assertion.apply (expect.js:227:8)\r\n │ at
Assertion.be (expect.js:69:22)\r\n │ at Context.<anonymous>
(_discover.ts:53:31)\r\n │ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Object.apply
(wrap_function.js:73:16)\r\n │\r\n │\r\n │2) discover/group1\r\n │
discover test\r\n │ query\r\n │ \"after all\" hook:
afterTestSuite.trigger for \"should reload the saved search with
persisted query to show the initial hit count\":\r\n │\r\n │
NoSuchSessionError: invalid session id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n │
at Task.exec (prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n │3)
discover/group1\r\n │ discover test\r\n │ \"after all\" hook:
afterTestSuite.trigger in \"discover test\":\r\n │\r\n │
NoSuchSessionError: invalid session id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n │
at Task.exec (prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n │4)
discover/group1\r\n │ \"after all\" hook: afterTestSuite.trigger in
\"discover/group1\":\r\n │\r\n │ NoSuchSessionError: invalid session
id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
runMicrotasks (<anonymous>)\r\n │ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Task.exec
(prevent_parallel_calls.ts:28:20)\r\n```\r\n\r\n</details>\r\n\r\nThis
PR change: I didn't find a good reason why we need to fail
silently\r\non screenshot taking. I added a check WebDriver session
status with\r\n`hasOpenWindow` and take failure artefacts only if is
still valid.\r\nNext change is to fail FTR after hooks related to
WebDriver silently:\r\nthere is no help having cascade of the repeated
stacktrace so I wrap\r\nWebDriver call in hooks with `tryWebDriverCall`
that catches the error\r\nand only prints it for
visibility.\r\n\r\n<details>\r\n\r\n<summary>FTR new output on
failure</summary>\r\n\r\n```\r\n │ERROR WebDriver session is no longer
valid.\r\n │ Probably Chrome process crashed when it tried to use more
memory than what was available.\r\n │ERROR Browser is closed, no
artifacts were captured for the failure\r\n └- ✖ fail: discover/group1
discover test query should show correct time range string by
timepicker\r\n │ Error: expected 'Sep 19, 2015 @ 06:31:44.000' to equal
'Sep 23, 2015 @ 18:31:44.000'\r\n │ at Assertion.assert
(expect.js💯11)\r\n │ at Assertion.apply (expect.js:227:8)\r\n │ at
Assertion.be (expect.js:69:22)\r\n │ at Context.<anonymous>
(_discover.ts:53:31)\r\n │ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Object.apply
(wrap_function.js:73:16)\r\n │\r\n │\r\n └-> \"after all\" hook:
afterTestSuite.trigger for \"should reload the saved search with
persisted query to show the initial hit count\"\r\n │ERROR WebDriver
session is no longer valid\r\n └-> \"after all\" hook in \"discover
test\"\r\n │ debg Cleaning all saved objects { space: undefined }\r\n │
warn browser[SEVERE] ERROR FETCHING BROWSR LOGS: This driver instance
does not have a valid session ID (did you call WebDriver.quit()?) and
may no longer be used.\r\n │ succ deleted 2 objects\r\n └-> \"after
all\" hook: afterTestSuite.trigger in \"discover test\"\r\n │ERROR
WebDriver session is no longer valid\r\n └-> \"after all\" hook:
unloadMakelogs in \"discover/group1\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Unloading
indices from \"mappings.json\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.22\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.20\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.21\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Unloading
indices from \"data.json.gz\"\r\n └-> \"after all\" hook:
afterTestSuite.trigger in \"discover/group1\"\r\n │ERROR WebDriver
session is no longer valid\r\n\r\n0 passing (16.2s)\r\n1
failing\r\n\r\n1) discover/group1\r\n discover test\r\n query\r\n should
show correct time range string by timepicker:\r\n\r\n Error: expected
'Sep 19, 2015 @ 06:31:44.000' to equal 'Sep 23, 2015 @ 18:31:44.000'\r\n
at Assertion.assert (expect.js💯11)\r\n at Assertion.apply
(expect.js:227:8)\r\n at Assertion.be (expect.js:69:22)\r\n at
Context.<anonymous> (_discover.ts:53:31)\r\n at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n
at Object.apply
(wrap_function.js:73:16)\r\n```\r\n\r\n</details>\r\n\r\n\r\nFlaky-test-runner
verification: started 100x to hopefully catch invalid\r\nsession on
CI\r\nhttps://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/2539\r\nNote:
locally I was simulating it by calling `this.driver.close()` to\r\nclose
browser before screenshot
taking","sha":"8a95bf7fabe34c601725313c922f95ee0fa641e5","branchLabelMapping":{"^v8.10.0$":"main","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:skip","v8.9.0","v8.10.0","v8.8.3"],"number":161025,"url":"https://github.com/elastic/kibana/pull/161025","mergeCommit":{"message":"[ftr]
Improve FTR error handling for NoSuchSessionError (#161025)\n\n##
Summary\r\n\r\nSometimes browser/driver process dies during test run on
CI and FTR\r\nfails with errors cascade, good example
is\r\n[here](https://buildkite.com/elastic/kibana-pull-request/builds/138535#0188fd74-9adf-4011-8168-1bdc6e3d0f17)\r\n\r\n\r\n\r\nCurrent
behaviour on `main`: FTR lifecycle hooks, defined
in\r\n[remote](57aea91fae/test/functional/services/remote/remote.ts)\r\nservice,
has no access to the information about test suite run and\r\nparticular
test failure. These hooks are related to WebDriver (browser)\r\nstate
management and suppose to reset it to default state.\r\nCurrently we
silently fail screenshot taking which means tests execution\r\nis
continued even if `--bail` flag is passed. It ends with cascade
of\r\nfailures with the same error `NoSuchSessionError: invalid session
id`\r\n\r\n<details>\r\n\r\n<summary>FTR output on
failure</summary>\r\n\r\n```\r\n └- ✖ fail: discover/group1 discover
test query should show correct time range string by timepicker\r\n │
Error: expected 'Sep 19, 2015 @ 06:31:44.000' to equal 'Sep 23, 2015 @
18:31:44.000'\r\n │ at Assertion.assert (expect.js💯11)\r\n │ at
Assertion.apply (expect.js:227:8)\r\n │ at Assertion.be
(expect.js:69:22)\r\n │ at Context.<anonymous> (_discover.ts:53:31)\r\n
│ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Object.apply
(wrap_function.js:73:16)\r\n │\r\n │\r\n └-> \"after all\" hook:
afterTestSuite.trigger for \"should reload the saved search with
persisted query to show the initial hit count\"\r\n └- ✖ fail:
discover/group1 discover test query \"after all\" hook:
afterTestSuite.trigger for \"should reload the saved search with
persisted query to show the initial hit count\"\r\n │
NoSuchSessionError: invalid session id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n │
at Task.exec (prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n └->
\"after all\" hook in \"discover test\"\r\n │ debg Cleaning all saved
objects { space: undefined }\r\n │ succ deleted 2 objects\r\n └->
\"after all\" hook: afterTestSuite.trigger in \"discover test\"\r\n └- ✖
fail: discover/group1 discover test \"after all\" hook:
afterTestSuite.trigger in \"discover test\"\r\n │ NoSuchSessionError:
invalid session id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n │
at Task.exec (prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n └->
\"after all\" hook: unloadMakelogs in \"discover/group1\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Unloading
indices from \"mappings.json\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.22\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.20\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.21\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Unloading
indices from \"data.json.gz\"\r\n └-> \"after all\" hook:
afterTestSuite.trigger in \"discover/group1\"\r\n └- ✖ fail:
discover/group1 \"after all\" hook: afterTestSuite.trigger in
\"discover/group1\"\r\n │ NoSuchSessionError: invalid session id\r\n │
at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
runMicrotasks (<anonymous>)\r\n │ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Task.exec
(prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n │\r\n │0 passing
(15.7s)\r\n │4 failing\r\n │\r\n │1) discover/group1\r\n │ discover
test\r\n │ query\r\n │ should show correct time range string by
timepicker:\r\n │\r\n │ Error: expected 'Sep 19, 2015 @ 06:31:44.000' to
equal 'Sep 23, 2015 @ 18:31:44.000'\r\n │ at Assertion.assert
(expect.js💯11)\r\n │ at Assertion.apply (expect.js:227:8)\r\n │ at
Assertion.be (expect.js:69:22)\r\n │ at Context.<anonymous>
(_discover.ts:53:31)\r\n │ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Object.apply
(wrap_function.js:73:16)\r\n │\r\n │\r\n │2) discover/group1\r\n │
discover test\r\n │ query\r\n │ \"after all\" hook:
afterTestSuite.trigger for \"should reload the saved search with
persisted query to show the initial hit count\":\r\n │\r\n │
NoSuchSessionError: invalid session id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n │
at Task.exec (prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n │3)
discover/group1\r\n │ discover test\r\n │ \"after all\" hook:
afterTestSuite.trigger in \"discover test\":\r\n │\r\n │
NoSuchSessionError: invalid session id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n │
at Task.exec (prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n │4)
discover/group1\r\n │ \"after all\" hook: afterTestSuite.trigger in
\"discover/group1\":\r\n │\r\n │ NoSuchSessionError: invalid session
id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
runMicrotasks (<anonymous>)\r\n │ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Task.exec
(prevent_parallel_calls.ts:28:20)\r\n```\r\n\r\n</details>\r\n\r\nThis
PR change: I didn't find a good reason why we need to fail
silently\r\non screenshot taking. I added a check WebDriver session
status with\r\n`hasOpenWindow` and take failure artefacts only if is
still valid.\r\nNext change is to fail FTR after hooks related to
WebDriver silently:\r\nthere is no help having cascade of the repeated
stacktrace so I wrap\r\nWebDriver call in hooks with `tryWebDriverCall`
that catches the error\r\nand only prints it for
visibility.\r\n\r\n<details>\r\n\r\n<summary>FTR new output on
failure</summary>\r\n\r\n```\r\n │ERROR WebDriver session is no longer
valid.\r\n │ Probably Chrome process crashed when it tried to use more
memory than what was available.\r\n │ERROR Browser is closed, no
artifacts were captured for the failure\r\n └- ✖ fail: discover/group1
discover test query should show correct time range string by
timepicker\r\n │ Error: expected 'Sep 19, 2015 @ 06:31:44.000' to equal
'Sep 23, 2015 @ 18:31:44.000'\r\n │ at Assertion.assert
(expect.js💯11)\r\n │ at Assertion.apply (expect.js:227:8)\r\n │ at
Assertion.be (expect.js:69:22)\r\n │ at Context.<anonymous>
(_discover.ts:53:31)\r\n │ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Object.apply
(wrap_function.js:73:16)\r\n │\r\n │\r\n └-> \"after all\" hook:
afterTestSuite.trigger for \"should reload the saved search with
persisted query to show the initial hit count\"\r\n │ERROR WebDriver
session is no longer valid\r\n └-> \"after all\" hook in \"discover
test\"\r\n │ debg Cleaning all saved objects { space: undefined }\r\n │
warn browser[SEVERE] ERROR FETCHING BROWSR LOGS: This driver instance
does not have a valid session ID (did you call WebDriver.quit()?) and
may no longer be used.\r\n │ succ deleted 2 objects\r\n └-> \"after
all\" hook: afterTestSuite.trigger in \"discover test\"\r\n │ERROR
WebDriver session is no longer valid\r\n └-> \"after all\" hook:
unloadMakelogs in \"discover/group1\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Unloading
indices from \"mappings.json\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.22\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.20\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.21\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Unloading
indices from \"data.json.gz\"\r\n └-> \"after all\" hook:
afterTestSuite.trigger in \"discover/group1\"\r\n │ERROR WebDriver
session is no longer valid\r\n\r\n0 passing (16.2s)\r\n1
failing\r\n\r\n1) discover/group1\r\n discover test\r\n query\r\n should
show correct time range string by timepicker:\r\n\r\n Error: expected
'Sep 19, 2015 @ 06:31:44.000' to equal 'Sep 23, 2015 @ 18:31:44.000'\r\n
at Assertion.assert (expect.js💯11)\r\n at Assertion.apply
(expect.js:227:8)\r\n at Assertion.be (expect.js:69:22)\r\n at
Context.<anonymous> (_discover.ts:53:31)\r\n at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n
at Object.apply
(wrap_function.js:73:16)\r\n```\r\n\r\n</details>\r\n\r\n\r\nFlaky-test-runner
verification: started 100x to hopefully catch invalid\r\nsession on
CI\r\nhttps://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/2539\r\nNote:
locally I was simulating it by calling `this.driver.close()` to\r\nclose
browser before screenshot
taking","sha":"8a95bf7fabe34c601725313c922f95ee0fa641e5"}},"sourceBranch":"main","suggestedTargetBranches":["8.9","8.8"],"targetPullRequestStates":[{"branch":"8.9","label":"v8.9.0","labelRegex":"^v(\\d+).(\\d+).\\d+$","isSourceBranch":false,"state":"NOT_CREATED"},{"branch":"main","label":"v8.10.0","labelRegex":"^v8.10.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/161025","number":161025,"mergeCommit":{"message":"[ftr]
Improve FTR error handling for NoSuchSessionError (#161025)\n\n##
Summary\r\n\r\nSometimes browser/driver process dies during test run on
CI and FTR\r\nfails with errors cascade, good example
is\r\n[here](https://buildkite.com/elastic/kibana-pull-request/builds/138535#0188fd74-9adf-4011-8168-1bdc6e3d0f17)\r\n\r\n\r\n\r\nCurrent
behaviour on `main`: FTR lifecycle hooks, defined
in\r\n[remote](57aea91fae/test/functional/services/remote/remote.ts)\r\nservice,
has no access to the information about test suite run and\r\nparticular
test failure. These hooks are related to WebDriver (browser)\r\nstate
management and suppose to reset it to default state.\r\nCurrently we
silently fail screenshot taking which means tests execution\r\nis
continued even if `--bail` flag is passed. It ends with cascade
of\r\nfailures with the same error `NoSuchSessionError: invalid session
id`\r\n\r\n<details>\r\n\r\n<summary>FTR output on
failure</summary>\r\n\r\n```\r\n └- ✖ fail: discover/group1 discover
test query should show correct time range string by timepicker\r\n │
Error: expected 'Sep 19, 2015 @ 06:31:44.000' to equal 'Sep 23, 2015 @
18:31:44.000'\r\n │ at Assertion.assert (expect.js💯11)\r\n │ at
Assertion.apply (expect.js:227:8)\r\n │ at Assertion.be
(expect.js:69:22)\r\n │ at Context.<anonymous> (_discover.ts:53:31)\r\n
│ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Object.apply
(wrap_function.js:73:16)\r\n │\r\n │\r\n └-> \"after all\" hook:
afterTestSuite.trigger for \"should reload the saved search with
persisted query to show the initial hit count\"\r\n └- ✖ fail:
discover/group1 discover test query \"after all\" hook:
afterTestSuite.trigger for \"should reload the saved search with
persisted query to show the initial hit count\"\r\n │
NoSuchSessionError: invalid session id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n │
at Task.exec (prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n └->
\"after all\" hook in \"discover test\"\r\n │ debg Cleaning all saved
objects { space: undefined }\r\n │ succ deleted 2 objects\r\n └->
\"after all\" hook: afterTestSuite.trigger in \"discover test\"\r\n └- ✖
fail: discover/group1 discover test \"after all\" hook:
afterTestSuite.trigger in \"discover test\"\r\n │ NoSuchSessionError:
invalid session id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n │
at Task.exec (prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n └->
\"after all\" hook: unloadMakelogs in \"discover/group1\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Unloading
indices from \"mappings.json\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.22\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.20\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.21\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Unloading
indices from \"data.json.gz\"\r\n └-> \"after all\" hook:
afterTestSuite.trigger in \"discover/group1\"\r\n └- ✖ fail:
discover/group1 \"after all\" hook: afterTestSuite.trigger in
\"discover/group1\"\r\n │ NoSuchSessionError: invalid session id\r\n │
at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
runMicrotasks (<anonymous>)\r\n │ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Task.exec
(prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n │\r\n │0 passing
(15.7s)\r\n │4 failing\r\n │\r\n │1) discover/group1\r\n │ discover
test\r\n │ query\r\n │ should show correct time range string by
timepicker:\r\n │\r\n │ Error: expected 'Sep 19, 2015 @ 06:31:44.000' to
equal 'Sep 23, 2015 @ 18:31:44.000'\r\n │ at Assertion.assert
(expect.js💯11)\r\n │ at Assertion.apply (expect.js:227:8)\r\n │ at
Assertion.be (expect.js:69:22)\r\n │ at Context.<anonymous>
(_discover.ts:53:31)\r\n │ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Object.apply
(wrap_function.js:73:16)\r\n │\r\n │\r\n │2) discover/group1\r\n │
discover test\r\n │ query\r\n │ \"after all\" hook:
afterTestSuite.trigger for \"should reload the saved search with
persisted query to show the initial hit count\":\r\n │\r\n │
NoSuchSessionError: invalid session id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n │
at Task.exec (prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n │3)
discover/group1\r\n │ discover test\r\n │ \"after all\" hook:
afterTestSuite.trigger in \"discover test\":\r\n │\r\n │
NoSuchSessionError: invalid session id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n │
at Task.exec (prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n │4)
discover/group1\r\n │ \"after all\" hook: afterTestSuite.trigger in
\"discover/group1\":\r\n │\r\n │ NoSuchSessionError: invalid session
id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
runMicrotasks (<anonymous>)\r\n │ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Task.exec
(prevent_parallel_calls.ts:28:20)\r\n```\r\n\r\n</details>\r\n\r\nThis
PR change: I didn't find a good reason why we need to fail
silently\r\non screenshot taking. I added a check WebDriver session
status with\r\n`hasOpenWindow` and take failure artefacts only if is
still valid.\r\nNext change is to fail FTR after hooks related to
WebDriver silently:\r\nthere is no help having cascade of the repeated
stacktrace so I wrap\r\nWebDriver call in hooks with `tryWebDriverCall`
that catches the error\r\nand only prints it for
visibility.\r\n\r\n<details>\r\n\r\n<summary>FTR new output on
failure</summary>\r\n\r\n```\r\n │ERROR WebDriver session is no longer
valid.\r\n │ Probably Chrome process crashed when it tried to use more
memory than what was available.\r\n │ERROR Browser is closed, no
artifacts were captured for the failure\r\n └- ✖ fail: discover/group1
discover test query should show correct time range string by
timepicker\r\n │ Error: expected 'Sep 19, 2015 @ 06:31:44.000' to equal
'Sep 23, 2015 @ 18:31:44.000'\r\n │ at Assertion.assert
(expect.js💯11)\r\n │ at Assertion.apply (expect.js:227:8)\r\n │ at
Assertion.be (expect.js:69:22)\r\n │ at Context.<anonymous>
(_discover.ts:53:31)\r\n │ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Object.apply
(wrap_function.js:73:16)\r\n │\r\n │\r\n └-> \"after all\" hook:
afterTestSuite.trigger for \"should reload the saved search with
persisted query to show the initial hit count\"\r\n │ERROR WebDriver
session is no longer valid\r\n └-> \"after all\" hook in \"discover
test\"\r\n │ debg Cleaning all saved objects { space: undefined }\r\n │
warn browser[SEVERE] ERROR FETCHING BROWSR LOGS: This driver instance
does not have a valid session ID (did you call WebDriver.quit()?) and
may no longer be used.\r\n │ succ deleted 2 objects\r\n └-> \"after
all\" hook: afterTestSuite.trigger in \"discover test\"\r\n │ERROR
WebDriver session is no longer valid\r\n └-> \"after all\" hook:
unloadMakelogs in \"discover/group1\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Unloading
indices from \"mappings.json\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.22\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.20\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.21\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Unloading
indices from \"data.json.gz\"\r\n └-> \"after all\" hook:
afterTestSuite.trigger in \"discover/group1\"\r\n │ERROR WebDriver
session is no longer valid\r\n\r\n0 passing (16.2s)\r\n1
failing\r\n\r\n1) discover/group1\r\n discover test\r\n query\r\n should
show correct time range string by timepicker:\r\n\r\n Error: expected
'Sep 19, 2015 @ 06:31:44.000' to equal 'Sep 23, 2015 @ 18:31:44.000'\r\n
at Assertion.assert (expect.js💯11)\r\n at Assertion.apply
(expect.js:227:8)\r\n at Assertion.be (expect.js:69:22)\r\n at
Context.<anonymous> (_discover.ts:53:31)\r\n at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n
at Object.apply
(wrap_function.js:73:16)\r\n```\r\n\r\n</details>\r\n\r\n\r\nFlaky-test-runner
verification: started 100x to hopefully catch invalid\r\nsession on
CI\r\nhttps://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/2539\r\nNote:
locally I was simulating it by calling `this.driver.close()` to\r\nclose
browser before screenshot
taking","sha":"8a95bf7fabe34c601725313c922f95ee0fa641e5"}},{"branch":"8.8","label":"v8.8.3","labelRegex":"^v(\\d+).(\\d+).\\d+$","isSourceBranch":false,"state":"NOT_CREATED"}]}]
BACKPORT-->

Co-authored-by: Dzmitry Lemechko <dzmitry.lemechko@elastic.co>
This commit is contained in:
Kibana Machine 2023-07-04 13:24:20 -04:00 committed by GitHub
parent 039ff3be1c
commit 474c1813f3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 63 additions and 24 deletions

View file

@ -12,6 +12,7 @@ import { Key, Origin, WebDriver } from 'selenium-webdriver';
import { modifyUrl } from '@kbn/std';
import sharp from 'sharp';
import { NoSuchSessionError } from 'selenium-webdriver/lib/error';
import { WebElementWrapper } from '../lib/web_element_wrapper';
import { FtrProviderContext, FtrService } from '../../ftr_provider_context';
import { Browsers } from '../remote/browsers';
@ -632,8 +633,33 @@ class BrowserService extends FtrService {
return Boolean(result?.state === 'granted');
}
public getClipboardValue(): Promise<string> {
return this.driver.executeAsyncScript('navigator.clipboard.readText().then(arguments[0])');
public async getClipboardValue(): Promise<string> {
return await this.driver.executeAsyncScript(
'navigator.clipboard.readText().then(arguments[0])'
);
}
/**
* Checks if browser session is active and any browser window exists
* @returns {Promise<boolean>}
*/
public async hasOpenWindow(): Promise<boolean> {
if (this.driver == null) {
return false;
} else {
try {
const windowHandles = await this.driver.getAllWindowHandles();
return windowHandles.length > 0;
} catch (err) {
if (err instanceof NoSuchSessionError) {
// https://developer.mozilla.org/en-US/docs/Web/WebDriver/Errors/InvalidSessionID
this.log.error(
`WebDriver session is no longer valid.\nProbably Chrome process crashed when it tried to use more memory than what was available.`
);
}
return false;
}
}
}
}

View file

@ -50,7 +50,12 @@ export async function FailureDebuggingProvider({ getService }: FtrProviderContex
async function onFailure(_: any, test: Test) {
const name = FtrScreenshotFilename.create(test.fullTitle(), { ext: false });
await Promise.all([screenshots.takeForFailure(name), logCurrentUrl(), savePageHtml(name)]);
const hasOpenWindow = await browser.hasOpenWindow();
if (hasOpenWindow) {
await Promise.all([screenshots.takeForFailure(name), logCurrentUrl(), savePageHtml(name)]);
} else {
log.error('Browser is closed, no artifacts were captured for the failure');
}
}
lifecycle.testFailure.add(onFailure);

View file

@ -9,7 +9,6 @@
import { resolve, dirname } from 'path';
import { writeFile, readFileSync, mkdir } from 'fs';
import { promisify } from 'util';
import { NoSuchSessionError } from 'selenium-webdriver/lib/error';
import del from 'del';
@ -85,20 +84,9 @@ export class ScreenshotsService extends FtrService {
}
private async capture(path: string, el?: WebElementWrapper) {
try {
this.log.info(`Taking screenshot "${path}"`);
const screenshot = await (el ? el.takeScreenshot() : this.browser.takeScreenshot());
await mkdirAsync(dirname(path), { recursive: true });
await writeFileAsync(path, screenshot, 'base64');
} catch (err) {
this.log.error('SCREENSHOT FAILED');
this.log.error(err);
if (err instanceof NoSuchSessionError) {
// https://developer.mozilla.org/en-US/docs/Web/WebDriver/Errors/InvalidSessionID
this.log.error(
`WebDriver session is no longer valid.\nProbably Chrome process crashed when it tried to use more memory than what was available.`
);
}
}
this.log.info(`Taking ${el ? 'element' : 'window'} screenshot "${path}"`);
const screenshot = await (el ? el.takeScreenshot() : this.browser.takeScreenshot());
await mkdirAsync(dirname(path), { recursive: true });
await writeFileAsync(path, screenshot, 'base64');
}
}

View file

@ -6,6 +6,7 @@
* Side Public License, v 1.
*/
import { NoSuchSessionError } from 'selenium-webdriver/lib/error';
import { FtrProviderContext } from '../../ftr_provider_context';
import { initWebDriver, BrowserConfig } from './webdriver';
import { Browsers } from './browsers';
@ -27,6 +28,21 @@ export async function RemoteProvider({ getService }: FtrProviderContext) {
}
};
const tryWebDriverCall = async (command: () => Promise<void>) => {
// Since WebDriver session may be deleted, we fail silently. Use only in after hooks.
try {
await command();
} catch (error) {
if (error instanceof NoSuchSessionError) {
// Avoid duplicating NoSuchSessionError error output on each hook
// https://developer.mozilla.org/en-US/docs/Web/WebDriver/Errors/InvalidSessionID
log.error('WebDriver session is no longer valid');
} else {
throw error;
}
}
};
const browserConfig: BrowserConfig = {
logPollingMs: config.get('browser.logPollingMs'),
acceptInsecureCerts: config.get('browser.acceptInsecureCerts'),
@ -72,14 +88,18 @@ export async function RemoteProvider({ getService }: FtrProviderContext) {
});
lifecycle.afterTestSuite.add(async () => {
const { width, height } = windowSizeStack.shift()!;
await driver.manage().window().setRect({ width, height });
await clearBrowserStorage('sessionStorage');
await clearBrowserStorage('localStorage');
await tryWebDriverCall(async () => {
const { width, height } = windowSizeStack.shift()!;
await driver.manage().window().setRect({ width, height });
await clearBrowserStorage('sessionStorage');
await clearBrowserStorage('localStorage');
});
});
lifecycle.cleanup.add(async () => {
await driver.quit();
await tryWebDriverCall(async () => {
await driver.quit();
});
});
return { driver, browserType, consoleLog$ };