Improve req handling dashboard (#8322)

Display metrics per method/host/path combination, adjust panel titles, and sort tooltips in decreasing order

Signed-off-by: Naseem Ullah <24660299+naseemkullah@users.noreply.github.com>
This commit is contained in:
Naseem Ullah 2022-04-12 17:40:46 -04:00 committed by GitHub
parent 47a266df45
commit ea5a3036bf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -9,17 +9,18 @@
"pluginName": "Prometheus"
}
],
"__elements": [],
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "6.6.0"
"version": "8.3.4"
},
{
"type": "panel",
"id": "graph",
"name": "Graph",
"name": "Graph (old)",
"version": ""
},
{
@ -38,25 +39,41 @@
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "",
"editable": true,
"fiscalYearStartMonth": 0,
"gnetId": 9614,
"graphTooltip": 1,
"id": null,
"iteration": 1582146566338,
"iteration": 1646929474557,
"links": [],
"liveNow": false,
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_PROMETHEUS}",
"description": "Total time taken for nginx and upstream servers to process a request and send a response",
"datasource": {
"uid": "${DS_PROMETHEUS}"
},
"description": "Total time for NGINX and upstream servers to process a request and send a response",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
@ -80,9 +97,10 @@
"linewidth": 1,
"nullPointMode": "null",
"options": {
"dataLinks": []
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.3.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -92,63 +110,52 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(\n 0.5,\n sum by (le)(\n rate(\n nginx_ingress_controller_request_duration_seconds_bucket{\n ingress =~ \"$ingress\"\n }[1m]\n )\n )\n)",
"expr": "histogram_quantile(\n 0.5,\n sum by (le)(\n rate(\n nginx_ingress_controller_request_duration_seconds_bucket{\n ingress =~ \"$ingress\"\n }[5m]\n )\n )\n)",
"interval": "",
"legendFormat": ".5",
"refId": "D"
},
{
"expr": "histogram_quantile(\n 0.95,\n sum by (le)(\n rate(\n nginx_ingress_controller_request_duration_seconds_bucket{\n ingress =~ \"$ingress\"\n }[1m]\n )\n )\n)",
"expr": "histogram_quantile(\n 0.95,\n sum by (le)(\n rate(\n nginx_ingress_controller_request_duration_seconds_bucket{\n ingress =~ \"$ingress\"\n }[5m]\n )\n )\n)",
"interval": "",
"legendFormat": ".95",
"refId": "B"
},
{
"expr": "histogram_quantile(\n 0.99,\n sum by (le)(\n rate(\n nginx_ingress_controller_request_duration_seconds_bucket{\n ingress =~ \"$ingress\"\n }[1m]\n )\n )\n)",
"expr": "histogram_quantile(\n 0.99,\n sum by (le)(\n rate(\n nginx_ingress_controller_request_duration_seconds_bucket{\n ingress =~ \"$ingress\"\n }[5m]\n )\n )\n)",
"interval": "",
"legendFormat": ".99",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Total request handling time",
"title": "Request Latency Percentiles",
"tooltip": {
"shared": true,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
"align": false
}
},
{
@ -156,8 +163,16 @@
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_PROMETHEUS}",
"datasource": {
"uid": "${DS_PROMETHEUS}"
},
"description": "The time spent on receiving the response from the upstream server",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
@ -181,9 +196,10 @@
"linewidth": 1,
"nullPointMode": "null",
"options": {
"dataLinks": []
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.3.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -193,7 +209,7 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(\n 0.5,\n sum by (le)(\n rate(\n nginx_ingress_controller_response_duration_seconds_bucket{\n ingress =~ \"$ingress\"\n }[1m]\n )\n )\n)",
"expr": "histogram_quantile(\n 0.5,\n sum by (le)(\n rate(\n nginx_ingress_controller_response_duration_seconds_bucket{\n ingress =~ \"$ingress\"\n }[5m]\n )\n )\n)",
"instant": false,
"interval": "",
"intervalFactor": 1,
@ -201,57 +217,46 @@
"refId": "D"
},
{
"expr": "histogram_quantile(\n 0.95,\n sum by (le)(\n rate(\n nginx_ingress_controller_response_duration_seconds_bucket{\n ingress =~ \"$ingress\"\n }[1m]\n )\n )\n)",
"expr": "histogram_quantile(\n 0.95,\n sum by (le)(\n rate(\n nginx_ingress_controller_response_duration_seconds_bucket{\n ingress =~ \"$ingress\"\n }[5m]\n )\n )\n)",
"interval": "",
"legendFormat": ".95",
"refId": "B"
},
{
"expr": "histogram_quantile(\n 0.99,\n sum by (le)(\n rate(\n nginx_ingress_controller_response_duration_seconds_bucket{\n ingress =~ \"$ingress\"\n }[1m]\n )\n )\n)",
"expr": "histogram_quantile(\n 0.99,\n sum by (le)(\n rate(\n nginx_ingress_controller_response_duration_seconds_bucket{\n ingress =~ \"$ingress\"\n }[5m]\n )\n )\n)",
"interval": "",
"legendFormat": ".99",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Upstream response time",
"title": "Upstream Response Latency Percentiles",
"tooltip": {
"shared": true,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
"align": false
}
},
{
@ -259,7 +264,15 @@
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_PROMETHEUS}",
"datasource": {
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
@ -285,9 +298,10 @@
"linewidth": 1,
"nullPointMode": "null",
"options": {
"dataLinks": []
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.3.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -297,52 +311,41 @@
"steppedLine": false,
"targets": [
{
"expr": " sum by (path)(\n rate(\n nginx_ingress_controller_request_duration_seconds_count{\n ingress =~ \"$ingress\"\n }[1m]\n )\n )\n",
"expr": " sum by (method, host, path)(\n rate(\n nginx_ingress_controller_request_duration_seconds_count{\n ingress =~ \"$ingress\"\n }[5m]\n )\n )\n",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{ path }}",
"legendFormat": "{{ method }} {{ host }}{{path }}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Request volume by Path",
"title": "Request Rate by Method and Path",
"tooltip": {
"shared": true,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "reqps",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
"align": false
}
},
{
@ -350,8 +353,16 @@
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_PROMETHEUS}",
"datasource": {
"uid": "${DS_PROMETHEUS}"
},
"description": "For each path observed, its median upstream response time",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
@ -377,9 +388,10 @@
"linewidth": 1,
"nullPointMode": "null",
"options": {
"dataLinks": []
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.3.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -389,52 +401,41 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(\n .5,\n sum by (le, path)(\n rate(\n nginx_ingress_controller_response_duration_seconds_bucket{\n ingress =~ \"$ingress\"\n }[1m]\n )\n )\n)",
"expr": "histogram_quantile(\n .5,\n sum by (le, method, host, path)(\n rate(\n nginx_ingress_controller_response_duration_seconds_bucket{\n ingress =~ \"$ingress\"\n }[5m]\n )\n )\n)",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{ path }}",
"legendFormat": "{{ method }} {{ host }}{{path }}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Median upstream response time by Path",
"title": "Median Upstream Response Time by Method and Path",
"tooltip": {
"shared": true,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
"align": false
}
},
{
@ -442,8 +443,16 @@
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_PROMETHEUS}",
"datasource": {
"uid": "${DS_PROMETHEUS}"
},
"description": "Percentage of 4xx and 5xx responses among all responses.",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
@ -469,9 +478,10 @@
"linewidth": 1,
"nullPointMode": "null as zero",
"options": {
"dataLinks": []
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.3.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -481,52 +491,41 @@
"steppedLine": false,
"targets": [
{
"expr": "sum by (path) (rate(nginx_ingress_controller_request_duration_seconds_count{\n ingress =~ \"$ingress\",\n status =~ \"[4-5].*\"\n}[1m])) / sum by (path) (rate(nginx_ingress_controller_request_duration_seconds_count{\n ingress =~ \"$ingress\",\n}[1m]))",
"expr": "sum by (method, host, path) (rate(nginx_ingress_controller_request_duration_seconds_count{\n ingress =~ \"$ingress\",\n status =~ \"[4-5].*\"\n}[5m])) / sum by (method, host, path) (rate(nginx_ingress_controller_request_duration_seconds_count{\n ingress =~ \"$ingress\",\n}[5m]))",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{ path }}",
"legendFormat": "{{ method }} {{ host }}{{path }}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Response error rate by Path",
"title": "Response Error Rate by Method and Path",
"tooltip": {
"shared": true,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
"align": false
}
},
{
@ -534,8 +533,16 @@
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_PROMETHEUS}",
"datasource": {
"uid": "${DS_PROMETHEUS}"
},
"description": "For each path observed, the sum of upstream request time",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
@ -561,9 +568,10 @@
"linewidth": 1,
"nullPointMode": "null",
"options": {
"dataLinks": []
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.3.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -573,52 +581,41 @@
"steppedLine": false,
"targets": [
{
"expr": "sum by (path) (rate(nginx_ingress_controller_response_duration_seconds_sum{ingress =~ \"$ingress\"}[1m]))",
"expr": "sum by (method, host, path) (rate(nginx_ingress_controller_response_duration_seconds_sum{ingress =~ \"$ingress\"}[5m]))",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{ path }}",
"legendFormat": "{{ method }} {{ host }}{{path }}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Upstream time consumed by Path",
"title": "Upstream Response Time by Method and Path",
"tooltip": {
"shared": true,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
"align": false
}
},
{
@ -626,7 +623,15 @@
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_PROMETHEUS}",
"datasource": {
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
@ -652,9 +657,10 @@
"linewidth": 1,
"nullPointMode": "null",
"options": {
"dataLinks": []
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.3.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -664,52 +670,41 @@
"steppedLine": false,
"targets": [
{
"expr": " sum (\n rate(\n nginx_ingress_controller_request_duration_seconds_count{\n ingress =~ \"$ingress\",\n status =~\"[4-5].*\",\n }[1m]\n )\n ) by(path, status)\n",
"expr": " sum (\n rate(\n nginx_ingress_controller_request_duration_seconds_count{\n ingress =~ \"$ingress\",\n status =~\"[4-5].*\",\n }[5m]\n )\n ) by(method, host, path, status)\n",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{ path }} {{ status }}",
"legendFormat": "{{ method }} {{ host }}{{path }} {{ status }}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Response error volume by Path",
"title": "Response Error Rate by Method and Path",
"tooltip": {
"shared": true,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "reqps",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
"align": false
}
},
{
@ -717,7 +712,15 @@
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_PROMETHEUS}",
"datasource": {
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
@ -743,9 +746,10 @@
"linewidth": 1,
"nullPointMode": "null",
"options": {
"dataLinks": []
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.3.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -755,60 +759,49 @@
"steppedLine": false,
"targets": [
{
"expr": "sum (\n rate (\n nginx_ingress_controller_response_size_sum {\n ingress =~ \"$ingress\",\n }[1m]\n )\n) by (path) / sum (\n rate(\n nginx_ingress_controller_response_size_count {\n ingress =~ \"$ingress\",\n }[1m]\n )\n) by (path)\n",
"expr": "sum (\n rate (\n nginx_ingress_controller_response_size_sum {\n ingress =~ \"$ingress\",\n }[5m]\n )\n) by (method, host, path) / sum (\n rate(\n nginx_ingress_controller_response_size_count {\n ingress =~ \"$ingress\",\n }[5m]\n )\n) by (method, host, path)\n",
"hide": false,
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{ path }}",
"legendFormat": "{{ method }} {{ host }}{{path }}",
"refId": "D"
},
{
"expr": " sum (rate(nginx_ingress_controller_response_size_bucket{\n ingress =~ \"$ingress\",\n }[1m])) by (le)\n",
"expr": " sum (rate(nginx_ingress_controller_response_size_bucket{\n ingress =~ \"$ingress\",\n }[5m])) by (le)\n",
"hide": true,
"legendFormat": "{{le}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Average response size by Path",
"title": "Average Response Size by Method and Path",
"tooltip": {
"shared": true,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "decbytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
"align": false
}
},
{
@ -816,7 +809,15 @@
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_PROMETHEUS}",
"datasource": {
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
@ -840,9 +841,10 @@
"linewidth": 1,
"nullPointMode": "null",
"options": {
"dataLinks": []
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.3.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -852,7 +854,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum (\n rate(\n nginx_ingress_controller_ingress_upstream_latency_seconds_sum {\n ingress =~ \"$ingress\",\n }[1m]\n)) / sum (\n rate(\n nginx_ingress_controller_ingress_upstream_latency_seconds_count {\n ingress =~ \"$ingress\",\n }[1m]\n )\n)\n",
"expr": "sum (\n rate(\n nginx_ingress_controller_ingress_upstream_latency_seconds_sum {\n ingress =~ \"$ingress\",\n }[5m]\n)) / sum (\n rate(\n nginx_ingress_controller_ingress_upstream_latency_seconds_count {\n ingress =~ \"$ingress\",\n }[5m]\n )\n)\n",
"hide": false,
"instant": false,
"interval": "",
@ -862,49 +864,38 @@
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Upstream service latency",
"title": "Upstream Service Latency",
"tooltip": {
"shared": true,
"sort": 0,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
"align": false
}
}
],
"refresh": "30s",
"schemaVersion": 22,
"schemaVersion": 34,
"style": "dark",
"tags": [
"nginx"
@ -912,19 +903,29 @@
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "Prometheus",
"value": "Prometheus"
},
"hide": 0,
"includeAll": false,
"label": "datasource",
"multi": false,
"name": "DS_PROMETHEUS",
"options": [],
"query": "prometheus",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"type": "datasource"
},
{
"allValue": ".*",
"current": {},
"datasource": "${DS_PROMETHEUS}",
"datasource": {
"uid": "${DS_PROMETHEUS}"
},
"definition": "label_values(nginx_ingress_controller_requests, ingress) ",
"hide": 0,
"includeAll": true,
@ -932,13 +933,15 @@
"multi": false,
"name": "ingress",
"options": [],
"query": "label_values(nginx_ingress_controller_requests, ingress) ",
"query": {
"query": "label_values(nginx_ingress_controller_requests, ingress) ",
"refId": "Prometheus-ingress-Variable-Query"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 2,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
@ -977,5 +980,6 @@
"timezone": "browser",
"title": "Request Handling Performance",
"uid": "4GFbkOsZk",
"version": 1
"version": 1,
"weekStart": ""
}