Monitoring Dashboard (#630)

This commit is contained in:
Jeremy Edwards
2019-07-09 18:08:04 -07:00
committed by GitHub
parent ce038bc6dd
commit a84eda4dab
7 changed files with 1052 additions and 25 deletions

View File

@ -16,3 +16,7 @@ Steps
1. Select "Export"
1. Select "Save to File"
1. Download the file into this directory.
Some templates came from the Grafana Labs site.
go-processes.json - https://grafana.com/dashboards/6671

File diff suppressed because it is too large Load Diff

View File

@ -15,7 +15,7 @@
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"iteration": 1557247578070,
"iteration": 1562708434300,
"links": [],
"panels": [
{
@ -28,7 +28,7 @@
},
"id": 10,
"panels": [],
"title": "Client",
"title": "Server",
"type": "row"
},
{
@ -58,6 +58,7 @@
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {},
"percentage": false,
"pointradius": 2,
"points": false,
@ -68,11 +69,11 @@
"steppedLine": false,
"targets": [
{
"expr": "rate(grpc_io_client_received_messages_per_rpc_count[$timewindow])",
"expr": "sum by (grpc_server_method)(rate(grpc_io_server_completed_rpcs[$timewindow]))",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{component}} {{grpc_client_method}}",
"legendFormat": "{{grpc_server_method}}",
"refId": "A"
}
],
@ -122,6 +123,7 @@
"bars": false,
"dashLength": 10,
"dashes": false,
"description": "",
"fill": 1,
"gridPos": {
"h": 8,
@ -143,6 +145,7 @@
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {},
"percentage": false,
"pointradius": 2,
"points": false,
@ -153,11 +156,11 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.9, rate(grpc_io_client_roundtrip_latency_bucket[$timewindow]))",
"expr": "histogram_quantile(0.95, sum(rate(grpc_io_server_server_latency_bucket[$timewindow])) by (grpc_server_method, le))",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{component}} {{grpc_client_method}}",
"legendFormat": "{{grpc_server_method}}",
"refId": "A"
}
],
@ -165,7 +168,7 @@
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Latency",
"title": "95%-ile Latency",
"tooltip": {
"shared": true,
"sort": 0,
@ -213,7 +216,7 @@
},
"id": 8,
"panels": [],
"title": "Server",
"title": "Client",
"type": "row"
},
{
@ -243,6 +246,7 @@
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {},
"paceLength": 10,
"percentage": false,
"pointradius": 2,
@ -254,26 +258,19 @@
"steppedLine": false,
"targets": [
{
"expr": "rate(grpc_io_server_received_messages_per_rpc_count[$timewindow])",
"expr": "sum by (grpc_client_method)(rate(grpc_io_client_completed_rpcs[$timewindow]))",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
"legendFormat": "Recv /{{component}}.{{grpc_server_method}}",
"legendFormat": "{{grpc_client_method}}",
"refId": "A"
},
{
"expr": "rate(grpc_io_server_sent_messages_per_rpc_count[$timewindow])",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Sent /{{component}}.{{grpc_server_method}}",
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Request Rate",
"title": "Client Request Rate",
"tooltip": {
"shared": true,
"sort": 0,
@ -316,6 +313,7 @@
"bars": false,
"dashLength": 10,
"dashes": false,
"description": "",
"fill": 1,
"gridPos": {
"h": 9,
@ -337,6 +335,7 @@
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {},
"paceLength": 10,
"percentage": false,
"pointradius": 2,
@ -348,10 +347,11 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.9, rate(grpc_io_server_server_latency_bucket[$timewindow]))",
"expr": "histogram_quantile(0.95, sum(rate(grpc_io_client_roundtrip_latency_bucket[$timewindow])) by (grpc_client_method, le))",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{grpc_server_method}}",
"legendFormat": "{{grpc_client_method}}",
"refId": "A"
}
],
@ -359,7 +359,7 @@
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "RPC Latency",
"title": "95%-ile Client Latency",
"tooltip": {
"shared": true,
"sort": 0,

View File

@ -19,7 +19,9 @@ import (
"time"
"github.com/sirupsen/logrus"
"go.opencensus.io/plugin/ocgrpc"
"go.opencensus.io/stats/view"
"google.golang.org/grpc"
"open-match.dev/open-match/internal/config"
)
@ -30,6 +32,11 @@ var (
})
)
// MetricsForClient returns a gRPC dial option that installs an OpenCensus
// ocgrpc.ClientHandler as the stats handler of the client connection.
// Pass it to grpc.Dial alongside the other dial options for the connection.
func MetricsForClient() grpc.DialOption {
	// A zero-value handler is used; no handler fields are customized here.
	handler := &ocgrpc.ClientHandler{}
	return grpc.WithStatsHandler(handler)
}
// Setup configures the monitoring for the server.
func Setup(mux *http.ServeMux, cfg config.View) {
periodString := cfg.GetString("monitoring.reportingPeriod")

View File

@ -29,6 +29,7 @@ import (
"google.golang.org/grpc"
"google.golang.org/grpc/credentials"
"open-match.dev/open-match/internal/config"
"open-match.dev/open-match/internal/monitoring"
)
var (
@ -77,7 +78,7 @@ func GRPCClientFromConfig(cfg config.View, prefix string) (*grpc.ClientConn, err
// GRPCClientFromEndpoint creates a gRPC client connection from endpoint.
func GRPCClientFromEndpoint(cfg config.View, address string) (*grpc.ClientConn, error) {
// TODO: investigate if it is possible to keep a cache of the certpool and transport credentials
grpcOptions := []grpc.DialOption{}
grpcOptions := newDefaultGRPCDialOptions()
if cfg.GetBool("tls.enabled") {
_, err := os.Stat(cfg.GetString("tls.trustedCertificatePath"))
@ -111,7 +112,7 @@ func GRPCClientFromEndpoint(cfg config.View, address string) (*grpc.ClientConn,
func GRPCClientFromParams(params *ClientParams) (*grpc.ClientConn, error) {
address := fmt.Sprintf("%s:%d", params.Hostname, params.Port)
grpcOptions := []grpc.DialOption{}
grpcOptions := newDefaultGRPCDialOptions()
if params.usingTLS() {
trustedCertPool, err := trustedCertificateFromFileData(params.TrustedCertificate)
@ -257,3 +258,7 @@ func HTTPClientFromParams(params *ClientParams) (*http.Client, string, error) {
return httpClient, baseURL, nil
}
// newDefaultGRPCDialOptions returns a fresh slice holding the dial options
// shared by every gRPC client connection built in this package. It currently
// contains only the monitoring stats handler option; callers append their
// transport-specific options (TLS credentials or insecure) to the result.
func newDefaultGRPCDialOptions() []grpc.DialOption {
	defaults := make([]grpc.DialOption, 0, 1)
	defaults = append(defaults, monitoring.MetricsForClient())
	return defaults
}

View File

@ -88,7 +88,9 @@ func (s *insecureServer) start(params *ServerParams) (func(), error) {
ctx, cancel := context.WithCancel(context.Background())
for _, handlerFunc := range params.handlersForGrpcProxy {
if err = handlerFunc(ctx, s.proxyMux, grpcListener.Addr().String(), []grpc.DialOption{grpc.WithInsecure()}); err != nil {
dialOpts := newDefaultGRPCDialOptions()
dialOpts = append(dialOpts, grpc.WithInsecure())
if err = handlerFunc(ctx, s.proxyMux, grpcListener.Addr().String(), dialOpts); err != nil {
cancel()
return func() {}, errors.WithStack(err)
}

View File

@ -110,7 +110,8 @@ func (s *tlsServer) start(params *ServerParams) (func(), error) {
// Bind gRPC handlers
ctx, cancel := context.WithCancel(context.Background())
httpsToGrpcProxyOptions := []grpc.DialOption{grpc.WithTransportCredentials(credentials.NewClientTLSFromCert(certPoolForGrpcEndpoint, ""))}
httpsToGrpcProxyOptions := newDefaultGRPCDialOptions()
httpsToGrpcProxyOptions = append(httpsToGrpcProxyOptions, grpc.WithTransportCredentials(credentials.NewClientTLSFromCert(certPoolForGrpcEndpoint, "")))
for _, handlerFunc := range params.handlersForGrpcProxy {
if err = handlerFunc(ctx, s.proxyMux, grpcAddress, httpsToGrpcProxyOptions); err != nil {