Skip to content

Commit ccc8ce5

Browse files
committed
changefeedccl: add static labels to changefeed stage metrics
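Adds a labeled name (`changefeed.stage.latency`, or `changefeed.stage.pts.latency` for the protected-timestamp stages) and static `name`/`status` labels to each changefeed stage latency histogram. This will improve the user experience at query time. Part of: #156290. Release note: None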
1 parent 6d96fe6 commit ccc8ce5

File tree

2 files changed

+51
-17
lines changed

2 files changed

+51
-17
lines changed

docs/generated/metrics/metrics.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1817,6 +1817,7 @@ layers:
18171817
derivative: NON_NEGATIVE_DERIVATIVE
18181818
- name: changefeed.stage.checkpoint_job_progress.latency
18191819
exported_name: changefeed_stage_checkpoint_job_progress_latency
1820+
labeled_name: 'changefeed.stage.latency{name: changefeed.stage.checkpoint_job_progress.latency, status: checkpoint_job_progress}'
18201821
description: 'Latency of the changefeed stage: checkpointing job progress'
18211822
y_axis_label: Latency
18221823
type: HISTOGRAM
@@ -1825,6 +1826,7 @@ layers:
18251826
derivative: NONE
18261827
- name: changefeed.stage.downstream_client_send.latency
18271828
exported_name: changefeed_stage_downstream_client_send_latency
1829+
labeled_name: 'changefeed.stage.latency{name: changefeed.stage.downstream_client_send.latency, status: downstream_client_send}'
18281830
description: 'Latency of the changefeed stage: flushing messages from the sink''s client to its downstream. This includes sends that failed for most but not all sinks.'
18291831
y_axis_label: Latency
18301832
type: HISTOGRAM
@@ -1833,6 +1835,7 @@ layers:
18331835
derivative: NONE
18341836
- name: changefeed.stage.emit_row.latency
18351837
exported_name: changefeed_stage_emit_row_latency
1838+
labeled_name: 'changefeed.stage.latency{name: changefeed.stage.emit_row.latency, status: emit_row}'
18361839
description: 'Latency of the changefeed stage: emitting row to sink'
18371840
y_axis_label: Latency
18381841
type: HISTOGRAM
@@ -1841,6 +1844,7 @@ layers:
18411844
derivative: NONE
18421845
- name: changefeed.stage.encode.latency
18431846
exported_name: changefeed_stage_encode_latency
1847+
labeled_name: 'changefeed.stage.latency{name: changefeed.stage.encode.latency, status: encode}'
18441848
description: 'Latency of the changefeed stage: encoding data'
18451849
y_axis_label: Latency
18461850
type: HISTOGRAM
@@ -1849,6 +1853,7 @@ layers:
18491853
derivative: NONE
18501854
- name: changefeed.stage.frontier_persistence.latency
18511855
exported_name: changefeed_stage_frontier_persistence_latency
1856+
labeled_name: 'changefeed.stage.latency{name: changefeed.stage.frontier_persistence.latency, status: frontier_persistence}'
18521857
description: 'Latency of the changefeed stage: persisting frontier to job info'
18531858
y_axis_label: Latency
18541859
type: HISTOGRAM
@@ -1857,6 +1862,7 @@ layers:
18571862
derivative: NONE
18581863
- name: changefeed.stage.kv_feed_buffer.latency
18591864
exported_name: changefeed_stage_kv_feed_buffer_latency
1865+
labeled_name: 'changefeed.stage.latency{name: changefeed.stage.kv_feed_buffer.latency, status: kv_feed_buffer}'
18601866
description: 'Latency of the changefeed stage: waiting to buffer kv events'
18611867
y_axis_label: Latency
18621868
type: HISTOGRAM
@@ -1865,6 +1871,7 @@ layers:
18651871
derivative: NONE
18661872
- name: changefeed.stage.kv_feed_wait_for_table_event.latency
18671873
exported_name: changefeed_stage_kv_feed_wait_for_table_event_latency
1874+
labeled_name: 'changefeed.stage.latency{name: changefeed.stage.kv_feed_wait_for_table_event.latency, status: kv_feed_wait_for_table_event}'
18681875
description: 'Latency of the changefeed stage: waiting for a table schema event to join to the kv event'
18691876
y_axis_label: Latency
18701877
type: HISTOGRAM
@@ -1873,6 +1880,7 @@ layers:
18731880
derivative: NONE
18741881
- name: changefeed.stage.pts.create.latency
18751882
exported_name: changefeed_stage_pts_create_latency
1883+
labeled_name: 'changefeed.stage.pts.latency{name: changefeed.stage.pts.create.latency, status: create}'
18761884
description: 'Latency of the changefeed stage: Time spent creating protected timestamp records on changefeed creation'
18771885
y_axis_label: Latency
18781886
type: HISTOGRAM
@@ -1881,6 +1889,7 @@ layers:
18811889
derivative: NONE
18821890
- name: changefeed.stage.pts.manage.latency
18831891
exported_name: changefeed_stage_pts_manage_latency
1892+
labeled_name: 'changefeed.stage.pts.latency{name: changefeed.stage.pts.manage.latency, status: manage}'
18841893
description: 'Latency of the changefeed stage: Time spent successfully managing protected timestamp records on highwater advance, including time spent creating new protected timestamps when needed'
18851894
y_axis_label: Latency
18861895
type: HISTOGRAM
@@ -1889,6 +1898,7 @@ layers:
18891898
derivative: NONE
18901899
- name: changefeed.stage.pts.manage_error.latency
18911900
exported_name: changefeed_stage_pts_manage_error_latency
1901+
labeled_name: 'changefeed.stage.pts.latency{name: changefeed.stage.pts.manage_error.latency, status: manage_error}'
18921902
description: 'Latency of the changefeed stage: Time spent managing protected timestamp when we eventually error'
18931903
y_axis_label: Latency
18941904
type: HISTOGRAM
@@ -1897,6 +1907,7 @@ layers:
18971907
derivative: NONE
18981908
- name: changefeed.stage.rangefeed_buffer_checkpoint.latency
18991909
exported_name: changefeed_stage_rangefeed_buffer_checkpoint_latency
1910+
labeled_name: 'changefeed.stage.latency{name: changefeed.stage.rangefeed_buffer_checkpoint.latency, status: rangefeed_buffer_checkpoint}'
19001911
description: 'Latency of the changefeed stage: buffering rangefeed checkpoint events'
19011912
y_axis_label: Latency
19021913
type: HISTOGRAM
@@ -1905,6 +1916,7 @@ layers:
19051916
derivative: NONE
19061917
- name: changefeed.stage.rangefeed_buffer_value.latency
19071918
exported_name: changefeed_stage_rangefeed_buffer_value_latency
1919+
labeled_name: 'changefeed.stage.latency{name: changefeed.stage.rangefeed_buffer_value.latency, status: rangefeed_buffer_value}'
19081920
description: 'Latency of the changefeed stage: buffering rangefeed value events'
19091921
y_axis_label: Latency
19101922
type: HISTOGRAM

pkg/ccl/changefeedccl/timers/timers.go

Lines changed: 39 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
package timers
77

88
import (
9+
"fmt"
910
"time"
1011

1112
"github.com/cockroachdb/cockroach/pkg/util/metric"
@@ -34,34 +35,55 @@ func (*Timers) MetricStruct() {}
3435
var _ metric.Struct = &Timers{}
3536

3637
func New(histogramWindow time.Duration) *Timers {
37-
histogramOptsFor := func(name, desc string) metric.HistogramOptions {
38+
const (
39+
stagePrefix = "changefeed.stage"
40+
latencySuffix = "latency"
41+
ptsSubCategory = "pts"
42+
)
43+
44+
histogramOptsFor := func(nameFormat, labeledName, labelStatus, desc string) metric.HistogramOptions {
45+
name := fmt.Sprintf(nameFormat, labelStatus)
3846
return metric.HistogramOptions{
3947
Metadata: metric.Metadata{
40-
Name: name,
41-
Help: desc,
42-
Unit: metric.Unit_NANOSECONDS,
43-
Measurement: "Latency",
48+
Name: name,
49+
Help: desc,
50+
Unit: metric.Unit_NANOSECONDS,
51+
Measurement: "Latency",
52+
LabeledName: labeledName,
53+
StaticLabels: metric.MakeLabelPairs(metric.LabelName, name, metric.LabelStatus, labelStatus),
4454
},
4555
Duration: histogramWindow,
4656
Buckets: prometheus.ExponentialBucketsRange(float64(1*time.Microsecond), float64(1*time.Hour), 60),
4757
Mode: metric.HistogramModePrometheus,
4858
}
4959
}
5060

61+
stageOpts := func(labelStatus, desc string) metric.HistogramOptions {
62+
nameFormat := fmt.Sprintf("%s.%%s.%s", stagePrefix, latencySuffix)
63+
labeledName := fmt.Sprintf("%s.%s", stagePrefix, latencySuffix)
64+
return histogramOptsFor(nameFormat, labeledName, labelStatus, desc)
65+
}
66+
67+
ptsStageOpts := func(labelStatus, desc string) metric.HistogramOptions {
68+
nameFormat := fmt.Sprintf("%s.%s.%%s.%s", stagePrefix, ptsSubCategory, latencySuffix)
69+
labeledName := fmt.Sprintf("%s.%s.%s", stagePrefix, ptsSubCategory, latencySuffix)
70+
return histogramOptsFor(nameFormat, labeledName, labelStatus, desc)
71+
}
72+
5173
b := aggmetric.MakeBuilder("scope")
5274
return &Timers{
53-
CheckpointJobProgress: b.Histogram(histogramOptsFor("changefeed.stage.checkpoint_job_progress.latency", "Latency of the changefeed stage: checkpointing job progress")),
54-
FrontierPersistence: b.Histogram(histogramOptsFor("changefeed.stage.frontier_persistence.latency", "Latency of the changefeed stage: persisting frontier to job info")),
55-
Encode: b.Histogram(histogramOptsFor("changefeed.stage.encode.latency", "Latency of the changefeed stage: encoding data")),
56-
EmitRow: b.Histogram(histogramOptsFor("changefeed.stage.emit_row.latency", "Latency of the changefeed stage: emitting row to sink")),
57-
DownstreamClientSend: b.Histogram(histogramOptsFor("changefeed.stage.downstream_client_send.latency", "Latency of the changefeed stage: flushing messages from the sink's client to its downstream. This includes sends that failed for most but not all sinks.")),
58-
KVFeedWaitForTableEvent: b.Histogram(histogramOptsFor("changefeed.stage.kv_feed_wait_for_table_event.latency", "Latency of the changefeed stage: waiting for a table schema event to join to the kv event")),
59-
KVFeedBuffer: b.Histogram(histogramOptsFor("changefeed.stage.kv_feed_buffer.latency", "Latency of the changefeed stage: waiting to buffer kv events")),
60-
RangefeedBufferValue: b.Histogram(histogramOptsFor("changefeed.stage.rangefeed_buffer_value.latency", "Latency of the changefeed stage: buffering rangefeed value events")),
61-
RangefeedBufferCheckpoint: b.Histogram(histogramOptsFor("changefeed.stage.rangefeed_buffer_checkpoint.latency", "Latency of the changefeed stage: buffering rangefeed checkpoint events")),
62-
PTSManage: b.Histogram(histogramOptsFor("changefeed.stage.pts.manage.latency", "Latency of the changefeed stage: Time spent successfully managing protected timestamp records on highwater advance, including time spent creating new protected timestamps when needed")),
63-
PTSManageError: b.Histogram(histogramOptsFor("changefeed.stage.pts.manage_error.latency", "Latency of the changefeed stage: Time spent managing protected timestamp when we eventually error")),
64-
PTSCreate: b.Histogram(histogramOptsFor("changefeed.stage.pts.create.latency", "Latency of the changefeed stage: Time spent creating protected timestamp records on changefeed creation")),
75+
CheckpointJobProgress: b.Histogram(stageOpts("checkpoint_job_progress", "Latency of the changefeed stage: checkpointing job progress")),
76+
FrontierPersistence: b.Histogram(stageOpts("frontier_persistence", "Latency of the changefeed stage: persisting frontier to job info")),
77+
Encode: b.Histogram(stageOpts("encode", "Latency of the changefeed stage: encoding data")),
78+
EmitRow: b.Histogram(stageOpts("emit_row", "Latency of the changefeed stage: emitting row to sink")),
79+
DownstreamClientSend: b.Histogram(stageOpts("downstream_client_send", "Latency of the changefeed stage: flushing messages from the sink's client to its downstream. This includes sends that failed for most but not all sinks.")),
80+
KVFeedWaitForTableEvent: b.Histogram(stageOpts("kv_feed_wait_for_table_event", "Latency of the changefeed stage: waiting for a table schema event to join to the kv event")),
81+
KVFeedBuffer: b.Histogram(stageOpts("kv_feed_buffer", "Latency of the changefeed stage: waiting to buffer kv events")),
82+
RangefeedBufferValue: b.Histogram(stageOpts("rangefeed_buffer_value", "Latency of the changefeed stage: buffering rangefeed value events")),
83+
RangefeedBufferCheckpoint: b.Histogram(stageOpts("rangefeed_buffer_checkpoint", "Latency of the changefeed stage: buffering rangefeed checkpoint events")),
84+
PTSManage: b.Histogram(ptsStageOpts("manage", "Latency of the changefeed stage: Time spent successfully managing protected timestamp records on highwater advance, including time spent creating new protected timestamps when needed")),
85+
PTSManageError: b.Histogram(ptsStageOpts("manage_error", "Latency of the changefeed stage: Time spent managing protected timestamp when we eventually error")),
86+
PTSCreate: b.Histogram(ptsStageOpts("create", "Latency of the changefeed stage: Time spent creating protected timestamp records on changefeed creation")),
6587
}
6688
}
6789

0 commit comments

Comments
 (0)