609609 "title" : " TPOT (p95) by Model (sec/token)" ,
610610 "type" : " timeseries"
611611 },
612+ {
613+ "datasource" : {
614+ "type" : " prometheus" ,
615+ "uid" : " ${DS_PROMETHEUS}"
616+ },
617+ "fieldConfig" : {
618+ "defaults" : {
619+ "color" : {
620+ "mode" : " palette-classic"
621+ },
622+ "custom" : {
623+ "axisBorderShow" : false ,
624+ "axisCenteredZero" : false ,
625+ "axisColorMode" : " text" ,
626+ "axisLabel" : " Seconds" ,
627+ "axisPlacement" : " auto" ,
628+ "barAlignment" : 0 ,
629+ "barWidthFactor" : 0.6 ,
630+ "drawStyle" : " line" ,
631+ "fillOpacity" : 10 ,
632+ "gradientMode" : " none" ,
633+ "hideFrom" : {
634+ "legend" : false ,
635+ "tooltip" : false ,
636+ "viz" : false
637+ },
638+ "insertNulls" : false ,
639+ "lineInterpolation" : " smooth" ,
640+ "lineWidth" : 1 ,
641+ "pointSize" : 5 ,
642+ "scaleDistribution" : {
643+ "type" : " linear"
644+ },
645+ "showPoints" : " auto" ,
646+ "spanNulls" : false ,
647+ "stacking" : {
648+ "group" : " A" ,
649+ "mode" : " none"
650+ },
651+ "thresholdsStyle" : {
652+ "mode" : " off"
653+ }
654+ },
655+ "mappings" : [],
656+ "thresholds" : {
657+ "mode" : " absolute" ,
658+ "steps" : [
659+ {
660+ "color" : " green" ,
661+ "value" : null
662+ }
663+ ]
664+ },
665+ "unit" : " s"
666+ },
667+ "overrides" : []
668+ },
669+ "gridPos" : {
670+ "h" : 8 ,
671+ "w" : 12 ,
672+ "x" : 12 ,
673+ "y" : 24
674+ },
675+ "id" : 7 ,
676+ "options" : {
677+ "legend" : {
678+ "calcs" : [
679+ " mean" ,
680+ " max" ,
681+ " lastNotNull"
682+ ],
683+ "displayMode" : " table" ,
684+ "placement" : " bottom" ,
685+ "showLegend" : true
686+ },
687+ "tooltip" : {
688+ "hideZeros" : false ,
689+ "mode" : " multi" ,
690+ "sort" : " none"
691+ }
692+ },
693+ "pluginVersion" : " 11.5.1" ,
694+ "targets" : [
695+ {
696+ "datasource" : {
697+ "type" : " prometheus" ,
698+ "uid" : " ${DS_PROMETHEUS}"
699+ },
700+ "editorMode" : " code" ,
701+ "expr" : " histogram_quantile(0.50, sum(rate(llm_model_completion_latency_seconds_bucket[5m])) by (le, model))" ,
702+ "legendFormat" : " p50 {{model}}" ,
703+ "range" : true ,
704+ "refId" : " A"
705+ },
706+ {
707+ "datasource" : {
708+ "type" : " prometheus" ,
709+ "uid" : " ${DS_PROMETHEUS}"
710+ },
711+ "editorMode" : " code" ,
712+ "expr" : " histogram_quantile(0.90, sum(rate(llm_model_completion_latency_seconds_bucket[5m])) by (le, model))" ,
713+ "legendFormat" : " p90 {{model}}" ,
714+ "range" : true ,
715+ "refId" : " B"
716+ },
717+ {
718+ "datasource" : {
719+ "type" : " prometheus" ,
720+ "uid" : " ${DS_PROMETHEUS}"
721+ },
722+ "editorMode" : " code" ,
723+ "expr" : " histogram_quantile(0.99, sum(rate(llm_model_completion_latency_seconds_bucket[5m])) by (le, model))" ,
724+ "legendFormat" : " p99 {{model}}" ,
725+ "range" : true ,
726+ "refId" : " C"
727+ }
728+ ],
729+ "title" : " Model Completion Latency (p50/p90/p99)" ,
730+ "type" : " timeseries"
731+ },
612732 {
613733 "datasource" : {
614734 "type" : " prometheus" ,
672792 "x" : 0 ,
673793 "y" : 24
674794 },
675- "id" : 7 ,
795+ "id" : 8 ,
676796 "options" : {
677797 "legend" : {
678798 "calcs" : [
779899 "h" : 8 ,
780900 "w" : 12 ,
781901 "x" : 12 ,
782- "y" : 24
902+ "y" : 48
783903 },
784- "id" : 8 ,
904+ "id" : 9 ,
785905 "options" : {
786906 "legend" : {
787907 "calcs" : [
8831003 "x" : 0 ,
8841004 "y" : 32
8851005 },
886- "id" : 9 ,
1006+ "id" : 10 ,
8871007 "options" : {
8881008 "legend" : {
8891009 "calcs" : [
9671087 "x" : 12 ,
9681088 "y" : 32
9691089 },
970- "id" : 10 ,
1090+ "id" : 11 ,
9711091 "options" : {
9721092 "displayMode" : " gradient" ,
9731093 "legend" : {
10391159 "x" : 0 ,
10401160 "y" : 40
10411161 },
1042- "id" : 11 ,
1162+ "id" : 12 ,
10431163 "options" : {
10441164 "displayMode" : " gradient" ,
10451165 "legend" : {
10881208 },
10891209 "fieldConfig" : {
10901210 "defaults" : {
1091- "color" : {
1092- "mode" : " palette-classic"
1093- },
1211+ "color" : { "mode" : " palette-classic" },
10941212 "custom" : {
1095- "axisBorderShow" : false ,
1096- "axisCenteredZero" : false ,
1097- "axisColorMode" : " text" ,
1098- "axisLabel" : " Seconds" ,
1099- "axisPlacement" : " auto" ,
1100- "barAlignment" : 0 ,
1101- "barWidthFactor" : 0.6 ,
1213+ "axisLabel" : " Requests/sec" ,
11021214 "drawStyle" : " line" ,
11031215 "fillOpacity" : 10 ,
1104- "gradientMode" : " none" ,
1105- "hideFrom" : {
1106- "legend" : false ,
1107- "tooltip" : false ,
1108- "viz" : false
1109- },
1110- "insertNulls" : false ,
11111216 "lineInterpolation" : " smooth" ,
11121217 "lineWidth" : 1 ,
1113- "pointSize" : 5 ,
1114- "scaleDistribution" : {
1115- "type" : " linear"
1116- },
1117- "showPoints" : " auto" ,
1118- "spanNulls" : false ,
1119- "stacking" : {
1120- "group" : " A" ,
1121- "mode" : " none"
1122- },
1123- "thresholdsStyle" : {
1124- "mode" : " off"
1125- }
1218+ "showPoints" : " auto"
11261219 },
11271220 "mappings" : [],
1128- "thresholds" : {
1129- "mode" : " absolute" ,
1130- "steps" : [
1131- {
1132- "color" : " green" ,
1133- "value" : null
1134- }
1135- ]
1136- },
1137- "unit" : " s"
1221+ "thresholds" : { "mode" : " absolute" , "steps" : [{"color" :" green" ,"value" :null }] },
1222+ "unit" : " reqps"
11381223 },
11391224 "overrides" : []
11401225 },
1141- "gridPos" : {
1142- "h" : 8 ,
1143- "w" : 12 ,
1144- "x" : 12 ,
1145- "y" : 40
1146- },
1147- "id" : 12 ,
1226+ "gridPos" : { "h" : 8 , "w" : 12 , "x" : 0 , "y" : 36 },
1227+ "id" : 13 ,
11481228 "options" : {
1149- "legend" : {
1150- "calcs" : [
1151- " mean" ,
1152- " max" ,
1153- " lastNotNull"
1154- ],
1155- "displayMode" : " table" ,
1156- "placement" : " bottom" ,
1157- "showLegend" : true
1158- },
1159- "tooltip" : {
1160- "hideZeros" : false ,
1161- "mode" : " multi" ,
1162- "sort" : " none"
1163- }
1229+ "legend" : { "calcs" : [" mean" ," max" ," lastNotNull" ], "displayMode" : " table" , "placement" : " bottom" , "showLegend" : true },
1230+ "tooltip" : { "mode" : " multi" , "sort" : " none" }
11641231 },
1165- "pluginVersion" : " 11.5.1" ,
11661232 "targets" : [
11671233 {
1168- "datasource" : {
1169- "type" : " prometheus" ,
1170- "uid" : " ${DS_PROMETHEUS}"
1171- },
1234+ "datasource" : { "type" : " prometheus" , "uid" : " ${DS_PROMETHEUS}" },
11721235 "editorMode" : " code" ,
1173- "expr" : " histogram_quantile(0.50, sum(rate(llm_model_completion_latency_seconds_bucket [5m])) by (le, model) )" ,
1174- "legendFormat" : " p50 {{model }}" ,
1236+ "expr" : " sum(rate(llm_responses_adapter_requests_total [5m])) by (streaming )" ,
1237+ "legendFormat" : " Requests {{streaming }}" ,
11751238 "range" : true ,
11761239 "refId" : " A"
1177- },
1178- {
1179- "datasource" : {
1180- "type" : " prometheus" ,
1181- "uid" : " ${DS_PROMETHEUS}"
1240+ }
1241+ ],
1242+ "title" : " Responses Adapter Requests Rate" ,
1243+ "type" : " timeseries"
1244+ },
1245+ {
1246+ "datasource" : {
1247+ "type" : " prometheus" ,
1248+ "uid" : " ${DS_PROMETHEUS}"
1249+ },
1250+ "fieldConfig" : {
1251+ "defaults" : {
1252+ "color" : { "mode" : " palette-classic" },
1253+ "custom" : {
1254+ "axisLabel" : " Events/sec" ,
1255+ "drawStyle" : " line" ,
1256+ "fillOpacity" : 10 ,
1257+ "lineInterpolation" : " smooth" ,
1258+ "lineWidth" : 1 ,
1259+ "showPoints" : " auto"
11821260 },
1183- "editorMode" : " code" ,
1184- "expr" : " histogram_quantile(0.90, sum(rate(llm_model_completion_latency_seconds_bucket[5m])) by (le, model))" ,
1185- "legendFormat" : " p90 {{model}}" ,
1186- "range" : true ,
1187- "refId" : " B"
1261+ "mappings" : [],
1262+ "thresholds" : { "mode" : " absolute" , "steps" : [{"color" :" green" ,"value" :null }] },
1263+ "unit" : " ops"
11881264 },
1265+ "overrides" : []
1266+ },
1267+ "gridPos" : { "h" : 8 , "w" : 12 , "x" : 12 , "y" : 36 },
1268+ "id" : 14 ,
1269+ "options" : {
1270+ "legend" : { "calcs" : [" mean" ," max" ," lastNotNull" ], "displayMode" : " table" , "placement" : " bottom" , "showLegend" : true },
1271+ "tooltip" : { "mode" : " multi" , "sort" : " none" }
1272+ },
1273+ "targets" : [
11891274 {
1190- "datasource" : {
1191- "type" : " prometheus" ,
1192- "uid" : " ${DS_PROMETHEUS}"
1193- },
1275+ "datasource" : { "type" : " prometheus" , "uid" : " ${DS_PROMETHEUS}" },
11941276 "editorMode" : " code" ,
1195- "expr" : " histogram_quantile(0.99, sum(rate(llm_model_completion_latency_seconds_bucket [5m])) by (le, model) )" ,
1196- "legendFormat" : " p99 {{model }}" ,
1277+ "expr" : " sum(rate(llm_responses_adapter_sse_events_total [5m])) by (event_type )" ,
1278+ "legendFormat" : " {{event_type }}" ,
11971279 "range" : true ,
1198- "refId" : " C "
1280+ "refId" : " A "
11991281 }
12001282 ],
1201- "title" : " Model Completion Latency (p50/p90/p99) " ,
1283+ "title" : " Responses Adapter SSE Events Rate " ,
12021284 "type" : " timeseries"
1203- }
1285+ }
12041286 ],
12051287 "preload" : false ,
12061288 "refresh" : " 10s" ,
12331315 "timezone" : " " ,
12341316 "title" : " LLM Router Metrics" ,
12351317 "uid" : " llm-router-metrics" ,
1236- "version" : 14 ,
1318+ "version" : 15 ,
12371319 "weekStart" : " "
12381320}
0 commit comments