|
|
|
package cgroups
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"fmt"
|
|
|
|
"sync"
|
|
|
|
|
|
|
|
cgroups "github.com/containerd/cgroups/v3/cgroup2"
|
|
|
|
"github.com/containerd/cgroups/v3/cgroup2/stats"
|
|
|
|
"go.opentelemetry.io/otel"
|
|
|
|
"go.opentelemetry.io/otel/metric"
|
|
|
|
)
|
|
|
|
|
|
|
|
func WithProvider(provider metric.MeterProvider) Option {
|
|
|
|
return func(c *config) {
|
|
|
|
c.provider = provider
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func WithPrefic(name string) Option {
|
|
|
|
return func(c *config) {
|
|
|
|
c.prefix = name
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func WithControllers(ctrls ...Controller) Option {
|
|
|
|
return func(c *config) {
|
|
|
|
c.controllers = ctrls
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Option configures optional settings of the cgroup metrics collection.
|
|
|
|
type Option func(*config)
|
|
|
|
|
|
|
|
func newConfig(opts ...Option) config {
|
|
|
|
cfg := config{
|
|
|
|
controllers: []Controller{ControllerCPU},
|
|
|
|
provider: otel.GetMeterProvider(),
|
|
|
|
prefix: "cgroups.",
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, opt := range opts {
|
|
|
|
opt(&cfg)
|
|
|
|
}
|
|
|
|
|
|
|
|
return cfg
|
|
|
|
}
|
|
|
|
|
|
|
|
type config struct {
|
|
|
|
controllers []Controller
|
|
|
|
provider metric.MeterProvider
|
|
|
|
prefix string
|
|
|
|
}
|
|
|
|
|
|
|
|
type cgroup struct {
|
|
|
|
meter metric.Meter
|
|
|
|
stats func() (*stats.Metrics, error)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *cgroup) cpu(prefix string) (func(context.Context, metric.Observer, *stats.Metrics), []metric.Observable, error) {
|
|
|
|
var (
|
|
|
|
err error
|
|
|
|
usageUsec metric.Int64ObservableGauge
|
|
|
|
userUsec metric.Int64ObservableGauge
|
|
|
|
systemUsec metric.Int64ObservableGauge
|
|
|
|
nrPeriods metric.Int64ObservableGauge
|
|
|
|
nrThrottled metric.Int64ObservableGauge
|
|
|
|
throttledUsec metric.Int64ObservableGauge
|
|
|
|
)
|
|
|
|
|
|
|
|
if usageUsec, err = c.meter.Int64ObservableGauge(
|
|
|
|
prefix+"usage_usec",
|
|
|
|
metric.WithDescription("Total cpu usage"),
|
|
|
|
metric.WithUnit("microseconds"),
|
|
|
|
); err != nil {
|
|
|
|
return nil, nil, fmt.Errorf("usage_usec:%w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if userUsec, err = c.meter.Int64ObservableGauge(
|
|
|
|
prefix+"user_usec",
|
|
|
|
metric.WithDescription("Current cpu usage in user space"),
|
|
|
|
metric.WithUnit("microseconds"),
|
|
|
|
); err != nil {
|
|
|
|
return nil, nil, fmt.Errorf("user_usec:%w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if systemUsec, err = c.meter.Int64ObservableGauge(
|
|
|
|
prefix+"system_usec",
|
|
|
|
metric.WithDescription("Current cpu usage in kernel space"),
|
|
|
|
metric.WithUnit("microseconds"),
|
|
|
|
); err != nil {
|
|
|
|
return nil, nil, fmt.Errorf("system_usec:%w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if nrPeriods, err = c.meter.Int64ObservableGauge(
|
|
|
|
prefix+"nr_periods",
|
|
|
|
metric.WithDescription("Current cpu number of periods (only if controller is enabled)"),
|
|
|
|
); err != nil {
|
|
|
|
return nil, nil, fmt.Errorf("nr_periods:%w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if nrThrottled, err = c.meter.Int64ObservableGauge(
|
|
|
|
prefix+"nr_throttled",
|
|
|
|
metric.WithDescription("Total number of times tasks have been throttled (only if controller is enabled)"),
|
|
|
|
); err != nil {
|
|
|
|
return nil, nil, fmt.Errorf("nr_throttled:%w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if throttledUsec, err = c.meter.Int64ObservableGauge(
|
|
|
|
prefix+"throttled_usec",
|
|
|
|
metric.WithDescription("Total time duration for which tasks have been throttled. (only if controller is enabled)"),
|
|
|
|
metric.WithUnit("microseconds"),
|
|
|
|
); err != nil {
|
|
|
|
return nil, nil, fmt.Errorf("throttled_usec:%w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
return func(ctx context.Context, o metric.Observer, m *stats.Metrics) {
|
|
|
|
o.ObserveInt64(usageUsec, int64(m.CPU.UsageUsec))
|
|
|
|
o.ObserveInt64(userUsec, int64(m.CPU.UserUsec))
|
|
|
|
o.ObserveInt64(systemUsec, int64(m.CPU.SystemUsec))
|
|
|
|
o.ObserveInt64(nrPeriods, int64(m.CPU.NrPeriods))
|
|
|
|
o.ObserveInt64(nrThrottled, int64(m.CPU.NrThrottled))
|
|
|
|
o.ObserveInt64(throttledUsec, int64(m.CPU.ThrottledUsec))
|
|
|
|
}, []metric.Observable{
|
|
|
|
usageUsec, userUsec, systemUsec, nrPeriods, nrThrottled, throttledUsec,
|
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *cgroup) register(prefix string, ctrls ...Controller) (metric.Registration, error) {
|
|
|
|
observable := make([]metric.Observable, 0, len(ctrls))
|
|
|
|
callbacks := make([]func(context.Context, metric.Observer, *stats.Metrics), 0, len(ctrls))
|
|
|
|
for _, ctrl := range ctrls {
|
|
|
|
switch ctrl {
|
|
|
|
case ControllerCPU:
|
|
|
|
cpu, cpus, err := c.cpu(prefix + "cpu.")
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("controller cpu:%w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
observable = append(observable, cpus...)
|
|
|
|
callbacks = append(callbacks, cpu)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
var lock sync.Mutex
|
|
|
|
|
|
|
|
lock.Lock()
|
|
|
|
defer lock.Unlock()
|
|
|
|
|
|
|
|
ureg, err := c.meter.RegisterCallback(
|
|
|
|
func(ctx context.Context, obsrv metric.Observer) error {
|
|
|
|
lock.Lock()
|
|
|
|
defer lock.Unlock()
|
|
|
|
|
|
|
|
stats, uErr := c.stats()
|
|
|
|
if uErr != nil {
|
|
|
|
return fmt.Errorf("cpu get stat:%w", uErr)
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, callback := range callbacks {
|
|
|
|
callback(ctx, obsrv, stats)
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
},
|
|
|
|
observable...,
|
|
|
|
)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("register callback:%w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
return ureg, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func Start(group string, opts ...Option) (metric.Registration, error) {
|
|
|
|
manager, err := cgroups.Load(group)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("load:%w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
cfg := newConfig(opts...)
|
|
|
|
|
|
|
|
cgr := cgroup{
|
|
|
|
meter: cfg.provider.Meter(
|
|
|
|
"gitoa.ru/go-4devs/otel/cgroups",
|
|
|
|
metric.WithInstrumentationVersion(Version()),
|
|
|
|
),
|
|
|
|
stats: manager.Stat,
|
|
|
|
}
|
|
|
|
|
|
|
|
return cgr.register(cfg.prefix+group+".", cfg.controllers...)
|
|
|
|
}
|