// Package cgroups exports cgroup v2 statistics as OpenTelemetry metrics.
package cgroups

import (
	"context"
	"fmt"
	"sync"

	cgroups "github.com/containerd/cgroups/v3/cgroup2"
	"github.com/containerd/cgroups/v3/cgroup2/stats"
	"go.opentelemetry.io/otel"
	"go.opentelemetry.io/otel/metric"
)

// Option supports configuring optional settings for runtime metrics.
type Option interface {
	apply(*config)
}

// newConfig builds a config seeded with the given controllers, the global
// meter provider and the "cgroups." metric prefix, then applies opts in order
// so that later options override earlier ones.
func newConfig(controllers []Controller, opts ...Option) config {
	cfg := config{
		controllers: controllers,
		provider:    otel.GetMeterProvider(),
		prefix:      "cgroups.",
	}

	for _, opt := range opts {
		opt.apply(&cfg)
	}

	return cfg
}

// config holds the settings used by Start.
type config struct {
	// controllers lists the controllers whose metrics are registered.
	controllers []Controller
	// provider is the meter provider the meter is obtained from.
	provider metric.MeterProvider
	// prefix is prepended to every metric name.
	prefix string
}

// cgroup couples a meter with the function that produces a stats snapshot.
type cgroup struct {
	meter metric.Meter
	stats func() (*stats.Metrics, error)
}

// cpu creates the CPU gauges, naming each one prefix + the stat name.
//
// It returns the function that records a stats snapshot into those gauges,
// the gauges themselves (to be passed to RegisterCallback), and an error
// wrapped with the stat name if any gauge could not be created.
func (c *cgroup) cpu(prefix string) (func(context.Context, metric.Observer, *stats.Metrics), []metric.Observable, error) {
	var (
		err           error
		usageUsec     metric.Int64ObservableGauge
		userUsec      metric.Int64ObservableGauge
		systemUsec    metric.Int64ObservableGauge
		nrPeriods     metric.Int64ObservableGauge
		nrThrottled   metric.Int64ObservableGauge
		throttledUsec metric.Int64ObservableGauge
	)

	if usageUsec, err = c.meter.Int64ObservableGauge(
		prefix+"usage_usec",
		metric.WithDescription("Total cpu usage"),
		metric.WithUnit("microseconds"),
	); err != nil {
		return nil, nil, fmt.Errorf("usage_usec:%w", err)
	}

	if userUsec, err = c.meter.Int64ObservableGauge(
		prefix+"user_usec",
		metric.WithDescription("Current cpu usage in user space"),
		metric.WithUnit("microseconds"),
	); err != nil {
		return nil, nil, fmt.Errorf("user_usec:%w", err)
	}

	if systemUsec, err = c.meter.Int64ObservableGauge(
		prefix+"system_usec",
		metric.WithDescription("Current cpu usage in kernel space"),
		metric.WithUnit("microseconds"),
	); err != nil {
		return nil, nil, fmt.Errorf("system_usec:%w", err)
	}

	if nrPeriods, err = c.meter.Int64ObservableGauge(
		prefix+"nr_periods",
		metric.WithDescription("Current cpu number of periods (only if controller is enabled)"),
	); err != nil {
		return nil, nil, fmt.Errorf("nr_periods:%w", err)
	}

	if nrThrottled, err = c.meter.Int64ObservableGauge(
		prefix+"nr_throttled",
		metric.WithDescription("Total number of times tasks have been throttled (only if controller is enabled)"),
	); err != nil {
		return nil, nil, fmt.Errorf("nr_throttled:%w", err)
	}

	if throttledUsec, err = c.meter.Int64ObservableGauge(
		prefix+"throttled_usec",
		metric.WithDescription("Total time duration for which tasks have been throttled. (only if controller is enabled)"),
		metric.WithUnit("microseconds"),
	); err != nil {
		return nil, nil, fmt.Errorf("throttled_usec:%w", err)
	}

	observe := func(_ context.Context, o metric.Observer, m *stats.Metrics) {
		// CPU is a pointer field: skip the observation instead of panicking
		// inside the metrics callback when the snapshot has no CPU section.
		if m == nil || m.CPU == nil {
			return
		}

		cpu := m.CPU

		// The stats are unsigned; the gauges are int64, hence the conversions.
		o.ObserveInt64(usageUsec, int64(cpu.UsageUsec))
		o.ObserveInt64(userUsec, int64(cpu.UserUsec))
		o.ObserveInt64(systemUsec, int64(cpu.SystemUsec))
		o.ObserveInt64(nrPeriods, int64(cpu.NrPeriods))
		o.ObserveInt64(nrThrottled, int64(cpu.NrThrottled))
		o.ObserveInt64(throttledUsec, int64(cpu.ThrottledUsec))
	}

	instruments := []metric.Observable{
		usageUsec,
		userUsec,
		systemUsec,
		nrPeriods,
		nrThrottled,
		throttledUsec,
	}

	return observe, instruments, nil
}

// register creates the instruments for every supported controller in ctrls
// and registers a single callback that reads one stats snapshot per
// collection and feeds it to each controller's observer.
//
// Controllers without a case in the switch below are ignored.
func (c *cgroup) register(prefix string, ctrls ...Controller) (metric.Registration, error) {
	observables := make([]metric.Observable, 0, len(ctrls))
	callbacks := make([]func(context.Context, metric.Observer, *stats.Metrics), 0, len(ctrls))

	for _, ctrl := range ctrls {
		switch ctrl {
		case ControllerCPU:
			observe, instruments, err := c.cpu(prefix + "cpu.")
			if err != nil {
				return nil, fmt.Errorf("controller cpu:%w", err)
			}

			observables = append(observables, instruments...)
			callbacks = append(callbacks, observe)
		}
	}

	// The lock is held until register returns, so the callback cannot start
	// collecting before RegisterCallback has completed; afterwards it
	// serializes callback invocations.
	var lock sync.Mutex

	lock.Lock()
	defer lock.Unlock()

	reg, err := c.meter.RegisterCallback(
		func(ctx context.Context, obsrv metric.Observer) error {
			lock.Lock()
			defer lock.Unlock()

			// Named metrics rather than stats to avoid shadowing the package.
			metrics, sErr := c.stats()
			if sErr != nil {
				return fmt.Errorf("cpu get stat:%w", sErr)
			}

			for _, callback := range callbacks {
				callback(ctx, obsrv, metrics)
			}

			return nil
		},
		observables...,
	)
	if err != nil {
		return nil, fmt.Errorf("register callback:%w", err)
	}

	return reg, nil
}

// Start loads the cgroup v2 group, discovers its controllers and registers
// metric callbacks for them. Metric names are built as
// <prefix><group>.<controller>.<stat>.
//
// Controller names that ParseController rejects are skipped. Options are
// applied after discovery, so they may replace the detected controller set.
// The returned Registration is the one produced by the meter's
// RegisterCallback.
func Start(group string, opts ...Option) (metric.Registration, error) {
	manager, err := cgroups.Load(group)
	if err != nil {
		return nil, fmt.Errorf("load:%w", err)
	}

	names, err := manager.Controllers()
	if err != nil {
		return nil, fmt.Errorf("controllers:%w", err)
	}

	ctrls := make([]Controller, 0, len(names))

	for _, name := range names {
		// Best effort: a controller this package cannot parse is not an error.
		ctrl, perr := ParseController(name)
		if perr != nil {
			continue
		}

		ctrls = append(ctrls, ctrl)
	}

	cfg := newConfig(ctrls, opts...)

	cgr := cgroup{
		meter: cfg.provider.Meter(
			"gitoa.ru/go-4devs/otel/cgroups",
			metric.WithInstrumentationVersion(Version()),
		),
		stats: manager.Stat,
	}

	return cgr.register(cfg.prefix+group+".", cfg.controllers...)
}