OpenTelemetry
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

193 lines
4.8 KiB

package cgroup
import (
"context"
"fmt"
"sync"
cgroups "github.com/containerd/cgroups/v3/cgroup2"
"github.com/containerd/cgroups/v3/cgroup2/stats"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/metric"
)
// WithProvider sets the metric.MeterProvider from which the cgroup meter is
// obtained.
//
// A nil provider is ignored, so the provider selected by newConfig stays in
// effect instead of causing a nil-interface panic when Start asks it for a
// meter.
func WithProvider(provider metric.MeterProvider) Option {
	return func(c *config) {
		if provider == nil {
			return
		}

		c.provider = provider
	}
}
// WithPrefic replaces the prefix that is put in front of every metric name.
//
// The spelling of the identifier ("Prefic") is part of the exported API and is
// therefore kept as is.
func WithPrefic(name string) Option {
	return func(cfg *config) {
		cfg.prefix = name
	}
}
// WithControllers replaces the set of controllers whose metrics are
// registered. The given slice is stored as is, without copying.
func WithControllers(ctrls ...Controller) Option {
	return func(cfg *config) {
		cfg.controllers = ctrls
	}
}
// Option supports configuring optional settings for cgroup metrics.
// An Option mutates the config it is given.
type Option func(*config)
// newConfig builds the configuration used by Start: it begins with the
// defaults (CPU controller, global otel meter provider, "cgroups." prefix) and
// then applies opts in the order they were passed.
func newConfig(opts ...Option) config {
	var cfg config

	cfg.controllers = []Controller{ControllerCPU}
	cfg.provider = otel.GetMeterProvider()
	cfg.prefix = "cgroups."

	for i := range opts {
		opts[i](&cfg)
	}

	return cfg
}
// config collects the settings that Option values can change.
type config struct {
// controllers lists the controllers whose metrics get registered.
controllers []Controller
// provider is the meter provider the cgroup meter is obtained from.
provider metric.MeterProvider
// prefix is prepended to every metric name.
prefix string
}
// cgroup ties a meter to a source of cgroup statistics.
type cgroup struct {
// meter creates the instruments and registers the collection callback.
meter metric.Meter
// stats returns a snapshot of the cgroup metrics, or an error.
stats func() (*stats.Metrics, error)
}
// cpu creates the observable gauges for the CPU controller; every gauge is
// named prefix followed by the name of the CPU statistic it reports.
//
// It returns three values:
//   - an observer that copies the CPU section of a stats snapshot into the
//     gauges,
//   - the gauges themselves, so the caller can pass them to RegisterCallback,
//   - an error, prefixed with the gauge name, if a gauge could not be created.
func (c *cgroup) cpu(prefix string) (func(context.Context, metric.Observer, *stats.Metrics), []metric.Observable, error) {
	usage, err := c.meter.Int64ObservableGauge(
		prefix+"usage_usec",
		metric.WithDescription("Total cpu usage"),
		metric.WithUnit("microseconds"),
	)
	if err != nil {
		return nil, nil, fmt.Errorf("usage_usec:%w", err)
	}

	user, err := c.meter.Int64ObservableGauge(
		prefix+"user_usec",
		metric.WithDescription("Current cpu usage in user space"),
		metric.WithUnit("microseconds"),
	)
	if err != nil {
		return nil, nil, fmt.Errorf("user_usec:%w", err)
	}

	system, err := c.meter.Int64ObservableGauge(
		prefix+"system_usec",
		metric.WithDescription("Current cpu usage in kernel space"),
		metric.WithUnit("microseconds"),
	)
	if err != nil {
		return nil, nil, fmt.Errorf("system_usec:%w", err)
	}

	// The two counters below carry no unit in the original definition.
	periods, err := c.meter.Int64ObservableGauge(
		prefix+"nr_periods",
		metric.WithDescription("Current cpu number of periods (only if controller is enabled)"),
	)
	if err != nil {
		return nil, nil, fmt.Errorf("nr_periods:%w", err)
	}

	throttled, err := c.meter.Int64ObservableGauge(
		prefix+"nr_throttled",
		metric.WithDescription("Total number of times tasks have been throttled (only if controller is enabled)"),
	)
	if err != nil {
		return nil, nil, fmt.Errorf("nr_throttled:%w", err)
	}

	throttledTime, err := c.meter.Int64ObservableGauge(
		prefix+"throttled_usec",
		metric.WithDescription("Total time duration for which tasks have been throttled. (only if controller is enabled)"),
		metric.WithUnit("microseconds"),
	)
	if err != nil {
		return nil, nil, fmt.Errorf("throttled_usec:%w", err)
	}

	// observe reports one value per gauge from the CPU part of the snapshot.
	observe := func(_ context.Context, obs metric.Observer, m *stats.Metrics) {
		cpuStat := m.CPU

		obs.ObserveInt64(usage, int64(cpuStat.UsageUsec))
		obs.ObserveInt64(user, int64(cpuStat.UserUsec))
		obs.ObserveInt64(system, int64(cpuStat.SystemUsec))
		obs.ObserveInt64(periods, int64(cpuStat.NrPeriods))
		obs.ObserveInt64(throttled, int64(cpuStat.NrThrottled))
		obs.ObserveInt64(throttledTime, int64(cpuStat.ThrottledUsec))
	}

	instruments := []metric.Observable{
		usage, user, system, periods, throttled, throttledTime,
	}

	return observe, instruments, nil
}
// register creates the instruments of every requested controller and
// registers a single callback with the meter. On each collection the callback
// reads the cgroup statistics once via c.stats and passes the snapshot to the
// observer of every controller.
//
// prefix is prepended to the controller-specific part of each instrument name.
// Controllers that have no case in the switch below are skipped. The result is
// the Registration returned by RegisterCallback, or an error if an instrument
// or the callback could not be registered.
func (c *cgroup) register(prefix string, ctrls ...Controller) (metric.Registration, error) {
	observable := make([]metric.Observable, 0, len(ctrls))
	callbacks := make([]func(context.Context, metric.Observer, *stats.Metrics), 0, len(ctrls))

	for _, ctrl := range ctrls {
		switch ctrl {
		case ControllerCPU:
			observe, instruments, err := c.cpu(prefix + "cpu.")
			if err != nil {
				return nil, fmt.Errorf("controller cpu:%w", err)
			}

			observable = append(observable, instruments...)
			callbacks = append(callbacks, observe)
		}
	}

	// lock serializes callback invocations so the statistics are read by one
	// collection at a time. It must not be held while calling RegisterCallback:
	// nothing here needs protecting, and a meter that runs the callback during
	// registration would deadlock on it.
	var lock sync.Mutex

	reg, err := c.meter.RegisterCallback(
		func(ctx context.Context, obsrv metric.Observer) error {
			lock.Lock()
			defer lock.Unlock()

			// Named metrics (not stats) to avoid shadowing the stats package.
			metrics, statErr := c.stats()
			if statErr != nil {
				return fmt.Errorf("cpu get stat:%w", statErr)
			}

			for _, callback := range callbacks {
				callback(ctx, obsrv, metrics)
			}

			return nil
		},
		observable...,
	)
	if err != nil {
		return nil, fmt.Errorf("register callback:%w", err)
	}

	return reg, nil
}
// Start loads the cgroup identified by group and registers its metrics with
// the configured meter provider.
//
// Metric names are built from the configured prefix, the group and a trailing
// dot, followed by the controller-specific part. The returned Registration is
// the one produced when the collection callback was registered; an error is
// returned if the group cannot be loaded or registration fails.
func Start(group string, opts ...Option) (metric.Registration, error) {
	manager, err := cgroups.Load(group)
	if err != nil {
		return nil, fmt.Errorf("load:%w", err)
	}

	cfg := newConfig(opts...)

	meter := cfg.provider.Meter(
		"gitoa.ru/go-4devs/otel/cgroups",
		metric.WithInstrumentationVersion(Version()),
	)

	collector := &cgroup{
		meter: meter,
		stats: manager.Stat,
	}

	return collector.register(cfg.prefix+group+".", cfg.controllers...)
}