init

2024-10-04 14:55:21 +03:00
parent 01de72c729
commit 779818e067
9 changed files with 775 additions and 0 deletions
--- a/size.go
+++ b/size.go
@@ -0,0 +1,357 @@
+package bytesize
+
+import (
+	"errors"
+	"fmt"
+)
+
+// Size is a count of bytes held in a signed 64-bit integer.
+type Size int64
+
+// Byte size units: binary multiples (powers of 1024) first,
+// then decimal multiples (powers of 1000).
+const (
+	Byte     Size = 1              // byte
+	Kibibyte Size = Byte << 10     // kibibyte
+	Mebibyte Size = Kibibyte << 10 // mebibyte
+	Gibibyte Size = Mebibyte << 10 // gibibyte
+	Tebibyte Size = Gibibyte << 10 // tebibyte
+	Pebibyte Size = Tebibyte << 10 // pebibyte
+
+	Kilobyte Size = Byte * 1000     // kilobyte
+	Megabyte Size = Kilobyte * 1000 // megabyte
+	Gigabyte Size = Megabyte * 1000 // gigabyte
+	Terabyte Size = Gigabyte * 1000 // terabyte
+	Petabyte Size = Terabyte * 1000 // petabyte
+)
+
+// unitMap maps every unit suffix accepted by Parse to its size in bytes.
+var unitMap = map[string]uint64{
+	// Plain bytes.
+	"B": uint64(Byte),
+
+	// Decimal units; "kB" and "KB" both denote the kilobyte.
+	"kB": uint64(Kilobyte),
+	"KB": uint64(Kilobyte),
+	"MB": uint64(Megabyte),
+	"GB": uint64(Gigabyte),
+	"TB": uint64(Terabyte),
+	"PB": uint64(Petabyte),
+
+	// Binary units.
+	"KiB": uint64(Kibibyte),
+	"MiB": uint64(Mebibyte),
+	"GiB": uint64(Gibibyte),
+	"TiB": uint64(Tebibyte),
+	"PiB": uint64(Pebibyte),
+}
+
+var (
+	ErrInvalidSize = errors.New("bytesize: invalid size") // wrapped by Parse for malformed or overflowing input
+	ErrMissingUnit = errors.New("bytesize: missing unit") // wrapped by Parse when a number has no unit suffix
+	ErrUnknownUnit = errors.New("bytesize: unknown unit") // a unit suffix that is not recognized
+)
+
+// Parse parses a size string.
+// A byte size string is a possibly signed sequence of
+// decimal numbers, each with optional fraction and a unit suffix,
+// such as "300kB", "-1.5GiB" or "2GB45MB". The terms are added together.
+// Valid binary units are "B", "KiB", "MiB", "GiB", "TiB", "PiB".
+// Valid decimal units are "B", "kB" (also spelled "KB"), "MB", "GB", "TB", "PB".
+// The bare string "0" (optionally signed) is accepted without a unit.
+//
+// The returned error wraps ErrInvalidSize when the input is malformed or
+// does not fit in a Size, ErrMissingUnit when a number is not followed by
+// a unit, and ErrUnknownUnit when the unit suffix is not one listed above.
+//
+// nolint: funlen,gocognit,gocyclo,cyclop
+func Parse(in string) (Size, error) {
+	// [-+]?([0-9]*(\.[0-9]*)?[^0-9.]+)+
+	orig := in
+	neg := false
+
+	var res uint64
+
+	// Consume [-+]?
+	if in != "" {
+		c := in[0]
+		if c == '-' || c == '+' {
+			neg = c == '-'
+			in = in[1:]
+		}
+	}
+	// Special case: if all that is left is "0", this is zero.
+	if in == "0" {
+		return 0, nil
+	}
+
+	if in == "" {
+		return 0, fmt.Errorf("%w %q", ErrInvalidSize, orig)
+	}
+
+	for in != "" {
+		var (
+			val, fVal uint64      // integers before, after decimal point
+			scale     float64 = 1 // value = val + fVal/scale
+		)
+
+		var err error
+
+		// The next character must be [0-9.]
+		if !(in[0] == '.' || '0' <= in[0] && in[0] <= '9') {
+			return 0, fmt.Errorf("%w %q", ErrInvalidSize, orig)
+		}
+
+		// Consume [0-9]*
+		pl := len(in)
+
+		val, in, err = leadingInt(in)
+		if err != nil {
+			return 0, fmt.Errorf("%w %q", ErrInvalidSize, orig)
+		}
+
+		pre := pl != len(in) // whether we consumed anything before a period
+
+		// Consume (\.[0-9]*)?
+		post := false
+
+		if in != "" && in[0] == '.' {
+			in = in[1:]
+			fl := len(in)
+			fVal, scale, in = leadingFraction(in)
+			post = fl != len(in) // whether we consumed anything after the period
+		}
+
+		if !pre && !post {
+			// no digits (e.g. ".kB" or "-.kB")
+			return 0, fmt.Errorf("%w %q", ErrInvalidSize, orig)
+		}
+
+		// Consume unit: everything up to the next digit or period.
+		i := 0
+		for ; i < len(in); i++ {
+			c := in[i]
+			if c == '.' || '0' <= c && c <= '9' {
+				break
+			}
+		}
+
+		if i == 0 {
+			return 0, fmt.Errorf("%w %q", ErrMissingUnit, orig)
+		}
+
+		u := in[:i]
+		in = in[i:]
+		unit, ok := unitMap[u]
+
+		if !ok {
+			// A suffix is present but not recognized: report it as an
+			// unknown unit, distinct from the missing-unit case above.
+			return 0, fmt.Errorf("%w %s in size %q", ErrUnknownUnit, u, orig)
+		}
+
+		if val > 1<<63/unit {
+			// overflow: val*unit would exceed 1<<63
+			return 0, fmt.Errorf("%w %q", ErrInvalidSize, orig)
+		}
+
+		val *= unit
+		if fVal > 0 {
+			// fVal/scale is the fractional part of the number, so its
+			// contribution in bytes is fVal*(unit/scale), computed in
+			// float64 and truncated toward zero.
+			val += uint64(float64(fVal) * (float64(unit) / scale))
+			if val > 1<<63 {
+				// overflow
+				return 0, fmt.Errorf("%w %q", ErrInvalidSize, orig)
+			}
+		}
+
+		res += val
+		if res > 1<<63 {
+			return 0, fmt.Errorf("%w %q", ErrInvalidSize, orig)
+		}
+	}
+
+	if neg {
+		// A magnitude of exactly 1<<63 is representable only as a negative Size.
+		return -Size(res), nil
+	}
+
+	if res > 1<<63-1 {
+		return 0, fmt.Errorf("%w %q", ErrInvalidSize, orig)
+	}
+
+	return Size(res), nil
+}
+
+// leadingFraction reads the run of decimal digits at the start of in and
+// returns their value, the power of ten matching the number of digits
+// accumulated, and the unread remainder of in.
+// It serves fractions only: once the value can no longer grow safely the
+// remaining digits are skipped (losing precision) rather than reported as
+// an error.
+func leadingFraction(in string) (uint64, float64, string) {
+	const maxBeforeShift = (1<<63 - 1) / 10
+
+	var (
+		acc       uint64
+		pos       int
+		saturated bool
+	)
+
+	scale := 1.0
+
+	for pos < len(in) && in[pos] >= '0' && in[pos] <= '9' {
+		digit := in[pos]
+		pos++
+
+		if saturated {
+			// Keep consuming digits, but ignore their value.
+			continue
+		}
+
+		if acc > maxBeforeShift {
+			// Multiplying by ten could wrap around to a plausible value.
+			saturated = true
+
+			continue
+		}
+
+		next := acc*10 + uint64(digit) - '0'
+		if next > 1<<63 {
+			saturated = true
+
+			continue
+		}
+
+		acc = next
+		scale *= 10
+	}
+
+	return acc, scale, in[pos:]
+}
+
+// errLeadingInt is returned by leadingInt when the digits overflow.
+var errLeadingInt = errors.New("bytesize: bad [0-9]*")
+
+// leadingInt reads the run of decimal digits at the start of in and returns
+// their value together with the unread remainder. If the value would exceed
+// 1<<63 it returns errLeadingInt with a zero value and an empty remainder.
+func leadingInt[bytes []byte | string](in bytes) (uint64, bytes, error) {
+	const ceiling = 1 << 63
+
+	var (
+		acc  uint64
+		none bytes // zero-value remainder handed back on overflow
+	)
+
+	pos := 0
+	for pos < len(in) && '0' <= in[pos] && in[pos] <= '9' {
+		if acc > ceiling/10 {
+			// The next multiplication by ten would overflow.
+			return 0, none, errLeadingInt
+		}
+
+		acc = acc*10 + uint64(in[pos]) - '0'
+		if acc > ceiling {
+			// Adding the digit pushed the value past the limit.
+			return 0, none, errLeadingInt
+		}
+
+		pos++
+	}
+
+	return acc, in[pos:], nil
+}
+
+// String returns s formatted with a decimal unit suffix, for example "1.5kB".
+func (s Size) String() string {
+	var scratch [32]byte
+
+	start := s.format(&scratch)
+
+	return string(scratch[start:])
+}
+
+// format formats the representation of d into the end of buf and
+// returns the offset of the first character.
+// nolint: gosec
+func (s Size) format(buf *[32]byte) int {
+	wLen := len(buf)
+
+	data := uint64(s)
+	neg := s < 0
+
+	if neg {
+		data = -data
+	}
+
+	prec := 3
+	wLen--
+	buf[wLen] = 'B'
+	wLen--
+
+	switch {
+	case data == 0:
+		buf[wLen] = '0'
+
+		return wLen
+	case data < uint64(Kilobyte):
+		// print bytes
+		wLen++
+		prec = 0
+	case data < uint64(Megabyte):
+		// print kipobytes
+		buf[wLen] = 'k'
+	case data < uint64(Gigabyte):
+		// print megabytes
+		prec = 6
+		buf[wLen] = 'M'
+	case data < uint64(Terabyte):
+		// print gigabytes
+		prec = 9
+		buf[wLen] = 'G'
+	case data < uint64(Petabyte):
+		// print terabytes
+		prec = 12
+		buf[wLen] = 'T'
+	default:
+		// print petobytes
+		prec = 15
+		buf[wLen] = 'P'
+	}
+
+	wLen, data = fmtFrac(buf[:wLen], data, prec)
+	wLen = fmtInt(buf[:wLen], data)
+
+	if neg {
+		wLen--
+		buf[wLen] = '-'
+	}
+
+	return wLen
+}
+
+// fmtFrac writes the fractional part of val/10**prec (for example ".12345")
+// into the tail of buf. Trailing zeros are dropped, and when the fraction is
+// zero nothing is written, not even the decimal point. It returns the index
+// at which the written bytes start and the integer quotient val/10**prec.
+func fmtFrac(buf []byte, val uint64, prec int) (int, uint64) {
+	pos := len(buf)
+	started := false // set once the first non-zero digit has been seen
+
+	for remaining := prec; remaining > 0; remaining-- {
+		d := byte(val % 10)
+		val /= 10
+
+		if d != 0 {
+			started = true
+		}
+
+		if !started {
+			// Still inside the run of trailing zeros.
+			continue
+		}
+
+		pos--
+		buf[pos] = '0' + d
+	}
+
+	if !started {
+		return pos, val
+	}
+
+	pos--
+	buf[pos] = '.'
+
+	return pos, val
+}
+
+// fmtInt writes the decimal digits of val into the tail of buf and
+// returns the index at which they start. Zero is written as "0".
+func fmtInt(buf []byte, val uint64) int {
+	pos := len(buf)
+
+	// Emit at least one digit so that zero is rendered.
+	for {
+		pos--
+		buf[pos] = '0' + byte(val%10)
+		val /= 10
+
+		if val == 0 {
+			break
+		}
+	}
+
+	return pos
+}