bytesize/size.go

package bytesize

import (
	"errors"
	"fmt"
)

// Size is a number of bytes held in a signed 64-bit integer.
type Size int64

// Binary (IEC) units: each one is 1024 times the one before it.
const (
	Byte     Size = 1       // byte
	Kibibyte Size = 1 << 10 // kibibyte
	Mebibyte Size = 1 << 20 // mebibyte
	Gibibyte Size = 1 << 30 // gibibyte
	Tebibyte Size = 1 << 40 // tebibyte
	Pebibyte Size = 1 << 50 // pebibyte
)

// Decimal (SI) units: each one is 1000 times the one before it.
const (
	Kilobyte = Byte * 1000     // kilobyte
	Megabyte = Kilobyte * 1000 // megabyte
	Gigabyte = Megabyte * 1000 // gigabyte
	Terabyte = Gigabyte * 1000 // terabyte
	Petabyte = Terabyte * 1000 // petabyte
)

// unitMap maps each unit suffix recognized by Parse to the number of bytes
// it stands for. Keys are matched exactly (case-sensitive).
var unitMap = map[string]uint64{
	// Plain bytes and the binary (power-of-1024) units.
	"B":   uint64(Byte),
	"KiB": uint64(Kibibyte),
	"MiB": uint64(Mebibyte),
	"GiB": uint64(Gibibyte),
	"TiB": uint64(Tebibyte),
	"PiB": uint64(Pebibyte),

	// Decimal (power-of-1000) units; kilobyte is accepted as "kB" and "KB".
	"kB": uint64(Kilobyte),
	"KB": uint64(Kilobyte),
	"MB": uint64(Megabyte),
	"GB": uint64(Gigabyte),
	"TB": uint64(Terabyte),
	"PB": uint64(Petabyte),
}

// Sentinel errors wrapped by the errors Parse returns; match them with
// errors.Is.
var (
	// ErrInvalidSize reports malformed input or a value that overflows Size.
	ErrInvalidSize = errors.New("bytesize: invalid size")
	// ErrMissingUnit reports a number that is not followed by a unit.
	ErrMissingUnit = errors.New("bytesize: missing unit")
	// ErrUnknownUnit reports a unit suffix that is not recognized.
	ErrUnknownUnit = errors.New("bytesize: unknown unit")
)

// Parse parses a size string.
// A byte size string is a possibly signed sequence of
// decimal numbers, each with optional fraction and a unit suffix,
// such as "300kB", "-1.5GiB" or "2GB45MB"; the terms are added together.
// The string "0" (optionally signed) is accepted without a unit.
// Valid units are "B", "KiB", "MiB", "GiB", "TiB", "PiB" for binary size.
// Valid units are "B", "kB" (or "KB"), "MB", "GB", "TB", "PB" for human size.
//
// On failure the result is 0 and the error wraps ErrInvalidSize (malformed
// number or overflow), ErrMissingUnit (a number with no unit after it) or
// ErrUnknownUnit (a unit that is not one of the above).
//
// nolint: funlen,gocognit,gocyclo,cyclop
func Parse(in string) (Size, error) {
	// Grammar: [-+]?([0-9]*(\.[0-9]*)?unit)+
	// where unit is the run of characters up to the next digit or '.'.
	orig := in
	neg := false

	// res is the accumulated magnitude. It is kept <= 1<<63 so that the most
	// negative Size stays reachable; the positive bound is checked at the end.
	var res uint64

	// Consume [-+]?
	if in != "" {
		c := in[0]
		if c == '-' || c == '+' {
			neg = c == '-'
			in = in[1:]
		}
	}

	// Special case: if all that is left is "0", this is zero.
	if in == "0" {
		return 0, nil
	}

	if in == "" {
		return 0, fmt.Errorf("%w %q", ErrInvalidSize, orig)
	}

	for in != "" {
		var (
			val, fVal uint64      // integers before, after decimal point
			scale     float64 = 1 // value = val + fVal/scale
		)

		var err error

		// The next character must be [0-9.]
		if !(in[0] == '.' || '0' <= in[0] && in[0] <= '9') {
			return 0, fmt.Errorf("%w %q", ErrInvalidSize, orig)
		}

		// Consume [0-9]*
		intLen := len(in)

		val, in, err = leadingInt(in)
		if err != nil {
			return 0, fmt.Errorf("%w %q", ErrInvalidSize, orig)
		}

		pre := intLen != len(in) // whether we consumed anything before a period

		// Consume (\.[0-9]*)?
		post := false

		if in != "" && in[0] == '.' {
			in = in[1:]
			fracLen := len(in)
			fVal, scale, in = leadingFraction(in)
			post = fracLen != len(in)
		}

		if !pre && !post {
			// no digits (e.g. ".kB" or "-.kB")
			return 0, fmt.Errorf("%w %q", ErrInvalidSize, orig)
		}

		// Consume unit: everything up to the next digit or '.'.
		i := 0
		for ; i < len(in); i++ {
			c := in[i]
			if c == '.' || '0' <= c && c <= '9' {
				break
			}
		}

		if i == 0 {
			return 0, fmt.Errorf("%w %q", ErrMissingUnit, orig)
		}

		u := in[:i]
		in = in[i:]

		unit, ok := unitMap[u]
		if !ok {
			// A unit was present but is not one we know about.
			return 0, fmt.Errorf("%w %s in size %q", ErrUnknownUnit, u, orig)
		}

		if val > 1<<63/unit {
			// overflow
			return 0, fmt.Errorf("%w %q", ErrInvalidSize, orig)
		}

		val *= unit
		if fVal > 0 {
			// The fractional contribution fVal/scale*unit is computed in
			// float64 and truncated toward zero. fVal/scale < 1, so the
			// addend is below unit and the addition cannot wrap.
			val += uint64(float64(fVal) * (float64(unit) / scale))
			if val > 1<<63 {
				// overflow
				return 0, fmt.Errorf("%w %q", ErrInvalidSize, orig)
			}
		}

		// Check before adding: res and val may both equal 1<<63, in which
		// case res+val would wrap around to 0 and slip past a later check.
		if val > 1<<63-res {
			return 0, fmt.Errorf("%w %q", ErrInvalidSize, orig)
		}

		res += val
	}

	if neg {
		// res == 1<<63 converts to the minimum Size, whose negation is itself.
		return -Size(res), nil
	}

	if res > 1<<63-1 {
		return 0, fmt.Errorf("%w %q", ErrInvalidSize, orig)
	}

	return Size(res), nil
}

// leadingFraction consumes the leading [0-9]* from in, reading the digits as
// those that follow a decimal point.
// It returns the digits accumulated as an integer, the power of ten that
// integer must be divided by to give the fraction, and the rest of in.
// Overflow is not an error: as soon as one more digit would not fit, the
// remaining digits are still consumed but no longer contribute, so only
// precision is lost.
func leadingFraction(in string) (uint64, float64, string) {
	// Largest accumulator value that may still be multiplied by ten.
	const shiftLimit = (1<<63 - 1) / 10

	// Phase one: find where the run of digits ends.
	end := 0
	for end < len(in) && '0' <= in[end] && in[end] <= '9' {
		end++
	}

	// Phase two: fold in digits until the next one would overflow.
	var acc uint64

	pow := float64(1)

	for _, digit := range []byte(in[:end]) {
		if acc > shiftLimit {
			break
		}

		next := acc*10 + uint64(digit-'0')
		if next > 1<<63 {
			break
		}

		acc, pow = next, pow*10
	}

	return acc, pow, in[end:]
}

// errLeadingInt is returned by leadingInt when the digits do not fit.
var errLeadingInt = errors.New("bytesize: bad [0-9]*")

// leadingInt consumes the leading [0-9]* from in.
// It returns the parsed value and the unconsumed remainder. If the value
// would exceed 1<<63 it returns 0, an empty remainder and errLeadingInt.
func leadingInt[bytes []byte | string](in bytes) (uint64, bytes, error) {
	const limit = 1 << 63

	var (
		acc   uint64
		empty bytes // zero-value remainder handed back on overflow
	)

	pos := 0
	for pos < len(in) && '0' <= in[pos] && in[pos] <= '9' {
		if acc > limit/10 {
			// multiplying by ten would overflow
			return 0, empty, errLeadingInt
		}

		acc = acc*10 + uint64(in[pos]-'0')
		if acc > limit {
			// adding the digit overflowed
			return 0, empty, errLeadingInt
		}

		pos++
	}

	return acc, in[pos:], nil
}

// String returns the text that format writes for s, for example "0B" for
// a zero Size.
func (s Size) String() string {
	// format fills the scratch array from its end and reports where the
	// text starts.
	var scratch [32]byte

	start := s.format(&scratch)

	return string(scratch[start:])
}

// format writes the representation of s into the end of buf and
// returns the offset of the first character.
// The magnitude selects a decimal unit: "B" below Kilobyte, then "kB", "MB",
// "GB", "TB", and "PB" for anything from Petabyte upward. Zero is written as
// "0B" and negative values get a leading '-'.
// nolint: gosec
func (s Size) format(buf *[32]byte) int {
	// Work on the magnitude; negating the uint64 also handles the minimum Size.
	mag := uint64(s)
	negative := s < 0

	if negative {
		mag = -mag
	}

	// Every representation ends in 'B'.
	pos := len(buf) - 1
	buf[pos] = 'B'

	if mag == 0 {
		pos--
		buf[pos] = '0'

		return pos
	}

	// digits is how many decimal places separate the unit from plain bytes.
	digits := 0

	if mag >= uint64(Kilobyte) {
		var prefix byte

		switch {
		case mag < uint64(Megabyte):
			prefix, digits = 'k', 3 // kilobytes
		case mag < uint64(Gigabyte):
			prefix, digits = 'M', 6 // megabytes
		case mag < uint64(Terabyte):
			prefix, digits = 'G', 9 // gigabytes
		case mag < uint64(Petabyte):
			prefix, digits = 'T', 12 // terabytes
		default:
			prefix, digits = 'P', 15 // petabytes
		}

		pos--
		buf[pos] = prefix
	}

	// Fraction first, then the integer part, both written right to left.
	pos, mag = fmtFrac(buf[:pos], mag, digits)
	pos = fmtInt(buf[:pos], mag)

	if negative {
		pos--
		buf[pos] = '-'
	}

	return pos
}

// fmtFrac writes the fractional part of val/10**prec (e.g., ".12345") into
// the tail of buf. Trailing zeros are left out, and when the whole fraction
// is zero the decimal point is left out as well. It returns the index at
// which the written bytes start and the integer part val/10**prec.
func fmtFrac(buf []byte, val uint64, prec int) (int, uint64) {
	pos := len(buf)
	emitting := false // becomes true at the first non-zero digit from the right

	for n := 0; n < prec; n++ {
		d := byte(val % 10)
		val /= 10

		if d != 0 {
			emitting = true
		}

		if !emitting {
			// still inside the run of trailing zeros
			continue
		}

		pos--
		buf[pos] = '0' + d
	}

	if !emitting {
		return pos, val
	}

	pos--
	buf[pos] = '.'

	return pos, val
}

// fmtInt writes val in decimal into the tail of buf.
// It returns the index where the output begins.
func fmtInt(buf []byte, val uint64) int {
	pos := len(buf)

	// Emit at least one digit so that zero is written as "0".
	for {
		pos--
		buf[pos] = '0' + byte(val%10)
		val /= 10

		if val == 0 {
			break
		}
	}

	return pos
}
init 4 weeks ago			`package bytesize`

			`import (`
			`"errors"`
			`"fmt"`
			`)`

			`type Size int64`

			`// Byte size prefix.`
			`const (`
			`Byte Size = 1 // byte`
			`Kibibyte Size = 1 << (10 * iota) // kibibyte`
			`Mebibyte // mebibyte`
			`Gibibyte // gibibyte`
			`Tebibyte // tebibyte`
			`Pebibyte // pebibyte`

			`Kilobyte Size = 1000 * Byte // kilobyte`
			`Megabyte Size = 1000 * Kilobyte // megabyte`
			`Gigabyte Size = 1000 * Megabyte // gigabyte`
			`Terabyte Size = 1000 * Gigabyte // terabyte`
			`Petabyte Size = 1000 * Terabyte // petabyte`
			`)`

			`var unitMap = map[string]uint64{`
			`"B": uint64(Byte),`
			`"KiB": uint64(Kibibyte),`
			`"MiB": uint64(Mebibyte),`
			`"GiB": uint64(Gibibyte),`
			`"TiB": uint64(Tebibyte),`
			`"PiB": uint64(Pebibyte),`

			`"kB": uint64(Kilobyte),`
			`"KB": uint64(Kilobyte),`
			`"MB": uint64(Megabyte),`
			`"GB": uint64(Gigabyte),`
			`"TB": uint64(Terabyte),`
			`"PB": uint64(Petabyte),`
			`}`

			`var (`
			`ErrInvalidSize = errors.New("bytesize: invalid size")`
			`ErrMissingUnit = errors.New("bytesize: missing unit")`
			`ErrUnknownUnit = errors.New("bytesize: unknown unit")`
			`)`

			`// Parse parses a size string.`
			`// A byte size string is a possibly signed sequence of`
			`// decimal numbers, each with optional fraction and a unit suffix,`
			`// such as "300kB", "-1.5GiB" or "2GB45MB".`
			`// Valid units are "B", "KiB", "MiB", "GiB", "TiB", "PiB" for binary size.`
			`// Valid units are "B", "kB", "MB", "GB", "TB", "PB" for human size.`
			`//`
			`// nolint: funlen,gocognit,gocyclo,cyclop`
			`func Parse(in string) (Size, error) {`
			`// [-+]?([0-9](\.[0-9])?[a-z]+)+`
			`orig := in`
			`neg := false`

			`var res uint64`

			`// Consume [-+]?`
			`if in != "" {`
			`c := in[0]`
			`if c == '-' \|\| c == '+' {`
			`neg = c == '-'`
			`in = in[1:]`
			`}`
			`}`
			`// Special case: if all that is left is "0", this is zero.`
			`if in == "0" {`
			`return 0, nil`
			`}`

			`if in == "" {`
			`return 0, fmt.Errorf("%w %q", ErrInvalidSize, orig)`
			`}`

			`for in != "" {`
			`var (`
			`val, fVal uint64 // integers before, after decimal point`
			`scale float64 = 1 // value = v + f/scale`
			`)`

			`var err error`

			`// The next character must be [0-9.]`
			`if !(in[0] == '.' \|\| '0' <= in[0] && in[0] <= '9') {`
			`return 0, fmt.Errorf("%w %q", ErrInvalidSize, orig)`
			`}`

			`// Consume [0-9]*`
			`pl := len(in)`

			`val, in, err = leadingInt(in)`
			`if err != nil {`
			`return 0, fmt.Errorf("%w %q", ErrInvalidSize, orig)`
			`}`

			`pre := pl != len(in) // whether we consumed anything before a period`

			`// Consume (\.[0-9]*)?`
			`post := false`

			`if in != "" && in[0] == '.' {`
			`in = in[1:]`
			`pl := len(in)`
			`fVal, scale, in = leadingFraction(in)`
			`post = pl != len(in)`
			`}`

			`if !pre && !post {`
			`// no digits (e.g. ".s" or "-.s")`
			`return 0, fmt.Errorf("%w %q", ErrInvalidSize, orig)`
			`}`

			`// Consume unit.`
			`i := 0`
			`for ; i < len(in); i++ {`
			`c := in[i]`
			`if c == '.' \|\| '0' <= c && c <= '9' {`
			`break`
			`}`
			`}`

			`if i == 0 {`
			`return 0, fmt.Errorf("%w %q", ErrMissingUnit, orig)`
			`}`

			`u := in[:i]`
			`in = in[i:]`
			`unit, ok := unitMap[u]`

			`if !ok {`
			`return 0, fmt.Errorf("%w %s in size %q", ErrMissingUnit, u, orig)`
			`}`

			`if val > 1<<63/unit {`
			`// overflow`
			`return 0, fmt.Errorf("%w %q", ErrInvalidSize, orig)`
			`}`

			`val *= unit`
			`if fVal > 0 {`
			`// float64 is needed to be nanosecond accurate for fractions of hours.`
			`// v >= 0 && (f*unit/scale) <= 3.6e+12 (ns/h, h is the largest unit)`
			`val += uint64(float64(fVal) * (float64(unit) / scale))`
			`if val > 1<<63 {`
			`// overflow`
			`return 0, fmt.Errorf("%w %q", ErrInvalidSize, orig)`
			`}`
			`}`

			`res += val`
			`if res > 1<<63 {`
			`return 0, fmt.Errorf("%w %q", ErrInvalidSize, orig)`
			`}`
			`}`

			`if neg {`
			`return -Size(res), nil`
			`}`

			`if res > 1<<63-1 {`
			`return 0, fmt.Errorf("%w %q", ErrInvalidSize, orig)`
			`}`

			`return Size(res), nil`
			`}`

			`// leadingFraction consumes the leading [0-9]* from s.`
			`// It is used only for fractions, so does not return an error on overflow,`
			`// it just stops accumulating precision.`
			`func leadingFraction(in string) (uint64, float64, string) {`
			`i := 0`
			`scale := float64(1)`
			`overflow := false`

			`var res uint64`

			`for ; i < len(in); i++ {`
			`cur := in[i]`
			`if cur < '0' \|\| cur > '9' {`
			`break`
			`}`

			`if overflow {`
			`continue`
			`}`

			`if res > (1<<63-1)/10 {`
			`// It's possible for overflow to give a positive number, so take care.`
			`overflow = true`

			`continue`
			`}`

			`pres := res*10 + uint64(cur) - '0'`
			`if pres > 1<<63 {`
			`overflow = true`

			`continue`
			`}`

			`res = pres`
			`scale *= 10`
			`}`

			`return res, scale, in[i:]`
			`}`

			`var errLeadingInt = errors.New("bytesize: bad [0-9]*")`

			`// leadingInt consumes the leading [0-9]* from in.`
			`func leadingInt[bytes []byte \| string](in bytes) (uint64, bytes, error) {`
			`i := 0`

			`var (`
			`res uint64`
			`rem bytes`
			`)`

			`for ; i < len(in); i++ {`
			`curr := in[i]`

			`if curr < '0' \|\| curr > '9' {`
			`break`
			`}`

			`if res > 1<<63/10 {`
			`// overflow`
			`return 0, rem, errLeadingInt`
			`}`

			`res = res*10 + uint64(curr) - '0'`
			`if res > 1<<63 {`
			`// overflow`
			`return 0, rem, errLeadingInt`
			`}`
			`}`

			`return res, in[i:], nil`
			`}`

			`func (s Size) String() string {`
			`var arr [32]byte`
			`n := s.format(&arr)`

			`return string(arr[n:])`
			`}`

			`// format formats the representation of d into the end of buf and`
			`// returns the offset of the first character.`
			`// nolint: gosec`
			`func (s Size) format(buf *[32]byte) int {`
			`wLen := len(buf)`

			`data := uint64(s)`
			`neg := s < 0`

			`if neg {`
			`data = -data`
			`}`

			`prec := 3`
			`wLen--`
			`buf[wLen] = 'B'`
			`wLen--`

			`switch {`
			`case data == 0:`
			`buf[wLen] = '0'`

			`return wLen`
			`case data < uint64(Kilobyte):`
			`// print bytes`
			`wLen++`
			`prec = 0`
			`case data < uint64(Megabyte):`
			`// print kipobytes`
			`buf[wLen] = 'k'`
			`case data < uint64(Gigabyte):`
			`// print megabytes`
			`prec = 6`
			`buf[wLen] = 'M'`
			`case data < uint64(Terabyte):`
			`// print gigabytes`
			`prec = 9`
			`buf[wLen] = 'G'`
			`case data < uint64(Petabyte):`
			`// print terabytes`
			`prec = 12`
			`buf[wLen] = 'T'`
			`default:`
			`// print petobytes`
			`prec = 15`
			`buf[wLen] = 'P'`
			`}`

			`wLen, data = fmtFrac(buf[:wLen], data, prec)`
			`wLen = fmtInt(buf[:wLen], data)`

			`if neg {`
			`wLen--`
			`buf[wLen] = '-'`
			`}`

			`return wLen`
			`}`

			`// fmtFrac formats the fraction of v/10**prec (e.g., ".12345") into the`
			`// tail of buf, omitting trailing zeros. It omits the decimal`
			`// point too when the fraction is 0. It returns the index where the`
			`// output bytes begin and the value v/10**prec.`
			`func fmtFrac(buf []byte, val uint64, prec int) (int, uint64) {`
			`// Omit trailing zeros up to and including decimal point.`
			`wLen := len(buf)`
			`show := false`

			`for range prec {`
			`digit := val % 10`
			`show = show \|\| digit != 0`

			`if show {`
			`wLen--`
			`buf[wLen] = byte(digit) + '0'`
			`}`

			`val /= 10`
			`}`

			`if show {`
			`wLen--`
			`buf[wLen] = '.'`
			`}`

			`return wLen, val`
			`}`

			`// fmtInt formats v into the tail of buf.`
			`// It returns the index where the output begins.`
			`func fmtInt(buf []byte, val uint64) int {`
			`wLen := len(buf)`
			`if val == 0 {`
			`wLen--`
			`buf[wLen] = '0'`
			`} else {`
			`for val > 0 {`
			`wLen--`
			`buf[wLen] = byte(val%10) + '0'`
			`val /= 10`
			`}`
			`}`

			`return wLen`
			`}`