Algorithms/floating_point.rkt

#lang racket
(require data/bit-vector)

; https://imgur.com/a/ClKK5Ac

; See http://fabiensanglard.net/floating_point_visually_explained/ for an intuitive explanation

(define (to-bin fl)
  (~r fl
      #:base 2
      #:precision 52))

(define (bit-vector->string bv)
 (list->string
  (for/list [(i (in-range (bit-vector-length bv)))]
   (cond
      [(bit-vector-ref bv i) #\1]
      [else #\0]))))

(define (bit-vector->posint bv)
  (string->number
   (format "#b~a"
     (bit-vector->string bv))))

(define (show-bv-slice bv start end)
  (bit-vector->list
   (bit-vector-copy bv start end)))

(define (bool->number b)
  (cond
    [b 1]
    [else 0]))

(define (number->bool n)
  (match n
    [0 #f]
    [1 #t]
    [_ #f]))

(define (sum xs)
  (foldr + 0 xs))

; conversion from base 10 functions

;; Have to calculate the number of digits to remove from
;; the precision based on how far the decimal point needs
;; to be moved left,
;; or else maybe just do the calculation, and lop off digits from the right?


(define (int->binary n)
  (let-values
      ([(q r) (quotient/remainder n 2)])
    (match q
      [0 (list r)]
      [_ (cons r (int->binary q))])))

(define (real->binary-frac n [precision 0])
  (define p
    (* 2 n))
  
  (displayln p)
  (cond
    [(= p 0.0) ""]
    [(> precision 51) ""]
    [(>= p 1)
     (string-append "1"
                    (real->binary-frac
                     (sub1 p)
                     (add1 precision)))]
    [(< p 1)
     (string-append "0"
                    (real->binary-frac
                     p
                     (add1 precision)))]))

; do the conversion from w.fff.. to binary
(define (real->bits whole fraction)
  (list
   (cond
     [(> whole 0) 0]
     [else 1])
   (bit-vector->string
    (list->bit-vector
     (map number->bool
          (reverse (int->binary whole)))))
   
   (real->binary-frac fraction)))


; Conversion from base-2 functions

(define (calculate-number bv)
  (define sign (bv-sign bv))
  (define mantissa (bv-mantissa bv))
  (define exponent (bv-exponent bv))

  (displayln (format "Sign = ~a" (cond ((= 0 sign) "positive") (else "negative"))))
  
  (displayln (format "Mantissa = ~a"
                     (exact->inexact
                      (calculate-mantissa mantissa))))
  
  (displayln (format "Exponent = ~a" exponent))
  
  (*
   (expt -1 sign)
   (calculate-mantissa mantissa)
   (expt 2 exponent)))

(define (exp-len bv)
  (match (bit-vector-length bv)
    [32 8]
    [64 11]))

(define (bv-mantissa bv)
   (bit-vector-copy bv
                   (add1 (exp-len bv))
                   (bit-vector-length bv)))


;; Floating point numbers

(define example
  (string->bit-vector
   ; 0.052 in binary
   ;seeeeeeeeeeemmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm
   "0011111110101010100111111011111001110110110010001011010000111001"))

;; In this example, we are representing 0.052 as a 64 bit floating point number
;; The first bit is our sign
;; The next 11 bits are our exponent
;; The next 52 bits are our mantissa (also called the significand or fraction)

;; Starting with the sign, if it is 1 it is negative, otherwise positive

(define (bv-sign bv)
  (cond
   [(bit-vector-ref bv 0) -1]
   [else 0]))

;; The exponent (next 11 bits) is represented in a biased form, meaning there is a subtraction that occurs
;; So for 0.052, the exponent is -5
;; 01111111010 = 1018 in binary
;; the bias is 1023, so we do 1018 - (2^10-1) = 1028 - 1023 = -5

(define (bv-exponent bv)
  ; bias is basically half the range of the exp minus 1
  (define bias
    (sub1
     (expt 2
          (sub1 (exp-len bv)))))
  ; subtract bias from exponent
  (-
   (bit-vector->posint
    (bit-vector-copy bv 1 (add1 (exp-len bv))))
   bias))

;; The mantissa (next 52 bits) is usually represented in a *normalized* form, meaning 1.xxx... (52 bits for a 64 bit float)
;; The mantissa can be calculated in decimal using a summation, e.g. b1 / 2^1 + b2 / 2^2 + ... (b1 and b2 are bits)

(define (calculate-mantissa n)
  (define bits
    (map bool->number (bit-vector->list n)))
  (define powers
    (map add1 (range (length bits))))
  ; add 1 for the implicit 1.xxx
  ; sum of bits divided by increasing powers of 2
  ; basically each "place" in the binary digits
    (add1 (sum (map
     (lambda (b p)
       (/ b (expt 2 p)))
     bits powers))))

;;                                     s    m        exp
;; Putting that together, you get (-1)^0 * 1.664 * 2^(-5) = 0.052

;; Keep in mind that the computer does not do this conversion every time it calculates something
;; There are various algorithms for adding/multiplying binary floating point numbers efficiently (which I won't get into)

;; You may ask why there is always an implicit leading 1. in the mantissa/significand. The answer is that it's
;; somewhat arbitrary. There are things called subnormal, or denormalized numbers, which can change this.

;; From wikipedia:

;; In a denormal number, since the exponent is the least that it can be,
;; zero is the leading significand digit (0.m1m2m3...mp−2mp−1)
;; allowing the representation of numbers closer to zero than the smallest normal number.

;; 

;; Other fun things about floating point numbers

;; You may also notice that as the exponent gets larger and larger, the range of numbers between a given whole number
;; and the next one increases.

;; There is something called "epsilon" which essentially tells you which number is the upper bound on any rounding error
;; For example, on my machine 2.0 + 2.220446049250313e-16 = 2.0

;; Why? because 2.220446049250313e-16 (or anything smaller) is going to simply get rounded off.
;; This number basically tells you the limit of the precision for your floats on a given machine
;; It ends up being useful for various numerical algorithms that you probably don't need to care about.

;; It is important to understand that floating point intervals have an inherent limit to the range of numbers

;; NaN
;; NaNs are represented with an exponent that is all 1s, and a mantissa that is anything except all 0s
;; NaN == NaN is always false. This implies there is more than one NaN. Some software will actually use this
;; as a way of encoding error codes.

;; Infinity is represented with a mantissa of all 0s and an exponent of all 1s
;; We can have -/+ Infinity because of this

;; E.g.
;; NaN = 0111111111111000000000000000000000000000000000000000000000000000
;;-Inf = 1111111111110000000000000000000000000000000000000000000000000000

;; (note that my code does not properly handle infinity or NaNs)

;; Decimal floating point
;; There is an entire separate standard for this but all in Decimal, not Binary! Conceptually, you could do this
;; with any base. There are even hexadecimal floating point number systems.

;; If you need to deal with anything that must be exact, use rationals. If you need performance, use floats.
;; The problem with using floats is that some numbers can only be approximated, not perfectly accurately represented.
;; This is true of any base, not just base 2. It is also true of irrational numbers like pi.

;; There are things called "Minifloats" which are only 16 bits or smaller, and are non-standard, but useful
;; E.g. in graphics where you don't care too much about precision but performance matters a lot

(displayln
 (exact->inexact (calculate-number example)))
Add floating point stuff 4 years ago			`#lang racket`
			`(require data/bit-vector)`

			`; https://imgur.com/a/ClKK5Ac`

			`; See http://fabiensanglard.net/floating_point_visually_explained/ for an intuitive explanation`

			`(define (to-bin fl)`
			`(~r fl`
			`#:base 2`
			`#:precision 52))`

			`(define (bit-vector->string bv)`
			`(list->string`
			`(for/list [(i (in-range (bit-vector-length bv)))]`
			`(cond`
			`[(bit-vector-ref bv i) #\1]`
			`[else #\0]))))`

			`(define (bit-vector->posint bv)`
			`(string->number`
			`(format "#b~a"`
			`(bit-vector->string bv))))`

			`(define (show-bv-slice bv start end)`
			`(bit-vector->list`
			`(bit-vector-copy bv start end)))`

			`(define (bool->number b)`
			`(cond`
			`[b 1]`
			`[else 0]))`

			`(define (number->bool n)`
			`(match n`
			`[0 #f]`
			`[1 #t]`
			`[_ #f]))`

			`(define (sum xs)`
			`(foldr + 0 xs))`

			`; conversion from base 10 functions`

			`;; Have to calculate the number of digits to remove from`
			`;; the precision based on how far the decimal point needs`
			`;; to be moved left,`
			`;; or else maybe just do the calculation, and lop off digits from the right?`


			`(define (int->binary n)`
			`(let-values`
			`([(q r) (quotient/remainder n 2)])`
			`(match q`
			`[0 (list r)]`
			`[_ (cons r (int->binary q))])))`

			`(define (real->binary-frac n [precision 0])`
			`(define p`
			`(* 2 n))`

			`(displayln p)`
			`(cond`
			`[(= p 0.0) ""]`
			`[(> precision 51) ""]`
			`[(>= p 1)`
			`(string-append "1"`
			`(real->binary-frac`
			`(sub1 p)`
			`(add1 precision)))]`
			`[(< p 1)`
			`(string-append "0"`
			`(real->binary-frac`
			`p`
			`(add1 precision)))]))`

			`; do the conversion from w.fff.. to binary`
			`(define (real->bits whole fraction)`
			`(list`
			`(cond`
			`[(> whole 0) 0]`
			`[else 1])`
			`(bit-vector->string`
			`(list->bit-vector`
			`(map number->bool`
			`(reverse (int->binary whole)))))`

			`(real->binary-frac fraction)))`


			`; Conversion from base-2 functions`

			`(define (calculate-number bv)`
			`(define sign (bv-sign bv))`
			`(define mantissa (bv-mantissa bv))`
			`(define exponent (bv-exponent bv))`

			`(displayln (format "Sign = ~a" (cond ((= 0 sign) "positive") (else "negative"))))`

			`(displayln (format "Mantissa = ~a"`
			`(exact->inexact`
			`(calculate-mantissa mantissa))))`

			`(displayln (format "Exponent = ~a" exponent))`

			`(*`
			`(expt -1 sign)`
			`(calculate-mantissa mantissa)`
			`(expt 2 exponent)))`

			`(define (exp-len bv)`
			`(match (bit-vector-length bv)`
			`[32 8]`
			`[64 11]))`

			`(define (bv-mantissa bv)`
			`(bit-vector-copy bv`
			`(add1 (exp-len bv))`
			`(bit-vector-length bv)))`


			`;; Floating point numbers`

			`(define example`
			`(string->bit-vector`
			`; 0.052 in binary`
			`;seeeeeeeeeeemmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm`
			`"0011111110101010100111111011111001110110110010001011010000111001"))`

			`;; In this example, we are representing 0.052 as a 64 bit floating point number`
			`;; The first bit is our sign`
			`;; The next 11 bits are our exponent`
			`;; The next 52 bits are our mantissa (also called the significand or fraction)`

			`;; Starting with the sign, if it is 1 it is negative, otherwise positive`

			`(define (bv-sign bv)`
			`(cond`
			`[(bit-vector-ref bv 0) -1]`
			`[else 0]))`

			`;; The exponent (next 11 bits) is represented in a biased form, meaning there is a subtraction that occurs`
			`;; So for 0.052, the exponent is -5`
			`;; 01111111010 = 1018 in binary`
			`;; the bias is 1023, so we do 1018 - (2^10-1) = 1028 - 1023 = -5`

			`(define (bv-exponent bv)`
			`; bias is basically half the range of the exp minus 1`
			`(define bias`
			`(sub1`
			`(expt 2`
			`(sub1 (exp-len bv)))))`
			`; subtract bias from exponent`
			`(-`
			`(bit-vector->posint`
			`(bit-vector-copy bv 1 (add1 (exp-len bv))))`
			`bias))`

			`;; The mantissa (next 52 bits) is usually represented in a normalized form, meaning 1.xxx... (52 bits for a 64 bit float)`
			`;; The mantissa can be calculated in decimal using a summation, e.g. b1 / 2^1 + b2 / 2^2 + ... (b1 and b2 are bits)`

			`(define (calculate-mantissa n)`
			`(define bits`
			`(map bool->number (bit-vector->list n)))`
			`(define powers`
			`(map add1 (range (length bits))))`
			`; add 1 for the implicit 1.xxx`
			`; sum of bits divided by increasing powers of 2`
			`; basically each "place" in the binary digits`
			`(add1 (sum (map`
			`(lambda (b p)`
			`(/ b (expt 2 p)))`
			`bits powers))))`

			`;; s m exp`
			`;; Putting that together, you get (-1)^0 * 1.664 * 2^(-5) = 0.052`

			`;; Keep in mind that the computer does not do this conversion every time it calculates something`
			`;; There are various algorithms for adding/multiplying binary floating point numbers efficiently (which I won't get into)`

			`;; You may ask why there is always an implicit leading 1. in the mantissa/significand. The answer is that it's`
			`;; somewhat arbitrary. There are things called subnormal, or denormalized numbers, which can change this.`

			`;; From wikipedia:`

			`;; In a denormal number, since the exponent is the least that it can be,`
			`;; zero is the leading significand digit (0.m1m2m3...mp−2mp−1)`
			`;; allowing the representation of numbers closer to zero than the smallest normal number.`

			`;;`

			`;; Other fun things about floating point numbers`

			`;; You may also notice that as the exponent gets larger and larger, the range of numbers between a given whole number`
			`;; and the next one increases.`

			`;; There is something called "epsilon" which essentially tells you which number is the upper bound on any rounding error`
			`;; For example, on my machine 2.0 + 2.220446049250313e-16 = 2.0`

			`;; Why? because 2.220446049250313e-16 (or anything smaller) is going to simply get rounded off.`
			`;; This number basically tells you the limit of the precision for your floats on a given machine`
			`;; It ends up being useful for various numerical algorithms that you probably don't need to care about.`

			`;; It is important to understand that floating point intervals have an inherent limit to the range of numbers`

			`;; NaN`
			`;; NaNs are represented with an exponent that is all 1s, and a mantissa that is anything except all 0s`
			`;; NaN == NaN is always false. This implies there is more than one NaN. Some software will actually use this`
			`;; as a way of encoding error codes.`

			`;; Infinity is represented with a mantissa of all 0s and an exponent of all 1s`
			`;; We can have -/+ Infinity because of this`

			`;; E.g.`
			`;; NaN = 0111111111111000000000000000000000000000000000000000000000000000`
			`;;-Inf = 1111111111110000000000000000000000000000000000000000000000000000`

			`;; (note that my code does not properly handle infinity or NaNs)`

			`;; Decimal floating point`
			`;; There is an entire separate standard for this but all in Decimal, not Binary! Conceptually, you could do this`
			`;; with any base. There are even hexadecimal floating point number systems.`

			`;; If you need to deal with anything that must be exact, use rationals. If you need performance, use floats.`
			`;; The problem with using floats is that some numbers can only be approximated, not perfectly accurately represented.`
			`;; This is true of any base, not just base 2. It is also true of irrational numbers like pi.`

			`;; There are things called "Minifloats" which are only 16 bits or smaller, and are non-standard, but useful`
			`;; E.g. in graphics where you don't care too much about precision but performance matters a lot`

			`(displayln`
			`(exact->inexact (calculate-number example)))`