Add floating point stuff

5 years ago · bd0039a405
1 changed files with 232 additions and 0 deletions
--- a/floating_point.rkt
+++ b/floating_point.rkt
@ -0,0 +1,232 @@
 #lang racket
 (require data/bit-vector)
 ; https://imgur.com/a/ClKK5Ac
 ; See http://fabiensanglard.net/floating_point_visually_explained/ for an intuitive explanation
 ; Format the number `fl` as a base-2 numeral string using `~r`,
 ; passing 52 as the #:precision argument.
 (define (to-bin fl)
   (~r #:precision 52
       #:base 2
       fl))
 ; `bit-vector->string` is exported by data/bit-vector (required at the top of
 ; this file) and renders each bit as #\1 (set) or #\0 (clear), which is exactly
 ; what a hand-rolled loop over `bit-vector-ref` would produce.
 ; A module may not define an identifier it also imports through `require`,
 ; so the library binding is used directly rather than a local redefinition.
 ; Interpret the bits of `bv` as an unsigned binary numeral: the bit string
 ; is prefixed with the "#b" radix marker and parsed with `string->number`.
 (define (bit-vector->posint bv)
   (define digits (bit-vector->string bv))
   (string->number (string-append "#b" digits)))
 ; Return, as a list of booleans, the bits of `bv` selected by
 ; `bit-vector-copy` with the bounds `start` and `end`.
 (define (show-bv-slice bv start end)
   (let ([slice (bit-vector-copy bv start end)])
     (bit-vector->list slice)))
 ; Map a truthy value to 1 and #f to 0.
 (define (bool->number b)
   (if b 1 0))
 ; Map the exact integer 1 to #t; every other value (including 0, other
 ; integers, and the inexact 1.0) maps to #f.
 (define (number->bool n)
   (equal? n 1))
 ; Add up the numbers in the list `xs`, starting from 0.
 ; Elements are combined from the right end of the list towards the left.
 (define (sum xs)
   (for/fold ([total 0])
             ([x (in-list (reverse xs))])
     (+ x total)))
 ; conversion from base 10 functions
 ;; Have to calculate the number of digits to remove from
 ;; the precision based on how far the decimal point needs
 ;; to be moved left,
 ;; or else maybe just do the calculation, and lop off digits from the right?
 ; Produce the base-2 digits of the exact integer `n` as a list,
 ; least-significant digit first, by repeated division by 2.
 (define (int->binary n)
   (let loop ([remaining n]
              [digits '()])
     (define-values (q r) (quotient/remainder remaining 2))
     (define digits+ (cons r digits))
     ; Stop once the quotient is the exact integer 0; digits were collected
     ; most-recent-first, so flip them back into least-significant-first order.
     (if (equal? q 0)
         (reverse digits+)
         (loop q digits+))))
 ; Convert the fractional value `n` (expected to lie in [0, 1)) into a string
 ; of binary digits by repeated doubling: doubling moves the next binary digit
 ; into the ones place, where it is read off ("1" if the doubled value is >= 1,
 ; otherwise "0") and subtracted away before recursing.
 ;
 ; `precision` counts the digits emitted so far. Generation stops when the
 ; doubled remainder is zero or once 52 digits have been produced.
 ; Returns the digit string; it has no side effects (the original debug trace
 ; of every intermediate value has been removed).
 (define (real->binary-frac n [precision 0])
   (define p
     (* 2 n))
   (cond
     ; Nothing left to convert.
     [(= p 0.0) ""]
     ; Digit budget exhausted (precision 0..51 => at most 52 digits).
     [(> precision 51) ""]
     [(>= p 1)
      (string-append "1"
                     (real->binary-frac
                      (sub1 p)
                      (add1 precision)))]
     [(< p 1)
      (string-append "0"
                     (real->binary-frac
                      p
                      (add1 precision)))]))
 ; do the conversion from w.fff.. to binary
 ; Split-number to binary: given the exact-integer part `whole` and the
 ; fractional part `fraction` of a number, return a three-element list
 ;   (sign-bit  whole-part-binary-string  fraction-binary-string)
 ; The sign bit is 1 only when `whole` is negative, so a whole part of 0
 ; (e.g. for 0.5) is reported as positive. The binary digits describe the
 ; magnitude of `whole`; the sign is carried solely by the first element.
 ; The sign is derived from `whole` alone, so a negative value whose whole
 ; part is 0 cannot be expressed through this interface.
 (define (real->bits whole fraction)
   (define magnitude (abs whole))
   (list
    (cond
      [(negative? whole) 1]
      [else 0])
    (bit-vector->string
     (list->bit-vector
      (map number->bool
           ; int->binary yields least-significant digit first; flip for display.
           (reverse (int->binary magnitude)))))
    (real->binary-frac fraction)))
 ; Conversion from base-2 functions
 ; Decode the floating point bit pattern in the bit-vector `bv` and return
 ; the exact value (-1)^sign * mantissa * 2^exponent.
 ; As a side effect, prints the sign, the mantissa (as an inexact number),
 ; and the exponent, one per line.
 (define (calculate-number bv)
   (define sign (bv-sign bv))
   (define significand (calculate-mantissa (bv-mantissa bv)))
   (define exponent (bv-exponent bv))
   (define sign-label
     (if (= 0 sign) "positive" "negative"))
   (displayln (format "Sign = ~a" sign-label))
   (displayln (format "Mantissa = ~a" (exact->inexact significand)))
   (displayln (format "Exponent = ~a" exponent))
   (* (expt -1 sign)
      significand
      (expt 2 exponent)))
 ; Width in bits of the exponent field for a float stored in the bit-vector
 ; `bv`, chosen from the total length of `bv`:
 ;   16 -> 5, 32 -> 8, 64 -> 11, 128 -> 15  (the IEEE 754 binary formats).
 ; Raises an argument error for any other length, since the field layout
 ; would be unknown.
 (define (exp-len bv)
   (case (bit-vector-length bv)
     [(16) 5]
     [(32) 8]
     [(64) 11]
     [(128) 15]
     [else
      (raise-argument-error 'exp-len
                            "bit-vector of length 16, 32, 64, or 128"
                            bv)]))
 ; Extract the mantissa field of `bv` as a new bit-vector: every bit after
 ; the sign bit (index 0) and the exponent field (the next `exp-len` bits).
 (define (bv-mantissa bv)
   (let* ([first-mantissa-bit (+ 1 (exp-len bv))]
          [total-bits (bit-vector-length bv)])
     (bit-vector-copy bv first-mantissa-bit total-bits)))
 ;; Floating point numbers
 ; Sample 64-bit pattern, stored as a bit-vector, that the demo at the end of
 ; this file decodes with `calculate-number`.
 (define example
  (string->bit-vector
   ; 0.052 in binary
   ; Guide row for the literal below: s = sign bit, e = exponent bits, m = mantissa bits
   ;seeeeeeeeeeemmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm
   "0011111110101010100111111011111001110110110010001011010000111001"))
 ;; In this example, we are representing 0.052 as a 64 bit floating point number
 ;; The first bit is our sign
 ;; The next 11 bits are our exponent
 ;; The next 52 bits are our mantissa (also called the significand or fraction)
 ;; Starting with the sign, if it is 1 it is negative, otherwise positive
 ; Read the sign from bit 0 of `bv`: returns -1 when the bit is set and 0
 ; when it is clear.
 (define (bv-sign bv)
   (if (bit-vector-ref bv 0)
       -1
       0))
 ;; The exponent (next 11 bits) is represented in a biased form, meaning there is a subtraction that occurs
 ;; So for 0.052, the exponent is -5
 ;; 01111111010 in binary = 1018 in decimal
 ;; the bias is 1023, so we do 1018 - (2^10-1) = 1018 - 1023 = -5
 ; Return the unbiased exponent encoded in `bv`.
 (define (bv-exponent bv)
   (define field-width (exp-len bv))
   ; Stored (biased) exponent: the `field-width` bits right after the sign
   ; bit, read as an unsigned integer.
   (define stored
     (bit-vector->posint
      (bit-vector-copy bv 1 (+ field-width 1))))
   ; The bias is 2^(field-width - 1) - 1, e.g. 1023 for an 11-bit field.
   (define bias
     (- (expt 2 (- field-width 1)) 1))
   (- stored bias))
 ;; The mantissa (next 52 bits) is usually represented in a *normalized* form, meaning 1.xxx... (52 bits for a 64 bit float)
 ;; The mantissa can be calculated in decimal using a summation, e.g. b1 / 2^1 + b2 / 2^2 + ... (b1 and b2 are bits)
 ; Compute the value of a normalized significand from its fraction bits.
 ; `n` is a bit-vector of fraction bits, most significant first; the result
 ; is the exact rational 1 + b1/2^1 + b2/2^2 + ...
 (define (calculate-mantissa n)
   ; Fraction bits as 0/1 integers.
   (define digits
     (for/list ([flag (in-list (bit-vector->list n))])
       (bool->number flag)))
   ; The k-th bit (counting from 1) carries the weight 1/2^k.
   (define weighted
     (for/list ([d (in-list digits)]
                [k (in-naturals 1)])
       (/ d (expt 2 k))))
   ; The leading "1." of the normalized form is not stored, so add it back.
   (+ 1 (sum weighted)))
 ;;                                     s    m        exp
 ;; Putting that together, you get (-1)^0 * 1.664 * 2^(-5) = 0.052
 ;; Keep in mind that the computer does not do this conversion every time it calculates something
 ;; There are various algorithms for adding/multiplying binary floating point numbers efficiently (which I won't get into)
 ;; You may ask why there is always an implicit leading 1. in the mantissa/significand. The answer is that any
 ;; nonzero binary number can be normalized so that its first digit is 1, so storing that digit would waste a bit.
 ;; The exception is subnormal, or denormalized, numbers, which change this.
 ;; From wikipedia:
 ;; In a denormal number, since the exponent is the least that it can be,
 ;; zero is the leading significand digit (0.m1m2m3...mp−2mp−1)
 ;; allowing the representation of numbers closer to zero than the smallest normal number.
 ;; 
 ;; Other fun things about floating point numbers
 ;; You may also notice that as the exponent gets larger and larger, the range of numbers between a given whole number
 ;; and the next one increases.
 ;; There is something called "epsilon" which essentially tells you which number is the upper bound on any rounding error
 ;; For example, on my machine 2.0 + 2.220446049250313e-16 = 2.0
 ;; Why? because 2.220446049250313e-16 (or anything smaller) is going to simply get rounded off.
 ;; This number basically tells you the limit of the precision for your floats on a given machine
 ;; It ends up being useful for various numerical algorithms that you probably don't need to care about.
 ;; It is important to understand that floating point intervals have an inherent limit to the range of numbers
 ;; NaN
 ;; NaNs are represented with an exponent that is all 1s, and a mantissa that is anything except all 0s
 ;; NaN == NaN is always false. Because any non-zero mantissa counts, there is more than one NaN bit pattern.
 ;; Some software will actually use the spare mantissa bits as a way of encoding error codes.
 ;; Infinity is represented with a mantissa of all 0s and an exponent of all 1s
 ;; We can have -/+ Infinity because of this
 ;; E.g.
 ;; NaN = 0111111111111000000000000000000000000000000000000000000000000000
 ;;-Inf = 1111111111110000000000000000000000000000000000000000000000000000
 ;; (note that my code does not properly handle infinity or NaNs)
 ;; Decimal floating point
 ;; There is an entire separate standard for this but all in Decimal, not Binary! Conceptually, you could do this
 ;; with any base. There are even hexadecimal floating point number systems.
 ;; If you need to deal with anything that must be exact, use rationals. If you need performance, use floats.
 ;; The problem with using floats is that some numbers can only be approximated, not perfectly accurately represented.
 ;; This is true of any base, not just base 2. It is also true of irrational numbers like pi.
 ;; There are things called "Minifloats" which are only 16 bits or smaller, and are non-standard, but useful
 ;; E.g. in graphics where you don't care too much about precision but performance matters a lot
 ; Demo: decode the sample bit pattern and print the result as an inexact number.
 (let ([decoded (calculate-number example)])
   (displayln (exact->inexact decoded)))