Browse Source

Add floating point stuff

pull/1/head
Wesley Kerfoot 4 years ago
parent
commit
bd0039a405
  1. 232
      floating_point.rkt

232
floating_point.rkt

@ -0,0 +1,232 @@
#lang racket
(require data/bit-vector)
; https://imgur.com/a/ClKK5Ac
; See http://fabiensanglard.net/floating_point_visually_explained/ for an intuitive explanation
(define (to-bin fl)
(~r fl
#:base 2
#:precision 52))
(define (bit-vector->string bv)
(list->string
(for/list [(i (in-range (bit-vector-length bv)))]
(cond
[(bit-vector-ref bv i) #\1]
[else #\0]))))
(define (bit-vector->posint bv)
(string->number
(format "#b~a"
(bit-vector->string bv))))
(define (show-bv-slice bv start end)
(bit-vector->list
(bit-vector-copy bv start end)))
(define (bool->number b)
(cond
[b 1]
[else 0]))
(define (number->bool n)
(match n
[0 #f]
[1 #t]
[_ #f]))
(define (sum xs)
(foldr + 0 xs))
; conversion from base 10 functions
;; Have to calculate the number of digits to remove from
;; the precision based on how far the decimal point needs
;; to be moved left,
;; or else maybe just do the calculation, and lop off digits from the right?
(define (int->binary n)
(let-values
([(q r) (quotient/remainder n 2)])
(match q
[0 (list r)]
[_ (cons r (int->binary q))])))
(define (real->binary-frac n [precision 0])
(define p
(* 2 n))
(displayln p)
(cond
[(= p 0.0) ""]
[(> precision 51) ""]
[(>= p 1)
(string-append "1"
(real->binary-frac
(sub1 p)
(add1 precision)))]
[(< p 1)
(string-append "0"
(real->binary-frac
p
(add1 precision)))]))
; do the conversion from w.fff.. to binary
(define (real->bits whole fraction)
(list
(cond
[(> whole 0) 0]
[else 1])
(bit-vector->string
(list->bit-vector
(map number->bool
(reverse (int->binary whole)))))
(real->binary-frac fraction)))
; Conversion from base-2 functions
(define (calculate-number bv)
(define sign (bv-sign bv))
(define mantissa (bv-mantissa bv))
(define exponent (bv-exponent bv))
(displayln (format "Sign = ~a" (cond ((= 0 sign) "positive") (else "negative"))))
(displayln (format "Mantissa = ~a"
(exact->inexact
(calculate-mantissa mantissa))))
(displayln (format "Exponent = ~a" exponent))
(*
(expt -1 sign)
(calculate-mantissa mantissa)
(expt 2 exponent)))
(define (exp-len bv)
(match (bit-vector-length bv)
[32 8]
[64 11]))
(define (bv-mantissa bv)
(bit-vector-copy bv
(add1 (exp-len bv))
(bit-vector-length bv)))
;; Floating point numbers
(define example
(string->bit-vector
; 0.052 in binary
;seeeeeeeeeeemmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm
"0011111110101010100111111011111001110110110010001011010000111001"))
;; In this example, we are representing 0.052 as a 64 bit floating point number
;; The first bit is our sign
;; The next 11 bits are our exponent
;; The next 52 bits are our mantissa (also called the significand or fraction)
;; Starting with the sign, if it is 1 it is negative, otherwise positive
(define (bv-sign bv)
(cond
[(bit-vector-ref bv 0) -1]
[else 0]))
;; The exponent (next 11 bits) is represented in a biased form, meaning there is a subtraction that occurs
;; So for 0.052, the exponent is -5
;; 01111111010 = 1018 in binary
;; the bias is 1023, so we do 1018 - (2^10-1) = 1028 - 1023 = -5
(define (bv-exponent bv)
; bias is basically half the range of the exp minus 1
(define bias
(sub1
(expt 2
(sub1 (exp-len bv)))))
; subtract bias from exponent
(-
(bit-vector->posint
(bit-vector-copy bv 1 (add1 (exp-len bv))))
bias))
;; The mantissa (next 52 bits) is usually represented in a *normalized* form, meaning 1.xxx... (52 bits for a 64 bit float)
;; The mantissa can be calculated in decimal using a summation, e.g. b1 / 2^1 + b2 / 2^2 + ... (b1 and b2 are bits)
(define (calculate-mantissa n)
(define bits
(map bool->number (bit-vector->list n)))
(define powers
(map add1 (range (length bits))))
; add 1 for the implicit 1.xxx
; sum of bits divided by increasing powers of 2
; basically each "place" in the binary digits
(add1 (sum (map
(lambda (b p)
(/ b (expt 2 p)))
bits powers))))
;; s m exp
;; Putting that together, you get (-1)^0 * 1.664 * 2^(-5) = 0.052
;; Keep in mind that the computer does not do this conversion every time it calculates something
;; There are various algorithms for adding/multiplying binary floating point numbers efficiently (which I won't get into)
;; You may ask why there is always an implicit leading 1. in the mantissa/significand. The answer is that it's
;; somewhat arbitrary. There are things called subnormal, or denormalized numbers, which can change this.
;; From wikipedia:
;; In a denormal number, since the exponent is the least that it can be,
;; zero is the leading significand digit (0.m1m2m3...mp−2mp−1)
;; allowing the representation of numbers closer to zero than the smallest normal number.
;;
;; Other fun things about floating point numbers
;; You may also notice that as the exponent gets larger and larger, the range of numbers between a given whole number
;; and the next one increases.
;; There is something called "epsilon" which essentially tells you which number is the upper bound on any rounding error
;; For example, on my machine 2.0 + 2.220446049250313e-16 = 2.0
;; Why? because 2.220446049250313e-16 (or anything smaller) is going to simply get rounded off.
;; This number basically tells you the limit of the precision for your floats on a given machine
;; It ends up being useful for various numerical algorithms that you probably don't need to care about.
;; It is important to understand that floating point intervals have an inherent limit to the range of numbers
;; NaN
;; NaNs are represented with an exponent that is all 1s, and a mantissa that is anything except all 0s
;; NaN == NaN is always false. This implies there is more than one NaN. Some software will actually use this
;; as a way of encoding error codes.
;; Infinity is represented with a mantissa of all 0s and an exponent of all 1s
;; We can have -/+ Infinity because of this
;; E.g.
;; NaN = 0111111111111000000000000000000000000000000000000000000000000000
;;-Inf = 1111111111110000000000000000000000000000000000000000000000000000
;; (note that my code does not properly handle infinity or NaNs)
;; Decimal floating point
;; There is an entire separate standard for this but all in Decimal, not Binary! Conceptually, you could do this
;; with any base. There are even hexadecimal floating point number systems.
;; If you need to deal with anything that must be exact, use rationals. If you need performance, use floats.
;; The problem with using floats is that some numbers can only be approximated, not perfectly accurately represented.
;; This is true of any base, not just base 2. It is also true of irrational numbers like pi.
;; There are things called "Minifloats" which are only 16 bits or smaller, and are non-standard, but useful
;; E.g. in graphics where you don't care too much about precision but performance matters a lot
(displayln
(exact->inexact (calculate-number example)))
Loading…
Cancel
Save