You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
232 lines
7.2 KiB
232 lines
7.2 KiB
4 years ago
|
#lang racket
|
||
|
(require data/bit-vector)
|
||
|
|
||
|
; https://imgur.com/a/ClKK5Ac
|
||
|
|
||
|
; See http://fabiensanglard.net/floating_point_visually_explained/ for an intuitive explanation
|
||
|
|
||
|
(define (to-bin fl)
|
||
|
(~r fl
|
||
|
#:base 2
|
||
|
#:precision 52))
|
||
|
|
||
|
(define (bit-vector->string bv)
|
||
|
(list->string
|
||
|
(for/list [(i (in-range (bit-vector-length bv)))]
|
||
|
(cond
|
||
|
[(bit-vector-ref bv i) #\1]
|
||
|
[else #\0]))))
|
||
|
|
||
|
(define (bit-vector->posint bv)
|
||
|
(string->number
|
||
|
(format "#b~a"
|
||
|
(bit-vector->string bv))))
|
||
|
|
||
|
(define (show-bv-slice bv start end)
|
||
|
(bit-vector->list
|
||
|
(bit-vector-copy bv start end)))
|
||
|
|
||
|
(define (bool->number b)
|
||
|
(cond
|
||
|
[b 1]
|
||
|
[else 0]))
|
||
|
|
||
|
(define (number->bool n)
|
||
|
(match n
|
||
|
[0 #f]
|
||
|
[1 #t]
|
||
|
[_ #f]))
|
||
|
|
||
|
(define (sum xs)
|
||
|
(foldr + 0 xs))
|
||
|
|
||
|
; conversion from base 10 functions
|
||
|
|
||
|
;; Have to calculate the number of digits to remove from
|
||
|
;; the precision based on how far the decimal point needs
|
||
|
;; to be moved left,
|
||
|
;; or else maybe just do the calculation, and lop off digits from the right?
|
||
|
|
||
|
|
||
|
(define (int->binary n)
|
||
|
(let-values
|
||
|
([(q r) (quotient/remainder n 2)])
|
||
|
(match q
|
||
|
[0 (list r)]
|
||
|
[_ (cons r (int->binary q))])))
|
||
|
|
||
|
(define (real->binary-frac n [precision 0])
|
||
|
(define p
|
||
|
(* 2 n))
|
||
|
|
||
|
(displayln p)
|
||
|
(cond
|
||
|
[(= p 0.0) ""]
|
||
|
[(> precision 51) ""]
|
||
|
[(>= p 1)
|
||
|
(string-append "1"
|
||
|
(real->binary-frac
|
||
|
(sub1 p)
|
||
|
(add1 precision)))]
|
||
|
[(< p 1)
|
||
|
(string-append "0"
|
||
|
(real->binary-frac
|
||
|
p
|
||
|
(add1 precision)))]))
|
||
|
|
||
|
; do the conversion from w.fff.. to binary
|
||
|
(define (real->bits whole fraction)
|
||
|
(list
|
||
|
(cond
|
||
|
[(> whole 0) 0]
|
||
|
[else 1])
|
||
|
(bit-vector->string
|
||
|
(list->bit-vector
|
||
|
(map number->bool
|
||
|
(reverse (int->binary whole)))))
|
||
|
|
||
|
(real->binary-frac fraction)))
|
||
|
|
||
|
|
||
|
; Conversion from base-2 functions
|
||
|
|
||
|
(define (calculate-number bv)
|
||
|
(define sign (bv-sign bv))
|
||
|
(define mantissa (bv-mantissa bv))
|
||
|
(define exponent (bv-exponent bv))
|
||
|
|
||
|
(displayln (format "Sign = ~a" (cond ((= 0 sign) "positive") (else "negative"))))
|
||
|
|
||
|
(displayln (format "Mantissa = ~a"
|
||
|
(exact->inexact
|
||
|
(calculate-mantissa mantissa))))
|
||
|
|
||
|
(displayln (format "Exponent = ~a" exponent))
|
||
|
|
||
|
(*
|
||
|
(expt -1 sign)
|
||
|
(calculate-mantissa mantissa)
|
||
|
(expt 2 exponent)))
|
||
|
|
||
|
(define (exp-len bv)
|
||
|
(match (bit-vector-length bv)
|
||
|
[32 8]
|
||
|
[64 11]))
|
||
|
|
||
|
(define (bv-mantissa bv)
|
||
|
(bit-vector-copy bv
|
||
|
(add1 (exp-len bv))
|
||
|
(bit-vector-length bv)))
|
||
|
|
||
|
|
||
|
;; Floating point numbers
|
||
|
|
||
|
(define example
|
||
|
(string->bit-vector
|
||
|
; 0.052 in binary
|
||
|
;seeeeeeeeeeemmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm
|
||
|
"0011111110101010100111111011111001110110110010001011010000111001"))
|
||
|
|
||
|
;; In this example, we are representing 0.052 as a 64 bit floating point number
|
||
|
;; The first bit is our sign
|
||
|
;; The next 11 bits are our exponent
|
||
|
;; The next 52 bits are our mantissa (also called the significand or fraction)
|
||
|
|
||
|
;; Starting with the sign, if it is 1 it is negative, otherwise positive
|
||
|
|
||
|
(define (bv-sign bv)
|
||
|
(cond
|
||
|
[(bit-vector-ref bv 0) -1]
|
||
|
[else 0]))
|
||
|
|
||
|
;; The exponent (next 11 bits) is represented in a biased form, meaning there is a subtraction that occurs
|
||
|
;; So for 0.052, the exponent is -5
|
||
|
;; 01111111010 = 1018 in binary
|
||
|
;; the bias is 1023, so we do 1018 - (2^10-1) = 1028 - 1023 = -5
|
||
|
|
||
|
(define (bv-exponent bv)
|
||
|
; bias is basically half the range of the exp minus 1
|
||
|
(define bias
|
||
|
(sub1
|
||
|
(expt 2
|
||
|
(sub1 (exp-len bv)))))
|
||
|
; subtract bias from exponent
|
||
|
(-
|
||
|
(bit-vector->posint
|
||
|
(bit-vector-copy bv 1 (add1 (exp-len bv))))
|
||
|
bias))
|
||
|
|
||
|
;; The mantissa (next 52 bits) is usually represented in a *normalized* form, meaning 1.xxx... (52 bits for a 64 bit float)
|
||
|
;; The mantissa can be calculated in decimal using a summation, e.g. b1 / 2^1 + b2 / 2^2 + ... (b1 and b2 are bits)
|
||
|
|
||
|
(define (calculate-mantissa n)
|
||
|
(define bits
|
||
|
(map bool->number (bit-vector->list n)))
|
||
|
(define powers
|
||
|
(map add1 (range (length bits))))
|
||
|
; add 1 for the implicit 1.xxx
|
||
|
; sum of bits divided by increasing powers of 2
|
||
|
; basically each "place" in the binary digits
|
||
|
(add1 (sum (map
|
||
|
(lambda (b p)
|
||
|
(/ b (expt 2 p)))
|
||
|
bits powers))))
|
||
|
|
||
|
;; s m exp
|
||
|
;; Putting that together, you get (-1)^0 * 1.664 * 2^(-5) = 0.052
|
||
|
|
||
|
;; Keep in mind that the computer does not do this conversion every time it calculates something
|
||
|
;; There are various algorithms for adding/multiplying binary floating point numbers efficiently (which I won't get into)
|
||
|
|
||
|
;; You may ask why there is always an implicit leading 1. in the mantissa/significand. The answer is that it's
|
||
|
;; somewhat arbitrary. There are things called subnormal, or denormalized numbers, which can change this.
|
||
|
|
||
|
;; From wikipedia:
|
||
|
|
||
|
;; In a denormal number, since the exponent is the least that it can be,
|
||
|
;; zero is the leading significand digit (0.m1m2m3...mp−2mp−1)
|
||
|
;; allowing the representation of numbers closer to zero than the smallest normal number.
|
||
|
|
||
|
;;
|
||
|
|
||
|
;; Other fun things about floating point numbers
|
||
|
|
||
|
;; You may also notice that as the exponent gets larger and larger, the range of numbers between a given whole number
|
||
|
;; and the next one increases.
|
||
|
|
||
|
;; There is something called "epsilon" which essentially tells you which number is the upper bound on any rounding error
|
||
|
;; For example, on my machine 2.0 + 2.220446049250313e-16 = 2.0
|
||
|
|
||
|
;; Why? because 2.220446049250313e-16 (or anything smaller) is going to simply get rounded off.
|
||
|
;; This number basically tells you the limit of the precision for your floats on a given machine
|
||
|
;; It ends up being useful for various numerical algorithms that you probably don't need to care about.
|
||
|
|
||
|
;; It is important to understand that floating point intervals have an inherent limit to the range of numbers
|
||
|
|
||
|
;; NaN
|
||
|
;; NaNs are represented with an exponent that is all 1s, and a mantissa that is anything except all 0s
|
||
|
;; NaN == NaN is always false. This implies there is more than one NaN. Some software will actually use this
|
||
|
;; as a way of encoding error codes.
|
||
|
|
||
|
;; Infinity is represented with a mantissa of all 0s and an exponent of all 1s
|
||
|
;; We can have -/+ Infinity because of this
|
||
|
|
||
|
;; E.g.
|
||
|
;; NaN = 0111111111111000000000000000000000000000000000000000000000000000
|
||
|
;;-Inf = 1111111111110000000000000000000000000000000000000000000000000000
|
||
|
|
||
|
;; (note that my code does not properly handle infinity or NaNs)
|
||
|
|
||
|
;; Decimal floating point
|
||
|
;; There is an entire separate standard for this but all in Decimal, not Binary! Conceptually, you could do this
|
||
|
;; with any base. There are even hexadecimal floating point number systems.
|
||
|
|
||
|
;; If you need to deal with anything that must be exact, use rationals. If you need performance, use floats.
|
||
|
;; The problem with using floats is that some numbers can only be approximated, not perfectly accurately represented.
|
||
|
;; This is true of any base, not just base 2. It is also true of irrational numbers like pi.
|
||
|
|
||
|
;; There are things called "Minifloats" which are only 16 bits or smaller, and are non-standard, but useful
|
||
|
;; E.g. in graphics where you don't care too much about precision but performance matters a lot
|
||
|
|
||
|
(displayln
|
||
|
(exact->inexact (calculate-number example)))
|