1 changed files with 232 additions and 0 deletions
@ -0,0 +1,232 @@ |
|||||
|
#lang racket |
||||
|
(require data/bit-vector) |
||||
|
|
||||
|
; https://imgur.com/a/ClKK5Ac |
||||
|
|
||||
|
; See http://fabiensanglard.net/floating_point_visually_explained/ for an intuitive explanation |
||||
|
|
||||
|
(define (to-bin fl) |
||||
|
(~r fl |
||||
|
#:base 2 |
||||
|
#:precision 52)) |
||||
|
|
||||
|
(define (bit-vector->string bv) |
||||
|
(list->string |
||||
|
(for/list [(i (in-range (bit-vector-length bv)))] |
||||
|
(cond |
||||
|
[(bit-vector-ref bv i) #\1] |
||||
|
[else #\0])))) |
||||
|
|
||||
|
(define (bit-vector->posint bv) |
||||
|
(string->number |
||||
|
(format "#b~a" |
||||
|
(bit-vector->string bv)))) |
||||
|
|
||||
|
(define (show-bv-slice bv start end) |
||||
|
(bit-vector->list |
||||
|
(bit-vector-copy bv start end))) |
||||
|
|
||||
|
(define (bool->number b) |
||||
|
(cond |
||||
|
[b 1] |
||||
|
[else 0])) |
||||
|
|
||||
|
(define (number->bool n) |
||||
|
(match n |
||||
|
[0 #f] |
||||
|
[1 #t] |
||||
|
[_ #f])) |
||||
|
|
||||
|
(define (sum xs) |
||||
|
(foldr + 0 xs)) |
||||
|
|
||||
|
; conversion from base 10 functions |
||||
|
|
||||
|
;; Have to calculate the number of digits to remove from |
||||
|
;; the precision based on how far the decimal point needs |
||||
|
;; to be moved left, |
||||
|
;; or else maybe just do the calculation, and lop off digits from the right? |
||||
|
|
||||
|
|
||||
|
(define (int->binary n) |
||||
|
(let-values |
||||
|
([(q r) (quotient/remainder n 2)]) |
||||
|
(match q |
||||
|
[0 (list r)] |
||||
|
[_ (cons r (int->binary q))]))) |
||||
|
|
||||
|
(define (real->binary-frac n [precision 0]) |
||||
|
(define p |
||||
|
(* 2 n)) |
||||
|
|
||||
|
(displayln p) |
||||
|
(cond |
||||
|
[(= p 0.0) ""] |
||||
|
[(> precision 51) ""] |
||||
|
[(>= p 1) |
||||
|
(string-append "1" |
||||
|
(real->binary-frac |
||||
|
(sub1 p) |
||||
|
(add1 precision)))] |
||||
|
[(< p 1) |
||||
|
(string-append "0" |
||||
|
(real->binary-frac |
||||
|
p |
||||
|
(add1 precision)))])) |
||||
|
|
||||
|
; do the conversion from w.fff.. to binary |
||||
|
(define (real->bits whole fraction) |
||||
|
(list |
||||
|
(cond |
||||
|
[(> whole 0) 0] |
||||
|
[else 1]) |
||||
|
(bit-vector->string |
||||
|
(list->bit-vector |
||||
|
(map number->bool |
||||
|
(reverse (int->binary whole))))) |
||||
|
|
||||
|
(real->binary-frac fraction))) |
||||
|
|
||||
|
|
||||
|
; Conversion from base-2 functions |
||||
|
|
||||
|
(define (calculate-number bv) |
||||
|
(define sign (bv-sign bv)) |
||||
|
(define mantissa (bv-mantissa bv)) |
||||
|
(define exponent (bv-exponent bv)) |
||||
|
|
||||
|
(displayln (format "Sign = ~a" (cond ((= 0 sign) "positive") (else "negative")))) |
||||
|
|
||||
|
(displayln (format "Mantissa = ~a" |
||||
|
(exact->inexact |
||||
|
(calculate-mantissa mantissa)))) |
||||
|
|
||||
|
(displayln (format "Exponent = ~a" exponent)) |
||||
|
|
||||
|
(* |
||||
|
(expt -1 sign) |
||||
|
(calculate-mantissa mantissa) |
||||
|
(expt 2 exponent))) |
||||
|
|
||||
|
(define (exp-len bv) |
||||
|
(match (bit-vector-length bv) |
||||
|
[32 8] |
||||
|
[64 11])) |
||||
|
|
||||
|
(define (bv-mantissa bv) |
||||
|
(bit-vector-copy bv |
||||
|
(add1 (exp-len bv)) |
||||
|
(bit-vector-length bv))) |
||||
|
|
||||
|
|
||||
|
;; Floating point numbers |
||||
|
|
||||
|
(define example |
||||
|
(string->bit-vector |
||||
|
; 0.052 in binary |
||||
|
;seeeeeeeeeeemmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm |
||||
|
"0011111110101010100111111011111001110110110010001011010000111001")) |
||||
|
|
||||
|
;; In this example, we are representing 0.052 as a 64 bit floating point number |
||||
|
;; The first bit is our sign |
||||
|
;; The next 11 bits are our exponent |
||||
|
;; The next 52 bits are our mantissa (also called the significand or fraction) |
||||
|
|
||||
|
;; Starting with the sign, if it is 1 it is negative, otherwise positive |
||||
|
|
||||
|
(define (bv-sign bv) |
||||
|
(cond |
||||
|
[(bit-vector-ref bv 0) -1] |
||||
|
[else 0])) |
||||
|
|
||||
|
;; The exponent (next 11 bits) is represented in a biased form, meaning there is a subtraction that occurs |
||||
|
;; So for 0.052, the exponent is -5 |
||||
|
;; 01111111010 = 1018 in binary |
||||
|
;; the bias is 1023, so we do 1018 - (2^10-1) = 1028 - 1023 = -5 |
||||
|
|
||||
|
(define (bv-exponent bv) |
||||
|
; bias is basically half the range of the exp minus 1 |
||||
|
(define bias |
||||
|
(sub1 |
||||
|
(expt 2 |
||||
|
(sub1 (exp-len bv))))) |
||||
|
; subtract bias from exponent |
||||
|
(- |
||||
|
(bit-vector->posint |
||||
|
(bit-vector-copy bv 1 (add1 (exp-len bv)))) |
||||
|
bias)) |
||||
|
|
||||
|
;; The mantissa (next 52 bits) is usually represented in a *normalized* form, meaning 1.xxx... (52 bits for a 64 bit float) |
||||
|
;; The mantissa can be calculated in decimal using a summation, e.g. b1 / 2^1 + b2 / 2^2 + ... (b1 and b2 are bits) |
||||
|
|
||||
|
(define (calculate-mantissa n) |
||||
|
(define bits |
||||
|
(map bool->number (bit-vector->list n))) |
||||
|
(define powers |
||||
|
(map add1 (range (length bits)))) |
||||
|
; add 1 for the implicit 1.xxx |
||||
|
; sum of bits divided by increasing powers of 2 |
||||
|
; basically each "place" in the binary digits |
||||
|
(add1 (sum (map |
||||
|
(lambda (b p) |
||||
|
(/ b (expt 2 p))) |
||||
|
bits powers)))) |
||||
|
|
||||
|
;; s m exp |
||||
|
;; Putting that together, you get (-1)^0 * 1.664 * 2^(-5) = 0.052 |
||||
|
|
||||
|
;; Keep in mind that the computer does not do this conversion every time it calculates something |
||||
|
;; There are various algorithms for adding/multiplying binary floating point numbers efficiently (which I won't get into) |
||||
|
|
||||
|
;; You may ask why there is always an implicit leading 1. in the mantissa/significand. The answer is that it's |
||||
|
;; somewhat arbitrary. There are things called subnormal, or denormalized numbers, which can change this. |
||||
|
|
||||
|
;; From wikipedia: |
||||
|
|
||||
|
;; In a denormal number, since the exponent is the least that it can be, |
||||
|
;; zero is the leading significand digit (0.m1m2m3...mp−2mp−1) |
||||
|
;; allowing the representation of numbers closer to zero than the smallest normal number. |
||||
|
|
||||
|
;; |
||||
|
|
||||
|
;; Other fun things about floating point numbers |
||||
|
|
||||
|
;; You may also notice that as the exponent gets larger and larger, the range of numbers between a given whole number |
||||
|
;; and the next one increases. |
||||
|
|
||||
|
;; There is something called "epsilon" which essentially tells you which number is the upper bound on any rounding error |
||||
|
;; For example, on my machine 2.0 + 2.220446049250313e-16 = 2.0 |
||||
|
|
||||
|
;; Why? because 2.220446049250313e-16 (or anything smaller) is going to simply get rounded off. |
||||
|
;; This number basically tells you the limit of the precision for your floats on a given machine |
||||
|
;; It ends up being useful for various numerical algorithms that you probably don't need to care about. |
||||
|
|
||||
|
;; It is important to understand that floating point intervals have an inherent limit to the range of numbers |
||||
|
|
||||
|
;; NaN |
||||
|
;; NaNs are represented with an exponent that is all 1s, and a mantissa that is anything except all 0s |
||||
|
;; NaN == NaN is always false. This implies there is more than one NaN. Some software will actually use this |
||||
|
;; as a way of encoding error codes. |
||||
|
|
||||
|
;; Infinity is represented with a mantissa of all 0s and an exponent of all 1s |
||||
|
;; We can have -/+ Infinity because of this |
||||
|
|
||||
|
;; E.g. |
||||
|
;; NaN = 0111111111111000000000000000000000000000000000000000000000000000 |
||||
|
;;-Inf = 1111111111110000000000000000000000000000000000000000000000000000 |
||||
|
|
||||
|
;; (note that my code does not properly handle infinity or NaNs) |
||||
|
|
||||
|
;; Decimal floating point |
||||
|
;; There is an entire separate standard for this but all in Decimal, not Binary! Conceptually, you could do this |
||||
|
;; with any base. There are even hexadecimal floating point number systems. |
||||
|
|
||||
|
;; If you need to deal with anything that must be exact, use rationals. If you need performance, use floats. |
||||
|
;; The problem with using floats is that some numbers can only be approximated, not perfectly accurately represented. |
||||
|
;; This is true of any base, not just base 2. It is also true of irrational numbers like pi. |
||||
|
|
||||
|
;; There are things called "Minifloats" which are only 16 bits or smaller, and are non-standard, but useful |
||||
|
;; E.g. in graphics where you don't care too much about precision but performance matters a lot |
||||
|
|
||||
|
(displayln |
||||
|
(exact->inexact (calculate-number example))) |
Loading…
Reference in new issue