From bd0039a40596d5946b32e1577d4ea760e4d276fc Mon Sep 17 00:00:00 2001
From: Wesley Kerfoot
Date: Mon, 20 Jan 2020 20:26:05 -0500
Subject: [PATCH] Add floating point stuff

---
 floating_point.rkt | 232 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 232 insertions(+)
 create mode 100644 floating_point.rkt

diff --git a/floating_point.rkt b/floating_point.rkt
new file mode 100644
index 0000000..599861e
--- /dev/null
+++ b/floating_point.rkt
@@ -0,0 +1,232 @@
+#lang racket
+(require data/bit-vector)
+
+; https://imgur.com/a/ClKK5Ac
+
+; See http://fabiensanglard.net/floating_point_visually_explained/ for an intuitive explanation
+
+; Format a float as a base-2 string with up to 52 digits of precision
+(define (to-bin fl)
+  (~r fl
+      #:base 2
+      #:precision 52))
+
+; Render a bit vector as a string of 0s and 1s
+(define (bit-vector->string bv)
+  (list->string
+   (for/list ([i (in-range (bit-vector-length bv))])
+     (cond
+       [(bit-vector-ref bv i) #\1]
+       [else #\0]))))
+
+; Interpret a bit vector as an unsigned integer
+(define (bit-vector->posint bv)
+  (string->number
+   (format "#b~a" (bit-vector->string bv))))
+
+(define (show-bv-slice bv start end)
+  (bit-vector->list
+   (bit-vector-copy bv start end)))
+
+(define (bool->number b)
+  (cond
+    [b 1]
+    [else 0]))
+
+(define (number->bool n)
+  (match n
+    [0 #f]
+    [1 #t]
+    [_ #f]))
+
+(define (sum xs)
+  (foldr + 0 xs))
+
+; Conversion from base-10 functions
+
+;; We have to calculate how many digits to drop from the precision based on
+;; how far the binary point needs to be moved left,
+;; or else maybe just do the full expansion and lop digits off the right?
+
+; Convert a non-negative integer to its binary digits, least significant digit first
+(define (int->binary n)
+  (let-values
+      ([(q r) (quotient/remainder n 2)])
+    (match q
+      [0 (list r)]
+      [_ (cons r (int->binary q))])))
+
+; Expand the fractional part of a number into a string of binary digits by
+; repeated doubling, stopping after 52 digits (the width of a double's mantissa)
+(define (real->binary-frac n [precision 0])
+  (define p
+    (* 2 n))
+  (cond
+    [(= p 0.0) ""]
+    [(> precision 51) ""]
+    [(>= p 1)
+     (string-append "1"
+                    (real->binary-frac
+                     (sub1 p)
+                     (add1 precision)))]
+    [(< p 1)
+     (string-append "0"
+                    (real->binary-frac
+                     p
+                     (add1 precision)))]))
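+
+;; A quick sanity check of the helpers above; the values in the comments are
+;; what I would expect these calls to print (a small demo, not part of the
+;; conversion code itself):
+(displayln (reverse (int->binary 5))) ; (1 0 1), i.e. 5 = 101 in binary
+(displayln (real->binary-frac 0.25))  ; 01, i.e. 0.25 = 0.01 in binary
+(displayln (real->binary-frac 0.1))   ; 000110011..., 0.1 repeats in binary, so it is truncated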
+
+; Do the conversion from a number of the form w.fff... to binary
+(define (real->bits whole fraction)
+  (list
+   (cond
+     [(>= whole 0) 0]
+     [else 1])
+   (bit-vector->string
+    (list->bit-vector
+     (map number->bool
+          (reverse (int->binary whole)))))
+   (real->binary-frac fraction)))
+
+
+; Conversion from base-2 functions
+
+(define (calculate-number bv)
+  (define sign (bv-sign bv))
+  (define mantissa (bv-mantissa bv))
+  (define exponent (bv-exponent bv))
+
+  (displayln (format "Sign = ~a"
+                     (cond
+                       [(= 0 sign) "positive"]
+                       [else "negative"])))
+
+  (displayln (format "Mantissa = ~a"
+                     (exact->inexact
+                      (calculate-mantissa mantissa))))
+
+  (displayln (format "Exponent = ~a" exponent))
+
+  (*
+   (expt -1 sign)
+   (calculate-mantissa mantissa)
+   (expt 2 exponent)))
+
+; The exponent field is 8 bits wide in a 32-bit float and 11 bits wide in a 64-bit float
+(define (exp-len bv)
+  (match (bit-vector-length bv)
+    [32 8]
+    [64 11]))
+
+; The mantissa is everything after the sign bit and the exponent bits
+(define (bv-mantissa bv)
+  (bit-vector-copy bv
+                   (add1 (exp-len bv))
+                   (bit-vector-length bv)))
+
+
+;; Floating point numbers
+
+(define example
+  (string->bit-vector
+   ; 0.052 in binary
+   ;seeeeeeeeeeemmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm
+   "0011111110101010100111111011111001110110110010001011010000111001"))
+
+;; In this example, we are representing 0.052 as a 64-bit floating point number
+;; The first bit is our sign
+;; The next 11 bits are our exponent
+;; The last 52 bits are our mantissa (also called the significand or fraction)
+
+;; Starting with the sign: if it is 1, the number is negative; otherwise it is positive
+
+(define (bv-sign bv)
+  (cond
+    [(bit-vector-ref bv 0) -1]
+    [else 0]))
+
+;; The exponent (the next 11 bits) is represented in a biased form, meaning there is a subtraction that occurs
+;; So for 0.052, the exponent is -5:
+;; 01111111010 = 1018 in binary
+;; the bias is 1023, so we do 1018 - (2^10 - 1) = 1018 - 1023 = -5
+
+(define (bv-exponent bv)
+  ; the bias is basically half the range of the exponent field, minus 1
+  (define bias
+    (sub1
+     (expt 2
+           (sub1 (exp-len bv)))))
+  ; subtract the bias from the stored exponent
+  (-
+   (bit-vector->posint
+    (bit-vector-copy bv 1 (add1 (exp-len bv))))
+   bias))
+
+;; The mantissa (the next 52 bits) is usually represented in a *normalized* form, meaning 1.xxx... (52 bits for a 64-bit float)
+;; The mantissa can be calculated in decimal using a summation, e.g. b1 / 2^1 + b2 / 2^2 + ... (where b1, b2, ... are the bits)
+
+(define (calculate-mantissa n)
+  (define bits
+    (map bool->number (bit-vector->list n)))
+  (define powers
+    (map add1 (range (length bits))))
+  ; sum of the bits divided by increasing powers of 2,
+  ; i.e. each "place" in the binary digits;
+  ; then add 1 for the implicit 1.xxx
+  (add1 (sum (map
+              (lambda (b p)
+                (/ b (expt 2 p)))
+              bits powers))))
+
+;;                                  s      m       exp
+;; Putting that together, you get (-1)^0 * 1.664 * 2^(-5) = 0.052
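+
+;; Decoding the example with the functions above recovers each piece of that
+;; formula; these are the results I would expect at the REPL (the mantissa is
+;; only approximately 1.664, since 0.052 is not exactly representable):
+;;   (bv-sign example)     => 0
+;;   (bv-exponent example) => -5
+;;   (exact->inexact (calculate-mantissa (bv-mantissa example))) => ~1.664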
+
+;; Keep in mind that the computer does not do this conversion every time it calculates something
+;; There are various algorithms for adding/multiplying binary floating point numbers efficiently (which I won't get into)
+
+;; You may ask why there is always an implicit leading 1. in the mantissa/significand. The answer is that it's
+;; somewhat arbitrary. There are things called subnormal, or denormalized, numbers, which change this.
+
+;; From Wikipedia:
+
+;; In a denormal number, since the exponent is the least that it can be,
+;; zero is the leading significand digit (0.m1m2m3...mp−2mp−1),
+;; allowing the representation of numbers closer to zero than the smallest normal number.
+
+;; Other fun things about floating point numbers
+
+;; You may also notice that as the exponent gets larger and larger, the gap between a given representable number
+;; and the next one increases.
+
+;; There is something called "epsilon", which is essentially an upper bound on the relative rounding error
+;; For example, on my machine 2.0 + 2.220446049250313e-16 = 2.0
+
+;; Why? Because 2.220446049250313e-16 (or anything smaller) is simply going to get rounded off.
+;; This number basically tells you the limit of the precision of your floats on a given machine
+;; It ends up being useful for various numerical algorithms that you probably don't need to care about.
+
+;; It is important to understand that a floating point format has an inherent limit on both the range of
+;; representable numbers and how densely they are spaced.
+
+;; NaN
+;; NaNs are represented with an exponent that is all 1s and a mantissa that is anything except all 0s
+;; NaN == NaN is always false. Since any non-zero mantissa counts, there is more than one NaN bit pattern;
+;; some software will actually use the mantissa bits as a way of encoding error codes.
+
+;; Infinity is represented with a mantissa of all 0s and an exponent of all 1s
+;; We can have -/+ Infinity because the sign bit still applies
+
+;; E.g.
+;;  NaN = 0111111111111000000000000000000000000000000000000000000000000000
+;; -Inf = 1111111111110000000000000000000000000000000000000000000000000000
+
+;; (note that my code does not properly handle infinity or NaNs)
+
+;; Decimal floating point
+;; There is an entire separate standard for this, but all in decimal, not binary! Conceptually, you could do
+;; this with any base. There are even hexadecimal floating point number systems.
+
+;; If you need to deal with anything that must be exact, use rationals. If you need performance, use floats.
+;; The problem with using floats is that some numbers can only be approximated, never represented exactly.
+;; This is true of any base, not just base 2, and it is always true of irrational numbers like pi.
+
+;; There are things called "minifloats", which are 16 bits or smaller and non-standard, but useful,
+;; e.g. in graphics, where you don't care too much about precision but performance matters a lot
+
+(displayln
+ (exact->inexact (calculate-number example)))
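+
+;; The epsilon mentioned above can be estimated directly: keep halving a
+;; candidate until adding half of it to 1.0 no longer changes the result.
+;; A minimal sketch; estimate-epsilon is my own name for it, not a built-in:
+(define (estimate-epsilon)
+  (let loop ([eps 1.0])
+    (if (= (+ 1.0 (/ eps 2)) 1.0)
+        eps
+        (loop (/ eps 2)))))
+
+(displayln (estimate-epsilon)) ; 2.220446049250313e-16 for a 64-bit double
+
+;; And the rationals vs. floats point from above, at the REPL:
+;;   (+ 1/10 2/10) => 3/10                 ; exact
+;;   (+ 0.1 0.2)   => 0.30000000000000004  ; approximated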