**Wesley Kerfoot**3 years ago

**1 changed files**with

**232 additions**and

**0 deletions**

`@ -0,0 +1,232 @@` |
|||

`#lang racket` |
|||

`(require data/bit-vector)` |
|||

```
``` |
|||

`; https://imgur.com/a/ClKK5Ac` |
|||

```
``` |
|||

`; See http://fabiensanglard.net/floating_point_visually_explained/ for an intuitive explanation` |
|||

```
``` |
|||

;; Render a real number as a base-2 string, keeping up to 52 fractional
;; digits (the mantissa width of a 64-bit double).
(define (to-bin fl)
  (~r fl #:precision 52 #:base 2))
|||

```
``` |
|||

;; Convert a bit-vector to a string of #\1 / #\0 characters, index 0 first.
(define (bit-vector->string bv)
  (list->string
   (for/list ([bit (in-bit-vector bv)])
     (if bit #\1 #\0))))
|||

```
``` |
|||

;; Interpret a bit-vector as an unsigned big-endian integer
;; (bit 0 is the most significant digit).
(define (bit-vector->posint bv)
  ;; Radix argument to string->number replaces the "#b~a" format trick.
  (string->number (bit-vector->string bv) 2))
|||

```
``` |
|||

;; Return the bits in the half-open range [start, end) of bv
;; as a list of booleans.
(define (show-bv-slice bv start end)
  (for/list ([i (in-range start end)])
    (bit-vector-ref bv i)))
|||

```
``` |
|||

;; Map a boolean (or any truthy value) to a bit: #t -> 1, #f -> 0.
(define (bool->number b)
  (if b 1 0))
|||

```
``` |
|||

;; Map a bit to a boolean: 1 -> #t, anything else -> #f.
(define (number->bool n)
  (eqv? n 1))
|||

```
``` |
|||

;; Sum a list of numbers; the empty list sums to 0.
(define (sum xs)
  (apply + xs))
|||

```
``` |
|||

`; conversion from base 10 functions` |
|||

```
``` |
|||

`;; Have to calculate the number of digits to remove from` |
|||

`;; the precision based on how far the decimal point needs` |
|||

`;; to be moved left,` |
|||

`;; or else maybe just do the calculation, and lop off digits from the right?` |
|||

```
``` |
|||

```
``` |
|||

;; Binary digits of a non-negative integer n, least-significant first.
;; e.g. (int->binary 6) => '(0 1 1), and (int->binary 0) => '(0).
(define (int->binary n)
  (define-values (q r) (quotient/remainder n 2))
  (if (zero? q)
      (list r)
      (cons r (int->binary q))))
|||

```
``` |
|||

;; Expand the fractional part n (0 <= n < 1) into a string of binary digits.
;; Stops when the fraction is exhausted or 52 digits (a double's mantissa
;; width) have been produced.  precision counts digits emitted so far.
;;
;; Fixes: removed a leftover debug (displayln p) that printed every
;; intermediate value, and turned the final (< p 1) clause into an else so
;; the cond can never fall through to void (e.g. on NaN input).
(define (real->binary-frac n [precision 0])
  ;; Doubling shifts the binary point one place right; whether the result
  ;; reaches 1 decides the next binary digit.
  (define p (* 2 n))
  (cond
    [(= p 0.0) ""]
    [(> precision 51) ""]
    [(>= p 1)
     (string-append "1"
                    (real->binary-frac (sub1 p) (add1 precision)))]
    [else
     (string-append "0"
                    (real->binary-frac p (add1 precision)))]))
|||

```
``` |
|||

`; do the conversion from w.fff.. to binary` |
|||

`(define (real->bits whole fraction)` |
|||

` (list` |
|||

` (cond` |
|||

` [(> whole 0) 0]` |
|||

` [else 1])` |
|||

` (bit-vector->string` |
|||

` (list->bit-vector` |
|||

` (map number->bool` |
|||

` (reverse (int->binary whole)))))` |
|||

` ` |
|||

` (real->binary-frac fraction)))` |
|||

```
``` |
|||

```
``` |
|||

`; Conversion from base-2 functions` |
|||

```
``` |
|||

;; Decode an IEEE-style bit-vector into the number it represents,
;; printing the decoded sign, mantissa and exponent along the way.
(define (calculate-number bv)
  (define sign (bv-sign bv))
  (define exponent (bv-exponent bv))
  ;; calculate-mantissa is pure, so compute it once and reuse.
  (define mantissa-value (calculate-mantissa (bv-mantissa bv)))

  (displayln (format "Sign = ~a" (if (= sign 0) "positive" "negative")))
  (displayln (format "Mantissa = ~a" (exact->inexact mantissa-value)))
  (displayln (format "Exponent = ~a" exponent))

  ;; value = (-1)^sign * mantissa * 2^exponent
  (* (expt -1 sign)
     mantissa-value
     (expt 2 exponent)))
|||

```
``` |
|||

;; Width of the exponent field: 8 bits for a 32-bit (single) float,
;; 11 bits for a 64-bit (double) float.
;; Fixes: the bare match had no fallback, so an unsupported width raised
;; an opaque match error; report the problem explicitly instead.
(define (exp-len bv)
  (define len (bit-vector-length bv))
  (cond
    [(= len 32) 8]
    [(= len 64) 11]
    [else (error 'exp-len "expected a 32- or 64-bit vector, got ~a bits" len)]))
|||

```
``` |
|||

;; The mantissa/significand bits: everything after the sign bit and
;; the exponent field.
(define (bv-mantissa bv)
  (define mantissa-start (add1 (exp-len bv)))
  (bit-vector-copy bv mantissa-start (bit-vector-length bv)))
|||

```
``` |
|||

```
``` |
|||

`;; Floating point numbers` |
|||

```
``` |
|||

;; Worked example: the 64-bit IEEE-754 encoding of 0.052.
;; Layout ruler below: s = sign (1 bit), e = exponent (11 bits),
;; m = mantissa (52 bits).
(define example
  (string->bit-vector
   ; 0.052 in binary
   ;seeeeeeeeeeemmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm
   "0011111110101010100111111011111001110110110010001011010000111001"))
|||

```
``` |
|||

`;; In this example, we are representing 0.052 as a 64 bit floating point number` |
|||

`;; The first bit is our sign` |
|||

`;; The next 11 bits are our exponent` |
|||

`;; The next 52 bits are our mantissa (also called the significand or fraction)` |
|||

```
``` |
|||

`;; Starting with the sign, if it is 1 it is negative, otherwise positive` |
|||

```
``` |
|||

;; Sign component: -1 when the sign bit (bit 0) is set, otherwise 0,
;; chosen so (expt -1 sign) yields the correct sign multiplier.
(define (bv-sign bv)
  (if (bit-vector-ref bv 0) -1 0))
|||

```
``` |
|||

`;; The exponent (next 11 bits) is represented in a biased form, meaning there is a subtraction that occurs` |
|||

`;; So for 0.052, the exponent is -5` |
|||

`;; 01111111010 = 1018 in binary` |
|||

`;; the bias is 1023, so we do 1018 - (2^10-1) = 1018 - 1023 = -5` |
|||

```
``` |
|||

;; Decode the biased exponent field (bits 1 .. exp-len) and remove the bias.
;; The bias is 2^(width-1) - 1, e.g. 1023 for 64-bit doubles.
(define (bv-exponent bv)
  (define field-width (exp-len bv))
  (define bias (sub1 (expt 2 (sub1 field-width))))
  (define raw-exponent
    (bit-vector->posint
     (bit-vector-copy bv 1 (add1 field-width))))
  (- raw-exponent bias))
|||

```
``` |
|||

`;; The mantissa (next 52 bits) is usually represented in a *normalized* form, meaning 1.xxx... (52 bits for a 64 bit float)` |
|||

`;; The mantissa can be calculated in decimal using a summation, e.g. b1 / 2^1 + b2 / 2^2 + ... (b1 and b2 are bits)` |
|||

```
``` |
|||

;; Decimal value of the mantissa bit-vector: 1 + sum of b_i / 2^i for each
;; bit b_i at (1-based) position i.  The leading add1 is the implicit
;; normalized "1." digit.  Result is an exact rational.
(define (calculate-mantissa n)
  (add1
   (for/sum ([bit (in-bit-vector n)]
             [power (in-naturals 1)])
     (if bit (/ 1 (expt 2 power)) 0))))
|||

```
``` |
|||

`;; s m exp` |
|||

`;; Putting that together, you get (-1)^0 * 1.664 * 2^(-5) = 0.052` |
|||

```
``` |
|||

`;; Keep in mind that the computer does not do this conversion every time it calculates something` |
|||

`;; There are various algorithms for adding/multiplying binary floating point numbers efficiently (which I won't get into)` |
|||

```
``` |
|||

`;; You may ask why there is always an implicit leading 1. in the mantissa/significand. The answer is that it's` |
|||

`;; somewhat arbitrary. There are things called subnormal, or denormalized numbers, which can change this.` |
|||

```
``` |
|||

`;; From wikipedia:` |
|||

```
``` |
|||

`;; In a denormal number, since the exponent is the least that it can be,` |
|||

`;; zero is the leading significand digit (0.m1m2m3...mp−2mp−1)` |
|||

`;; allowing the representation of numbers closer to zero than the smallest normal number.` |
|||

```
``` |
|||

`;; ` |
|||

```
``` |
|||

`;; Other fun things about floating point numbers` |
|||

```
``` |
|||

`;; You may also notice that as the exponent gets larger and larger, the range of numbers between a given whole number` |
|||

`;; and the next one increases.` |
|||

```
``` |
|||

`;; There is something called "epsilon" which essentially tells you which number is the upper bound on any rounding error` |
|||

`;; For example, on my machine 2.0 + 2.220446049250313e-16 = 2.0` |
|||

```
``` |
|||

`;; Why? because 2.220446049250313e-16 (or anything smaller) is going to simply get rounded off.` |
|||

`;; This number basically tells you the limit of the precision for your floats on a given machine` |
|||

`;; It ends up being useful for various numerical algorithms that you probably don't need to care about.` |
|||

```
``` |
|||

`;; It is important to understand that any floating point format has an inherent limit to the range and precision of the numbers it can represent` |
|||

```
``` |
|||

`;; NaN` |
|||

`;; NaNs are represented with an exponent that is all 1s, and a mantissa that is anything except all 0s` |
|||

`;; NaN == NaN is always false. This implies there is more than one NaN. Some software will actually use this` |
|||

`;; as a way of encoding error codes.` |
|||

```
``` |
|||

`;; Infinity is represented with a mantissa of all 0s and an exponent of all 1s` |
|||

`;; We can have -/+ Infinity because of this` |
|||

```
``` |
|||

`;; E.g.` |
|||

`;; NaN = 0111111111111000000000000000000000000000000000000000000000000000` |
|||

`;;-Inf = 1111111111110000000000000000000000000000000000000000000000000000` |
|||

```
``` |
|||

`;; (note that my code does not properly handle infinity or NaNs)` |
|||

```
``` |
|||

`;; Decimal floating point` |
|||

`;; There is an entire separate standard for this but all in Decimal, not Binary! Conceptually, you could do this` |
|||

`;; with any base. There are even hexadecimal floating point number systems.` |
|||

```
``` |
|||

`;; If you need to deal with anything that must be exact, use rationals. If you need performance, use floats.` |
|||

`;; The problem with using floats is that some numbers can only be approximated, not perfectly accurately represented.` |
|||

`;; This is true of any base, not just base 2. It is also true of irrational numbers like pi.` |
|||

```
``` |
|||

`;; There are things called "Minifloats" which are only 16 bits or smaller, and are non-standard, but useful` |
|||

`;; E.g. in graphics where you don't care too much about precision but performance matters a lot` |
|||

```
``` |
|||

;; Driver: decode the example bit pattern and print the recovered value
;; as an inexact number (should print roughly 0.052).
(displayln
 (exact->inexact (calculate-number example)))

Loading…

Reference in new issue