#lang racket

;; bit-vector type and operations live in the data/bit-vector collection
;; (the scraped name "data/bitvector" does not exist).
(require data/bit-vector)




; https://imgur.com/a/ClKK5Ac




; See http://fabiensanglard.net/floating_point_visually_explained/ for an intuitive explanation




;; Format a real number as a base-2 string, keeping up to 52 fractional
;; digits (the width of a double's mantissa).
(define (tobin fl)
  (~r fl #:base 2 #:precision 52))




;; Render a bit-vector as a string of #\1 / #\0 characters, index 0 first.
;; (Restored library names: list->string, in-range, bit-vector-length,
;; bit-vector-ref — the scraped source had the hyphens/arrows stripped.)
(define (bitvector>string bv)
  (list->string
   (for/list [(i (in-range (bit-vector-length bv)))]
     (cond
       [(bit-vector-ref bv i) #\1]
       [else #\0]))))




;; Interpret a bit-vector as an unsigned integer, most significant bit first,
;; by formatting the bits as a #b literal and reading it back.
(define (bitvector>posint bv)
  (string->number
   (format "#b~a"
           (bitvector>string bv))))




;; Debug helper: return bits [start, end) of bv as a list of booleans.
(define (showbvslice bv start end)
  (bit-vector->list
   (bit-vector-copy bv start end)))




;; Map a boolean onto the bit it represents: #t -> 1, #f -> 0.
(define (bool>number b)
  (if b 1 0))




;; Map the exact integer 1 onto #t; every other value onto #f
;; (inverse of bool>number over {0, 1}).
(define (number>bool n)
  (eqv? n 1))




;; Total of a list of numbers; the empty list sums to 0.
(define (sum xs)
  (apply + xs))




; conversion from base 10 functions




;; Have to calculate the number of digits to remove from


;; the precision based on how far the decimal point needs


;; to be moved left,


;; or else maybe just do the calculation, and lop off digits from the right?






;; Convert an integer to its binary digits, least significant digit first,
;; e.g. 6 -> '(0 1 1); 0 -> '(0).
;; NOTE(review): assumes n >= 0 — behavior for negative n is unverified.
(define (int>binary n)
  ;; let-values (scraped to "letvalues") destructures quotient and remainder.
  (let-values
      ([(q r) (quotient/remainder n 2)])
    (match q
      [0 (list r)]
      [_ (cons r (int>binary q))])))




;; Convert the fractional part n (0 <= n < 1) to a binary digit string by
;; repeated doubling: each doubling's integer part is the next binary digit.
;; Recursion stops when the fraction is exhausted or 52 digits (a double's
;; mantissa width) have been produced.
(define (real>binaryfrac n [precision 0])
  (define p
    (* 2 n))

  (displayln p) ; debug trace of each doubling step
  (cond
    [(= p 0.0) ""]        ; nothing left to emit
    [(> precision 51) ""] ; cap output at 52 digits
    [(>= p 1)
     ;; a 1 "popped out"; continue with the remaining fraction
     (string-append "1"
                    (real>binaryfrac
                     (sub1 p)
                     (add1 precision)))]
    [(< p 1)
     (string-append "0"
                    (real>binaryfrac
                     p
                     (add1 precision)))]))




; do the conversion from w.fff.. to binary


; do the conversion from w.fff.. to binary
;; Returns (list sign-bit whole-part-bits fraction-bits); the exponent
;; biasing step is not implemented yet (see the WIP note above).
(define (real>bits whole fraction)
  (list
   ;; sign bit: 0 for non-negative, 1 for negative. The original tested
   ;; (> whole 0), which wrongly marked numbers like 0.052 as negative.
   (cond
     [(>= whole 0) 0]
     [else 1])
   (bitvector>string
    (list->bit-vector
     (map number>bool
          (reverse (int>binary whole)))))

   (real>binaryfrac fraction)))






; Conversion from base2 functions




;; Reassemble the value encoded by an IEEE-754 bit-vector:
;; (-1)^sign * mantissa * 2^exponent. Prints each field as it goes.
;; Does not handle NaN, infinities, or subnormals (see notes below).
(define (calculatenumber bv)
  (define sign (bvsign bv))
  (define mantissa (bvmantissa bv))
  (define exponent (bvexponent bv))

  (displayln (format "Sign = ~a" (cond ((= 0 sign) "positive") (else "negative"))))

  (displayln (format "Mantissa = ~a"
                     (exact->inexact
                      (calculatemantissa mantissa))))

  (displayln (format "Exponent = ~a" exponent))

  (*
   ;; (expt 1 sign) in the scraped source lost a minus: the sign factor
   ;; must be (-1)^sign so sign bit 1 negates the result.
   (expt -1 sign)
   (calculatemantissa mantissa)
   (expt 2 exponent)))




;; Width of the exponent field for the given total float width:
;; 8 bits for single precision (32), 11 for double precision (64).
;; Any other bit-vector length raises a match error.
(define (explen bv)
  (match (bit-vector-length bv)
    [32 8]
    [64 11]))




;; Mantissa/significand field: everything after the sign bit (1 bit)
;; and the exponent field (explen bits).
(define (bvmantissa bv)
  (bit-vector-copy bv
                   (add1 (explen bv))
                   (bit-vector-length bv)))






;; Floating point numbers




;; A worked example: the 64-bit IEEE-754 pattern for 0.052.
(define example
  (string->bit-vector
   ; 0.052 in binary
   ;seeeeeeeeeeemmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm
   "0011111110101010100111111011111001110110110010001011010000111001"))




;; In this example, we are representing 0.052 as a 64 bit floating point number


;; The first bit is our sign


;; The next 11 bits are our exponent


;; The next 52 bits are our mantissa (also called the significand or fraction)




;; Starting with the sign, if it is 1 it is negative, otherwise positive




;; Sign bit (index 0): 1 means negative, 0 means positive.
(define (bvsign bv)
  (cond
    [(bit-vector-ref bv 0) 1]
    [else 0]))




;; The exponent (next 11 bits) is represented in a biased form, meaning there is a subtraction that occurs


;; So for 0.052, the exponent is -5


;; 01111111010 = 1018 in binary


;; the bias is 1023 (2^10 - 1), so we do 1018 - 1023 = -5




;; Unbiased exponent: raw exponent field (bits 1 .. explen) minus the bias.
(define (bvexponent bv)
  ; bias is basically half the range of the exp minus 1: 2^(explen-1) - 1,
  ; i.e. 1023 for doubles, 127 for singles
  (define bias
    (sub1
     (expt 2
           (sub1 (explen bv)))))
  ; subtract bias from exponent (the bare "(" in the scraped source was
  ; a subtraction whose minus sign was stripped)
  (-
   (bitvector>posint
    (bit-vector-copy bv 1 (add1 (explen bv))))
   bias))




;; The mantissa (next 52 bits) is usually represented in a *normalized* form, meaning 1.xxx... (52 bits for a 64 bit float)


;; The mantissa can be calculated in decimal using a summation, e.g. b1 / 2^1 + b2 / 2^2 + ... (b1 and b2 are bits)




;; Decimal value of a normalized mantissa field: 1 + sum of b_i / 2^i,
;; where b_i is the i-th mantissa bit (1-indexed). Returns an exact rational.
(define (calculatemantissa n)
  (define bits
    (map bool>number (bit-vector->list n)))
  (define powers
    (map add1 (range (length bits))))
  ; add 1 for the implicit 1.xxx
  ; sum of bits divided by increasing powers of 2
  ; basically each "place" in the binary digits
  (add1 (sum (map
              (lambda (b p)
                (/ b (expt 2 p)))
              bits powers))))




;; s m exp


;; Putting that together, you get (-1)^0 * 1.664 * 2^(-5) = 0.052




;; Keep in mind that the computer does not do this conversion every time it calculates something


;; There are various algorithms for adding/multiplying binary floating point numbers efficiently (which I won't get into)




;; You may ask why there is always an implicit leading 1. in the mantissa/significand. The answer is that it's


;; somewhat arbitrary. There are things called subnormal, or denormalized numbers, which can change this.




;; From wikipedia:




;; In a denormal number, since the exponent is the least that it can be,


;; zero is the leading significand digit (0.m1m2m3...mp−2mp−1)


;; allowing the representation of numbers closer to zero than the smallest normal number.




;;




;; Other fun things about floating point numbers




;; You may also notice that as the exponent gets larger and larger, the range of numbers between a given whole number


;; and the next one increases.




;; There is something called "epsilon" which essentially tells you which number is the upper bound on any rounding error


;; For example, on my machine 2.0 + 2.220446049250313e-16 = 2.0




;; Why? because 2.220446049250313e-16 (or anything smaller) is going to simply get rounded off.


;; This number basically tells you the limit of the precision for your floats on a given machine


;; It ends up being useful for various numerical algorithms that you probably don't need to care about.




;; It is important to understand that floating point intervals have an inherent limit to the range of numbers




;; NaN


;; NaNs are represented with an exponent that is all 1s, and a mantissa that is anything except all 0s


;; NaN == NaN is always false. This implies there is more than one NaN. Some software will actually use this


;; as a way of encoding error codes.




;; Infinity is represented with a mantissa of all 0s and an exponent of all 1s


;; We can have -/+ Infinity because of this




;; E.g.


;; NaN = 0111111111111000000000000000000000000000000000000000000000000000


;;-Inf = 1111111111110000000000000000000000000000000000000000000000000000




;; (note that my code does not properly handle infinity or NaNs)




;; Decimal floating point


;; There is an entire separate standard for this but all in Decimal, not Binary! Conceptually, you could do this


;; with any base. There are even hexadecimal floating point number systems.




;; If you need to deal with anything that must be exact, use rationals. If you need performance, use floats.


;; The problem with using floats is that some numbers can only be approximated, not perfectly accurately represented.


;; This is true of any base, not just base 2. It is also true of irrational numbers like pi.




;; There are things called "Minifloats" which are only 16 bits or smaller, and are nonstandard, but useful


;; E.g. in graphics where you don't care too much about precision but performance matters a lot




;; Decode the example bit pattern and print it as an inexact number (~0.052).
(displayln
 (exact->inexact (calculatenumber example)))
