;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; OPEN HASHING ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define open-hash-table (make-record-type "open-hash-table" '(size vect hash member?))) ;;; ;;; An open hash table, it has ;;; (1) a size, ie. number of buckets ;;; (2) a vector, where the ith element is the ith bucket, and that ;;; bucket is a list of items that have the same hash value ;;; (3) a hash function, that takes an element and delivers a bucket number ;;; (4) a member? function for testing if an element is in a bucket ;;; (again, POLYMORPHISM, allowing us to reuse the open hash table ;;; for different kinds of objects ;;; (define create-open-hash-table (record-constructor open-hash-table '(size vect hash member?))) (define (make-open-hash-table size hash member?) (create-open-hash-table size (make-vector size '()) hash member?)) (define open-hash-table? (record-predicate open-hash-table)) (define size (record-accessor open-hash-table 'size)) (define vect (record-accessor open-hash-table 'vect)) (define hash (record-accessor open-hash-table 'hash)) (define member? (record-accessor open-hash-table 'member?)) ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (define (found? e l p) (cond ((null? l) nil) (t (or (p e (car l)) (found? e (cdr l) p))))) ;;; ;;; Is there an element x in the list l that ;;; satisfies the predicate (p e x)? ;;; So, if we are wanting to find something in a list ;;; of integers we would use (found? e l =), and if ;;; a list of strings we would use (found? e l string=?) ;;; again ... POLYMORPHIC ;;; (define (insert e table) (let* ((h (hash table)) (v (vect table)) (s (size table)) (p= (member? table)) (i (h e s)) (l (vector-ref v i))) (cond ((not (found? e l p=)) (vector-set! v i (cons e l)))))) ;;; ;;; Insert e in the open hash table (ignoring e if duplicated) ;;; Get the bucket, i, where e will be inserted ;;; Get the list in that bucket, l, and push e onto the front of the list ;;; (define (hash-1 str buckets) (let ((hash 0) (l (string->list str))) (do ((l' l (cdr l'))) ((null? l') (modulo hash buckets)) (let ((ch (car l'))) (set! hash (+ hash (char->integer ch))))))) ;;; ;;; A hash function (not very clever) for words, where a word is a ;;; string of characters ;;; ;;; Given a word that has been converted to a list of characters ;;; and an integer number of buckets (size of open hash table) ;;; compute a hash value ;;; ;;; The hash value is computed by taking each character in turn, ;;; getting its integer equivalent, summing this for all integers ;;; in the list, and delivering a value modulo the number of buckets ;;; (define (present? e table) (let* ((h (hash table)) (v (vect table)) (s (size table)) (member? (member? table)) (i (h e s)) (l (vector-ref v i))) (found? e l member?))) ;;; ;;; Is e in the open hash table? ;;; Get the hash value for e, namely i, and then try and find ;;; e in the ith bucket. ;;; (define (dictionary->table table) (let ((fin (open-input-file "/home/s7/pat/scheme/pub/dictionary.tex"))) (do ((w (read fin) (read fin))) ((eof-object? w) (close-input-port fin)) (insert (symbol->string w) table)))) ;;; ;;; Reads into an open hash-table the 25,104 words in the above dictionary. ;;; The dictionary originated from /usr/dict/words but has ;;; all duplicates removed, and all sorted with respect to stringtable fname table) (let ((fin (open-input-file fname))) (do ((w (read-word fin) (read-word fin))) ((eof-object? w) (close-input-port fin)) (insert w table)))) ;;; ;;; (file->table "/usr/dict/words" *t26000*) ;;; (define (show table) (do ((i 0 (+ i 1))) ((= i (size table))) (let ((l (vector-ref (vect table) i))) (display (list i (length l) l)) (newline)))) ;;; ;;; Show the contents of an open hash table ;;; Show each bucket at a time. ;;; ;;; NOTE: use with caution, only on small table otherwise .... ;;; (define (scan table) (let ((l '()) (v (vect table)) (s (size table))) (do ((i 0 (+ i 1))) ((= i s) l) (set! l (cons (length (vector-ref v i)) l))))) ;;; ;;; Gets the length of each bucket in the table, and delivers ;;; this as a list of numbers. So, if we have an open hash table of ;;; size 1000 this will deliver a list of length 1000 ;;; You can now use the code you developed in the first exercise ;;; to analyse the table, finding the size of the largest bucket, ;;; avarage bucket length, etc ;;; ;;; WARNING: do not get the median on anything other than a small table ;;; (life is too short) ;;; ;;; Wee example ;;; (define t2000 (make-open-hash-table 2000 hash-1 string=?)) ;;; (dictionary->table t2000) ;;; ;;; (present? "paddy" t2000) ;;; (present? "happy" t2000) ;;; ;;; Look at the run time for each query ;;;