代码的流程,首先是统计样本数据;然后根据bayesian定理,创建一个
用于分类的模型;最后在用这个模型去对测试数据进行分类。
宏add-class-data负责统计数据, 她的第0个参数是分类,第1个参数是
在这个分类下的一个数据项,最后一个参数是个hash table,用于记录。
调用一次add-class-data就增加一项数据的统计值。
宏calc-bayesian-model会生成一个贝叶斯模型,并作为她的返回值。
她的参数是记录了多种统计结果的hash table。
函数bayesian-predict对测试数据进行分类。她的第0个参数是一个列表,
存放的是一个待测试的向量。第1个参数是分类模型。
函数mytest用于驱动这个玩具。
我初学Lisp,代码幼稚。随便写写玩具程序,变量胡乱起名。宏里面
的变量名应该用(gensym)生成,但我图方便没这么干。但代码还是可以
正常执行的,分类结果还能勉强接受。我在fedora 17上用sbcl简单的
测试了程序,可以这样执行:
sbcl --script ./bayesian.lisp
bayesian.lisp:
===========================================
;;; 2013年 03月 10日 星期日 10:16:49 CST
;;; author: 李小丹(Li Shao Dan) 字 殊恒(shuheng)
;;; K.I.S.S
;;; S.P.O.T
(defmacro add-count (k h) `(incf (gethash ,k ,h 0)))
(defmacro add-class-data (c k h)
`(multiple-value-bind (v p) (gethash ,c ,h)
(if p (add-count ,k v) (add-count ,k (setf (gethash ,c ,h) (make-hash-table :test #'equal))))))
(defun print-k-v (k v) (format t "~t~t~a: ~a~%" k v))
(defun print-class (k v) (format t "~a:~%" k) (maphash #'print-k-v v))
(defmacro print-all (h) `(progn (maphash #'print-class ,h) (format t "====================~%")))
(defmacro calc-one-sum (h) `(let ((s 0)) (maphash #'(lambda (k v) (setf s (+ s v))) ,h) s))
(defmacro calc-item-sum (h &optional (c nil c-p))
`(if ,c-p (multiple-value-bind (v p) (gethash ,c ,h) (if p (calc-one-sum v)))
(let ((s 0)) (maphash #'(lambda (k v) (setf s (+ s (calc-one-sum v)))) ,h) s)))
(defmacro count-class (h) `(hash-table-count ,h))
(defmacro count-item-uniq (h &optional (c nil c-p))
`(if ,c-p (multiple-value-bind (v p) (gethash ,c ,h) (if p (hash-table-count v)))
(let ((s 0)) (maphash #'(lambda (k v) (setf s (+ s (hash-table-count v)))) ,h) s)))
(defmacro count-item (h c i)
`(multiple-value-bind (ch ch-p) (gethash ,c ,h)
(if ch-p (gethash ,i ch 0))))
(defmacro calc-p-class (h c)
`(multiple-value-bind (v p) (gethash ,c ,h)
(if p (let (sc cc) (setf sc (calc-item-sum ,h)) (setf cc (calc-one-sum v))
(/ cc sc)))))
(defmacro calc-p-item-class (h c i)
`(let ((sum (calc-item-sum ,h ,c)))
(if (and sum (not (equal 0 sum))) (/ (count-item ,h ,c ,i) sum))))
(defmacro calc-bayesian-model (h)
`(let ((items nil) (classes nil) (ret (make-hash-table :test #'equal)))
(maphash #'(lambda (k v) (pushnew k classes) (maphash #'(lambda (kk vv) (pushnew kk items)) v)) ,h)
(dolist (i items)
(let ((sum 0) (temp))
(multiple-value-bind (value v-p) (gethash i ret)
(if (not v-p) (setf value (setf (gethash i ret) (make-hash-table :test #'equal))))
(dolist (c classes)
(setf temp (* (calc-p-item-class ,h c i) (calc-p-class ,h c)))
(incf sum temp)
(setf (gethash c value) temp))
(maphash #'(lambda (x y) (setf (gethash x value) (/ y sum))) value)
(maphash #'(lambda (x y) (if (equal 0 y) (setf (gethash x value) 1/100000))) value)))) ret))
(defun bayesian-predict (vec model)
(let ((coll (make-hash-table :test #'equal)) (temp 0) (mc) (sum 0) (classes nil))
(maphash #'(lambda (k v) (maphash #'(lambda (kk vv) (pushnew kk classes)) v)) model)
(dolist (c classes)
(let ((mul 1))
(dolist (d vec)
(multiple-value-bind (val v-p) (gethash d model)
(if v-p (setf mul (* mul (gethash c val 1/100000))) (setf mul (* mul (/ 1 (length classes)))))))
(setf (gethash c coll) mul)))
(maphash #'(lambda (k v) (incf sum v) (when (> v temp) (setf temp v) (setf mc k))) coll)
(if (>= (/ temp sum) 1/2) mc)))
;; ---------- test ---------------
(defvar *h* (make-hash-table :test #'equal))
(defun mytest()
(add-class-data 1 "a" *h*)
(add-class-data 1 "a" *h*)
(add-class-data 1 "a" *h*)
(add-class-data 1 "b" *h*)
(add-class-data 1 "c" *h*)
(add-class-data 1 "c" *h*)
(add-class-data 1 "d" *h*)
(add-class-data 2 "a" *h*)
(add-class-data 2 "a" *h*)
(add-class-data 2 "c" *h*)
(add-class-data 2 "c" *h*)
(add-class-data 2 "d" *h*)
(add-class-data 2 "d" *h*)
(add-class-data 2 "f" *h*)
(add-class-data 2 "f" *h*)
(add-class-data 2 "f" *h*)
(add-class-data 3 "a" *h*)
(add-class-data 3 "a" *h*)
(add-class-data 3 "b" *h*)
(add-class-data 3 "b" *h*)
(add-class-data 3 "d" *h*)
(add-class-data 3 "d" *h*)
(add-class-data 3 "d" *h*)
(add-class-data 3 "d" *h*)
(add-class-data 3 "f" *h*)
(add-class-data 3 "f" *h*)
(print-all *h*)
; (format t "sum is ~a.~%" (calc-item-sum *h*))
; (dotimes (i 5) (format t "class(~a) sum: ~a.~%" i (calc-item-sum *h* i)))
; (format t "class count ~a.~%" (count-class *h*))
; (format t "item count: ~a.~%" (count-item-uniq *h*))
; (dotimes (i 5) (format t "item(~a) type count: ~a.~%" i (count-item-uniq *h* i)))
; (dotimes (i 5) (format t "class(~a) probability: ~a.~%" i (calc-p-class *h* i)))
; (dotimes (i 5) (format t "class(~a) item(~a) count: ~a.~%" i "aa" (count-item *h* i "aa")))
; (dotimes (i 5) (format t "class(~a) item(~a) count: ~a.~%&q
补充:综合编程 , 其他综合 ,