From: kim Date: Tue, 26 May 2009 15:47:18 +0000 (+0000) Subject: Fixed bug in NextElement, improved caching X-Git-Url: http://git.nguyen.vg/gitweb/?a=commitdiff_plain;h=3445f7f08f15fe41e0d1bfaaabaacf60cdc10b61;p=SXSI%2Fxpathcomp.git Fixed bug in NextElement, improved caching git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@410 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- diff --git a/OCamlDriver.cpp b/OCamlDriver.cpp index e231a45..9df73d6 100644 --- a/OCamlDriver.cpp +++ b/OCamlDriver.cpp @@ -261,7 +261,7 @@ extern "C" CAMLprim value caml_xml_tree_next_sibling(value tree, value id){ } extern "C" CAMLprim value caml_xml_tree_next_element(value tree, value id){ - return(Val_int (XMLTREE(tree)->NextSibling(TREENODEVAL(id)))); + return(Val_int (XMLTREE(tree)->NextElement(TREENODEVAL(id)))); } extern "C" CAMLprim value caml_xml_tree_tagged_sibling(value tree, value id, value tag){ diff --git a/ata.ml b/ata.ml index 90458a3..7949921 100644 --- a/ata.ml +++ b/ata.ml @@ -518,107 +518,6 @@ END let string_of_ts tags = (Ptset.Int.fold (fun t a -> a ^ " " ^ (Tag.to_string t) ) tags "{")^ " }" -(* - module Algebra = - struct - type jump = [ `LONG | `CLOSE | `NIL ] - type t = jump*Ptset.Int.t - - let merge_jump (j1,l1) (j2,l2) = - match j1,j2 with - | _ when j1 = j2 -> (j1,Ptset.Int.union l1 l2) - | _,`NIL -> j1,l1 - | `NIL,_ -> j2,l2 - | _,_ -> (`CLOSE, Ptset.Int.union l1 l2) - - let merge_jump_list = function - | [] -> `NIL,Ptset.Int.empty - | p::r -> List.fold_left (merge_jump) p r - - let labels a s = - Hashtbl.fold - ( - fun q l acc -> - if (q == s) - then - - (List.fold_left - (fun acc (ts,f) -> - let _,_,_,bur = Transition.node f in - if bur then acc else TagSet.cup acc ts) - acc l) - else acc ) a.trans TagSet.empty - exception Found - - let is_rec a s access = - List.exists - (fun (_,t) -> let _,_,f,_ = Transition.node t in - StateSet.mem s (access f)) (Hashtbl.find a.trans s) - - - let decide a c_label l_label dir_states access = - - let l = StateSet.fold - (fun s l -> - let s_rec= is_rec a s access in - let tlabels,jmp = - if s_rec then l_label,`LONG - else c_label,`CLOSE in - let slabels = TagSet.positive ((TagSet.cap (labels a s) tlabels)) - in - (if Ptset.Int.is_empty slabels - then `NIL,Ptset.Int.empty - else jmp,slabels)::l) dir_states [] - in merge_jump_list l - - - - - - end - - - let choose_jump tagset qtags1 qtagsn a f_nil f_t1 f_s1 f_tn f_sn f_notext f_maytext = - let tags1,hastext1,fin1 = inter_text tagset (tags a qtags1) in - let tagsn,hastextn,finn = inter_text tagset (tags a qtagsn) in - (*if (hastext1||hastextn) then (`ANY,f_text) (* jumping to text nodes doesn't work really well *) - else*) - if (Ptset.Int.is_empty tags1) && (Ptset.Int.is_empty tagsn) then (`NIL,f_nil) - else if (Ptset.Int.is_empty tagsn) then - if (Ptset.Int.is_singleton tags1) - then (* TaggedChild/Sibling *) - let tag = (Ptset.Int.choose tags1) in (`TAG(tag),mk_app_fun f_t1 tag (Tag.to_string tag)) - else (* SelectChild/Sibling *) - (`ANY,mk_app_fun f_s1 tags1 (string_of_ts tags1)) - else if (Ptset.Int.is_empty tags1) then - if (Ptset.Int.is_singleton tagsn) - then (* TaggedDesc/Following *) - let tag = (Ptset.Int.choose tagsn) in (`TAG(tag),mk_app_fun f_tn tag (Tag.to_string tag)) - else (* SelectDesc/Following *) - (`ANY,mk_app_fun f_sn tagsn (string_of_ts tagsn)) - else if (hastext1||hastextn) then (`ANY,f_maytext) - else (`ANY,f_notext) - - let choose_jump_down tree a b c d = - choose_jump a b c d - (mk_fun (fun _ -> Tree.nil) "Tree.mk_nil") - (mk_fun (Tree.tagged_child tree) "Tree.tagged_child") - (mk_fun (Tree.select_child tree) "Tree.select_child") - (mk_fun (Tree.tagged_desc tree) "Tree.tagged_desc") - (mk_fun (Tree.select_desc tree) "Tree.select_desc") - (mk_fun (Tree.first_element tree) "Tree.first_element") - (mk_fun (Tree.first_child tree) "Tree.first_child") - - let choose_jump_next tree a b c d = - choose_jump a b c d - (mk_fun (fun _ _ -> Tree.nil) "Tree.mk_nil2") - (mk_fun (Tree.tagged_sibling_ctx tree) "Tree.tagged_sibling_ctx") - (mk_fun (Tree.select_sibling_ctx tree) "Tree.select_sibling_ctx") - (mk_fun (Tree.tagged_foll_ctx tree) "Tree.tagged_foll_ctx") - (mk_fun (Tree.select_foll_ctx tree) "Tree.select_foll_ctx") - (mk_fun (Tree.next_element_ctx tree) "Tree.node_element_ctx") - (mk_fun (Tree.next_sibling_ctx tree) "Tree.node_sibling_ctx") -*) module Algebra = struct type jump = [ `NIL | `ANY |`ANYNOTEXT | `JUMP ] @@ -752,24 +651,53 @@ END (mk_fun (Tree.next_element_ctx tree) "Tree.next_element_ctx") (mk_fun (Tree.next_sibling_ctx tree) "Tree.node_sibling_ctx") - module SetTagKey = - struct - type t = Tag.t*SList.t - let equal (t1,s1) (t2,s2) = t1 == t2 && s1 == s2 - let hash (t,s) = HASHINT2(t,s.SList.Node.id) + + module SListTable = Hashtbl.Make(struct type t = SList.t + let equal = (==) + let hash t = t.SList.Node.id + end) + module TransCache = + struct + type 'a t = Obj.t array SListTable.t + let create n = SListTable.create n + let dummy = Obj.repr (fun _ -> assert false) + let find (h :'a t) tag slist : 'a = + let tab = + try + SListTable.find h slist + with + Not_found -> + SListTable.add h slist (Array.create 10000 dummy); + raise Not_found + in + let res = tab.(tag) in + if res == dummy then raise Not_found else (Obj.magic res) + + let add (h : 'a t) tag slist (data : 'a) = + let tab = + try + SListTable.find h slist + with + Not_found -> + let arr = Array.create 10000 dummy in + SListTable.add h slist arr; + arr + in + tab.(tag) <- (Obj.repr data) + + end - - module CachedTransTable = Hashtbl.Make(SetTagKey) - let td_trans = CachedTransTable.create 4093 - - + + let td_trans = TransCache.create 10000 (* should be number of tags *number of states^2 + in the document *) + let empty_size n = let rec loop acc = function 0 -> acc | n -> loop (SList.cons StateSet.empty acc) (n-1) in loop SList.nil n - module Fold2Res = Hashtbl.Make(struct + module Fold2ResOld = Hashtbl.Make(struct type t = Formlistlist.t*SList.t*SList.t let hash (f,s,t) = HASHINT3(f.Formlistlist.Node.id, s.SList.Node.id, @@ -777,6 +705,39 @@ END let equal (a,b,c) (d,e,f) = a==d && b == e && c == f end) + module FllTable = Hashtbl.Make (struct type t = Formlistlist.t + let equal = (==) + let hash t = t.Formlistlist.Node.id + end) + + module Fold2Res = + struct + type 'a t = 'a SListTable.t SListTable.t FllTable.t + + let create n = FllTable.create n + + let find hf fl s1 s2 = + let hs1 = FllTable.find hf fl in + let hs2 = SListTable.find hs1 s1 in + SListTable.find hs2 s2 + + let add hf fl s1 s2 data = + let hs1 = + try FllTable.find hf fl with + | Not_found -> + let hs1 = SListTable.create SMALL_H_SIZE + in FllTable.add hf fl hs1;hs1 + in + let hs2 = + try SListTable.find hs1 s1 + with + | Not_found -> + let hs2 = SListTable.create SMALL_H_SIZE + in SListTable.add hs1 s1 hs2;hs2 + in + SListTable.add hs2 s2 data + end + let h_fold2 = Fold2Res.create BIG_H_SIZE let top_down ?(noright=false) a tree t slist ctx slot_size = @@ -786,7 +747,7 @@ END let eval_fold2_slist fll t (sl2,res2) (sl1,res1) = let res = Array.copy rempty in try - let r,b,btab = Fold2Res.find h_fold2 (fll,sl1,sl2) in + let r,b,btab = Fold2Res.find h_fold2 fll sl1 sl2 in if b then for i=0 to slot_size - 1 do res.(i) <- RS.merge btab.(i) t res1.(i) res2.(i); done; @@ -809,7 +770,7 @@ END | _ -> aq,ab in let r,b = fold sl1 sl2 fll 0 SList.nil false in - Fold2Res.add h_fold2 (fll,sl1,sl2) (r,b,btab); + Fold2Res.add h_fold2 fll sl1 sl2 (r,b,btab); if b then for i=0 to slot_size - 1 do res.(i) <- RS.merge btab.(i) t res1.(i) res2.(i); done; @@ -817,16 +778,16 @@ END in let null_result = (pempty,Array.copy rempty) in - let rec loop t slist ctx= + let rec loop t slist ctx = if t == Tree.nil then null_result else get_trans t slist (Tree.tag tree t) ctx and loop_tag tag t slist ctx = if t == Tree.nil then null_result else get_trans t slist tag ctx and loop_no_right t slist ctx = if t == Tree.nil then null_result else get_trans ~noright:true t slist (Tree.tag tree t) ctx - and get_trans ?(noright=false) t slist tag ctx = + and get_trans ?(noright=false) t slist tag ctx = let cont = try - CachedTransTable.find td_trans (tag,slist) + TransCache.find td_trans tag slist with | Not_found -> let fl_list,llist,rlist,ca,da,sa,fa = @@ -875,63 +836,73 @@ END let cont = match f_kind,n_kind with | `NIL,`NIL -> - (fun t _ -> eval_fold2_slist fl_list t empty_res empty_res ) - | _,`NIL -> ( + (fun t _ -> eval_fold2_slist fl_list t empty_res empty_res) + | _,`NIL -> ( match f_kind with |`TAG(tag) -> (fun t _ -> eval_fold2_slist fl_list t empty_res - (loop_tag tag (first t) llist t)) - | `ANY -> - (fun t _ -> eval_fold2_slist fl_list t empty_res - (loop (first t) llist t)) - | _ -> assert false) - + (loop_tag tag (first t) llist t )) + | `ANY -> + (fun t _ -> eval_fold2_slist fl_list t empty_res + (loop (first t) llist t )) + | _ -> assert false) | `NIL,_ -> ( - match n_kind with - |`TAG(tag) -> - (fun t ctx -> eval_fold2_slist fl_list t - (loop_tag tag (next t ctx) rlist ctx) empty_res) - - | `ANY -> - (fun t ctx -> eval_fold2_slist fl_list t - (loop (next t ctx) rlist ctx) empty_res) - - | _ -> assert false) - - | `TAG(tag1),`TAG(tag2) -> - (fun t ctx -> eval_fold2_slist fl_list t - (loop_tag tag2 (next t ctx) rlist ctx) - (loop_tag tag1 (first t) llist t)) - + match n_kind with + |`TAG(tag) -> + if SList.equal rlist slist then + let rec loop t ctx = + if t == Tree.nil then empty_res + else + let res2 = loop (next t ctx) ctx in + eval_fold2_slist fl_list t res2 empty_res + in loop + else + (fun t ctx -> eval_fold2_slist fl_list t + (loop_tag tag (next t ctx) rlist ctx ) empty_res) + + | `ANY -> + (fun t ctx -> eval_fold2_slist fl_list t + (loop (next t ctx) rlist ctx ) empty_res) + + | _ -> assert false) + + | `TAG(tag1),`TAG(tag2) -> + (fun t ctx -> + eval_fold2_slist fl_list t + (loop_tag tag2 (next t ctx) rlist ctx ) + (loop_tag tag1 (first t) llist t )) + | `TAG(tag),`ANY -> - (fun t ctx -> eval_fold2_slist fl_list t - (loop (next t ctx) rlist ctx) - (loop_tag tag (first t) llist t)) + (fun t ctx -> + eval_fold2_slist fl_list t + (loop (next t ctx) rlist ctx ) + (loop_tag tag (first t) llist t )) + | `ANY,`TAG(tag) -> - (fun t ctx -> - eval_fold2_slist fl_list t - (loop_tag tag (next t ctx) rlist ctx) - (loop (first t) llist t) ) + (fun t ctx -> + eval_fold2_slist fl_list t + (loop_tag tag (next t ctx) rlist ctx ) + (loop (first t) llist t )) + | `ANY,`ANY -> - (fun t ctx -> - eval_fold2_slist fl_list t - (loop (next t ctx) rlist ctx) - (loop (first t) llist t) ) + (fun t ctx -> + eval_fold2_slist fl_list t + (loop (next t ctx) rlist ctx ) + (loop (first t) llist t )) | _ -> assert false - in - let cont = D_IF_( (fun t ctx -> + in + let cont = D_IF_( (fun t ctx -> let a,b = cont t ctx in register_trace tree t (slist,a,fl_list,first,next,ctx); - (a,b) + (a,b) ) ,cont) in - (CachedTransTable.add td_trans (tag,slist) cont;cont) - in cont t ctx - - in - (if noright then loop_no_right else loop) t slist ctx + (TransCache.add td_trans tag slist (Obj.repr cont) ;cont) + in (Obj.magic cont) t ctx + + in + (if noright then loop_no_right else loop) t slist ctx - let run_top_down a tree = let init = SList.cons a.init SList.nil in let _,res = top_down a tree Tree.root init Tree.root 1