From 645b7263119a1262cbb442a3166783ad372becef Mon Sep 17 00:00:00 2001 From: kim Date: Sun, 17 May 2009 07:41:16 +0000 Subject: [PATCH 01/16] fixed debugging code git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@394 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- ata.ml | 6 +++--- html_trace.ml | 47 ++++++++++++++++++++++++++++------------------- 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/ata.ml b/ata.ml index fa44fd6..4ef9ccf 100644 --- a/ata.ml +++ b/ata.ml @@ -767,7 +767,7 @@ END in let cont = D_IF_( (fun t ctx -> let a,b = cont t ctx in - register_trace t (slist,a,fl_list,first,next,ctx); + register_trace tree t (slist,a,fl_list,first,next,ctx); (a,b) ) ,cont) in @@ -783,8 +783,8 @@ END let _,res = top_down a tree Tree.root init Tree.root 1 in D_IGNORE_( - output_trace a tree root "trace.html" - (RS.fold (fun t a -> IntSet.add (Tree.id t) a) res.(0) IntSet.empty), + output_trace a tree "trace.html" + (RS.fold (fun t a -> IntSet.add (Tree.id tree t) a) res.(0) IntSet.empty), res.(0)) ;; diff --git a/html_trace.ml b/html_trace.ml index c5ed980..333682c 100644 --- a/html_trace.ml +++ b/html_trace.ml @@ -155,11 +155,21 @@ let html_footer = " " let h_trace = Hashtbl.create 4096 -let register_trace t x = Hashtbl.add h_trace (Tree.id t) x -let h_fname = Hashtbl.create 401 +let r_trace = Hashtbl.create 4096 +let register_trace tree t x = + Hashtbl.add h_trace (Tree.id tree t) x -let register_funname f s = Hashtbl.add h_fname (Hashtbl.hash f) s -let get_funname f = try Hashtbl.find h_fname (Hashtbl.hash f) with _ -> "[anon_fun]" +module HFname = Hashtbl.Make (struct + type t = Obj.t + let hash = Hashtbl.hash + let equal = (==) + end) + +let h_fname = HFname.create 401 + +let register_funname f s = + HFname.add h_fname (Obj.repr f) s +let get_funname f = try HFname.find h_fname (Obj.repr f) with _ -> "[anon_fun]" let tag_to_str tag = let s = Tag.to_string tag in let num =ref 0 in @@ -185,7 +195,7 @@ let tag_to_str tag = ns -let output_trace a t file results = +let output_trace a tree file results = let h_auto = 6+ (Hashtbl.fold (fun _ l a -> (List.length l)+a) a.trans 0) in let max_tt = ref 0 in let outc = open_out file in @@ -195,29 +205,28 @@ let output_trace a t file results = let pr_out x = Format.fprintf outf x in let rec loop t = if not (Tree.is_nil t) then - let id = Tree.id t in - let tag = Tree.tag t in + let id = Tree.id tree t in + let tag = Tree.tag tree t in let tooltip,selected = try let (inconf,outconf,trans,first_fun,next_fun,ctx) = Hashtbl.find h_trace id in let selected = IntSet.mem id results in pr_str "
Subtree %i, tag='%s', internal node = %s\n" - id id (tag_to_str tag) (Tree.dump_node t); - + id id (tag_to_str tag) (Tree.dump_node t); pr_str "Context node is %i, tag='%s', internal node = '%s'\n" - (Tree.id ctx) (tag_to_str (Tree.tag ctx)) (Tree.dump_node ctx); + (Tree.id tree ctx) (tag_to_str (Tree.tag tree ctx)) (Tree.dump_node ctx); pr_str "%s" "\nEntered with configuration:\n"; SList.iter (fun s -> StateSet.print strf s) inconf; pr_str "%s" "\nLeft with configuration:\n"; SList.iter (fun s -> StateSet.print strf s) outconf; (let ft = first_fun t in pr_str "\nLeft successor is: id=%i, tag='%s', internal node = '%s'\n" - (Tree.id ft) (Tree.id ft) (Tree.id ft) (tag_to_str (Tree.tag ft)) (Tree.dump_node ft); + (Tree.id tree ft) (Tree.id tree ft) (Tree.id tree ft) (tag_to_str (Tree.tag tree ft)) (Tree.dump_node ft); pr_str "Moving with : %s (tree=%i)\n" (get_funname first_fun) id; ); (let nt = next_fun t ctx in pr_str "\nRight successor is: id=%i, tag='%s', internal node = '%s'\n" - (Tree.id nt) (Tree.id nt) (Tree.id nt) (tag_to_str (Tree.tag nt)) (Tree.dump_node nt); - pr_str "Moving with : %s (tree=%i) (ctx=%i)\n" (get_funname first_fun) id (Tree.id ctx); + (Tree.id tree nt) (Tree.id tree nt) (Tree.id tree nt) (tag_to_str (Tree.tag tree nt)) (Tree.dump_node nt); + pr_str "Moving with : %s (tree=%i) (ctx=%i)\n" (get_funname next_fun) id (Tree.id tree ctx); ); pr_str "%s" "\nTriggered transitions:\n"; pr_str "%s" ""; @@ -225,7 +234,7 @@ let output_trace a t file results = pr_str "%s" ""; max_tt := max !max_tt (Formlist.length fl); ) trans; - pr_str "%s" "
";Formlist.print strf fl;pr_str "
\n"; + pr_str "%s" "
\n"; pr_str "In result set : %s\n
" (if selected then "Yes" else "No"); Format.flush_str_formatter(),selected with @@ -235,23 +244,23 @@ let output_trace a t file results = (if tag == Tag.pcdata || tag== Tag.attribute_data then "_text" else"") in if tag == Tag.pcdata || tag== Tag.attribute_data then - pr_out "
%s%s
" div_class id (Tree.get_text t) tooltip + pr_out "
%s%s
" div_class id (Tree.get_text tree t) tooltip else begin - if (Tree.is_nil (Tree.first_child t)) + if (Tree.is_nil (Tree.first_child tree t)) then pr_out "
<%s/>%s
" div_class id id id (tag_to_str tag) tooltip else begin pr_out "
<%s>%s
" div_class id id id (tag_to_str tag) tooltip; - loop (Tree.first_child t); + loop (Tree.first_child tree t); if (tooltip="") then pr_out "
</%s>
" div_class (tag_to_str tag) else pr_out "
</%s>
" id id div_class (tag_to_str tag); end; end; - loop (Tree.next_sibling t); + loop (Tree.next_sibling tree t); in let max_tt = 25*(!max_tt + 15)+20 in let height = max max_tt (25*h_auto) in @@ -261,7 +270,7 @@ let output_trace a t file results = dump outf a; pr_out "%s" "
"; pr_out "%s" "
"; - loop t; + loop (Tree.root); pr_out "%s" html_footer; pr_out "%!"; close_out outc -- 2.17.1 From f583e10adcefdebc0a682980c8d73454eedefecf Mon Sep 17 00:00:00 2001 From: kim Date: Sun, 17 May 2009 08:17:05 +0000 Subject: [PATCH 02/16] Fixed uneeded free in get cached text git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@396 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- OCamlDriver.cpp | 2 -- ata.ml | 2 +- ata.mli | 2 +- main.ml | 4 ++-- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/OCamlDriver.cpp b/OCamlDriver.cpp index 0d0a24c..e231a45 100644 --- a/OCamlDriver.cpp +++ b/OCamlDriver.cpp @@ -150,7 +150,6 @@ extern "C" CAMLprim value caml_text_collection_get_text(value tree, value id){ CAMLlocal1(str); uchar* txt = XMLTREE(tree)->GetText((DocID) Int_val(id)); str = caml_copy_string((const char*)txt); - delete (txt); CAMLreturn (str); } @@ -159,7 +158,6 @@ extern "C" CAMLprim value caml_text_collection_get_cached_text(value tree, value CAMLlocal1(str); char* txt = (char*) XMLTREE(tree)->GetText((DocID) Int_val(id)); str = caml_copy_string(txt); - free(txt); CAMLreturn (str); } diff --git a/ata.ml b/ata.ml index 4ef9ccf..043d3d2 100644 --- a/ata.ml +++ b/ata.ml @@ -1009,7 +1009,7 @@ END end let top_down_count a t = let module RI = Run(Integer) in Integer.length (RI.run_top_down a t) - let top_down a t = let module RI = Run(GResult) in (RI.run_top_down a t) + let top_down a t = let module RI = Run(IdSet) in (RI.run_top_down a t) let bottom_up_count a t k = let module RI = Run(Integer) in Integer.length (RI.run_bottom_up a t k) diff --git a/ata.mli b/ata.mli index e8d64b1..fe328ad 100644 --- a/ata.mli +++ b/ata.mli @@ -99,6 +99,6 @@ module IdSet : ResultSet module GResult : ResultSet val top_down_count : 'a t -> Tree.t -> int -val top_down : 'a t -> Tree.t -> GResult.t +val top_down : 'a t -> Tree.t -> IdSet.t val bottom_up_count : 'a t -> Tree.t -> [> `CONTAINS of 'b | `TAG of Tag.t ] -> int diff --git a/main.ml b/main.ml index 7418fa1..2f12d19 100644 --- a/main.ml +++ b/main.ml @@ -94,7 +94,7 @@ let main v query_string output = in () else let result = time (top_down auto) v in - let rcount = GResult.length result in + let rcount = IdSet.length result in Printf.eprintf "Number of nodes in the result set : %i\n" rcount; Printf.eprintf "\n%!"; begin @@ -105,7 +105,7 @@ let main v query_string output = time( fun () -> let oc = open_out f in output_string oc "\n"; - GResult.iter (fun t -> + IdSet.iter (fun t -> Tree.print_xml_fast oc v t; output_char oc '\n'; -- 2.17.1 From c5f06d325240c808a9be4d71e20fc01969420bb3 Mon Sep 17 00:00:00 2001 From: kim Date: Mon, 18 May 2009 15:54:27 +0000 Subject: [PATCH 03/16] Expose the internal structure of Hconsed value git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@397 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- ata.ml | 44 +++++++++++++++++++-------------------- hcons.ml | 42 +++++++++++++++++++++++-------------- hcons.mli | 15 +++++++++++++- hlist.ml | 62 ++++++++++++++++++++++++++++--------------------------- hlist.mli | 14 +++++++++++-- ptset.ml | 2 +- 6 files changed, 107 insertions(+), 72 deletions(-) diff --git a/ata.ml b/ata.ml index 043d3d2..7a5a64d 100644 --- a/ata.ml +++ b/ata.ml @@ -220,13 +220,10 @@ end module TransTable = Hashtbl module Formlist = struct - include Hlist.Make(Transition) - type data = t node - let make _ = failwith "make" + include Hlist.Make(Transition) let print ppf fl = iter (fun t -> Transition.print ppf t; Format.pp_print_newline ppf ()) fl end - type 'a t = { id : int; @@ -495,11 +492,7 @@ let tags_of_state a q = module Run (RS : ResultSet) = struct - module SList = struct - include Hlist.Make (StateSet) - type data = t node - let make _ = failwith "make" - end + module SList = Hlist.Make (StateSet) @@ -644,32 +637,34 @@ END else RS.concat res1 res2 else RS.empty - let top_down ?(noright=false) a tree t slist ctx slot_size = let pempty = empty_size slot_size in - (* evaluation starts from the right so we put sl1,res1 at the end *) + (* evaluation starts from the right so we put sl1,res1 at the end *) let eval_fold2_slist fll t (sl2,res2) (sl1,res1) = let res = Array.copy res1 in let rec fold l1 l2 fll i aq = - match SList.node l1,SList.node l2, fll with - | SList.Cons(s1,ll1), - SList.Cons(s2,ll2), - fl::fll -> - let r',flags = eval_formlist s1 s2 fl in - let _ = res.(i) <- RS.merge flags t res1.(i) res2.(i) - in + match fll with + [fl] -> (* inline for speed *) + let s1 = SList.hd l1 + and s2 = SList.hd l2 in + let r',flags = eval_formlist s1 s2 fl in + let _ = res.(i) <- RS.merge flags t res1.(i) res2.(i) in + (SList.cons r' aq),res + | fl::fll -> + let SList.Cons(s1,ll1) = l1.SList.Node.node + and SList.Cons(s2,ll2) = l2.SList.Node.node in + let r',flags = eval_formlist s1 s2 fl in + let _ = res.(i) <- RS.merge flags t res1.(i) res2.(i) + in fold ll1 ll2 fll (i+1) (SList.cons r' aq) - - | SList.Nil, SList.Nil,[] -> aq,res - | _ -> assert false + | _ -> aq,res in - fold sl1 sl2 fll 0 SList.nil + fold sl1 sl2 fll 0 SList.nil in let null_result() = (pempty,Array.make slot_size RS.empty) in let rec loop t slist ctx = if t == Tree.nil then null_result() else get_trans t slist (Tree.tag tree t) ctx - and loop_tag tag t slist ctx = if t == Tree.nil then null_result() else get_trans t slist tag ctx and loop_no_right t slist ctx = @@ -713,6 +708,9 @@ END in (* Logic to chose the first and next function *) let _,tags_below,_,tags_after = Tree.tags tree tag in +(* let _ = Printf.eprintf "Tags below %s are : \n" (Tag.to_string tag) in + let _ = Ptset.Int.iter (fun i -> Printf.eprintf "%s " (Tag.to_string i)) tags_below in + let _ = Printf.eprintf "\n%!" in *) let f_kind,first = choose_jump_down tree tags_below ca da a and n_kind,next = if noright then (`NIL, fun _ _ -> Tree.nil ) else choose_jump_next tree tags_after sa fa a in diff --git a/hcons.ml b/hcons.ml index eb60385..35bc942 100644 --- a/hcons.ml +++ b/hcons.ml @@ -1,22 +1,34 @@ INCLUDE "utils.ml" -module type S = -sig - type data - type t - val make : data -> t - val node : t -> data - val hash : t -> int - val uid : t -> int - val equal : t -> t -> bool -end +module type SA = + sig + type data + type t + val make : data -> t + val node : t -> data + val hash : t -> int + val uid : t -> int + val equal : t -> t -> bool + end + +module type S = + sig + type data + type t = private { id : int; + key : int; + node : data } + val make : data -> t + val node : t -> data + val hash : t -> int + val uid : t -> int + val equal : t -> t -> bool + end + module Make (H : Hashtbl.HashedType) : S with type data = H.t = struct type data = H.t - type t = { id : int; - key : int; - node : data; - } - + type t = { id : int; + key : int; + node : data } let node t = t.node let hash t = t.key let uid t = t.id diff --git a/hcons.mli b/hcons.mli index 49af121..dcdcb69 100644 --- a/hcons.mli +++ b/hcons.mli @@ -1,4 +1,4 @@ -module type S = +module type SA = sig type data type t @@ -9,4 +9,17 @@ module type S = val equal : t -> t -> bool end +module type S = + sig + type data + type t = private { id : int; + key : int; + node : data } + val make : data -> t + val node : t -> data + val hash : t -> int + val uid : t -> int + val equal : t -> t -> bool + end + module Make (H : Hashtbl.HashedType) : S with type data = H.t diff --git a/hlist.ml b/hlist.ml index 4b83668..e5a4aa5 100644 --- a/hlist.ml +++ b/hlist.ml @@ -2,9 +2,19 @@ INCLUDE "utils.ml" module type S = sig type elt type 'a node = Nil | Cons of elt * 'a - type t + + module rec Node : + sig + include Hcons.S with type data = Data.t + end + and Data : sig + include Hashtbl.HashedType with type t = Node.t node + end + type data = Data.t + type t = Node.t val hash : t -> int val uid : t -> int + val make : data -> t val equal : t -> t -> bool val nil : t val node : t -> t node @@ -19,59 +29,51 @@ module type S = sig val length : t -> int end -module Make ( H : Hcons.S ) : S with type elt = H.t = +module Make ( H : Hcons.SA ) : S with type elt = H.t = struct type elt = H.t type 'a node = Nil | Cons of elt * 'a - module rec HNode : Hcons.S with type data = Node.t = Hcons.Make (Node) - and Node : Hashtbl.HashedType with type t = HNode.t node = + module rec Node : Hcons.S with type data = Data.t = Hcons.Make (Data) + and Data : Hashtbl.HashedType with type t = Node.t node = struct - type t = HNode.t node + type t = Node.t node let equal x y = match x,y with - | Nil,Nil -> true - | Cons (a,aa), Cons(b,bb) -> (H.equal a b) && (HNode.equal aa bb) + | _,_ when x==y -> true + | Cons (a,aa), Cons(b,bb) -> (aa==bb) && (H.equal a b) | _ -> false let hash = function | Nil -> 0 - | Cons(a,aa) -> HASHINT3(PRIME3,H.uid a, HNode.uid aa) + | Cons(a,aa) -> HASHINT3(PRIME3,H.uid a, aa.Node.id) end - ;; - - type t = HNode.t - let node = HNode.node - let hash = HNode.hash - let equal = HNode.equal - let uid = HNode.uid - let nil = HNode.make Nil - let cons a b = HNode.make (Cons(a,b)) - let hd a = - match HNode.node a with - | Nil -> failwith "hd" - | Cons(a,_) -> a - - let tl a = - match HNode.node a with - | Nil -> failwith "tl" - | Cons(_,a) -> a - + type data = Data.t + type t = Node.t + let make = Node.make + let node = Node.node + let hash = Node.hash + let equal = Node.equal + let uid = Node.uid + let nil = Node.make Nil + let cons a b = Node.make (Cons(a,b)) + let hd = function { Node.node = Cons(a,_) } -> a | _ -> failwith "hd" + let tl = function { Node.node = Cons(_,a) } -> a | _ -> failwith "tl" let fold f l acc = - let rec loop acc l = match HNode.node l with + let rec loop acc l = match l.Node.node with | Nil -> acc | Cons(a,aa) -> loop (f a acc) aa in loop acc l let map f l = - let rec loop l = match HNode.node l with + let rec loop l = match l.Node.node with | Nil -> nil | Cons(a,aa) -> cons (f a) (loop aa) in loop l let iter f l = - let rec loop l = match HNode.node l with + let rec loop l = match l.Node.node with | Nil -> () | Cons(a,aa) -> (f a);(loop aa) in diff --git a/hlist.mli b/hlist.mli index 7796833..7210250 100644 --- a/hlist.mli +++ b/hlist.mli @@ -1,9 +1,19 @@ module type S = sig type elt type 'a node = Nil | Cons of elt * 'a - type t + + module rec Node : + sig + include Hcons.S with type data = Data.t + end + and Data : sig + include Hashtbl.HashedType with type t = Node.t node + end + type data = Data.t + type t = Node.t val hash : t -> int val uid : t -> int + val make : data -> t val equal : t -> t -> bool val nil : t val node : t -> t node @@ -18,4 +28,4 @@ module type S = sig val length : t -> int end -module Make (H : Hcons.S) : S with type elt = H.t +module Make (H : Hcons.SA) : S with type elt = H.t diff --git a/ptset.ml b/ptset.ml index 4fc92d6..584ea0a 100644 --- a/ptset.ml +++ b/ptset.ml @@ -21,7 +21,7 @@ sig val node : t -> data end -module Make ( H : Hcons.S ) : S with type elt = H.t = +module Make ( H : Hcons.SA ) : S with type elt = H.t = struct type elt = H.t type 'a node = -- 2.17.1 From 83b4813de7204842bb59d5cb0aec71aff633ca85 Mon Sep 17 00:00:00 2001 From: kim Date: Wed, 20 May 2009 06:38:43 +0000 Subject: [PATCH 04/16] Fixed caching bugs in ata.ml removed debugging stuff in print_xml_fast (tree.ml) git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@406 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- Makefile | 6 +- ata.ml | 285 +++++++++++++++++++++++++++++-------------------------- hlist.ml | 6 +- main.ml | 97 ++++++++++++------- tree.ml | 4 - 5 files changed, 220 insertions(+), 178 deletions(-) diff --git a/Makefile b/Makefile index 7dfc4d0..154c166 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -INLINE=1000 +INLINE=10000 DEBUG=false PROFILE=false VERBOSE=false @@ -52,7 +52,7 @@ CXX = g++ endif ifeq ($(PROFILE), true) -PROFILE_FLAGS = -p +PROFILE_FLAGS = -p -S SYNT_PROF = -ppopt -DPROFILE endif SYNT_FLAGS = $(SYNT_DEBUG) $(SYNT_PROF) @@ -113,7 +113,7 @@ libcamlshredder.a: $(CXXOBJECTS) XMLTree/XMLTree.a clean: @echo [CLEAN] - $(HIDE) rm -f *~ *.cm* *.[oa] *.so main + $(HIDE) rm -f *~ *.cm* *.[oa] *.so main *.s $(HIDE) rm -rf .libs diff --git a/ata.ml b/ata.ml index 7a5a64d..42bf24e 100644 --- a/ata.ml +++ b/ata.ml @@ -34,6 +34,7 @@ struct | Or of 'hcons * 'hcons | And of 'hcons * 'hcons | Atom of ([ `Left | `Right | `LLeft | `RRight ]*bool*State.t) + type 'hcons node = { pos : 'hcons expr; mutable neg : 'hcons; @@ -42,34 +43,33 @@ struct } external hash_const_variant : [> ] -> int = "%identity" - module rec HNode : Hcons.S with type data = Node.t = Hcons.Make (Node) - and Node : Hashtbl.HashedType with type t = HNode.t node = + module rec Node : Hcons.S with type data = Data.t = Hcons.Make (Data) + and Data : Hashtbl.HashedType with type t = Node.t node = struct - type t = HNode.t node + type t = Node.t node let equal x y = x.size == y.size && match x.pos,y.pos with - | False,False - | True,True -> true - | Or(xf1,xf2),Or(yf1,yf2) - | And(xf1,xf2),And(yf1,yf2) -> (HNode.equal xf1 yf1) && (HNode.equal xf2 yf2) - | Atom(d1,p1,s1), Atom(d2,p2,s2) -> d1 == d2 && (p1==p2) && s1 == s2 - | _ -> false + | a,b when a == b -> true + | Or(xf1,xf2),Or(yf1,yf2) + | And(xf1,xf2),And(yf1,yf2) -> (xf1 == yf1) && (xf2 == yf2) + | Atom(d1,p1,s1), Atom(d2,p2,s2) -> d1 == d2 && (p1==p2) && s1 == s2 + | _ -> false let hash f = match f.pos with | False -> 0 | True -> 1 - | Or (f1,f2) -> HASHINT3(PRIME2,HNode.uid f1,HNode.uid f2) - | And (f1,f2) -> HASHINT3(PRIME3,HNode.uid f1,HNode.uid f2) + | Or (f1,f2) -> HASHINT3(PRIME2,f1.Node.id, f2.Node.id) + | And (f1,f2) -> HASHINT3(PRIME3,f1.Node.id,f2.Node.id) | Atom(d,p,s) -> HASHINT4(PRIME4,hash_const_variant d,vb p,s) end - type t = HNode.t - let hash = HNode.hash - let uid = HNode.uid - let equal = HNode.equal - let expr f = (HNode.node f).pos - let st f = (HNode.node f ).st - let size f = (HNode.node f).size + type t = Node.t + let hash x = x.Node.key + let uid x = x.Node.id + let equal = Node.equal + let expr f = f.Node.node.pos + let st f = f.Node.node.st + let size f = f.Node.node.size let prio f = match expr f with @@ -108,10 +108,10 @@ struct let cons pos neg s1 s2 size1 size2 = - let nnode = HNode.make { pos = neg; neg = (Obj.magic 0); st = s2; size = size2 } in - let pnode = HNode.make { pos = pos; neg = nnode ; st = s1; size = size1 } + let nnode = Node.make { pos = neg; neg = (Obj.magic 0); st = s2; size = size2 } in + let pnode = Node.make { pos = pos; neg = nnode ; st = s1; size = size1 } in - (HNode.node nnode).neg <- pnode; (* works because the neg field isn't taken into + (Node.node nnode).neg <- pnode; (* works because the neg field isn't taken into account for hashing ! *) pnode,nnode @@ -127,7 +127,7 @@ struct | `RRight -> empty_triple,(StateSet.empty,si,si) in fst (cons (Atom(d,p,s)) (Atom(d,not p,s)) ss ss 1 1) - let not_ f = (HNode.node f).neg + let not_ f = f.Node.node.neg let union_hex ((l1,ll1,lll1),(r1,rr1,rrr1)) ((l2,ll2,lll2),(r2,rr2,rrr2)) = (StateSet.mem_union l1 l2 ,StateSet.mem_union ll1 ll2,StateSet.mem_union lll1 lll2), (StateSet.mem_union r1 r2 ,StateSet.mem_union rr1 rr2,StateSet.mem_union rrr1 rrr2) @@ -224,6 +224,13 @@ module Formlist = struct let print ppf fl = iter (fun t -> Transition.print ppf t; Format.pp_print_newline ppf ()) fl end + +module Formlistlist = +struct + include Hlist.Make(Formlist) + let print ppf fll = + iter (fun fl -> Formlist.print ppf fl; Format.pp_print_newline ppf ())fll +end type 'a t = { id : int; @@ -569,7 +576,6 @@ END end - let choose_jump tagset qtags1 qtagsn a f_nil f_t1 f_s1 f_tn f_sn f_notext f_maytext = let tags1,hastext1,fin1 = inter_text tagset (tags a qtags1) in let tagsn,hastextn,finn = inter_text tagset (tags a qtagsn) in @@ -591,7 +597,7 @@ END else if (hastext1||hastextn) then (`ANY,f_maytext) else (`ANY,f_notext) - let choose_jump_down tree a b c d = + let choose_jump_down tree a b c d = choose_jump a b c d (mk_fun (fun _ -> Tree.nil) "Tree.mk_nil") (mk_fun (Tree.tagged_child tree) "Tree.tagged_child") @@ -610,65 +616,78 @@ END (mk_fun (Tree.select_foll_ctx tree) "Tree.select_foll_ctx") (mk_fun (Tree.next_element_ctx tree) "Tree.node_element_ctx") (mk_fun (Tree.next_sibling_ctx tree) "Tree.node_sibling_ctx") - - - module SetTagKey = - struct - type t = Tag.t*SList.t - let equal (t1,s1) (t2,s2) = t1 == t2 && s1 == s2 - let hash (t,s) = HASHINT2(t,SList.uid s) - end - - module CachedTransTable = Hashtbl.Make(SetTagKey) - let td_trans = CachedTransTable.create 4093 - - + + module SetTagKey = + struct + type t = Tag.t*SList.t + let equal (t1,s1) (t2,s2) = t1 == t2 && s1 == s2 + let hash (t,s) = HASHINT2(t,s.SList.Node.id) + end + + module CachedTransTable = Hashtbl.Make(SetTagKey) + let td_trans = CachedTransTable.create 4093 + + let empty_size n = let rec loop acc = function 0 -> acc | n -> loop (SList.cons StateSet.empty acc) (n-1) in loop SList.nil n - let merge rb rb1 rb2 mark t res1 res2 = - if rb then - let res1 = if rb1 then res1 else RS.empty - and res2 = if rb2 then res2 else RS.empty - in - if mark then RS.cons t (RS.concat res1 res2) - else RS.concat res1 res2 - else RS.empty + + module Fold2Res = Hashtbl.Make(struct + type t = Formlistlist.t*SList.t*SList.t + let hash (f,s,t) = HASHINT3(f.Formlistlist.Node.id, + s.SList.Node.id, + t.SList.Node.id) + let equal (a,b,c) (d,e,f) = a==d && b == e && c == f + end) + + let h_fold2 = Fold2Res.create BIG_H_SIZE let top_down ?(noright=false) a tree t slist ctx slot_size = let pempty = empty_size slot_size in + let rempty = Array.make slot_size RS.empty in (* evaluation starts from the right so we put sl1,res1 at the end *) let eval_fold2_slist fll t (sl2,res2) (sl1,res1) = - let res = Array.copy res1 in - let rec fold l1 l2 fll i aq = - match fll with - [fl] -> (* inline for speed *) - let s1 = SList.hd l1 - and s2 = SList.hd l2 in - let r',flags = eval_formlist s1 s2 fl in - let _ = res.(i) <- RS.merge flags t res1.(i) res2.(i) in - (SList.cons r' aq),res - | fl::fll -> - let SList.Cons(s1,ll1) = l1.SList.Node.node - and SList.Cons(s2,ll2) = l2.SList.Node.node in - let r',flags = eval_formlist s1 s2 fl in - let _ = res.(i) <- RS.merge flags t res1.(i) res2.(i) - in - fold ll1 ll2 fll (i+1) (SList.cons r' aq) - | _ -> aq,res - in - fold sl1 sl2 fll 0 SList.nil + let res = Array.copy rempty in + try + let r,b,btab = Fold2Res.find h_fold2 (fll,sl1,sl2) in + if b then for i=0 to slot_size - 1 do + res.(i) <- RS.merge btab.(i) t res1.(i) res2.(i); + done; + r,res + with + Not_found -> + let btab = Array.make slot_size (false,false,false,false) in + let rec fold l1 l2 fll i aq ab = + match fll.Formlistlist.Node.node, + l1.SList.Node.node, + l2.SList.Node.node + with + | Formlistlist.Cons(fl,fll), + SList.Cons(s1,ll1), + SList.Cons(s2,ll2) -> + let r',((b,_,_,_) as flags) = eval_formlist s1 s2 fl in + let _ = btab.(i) <- flags + in + fold ll1 ll2 fll (i+1) (SList.cons r' aq) (b||ab) + | _ -> aq,ab + in + let r,b = fold sl1 sl2 fll 0 SList.nil false in + Fold2Res.add h_fold2 (fll,sl1,sl2) (r,b,btab); + if b then for i=0 to slot_size - 1 do + res.(i) <- RS.merge btab.(i) t res1.(i) res2.(i); + done; + r,res in - let null_result() = (pempty,Array.make slot_size RS.empty) in - let rec loop t slist ctx = - if t == Tree.nil then null_result() else get_trans t slist (Tree.tag tree t) ctx + let null_result = (pempty,Array.copy rempty) in + let rec loop t slist ctx= + if t == Tree.nil then null_result else get_trans t slist (Tree.tag tree t) ctx and loop_tag tag t slist ctx = - if t == Tree.nil then null_result() else get_trans t slist tag ctx + if t == Tree.nil then null_result else get_trans t slist tag ctx and loop_no_right t slist ctx = - if t == Tree.nil then null_result() else get_trans ~noright:true t slist (Tree.tag tree t) ctx + if t == Tree.nil then null_result else get_trans ~noright:true t slist (Tree.tag tree t) ctx and get_trans ?(noright=false) t slist tag ctx = let cont = try @@ -703,79 +722,79 @@ END ) ) set (Formlist.nil,StateSet.empty,StateSet.empty,ca,da,sa,fa) - in fl::fll_acc, (SList.cons ll lllacc), (SList.cons rr rllacc),ca,da,sa,fa) - slist ([],SList.nil,SList.nil,StateSet.empty,StateSet.empty,StateSet.empty,StateSet.empty) + in (Formlistlist.cons fl fll_acc), (SList.cons ll lllacc), (SList.cons rr rllacc),ca,da,sa,fa) + slist (Formlistlist.nil,SList.nil,SList.nil,StateSet.empty,StateSet.empty,StateSet.empty,StateSet.empty) in (* Logic to chose the first and next function *) let _,tags_below,_,tags_after = Tree.tags tree tag in (* let _ = Printf.eprintf "Tags below %s are : \n" (Tag.to_string tag) in let _ = Ptset.Int.iter (fun i -> Printf.eprintf "%s " (Tag.to_string i)) tags_below in let _ = Printf.eprintf "\n%!" in *) + let tags_below = Ptset.Int.remove tag tags_below in let f_kind,first = choose_jump_down tree tags_below ca da a and n_kind,next = if noright then (`NIL, fun _ _ -> Tree.nil ) else choose_jump_next tree tags_after sa fa a in - let empty_res = null_result() in - let cont = - match f_kind,n_kind with - | `NIL,`NIL -> - (fun _ _ -> eval_fold2_slist fl_list t empty_res empty_res ) - | _,`NIL -> ( - match f_kind with - |`TAG(tag) -> - (fun t _ -> eval_fold2_slist fl_list t empty_res - (loop_tag tag (first t) llist t)) - | `ANY -> - (fun t _ -> eval_fold2_slist fl_list t empty_res - (loop (first t) llist t)) - | _ -> assert false) - - | `NIL,_ -> ( - match n_kind with - |`TAG(tag) -> - (fun t ctx -> eval_fold2_slist fl_list t - (loop_tag tag (next t ctx) rlist ctx) empty_res) - - | `ANY -> - (fun t ctx -> eval_fold2_slist fl_list t - (loop (next t ctx) rlist ctx) empty_res) - - | _ -> assert false) - - | `TAG(tag1),`TAG(tag2) -> - (fun t ctx -> eval_fold2_slist fl_list t - (loop (next t ctx) rlist ctx) - (loop (first t) llist t)) - - | `TAG(tag),`ANY -> - (fun t ctx -> - eval_fold2_slist fl_list t - (loop (next t ctx) rlist ctx) - (loop_tag tag (first t) llist t)) - | `ANY,`TAG(tag) -> - (fun t ctx -> - eval_fold2_slist fl_list t - (loop_tag tag (next t ctx) rlist ctx) - (loop (first t) llist t) ) - | `ANY,`ANY -> - (fun t ctx -> - eval_fold2_slist fl_list t - (loop (next t ctx) rlist ctx) - (loop (first t) llist t) ) - | _ -> assert false - in - let cont = D_IF_( (fun t ctx -> - let a,b = cont t ctx in - register_trace tree t (slist,a,fl_list,first,next,ctx); - (a,b) - ) ,cont) - in - (CachedTransTable.add td_trans (tag,slist) cont;cont) - in cont t ctx - - in - (if noright then loop_no_right else loop) t slist ctx - - + let empty_res = null_result in + let cont = + match f_kind,n_kind with + | `NIL,`NIL -> + (fun t _ -> eval_fold2_slist fl_list t empty_res empty_res ) + | _,`NIL -> ( + match f_kind with + |`TAG(tag) -> + (fun t _ -> eval_fold2_slist fl_list t empty_res + (loop_tag tag (first t) llist t)) + | `ANY -> + (fun t _ -> eval_fold2_slist fl_list t empty_res + (loop (first t) llist t)) + | _ -> assert false) + + | `NIL,_ -> ( + match n_kind with + |`TAG(tag) -> + (fun t ctx -> eval_fold2_slist fl_list t + (loop_tag tag (next t ctx) rlist ctx) empty_res) + + | `ANY -> + (fun t ctx -> eval_fold2_slist fl_list t + (loop (next t ctx) rlist ctx) empty_res) + + | _ -> assert false) + + | `TAG(tag1),`TAG(tag2) -> + (fun t ctx -> eval_fold2_slist fl_list t + (loop_tag tag2 (next t ctx) rlist ctx) + (loop_tag tag1 (first t) llist t)) + + | `TAG(tag),`ANY -> + (fun t ctx -> eval_fold2_slist fl_list t + (loop (next t ctx) rlist ctx) + (loop_tag tag (first t) llist t)) + | `ANY,`TAG(tag) -> + (fun t ctx -> + eval_fold2_slist fl_list t + (loop_tag tag (next t ctx) rlist ctx) + (loop (first t) llist t) ) + | `ANY,`ANY -> + (fun t ctx -> + eval_fold2_slist fl_list t + (loop (next t ctx) rlist ctx) + (loop (first t) llist t) ) + | _ -> assert false + in + let cont = D_IF_( (fun t ctx -> + let a,b = cont t ctx in + register_trace tree t (slist,a,fl_list,first,next,ctx); + (a,b) + ) ,cont) + in + (CachedTransTable.add td_trans (tag,slist) cont;cont) + in cont t ctx + + in + (if noright then loop_no_right else loop) t slist ctx + + let run_top_down a tree = let init = SList.cons a.init SList.nil in let _,res = top_down a tree Tree.root init Tree.root 1 diff --git a/hlist.ml b/hlist.ml index e5a4aa5..5509871 100644 --- a/hlist.ml +++ b/hlist.ml @@ -49,10 +49,10 @@ struct type data = Data.t type t = Node.t let make = Node.make - let node = Node.node - let hash = Node.hash + let node x = x.Node.node + let hash x = x.Node.key let equal = Node.equal - let uid = Node.uid + let uid x= x.Node.id let nil = Node.make Nil let cons a b = Node.make (Cons(a,b)) let hd = function { Node.node = Cons(a,_) } -> a | _ -> failwith "hd" diff --git a/main.ml b/main.ml index 2f12d19..ee14b34 100644 --- a/main.ml +++ b/main.ml @@ -13,13 +13,34 @@ let enabled_gc = Gc.get() let disabled_gc = { Gc.get() with Gc.max_overhead = 1000000; Gc.space_overhead = 100 } - - - - +let hash x = 131*x/(x-1+1) +let test_loop tree tag = + let t' = Tree.tagged_desc tree tag Tree.root in + let f = Hashtbl.create 4096 + in + let jump t _ = Tree.tagged_foll_ctx tree tag t Tree.root in + let g t ctx = + if t == Tree.nil then 0 + else 1+ ((Hashtbl.find f (hash 101)) (jump t ctx) ctx) + in + Hashtbl.add f (hash 101) g; + (Hashtbl.find f (hash 101)) t' Tree.root +let test_loop2 tree tag = + let t' = Tree.tagged_desc tree tag Tree.root in + let f = Hashtbl.create 4096 + in + let jump t _ = Tree.tagged_foll_ctx tree tag t Tree.root in + let rec g t ctx = + if t == Tree.nil then 0 + else 1+ (match (Hashtbl.find f (hash 101)) with + `Foo ->g (jump t ctx) ctx + ) + in + Hashtbl.add f (hash 101) `Foo; + g t' Tree.root let main v query_string output = - + let _ = Tag.init (Tree.tag_pool v) in Printf.eprintf "Parsing query : "; let query = try @@ -28,36 +49,42 @@ let main v query_string output = with Ulexer.Loc.Exc_located ((x,y),e) -> Printf.eprintf "character %i-%i %s\n" x y (Printexc.to_string e);exit 1 in - XPath.Ast.print Format.err_formatter query; - Format.fprintf Format.err_formatter "\n%!"; - Printf.eprintf "Compiling query : "; - let auto,ltags,contains = time (XPath.Compile.compile ~querystring:query_string) query in - let _ = Ata.dump Format.err_formatter auto in - let _ = Printf.eprintf "%!" in - let jump_to = - match contains with - None -> (max_int,`NOTHING) - | Some s -> - let r = Tree.count v s - in - Printf.eprintf "%i documents in the TextCollection\n" (Tree.text_size v); - Printf.eprintf "Global count is %i, using " r; - if r < !Options.tc_threshold then begin - Printf.eprintf "TextCollection contains\nCalling global contains : "; - time (Tree.init_contains v) s; - end - else begin - Printf.eprintf "Naive contains\nCalling global contains : "; - time (Tree.init_naive_contains v) s - end;(r,`CONTAINS(s)) - in - let test_list = jump_to in - (* + let _ = Printf.eprintf "Timing //keyword :" in + let r = time (test_loop v) (Tag.tag "keyword") in + let _ = Printf.eprintf "Count is %i\n%!" r in + let _ = Printf.eprintf "Timing //keyword 2:" in + let r = time (test_loop2 v) (Tag.tag "keyword") in + let _ = Printf.eprintf "Count is %i\n%!" r in + XPath.Ast.print Format.err_formatter query; + Format.fprintf Format.err_formatter "\n%!"; + Printf.eprintf "Compiling query : "; + let auto,ltags,contains = time (XPath.Compile.compile ~querystring:query_string) query in + let _ = Ata.dump Format.err_formatter auto in + let _ = Printf.eprintf "%!" in + let jump_to = + match contains with + None -> (max_int,`NOTHING) + | Some s -> + let r = Tree.count v s + in + Printf.eprintf "%i documents in the TextCollection\n" (Tree.text_size v); + Printf.eprintf "Global count is %i, using " r; + if r < !Options.tc_threshold then begin + Printf.eprintf "TextCollection contains\nCalling global contains : "; + time (Tree.init_contains v) s; + end + else begin + Printf.eprintf "Naive contains\nCalling global contains : "; + time (Tree.init_naive_contains v) s + end;(r,`CONTAINS(s)) + in + let test_list = jump_to in + (* let test_list = - if (!Options.backward) then begin - Printf.eprintf "Finding min occurences : "; - time - ( List.fold_left (fun ((min_occ,kind)as acc) (tag,_) -> + if (!Options.backward) then begin + Printf.eprintf "Finding min occurences : "; + time + ( List.fold_left (fun ((min_occ,kind)as acc) (tag,_) -> let numtags = Tree.subtree_tags v tag Tree.root in if ((numtags < min_occ) && numtags >= 2) then (numtags,`TAG(tag)) @@ -78,7 +105,7 @@ let main v query_string output = begin let _ = Gc.full_major();Gc.compact() in let _ = Printf.eprintf "%!" in - (* let _ = Gc.set (disabled_gc) in *) + let _ = Gc.set (disabled_gc) in if !Options.backward && ((snd test_list) != `NOTHING )then let r = time (bottom_up_count auto v )(snd test_list) in diff --git a/tree.ml b/tree.ml index 24fca56..a1ddcac 100644 --- a/tree.ml +++ b/tree.ml @@ -481,8 +481,6 @@ let array_find a i j = then begin let tid = tree_my_text tree.doc t in - let _ = Printf.eprintf "my_text %i returned %i\n%!" (inode t) (inode tid) - in output_string outc (text_get_cached_text tree.doc tid); if print_right then loop (next_sibling tree t); @@ -525,8 +523,6 @@ let array_find a i j = let attname = String.sub s 3 ((String.length s) -3) in let fsa = first_child tree a in let tid = tree_my_text tree.doc fsa in - let _ = Printf.eprintf "my_text %i returned %i\n%!" (inode fsa) (inode tid) - in output_char outc ' '; output_string outc attname; output_string outc "=\""; -- 2.17.1 From 9728f46b5f256250a4451c0a9bda30ce81be5b8b Mon Sep 17 00:00:00 2001 From: kim Date: Wed, 20 May 2009 06:38:56 +0000 Subject: [PATCH 05/16] Added correct decision procedure git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@407 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- ata.ml | 147 +++++++++++++++++++++++++++++++++++++++++++-- finiteCofinite.ml | 4 ++ finiteCofinite.mli | 2 + 3 files changed, 148 insertions(+), 5 deletions(-) diff --git a/ata.ml b/ata.ml index 42bf24e..eb4f394 100644 --- a/ata.ml +++ b/ata.ml @@ -512,11 +512,13 @@ END let mk_fun f s = D_IGNORE_(register_funname f s,f) let mk_app_fun f arg s = let g = f arg in D_IGNORE_(register_funname g ((get_funname f) ^ " " ^ s), g) + let mk_app_fun2 f arg1 arg2 s = let g = f arg1 arg2 in + D_IGNORE_(register_funname g ((get_funname f) ^ " " ^ s), g) let string_of_ts tags = (Ptset.Int.fold (fun t a -> a ^ " " ^ (Tag.to_string t) ) tags "{")^ " }" - +(* module Algebra = struct type jump = [ `LONG | `CLOSE | `NIL ] @@ -616,6 +618,139 @@ END (mk_fun (Tree.select_foll_ctx tree) "Tree.select_foll_ctx") (mk_fun (Tree.next_element_ctx tree) "Tree.node_element_ctx") (mk_fun (Tree.next_sibling_ctx tree) "Tree.node_sibling_ctx") +*) + module Algebra = + struct + type jump = [ `NIL | `ANY |`ANYNOTEXT | `JUMP ] + type t = jump*Ptset.Int.t*Ptset.Int.t + let jts = function + | `JUMP -> "JUMP" + | `NIL -> "NIL" + | `ANY -> "ANY" + | `ANYNOTEXT -> "ANYNOTEXT" + let merge_jump (j1,c1,l1) (j2,c2,l2) = + match j1,j2 with + | _,`NIL -> (j1,c1,l1) + | `NIL,_ -> (j2,c2,l2) + | `ANY,_ -> (`ANY,Ptset.Int.empty,Ptset.Int.empty) + | _,`ANY -> (`ANY,Ptset.Int.empty,Ptset.Int.empty) + | `ANYNOTEXT,_ -> + if Ptset.Int.mem Tag.pcdata (Ptset.Int.union c2 l2) then + (`ANY,Ptset.Int.empty,Ptset.Int.empty) + else + (`ANYNOTEXT,Ptset.Int.empty,Ptset.Int.empty) + | _,`ANYNOTEXT -> + if Ptset.Int.mem Tag.pcdata (Ptset.Int.union c1 l1) then + (`ANY,Ptset.Int.empty,Ptset.Int.empty) + else + (`ANYNOTEXT,Ptset.Int.empty,Ptset.Int.empty) + | `JUMP,`JUMP -> (`JUMP, Ptset.Int.union c1 c2,Ptset.Int.union l1 l2) + + let merge_jump_list = function + | [] -> `NIL,Ptset.Int.empty,Ptset.Int.empty + | p::r -> + List.fold_left (merge_jump) p r + + let labels a s = + Hashtbl.fold + ( + fun q l acc -> + if (q == s) + then + + (List.fold_left + (fun acc (ts,f) -> + let _,_,_,bur = Transition.node f in + if bur then acc else TagSet.cup acc ts) + acc l) + else acc ) a.trans TagSet.empty + exception Found + + let is_rec a s access = + List.exists + (fun (_,t) -> let _,_,f,_ = Transition.node t in + StateSet.mem s ((fun (_,_,x) -> x) (access (Formula.st f)))) (Hashtbl.find a.trans s) + + + let decide a c_label l_label dir_states dir = + + let l = StateSet.fold + (fun s l -> + let s_rec = is_rec a s (if dir then fst else snd) in + let s_rec = if dir then s_rec else + (* right move *) + is_rec a s fst + in + let s_lab = labels a s in + let jmp,cc,ll = + if (not (TagSet.is_finite s_lab)) then + if TagSet.mem Tag.pcdata s_lab then (`ANY,Ptset.Int.empty,Ptset.Int.empty) + else (`ANYNOTEXT,Ptset.Int.empty,Ptset.Int.empty) + else + if s_rec + then (`JUMP,Ptset.Int.empty, TagSet.positive + (TagSet.cap (TagSet.inj_positive l_label) s_lab)) + else (`JUMP,TagSet.positive + (TagSet.cap (TagSet.inj_positive c_label) s_lab), + Ptset.Int.empty ) + in + (if jmp != `ANY + && jmp != `ANYNOTEXT + && Ptset.Int.is_empty cc + && Ptset.Int.is_empty ll + then (`NIL,Ptset.Int.empty,Ptset.Int.empty) + else (jmp,cc,ll))::l) dir_states [] + in merge_jump_list l + + + end + + + + let choose_jump (d,cl,ll) f_nil f_t1 f_s1 f_tn f_sn f_s1n f_notext f_maytext = + match d with + | `NIL -> (`NIL,f_nil) + | `ANYNOTEXT -> `ANY,f_notext + | `ANY -> `ANY,f_maytext + | `JUMP -> + if Ptset.Int.is_empty cl then + if Ptset.Int.is_singleton ll then + let tag = Ptset.Int.choose ll in + (`TAG(tag),mk_app_fun f_tn tag (Tag.to_string tag)) + else + (`ANY,mk_app_fun f_sn ll (string_of_ts ll)) + else if Ptset.Int.is_empty ll then + if Ptset.Int.is_singleton cl then + let tag = Ptset.Int.choose cl in + (`TAG(tag),mk_app_fun f_t1 tag (Tag.to_string tag)) + else + (`ANY,mk_app_fun f_s1 cl (string_of_ts cl)) + else + (`ANY,mk_app_fun2 f_s1n cl ll ((string_of_ts cl) ^ " " ^ (string_of_ts ll))) + + | _ -> assert false + + let choose_jump_down tree d = + choose_jump d + (mk_fun (fun _ -> Tree.nil) "Tree.mk_nil") + (mk_fun (Tree.tagged_child tree) "Tree.tagged_child") + (mk_fun (Tree.select_child tree) "Tree.select_child") + (mk_fun (Tree.tagged_desc tree) "Tree.tagged_desc") + (mk_fun (Tree.select_desc tree) "Tree.select_desc") + (mk_fun (fun _ _ -> Tree.first_child tree) "[FIRSTCHILD]Tree.select_child_desc") + (mk_fun (Tree.first_element tree) "Tree.first_element") + (mk_fun (Tree.first_child tree) "Tree.first_child") + + let choose_jump_next tree d = + choose_jump d + (mk_fun (fun _ _ -> Tree.nil) "Tree.mk_nil2") + (mk_fun (Tree.tagged_sibling_ctx tree) "Tree.tagged_sibling_ctx") + (mk_fun (Tree.select_sibling_ctx tree) "Tree.select_sibling_ctx") + (mk_fun (Tree.tagged_foll_ctx tree) "Tree.tagged_foll_ctx") + (mk_fun (Tree.select_foll_ctx tree) "Tree.select_foll_ctx") + (mk_fun (fun _ _ -> Tree.next_sibling_ctx tree) "[NEXTSIBLING]Tree.select_sibling_foll_ctx") + (mk_fun (Tree.next_element_ctx tree) "Tree.next_element_ctx") + (mk_fun (Tree.next_sibling_ctx tree) "Tree.node_sibling_ctx") module SetTagKey = struct @@ -726,14 +861,16 @@ END slist (Formlistlist.nil,SList.nil,SList.nil,StateSet.empty,StateSet.empty,StateSet.empty,StateSet.empty) in (* Logic to chose the first and next function *) - let _,tags_below,_,tags_after = Tree.tags tree tag in + let tags_child,tags_below,tags_siblings,tags_after = Tree.tags tree tag in + let d_f = Algebra.decide a tags_child tags_below (StateSet.union ca da) true in + let d_n = Algebra.decide a tags_siblings tags_after (StateSet.union sa fa) false in (* let _ = Printf.eprintf "Tags below %s are : \n" (Tag.to_string tag) in let _ = Ptset.Int.iter (fun i -> Printf.eprintf "%s " (Tag.to_string i)) tags_below in let _ = Printf.eprintf "\n%!" in *) - let tags_below = Ptset.Int.remove tag tags_below in - let f_kind,first = choose_jump_down tree tags_below ca da a +(* let tags_below = Ptset.Int.remove tag tags_below in *) + let f_kind,first = choose_jump_down tree d_f and n_kind,next = if noright then (`NIL, fun _ _ -> Tree.nil ) - else choose_jump_next tree tags_after sa fa a in + else choose_jump_next tree d_n in let empty_res = null_result in let cont = match f_kind,n_kind with diff --git a/finiteCofinite.ml b/finiteCofinite.ml index 58f0730..907b604 100644 --- a/finiteCofinite.ml +++ b/finiteCofinite.ml @@ -41,6 +41,8 @@ sig val equal : t -> t -> bool val positive : t -> set val negative : t -> set + val inj_positive : set -> t + val inj_negative : set -> t end module Make (E : Sigs.Set) : S with type elt = E.elt and type set = E.t = @@ -192,5 +194,7 @@ struct | CoFinite x -> x | _ -> E.empty + let inj_positive t = Finite t + let inj_negative t = CoFinite t end diff --git a/finiteCofinite.mli b/finiteCofinite.mli index 7f98130..72b1aec 100644 --- a/finiteCofinite.mli +++ b/finiteCofinite.mli @@ -35,6 +35,8 @@ module type S = val equal : t -> t -> bool val positive : t -> set val negative : t -> set + val inj_positive : set -> t + val inj_negative : set -> t end module Make : functor (E : Sigs.Set) -> S with type elt = E.elt and type set = E.t -- 2.17.1 From ede0fe9faff0bffe5f9f010d8942a057186d1ce1 Mon Sep 17 00:00:00 2001 From: kim Date: Wed, 20 May 2009 06:44:05 +0000 Subject: [PATCH 06/16] Removed debugg printing from main git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@408 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- main.ml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.ml b/main.ml index ee14b34..b30eff2 100644 --- a/main.ml +++ b/main.ml @@ -49,12 +49,12 @@ let main v query_string output = with Ulexer.Loc.Exc_located ((x,y),e) -> Printf.eprintf "character %i-%i %s\n" x y (Printexc.to_string e);exit 1 in - let _ = Printf.eprintf "Timing //keyword :" in +(* let _ = Printf.eprintf "Timing //keyword :" in let r = time (test_loop v) (Tag.tag "keyword") in let _ = Printf.eprintf "Count is %i\n%!" r in let _ = Printf.eprintf "Timing //keyword 2:" in let r = time (test_loop2 v) (Tag.tag "keyword") in - let _ = Printf.eprintf "Count is %i\n%!" r in + let _ = Printf.eprintf "Count is %i\n%!" r in *) XPath.Ast.print Format.err_formatter query; Format.fprintf Format.err_formatter "\n%!"; Printf.eprintf "Compiling query : "; -- 2.17.1 From 9a1792faff5e38231a74f9e761a7ff94aae5e6d9 Mon Sep 17 00:00:00 2001 From: kim Date: Tue, 26 May 2009 15:47:04 +0000 Subject: [PATCH 07/16] Fixed bug in collect_tags (Tree.ml) Tag Table was wrong (following copied in descendant tags too) git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@409 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- ata.ml | 2 +- html_trace.ml | 2 +- tests/test.xml | 8 ++------ tree.ml | 48 +++++++++++++++++++++++++++++++++--------------- 4 files changed, 37 insertions(+), 23 deletions(-) diff --git a/ata.ml b/ata.ml index eb4f394..90458a3 100644 --- a/ata.ml +++ b/ata.ml @@ -382,7 +382,7 @@ let tags_of_state a q = module type ResultSet = sig type t - type elt = [` Tree] Tree.node + type elt = [` Tree ] Tree.node val empty : t val cons : elt -> t -> t val concat : t -> t -> t diff --git a/html_trace.ml b/html_trace.ml index 333682c..bcd086e 100644 --- a/html_trace.ml +++ b/html_trace.ml @@ -230,7 +230,7 @@ let output_trace a tree file results = ); pr_str "%s" "\nTriggered transitions:\n"; pr_str "%s" ""; - List.iter (fun fl -> + Formlistlist.iter (fun fl -> pr_str "%s" ""; max_tt := max !max_tt (Formlist.length fl); ) trans; diff --git a/tests/test.xml b/tests/test.xml index a9c7a55..8f0e255 100644 --- a/tests/test.xml +++ b/tests/test.xml @@ -1,10 +1,6 @@ - foo - foo - foo - foo - foo - foo + + diff --git a/tree.ml b/tree.ml index a1ddcac..0a31e83 100644 --- a/tree.ml +++ b/tree.ml @@ -178,24 +178,28 @@ let collect_tags tree = Hashtbl.replace h t (pt_cup sc schild,pt_cup sbelow sb, pt_cup ssibling ss, pt_cup safter sa) in - let rec loop_right id acc_sibling acc_after= + let rec loop_right id acc_after = if id == nil - then (acc_sibling,acc_after) + then Ptset.Int.empty,Ptset.Int.empty,acc_after else - let sibling2,after2 = loop_right (tree_next_sibling tree id) acc_sibling acc_after in - let child1,below1 = loop_left (tree_first_child tree id) after2 in - let tag = tree_tag_id tree id in - update tag child1 below1 sibling2 after2; - (pt_add tag sibling2, (pt_add tag (pt_cup after2 below1))) + let sibling2,desc2,after2 = loop_right (tree_next_sibling tree id) acc_after in + let child1,desc1,after1 = loop_left (tree_first_child tree id) after2 in + let tag = tree_tag_id tree id in + update tag child1 desc1 sibling2 after2; + ( pt_add tag sibling2, + pt_add tag (pt_cup desc1 desc2), + pt_cup after1 (pt_cup desc1 desc2) ) and loop_left id acc_after = - if id == nil - then (Ptset.Int.empty,Ptset.Int.empty) + if id == nil + then Ptset.Int.empty,Ptset.Int.empty,acc_after else - let sibling2,after2 = loop_right (tree_next_sibling tree id) Ptset.Int.empty acc_after in - let child1,below1 = loop_left (tree_first_child tree id) after2 in - let tag = tree_tag_id tree id in - update tag child1 below1 sibling2 after2; - (pt_add tag sibling2,(pt_add tag (pt_cup after2 below1))) + let sibling2,desc2,after2 = loop_right (tree_next_sibling tree id) acc_after in + let child1,desc1,after1 = loop_left (tree_first_child tree id) after2 in + let tag = tree_tag_id tree id in + update tag child1 desc1 sibling2 after2; + (pt_add tag sibling2, + pt_add tag (pt_cup desc1 desc2), + acc_after ) in let _ = loop_left (tree_root tree) Ptset.Int.empty in h @@ -288,7 +292,21 @@ let is_root t = t == root let node_of_t t = let _ = Tag.init (Obj.magic t) in let table = collect_tags t + in (* + let _ = Hashtbl.iter (fun t (c,d,ns,f) -> + Printf.eprintf "Tag %s has:\n" (Tag.to_string t); + Printf.eprintf "Child tags: "; + Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) c; + Printf.eprintf "\nDescendant tags: "; + Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) d; + Printf.eprintf "\nNextSibling tags: "; + Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) ns; + Printf.eprintf "\nFollowing tags: "; + Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) f; + Printf.eprintf "\n\n%!";) table in + + *) { doc= t; ttable = table; } @@ -310,7 +328,7 @@ let parse_xml_string str = parse parse_xml_string str external pool : tree -> Tag.pool = "%identity" let magic_string = "SXSI_INDEX" -let version_string = "1" +let version_string = "2" let pos fd = Unix.lseek fd 0 Unix.SEEK_CUR -- 2.17.1 From 3445f7f08f15fe41e0d1bfaaabaacf60cdc10b61 Mon Sep 17 00:00:00 2001 From: kim Date: Tue, 26 May 2009 15:47:18 +0000 Subject: [PATCH 08/16] Fixed bug in NextElement, improved caching git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@410 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- OCamlDriver.cpp | 2 +- ata.ml | 293 ++++++++++++++++++++++-------------------------- 2 files changed, 133 insertions(+), 162 deletions(-) diff --git a/OCamlDriver.cpp b/OCamlDriver.cpp index e231a45..9df73d6 100644 --- a/OCamlDriver.cpp +++ b/OCamlDriver.cpp @@ -261,7 +261,7 @@ extern "C" CAMLprim value caml_xml_tree_next_sibling(value tree, value id){ } extern "C" CAMLprim value caml_xml_tree_next_element(value tree, value id){ - return(Val_int (XMLTREE(tree)->NextSibling(TREENODEVAL(id)))); + return(Val_int (XMLTREE(tree)->NextElement(TREENODEVAL(id)))); } extern "C" CAMLprim value caml_xml_tree_tagged_sibling(value tree, value id, value tag){ diff --git a/ata.ml b/ata.ml index 90458a3..7949921 100644 --- a/ata.ml +++ b/ata.ml @@ -518,107 +518,6 @@ END let string_of_ts tags = (Ptset.Int.fold (fun t a -> a ^ " " ^ (Tag.to_string t) ) tags "{")^ " }" -(* - module Algebra = - struct - type jump = [ `LONG | `CLOSE | `NIL ] - type t = jump*Ptset.Int.t - - let merge_jump (j1,l1) (j2,l2) = - match j1,j2 with - | _ when j1 = j2 -> (j1,Ptset.Int.union l1 l2) - | _,`NIL -> j1,l1 - | `NIL,_ -> j2,l2 - | _,_ -> (`CLOSE, Ptset.Int.union l1 l2) - - let merge_jump_list = function - | [] -> `NIL,Ptset.Int.empty - | p::r -> List.fold_left (merge_jump) p r - - let labels a s = - Hashtbl.fold - ( - fun q l acc -> - if (q == s) - then - - (List.fold_left - (fun acc (ts,f) -> - let _,_,_,bur = Transition.node f in - if bur then acc else TagSet.cup acc ts) - acc l) - else acc ) a.trans TagSet.empty - exception Found - - let is_rec a s access = - List.exists - (fun (_,t) -> let _,_,f,_ = Transition.node t in - StateSet.mem s (access f)) (Hashtbl.find a.trans s) - - - let decide a c_label l_label dir_states access = - - let l = StateSet.fold - (fun s l -> - let s_rec= is_rec a s access in - let tlabels,jmp = - if s_rec then l_label,`LONG - else c_label,`CLOSE in - let slabels = TagSet.positive ((TagSet.cap (labels a s) tlabels)) - in - (if Ptset.Int.is_empty slabels - then `NIL,Ptset.Int.empty - else jmp,slabels)::l) dir_states [] - in merge_jump_list l - - - - - - end - - - let choose_jump tagset qtags1 qtagsn a f_nil f_t1 f_s1 f_tn f_sn f_notext f_maytext = - let tags1,hastext1,fin1 = inter_text tagset (tags a qtags1) in - let tagsn,hastextn,finn = inter_text tagset (tags a qtagsn) in - (*if (hastext1||hastextn) then (`ANY,f_text) (* jumping to text nodes doesn't work really well *) - else*) - if (Ptset.Int.is_empty tags1) && (Ptset.Int.is_empty tagsn) then (`NIL,f_nil) - else if (Ptset.Int.is_empty tagsn) then - if (Ptset.Int.is_singleton tags1) - then (* TaggedChild/Sibling *) - let tag = (Ptset.Int.choose tags1) in (`TAG(tag),mk_app_fun f_t1 tag (Tag.to_string tag)) - else (* SelectChild/Sibling *) - (`ANY,mk_app_fun f_s1 tags1 (string_of_ts tags1)) - else if (Ptset.Int.is_empty tags1) then - if (Ptset.Int.is_singleton tagsn) - then (* TaggedDesc/Following *) - let tag = (Ptset.Int.choose tagsn) in (`TAG(tag),mk_app_fun f_tn tag (Tag.to_string tag)) - else (* SelectDesc/Following *) - (`ANY,mk_app_fun f_sn tagsn (string_of_ts tagsn)) - else if (hastext1||hastextn) then (`ANY,f_maytext) - else (`ANY,f_notext) - - let choose_jump_down tree a b c d = - choose_jump a b c d - (mk_fun (fun _ -> Tree.nil) "Tree.mk_nil") - (mk_fun (Tree.tagged_child tree) "Tree.tagged_child") - (mk_fun (Tree.select_child tree) "Tree.select_child") - (mk_fun (Tree.tagged_desc tree) "Tree.tagged_desc") - (mk_fun (Tree.select_desc tree) "Tree.select_desc") - (mk_fun (Tree.first_element tree) "Tree.first_element") - (mk_fun (Tree.first_child tree) "Tree.first_child") - - let choose_jump_next tree a b c d = - choose_jump a b c d - (mk_fun (fun _ _ -> Tree.nil) "Tree.mk_nil2") - (mk_fun (Tree.tagged_sibling_ctx tree) "Tree.tagged_sibling_ctx") - (mk_fun (Tree.select_sibling_ctx tree) "Tree.select_sibling_ctx") - (mk_fun (Tree.tagged_foll_ctx tree) "Tree.tagged_foll_ctx") - (mk_fun (Tree.select_foll_ctx tree) "Tree.select_foll_ctx") - (mk_fun (Tree.next_element_ctx tree) "Tree.node_element_ctx") - (mk_fun (Tree.next_sibling_ctx tree) "Tree.node_sibling_ctx") -*) module Algebra = struct type jump = [ `NIL | `ANY |`ANYNOTEXT | `JUMP ] @@ -752,24 +651,53 @@ END (mk_fun (Tree.next_element_ctx tree) "Tree.next_element_ctx") (mk_fun (Tree.next_sibling_ctx tree) "Tree.node_sibling_ctx") - module SetTagKey = - struct - type t = Tag.t*SList.t - let equal (t1,s1) (t2,s2) = t1 == t2 && s1 == s2 - let hash (t,s) = HASHINT2(t,s.SList.Node.id) + + module SListTable = Hashtbl.Make(struct type t = SList.t + let equal = (==) + let hash t = t.SList.Node.id + end) + module TransCache = + struct + type 'a t = Obj.t array SListTable.t + let create n = SListTable.create n + let dummy = Obj.repr (fun _ -> assert false) + let find (h :'a t) tag slist : 'a = + let tab = + try + SListTable.find h slist + with + Not_found -> + SListTable.add h slist (Array.create 10000 dummy); + raise Not_found + in + let res = tab.(tag) in + if res == dummy then raise Not_found else (Obj.magic res) + + let add (h : 'a t) tag slist (data : 'a) = + let tab = + try + SListTable.find h slist + with + Not_found -> + let arr = Array.create 10000 dummy in + SListTable.add h slist arr; + arr + in + tab.(tag) <- (Obj.repr data) + + end - - module CachedTransTable = Hashtbl.Make(SetTagKey) - let td_trans = CachedTransTable.create 4093 - - + + let td_trans = TransCache.create 10000 (* should be number of tags *number of states^2 + in the document *) + let empty_size n = let rec loop acc = function 0 -> acc | n -> loop (SList.cons StateSet.empty acc) (n-1) in loop SList.nil n - module Fold2Res = Hashtbl.Make(struct + module Fold2ResOld = Hashtbl.Make(struct type t = Formlistlist.t*SList.t*SList.t let hash (f,s,t) = HASHINT3(f.Formlistlist.Node.id, s.SList.Node.id, @@ -777,6 +705,39 @@ END let equal (a,b,c) (d,e,f) = a==d && b == e && c == f end) + module FllTable = Hashtbl.Make (struct type t = Formlistlist.t + let equal = (==) + let hash t = t.Formlistlist.Node.id + end) + + module Fold2Res = + struct + type 'a t = 'a SListTable.t SListTable.t FllTable.t + + let create n = FllTable.create n + + let find hf fl s1 s2 = + let hs1 = FllTable.find hf fl in + let hs2 = SListTable.find hs1 s1 in + SListTable.find hs2 s2 + + let add hf fl s1 s2 data = + let hs1 = + try FllTable.find hf fl with + | Not_found -> + let hs1 = SListTable.create SMALL_H_SIZE + in FllTable.add hf fl hs1;hs1 + in + let hs2 = + try SListTable.find hs1 s1 + with + | Not_found -> + let hs2 = SListTable.create SMALL_H_SIZE + in SListTable.add hs1 s1 hs2;hs2 + in + SListTable.add hs2 s2 data + end + let h_fold2 = Fold2Res.create BIG_H_SIZE let top_down ?(noright=false) a tree t slist ctx slot_size = @@ -786,7 +747,7 @@ END let eval_fold2_slist fll t (sl2,res2) (sl1,res1) = let res = Array.copy rempty in try - let r,b,btab = Fold2Res.find h_fold2 (fll,sl1,sl2) in + let r,b,btab = Fold2Res.find h_fold2 fll sl1 sl2 in if b then for i=0 to slot_size - 1 do res.(i) <- RS.merge btab.(i) t res1.(i) res2.(i); done; @@ -809,7 +770,7 @@ END | _ -> aq,ab in let r,b = fold sl1 sl2 fll 0 SList.nil false in - Fold2Res.add h_fold2 (fll,sl1,sl2) (r,b,btab); + Fold2Res.add h_fold2 fll sl1 sl2 (r,b,btab); if b then for i=0 to slot_size - 1 do res.(i) <- RS.merge btab.(i) t res1.(i) res2.(i); done; @@ -817,16 +778,16 @@ END in let null_result = (pempty,Array.copy rempty) in - let rec loop t slist ctx= + let rec loop t slist ctx = if t == Tree.nil then null_result else get_trans t slist (Tree.tag tree t) ctx and loop_tag tag t slist ctx = if t == Tree.nil then null_result else get_trans t slist tag ctx and loop_no_right t slist ctx = if t == Tree.nil then null_result else get_trans ~noright:true t slist (Tree.tag tree t) ctx - and get_trans ?(noright=false) t slist tag ctx = + and get_trans ?(noright=false) t slist tag ctx = let cont = try - CachedTransTable.find td_trans (tag,slist) + TransCache.find td_trans tag slist with | Not_found -> let fl_list,llist,rlist,ca,da,sa,fa = @@ -875,63 +836,73 @@ END let cont = match f_kind,n_kind with | `NIL,`NIL -> - (fun t _ -> eval_fold2_slist fl_list t empty_res empty_res ) - | _,`NIL -> ( + (fun t _ -> eval_fold2_slist fl_list t empty_res empty_res) + | _,`NIL -> ( match f_kind with |`TAG(tag) -> (fun t _ -> eval_fold2_slist fl_list t empty_res - (loop_tag tag (first t) llist t)) - | `ANY -> - (fun t _ -> eval_fold2_slist fl_list t empty_res - (loop (first t) llist t)) - | _ -> assert false) - + (loop_tag tag (first t) llist t )) + | `ANY -> + (fun t _ -> eval_fold2_slist fl_list t empty_res + (loop (first t) llist t )) + | _ -> assert false) | `NIL,_ -> ( - match n_kind with - |`TAG(tag) -> - (fun t ctx -> eval_fold2_slist fl_list t - (loop_tag tag (next t ctx) rlist ctx) empty_res) - - | `ANY -> - (fun t ctx -> eval_fold2_slist fl_list t - (loop (next t ctx) rlist ctx) empty_res) - - | _ -> assert false) - - | `TAG(tag1),`TAG(tag2) -> - (fun t ctx -> eval_fold2_slist fl_list t - (loop_tag tag2 (next t ctx) rlist ctx) - (loop_tag tag1 (first t) llist t)) - + match n_kind with + |`TAG(tag) -> + if SList.equal rlist slist then + let rec loop t ctx = + if t == Tree.nil then empty_res + else + let res2 = loop (next t ctx) ctx in + eval_fold2_slist fl_list t res2 empty_res + in loop + else + (fun t ctx -> eval_fold2_slist fl_list t + (loop_tag tag (next t ctx) rlist ctx ) empty_res) + + | `ANY -> + (fun t ctx -> eval_fold2_slist fl_list t + (loop (next t ctx) rlist ctx ) empty_res) + + | _ -> assert false) + + | `TAG(tag1),`TAG(tag2) -> + (fun t ctx -> + eval_fold2_slist fl_list t + (loop_tag tag2 (next t ctx) rlist ctx ) + (loop_tag tag1 (first t) llist t )) + | `TAG(tag),`ANY -> - (fun t ctx -> eval_fold2_slist fl_list t - (loop (next t ctx) rlist ctx) - (loop_tag tag (first t) llist t)) + (fun t ctx -> + eval_fold2_slist fl_list t + (loop (next t ctx) rlist ctx ) + (loop_tag tag (first t) llist t )) + | `ANY,`TAG(tag) -> - (fun t ctx -> - eval_fold2_slist fl_list t - (loop_tag tag (next t ctx) rlist ctx) - (loop (first t) llist t) ) + (fun t ctx -> + eval_fold2_slist fl_list t + (loop_tag tag (next t ctx) rlist ctx ) + (loop (first t) llist t )) + | `ANY,`ANY -> - (fun t ctx -> - eval_fold2_slist fl_list t - (loop (next t ctx) rlist ctx) - (loop (first t) llist t) ) + (fun t ctx -> + eval_fold2_slist fl_list t + (loop (next t ctx) rlist ctx ) + (loop (first t) llist t )) | _ -> assert false - in - let cont = D_IF_( (fun t ctx -> + in + let cont = D_IF_( (fun t ctx -> let a,b = cont t ctx in register_trace tree t (slist,a,fl_list,first,next,ctx); - (a,b) + (a,b) ) ,cont) in - (CachedTransTable.add td_trans (tag,slist) cont;cont) - in cont t ctx - - in - (if noright then loop_no_right else loop) t slist ctx + (TransCache.add td_trans tag slist (Obj.repr cont) ;cont) + in (Obj.magic cont) t ctx + + in + (if noright then loop_no_right else loop) t slist ctx - let run_top_down a tree = let init = SList.cons a.init SList.nil in let _,res = top_down a tree Tree.root init Tree.root 1 -- 2.17.1 From 6a7025fee3c050eff58baa536a14d80baf5c1b87 Mon Sep 17 00:00:00 2001 From: kim Date: Tue, 26 May 2009 15:47:33 +0000 Subject: [PATCH 09/16] added optimisations in the run function git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@411 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- ata.ml | 144 +++++++++++++++++++++++------------------- ata.mli | 2 +- finiteCofinite.ml | 152 +++++++++++++++++++++++++-------------------- finiteCofinite.mli | 3 +- main.ml | 6 +- tagSet.ml | 15 +++++ tagSet.mli | 1 + utils.ml | 5 +- 8 files changed, 188 insertions(+), 140 deletions(-) diff --git a/ata.ml b/ata.ml index 7949921..9eb5311 100644 --- a/ata.ml +++ b/ata.ml @@ -194,16 +194,18 @@ end module Transition = struct - type node = State.t*bool*Formula.t*bool + type node = State.t*TagSet.t*bool*Formula.t*bool include Hcons.Make(struct type t = node - let hash (s,m,f,b) = HASHINT4(s,Formula.uid f,vb m,vb b) - let equal (s,b,f,m) (s',b',f',m') = - s == s' && b==b' && m==m' && Formula.equal f f' + let hash (s,ts,m,f,b) = HASHINT5(s,TagSet.uid ts,Formula.uid f,vb m,vb b) + let equal (s,ts,b,f,m) (s',ts',b',f',m') = + s == s' && ts == ts' && b==b' && m==m' && f == f' end) - let print ppf f = let (st,mark,form,b) = node f in - Format.fprintf ppf "%i %s" st (if mark then "⇒" else "→"); + let print ppf f = let (st,ts,mark,form,b) = node f in + Format.fprintf ppf "(%i, " st; + TagSet.print ppf ts; + Format.fprintf ppf ") %s" (if mark then "⇒" else "→"); Formula.print ppf form; Format.fprintf ppf "%s%!" (if b then " (b)" else "") @@ -212,7 +214,7 @@ module Transition = struct let ( ?< ) x = x let ( >< ) state (l,mark) = state,(l,mark,false) let ( ><@ ) state (l,mark) = state,(l,mark,true) - let ( >=> ) (state,(label,mark,bur)) form = (state,label,(make (state,mark,form,bur))) + let ( >=> ) (state,(label,mark,bur)) form = (state,label,(make (state,label,mark,form,bur))) end end @@ -254,7 +256,7 @@ let dump ppf a = if y-x == 0 then TagSet.compare tsy tsx else y-x) l in let maxh,maxt,l_print = List.fold_left ( - fun (maxh,maxt,l) ((ts,q),(_,b,f,_)) -> + fun (maxh,maxt,l) ((ts,q),(_,_,b,f,_)) -> let s = if TagSet.is_finite ts then "{" ^ (TagSet.fold (fun t a -> a ^ " '" ^ (Tag.to_string t)^"'") ts "") ^" }" @@ -327,29 +329,31 @@ let eval_form_bool = module FTable = Hashtbl.Make( struct - type t = Formlist.t*StateSet.t*StateSet.t - let equal (f1,s1,t1) (f2,s2,t2) = - f1 == f2 && s1 == s2 && t1 == t2;; - let hash (f,s,t) = HASHINT3(Formlist.uid f ,StateSet.uid s,StateSet.uid t);; + type t = Tag.t*Formlist.t*StateSet.t*StateSet.t + let equal (tg1,f1,s1,t1) (tg2,f2,s2,t2) = + tg1 == tg2 && f1 == f2 && s1 == s2 && t1 == t2;; + let hash (tg,f,s,t) = HASHINT4(tg,Formlist.uid f ,StateSet.uid s,StateSet.uid t);; end) let h_f = FTable.create BIG_H_SIZE -let eval_formlist s1 s2 fl = +let eval_formlist tag s1 s2 fl = let rec loop fl = try - FTable.find h_f (fl,s1,s2) + FTable.find h_f (tag,fl,s1,s2) with | Not_found -> match Formlist.node fl with | Formlist.Cons(f,fll) -> - let q,mark,f,_ = Transition.node f in - let b,b1,b2 = eval_form_bool f s1 s2 in + let q,ts,mark,f,_ = Transition.node f in + let b,b1,b2 = + if TagSet.mem tag ts then eval_form_bool f s1 s2 else (false,false,false) + in let (s,(b',b1',b2',amark)) as res = loop fll in let r = if b then (StateSet.add q s, (b, b1'||b1,b2'||b2,mark||amark)) else res - in FTable.add h_f (fl,s1,s2) r;r + in FTable.add h_f (tag,fl,s1,s2) r;r | Formlist.Nil -> StateSet.empty,(false,false,false,false) in loop fl @@ -358,7 +362,7 @@ let tags_of_state a q = (fun p l acc -> if p == q then List.fold_left (fun acc (ts,t) -> - let _,_,_,aux = Transition.node t in + let _,_,_,_,aux = Transition.node t in if aux then acc else TagSet.cup ts acc) acc l @@ -559,7 +563,7 @@ END (List.fold_left (fun acc (ts,f) -> - let _,_,_,bur = Transition.node f in + let _,_,_,_,bur = Transition.node f in if bur then acc else TagSet.cup acc ts) acc l) else acc ) a.trans TagSet.empty @@ -567,7 +571,7 @@ END let is_rec a s access = List.exists - (fun (_,t) -> let _,_,f,_ = Transition.node t in + (fun (_,t) -> let _,_,_,f,_ = Transition.node t in StateSet.mem s ((fun (_,_,x) -> x) (access (Formula.st f)))) (Hashtbl.find a.trans s) @@ -617,13 +621,13 @@ END let tag = Ptset.Int.choose ll in (`TAG(tag),mk_app_fun f_tn tag (Tag.to_string tag)) else - (`ANY,mk_app_fun f_sn ll (string_of_ts ll)) + (`MANY(ll),mk_app_fun f_sn ll (string_of_ts ll)) else if Ptset.Int.is_empty ll then if Ptset.Int.is_singleton cl then let tag = Ptset.Int.choose cl in (`TAG(tag),mk_app_fun f_t1 tag (Tag.to_string tag)) else - (`ANY,mk_app_fun f_s1 cl (string_of_ts cl)) + (`MANY(cl),mk_app_fun f_s1 cl (string_of_ts cl)) else (`ANY,mk_app_fun2 f_s1n cl ll ((string_of_ts cl) ^ " " ^ (string_of_ts ll))) @@ -713,15 +717,16 @@ END module Fold2Res = struct type 'a t = 'a SListTable.t SListTable.t FllTable.t + let create n = Array.init 10000 (fun _ -> FllTable.create n) - let create n = FllTable.create n - - let find hf fl s1 s2 = + let find h tag fl s1 s2 = + let hf = h.(tag) in let hs1 = FllTable.find hf fl in let hs2 = SListTable.find hs1 s1 in SListTable.find hs2 s2 - let add hf fl s1 s2 data = + let add h tag fl s1 s2 data = + let hf = h.(tag) in let hs1 = try FllTable.find hf fl with | Not_found -> @@ -738,22 +743,22 @@ END SListTable.add hs2 s2 data end - let h_fold2 = Fold2Res.create BIG_H_SIZE + let h_fold2 = Fold2Res.create SMALL_H_SIZE let top_down ?(noright=false) a tree t slist ctx slot_size = let pempty = empty_size slot_size in let rempty = Array.make slot_size RS.empty in (* evaluation starts from the right so we put sl1,res1 at the end *) - let eval_fold2_slist fll t (sl2,res2) (sl1,res1) = + let eval_fold2_slist fll t tag (sl2,res2) (sl1,res1) = let res = Array.copy rempty in try - let r,b,btab = Fold2Res.find h_fold2 fll sl1 sl2 in + let r,b,btab = Fold2Res.find h_fold2 tag fll sl1 sl2 in if b then for i=0 to slot_size - 1 do res.(i) <- RS.merge btab.(i) t res1.(i) res2.(i); done; r,res with - Not_found -> + Not_found -> let btab = Array.make slot_size (false,false,false,false) in let rec fold l1 l2 fll i aq ab = match fll.Formlistlist.Node.node, @@ -763,14 +768,14 @@ END | Formlistlist.Cons(fl,fll), SList.Cons(s1,ll1), SList.Cons(s2,ll2) -> - let r',((b,_,_,_) as flags) = eval_formlist s1 s2 fl in + let r',((b,_,_,_) as flags) = eval_formlist tag s1 s2 fl in let _ = btab.(i) <- flags in fold ll1 ll2 fll (i+1) (SList.cons r' aq) (b||ab) | _ -> aq,ab in let r,b = fold sl1 sl2 fll 0 SList.nil false in - Fold2Res.add h_fold2 fll sl1 sl2 (r,b,btab); + Fold2Res.add h_fold2 tag fll sl1 sl2 (r,b,btab); if b then for i=0 to slot_size - 1 do res.(i) <- RS.merge btab.(i) t res1.(i) res2.(i); done; @@ -801,7 +806,7 @@ END (ts,t) -> if (TagSet.mem tag ts) then - let _,_,f,_ = Transition.node t in + let _,_,_,f,_ = Transition.node t in let (child,desc,below),(sibl,foll,after) = Formula.st f in (Formlist.cons t fl_acc, StateSet.union ll_acc below, @@ -825,10 +830,6 @@ END let tags_child,tags_below,tags_siblings,tags_after = Tree.tags tree tag in let d_f = Algebra.decide a tags_child tags_below (StateSet.union ca da) true in let d_n = Algebra.decide a tags_siblings tags_after (StateSet.union sa fa) false in -(* let _ = Printf.eprintf "Tags below %s are : \n" (Tag.to_string tag) in - let _ = Ptset.Int.iter (fun i -> Printf.eprintf "%s " (Tag.to_string i)) tags_below in - let _ = Printf.eprintf "\n%!" in *) -(* let tags_below = Ptset.Int.remove tag tags_below in *) let f_kind,first = choose_jump_down tree d_f and n_kind,next = if noright then (`NIL, fun _ _ -> Tree.nil ) else choose_jump_next tree d_n in @@ -836,57 +837,71 @@ END let cont = match f_kind,n_kind with | `NIL,`NIL -> - (fun t _ -> eval_fold2_slist fl_list t empty_res empty_res) + (fun t _ -> eval_fold2_slist fl_list t (Tree.tag tree t) empty_res empty_res) | _,`NIL -> ( match f_kind with - |`TAG(tag) -> - (fun t _ -> eval_fold2_slist fl_list t empty_res - (loop_tag tag (first t) llist t )) + |`TAG(tag') -> + (fun t _ -> eval_fold2_slist fl_list t (Tree.tag tree t) empty_res + (loop_tag tag' (first t) llist t )) | `ANY -> - (fun t _ -> eval_fold2_slist fl_list t empty_res + (fun t _ -> eval_fold2_slist fl_list t (Tree.tag tree t) empty_res (loop (first t) llist t )) | _ -> assert false) | `NIL,_ -> ( match n_kind with - |`TAG(tag) -> - if SList.equal rlist slist then + |`TAG(tag') -> + if SList.equal rlist slist && tag == tag' then let rec loop t ctx = - if t == Tree.nil then empty_res - else + if t == Tree.nil then empty_res else let res2 = loop (next t ctx) ctx in - eval_fold2_slist fl_list t res2 empty_res + eval_fold2_slist fl_list t tag res2 empty_res in loop else - (fun t ctx -> eval_fold2_slist fl_list t - (loop_tag tag (next t ctx) rlist ctx ) empty_res) + (fun t ctx -> eval_fold2_slist fl_list t (Tree.tag tree t) + (loop_tag tag' (next t ctx) rlist ctx ) empty_res) | `ANY -> - (fun t ctx -> eval_fold2_slist fl_list t + (fun t ctx -> eval_fold2_slist fl_list t (Tree.tag tree t) (loop (next t ctx) rlist ctx ) empty_res) | _ -> assert false) | `TAG(tag1),`TAG(tag2) -> (fun t ctx -> - eval_fold2_slist fl_list t + eval_fold2_slist fl_list t (Tree.tag tree t) (loop_tag tag2 (next t ctx) rlist ctx ) (loop_tag tag1 (first t) llist t )) - - | `TAG(tag),`ANY -> + + | `TAG(tag'),`ANY -> (fun t ctx -> - eval_fold2_slist fl_list t + eval_fold2_slist fl_list t (Tree.tag tree t) (loop (next t ctx) rlist ctx ) - (loop_tag tag (first t) llist t )) + (loop_tag tag' (first t) llist t )) - | `ANY,`TAG(tag) -> + | `ANY,`TAG(tag') -> (fun t ctx -> - eval_fold2_slist fl_list t - (loop_tag tag (next t ctx) rlist ctx ) + eval_fold2_slist fl_list t (Tree.tag tree t) + (loop_tag tag' (next t ctx) rlist ctx ) (loop (first t) llist t )) | `ANY,`ANY -> + if SList.equal slist rlist && SList.equal slist llist + then + let rec loop t ctx = + if t == Tree.nil then empty_res else + let r1 = loop (first t) t + and r2 = loop (next t ctx) ctx + in + eval_fold2_slist fl_list t (Tree.tag tree t) r2 r1 + in loop + else (fun t ctx -> - eval_fold2_slist fl_list t + eval_fold2_slist fl_list t (Tree.tag tree t) + (loop (next t ctx) rlist ctx ) + (loop (first t) llist t )) + | _,_ -> + (fun t ctx -> + eval_fold2_slist fl_list t (Tree.tag tree t) (loop (next t ctx) rlist ctx ) (loop (first t) llist t )) | _ -> assert false @@ -975,7 +990,8 @@ END let h_fold = Hashtbl.create 511 - let fold_f_conf t slist fl_list conf dir= + let fold_f_conf tree t slist fl_list conf dir= + let tag = Tree.tag tree t in let rec loop sl fl acc = match SList.node sl,fl with |SList.Nil,[] -> acc @@ -986,8 +1002,8 @@ END Hashtbl.find h_fold key with Not_found -> let res = - if dir then eval_formlist s Ptset.Int.empty formlist - else eval_formlist Ptset.Int.empty s formlist + if dir then eval_formlist tag s Ptset.Int.empty formlist + else eval_formlist tag Ptset.Int.empty s formlist in (Hashtbl.add h_fold key res;res) in if rb && ((dir&&rb1)|| ((not dir) && rb2)) @@ -1057,7 +1073,7 @@ END let slist = Configuration.Ptss.fold (fun e a -> SList.cons e a) conf.Configuration.sets SList.nil in let fl_list = get_up_trans slist ptag a parent in let slist = SList.rev (slist) in - let newconf = fold_f_conf parent slist fl_list conf dir in + let newconf = fold_f_conf tree parent slist fl_list conf dir in let accu,newconf = Configuration.IMap.fold (fun s res (ar,nc) -> if Ptset.Int.intersect s init then ( RS.concat res ar ,nc) @@ -1100,7 +1116,7 @@ END in let init = List.fold_left (fun acc (_,t) -> - let _,_,f,_ = Transition.node t in + let _,_,_,f,_ = Transition.node t in let _,_,l = fst ( Formula.st f ) in StateSet.union acc l) StateSet.empty trlist diff --git a/ata.mli b/ata.mli index fe328ad..649a496 100644 --- a/ata.mli +++ b/ata.mli @@ -52,7 +52,7 @@ module Formula : end module Transition : sig - type node = State.t * bool * Formula.t * bool + type node = State.t * TagSet.t * bool * Formula.t * bool type data = node type t val make : data -> t diff --git a/finiteCofinite.ml b/finiteCofinite.ml index 907b604..29ce60c 100644 --- a/finiteCofinite.ml +++ b/finiteCofinite.ml @@ -4,6 +4,7 @@ (* Copyright NICTA 2008 *) (* Distributed under the terms of the LGPL (see LICENCE) *) (******************************************************************************) +INCLUDE "utils.ml" exception InfiniteSet module type S = @@ -39,77 +40,92 @@ sig val choose : t -> elt val hash : t -> int val equal : t -> t -> bool + val uid : t -> int val positive : t -> set val negative : t -> set val inj_positive : set -> t val inj_negative : set -> t end -module Make (E : Sigs.Set) : S with type elt = E.elt and type set = E.t = +module Make (E : Ptset.S) : S with type elt = E.elt and type set = E.t = struct type elt = E.elt - type t = Finite of E.t | CoFinite of E.t + type node = Finite of E.t | CoFinite of E.t type set = E.t - - let empty = Finite E.empty - let any = CoFinite E.empty + module Node = Hcons.Make(struct + type t = node + let equal a b = + match a,b with + (Finite(s1),Finite(s2)) + | (CoFinite(s1),CoFinite(s2)) -> E.equal s1 s2 + | _ -> false + let hash = function + Finite (s) -> HASHINT2(PRIME2,E.hash s) + | CoFinite(s) -> HASHINT2(PRIME7,E.hash s) + end) + type t = Node.t + let empty = Node.make (Finite E.empty) + let any = Node.make (CoFinite E.empty) + let finite x = Node.make (Finite x) + let cofinite x = Node.make (CoFinite x) let is_empty = function - Finite s when E.is_empty s -> true + { Node.node = Finite s } when E.is_empty s -> true | _ -> false let is_any = function - CoFinite s when E.is_empty s -> true + { Node.node = CoFinite s } when E.is_empty s -> true | _ -> false - let is_finite = function + let is_finite t = match t.Node.node with | Finite _ -> true | _ -> false - let kind = function + let kind t = match t.Node.node with Finite _ -> `Finite | _ -> `Cofinite - let mem x = function Finite s -> E.mem x s + let mem x t = match t.Node.node with + | Finite s -> E.mem x s | CoFinite s -> not (E.mem x s) - let singleton x = Finite (E.singleton x) - let add e = function - | Finite s -> Finite (E.add e s) - | CoFinite s -> CoFinite (E.remove e s) - let remove e = function - | Finite s -> Finite (E.remove e s) - | CoFinite s -> CoFinite (E.add e s) + let singleton x = finite (E.singleton x) + let add e t = match t.Node.node with + | Finite s -> finite (E.add e s) + | CoFinite s -> cofinite (E.remove e s) + let remove e t = match t.Node.node with + | Finite s -> finite (E.remove e s) + | CoFinite s -> cofinite (E.add e s) - let cup s t = match (s,t) with - | Finite s, Finite t -> Finite (E.union s t) - | CoFinite s, CoFinite t -> CoFinite ( E.inter s t) - | Finite s, CoFinite t -> CoFinite (E.diff t s) - | CoFinite s, Finite t-> CoFinite (E.diff s t) - - let cap s t = match (s,t) with - | Finite s, Finite t -> Finite (E.inter s t) - | CoFinite s, CoFinite t -> CoFinite (E.union s t) - | Finite s, CoFinite t -> Finite (E.diff s t) - | CoFinite s, Finite t-> Finite (E.diff t s) + let cup s t = match (s.Node.node,t.Node.node) with + | Finite s, Finite t -> finite (E.union s t) + | CoFinite s, CoFinite t -> cofinite ( E.inter s t) + | Finite s, CoFinite t -> cofinite (E.diff t s) + | CoFinite s, Finite t-> cofinite (E.diff s t) + + let cap s t = match (s.Node.node,t.Node.node) with + | Finite s, Finite t -> finite (E.inter s t) + | CoFinite s, CoFinite t -> cofinite (E.union s t) + | Finite s, CoFinite t -> finite (E.diff s t) + | CoFinite s, Finite t-> finite (E.diff t s) - let diff s t = match (s,t) with - | Finite s, Finite t -> Finite (E.diff s t) - | Finite s, CoFinite t -> Finite(E.inter s t) - | CoFinite s, Finite t -> CoFinite(E.union t s) - | CoFinite s, CoFinite t -> Finite (E.diff t s) - - let neg = function - | Finite s -> CoFinite s - | CoFinite s -> Finite s + let diff s t = match (s.Node.node,t.Node.node) with + | Finite s, Finite t -> finite (E.diff s t) + | Finite s, CoFinite t -> finite(E.inter s t) + | CoFinite s, Finite t -> cofinite(E.union t s) + | CoFinite s, CoFinite t -> finite (E.diff t s) + + let neg t = match t.Node.node with + | Finite s -> cofinite s + | CoFinite s -> finite s - let compare s t = match (s,t) with + let compare s t = match (s.Node.node,t.Node.node) with | Finite s , Finite t -> E.compare s t | CoFinite s , CoFinite t -> E.compare t s | Finite _, CoFinite _ -> -1 | CoFinite _, Finite _ -> 1 - let subset s t = match (s,t) with + let subset s t = match (s.Node.node,t.Node.node) with | Finite s , Finite t -> E.subset s t | CoFinite s , CoFinite t -> E.subset t s | Finite s, CoFinite t -> E.is_empty (E.inter s t) @@ -128,73 +144,71 @@ struct let kind_split l = let rec next_finite_cofinite facc cacc = function - | [] -> Finite facc, CoFinite (E.diff cacc facc) - | Finite s ::r -> next_finite_cofinite (E.union s facc) cacc r - | CoFinite _ ::r when E.is_empty cacc -> next_finite_cofinite facc cacc r - | CoFinite s ::r -> next_finite_cofinite facc (E.inter cacc s) r + | [] -> finite facc, cofinite (E.diff cacc facc) + | { Node.node = Finite s } ::r -> next_finite_cofinite (E.union s facc) cacc r + | { Node.node = CoFinite _ } ::r when E.is_empty cacc -> next_finite_cofinite facc cacc r + | { Node.node = CoFinite s } ::r -> next_finite_cofinite facc (E.inter cacc s) r in let rec first_cofinite facc = function | [] -> empty,empty - | Finite s :: r-> first_cofinite (E.union s facc) r - | CoFinite s :: r -> next_finite_cofinite facc s r + | { Node.node = Finite s } :: r-> first_cofinite (E.union s facc) r + | { Node.node = CoFinite s } :: r -> next_finite_cofinite facc s r in first_cofinite E.empty l - let fold f t a = match t with + let fold f t a = match t.Node.node with | Finite s -> E.fold f s a | CoFinite _ -> raise InfiniteSet - let for_all f = function + let for_all f t = match t.Node.node with | Finite s -> E.for_all f s | CoFinite _ -> raise InfiniteSet - let exists f = function + let exists f t = match t.Node.node with | Finite s -> E.exists f s | CoFinite _ -> raise InfiniteSet - let filter f = function - | Finite s -> Finite (E.filter f s) + let filter f t = match t.Node.node with + | Finite s -> finite (E.filter f s) | CoFinite _ -> raise InfiniteSet - let partition f = function - | Finite s -> let a,b = E.partition f s in Finite a,Finite b + let partition f t = match t.Node.node with + | Finite s -> let a,b = E.partition f s in finite a,finite b | CoFinite _ -> raise InfiniteSet - let cardinal = function + let cardinal t = match t.Node.node with | Finite s -> E.cardinal s | CoFinite _ -> raise InfiniteSet - let elements = function + let elements t = match t.Node.node with | Finite s -> E.elements s | CoFinite _ -> raise InfiniteSet let from_list l = - Finite(List.fold_left (fun x a -> E.add a x ) E.empty l) + finite (List.fold_left (fun x a -> E.add a x ) E.empty l) - let choose = function + let choose t = match t.Node.node with Finite s -> E.choose s | _ -> raise InfiniteSet - let equal a b = - match a,b with - | Finite x, Finite y | CoFinite x, CoFinite y -> E.equal x y - | _ -> false + let equal = (==) + + let hash t = t.Node.key - let hash = - function Finite x -> (E.hash x) - | CoFinite x -> ( ~-(E.hash x) land max_int) + let uid t = t.Node.id + - let positive = - function + let positive t = + match t.Node.node with | Finite x -> x | _ -> E.empty - let negative = - function + let negative t = + match t.Node.node with | CoFinite x -> x | _ -> E.empty - let inj_positive t = Finite t - let inj_negative t = CoFinite t + let inj_positive t = finite t + let inj_negative t = cofinite t end diff --git a/finiteCofinite.mli b/finiteCofinite.mli index 72b1aec..b8d8c1d 100644 --- a/finiteCofinite.mli +++ b/finiteCofinite.mli @@ -33,11 +33,12 @@ module type S = val choose : t -> elt val hash : t -> int val equal : t -> t -> bool + val uid : t -> int val positive : t -> set val negative : t -> set val inj_positive : set -> t val inj_negative : set -> t end -module Make : functor (E : Sigs.Set) -> S with type elt = E.elt and type set = E.t +module Make : functor (E : Ptset.S) -> S with type elt = E.elt and type set = E.t diff --git a/main.ml b/main.ml index b30eff2..cb890e7 100644 --- a/main.ml +++ b/main.ml @@ -51,10 +51,10 @@ let main v query_string output = in (* let _ = Printf.eprintf "Timing //keyword :" in let r = time (test_loop v) (Tag.tag "keyword") in - let _ = Printf.eprintf "Count is %i\n%!" r in + let _ = Printf.eprintf "Count is %i\n%!" r in *) let _ = Printf.eprintf "Timing //keyword 2:" in let r = time (test_loop2 v) (Tag.tag "keyword") in - let _ = Printf.eprintf "Count is %i\n%!" r in *) + let _ = Printf.eprintf "Count is %i\n%!" r in XPath.Ast.print Format.err_formatter query; Format.fprintf Format.err_formatter "\n%!"; Printf.eprintf "Compiling query : "; @@ -105,7 +105,7 @@ let main v query_string output = begin let _ = Gc.full_major();Gc.compact() in let _ = Printf.eprintf "%!" in - let _ = Gc.set (disabled_gc) in +(* let _ = Gc.set (disabled_gc) in *) if !Options.backward && ((snd test_list) != `NOTHING )then let r = time (bottom_up_count auto v )(snd test_list) in diff --git a/tagSet.ml b/tagSet.ml index 74784cf..ba3f431 100644 --- a/tagSet.ml +++ b/tagSet.ml @@ -15,3 +15,18 @@ let attribute = singleton Tag.attribute let star = diff any (cup pcdata attribute) let node = neg attribute +let print ppf t = + let print_set s = + Format.fprintf ppf "{"; + Ptset.Int.iter + (fun t -> Format.fprintf ppf "'%s' " (Tag.to_string t)) + s; + Format.fprintf ppf "}" + in + if is_finite t then + if is_empty t then + Format.fprintf ppf "∅" else print_set (positive t) + else + Format.fprintf ppf "Σ"; + if not (is_any t) then + (Format.fprintf ppf "\\" ; print_set (negative t)) diff --git a/tagSet.mli b/tagSet.mli index a232c0a..825757a 100644 --- a/tagSet.mli +++ b/tagSet.mli @@ -12,3 +12,4 @@ val pcdata : t val attribute : t val star : t val node : t +val print : Format.formatter -> t -> unit diff --git a/utils.ml b/utils.ml index ce99932..658d84e 100644 --- a/utils.ml +++ b/utils.ml @@ -13,7 +13,7 @@ THEN DEFINE HPARAM = 65599 DEFINE HPARAM2 = 4303228801 DEFINE HPARAM3 = 282287506116799 - + DEFINE HPARAM4 = 71034040046345985 ELSE DEFINE WORDSIZE = 32 DEFINE HALFWORDSIZE = 16 @@ -23,13 +23,14 @@ ELSE DEFINE HPARAM = 65599 DEFINE HPARAM2 = 8261505 DEFINE HPARAM3 = 780587199 - + DEFINE HPARAM4 = 549173308 END DEFINE HASHINT2 (x,y) = ((x)+HPARAM*(y)) DEFINE HASHINT3 (x,y,z) = ((x) + (y) * HPARAM + (z) * HPARAM2) DEFINE HASHINT4 (x,y,z,t) = ((x) + (y) * HPARAM + (z)*HPARAM2 + (t)* HPARAM3) +DEFINE HASHINT5 (x,y,z,t,u) = ((x) + (y) * HPARAM + (z)*HPARAM2 + (t)* HPARAM3 + (u)*HPARAM4) DEFINE PRIME1 = 7 DEFINE PRIME2 = 19 -- 2.17.1 From be1caa5c46009c13241cc48ed34a36ee2936ef87 Mon Sep 17 00:00:00 2001 From: kim Date: Sat, 6 Jun 2009 03:46:01 +0000 Subject: [PATCH 10/16] Safety commit git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@428 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- OCamlDriver.cpp | 42 +++++++--- ata.ml | 156 ++++++++++++++++++++++++++++-------- ata.mli | 10 ++- main.ml | 41 +++++++--- tag.ml | 4 + tag.mli | 4 + tests/test.xml | 3 +- tests/xpathmark-queries.txt | 14 ++++ tree.ml | 120 ++++++++++++++++++++++++--- tree.mli | 8 ++ utils.ml | 11 ++- 11 files changed, 346 insertions(+), 67 deletions(-) create mode 100644 tests/xpathmark-queries.txt diff --git a/OCamlDriver.cpp b/OCamlDriver.cpp index 9df73d6..a57e0c6 100644 --- a/OCamlDriver.cpp +++ b/OCamlDriver.cpp @@ -29,7 +29,7 @@ extern "C" { #define CAMLRAISEMSG(msg) (caml_raise_with_string(*cpp_exception,(msg) )) #define NOT_IMPLEMENTED(s) (caml_failwith(s)) #define XMLTREE(x) ((XMLTree *)(* (XMLTree**) Data_custom_val(x))) -#define HSET(x) ((std::unordered_set*)((* (XMLTree**) Data_custom_val(x)))) +#define HSET(x) ((std::unordered_set*)((* (std::unordered_set**) Data_custom_val(x)))) #define TEXTCOLLECTION(x) #define TREENODEVAL(i) ((treeNode) (Int_val(i))) #define XMLTREE_ROOT 0 @@ -248,6 +248,13 @@ extern "C" CAMLprim value caml_xml_tree_is_first_child(value tree, value id){ extern "C" CAMLprim value caml_xml_tree_first_child(value tree, value id){ return(Val_int (XMLTREE(tree)->FirstChild(TREENODEVAL(id)))); } +extern "C" CAMLprim value caml_xml_tree_closing(value tree, value id){ + return(Val_int (XMLTREE(tree)->Closing(TREENODEVAL(id)))); +} +extern "C" CAMLprim value caml_xml_tree_is_open(value tree, value id){ + return(Val_bool (XMLTREE(tree)->IsOpen(TREENODEVAL(id)))); +} + extern "C" CAMLprim value caml_xml_tree_first_element(value tree, value id){ return(Val_int (XMLTREE(tree)->FirstElement(TREENODEVAL(id)))); } @@ -285,12 +292,14 @@ extern "C" CAMLprim value caml_xml_tree_tagged_foll_below(value tree, value id, return(Val_int (XMLTREE(tree)->TaggedFollBelow(TREENODEVAL(id),(TagType) Int_val(tag),TREENODEVAL(root)))); } - - extern "C" CAMLprim value caml_xml_tree_my_text(value tree, value id){ return(Val_int((XMLTREE(tree)->MyText(TREENODEVAL(id))))); } +extern "C" CAMLprim value caml_xml_tree_my_text_unsafe(value tree, value id){ + return(Val_int((XMLTREE(tree)->MyTextUnsafe(TREENODEVAL(id))))); +} + extern "C" CAMLprim value caml_xml_tree_text_xml_id(value tree, value id){ return(Val_int((XMLTREE(tree)->TextXMLId(TREENODEVAL(id))))); } @@ -320,6 +329,10 @@ extern "C" CAMLprim value caml_xml_tree_subtree_size(value tree,value id){ return (Val_int(XMLTREE(tree)->SubtreeSize(TREENODEVAL(id)))); } +extern "C" CAMLprim value caml_xml_tree_subtree_elements(value tree,value id){ + return (Val_int(XMLTREE(tree)->SubtreeElements(TREENODEVAL(id)))); +} + extern "C" CAMLprim value caml_xml_tree_register_tag(value tree,value str){ CAMLparam2(tree,str); @@ -382,31 +395,38 @@ extern "C" CAMLprim value caml_xml_tree_doc_ids(value tree, value node){ CAMLreturn (tuple); } -extern "C" CAMLprim value caml_result_set_create(value size){ - CAMLparam1(size); +extern "C" value caml_result_set_create(value size){ results* res = (results*) malloc(sizeof(results)); - results r = createResults (Int_val(size)); + results r = createResults (Int_val(size)); res->n = r.n; res->lgn = r.lgn; res->tree = r.tree; - CAMLreturn ((value) (res)); + return ((value) (res)); } extern "C" CAMLprim value caml_result_set_set(value result,value p){ - CAMLparam2(result,p); + CAMLparam1(p); + results r; setResult ( *((results*) result), Int_val(p)); CAMLreturn (Val_unit); } extern "C" CAMLprim value caml_result_set_clear(value result,value p1,value p2){ - CAMLparam3(result,p1,p2); + CAMLparam2(p1,p2); clearRange ( *((results*) result), Int_val(p1), Int_val(p2)); CAMLreturn (Val_unit); } extern "C" CAMLprim value caml_result_set_next(value result,value p){ - CAMLparam2(result,p); - CAMLreturn (Val_int(nextResult(*((results*) result), Int_val(p)))); + CAMLparam1(p); + results r; + r = *( (results *) result); + CAMLreturn (Val_int(nextResult(r, Int_val(p)))); } +extern "C" CAMLprim value caml_xml_tree_print(value tree,value node,value fd){ + CAMLparam3(tree,node,fd); + XMLTREE(tree)->Print(Int_val(fd),TREENODEVAL(node)); + CAMLreturn(Val_unit); +} diff --git a/ata.ml b/ata.ml index 9eb5311..3741b56 100644 --- a/ata.ml +++ b/ata.ml @@ -328,12 +328,12 @@ let eval_form_bool = in loop f -module FTable = Hashtbl.Make( struct - type t = Tag.t*Formlist.t*StateSet.t*StateSet.t - let equal (tg1,f1,s1,t1) (tg2,f2,s2,t2) = - tg1 == tg2 && f1 == f2 && s1 == s2 && t1 == t2;; - let hash (tg,f,s,t) = HASHINT4(tg,Formlist.uid f ,StateSet.uid s,StateSet.uid t);; - end) +module FTable = Hashtbl.Make(struct + type t = Tag.t*Formlist.t*StateSet.t*StateSet.t + let equal (tg1,f1,s1,t1) (tg2,f2,s2,t2) = + tg1 == tg2 && f1 == f2 && s1 == s2 && t1 == t2;; + let hash (tg,f,s,t) = HASHINT4(tg,Formlist.uid f ,StateSet.uid s,StateSet.uid t);; + end) let h_f = FTable.create BIG_H_SIZE @@ -395,6 +395,8 @@ let tags_of_state a q = val map : ( elt -> elt) -> t -> t val length : t -> int val merge : (bool*bool*bool*bool) -> elt -> t -> t -> t + val mk_quick_tag_loop : (elt -> elt -> 'a*t array) -> 'a -> int -> Tree.t -> Tag.t -> (elt -> elt -> 'a*t array) + val mk_quick_star_loop : (elt -> elt -> 'a*t array) -> 'a -> int -> Tree.t -> (elt -> elt -> 'a*t array) end module Integer : ResultSet = @@ -416,6 +418,13 @@ let tags_of_state a q = if mark then 1+res1+res2 else res1+res2 else 0 + let mk_quick_tag_loop _ sl ss tree tag = (); + fun t ctx -> + (sl, Array.make ss (Tree.subtree_tags tree tag t)) + let mk_quick_star_loop _ sl ss tree = (); + fun t ctx -> + (sl, Array.make ss (Tree.subtree_elements tree t)) + end module IdSet : ResultSet = @@ -470,35 +479,99 @@ let tags_of_state a q = else { node = (Concat(res1.node,res2.node)); length = res1.length + res2.length ;} - else empty - - + else empty + let mk_quick_tag_loop f _ _ _ _ = f + let mk_quick_star_loop f _ _ _ = f end - module GResult = struct - type t + module GResult(Doc : sig val doc : Tree.t end) = struct + type bits type elt = [` Tree] Tree.node - external create_empty : int -> t = "caml_result_set_create" - external set : t -> int -> t = "caml_result_set_set" - external next : t -> int -> int = "caml_result_set_next" - external clear : t -> int -> int -> unit = "caml_result_set_clear" - let empty = create_empty 100000000 + external create_empty : int -> bits = "caml_result_set_create" + external set : bits -> int -> unit = "caml_result_set_set" + external next : bits -> int -> int = "caml_result_set_next" + external clear : bits -> elt -> elt -> unit = "caml_result_set_clear" + + type t = + { segments : elt list; + bits : bits; + } + + let ebits = + let size = (Tree.subtree_size Doc.doc Tree.root) in + create_empty (size*2+1) + + let empty = { segments = []; + bits = ebits } - let cons e t = set t (Obj.magic e) - let concat _ t = t + let cons e t = + let rec loop l = match l with + | [] -> { bits = (set t.bits (Obj.magic e);t.bits); + segments = [ e ] } + | p::r -> + if Tree.is_binary_ancestor Doc.doc e p then + loop r + else + { bits = (set t.bits (Obj.magic e);t.bits); + segments = e::l } + in + loop t.segments + + let concat t1 t2 = + if t2.segments == [] then t1 + else + if t1.segments == [] then t2 + else + let h2 = List.hd t2.segments in + let rec loop l = match l with + | [] -> t2.segments + | p::r -> + if Tree.is_binary_ancestor Doc.doc p h2 then + l + else + p::(loop r) + in + { bits = t1.bits; + segments = loop t1.segments + } + let iter f t = let rec loop i = if i == -1 then () - else (f (Obj.magic i);loop (next t i)) - in loop 0 + else (f ((Obj.magic i):elt);loop (next t.bits i)) + in loop (next t.bits 0) let fold _ _ _ = failwith "noop" let map _ _ = failwith "noop" - let length t = let cpt = ref ~-1 in + let length t = let cpt = ref 0 in iter (fun _ -> incr cpt) t; !cpt let merge (rb,rb1,rb2,mark) elt t1 t2 = - if mark then (set t1 (Obj.magic elt) ; t1) else t1 - + if rb then +(* let _ = Printf.eprintf "Lenght before merging is %i %i\n" + (List.length t1.segments) (List.length t2.segments) + in *) + match t1.segments,t2.segments with + [],[] -> if mark then cons elt empty else empty + | [p],[] when rb1 -> if mark then cons elt t1 else t1 + | [], [p] when rb2 -> if mark then cons elt t2 else t2 + | [x],[y] when rb1 && rb2 -> if mark then cons elt empty else + concat t1 t2 + | _,_ -> + let t1 = if rb1 then t1 else + (List.iter (fun idx -> clear t1.bits idx (Tree.closing Doc.doc idx)) t1.segments;empty) + and t2 = if rb2 then t2 else + (List.iter (fun idx -> clear t2.bits idx (Tree.closing Doc.doc idx)) t2.segments;empty) + in + (if mark then cons elt (concat t1 t2) + else concat t1 t2) + else + let _ = + List.iter (fun idx -> clear t1.bits idx (Tree.closing Doc.doc idx)) t1.segments; + List.iter (fun idx -> clear t2.bits idx (Tree.closing Doc.doc idx)) t2.segments + in + empty + let mk_quick_tag_loop f _ _ _ _ = f + let mk_quick_star_loop f _ _ _ = f end module Run (RS : ResultSet) = struct @@ -574,7 +647,11 @@ END (fun (_,t) -> let _,_,_,f,_ = Transition.node t in StateSet.mem s ((fun (_,_,x) -> x) (access (Formula.st f)))) (Hashtbl.find a.trans s) - + let is_final_marking a s = + List.exists (fun (_,t) -> let _,_,m,f,_ = Transition.node t in m&& (Formula.is_true f)) + (Hashtbl.find a.trans s) + + let decide a c_label l_label dir_states dir = let l = StateSet.fold @@ -841,12 +918,22 @@ END | _,`NIL -> ( match f_kind with |`TAG(tag') -> - (fun t _ -> eval_fold2_slist fl_list t (Tree.tag tree t) empty_res - (loop_tag tag' (first t) llist t )) - | `ANY -> + let default = fun t _ -> eval_fold2_slist fl_list t (Tree.tag tree t) empty_res + (loop_tag tag' (first t) llist t ) + in + let cf = SList.hd llist in + if (slot_size == 1) && StateSet.is_singleton cf + then + let s = StateSet.choose cf in + if (Algebra.is_rec a s fst) && (Algebra.is_rec a s snd) + && (Algebra.is_final_marking a s) + then RS.mk_quick_subtree default llist 1 tree tag' + else default + else default + | _ -> (fun t _ -> eval_fold2_slist fl_list t (Tree.tag tree t) empty_res (loop (first t) llist t )) - | _ -> assert false) + ) | `NIL,_ -> ( match n_kind with |`TAG(tag') -> @@ -860,11 +947,10 @@ END (fun t ctx -> eval_fold2_slist fl_list t (Tree.tag tree t) (loop_tag tag' (next t ctx) rlist ctx ) empty_res) - | `ANY -> + | _ -> (fun t ctx -> eval_fold2_slist fl_list t (Tree.tag tree t) (loop (next t ctx) rlist ctx ) empty_res) - - | _ -> assert false) + ) | `TAG(tag1),`TAG(tag2) -> (fun t ctx -> @@ -904,7 +990,7 @@ END eval_fold2_slist fl_list t (Tree.tag tree t) (loop (next t ctx) rlist ctx ) (loop (first t) llist t )) - | _ -> assert false + in let cont = D_IF_( (fun t ctx -> let a,b = cont t ctx in @@ -1154,3 +1240,9 @@ END let bottom_up_count a t k = let module RI = Run(Integer) in Integer.length (RI.run_bottom_up a t k) + module Test (Doc : sig val doc : Tree.t end) = + struct + module Results = GResult(Doc) + let top_down a t = let module R = Run(Results) in (R.run_top_down a t) + end + diff --git a/ata.mli b/ata.mli index 649a496..159bd78 100644 --- a/ata.mli +++ b/ata.mli @@ -93,12 +93,20 @@ module type ResultSet = val map : (elt -> elt) -> t -> t val length : t -> int val merge : (bool*bool*bool*bool)-> elt -> t -> t -> t + val mk_quick_tag_loop : (elt -> elt -> 'a*t array) -> 'a -> int -> Tree.t -> Tag.t -> (elt -> elt -> 'a*t array) + val mk_quick_star_loop : (elt -> elt -> 'a*t array) -> 'a -> int -> Tree.t -> (elt -> elt -> 'a*t array) end module IdSet : ResultSet -module GResult : ResultSet +module GResult (Doc : sig val doc : Tree.t end) : ResultSet val top_down_count : 'a t -> Tree.t -> int val top_down : 'a t -> Tree.t -> IdSet.t val bottom_up_count : 'a t -> Tree.t -> [> `CONTAINS of 'b | `TAG of Tag.t ] -> int + +module Test (Doc : sig val doc : Tree.t end ) : +sig + module Results : ResultSet + val top_down : 'a t -> Tree.t -> Results.t +end diff --git a/main.ml b/main.ml index cb890e7..19bdcf6 100644 --- a/main.ml +++ b/main.ml @@ -14,6 +14,7 @@ let disabled_gc = { Gc.get() with Gc.max_overhead = 1000000; Gc.space_overhead = 100 } let hash x = 131*x/(x-1+1) + let test_loop tree tag = let t' = Tree.tagged_desc tree tag Tree.root in let f = Hashtbl.create 4096 @@ -25,6 +26,20 @@ let test_loop tree tag = in Hashtbl.add f (hash 101) g; (Hashtbl.find f (hash 101)) t' Tree.root + +let test_full tree = + let root = Tree.root in + let fin = Tree.closing tree root in + let rec loop t = if t <= fin then + let tag = Tree.tag tree t in +(* let _ = Tag.to_string tag in *) + if tag == Tag.pcdata then (ignore (Tree.get_text tree t)); + let t = (Obj.magic ((Obj.magic t) + 1)) in + loop t + in + loop root + + let test_loop2 tree tag = let t' = Tree.tagged_desc tree tag Tree.root in let f = Hashtbl.create 4096 @@ -51,10 +66,12 @@ let main v query_string output = in (* let _ = Printf.eprintf "Timing //keyword :" in let r = time (test_loop v) (Tag.tag "keyword") in - let _ = Printf.eprintf "Count is %i\n%!" r in *) + let _ = Printf.eprintf "Count is %i\n%!" r in let _ = Printf.eprintf "Timing //keyword 2:" in let r = time (test_loop2 v) (Tag.tag "keyword") in - let _ = Printf.eprintf "Count is %i\n%!" r in + let _ = Printf.eprintf "Count is %i\n%!" r in *) + let _ = Printf.eprintf "Timing //node() :" in + let _ = time (test_full) v in XPath.Ast.print Format.err_formatter query; Format.fprintf Format.err_formatter "\n%!"; Printf.eprintf "Compiling query : "; @@ -120,8 +137,10 @@ let main v query_string output = let _ = Printf.eprintf "Number of nodes in the result set : %i\n%!" r in () else - let result = time (top_down auto) v in - let rcount = IdSet.length result in + let module GR = Ata.Test(struct let doc = v end) in + let result = time (GR.top_down auto) v in + let _ = Printf.eprintf "Counting results " in + let rcount = time (GR.Results.length) result in Printf.eprintf "Number of nodes in the result set : %i\n" rcount; Printf.eprintf "\n%!"; begin @@ -130,13 +149,13 @@ let main v query_string output = | Some f -> Printf.eprintf "Serializing results : "; time( fun () -> - let oc = open_out f in - output_string oc "\n"; - IdSet.iter (fun t -> - Tree.print_xml_fast oc v t; - output_char oc '\n'; - - ) result) (); + (*let oc = open_out f in *) + let oc = Unix.openfile f [ Unix.O_WRONLY;Unix.O_TRUNC;Unix.O_CREAT] 0o644 in + (*output_string oc "\n";*) + GR.Results.iter (fun t -> + Tree.print_xml_fast3 v t oc; + (*output_char oc '\n'; *) + ) result) (); end; end; let _ = Gc.set enabled_gc in diff --git a/tag.ml b/tag.ml index f9c0275..ec80df4 100644 --- a/tag.ml +++ b/tag.ml @@ -21,6 +21,10 @@ let document_node = 0 let attribute = 1 let pcdata = 2 let attribute_data= 3 +let document_node_close = 4 +let attribute_close = 5 +let pcdata_close = 6 +let attribute_data_close= 7 let pool = Weak.create 1 diff --git a/tag.mli b/tag.mli index bc3ee55..1fcd5b1 100644 --- a/tag.mli +++ b/tag.mli @@ -6,6 +6,10 @@ val document_node : t val attribute : t val pcdata : t val attribute_data : t +val document_node_close : t +val attribute_close : t +val pcdata_close : t +val attribute_data_close : t val init : pool -> unit diff --git a/tests/test.xml b/tests/test.xml index 8f0e255..c4c1de0 100644 --- a/tests/test.xml +++ b/tests/test.xml @@ -1,6 +1,5 @@ - - + diff --git a/tests/xpathmark-queries.txt b/tests/xpathmark-queries.txt new file mode 100644 index 0000000..12ed53e --- /dev/null +++ b/tests/xpathmark-queries.txt @@ -0,0 +1,14 @@ +#XPATHMARK A queries +/site/closed_auctions/closed_auction/annotation/description/text/keyword +/descendant::closed_auction/descendant::keyword +/site/closed_auctions/closed_auction/descendant::keyword +/site/closed_auctions/closed_auction[annotation/description/text/keyword]/date +/site/closed_auctions/closed_auction[descendant::keyword]/date +/site/people/person[profile/gender and profile/age]/name +/site/people/person[phone or homepage]/name +/site/people/person[address and (phone or homepage) and (creditcard or profile)]/name + +#XPATHMARK B queries +#/site/open_auctions/open_auction/bidder[following-sibling::bidder] +#/descendant::person[profile/@income]/name + diff --git a/tree.ml b/tree.ml index 0a31e83..1ff2082 100644 --- a/tree.ml +++ b/tree.ml @@ -28,7 +28,7 @@ let equal_node : 'a node -> 'a node -> bool = (==) external parse_xml_uri : string -> int -> bool -> bool -> tree = "caml_call_shredder_uri" external parse_xml_string : string -> int -> bool -> bool -> tree = "caml_call_shredder_string" - +external tree_print_xml_fast3 : tree -> [`Tree ] node -> Unix.file_descr ->unit = "caml_xml_tree_print" external tree_save : tree -> Unix.file_descr -> unit = "caml_xml_tree_save" external tree_load : Unix.file_descr -> tree = "caml_xml_tree_load" @@ -53,6 +53,7 @@ external text_get_cached_text : tree -> [`Text] node -> string = "caml_text_coll external tree_root : tree -> [`Tree] node = "caml_xml_tree_root" external tree_subtree_size : tree -> [`Tree] node -> int = "caml_xml_tree_subtree_size" +external tree_subtree_elements : tree -> [`Tree] node -> int = "caml_xml_tree_subtree_elements" let tree_is_nil x = equal_node x nil @@ -60,6 +61,8 @@ external tree_parent : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_par external tree_parent_doc : tree -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_parent_doc" "noalloc" (*external tree_prev_doc : tree -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_prev_doc" "noalloc" *) external tree_first_child : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_child" "noalloc" +external tree_closing : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_closing" "noalloc" +external tree_is_open : tree -> [`Tree] node -> bool = "caml_xml_tree_is_open" "noalloc" external tree_first_element : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_element" "noalloc" external tree_tagged_child : tree -> [`Tree] node -> Tag.t -> [`Tree] node = "caml_xml_tree_tagged_child" "noalloc" external tree_next_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_sibling" "noalloc" @@ -80,6 +83,7 @@ let tree_is_last t n = equal_node nil (tree_next_sibling t n) (*external tree_prev_text : tree -> [`Tree] node -> [`Text ] node = "caml_xml_tree_prev_text" "noalloc" *) external tree_my_text : tree -> [`Tree] node -> [`Text ] node = "caml_xml_tree_my_text" "noalloc" +external tree_my_text_unsafe : tree -> [`Tree] node -> [`Text ] node = "caml_xml_tree_my_text_unsafe" "noalloc" (*external tree_next_text : tree -> [`Tree] node -> [`Text ] node = "caml_xml_tree_next_text" "noalloc" *) external tree_doc_ids : tree -> [`Tree ] node -> [`Text ] node * [`Text ] node = "caml_xml_tree_doc_ids" @@ -129,6 +133,7 @@ type t = { ttable : (Tag.t,(Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t)) Hashtbl.t; } let subtree_size t i = tree_subtree_size t.doc i +let subtree_elements t i = tree_subtree_elements t.doc i let text_size t = text_size t.doc module MemUnion = Hashtbl.Make (struct @@ -421,9 +426,18 @@ let dump_node t = nts (inode t) let is_left t n = tree_is_first_child t.doc n + + let is_below_right t n1 n2 = tree_is_ancestor t.doc (tree_parent t.doc n1) n2 && not (tree_is_ancestor t.doc n1 n2) + +let is_binary_ancestor t n1 n2 = + let p = tree_parent t.doc n1 in + let fin = tree_closing t.doc p in + n2 > n1 && n2 < fin +(* (is_below_right t n1 n2) || + (tree_is_ancestor t.doc n1 n2) *) let parent t n = tree_parent t.doc n @@ -474,6 +488,10 @@ let select_foll_ctx t = fun ts -> let v = (ptset_to_vector ts) in (); fun n ctx -> tree_select_foll_below t.doc n v ctx +let closing t n = tree_closing t.doc n +let is_open t n = tree_is_open t.doc n +let get_text_id t n = tree_my_text t.doc n + let last_idx = ref 0 let array_find a i j = let l = Array.length a in @@ -489,8 +507,92 @@ let array_find a i j = let count t s = text_count t.doc s - - let print_xml_fast outc tree t = + let stack = ref [] + let init_stack () = stack := [] + let push x = stack:= x::!stack + let peek () = match !stack with + p::_ -> p + | _ -> failwith "peek" + let pop () = match !stack with + p::r -> stack:=r; p + | _ -> failwith "pop" + + let next t = nodei ( (inode t) + 1 ) + let next2 t = nodei ( (inode t) + 2 ) + let next3 t = nodei ( (inode t) + 3 ) + + let print_xml_fast2 = + let _ = init_stack () in + let h = Hashtbl.create MED_H_SIZE in + let tag_str t = try Hashtbl.find h t with + Not_found -> let s = Tag.to_string t in + Hashtbl.add h t s;s + in + let h_att = Hashtbl.create MED_H_SIZE in + let att_str t = try Hashtbl.find h_att t with + Not_found -> let s = Tag.to_string t in + let attname = String.sub s 3 ((String.length s) -3) in + Hashtbl.add h_att t attname;attname + in fun outc tree t -> + let tree = tree.doc in + let fin = tree_closing tree t in + let rec loop_tag t tag = + if t <= fin then + if tree_is_open tree t then + (* opening tag *) + if tag == Tag.pcdata then + begin + output_string outc (text_get_cached_text tree (tree_my_text_unsafe tree t)); + loop (next2 t) (* skip closing $ *) + end + else + let tagstr = tag_str tag in + let _ = output_char outc '<'; + output_string outc tagstr in + let t' = next t in + if tree_is_open tree t' then + let _ = push tagstr in + let tag' = tree_tag_id tree t' in + if tag' == Tag.attribute then let t'' = loop_attr (next t') 0 in + output_string outc ">"; loop t'' else (output_string outc ">";loop_tag t' tag') + else (* closing with no content *) + let _ = output_string outc "/>" in + loop (next t') + else + begin + (* closing tag *) + output_string outc "'; + loop (next t); + end + and loop t = loop_tag t (tree_tag_id tree t) + and loop_attr t n = + if tree_is_open tree t then + let attname = att_str (tree_tag_id tree t) in + output_char outc ' '; + output_string outc attname; + output_string outc "=\""; + let t = next t in (* open $@ *) + output_string outc (text_get_cached_text tree (tree_my_text_unsafe tree t)); + output_char outc '"'; + loop_attr (next3 t) (n+1) + else + next t (* close @ *) + in loop t + + let print_xml_fast = + let h = Hashtbl.create MED_H_SIZE in + let tag_str t = try Hashtbl.find h t with + Not_found -> let s = Tag.to_string t in + Hashtbl.add h t s;s + in + let h_att = Hashtbl.create MED_H_SIZE in + let att_str t = try Hashtbl.find h_att t with + Not_found -> let s = Tag.to_string t in + let attname = String.sub s 3 ((String.length s) -3) in + Hashtbl.add h_att t attname;attname + in fun outc tree t -> let rec loop ?(print_right=true) t = if t != nil then @@ -498,18 +600,18 @@ let array_find a i j = if tagid==Tag.pcdata then begin - let tid = tree_my_text tree.doc t in + let tid = tree_my_text_unsafe tree.doc t in output_string outc (text_get_cached_text tree.doc tid); if print_right then loop (next_sibling tree t); end else - let tagstr = Tag.to_string tagid in + let tagstr = tag_str tagid in let l = first_child tree t and r = next_sibling tree t in output_char outc '<'; - output_string outc tagstr; + output_string outc tagstr; if l == nil then output_string outc "/>" else if (tag tree l) == Tag.attribute then @@ -537,10 +639,9 @@ let array_find a i j = and loop_attributes a = if a != nil then - let s = (Tag.to_string (tag tree a)) in - let attname = String.sub s 3 ((String.length s) -3) in + let attname = att_str (tag tree a) in let fsa = first_child tree a in - let tid = tree_my_text tree.doc fsa in + let tid = tree_my_text_unsafe tree.doc fsa in output_char outc ' '; output_string outc attname; output_string outc "=\""; @@ -612,3 +713,4 @@ let dump_tree fmt tree = ;; +let print_xml_fast3 t = tree_print_xml_fast3 t.doc diff --git a/tree.mli b/tree.mli index ba0fc44..e4bbd2d 100644 --- a/tree.mli +++ b/tree.mli @@ -56,6 +56,8 @@ val select_foll_ctx : t -> Ptset.Int.t -> [ `Tree ] node -> [`Tree] node -> [ ` val count : t -> string -> int val print_xml_fast : out_channel -> t -> [ `Tree ] node -> unit +val print_xml_fast2 : out_channel -> t -> [ `Tree ] node -> unit +val print_xml_fast3 : t -> [ `Tree ] node -> Unix.file_descr -> unit val tags_children : t -> Tag.t -> Ptset.Int.t val tags_below : t -> Tag.t -> Ptset.Int.t @@ -63,6 +65,7 @@ val tags_siblings : t -> Tag.t -> Ptset.Int.t val tags_after : t -> Tag.t -> Ptset.Int.t val tags : t -> Tag.t -> Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t val is_below_right : t -> [`Tree] node -> [`Tree] node -> bool +val is_binary_ancestor : t -> [`Tree] node -> [`Tree] node -> bool val is_left : t -> [`Tree] node -> bool val binary_parent : t -> [`Tree] node -> [`Tree] node @@ -73,8 +76,13 @@ val text_size : t -> int val doc_ids : t -> [`Tree] node -> [`Text] node * [`Text] node val subtree_tags : t -> Tag.t -> [`Tree] node -> int val get_text : t -> [`Tree] node -> string +val get_text_id : t -> [`Tree] node -> [`Text ] node val dump_tree : Format.formatter -> t -> unit val subtree_size : t -> [`Tree] node -> int +val subtree_elements : t -> [`Tree] node -> int val text_below : t -> [`Tree] node -> [`Tree] node val text_next : t -> [`Tree] node -> [`Tree] node -> [`Tree] node + +val closing : t -> [`Tree] node -> [`Tree] node +val is_open : t -> [`Tree] node -> bool diff --git a/utils.ml b/utils.ml index 658d84e..76c0432 100644 --- a/utils.ml +++ b/utils.ml @@ -70,7 +70,7 @@ let read_procmem () = let l = ref [] ;; let init_timer() = l := [];; -let time f x = +let time_mem f x = let s1 = read_procmem() in let t1 = Unix.gettimeofday () in let r = f x in @@ -83,6 +83,15 @@ let time f x = Printf.eprintf "Mem use after: %s\n\n\n%!" s2; r ;; +let time f x = + let t1 = Unix.gettimeofday () in + let r = f x in + let t2 = Unix.gettimeofday () in + let t = (1000. *. (t2 -. t1)) in + l:= t::!l; + Printf.eprintf " %fms\n%!" t ; + r +;; let total_time () = List.fold_left (+.) 0. !l;; END (* IFNDEF UTILS__ML__ *) -- 2.17.1 From f067dedce5a6b8386010aec45475ad42d24f1d5b Mon Sep 17 00:00:00 2001 From: kim Date: Sat, 6 Jun 2009 03:53:43 +0000 Subject: [PATCH 11/16] fixed compilation bug git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@430 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- ata.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ata.ml b/ata.ml index 3741b56..99f5c97 100644 --- a/ata.ml +++ b/ata.ml @@ -927,7 +927,7 @@ END let s = StateSet.choose cf in if (Algebra.is_rec a s fst) && (Algebra.is_rec a s snd) && (Algebra.is_final_marking a s) - then RS.mk_quick_subtree default llist 1 tree tag' + then RS.mk_quick_tag_loop default llist 1 tree tag' else default else default | _ -> -- 2.17.1 From 329088598ab63bc2d67ff0dfc4f54e90f5d4f283 Mon Sep 17 00:00:00 2001 From: kim Date: Wed, 19 Aug 2009 01:59:25 +0000 Subject: [PATCH 12/16] safety commit git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@551 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- OCamlDriver.cpp | 34 +++++- ata.ml | 301 ++++++++++++++++++++++++++++++++++++++---------- ata.mli | 4 +- main.ml | 4 +- tree.ml | 25 ++-- 5 files changed, 286 insertions(+), 82 deletions(-) diff --git a/OCamlDriver.cpp b/OCamlDriver.cpp index a57e0c6..85cc813 100644 --- a/OCamlDriver.cpp +++ b/OCamlDriver.cpp @@ -25,6 +25,7 @@ extern "C" { #include #include #include "results.h" +#include #define CAMLRAISEMSG(msg) (caml_raise_with_string(*cpp_exception,(msg) )) #define NOT_IMPLEMENTED(s) (caml_failwith(s)) @@ -143,8 +144,6 @@ extern "C" CAMLprim value caml_xml_tree_load(value fd){ catch (char const * msg){ CAMLRAISEMSG(msg); }; } - - extern "C" CAMLprim value caml_text_collection_get_text(value tree, value id){ CAMLparam2(tree,id); CAMLlocal1(str); @@ -256,7 +255,7 @@ extern "C" CAMLprim value caml_xml_tree_is_open(value tree, value id){ } extern "C" CAMLprim value caml_xml_tree_first_element(value tree, value id){ - return(Val_int (XMLTREE(tree)->FirstElement(TREENODEVAL(id)))); + return(Val_int (XMLTREE(Field(tree,0))->FirstElement(TREENODEVAL(id)))); } extern "C" CAMLprim value caml_xml_tree_tagged_child(value tree, value id, value tag){ @@ -268,7 +267,7 @@ extern "C" CAMLprim value caml_xml_tree_next_sibling(value tree, value id){ } extern "C" CAMLprim value caml_xml_tree_next_element(value tree, value id){ - return(Val_int (XMLTREE(tree)->NextElement(TREENODEVAL(id)))); + return(Val_int (XMLTREE(Field(tree,0))->NextElement(TREENODEVAL(id)))); } extern "C" CAMLprim value caml_xml_tree_tagged_sibling(value tree, value id, value tag){ @@ -291,6 +290,9 @@ extern "C" CAMLprim value caml_xml_tree_tagged_foll(value tree, value id, value extern "C" CAMLprim value caml_xml_tree_tagged_foll_below(value tree, value id, value tag,value root){ return(Val_int (XMLTREE(tree)->TaggedFollBelow(TREENODEVAL(id),(TagType) Int_val(tag),TREENODEVAL(root)))); } +extern "C" CAMLprim value caml_xml_tree_tagged_foll_before(value tree, value id, value tag,value root){ + return(Val_int (XMLTREE(tree)->TaggedFollBefore(TREENODEVAL(id),(TagType) Int_val(tag),TREENODEVAL(root)))); +} extern "C" CAMLprim value caml_xml_tree_my_text(value tree, value id){ return(Val_int((XMLTREE(tree)->MyText(TREENODEVAL(id))))); @@ -383,6 +385,11 @@ extern "C" CAMLprim value caml_xml_tree_select_foll_below(value tree, value node HSET(tags), TREENODEVAL(ctx)))); } +extern "C" CAMLprim value caml_xml_tree_select_foll_before(value tree, value node, value tags,value ctx){ + return (Val_int (XMLTREE(tree)->SelectFollBelow(TREENODEVAL(node), + HSET(tags), + TREENODEVAL(ctx)))); +} extern "C" CAMLprim value caml_xml_tree_doc_ids(value tree, value node){ @@ -406,7 +413,6 @@ extern "C" value caml_result_set_create(value size){ extern "C" CAMLprim value caml_result_set_set(value result,value p){ CAMLparam1(p); - results r; setResult ( *((results*) result), Int_val(p)); CAMLreturn (Val_unit); } @@ -430,3 +436,21 @@ extern "C" CAMLprim value caml_xml_tree_print(value tree,value node,value fd){ CAMLreturn(Val_unit); } +extern "C" CAMLprim value caml_set_tag_bits(value result, value tag, value tree, value node) +{ + CAMLparam3(tag,tree,node); + results r; + XMLTree *t = XMLTREE(Field(tree,0)); + treeNode opening = TREENODEVAL(node); + treeNode closing = t->Closing(opening); + TagType target_tag = Int_val(tag); + treeNode first = t->TaggedDesc(opening,target_tag); + r = *( (results *) result); + opening = first; + while (opening != NULLT){ + setResult(r,opening); + opening = t->TaggedFollBefore(opening,target_tag,closing); + }; + CAMLreturn(Val_int(first)); +} + diff --git a/ata.ml b/ata.ml index 99f5c97..eed81bc 100644 --- a/ata.ml +++ b/ata.ml @@ -1,6 +1,6 @@ INCLUDE "debug.ml" INCLUDE "utils.ml" - +open Camlp4.Struct type jump_kind = [ `TAG of Tag.t | `CONTAINS of string | `NOTHING ] (* Todo : move elsewhere *) @@ -337,6 +337,7 @@ module FTable = Hashtbl.Make(struct let h_f = FTable.create BIG_H_SIZE +type merge_conf = NO | MARK | ONLY1 | ONLY2 | ONLY12 | MARK1 | MARK2 | MARK12 let eval_formlist tag s1 s2 fl = let rec loop fl = @@ -355,8 +356,32 @@ let eval_formlist tag s1 s2 fl = else res in FTable.add h_f (tag,fl,s1,s2) r;r | Formlist.Nil -> StateSet.empty,(false,false,false,false) - in loop fl - + in + let r,conf = loop fl + in + r,(match conf with + | (false,_,_,_) -> NO + | (_,false,false,false) -> NO + | (_,true,false,false) -> ONLY1 + | (_,false,true,false) -> ONLY2 + | (_,true,true,false) -> ONLY12 + | (_,false,false,true) -> MARK + | (_,true,false,true) -> MARK1 + | (_,false,true,true) -> MARK2 + | _ -> MARK12) + +let bool_of_merge conf = + match conf with + | NO -> false,false,false,false + | ONLY1 -> true,true,false,false + | ONLY2 -> true,false,true,false + | ONLY12 -> true,true,true,false + | MARK -> true,false,false,true + | MARK1 -> true,true,false,true + | MARK2 -> true,false,true,true + | MARK12 -> true,true,true,true + + let tags_of_state a q = Hashtbl.fold (fun p l acc -> @@ -394,7 +419,7 @@ let tags_of_state a q = val fold : ( elt -> 'a -> 'a) -> t -> 'a -> 'a val map : ( elt -> elt) -> t -> t val length : t -> int - val merge : (bool*bool*bool*bool) -> elt -> t -> t -> t + val merge : merge_conf -> elt -> t -> t -> t val mk_quick_tag_loop : (elt -> elt -> 'a*t array) -> 'a -> int -> Tree.t -> Tag.t -> (elt -> elt -> 'a*t array) val mk_quick_star_loop : (elt -> elt -> 'a*t array) -> 'a -> int -> Tree.t -> (elt -> elt -> 'a*t array) end @@ -403,6 +428,7 @@ let tags_of_state a q = struct type t = int type elt = [`Tree] Tree.node + let empty = 0 let cons _ x = x+1 let concat x y = x + y @@ -410,7 +436,8 @@ let tags_of_state a q = let fold _ _ _ = failwith "fold not implemented" let map _ _ = failwith "map not implemented" let length x = x - let merge (rb,rb1,rb2,mark) t res1 res2 = + let merge2 conf t res1 res2 = + let rb,rb1,rb2,mark = conf in if rb then let res1 = if rb1 then res1 else 0 and res2 = if rb2 then res2 else 0 @@ -418,6 +445,17 @@ let tags_of_state a q = if mark then 1+res1+res2 else res1+res2 else 0 + let merge conf t res1 res2 = + match conf with + NO -> 0 + | MARK -> 1 + | ONLY12 -> res1+res2 + | ONLY1 -> res1 + | ONLY2 -> res2 + | MARK12 -> res1+res2+1 + | MARK1 -> res1+1 + | MARK2 -> res2+1 + let mk_quick_tag_loop _ sl ss tree tag = (); fun t ctx -> (sl, Array.make ss (Tree.subtree_tags tree tag t)) @@ -427,7 +465,7 @@ let tags_of_state a q = end - module IdSet : ResultSet = + module IdSet : ResultSet= struct type elt = [`Tree] Tree.node type node = Nil @@ -469,17 +507,21 @@ let tags_of_state a q = in { l with node = loop l.node } - let merge (rb,rb1,rb2,mark) t res1 res2 = - if rb then - let res1 = if rb1 then res1 else empty - and res2 = if rb2 then res2 else empty - in - if mark then { node = Cons(t,(Concat(res1.node,res2.node))); - length = res1.length + res2.length + 1;} - else - { node = (Concat(res1.node,res2.node)); - length = res1.length + res2.length ;} - else empty + let merge conf t res1 res2 = + match conf with + NO -> empty + | MARK -> cons t empty + | ONLY1 -> res1 + | ONLY2 -> res2 + | ONLY12 -> { node = (Concat(res1.node,res2.node)); + length = res1.length + res2.length ;} + | MARK12 -> { node = Cons(t,(Concat(res1.node,res2.node))); + length = res1.length + res2.length + 1;} + | MARK1 -> { node = Cons(t,res1.node); + length = res1.length + 1;} + | MARK2 -> { node = Cons(t,res2.node); + length = res2.length + 1;} + let mk_quick_tag_loop f _ _ _ _ = f let mk_quick_star_loop f _ _ _ = f end @@ -490,7 +532,7 @@ let tags_of_state a q = external set : bits -> int -> unit = "caml_result_set_set" external next : bits -> int -> int = "caml_result_set_next" external clear : bits -> elt -> elt -> unit = "caml_result_set_clear" - + external set_tag_bits : bits -> Tag.t -> Tree.t -> elt -> elt = "caml_set_tag_bits" type t = { segments : elt list; bits : bits; @@ -540,37 +582,74 @@ let tags_of_state a q = else (f ((Obj.magic i):elt);loop (next t.bits i)) in loop (next t.bits 0) - let fold _ _ _ = failwith "noop" + let fold f t acc = + let rec loop i acc = + if i == -1 then acc + else loop (next t.bits i) (f ((Obj.magic i):elt) acc) + in loop (next t.bits 0) acc + let map _ _ = failwith "noop" let length t = let cpt = ref 0 in iter (fun _ -> incr cpt) t; !cpt + let clear_bits t = + let rec loop l = match l with + [] -> () + | idx::ll -> + clear t.bits idx (Tree.closing Doc.doc idx); loop ll + in + loop t.segments;empty + let merge (rb,rb1,rb2,mark) elt t1 t2 = if rb then (* let _ = Printf.eprintf "Lenght before merging is %i %i\n" (List.length t1.segments) (List.length t2.segments) - in *) + in *) match t1.segments,t2.segments with [],[] -> if mark then cons elt empty else empty - | [p],[] when rb1 -> if mark then cons elt t1 else t1 - | [], [p] when rb2 -> if mark then cons elt t2 else t2 - | [x],[y] when rb1 && rb2 -> if mark then cons elt empty else + | [_],[] when rb1 -> if mark then cons elt t1 else t1 + | [], [_] when rb2 -> if mark then cons elt t2 else t2 + | [_],[_] when rb1 && rb2 -> if mark then cons elt empty else concat t1 t2 - | _,_ -> - let t1 = if rb1 then t1 else - (List.iter (fun idx -> clear t1.bits idx (Tree.closing Doc.doc idx)) t1.segments;empty) - and t2 = if rb2 then t2 else - (List.iter (fun idx -> clear t2.bits idx (Tree.closing Doc.doc idx)) t2.segments;empty) + | _ -> + let t1 = if rb1 then t1 else clear_bits t1 + and t2 = if rb2 then t2 else clear_bits t2 in (if mark then cons elt (concat t1 t2) else concat t1 t2) else - let _ = - List.iter (fun idx -> clear t1.bits idx (Tree.closing Doc.doc idx)) t1.segments; - List.iter (fun idx -> clear t2.bits idx (Tree.closing Doc.doc idx)) t2.segments - in - empty - let mk_quick_tag_loop f _ _ _ _ = f + let _ = clear_bits t1 in + clear_bits t2 + + let merge conf t t1 t2 = + match t1.segments,t2.segments,conf with + | _,_,NO -> let _ = clear_bits t1 in clear_bits t2 + | [],[],(MARK1|MARK2|MARK12|MARK) -> cons t empty + | [],[],_ -> empty + | [_],[],(ONLY1|ONLY12) -> t1 + | [_],[],(MARK1|MARK12) -> cons t t1 + | [],[_],(ONLY2|ONLY12) -> t2 + | [],[_],(MARK2|MARK12) -> cons t t2 + | [_],[_],ONLY12 -> concat t1 t2 + | [_],[_],MARK12 -> cons t empty + | _,_,MARK -> let _ = clear_bits t2 in cons t (clear_bits t1) + | _,_,ONLY1 -> let _ = clear_bits t2 in t1 + | _,_,ONLY2 -> let _ = clear_bits t1 in t2 + | _,_,ONLY12 -> concat t1 t2 + | _,_,MARK1 -> let _ = clear_bits t2 in cons t t1 + | _,_,MARK2 -> let _ = clear_bits t1 in cons t t2 + | _,_,MARK12 -> cons t (concat t1 t2) + + let mk_quick_tag_loop _ sl ss tree tag = (); + fun t _ -> + let res = empty in + let first = set_tag_bits empty.bits tag tree t in + let res = + if first == Tree.nil then res else + cons first res + in + (sl, Array.make ss res) + let mk_quick_star_loop f _ _ _ = f end module Run (RS : ResultSet) = @@ -737,7 +816,9 @@ END let equal = (==) let hash t = t.SList.Node.id end) - module TransCache = + + + module TransCacheOld = struct type 'a t = Obj.t array SListTable.t let create n = SListTable.create n @@ -769,6 +850,32 @@ END end + module TransCache = + struct + external get : 'a array -> int ->'a = "%array_unsafe_get" + external set : 'a array -> int -> 'a -> unit = "%array_unsafe_set" + type fun_tree = [`Tree] Tree.node -> [`Tree] Tree.node -> SList.t*RS.t array + type t = fun_tree array array + let dummy_cell = [||] + let create n = Array.create n dummy_cell + let dummy = fun _ _-> assert false + let find h tag slist = + let tab = get h slist.SList.Node.id in + if tab == dummy_cell then raise Not_found + else + let res = get tab tag in + if res == dummy then raise Not_found else res + + let add (h : t) tag slist (data : fun_tree) = + let tab = get h slist.SList.Node.id in + let tab = if tab == dummy_cell then + let x = Array.create 10000 dummy in + (set h slist.SList.Node.id x;x) + else tab + in + set tab tag data + end + let td_trans = TransCache.create 10000 (* should be number of tags *number of states^2 in the document *) @@ -777,21 +884,12 @@ END | n -> loop (SList.cons StateSet.empty acc) (n-1) in loop SList.nil n - - module Fold2ResOld = Hashtbl.Make(struct - type t = Formlistlist.t*SList.t*SList.t - let hash (f,s,t) = HASHINT3(f.Formlistlist.Node.id, - s.SList.Node.id, - t.SList.Node.id) - let equal (a,b,c) (d,e,f) = a==d && b == e && c == f - end) - module FllTable = Hashtbl.Make (struct type t = Formlistlist.t let equal = (==) let hash t = t.Formlistlist.Node.id end) - module Fold2Res = + module Fold2ResOld = struct type 'a t = 'a SListTable.t SListTable.t FllTable.t let create n = Array.init 10000 (fun _ -> FllTable.create n) @@ -820,7 +918,64 @@ END SListTable.add hs2 s2 data end - let h_fold2 = Fold2Res.create SMALL_H_SIZE + module Fold2Res = struct + external get : 'a array -> int ->'a = "%array_unsafe_get" + external set : 'a array -> int -> 'a -> unit = "%array_unsafe_set" + external field1 : 'a -> 'b = "%field1" + type 'a t = 'a array array array array + let dummy = [||] + let dummy_val : 'a = + let v = Obj.repr ((),2,()) in + Obj.magic v + + let create n = Array.create n dummy + + let find h tag fl s1 s2 = + let af = get h tag in + if af == dummy then raise Not_found + else + let as1 = get af fl.Formlistlist.Node.id in + if as1 == dummy then raise Not_found + else + let as2 = get as1 s1.SList.Node.id in + if as2 == dummy then raise Not_found + else let v = get as2 s2.SList.Node.id in + if field1 v == 2 then raise Not_found + else v + + let add h tag fl s1 s2 data = + let af = + let x = get h tag in + if x == dummy then + begin + let y = Array.make 10000 dummy in + set h tag y;y + end + else x + in + let as1 = + let x = get af fl.Formlistlist.Node.id in + if x == dummy then + begin + let y = Array.make 10000 dummy in + set af fl.Formlistlist.Node.id y;y + end + else x + in + let as2 = + let x = get as1 s1.SList.Node.id in + if x == dummy then + begin + let y = Array.make 10000 dummy_val in + set as1 s1.SList.Node.id y;y + end + else x + in + set as2 s2.SList.Node.id data + end + + + let h_fold2 = Fold2Res.create 10000 let top_down ?(noright=false) a tree t slist ctx slot_size = let pempty = empty_size slot_size in @@ -836,7 +991,7 @@ END r,res with Not_found -> - let btab = Array.make slot_size (false,false,false,false) in + let btab = Array.make slot_size NO in let rec fold l1 l2 fll i aq ab = match fll.Formlistlist.Node.node, l1.SList.Node.node, @@ -845,10 +1000,10 @@ END | Formlistlist.Cons(fl,fll), SList.Cons(s1,ll1), SList.Cons(s2,ll2) -> - let r',((b,_,_,_) as flags) = eval_formlist tag s1 s2 fl in - let _ = btab.(i) <- flags + let r',conf = eval_formlist tag s1 s2 fl in + let _ = btab.(i) <- conf in - fold ll1 ll2 fll (i+1) (SList.cons r' aq) (b||ab) + fold ll1 ll2 fll (i+1) (SList.cons r' aq) ((conf!=NO)||ab) | _ -> aq,ab in let r,b = fold sl1 sl2 fll 0 SList.nil false in @@ -914,10 +1069,11 @@ END let cont = match f_kind,n_kind with | `NIL,`NIL -> + Printf.eprintf "Using %i\n" (Loc.start_line __LOCATION__); (fun t _ -> eval_fold2_slist fl_list t (Tree.tag tree t) empty_res empty_res) | _,`NIL -> ( match f_kind with - |`TAG(tag') -> + |`TAG(tag') -> let default = fun t _ -> eval_fold2_slist fl_list t (Tree.tag tree t) empty_res (loop_tag tag' (first t) llist t ) in @@ -927,10 +1083,17 @@ END let s = StateSet.choose cf in if (Algebra.is_rec a s fst) && (Algebra.is_rec a s snd) && (Algebra.is_final_marking a s) - then RS.mk_quick_tag_loop default llist 1 tree tag' - else default - else default + then + let _ = Printf.eprintf "Using %i\n" (Loc.start_line __LOCATION__) in + RS.mk_quick_tag_loop default llist 1 tree tag' + else + let _ = Printf.eprintf "Using %i\n" (Loc.start_line __LOCATION__) in + default + else + let _ = Printf.eprintf "Using %i\n" (Loc.start_line __LOCATION__) in + default | _ -> + let _ = Printf.eprintf "Using %i\n" (Loc.start_line __LOCATION__) in (fun t _ -> eval_fold2_slist fl_list t (Tree.tag tree t) empty_res (loop (first t) llist t )) ) @@ -942,50 +1105,63 @@ END if t == Tree.nil then empty_res else let res2 = loop (next t ctx) ctx in eval_fold2_slist fl_list t tag res2 empty_res - in loop + in Printf.eprintf "Using %i\n" (Loc.start_line __LOCATION__);loop else + let _ = Printf.eprintf "Using %i\n" (Loc.start_line __LOCATION__) in (fun t ctx -> eval_fold2_slist fl_list t (Tree.tag tree t) (loop_tag tag' (next t ctx) rlist ctx ) empty_res) | _ -> + let _ = Printf.eprintf "Using %i\n" (Loc.start_line __LOCATION__) in (fun t ctx -> eval_fold2_slist fl_list t (Tree.tag tree t) (loop (next t ctx) rlist ctx ) empty_res) ) - | `TAG(tag1),`TAG(tag2) -> + | `TAG(tag1),`TAG(tag2) -> + let _ = Printf.eprintf "Using %i %s %s\n" (Loc.start_line __LOCATION__) + (Tag.to_string tag1) + (Tag.to_string tag2) + in (fun t ctx -> eval_fold2_slist fl_list t (Tree.tag tree t) (loop_tag tag2 (next t ctx) rlist ctx ) (loop_tag tag1 (first t) llist t )) | `TAG(tag'),`ANY -> + let _ = Printf.eprintf "Using %i\n" (Loc.start_line __LOCATION__) in (fun t ctx -> eval_fold2_slist fl_list t (Tree.tag tree t) (loop (next t ctx) rlist ctx ) (loop_tag tag' (first t) llist t )) | `ANY,`TAG(tag') -> + let _ = Printf.eprintf "Using %i\n" (Loc.start_line __LOCATION__) in (fun t ctx -> eval_fold2_slist fl_list t (Tree.tag tree t) (loop_tag tag' (next t ctx) rlist ctx ) (loop (first t) llist t )) | `ANY,`ANY -> + let _ = Printf.eprintf "Using %i\n" (Loc.start_line __LOCATION__) in if SList.equal slist rlist && SList.equal slist llist then let rec loop t ctx = if t == Tree.nil then empty_res else - let r1 = loop (first t) t + let r1 = loop (first t) t and r2 = loop (next t ctx) ctx in eval_fold2_slist fl_list t (Tree.tag tree t) r2 r1 - in loop - else + in + let _ = Printf.eprintf "Using %i\n" (Loc.start_line __LOCATION__) in + loop + else + let _ = Printf.eprintf "Using %i\n" (Loc.start_line __LOCATION__) in (fun t ctx -> eval_fold2_slist fl_list t (Tree.tag tree t) (loop (next t ctx) rlist ctx ) (loop (first t) llist t )) | _,_ -> + let _ = Printf.eprintf "Using %i\n" (Loc.start_line __LOCATION__) in (fun t ctx -> eval_fold2_slist fl_list t (Tree.tag tree t) (loop (next t ctx) rlist ctx ) @@ -998,8 +1174,8 @@ END (a,b) ) ,cont) in - (TransCache.add td_trans tag slist (Obj.repr cont) ;cont) - in (Obj.magic cont) t ctx + (TransCache.add td_trans tag slist cont ;cont) + in cont t ctx in (if noright then loop_no_right else loop) t slist ctx @@ -1082,7 +1258,7 @@ END match SList.node sl,fl with |SList.Nil,[] -> acc |SList.Cons(s,sll), formlist::fll -> - let r',(rb,rb1,rb2,mark) = + let r',mcnf = let key = SList.hash sl,Formlist.hash formlist,dir in try Hashtbl.find h_fold key @@ -1092,6 +1268,7 @@ END else eval_formlist tag Ptset.Int.empty s formlist in (Hashtbl.add h_fold key res;res) in + let (rb,rb1,rb2,mark) = bool_of_merge mcnf in if rb && ((dir&&rb1)|| ((not dir) && rb2)) then let acc = diff --git a/ata.mli b/ata.mli index 159bd78..1565caa 100644 --- a/ata.mli +++ b/ata.mli @@ -4,7 +4,7 @@ sig include Sigs.T with type t = int val make : unit -> t end - +type merge_conf = NO | MARK | ONLY1 | ONLY2 | ONLY12 | MARK1 | MARK2 | MARK12 module StateSet : sig include Ptset.S with type elt = int @@ -92,7 +92,7 @@ module type ResultSet = val fold : (elt -> 'a -> 'a) -> t -> 'a -> 'a val map : (elt -> elt) -> t -> t val length : t -> int - val merge : (bool*bool*bool*bool)-> elt -> t -> t -> t + val merge : merge_conf -> elt -> t -> t -> t val mk_quick_tag_loop : (elt -> elt -> 'a*t array) -> 'a -> int -> Tree.t -> Tag.t -> (elt -> elt -> 'a*t array) val mk_quick_star_loop : (elt -> elt -> 'a*t array) -> 'a -> int -> Tree.t -> (elt -> elt -> 'a*t array) end diff --git a/main.ml b/main.ml index 19bdcf6..dc2cb3e 100644 --- a/main.ml +++ b/main.ml @@ -69,9 +69,9 @@ let main v query_string output = let _ = Printf.eprintf "Count is %i\n%!" r in let _ = Printf.eprintf "Timing //keyword 2:" in let r = time (test_loop2 v) (Tag.tag "keyword") in - let _ = Printf.eprintf "Count is %i\n%!" r in *) + let _ = Printf.eprintf "Count is %i\n%!" r in let _ = Printf.eprintf "Timing //node() :" in - let _ = time (test_full) v in + let _ = time (test_full) v in *) XPath.Ast.print Format.err_formatter query; Format.fprintf Format.err_formatter "\n%!"; Printf.eprintf "Compiling query : "; diff --git a/tree.ml b/tree.ml index 1ff2082..26dc770 100644 --- a/tree.ml +++ b/tree.ml @@ -20,6 +20,11 @@ type tree type 'a node = private int type node_kind = [`Text | `Tree ] +type t = { + doc : tree; + ttable : (Tag.t,(Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t)) Hashtbl.t; +} + external inode : 'a node -> int = "%identity" external nodei : int -> 'a node = "%identity" let compare_node x y = (inode x) - (inode y) @@ -63,10 +68,10 @@ external tree_parent_doc : tree -> [`Text ] node -> [`Tree ] node = "caml_xml_tr external tree_first_child : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_child" "noalloc" external tree_closing : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_closing" "noalloc" external tree_is_open : tree -> [`Tree] node -> bool = "caml_xml_tree_is_open" "noalloc" -external tree_first_element : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_element" "noalloc" +external tree_first_element : t -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_element" "noalloc" external tree_tagged_child : tree -> [`Tree] node -> Tag.t -> [`Tree] node = "caml_xml_tree_tagged_child" "noalloc" external tree_next_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_sibling" "noalloc" -external tree_next_element : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_element" "noalloc" +external tree_next_element : t -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_element" "noalloc" external tree_tagged_sibling : tree -> [`Tree] node -> Tag.t -> [`Tree] node = "caml_xml_tree_tagged_sibling" "noalloc" external tree_prev_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_prev_sibling" "noalloc" @@ -79,8 +84,7 @@ external tree_tag_id : tree -> [`Tree ] node -> Tag.t = "caml_xml_tree_tag_id" let tree_is_last t n = equal_node nil (tree_next_sibling t n) - -(*external tree_prev_text : tree -> [`Tree] node -> [`Text ] node = "caml_xml_tree_prev_text" "noalloc" *) + external tree_my_text : tree -> [`Tree] node -> [`Text ] node = "caml_xml_tree_my_text" "noalloc" external tree_my_text_unsafe : tree -> [`Tree] node -> [`Text ] node = "caml_xml_tree_my_text_unsafe" "noalloc" @@ -100,6 +104,7 @@ external tree_node_xml_id : tree -> [`Tree ] node -> int = "caml_xml_tree_node_x external tree_is_ancestor : tree -> [`Tree ] node -> [`Tree ] node -> bool = "caml_xml_tree_is_ancestor" "noalloc" external tree_tagged_desc : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_desc" "noalloc" external tree_tagged_foll_below : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_foll_below" "noalloc" +external tree_tagged_foll_before : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_foll_before" "noalloc" external tree_subtree_tags : tree -> [`Tree ] node -> Tag.t -> int = "caml_xml_tree_subtree_tags" "noalloc" @@ -112,6 +117,7 @@ external tree_select_child : tree -> [`Tree ] node -> unordered_set -> [`Tree] n external tree_select_foll_sibling : tree -> [`Tree ] node -> unordered_set -> [`Tree] node = "caml_xml_tree_select_foll_sibling" "noalloc" external tree_select_desc : tree -> [`Tree ] node -> unordered_set -> [`Tree] node = "caml_xml_tree_select_desc" "noalloc" external tree_select_foll_below : tree -> [`Tree ] node -> unordered_set -> [`Tree] node -> [`Tree] node = "caml_xml_tree_select_foll_below" "noalloc" +external tree_select_foll_before : tree -> [`Tree ] node -> unordered_set -> [`Tree] node -> [`Tree] node = "caml_xml_tree_select_foll_before" "noalloc" module HPtset = Hashtbl.Make(Ptset.Int) @@ -128,10 +134,7 @@ let ptset_to_vector s = HPtset.add vector_htbl s v; v -type t = { - doc : tree; - ttable : (Tag.t,(Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t)) Hashtbl.t; -} + let subtree_size t i = tree_subtree_size t.doc i let subtree_elements t i = tree_subtree_elements t.doc i let text_size t = text_size t.doc @@ -442,7 +445,7 @@ let is_binary_ancestor t n1 n2 = let parent t n = tree_parent t.doc n let first_child t = (); fun n -> tree_first_child t.doc n -let first_element t = (); fun n -> tree_first_element t.doc n +let first_element t = (); fun n -> tree_first_element t n (* these function will be called in two times: first partial application on the tag, then application of the tag and the tree, then application of @@ -456,7 +459,7 @@ let select_child t = fun ts -> fun n -> tree_select_child t.doc n v let next_sibling t = (); fun n -> tree_next_sibling t.doc n -let next_element t = (); fun n -> tree_next_element t.doc n +let next_element t = (); fun n -> tree_next_element t n let tagged_sibling t tag = (); fun n -> tree_tagged_sibling t.doc n tag @@ -465,7 +468,7 @@ let select_sibling t = fun ts -> fun n -> tree_select_foll_sibling t.doc n v let next_sibling_ctx t = (); fun n _ -> tree_next_sibling t.doc n -let next_element_ctx t = (); fun n _ -> tree_next_element t.doc n +let next_element_ctx t = (); fun n _ -> tree_next_element t n let tagged_sibling_ctx t tag = (); fun n _ -> tree_tagged_sibling t.doc n tag let select_sibling_ctx t = fun ts -> -- 2.17.1 From cf6d366b25132eea7b0f1966c11d034d748af0fa Mon Sep 17 00:00:00 2001 From: kim Date: Wed, 19 Aug 2009 01:59:43 +0000 Subject: [PATCH 13/16] Safety before Techfest git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@552 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- OCamlDriver.cpp | 76 +++++++++++++++++++++++++++++++++++++++++++------ ata.ml | 37 +++++++----------------- ata.mli | 2 ++ main.ml | 50 ++++++++++++++++++++++++++------ results.c | 17 +++++++++++ results.h | 2 ++ tests/testbu.sh | 18 +++++------- tree.ml | 21 +++++++++----- tree.mli | 6 ++-- ulexer.ml | 2 +- utils.ml | 2 +- xPath.ml | 12 ++++++-- xPath.mli | 2 +- 13 files changed, 176 insertions(+), 71 deletions(-) diff --git a/OCamlDriver.cpp b/OCamlDriver.cpp index 85cc813..35d03b8 100644 --- a/OCamlDriver.cpp +++ b/OCamlDriver.cpp @@ -128,15 +128,15 @@ extern "C" CAMLprim value caml_xml_tree_save(value tree,value fd){ CAMLreturn (Val_unit); } -extern "C" CAMLprim value caml_xml_tree_load(value fd){ - CAMLparam1(fd); +extern "C" CAMLprim value caml_xml_tree_load(value fd, value load_tc,value sf){ + CAMLparam3(fd,load_tc,sf); CAMLlocal1(doc); XMLTree * tree; try { - tree = XMLTree::Load(Int_val(fd)); - doc = caml_alloc_custom(&ops,sizeof(XMLTree*),1,2); - memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*)); - CAMLreturn(doc); + tree = XMLTree::Load(Int_val(fd),Bool_val(load_tc),Int_val(sf)); + doc = caml_alloc_custom(&ops,sizeof(XMLTree*),1,2); + memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*)); + CAMLreturn(doc); } catch (const xmlpp::internal_error& e){ CAMLRAISEMSG(e.what()); } catch (const std::exception& e){ CAMLRAISEMSG(e.what()); } @@ -187,13 +187,13 @@ extern "C" CAMLprim value caml_text_collection_count(value tree,value str){ } bool docId_comp(DocID x, DocID y) { return x < y; }; + extern "C" CAMLprim value caml_text_collection_contains(value tree,value str){ CAMLparam2(tree,str); CAMLlocal1(resarray); uchar * cstr = (uchar *) String_val(str); std::vector results; results = XMLTREE(tree)->Contains(cstr); - //free(cstr); std::sort(results.begin(), results.end(), docId_comp); size_t s = results.size(); resarray = caml_alloc_tuple(s); @@ -204,12 +204,65 @@ extern "C" CAMLprim value caml_text_collection_contains(value tree,value str){ CAMLreturn (resarray); } +extern "C" CAMLprim value caml_text_collection_equals(value tree,value str){ + CAMLparam2(tree,str); + CAMLlocal1(resarray); + uchar * cstr = (uchar *) String_val(str); + std::vector results; + results = XMLTREE(tree)->Equal(cstr); + std::sort(results.begin(), results.end(), docId_comp); + size_t s = results.size(); + resarray = caml_alloc_tuple(s); + + for (size_t i = 0; i < s ;i++){ + caml_initialize(&Field(resarray,i),Val_int(results[i])); + }; + CAMLreturn (resarray); +} +extern "C" CAMLprim value caml_text_collection_startswith(value tree,value str){ + CAMLparam2(tree,str); + CAMLlocal1(resarray); + uchar * cstr = (uchar *) String_val(str); + std::vector results; + results = XMLTREE(tree)->Prefix(cstr); + std::sort(results.begin(), results.end(), docId_comp); + size_t s = results.size(); + resarray = caml_alloc_tuple(s); + + for (size_t i = 0; i < s ;i++){ + caml_initialize(&Field(resarray,i),Val_int(results[i])); + }; + CAMLreturn (resarray); +} +extern "C" CAMLprim value caml_text_collection_endswith(value tree,value str){ + CAMLparam2(tree,str); + CAMLlocal1(resarray); + uchar * cstr = (uchar *) String_val(str); + std::vector results; + results = XMLTREE(tree)->Suffix(cstr); + std::sort(results.begin(), results.end(), docId_comp); + size_t s = results.size(); + resarray = caml_alloc_tuple(s); + + for (size_t i = 0; i < s ;i++){ + caml_initialize(&Field(resarray,i),Val_int(results[i])); + }; + CAMLreturn (resarray); +} + + + extern "C" CAMLprim value caml_text_collection_unsorted_contains(value tree,value str){ CAMLparam2(tree,str); + CAMLlocal1(resarray); uchar * cstr = (uchar *) String_val(str); std::vector results; results = XMLTREE(tree)->Contains(cstr); - CAMLreturn (Val_unit); + resarray = caml_alloc_tuple(results.size()); + for (size_t i = 0; i < results.size() ;i++){ + caml_initialize(&Field(resarray,i),Val_int(results[i])); + }; + CAMLreturn (resarray); } @@ -430,6 +483,13 @@ extern "C" CAMLprim value caml_result_set_next(value result,value p){ CAMLreturn (Val_int(nextResult(r, Int_val(p)))); } +extern "C" CAMLprim value caml_result_set_count(value result){ + CAMLparam0(); + results r; + r = *( (results *) result); + CAMLreturn (Val_int(countResult(r))); +} + extern "C" CAMLprim value caml_xml_tree_print(value tree,value node,value fd){ CAMLparam3(tree,node,fd); XMLTREE(tree)->Print(Int_val(fd),TREENODEVAL(node)); diff --git a/ata.ml b/ata.ml index eed81bc..a5d4a3f 100644 --- a/ata.ml +++ b/ata.ml @@ -531,7 +531,9 @@ let tags_of_state a q = external create_empty : int -> bits = "caml_result_set_create" external set : bits -> int -> unit = "caml_result_set_set" external next : bits -> int -> int = "caml_result_set_next" + external count : bits -> int = "caml_result_set_count" external clear : bits -> elt -> elt -> unit = "caml_result_set_clear" + external set_tag_bits : bits -> Tag.t -> Tree.t -> elt -> elt = "caml_set_tag_bits" type t = { segments : elt list; @@ -589,8 +591,9 @@ let tags_of_state a q = in loop (next t.bits 0) acc let map _ _ = failwith "noop" - let length t = let cpt = ref 0 in - iter (fun _ -> incr cpt) t; !cpt + (*let length t = let cpt = ref 0 in + iter (fun _ -> incr cpt) t; !cpt *) + let length t = count t.bits let clear_bits t = let rec loop l = match l with @@ -1069,7 +1072,6 @@ END let cont = match f_kind,n_kind with | `NIL,`NIL -> - Printf.eprintf "Using %i\n" (Loc.start_line __LOCATION__); (fun t _ -> eval_fold2_slist fl_list t (Tree.tag tree t) empty_res empty_res) | _,`NIL -> ( match f_kind with @@ -1084,16 +1086,10 @@ END if (Algebra.is_rec a s fst) && (Algebra.is_rec a s snd) && (Algebra.is_final_marking a s) then - let _ = Printf.eprintf "Using %i\n" (Loc.start_line __LOCATION__) in RS.mk_quick_tag_loop default llist 1 tree tag' - else - let _ = Printf.eprintf "Using %i\n" (Loc.start_line __LOCATION__) in - default - else - let _ = Printf.eprintf "Using %i\n" (Loc.start_line __LOCATION__) in - default + else default + else default | _ -> - let _ = Printf.eprintf "Using %i\n" (Loc.start_line __LOCATION__) in (fun t _ -> eval_fold2_slist fl_list t (Tree.tag tree t) empty_res (loop (first t) llist t )) ) @@ -1105,44 +1101,35 @@ END if t == Tree.nil then empty_res else let res2 = loop (next t ctx) ctx in eval_fold2_slist fl_list t tag res2 empty_res - in Printf.eprintf "Using %i\n" (Loc.start_line __LOCATION__);loop + in loop else - let _ = Printf.eprintf "Using %i\n" (Loc.start_line __LOCATION__) in (fun t ctx -> eval_fold2_slist fl_list t (Tree.tag tree t) (loop_tag tag' (next t ctx) rlist ctx ) empty_res) | _ -> - let _ = Printf.eprintf "Using %i\n" (Loc.start_line __LOCATION__) in (fun t ctx -> eval_fold2_slist fl_list t (Tree.tag tree t) (loop (next t ctx) rlist ctx ) empty_res) ) | `TAG(tag1),`TAG(tag2) -> - let _ = Printf.eprintf "Using %i %s %s\n" (Loc.start_line __LOCATION__) - (Tag.to_string tag1) - (Tag.to_string tag2) - in (fun t ctx -> eval_fold2_slist fl_list t (Tree.tag tree t) (loop_tag tag2 (next t ctx) rlist ctx ) (loop_tag tag1 (first t) llist t )) | `TAG(tag'),`ANY -> - let _ = Printf.eprintf "Using %i\n" (Loc.start_line __LOCATION__) in (fun t ctx -> eval_fold2_slist fl_list t (Tree.tag tree t) (loop (next t ctx) rlist ctx ) (loop_tag tag' (first t) llist t )) | `ANY,`TAG(tag') -> - let _ = Printf.eprintf "Using %i\n" (Loc.start_line __LOCATION__) in (fun t ctx -> eval_fold2_slist fl_list t (Tree.tag tree t) (loop_tag tag' (next t ctx) rlist ctx ) (loop (first t) llist t )) | `ANY,`ANY -> - let _ = Printf.eprintf "Using %i\n" (Loc.start_line __LOCATION__) in if SList.equal slist rlist && SList.equal slist llist then let rec loop t ctx = @@ -1151,17 +1138,13 @@ END and r2 = loop (next t ctx) ctx in eval_fold2_slist fl_list t (Tree.tag tree t) r2 r1 - in - let _ = Printf.eprintf "Using %i\n" (Loc.start_line __LOCATION__) in - loop + in loop else - let _ = Printf.eprintf "Using %i\n" (Loc.start_line __LOCATION__) in (fun t ctx -> eval_fold2_slist fl_list t (Tree.tag tree t) (loop (next t ctx) rlist ctx ) (loop (first t) llist t )) | _,_ -> - let _ = Printf.eprintf "Using %i\n" (Loc.start_line __LOCATION__) in (fun t ctx -> eval_fold2_slist fl_list t (Tree.tag tree t) (loop (next t ctx) rlist ctx ) @@ -1415,7 +1398,7 @@ END let top_down_count a t = let module RI = Run(Integer) in Integer.length (RI.run_top_down a t) let top_down a t = let module RI = Run(IdSet) in (RI.run_top_down a t) let bottom_up_count a t k = let module RI = Run(Integer) in Integer.length (RI.run_bottom_up a t k) - + let bottom_up a t k = let module RI = Run(IdSet) in (RI.run_bottom_up a t k) module Test (Doc : sig val doc : Tree.t end) = struct diff --git a/ata.mli b/ata.mli index 1565caa..a5fb3e1 100644 --- a/ata.mli +++ b/ata.mli @@ -104,6 +104,8 @@ val top_down_count : 'a t -> Tree.t -> int val top_down : 'a t -> Tree.t -> IdSet.t val bottom_up_count : 'a t -> Tree.t -> [> `CONTAINS of 'b | `TAG of Tag.t ] -> int +val bottom_up : + 'a t -> Tree.t -> [> `CONTAINS of 'b | `TAG of Tag.t ] -> IdSet.t module Test (Doc : sig val doc : Tree.t end ) : sig diff --git a/main.ml b/main.ml index dc2cb3e..c889833 100644 --- a/main.ml +++ b/main.ml @@ -81,14 +81,14 @@ let main v query_string output = let jump_to = match contains with None -> (max_int,`NOTHING) - | Some s -> + | Some ((op,s)) -> let r = Tree.count v s in Printf.eprintf "%i documents in the TextCollection\n" (Tree.text_size v); Printf.eprintf "Global count is %i, using " r; if r < !Options.tc_threshold then begin Printf.eprintf "TextCollection contains\nCalling global contains : "; - time (Tree.init_contains v) s; + time (Tree.init_textfun op v) s; end else begin Printf.eprintf "Naive contains\nCalling global contains : "; @@ -124,21 +124,40 @@ let main v query_string output = let _ = Printf.eprintf "%!" in (* let _ = Gc.set (disabled_gc) in *) if !Options.backward && ((snd test_list) != `NOTHING )then - - let r = time (bottom_up_count auto v )(snd test_list) in + if !Options.count_only then + let r = time_mem (bottom_up_count auto v )(snd test_list) in let _ = Printf.eprintf "Number of nodes in the result set : %i\n%!" r in () + else begin + let r = time_mem (bottom_up auto v )(snd test_list) in + let _ = Printf.eprintf "Number of nodes in the result set : %i\n%!" (IdSet.length r) + in + match output with + + | None -> () + | Some f -> + Printf.eprintf "Serializing results : "; + time( fun () -> + (*let oc = open_out f in *) + let oc = Unix.openfile f [ Unix.O_WRONLY;Unix.O_TRUNC;Unix.O_CREAT] 0o644 in + (*output_string oc "\n";*) + IdSet.iter (fun t -> + Tree.print_xml_fast3 v t oc; + (*output_char oc '\n'; *) + ) r) (); + end + else let _ = if !Options.backward then Printf.eprintf "WARNING: couldn't find a jumping point, running top-down\n" in if !Options.count_only then - let r = time ( top_down_count auto ) v in + let r = time_mem ( top_down_count auto ) v in let _ = Printf.eprintf "Number of nodes in the result set : %i\n%!" r in () else let module GR = Ata.Test(struct let doc = v end) in - let result = time (GR.top_down auto) v in + let result = time_mem (GR.top_down auto) v in let _ = Printf.eprintf "Counting results " in let rcount = time (GR.Results.length) result in Printf.eprintf "Number of nodes in the result set : %i\n" rcount; @@ -152,10 +171,23 @@ let main v query_string output = (*let oc = open_out f in *) let oc = Unix.openfile f [ Unix.O_WRONLY;Unix.O_TRUNC;Unix.O_CREAT] 0o644 in (*output_string oc "\n";*) + let t1 = ref (Unix.gettimeofday()) in + let count = ref 1 in + let old_count = ref 1 in GR.Results.iter (fun t -> - Tree.print_xml_fast3 v t oc; + incr count; + begin + if (!count mod 15) == 0 + then + let t2 = Unix.gettimeofday() in + let _ = Printf.eprintf "Printing %i elements in %f ms\n" + (!count - !old_count) (1000. *.(t2 -. !t1)) + in + ( old_count := !count; t1 := Unix.gettimeofday()) + end; + Tree.print_xml_fast3 v t oc; (*output_char oc '\n'; *) - ) result) (); + ) result) (); end; end; let _ = Gc.set enabled_gc in @@ -169,7 +201,7 @@ let v = then begin Printf.eprintf "Loading from file : "; - time (Tree.load ~sample:!Options.sample_factor ) + time (Tree.load ~sample:!Options.sample_factor ~load_text:(not !Options.count_only)) !Options.input_file; end else diff --git a/results.c b/results.c index 161f295..7570e5a 100644 --- a/results.c +++ b/results.c @@ -26,8 +26,11 @@ results createResults (int n) { fprintf(stderr,"Error, redefine logW as %i and recompile\n",lg(W)-1); exit(1); } + R.n = 2*n-1; R.lgn = lg(n); + fprintf(stderr,"Size of the result set : %i elements, %li kB\n", n, + (((R.n+W-1)/W)*sizeof(int)/1024)); R.tree = (int*) malloc (((R.n+W-1)/W)*sizeof(int)); clearBit(R.tree,0); // clear all return R; @@ -206,6 +209,20 @@ int nextResult (results R, int p) // returns pos of next(p) or -1 if none return unconv(answ,R.n,R.lgn); } +// Naively implemented by kim + +unsigned int countResult(results R) { + unsigned int result = 0; + int i = 0; + while ( i != -1 && i < R.n) { + result ++; + i = unconv(nextLarger(R.tree,R.n,conv(i+1,R.n,R.lgn),0,R.lgn),R.n,R.lgn); + }; + return result; + +} + + static void prnspace (int k) { while (k--) putchar(' '); diff --git a/results.h b/results.h index 020c188..c504894 100644 --- a/results.h +++ b/results.h @@ -13,6 +13,8 @@ void freeResults (results R); // returns 0/1 telling whether result p is not/is present in R int readResult (results R, int p); +unsigned int countResult (results R); + // inserts result p into R void setResult (results R, int p); diff --git a/tests/testbu.sh b/tests/testbu.sh index 752804d..49f6191 100755 --- a/tests/testbu.sh +++ b/tests/testbu.sh @@ -4,18 +4,14 @@ declare -a QUERY QUERY[0]="/descendant::MedlineCitation/descendant::*/contains('brain')" QUERY[1]="/descendant::MedlineCitation/descendant::Country/contains('AUSTRALIA')" QUERY[2]="/descendant::Country/contains('AUSTRALIA')" -QUERY[3]="/descendant::*/contains('AUSTRALIA')" -QUERY[4]="/descendant::*/contains('?')" -QUERY[5]="/descendant::MedlineCitation/descendant::*/contains('?')" +QUERY[3]="/descendant::*/contains('1930')" +QUERY[4]="/descendant::MedlineCitation/descendant::*/contains('1930')" +QUERY[5]="/descendant::MedlineCitation/Article/AuthorList/Author/LastName/startswith('Bar')" +QUERY[6]="/descendant::MedlineCitation[MedlineJournalInfo/Country/endswith('LAND')]" -for ((i=0;i<=5;i++)) +for ((i=0;i<=6;i++)) do echo Running query "$i" : "${QUERY[$i]}" - ../main -b medline.srx "${QUERY[$i]}" > q_"$i".time 2>&1 & + ../main -f 0 -b medline_05.srx "${QUERY[$i]}" - while pidof main >/dev/null 2>&1 - do - cat /proc/`pidof main`/status | grep "VmRSS" >> q_"$i".mem - sleep 4 - done -done \ No newline at end of file +done diff --git a/tree.ml b/tree.ml index 26dc770..f21015d 100644 --- a/tree.ml +++ b/tree.ml @@ -35,7 +35,7 @@ external parse_xml_uri : string -> int -> bool -> bool -> tree = "caml_call_shre external parse_xml_string : string -> int -> bool -> bool -> tree = "caml_call_shredder_string" external tree_print_xml_fast3 : tree -> [`Tree ] node -> Unix.file_descr ->unit = "caml_xml_tree_print" external tree_save : tree -> Unix.file_descr -> unit = "caml_xml_tree_save" -external tree_load : Unix.file_descr -> tree = "caml_xml_tree_load" +external tree_load : Unix.file_descr -> bool -> int -> tree = "caml_xml_tree_load" external nullt : unit -> 'a node = "caml_xml_tree_nullt" @@ -53,7 +53,10 @@ external text_is_contains : tree -> string -> bool = "caml_text_collection_is_co external text_count_contains : tree -> string -> int = "caml_text_collection_count_contains" external text_count : tree -> string -> int = "caml_text_collection_count" external text_contains : tree -> string -> [`Text ] node array = "caml_text_collection_contains" -external text_unsorted_contains : tree -> string -> unit = "caml_text_collection_unsorted_contains" +external text_startswith : tree -> string -> [`Text ] node array = "caml_text_collection_startswith" +external text_endswith : tree -> string -> [`Text ] node array = "caml_text_collection_endswith" +external text_equals : tree -> string -> [`Text ] node array = "caml_text_collection_equals" +external text_unsorted_contains : tree -> string -> [`Text ] node array = "caml_text_collection_unsorted_contains" external text_get_cached_text : tree -> [`Text] node -> string = "caml_text_collection_get_cached_text" external tree_root : tree -> [`Tree] node = "caml_xml_tree_root" @@ -222,10 +225,14 @@ let in_array _ i = with Not_found -> false -let init_contains t s = - let a = text_contains t.doc s +let init_textfun f t s = + let a = match f with + | `CONTAINS -> text_contains t.doc s + | `STARTSWITH -> text_startswith t.doc s + | `ENDSWITH -> text_endswith t.doc s + | `EQUALS -> text_equals t.doc s in - Array.fast_sort (compare) a; + (*Array.fast_sort (compare) a; *) contains_array := a; Array.iter (fun x -> Hashtbl.add contains_index x true) !contains_array @@ -380,7 +387,7 @@ let save t str = close_out out_c ;; -let load ?(sample=64) str = +let load ?(sample=64) ?(load_text=true) str = let fd = Unix.openfile str [ Unix.O_RDONLY ] 0o644 in let in_c = Unix.in_channel_of_descr fd in let _ = set_binary_mode_in in_c true in @@ -407,7 +414,7 @@ let load ?(sample=64) str = let _ = Printf.eprintf "\nLoading tag table : " in let ntable = time (load_table) () in ignore(Unix.lseek fd (pos_in in_c) Unix.SEEK_SET); - let tree = { doc = tree_load fd; + let tree = { doc = tree_load fd load_text sample; ttable = ntable;} in close_in in_c; tree diff --git a/tree.mli b/tree.mli index e4bbd2d..8cffd18 100644 --- a/tree.mli +++ b/tree.mli @@ -1,13 +1,13 @@ type t -val init_contains : t -> string -> unit +val init_textfun : [ `CONTAINS | `STARTSWITH | `ENDSWITH | `EQUALS ] -> t -> string -> unit val init_naive_contains : t -> string -> unit val parse_xml_uri : string -> t val parse_xml_string : string -> t val save : t -> string -> unit -val load : ?sample:int -> string -> t +val load : ?sample:int -> ?load_text:bool -> string -> t val tag_pool : t -> Tag.pool @@ -71,7 +71,7 @@ val is_left : t -> [`Tree] node -> bool val binary_parent : t -> [`Tree] node -> [`Tree] node val count_contains : t -> string -> int -val unsorted_contains : t -> string -> unit +(* val unsorted_contains : t -> string -> unit *) val text_size : t -> int val doc_ids : t -> [`Tree] node -> [`Text] node * [`Text] node val subtree_tags : t -> Tag.t -> [`Tree] node -> int diff --git a/ulexer.ml b/ulexer.ml index a6ea991..84ae969 100644 --- a/ulexer.ml +++ b/ulexer.ml @@ -188,7 +188,7 @@ let parse_char lexbuf base i = let rec token = lexer | [' ' '\t'] -> token lexbuf | "text()" | "node()" | "and" | "not" | "or" - | "contains" | "contains_full" + | "contains" | "contains_full" | "endswith" | "startswith" | "equals" | "self" | "descendant" | "child" | "descendant-or-self" | "attribute" | "following-sibling" | "preceding-sibling" | "parent" | "ancestor" | "ancestor-or-self" | "preceding" | "following" diff --git a/utils.ml b/utils.ml index 76c0432..2e02bc5 100644 --- a/utils.ml +++ b/utils.ml @@ -80,7 +80,7 @@ let time_mem f x = l:= t::!l; Printf.eprintf " %fms\n%!" t ; Printf.eprintf "Mem use before: %s\n%!" s1; - Printf.eprintf "Mem use after: %s\n\n\n%!" s2; + Printf.eprintf "Final Mem: %s\n\n\n%!" s2; r ;; let time f x = diff --git a/xPath.ml b/xPath.ml index bef0336..2ac43b7 100644 --- a/xPath.ml +++ b/xPath.ml @@ -153,10 +153,16 @@ step : [ | [ "." ; p = top_pred -> [(Self,TagSet.node,p)] ] | [ ".." ; p = top_pred -> [(Parent,TagSet.star,p)] ] | [ "contains"; "(" ; s = STRING ; ")";p=top_pred -> [ - let _ = contains := Some(s) in (Child,TagSet.singleton Tag.pcdata, p)] + let _ = contains := Some((`CONTAINS,s)) in (Child,TagSet.singleton Tag.pcdata, p)] ] -| [ "contains_full"; "(" ; s = STRING ; ")";p=top_pred -> [ - let _ = contains := Some(s) in (Descendant,TagSet.singleton Tag.pcdata, p)] +| [ "equals"; "(" ; s = STRING ; ")";p=top_pred -> [ + let _ = contains := Some((`EQUALS,s)) in (Child,TagSet.singleton Tag.pcdata, p)] + ] +| [ "startswith"; "(" ; s = STRING ; ")";p=top_pred -> [ + let _ = contains := Some((`STARTSWITH,s)) in (Child,TagSet.singleton Tag.pcdata, p)] + ] +| [ "endswith"; "(" ; s = STRING ; ")";p=top_pred -> [ + let _ = contains := Some((`ENDSWITH,s)) in (Child,TagSet.singleton Tag.pcdata, p)] ] | [ test = test; p = top_pred -> [(Child,test, p)] ] | [ att = ATT ; p = top_pred -> diff --git a/xPath.mli b/xPath.mli index 6d8d8a7..ad145f9 100644 --- a/xPath.mli +++ b/xPath.mli @@ -35,5 +35,5 @@ sig end module Compile : sig -val compile : ?querystring:string -> Ast.path -> 'a Ata.t * (Tag.t*Ata.StateSet.t) list * string option +val compile : ?querystring:string -> Ast.path -> 'a Ata.t * (Tag.t*Ata.StateSet.t) list * ([ `CONTAINS | `STARTSWITH | `ENDSWITH | `EQUALS ]*string) option end -- 2.17.1 From cad5e2e2831477cba1f6211c57b9a4cc5b58bd55 Mon Sep 17 00:00:00 2001 From: kim Date: Wed, 19 Aug 2009 21:10:21 +0000 Subject: [PATCH 14/16] Added benchmarking funtions, Need to debug symbol table generaion. git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@555 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- OCamlDriver.cpp | 708 +++++++++++++++++++++++++++++++++--------------- ata.ml | 170 ++++++------ ata.mli | 2 +- main.ml | 14 +- results.c | 2 +- tag.ml | 2 +- tree.ml | 266 ++++++++++-------- tree.mli | 25 +- 8 files changed, 758 insertions(+), 431 deletions(-) diff --git a/OCamlDriver.cpp b/OCamlDriver.cpp index 35d03b8..c10edf1 100644 --- a/OCamlDriver.cpp +++ b/OCamlDriver.cpp @@ -8,6 +8,11 @@ * Date: 04/11/08 */ +/*** + * Conventions: + * functions never doing any allocation (non caml_alloc*, caml_copy_string,...) + * have NOALLOC in the comment and their external declaration can have "noalloc" + */ #include @@ -30,10 +35,13 @@ extern "C" { #define CAMLRAISEMSG(msg) (caml_raise_with_string(*cpp_exception,(msg) )) #define NOT_IMPLEMENTED(s) (caml_failwith(s)) #define XMLTREE(x) ((XMLTree *)(* (XMLTree**) Data_custom_val(x))) -#define HSET(x) ((std::unordered_set*)((* (std::unordered_set**) Data_custom_val(x)))) +#define HSET(x) ((TagIdSet*)((* (TagIdSet**) Data_custom_val(x)))) #define TEXTCOLLECTION(x) #define TREENODEVAL(i) ((treeNode) (Int_val(i))) +#define TAGVAL(i) ((TagType) (Int_val(i))) #define XMLTREE_ROOT 0 +#define NoAlloc + static struct custom_operations ops; static struct custom_operations set_ops; @@ -52,7 +60,7 @@ extern "C" void caml_hset_finalize(value hblock){ return; } -extern "C" CAMLprim value caml_init_lib (value unit) { +extern "C" value caml_init_lib (value unit) { CAMLparam1(unit); if (!ops_initialized){ @@ -75,7 +83,7 @@ extern "C" CAMLprim value caml_init_lib (value unit) { CAMLreturn(Val_unit); } -extern "C" CAMLprim value caml_shredder_parse(XMLDocShredder *shredder){ +extern "C" value caml_shredder_parse(XMLDocShredder *shredder){ CAMLparam0(); CAMLlocal1(doc); XMLTree * tree; @@ -89,7 +97,7 @@ extern "C" CAMLprim value caml_shredder_parse(XMLDocShredder *shredder){ } -extern "C" CAMLprim value caml_call_shredder_uri(value uri,value sf, value iet, value dtc){ +extern "C" value caml_call_shredder_uri(value uri,value sf, value iet, value dtc){ CAMLparam1(uri); CAMLlocal1(doc); char *fn = String_val(uri); @@ -105,7 +113,7 @@ extern "C" CAMLprim value caml_call_shredder_uri(value uri,value sf, value iet, CAMLreturn (doc); } -extern "C" CAMLprim value caml_call_shredder_string(value data,value sf, value iet, value dtc){ +extern "C" value caml_call_shredder_string(value data,value sf, value iet, value dtc){ CAMLparam1(data); CAMLlocal1(doc); XMLDocShredder * shredder; @@ -122,18 +130,19 @@ extern "C" CAMLprim value caml_call_shredder_string(value data,value sf, value i CAMLreturn(doc); } -extern "C" CAMLprim value caml_xml_tree_save(value tree,value fd){ +extern "C" value caml_xml_tree_save(value tree,value fd){ CAMLparam2(tree,fd); XMLTREE(tree)->Save(Int_val(fd)); CAMLreturn (Val_unit); } -extern "C" CAMLprim value caml_xml_tree_load(value fd, value load_tc,value sf){ +extern "C" value caml_xml_tree_load(value fd, value load_tc,value sf){ CAMLparam3(fd,load_tc,sf); CAMLlocal1(doc); XMLTree * tree; try { tree = XMLTree::Load(Int_val(fd),Bool_val(load_tc),Int_val(sf)); + printf("Pointer to tree is %p\n", (void*) tree); doc = caml_alloc_custom(&ops,sizeof(XMLTree*),1,2); memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*)); CAMLreturn(doc); @@ -144,7 +153,16 @@ extern "C" CAMLprim value caml_xml_tree_load(value fd, value load_tc,value sf){ catch (char const * msg){ CAMLRAISEMSG(msg); }; } -extern "C" CAMLprim value caml_text_collection_get_text(value tree, value id){ + +/** + * Interface to the TextCollection + */ + +/** + * Utility functions + */ + +extern "C" value caml_text_collection_get_text(value tree, value id){ CAMLparam2(tree,id); CAMLlocal1(str); uchar* txt = XMLTREE(tree)->GetText((DocID) Int_val(id)); @@ -152,310 +170,499 @@ extern "C" CAMLprim value caml_text_collection_get_text(value tree, value id){ CAMLreturn (str); } -extern "C" CAMLprim value caml_text_collection_get_cached_text(value tree, value id){ + +extern "C" value caml_text_collection_empty_text(value tree,value id){ CAMLparam2(tree,id); - CAMLlocal1(str); - char* txt = (char*) XMLTREE(tree)->GetText((DocID) Int_val(id)); - str = caml_copy_string(txt); - CAMLreturn (str); + CAMLreturn ( Val_int((XMLTREE(tree))->EmptyText((DocID) Int_val(id)))); } +bool docId_comp(DocID x, DocID y) { return x < y; }; -extern "C" CAMLprim value caml_text_collection_empty_text(value tree,value id){ - CAMLparam2(tree,id); - CAMLreturn ( Val_int((XMLTREE(tree))->EmptyText((DocID) Int_val(id)))); +/** + * Existential queries + */ + +extern "C" value caml_text_collection_is_prefix(value tree,value str){ + CAMLparam2(tree,str); + uchar * cstr = (uchar *) String_val(str); + CAMLreturn (Val_bool((int) XMLTREE(tree)->IsPrefix(cstr))); } -extern "C" CAMLprim value caml_text_collection_is_contains(value tree,value str){ +extern "C" value caml_text_collection_is_suffix(value tree,value str){ + CAMLparam2(tree,str); + uchar * cstr = (uchar *) String_val(str); + CAMLreturn (Val_bool((int) XMLTREE(tree)->IsSuffix(cstr))); +} +extern "C" value caml_text_collection_is_equal(value tree,value str){ + CAMLparam2(tree,str); + uchar * cstr = (uchar *) String_val(str); + CAMLreturn (Val_bool((int) XMLTREE(tree)->IsEqual(cstr))); +} +extern "C" value caml_text_collection_is_contains(value tree,value str){ CAMLparam2(tree,str); uchar * cstr = (uchar *) String_val(str); CAMLreturn ( Val_bool((int) XMLTREE(tree)->IsContains(cstr))); } -extern "C" CAMLprim value caml_text_collection_count_contains(value tree,value str){ +extern "C" value caml_text_collection_is_lessthan(value tree,value str){ CAMLparam2(tree,str); uchar * cstr = (uchar *) String_val(str); - CAMLreturn (Val_int((XMLTREE(tree)->CountContains(cstr)))); - + CAMLreturn ( Val_bool((int) XMLTREE(tree)->IsLessThan(cstr))); } -extern "C" CAMLprim value caml_text_collection_count(value tree,value str){ + + +/** + * Count Queries + */ + +/** + * Global counting + */ +extern "C" value caml_text_collection_count(value tree,value str){ CAMLparam2(tree,str); uchar * cstr = (uchar *) String_val(str); CAMLreturn (Val_int((XMLTREE(tree)->Count(cstr)))); - CAMLreturn (Val_unit); - } -bool docId_comp(DocID x, DocID y) { return x < y; }; +extern "C" value caml_text_collection_count_prefix(value tree,value str){ + CAMLparam2(tree,str); + uchar * cstr = (uchar *) String_val(str); + CAMLreturn (Val_int((XMLTREE(tree)->CountPrefix(cstr)))); +} -extern "C" CAMLprim value caml_text_collection_contains(value tree,value str){ +extern "C" value caml_text_collection_count_suffix(value tree,value str){ + CAMLparam2(tree,str); + uchar * cstr = (uchar *) String_val(str); + CAMLreturn (Val_int((XMLTREE(tree)->CountSuffix(cstr)))); +} + +extern "C" value caml_text_collection_count_equal(value tree,value str){ + CAMLparam2(tree,str); + uchar * cstr = (uchar *) String_val(str); + CAMLreturn (Val_int((XMLTREE(tree)->CountEqual(cstr)))); +} + +extern "C" value caml_text_collection_count_contains(value tree,value str){ CAMLparam2(tree,str); - CAMLlocal1(resarray); uchar * cstr = (uchar *) String_val(str); - std::vector results; - results = XMLTREE(tree)->Contains(cstr); - std::sort(results.begin(), results.end(), docId_comp); - size_t s = results.size(); - resarray = caml_alloc_tuple(s); - - for (size_t i = 0; i < s ;i++){ - caml_initialize(&Field(resarray,i),Val_int(results[i])); - }; - CAMLreturn (resarray); + CAMLreturn (Val_int((XMLTREE(tree)->CountContains(cstr)))); } -extern "C" CAMLprim value caml_text_collection_equals(value tree,value str){ +extern "C" value caml_text_collection_count_lessthan(value tree,value str){ CAMLparam2(tree,str); - CAMLlocal1(resarray); uchar * cstr = (uchar *) String_val(str); - std::vector results; - results = XMLTREE(tree)->Equal(cstr); - std::sort(results.begin(), results.end(), docId_comp); - size_t s = results.size(); - resarray = caml_alloc_tuple(s); - - for (size_t i = 0; i < s ;i++){ - caml_initialize(&Field(resarray,i),Val_int(results[i])); - }; - CAMLreturn (resarray); + CAMLreturn (Val_int((XMLTREE(tree)->CountLessThan(cstr)))); } -extern "C" CAMLprim value caml_text_collection_startswith(value tree,value str){ + +static value sort_alloc_array(std::vector results, value resarray){ + std::sort(results.begin(), results.end(), docId_comp); + size_t s = results.size(); + resarray = caml_alloc_tuple(s); + for (size_t i = 0; i < s ;i++){ + caml_initialize(&Field(resarray,i),Val_int(results[i])); + }; + return resarray; +} + +/** + * Full reporting queries + */ + +extern "C" value caml_text_collection_prefix(value tree,value str){ CAMLparam2(tree,str); CAMLlocal1(resarray); uchar * cstr = (uchar *) String_val(str); - std::vector results; - results = XMLTREE(tree)->Prefix(cstr); - std::sort(results.begin(), results.end(), docId_comp); - size_t s = results.size(); - resarray = caml_alloc_tuple(s); - - for (size_t i = 0; i < s ;i++){ - caml_initialize(&Field(resarray,i),Val_int(results[i])); - }; - CAMLreturn (resarray); + std::vector results = XMLTREE(tree)->Prefix(cstr); + CAMLreturn (sort_alloc_array(results,resarray)); } -extern "C" CAMLprim value caml_text_collection_endswith(value tree,value str){ + +extern "C" value caml_text_collection_suffix(value tree,value str){ CAMLparam2(tree,str); CAMLlocal1(resarray); uchar * cstr = (uchar *) String_val(str); - std::vector results; - results = XMLTREE(tree)->Suffix(cstr); - std::sort(results.begin(), results.end(), docId_comp); - size_t s = results.size(); - resarray = caml_alloc_tuple(s); - - for (size_t i = 0; i < s ;i++){ - caml_initialize(&Field(resarray,i),Val_int(results[i])); - }; - CAMLreturn (resarray); + std::vector results = XMLTREE(tree)->Suffix(cstr); + CAMLreturn (sort_alloc_array(results,resarray)); } +extern "C" value caml_text_collection_equals(value tree,value str){ + CAMLparam2(tree,str); + CAMLlocal1(resarray); + uchar * cstr = (uchar *) strdup(String_val(str)); + std::vector results = XMLTREE(tree)->Equals(cstr); + free(cstr); + CAMLreturn (sort_alloc_array(results,resarray)); +} +extern "C" value caml_text_collection_contains(value tree,value str){ + CAMLparam2(tree,str); + CAMLlocal1(resarray); + uchar * cstr = (uchar *) String_val(str); + std::vector results = XMLTREE(tree)->Contains(cstr); + CAMLreturn (sort_alloc_array(results,resarray)); +} -extern "C" CAMLprim value caml_text_collection_unsorted_contains(value tree,value str){ +extern "C" value caml_text_collection_lessthan(value tree,value str){ CAMLparam2(tree,str); CAMLlocal1(resarray); uchar * cstr = (uchar *) String_val(str); - std::vector results; - results = XMLTREE(tree)->Contains(cstr); - resarray = caml_alloc_tuple(results.size()); - for (size_t i = 0; i < results.size() ;i++){ - caml_initialize(&Field(resarray,i),Val_int(results[i])); + std::vector results = XMLTREE(tree)->LessThan(cstr); + CAMLreturn (sort_alloc_array(results,resarray)); +} + +/** Full reporting into a bit vector + */ + +extern "C" value caml_text_collection_prefix_bv(value tree,value str){ + CAMLparam2(tree,str); + uchar * cstr = (uchar *) strdup(String_val(str)); + std::vector results = XMLTREE(tree)->Prefix(cstr); + std::vector *bv = new std::vector(XMLTREE(tree)->Size(),false); + for (unsigned int i=0; i < results.size(); i++) + bv->at(XMLTREE(tree)->ParentNode(results[i]))=true; + free(cstr); + CAMLreturn ((value) bv); +} + +extern "C" value caml_text_collection_suffix_bv(value tree,value str){ + CAMLparam2(tree,str); + uchar * cstr = (uchar *) strdup(String_val(str)); + std::vector results = XMLTREE(tree)->Suffix(cstr); + std::vector *bv = new std::vector(XMLTREE(tree)->Size(),false); + for (unsigned int i=0; i < results.size(); i++) + bv->at(XMLTREE(tree)->ParentNode(results[i]))=true; + free(cstr); + CAMLreturn ((value) bv); +} + +extern "C" value caml_text_collection_equals_bv(value tree,value str){ + CAMLparam2(tree,str); + uchar * cstr = (uchar *) strdup(String_val(str)); + XMLTree* xt = XMLTREE(tree); + std::vector results = xt->Equals(cstr); + std::vector *bv = new std::vector(xt->Size(),false); + for (unsigned int i=0; i < results.size(); i++) + bv->at(xt->Parent(xt->ParentNode(results[i])))=true; + free(cstr); + CAMLreturn ((value) bv); +} + + +extern "C" value caml_text_collection_contains_bv(value tree,value str){ + CAMLparam2(tree,str); + uchar * cstr = (uchar *) strdup(String_val(str)); + XMLTree* xt = XMLTREE(tree); + std::vector results = xt->Contains(cstr); + std::vector *bv = new std::vector(xt->Size(),false); + for (unsigned int i=0; i < results.size(); i++){ + bv->at(xt->Parent(xt->ParentNode(results[i])))=true; + } + free(cstr); + CAMLreturn ((value) bv); +} + +extern "C" value caml_text_collection_contains_bv_update(value tree,value str,value vbv){ + CAMLparam3(tree,str,vbv); + uchar * cstr = (uchar *) strdup(String_val(str)); + XMLTree* xt = XMLTREE(tree); + std::vector results = xt->Contains(cstr); + std::vector *bv = (std::vector *) vbv; + for (unsigned int i=0; i < results.size(); i++){ + /** Hack for the Techfest demo */ + (*bv)[xt->Parent(xt->Parent(xt->ParentNode(results[i])))]=true; + } + free(cstr); + CAMLreturn ((value) bv); +} +extern "C" value caml_text_collection_contains_bv_update_list(value tree,value str,value acc,value vbv,value count){ + CAMLparam4(tree,str,acc,vbv); + CAMLlocal1(head); + uchar * cstr = (uchar *) strdup(String_val(str)); + XMLTree* xt = XMLTREE(tree); + std::vector results = xt->Contains(cstr); + std::vector *bv = (std::vector *) vbv; + treeNode idx; + int acc_count = Int_val(count); + for (unsigned int i=0; i < results.size(); i++){ + idx = xt->Parent(xt->Parent(xt->ParentNode(results[i]))); + if (!(*bv)[idx]) { + (*bv)[idx]=true; + head = caml_alloc_tuple(2); + caml_initialize(&Field(head,0),Val_int(idx)); + caml_initialize(&Field(head,1),acc); + acc=head; + acc_count++; + }; }; - CAMLreturn (resarray); + free(cstr); + head = caml_alloc_tuple(3); + caml_initialize(&Field(head,0),acc); + caml_initialize(&Field(head,1),(value) bv); + caml_initialize(&Field(head,2),Val_int(acc_count)); + CAMLreturn (head); } +extern "C" value caml_text_collection_lessthan_bv(value tree,value str){ + CAMLparam2(tree,str); + uchar * cstr = (uchar *) strdup(String_val(str)); + std::vector results = XMLTREE(tree)->LessThan(cstr); + std::vector *bv = new std::vector(XMLTREE(tree)->Size(),false); + for (unsigned int i=0; i < results.size(); i++) + bv->at(XMLTREE(tree)->ParentNode(results[i]))=true; + free(cstr); + CAMLreturn ((value) bv); +} + +/*************************************************************************/ + +/** + * XMLTree bindings + * All of the functions here call the _unsafe version and implement the logics themselves + * (test for NULLT and so on). This avoids one indirection + one call when the tests fails. + */ -extern "C" CAMLprim value caml_xml_tree_root(value tree){ - CAMLparam1(tree); - CAMLreturn (Val_int(TREENODEVAL(XMLTREE_ROOT))); + +NoAlloc extern "C" value caml_xml_tree_root(value tree){ + return (Val_int(XMLTREE_ROOT)); } -extern "C" CAMLprim value caml_xml_tree_text_collection(value tree){ - CAMLparam1(tree); - CAMLreturn((value) XMLTREE(tree)->getTextCollection()); + +NoAlloc extern "C" value caml_xml_tree_size(value tree){ + return (Val_int(XMLTREE(tree)->Size())); } -extern "C" CAMLprim value caml_xml_tree_parent(value tree, value id){ - return(Val_int (XMLTREE(tree)->Parent(TREENODEVAL(id)))); + +NoAlloc extern "C" value caml_xml_tree_subtree_size(value tree, value node){ + return (Val_int(XMLTREE(tree)->SubtreeSize(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_prev_sibling(value tree, value id){ - return(Val_int (XMLTREE(tree)->PrevSibling(TREENODEVAL(id)))); + +NoAlloc extern "C" value caml_xml_tree_subtree_tags(value tree, value node, value tag){ + return (Val_int(XMLTREE(tree)->SubtreeTags(TREENODEVAL(node), TAGVAL(tag)))); } -extern "C" CAMLprim value caml_xml_tree_parent_doc(value tree, value id){ - return (Val_int (XMLTREE(tree)->ParentNode((DocID) Int_val(id)))); +NoAlloc extern "C" value caml_xml_tree_subtree_elements(value tree, value node){ + return (Val_int(XMLTREE(tree)->SubtreeElements(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_is_ancestor(value tree,value id1, value id2) { - CAMLparam3(tree,id1,id2); - CAMLreturn(Val_bool (XMLTREE(tree)->IsAncestor(TREENODEVAL(id1),TREENODEVAL(id2)))); +NoAlloc extern "C" value caml_xml_tree_is_leaf(value tree, value node){ + return (Val_bool(XMLTREE(tree)->IsLeaf(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_last_child(value tree, value id){ - return(Val_int (XMLTREE(tree)->LastChild(TREENODEVAL(id)))); +NoAlloc extern "C" value caml_xml_tree_is_ancestor(value tree, value node1,value node2){ + return (Val_bool(XMLTREE(tree)->IsAncestor(TREENODEVAL(node1),TREENODEVAL(node2)))); } -extern "C" CAMLprim value caml_xml_tree_is_first_child(value tree, value id){ - return Val_bool (XMLTREE(tree)->IsFirstChild(TREENODEVAL(id))); +NoAlloc extern "C" value caml_xml_tree_is_child(value tree, value node1,value node2){ + return (Val_bool(XMLTREE(tree)->IsChild(TREENODEVAL(node1),TREENODEVAL(node2)))); } -extern "C" CAMLprim value caml_xml_tree_first_child(value tree, value id){ - return(Val_int (XMLTREE(tree)->FirstChild(TREENODEVAL(id)))); + +NoAlloc extern "C" value caml_xml_tree_is_first_child(value tree, value node){ + return (Val_bool(XMLTREE(tree)->IsFirstChild(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_closing(value tree, value id){ - return(Val_int (XMLTREE(tree)->Closing(TREENODEVAL(id)))); + +NoAlloc extern "C" value caml_xml_tree_num_children(value tree, value node){ + return (Val_int(XMLTREE(tree)->NumChildren(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_is_open(value tree, value id){ - return(Val_bool (XMLTREE(tree)->IsOpen(TREENODEVAL(id)))); + +NoAlloc extern "C" value caml_xml_tree_child_number(value tree, value node){ + return (Val_int(XMLTREE(tree)->ChildNumber(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_first_element(value tree, value id){ - return(Val_int (XMLTREE(Field(tree,0))->FirstElement(TREENODEVAL(id)))); +NoAlloc extern "C" value caml_xml_tree_depth(value tree, value node){ + return (Val_int(XMLTREE(tree)->Depth(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_tagged_child(value tree, value id, value tag){ - return(Val_int (XMLTREE(tree)->TaggedChild(TREENODEVAL(id),Int_val(tag)))); +NoAlloc extern "C" value caml_xml_tree_preorder(value tree, value node){ + return (Val_int(XMLTREE(tree)->Preorder(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_next_sibling(value tree, value id){ - return(Val_int (XMLTREE(tree)->NextSibling(TREENODEVAL(id)))); +NoAlloc extern "C" value caml_xml_tree_postorder(value tree, value node){ + return (Val_int(XMLTREE(tree)->Postorder(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_next_element(value tree, value id){ - return(Val_int (XMLTREE(Field(tree,0))->NextElement(TREENODEVAL(id)))); +NoAlloc extern "C" value caml_xml_tree_tag(value tree, value node){ + return (Val_int(XMLTREE(tree)->Tag(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_tagged_sibling(value tree, value id, value tag){ - return(Val_int (XMLTREE(tree)->TaggedFollSibling(TREENODEVAL(id),Int_val(tag)))); +extern "C" value caml_xml_tree_doc_ids(value tree, value node){ + CAMLparam2(tree,node); + CAMLlocal1(tuple); + range ids; + tuple = caml_alloc(2,0); + ids = XMLTREE(tree)->DocIds(Int_val(node)); + Store_field(tuple,0,Val_int(ids.min)); + Store_field(tuple,1,Val_int(ids.max)); + CAMLreturn (tuple); } +NoAlloc extern "C" value caml_xml_tree_parent(value tree, value node){ + return (Val_int(XMLTREE(tree)->Parent(TREENODEVAL(node)))); +} -extern "C" CAMLprim value caml_xml_tree_is_leaf(value tree, value id){ - return(Val_bool (XMLTREE(tree)->IsLeaf(TREENODEVAL(id)))); +NoAlloc extern "C" value caml_xml_tree_child(value tree, value node,value idx){ + return (Val_int(XMLTREE(tree)->Child(TREENODEVAL(node),Int_val(idx)))); } -extern "C" CAMLprim value caml_xml_tree_tagged_desc(value tree, value id, value tag){ - return(Val_int (XMLTREE(tree)->TaggedDesc(TREENODEVAL(id),(TagType) Int_val(tag)))); +NoAlloc extern "C" value caml_xml_tree_first_child(value tree, value node){ + return (Val_int(XMLTREE(tree)->FirstChild(TREENODEVAL(node)))); } +NoAlloc extern "C" value caml_xml_tree_first_element(value tree, value node){ + return (Val_int(XMLTREE(tree)->FirstElement(TREENODEVAL(node)))); +} -extern "C" CAMLprim value caml_xml_tree_tagged_foll(value tree, value id, value tag){ - return(Val_int (XMLTREE(tree)->TaggedFoll(TREENODEVAL(id),(TagType) Int_val(tag)))); +NoAlloc extern "C" value caml_xml_tree_last_child(value tree, value node){ + return (Val_int(XMLTREE(tree)->LastChild(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_tagged_foll_below(value tree, value id, value tag,value root){ - return(Val_int (XMLTREE(tree)->TaggedFollBelow(TREENODEVAL(id),(TagType) Int_val(tag),TREENODEVAL(root)))); + +NoAlloc extern "C" value caml_xml_tree_next_sibling(value tree, value node){ + return (Val_int(XMLTREE(tree)->NextSibling(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_tagged_foll_before(value tree, value id, value tag,value root){ - return(Val_int (XMLTREE(tree)->TaggedFollBefore(TREENODEVAL(id),(TagType) Int_val(tag),TREENODEVAL(root)))); + +NoAlloc extern "C" value caml_xml_tree_next_element(value tree, value node){ + return (Val_int(XMLTREE(tree)->NextElement(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_my_text(value tree, value id){ - return(Val_int((XMLTREE(tree)->MyText(TREENODEVAL(id))))); +NoAlloc extern "C" value caml_xml_tree_prev_sibling(value tree, value node){ + return (Val_int(XMLTREE(tree)->PrevSibling(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_my_text_unsafe(value tree, value id){ - return(Val_int((XMLTREE(tree)->MyTextUnsafe(TREENODEVAL(id))))); +NoAlloc extern "C" value caml_xml_tree_tagged_child(value tree, value node,value tag){ + return (Val_int(XMLTREE(tree)->TaggedChild(TREENODEVAL(node),TAGVAL(tag)))); } -extern "C" CAMLprim value caml_xml_tree_text_xml_id(value tree, value id){ - return(Val_int((XMLTREE(tree)->TextXMLId(TREENODEVAL(id))))); +NoAlloc extern "C" value caml_xml_tree_select_child(value tree, value node,value tags){ + return (Val_int(XMLTREE(tree)->SelectChild(TREENODEVAL(node), HSET(tags)))); } -extern "C" CAMLprim value caml_xml_tree_node_xml_id(value tree, value id){ - return(Val_int((XMLTREE(tree)->NodeXMLId(TREENODEVAL(id))))); + +NoAlloc extern "C" value caml_xml_tree_tagged_following_sibling(value tree, value node,value tag){ + return (Val_int(XMLTREE(tree)->TaggedFollowingSibling(TREENODEVAL(node),TAGVAL(tag)))); } -extern "C" CAMLprim value caml_xml_tree_tag_name(value tree, value tagid){ - CAMLparam2(tree,tagid); - CAMLlocal1(str); - char* tag; - tag = (char*) XMLTREE(tree)->GetTagNameByRef((TagType) (Int_val(tagid))); - str = caml_copy_string((const char*) tag); - CAMLreturn (str); +NoAlloc extern "C" value caml_xml_tree_select_following_sibling(value tree, value node,value tags){ + return (Val_int(XMLTREE(tree)->SelectFollowingSibling(TREENODEVAL(node), HSET(tags)))); } +NoAlloc extern "C" value caml_xml_tree_tagged_descendant(value tree, value node, value tag){ + return (Val_int(XMLTREE(tree)->TaggedDescendant(TREENODEVAL(node), TAGVAL(tag)))); +} -extern "C" CAMLprim value caml_xml_tree_tag_id(value tree,value id){ - return (Val_int(XMLTREE(tree)->Tag(TREENODEVAL(id)))); +NoAlloc extern "C" value caml_xml_tree_select_descendant(value tree, value node, value tags){ + return (Val_int(XMLTREE(tree)->SelectDescendant(TREENODEVAL(node), HSET(tags)))); } -extern "C" CAMLprim value caml_xml_tree_subtree_tags(value tree,value id,value tag){ - return (Val_int(XMLTREE(tree)->SubtreeTags(TREENODEVAL(id),Int_val(tag)))); +NoAlloc extern "C" value caml_xml_tree_tagged_preceding(value tree, value node, value tag){ + return (Val_int(XMLTREE(tree)->TaggedPreceding(TREENODEVAL(node), TAGVAL(tag)))); } -extern "C" CAMLprim value caml_xml_tree_subtree_size(value tree,value id){ - return (Val_int(XMLTREE(tree)->SubtreeSize(TREENODEVAL(id)))); +NoAlloc extern "C" value caml_xml_tree_tagged_following(value tree, value node, value tag){ + return (Val_int(XMLTREE(tree)->TaggedFollowing(TREENODEVAL(node), TAGVAL(tag)))); } -extern "C" CAMLprim value caml_xml_tree_subtree_elements(value tree,value id){ - return (Val_int(XMLTREE(tree)->SubtreeElements(TREENODEVAL(id)))); +NoAlloc extern "C" value caml_xml_tree_tagged_following_below(value tree, value node, value tag, value ancestor){ + return (Val_int(XMLTREE(tree)->TaggedFollowingBelow(TREENODEVAL(node), TAGVAL(tag), TREENODEVAL(ancestor)))); } +NoAlloc extern "C" value caml_xml_tree_select_following_below(value tree, value node, value tags, value ancestor){ + return (Val_int(XMLTREE(tree)->SelectFollowingBelow(TREENODEVAL(node), HSET(tags), TREENODEVAL(ancestor)))); +} -extern "C" CAMLprim value caml_xml_tree_register_tag(value tree,value str){ - CAMLparam2(tree,str); - CAMLlocal1(id); - unsigned char* tag; - tag = (unsigned char*) (String_val(str)); - id = Val_int(XMLTREE(tree)->RegisterTag(tag)); - CAMLreturn (id); +NoAlloc extern "C" value caml_xml_tree_tagged_following_before(value tree, value node, value tag, value closing){ + return (Val_int(XMLTREE(tree)->TaggedFollowingBefore(TREENODEVAL(node), TAGVAL(tag), TREENODEVAL(closing)))); } -extern "C" CAMLprim value caml_xml_tree_nullt(value unit){ - return (NULLT); +NoAlloc extern "C" value caml_xml_tree_select_following_before(value tree, value node, value tags, value closing){ + return (Val_int(XMLTREE(tree)->SelectFollowingBefore(TREENODEVAL(node), HSET(tags), TREENODEVAL(closing)))); } -extern "C" CAMLprim value caml_unordered_set_length(value hset){ - CAMLparam1(hset); - CAMLreturn (Val_int((HSET(hset))->size())); +NoAlloc extern "C" value caml_xml_tree_tagged_ancestor(value tree, value node, value tag){ + return (Val_int(XMLTREE(tree)->TaggedAncestor(TREENODEVAL(node), TAGVAL(tag)))); } -extern "C" CAMLprim value caml_unordered_set_alloc(value len){ - CAMLparam1(len); - CAMLlocal1(hset); - hset = caml_alloc_custom(&set_ops,sizeof(std::unordered_set*),1,2); - std::unordered_set* ht = new std::unordered_set(); - memcpy(Data_custom_val(hset),&ht,sizeof(std::unordered_set*)); - CAMLreturn (hset); +NoAlloc extern "C" value caml_xml_tree_my_text(value tree, value node){ + return (Val_int(XMLTREE(tree)->MyText(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_unordered_set_set(value vec, value v){ - HSET(vec)->insert((int) Int_val(v)); - return (Val_unit); +NoAlloc extern "C" value caml_xml_tree_my_text_unsafe(value tree, value node){ + return (Val_int(XMLTREE(tree)->MyTextUnsafe(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_select_desc(value tree, value node, value tags){ - return (Val_int (XMLTREE(tree)->SelectDesc(TREENODEVAL(node), - HSET(tags)))); +NoAlloc extern "C" value caml_xml_tree_text_xml_id(value tree, value docid){ + return (Val_int(XMLTREE(tree)->TextXMLId(Int_val(docid)))); } -extern "C" CAMLprim value caml_xml_tree_select_child(value tree, value node, value tags){ - return (Val_int (XMLTREE(tree)->SelectChild(TREENODEVAL(node), - HSET(tags)))); + +NoAlloc extern "C" value caml_xml_tree_node_xml_id(value tree, value node){ + return (Val_int(XMLTREE(tree)->NodeXMLId(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_select_foll_sibling(value tree, value node, value tags){ - return (Val_int (XMLTREE(tree)->SelectFollSibling(TREENODEVAL(node), - HSET(tags)))); + +NoAlloc extern "C" value caml_xml_tree_parent_node(value tree, value docid){ + return (Val_int(XMLTREE(tree)->ParentNode(Int_val(docid)))); } -extern "C" CAMLprim value caml_xml_tree_select_foll_below(value tree, value node, value tags,value ctx){ - return (Val_int (XMLTREE(tree)->SelectFollBelow(TREENODEVAL(node), - HSET(tags), - TREENODEVAL(ctx)))); +/* +NoAlloc extern "C" value caml_xml_tree_prev_node(value tree, value docid){ + return (Val_int(XMLTREE(tree)->PrevNode(Int_val(docid)))); } -extern "C" CAMLprim value caml_xml_tree_select_foll_before(value tree, value node, value tags,value ctx){ - return (Val_int (XMLTREE(tree)->SelectFollBelow(TREENODEVAL(node), - HSET(tags), - TREENODEVAL(ctx)))); +*/ +extern "C" value caml_xml_tree_get_tag_id(value tree, value tagname){ + CAMLparam2(tree,tagname); + CAMLlocal1(res); + unsigned char* ctagname = (unsigned char*) strdup(String_val(tagname)); + res = Val_int(XMLTREE(tree)->GetTagId(ctagname)); + free(ctagname); + CAMLreturn(res); } +extern "C" value caml_xml_tree_get_tag_name(value tree, value tag){ + CAMLparam2(tree,tag); + CAMLlocal1(res); + res = caml_copy_string((const char*) XMLTREE(tree)->GetTagNameByRef(TAGVAL(tag))); + CAMLreturn(res); +} -extern "C" CAMLprim value caml_xml_tree_doc_ids(value tree, value node){ - CAMLparam2(tree,node); - CAMLlocal1(tuple); - tuple = caml_alloc_tuple(2); - range r = (XMLTREE(tree)->DocIds(TREENODEVAL(node))); - caml_initialize(&Field(tuple,0),Val_int(r.min)); - caml_initialize(&Field(tuple,1),Val_int(r.max)); - CAMLreturn (tuple); +extern "C" value caml_xml_tree_register_tag(value tree, value tagname){ + CAMLparam2(tree,tagname); + CAMLlocal1(res); + unsigned char* ctagname = (unsigned char*) strdup(String_val(tagname)); + res = Val_int(XMLTREE(tree)->RegisterTag(ctagname)); + free(ctagname); + CAMLreturn(res); +} + + +NoAlloc extern "C" value caml_xml_tree_get_text_collection(value tree){ + return((value) XMLTREE(tree)->getTextCollection()); +} + +NoAlloc extern "C" value caml_xml_tree_closing(value tree, value node){ + return (Val_int(XMLTREE(tree)->Closing(TREENODEVAL(node)))); +} + +NoAlloc extern "C" value caml_xml_tree_is_open(value tree, value node){ + return (Val_bool(XMLTREE(tree)->IsOpen(TREENODEVAL(node)))); +} + + + +NoAlloc extern "C" value caml_xml_tree_nullt(value unit){ + return (NULLT); } -extern "C" value caml_result_set_create(value size){ +NoAlloc extern "C" value caml_unordered_set_length(value hset){ + return (Val_int((HSET(hset))->size())); +} + +extern "C" value caml_unordered_set_alloc(value unit){ + CAMLparam1(unit); + CAMLlocal1(hset); + hset = caml_alloc_custom(&set_ops,sizeof(TagIdSet*),1,2); + TagIdSet* ht = new TagIdSet(); + memcpy(Data_custom_val(hset),&ht,sizeof(TagIdSet*)); + CAMLreturn (hset); +} + +NoAlloc extern "C" value caml_unordered_set_set(value set, value v){ + HSET(set)->insert((int) Int_val(v)); + return (Val_unit); +} + +NoAlloc extern "C" value caml_result_set_create(value size){ results* res = (results*) malloc(sizeof(results)); results r = createResults (Int_val(size)); res->n = r.n; @@ -464,53 +671,132 @@ extern "C" value caml_result_set_create(value size){ return ((value) (res)); } -extern "C" CAMLprim value caml_result_set_set(value result,value p){ - CAMLparam1(p); +NoAlloc extern "C" value caml_result_set_set(value result,value p){ setResult ( *((results*) result), Int_val(p)); - CAMLreturn (Val_unit); + return (Val_unit); } -extern "C" CAMLprim value caml_result_set_clear(value result,value p1,value p2){ - CAMLparam2(p1,p2); +NoAlloc extern "C" value caml_result_set_clear(value result,value p1,value p2){ clearRange ( *((results*) result), Int_val(p1), Int_val(p2)); - CAMLreturn (Val_unit); + return (Val_unit); } -extern "C" CAMLprim value caml_result_set_next(value result,value p){ - CAMLparam1(p); +NoAlloc extern "C" value caml_result_set_next(value result,value p){ results r; r = *( (results *) result); - CAMLreturn (Val_int(nextResult(r, Int_val(p)))); + return (Val_int(nextResult(r, Int_val(p)))); } -extern "C" CAMLprim value caml_result_set_count(value result){ - CAMLparam0(); +NoAlloc extern "C" value caml_result_set_count(value result){ results r; r = *( (results *) result); - CAMLreturn (Val_int(countResult(r))); + return (Val_int(countResult(r))); } -extern "C" CAMLprim value caml_xml_tree_print(value tree,value node,value fd){ +NoAlloc extern "C" value caml_xml_tree_print(value tree,value node,value fd){ CAMLparam3(tree,node,fd); XMLTREE(tree)->Print(Int_val(fd),TREENODEVAL(node)); CAMLreturn(Val_unit); } -extern "C" CAMLprim value caml_set_tag_bits(value result, value tag, value tree, value node) +NoAlloc extern "C" value caml_set_tag_bits(value result, value tag, value tree, value node) { - CAMLparam3(tag,tree,node); results r; XMLTree *t = XMLTREE(Field(tree,0)); treeNode opening = TREENODEVAL(node); treeNode closing = t->Closing(opening); TagType target_tag = Int_val(tag); - treeNode first = t->TaggedDesc(opening,target_tag); + treeNode first = t->TaggedDescendant(opening,target_tag); r = *( (results *) result); opening = first; while (opening != NULLT){ setResult(r,opening); - opening = t->TaggedFollBefore(opening,target_tag,closing); + opening = t->TaggedFollowingBefore(opening,target_tag,closing); }; - CAMLreturn(Val_int(first)); + return(Val_int(first)); } + +NoAlloc extern "C" value caml_bit_vector_create(value size){ + return (value) (new vector(Int_val(size),false)); +} + +NoAlloc extern "C" value caml_bit_vector_free(value vect){ + delete ((vector*) vect); + return Val_unit; +} + +NoAlloc extern "C" value caml_bit_vector_get(value vect,value idx){ + return Val_bool (((vector*)vect)->at(Int_val(idx))); +} + +NoAlloc extern "C" value caml_bit_vector_set(value vect,value idx,value b){ + (((vector*)vect)->at(Int_val(idx))) = (bool) Bool_val(b); + return Val_unit; +} + +NoAlloc extern "C" value caml_bit_vector_next(value vect,value idx){ + vector* bv = (vector*) vect; + int i = Int_val(idx); + int l = bv->size(); + while (i < l && !((*bv)[i])) + i++; + return Val_int(i); +} +NoAlloc extern "C" value caml_bit_vector_prev(value vect,value idx){ + int i = Int_val(idx); + while (i >= 0 && !((*((vector*) vect))[i])) + i--; + return Val_int(i); +} + +extern "C" value caml_bit_vector_node_array(value vect){ + CAMLparam0(); + CAMLlocal1(res); + vector* bv = (vector*) vect; + vector vr; + int l = bv->size(); + int i = 0; + while (i < l){ + if ((*bv)[i]) vr.push_back(i); + i++; + }; + l = vr.size(); + res = caml_alloc_tuple(l); + for(i=0;iTaggedDescendant(node,tag),tag); + iterjump(tree,tree->TaggedFollowing(node,tag),tag); + return; + }; +} + +extern "C" value caml_benchmark_jump(value tree,value tag){ + iterjump(XMLTREE(tree),0, Int_val(tag)); + return Val_unit; +} + +int iterfsns(XMLTree* tree, treeNode node){ + if (node == NULLT) + return 0; + else { + int x = tree->Tag(node); + x += iterfsns(tree,tree->FirstChild(node)); + x += iterfsns(tree,tree->NextSibling(node)); + return x; + }; +} + +extern "C" value caml_benchmark_fsns(value tree){ + iterfsns(XMLTREE(tree),0); + return Val_unit; + +} diff --git a/ata.ml b/ata.ml index a5d4a3f..77d5008 100644 --- a/ata.ml +++ b/ata.ml @@ -337,8 +337,8 @@ module FTable = Hashtbl.Make(struct let h_f = FTable.create BIG_H_SIZE -type merge_conf = NO | MARK | ONLY1 | ONLY2 | ONLY12 | MARK1 | MARK2 | MARK12 - +type merge_conf = NO | ONLY1 | ONLY2 | ONLY12 | MARK | MARK1 | MARK2 | MARK12 +(* 000 001 010 011 100 101 110 111 *) let eval_formlist tag s1 s2 fl = let rec loop fl = try @@ -447,14 +447,14 @@ let tags_of_state a q = else 0 let merge conf t res1 res2 = match conf with - NO -> 0 + NO -> 0 | MARK -> 1 - | ONLY12 -> res1+res2 - | ONLY1 -> res1 - | ONLY2 -> res2 - | MARK12 -> res1+res2+1 - | MARK1 -> res1+1 - | MARK2 -> res2+1 + | MARK1 -> res1+1 + | ONLY1 -> res1 + | ONLY2 -> res2 + | ONLY12 -> res1+res2 + | MARK2 -> res2+1 + | MARK12 -> res1+res2+1 let mk_quick_tag_loop _ sl ss tree tag = (); fun t ctx -> @@ -797,8 +797,8 @@ END (mk_fun (fun _ -> Tree.nil) "Tree.mk_nil") (mk_fun (Tree.tagged_child tree) "Tree.tagged_child") (mk_fun (Tree.select_child tree) "Tree.select_child") - (mk_fun (Tree.tagged_desc tree) "Tree.tagged_desc") - (mk_fun (Tree.select_desc tree) "Tree.select_desc") + (mk_fun (Tree.tagged_descendant tree) "Tree.tagged_desc") + (mk_fun (Tree.select_descendant tree) "Tree.select_desc") (mk_fun (fun _ _ -> Tree.first_child tree) "[FIRSTCHILD]Tree.select_child_desc") (mk_fun (Tree.first_element tree) "Tree.first_element") (mk_fun (Tree.first_child tree) "Tree.first_child") @@ -806,13 +806,13 @@ END let choose_jump_next tree d = choose_jump d (mk_fun (fun _ _ -> Tree.nil) "Tree.mk_nil2") - (mk_fun (Tree.tagged_sibling_ctx tree) "Tree.tagged_sibling_ctx") - (mk_fun (Tree.select_sibling_ctx tree) "Tree.select_sibling_ctx") - (mk_fun (Tree.tagged_foll_ctx tree) "Tree.tagged_foll_ctx") - (mk_fun (Tree.select_foll_ctx tree) "Tree.select_foll_ctx") - (mk_fun (fun _ _ -> Tree.next_sibling_ctx tree) "[NEXTSIBLING]Tree.select_sibling_foll_ctx") - (mk_fun (Tree.next_element_ctx tree) "Tree.next_element_ctx") - (mk_fun (Tree.next_sibling_ctx tree) "Tree.node_sibling_ctx") + (mk_fun (Tree.tagged_following_sibling_below tree) "Tree.tagged_sibling_ctx") + (mk_fun (Tree.select_following_sibling_below tree) "Tree.select_sibling_ctx") + (mk_fun (Tree.tagged_following_below tree) "Tree.tagged_foll_ctx") + (mk_fun (Tree.select_following_below tree) "Tree.select_foll_ctx") + (mk_fun (fun _ _ -> Tree.next_sibling_below tree) "[NEXTSIBLING]Tree.select_sibling_foll_ctx") + (mk_fun (Tree.next_element_below tree) "Tree.next_element_ctx") + (mk_fun (Tree.next_sibling_below tree) "Tree.node_sibling_ctx") module SListTable = Hashtbl.Make(struct type t = SList.t @@ -878,7 +878,18 @@ END in set tab tag data end - + + module TransCache2 = struct + include Hashtbl.Make (struct + type t = Tag.t*SList.t + let equal (a,b) (c,d) = a==c && b==d + let hash (a,b) = HASHINT2((Obj.magic a), b.SList.Node.id) + end) + + let add h t s d = add h (t,s) d + let find h t s = find h (t,s) + end + let td_trans = TransCache.create 10000 (* should be number of tags *number of states^2 in the document *) @@ -892,35 +903,6 @@ END let hash t = t.Formlistlist.Node.id end) - module Fold2ResOld = - struct - type 'a t = 'a SListTable.t SListTable.t FllTable.t - let create n = Array.init 10000 (fun _ -> FllTable.create n) - - let find h tag fl s1 s2 = - let hf = h.(tag) in - let hs1 = FllTable.find hf fl in - let hs2 = SListTable.find hs1 s1 in - SListTable.find hs2 s2 - - let add h tag fl s1 s2 data = - let hf = h.(tag) in - let hs1 = - try FllTable.find hf fl with - | Not_found -> - let hs1 = SListTable.create SMALL_H_SIZE - in FllTable.add hf fl hs1;hs1 - in - let hs2 = - try SListTable.find hs1 s1 - with - | Not_found -> - let hs2 = SListTable.create SMALL_H_SIZE - in SListTable.add hs1 s1 hs2;hs2 - in - SListTable.add hs2 s2 data - end - module Fold2Res = struct external get : 'a array -> int ->'a = "%array_unsafe_get" external set : 'a array -> int -> 'a -> unit = "%array_unsafe_set" @@ -976,7 +958,20 @@ END in set as2 s2.SList.Node.id data end - + + module Fold2Res2 = struct + include Hashtbl.Make(struct + type t = Tag.t*Formlistlist.t*SList.t*SList.t + let equal (a,b,c,d) (x,y,z,t) = + a == x && b == y && c == z && d == t + let hash (a,b,c,d) = HASHINT4 (a,b.Formlistlist.Node.id, + c.SList.Node.id,d.SList.Node.id) + end) + let add h t f s1 s2 d = + add h (t,f,s1,s2) d + let find h t f s1 s2 = + find h (t,f,s1,s2) + end let h_fold2 = Fold2Res.create 10000 @@ -986,35 +981,37 @@ END (* evaluation starts from the right so we put sl1,res1 at the end *) let eval_fold2_slist fll t tag (sl2,res2) (sl1,res1) = let res = Array.copy rempty in - try - let r,b,btab = Fold2Res.find h_fold2 tag fll sl1 sl2 in - if b then for i=0 to slot_size - 1 do - res.(i) <- RS.merge btab.(i) t res1.(i) res2.(i); - done; - r,res - with - Not_found -> - let btab = Array.make slot_size NO in - let rec fold l1 l2 fll i aq ab = - match fll.Formlistlist.Node.node, - l1.SList.Node.node, - l2.SList.Node.node - with - | Formlistlist.Cons(fl,fll), - SList.Cons(s1,ll1), - SList.Cons(s2,ll2) -> - let r',conf = eval_formlist tag s1 s2 fl in - let _ = btab.(i) <- conf + try + let r,b,btab = Fold2Res.find h_fold2 tag fll sl1 sl2 in + if b then for i=0 to slot_size - 1 do + res.(i) <- RS.merge btab.(i) t res1.(i) res2.(i); + done; + r,res + with + Not_found -> + begin + let btab = Array.make slot_size NO in + let rec fold l1 l2 fll i aq ab = + match fll.Formlistlist.Node.node, + l1.SList.Node.node, + l2.SList.Node.node + with + | Formlistlist.Cons(fl,fll), + SList.Cons(s1,ll1), + SList.Cons(s2,ll2) -> + let r',conf = eval_formlist tag s1 s2 fl in + let _ = btab.(i) <- conf in - fold ll1 ll2 fll (i+1) (SList.cons r' aq) ((conf!=NO)||ab) - | _ -> aq,ab - in - let r,b = fold sl1 sl2 fll 0 SList.nil false in - Fold2Res.add h_fold2 tag fll sl1 sl2 (r,b,btab); - if b then for i=0 to slot_size - 1 do - res.(i) <- RS.merge btab.(i) t res1.(i) res2.(i); - done; - r,res + fold ll1 ll2 fll (i+1) (SList.cons r' aq) ((conf!=NO)||ab) + | _ -> aq,ab + in + let r,b = fold sl1 sl2 fll 0 SList.nil false in + Fold2Res.add h_fold2 tag fll sl1 sl2 (r,b,btab); + if b then for i=0 to slot_size - 1 do + res.(i) <- RS.merge btab.(i) t res1.(i) res2.(i); + done; + r,res; + end in let null_result = (pempty,Array.copy rempty) in @@ -1029,7 +1026,7 @@ END try TransCache.find td_trans tag slist with - | Not_found -> + | Not_found -> let fl_list,llist,rlist,ca,da,sa,fa = SList.fold (fun set (fll_acc,lllacc,rllacc,ca,da,sa,fa) -> (* For each set *) @@ -1067,7 +1064,10 @@ END let d_n = Algebra.decide a tags_siblings tags_after (StateSet.union sa fa) false in let f_kind,first = choose_jump_down tree d_f and n_kind,next = if noright then (`NIL, fun _ _ -> Tree.nil ) - else choose_jump_next tree d_n in + else choose_jump_next tree d_n in + (*let f_kind,first = `ANY, Tree.first_child tree + and n_kind,next = `ANY, Tree.next_sibling_below tree + in *) let empty_res = null_result in let cont = match f_kind,n_kind with @@ -1096,13 +1096,13 @@ END | `NIL,_ -> ( match n_kind with |`TAG(tag') -> - if SList.equal rlist slist && tag == tag' then + (*if SList.equal rlist slist && tag == tag' then let rec loop t ctx = if t == Tree.nil then empty_res else let res2 = loop (next t ctx) ctx in eval_fold2_slist fl_list t tag res2 empty_res in loop - else + else *) (fun t ctx -> eval_fold2_slist fl_list t (Tree.tag tree t) (loop_tag tag' (next t ctx) rlist ctx ) empty_res) @@ -1130,7 +1130,7 @@ END (loop (first t) llist t )) | `ANY,`ANY -> - if SList.equal slist rlist && SList.equal slist llist + (*if SList.equal slist rlist && SList.equal slist llist then let rec loop t ctx = if t == Tree.nil then empty_res else @@ -1139,7 +1139,7 @@ END in eval_fold2_slist fl_list t (Tree.tag tree t) r2 r1 in loop - else + else *) (fun t ctx -> eval_fold2_slist fl_list t (Tree.tag tree t) (loop (next t ctx) rlist ctx ) @@ -1157,7 +1157,7 @@ END (a,b) ) ,cont) in - (TransCache.add td_trans tag slist cont ;cont) + ( TransCache.add td_trans tag slist cont ; cont) in cont t ctx in @@ -1371,7 +1371,7 @@ END match k with | `TAG (tag) -> (*Tree.tagged_lowest t tag, fun tree -> Tree.tagged_next tree tag*) - (Tree.tagged_desc tree tag t, let jump = Tree.tagged_foll_ctx tree tag + (Tree.tagged_descendant tree tag t, let jump = Tree.tagged_following_below tree tag in fun n -> jump n t ) | `CONTAINS(_) -> (Tree.text_below tree t,let jump = Tree.text_next tree in fun n -> jump n t) diff --git a/ata.mli b/ata.mli index a5fb3e1..13e8431 100644 --- a/ata.mli +++ b/ata.mli @@ -4,7 +4,7 @@ sig include Sigs.T with type t = int val make : unit -> t end -type merge_conf = NO | MARK | ONLY1 | ONLY2 | ONLY12 | MARK1 | MARK2 | MARK12 +type merge_conf = NO | ONLY1 | ONLY2 | ONLY12 | MARK | MARK1 | MARK2 | MARK12 module StateSet : sig include Ptset.S with type elt = int diff --git a/main.ml b/main.ml index c889833..342bb2f 100644 --- a/main.ml +++ b/main.ml @@ -16,10 +16,10 @@ let disabled_gc = { Gc.get() with let hash x = 131*x/(x-1+1) let test_loop tree tag = - let t' = Tree.tagged_desc tree tag Tree.root in + let t' = Tree.tagged_descendant tree tag Tree.root in let f = Hashtbl.create 4096 in - let jump t _ = Tree.tagged_foll_ctx tree tag t Tree.root in + let jump t _ = Tree.tagged_following_below tree tag t Tree.root in let g t ctx = if t == Tree.nil then 0 else 1+ ((Hashtbl.find f (hash 101)) (jump t ctx) ctx) @@ -41,10 +41,10 @@ let test_full tree = let test_loop2 tree tag = - let t' = Tree.tagged_desc tree tag Tree.root in + let t' = Tree.tagged_descendant tree tag Tree.root in let f = Hashtbl.create 4096 in - let jump t _ = Tree.tagged_foll_ctx tree tag t Tree.root in + let jump t _ = Tree.tagged_following_below tree tag t Tree.root in let rec g t ctx = if t == Tree.nil then 0 else 1+ (match (Hashtbl.find f (hash 101)) with @@ -64,6 +64,12 @@ let main v query_string output = with Ulexer.Loc.Exc_located ((x,y),e) -> Printf.eprintf "character %i-%i %s\n" x y (Printexc.to_string e);exit 1 in + let _ = Printf.eprintf "Number of nodes %i\n%!" (Tree.size v) in + let _ = Printf.eprintf "Timing first_child/next_sibling %!" in + let _ = time (Tree.benchmark_fsns) v in + let _ = Printf.eprintf "Timing jump to a %!" in + let _ = time (Tree.benchmark_jump v) (Tag.tag "a") in + (* let _ = Printf.eprintf "Timing //keyword :" in let r = time (test_loop v) (Tag.tag "keyword") in let _ = Printf.eprintf "Count is %i\n%!" r in diff --git a/results.c b/results.c index 7570e5a..1f1deda 100644 --- a/results.c +++ b/results.c @@ -212,7 +212,7 @@ int nextResult (results R, int p) // returns pos of next(p) or -1 if none // Naively implemented by kim unsigned int countResult(results R) { - unsigned int result = 0; + unsigned int result = -1; int i = 0; while ( i != -1 && i < R.n) { result ++; diff --git a/tag.ml b/tag.ml index ec80df4..139ff91 100644 --- a/tag.ml +++ b/tag.ml @@ -13,7 +13,7 @@ type pool external null_pool : unit -> pool = "caml_xml_tree_nullt" external null_tag : unit -> t = "caml_xml_tree_nullt" external register_tag : pool -> string -> t = "caml_xml_tree_register_tag" -external tag_name : pool -> t -> string = "caml_xml_tree_tag_name" +external tag_name : pool -> t -> string = "caml_xml_tree_get_tag_name" let nullt = null_tag () (* Defined in XMLTree.cpp *) diff --git a/tree.ml b/tree.ml index f21015d..86f7e5b 100644 --- a/tree.ml +++ b/tree.ml @@ -43,84 +43,112 @@ let nil : [`Tree ] node = nodei ~-1 let nulldoc : [`Text ] node = nodei ~-1 let root : [`Tree ] node = nodei 0 -external text_get_tc_text : tree -> [`Text] node -> string = "caml_text_collection_get_text" - +external text_get_text : tree -> [`Text] node -> string = "caml_text_collection_get_text" external text_is_empty : tree -> [`Text ] node -> bool = "caml_text_collection_empty_text" let text_is_empty t n = (equal_node nulldoc n) || text_is_empty t n +external text_is_prefix : tree -> string -> bool = "caml_text_collection_is_prefix" +external text_is_suffix : tree -> string -> bool = "caml_text_collection_is_suffix" +external text_is_equal : tree -> string -> bool = "caml_text_collection_is_equal" external text_is_contains : tree -> string -> bool = "caml_text_collection_is_contains" -external text_count_contains : tree -> string -> int = "caml_text_collection_count_contains" -external text_count : tree -> string -> int = "caml_text_collection_count" -external text_contains : tree -> string -> [`Text ] node array = "caml_text_collection_contains" -external text_startswith : tree -> string -> [`Text ] node array = "caml_text_collection_startswith" -external text_endswith : tree -> string -> [`Text ] node array = "caml_text_collection_endswith" -external text_equals : tree -> string -> [`Text ] node array = "caml_text_collection_equals" -external text_unsorted_contains : tree -> string -> [`Text ] node array = "caml_text_collection_unsorted_contains" -external text_get_cached_text : tree -> [`Text] node -> string = "caml_text_collection_get_cached_text" +external text_is_lessthan : tree -> string -> bool = "caml_text_collection_is_lessthan" + +external text_count : tree -> string -> int = "caml_text_collection_count" +external text_count_prefix : tree -> string -> int = "caml_text_collection_count_prefix" +external text_count_suffix : tree -> string -> int = "caml_text_collection_count_suffix" +external text_count_equal : tree -> string -> int = "caml_text_collection_count_equal" +external text_count_contains : tree -> string -> int = "caml_text_collection_count_contains" +external text_count_lessthan : tree -> string -> int = "caml_text_collection_count_lessthan" + +external text_prefix : tree -> string -> [`Text ] node array = "caml_text_collection_prefix" +external text_suffix : tree -> string -> [`Text ] node array = "caml_text_collection_suffix" +external text_equals : tree -> string -> [`Text ] node array = "caml_text_collection_equals" +external text_contains : tree -> string -> [`Text ] node array = "caml_text_collection_contains" +external text_lessthan : tree -> string -> [`Text ] node array = "caml_text_collection_lessthan" + -external tree_root : tree -> [`Tree] node = "caml_xml_tree_root" -external tree_subtree_size : tree -> [`Tree] node -> int = "caml_xml_tree_subtree_size" -external tree_subtree_elements : tree -> [`Tree] node -> int = "caml_xml_tree_subtree_elements" - +external tree_root : tree -> [`Tree] node = "caml_xml_tree_root" "noalloc" +external tree_size : tree -> int = "caml_xml_tree_size" "noalloc" +external tree_subtree_size : tree -> [`Tree] node -> int = "caml_xml_tree_subtree_size" "noalloc" +external tree_subtree_elements : tree -> [`Tree] node -> int = "caml_xml_tree_subtree_elements" "noalloc" +external tree_subtree_tags : tree -> [`Tree] node -> Tag.t -> int = "caml_xml_tree_subtree_elements" "noalloc" + let tree_is_nil x = equal_node x nil +external tree_is_leaf : tree -> [`Tree ] node -> bool = "caml_xml_tree_is_leaf" "noalloc" +external tree_is_ancestor : tree -> [`Tree ] node -> [`Tree ] node -> bool = "caml_xml_tree_is_ancestor" "noalloc" +external tree_is_child : tree -> [`Tree ] node -> [`Tree ] node -> bool = "caml_xml_tree_is_child" "noalloc" +external tree_is_first_child : tree -> [`Tree ] node -> bool = "caml_xml_tree_is_first_child" "noalloc" +external tree_num_children : tree -> [`Tree ] node -> int = "caml_xml_tree_num_children" "noalloc" +external tree_child_number : tree -> [`Tree ] node -> int = "caml_xml_tree_child_number" "noalloc" +external tree_depth : tree -> [`Tree ] node -> int = "caml_xml_tree_depth" "noalloc" +external tree_preorder : tree -> [`Tree ] node -> int = "caml_xml_tree_preorder" "noalloc" +external tree_postorder : tree -> [`Tree ] node -> int = "caml_xml_tree_postorder" "noalloc" +external tree_tag : tree -> [`Tree ] node -> Tag.t = "caml_xml_tree_tag" "noalloc" +external tree_doc_ids : tree -> [`Tree ] node -> [`Text] node*[`Text] node = "caml_xml_tree_doc_ids" external tree_parent : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_parent" "noalloc" -external tree_parent_doc : tree -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_parent_doc" "noalloc" -(*external tree_prev_doc : tree -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_prev_doc" "noalloc" *) +external tree_child : tree -> [`Tree] node -> int -> [`Tree] node = "caml_xml_tree_child" "noalloc" external tree_first_child : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_child" "noalloc" +external tree_first_element : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_element" "noalloc" +external tree_last_child : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_last_child" "noalloc" +external tree_next_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_sibling" "noalloc" +external tree_next_element : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_element" "noalloc" +external tree_prev_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_prev_sibling" "noalloc" +external tree_tagged_child : tree -> [`Tree] node -> Tag.t -> [`Tree] node = "caml_xml_tree_tagged_child" "noalloc" + +type unordered_set +external unordered_set_alloc : int -> unordered_set = "caml_unordered_set_alloc" +external unordered_set_length : unordered_set -> int = "caml_unordered_set_length" +external unordered_set_insert : unordered_set -> int -> unit = "caml_unordered_set_set" "noalloc" + +external tree_select_child : tree -> [`Tree ] node -> unordered_set -> [`Tree] node = "caml_xml_tree_select_child" "noalloc" +external tree_tagged_following_sibling : tree -> [`Tree] node -> Tag.t -> [`Tree] node = "caml_xml_tree_tagged_following_sibling" "noalloc" +external tree_select_following_sibling : tree -> [`Tree ] node -> unordered_set -> [`Tree] node = "caml_xml_tree_select_following_sibling" "noalloc" +external tree_tagged_descendant : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_descendant" "noalloc" +external tree_select_descendant : tree -> [`Tree ] node -> unordered_set -> [`Tree] node = "caml_xml_tree_select_descendant" "noalloc" +external tree_tagged_following : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_following" "noalloc" +external tree_tagged_following_below : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_following_below" "noalloc" +external tree_select_following_below : tree -> [`Tree ] node -> unordered_set -> [`Tree] node -> [`Tree] node = "caml_xml_tree_select_following_below" "noalloc" + + +external tree_tagged_following_before : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_following_before" "noalloc" +external tree_select_following_below : tree -> [`Tree ] node -> unordered_set -> [`Tree] node -> [`Tree] node = "caml_xml_tree_select_following_before" "noalloc" + +external tree_my_text : tree -> [`Tree ] node -> [`Text] node = "caml_xml_tree_my_text" "noalloc" +external tree_my_text_unsafe : tree -> [`Tree ] node -> [`Text] node = "caml_xml_tree_my_text_unsafe" "noalloc" +external tree_text_xml_id : tree -> [`Text ] node -> int = "caml_xml_tree_text_xml_id" "noalloc" +external tree_node_xml_id : tree -> [`Tree ] node -> int = "caml_xml_tree_node_xml_id" "noalloc" + +external tree_parent_node : tree -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_parent_node" "noalloc" + +(*external tree_prev_doc : tree -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_prev_doc" "noalloc" *) + external tree_closing : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_closing" "noalloc" external tree_is_open : tree -> [`Tree] node -> bool = "caml_xml_tree_is_open" "noalloc" -external tree_first_element : t -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_element" "noalloc" -external tree_tagged_child : tree -> [`Tree] node -> Tag.t -> [`Tree] node = "caml_xml_tree_tagged_child" "noalloc" -external tree_next_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_sibling" "noalloc" -external tree_next_element : t -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_element" "noalloc" -external tree_tagged_sibling : tree -> [`Tree] node -> Tag.t -> [`Tree] node = "caml_xml_tree_tagged_sibling" "noalloc" -external tree_prev_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_prev_sibling" "noalloc" -external tree_is_leaf : tree -> [`Tree] node -> bool = "caml_xml_tree_is_leaf" "noalloc" -external tree_last_child : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_last_child" "noalloc" -external tree_is_first_child : tree -> [`Tree] node -> bool = "caml_xml_tree_is_first_child" "noalloc" +external benchmark_jump : tree -> Tag.t -> unit = "caml_benchmark_jump" "noalloc" -external tree_tag_id : tree -> [`Tree ] node -> Tag.t = "caml_xml_tree_tag_id" "noalloc" - +let benchmark_jump t s = benchmark_jump t.doc s -let tree_is_last t n = equal_node nil (tree_next_sibling t n) +external benchmark_fsns : tree -> unit = "caml_benchmark_fsns" "noalloc" +let benchmark_fsns t = benchmark_fsns t.doc -external tree_my_text : tree -> [`Tree] node -> [`Text ] node = "caml_xml_tree_my_text" "noalloc" -external tree_my_text_unsafe : tree -> [`Tree] node -> [`Text ] node = "caml_xml_tree_my_text_unsafe" "noalloc" -(*external tree_next_text : tree -> [`Tree] node -> [`Text ] node = "caml_xml_tree_next_text" "noalloc" *) -external tree_doc_ids : tree -> [`Tree ] node -> [`Text ] node * [`Text ] node = "caml_xml_tree_doc_ids" -let text_size tree = inode (snd ( tree_doc_ids tree root )) -let text_get_cached_text t (x:[`Text] node) = - if x == nulldoc then "" - else - text_get_cached_text t x -external tree_text_xml_id : tree -> [`Text ] node -> int = "caml_xml_tree_text_xml_id" "noalloc" -external tree_node_xml_id : tree -> [`Tree ] node -> int = "caml_xml_tree_node_xml_id" "noalloc" -external tree_is_ancestor : tree -> [`Tree ] node -> [`Tree ] node -> bool = "caml_xml_tree_is_ancestor" "noalloc" -external tree_tagged_desc : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_desc" "noalloc" -external tree_tagged_foll_below : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_foll_below" "noalloc" -external tree_tagged_foll_before : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_foll_before" "noalloc" -external tree_subtree_tags : tree -> [`Tree ] node -> Tag.t -> int = "caml_xml_tree_subtree_tags" "noalloc" -type unordered_set -external unordered_set_alloc : int -> unordered_set = "caml_unordered_set_alloc" -external unordered_set_length : unordered_set -> int = "caml_unordered_set_length" -external unordered_set_insert : unordered_set -> int -> unit = "caml_unordered_set_set" "noalloc" +let text_size tree = inode (snd ( tree_doc_ids tree root )) + +let text_get_text t (x:[`Text] node) = + if x == nulldoc then "" + else text_get_text t x + -external tree_select_child : tree -> [`Tree ] node -> unordered_set -> [`Tree] node = "caml_xml_tree_select_child" "noalloc" -external tree_select_foll_sibling : tree -> [`Tree ] node -> unordered_set -> [`Tree] node = "caml_xml_tree_select_foll_sibling" "noalloc" -external tree_select_desc : tree -> [`Tree ] node -> unordered_set -> [`Tree] node = "caml_xml_tree_select_desc" "noalloc" -external tree_select_foll_below : tree -> [`Tree ] node -> unordered_set -> [`Tree] node -> [`Tree] node = "caml_xml_tree_select_foll_below" "noalloc" -external tree_select_foll_before : tree -> [`Tree ] node -> unordered_set -> [`Tree] node -> [`Tree] node = "caml_xml_tree_select_foll_before" "noalloc" module HPtset = Hashtbl.Make(Ptset.Int) @@ -144,24 +172,25 @@ let text_size t = text_size t.doc module MemUnion = Hashtbl.Make (struct type t = Ptset.Int.t*Ptset.Int.t - let equal (x,y) (z,t) = (Ptset.Int.equal x z)&&(Ptset.Int.equal y t) + let equal (x,y) (z,t) = x == z || y == t let equal a b = equal a b || equal b a let hash (x,y) = (* commutative hash *) - let x = Ptset.Int.hash x - and y = Ptset.Int.hash y + let x = Ptset.Int.uid x + and y = Ptset.Int.uid y in - if x < y then HASHINT2(x,y) else HASHINT2(y,x) + if x <= y then HASHINT2(x,y) else HASHINT2(y,x) end) module MemAdd = Hashtbl.Make ( struct type t = Tag.t*Ptset.Int.t - let equal (x,y) (z,t) = (x == z)&&(Ptset.Int.equal y t) - let hash (x,y) = HASHINT2(x,Ptset.Int.hash y) + let equal (x,y) (z,t) = (x == z)&&(y == t) + let hash (x,y) = HASHINT2(x,Ptset.Int.uid y) end) let collect_tags tree = - let h_union = MemUnion.create BIG_H_SIZE in + let _ = Printf.eprintf "Collecting Tags\n%!" in +(* let h_union = MemUnion.create BIG_H_SIZE in let pt_cup s1 s2 = try MemUnion.find h_union (s1,s2) @@ -176,9 +205,11 @@ let collect_tags tree = with | Not_found -> let r = Ptset.Int.add t s in MemAdd.add h_add (t,s) r;r - in + in *) + let pt_cup = Ptset.Int.union in + let pt_add = Ptset.Int.add in let h = Hashtbl.create BIG_H_SIZE in - let update t sc sb ss sa = + let update t sc sb ss sa = let schild,sbelow,ssibling,safter = try Hashtbl.find h t @@ -187,32 +218,22 @@ let collect_tags tree = (Ptset.Int.empty,Ptset.Int.empty,Ptset.Int.empty,Ptset.Int.empty) in Hashtbl.replace h t - (pt_cup sc schild,pt_cup sbelow sb, pt_cup ssibling ss, pt_cup safter sa) + (pt_cup sc schild,pt_cup sbelow sb, pt_cup ssibling ss, pt_cup safter sa) in - let rec loop_right id acc_after = + let rec loop right id acc_after = if id == nil - then Ptset.Int.empty,Ptset.Int.empty,acc_after - else - let sibling2,desc2,after2 = loop_right (tree_next_sibling tree id) acc_after in - let child1,desc1,after1 = loop_left (tree_first_child tree id) after2 in - let tag = tree_tag_id tree id in + then Ptset.Int.empty,Ptset.Int.empty,acc_after else + let sibling2,desc2,after2 = loop true (tree_next_sibling tree id) acc_after in + let child1,desc1,after1 = loop false (tree_first_child tree id) after2 in + let tag = tree_tag tree id in update tag child1 desc1 sibling2 after2; ( pt_add tag sibling2, pt_add tag (pt_cup desc1 desc2), - pt_cup after1 (pt_cup desc1 desc2) ) - and loop_left id acc_after = - if id == nil - then Ptset.Int.empty,Ptset.Int.empty,acc_after - else - let sibling2,desc2,after2 = loop_right (tree_next_sibling tree id) acc_after in - let child1,desc1,after1 = loop_left (tree_first_child tree id) after2 in - let tag = tree_tag_id tree id in - update tag child1 desc1 sibling2 after2; - (pt_add tag sibling2, - pt_add tag (pt_cup desc1 desc2), - acc_after ) + if right then pt_cup after1 (pt_cup desc1 desc2) else acc_after ) in - let _ = loop_left (tree_root tree) Ptset.Int.empty in h + let _ = loop false (tree_root tree) Ptset.Int.empty in + let _ = Printf.eprintf "Finished\n%!" in + h @@ -228,8 +249,8 @@ let in_array _ i = let init_textfun f t s = let a = match f with | `CONTAINS -> text_contains t.doc s - | `STARTSWITH -> text_startswith t.doc s - | `ENDSWITH -> text_endswith t.doc s + | `STARTSWITH -> text_prefix t.doc s + | `ENDSWITH -> text_suffix t.doc s | `EQUALS -> text_equals t.doc s in (*Array.fast_sort (compare) a; *) @@ -237,7 +258,6 @@ let init_textfun f t s = Array.iter (fun x -> Hashtbl.add contains_index x true) !contains_array let count_contains t s = text_count_contains t.doc s -let unsorted_contains t s = text_unsorted_contains t.doc s let init_naive_contains t s = let i,j = tree_doc_ids t.doc (tree_root t.doc) @@ -252,7 +272,7 @@ let init_naive_contains t s = let rec loop n acc l = if n >= j then acc,l else - let s = text_get_cached_text t.doc n + let s = text_get_text t.doc n in if matching s then loop (nodei ((inode n)+1)) (n::acc) (l+1) @@ -281,7 +301,7 @@ let text_below tree t = let l = Array.length !contains_array in let i,j = tree_doc_ids tree.doc t in let id = if l == 0 then i else (array_find !contains_array i j) in - tree_parent_doc tree.doc id + tree_parent_node tree.doc id let text_next tree t root = let l = Array.length !contains_array in @@ -290,7 +310,7 @@ let text_next tree t root = let id = if l == 0 then if inf > j then nulldoc else inf else array_find !contains_array inf j in - tree_parent_doc tree.doc id + tree_parent_node tree.doc id @@ -339,6 +359,7 @@ let parse f str = let parse_xml_uri str = parse parse_xml_uri str let parse_xml_string str = parse parse_xml_string str +let size t = tree_size t.doc;; external pool : tree -> Tag.pool = "%identity" @@ -452,7 +473,7 @@ let is_binary_ancestor t n1 n2 = let parent t n = tree_parent t.doc n let first_child t = (); fun n -> tree_first_child t.doc n -let first_element t = (); fun n -> tree_first_element t n +let first_element t = (); fun n -> tree_first_element t.doc n (* these function will be called in two times: first partial application on the tag, then application of the tag and the tree, then application of @@ -466,37 +487,37 @@ let select_child t = fun ts -> fun n -> tree_select_child t.doc n v let next_sibling t = (); fun n -> tree_next_sibling t.doc n -let next_element t = (); fun n -> tree_next_element t n +let next_element t = (); fun n -> tree_next_element t.doc n -let tagged_sibling t tag = (); fun n -> tree_tagged_sibling t.doc n tag +let tagged_following_sibling t tag = (); fun n -> tree_tagged_following_sibling t.doc n tag -let select_sibling t = fun ts -> +let select_following_sibling t = fun ts -> let v = (ptset_to_vector ts) in (); - fun n -> tree_select_foll_sibling t.doc n v + fun n -> tree_select_following_sibling t.doc n v -let next_sibling_ctx t = (); fun n _ -> tree_next_sibling t.doc n -let next_element_ctx t = (); fun n _ -> tree_next_element t n -let tagged_sibling_ctx t tag = (); fun n _ -> tree_tagged_sibling t.doc n tag +let next_sibling_below t = (); fun n _ -> tree_next_sibling t.doc n +let next_element_below t = (); fun n _ -> tree_next_element t.doc n +let tagged_following_sibling_below t tag = (); fun n _ -> tree_tagged_following_sibling t.doc n tag -let select_sibling_ctx t = fun ts -> +let select_following_sibling_below t = fun ts -> let v = (ptset_to_vector ts) in (); - fun n _ -> tree_select_foll_sibling t.doc n v + fun n _ -> tree_select_following_sibling t.doc n v let id t n = tree_node_xml_id t.doc n -let tag t n = if n == nil then Tag.nullt else tree_tag_id t.doc n +let tag t n = if n == nil then Tag.nullt else tree_tag t.doc n -let tagged_desc t tag = (); fun n -> tree_tagged_desc t.doc n tag +let tagged_descendant t tag = (); fun n -> tree_tagged_descendant t.doc n tag -let select_desc t = fun ts -> +let select_descendant t = fun ts -> let v = (ptset_to_vector ts) in (); - fun n -> tree_select_desc t.doc n v + fun n -> tree_select_descendant t.doc n v -let tagged_foll_ctx t tag = (); fun n ctx -> tree_tagged_foll_below t.doc n tag ctx +let tagged_following_below t tag = (); fun n ctx -> tree_tagged_following_below t.doc n tag ctx -let select_foll_ctx t = fun ts -> +let select_following_below t = fun ts -> let v = (ptset_to_vector ts) in (); - fun n ctx -> tree_select_foll_below t.doc n v ctx + fun n ctx -> tree_select_following_below t.doc n v ctx let closing t n = tree_closing t.doc n let is_open t n = tree_is_open t.doc n @@ -552,7 +573,7 @@ let array_find a i j = (* opening tag *) if tag == Tag.pcdata then begin - output_string outc (text_get_cached_text tree (tree_my_text_unsafe tree t)); + output_string outc (text_get_text tree (tree_my_text_unsafe tree t)); loop (next2 t) (* skip closing $ *) end else @@ -562,7 +583,7 @@ let array_find a i j = let t' = next t in if tree_is_open tree t' then let _ = push tagstr in - let tag' = tree_tag_id tree t' in + let tag' = tree_tag tree t' in if tag' == Tag.attribute then let t'' = loop_attr (next t') 0 in output_string outc ">"; loop t'' else (output_string outc ">";loop_tag t' tag') else (* closing with no content *) @@ -576,15 +597,15 @@ let array_find a i j = output_char outc '>'; loop (next t); end - and loop t = loop_tag t (tree_tag_id tree t) + and loop t = loop_tag t (tree_tag tree t) and loop_attr t n = if tree_is_open tree t then - let attname = att_str (tree_tag_id tree t) in + let attname = att_str (tree_tag tree t) in output_char outc ' '; output_string outc attname; output_string outc "=\""; let t = next t in (* open $@ *) - output_string outc (text_get_cached_text tree (tree_my_text_unsafe tree t)); + output_string outc (text_get_text tree (tree_my_text_unsafe tree t)); output_char outc '"'; loop_attr (next3 t) (n+1) else @@ -606,12 +627,12 @@ let array_find a i j = let rec loop ?(print_right=true) t = if t != nil then - let tagid = tree_tag_id tree.doc t in + let tagid = tree_tag tree.doc t in if tagid==Tag.pcdata then begin let tid = tree_my_text_unsafe tree.doc t in - output_string outc (text_get_cached_text tree.doc tid); + output_string outc (text_get_text tree.doc tid); if print_right then loop (next_sibling tree t); end @@ -655,7 +676,7 @@ let array_find a i j = output_char outc ' '; output_string outc attname; output_string outc "=\""; - output_string outc (text_get_cached_text tree.doc tid); + output_string outc (text_get_text tree.doc tid); output_char outc '"'; loop_attributes (next_sibling tree a) in @@ -685,7 +706,7 @@ let rec binary_parent t n = if tree_is_first_child t.doc n then tree_parent t.doc n else tree_prev_sibling t.doc n - in if tree_tag_id t.doc r = Tag.pcdata then + in if tree_tag t.doc r = Tag.pcdata then binary_parent t r else r @@ -698,20 +719,20 @@ let subtree_tags t tag = (); let get_text t n = let tid = tree_my_text t.doc n in if tid == nulldoc then "" else - text_get_cached_text t.doc tid + text_get_text t.doc tid let dump_tree fmt tree = let rec loop t n = if t != nil then - let tag = (tree_tag_id tree.doc t ) in + let tag = (tree_tag tree.doc t ) in let tagstr = Tag.to_string tag in let tab = String.make n ' ' in if tag == Tag.pcdata || tag == Tag.attribute_data then Format.fprintf fmt "%s<%s>%s\n" - tab tagstr (text_get_cached_text tree.doc (tree_my_text tree.doc t)) tagstr + tab tagstr (text_get_text tree.doc (tree_my_text tree.doc t)) tagstr else begin Format.fprintf fmt "%s<%s>\n" tab tagstr; loop (tree_first_child tree.doc t) (n+2); @@ -724,3 +745,14 @@ let dump_tree fmt tree = let print_xml_fast3 t = tree_print_xml_fast3 t.doc + + + + + + + + + + + diff --git a/tree.mli b/tree.mli index 8cffd18..59bbcc2 100644 --- a/tree.mli +++ b/tree.mli @@ -20,7 +20,7 @@ val dump_node : 'a node -> string val nil : [ `Tree ] node val root : [ `Tree ] node - +val size : t -> int val is_root : [ `Tree ] node -> bool val is_nil : [ `Tree ] node -> bool @@ -34,25 +34,25 @@ val select_child : t -> Ptset.Int.t -> [ `Tree ] node -> [ `Tree ] node val next_sibling : t -> [ `Tree ] node -> [ `Tree ] node val next_element : t -> [ `Tree ] node -> [ `Tree ] node -val next_sibling_ctx : t -> [ `Tree ] node -> [ `Tree ] node -> [ `Tree ] node -val next_element_ctx : t -> [ `Tree ] node -> [ `Tree ] node -> [ `Tree ] node +val next_sibling_below : t -> [ `Tree ] node -> [ `Tree ] node -> [ `Tree ] node +val next_element_below : t -> [ `Tree ] node -> [ `Tree ] node -> [ `Tree ] node -val tagged_sibling : t -> Tag.t -> [ `Tree ] node -> [ `Tree ] node -val tagged_sibling_ctx : t -> Tag.t -> [ `Tree ] node -> [ `Tree ] node -> [ `Tree ] node +val tagged_following_sibling : t -> Tag.t -> [ `Tree ] node -> [ `Tree ] node +val tagged_following_sibling_below : t -> Tag.t -> [ `Tree ] node -> [ `Tree ] node -> [ `Tree ] node -val select_sibling : t -> Ptset.Int.t -> [ `Tree ] node -> [ `Tree ] node -val select_sibling_ctx : t -> Ptset.Int.t -> [ `Tree ] node -> [ `Tree ] node -> [ `Tree ] node +val select_following_sibling : t -> Ptset.Int.t -> [ `Tree ] node -> [ `Tree ] node +val select_following_sibling_below : t -> Ptset.Int.t -> [ `Tree ] node -> [ `Tree ] node -> [ `Tree ] node val tag : t -> [ `Tree ] node -> Tag.t val id : t -> [ `Tree ] node -> int -val tagged_desc : t -> Tag.t -> [ `Tree ] node -> [`Tree] node -val select_desc : t -> Ptset.Int.t -> [ `Tree ] node -> [`Tree] node +val tagged_descendant : t -> Tag.t -> [ `Tree ] node -> [`Tree] node +val select_descendant : t -> Ptset.Int.t -> [ `Tree ] node -> [`Tree] node -val tagged_foll_ctx : t -> Tag.t -> [ `Tree ] node -> [`Tree] node -> [ `Tree ] node -val select_foll_ctx : t -> Ptset.Int.t -> [ `Tree ] node -> [`Tree] node -> [ `Tree ] node +val tagged_following_below : t -> Tag.t -> [ `Tree ] node -> [`Tree] node -> [ `Tree ] node +val select_following_below : t -> Ptset.Int.t -> [ `Tree ] node -> [`Tree] node -> [ `Tree ] node val count : t -> string -> int val print_xml_fast : out_channel -> t -> [ `Tree ] node -> unit @@ -86,3 +86,6 @@ val text_next : t -> [`Tree] node -> [`Tree] node -> [`Tree] node val closing : t -> [`Tree] node -> [`Tree] node val is_open : t -> [`Tree] node -> bool + +val benchmark_jump : t -> Tag.t -> unit +val benchmark_fsns : t -> unit -- 2.17.1 From 95310b9c1cf213f4ba28cab981400678d399717a Mon Sep 17 00:00:00 2001 From: kim Date: Fri, 21 Aug 2009 00:28:53 +0000 Subject: [PATCH 15/16] Added -O3 for gonzalo's result sets git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@557 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- Makefile | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 154c166..0fdc017 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -INLINE=10000 +INLINE=100 DEBUG=false PROFILE=false VERBOSE=false @@ -36,6 +36,7 @@ CXXINCLUDES = \ -IXMLTree/TextCollection CXXFLAGS = -O3 -Wall $(INCLUDEDIRS) -fPIC -std=c++0x +CCFLAGS = -O3 -Wall -fPIC ifeq ($(VERBOSE),true) HIDE= @@ -45,10 +46,12 @@ endif ifeq ($(DEBUG), true) CXX = g++ -DDEBUG +CC = gcc -DDEBUG DEBUG_FLAGS = -g SYNT_DEBUG = -ppopt -DDEBUG else CXX = g++ +CC = gcc -DDEBUG endif ifeq ($(PROFILE), true) @@ -66,7 +69,7 @@ OCAMLMKLIB = ocamlmklib OCAMLDEP = ocamldep #Ugly but seems difficult with a makefile -LINK=$(OCAMLOPT) -linkpkg `ocamlc -version | grep -q "3.11.0" && echo dynlink.cmxa` camlp4lib.cmxa +LINK=$(OCAMLOPT) -linkpkg `ocamlc -version | grep -q "3.11.[01]" && echo dynlink.cmxa` camlp4lib.cmxa SYNTAX= -syntax camlp4o $(PPINCLUDES) -ppopt pa_macro.cmo $(SYNT_FLAGS) @@ -88,6 +91,10 @@ unit_test: libcamlshredder.a $(BASEOBJS) unit_test.cmx .SUFFIXES: .ml .mli .cmx .cmi .cpp .PHONY:compute_depend version +.c.o: + @echo [CC] $@ + $(HIDE) $(CC) -c $(CCFLAGS) $< + .cpp.o: @echo [CPP] $@ $(HIDE) $(CXX) $(CXXINCLUDES) -c $(CXXFLAGS) $< -- 2.17.1 From 609094fe14ca90cd5417ee22de621f76d1d0ec94 Mon Sep 17 00:00:00 2001 From: kim Date: Fri, 21 Aug 2009 00:29:10 +0000 Subject: [PATCH 16/16] Various cleanups git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@558 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- ata.ml | 22 +++++++++++----------- results.c | 2 +- tree.ml | 7 +++++-- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/ata.ml b/ata.ml index 77d5008..4ea2067 100644 --- a/ata.ml +++ b/ata.ml @@ -528,13 +528,13 @@ let tags_of_state a q = module GResult(Doc : sig val doc : Tree.t end) = struct type bits type elt = [` Tree] Tree.node - external create_empty : int -> bits = "caml_result_set_create" - external set : bits -> int -> unit = "caml_result_set_set" - external next : bits -> int -> int = "caml_result_set_next" - external count : bits -> int = "caml_result_set_count" - external clear : bits -> elt -> elt -> unit = "caml_result_set_clear" + external create_empty : int -> bits = "caml_result_set_create" "noalloc" + external set : bits -> int -> unit = "caml_result_set_set" "noalloc" + external next : bits -> int -> int = "caml_result_set_next" "noalloc" + external count : bits -> int = "caml_result_set_count" "noalloc" + external clear : bits -> elt -> elt -> unit = "caml_result_set_clear" "noalloc" - external set_tag_bits : bits -> Tag.t -> Tree.t -> elt -> elt = "caml_set_tag_bits" + external set_tag_bits : bits -> Tag.t -> Tree.t -> elt -> elt = "caml_set_tag_bits" "noalloc" type t = { segments : elt list; bits : bits; @@ -1075,10 +1075,10 @@ END (fun t _ -> eval_fold2_slist fl_list t (Tree.tag tree t) empty_res empty_res) | _,`NIL -> ( match f_kind with - |`TAG(tag') -> + (*|`TAG(tag') -> let default = fun t _ -> eval_fold2_slist fl_list t (Tree.tag tree t) empty_res (loop_tag tag' (first t) llist t ) - in + in default (* let cf = SList.hd llist in if (slot_size == 1) && StateSet.is_singleton cf then @@ -1088,7 +1088,7 @@ END then RS.mk_quick_tag_loop default llist 1 tree tag' else default - else default + else default *) *) | _ -> (fun t _ -> eval_fold2_slist fl_list t (Tree.tag tree t) empty_res (loop (first t) llist t )) @@ -1096,13 +1096,13 @@ END | `NIL,_ -> ( match n_kind with |`TAG(tag') -> - (*if SList.equal rlist slist && tag == tag' then + if SList.equal rlist slist && tag == tag' then let rec loop t ctx = if t == Tree.nil then empty_res else let res2 = loop (next t ctx) ctx in eval_fold2_slist fl_list t tag res2 empty_res in loop - else *) + else (fun t ctx -> eval_fold2_slist fl_list t (Tree.tag tree t) (loop_tag tag' (next t ctx) rlist ctx ) empty_res) diff --git a/results.c b/results.c index 1f1deda..19480fd 100644 --- a/results.c +++ b/results.c @@ -184,9 +184,9 @@ static int nextLarger (int *tree, int n, int p, int pos, int pot) { int answ; if (!getBit(tree,pos)) return -1; // no answer - pot--; pos = (pos<<1)+1; if (pos >= n) return 0; // when n is not a power of 2, missing leaves + pot--; if ((p>>pot) == 0) // p goes left { answ = nextLarger(tree,n,p&~(1< tree_tagged_descendant t.doc n tag +let tagged_descendant t tag = + let doc = t.doc in (); fun n -> tree_tagged_descendant doc n tag let select_descendant t = fun ts -> let v = (ptset_to_vector ts) in (); fun n -> tree_select_descendant t.doc n v -let tagged_following_below t tag = (); fun n ctx -> tree_tagged_following_below t.doc n tag ctx +let tagged_following_below t tag = + let doc = t.doc in + (); fun n ctx -> tree_tagged_following_below doc n tag ctx let select_following_below t = fun ts -> let v = (ptset_to_vector ts) in (); -- 2.17.1
";Formlist.print strf fl;pr_str "