From 477c1bef21a38e8371a745da9491fd6a6aae2aa5 Mon Sep 17 00:00:00 2001 From: kim Date: Mon, 11 May 2009 05:32:40 +0000 Subject: [PATCH 1/1] Restored bottom up run git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@382 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- Makefile | 7 +++--- OCamlDriver.cpp | 35 +++++++++++++++++++++++++++- ata.ml | 61 ++++++++++++++++++++++++++++++------------------- ata.mli | 3 ++- main.ml | 10 ++++---- tests/test.xml | 17 ++++++-------- tree.ml | 46 ++++++++++++++++++++++++++++++++----- tree.mli | 3 +++ 8 files changed, 134 insertions(+), 48 deletions(-) diff --git a/Makefile b/Makefile index 97ac568..7dfc4d0 100644 --- a/Makefile +++ b/Makefile @@ -17,9 +17,9 @@ OCAMLPACKAGES = str,unix,ulex,camlp4 PPINCLUDES=$(OCAMLINCLUDES:%=-ppopt %) -CXXSOURCES = XMLDocShredder.cpp OCamlDriver.cpp -CXXOBJECTS = $(CXXSOURCES:.cpp=.o) - +CXXSOURCES = results.c XMLDocShredder.cpp OCamlDriver.cpp +CXXOBJECTS1 = $(CXXSOURCES:.cpp=.o) +CXXOBJECTS = $(CXXOBJECTS1:.c=.o) CXXINCLUDES = \ -I/usr/include/libxml++-2.6 \ -I/usr/include/libxml2 \ @@ -133,6 +133,7 @@ timeXMLTree: $(CXXOBJECTS) XMLTree/XMLTree.a timeXMLTree.cpp myTimeXMLTree.cpp XMLDocShredder.o: XMLDocShredder.h XMLDocShredder.cpp OCamlDriver.o: XMLDocShredder.h +results.o: results.h compute_depend: @echo [DEP] diff --git a/OCamlDriver.cpp b/OCamlDriver.cpp index e2c6f00..baf9698 100644 --- a/OCamlDriver.cpp +++ b/OCamlDriver.cpp @@ -24,7 +24,7 @@ extern "C" { #include #include #include - +#include "results.h" #define CAMLRAISEMSG(msg) (caml_raise_with_string(*cpp_exception,(msg) )) #define NOT_IMPLEMENTED(s) (caml_failwith(s)) @@ -318,6 +318,10 @@ extern "C" CAMLprim value caml_xml_tree_subtree_tags(value tree,value id,value t return (Val_int(XMLTREE(tree)->SubtreeTags(TREENODEVAL(id),Int_val(tag)))); } +extern "C" CAMLprim value caml_xml_tree_subtree_size(value tree,value id){ + return (Val_int(XMLTREE(tree)->SubtreeSize(TREENODEVAL(id)))); +} + extern "C" CAMLprim value caml_xml_tree_register_tag(value tree,value str){ CAMLparam2(tree,str); @@ -379,3 +383,32 @@ extern "C" CAMLprim value caml_xml_tree_doc_ids(value tree, value node){ caml_initialize(&Field(tuple,1),Val_int(r.max)); CAMLreturn (tuple); } + +extern "C" CAMLprim value caml_result_set_create(value size){ + CAMLparam1(size); + results* res = (results*) malloc(sizeof(results)); + results r = createResults (Int_val(size)); + res->n = r.n; + res->lgn = r.lgn; + res->tree = r.tree; + CAMLreturn ((value) (res)); +} + +extern "C" CAMLprim value caml_result_set_set(value result,value p){ + CAMLparam2(result,p); + setResult ( *((results*) result), Int_val(p)); + CAMLreturn (Val_unit); +} + +extern "C" CAMLprim value caml_result_set_clear(value result,value p1,value p2){ + CAMLparam3(result,p1,p2); + clearRange ( *((results*) result), Int_val(p1), Int_val(p2)); + CAMLreturn (Val_unit); +} + +extern "C" CAMLprim value caml_result_set_next(value result,value p){ + CAMLparam2(result,p); + CAMLreturn (Val_int(nextResult(*((results*) result), Int_val(p)))); +} + + diff --git a/ata.ml b/ata.ml index c3dbc47..fa44fd6 100644 --- a/ata.ml +++ b/ata.ml @@ -455,7 +455,7 @@ let tags_of_state a q = let merge (rb,rb1,rb2,mark) t res1 res2 = if rb then let res1 = if rb1 then res1 else empty - and res2 = if rb2 then res2 else empty + and res2 = if rb2 then res2 else empty in if mark then { node = Cons(t,(Concat(res1.node,res2.node))); length = res1.length + res2.length + 1;} @@ -466,7 +466,32 @@ let tags_of_state a q = end - + module GResult = struct + type t + type elt = [` Tree] Tree.node + external create_empty : int -> t = "caml_result_set_create" + external set : t -> int -> t = "caml_result_set_set" + external next : t -> int -> int = "caml_result_set_next" + external clear : t -> int -> int -> unit = "caml_result_set_clear" + let empty = create_empty 100000000 + + let cons e t = set t (Obj.magic e) + let concat _ t = t + let iter f t = + let rec loop i = + if i == -1 then () + else (f (Obj.magic i);loop (next t i)) + in loop 0 + + let fold _ _ _ = failwith "noop" + let map _ _ = failwith "noop" + let length t = let cpt = ref ~-1 in + iter (fun _ -> incr cpt) t; !cpt + + let merge (rb,rb1,rb2,mark) elt t1 t2 = + if mark then (set t1 (Obj.magic elt) ; t1) else t1 + + end module Run (RS : ResultSet) = struct @@ -876,12 +901,12 @@ END in (Hashtbl.add h_trans key res;res) + let h_tdconf = Hashtbl.create 511 let rec bottom_up a tree t conf next jump_fun root dotd init accu = if (not dotd) && (Configuration.is_empty conf ) then - - accu,conf,next + accu,conf,next else let below_right = Tree.is_below_right tree t next in @@ -900,7 +925,7 @@ END let conf,next = (Configuration.merge rightconf sub, next_of_next) in - if t == root then accu,conf,next else + if t == root then accu,conf,next else let parent = Tree.binary_parent tree t in let ptag = Tree.tag tree parent in let dir = Tree.is_left tree t in @@ -919,8 +944,6 @@ END and prepare_topdown a tree t noright = let tag = Tree.tag tree t in -(* pr "Going top down on tree with tag %s = %s " - (if Tree.is_nil t then "###" else (Tag.to_string(Tree.tag t))) (Tree.dump_node t); *) let r = try Hashtbl.find h_tdconf tag @@ -941,24 +964,21 @@ END let set = match SList.node set with | SList.Cons(x,_) ->x | _ -> assert false - in -(* pr "Result of topdown run is %!"; - StateSet.print fmt (Ptset.Int.elements set); - pr ", number is %i\n%!" (RS.length res.(0)); *) - Configuration.add Configuration.empty set res.(0) + in + Configuration.add Configuration.empty set res.(0) let run_bottom_up a tree k = let t = Tree.root in - let trlist = Hashtbl.find a.trans (Ptset.Int.choose a.init) + let trlist = Hashtbl.find a.trans (StateSet.choose a.init) in let init = List.fold_left (fun acc (_,t) -> let _,_,f,_ = Transition.node t in let _,_,l = fst ( Formula.st f ) in - Ptset.Int.union acc l) - Ptset.Int.empty trlist + StateSet.union acc l) + StateSet.empty trlist in let tree1,jump_fun = match k with @@ -966,22 +986,17 @@ END (*Tree.tagged_lowest t tag, fun tree -> Tree.tagged_next tree tag*) (Tree.tagged_desc tree tag t, let jump = Tree.tagged_foll_ctx tree tag in fun n -> jump n t ) - | `CONTAINS(_) -> (Tree.first_child tree t,let jump = Tree.next_sibling_ctx tree + | `CONTAINS(_) -> (Tree.text_below tree t,let jump = Tree.text_next tree in fun n -> jump n t) | _ -> assert false in let tree2 = jump_fun tree1 in let rec loop t next acc = -(* let _ = pr "\n_________________________\nNew iteration\n" in - let _ = pr "Jumping to %s\n%!" (Tree.dump_node tree) in *) let acc,conf,next_of_next = bottom_up a tree t Configuration.empty next jump_fun (Tree.root) true init acc in - (* let _ = pr "End of first iteration, conf is:\n%!"; - Configuration.pr fmt conf - in *) let acc = Configuration.IMap.fold - ( fun s res acc -> if Ptset.Int.intersect init s + ( fun s res acc -> if StateSet.intersect init s then RS.concat res acc else acc) conf.Configuration.results acc in if Tree.is_nil next_of_next (*|| Tree.equal next next_of_next *)then @@ -994,7 +1009,7 @@ END end let top_down_count a t = let module RI = Run(Integer) in Integer.length (RI.run_top_down a t) - let top_down a t = let module RI = Run(IdSet) in (RI.run_top_down a t) + let top_down a t = let module RI = Run(GResult) in (RI.run_top_down a t) let bottom_up_count a t k = let module RI = Run(Integer) in Integer.length (RI.run_bottom_up a t k) diff --git a/ata.mli b/ata.mli index 24d0832..e8d64b1 100644 --- a/ata.mli +++ b/ata.mli @@ -96,8 +96,9 @@ module type ResultSet = end module IdSet : ResultSet +module GResult : ResultSet val top_down_count : 'a t -> Tree.t -> int -val top_down : 'a t -> Tree.t -> IdSet.t +val top_down : 'a t -> Tree.t -> GResult.t val bottom_up_count : 'a t -> Tree.t -> [> `CONTAINS of 'b | `TAG of Tag.t ] -> int diff --git a/main.ml b/main.ml index 021a18e..c9885e6 100644 --- a/main.ml +++ b/main.ml @@ -38,7 +38,7 @@ let main v query_string output = let auto,ltags,contains = time (XPath.Compile.compile ~querystring:query_string) query in let _ = Ata.dump Format.err_formatter auto in let _ = Printf.eprintf "%!" in - let jump_to = + let jump_to = match contains with None -> (max_int,`NOTHING) | Some s -> @@ -55,6 +55,8 @@ let main v query_string output = time (Tree.init_naive_contains v) s end;(r,`CONTAINS(s)) in + let test_list = jump_to in + (* let test_list = if (!Options.backward) then begin Printf.eprintf "Finding min occurences : "; @@ -66,7 +68,7 @@ let main v query_string output = else acc) jump_to) ltags end else (max_int,`NOTHING) - in + in*) let _ = if (snd test_list) != `NOTHING then let occ,s1,s2 = match test_list with | (x,`TAG (tag)) -> (x, "tag", (Tag.to_string tag)) @@ -96,7 +98,7 @@ let main v query_string output = in () else let result = time (top_down auto) v in - let rcount = IdSet.length result in + let rcount = GResult.length result in Printf.eprintf "Number of nodes in the result set : %i\n" rcount; Printf.eprintf "\n%!"; begin @@ -107,7 +109,7 @@ let main v query_string output = time( fun () -> let oc = open_out f in output_string oc "\n"; - IdSet.iter (fun t -> + GResult.iter (fun t -> Tree.print_xml_fast oc v t; output_char oc '\n'; diff --git a/tests/test.xml b/tests/test.xml index 0d548e4..a9c7a55 100644 --- a/tests/test.xml +++ b/tests/test.xml @@ -1,13 +1,10 @@ - - - - - - -

- - -
+
+ foo + foo + foo + foo + foo + foo diff --git a/tree.ml b/tree.ml index 9e80e6d..730e174 100644 --- a/tree.ml +++ b/tree.ml @@ -52,7 +52,8 @@ external text_unsorted_contains : tree -> string -> unit = "caml_text_collection external text_get_cached_text : tree -> [`Text] node -> string = "caml_text_collection_get_cached_text" external tree_root : tree -> [`Tree] node = "caml_xml_tree_root" - +external tree_subtree_size : tree -> [`Tree] node -> int = "caml_xml_tree_subtree_size" + let tree_is_nil x = equal_node x nil external tree_parent : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_parent" "noalloc" @@ -127,7 +128,7 @@ type t = { doc : tree; ttable : (Tag.t,(Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t)) Hashtbl.t; } - +let subtree_size t i = tree_subtree_size t.doc i let text_size t = text_size t.doc module MemUnion = Hashtbl.Make (struct @@ -243,7 +244,35 @@ let init_naive_contains t s = let _ = List.fold_left (fun cpt e -> a.(cpt) <- e; (cpt-1)) (l-1) acc in contains_array := a + +let last_idx = ref 0 + +let array_find a i j = + let l = Array.length a in + let rec loop idx x y = + if x > y || idx >= l then nulldoc + else + if a.(idx) >= x then if a.(idx) > y then nulldoc else (last_idx := idx;a.(idx)) + else loop (idx+1) x y + in + if a.(0) > j || a.(l-1) < i then nulldoc + else loop !last_idx i j +let text_below tree t = + let l = Array.length !contains_array in + let i,j = tree_doc_ids tree.doc t in + let id = if l == 0 then i else (array_find !contains_array i j) in + tree_parent_doc tree.doc id + +let text_next tree t root = + let l = Array.length !contains_array in + let inf = nodei((inode(snd(tree_doc_ids tree.doc t)))+1) in + let _,j = tree_doc_ids tree.doc root in + let id = if l == 0 then if inf > j then nulldoc else inf + else array_find !contains_array inf j + in + tree_parent_doc tree.doc id + module DocIdSet = struct @@ -367,11 +396,12 @@ let nts = function let dump_node t = nts (inode t) - let is_left t n = tree_is_first_child t.doc n -let is_below_right t n1 n2 = tree_is_ancestor t.doc (tree_parent t.doc n1) n2 - +let is_below_right t n1 n2 = + tree_is_ancestor t.doc (tree_parent t.doc n1) n2 + && not (tree_is_ancestor t.doc n1 n2) + let parent t n = tree_parent t.doc n let first_child t = (); fun n -> tree_first_child t.doc n @@ -520,10 +550,14 @@ let tags_after t tag = let tags t tag = Hashtbl.find t.ttable tag -let binary_parent t n = +let rec binary_parent t n = + let r = if tree_is_first_child t.doc n then tree_parent t.doc n else tree_prev_sibling t.doc n + in if tree_tag_id t.doc r = Tag.pcdata then + binary_parent t r + else r let doc_ids t n = tree_doc_ids t.doc n diff --git a/tree.mli b/tree.mli index c38ab02..ba0fc44 100644 --- a/tree.mli +++ b/tree.mli @@ -75,3 +75,6 @@ val subtree_tags : t -> Tag.t -> [`Tree] node -> int val get_text : t -> [`Tree] node -> string val dump_tree : Format.formatter -> t -> unit +val subtree_size : t -> [`Tree] node -> int +val text_below : t -> [`Tree] node -> [`Tree] node +val text_next : t -> [`Tree] node -> [`Tree] node -> [`Tree] node -- 2.17.1