From be1caa5c46009c13241cc48ed34a36ee2936ef87 Mon Sep 17 00:00:00 2001 From: kim Date: Sat, 6 Jun 2009 03:46:01 +0000 Subject: [PATCH] Safety commit git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@428 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- OCamlDriver.cpp | 42 +++++++--- ata.ml | 156 ++++++++++++++++++++++++++++-------- ata.mli | 10 ++- main.ml | 41 +++++++--- tag.ml | 4 + tag.mli | 4 + tests/test.xml | 3 +- tests/xpathmark-queries.txt | 14 ++++ tree.ml | 120 ++++++++++++++++++++++++--- tree.mli | 8 ++ utils.ml | 11 ++- 11 files changed, 346 insertions(+), 67 deletions(-) create mode 100644 tests/xpathmark-queries.txt diff --git a/OCamlDriver.cpp b/OCamlDriver.cpp index 9df73d6..a57e0c6 100644 --- a/OCamlDriver.cpp +++ b/OCamlDriver.cpp @@ -29,7 +29,7 @@ extern "C" { #define CAMLRAISEMSG(msg) (caml_raise_with_string(*cpp_exception,(msg) )) #define NOT_IMPLEMENTED(s) (caml_failwith(s)) #define XMLTREE(x) ((XMLTree *)(* (XMLTree**) Data_custom_val(x))) -#define HSET(x) ((std::unordered_set*)((* (XMLTree**) Data_custom_val(x)))) +#define HSET(x) ((std::unordered_set*)((* (std::unordered_set**) Data_custom_val(x)))) #define TEXTCOLLECTION(x) #define TREENODEVAL(i) ((treeNode) (Int_val(i))) #define XMLTREE_ROOT 0 @@ -248,6 +248,13 @@ extern "C" CAMLprim value caml_xml_tree_is_first_child(value tree, value id){ extern "C" CAMLprim value caml_xml_tree_first_child(value tree, value id){ return(Val_int (XMLTREE(tree)->FirstChild(TREENODEVAL(id)))); } +extern "C" CAMLprim value caml_xml_tree_closing(value tree, value id){ + return(Val_int (XMLTREE(tree)->Closing(TREENODEVAL(id)))); +} +extern "C" CAMLprim value caml_xml_tree_is_open(value tree, value id){ + return(Val_bool (XMLTREE(tree)->IsOpen(TREENODEVAL(id)))); +} + extern "C" CAMLprim value caml_xml_tree_first_element(value tree, value id){ return(Val_int (XMLTREE(tree)->FirstElement(TREENODEVAL(id)))); } @@ -285,12 +292,14 @@ extern "C" CAMLprim value caml_xml_tree_tagged_foll_below(value tree, value id, return(Val_int (XMLTREE(tree)->TaggedFollBelow(TREENODEVAL(id),(TagType) Int_val(tag),TREENODEVAL(root)))); } - - extern "C" CAMLprim value caml_xml_tree_my_text(value tree, value id){ return(Val_int((XMLTREE(tree)->MyText(TREENODEVAL(id))))); } +extern "C" CAMLprim value caml_xml_tree_my_text_unsafe(value tree, value id){ + return(Val_int((XMLTREE(tree)->MyTextUnsafe(TREENODEVAL(id))))); +} + extern "C" CAMLprim value caml_xml_tree_text_xml_id(value tree, value id){ return(Val_int((XMLTREE(tree)->TextXMLId(TREENODEVAL(id))))); } @@ -320,6 +329,10 @@ extern "C" CAMLprim value caml_xml_tree_subtree_size(value tree,value id){ return (Val_int(XMLTREE(tree)->SubtreeSize(TREENODEVAL(id)))); } +extern "C" CAMLprim value caml_xml_tree_subtree_elements(value tree,value id){ + return (Val_int(XMLTREE(tree)->SubtreeElements(TREENODEVAL(id)))); +} + extern "C" CAMLprim value caml_xml_tree_register_tag(value tree,value str){ CAMLparam2(tree,str); @@ -382,31 +395,38 @@ extern "C" CAMLprim value caml_xml_tree_doc_ids(value tree, value node){ CAMLreturn (tuple); } -extern "C" CAMLprim value caml_result_set_create(value size){ - CAMLparam1(size); +extern "C" value caml_result_set_create(value size){ results* res = (results*) malloc(sizeof(results)); - results r = createResults (Int_val(size)); + results r = createResults (Int_val(size)); res->n = r.n; res->lgn = r.lgn; res->tree = r.tree; - CAMLreturn ((value) (res)); + return ((value) (res)); } extern "C" CAMLprim value caml_result_set_set(value result,value p){ - CAMLparam2(result,p); + CAMLparam1(p); + results r; setResult ( *((results*) result), Int_val(p)); CAMLreturn (Val_unit); } extern "C" CAMLprim value caml_result_set_clear(value result,value p1,value p2){ - CAMLparam3(result,p1,p2); + CAMLparam2(p1,p2); clearRange ( *((results*) result), Int_val(p1), Int_val(p2)); CAMLreturn (Val_unit); } extern "C" CAMLprim value caml_result_set_next(value result,value p){ - CAMLparam2(result,p); - CAMLreturn (Val_int(nextResult(*((results*) result), Int_val(p)))); + CAMLparam1(p); + results r; + r = *( (results *) result); + CAMLreturn (Val_int(nextResult(r, Int_val(p)))); } +extern "C" CAMLprim value caml_xml_tree_print(value tree,value node,value fd){ + CAMLparam3(tree,node,fd); + XMLTREE(tree)->Print(Int_val(fd),TREENODEVAL(node)); + CAMLreturn(Val_unit); +} diff --git a/ata.ml b/ata.ml index 9eb5311..3741b56 100644 --- a/ata.ml +++ b/ata.ml @@ -328,12 +328,12 @@ let eval_form_bool = in loop f -module FTable = Hashtbl.Make( struct - type t = Tag.t*Formlist.t*StateSet.t*StateSet.t - let equal (tg1,f1,s1,t1) (tg2,f2,s2,t2) = - tg1 == tg2 && f1 == f2 && s1 == s2 && t1 == t2;; - let hash (tg,f,s,t) = HASHINT4(tg,Formlist.uid f ,StateSet.uid s,StateSet.uid t);; - end) +module FTable = Hashtbl.Make(struct + type t = Tag.t*Formlist.t*StateSet.t*StateSet.t + let equal (tg1,f1,s1,t1) (tg2,f2,s2,t2) = + tg1 == tg2 && f1 == f2 && s1 == s2 && t1 == t2;; + let hash (tg,f,s,t) = HASHINT4(tg,Formlist.uid f ,StateSet.uid s,StateSet.uid t);; + end) let h_f = FTable.create BIG_H_SIZE @@ -395,6 +395,8 @@ let tags_of_state a q = val map : ( elt -> elt) -> t -> t val length : t -> int val merge : (bool*bool*bool*bool) -> elt -> t -> t -> t + val mk_quick_tag_loop : (elt -> elt -> 'a*t array) -> 'a -> int -> Tree.t -> Tag.t -> (elt -> elt -> 'a*t array) + val mk_quick_star_loop : (elt -> elt -> 'a*t array) -> 'a -> int -> Tree.t -> (elt -> elt -> 'a*t array) end module Integer : ResultSet = @@ -416,6 +418,13 @@ let tags_of_state a q = if mark then 1+res1+res2 else res1+res2 else 0 + let mk_quick_tag_loop _ sl ss tree tag = (); + fun t ctx -> + (sl, Array.make ss (Tree.subtree_tags tree tag t)) + let mk_quick_star_loop _ sl ss tree = (); + fun t ctx -> + (sl, Array.make ss (Tree.subtree_elements tree t)) + end module IdSet : ResultSet = @@ -470,35 +479,99 @@ let tags_of_state a q = else { node = (Concat(res1.node,res2.node)); length = res1.length + res2.length ;} - else empty - - + else empty + let mk_quick_tag_loop f _ _ _ _ = f + let mk_quick_star_loop f _ _ _ = f end - module GResult = struct - type t + module GResult(Doc : sig val doc : Tree.t end) = struct + type bits type elt = [` Tree] Tree.node - external create_empty : int -> t = "caml_result_set_create" - external set : t -> int -> t = "caml_result_set_set" - external next : t -> int -> int = "caml_result_set_next" - external clear : t -> int -> int -> unit = "caml_result_set_clear" - let empty = create_empty 100000000 + external create_empty : int -> bits = "caml_result_set_create" + external set : bits -> int -> unit = "caml_result_set_set" + external next : bits -> int -> int = "caml_result_set_next" + external clear : bits -> elt -> elt -> unit = "caml_result_set_clear" + + type t = + { segments : elt list; + bits : bits; + } + + let ebits = + let size = (Tree.subtree_size Doc.doc Tree.root) in + create_empty (size*2+1) + + let empty = { segments = []; + bits = ebits } - let cons e t = set t (Obj.magic e) - let concat _ t = t + let cons e t = + let rec loop l = match l with + | [] -> { bits = (set t.bits (Obj.magic e);t.bits); + segments = [ e ] } + | p::r -> + if Tree.is_binary_ancestor Doc.doc e p then + loop r + else + { bits = (set t.bits (Obj.magic e);t.bits); + segments = e::l } + in + loop t.segments + + let concat t1 t2 = + if t2.segments == [] then t1 + else + if t1.segments == [] then t2 + else + let h2 = List.hd t2.segments in + let rec loop l = match l with + | [] -> t2.segments + | p::r -> + if Tree.is_binary_ancestor Doc.doc p h2 then + l + else + p::(loop r) + in + { bits = t1.bits; + segments = loop t1.segments + } + let iter f t = let rec loop i = if i == -1 then () - else (f (Obj.magic i);loop (next t i)) - in loop 0 + else (f ((Obj.magic i):elt);loop (next t.bits i)) + in loop (next t.bits 0) let fold _ _ _ = failwith "noop" let map _ _ = failwith "noop" - let length t = let cpt = ref ~-1 in + let length t = let cpt = ref 0 in iter (fun _ -> incr cpt) t; !cpt let merge (rb,rb1,rb2,mark) elt t1 t2 = - if mark then (set t1 (Obj.magic elt) ; t1) else t1 - + if rb then +(* let _ = Printf.eprintf "Lenght before merging is %i %i\n" + (List.length t1.segments) (List.length t2.segments) + in *) + match t1.segments,t2.segments with + [],[] -> if mark then cons elt empty else empty + | [p],[] when rb1 -> if mark then cons elt t1 else t1 + | [], [p] when rb2 -> if mark then cons elt t2 else t2 + | [x],[y] when rb1 && rb2 -> if mark then cons elt empty else + concat t1 t2 + | _,_ -> + let t1 = if rb1 then t1 else + (List.iter (fun idx -> clear t1.bits idx (Tree.closing Doc.doc idx)) t1.segments;empty) + and t2 = if rb2 then t2 else + (List.iter (fun idx -> clear t2.bits idx (Tree.closing Doc.doc idx)) t2.segments;empty) + in + (if mark then cons elt (concat t1 t2) + else concat t1 t2) + else + let _ = + List.iter (fun idx -> clear t1.bits idx (Tree.closing Doc.doc idx)) t1.segments; + List.iter (fun idx -> clear t2.bits idx (Tree.closing Doc.doc idx)) t2.segments + in + empty + let mk_quick_tag_loop f _ _ _ _ = f + let mk_quick_star_loop f _ _ _ = f end module Run (RS : ResultSet) = struct @@ -574,7 +647,11 @@ END (fun (_,t) -> let _,_,_,f,_ = Transition.node t in StateSet.mem s ((fun (_,_,x) -> x) (access (Formula.st f)))) (Hashtbl.find a.trans s) - + let is_final_marking a s = + List.exists (fun (_,t) -> let _,_,m,f,_ = Transition.node t in m&& (Formula.is_true f)) + (Hashtbl.find a.trans s) + + let decide a c_label l_label dir_states dir = let l = StateSet.fold @@ -841,12 +918,22 @@ END | _,`NIL -> ( match f_kind with |`TAG(tag') -> - (fun t _ -> eval_fold2_slist fl_list t (Tree.tag tree t) empty_res - (loop_tag tag' (first t) llist t )) - | `ANY -> + let default = fun t _ -> eval_fold2_slist fl_list t (Tree.tag tree t) empty_res + (loop_tag tag' (first t) llist t ) + in + let cf = SList.hd llist in + if (slot_size == 1) && StateSet.is_singleton cf + then + let s = StateSet.choose cf in + if (Algebra.is_rec a s fst) && (Algebra.is_rec a s snd) + && (Algebra.is_final_marking a s) + then RS.mk_quick_subtree default llist 1 tree tag' + else default + else default + | _ -> (fun t _ -> eval_fold2_slist fl_list t (Tree.tag tree t) empty_res (loop (first t) llist t )) - | _ -> assert false) + ) | `NIL,_ -> ( match n_kind with |`TAG(tag') -> @@ -860,11 +947,10 @@ END (fun t ctx -> eval_fold2_slist fl_list t (Tree.tag tree t) (loop_tag tag' (next t ctx) rlist ctx ) empty_res) - | `ANY -> + | _ -> (fun t ctx -> eval_fold2_slist fl_list t (Tree.tag tree t) (loop (next t ctx) rlist ctx ) empty_res) - - | _ -> assert false) + ) | `TAG(tag1),`TAG(tag2) -> (fun t ctx -> @@ -904,7 +990,7 @@ END eval_fold2_slist fl_list t (Tree.tag tree t) (loop (next t ctx) rlist ctx ) (loop (first t) llist t )) - | _ -> assert false + in let cont = D_IF_( (fun t ctx -> let a,b = cont t ctx in @@ -1154,3 +1240,9 @@ END let bottom_up_count a t k = let module RI = Run(Integer) in Integer.length (RI.run_bottom_up a t k) + module Test (Doc : sig val doc : Tree.t end) = + struct + module Results = GResult(Doc) + let top_down a t = let module R = Run(Results) in (R.run_top_down a t) + end + diff --git a/ata.mli b/ata.mli index 649a496..159bd78 100644 --- a/ata.mli +++ b/ata.mli @@ -93,12 +93,20 @@ module type ResultSet = val map : (elt -> elt) -> t -> t val length : t -> int val merge : (bool*bool*bool*bool)-> elt -> t -> t -> t + val mk_quick_tag_loop : (elt -> elt -> 'a*t array) -> 'a -> int -> Tree.t -> Tag.t -> (elt -> elt -> 'a*t array) + val mk_quick_star_loop : (elt -> elt -> 'a*t array) -> 'a -> int -> Tree.t -> (elt -> elt -> 'a*t array) end module IdSet : ResultSet -module GResult : ResultSet +module GResult (Doc : sig val doc : Tree.t end) : ResultSet val top_down_count : 'a t -> Tree.t -> int val top_down : 'a t -> Tree.t -> IdSet.t val bottom_up_count : 'a t -> Tree.t -> [> `CONTAINS of 'b | `TAG of Tag.t ] -> int + +module Test (Doc : sig val doc : Tree.t end ) : +sig + module Results : ResultSet + val top_down : 'a t -> Tree.t -> Results.t +end diff --git a/main.ml b/main.ml index cb890e7..19bdcf6 100644 --- a/main.ml +++ b/main.ml @@ -14,6 +14,7 @@ let disabled_gc = { Gc.get() with Gc.max_overhead = 1000000; Gc.space_overhead = 100 } let hash x = 131*x/(x-1+1) + let test_loop tree tag = let t' = Tree.tagged_desc tree tag Tree.root in let f = Hashtbl.create 4096 @@ -25,6 +26,20 @@ let test_loop tree tag = in Hashtbl.add f (hash 101) g; (Hashtbl.find f (hash 101)) t' Tree.root + +let test_full tree = + let root = Tree.root in + let fin = Tree.closing tree root in + let rec loop t = if t <= fin then + let tag = Tree.tag tree t in +(* let _ = Tag.to_string tag in *) + if tag == Tag.pcdata then (ignore (Tree.get_text tree t)); + let t = (Obj.magic ((Obj.magic t) + 1)) in + loop t + in + loop root + + let test_loop2 tree tag = let t' = Tree.tagged_desc tree tag Tree.root in let f = Hashtbl.create 4096 @@ -51,10 +66,12 @@ let main v query_string output = in (* let _ = Printf.eprintf "Timing //keyword :" in let r = time (test_loop v) (Tag.tag "keyword") in - let _ = Printf.eprintf "Count is %i\n%!" r in *) + let _ = Printf.eprintf "Count is %i\n%!" r in let _ = Printf.eprintf "Timing //keyword 2:" in let r = time (test_loop2 v) (Tag.tag "keyword") in - let _ = Printf.eprintf "Count is %i\n%!" r in + let _ = Printf.eprintf "Count is %i\n%!" r in *) + let _ = Printf.eprintf "Timing //node() :" in + let _ = time (test_full) v in XPath.Ast.print Format.err_formatter query; Format.fprintf Format.err_formatter "\n%!"; Printf.eprintf "Compiling query : "; @@ -120,8 +137,10 @@ let main v query_string output = let _ = Printf.eprintf "Number of nodes in the result set : %i\n%!" r in () else - let result = time (top_down auto) v in - let rcount = IdSet.length result in + let module GR = Ata.Test(struct let doc = v end) in + let result = time (GR.top_down auto) v in + let _ = Printf.eprintf "Counting results " in + let rcount = time (GR.Results.length) result in Printf.eprintf "Number of nodes in the result set : %i\n" rcount; Printf.eprintf "\n%!"; begin @@ -130,13 +149,13 @@ let main v query_string output = | Some f -> Printf.eprintf "Serializing results : "; time( fun () -> - let oc = open_out f in - output_string oc "\n"; - IdSet.iter (fun t -> - Tree.print_xml_fast oc v t; - output_char oc '\n'; - - ) result) (); + (*let oc = open_out f in *) + let oc = Unix.openfile f [ Unix.O_WRONLY;Unix.O_TRUNC;Unix.O_CREAT] 0o644 in + (*output_string oc "\n";*) + GR.Results.iter (fun t -> + Tree.print_xml_fast3 v t oc; + (*output_char oc '\n'; *) + ) result) (); end; end; let _ = Gc.set enabled_gc in diff --git a/tag.ml b/tag.ml index f9c0275..ec80df4 100644 --- a/tag.ml +++ b/tag.ml @@ -21,6 +21,10 @@ let document_node = 0 let attribute = 1 let pcdata = 2 let attribute_data= 3 +let document_node_close = 4 +let attribute_close = 5 +let pcdata_close = 6 +let attribute_data_close= 7 let pool = Weak.create 1 diff --git a/tag.mli b/tag.mli index bc3ee55..1fcd5b1 100644 --- a/tag.mli +++ b/tag.mli @@ -6,6 +6,10 @@ val document_node : t val attribute : t val pcdata : t val attribute_data : t +val document_node_close : t +val attribute_close : t +val pcdata_close : t +val attribute_data_close : t val init : pool -> unit diff --git a/tests/test.xml b/tests/test.xml index 8f0e255..c4c1de0 100644 --- a/tests/test.xml +++ b/tests/test.xml @@ -1,6 +1,5 @@ - - + diff --git a/tests/xpathmark-queries.txt b/tests/xpathmark-queries.txt new file mode 100644 index 0000000..12ed53e --- /dev/null +++ b/tests/xpathmark-queries.txt @@ -0,0 +1,14 @@ +#XPATHMARK A queries +/site/closed_auctions/closed_auction/annotation/description/text/keyword +/descendant::closed_auction/descendant::keyword +/site/closed_auctions/closed_auction/descendant::keyword +/site/closed_auctions/closed_auction[annotation/description/text/keyword]/date +/site/closed_auctions/closed_auction[descendant::keyword]/date +/site/people/person[profile/gender and profile/age]/name +/site/people/person[phone or homepage]/name +/site/people/person[address and (phone or homepage) and (creditcard or profile)]/name + +#XPATHMARK B queries +#/site/open_auctions/open_auction/bidder[following-sibling::bidder] +#/descendant::person[profile/@income]/name + diff --git a/tree.ml b/tree.ml index 0a31e83..1ff2082 100644 --- a/tree.ml +++ b/tree.ml @@ -28,7 +28,7 @@ let equal_node : 'a node -> 'a node -> bool = (==) external parse_xml_uri : string -> int -> bool -> bool -> tree = "caml_call_shredder_uri" external parse_xml_string : string -> int -> bool -> bool -> tree = "caml_call_shredder_string" - +external tree_print_xml_fast3 : tree -> [`Tree ] node -> Unix.file_descr ->unit = "caml_xml_tree_print" external tree_save : tree -> Unix.file_descr -> unit = "caml_xml_tree_save" external tree_load : Unix.file_descr -> tree = "caml_xml_tree_load" @@ -53,6 +53,7 @@ external text_get_cached_text : tree -> [`Text] node -> string = "caml_text_coll external tree_root : tree -> [`Tree] node = "caml_xml_tree_root" external tree_subtree_size : tree -> [`Tree] node -> int = "caml_xml_tree_subtree_size" +external tree_subtree_elements : tree -> [`Tree] node -> int = "caml_xml_tree_subtree_elements" let tree_is_nil x = equal_node x nil @@ -60,6 +61,8 @@ external tree_parent : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_par external tree_parent_doc : tree -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_parent_doc" "noalloc" (*external tree_prev_doc : tree -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_prev_doc" "noalloc" *) external tree_first_child : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_child" "noalloc" +external tree_closing : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_closing" "noalloc" +external tree_is_open : tree -> [`Tree] node -> bool = "caml_xml_tree_is_open" "noalloc" external tree_first_element : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_element" "noalloc" external tree_tagged_child : tree -> [`Tree] node -> Tag.t -> [`Tree] node = "caml_xml_tree_tagged_child" "noalloc" external tree_next_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_sibling" "noalloc" @@ -80,6 +83,7 @@ let tree_is_last t n = equal_node nil (tree_next_sibling t n) (*external tree_prev_text : tree -> [`Tree] node -> [`Text ] node = "caml_xml_tree_prev_text" "noalloc" *) external tree_my_text : tree -> [`Tree] node -> [`Text ] node = "caml_xml_tree_my_text" "noalloc" +external tree_my_text_unsafe : tree -> [`Tree] node -> [`Text ] node = "caml_xml_tree_my_text_unsafe" "noalloc" (*external tree_next_text : tree -> [`Tree] node -> [`Text ] node = "caml_xml_tree_next_text" "noalloc" *) external tree_doc_ids : tree -> [`Tree ] node -> [`Text ] node * [`Text ] node = "caml_xml_tree_doc_ids" @@ -129,6 +133,7 @@ type t = { ttable : (Tag.t,(Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t)) Hashtbl.t; } let subtree_size t i = tree_subtree_size t.doc i +let subtree_elements t i = tree_subtree_elements t.doc i let text_size t = text_size t.doc module MemUnion = Hashtbl.Make (struct @@ -421,9 +426,18 @@ let dump_node t = nts (inode t) let is_left t n = tree_is_first_child t.doc n + + let is_below_right t n1 n2 = tree_is_ancestor t.doc (tree_parent t.doc n1) n2 && not (tree_is_ancestor t.doc n1 n2) + +let is_binary_ancestor t n1 n2 = + let p = tree_parent t.doc n1 in + let fin = tree_closing t.doc p in + n2 > n1 && n2 < fin +(* (is_below_right t n1 n2) || + (tree_is_ancestor t.doc n1 n2) *) let parent t n = tree_parent t.doc n @@ -474,6 +488,10 @@ let select_foll_ctx t = fun ts -> let v = (ptset_to_vector ts) in (); fun n ctx -> tree_select_foll_below t.doc n v ctx +let closing t n = tree_closing t.doc n +let is_open t n = tree_is_open t.doc n +let get_text_id t n = tree_my_text t.doc n + let last_idx = ref 0 let array_find a i j = let l = Array.length a in @@ -489,8 +507,92 @@ let array_find a i j = let count t s = text_count t.doc s - - let print_xml_fast outc tree t = + let stack = ref [] + let init_stack () = stack := [] + let push x = stack:= x::!stack + let peek () = match !stack with + p::_ -> p + | _ -> failwith "peek" + let pop () = match !stack with + p::r -> stack:=r; p + | _ -> failwith "pop" + + let next t = nodei ( (inode t) + 1 ) + let next2 t = nodei ( (inode t) + 2 ) + let next3 t = nodei ( (inode t) + 3 ) + + let print_xml_fast2 = + let _ = init_stack () in + let h = Hashtbl.create MED_H_SIZE in + let tag_str t = try Hashtbl.find h t with + Not_found -> let s = Tag.to_string t in + Hashtbl.add h t s;s + in + let h_att = Hashtbl.create MED_H_SIZE in + let att_str t = try Hashtbl.find h_att t with + Not_found -> let s = Tag.to_string t in + let attname = String.sub s 3 ((String.length s) -3) in + Hashtbl.add h_att t attname;attname + in fun outc tree t -> + let tree = tree.doc in + let fin = tree_closing tree t in + let rec loop_tag t tag = + if t <= fin then + if tree_is_open tree t then + (* opening tag *) + if tag == Tag.pcdata then + begin + output_string outc (text_get_cached_text tree (tree_my_text_unsafe tree t)); + loop (next2 t) (* skip closing $ *) + end + else + let tagstr = tag_str tag in + let _ = output_char outc '<'; + output_string outc tagstr in + let t' = next t in + if tree_is_open tree t' then + let _ = push tagstr in + let tag' = tree_tag_id tree t' in + if tag' == Tag.attribute then let t'' = loop_attr (next t') 0 in + output_string outc ">"; loop t'' else (output_string outc ">";loop_tag t' tag') + else (* closing with no content *) + let _ = output_string outc "/>" in + loop (next t') + else + begin + (* closing tag *) + output_string outc "'; + loop (next t); + end + and loop t = loop_tag t (tree_tag_id tree t) + and loop_attr t n = + if tree_is_open tree t then + let attname = att_str (tree_tag_id tree t) in + output_char outc ' '; + output_string outc attname; + output_string outc "=\""; + let t = next t in (* open $@ *) + output_string outc (text_get_cached_text tree (tree_my_text_unsafe tree t)); + output_char outc '"'; + loop_attr (next3 t) (n+1) + else + next t (* close @ *) + in loop t + + let print_xml_fast = + let h = Hashtbl.create MED_H_SIZE in + let tag_str t = try Hashtbl.find h t with + Not_found -> let s = Tag.to_string t in + Hashtbl.add h t s;s + in + let h_att = Hashtbl.create MED_H_SIZE in + let att_str t = try Hashtbl.find h_att t with + Not_found -> let s = Tag.to_string t in + let attname = String.sub s 3 ((String.length s) -3) in + Hashtbl.add h_att t attname;attname + in fun outc tree t -> let rec loop ?(print_right=true) t = if t != nil then @@ -498,18 +600,18 @@ let array_find a i j = if tagid==Tag.pcdata then begin - let tid = tree_my_text tree.doc t in + let tid = tree_my_text_unsafe tree.doc t in output_string outc (text_get_cached_text tree.doc tid); if print_right then loop (next_sibling tree t); end else - let tagstr = Tag.to_string tagid in + let tagstr = tag_str tagid in let l = first_child tree t and r = next_sibling tree t in output_char outc '<'; - output_string outc tagstr; + output_string outc tagstr; if l == nil then output_string outc "/>" else if (tag tree l) == Tag.attribute then @@ -537,10 +639,9 @@ let array_find a i j = and loop_attributes a = if a != nil then - let s = (Tag.to_string (tag tree a)) in - let attname = String.sub s 3 ((String.length s) -3) in + let attname = att_str (tag tree a) in let fsa = first_child tree a in - let tid = tree_my_text tree.doc fsa in + let tid = tree_my_text_unsafe tree.doc fsa in output_char outc ' '; output_string outc attname; output_string outc "=\""; @@ -612,3 +713,4 @@ let dump_tree fmt tree = ;; +let print_xml_fast3 t = tree_print_xml_fast3 t.doc diff --git a/tree.mli b/tree.mli index ba0fc44..e4bbd2d 100644 --- a/tree.mli +++ b/tree.mli @@ -56,6 +56,8 @@ val select_foll_ctx : t -> Ptset.Int.t -> [ `Tree ] node -> [`Tree] node -> [ ` val count : t -> string -> int val print_xml_fast : out_channel -> t -> [ `Tree ] node -> unit +val print_xml_fast2 : out_channel -> t -> [ `Tree ] node -> unit +val print_xml_fast3 : t -> [ `Tree ] node -> Unix.file_descr -> unit val tags_children : t -> Tag.t -> Ptset.Int.t val tags_below : t -> Tag.t -> Ptset.Int.t @@ -63,6 +65,7 @@ val tags_siblings : t -> Tag.t -> Ptset.Int.t val tags_after : t -> Tag.t -> Ptset.Int.t val tags : t -> Tag.t -> Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t val is_below_right : t -> [`Tree] node -> [`Tree] node -> bool +val is_binary_ancestor : t -> [`Tree] node -> [`Tree] node -> bool val is_left : t -> [`Tree] node -> bool val binary_parent : t -> [`Tree] node -> [`Tree] node @@ -73,8 +76,13 @@ val text_size : t -> int val doc_ids : t -> [`Tree] node -> [`Text] node * [`Text] node val subtree_tags : t -> Tag.t -> [`Tree] node -> int val get_text : t -> [`Tree] node -> string +val get_text_id : t -> [`Tree] node -> [`Text ] node val dump_tree : Format.formatter -> t -> unit val subtree_size : t -> [`Tree] node -> int +val subtree_elements : t -> [`Tree] node -> int val text_below : t -> [`Tree] node -> [`Tree] node val text_next : t -> [`Tree] node -> [`Tree] node -> [`Tree] node + +val closing : t -> [`Tree] node -> [`Tree] node +val is_open : t -> [`Tree] node -> bool diff --git a/utils.ml b/utils.ml index 658d84e..76c0432 100644 --- a/utils.ml +++ b/utils.ml @@ -70,7 +70,7 @@ let read_procmem () = let l = ref [] ;; let init_timer() = l := [];; -let time f x = +let time_mem f x = let s1 = read_procmem() in let t1 = Unix.gettimeofday () in let r = f x in @@ -83,6 +83,15 @@ let time f x = Printf.eprintf "Mem use after: %s\n\n\n%!" s2; r ;; +let time f x = + let t1 = Unix.gettimeofday () in + let r = f x in + let t2 = Unix.gettimeofday () in + let t = (1000. *. (t2 -. t1)) in + l:= t::!l; + Printf.eprintf " %fms\n%!" t ; + r +;; let total_time () = List.fold_left (+.) 0. !l;; END (* IFNDEF UTILS__ML__ *) -- 2.17.1