From: kim Date: Thu, 30 Apr 2009 14:25:01 +0000 (+0000) Subject: Further optimisations, changed the prototype of Tree.mli X-Git-Url: http://git.nguyen.vg/gitweb/?a=commitdiff_plain;ds=sidebyside;h=70ff0bfc463882ecf233f1b1a7ac4a8007fa4cc2;p=SXSI%2Fxpathcomp.git Further optimisations, changed the prototype of Tree.mli git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@366 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- diff --git a/Makefile b/Makefile index afa1e3b..97f3352 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,10 @@ -INLINE=10000 +INLINE=1000 DEBUG=false PROFILE=false VERBOSE=false -BASESRC=custom.ml memoizer.ml hcons.ml memhashtbl.ml hlist.ml ptset.ml finiteCofinite.ml tag.ml tagSet.ml options.ml tree.ml ata.ml -BASEMLI=sigs.mli memoizer.mli hcons.mli memhashtbl.ml hlist.mli ptset.mli finiteCofinite.mli tag.mli tagSet.mli options.mli tree.mli ata.mli +BASESRC=custom.ml memoizer.ml hcons.ml hlist.ml ptset.ml finiteCofinite.ml tag.ml tagSet.ml options.ml tree.ml ata.ml +BASEMLI=sigs.mli memoizer.mli hcons.mli hlist.mli ptset.mli finiteCofinite.mli tag.mli tagSet.mli options.mli tree.mli ata.mli MLSRCS = memory.ml $(BASESRC) ulexer.ml xPath.ml main.ml MLISRCS = memory.mli $(BASEMLI) ulexer.mli xPath.mli BASEOBJS= $(BASESRC:.ml=.cmx) diff --git a/SXSIStorageInterface.cpp b/SXSIStorageInterface.cpp index 0a1a715..43ea155 100644 --- a/SXSIStorageInterface.cpp +++ b/SXSIStorageInterface.cpp @@ -32,15 +32,9 @@ void SXSIStorageInterface::newChild(string name) void SXSIStorageInterface::newText(string text) { - if (text.empty()) { - _new_empty_text++; - tb->NewEmptyText(); - } - else { - _new_text++; - _length_text += text.size(); - tb->NewText((unsigned char*) text.c_str()); - } + _new_text++; + _length_text += text.size(); + tb->NewText((unsigned char*) text.c_str()); } diff --git a/ata.ml b/ata.ml index e06ac04..13d3cce 100644 --- a/ata.ml +++ b/ata.ml @@ -434,27 +434,26 @@ let tags_of_state a q = match b with | `Positive s -> let r = Ptset.Int.inter a s in (r,Ptset.Int.mem Tag.pcdata r, true) | `Negative s -> let r = Ptset.Int.diff a s in (r, Ptset.Int.mem Tag.pcdata r, false) - - let mk_nil_ctx x _ = Tree.mk_nil x - let next_sibling_ctx x _ = Tree.next_sibling x - let r_ignore _ x = x module type ResultSet = sig type t + type elt = [` Tree] Tree.node val empty : t - val cons : Tree.t -> t -> t + val cons : elt -> t -> t val concat : t -> t -> t - val iter : (Tree.t -> unit) -> t -> unit - val fold : (Tree.t -> 'a -> 'a) -> t -> 'a -> 'a - val map : (Tree.t -> Tree.t) -> t -> t + val iter : ( elt -> unit) -> t -> unit + val fold : ( elt -> 'a -> 'a) -> t -> 'a -> 'a + val map : ( elt -> elt) -> t -> t val length : t -> int + val merge : bool -> bool -> bool -> bool -> elt -> t -> t -> t end module Integer : ResultSet = struct type t = int + type elt = [`Tree] Tree.node let empty = 0 let cons _ x = x+1 let concat x y = x + y @@ -462,12 +461,21 @@ let tags_of_state a q = let fold _ _ _ = failwith "fold not implemented" let map _ _ = failwith "map not implemented" let length x = x + let merge rb rb1 rb2 mark t res1 res2 = + if rb then + let res1 = if rb1 then res1 else 0 + and res2 = if rb2 then res2 else 0 + in + if mark then 1+res1+res2 + else res1+res2 + else 0 end module IdSet : ResultSet = struct + type elt = [`Tree] Tree.node type node = Nil - | Cons of Tree.t * node + | Cons of elt * node | Concat of node*node and t = { node : node; @@ -504,6 +512,18 @@ let tags_of_state a q = | Concat(t1,t2) -> Concat(loop t1,loop t2) in { l with node = loop l.node } + + let merge rb rb1 rb2 mark t res1 res2 = + if rb then + let res1 = if rb1 then res1 else empty + and res2 = if rb2 then res2 else empty + in + if mark then { node = Cons(t,(Concat(res1.node,res2.node))); + length = res1.length + res2.length + 1;} + else + { node = (Concat(res1.node,res2.node)); + length = res1.length + res2.length ;} + else empty end @@ -532,11 +552,12 @@ END let string_of_ts tags = (Ptset.Int.fold (fun t a -> a ^ " " ^ (Tag.to_string t) ) tags "{")^ " }" - let choose_jump tagset qtags1 qtagsn a f_nil f_text f_t1 f_s1 f_tn f_sn f_notext = + let choose_jump tagset qtags1 qtagsn a f_nil f_t1 f_s1 f_tn f_sn f_notext = let tags1,hastext1,fin1 = inter_text tagset (tags a qtags1) in let tagsn,hastextn,finn = inter_text tagset (tags a qtagsn) in - if (hastext1||hastextn) then (`ANY,f_text) (* jumping to text nodes doesn't work really well *) - else if (Ptset.Int.is_empty tags1) && (Ptset.Int.is_empty tagsn) then (`NIL,f_nil) + (*if (hastext1||hastextn) then (`ANY,f_text) (* jumping to text nodes doesn't work really well *) + else*) + if (Ptset.Int.is_empty tags1) && (Ptset.Int.is_empty tagsn) then (`NIL,f_nil) else if (Ptset.Int.is_empty tagsn) then if (Ptset.Int.is_singleton tags1) then (* TaggedChild/Sibling *) @@ -551,25 +572,23 @@ END (`ANY,mk_app_fun f_sn tagsn (string_of_ts tagsn)) else (`ANY,f_notext) - let choose_jump_down a b c d = + let choose_jump_down tree a b c d = choose_jump a b c d - (mk_fun (Tree.mk_nil) "Tree.mk_nil") - (mk_fun (Tree.first_child) "Tree.text_below") - (mk_fun (Tree.tagged_child) "Tree.tagged_child") - (mk_fun (Tree.select_child) "Tree.select_child") (* !! no select_child in Tree.ml *) - (mk_fun (Tree.tagged_desc) "Tree.tagged_desc") - (mk_fun (Tree.select_desc) "Tree.select_desc") (* !! no select_desc *) - (mk_fun (Tree.first_child) "Tree.first_child") - - let choose_jump_next a b c d = + (mk_fun (fun _ -> Tree.nil) "Tree.mk_nil") + (mk_fun (Tree.tagged_child tree) "Tree.tagged_child") + (mk_fun (Tree.select_child tree) "Tree.select_child") (* !! no select_child in Tree.ml *) + (mk_fun (Tree.tagged_desc tree) "Tree.tagged_desc") + (mk_fun (Tree.select_desc tree) "Tree.select_desc") (* !! no select_desc *) + (mk_fun (Tree.first_child tree) "Tree.first_child") + + let choose_jump_next tree a b c d = choose_jump a b c d - (mk_fun (fun t _ -> Tree.mk_nil t) "Tree.mk_nil2") - (mk_fun (Tree.next_sibling_ctx) "Tree.text_next") - (mk_fun (Tree.tagged_sibling_ctx) "Tree.tagged_sibling_ctx")(* !! no tagged_sibling in Tree.ml *) - (mk_fun (Tree.select_sibling_ctx) "Tree.select_sibling_ctx")(* !! no select_sibling in Tree.ml *) - (mk_fun (Tree.tagged_foll_ctx) "Tree.tagged_foll_ctx") - (mk_fun (Tree.select_foll_ctx) "Tree.select_foll_ctx")(* !! no select_foll *) - (mk_fun (Tree.next_sibling_ctx) "Tree.node_sibling_ctx") + (mk_fun (fun _ _ -> Tree.nil) "Tree.mk_nil2") + (mk_fun (Tree.tagged_sibling_ctx tree) "Tree.tagged_sibling_ctx")(* !! no tagged_sibling in Tree.ml *) + (mk_fun (Tree.select_sibling_ctx tree) "Tree.select_sibling_ctx")(* !! no select_sibling in Tree.ml *) + (mk_fun (Tree.tagged_foll_ctx tree) "Tree.tagged_foll_ctx") + (mk_fun (Tree.select_foll_ctx tree) "Tree.select_foll_ctx")(* !! no select_foll *) + (mk_fun (Tree.next_sibling_ctx tree) "Tree.node_sibling_ctx") module SetTagKey = @@ -582,23 +601,22 @@ END module CachedTransTable = Hashtbl.Make(SetTagKey) let td_trans = CachedTransTable.create 4093 - let merge rb rb1 rb2 mark t res1 res2 = - if rb - then - let res1 = if rb1 then res1 else RS.empty - and res2 = if rb2 then res2 else RS.empty - in - if mark then RS.cons t (RS.concat res1 res2) - else RS.concat res1 res2 - else RS.empty let empty_size n = let rec loop acc = function 0 -> acc | n -> loop (SList.cons StateSet.empty acc) (n-1) in loop SList.nil n - + + let merge rb rb1 rb2 mark t res1 res2 = + if rb then + let res1 = if rb1 then res1 else RS.empty + and res2 = if rb2 then res2 else RS.empty + in + if mark then RS.cons t (RS.concat res1 res2) + else RS.concat res1 res2 + else RS.empty - let top_down ?(noright=false) a t slist ctx slot_size = + let top_down ?(noright=false) a tree t slist ctx slot_size = let pempty = empty_size slot_size in (* evaluation starts from the right so we put sl1,res1 at the end *) let eval_fold2_slist fll t (sl2,res2) (sl1,res1) = @@ -609,7 +627,7 @@ END SList.Cons(s2,ll2), fl::fll -> let r',rb,rb1,rb2,mark = eval_formlist s1 s2 fl in - let _ = res.(i) <- merge rb rb1 rb2 mark t res1.(i) res2.(i) + let _ = res.(i) <- RS.merge rb rb1 rb2 mark t res1.(i) res2.(i) in fold ll1 ll2 fll (i+1) (SList.cons r' aq) @@ -621,12 +639,12 @@ END let null_result() = (pempty,Array.make slot_size RS.empty) in let rec loop t slist ctx = - if Tree.is_nil t then null_result() else get_trans t slist (Tree.tag t) ctx + if t == Tree.nil then null_result() else get_trans t slist (Tree.tag tree t) ctx and loop_tag tag t slist ctx = - if Tree.is_nil t then null_result() else get_trans t slist tag ctx + if t == Tree.nil then null_result() else get_trans t slist tag ctx and loop_no_right t slist ctx = - if Tree.is_nil t then null_result() else get_trans ~noright:true t slist (Tree.tag t) ctx + if t == Tree.nil then null_result() else get_trans ~noright:true t slist (Tree.tag tree t) ctx and get_trans ?(noright=false) t slist tag ctx = let cont = try @@ -665,10 +683,10 @@ END slist ([],SList.nil,SList.nil,StateSet.empty,StateSet.empty,StateSet.empty,StateSet.empty) in (* Logic to chose the first and next function *) - let tags_below,tags_after = Tree.tags t tag in - let f_kind,first = choose_jump_down tags_below ca da a - and n_kind,next = if noright then (`NIL, fun t _ -> Tree.mk_nil t ) - else choose_jump_next tags_after sa fa a in + let tags_below,tags_after = Tree.tags tree tag in + let f_kind,first = choose_jump_down tree tags_below ca da a + and n_kind,next = if noright then (`NIL, fun _ _ -> Tree.nil ) + else choose_jump_next tree tags_after sa fa a in let empty_res = null_result() in let cont = match f_kind,n_kind with @@ -730,12 +748,12 @@ END (if noright then loop_no_right else loop) t slist ctx - let run_top_down a t = + let run_top_down a tree = let init = SList.cons a.init SList.nil in - let _,res = top_down a t init t 1 + let _,res = top_down a tree Tree.root init Tree.root 1 in D_IGNORE_( - output_trace a t "trace.html" + output_trace a tree root "trace.html" (RS.fold (fun t a -> IntSet.add (Tree.id t) a) res.(0) IntSet.empty), res.(0)) ;; @@ -853,33 +871,33 @@ END let h_tdconf = Hashtbl.create 511 - let rec bottom_up a tree conf next jump_fun root dotd init accu = + let rec bottom_up a tree t conf next jump_fun root dotd init accu = if (not dotd) && (Configuration.is_empty conf ) then accu,conf,next else - let below_right = Tree.is_below_right tree next in + let below_right = Tree.is_below_right tree t next in let accu,rightconf,next_of_next = if below_right then (* jump to the next *) - bottom_up a next conf (jump_fun next) jump_fun (Tree.next_sibling tree) true init accu + bottom_up a tree next conf (jump_fun next) jump_fun (Tree.next_sibling tree t) true init accu else accu,Configuration.empty,next in let sub = if dotd then - if below_right then prepare_topdown a tree true - else prepare_topdown a tree false + if below_right then prepare_topdown a tree t true + else prepare_topdown a tree t false else conf in let conf,next = (Configuration.merge rightconf sub, next_of_next) in - if Tree.equal tree root then accu,conf,next + if t == root then accu,conf,next else - let parent = Tree.binary_parent tree in - let ptag = Tree.tag parent in - let dir = Tree.is_left tree in + let parent = Tree.binary_parent tree t in + let ptag = Tree.tag tree parent in + let dir = Tree.is_left tree t in let slist = Configuration.Ptss.fold (fun e a -> SList.cons e a) conf.Configuration.sets SList.nil in let fl_list = get_up_trans slist ptag a parent in let slist = SList.rev (slist) in @@ -891,10 +909,10 @@ END (newconf.Configuration.results) (accu,Configuration.empty) in - bottom_up a parent newconf next jump_fun root false init accu + bottom_up a tree parent newconf next jump_fun root false init accu - and prepare_topdown a t noright = - let tag = Tree.tag t in + and prepare_topdown a tree t noright = + let tag = Tree.tag tree t in (* pr "Going top down on tree with tag %s = %s " (if Tree.is_nil t then "###" else (Tag.to_string(Tree.tag t))) (Tree.dump_node t); *) let r = @@ -913,7 +931,7 @@ END pr "\n%!"; in *) let r = SList.cons r SList.nil in - let set,res = top_down (~noright:noright) a t r t 1 in + let set,res = top_down (~noright:noright) a tree t r t 1 in let set = match SList.node set with | SList.Cons(x,_) ->x | _ -> assert false @@ -925,7 +943,8 @@ END - let run_bottom_up a t k = + let run_bottom_up a tree k = + let t = Tree.root in let trlist = Hashtbl.find a.trans (Ptset.Int.choose a.init) in let init = List.fold_left @@ -939,16 +958,18 @@ END match k with | `TAG (tag) -> (*Tree.tagged_lowest t tag, fun tree -> Tree.tagged_next tree tag*) - (Tree.tagged_desc tag t, fun tree -> Tree.tagged_foll_ctx tag tree t) - | `CONTAINS(_) -> (Tree.first_child t,fun tree -> Tree.next_sibling_ctx tree t) + (Tree.tagged_desc tree tag t, let jump = Tree.tagged_foll_ctx tree tag + in fun n -> jump n t ) + | `CONTAINS(_) -> (Tree.first_child tree t,let jump = Tree.next_sibling_ctx tree + in fun n -> jump n t) | _ -> assert false in let tree2 = jump_fun tree1 in - let rec loop tree next acc = + let rec loop t next acc = (* let _ = pr "\n_________________________\nNew iteration\n" in let _ = pr "Jumping to %s\n%!" (Tree.dump_node tree) in *) - let acc,conf,next_of_next = bottom_up a tree - Configuration.empty next jump_fun (Tree.root tree) true init acc + let acc,conf,next_of_next = bottom_up a tree t + Configuration.empty next jump_fun (Tree.root) true init acc in (* let _ = pr "End of first iteration, conf is:\n%!"; Configuration.pr fmt conf diff --git a/ata.mli b/ata.mli index 3152112..ae4e479 100644 --- a/ata.mli +++ b/ata.mli @@ -84,13 +84,15 @@ val dump : Format.formatter -> 'a t -> unit module type ResultSet = sig type t + type elt = [`Tree] Tree.node val empty : t - val cons : Tree.t -> t -> t + val cons : elt -> t -> t val concat : t -> t -> t - val iter : (Tree.t -> unit) -> t -> unit - val fold : (Tree.t -> 'a -> 'a) -> t -> 'a -> 'a - val map : (Tree.t -> Tree.t) -> t -> t + val iter : (elt -> unit) -> t -> unit + val fold : (elt -> 'a -> 'a) -> t -> 'a -> 'a + val map : (elt -> elt) -> t -> t val length : t -> int + val merge : bool -> bool -> bool -> bool -> elt -> t -> t -> t end module IdSet : ResultSet diff --git a/depend b/depend index 09cbc73..62572f7 100644 --- a/depend +++ b/depend @@ -6,8 +6,6 @@ memoizer.cmo: memoizer.cmi memoizer.cmx: memoizer.cmi hcons.cmo: hcons.cmi hcons.cmx: hcons.cmi -memhashtbl.cmo: hcons.cmi memhashtbl.cmi -memhashtbl.cmx: hcons.cmx memhashtbl.cmi hlist.cmo: hcons.cmi hlist.cmi hlist.cmx: hcons.cmx hlist.cmi ptset.cmo: hcons.cmi ptset.cmi @@ -36,8 +34,6 @@ memory.cmi: sigs.cmi: memoizer.cmi: hcons.cmi: -memhashtbl.cmo: hcons.cmi memhashtbl.cmi -memhashtbl.cmx: hcons.cmx memhashtbl.cmi hlist.cmi: hcons.cmi ptset.cmi: hcons.cmi finiteCofinite.cmi: sigs.cmi diff --git a/main.ml b/main.ml index 4e12ae8..4fcc0af 100644 --- a/main.ml +++ b/main.ml @@ -60,7 +60,7 @@ let main v query_string output = Printf.eprintf "Finding min occurences : "; time ( List.fold_left (fun ((min_occ,kind)as acc) (tag,_) -> - let numtags = Tree.subtree_tags v tag in + let numtags = Tree.subtree_tags v tag Tree.root in if ((numtags < min_occ) && numtags >= 2) then (numtags,`TAG(tag)) else acc) jump_to) ltags @@ -107,7 +107,7 @@ let main v query_string output = let oc = open_out f in output_string oc "\n"; IdSet.iter (fun t -> - Tree.print_xml_fast oc t; + Tree.print_xml_fast oc v t; output_char oc '\n'; output_string oc "----------\n"; ) result) (); diff --git a/tree.ml b/tree.ml index e3e8fe2..9cb5ef6 100644 --- a/tree.ml +++ b/tree.ml @@ -25,7 +25,8 @@ external load_tree : string -> int -> tree = "caml_xml_tree_load" external nullt : unit -> 'a node = "caml_xml_tree_nullt" -let nil : 'a node = Obj.magic (-1) +let nil : 'a node = -1 +let root : [`Tree ] node = 0 external text_get_tc_text : tree -> [`Text] node -> string = "caml_text_collection_get_text" @@ -119,10 +120,10 @@ let ptset_to_vector s = HPtset.add vector_htbl s v; v -type t = { doc : tree; - node : [`Tree] node; - ttable : (Tag.t,(Ptset.Int.t*Ptset.Int.t)) Hashtbl.t; - } +type t = { + doc : tree; + ttable : (Tag.t,(Ptset.Int.t*Ptset.Int.t)) Hashtbl.t; +} let text_size t = text_size t.doc @@ -233,18 +234,19 @@ module DocIdSet = struct let compare = compare_node end) end -let is_nil t = t.node == nil +let is_nil t = t == nil -let is_node t = t.node != nil +let is_node t = t != nil +let is_root t = t == root let node_of_t t = let _ = Tag.init (Obj.magic t) in let table = collect_tags t in { doc= t; - node = tree_root t; ttable = table; } + let finalize _ = Printf.eprintf "Release the string list !\n%!" ;; @@ -272,49 +274,65 @@ let load ?(sample=64) str = let tag_pool t = pool t.doc -let compare a b = a.node - b.node +let compare a b = a - b -let equal a b = a.node == b.node +let equal a b = a == b let nts = function -1 -> "Nil" | i -> Printf.sprintf "Node (%i)" i -let dump_node t = nts t.node - -let mk_nil t = { t with node = nil } -let root n = { n with node = tree_root n.doc } +let dump_node t = nts t -let is_root n = n.node == (tree_root n.doc) -let is_left n = tree_is_first_child n.doc n.node +let is_left t n = tree_is_first_child t.doc n + +let is_below_right t n1 n2 = tree_is_ancestor t.doc (tree_parent t.doc n1) n2 + +let parent t n = tree_parent t.doc n + +let first_child t = (); fun n -> tree_first_child t.doc n -let is_below_right t1 t2 = tree_is_ancestor t1.doc (tree_parent t1.doc t1.node) t2.node +(* these function will be called in two times: first partial application + on the tag, then application of the tag and the tree, then application of + the other arguments. We use the trick to let the compiler optimize application +*) -let parent n = { n with node = tree_parent n.doc n.node } +let tagged_child t tag = () ; fun n -> tree_tagged_child t.doc n tag -let first_child n = { n with node = tree_first_child n.doc n.node } -let tagged_child tag n = { n with node = tree_tagged_child n.doc n.node tag } -let select_child ts n = { n with node = tree_select_child n.doc n.node (ptset_to_vector ts) } +let select_child t = fun ts -> + let v = ptset_to_vector ts in (); + fun n -> tree_select_child t.doc n v -let next_sibling n = { n with node = tree_next_sibling n.doc n.node } -let tagged_sibling tag n = { n with node = tree_tagged_sibling n.doc n.node tag } -let select_sibling ts n = { n with node = tree_select_foll_sibling n.doc n.node (ptset_to_vector ts) } +let next_sibling t = (); fun n -> tree_next_sibling t.doc n +let tagged_sibling t tag = (); fun n -> tree_tagged_sibling t.doc n tag -let next_sibling_ctx n _ = next_sibling n -let tagged_sibling_ctx tag n _ = tagged_sibling tag n -let select_sibling_ctx ts n _ = select_sibling ts n +let select_sibling t = fun ts -> + let v = (ptset_to_vector ts) in (); + fun n -> tree_select_foll_sibling t.doc n v -let id t = tree_node_xml_id t.doc t.node +let next_sibling_ctx t = (); fun n _ -> tree_next_sibling t.doc n +let tagged_sibling_ctx t tag = (); fun n _ -> tree_tagged_sibling t.doc n tag + +let select_sibling_ctx t = fun ts -> + let v = (ptset_to_vector ts) in (); + fun n _ -> tree_select_foll_sibling t.doc n v + +let id t n = tree_node_xml_id t.doc n -let tag t = if t.node == nil then Tag.nullt else tree_tag_id t.doc t.node +let tag t n = if n == nil then Tag.nullt else tree_tag_id t.doc n + +let tagged_desc t tag = (); fun n -> tree_tagged_desc t.doc n tag -let tagged_desc tag n = { n with node = tree_tagged_desc n.doc n.node tag } -let select_desc ts n = { n with node = tree_select_desc n.doc n.node (ptset_to_vector ts) } +let select_desc t = fun ts -> + let v = (ptset_to_vector ts) in (); + fun n -> tree_select_desc t.doc n v -let tagged_foll_ctx tag t ctx = - { t with node = tree_tagged_foll_below t.doc t.node tag ctx.node } -let select_foll_ctx ts n ctx = { n with node = tree_select_foll_below n.doc n.node (ptset_to_vector ts) ctx.node } +let tagged_foll_ctx t tag = (); fun n ctx -> tree_tagged_foll_below t.doc n tag ctx + +let select_foll_ctx t = fun ts -> + let v = (ptset_to_vector ts) in (); + fun n ctx -> tree_select_foll_below t.doc n v ctx let last_idx = ref 0 let array_find a i j = @@ -332,33 +350,33 @@ let array_find a i j = let count t s = text_count t.doc s - let print_xml_fast outc t = + let print_xml_fast outc tree t = let rec loop ?(print_right=true) t = - if t.node != nil + if t != nil then - let tagid = tree_tag_id t.doc t.node in + let tagid = tree_tag_id tree.doc t in if tagid==Tag.pcdata - then output_string outc (text_get_cached_text t.doc t.node); + then output_string outc (text_get_cached_text tree.doc t); if print_right - then loop (next_sibling t) + then loop (next_sibling tree t) else let tagstr = Tag.to_string tagid in - let l = first_child t - and r = next_sibling t + let l = first_child tree t + and r = next_sibling tree t in output_char outc '<'; output_string outc tagstr; - if l.node == nil then output_string outc "/>" + if l == nil then output_string outc "/>" else - if (tag l) == Tag.attribute then + if (tag tree l) == Tag.attribute then begin - loop_attributes (first_child l); - if (next_sibling l).node == nil then output_string outc "/>" + loop_attributes (first_child tree l); + if (next_sibling tree l) == nil then output_string outc "/>" else begin output_char outc '>'; - loop (next_sibling l); + loop (next_sibling tree l); output_string outc "'; @@ -374,25 +392,24 @@ let array_find a i j = end; if print_right then loop r and loop_attributes a = - let s = (Tag.to_string (tag a)) in + let s = (Tag.to_string (tag tree a)) in let attname = String.sub s 3 ((String.length s) -3) in output_char outc ' '; output_string outc attname; output_string outc "=\""; - output_string outc (text_get_cached_text t.doc - (tree_my_text a.doc (first_child a).node)); + output_string outc (text_get_cached_text tree.doc + (tree_my_text tree.doc (first_child tree a))); output_char outc '"'; - loop_attributes (next_sibling a) + loop_attributes (next_sibling tree a) in loop ~print_right:false t - let print_xml_fast outc t = - if (tag t) = Tag.document_node then - print_xml_fast outc (first_child t) - else print_xml_fast outc t + let print_xml_fast outc tree t = + if (tag tree t) = Tag.document_node then + print_xml_fast outc tree (first_child tree t) + else print_xml_fast outc tree t - let tags_below t tag = fst(Hashtbl.find t.ttable tag) @@ -402,45 +419,42 @@ let tags_after t tag = let tags t tag = Hashtbl.find t.ttable tag -let rec binary_parent t = - if tree_is_first_child t.doc t.node - then { t with node = tree_parent t.doc t.node } - else { t with node = tree_prev_sibling t.doc t.node } +let binary_parent t n = + if tree_is_first_child t.doc n + then tree_parent t.doc n + else tree_prev_sibling t.doc n -let doc_ids (t:t) : (int*int) = - (Obj.magic (tree_doc_ids t.doc t.node)) +let doc_ids t n = tree_doc_ids t.doc n -let subtree_tags t tag = - if t.node == nil then 0 else - tree_subtree_tags t.doc t.node tag +let subtree_tags t tag = (); + fun n -> if n == nil then 0 else + tree_subtree_tags t.doc n tag -let get_text t = - let tid = tree_my_text t.doc t.node in +let get_text t n = + let tid = tree_my_text t.doc n in if tid == nil then "" else - let a, b = tree_doc_ids t.doc (tree_root t.doc) in - let _ = Printf.eprintf "Trying to take text %i of node %i in %i %i\n%!" tid t.node a b in - text_get_cached_text t.doc tid + text_get_cached_text t.doc tid -let dump_tree fmt t = - let rec loop tree n = - if tree != nil then - let tag = (tree_tag_id t.doc tree ) in +let dump_tree fmt tree = + let rec loop t n = + if t != nil then + let tag = (tree_tag_id tree.doc t ) in let tagstr = Tag.to_string tag in let tab = String.make n ' ' in if tag == Tag.pcdata || tag == Tag.attribute_data then Format.fprintf fmt "%s<%s>%s\n" - tab tagstr (text_get_cached_text t.doc (tree_my_text t.doc tree)) tagstr + tab tagstr (text_get_cached_text tree.doc (tree_my_text tree.doc t)) tagstr else begin Format.fprintf fmt "%s<%s>\n" tab tagstr; - loop (tree_first_child t.doc tree) (n+2); + loop (tree_first_child tree.doc t) (n+2); Format.fprintf fmt "%s\n%!" tab tagstr; end; - loop (tree_next_sibling t.doc tree) n + loop (tree_next_sibling tree.doc t) n in - loop (tree_root t.doc) 0 + loop root 0 ;; diff --git a/tree.mli b/tree.mli index 3f72894..6da77f5 100644 --- a/tree.mli +++ b/tree.mli @@ -1,59 +1,71 @@ type t + val init_contains : t -> string -> unit val init_naive_contains : t -> string -> unit -val is_nil : t -> bool -val is_node : t -> bool -val dump_node : t -> string + + val parse_xml_uri : string -> t val parse_xml_string : string -> t val save : t -> string -> unit val load : ?sample:int -> string -> t val tag_pool : t -> Tag.pool -val compare : t -> t -> int -val equal : t -> t -> bool -val mk_nil : t -> t -val root : t -> t -val is_root : t -> bool -val parent : t -> t -val first_child : t -> t -val tagged_child : Tag.t -> t -> t -val select_child : Ptset.Int.t -> t -> t -val next_sibling : t -> t -val tagged_sibling : Tag.t -> t -> t -val tagged_sibling_ctx : Tag.t -> t -> t -> t +type 'a node = private int +type node_kind = [ `Tree | `Text ] +val equal : [ `Tree ] node -> [ `Tree ] node -> bool +val compare : [ `Tree ] node -> [ `Tree ] node -> int +val dump_node : 'a node -> string + + +val nil : 'a node +val root : [ `Tree ] node + +val is_root : [ `Tree ] node -> bool +val is_nil : [ `Tree ] node -> bool + +val parent : t -> [ `Tree ] node -> [ `Tree ] node +val first_child : t -> [ `Tree ] node -> [ `Tree ] node +val tagged_child : t -> Tag.t -> [ `Tree ] node -> [ `Tree ] node + +val select_child : t -> Ptset.Int.t -> [ `Tree ] node -> [ `Tree ] node + +val next_sibling : t -> [ `Tree ] node -> [ `Tree ] node +val next_sibling_ctx : t -> [ `Tree ] node -> [ `Tree ] node -> [ `Tree ] node + +val tagged_sibling : t -> Tag.t -> [ `Tree ] node -> [ `Tree ] node +val tagged_sibling_ctx : t -> Tag.t -> [ `Tree ] node -> [ `Tree ] node -> [ `Tree ] node + +val select_sibling : t -> Ptset.Int.t -> [ `Tree ] node -> [ `Tree ] node +val select_sibling_ctx : t -> Ptset.Int.t -> [ `Tree ] node -> [ `Tree ] node -> [ `Tree ] node -val select_sibling : Ptset.Int.t -> t -> t -val select_sibling_ctx : Ptset.Int.t -> t -> t -> t -val next_sibling_ctx : t -> t -> t -val tag : t -> Tag.t -val id : t -> int +val tag : t -> [ `Tree ] node -> Tag.t +val id : t -> [ `Tree ] node -> int -val tagged_desc : Tag.t -> t -> t -val select_desc : Ptset.Int.t -> t -> t +val tagged_desc : t -> Tag.t -> [ `Tree ] node -> [`Tree] node +val select_desc : t -> Ptset.Int.t -> [ `Tree ] node -> [`Tree] node -val tagged_foll_ctx : Tag.t -> t -> t -> t -val select_foll_ctx : Ptset.Int.t -> t -> t -> t +val tagged_foll_ctx : t -> Tag.t -> [ `Tree ] node -> [`Tree] node -> [ `Tree ] node +val select_foll_ctx : t -> Ptset.Int.t -> [ `Tree ] node -> [`Tree] node -> [ `Tree ] node val count : t -> string -> int -val print_xml_fast : out_channel -> t -> unit +val print_xml_fast : out_channel -> t -> [ `Tree ] node -> unit val tags_below : t -> Tag.t -> Ptset.Int.t val tags_after : t -> Tag.t -> Ptset.Int.t -val tags : t -> Tag.t -> Ptset.Int.t*Ptset.Int.t -val is_below_right : t -> t -> bool -val is_left : t -> bool +val tags : t -> Tag.t -> Ptset.Int.t*Ptset.Int.t +val is_below_right : t -> [`Tree] node -> [`Tree] node -> bool +val is_left : t -> [`Tree] node -> bool -val binary_parent : t -> t +val binary_parent : t -> [`Tree] node -> [`Tree] node val count_contains : t -> string -> int val unsorted_contains : t -> string -> unit val text_size : t -> int -val doc_ids : t -> int*int -val subtree_tags : t -> Tag.t -> int -val get_text : t -> string +val doc_ids : t -> [`Tree] node -> [`Text] node * [`Text] node +val subtree_tags : t -> Tag.t -> [`Tree] node -> int +val get_text : t -> [`Tree] node -> string val dump_tree : Format.formatter -> t -> unit