val has_tagged_foll : t -> Tag.t -> bool
val tagged_desc : t -> Tag.t -> t
val tagged_foll : t -> Tag.t -> t
+ val tagged_below : t -> Ptset.t -> Ptset.t -> t
+ val tagged_next : t -> Ptset.t -> Ptset.t -> t -> t
+ val tagged_desc_only : t -> Ptset.t -> t
+ val tagged_foll_only : t -> Ptset.t -> t -> t
+ val text_below : t -> t
+ val text_next : t -> t -> t
val init_tagged_next : t -> Tag.t -> unit
- val tagged_next : t -> Tag.t -> t
val subtree_tags : t -> Tag.t -> int
val is_left : t -> bool
val print_id : Format.formatter -> t -> unit
val test_xml_tree : Format.formatter -> Ptset.t -> t -> unit
+ val init_contains : t -> string -> unit
+ val init_naive_contains : t -> string -> unit
+ val mk_nil : t -> t
+ val test_jump : t -> Tag.t -> unit
+ val time_xml_tree : t -> Tag.t -> int list
+ val time_xml_tree2 : t -> Tag.t -> int list
end
module XML =
external parent : t -> [`Tree] node -> [`Tree] node = "caml_xml_tree_parent"
external parent_doc : t -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_parent_doc"
+ external prev_doc : t -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_prev_doc"
external first_child : t -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_child"
external my_text : t -> [`Tree] node -> [`Text ] node = "caml_xml_tree_my_text"
external next_text : t -> [`Tree] node -> [`Text ] node = "caml_xml_tree_next_text"
-
+ external doc_ids : t -> [`Tree ] node -> [`Text ] node * [`Text ] node = "caml_xml_tree_doc_ids"
external text_xml_id : t -> [`Text ] node -> int = "caml_xml_tree_text_xml_id"
external node_xml_id : t -> [`Tree ] node -> int = "caml_xml_tree_node_xml_id"
external is_ancestor : t -> [`Tree ] node -> [`Tree ] node -> bool = "caml_xml_tree_is_ancestor"
external tagged_desc : t -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_desc"
external tagged_foll : t -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_foll"
- external tagged_next : t -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_next"
external subtree_tags : t -> [`Tree ] node -> Tag.t -> int = "caml_xml_tree_subtree_tags"
+ external tagged_below : t -> [`Tree ] node -> Ptset.int_vector -> Ptset.int_vector -> [`Tree ] node = "caml_xml_tree_tagged_below"
+ external tagged_desc_only : t -> [`Tree ] node -> Ptset.int_vector -> [`Tree ] node = "caml_xml_tree_tagged_desc_only"
+ external tagged_next : t -> [`Tree ] node -> Ptset.int_vector -> Ptset.int_vector -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_next"
+ external tagged_foll_only : t -> [`Tree ] node -> Ptset.int_vector -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_foll_only"
+ external tagged_desc_or_foll_only : t -> [`Tree ] node -> Ptset.int_vector -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_foll_only"
+ external tagged_foll_below : t -> [`Tree ] node -> Tag.t -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_foll_below"
+
+ let test_jump tree tag =
+ let rec loop id ctx =
+ if id != nil
+ then
+ let first = tagged_desc tree id tag
+ and next = tagged_desc tree id tag
+ in
+ loop first id;
+ loop next ctx
+ in
+ loop (root tree) (root tree)
-
-
+
let test_xml_tree ppf tags v =
let pr x = Format.fprintf ppf x in
let rec aux id =
(tag_id v id)
(Tag.to_string (tag_id v id))
(node_xml_id v id);
- pr "DocID of PrevText,MyText,NextText : (%i = %s,%i = %s,%i = %s) ParentDoc(my_text)=%i\n%!"
+ pr "DocID of PrevText,MyText,NextText : (%i = %s,%i = %s,%i = %s) ParentDoc(my_text)=%i PrevDoc(next_text)=%i\n%!"
(int_of_node (prev_text v id))
(Text.get_text v (prev_text v id))
(int_of_node (my_text v id))
(Text.get_text v (my_text v id))
(int_of_node (next_text v id))
(Text.get_text v (next_text v id))
- (int_of_node(parent_doc v (my_text v id)));
- pr "Testing Tagged*\n%!";
- Ptset.iter (fun t ->
- let str = Tag.to_string t in
+ (int_of_node(parent_doc v (my_text v id)))
+ (int_of_node(prev_doc v (next_text v id)));
+ let i1,i2 = doc_ids v id in
+ pr "Testing DocIds below (%i,%i)*\n%!"
+ (int_of_node i1) (int_of_node i2);
+ pr "Testing Tagged*\n%!";
+ Ptset.iter (fun t ->
+ let str = Tag.to_string t in
if Tag.pcdata <> t
then begin
pr "Tag: %s : \n%!" str;
end
in
aux (root v)
+
+ let rrrr = ref 0
+
+ let time_xml_tree v tag =
+
+ let rec aux id acc =
+ incr rrrr;
+ if (is_nil id)
+ then acc
+ else begin
+ let acc =
+ if tag == (tag_id v id)
+ then
+ id::acc
+ else acc
+ in
+ aux (next_sibling v id) (aux (first_child v id) acc);
+ end
+ in
+ let r = aux (root v) [] in
+ Printf.eprintf "%i\n%!" !rrrr;r
+
+ let rrrr2 = ref 0
+ let time_xml_tree2 v tag =
+ let rec aux id acc ctx=
+ incr rrrr2;
+ if (is_nil id)
+ then acc
+ else begin
+ let acc =
+ if tag == (tag_id v id)
+ then
+ id::acc
+ else acc
+ in
+ aux (tagged_foll_below v id tag ctx) (aux (tagged_desc v id tag) acc id) ctx;
+ end
+ in
+ let r = aux (root v) [] (root v) in
+ Printf.eprintf "%i\n%!" !rrrr2; r
let dump { doc=t } = Tree.print_skel t
let test_xml_tree ppf tags { doc=t } = Tree.test_xml_tree ppf tags t
+ let time_xml_tree { doc=t } tag = Tree.time_xml_tree t tag
+ let time_xml_tree2 { doc=t } tag = Tree.time_xml_tree2 t tag
+ let test_jump { doc=t } tag = Tree.test_jump t tag
+ let contains_array = ref [| |]
+
+ let init_contains t s =
+ let a = Text.contains t.doc s
+ in
+ Array.fast_sort (compare) a;
+ contains_array := a
+
+ let init_naive_contains t s =
+ let i,j = Tree.doc_ids t.doc (Tree.root t.doc)
+ in
+ let regexp = Str.regexp_string s in
+ let matching arg =
+ try
+ let _ = Str.search_forward regexp arg 0;
+ in true
+ with _ -> false
+ in
+ let rec loop n acc l =
+ if n >= j then acc,l
+ else
+ let s = (*Printf.eprintf "%i \n%!" n;*)Text.get_cached_text t.doc n
+ in
+ if matching s
+ then loop (n+1) (n::acc) (l+1)
+ else loop (n+1) acc l
+ in
+ let acc,l = loop i [] 0 in
+ let a = Array.create l Text.nil in
+ let _ = List.fold_left (fun cpt e -> a.(cpt) <- e; (cpt-1)) (l-1) acc
+ in
+ contains_array := a
+
+
module DocIdSet = struct
include Set.Make (struct type t = string_content
| Node (NC t) -> Printf.sprintf "Node (NC %i)" (int_of_node t)
| Node (SC (t,i)) -> Printf.sprintf "Node (SC (%i,%i))" (int_of_node t) (int_of_node i)
-
+ let mk_nil t = { t with node = Nil }
let root n = { n with node = norm (Tree.root n.doc) }
let is_root n = match n.node with
| Node(NC t) -> (int_of_node t) == 0
| { doc=d; node=Node(NC n) } -> { t with node = norm (tagged_desc d n tag) }
| _ -> { t with node=Nil }
-(*
- let tagged_next t tag =
- if tag == Tag.attribute || tag == Tag.pcdata then failwith "tagged_next"
- else
- match tagged_desc t tag with
- | { doc = d; node=Nil } -> tagged_foll t tag
- | x -> x
-*)
+
+ let tagged_next t tb tf s =
+ match s with
+ | { node = Node (NC(below)) } -> begin
+ match t with
+ | { doc = d; node=Node(NC n) } ->
+ { t with node= norm (tagged_next d n (Ptset.to_int_vector tb) (Ptset.to_int_vector tf) below) }
+ | { doc = d; node=Node(SC (i,n) ) } when is_nil n ->
+ let p = parent_doc d i in
+ { t with node= norm (tagged_next d p (Ptset.to_int_vector tb) (Ptset.to_int_vector tf) below) }
+ | { doc = d; node=Node(SC (_,n) ) } ->
+ if Ptset.mem (tag_id d n) (Ptset.union tb tf)
+ then { t with node=Node(NC(n)) }
+ else
+ let vb = Ptset.to_int_vector tb in
+ let vf = Ptset.to_int_vector tf in
+ let node =
+ let dsc = tagged_below d n vb vf in
+ if is_nil dsc
+ then tagged_next d n vb vf below
+ else dsc
+ in
+ { t with node = norm node }
+ | _ -> {t with node=Nil }
+ end
+
+ | _ -> {t with node=Nil }
+
+ let tagged_foll_only t tf s =
+ match s with
+ | { node = Node (NC(below)) } -> begin
+ match t with
+ | { doc = d; node=Node(NC n) } ->
+ { t with node= norm (tagged_foll_only d n (Ptset.to_int_vector tf) below) }
+ | { doc = d; node=Node(SC (i,n) ) } when is_nil n ->
+ let p = parent_doc d i in
+ { t with node= norm (tagged_foll_only d p (Ptset.to_int_vector tf) below) }
+ | { doc = d; node=Node(SC (_,n) ) } ->
+ if Ptset.mem (tag_id d n) tf
+ then { t with node=Node(NC(n)) }
+ else
+ let vf = Ptset.to_int_vector tf in
+ let node =
+ let dsc = tagged_desc_only d n vf in
+ if is_nil dsc
+ then tagged_foll_only d n vf below
+ else dsc
+ in
+ { t with node = norm node }
+ | _ -> {t with node=Nil }
+ end
+
+ | _ -> {t with node=Nil }
+
+
+ let tagged_below t tc td =
+ match t with
+ | { doc = d; node=Node(NC n) } ->
+ let vc = Ptset.to_int_vector tc
+ in
+ let vd = Ptset.to_int_vector td
+ in
+ { t with node= norm(tagged_below d n vc vd) }
+ | _ -> { t with node=Nil }
+
+ let tagged_desc_only t td =
+ match t with
+ | { doc = d; node=Node(NC n) } ->
+ let vd = Ptset.to_int_vector td
+ in
+ { t with node= norm(tagged_desc_only d n vd) }
+ | _ -> { t with node=Nil }
+
+
+ let last_idx = ref 0
+ let array_find a i j =
+ let l = Array.length a in
+ let rec loop idx x y =
+ if x > y || idx >= l then Text.nil
+ else
+ if a.(idx) >= x then if a.(idx) > y then Text.nil else (last_idx := idx;a.(idx))
+ else loop (idx+1) x y
+ in
+ if a.(0) > j || a.(l-1) < i then Text.nil
+ else loop !last_idx i j
+
+
+ let text_below t =
+ let l = Array.length !contains_array in
+ if l = 0 then { t with node=Nil }
+ else
+ match t with
+ | { doc = d; node=Node(NC n) } ->
+ let i,j = doc_ids t.doc n in
+ let id = array_find !contains_array i j
+ in
+ if id == Text.nil then
+ { t with node=Nil }
+ else
+ {t with node = Node(SC(id, Tree.next_sibling d (Tree.prev_doc d id))) }
+ | _ -> { t with node=Nil }
+
+ let text_next t root =
+ let l = Array.length !contains_array in
+ if l = 0 then { t with node=Nil }
+ else
+ let inf = match t with
+ | { doc =d; node = Node(NC n) } -> snd(doc_ids d n)+1
+ | { node = Node(SC(i,_)) } -> i+1
+ | _ -> assert false
+ in
+ match root with
+ | { doc = d; node= Node (NC n) } ->
+ let _,j = doc_ids t.doc n in
+
+ let id = array_find !contains_array inf j
+ in
+ if id == Text.nil then { doc = d; node= Nil }
+ else
+ {doc = d; node = Node(SC(id,Tree.next_sibling d (Tree.prev_doc d id))) }
+ | _ -> { t with node=Nil}
+
+
+
let subtree_tags t tag =
match t with
{ doc = d; node = Node(NC n) } ->
- let tagged_next t tag =
+(* let tagged_next t tag =
if !idx >= Array.length !tagged_desc_array
then {t with node=Nil}
else
let r = !tagged_desc_array.(!idx)
in
incr idx; r
-
+*)
let has_tagged_foll t tag = is_node (tagged_foll t tag)