val first_child : t -> t
val next_sibling : t -> t
val parent : t -> t
+ val root : t -> t
+ val is_root : t -> bool
val id : t -> int
val tag : t -> Tag.t
val print_xml_fast : out_channel -> t -> unit
with type elt = string_content
val string_below : t -> string_content -> bool
val contains : t -> string -> DocIdSet.t
- val contains_old : t -> string -> bool
+ val contains_old : t -> string -> DocIdSet.t
+ val contains_iter : t -> string -> DocIdSet.t
+ val count_contains : t -> string -> int
+ val count : t -> string -> int
val dump : t -> unit
val get_string : t -> string_content -> string
val has_tagged_desc : t -> Tag.t -> bool
val tagged_foll : t -> Tag.t -> t
val tagged_next : t -> Tag.t -> t
val subtree_tags : t -> Tag.t -> int
+ val is_left : t -> bool
end
module XML =
external get_cached_text : t -> [`Text ] node -> string = "caml_text_collection_get_cached_text"
+
let get_text t n =
if (equal nil n) || is_empty t n then ""
else get_cached_text t n
+ external size : t -> int = "caml_text_collection_size"
external is_contains : t -> string -> bool = "caml_text_collection_is_contains"
external count_contains : t -> string -> int = "caml_text_collection_count_contains"
+ external count : t -> string -> int = "caml_text_collection_count"
external contains : t -> string -> [`Text ] node array = "caml_text_collection_contains"
end
| Node (SC (t,i)) -> Printf.sprintf "Node (SC (%i,%i))" (int_of_node t) (int_of_node i)
+ let root n = { n with node = norm (Tree.root n.doc) }
+ let is_root n = match n.node with
+ | Node(NC t) when (Tree.root n.doc) == t -> true
+ | _ -> false
+
let parent n =
let node' =
match n.node with
- | Node(NC t) | Node(SC (_,t)) ->
- if (Tree.root n.doc) == t
- then Nil
- else Node(NC(Tree.parent n.doc t)) (* A parent node can never be a SC *)
- | _ -> assert false
+ | Node(NC t) ->
+ let txt = prev_text n.doc t in
+ if Text.is_empty n.doc txt then
+ Node(NC (Tree.parent n.doc t))
+ else
+ Node(SC (txt,t))
+ | Node(SC(t,_)) -> Node (NC(parent_doc n.doc t))
+ | _ -> failwith "parent"
in
{ n with node = node' }
let contains t s =
Array.fold_left (fun a i -> DocIdSet.add i a) DocIdSet.empty (Text.contains t.doc s)
+
let contains_old t s =
let regexp = Str.regexp_string s in
let matching arg =
in true
with _ -> false
in
- let rec find t = match t.node with
- | Nil -> false
- | String _ -> matching (string t)
- | Node(_) -> (find (left t )) || (find (right t))
+ let rec find t acc = match t.node with
+ | Nil -> acc
+ | String i ->
+ if matching (string t) then DocIdSet.add i acc else acc
+ | Node(_) -> (find (left t )) ((find (right t)) acc)
+ in
+ find t DocIdSet.empty
+
+
+ let contains_iter t s =
+ let regexp = Str.regexp_string s in
+ let matching arg =
+ try
+ let _ = Str.search_forward regexp arg 0;
+ in true
+ with _ -> false
+ in
+ let size = Text.size t.doc in
+ let rec find acc n =
+ if n == size then acc
+ else
+ find
+ (if matching (Text.get_cached_text t.doc (Obj.magic n)) then
+ DocIdSet.add (Obj.magic n) acc
+ else acc) (n+1)
in
- find t
+ find DocIdSet.empty 0
+
+
+
+
+ let count_contains t s = Text.count_contains t.doc s
+ let count t s = Text.count t.doc s
+
+ let is_left t =
+ let u = left (parent t) in
+ (id t) == (id u)
let print_xml_fast outc t =
let rec loop ?(print_right=true) t = match t.node with