projects
/
SXSI
/
xpathcomp.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Merge branch xpathcomp-succintbackend-refactor back to trunk
[SXSI/xpathcomp.git]
/
tree.ml
diff --git
a/tree.ml
b/tree.ml
index
3bfbfce
..
2218c28
100644
(file)
--- a/
tree.ml
+++ b/
tree.ml
@@
-13,22
+13,32
@@
sig
type t
val parse_xml_uri : string -> t
val parse_xml_string : string -> t
type t
val parse_xml_uri : string -> t
val parse_xml_string : string -> t
+ val save : t -> string -> unit
+ val load : ?sample:int -> string -> t
val tag_pool : t -> Tag.pool
val string : t -> string
val descr : t -> descr
val tag_pool : t -> Tag.pool
val string : t -> string
val descr : t -> descr
+ val is_node : t -> bool
val left : t -> t
val right : t -> t
val left : t -> t
val right : t -> t
+ val first_child : t -> t
+ val next_sibling : t -> t
val parent : t -> t
val id : t -> int
val tag : t -> Tag.t
val print_xml_fast : out_channel -> t -> unit
val compare : t -> t -> int
val equal : t -> t -> bool
val parent : t -> t
val id : t -> int
val tag : t -> Tag.t
val print_xml_fast : out_channel -> t -> unit
val compare : t -> t -> int
val equal : t -> t -> bool
- module DocIdSet : Set.S with type elt = string_content
+ module DocIdSet :
+ sig
+ include Set.S
+ end
+ with type elt = string_content
val string_below : t -> string_content -> bool
val contains : t -> string -> DocIdSet.t
val contains_old : t -> string -> bool
val dump : t -> unit
val string_below : t -> string_content -> bool
val contains : t -> string -> DocIdSet.t
val contains_old : t -> string -> bool
val dump : t -> unit
+ val get_string : t -> string_content -> string
end
module XML =
end
module XML =
@@
-45,15
+55,17
@@
struct
external int_of_node : 'a node -> int = "%identity"
external int_of_node : 'a node -> int = "%identity"
- external parse_xml_uri : string -> int -> bool -> bool -> t = "caml_call_shredder_uri"
-
-
+ external parse_xml_uri : string -> int -> bool -> bool -> t = "caml_call_shredder_uri"
external parse_xml_string : string -> int -> bool -> bool -> t = "caml_call_shredder_string"
external parse_xml_string : string -> int -> bool -> bool -> t = "caml_call_shredder_string"
+ external save_tree : t -> string -> unit = "caml_xml_tree_save"
+ external load_tree : string -> int -> t = "caml_xml_tree_load"
+
module Text =
struct
module Text =
struct
-
+ let equal : [`Text] node -> [`Text] node -> bool = equal
+
(* Todo *)
external nullt : unit -> [`Text ] node = "caml_xml_tree_nullt"
let nil = nullt ()
(* Todo *)
external nullt : unit -> [`Text ] node = "caml_xml_tree_nullt"
let nil = nullt ()
@@
-76,7
+88,7
@@
struct
module Tree =
struct
module Tree =
struct
-
+ let equal : [`Tree ] node -> [`Tree] node -> bool = equal
external serialize : t -> string -> unit = "caml_xml_tree_serialize"
external unserialize : string -> t = "caml_xml_tree_unserialize"
external serialize : t -> string -> unit = "caml_xml_tree_serialize"
external unserialize : string -> t = "caml_xml_tree_unserialize"
@@
-117,7
+129,7
@@
struct
then Printf.eprintf "#\n"
else
begin
then Printf.eprintf "#\n"
else
begin
- Printf.eprintf "Node %i has tag '%s' DocOrder=%i, DocID of PrevText,MyText,NextText : (%i = %s,%i = %s,%i = %s)\n%!"
+ Printf.eprintf "Node %i has tag '%s' DocOrder=%i, DocID of PrevText,MyText,NextText : (%i = %s,%i = %s,%i = %s)
parent_doc(my_text)=%i
\n%!"
(int_of_node id)
(Tag.to_string (tag_id t id))
(node_xml_id t id)
(int_of_node id)
(Tag.to_string (tag_id t id))
(node_xml_id t id)
@@
-126,7
+138,9
@@
struct
(int_of_node (my_text t id))
(Text.get_text t (my_text t id))
(int_of_node (next_text t id))
(int_of_node (my_text t id))
(Text.get_text t (my_text t id))
(int_of_node (next_text t id))
- (Text.get_text t (next_text t id));
+ (Text.get_text t (next_text t id))
+ (int_of_node(parent_doc t (my_text t id)));
+
aux(first_child t id);
aux(next_sibling t id);
end
aux(first_child t id);
aux(next_sibling t id);
end
@@
-169,10
+183,13
@@
struct
node : descr }
let dump { doc=t } = Tree.print_skel t
node : descr }
let dump { doc=t } = Tree.print_skel t
- module DocIdSet = Set.Make (struct type t = string_content
- let compare = (-) end)
-
-
+ module DocIdSet = struct
+ include Set.Make (struct type t = string_content
+ let compare = (-) end)
+
+ end
+ let is_node = function { node=Node(_) } -> true | _ -> false
+ let get_string t (i:string_content) = Text.get_text t.doc i
open Tree
let node_of_t t = { doc= t;
node = Node(NC (root t)) }
open Tree
let node_of_t t = { doc= t;
node = Node(NC (root t)) }
@@
-191,6
+208,11
@@
struct
!Options.disable_text_collection),__LOCATION__))
!Options.disable_text_collection),__LOCATION__))
+ let save t str = save_tree t.doc str
+
+ let load ?(sample=64) str = node_of_t (load_tree str sample)
+
+
external pool : doc -> Tag.pool = "%identity"
let tag_pool t = pool t.doc
external pool : doc -> Tag.pool = "%identity"
let tag_pool t = pool t.doc
@@
-288,11
+310,13
@@
struct
| _ -> ()
*)
let string_below t id =
| _ -> ()
*)
let string_below t id =
- let
p
id = parent_doc t.doc id in
+ let
str
id = parent_doc t.doc id in
match t.node with
match t.node with
- | Node(NC(i)) -> (is_ancestor t.doc i pid)
- | Node(SC(i,_)) -> (is_ancestor t.doc (parent_doc t.doc i) pid)
+ | Node(NC(i)) ->
+ (Tree.equal i strid) || (is_ancestor t.doc i strid)
+ | Node(SC(i,_)) -> Text.equal i id
| _ -> false
| _ -> false
+
let contains t s =
Array.fold_left (fun a i -> DocIdSet.add i a) DocIdSet.empty (Text.contains t.doc s)
let contains t s =
Array.fold_left (fun a i -> DocIdSet.add i a) DocIdSet.empty (Text.contains t.doc s)
@@
-316,7
+340,9
@@
struct
let rec loop ?(print_right=true) t = match t.node with
| Nil -> ()
| String (s) -> output_string outc (string t)
let rec loop ?(print_right=true) t = match t.node with
| Nil -> ()
| String (s) -> output_string outc (string t)
- | Node _ when Tag.equal (tag t) Tag.pcdata -> loop (left t); loop (right t)
+ | Node _ when Tag.equal (tag t) Tag.pcdata ->
+ loop (left t);
+ if print_right then loop (right t)
| Node (_) ->
let tg = Tag.to_string (tag t) in
| Node (_) ->
let tg = Tag.to_string (tag t) in