PPINCLUDES=$(OCAMLINCLUDES:%=-ppopt %)
-CXXSOURCES = XMLDocShredder.cpp OCamlDriver.cpp
-CXXOBJECTS = $(CXXSOURCES:.cpp=.o)
-
+CXXSOURCES = results.c XMLDocShredder.cpp OCamlDriver.cpp
+CXXOBJECTS1 = $(CXXSOURCES:.cpp=.o)
+CXXOBJECTS = $(CXXOBJECTS1:.c=.o)
CXXINCLUDES = \
-I/usr/include/libxml++-2.6 \
-I/usr/include/libxml2 \
XMLDocShredder.o: XMLDocShredder.h XMLDocShredder.cpp
OCamlDriver.o: XMLDocShredder.h
+results.o: results.h
compute_depend:
@echo [DEP]
#include <caml/callback.h>
#include <caml/fail.h>
#include <caml/custom.h>
-
+#include "results.h"
#define CAMLRAISEMSG(msg) (caml_raise_with_string(*cpp_exception,(msg) ))
#define NOT_IMPLEMENTED(s) (caml_failwith(s))
return (Val_int(XMLTREE(tree)->SubtreeTags(TREENODEVAL(id),Int_val(tag))));
}
+extern "C" CAMLprim value caml_xml_tree_subtree_size(value tree,value id){
+ return (Val_int(XMLTREE(tree)->SubtreeSize(TREENODEVAL(id))));
+}
+
extern "C" CAMLprim value caml_xml_tree_register_tag(value tree,value str){
CAMLparam2(tree,str);
caml_initialize(&Field(tuple,1),Val_int(r.max));
CAMLreturn (tuple);
}
+
+extern "C" CAMLprim value caml_result_set_create(value size){
+ CAMLparam1(size);
+ results* res = (results*) malloc(sizeof(results));
+ results r = createResults (Int_val(size));
+ res->n = r.n;
+ res->lgn = r.lgn;
+ res->tree = r.tree;
+ CAMLreturn ((value) (res));
+}
+
+extern "C" CAMLprim value caml_result_set_set(value result,value p){
+ CAMLparam2(result,p);
+ setResult ( *((results*) result), Int_val(p));
+ CAMLreturn (Val_unit);
+}
+
+extern "C" CAMLprim value caml_result_set_clear(value result,value p1,value p2){
+ CAMLparam3(result,p1,p2);
+ clearRange ( *((results*) result), Int_val(p1), Int_val(p2));
+ CAMLreturn (Val_unit);
+}
+
+extern "C" CAMLprim value caml_result_set_next(value result,value p){
+ CAMLparam2(result,p);
+ CAMLreturn (Val_int(nextResult(*((results*) result), Int_val(p))));
+}
+
+
let merge (rb,rb1,rb2,mark) t res1 res2 =
if rb then
let res1 = if rb1 then res1 else empty
- and res2 = if rb2 then res2 else empty
+ and res2 = if rb2 then res2 else empty
in
if mark then { node = Cons(t,(Concat(res1.node,res2.node)));
length = res1.length + res2.length + 1;}
end
-
+ module GResult = struct
+ type t
+ type elt = [` Tree] Tree.node
+ external create_empty : int -> t = "caml_result_set_create"
+ external set : t -> int -> t = "caml_result_set_set"
+ external next : t -> int -> int = "caml_result_set_next"
+ external clear : t -> int -> int -> unit = "caml_result_set_clear"
+ let empty = create_empty 100000000
+
+ let cons e t = set t (Obj.magic e)
+ let concat _ t = t
+ let iter f t =
+ let rec loop i =
+ if i == -1 then ()
+ else (f (Obj.magic i);loop (next t i))
+ in loop 0
+
+ let fold _ _ _ = failwith "noop"
+ let map _ _ = failwith "noop"
+ let length t = let cpt = ref ~-1 in
+ iter (fun _ -> incr cpt) t; !cpt
+
+ let merge (rb,rb1,rb2,mark) elt t1 t2 =
+ if mark then (set t1 (Obj.magic elt) ; t1) else t1
+
+ end
module Run (RS : ResultSet) =
struct
in
(Hashtbl.add h_trans key res;res)
+
let h_tdconf = Hashtbl.create 511
let rec bottom_up a tree t conf next jump_fun root dotd init accu =
if (not dotd) && (Configuration.is_empty conf ) then
-
- accu,conf,next
+ accu,conf,next
else
let below_right = Tree.is_below_right tree t next in
let conf,next =
(Configuration.merge rightconf sub, next_of_next)
in
- if t == root then accu,conf,next else
+ if t == root then accu,conf,next else
let parent = Tree.binary_parent tree t in
let ptag = Tree.tag tree parent in
let dir = Tree.is_left tree t in
and prepare_topdown a tree t noright =
let tag = Tree.tag tree t in
-(* pr "Going top down on tree with tag %s = %s "
- (if Tree.is_nil t then "###" else (Tag.to_string(Tree.tag t))) (Tree.dump_node t); *)
let r =
try
Hashtbl.find h_tdconf tag
let set = match SList.node set with
| SList.Cons(x,_) ->x
| _ -> assert false
- in
-(* pr "Result of topdown run is %!";
- StateSet.print fmt (Ptset.Int.elements set);
- pr ", number is %i\n%!" (RS.length res.(0)); *)
- Configuration.add Configuration.empty set res.(0)
+ in
+ Configuration.add Configuration.empty set res.(0)
let run_bottom_up a tree k =
let t = Tree.root in
- let trlist = Hashtbl.find a.trans (Ptset.Int.choose a.init)
+ let trlist = Hashtbl.find a.trans (StateSet.choose a.init)
in
let init = List.fold_left
(fun acc (_,t) ->
let _,_,f,_ = Transition.node t in
let _,_,l = fst ( Formula.st f ) in
- Ptset.Int.union acc l)
- Ptset.Int.empty trlist
+ StateSet.union acc l)
+ StateSet.empty trlist
in
let tree1,jump_fun =
match k with
(*Tree.tagged_lowest t tag, fun tree -> Tree.tagged_next tree tag*)
(Tree.tagged_desc tree tag t, let jump = Tree.tagged_foll_ctx tree tag
in fun n -> jump n t )
- | `CONTAINS(_) -> (Tree.first_child tree t,let jump = Tree.next_sibling_ctx tree
+ | `CONTAINS(_) -> (Tree.text_below tree t,let jump = Tree.text_next tree
in fun n -> jump n t)
| _ -> assert false
in
let tree2 = jump_fun tree1 in
let rec loop t next acc =
-(* let _ = pr "\n_________________________\nNew iteration\n" in
- let _ = pr "Jumping to %s\n%!" (Tree.dump_node tree) in *)
let acc,conf,next_of_next = bottom_up a tree t
Configuration.empty next jump_fun (Tree.root) true init acc
in
- (* let _ = pr "End of first iteration, conf is:\n%!";
- Configuration.pr fmt conf
- in *)
let acc = Configuration.IMap.fold
- ( fun s res acc -> if Ptset.Int.intersect init s
+ ( fun s res acc -> if StateSet.intersect init s
then RS.concat res acc else acc) conf.Configuration.results acc
in
if Tree.is_nil next_of_next (*|| Tree.equal next next_of_next *)then
end
let top_down_count a t = let module RI = Run(Integer) in Integer.length (RI.run_top_down a t)
- let top_down a t = let module RI = Run(IdSet) in (RI.run_top_down a t)
+ let top_down a t = let module RI = Run(GResult) in (RI.run_top_down a t)
let bottom_up_count a t k = let module RI = Run(Integer) in Integer.length (RI.run_bottom_up a t k)
end
module IdSet : ResultSet
+module GResult : ResultSet
val top_down_count : 'a t -> Tree.t -> int
-val top_down : 'a t -> Tree.t -> IdSet.t
+val top_down : 'a t -> Tree.t -> GResult.t
val bottom_up_count :
'a t -> Tree.t -> [> `CONTAINS of 'b | `TAG of Tag.t ] -> int
let auto,ltags,contains = time (XPath.Compile.compile ~querystring:query_string) query in
let _ = Ata.dump Format.err_formatter auto in
let _ = Printf.eprintf "%!" in
- let jump_to =
+ let jump_to =
match contains with
None -> (max_int,`NOTHING)
| Some s ->
time (Tree.init_naive_contains v) s
end;(r,`CONTAINS(s))
in
+ let test_list = jump_to in
+ (*
let test_list =
if (!Options.backward) then begin
Printf.eprintf "Finding min occurences : ";
else acc) jump_to) ltags
end
else (max_int,`NOTHING)
- in
+ in*)
let _ = if (snd test_list) != `NOTHING then
let occ,s1,s2 = match test_list with
| (x,`TAG (tag)) -> (x, "tag", (Tag.to_string tag))
in ()
else
let result = time (top_down auto) v in
- let rcount = IdSet.length result in
+ let rcount = GResult.length result in
Printf.eprintf "Number of nodes in the result set : %i\n" rcount;
Printf.eprintf "\n%!";
begin
time( fun () ->
let oc = open_out f in
output_string oc "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
- IdSet.iter (fun t ->
+ GResult.iter (fun t ->
Tree.print_xml_fast oc v t;
output_char oc '\n';
<?xml version="1.0"?>
-<a><b>
- <c><d/><e/><f/></c>
- <g><h/><i/><j/></g>
- <k><l/><m/><n/></k>
- </b>
- <o>
- <p><q/><r/><s/></p>
- <t><u/><v/><w/></t>
- <x><y/><z/><aa/></x>
- </o>
+<a>
+ <d><a><b>foo</b></a></d>
+ <d><a><b>foo</b></a></d>
+ <d><a><b>foo</b></a></d>
+ <d><a><b>foo</b></a></d>
+ <d><a><b>foo</b></a></d>
+ <d><b>foo</b></d>
</a>
external text_get_cached_text : tree -> [`Text] node -> string = "caml_text_collection_get_cached_text"
external tree_root : tree -> [`Tree] node = "caml_xml_tree_root"
-
+external tree_subtree_size : tree -> [`Tree] node -> int = "caml_xml_tree_subtree_size"
+
let tree_is_nil x = equal_node x nil
external tree_parent : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_parent" "noalloc"
doc : tree;
ttable : (Tag.t,(Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t)) Hashtbl.t;
}
-
+let subtree_size t i = tree_subtree_size t.doc i
let text_size t = text_size t.doc
module MemUnion = Hashtbl.Make (struct
let _ = List.fold_left (fun cpt e -> a.(cpt) <- e; (cpt-1)) (l-1) acc
in
contains_array := a
+
+let last_idx = ref 0
+
+let array_find a i j =
+ let l = Array.length a in
+ let rec loop idx x y =
+ if x > y || idx >= l then nulldoc
+ else
+ if a.(idx) >= x then if a.(idx) > y then nulldoc else (last_idx := idx;a.(idx))
+ else loop (idx+1) x y
+ in
+ if a.(0) > j || a.(l-1) < i then nulldoc
+ else loop !last_idx i j
+let text_below tree t =
+ let l = Array.length !contains_array in
+ let i,j = tree_doc_ids tree.doc t in
+ let id = if l == 0 then i else (array_find !contains_array i j) in
+ tree_parent_doc tree.doc id
+
+let text_next tree t root =
+ let l = Array.length !contains_array in
+ let inf = nodei((inode(snd(tree_doc_ids tree.doc t)))+1) in
+ let _,j = tree_doc_ids tree.doc root in
+ let id = if l == 0 then if inf > j then nulldoc else inf
+ else array_find !contains_array inf j
+ in
+ tree_parent_doc tree.doc id
+
module DocIdSet = struct
let dump_node t = nts (inode t)
-
let is_left t n = tree_is_first_child t.doc n
-let is_below_right t n1 n2 = tree_is_ancestor t.doc (tree_parent t.doc n1) n2
-
+let is_below_right t n1 n2 =
+ tree_is_ancestor t.doc (tree_parent t.doc n1) n2
+ && not (tree_is_ancestor t.doc n1 n2)
+
let parent t n = tree_parent t.doc n
let first_child t = (); fun n -> tree_first_child t.doc n
let tags t tag = Hashtbl.find t.ttable tag
-let binary_parent t n =
+let rec binary_parent t n =
+ let r =
if tree_is_first_child t.doc n
then tree_parent t.doc n
else tree_prev_sibling t.doc n
+ in if tree_tag_id t.doc r = Tag.pcdata then
+ binary_parent t r
+ else r
let doc_ids t n = tree_doc_ids t.doc n
val get_text : t -> [`Tree] node -> string
val dump_tree : Format.formatter -> t -> unit
+val subtree_size : t -> [`Tree] node -> int
+val text_below : t -> [`Tree] node -> [`Tree] node
+val text_next : t -> [`Tree] node -> [`Tree] node -> [`Tree] node