}
extern "C" CAMLprim value caml_text_collection_get_text(value tree, value id){
- CAMLparam2(tree,id);
- const char* txt = (const char*) (XMLTREE(tree)->GetText((DocID) Int_val(id)));
- CAMLreturn (caml_copy_string(txt));
+ CAMLparam2(tree,id);
+ CAMLlocal1(str);
+ uchar* txt = XMLTREE(tree)->GetText((DocID) Int_val(id));
+ str = caml_copy_string((const char*)txt);
+ delete (txt);
+ CAMLreturn (str);
}
extern "C" CAMLprim value caml_text_collection_empty_text(value tree,value id){
CAMLparam2(tree,id);
uchar * cstr = (uchar *) String_val(str);
std::vector<DocID> results;
results = XMLTREE(tree)->Contains(cstr);
-
+ //free(cstr);
resarray = caml_alloc_tuple(results.size());
for (unsigned int i=0; i<results.size();i++){
}
extern "C" CAMLprim value caml_xml_tree_parent_doc(value tree, value id){
CAMLparam2(tree,id);
- CAMLreturn(Val_int (XMLTREE(tree)->ParentNode(TREENODEVAL(id))));
+ CAMLreturn(Val_int (XMLTREE(tree)->ParentNode((DocID) Int_val(id))));
}
extern "C" CAMLprim value caml_xml_tree_is_ancestor(value tree,value id1, value id2) {
CAMLparam2(tree,id);
CAMLreturn(Val_int((XMLTREE(tree)->NodeXMLId(TREENODEVAL(id)))));
}
-extern "C" CAMLprim value caml_xml_tree_tag(value tree, value id){
- CAMLparam2(tree,id);
- const char* tag;
- tag =(const char*) XMLTREE(tree)->GetTagName(XMLTREE(tree)->Tag(TREENODEVAL(id)));
-
- CAMLreturn (caml_copy_string(tag));
-}
extern "C" CAMLprim value caml_xml_tree_tag_name(value tree, value tagid){
CAMLparam2(tree,tagid);
- const char* tag;
- tag = (const char*) XMLTREE(tree)->GetTagName((TagType) (Int_val(tagid)));
-
- CAMLreturn (caml_copy_string(tag));
+ CAMLlocal1(str);
+ char* tag;
+ tag = (char*) XMLTREE(tree)->GetTagNameByRef((TagType) (Int_val(tagid)));
+ str = caml_copy_string((const char*) tag);
+ CAMLreturn (str);
}
(* Statistics *)
mutable numbt : int;
mutable max_states : int;
+ contains : (string,Tree.Binary.DocIdSet.t) Hashtbl.t;
}
let mk () = { initial = SSet.empty;
ignore = SSet.empty;
result = BST.empty;
numbt = 0;
- max_states = 0
+ max_states = 0;
+ contains = Hashtbl.create 37;
+
};;
let print_tags fmt l =
let mem s x = SSet.mem x s
- let rec accepting_among ?(strings=None)auto t r =
+ let rec accepting_among ?(nobrother=false) ?(strings=None) auto t r =
if SSet.is_empty r then r else
match strings with
- | Some valid_strings when (Tree.Binary.DocIdSet.for_all (fun i ->
- not (Tree.Binary.string_below t i)) valid_strings )
+ | Some valid_strings when Tree.Binary.DocIdSet.for_all (fun i -> not(Tree.Binary.string_below t i)
+ ) valid_strings
-> SSet.empty
| _ -> (
match strings with
| None -> SSet.inter r auto.final
| Some valid_strings when (Tree.Binary.DocIdSet.mem id valid_strings)
- -> SSet.inter r auto.final
+ -> Printf.eprintf "Selecting string '%s'\n%!" (Tree.Binary.get_string t id);
+ SSet.inter r auto.final
| _ -> SSet.empty
)
| Tree.Binary.Node(_) ->
(fun x->SSet.mem (Transition.dest1 x) s1)
Transition.dest2 transitions
in
- let s2 = accepting_among auto t2 r2
+ let s2 = if nobrother then SSet.union (SSet.inter r auto.final) to_ignore
+ else accepting_among auto t2 r2
in
let _,s = filter_map_rev
(fun x -> SSet.mem (Transition.dest2 x) s2)
let accept ?(strings=None) auto t =
auto.result <- BST.empty;
- if SSet.is_empty (accepting_among ~strings:strings auto t auto.initial)
+ if SSet.is_empty (accepting_among ~nobrother:true ~strings:strings auto t auto.initial)
then false
else true
end
mutable result : BST.t;
mutable numbt : int;
mutable max_states : int;
+ contains : (string,Tree.Binary.DocIdSet.t) Hashtbl.t;
}
val mk : unit -> t
val dump : Format.formatter -> t -> unit
module BottomUp :
sig
- val accepting_among : ?strings:Tree.Binary.DocIdSet.t option ->
+ val accepting_among : ?nobrother:bool -> ?strings:Tree.Binary.DocIdSet.t option ->
t -> Tree.Binary.t -> SSet.t -> SSet.t
val accept : ?strings:Tree.Binary.DocIdSet.t option ->
t -> Tree.Binary.t -> bool
(* semi-colon separated list of input documents *)
-let documents = [ "../tests/base.xml" ]
+let documents = [ "../tests/tiny.srx" ]
(* semi-colon separated list of XPath queries *)
-let queries = [ "/child::*"; "//*" ]
+let queries = [ "/*" ]
(* I is the initial configuration
struct
let path = "."
let result_basename = "test"
- let num_runs = 5
+ let num_runs = 1
let run_with_output = true
let run_without_output = true
end
module I = INIT_TESTER (CONF)
-module TEST = MK (SXSI) (MK (SaxonBXQuery) (I))
+module TEST = MK (SXSI) (I)
let reference = false
let time_factor = 1.0
let mk_queryfile b doc q out = ()
- let mk_cmdline b qout qfile doc q = [ "-d"; doc; q ]@ (if b then [qout] else [])
+ let mk_cmdline b qout qfile doc q = [ doc; q ]@ (if b then [qout] else [])
let parse_rules =
[ ( ".*Parsing document :[ \\t]*\\([0-9]+\\.[0-9]*\\)ms.*",
[ Input_parsing_time 1]);
let auto = time XPath.Compile.compile query in
XPath.Ast.print Format.err_formatter query;
Format.eprintf "\n%!";
- (* Format.eprintf "Internal rep of the tree is :\n%!";
- Tree.Binary.dump v; *)
+(* Format.eprintf "Internal rep of the tree is :\n%!";
+ Tree.Binary.dump v; *)
Printf.eprintf "Execution time : ";
time (fun v -> ignore (TopDown.accept auto v)) v;
Printf.eprintf "Number of nodes in the result set : %i\n" (BST.cardinal auto.result);
val print_xml_fast : out_channel -> t -> unit
val compare : t -> t -> int
val equal : t -> t -> bool
- module DocIdSet : Set.S with type elt = string_content
+ module DocIdSet :
+ sig
+ include Set.S
+ end
+ with type elt = string_content
val string_below : t -> string_content -> bool
val contains : t -> string -> DocIdSet.t
val contains_old : t -> string -> bool
val dump : t -> unit
+ val get_string : t -> string_content -> string
end
module XML =
module Text =
struct
-
+ let equal : [`Text] node -> [`Text] node -> bool = equal
+
(* Todo *)
external nullt : unit -> [`Text ] node = "caml_xml_tree_nullt"
let nil = nullt ()
module Tree =
struct
-
+ let equal : [`Tree ] node -> [`Tree] node -> bool = equal
external serialize : t -> string -> unit = "caml_xml_tree_serialize"
external unserialize : string -> t = "caml_xml_tree_unserialize"
then Printf.eprintf "#\n"
else
begin
- Printf.eprintf "Node %i has tag '%s' DocOrder=%i, DocID of PrevText,MyText,NextText : (%i = %s,%i = %s,%i = %s)\n%!"
+ Printf.eprintf "Node %i has tag '%s' DocOrder=%i, DocID of PrevText,MyText,NextText : (%i = %s,%i = %s,%i = %s) parent_doc(my_text)=%i\n%!"
(int_of_node id)
(Tag.to_string (tag_id t id))
(node_xml_id t id)
(int_of_node (my_text t id))
(Text.get_text t (my_text t id))
(int_of_node (next_text t id))
- (Text.get_text t (next_text t id));
+ (Text.get_text t (next_text t id))
+ (int_of_node(parent_doc t (my_text t id)));
+
aux(first_child t id);
aux(next_sibling t id);
end
node : descr }
let dump { doc=t } = Tree.print_skel t
- module DocIdSet = Set.Make (struct type t = string_content
- let compare = (-) end)
-
-
+ module DocIdSet = struct
+ include Set.Make (struct type t = string_content
+ let compare = (-) end)
+
+ end
+ let get_string t (i:string_content) = Text.get_text t.doc i
open Tree
let node_of_t t = { doc= t;
node = Node(NC (root t)) }
| _ -> ()
*)
let string_below t id =
- let pid = parent_doc t.doc id in
+ let strid = parent_doc t.doc id in
match t.node with
- | Node(NC(i)) -> (is_ancestor t.doc i pid)
- | Node(SC(i,_)) -> (is_ancestor t.doc (parent_doc t.doc i) pid)
+ | Node(NC(i)) ->
+ (Tree.equal i strid) || (is_ancestor t.doc i strid)
+ | Node(SC(i,_)) -> Text.equal i id
| _ -> false
+
let contains t s =
Array.fold_left (fun a i -> DocIdSet.add i a) DocIdSet.empty (Text.contains t.doc s)
let rec loop ?(print_right=true) t = match t.node with
| Nil -> ()
| String (s) -> output_string outc (string t)
- | Node _ when Tag.equal (tag t) Tag.pcdata -> loop (left t); loop (right t)
+ | Node _ when Tag.equal (tag t) Tag.pcdata ->
+ loop (left t);
+ if print_right then loop (right t)
| Node (_) ->
let tg = Tag.to_string (tag t) in
val print_xml_fast : out_channel -> t -> unit
val compare : t -> t -> int
val equal : t -> t -> bool
- module DocIdSet : Set.S with type elt = string_content
+ module DocIdSet :
+ sig
+ include Set.S
+ end with type elt = string_content
val string_below : t -> string_content -> bool
val contains : t -> string -> DocIdSet.t
val contains_old : t -> string -> bool
val dump : t -> unit
+ val get_string : t -> string_content -> string
end
module Binary : BINARY
let text t = Tree.Binary.string (Tree.Binary.left t)
+
+
let rec eval_expr tree (e:expr) : value = match e with
| `Call (f,args) -> (List.assoc f globals) (List.map (eval_expr tree) args)
| `Auto(a) -> `NodeSet(ignore (Automaton.BottomUp.accept a tree);
begin
match args with
[ `Auto(a); `String(s) ] ->
- let docs = Tree.Binary.contains tree s
- in
+ let docs = try
+ Hashtbl.find a.Automaton.contains s
+ with
+ | Not_found ->
+ let r = Tree.Binary.contains tree s
+ in
+ (* Tree.Binary.DocIdSet.iter (fun id ->
+ Printf.eprintf "%s matches %s\n%!" (Tree.Binary.get_string tree id) s) r; *)
+
+ Hashtbl.add a.Automaton.contains s r;r
+ in
let _ = Automaton.BottomUp.accept ~strings:(Some docs) a tree
in `NodeSet(a.Automaton.result)
| _ -> failwith "contains invalid"
let rec map_dir (d,acc) = function
| [] -> acc
| s::r -> map_dir ((dir s),(s,d)::acc) r
- in let l = match p with
- | Absolute p | Relative p -> map_dir (Final,[]) p
- | AbsoluteDoS p ->
- let l = (map_dir (Final,[]) p)
- in ((DescendantOrSelf,TagSet.Xml.node,Expr True),dir (fst(List.hd l)))::l
- in ((Child,TagSet.Xml.node,Expr True),dir (fst(List.hd l)))::l
-
+ in
+ let l =
+ match p with
+ | Absolute p
+ | Relative p -> map_dir (Final,[]) p
+ | AbsoluteDoS p ->
+ let l = (map_dir (Final,[]) p)
+ in ((DescendantOrSelf,TagSet.Xml.node,Expr True),dir (fst(List.hd l)))::l
+ in ((Child,TagSet.Xml.node,Expr True),dir (fst(List.hd l)))::l
+
let rec compile_step q dir trs final initial ignore (axis,test,pred) =
let q' = State.mk() in