+INLINE=10
DEBUG=false
PROFILE=true
VERBOSE=false
-BASESRC=custom.ml ptset.ml finiteCofinite.ml tag.ml tagSet.ml options.ml tree.ml
-BASEMLI=sigs.mli ptset.mli finiteCofinite.mli tag.mli tagSet.mli options.mli tree.mli
-MLSRCS = memory.ml $(BASESRC) ata.ml ulexer.ml xPath.ml main.ml
-MLISRCS = memory.mli $(BASEMLI) ata.mli ulexer.mli xPath.mli
+BASESRC=custom.ml ptset.ml finiteCofinite.ml tag.ml tagSet.ml options.ml tree.ml ata.ml
+BASEMLI=sigs.mli ptset.mli finiteCofinite.mli tag.mli tagSet.mli options.mli tree.mli ata.mli
+MLSRCS = memory.ml $(BASESRC) ulexer.ml xPath.ml main.ml
+MLISRCS = memory.mli $(BASEMLI) ulexer.mli xPath.mli
BASEOBJS= $(BASESRC:.ml=.cmx)
BASEINT= $(BASEMLI:.ml=.cmi)
MLOBJS = $(MLSRCS:.ml=.cmx)
SYNT_DEBUG = -ppopt -DDEBUG
else
CXX = g++
-OCAMLOPT = ocamlopt -g -cc "$(CXX)" -ccopt -O3 -ccopt -std=c++0x -noassert -inline 10000
+OCAMLOPT = ocamlopt -g -cc "$(CXX)" -ccopt -O3 -ccopt -std=c++0x -noassert -inline $(INLINE)
endif
ifeq ($(PROFILE), true)
SYNT_PROF = $(SYNT_DEBUG) -ppopt -DPROFILE
return;
}
+
extern "C" CAMLprim value caml_call_shredder_uri(value uri,value sf, value iet, value dtc){
CAMLparam1(uri);
CAMLlocal1(doc);
DocID tid;
if (id == NULLT)
return;
- // int tag = tree->Tag(id);
+ //int tag = tree->Tag(id);
if (id) {
tid = tree->PrevText(id);
char * data = (char *) (tree->getTextCollection())->GetText(tid);
extern "C" CAMLprim value caml_xml_tree_root(value tree){
CAMLparam1(tree);
- CAMLreturn (TREENODEVAL(XMLTREE(tree)->Root()));
+ CAMLreturn (Val_int(TREENODEVAL(XMLTREE(tree)->Root())));
}
extern "C" CAMLprim value caml_xml_tree_text_collection(value tree){
CAMLparam1(tree);
CAMLparam2(tree,id);
CAMLreturn(Val_int (XMLTREE(tree)->Parent(TREENODEVAL(id))));
}
+extern "C" CAMLprim value caml_xml_tree_prev_sibling(value tree, value id){
+ CAMLparam2(tree,id);
+ CAMLreturn(Val_int (XMLTREE(tree)->PrevSibling(TREENODEVAL(id))));
+}
+
extern "C" CAMLprim value caml_xml_tree_parent_doc(value tree, value id){
CAMLparam2(tree,id);
CAMLreturn(Val_int (XMLTREE(tree)->ParentNode((DocID) Int_val(id))));
}
+extern "C" CAMLprim value caml_xml_tree_prev_doc(value tree, value id){
+ CAMLparam2(tree,id);
+ CAMLreturn(Val_int (XMLTREE(tree)->PrevNode((DocID) Int_val(id))));
+}
+
extern "C" CAMLprim value caml_xml_tree_is_ancestor(value tree,value id1, value id2) {
CAMLparam3(tree,id1,id2);
CAMLreturn(Val_bool (XMLTREE(tree)->IsAncestor(TREENODEVAL(id1),TREENODEVAL(id2))));
CAMLreturn(Val_int (XMLTREE(tree)->TaggedDesc(TREENODEVAL(id),(TagType) Int_val(tag))));
}
-extern "C" CAMLprim value caml_xml_tree_tagged_next(value tree, value id, value tag){
- CAMLparam3(tree,id,tag);
- CAMLreturn(Val_int (XMLTREE(tree)->TaggedNext(TREENODEVAL(id),(TagType) Int_val(tag))));
-}
-
-
-
extern "C" CAMLprim value caml_xml_tree_tagged_foll(value tree, value id, value tag){
CAMLparam3(tree,id,tag);
CAMLreturn(Val_int (XMLTREE(tree)->NextSibling(TREENODEVAL(id))));
}
-extern "C" CAMLprim value caml_xml_tree_prev_sibling(value tree, value id){
- CAMLparam2(tree,id);
- CAMLreturn(Val_int (XMLTREE(tree)->PrevSibling(TREENODEVAL(id))));
-}
-
extern "C" CAMLprim value caml_xml_tree_prev_text(value tree, value id){
CAMLparam2(tree,id);
CAMLlocal1(res);
memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*));
CAMLreturn(doc);
}
+
+extern "C" {
+ static int caml_empty_vector[] = { 0 };
+}
+
+extern "C" CAMLprim value caml_int_vector_empty(value unit){
+ CAMLparam1(unit);
+ CAMLreturn ((value) caml_empty_vector);
+}
+
+extern "C" CAMLprim value caml_int_vector_length(value vec){
+ CAMLparam1(vec);
+ CAMLreturn (Val_int( ((int*) caml_empty_vector)[0] ));
+}
+extern "C" CAMLprim value caml_int_vector_alloc(value len){
+ CAMLparam1(len);
+ int * vec = (int *) malloc(sizeof(int)*(Int_val(len)+1));
+ vec[0] = Int_val(len);
+ CAMLreturn ((value) vec);
+}
+
+extern "C" CAMLprim value caml_int_vector_set(value vec, value i, value v){
+ CAMLparam3(vec,i,v);
+
+ ((int*) vec)[Int_val(i)+1] = Int_val(v);
+ CAMLreturn (Val_unit);
+}
+
+
+#define VECT(x) ((int*) (x))
+extern "C" CAMLprim value caml_xml_tree_tagged_below(value tree, value node, value ctags, value dtags){
+ CAMLparam4(tree,node,ctags,dtags);
+
+ CAMLreturn (Val_int (
+ (XMLTREE(tree)->TaggedBelow(TREENODEVAL(node),
+ &(VECT(ctags)[1]),
+ VECT(ctags)[0],
+ &(VECT(dtags)[1]),
+ VECT(dtags)[0]))));
+}
+
+extern "C" CAMLprim value caml_xml_tree_tagged_next(value tree, value node, value ctags, value ftags,value root){
+ CAMLparam5(tree,node,ctags,ftags,root);
+ CAMLreturn (Val_int (
+ (XMLTREE(tree)->TaggedNext(TREENODEVAL(node),
+ &(VECT(ctags)[1]),
+ VECT(ctags)[0],
+ &(VECT(ftags)[1]),
+ VECT(ftags)[0],
+ TREENODEVAL(root)))));
+}
+
+extern "C" CAMLprim value caml_xml_tree_tagged_desc_only(value tree, value node,value dtags){
+ CAMLparam3(tree,node,dtags);
+
+ CAMLreturn (Val_int (
+ (XMLTREE(tree)->TaggedDescOnly(TREENODEVAL(node),
+ &(VECT(dtags)[1]),
+ VECT(dtags)[0]))));
+}
+
+extern "C" CAMLprim value caml_xml_tree_tagged_foll_only(value tree, value node, value ftags,value root){
+ CAMLparam4(tree,node,ftags,root);
+ CAMLreturn (Val_int (
+ (XMLTREE(tree)->TaggedFollOnly(TREENODEVAL(node),
+ &(VECT(ftags)[1]),
+ VECT(ftags)[0],
+ TREENODEVAL(root)))));
+}
+
+extern "C" CAMLprim value caml_xml_tree_tagged_desc_or_foll_only(value tree, value node, value ftags,value root){
+ CAMLparam4(tree,node,ftags,root);
+ CAMLreturn (Val_int (
+ (XMLTREE(tree)->TaggedDescOrFollOnly(TREENODEVAL(node),
+ &(VECT(ftags)[1]),
+ VECT(ftags)[0],
+ TREENODEVAL(root)))));
+}
+
+extern "C" CAMLprim value caml_xml_tree_doc_ids(value tree, value node){
+ CAMLparam2(tree,node);
+ CAMLlocal1(tuple);
+ tuple = caml_alloc_tuple(2);
+ range r = (XMLTREE(tree)->DocIds(TREENODEVAL(node)));
+ caml_initialize(&Field(tuple,0),Val_int(r.min));
+ caml_initialize(&Field(tuple,1),Val_int(r.max));
+ CAMLreturn (tuple);
+}
| False | True
| Or of formula * formula
| And of formula * formula
- | Atom of ([ `Left | `Right ]*bool*state)
+ | Atom of ([ `Left | `Right | `LLeft | `RRight ]*bool*state)
and formula = { fid: int;
+ fkey : int;
pos : formula_expr;
neg : formula;
- st : Ptset.t*Ptset.t;
+ st : (Ptset.t*Ptset.t)*(Ptset.t*Ptset.t);
size: int;
}
+external hash_const_variant : [> ] -> int = "%identity"
+external int_bool : bool -> int = "%identity"
+
+let hash_node_form t = match t with
+ | False -> 0
+ | True -> 1
+ | And(f1,f2) -> (2+17*f1.fkey + 37*f2.fkey) land max_int
+ | Or(f1,f2) -> (3+101*f1.fkey + 253*f2.fkey) land max_int
+ | Atom(v,b,s) -> ((hash_const_variant v) + (3846*(int_bool b) +257) + (s lsl 13 - s)) land max_int
+
module FormNode =
struct
type t = formula
- let hash t = match t.pos with
- | False -> 0
- | True -> 1
- | And(f1,f2) -> (2+17*f1.fid + 37*f2.fid) land max_int
- | Or(f1,f2) -> (3+101*f1.fid + 253*f2.fid) land max_int
- | Atom(`Left,true,s) -> (5 + 11 * 23 * s) land max_int
- | Atom(`Right,true,s) -> (5 + 19 * 23 * s) land max_int
- | Atom(`Left,false,s) -> (5 + 11 * 39 * s) land max_int
- | Atom(`Right,false,s) -> (5 + 19 * 39 * s) land max_int
-
-
+
+ let hash t = t.fkey
let equal f1 f2 =
- if f1.fid == f2.fid || f1.pos == f2.pos then true
+ if f1.fid == f2.fid || f1.fkey == f2.fkey || f1.pos == f2.pos then true
else
match f1.pos,f2.pos with
| False,False | True,True -> true
- | Atom(d1,b1,s1), Atom(d2,b2,s2) when (b1==b2) && (s1=s2) && (d1 = d2) -> true
+ | Atom(d1,b1,s1), Atom(d2,b2,s2) when (b1==b2) && (s1==s2) && (d1 = d2) -> true
| Or(g1,g2),Or(h1,h2)
| And(g1,g2),And(h1,h2) -> g1.fid == h1.fid && g2.fid == h2.fid
| _ -> false
+
end
module WH = Weak.Make(FormNode)
let f_pool = WH.create 107
+let empty_pair = Ptset.empty,Ptset.empty
+let empty_quad = empty_pair,empty_pair
+
let true_,false_ =
- let rec t = { fid = 1; pos = True; neg = f ; st = Ptset.empty,Ptset.empty; size =1; }
- and f = { fid = 0; pos = False; neg = t; st = Ptset.empty,Ptset.empty; size = 1; }
+ let rec t = { fid = 1; pos = True; fkey=1; neg = f ; st = empty_quad; size =1; }
+ and f = { fid = 0; pos = False; fkey=0; neg = t; st = empty_quad; size = 1; }
in
WH.add f_pool f;
WH.add f_pool t;
let cons pos neg s1 s2 size1 size2 =
let rec pnode =
{ fid = gen_id ();
+ fkey = hash_node_form pos;
pos = pos;
neg = nnode;
st = s1;
and nnode = {
fid = gen_id ();
pos = neg;
+ fkey = hash_node_form neg;
neg = pnode;
st = s2;
size = size2;
let atom_ d p s =
let si = Ptset.singleton s in
let ss = match d with
- | `Left -> si,Ptset.empty
- | `Right -> Ptset.empty,si
+ | `Left -> (si,Ptset.empty),empty_pair
+ | `Right -> empty_pair,(si,Ptset.empty)
+ | `LLeft -> (Ptset.empty,si),empty_pair
+ | `RRight -> empty_pair,(Ptset.empty,si)
in fst (cons (Atom(d,p,s)) (Atom(d,not p,s)) ss ss 1 1)
+
+let union_quad ((l1,ll1),(r1,rr1)) ((l2,ll2),(r2,rr2)) =
+ (Ptset.union l1 l2 ,Ptset.union ll1 ll2),
+ (Ptset.union r1 r2 ,Ptset.union rr1 rr2)
let merge_states f1 f2 =
let sp =
- Ptset.union (fst f1.st) (fst f2.st),
- Ptset.union (snd f1.st) (snd f2.st)
+ union_quad f1.st f2.st
and sn =
- Ptset.union (fst f1.neg.st) (fst f2.neg.st),
- Ptset.union (snd f1.neg.st) (snd f2.neg.st)
+ union_quad f1.neg.st f2.neg.st
in
sp,sn
-
+
let full_or_ f1 f2 =
let f1,f2 = if f1.fid < f2.fid then f2,f1 else f1,f2 in
let sp,sn = merge_states f1 f2 in
(pr_frm ppf f2);
| Atom(dir,b,s) -> Format.fprintf ppf "%s%s[%i]"
(if b then "" else "¬")
- (if dir = `Left then "↓₁" else "↓₂") s
+ (match dir with
+ | `Left -> "↓₁"
+ | `Right -> "↓₂"
+ | `LLeft -> "⇓₁"
+ | `RRight -> "⇓₂") s
let dnf_hash = Hashtbl.create 17
let rec dnf_aux f = match f.pos with
| False -> PL.empty
| True -> PL.singleton (Ptset.empty,Ptset.empty)
- | Atom(`Left,_,s) -> PL.singleton (Ptset.singleton s,Ptset.empty)
- | Atom(`Right,_,s) -> PL.singleton (Ptset.empty,Ptset.singleton s)
+ | Atom((`Left|`LLeft),_,s) -> PL.singleton (Ptset.singleton s,Ptset.empty)
+ | Atom((`Right|`RRight),_,s) -> PL.singleton (Ptset.empty,Ptset.singleton s)
| Or(f1,f2) -> PL.union (dnf f1) (dnf f2)
| And(f1,f2) ->
- let pl1 = dnf f1
- and pl2 = dnf f2
- in
- PL.fold (fun (s1,s2) acc ->
- PL.fold ( fun (s1', s2') acc' ->
- (PL.add
- ((Ptset.union s1 s1'),
- (Ptset.union s2 s2')) acc') )
- pl2 acc )
- pl1 PL.empty
-
-
- and dnf f =
- try
+ let pl1 = dnf f1
+ and pl2 = dnf f2
+ in
+ PL.fold (fun (s1,s2) acc ->
+ PL.fold ( fun (s1', s2') acc' ->
+ (PL.add
+ ((Ptset.union s1 s1'),
+ (Ptset.union s2 s2')) acc') )
+ pl2 acc )
+ pl1 PL.empty
+
+
+ and dnf f =
+ try
Hashtbl.find dnf_hash f.fid
with
- Not_found ->
- let d = dnf_aux f in
- Hashtbl.add dnf_hash f.fid d;d
+ Not_found ->
+ let d = dnf_aux f in
+ Hashtbl.add dnf_hash f.fid d;d
- let can_top_down f =
+ let can_top_down f =
let nf = dnf f in
if (PL.cardinal nf > 3)then None
else match PL.elements nf with
- | [(s1,s2); (t1,t2); (u1,u2)] when
- Ptset.is_empty s1 && Ptset.is_empty s2 && Ptset.is_empty t1 && Ptset.is_empty u2
- -> Some(true,t2,u1)
- | [(t1,t2); (u1,u2)] when Ptset.is_empty t1 && Ptset.is_empty u2
- -> Some(false,t2,u1)
- | _ -> None
+ | [(s1,s2); (t1,t2); (u1,u2)] when
+ Ptset.is_empty s1 && Ptset.is_empty s2 && Ptset.is_empty t1 && Ptset.is_empty u2
+ -> Some(true,t2,u1)
+ | [(t1,t2); (u1,u2)] when Ptset.is_empty t1 && Ptset.is_empty u2
+ -> Some(false,t2,u1)
+ | _ -> None
let equal_form f1 f2 =
let s =
if TagSet.is_finite ts
- then "{" ^ (TagSet.fold (fun t a -> a ^ " " ^ (Tag.to_string t)) ts "") ^"}"
+ then "{" ^ (TagSet.fold (fun t a -> a ^ " '" ^ (Tag.to_string t)^"'") ts "") ^" }"
else let cts = TagSet.neg ts in
if TagSet.is_empty cts then "*" else
(TagSet.fold (fun t a -> a ^ " " ^ (Tag.to_string t)) cts "*\\{"
Format.fprintf ppf ",%s %s " (Tag.to_string t) (if b then "=>" else "->");
pr_frm ppf f;
Format.fprintf ppf "(fid=%i) left=" f.fid;
- let l,r = f.st in pr_st ppf (Ptset.elements l);
+ let (l,ll),(r,rr) = f.st in
+ pr_st ppf (Ptset.elements l);
+ Format.fprintf ppf ", ";
+ pr_st ppf (Ptset.elements ll);
Format.fprintf ppf ", right=";
pr_st ppf (Ptset.elements r);
+ Format.fprintf ppf ", ";
+ pr_st ppf (Ptset.elements rr);
Format.fprintf ppf "\n";
) a.sigma;
Format.fprintf ppf "=======================================\n"
let equal_trans (q1,t1,m1,f1,_) (q2,t2,m2,f2,_) =
(q1 == q2) && (TagSet.equal t1 t2) && (m1 == m2) && (equal_form f1 f2)
-
+
module TS =
struct
type node = Nil | Cons of Tree.t * node | Concat of node*node
let cons e t = node (Cons(e,t.node)) (t.size+1)
let concat t1 t2 = node (Concat (t1.node,t2.node)) (t1.size+t2.size)
- let append e t = concat t (cons e empty)
+ let append = cons
+(* let append e t = node (Concat(t.node,Cons(e,Nil))) (t.size+1) *)
let to_list_rev t =
let rec aux acc l rest =
| Concat(n1,n2) -> let _ = loop n1 in loop n2
in loop n
- end
- module TS2 =
- struct
- type t = string
- let empty = String.make 10_000_000 '0'
- let cons e t = t.[Tree.id e] <- '1';t
- let append = cons
- let concat s1 s2 = failwith "not implemented"
-
- let length t =
- let res = ref 0 in
- for i = 0 to 9_999_999 do
- if t.[i] == '1' then
- incr res
- done; !res
-
- let iter f t = failwith "not implemented"
- let to_list_rev t = failwith "not implemented"
- end
+ let rev_iter f { node = n } =
+ let rec loop = function
+ | Nil -> ()
+ | Cons(e,n) -> let _ = loop n in f e
+ | Concat(n1,n2) -> let _ = loop n2 in loop n1
+ in loop n
- module BottomUpNew = struct
-
-IFDEF DEBUG
-THEN
- type trace =
- | TNil of Ptset.t*Ptset.t
- | TNode of Ptset.t*Ptset.t*bool* (int*bool*formula) list
-
- let traces = Hashtbl.create 17
- let dump_trace t =
- let out = open_out "debug_trace.dot"
- in
- let outf = Format.formatter_of_out_channel out in
-
- let rec aux t num =
- if Tree.is_node t
- then
- match (try Hashtbl.find traces (Tree.id t) with Not_found -> TNil(Ptset.empty,Ptset.empty)) with
- | TNode(r,s,mark,trs) ->
- let numl = aux (Tree.left t) num in
- let numr = aux (Tree.right t) (numl+1) in
- let mynum = numr + 1 in
- Format.fprintf outf "n%i [ label=\"<%s>\\nr=" mynum (Tag.to_string (Tree.tag t));
- pr_st outf (Ptset.elements r);
- Format.fprintf outf "\\ns=";
- pr_st outf (Ptset.elements s);
- List.iter (fun (q,m,f) ->
- Format.fprintf outf "\\n%i %s" q (if m then "⇨" else "→");
- pr_frm outf f ) trs;
- Format.fprintf outf "\", %s shape=box ];\n"
- (if mark then "color=cyan1, style=filled," else "");
- let _ = Format.fprintf outf "n%i -> n%i;\n" mynum numl in
- let _ = Format.fprintf outf "n%i -> n%i;\n" mynum numr in
- mynum
- | TNil(r,s) -> Format.fprintf outf "n%i [ shape=box, label=\"Nil\\nr=" num;
- pr_st outf (Ptset.elements r);
- Format.fprintf outf "\\ns=";
- pr_st outf (Ptset.elements s);
- Format.fprintf outf "\"];\n";num
- else
- match Hashtbl.find traces (-10) with
- | TNil(r,s) ->
- Format.fprintf outf "n%i [ shape=box, label=\"Nil\\nr=" num;
- pr_st outf (Ptset.elements r);
- Format.fprintf outf "\\ns=";
- pr_st outf (Ptset.elements s);
- Format.fprintf outf "\"];\n";
- num
- | _ -> assert false
+ let find f { node = n } =
+ let rec loop = function
+ | Nil -> raise Not_found
+ | Cons(e,n) -> if f e then e else loop n
+ | Concat(n1,n2) -> try
+ loop n1
+ with
+ | Not_found -> loop n2
in
- Format.fprintf outf "digraph G {\n";
- ignore(aux t 0);
- Format.fprintf outf "}\n%!";
- close_out out;
- ignore(Sys.command "dot -Tsvg debug_trace.dot > debug_trace.svg")
-END
-
+ loop n
+ end
+(*
+ module BottomUpJumpNew = struct
+*)
module HFEval = Hashtbl.Make(
struct
type t = int*Ptset.t*Ptset.t
let hash (a,b,c) =
a+17*(Ptset.hash b) + 31*(Ptset.hash c)
end)
-
+
let hfeval = HFEval.create 4097
-(* let miss = ref 0
- let call = ref 0
- let timeref = ref 0.0
- let timerefall = ref 0.0
- let time f x =
- incr call;
- let t1 = Unix.gettimeofday ()
- in let r = f x
- in
- timeref := !timeref +. ((Unix.gettimeofday()) -. t1);
- r
-
- let timeall f x =
- let t1 = Unix.gettimeofday ()
- in let r = f x
- in
- timerefall := !timerefall +. ((Unix.gettimeofday()) -. t1);
- r
-
-*)
-
-
let eval_form_bool f s1 s2 =
let rec eval f = match f.pos with
- | Atom(`Left,b,q) -> if b == (Ptset.mem q s1) then (true,true,false) else false,false,false
- | Atom(`Right,b,q) -> if b == (Ptset.mem q s2) then (true,false,true) else false,false,false
+ | Atom((`Left|`LLeft),b,q) -> if b == (Ptset.mem q s1) then (true,true,false) else false,false,false
+ | Atom((`Right|`RRight),b,q) -> if b == (Ptset.mem q s2) then (true,false,true) else false,false,false
(* test some inlining *)
| True -> true,true,true
| False -> false,false,false
in eval f
- module HFEvalDir = Hashtbl.Make(
- struct
- type t = int*Ptset.t*[`Left | `Right ]
- let equal (a,b,c) (d,e,f) =
- a==d && (Ptset.equal b e) && (c = f)
- let hash_dir = function `Left -> 7919
- | `Right -> 3517
-
- let hash (a,b,c) =
- a+17*(Ptset.hash b) + 31*(hash_dir c)
- end)
-
- let hfeval_dir = HFEvalDir.create 4097
-
-
- let eval_dir dir f s =
- let rec eval f = match f.pos with
- | Atom(d,b,q) when d = dir -> if b == (Ptset.mem q s) then true_ else false_
- | Atom(_,b,q) -> f
- (* test some inlining *)
- | True -> true_
- | False -> false_
- | _ ->
- try
- HFEvalDir.find hfeval_dir (f.fid,s,dir)
- with
- | Not_found ->
- let r =
- match f.pos with
- | Or(f1,f2) ->
- let f1 = eval f1
- in
- if is_true f1 then true_
- else if is_false f1 then eval f2
- else or_ f1 f2
- | And(f1,f2) ->
- let f1 = eval f1 in
- if is_false f1 then false_
- else if is_true f1 then eval f2
- else and_ f1 f2
- | _ -> assert false
- in
- HFEvalDir.add hfeval_dir (f.fid,s,dir) r;
- r
-
- in eval f
-
-
-
let fstate_pool = Hashtbl.create 11
let merge_pred a b = match a,b with
else f,false in
(or_ tmpf accf,accm||m,acchtrue||hastrue)
else (accf,accm,acchtrue)
- ) acc (Hashtbl.find a.phi q)
-
+ ) acc (try Hashtbl.find a.phi q with Not_found -> [])
let get_trans t a tag r =
try
HTagSet.add a.sigma (accq,tag) (mark,f,([],[]),has_true);
f.st,f,mark,has_true,accq
+ let h_union = Hashtbl.create 4097
+
+ let pt_cup s1 s2 =
+ let h = (Ptset.hash s1,Ptset.hash s2) in
+ try
+ Hashtbl.find h_union h
+ with
+ | Not_found -> let s = Ptset.union s1 s2
+ in
+ Hashtbl.add h_union h s;s
+
+
+
+ let tags_of_state a q = Hashtbl.fold
+ (fun p l acc ->
+ if p == q then
+ List.fold_left
+ (fun acc (ts,_) ->
+ pt_cup (TagSet.positive ts) acc) acc l
+ else acc) a.phi Ptset.empty
+
+ let h_tags_states = Hashtbl.create 4096
+
+
- let check_pred l t = true (*l = [] ||
- List.exists (function p ->
- match p with
- `Left f | `Right f -> f t
- | _ -> assert false) l
- *)
+
+ let tags a qs =
+ try
+ Hashtbl.find h_tags_states (Ptset.hash qs)
+ with
+ | Not_found ->
+ let l = Ptset.fold (fun q acc -> pt_cup acc (tags_of_state a q)) qs Ptset.empty
+ in
+ Hashtbl.add h_tags_states (Ptset.hash qs) l;l
+
+ let time cpt acc f x =
+ let t1 = Unix.gettimeofday () in
+ let r = f x in
+ let t2 = Unix.gettimeofday () in
+ let t = (1000. *.(t2 -. t1)) in
+ acc:=!acc+.t;
+ incr cpt;
+ r
+
+ let h_time = Hashtbl.create 4096
+ let calls = ref 0
- let rec accepting_among2 a t r acc =
+ let rtime s f x =
+
+ let cpt,atime =
+ try
+ Hashtbl.find h_time s
+ with
+ | _ -> (ref 0, ref 0.)
+ in
+ let r = time cpt atime f x
+ in
+ Hashtbl.replace h_time s (cpt,atime);
+ r
+
+ let rec accepting_among_time a t r ctx =
+ incr calls;
let orig = r in
let rest = Ptset.inter r a.final in
let r = Ptset.diff r rest in
- if Ptset.is_empty r then rest,acc else
- if (not (Tree.is_node t))
+ if Ptset.is_empty r then rest,TS.empty else
+ if Tree.is_node t
then
- orig,acc
- else
- let t1 = Tree.first_child t
- and t2 = Tree.next_sibling t in
- let (r1,r2),formula,mark,has_true,r = get_trans t a (Tree.tag t) r
- in
- let s1,res1 = accepting_among2 a t1 r1 acc
- in
- let formula = eval_dir `Left formula s1 in
- if is_false formula then rest,acc
- else
- if is_true formula then (* tail call equivalent to a top down *)
- accepting_among2 a t2 orig (if mark then TS.append t res1 else res1)
+ let among,result,form =
+ let ((ls,lls),(rs,rrs)),formula,mark,has_true,r' =
+ let tag = rtime "Tree.tag" Tree.tag t in
+ rtime "get_trans" (get_trans t a tag) r
+ in
+ let tl = rtime "tags" (tags a) ls
+ and tr = rtime "tags" (tags a) rs
+ and tll = rtime "tags" (tags a) lls
+ and trr = rtime "tags" (tags a) rrs
+ in
+ let first =
+ if Ptset.mem Tag.pcdata (pt_cup tl tll)
+ then
+ rtime "Tree.text_below" (Tree.text_below) t
else
- let s2,res2 = accepting_among2 a t2 r2 res1
+ let etl = Ptset.is_empty tl
+ and etll = Ptset.is_empty tll
in
- let formula = eval_dir `Right formula s2
+ if etl && etll
+ then Tree.mk_nil t
+ else
+ if etl then rtime "Tree.tagged_desc_only" (Tree.tagged_desc_only t) tll
+ else if etll then rtime "Tree.first_child" (Tree.first_child) t
+ else (* add child only *)
+ rtime "Tree.tagged_below" (Tree.tagged_below t tl) tll
+ and next =
+ if Ptset.mem Tag.pcdata (pt_cup tr trr)
+ then
+ rtime "Tree.text_next" (Tree.text_next t) ctx
+ else
+ let etr = Ptset.is_empty tr
+ and etrr = Ptset.is_empty trr
in
- if is_false formula then rest,res1
+ if etr && etrr
+ then Tree.mk_nil t
else
- orig,(if mark then TS.append t (res2)
- else res2)
+ if etr then rtime "Tree.tagged_foll_only" (Tree.tagged_foll_only t trr) ctx
+ else if etrr then rtime "Tree.next_sibling" (Tree.next_sibling) t
+ else (* add ns only *)
+ rtime "Tree.tagged_next" (Tree.tagged_next t tr trr) ctx
+
+ in
+ let s1,res1 = accepting_among_time a first (pt_cup ls lls) t
+ and s2,res2 = accepting_among_time a next (pt_cup rs rrs) ctx
+ in
+ let rb,rb1,rb2 = rtime "eval_form_bool" (eval_form_bool formula s1) s2 in
+ if rb
+ then
+ let res1 = if rb1 then res1 else TS.empty
+ and res2 = if rb2 then res2 else TS.empty
+ in r', rtime "TS.concat" (TS.concat res2) (if mark then rtime "TS.append" (TS.append t) res1 else res1),formula
+ else Ptset.empty,TS.empty,formula
+
+ in
+
+ among,result
+
+ else orig,TS.empty
+
+ let run_time a t =
+ let st,res = accepting_among_time a t a.init t in
+ let _ = Printf.eprintf "\n Timings\n";
+ let total_time = Hashtbl.fold (fun fname ({ contents=cpt }, {contents=atime}) (total_time) ->
+ Printf.eprintf "%s\t %i calls, %f ms accumulated time, %f ms mean time\n"
+ fname cpt atime (atime /. (float_of_int cpt));
+ total_time +. atime ) h_time 0.
+ in
+ Printf.eprintf "total calls %i, total monitored time %f ms\n%!" !calls total_time
+ in
+ if Ptset.is_empty (st) then TS.empty else res
- let rec accepting_among a t r =
+
+
+ let rec accepting_among a t r ctx =
let orig = r in
let rest = Ptset.inter r a.final in
let r = Ptset.diff r rest in
if Ptset.is_empty r then rest,TS.empty else
if Tree.is_node t
then
- let (r1,r2),formula,mark,has_true,r = get_trans t a (Tree.tag t) r
- in
- let s1,res1 = accepting_among a (Tree.first_child t) r1
- and s2,res2 = accepting_among a (Tree.next_sibling t) r2
- in
- let rb,rb1,rb2 = eval_form_bool formula s1 s2 in
- if rb
- then
- let res1 = if rb1 then res1 else TS.empty
- and res2 = if rb2 then res2 else TS.empty
- in r, TS.concat res2 (if mark then TS.cons t res1 else res1)
- else orig,TS.empty
+ let among,result,form =
+ let ((ls,lls),(rs,rrs)),formula,mark,has_true,r' =
+ let tag = Tree.tag t in
+ get_trans t a tag r
+ in
+ let tl = tags a ls
+ and tr = tags a rs
+ and tll = tags a lls
+ and trr = tags a rrs
+ in
+ let first =
+ if Ptset.mem Tag.pcdata (pt_cup tl tll)
+ then
+ Tree.text_below t
+ else
+ let etl = Ptset.is_empty tl
+ and etll = Ptset.is_empty tll
+ in
+ if etl && etll
+ then Tree.mk_nil t
+ else
+ if etl then Tree.tagged_desc_only t tll
+ else if etll then Tree.first_child t
+ else (* add child only *)
+ Tree.tagged_below t tl tll
+ and next =
+ if Ptset.mem Tag.pcdata (pt_cup tr trr)
+ then
+ Tree.text_next t ctx
+ else
+ let etr = Ptset.is_empty tr
+ and etrr = Ptset.is_empty trr
+ in
+ if etr && etrr
+ then Tree.mk_nil t
+ else
+ if etr then Tree.tagged_foll_only t trr ctx
+ else if etrr then Tree.next_sibling t
+ else (* add ns only *)
+ Tree.tagged_next t tr trr ctx
+
+ in
+ let s1,res1 = accepting_among a first (pt_cup ls lls) t
+ and s2,res2 = accepting_among a next (pt_cup rs rrs) ctx
+ in
+ let rb,rb1,rb2 = eval_form_bool formula s1 s2 in
+ if rb
+ then
+ let res1 = if rb1 then res1 else TS.empty
+ and res2 = if rb2 then res2 else TS.empty
+ in r', TS.concat res2 (if mark then TS.append t res1 else res1),formula
+ else Ptset.empty,TS.empty,formula
+
+ in
+ among,result
+
else orig,TS.empty
+
+ let run a t =
+ let st,res = accepting_among a t a.init t in
+ if Ptset.is_empty (st) then TS.empty else res
-
- let rec accepting_count a t r =
- let orig = r in
- let rest = Ptset.inter r a.final in
- let r = Ptset.diff r rest in
- if Ptset.is_empty r then rest,0 else
- if Tree.is_node t
- then
- let (r1,r2),formula,mark,has_true,r = get_trans t a (Tree.tag t) r
- in
- let s1,res1 = accepting_count a (Tree.first_child t) r1
- and s2,res2 = accepting_count a (Tree.next_sibling t) r2
- in
- let rb,rb1,rb2 = eval_form_bool formula s1 s2 in
- if rb
- then
- let res1 = if rb1 then res1 else 0
- and res2 = if rb2 then res2 else 0
- in r, res1+res2+(if mark then 1 else 0)
- else orig,0
- else orig,0
-
- let run a t =
-(* let _ =
- call := 0; miss:=0;
- timeref := 0.0;
- HFEval.clear hfeval;
- Hashtbl.clear dnf_hash;
- Hashtbl.clear fstate_pool;
- in *)
- let st,res = accepting_among a t a.init in
- let b = Ptset.is_empty (st) in
- if b then TS.empty
- else
- res
-
- let run_count a t =
-(* let _ =
- call := 0; miss:=0;
- timeref := 0.0;
- timerefall := 0.0;
- HFEval.clear hfeval;
- Hashtbl.clear dnf_hash;
- Hashtbl.clear fstate_pool;
- in *)
- let st,res = accepting_count a t a.init in
- let b = Ptset.is_empty (st) in
- if b then 0
- else
- res
- end
-
- module Jump = struct
- let eval_dir = BottomUpNew.eval_dir
- let xi1 = HTagSet.create 10
- let xi2 = HTagSet.create 10
-
- let rec accept_from orig a t r acc =
- if (Tree.is_root t) ||
- (Ptset.subset orig r)
- then
- acc
- else
- let is_left = Tree.is_left t in
- let tag = Tree.tag t in
- let nr,f, mark =
- try
- HTagSet.find (if is_left then xi1 else xi2)
- (r,tag)
- with
- | Not_found ->
- let trans =
- Hashtbl.fold
- (fun q l acc ->
- List.fold_left (fun ((racc,facc,macc) as acc) (ts,(m,f,_)) ->
- let rl,rr = f.st in
- if (TagSet.mem tag ts) &&
- (Ptset.intersect (if is_left then rl else rr) r)
- then (Ptset.add q racc,or_ f facc, macc||m)
- else acc) acc l)
- a.phi (Ptset.empty,false_,false)
- in
- HTagSet.add (if is_left then xi1 else xi2) (r,tag) trans;
- trans
- in
- let form = eval_dir (if is_left then `Left else `Right) f r
- in
- if is_true form then accept_from orig a (Tree.parent t) nr
- (if mark then TS.cons t acc else acc)
- else if is_false form then TS.empty
- else assert false
-
- let run a t r =
- HTagSet.clear xi1;
- HTagSet.clear xi2;
- let orig =
- List.fold_left (fun s (_,(_,f,_)) ->
- Ptset.union s (fst f.st))
- Ptset.empty (Hashtbl.find a.phi (Ptset.choose a.init))
- in
- accept_from orig a t r TS.empty
-
+(*
end
+*)
| True
| Or of formula * formula
| And of formula * formula
- | Atom of ([ `Left | `Right ] * bool * state)
-and formula = { fid : int; pos : formula_expr; neg : formula; st : Ptset.t*Ptset.t; size: int;}
+ | Atom of ([ `Left | `Right | `LLeft | `RRight ] * bool * state)
+and formula = { fid : int; fkey : int; pos : formula_expr; neg : formula; st : (Ptset.t*Ptset.t)*(Ptset.t*Ptset.t); size: int;}
val true_ : formula
val false_ : formula
-val atom_ : [`Left | `Right ] -> bool -> state -> formula
+val atom_ : [`Left | `Right | `LLeft | `RRight ] -> bool -> state -> formula
val and_ : formula -> formula -> formula
val or_ : formula -> formula -> formula
val not_ : formula -> formula
-val equal_form : formula -> formula -> bool
+(*val equal_form : formula -> formula -> bool *)
val pr_frm : Format.formatter -> formula -> unit
val ( >=> ) : state*(TagSet.t*bool*predicate) -> formula -> t
val ( +| ) : formula -> formula -> formula
val ( *& ) : formula -> formula -> formula
-val ( ** ) : [`Left | `Right ] -> state -> formula
+val ( ** ) : [`Left | `Right | `LLeft | `RRight ] -> state -> formula
end
type transition = Transitions.t
-val equal_trans : transition -> transition -> bool
+val equal_trans : transition -> transition -> bool
module TS : sig
type t
val to_list_rev : t -> Tree.Binary.t list
val length : t -> int
val iter : (Tree.Binary.t -> unit) -> t -> unit
+ val rev_iter : (Tree.Binary.t -> unit) -> t -> unit
+ val find : (Tree.Binary.t -> bool) -> t -> Tree.Binary.t
end
-module BottomUpNew :
-sig
+(*module BottomUpJumpNew :
+sig *)
val run : t -> Tree.Binary.t -> TS.t
- val run_count : t -> Tree.Binary.t -> int
-end
+ val run_time : t -> Tree.Binary.t -> TS.t
+(*end *)
-module Jump :
-sig
- val run : t -> Tree.Binary.t -> Ptset.t -> TS.t
-end
DEFINE D(x) = ignore(x);
-
DEFINE MM(v,l) = (let ____x = v in (Memory.register ____x (Loc.to_string (l)));____x)
let () = Memory.schedule_stats ()
sig
type elt
type t
+ type set
val empty : t
val any : t
val is_empty : t -> bool
val choose : t -> elt
val hash : t -> int
val equal : t -> t -> bool
+ val positive : t -> set
+ val negative : t -> set
end
-module Make (E : Sigs.Set) : S with type elt = E.elt =
+module Make (E : Sigs.Set) : S with type elt = E.elt and type set = E.t =
struct
type elt = E.elt
type t = Finite of E.t | CoFinite of E.t
-
+ type set = E.t
let empty = Finite E.empty
let any = CoFinite E.empty
function Finite x -> (E.hash x)
| CoFinite x -> ( ~-(E.hash x) land max_int)
+ let positive =
+ function
+ | Finite x -> x
+ | _ -> E.empty
+
+ let negative =
+ function
+ | CoFinite x -> x
+ | _ -> E.empty
+
end
sig
type elt
type t
+ type set
val empty : t
val any : t
val is_empty : t -> bool
val choose : t -> elt
val hash : t -> int
val equal : t -> t -> bool
+ val positive : t -> set
+ val negative : t -> set
end
-module Make : functor (E : Sigs.Set) -> S with type elt = E.elt
+module Make : functor (E : Sigs.Set) -> S with type elt = E.elt and type set = E.t
open Ata
-
let l = ref [] ;;
let time f x =
let t1 = Unix.gettimeofday () in
;;
let total_time () = List.fold_left (+.) 0. !l;;
-let poa = TagSet.add Tag.pcdata (TagSet.singleton Tag.attribute)
-
-let rec fill_hashtag t =
- if Tree.Binary.is_node t then
- begin
- let tag = Tree.Binary.tag t in
- let a =
- if TagSet.mem tag poa
- then 0
- else
- fill_hashtag (Tree.Binary.first_child t)
- in
- let b = fill_hashtag (Tree.Binary.next_sibling t)
- in a+b+1
- end
- else 0
-
-
-let test_slashslash tree k =
- let test =
- match k with "*" -> TagSet.remove (Tag.tag "") TagSet.star
- | s -> TagSet.singleton (Tag.tag k)
- in
- let attorstring = TagSet.cup TagSet.pcdata TagSet.attribute in
- let rec aux t acc =
- if Tree.Binary.is_node t
- then
- let tag = Tree.Binary.tag t in
- let l = Tree.Binary.first_child t
- and r = Tree.Binary.next_sibling t
- in
- let acc =
- if TagSet.mem tag test
- then
- TS.append t acc
- else
- acc
- in
- let rl = if TagSet.mem tag attorstring then acc else aux l acc
- in aux r rl
- else
- acc
- in
- let _ = Printf.eprintf "Testing optimal //%s ... " k in
- let r = time (aux tree ) TS.empty in
- Printf.eprintf "Result set is %i nodes\n%!" (TS.length r)
-
-
-let test_jump tree k =
- let ttag = Tag.tag k in
-
- let rec loop acc tree =
- if Tree.Binary.is_node tree
- then
- let acc = TS.cons tree acc in
- loop acc (Tree.Binary.tagged_foll tree ttag)
- else
- acc
-
- in
- let _ = Printf.eprintf "Testing jumping for tag %s ... " k in
- let r = time (loop TS.empty ) (Tree.Binary.tagged_next tree ttag) in
- Printf.eprintf "Result set is %i nodes\n%!" (TS.length r)
-
-
-
-let test_traversal tree k =
- let ttag = Tag.tag k in
- let iid t = if Tree.Binary.is_node t then Tree.Binary.id t else -1 in
- let rec aux t =
- if Tree.Binary.is_node t
- then
- let tag = Tree.Binary.tag t in
- let l = Tree.Binary.first_child t
- and r = Tree.Binary.next_sibling t
- in
- let _ = Printf.eprintf "Tree with id %i and tag=%s, tagged_desc %s is %i tagged_foll is %i\n%!"
- (Tree.Binary.id t) (Tag.to_string tag) (k)
- (iid (Tree.Binary.tagged_desc t ttag))
- (iid (Tree.Binary.tagged_foll t ttag))
- in
- aux l;
- aux r;
-
- else
- ()
- in
- aux tree
-
-
-
-let test_count_subtree tree k =
- let ttag = Tag.tag k in
- let _ = Printf.eprintf "Counting subtrees with tag %s ... %!" k in
- let r = time(Tree.Binary.subtree_tags tree) ttag in
- Printf.eprintf "%i nodes \n%!" r
-
-
-let test_contains tree s =
- let _ = Printf.eprintf "Fetching DocIds containing %s ... %!" s in
- time (fun s -> let r = Tree.Binary.contains tree s in
- Tree.Binary.DocIdSet.iter
- (fun t -> output_string stderr
- (Tree.Binary.get_string tree t);
- output_char stderr '\n') r ) s
-
-
-let test_count_contains tree s =
- let _ = Printf.eprintf "Counting DocIds containing %s ... %!" s in
- let r = time (Tree.Binary.count_contains tree) s in
- Printf.eprintf "%i documents ids\n%!" (r)
-
-let test_contains_old tree s =
- let _ = Printf.eprintf "Fetching (old) DocIds containing %s ... %!" s in
- let r = time (Tree.Binary.contains_old tree) s in
- Printf.eprintf "%i documents ids\n%!" (Tree.Binary.DocIdSet.cardinal r)
-
-let test_contains_iter tree s =
- let _ = Printf.eprintf "Fetching (old) DocIds containing %s ... %!" s in
- let r = time (Tree.Binary.contains_iter tree) s in
- Printf.eprintf "%i documents ids\n%!" (Tree.Binary.DocIdSet.cardinal r)
-
-module Stack =
-struct
- type t = { mutable table: Tree.Binary.t array;
- mutable top : int }
-
- let empty = { table = Array.make 0 (Obj.magic 0);
- top = 0 }
- let cons e s =
- let ls = Array.length s.table in
- if ls > s.top
- then
- begin
- s.table.(s.top) <- e;
- s.top <- s.top + 1;
- s
- end
- else
-
- let a = Array.make (ls * 2 + 1) (Tree.Binary.root e)
- in
- Array.blit s.table 0 a 0 ls;
- s.table <- a;
- s.table.(s.top) <- e;
- s.top <- s.top + 1;
- s
-
-end
-
-let test_fast tree =
- let rec aux t acc =
- if Tree.Binary.is_node t
- then
- aux (Tree.Binary.right t)( aux (Tree.Binary.left t) (Stack.cons t acc))
- else acc
- in let _ = Printf.eprintf "Fast traversal ...%!" in
- time (aux tree) Stack.empty
-
-let test_cps tree =
- let rec aux t acc cont =
- if Tree.Binary.is_node t
- then aux (Tree.Binary.left t) (Stack.cons t acc) ((Tree.Binary.right t)::cont)
- else match cont with
- | [] -> acc
- | p::r -> aux p acc r
- in
- let _ = Printf.eprintf "CPS traversal ...%!" in
- time (aux tree Stack.empty) []
-
let main v query output =
let _ = Tag.init (Tree.Binary.tag_pool v) in
time (fill_hashtag) v;
*)
Printf.eprintf "Compiling query : ";
- let auto,_ = time XPath.Compile.compile query in
+ let auto,ltags,contains = time XPath.Compile.compile query in
+ let _ = Ata.dump Format.err_formatter auto in
+ let _ = Printf.eprintf "%!" in
+ let _ = match contains with
+ None -> ()
+ | Some s -> Tree.Binary.init_contains v s
+ in
Printf.eprintf "Execution time %s : " (if !Options.count_only then "(counting only)" else "");
begin
if !Options.count_only then
- let result = time (BottomUpNew.run_count auto) v
- in
- Printf.eprintf "Number of nodes in the result set : %i\n" result
- else
- let result = time (BottomUpNew.run auto) v in
- Printf.eprintf "Number of nodes in the result set : %i\n" (TS.length result);
+ failwith "Count only not implemented in this version"
+ else
+ let _ = Gc.set ({ Gc.get() with Gc.max_overhead = 1000000; Gc.space_overhead = 100 }) in
+ let result = time (if !Options.time then run_time auto else run auto) v in
+ Printf.eprintf "Number of nodes in the result set : %i\n" (TS.length result);
+ Printf.eprintf "\n%!";
begin
match output with
| None -> ()
time( fun () ->
let oc = open_out f in
output_string oc "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
- TS.iter (fun t -> output_string oc "----------\n";
+ TS.rev_iter (fun t -> output_string oc "----------\n";
Tree.Binary.print_xml_fast oc t;
output_char oc '\n') result) ();
end;
let output_file = ref None
let save_file = ref ""
let count_only = ref false
-
+let time = ref false
let usage_msg = Printf.sprintf "%s <input.{xml|srx}> 'query' [output]" Sys.argv.(0)
| 2 -> output_file := Some s; incr pos
| _ -> raise (Arg.Bad(s))
-let spec = [ "-c", Arg.Set(count_only), "counting only (don't materialize the result set";
+let spec = [ "-c", Arg.Set(count_only), "counting only (don't materialize the result set)";
+ "-t", Arg.Set(time), "print timing statistics";
"-f", Arg.Set_int(sample_factor), "sample factor [default=64]";
"-i", Arg.Set(index_empty_texts), "index empty texts [default=false]";
"-d", Arg.Set(disable_text_collection), "disable text collection[default=false]";
val input_file : string ref
val output_file : string option ref
val save_file : string ref
-
+val time : bool ref
type t = { id : int;
key : int; (* hash *)
- node : node }
+ node : node;
+ }
and node =
| Empty
| Leaf of int
| Branch of int * int * t * t
+
+(* faster if outside of a module *)
+let hash_node x = match x with
+ | Empty -> 0
+ | Leaf i -> (i+1) land max_int
+ (* power of 2 +/- 1 are fast ! *)
+ | Branch (b,i,l,r) ->
+ ((b lsl 1)+ b + i+(i lsl 4) + (l.key lsl 5)-l.key
+ + (r.key lsl 7) - r.key) land max_int
+
module Node =
struct
type _t = t
type t = _t
- let hash x = x.key
- let hash_node = function
- | Empty -> 0
- | Leaf i -> i+1
- (* power of 2 +/- 1 are fast ! *)
- | Branch (b,i,l,r) ->
- (b lsl 1)+ b + i+(i lsl 4) + (l.key lsl 5)-l.key
- + (r.key lsl 7) - r.key
- let hash_node x = (hash_node x) land max_int
- let equal x y = match (x.node,y.node) with
+ external hash : t -> int = "%field1"
+ let equal x y =
+ if x.id == y.id || x.key == y.key || x.node == y.node then true
+ else
+ match (x.node,y.node) with
| Empty,Empty -> true
| Leaf k1, Leaf k2 when k1 == k2 -> true
| Branch(p1,m1,l1,r1), Branch(p2,m2,l2,r2) when m1==m2 && p1==p2 &&
let rec norm n =
let v = { id = gen_uid ();
- key = Node.hash_node n;
+ key = hash_node n;
node = n }
in
WH.merge pool v
let hash s = s.key
let from_list l = List.fold_left (fun acc i -> add i acc) empty l
+
+type int_vector
+
+external int_vector_alloc : int -> int_vector = "caml_int_vector_alloc"
+external int_vector_set : int_vector -> int -> int -> unit = "caml_int_vector_set"
+external int_vector_length : int_vector -> int = "caml_int_vector_length"
+external int_vector_empty : unit -> int_vector = "caml_int_vector_empty"
+
+let empty_vector = int_vector_empty ()
+
+let to_int_vector_ext s =
+ let l = cardinal s in
+ let v = int_vector_alloc l in
+ let i = ref 0 in
+ iter (fun e -> int_vector_set v !i e; incr i) s;
+ v
+
+let hash_vectors = Hashtbl.create 4097
+
+let to_int_vector s =
+ try
+ Hashtbl.find hash_vectors s.key
+ with
+ Not_found ->
+ let v = to_int_vector_ext s in
+ Hashtbl.add hash_vectors s.key v;
+ v
+
+
val hash : t -> int
val from_list : int list -> t
+
+type int_vector
+val to_int_vector : t -> int_vector
+
val to_string : t -> string
val compare : t -> t -> int
val equal : t -> t -> bool
+val nullt : t
val dump : Format.formatter -> t -> unit
val check : t -> unit (* Check internal invariants *)
let hash = Hashtbl.hash
end
*)
-include FiniteCofinite.Make(Ptset)
+module M : FiniteCofinite.S with type elt = Tag.t and type set = Ptset.t =
+ FiniteCofinite.Make(Ptset)
+include M
+
let tag t = singleton t
let pcdata = singleton Tag.pcdata
let attribute = singleton Tag.attribute
let star = diff any (cup pcdata attribute)
let node = neg attribute
+
(* Distributed under the terms of the LGPL (see LICENCE) *)
(******************************************************************************)
-include FiniteCofinite.S with type elt = Tag.t
+include FiniteCofinite.S with type elt = Tag.t and type set = Ptset.t
val tag : Tag.t -> t
val pcdata : t
<?xml version="1.0"?>
-<a>foo<b/><b/><c><d/></c><b/></a>
+<b>foo
+ <e/>
+ <f>
+ <c>
+ <a><g/> </a>
+ </c>
+ <d> foobar
+ <a/>
+ </d>
+ <e>
+ <a> </a>
+ </e>
+ <c>
+ <g>
+ <a/>
+ foo <a/>
+ </g>
+ </c>
+ </f>
+ foo
+</b>
query=`cat $i`
xqueryorig="xpath-pt/xquery/$querybase".xql
-cat $xqueryorig | sed -e "s/doc()/doc(\"xmark_tiny.xml\")/g" >tmp.xql
-../main xmark_tiny.xml "$query" results/"$querybase".sxsi
+cat $xqueryorig | sed -e "s/doc()/doc(\"XMark_1.04.xml\")/g" >tmp.xql
+../main XMark_1.04.srx "$query" results/"$querybase".sxsi
saxonb-xquery -o:results/"$querybase".saxon tmp.xql
echo >> results/"$querybase".saxon
diff -w results/"$querybase".sxsi results/"$querybase".saxon >/dev/null || echo $querybase gives different results
val has_tagged_foll : t -> Tag.t -> bool
val tagged_desc : t -> Tag.t -> t
val tagged_foll : t -> Tag.t -> t
+ val tagged_below : t -> Ptset.t -> Ptset.t -> t
+ val tagged_next : t -> Ptset.t -> Ptset.t -> t -> t
+ val tagged_desc_only : t -> Ptset.t -> t
+ val tagged_foll_only : t -> Ptset.t -> t -> t
+ val text_below : t -> t
+ val text_next : t -> t -> t
val init_tagged_next : t -> Tag.t -> unit
- val tagged_next : t -> Tag.t -> t
val subtree_tags : t -> Tag.t -> int
val is_left : t -> bool
val print_id : Format.formatter -> t -> unit
val test_xml_tree : Format.formatter -> Ptset.t -> t -> unit
+ val init_contains : t -> string -> unit
+ val mk_nil : t -> t
+ val test_jump : t -> Tag.t -> unit
end
module XML =
external parent : t -> [`Tree] node -> [`Tree] node = "caml_xml_tree_parent"
external parent_doc : t -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_parent_doc"
+ external prev_doc : t -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_prev_doc"
external first_child : t -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_child"
external my_text : t -> [`Tree] node -> [`Text ] node = "caml_xml_tree_my_text"
external next_text : t -> [`Tree] node -> [`Text ] node = "caml_xml_tree_next_text"
-
+ external doc_ids : t -> [`Tree ] node -> [`Text ] node * [`Text ] node = "caml_xml_tree_doc_ids"
external text_xml_id : t -> [`Text ] node -> int = "caml_xml_tree_text_xml_id"
external node_xml_id : t -> [`Tree ] node -> int = "caml_xml_tree_node_xml_id"
external is_ancestor : t -> [`Tree ] node -> [`Tree ] node -> bool = "caml_xml_tree_is_ancestor"
external tagged_desc : t -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_desc"
external tagged_foll : t -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_foll"
- external tagged_next : t -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_next"
external subtree_tags : t -> [`Tree ] node -> Tag.t -> int = "caml_xml_tree_subtree_tags"
+ external tagged_below : t -> [`Tree ] node -> Ptset.int_vector -> Ptset.int_vector -> [`Tree ] node = "caml_xml_tree_tagged_below"
+ external tagged_desc_only : t -> [`Tree ] node -> Ptset.int_vector -> [`Tree ] node = "caml_xml_tree_tagged_desc_only"
+ external tagged_next : t -> [`Tree ] node -> Ptset.int_vector -> Ptset.int_vector -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_next"
+ external tagged_foll_only : t -> [`Tree ] node -> Ptset.int_vector -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_foll_only"
+ external tagged_desc_or_foll_only : t -> [`Tree ] node -> Ptset.int_vector -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_foll_only"
+
+ let test_jump tree tag =
+ let rec loop id ctx =
+ if id != nil
+ then
+ let first = tagged_desc tree id tag
+ and next = tagged_desc tree id tag
+ in
+ loop first id;
+ loop next ctx
+ in
+ loop (root tree) (root tree)
-
-
+
let test_xml_tree ppf tags v =
let pr x = Format.fprintf ppf x in
let rec aux id =
(tag_id v id)
(Tag.to_string (tag_id v id))
(node_xml_id v id);
- pr "DocID of PrevText,MyText,NextText : (%i = %s,%i = %s,%i = %s) ParentDoc(my_text)=%i\n%!"
+ pr "DocID of PrevText,MyText,NextText : (%i = %s,%i = %s,%i = %s) ParentDoc(my_text)=%i PrevDoc(next_text)=%i\n%!"
(int_of_node (prev_text v id))
(Text.get_text v (prev_text v id))
(int_of_node (my_text v id))
(Text.get_text v (my_text v id))
(int_of_node (next_text v id))
(Text.get_text v (next_text v id))
- (int_of_node(parent_doc v (my_text v id)));
- pr "Testing Tagged*\n%!";
- Ptset.iter (fun t ->
- let str = Tag.to_string t in
+ (int_of_node(parent_doc v (my_text v id)))
+ (int_of_node(prev_doc v (next_text v id)));
+ let i1,i2 = doc_ids v id in
+ pr "Testing DocIds below (%i,%i)*\n%!"
+ (int_of_node i1) (int_of_node i2);
+ pr "Testing Tagged*\n%!";
+ Ptset.iter (fun t ->
+ let str = Tag.to_string t in
if Tag.pcdata <> t
then begin
pr "Tag: %s : \n%!" str;
let dump { doc=t } = Tree.print_skel t
let test_xml_tree ppf tags { doc=t } = Tree.test_xml_tree ppf tags t
+ let test_jump { doc=t } tag = Tree.test_jump t tag
+ let contains_array = ref [| |]
+
+ let init_contains t s =
+ let a = Text.contains t.doc s
+ in
+ Array.fast_sort (compare) a;
+ contains_array := a
+
+
module DocIdSet = struct
include Set.Make (struct type t = string_content
| Node (NC t) -> Printf.sprintf "Node (NC %i)" (int_of_node t)
| Node (SC (t,i)) -> Printf.sprintf "Node (SC (%i,%i))" (int_of_node t) (int_of_node i)
-
+ let mk_nil t = { t with node = Nil }
let root n = { n with node = norm (Tree.root n.doc) }
let is_root n = match n.node with
| Node(NC t) -> (int_of_node t) == 0
| { doc=d; node=Node(NC n) } -> { t with node = norm (tagged_desc d n tag) }
| _ -> { t with node=Nil }
-(*
- let tagged_next t tag =
- if tag == Tag.attribute || tag == Tag.pcdata then failwith "tagged_next"
- else
- match tagged_desc t tag with
- | { doc = d; node=Nil } -> tagged_foll t tag
- | x -> x
-*)
+
+ let tagged_next t tb tf s =
+ match s with
+ | { node = Node (NC(below)) } -> begin
+ match t with
+ | { doc = d; node=Node(NC n) } ->
+ { t with node= norm (tagged_next d n (Ptset.to_int_vector tb) (Ptset.to_int_vector tf) below) }
+ | { doc = d; node=Node(SC (i,n) ) } when is_nil n ->
+ let p = parent_doc d i in
+ { t with node= norm (tagged_next d p (Ptset.to_int_vector tb) (Ptset.to_int_vector tf) below) }
+ | { doc = d; node=Node(SC (_,n) ) } ->
+ if Ptset.mem (tag_id d n) (Ptset.union tb tf)
+ then { t with node=Node(NC(n)) }
+ else
+ let vb = Ptset.to_int_vector tb in
+ let vf = Ptset.to_int_vector tf in
+ let node =
+ let dsc = tagged_below d n vb vf in
+ if is_nil dsc
+ then tagged_next d n vb vf below
+ else dsc
+ in
+ { t with node = norm node }
+ | _ -> {t with node=Nil }
+ end
+
+ | _ -> {t with node=Nil }
+
+ let tagged_foll_only t tf s =
+ match s with
+ | { node = Node (NC(below)) } -> begin
+ match t with
+ | { doc = d; node=Node(NC n) } ->
+ { t with node= norm (tagged_foll_only d n (Ptset.to_int_vector tf) below) }
+ | { doc = d; node=Node(SC (i,n) ) } when is_nil n ->
+ let p = parent_doc d i in
+ { t with node= norm (tagged_foll_only d p (Ptset.to_int_vector tf) below) }
+ | { doc = d; node=Node(SC (_,n) ) } ->
+ if Ptset.mem (tag_id d n) tf
+ then { t with node=Node(NC(n)) }
+ else
+ let vf = Ptset.to_int_vector tf in
+ let node =
+ let dsc = tagged_desc_only d n vf in
+ if is_nil dsc
+ then tagged_foll_only d n vf below
+ else dsc
+ in
+ { t with node = norm node }
+ | _ -> {t with node=Nil }
+ end
+
+ | _ -> {t with node=Nil }
+
+
+ let tagged_below t tc td =
+ match t with
+ | { doc = d; node=Node(NC n) } ->
+ let vc = Ptset.to_int_vector tc
+ in
+ let vd = Ptset.to_int_vector td
+ in
+ { t with node= norm(tagged_below d n vc vd) }
+ | _ -> { t with node=Nil }
+
+ let tagged_desc_only t td =
+ match t with
+ | { doc = d; node=Node(NC n) } ->
+ let vd = Ptset.to_int_vector td
+ in
+ { t with node= norm(tagged_desc_only d n vd) }
+ | _ -> { t with node=Nil }
+
+
+ let array_find a i j =
+ let l = Array.length a in
+ let rec loop idx x y =
+ if x > y || idx >= l then Text.nil
+ else
+ if a.(idx) >= x then if a.(idx) > y then Text.nil else a.(idx)
+ else loop (idx+1) x y
+ in
+ if a.(0) > j || a.(l-1) < i then Text.nil
+ else loop 0 i j
+
+
+ let text_below t =
+ let l = Array.length !contains_array in
+ if l = 0 then { t with node=Nil }
+ else
+ match t with
+ | { doc = d; node=Node(NC n) } ->
+ let i,j = doc_ids t.doc n in
+ let id = array_find !contains_array i j
+ in
+ if id == Text.nil then
+ { t with node=Nil }
+ else
+ {t with node = Node(SC(id, Tree.next_sibling d (Tree.prev_doc d id))) }
+ | _ -> { t with node=Nil }
+
+ let text_next t root =
+ let l = Array.length !contains_array in
+ if l = 0 then { t with node=Nil }
+ else
+ let inf = match t with
+ | { doc =d; node = Node(NC n) } -> snd(doc_ids d n)+1
+ | { node = Node(SC(i,_)) } -> i+1
+ | _ -> assert false
+ in
+ match root with
+ | { doc = d; node= Node (NC n) } ->
+ let _,j = doc_ids t.doc n in
+
+ let id = array_find !contains_array inf j
+ in
+ if id == Text.nil then { doc = d; node= Nil }
+ else
+ {doc = d; node = Node(SC(id,Tree.next_sibling d (Tree.prev_doc d id))) }
+ | _ -> { t with node=Nil}
+
+
+
let subtree_tags t tag =
match t with
{ doc = d; node = Node(NC n) } ->
- let tagged_next t tag =
+(* let tagged_next t tag =
if !idx >= Array.length !tagged_desc_array
then {t with node=Nil}
else
let r = !tagged_desc_array.(!idx)
in
incr idx; r
-
+*)
let has_tagged_foll t tag = is_node (tagged_foll t tag)
val has_tagged_foll : t -> Tag.t -> bool
val tagged_desc : t -> Tag.t -> t
val tagged_foll : t -> Tag.t -> t
+ val tagged_below : t -> Ptset.t -> Ptset.t -> t
+ val tagged_next : t -> Ptset.t -> Ptset.t -> t -> t
+ val tagged_desc_only : t -> Ptset.t -> t
+ val tagged_foll_only : t -> Ptset.t -> t -> t
+ val text_below : t -> t
+ val text_next : t -> t -> t
val init_tagged_next : t -> Tag.t -> unit
- val tagged_next : t -> Tag.t -> t
val subtree_tags : t -> Tag.t -> int
val is_left : t -> bool
val print_id : Format.formatter -> t -> unit
val test_xml_tree : Format.formatter -> Ptset.t -> t -> unit
+ val init_contains : t -> string -> unit
+ val mk_nil : t -> t
+ val test_jump : t -> Tag.t -> unit
end
module Binary : BINARY
let rec token = lexer
| [' ' '\t'] -> token lexbuf
| "text()" | "node()" | "and" | "not" | "or"
+ | "contains" | "contains_full"
| "self" | "descendant" | "child" | "descendant-or-self"
| "attribute" | "following-sibling" | "preceding-sibling"
| "parent" | "ancestor" | "ancestor-or-self" | "preceding" | "following"
try
Tree.Binary.parse_xml_uri Sys.argv.(1)
with
- | _ ->
- Printf.printf "Error parsing document\n";
- exit 2
+ | _ ->(
+ try
+ Tree.Binary.load Sys.argv.(1)
+ with
+ | _ ->
+ Printf.printf "Error parsing document\n";
+ exit 2)
;;
let _ = Tag.init (Tree.Binary.tag_pool doc)
;;
-let tags = (Ptset.add (Tag.tag "foo") (collect_tags doc))
+let tags = (collect_tags doc)
;;
-
+(*
let _ = Tree.Binary.test_xml_tree Format.std_formatter tags doc
;;
+let _ = Printf.printf "Testing //a with jumping\n"
+;;
+*)
+let rec test_a dir t acc ctx =
+ if Tree.Binary.is_node t
+ then
+ let acc =
+ if (Tree.Binary.tag t) == (Tag.tag "a")
+ then Ata.TS.cons t acc
+ else acc
+ in
+ let first = Tree.Binary.tagged_below t Ptset.empty (Ptset.singleton (Tag.tag "a"))
+ and next = Tree.Binary.tagged_next t Ptset.empty (Ptset.singleton (Tag.tag "a")) ctx
+ in
+ let _ =
+ Printf.printf "t is :";
+ Tree.Binary.print_xml_fast stdout t;
+ Printf.printf " called from %s of " (if dir then "below" else "next");
+ Tree.Binary.print_xml_fast stdout ctx;
+ if (Tree.Binary.is_node next)
+ then begin
+ Printf.printf ", Next a is %!";
+ Tree.Binary.print_xml_fast stdout next;
+ end
+ else
+ Printf.printf ", Next a is empty!";
+ print_newline();
+ in
+ test_a false next (test_a true first acc t) t
+ else acc
+;;
+
+let rec test_text dir t acc ctx =
+ if Tree.Binary.is_node t
+ then
+ let acc =
+ if (Tree.Binary.tag t) == (Tag.pcdata)
+ then Ata.TS.cons t acc
+ else acc
+ in
+ let first = Tree.Binary.text_below t
+ and next = Tree.Binary.text_next t ctx
+ in
+ (*
+ let _ =
+ Printf.printf "t is :";
+ Tree.Binary.print_xml_fast stdout t;
+ Printf.printf " called from %s of " (if dir then "below" else "next");
+ Tree.Binary.print_xml_fast stdout ctx;
+ if (Tree.Binary.is_node first)
+ then begin
+ Printf.printf "First (text) is %!";
+ Tree.Binary.print_xml_fast stdout first;
+ end
+ else
+ Printf.printf "First (text) is empty!";
+ if (Tree.Binary.is_node next)
+ then begin
+ Printf.printf ", Next (text) is %!";
+ Tree.Binary.print_xml_fast stdout next;
+ end
+ else
+ Printf.printf ", Next (text) is empty!";
+ print_newline();
+ in *)
+ test_text false next (test_text true first acc t) ctx
+ else acc
+;;
+(*
+let r = test_a true doc Ata.TS.empty doc;;
+(*
+let _ = Printf.printf "==> %i nodes\n" (Ata.TS.length r)
+let _ = Ata.TS.iter (fun t -> Tree.Binary.print_xml_fast stdout t; print_newline();) r
+
+*)
+let _ = Tree.Binary.init_contains doc "car"
+
+let r = test_text true doc Ata.TS.empty doc
+let _ = Printf.printf "==> %i nodes\n" (Ata.TS.length r)
+(* let _ = Ata.TS.iter (fun t -> Tree.Binary.print_xml_fast stdout t; print_newline();) r *)
+;;
+
+*)
+let time f x =
+ let t1 = Unix.gettimeofday () in
+ let r = f x in
+ let t2 = Unix.gettimeofday () in
+ let t = (1000. *.(t2 -. t1)) in
+ Printf.eprintf " %fms\n%!" t ;
+ r
+;;
+let _ = Printf.eprintf "Timing jump //keyword ... "
+let _ = time Tree.Binary.test_jump doc (Tag.tag "keyword")
(******************************************************************************)
INCLUDE "debug.ml";;
#load "pa_extend.cmo";;
-
+let contains = ref None
module Ast =
struct
(* The steps are in reverse order !!!! *)
| [ "." ; p = top_pred -> [(Self,TagSet.node,p)] ]
| [ ".." ; p = top_pred -> [(Parent,TagSet.star,p)] ]
+| [ "contains"; "(" ; s = STRING ; ")";p=top_pred -> [
+ let _ = contains := Some(s) in (Child,TagSet.singleton Tag.pcdata, p)]
+ ]
+| [ "contains_full"; "(" ; s = STRING ; ")";p=top_pred -> [
+ let _ = contains := Some(s) in (Descendant,TagSet.singleton Tag.pcdata, p)]
+ ]
| [ test = test; p = top_pred -> [(Child,test, p)] ]
| [ att = ATT ; p = top_pred ->
match att with
tr_parent_loop : (Ata.state,int*(Ata.transition list)) Hashtbl.t;
tr : (Ata.state,int*(Ata.transition list)) Hashtbl.t;
tr_aux : (Ata.state,int*(Ata.transition list)) Hashtbl.t;
+ mutable entry_points : (Tag.t*Ptset.t) list;
+ mutable contains : string option;
}
let dummy_conf = { st_root = -1;
st_univ = -1;
tr_parent_loop = Hashtbl.create 0;
tr = Hashtbl.create 0;
tr_aux = Hashtbl.create 0;
+ entry_points = [];
+ contains = None;
}
let _r =
function (`Left|`Last) -> `Right
| `Right -> `Left
-let _l = function (`Left|`Last) -> `Left
- | `Right -> `Right
+ | `RRight -> `LLeft
+ | `LLeft -> `RRight
+
+
+let _l =
+ function (`Left|`Last) -> `Left
+ | `Right -> `Right
+ | `RRight -> `RRight
+ | `LLeft -> `LLeft
open Ata.Transitions
| (x,z::y) ::r -> z,(x,y)::r
| _-> assert false
-let rec compile_step ?(existential=false) conf q_src dir ctx_path step num =
+let rec compile_step ?(existential=false) conf q_src dir ctx_path nrec step num =
let ex = existential in
let axis,test,pred = step in
let is_last = dir = `Last in
let p_st, p_anc, p_par, p_pre, p_num, p_f =
compile_pred conf q_src num ctx_path dir pred q_dst
in
-
let new_st,new_dst, new_ctx =
match axis with
- | Child | FollowingSibling | Descendant | DescendantOrSelf ->
- let axis =
- if axis = DescendantOrSelf
- then
- begin
- or_self conf q_src (fst(vpop ctx_path)) q_dst dir test p_f (is_last && not(existential));
- Descendant
- end
- else axis
+
+ | Child | Descendant ->
+ let left,right =
+ if nrec then `LLeft,`RRight
+ else `Left,`Right
in
+
let t1 = ?< q_src><(test, is_last && not(ex))>=>
- p_f *& (if is_last then Ata.true_ else (_l dir) ** q_dst) in
+ p_f *& ( if false (*is_last*) then Ata.true_ else (_l left) ** q_dst) in
let _ = add_trans num conf.tr t1 in
let _ = if axis=Descendant then
add_trans num conf.tr_aux (
- ?< q_src><@ ((if ex then TagSet.diff TagSet.star test
+ ?< q_src><@ ((if ex||nrec then TagSet.diff TagSet.star test
else TagSet.star),false,
- if TagSet.is_finite test
- then `Left(fun t ->
- if (Tree.Binary.is_node t)
- then
- let mytag = Tree.Binary.tag t in
- TagSet.exists (fun tag ->
- tag == mytag ||
- Tree.Binary.has_tagged_desc t tag
- )
- test
- else true
- )
-
- else `True )>=> `Left ** q_src )
+ `True )>=> `LLeft ** q_src )
in
let t3 =
?< q_src><@ ((if ex then TagSet.diff TagSet.any test
- else TagSet.any), false,
- if axis=Descendant&&TagSet.is_finite test
- then `True (*`Right(fun t ->
- TagSet.exists (fun tag -> Tree.Binary.has_tagged_foll t tag)
- test) *)
- else `True )>=>
- if ex then ( Ata.atom_ `Left false q_src) *& `Right ** q_src
- else `Right ** q_src
+ else TagSet.any), false, `True )>=>
+ if ex then ( Ata.atom_ `Left false q_src) *& right ** q_src
+ else (if axis=Descendant then `RRight else `Right) ** q_src
in
let _ = add_trans num conf.tr_aux t3
in
| Ancestor | AncestorOrSelf ->
conf.has_backward <- true;
let up_states, new_ctx =
- List.map (fst) ctx_path, (vpush q_root [])
+ List.fold_left (fun acc (q,_) -> if q == q_root then acc else q::acc) [] ctx_path, (vpush q_root [])
in
let _ = if axis = AncestorOrSelf then
or_self conf q_src (fst(vpop ctx_path)) q_dst dir test p_f (is_last && not(existential));
let fc = List.fold_left (fun f s -> ((_l dir)**s +|f)) Ata.false_ up_states
in
let t1 = ?< q_frm_root><(test,is_last && (not existential) )>=>
- (if is_last then Ata.true_ else (_l dir) ** q_dst) *& fc in
+ ( (*if is_last then Ata.true_ else *) (`LLeft ) ** q_dst) *& fc in
add_trans num conf.tr t1;
[q_dst ], q_dst, vpush q_frm_root new_ctx
(Ptset.elements (Ptset.union p_st (Ptset.from_list new_st)),
new_dst,
new_ctx)
-
+and is_rec = function
+ [] -> false
+ | ((axis,_,_),_)::_ ->
+ match axis with
+ Descendant | Ancestor -> true
+ | _ -> false
+
and compile_path ?(existential=false) annot_path config q_src states idx ctx_path =
List.fold_left
- (fun (a_st,a_dst,anc_st,par_st,pre_st,ctx_path,num,has_backward) (step,dir) ->
+ (fun (a_st,a_dst,anc_st,par_st,pre_st,ctx_path,num,has_backward,a_isrec) (step,dir) ->
let add_states,new_dst,new_ctx =
- compile_step ~existential:existential config a_dst dir ctx_path step num
+ compile_step ~existential:existential config a_dst dir ctx_path (is_rec a_isrec) step num
in
let new_states = Ptset.union (Ptset.from_list add_states) a_st in
let nanc_st,npar_st,npre_st,new_bw =
|(Parent|Ancestor|AncestorOrSelf),_,_ -> Ptset.add a_dst anc_st,par_st,pre_st,true
| _ -> anc_st,par_st,pre_st,has_backward
in
- new_states,new_dst,nanc_st,npar_st,npre_st,new_ctx, num+1,new_bw
+ new_states,new_dst,nanc_st,npar_st,npre_st,new_ctx, num+1,new_bw,(match a_isrec with [] -> [] | _::r -> r)
)
- (states, q_src, Ptset.empty,Ptset.empty,Ptset.empty, ctx_path,idx, false )
+ (states, q_src, Ptset.empty,Ptset.empty,Ptset.empty, ctx_path,idx, false,(List.tl annot_path) )
annot_path
-
+
and binop_ conf q_src idx ctx_path dir pred p1 p2 f ddst =
let a_st1,anc_st1,par_st1,pre_st1,idx1,f1 =
compile_pred conf q_src idx ctx_path dir p1 ddst in
| Path (p) ->
let q = Ata.mk_state () in
let annot_path = match p with Relative(r) -> dirannot (List.rev r) | _ -> assert false in
- let a_st,a_dst,anc_st,par_st,pre_st,_,idx,has_backward =
+ let a_st,a_dst,anc_st,par_st,pre_st,_,idx,has_backward,_ =
compile_path ~existential:true annot_path conf q states idx ctx_path
in
let ret_dir = match annot_path with
in
let steps = List.rev steps in
let dirsteps = dirannot steps in
+ let _ = Ata.mk_state() in
let config = { st_root = Ata.mk_state();
st_univ = Ata.mk_state();
final_state = Ptset.empty;
tr_parent_loop = Hashtbl.create 5;
tr = Hashtbl.create 5;
tr_aux = Hashtbl.create 5;
+ entry_points = [];
+ contains = None
}
in
let q0 = Ata.mk_state() in
add_trans num config.tr_aux (mk_star config.st_from_root `Left config.st_from_root config.st_univ);
add_trans num config.tr_aux (mk_step config.st_no_nil (TagSet.add Tag.pcdata TagSet.star) `Left config.st_univ config.st_univ);
*)
- let a_st,a_dst,anc_st,par_st,pre_st,_,_,has_backward =
+ let a_st,a_dst,anc_st,par_st,pre_st,_,_,has_backward,_ =
compile_path dirsteps config q0 states 0 [(config.st_root,[]) ]
in
let fst_tr =
- ?< (config.st_root) >< (TagSet.star,false) >=>
- (`Left** q0) *& (if config.has_backward then `Left ** config.st_from_root else Ata.true_)
+ ?< (config.st_root) >< (TagSet.singleton (Tag.tag ""),false) >=>
+ ((if is_rec dirsteps then `LLeft else `Left)** q0) *& (if config.has_backward then `LLeft ** config.st_from_root else Ata.true_)
in
add_trans num config.tr fst_tr;
- if config.has_backward then begin
+ if config.has_backward then begin
add_trans num config.tr_aux
- (?< (config.st_from_root) >< (TagSet.star,false) >=> `Left ** config.st_from_root +|
- `Right ** config.st_from_root);
+ (?< (config.st_from_root) >< (TagSet.star,false) >=> `LLeft ** config.st_from_root);
add_trans num config.tr_aux
- (?< (config.st_from_root) >< (TagSet.cup TagSet.pcdata TagSet.attribute,false) >=>
- `Right ** config.st_from_root);
+ (?< (config.st_from_root) >< (TagSet.any,false) >=>
+ `RRight ** config.st_from_root);
end;
let phi = Hashtbl.create 37 in
Ata.states = if has_backward then Ptset.add config.st_from_root a_st else a_st;
Ata.init = Ptset.singleton config.st_root;
Ata.final = Ptset.union anc_st config.final_state;
- Ata.universal = Ptset.union anc_st config.final_state;
+ Ata.universal = Ptset.singleton a_dst;
Ata.phi = phi;
Ata.delta = Hashtbl.create 17;
Ata.sigma = Ata.HTagSet.create 17;
- },[]
+ },config.entry_points,!contains
end
end
module Compile :
sig
-val compile : Ast.path -> Ata.t * (Tag.t*Ptset.t) list
+val compile : Ast.path -> Ata.t * (Tag.t*Ptset.t) list * string option
end