From: kim Date: Tue, 10 Mar 2009 00:30:18 +0000 (+0000) Subject: merge from branch stable-succint-jumping X-Git-Url: http://git.nguyen.vg/gitweb/?a=commitdiff_plain;h=dc91851aaeac91a71eba2c266d0227adea0c5815;p=SXSI%2Fxpathcomp.git merge from branch stable-succint-jumping git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@231 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- diff --git a/Makefile b/Makefile index 91daf4e..51eac54 100644 --- a/Makefile +++ b/Makefile @@ -1,11 +1,12 @@ +INLINE=10 DEBUG=false PROFILE=true VERBOSE=false -BASESRC=custom.ml ptset.ml finiteCofinite.ml tag.ml tagSet.ml options.ml tree.ml -BASEMLI=sigs.mli ptset.mli finiteCofinite.mli tag.mli tagSet.mli options.mli tree.mli -MLSRCS = memory.ml $(BASESRC) ata.ml ulexer.ml xPath.ml main.ml -MLISRCS = memory.mli $(BASEMLI) ata.mli ulexer.mli xPath.mli +BASESRC=custom.ml ptset.ml finiteCofinite.ml tag.ml tagSet.ml options.ml tree.ml ata.ml +BASEMLI=sigs.mli ptset.mli finiteCofinite.mli tag.mli tagSet.mli options.mli tree.mli ata.mli +MLSRCS = memory.ml $(BASESRC) ulexer.ml xPath.ml main.ml +MLISRCS = memory.mli $(BASEMLI) ulexer.mli xPath.mli BASEOBJS= $(BASESRC:.ml=.cmx) BASEINT= $(BASEMLI:.ml=.cmi) MLOBJS = $(MLSRCS:.ml=.cmx) @@ -47,7 +48,7 @@ OCAMLOPT = ocamlopt -g -cc "$(CXX)" SYNT_DEBUG = -ppopt -DDEBUG else CXX = g++ -OCAMLOPT = ocamlopt -g -cc "$(CXX)" -ccopt -O3 -ccopt -std=c++0x -noassert -inline 10000 +OCAMLOPT = ocamlopt -g -cc "$(CXX)" -ccopt -O3 -ccopt -std=c++0x -noassert -inline $(INLINE) endif ifeq ($(PROFILE), true) SYNT_PROF = $(SYNT_DEBUG) -ppopt -DPROFILE diff --git a/OCamlDriver.cpp b/OCamlDriver.cpp index e1ddf83..f56b4a3 100644 --- a/OCamlDriver.cpp +++ b/OCamlDriver.cpp @@ -49,6 +49,7 @@ extern "C" void caml_init_ops () { return; } + extern "C" CAMLprim value caml_call_shredder_uri(value uri,value sf, value iet, value dtc){ CAMLparam1(uri); CAMLlocal1(doc); @@ -98,7 +99,7 @@ void traversal_rec(XMLTree* tree, treeNode id){ DocID tid; if (id == NULLT) return; - // int tag = tree->Tag(id); + //int tag = tree->Tag(id); if (id) { tid = tree->PrevText(id); char * data = (char *) (tree->getTextCollection())->GetText(tid); @@ -194,7 +195,7 @@ extern "C" CAMLprim value caml_text_collection_contains(value tree,value str){ extern "C" CAMLprim value caml_xml_tree_root(value tree){ CAMLparam1(tree); - CAMLreturn (TREENODEVAL(XMLTREE(tree)->Root())); + CAMLreturn (Val_int(TREENODEVAL(XMLTREE(tree)->Root()))); } extern "C" CAMLprim value caml_xml_tree_text_collection(value tree){ CAMLparam1(tree); @@ -204,11 +205,21 @@ extern "C" CAMLprim value caml_xml_tree_parent(value tree, value id){ CAMLparam2(tree,id); CAMLreturn(Val_int (XMLTREE(tree)->Parent(TREENODEVAL(id)))); } +extern "C" CAMLprim value caml_xml_tree_prev_sibling(value tree, value id){ + CAMLparam2(tree,id); + CAMLreturn(Val_int (XMLTREE(tree)->PrevSibling(TREENODEVAL(id)))); +} + extern "C" CAMLprim value caml_xml_tree_parent_doc(value tree, value id){ CAMLparam2(tree,id); CAMLreturn(Val_int (XMLTREE(tree)->ParentNode((DocID) Int_val(id)))); } +extern "C" CAMLprim value caml_xml_tree_prev_doc(value tree, value id){ + CAMLparam2(tree,id); + CAMLreturn(Val_int (XMLTREE(tree)->PrevNode((DocID) Int_val(id)))); +} + extern "C" CAMLprim value caml_xml_tree_is_ancestor(value tree,value id1, value id2) { CAMLparam3(tree,id1,id2); CAMLreturn(Val_bool (XMLTREE(tree)->IsAncestor(TREENODEVAL(id1),TREENODEVAL(id2)))); @@ -242,13 +253,6 @@ extern "C" CAMLprim value caml_xml_tree_tagged_desc(value tree, value id, value CAMLreturn(Val_int (XMLTREE(tree)->TaggedDesc(TREENODEVAL(id),(TagType) Int_val(tag)))); } -extern "C" CAMLprim value caml_xml_tree_tagged_next(value tree, value id, value tag){ - CAMLparam3(tree,id,tag); - CAMLreturn(Val_int (XMLTREE(tree)->TaggedNext(TREENODEVAL(id),(TagType) Int_val(tag)))); -} - - - extern "C" CAMLprim value caml_xml_tree_tagged_foll(value tree, value id, value tag){ CAMLparam3(tree,id,tag); @@ -261,11 +265,6 @@ extern "C" CAMLprim value caml_xml_tree_next_sibling(value tree, value id){ CAMLreturn(Val_int (XMLTREE(tree)->NextSibling(TREENODEVAL(id)))); } -extern "C" CAMLprim value caml_xml_tree_prev_sibling(value tree, value id){ - CAMLparam2(tree,id); - CAMLreturn(Val_int (XMLTREE(tree)->PrevSibling(TREENODEVAL(id)))); -} - extern "C" CAMLprim value caml_xml_tree_prev_text(value tree, value id){ CAMLparam2(tree,id); CAMLlocal1(res); @@ -341,3 +340,91 @@ extern "C" CAMLprim value caml_xml_tree_load(value filename,value samplerate){ memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*)); CAMLreturn(doc); } + +extern "C" { + static int caml_empty_vector[] = { 0 }; +} + +extern "C" CAMLprim value caml_int_vector_empty(value unit){ + CAMLparam1(unit); + CAMLreturn ((value) caml_empty_vector); +} + +extern "C" CAMLprim value caml_int_vector_length(value vec){ + CAMLparam1(vec); + CAMLreturn (Val_int( ((int*) caml_empty_vector)[0] )); +} +extern "C" CAMLprim value caml_int_vector_alloc(value len){ + CAMLparam1(len); + int * vec = (int *) malloc(sizeof(int)*(Int_val(len)+1)); + vec[0] = Int_val(len); + CAMLreturn ((value) vec); +} + +extern "C" CAMLprim value caml_int_vector_set(value vec, value i, value v){ + CAMLparam3(vec,i,v); + + ((int*) vec)[Int_val(i)+1] = Int_val(v); + CAMLreturn (Val_unit); +} + + +#define VECT(x) ((int*) (x)) +extern "C" CAMLprim value caml_xml_tree_tagged_below(value tree, value node, value ctags, value dtags){ + CAMLparam4(tree,node,ctags,dtags); + + CAMLreturn (Val_int ( + (XMLTREE(tree)->TaggedBelow(TREENODEVAL(node), + &(VECT(ctags)[1]), + VECT(ctags)[0], + &(VECT(dtags)[1]), + VECT(dtags)[0])))); +} + +extern "C" CAMLprim value caml_xml_tree_tagged_next(value tree, value node, value ctags, value ftags,value root){ + CAMLparam5(tree,node,ctags,ftags,root); + CAMLreturn (Val_int ( + (XMLTREE(tree)->TaggedNext(TREENODEVAL(node), + &(VECT(ctags)[1]), + VECT(ctags)[0], + &(VECT(ftags)[1]), + VECT(ftags)[0], + TREENODEVAL(root))))); +} + +extern "C" CAMLprim value caml_xml_tree_tagged_desc_only(value tree, value node,value dtags){ + CAMLparam3(tree,node,dtags); + + CAMLreturn (Val_int ( + (XMLTREE(tree)->TaggedDescOnly(TREENODEVAL(node), + &(VECT(dtags)[1]), + VECT(dtags)[0])))); +} + +extern "C" CAMLprim value caml_xml_tree_tagged_foll_only(value tree, value node, value ftags,value root){ + CAMLparam4(tree,node,ftags,root); + CAMLreturn (Val_int ( + (XMLTREE(tree)->TaggedFollOnly(TREENODEVAL(node), + &(VECT(ftags)[1]), + VECT(ftags)[0], + TREENODEVAL(root))))); +} + +extern "C" CAMLprim value caml_xml_tree_tagged_desc_or_foll_only(value tree, value node, value ftags,value root){ + CAMLparam4(tree,node,ftags,root); + CAMLreturn (Val_int ( + (XMLTREE(tree)->TaggedDescOrFollOnly(TREENODEVAL(node), + &(VECT(ftags)[1]), + VECT(ftags)[0], + TREENODEVAL(root))))); +} + +extern "C" CAMLprim value caml_xml_tree_doc_ids(value tree, value node){ + CAMLparam2(tree,node); + CAMLlocal1(tuple); + tuple = caml_alloc_tuple(2); + range r = (XMLTREE(tree)->DocIds(TREENODEVAL(node))); + caml_initialize(&Field(tuple,0),Val_int(r.min)); + caml_initialize(&Field(tuple,1),Val_int(r.max)); + CAMLreturn (tuple); +} diff --git a/ata.ml b/ata.ml index a9dbf22..d17c0c8 100644 --- a/ata.ml +++ b/ata.ml @@ -30,46 +30,52 @@ type formula_expr = | False | True | Or of formula * formula | And of formula * formula - | Atom of ([ `Left | `Right ]*bool*state) + | Atom of ([ `Left | `Right | `LLeft | `RRight ]*bool*state) and formula = { fid: int; + fkey : int; pos : formula_expr; neg : formula; - st : Ptset.t*Ptset.t; + st : (Ptset.t*Ptset.t)*(Ptset.t*Ptset.t); size: int; } +external hash_const_variant : [> ] -> int = "%identity" +external int_bool : bool -> int = "%identity" + +let hash_node_form t = match t with + | False -> 0 + | True -> 1 + | And(f1,f2) -> (2+17*f1.fkey + 37*f2.fkey) land max_int + | Or(f1,f2) -> (3+101*f1.fkey + 253*f2.fkey) land max_int + | Atom(v,b,s) -> ((hash_const_variant v) + (3846*(int_bool b) +257) + (s lsl 13 - s)) land max_int + module FormNode = struct type t = formula - let hash t = match t.pos with - | False -> 0 - | True -> 1 - | And(f1,f2) -> (2+17*f1.fid + 37*f2.fid) land max_int - | Or(f1,f2) -> (3+101*f1.fid + 253*f2.fid) land max_int - | Atom(`Left,true,s) -> (5 + 11 * 23 * s) land max_int - | Atom(`Right,true,s) -> (5 + 19 * 23 * s) land max_int - | Atom(`Left,false,s) -> (5 + 11 * 39 * s) land max_int - | Atom(`Right,false,s) -> (5 + 19 * 39 * s) land max_int - - + + let hash t = t.fkey let equal f1 f2 = - if f1.fid == f2.fid || f1.pos == f2.pos then true + if f1.fid == f2.fid || f1.fkey == f2.fkey || f1.pos == f2.pos then true else match f1.pos,f2.pos with | False,False | True,True -> true - | Atom(d1,b1,s1), Atom(d2,b2,s2) when (b1==b2) && (s1=s2) && (d1 = d2) -> true + | Atom(d1,b1,s1), Atom(d2,b2,s2) when (b1==b2) && (s1==s2) && (d1 = d2) -> true | Or(g1,g2),Or(h1,h2) | And(g1,g2),And(h1,h2) -> g1.fid == h1.fid && g2.fid == h2.fid | _ -> false + end module WH = Weak.Make(FormNode) let f_pool = WH.create 107 +let empty_pair = Ptset.empty,Ptset.empty +let empty_quad = empty_pair,empty_pair + let true_,false_ = - let rec t = { fid = 1; pos = True; neg = f ; st = Ptset.empty,Ptset.empty; size =1; } - and f = { fid = 0; pos = False; neg = t; st = Ptset.empty,Ptset.empty; size = 1; } + let rec t = { fid = 1; pos = True; fkey=1; neg = f ; st = empty_quad; size =1; } + and f = { fid = 0; pos = False; fkey=0; neg = t; st = empty_quad; size = 1; } in WH.add f_pool f; WH.add f_pool t; @@ -82,6 +88,7 @@ let is_false f = f.fid == 0 let cons pos neg s1 s2 size1 size2 = let rec pnode = { fid = gen_id (); + fkey = hash_node_form pos; pos = pos; neg = nnode; st = s1; @@ -89,6 +96,7 @@ let cons pos neg s1 s2 size1 size2 = and nnode = { fid = gen_id (); pos = neg; + fkey = hash_node_form neg; neg = pnode; st = s2; size = size2; @@ -99,20 +107,24 @@ let cons pos neg s1 s2 size1 size2 = let atom_ d p s = let si = Ptset.singleton s in let ss = match d with - | `Left -> si,Ptset.empty - | `Right -> Ptset.empty,si + | `Left -> (si,Ptset.empty),empty_pair + | `Right -> empty_pair,(si,Ptset.empty) + | `LLeft -> (Ptset.empty,si),empty_pair + | `RRight -> empty_pair,(Ptset.empty,si) in fst (cons (Atom(d,p,s)) (Atom(d,not p,s)) ss ss 1 1) + +let union_quad ((l1,ll1),(r1,rr1)) ((l2,ll2),(r2,rr2)) = + (Ptset.union l1 l2 ,Ptset.union ll1 ll2), + (Ptset.union r1 r2 ,Ptset.union rr1 rr2) let merge_states f1 f2 = let sp = - Ptset.union (fst f1.st) (fst f2.st), - Ptset.union (snd f1.st) (snd f2.st) + union_quad f1.st f2.st and sn = - Ptset.union (fst f1.neg.st) (fst f2.neg.st), - Ptset.union (snd f1.neg.st) (snd f2.neg.st) + union_quad f1.neg.st f2.neg.st in sp,sn - + let full_or_ f1 f2 = let f1,f2 = if f1.fid < f2.fid then f2,f1 else f1,f2 in let sp,sn = merge_states f1 f2 in @@ -208,48 +220,52 @@ type t = { (pr_frm ppf f2); | Atom(dir,b,s) -> Format.fprintf ppf "%s%s[%i]" (if b then "" else "¬") - (if dir = `Left then "↓₁" else "↓₂") s + (match dir with + | `Left -> "↓₁" + | `Right -> "↓₂" + | `LLeft -> "⇓₁" + | `RRight -> "⇓₂") s let dnf_hash = Hashtbl.create 17 let rec dnf_aux f = match f.pos with | False -> PL.empty | True -> PL.singleton (Ptset.empty,Ptset.empty) - | Atom(`Left,_,s) -> PL.singleton (Ptset.singleton s,Ptset.empty) - | Atom(`Right,_,s) -> PL.singleton (Ptset.empty,Ptset.singleton s) + | Atom((`Left|`LLeft),_,s) -> PL.singleton (Ptset.singleton s,Ptset.empty) + | Atom((`Right|`RRight),_,s) -> PL.singleton (Ptset.empty,Ptset.singleton s) | Or(f1,f2) -> PL.union (dnf f1) (dnf f2) | And(f1,f2) -> - let pl1 = dnf f1 - and pl2 = dnf f2 - in - PL.fold (fun (s1,s2) acc -> - PL.fold ( fun (s1', s2') acc' -> - (PL.add - ((Ptset.union s1 s1'), - (Ptset.union s2 s2')) acc') ) - pl2 acc ) - pl1 PL.empty - - - and dnf f = - try + let pl1 = dnf f1 + and pl2 = dnf f2 + in + PL.fold (fun (s1,s2) acc -> + PL.fold ( fun (s1', s2') acc' -> + (PL.add + ((Ptset.union s1 s1'), + (Ptset.union s2 s2')) acc') ) + pl2 acc ) + pl1 PL.empty + + + and dnf f = + try Hashtbl.find dnf_hash f.fid with - Not_found -> - let d = dnf_aux f in - Hashtbl.add dnf_hash f.fid d;d + Not_found -> + let d = dnf_aux f in + Hashtbl.add dnf_hash f.fid d;d - let can_top_down f = + let can_top_down f = let nf = dnf f in if (PL.cardinal nf > 3)then None else match PL.elements nf with - | [(s1,s2); (t1,t2); (u1,u2)] when - Ptset.is_empty s1 && Ptset.is_empty s2 && Ptset.is_empty t1 && Ptset.is_empty u2 - -> Some(true,t2,u1) - | [(t1,t2); (u1,u2)] when Ptset.is_empty t1 && Ptset.is_empty u2 - -> Some(false,t2,u1) - | _ -> None + | [(s1,s2); (t1,t2); (u1,u2)] when + Ptset.is_empty s1 && Ptset.is_empty s2 && Ptset.is_empty t1 && Ptset.is_empty u2 + -> Some(true,t2,u1) + | [(t1,t2); (u1,u2)] when Ptset.is_empty t1 && Ptset.is_empty u2 + -> Some(false,t2,u1) + | _ -> None let equal_form f1 f2 = @@ -269,7 +285,7 @@ type t = { let s = if TagSet.is_finite ts - then "{" ^ (TagSet.fold (fun t a -> a ^ " " ^ (Tag.to_string t)) ts "") ^"}" + then "{" ^ (TagSet.fold (fun t a -> a ^ " '" ^ (Tag.to_string t)^"'") ts "") ^" }" else let cts = TagSet.neg ts in if TagSet.is_empty cts then "*" else (TagSet.fold (fun t a -> a ^ " " ^ (Tag.to_string t)) cts "*\\{" @@ -285,9 +301,14 @@ type t = { Format.fprintf ppf ",%s %s " (Tag.to_string t) (if b then "=>" else "->"); pr_frm ppf f; Format.fprintf ppf "(fid=%i) left=" f.fid; - let l,r = f.st in pr_st ppf (Ptset.elements l); + let (l,ll),(r,rr) = f.st in + pr_st ppf (Ptset.elements l); + Format.fprintf ppf ", "; + pr_st ppf (Ptset.elements ll); Format.fprintf ppf ", right="; pr_st ppf (Ptset.elements r); + Format.fprintf ppf ", "; + pr_st ppf (Ptset.elements rr); Format.fprintf ppf "\n"; ) a.sigma; Format.fprintf ppf "=======================================\n" @@ -308,7 +329,7 @@ type t = { let equal_trans (q1,t1,m1,f1,_) (q2,t2,m2,f2,_) = (q1 == q2) && (TagSet.equal t1 t2) && (m1 == m2) && (equal_form f1 f2) - + module TS = struct type node = Nil | Cons of Tree.t * node | Concat of node*node @@ -319,7 +340,8 @@ type t = { let cons e t = node (Cons(e,t.node)) (t.size+1) let concat t1 t2 = node (Concat (t1.node,t2.node)) (t1.size+t2.size) - let append e t = concat t (cons e empty) + let append = cons +(* let append e t = node (Concat(t.node,Cons(e,Nil))) (t.size+1) *) let to_list_rev t = let rec aux acc l rest = @@ -344,86 +366,30 @@ type t = { | Concat(n1,n2) -> let _ = loop n1 in loop n2 in loop n - end - module TS2 = - struct - type t = string - let empty = String.make 10_000_000 '0' - let cons e t = t.[Tree.id e] <- '1';t - let append = cons - let concat s1 s2 = failwith "not implemented" - - let length t = - let res = ref 0 in - for i = 0 to 9_999_999 do - if t.[i] == '1' then - incr res - done; !res - - let iter f t = failwith "not implemented" - let to_list_rev t = failwith "not implemented" - end + let rev_iter f { node = n } = + let rec loop = function + | Nil -> () + | Cons(e,n) -> let _ = loop n in f e + | Concat(n1,n2) -> let _ = loop n2 in loop n1 + in loop n - module BottomUpNew = struct - -IFDEF DEBUG -THEN - type trace = - | TNil of Ptset.t*Ptset.t - | TNode of Ptset.t*Ptset.t*bool* (int*bool*formula) list - - let traces = Hashtbl.create 17 - let dump_trace t = - let out = open_out "debug_trace.dot" - in - let outf = Format.formatter_of_out_channel out in - - let rec aux t num = - if Tree.is_node t - then - match (try Hashtbl.find traces (Tree.id t) with Not_found -> TNil(Ptset.empty,Ptset.empty)) with - | TNode(r,s,mark,trs) -> - let numl = aux (Tree.left t) num in - let numr = aux (Tree.right t) (numl+1) in - let mynum = numr + 1 in - Format.fprintf outf "n%i [ label=\"<%s>\\nr=" mynum (Tag.to_string (Tree.tag t)); - pr_st outf (Ptset.elements r); - Format.fprintf outf "\\ns="; - pr_st outf (Ptset.elements s); - List.iter (fun (q,m,f) -> - Format.fprintf outf "\\n%i %s" q (if m then "⇨" else "→"); - pr_frm outf f ) trs; - Format.fprintf outf "\", %s shape=box ];\n" - (if mark then "color=cyan1, style=filled," else ""); - let _ = Format.fprintf outf "n%i -> n%i;\n" mynum numl in - let _ = Format.fprintf outf "n%i -> n%i;\n" mynum numr in - mynum - | TNil(r,s) -> Format.fprintf outf "n%i [ shape=box, label=\"Nil\\nr=" num; - pr_st outf (Ptset.elements r); - Format.fprintf outf "\\ns="; - pr_st outf (Ptset.elements s); - Format.fprintf outf "\"];\n";num - else - match Hashtbl.find traces (-10) with - | TNil(r,s) -> - Format.fprintf outf "n%i [ shape=box, label=\"Nil\\nr=" num; - pr_st outf (Ptset.elements r); - Format.fprintf outf "\\ns="; - pr_st outf (Ptset.elements s); - Format.fprintf outf "\"];\n"; - num - | _ -> assert false + let find f { node = n } = + let rec loop = function + | Nil -> raise Not_found + | Cons(e,n) -> if f e then e else loop n + | Concat(n1,n2) -> try + loop n1 + with + | Not_found -> loop n2 in - Format.fprintf outf "digraph G {\n"; - ignore(aux t 0); - Format.fprintf outf "}\n%!"; - close_out out; - ignore(Sys.command "dot -Tsvg debug_trace.dot > debug_trace.svg") -END - + loop n + end +(* + module BottomUpJumpNew = struct +*) module HFEval = Hashtbl.Make( struct type t = int*Ptset.t*Ptset.t @@ -432,36 +398,14 @@ END let hash (a,b,c) = a+17*(Ptset.hash b) + 31*(Ptset.hash c) end) - + let hfeval = HFEval.create 4097 -(* let miss = ref 0 - let call = ref 0 - let timeref = ref 0.0 - let timerefall = ref 0.0 - let time f x = - incr call; - let t1 = Unix.gettimeofday () - in let r = f x - in - timeref := !timeref +. ((Unix.gettimeofday()) -. t1); - r - - let timeall f x = - let t1 = Unix.gettimeofday () - in let r = f x - in - timerefall := !timerefall +. ((Unix.gettimeofday()) -. t1); - r - -*) - - let eval_form_bool f s1 s2 = let rec eval f = match f.pos with - | Atom(`Left,b,q) -> if b == (Ptset.mem q s1) then (true,true,false) else false,false,false - | Atom(`Right,b,q) -> if b == (Ptset.mem q s2) then (true,false,true) else false,false,false + | Atom((`Left|`LLeft),b,q) -> if b == (Ptset.mem q s1) then (true,true,false) else false,false,false + | Atom((`Right|`RRight),b,q) -> if b == (Ptset.mem q s2) then (true,false,true) else false,false,false (* test some inlining *) | True -> true,true,true | False -> false,false,false @@ -496,55 +440,6 @@ END in eval f - module HFEvalDir = Hashtbl.Make( - struct - type t = int*Ptset.t*[`Left | `Right ] - let equal (a,b,c) (d,e,f) = - a==d && (Ptset.equal b e) && (c = f) - let hash_dir = function `Left -> 7919 - | `Right -> 3517 - - let hash (a,b,c) = - a+17*(Ptset.hash b) + 31*(hash_dir c) - end) - - let hfeval_dir = HFEvalDir.create 4097 - - - let eval_dir dir f s = - let rec eval f = match f.pos with - | Atom(d,b,q) when d = dir -> if b == (Ptset.mem q s) then true_ else false_ - | Atom(_,b,q) -> f - (* test some inlining *) - | True -> true_ - | False -> false_ - | _ -> - try - HFEvalDir.find hfeval_dir (f.fid,s,dir) - with - | Not_found -> - let r = - match f.pos with - | Or(f1,f2) -> - let f1 = eval f1 - in - if is_true f1 then true_ - else if is_false f1 then eval f2 - else or_ f1 f2 - | And(f1,f2) -> - let f1 = eval f1 in - if is_false f1 then false_ - else if is_true f1 then eval f2 - else and_ f1 f2 - | _ -> assert false - in - HFEvalDir.add hfeval_dir (f.fid,s,dir) r; - r - - in eval f - - - let fstate_pool = Hashtbl.create 11 let merge_pred a b = match a,b with @@ -576,8 +471,7 @@ END else f,false in (or_ tmpf accf,accm||m,acchtrue||hastrue) else (accf,accm,acchtrue) - ) acc (Hashtbl.find a.phi q) - + ) acc (try Hashtbl.find a.phi q with Not_found -> []) let get_trans t a tag r = try @@ -599,173 +493,224 @@ END HTagSet.add a.sigma (accq,tag) (mark,f,([],[]),has_true); f.st,f,mark,has_true,accq + let h_union = Hashtbl.create 4097 + + let pt_cup s1 s2 = + let h = (Ptset.hash s1,Ptset.hash s2) in + try + Hashtbl.find h_union h + with + | Not_found -> let s = Ptset.union s1 s2 + in + Hashtbl.add h_union h s;s + + + + let tags_of_state a q = Hashtbl.fold + (fun p l acc -> + if p == q then + List.fold_left + (fun acc (ts,_) -> + pt_cup (TagSet.positive ts) acc) acc l + else acc) a.phi Ptset.empty + + let h_tags_states = Hashtbl.create 4096 + + - let check_pred l t = true (*l = [] || - List.exists (function p -> - match p with - `Left f | `Right f -> f t - | _ -> assert false) l - *) + + let tags a qs = + try + Hashtbl.find h_tags_states (Ptset.hash qs) + with + | Not_found -> + let l = Ptset.fold (fun q acc -> pt_cup acc (tags_of_state a q)) qs Ptset.empty + in + Hashtbl.add h_tags_states (Ptset.hash qs) l;l + + let time cpt acc f x = + let t1 = Unix.gettimeofday () in + let r = f x in + let t2 = Unix.gettimeofday () in + let t = (1000. *.(t2 -. t1)) in + acc:=!acc+.t; + incr cpt; + r + + let h_time = Hashtbl.create 4096 + let calls = ref 0 - let rec accepting_among2 a t r acc = + let rtime s f x = + + let cpt,atime = + try + Hashtbl.find h_time s + with + | _ -> (ref 0, ref 0.) + in + let r = time cpt atime f x + in + Hashtbl.replace h_time s (cpt,atime); + r + + let rec accepting_among_time a t r ctx = + incr calls; let orig = r in let rest = Ptset.inter r a.final in let r = Ptset.diff r rest in - if Ptset.is_empty r then rest,acc else - if (not (Tree.is_node t)) + if Ptset.is_empty r then rest,TS.empty else + if Tree.is_node t then - orig,acc - else - let t1 = Tree.first_child t - and t2 = Tree.next_sibling t in - let (r1,r2),formula,mark,has_true,r = get_trans t a (Tree.tag t) r - in - let s1,res1 = accepting_among2 a t1 r1 acc - in - let formula = eval_dir `Left formula s1 in - if is_false formula then rest,acc - else - if is_true formula then (* tail call equivalent to a top down *) - accepting_among2 a t2 orig (if mark then TS.append t res1 else res1) + let among,result,form = + let ((ls,lls),(rs,rrs)),formula,mark,has_true,r' = + let tag = rtime "Tree.tag" Tree.tag t in + rtime "get_trans" (get_trans t a tag) r + in + let tl = rtime "tags" (tags a) ls + and tr = rtime "tags" (tags a) rs + and tll = rtime "tags" (tags a) lls + and trr = rtime "tags" (tags a) rrs + in + let first = + if Ptset.mem Tag.pcdata (pt_cup tl tll) + then + rtime "Tree.text_below" (Tree.text_below) t else - let s2,res2 = accepting_among2 a t2 r2 res1 + let etl = Ptset.is_empty tl + and etll = Ptset.is_empty tll in - let formula = eval_dir `Right formula s2 + if etl && etll + then Tree.mk_nil t + else + if etl then rtime "Tree.tagged_desc_only" (Tree.tagged_desc_only t) tll + else if etll then rtime "Tree.first_child" (Tree.first_child) t + else (* add child only *) + rtime "Tree.tagged_below" (Tree.tagged_below t tl) tll + and next = + if Ptset.mem Tag.pcdata (pt_cup tr trr) + then + rtime "Tree.text_next" (Tree.text_next t) ctx + else + let etr = Ptset.is_empty tr + and etrr = Ptset.is_empty trr in - if is_false formula then rest,res1 + if etr && etrr + then Tree.mk_nil t else - orig,(if mark then TS.append t (res2) - else res2) + if etr then rtime "Tree.tagged_foll_only" (Tree.tagged_foll_only t trr) ctx + else if etrr then rtime "Tree.next_sibling" (Tree.next_sibling) t + else (* add ns only *) + rtime "Tree.tagged_next" (Tree.tagged_next t tr trr) ctx + + in + let s1,res1 = accepting_among_time a first (pt_cup ls lls) t + and s2,res2 = accepting_among_time a next (pt_cup rs rrs) ctx + in + let rb,rb1,rb2 = rtime "eval_form_bool" (eval_form_bool formula s1) s2 in + if rb + then + let res1 = if rb1 then res1 else TS.empty + and res2 = if rb2 then res2 else TS.empty + in r', rtime "TS.concat" (TS.concat res2) (if mark then rtime "TS.append" (TS.append t) res1 else res1),formula + else Ptset.empty,TS.empty,formula + + in + + among,result + + else orig,TS.empty + + let run_time a t = + let st,res = accepting_among_time a t a.init t in + let _ = Printf.eprintf "\n Timings\n"; + let total_time = Hashtbl.fold (fun fname ({ contents=cpt }, {contents=atime}) (total_time) -> + Printf.eprintf "%s\t %i calls, %f ms accumulated time, %f ms mean time\n" + fname cpt atime (atime /. (float_of_int cpt)); + total_time +. atime ) h_time 0. + in + Printf.eprintf "total calls %i, total monitored time %f ms\n%!" !calls total_time + in + if Ptset.is_empty (st) then TS.empty else res - let rec accepting_among a t r = + + + let rec accepting_among a t r ctx = let orig = r in let rest = Ptset.inter r a.final in let r = Ptset.diff r rest in if Ptset.is_empty r then rest,TS.empty else if Tree.is_node t then - let (r1,r2),formula,mark,has_true,r = get_trans t a (Tree.tag t) r - in - let s1,res1 = accepting_among a (Tree.first_child t) r1 - and s2,res2 = accepting_among a (Tree.next_sibling t) r2 - in - let rb,rb1,rb2 = eval_form_bool formula s1 s2 in - if rb - then - let res1 = if rb1 then res1 else TS.empty - and res2 = if rb2 then res2 else TS.empty - in r, TS.concat res2 (if mark then TS.cons t res1 else res1) - else orig,TS.empty + let among,result,form = + let ((ls,lls),(rs,rrs)),formula,mark,has_true,r' = + let tag = Tree.tag t in + get_trans t a tag r + in + let tl = tags a ls + and tr = tags a rs + and tll = tags a lls + and trr = tags a rrs + in + let first = + if Ptset.mem Tag.pcdata (pt_cup tl tll) + then + Tree.text_below t + else + let etl = Ptset.is_empty tl + and etll = Ptset.is_empty tll + in + if etl && etll + then Tree.mk_nil t + else + if etl then Tree.tagged_desc_only t tll + else if etll then Tree.first_child t + else (* add child only *) + Tree.tagged_below t tl tll + and next = + if Ptset.mem Tag.pcdata (pt_cup tr trr) + then + Tree.text_next t ctx + else + let etr = Ptset.is_empty tr + and etrr = Ptset.is_empty trr + in + if etr && etrr + then Tree.mk_nil t + else + if etr then Tree.tagged_foll_only t trr ctx + else if etrr then Tree.next_sibling t + else (* add ns only *) + Tree.tagged_next t tr trr ctx + + in + let s1,res1 = accepting_among a first (pt_cup ls lls) t + and s2,res2 = accepting_among a next (pt_cup rs rrs) ctx + in + let rb,rb1,rb2 = eval_form_bool formula s1 s2 in + if rb + then + let res1 = if rb1 then res1 else TS.empty + and res2 = if rb2 then res2 else TS.empty + in r', TS.concat res2 (if mark then TS.append t res1 else res1),formula + else Ptset.empty,TS.empty,formula + + in + among,result + else orig,TS.empty + + let run a t = + let st,res = accepting_among a t a.init t in + if Ptset.is_empty (st) then TS.empty else res - - let rec accepting_count a t r = - let orig = r in - let rest = Ptset.inter r a.final in - let r = Ptset.diff r rest in - if Ptset.is_empty r then rest,0 else - if Tree.is_node t - then - let (r1,r2),formula,mark,has_true,r = get_trans t a (Tree.tag t) r - in - let s1,res1 = accepting_count a (Tree.first_child t) r1 - and s2,res2 = accepting_count a (Tree.next_sibling t) r2 - in - let rb,rb1,rb2 = eval_form_bool formula s1 s2 in - if rb - then - let res1 = if rb1 then res1 else 0 - and res2 = if rb2 then res2 else 0 - in r, res1+res2+(if mark then 1 else 0) - else orig,0 - else orig,0 - - let run a t = -(* let _ = - call := 0; miss:=0; - timeref := 0.0; - HFEval.clear hfeval; - Hashtbl.clear dnf_hash; - Hashtbl.clear fstate_pool; - in *) - let st,res = accepting_among a t a.init in - let b = Ptset.is_empty (st) in - if b then TS.empty - else - res - - let run_count a t = -(* let _ = - call := 0; miss:=0; - timeref := 0.0; - timerefall := 0.0; - HFEval.clear hfeval; - Hashtbl.clear dnf_hash; - Hashtbl.clear fstate_pool; - in *) - let st,res = accepting_count a t a.init in - let b = Ptset.is_empty (st) in - if b then 0 - else - res - end - - module Jump = struct - let eval_dir = BottomUpNew.eval_dir - let xi1 = HTagSet.create 10 - let xi2 = HTagSet.create 10 - - let rec accept_from orig a t r acc = - if (Tree.is_root t) || - (Ptset.subset orig r) - then - acc - else - let is_left = Tree.is_left t in - let tag = Tree.tag t in - let nr,f, mark = - try - HTagSet.find (if is_left then xi1 else xi2) - (r,tag) - with - | Not_found -> - let trans = - Hashtbl.fold - (fun q l acc -> - List.fold_left (fun ((racc,facc,macc) as acc) (ts,(m,f,_)) -> - let rl,rr = f.st in - if (TagSet.mem tag ts) && - (Ptset.intersect (if is_left then rl else rr) r) - then (Ptset.add q racc,or_ f facc, macc||m) - else acc) acc l) - a.phi (Ptset.empty,false_,false) - in - HTagSet.add (if is_left then xi1 else xi2) (r,tag) trans; - trans - in - let form = eval_dir (if is_left then `Left else `Right) f r - in - if is_true form then accept_from orig a (Tree.parent t) nr - (if mark then TS.cons t acc else acc) - else if is_false form then TS.empty - else assert false - - let run a t r = - HTagSet.clear xi1; - HTagSet.clear xi2; - let orig = - List.fold_left (fun s (_,(_,f,_)) -> - Ptset.union s (fst f.st)) - Ptset.empty (Hashtbl.find a.phi (Ptset.choose a.init)) - in - accept_from orig a t r TS.empty - +(* end +*) diff --git a/ata.mli b/ata.mli index 32f28a0..593a39e 100644 --- a/ata.mli +++ b/ata.mli @@ -19,15 +19,15 @@ type formula_expr = | True | Or of formula * formula | And of formula * formula - | Atom of ([ `Left | `Right ] * bool * state) -and formula = { fid : int; pos : formula_expr; neg : formula; st : Ptset.t*Ptset.t; size: int;} + | Atom of ([ `Left | `Right | `LLeft | `RRight ] * bool * state) +and formula = { fid : int; fkey : int; pos : formula_expr; neg : formula; st : (Ptset.t*Ptset.t)*(Ptset.t*Ptset.t); size: int;} val true_ : formula val false_ : formula -val atom_ : [`Left | `Right ] -> bool -> state -> formula +val atom_ : [`Left | `Right | `LLeft | `RRight ] -> bool -> state -> formula val and_ : formula -> formula -> formula val or_ : formula -> formula -> formula val not_ : formula -> formula -val equal_form : formula -> formula -> bool +(*val equal_form : formula -> formula -> bool *) val pr_frm : Format.formatter -> formula -> unit @@ -56,11 +56,11 @@ val ( ><@ ) : state -> TagSet.t*bool*predicate -> state*(TagSet.t*bool*predicate val ( >=> ) : state*(TagSet.t*bool*predicate) -> formula -> t val ( +| ) : formula -> formula -> formula val ( *& ) : formula -> formula -> formula -val ( ** ) : [`Left | `Right ] -> state -> formula +val ( ** ) : [`Left | `Right | `LLeft | `RRight ] -> state -> formula end type transition = Transitions.t -val equal_trans : transition -> transition -> bool +val equal_trans : transition -> transition -> bool module TS : sig type t @@ -71,15 +71,13 @@ module TS : sig val to_list_rev : t -> Tree.Binary.t list val length : t -> int val iter : (Tree.Binary.t -> unit) -> t -> unit + val rev_iter : (Tree.Binary.t -> unit) -> t -> unit + val find : (Tree.Binary.t -> bool) -> t -> Tree.Binary.t end -module BottomUpNew : -sig +(*module BottomUpJumpNew : +sig *) val run : t -> Tree.Binary.t -> TS.t - val run_count : t -> Tree.Binary.t -> int -end + val run_time : t -> Tree.Binary.t -> TS.t +(*end *) -module Jump : -sig - val run : t -> Tree.Binary.t -> Ptset.t -> TS.t -end diff --git a/debug.ml b/debug.ml index de10dd0..f754993 100644 --- a/debug.ml +++ b/debug.ml @@ -19,7 +19,6 @@ module Loc = Camlp4.PreCast.Loc DEFINE D(x) = ignore(x); - DEFINE MM(v,l) = (let ____x = v in (Memory.register ____x (Loc.to_string (l)));____x) let () = Memory.schedule_stats () diff --git a/finiteCofinite.ml b/finiteCofinite.ml index 32f0e48..58f0730 100644 --- a/finiteCofinite.ml +++ b/finiteCofinite.ml @@ -10,6 +10,7 @@ module type S = sig type elt type t + type set val empty : t val any : t val is_empty : t -> bool @@ -38,14 +39,16 @@ sig val choose : t -> elt val hash : t -> int val equal : t -> t -> bool + val positive : t -> set + val negative : t -> set end -module Make (E : Sigs.Set) : S with type elt = E.elt = +module Make (E : Sigs.Set) : S with type elt = E.elt and type set = E.t = struct type elt = E.elt type t = Finite of E.t | CoFinite of E.t - + type set = E.t let empty = Finite E.empty let any = CoFinite E.empty @@ -179,5 +182,15 @@ struct function Finite x -> (E.hash x) | CoFinite x -> ( ~-(E.hash x) land max_int) + let positive = + function + | Finite x -> x + | _ -> E.empty + + let negative = + function + | CoFinite x -> x + | _ -> E.empty + end diff --git a/finiteCofinite.mli b/finiteCofinite.mli index b489557..7f98130 100644 --- a/finiteCofinite.mli +++ b/finiteCofinite.mli @@ -4,6 +4,7 @@ module type S = sig type elt type t + type set val empty : t val any : t val is_empty : t -> bool @@ -32,7 +33,9 @@ module type S = val choose : t -> elt val hash : t -> int val equal : t -> t -> bool + val positive : t -> set + val negative : t -> set end -module Make : functor (E : Sigs.Set) -> S with type elt = E.elt +module Make : functor (E : Sigs.Set) -> S with type elt = E.elt and type set = E.t diff --git a/main.ml b/main.ml index 7cc631b..c446ef3 100644 --- a/main.ml +++ b/main.ml @@ -8,7 +8,6 @@ INCLUDE "debug.ml" open Ata - let l = ref [] ;; let time f x = let t1 = Unix.gettimeofday () in @@ -21,176 +20,6 @@ let time f x = ;; let total_time () = List.fold_left (+.) 0. !l;; -let poa = TagSet.add Tag.pcdata (TagSet.singleton Tag.attribute) - -let rec fill_hashtag t = - if Tree.Binary.is_node t then - begin - let tag = Tree.Binary.tag t in - let a = - if TagSet.mem tag poa - then 0 - else - fill_hashtag (Tree.Binary.first_child t) - in - let b = fill_hashtag (Tree.Binary.next_sibling t) - in a+b+1 - end - else 0 - - -let test_slashslash tree k = - let test = - match k with "*" -> TagSet.remove (Tag.tag "") TagSet.star - | s -> TagSet.singleton (Tag.tag k) - in - let attorstring = TagSet.cup TagSet.pcdata TagSet.attribute in - let rec aux t acc = - if Tree.Binary.is_node t - then - let tag = Tree.Binary.tag t in - let l = Tree.Binary.first_child t - and r = Tree.Binary.next_sibling t - in - let acc = - if TagSet.mem tag test - then - TS.append t acc - else - acc - in - let rl = if TagSet.mem tag attorstring then acc else aux l acc - in aux r rl - else - acc - in - let _ = Printf.eprintf "Testing optimal //%s ... " k in - let r = time (aux tree ) TS.empty in - Printf.eprintf "Result set is %i nodes\n%!" (TS.length r) - - -let test_jump tree k = - let ttag = Tag.tag k in - - let rec loop acc tree = - if Tree.Binary.is_node tree - then - let acc = TS.cons tree acc in - loop acc (Tree.Binary.tagged_foll tree ttag) - else - acc - - in - let _ = Printf.eprintf "Testing jumping for tag %s ... " k in - let r = time (loop TS.empty ) (Tree.Binary.tagged_next tree ttag) in - Printf.eprintf "Result set is %i nodes\n%!" (TS.length r) - - - -let test_traversal tree k = - let ttag = Tag.tag k in - let iid t = if Tree.Binary.is_node t then Tree.Binary.id t else -1 in - let rec aux t = - if Tree.Binary.is_node t - then - let tag = Tree.Binary.tag t in - let l = Tree.Binary.first_child t - and r = Tree.Binary.next_sibling t - in - let _ = Printf.eprintf "Tree with id %i and tag=%s, tagged_desc %s is %i tagged_foll is %i\n%!" - (Tree.Binary.id t) (Tag.to_string tag) (k) - (iid (Tree.Binary.tagged_desc t ttag)) - (iid (Tree.Binary.tagged_foll t ttag)) - in - aux l; - aux r; - - else - () - in - aux tree - - - -let test_count_subtree tree k = - let ttag = Tag.tag k in - let _ = Printf.eprintf "Counting subtrees with tag %s ... %!" k in - let r = time(Tree.Binary.subtree_tags tree) ttag in - Printf.eprintf "%i nodes \n%!" r - - -let test_contains tree s = - let _ = Printf.eprintf "Fetching DocIds containing %s ... %!" s in - time (fun s -> let r = Tree.Binary.contains tree s in - Tree.Binary.DocIdSet.iter - (fun t -> output_string stderr - (Tree.Binary.get_string tree t); - output_char stderr '\n') r ) s - - -let test_count_contains tree s = - let _ = Printf.eprintf "Counting DocIds containing %s ... %!" s in - let r = time (Tree.Binary.count_contains tree) s in - Printf.eprintf "%i documents ids\n%!" (r) - -let test_contains_old tree s = - let _ = Printf.eprintf "Fetching (old) DocIds containing %s ... %!" s in - let r = time (Tree.Binary.contains_old tree) s in - Printf.eprintf "%i documents ids\n%!" (Tree.Binary.DocIdSet.cardinal r) - -let test_contains_iter tree s = - let _ = Printf.eprintf "Fetching (old) DocIds containing %s ... %!" s in - let r = time (Tree.Binary.contains_iter tree) s in - Printf.eprintf "%i documents ids\n%!" (Tree.Binary.DocIdSet.cardinal r) - -module Stack = -struct - type t = { mutable table: Tree.Binary.t array; - mutable top : int } - - let empty = { table = Array.make 0 (Obj.magic 0); - top = 0 } - let cons e s = - let ls = Array.length s.table in - if ls > s.top - then - begin - s.table.(s.top) <- e; - s.top <- s.top + 1; - s - end - else - - let a = Array.make (ls * 2 + 1) (Tree.Binary.root e) - in - Array.blit s.table 0 a 0 ls; - s.table <- a; - s.table.(s.top) <- e; - s.top <- s.top + 1; - s - -end - -let test_fast tree = - let rec aux t acc = - if Tree.Binary.is_node t - then - aux (Tree.Binary.right t)( aux (Tree.Binary.left t) (Stack.cons t acc)) - else acc - in let _ = Printf.eprintf "Fast traversal ...%!" in - time (aux tree) Stack.empty - -let test_cps tree = - let rec aux t acc cont = - if Tree.Binary.is_node t - then aux (Tree.Binary.left t) (Stack.cons t acc) ((Tree.Binary.right t)::cont) - else match cont with - | [] -> acc - | p::r -> aux p acc r - in - let _ = Printf.eprintf "CPS traversal ...%!" in - time (aux tree Stack.empty) [] - let main v query output = let _ = Tag.init (Tree.Binary.tag_pool v) in @@ -209,16 +38,22 @@ let main v query output = time (fill_hashtag) v; *) Printf.eprintf "Compiling query : "; - let auto,_ = time XPath.Compile.compile query in + let auto,ltags,contains = time XPath.Compile.compile query in + let _ = Ata.dump Format.err_formatter auto in + let _ = Printf.eprintf "%!" in + let _ = match contains with + None -> () + | Some s -> Tree.Binary.init_contains v s + in Printf.eprintf "Execution time %s : " (if !Options.count_only then "(counting only)" else ""); begin if !Options.count_only then - let result = time (BottomUpNew.run_count auto) v - in - Printf.eprintf "Number of nodes in the result set : %i\n" result - else - let result = time (BottomUpNew.run auto) v in - Printf.eprintf "Number of nodes in the result set : %i\n" (TS.length result); + failwith "Count only not implemented in this version" + else + let _ = Gc.set ({ Gc.get() with Gc.max_overhead = 1000000; Gc.space_overhead = 100 }) in + let result = time (if !Options.time then run_time auto else run auto) v in + Printf.eprintf "Number of nodes in the result set : %i\n" (TS.length result); + Printf.eprintf "\n%!"; begin match output with | None -> () @@ -227,7 +62,7 @@ let main v query output = time( fun () -> let oc = open_out f in output_string oc "\n"; - TS.iter (fun t -> output_string oc "----------\n"; + TS.rev_iter (fun t -> output_string oc "----------\n"; Tree.Binary.print_xml_fast oc t; output_char oc '\n') result) (); end; diff --git a/options.ml b/options.ml index bd7fcce..ab25d7d 100644 --- a/options.ml +++ b/options.ml @@ -7,7 +7,7 @@ let input_file = ref "" let output_file = ref None let save_file = ref "" let count_only = ref false - +let time = ref false let usage_msg = Printf.sprintf "%s 'query' [output]" Sys.argv.(0) @@ -20,7 +20,8 @@ let anon_fun = | 2 -> output_file := Some s; incr pos | _ -> raise (Arg.Bad(s)) -let spec = [ "-c", Arg.Set(count_only), "counting only (don't materialize the result set"; +let spec = [ "-c", Arg.Set(count_only), "counting only (don't materialize the result set)"; + "-t", Arg.Set(time), "print timing statistics"; "-f", Arg.Set_int(sample_factor), "sample factor [default=64]"; "-i", Arg.Set(index_empty_texts), "index empty texts [default=false]"; "-d", Arg.Set(disable_text_collection), "disable text collection[default=false]"; diff --git a/options.mli b/options.mli index 9db75a7..ebc5828 100644 --- a/options.mli +++ b/options.mli @@ -7,4 +7,4 @@ val query : string ref val input_file : string ref val output_file : string option ref val save_file : string ref - +val time : bool ref diff --git a/ptset.ml b/ptset.ml index 3d30f68..4b2c845 100644 --- a/ptset.ml +++ b/ptset.ml @@ -11,26 +11,32 @@ type elt = int type t = { id : int; key : int; (* hash *) - node : node } + node : node; + } and node = | Empty | Leaf of int | Branch of int * int * t * t + +(* faster if outside of a module *) +let hash_node x = match x with + | Empty -> 0 + | Leaf i -> (i+1) land max_int + (* power of 2 +/- 1 are fast ! *) + | Branch (b,i,l,r) -> + ((b lsl 1)+ b + i+(i lsl 4) + (l.key lsl 5)-l.key + + (r.key lsl 7) - r.key) land max_int + module Node = struct type _t = t type t = _t - let hash x = x.key - let hash_node = function - | Empty -> 0 - | Leaf i -> i+1 - (* power of 2 +/- 1 are fast ! *) - | Branch (b,i,l,r) -> - (b lsl 1)+ b + i+(i lsl 4) + (l.key lsl 5)-l.key - + (r.key lsl 7) - r.key - let hash_node x = (hash_node x) land max_int - let equal x y = match (x.node,y.node) with + external hash : t -> int = "%field1" + let equal x y = + if x.id == y.id || x.key == y.key || x.node == y.node then true + else + match (x.node,y.node) with | Empty,Empty -> true | Leaf k1, Leaf k2 when k1 == k2 -> true | Branch(p1,m1,l1,r1), Branch(p2,m2,l2,r2) when m1==m2 && p1==p2 && @@ -62,7 +68,7 @@ let is_empty s = s.id==0 let rec norm n = let v = { id = gen_uid (); - key = Node.hash_node n; + key = hash_node n; node = n } in WH.merge pool v @@ -368,3 +374,32 @@ let rec intersect s1 s2 = (equal s1 s2) || let hash s = s.key let from_list l = List.fold_left (fun acc i -> add i acc) empty l + +type int_vector + +external int_vector_alloc : int -> int_vector = "caml_int_vector_alloc" +external int_vector_set : int_vector -> int -> int -> unit = "caml_int_vector_set" +external int_vector_length : int_vector -> int = "caml_int_vector_length" +external int_vector_empty : unit -> int_vector = "caml_int_vector_empty" + +let empty_vector = int_vector_empty () + +let to_int_vector_ext s = + let l = cardinal s in + let v = int_vector_alloc l in + let i = ref 0 in + iter (fun e -> int_vector_set v !i e; incr i) s; + v + +let hash_vectors = Hashtbl.create 4097 + +let to_int_vector s = + try + Hashtbl.find hash_vectors s.key + with + Not_found -> + let v = to_int_vector_ext s in + Hashtbl.add hash_vectors s.key v; + v + + diff --git a/ptset.mli b/ptset.mli index c36a08d..8a25ffc 100644 --- a/ptset.mli +++ b/ptset.mli @@ -87,3 +87,7 @@ val intersect : t -> t -> bool val hash : t -> int val from_list : int list -> t + +type int_vector +val to_int_vector : t -> int_vector + diff --git a/tag.mli b/tag.mli index 46efbcf..b5e0ad2 100644 --- a/tag.mli +++ b/tag.mli @@ -7,6 +7,7 @@ val init : pool -> unit val to_string : t -> string val compare : t -> t -> int val equal : t -> t -> bool +val nullt : t val dump : Format.formatter -> t -> unit val check : t -> unit (* Check internal invariants *) diff --git a/tagSet.ml b/tagSet.ml index 503df80..76c1c98 100644 --- a/tagSet.ml +++ b/tagSet.ml @@ -4,10 +4,14 @@ struct let hash = Hashtbl.hash end *) -include FiniteCofinite.Make(Ptset) +module M : FiniteCofinite.S with type elt = Tag.t and type set = Ptset.t = + FiniteCofinite.Make(Ptset) +include M + let tag t = singleton t let pcdata = singleton Tag.pcdata let attribute = singleton Tag.attribute let star = diff any (cup pcdata attribute) let node = neg attribute + diff --git a/tagSet.mli b/tagSet.mli index 572057e..d536c8c 100644 --- a/tagSet.mli +++ b/tagSet.mli @@ -5,7 +5,7 @@ (* Distributed under the terms of the LGPL (see LICENCE) *) (******************************************************************************) -include FiniteCofinite.S with type elt = Tag.t +include FiniteCofinite.S with type elt = Tag.t and type set = Ptset.t val tag : Tag.t -> t val pcdata : t diff --git a/tests/test.xml b/tests/test.xml index 9a9e8c0..286dfb0 100644 --- a/tests/test.xml +++ b/tests/test.xml @@ -1,2 +1,22 @@ -foo +foo + + + + + + foobar + + + + + + + + + foo + + + + foo + diff --git a/tests/xpathmark.sh b/tests/xpathmark.sh index e786b8f..f10054e 100755 --- a/tests/xpathmark.sh +++ b/tests/xpathmark.sh @@ -18,8 +18,8 @@ querybase=`basename "$i" .xpl` query=`cat $i` xqueryorig="xpath-pt/xquery/$querybase".xql -cat $xqueryorig | sed -e "s/doc()/doc(\"xmark_tiny.xml\")/g" >tmp.xql -../main xmark_tiny.xml "$query" results/"$querybase".sxsi +cat $xqueryorig | sed -e "s/doc()/doc(\"XMark_1.04.xml\")/g" >tmp.xql +../main XMark_1.04.srx "$query" results/"$querybase".sxsi saxonb-xquery -o:results/"$querybase".saxon tmp.xql echo >> results/"$querybase".saxon diff -w results/"$querybase".sxsi results/"$querybase".saxon >/dev/null || echo $querybase gives different results diff --git a/tree.ml b/tree.ml index 20a7792..b19a3aa 100644 --- a/tree.ml +++ b/tree.ml @@ -48,12 +48,20 @@ sig val has_tagged_foll : t -> Tag.t -> bool val tagged_desc : t -> Tag.t -> t val tagged_foll : t -> Tag.t -> t + val tagged_below : t -> Ptset.t -> Ptset.t -> t + val tagged_next : t -> Ptset.t -> Ptset.t -> t -> t + val tagged_desc_only : t -> Ptset.t -> t + val tagged_foll_only : t -> Ptset.t -> t -> t + val text_below : t -> t + val text_next : t -> t -> t val init_tagged_next : t -> Tag.t -> unit - val tagged_next : t -> Tag.t -> t val subtree_tags : t -> Tag.t -> int val is_left : t -> bool val print_id : Format.formatter -> t -> unit val test_xml_tree : Format.formatter -> Ptset.t -> t -> unit + val init_contains : t -> string -> unit + val mk_nil : t -> t + val test_jump : t -> Tag.t -> unit end module XML = @@ -126,6 +134,7 @@ struct external parent : t -> [`Tree] node -> [`Tree] node = "caml_xml_tree_parent" external parent_doc : t -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_parent_doc" + external prev_doc : t -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_prev_doc" external first_child : t -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_child" @@ -155,17 +164,32 @@ struct external my_text : t -> [`Tree] node -> [`Text ] node = "caml_xml_tree_my_text" external next_text : t -> [`Tree] node -> [`Text ] node = "caml_xml_tree_next_text" - + external doc_ids : t -> [`Tree ] node -> [`Text ] node * [`Text ] node = "caml_xml_tree_doc_ids" external text_xml_id : t -> [`Text ] node -> int = "caml_xml_tree_text_xml_id" external node_xml_id : t -> [`Tree ] node -> int = "caml_xml_tree_node_xml_id" external is_ancestor : t -> [`Tree ] node -> [`Tree ] node -> bool = "caml_xml_tree_is_ancestor" external tagged_desc : t -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_desc" external tagged_foll : t -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_foll" - external tagged_next : t -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_next" external subtree_tags : t -> [`Tree ] node -> Tag.t -> int = "caml_xml_tree_subtree_tags" + external tagged_below : t -> [`Tree ] node -> Ptset.int_vector -> Ptset.int_vector -> [`Tree ] node = "caml_xml_tree_tagged_below" + external tagged_desc_only : t -> [`Tree ] node -> Ptset.int_vector -> [`Tree ] node = "caml_xml_tree_tagged_desc_only" + external tagged_next : t -> [`Tree ] node -> Ptset.int_vector -> Ptset.int_vector -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_next" + external tagged_foll_only : t -> [`Tree ] node -> Ptset.int_vector -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_foll_only" + external tagged_desc_or_foll_only : t -> [`Tree ] node -> Ptset.int_vector -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_foll_only" + + let test_jump tree tag = + let rec loop id ctx = + if id != nil + then + let first = tagged_desc tree id tag + and next = tagged_desc tree id tag + in + loop first id; + loop next ctx + in + loop (root tree) (root tree) - - + let test_xml_tree ppf tags v = let pr x = Format.fprintf ppf x in let rec aux id = @@ -178,17 +202,21 @@ struct (tag_id v id) (Tag.to_string (tag_id v id)) (node_xml_id v id); - pr "DocID of PrevText,MyText,NextText : (%i = %s,%i = %s,%i = %s) ParentDoc(my_text)=%i\n%!" + pr "DocID of PrevText,MyText,NextText : (%i = %s,%i = %s,%i = %s) ParentDoc(my_text)=%i PrevDoc(next_text)=%i\n%!" (int_of_node (prev_text v id)) (Text.get_text v (prev_text v id)) (int_of_node (my_text v id)) (Text.get_text v (my_text v id)) (int_of_node (next_text v id)) (Text.get_text v (next_text v id)) - (int_of_node(parent_doc v (my_text v id))); - pr "Testing Tagged*\n%!"; - Ptset.iter (fun t -> - let str = Tag.to_string t in + (int_of_node(parent_doc v (my_text v id))) + (int_of_node(prev_doc v (next_text v id))); + let i1,i2 = doc_ids v id in + pr "Testing DocIds below (%i,%i)*\n%!" + (int_of_node i1) (int_of_node i2); + pr "Testing Tagged*\n%!"; + Ptset.iter (fun t -> + let str = Tag.to_string t in if Tag.pcdata <> t then begin pr "Tag: %s : \n%!" str; @@ -273,6 +301,16 @@ struct let dump { doc=t } = Tree.print_skel t let test_xml_tree ppf tags { doc=t } = Tree.test_xml_tree ppf tags t + let test_jump { doc=t } tag = Tree.test_jump t tag + let contains_array = ref [| |] + + let init_contains t s = + let a = Text.contains t.doc s + in + Array.fast_sort (compare) a; + contains_array := a + + module DocIdSet = struct include Set.Make (struct type t = string_content @@ -336,7 +374,7 @@ struct | Node (NC t) -> Printf.sprintf "Node (NC %i)" (int_of_node t) | Node (SC (t,i)) -> Printf.sprintf "Node (SC (%i,%i))" (int_of_node t) (int_of_node i) - + let mk_nil t = { t with node = Nil } let root n = { n with node = norm (Tree.root n.doc) } let is_root n = match n.node with | Node(NC t) -> (int_of_node t) == 0 @@ -446,14 +484,129 @@ struct | { doc=d; node=Node(NC n) } -> { t with node = norm (tagged_desc d n tag) } | _ -> { t with node=Nil } -(* - let tagged_next t tag = - if tag == Tag.attribute || tag == Tag.pcdata then failwith "tagged_next" - else - match tagged_desc t tag with - | { doc = d; node=Nil } -> tagged_foll t tag - | x -> x -*) + + let tagged_next t tb tf s = + match s with + | { node = Node (NC(below)) } -> begin + match t with + | { doc = d; node=Node(NC n) } -> + { t with node= norm (tagged_next d n (Ptset.to_int_vector tb) (Ptset.to_int_vector tf) below) } + | { doc = d; node=Node(SC (i,n) ) } when is_nil n -> + let p = parent_doc d i in + { t with node= norm (tagged_next d p (Ptset.to_int_vector tb) (Ptset.to_int_vector tf) below) } + | { doc = d; node=Node(SC (_,n) ) } -> + if Ptset.mem (tag_id d n) (Ptset.union tb tf) + then { t with node=Node(NC(n)) } + else + let vb = Ptset.to_int_vector tb in + let vf = Ptset.to_int_vector tf in + let node = + let dsc = tagged_below d n vb vf in + if is_nil dsc + then tagged_next d n vb vf below + else dsc + in + { t with node = norm node } + | _ -> {t with node=Nil } + end + + | _ -> {t with node=Nil } + + let tagged_foll_only t tf s = + match s with + | { node = Node (NC(below)) } -> begin + match t with + | { doc = d; node=Node(NC n) } -> + { t with node= norm (tagged_foll_only d n (Ptset.to_int_vector tf) below) } + | { doc = d; node=Node(SC (i,n) ) } when is_nil n -> + let p = parent_doc d i in + { t with node= norm (tagged_foll_only d p (Ptset.to_int_vector tf) below) } + | { doc = d; node=Node(SC (_,n) ) } -> + if Ptset.mem (tag_id d n) tf + then { t with node=Node(NC(n)) } + else + let vf = Ptset.to_int_vector tf in + let node = + let dsc = tagged_desc_only d n vf in + if is_nil dsc + then tagged_foll_only d n vf below + else dsc + in + { t with node = norm node } + | _ -> {t with node=Nil } + end + + | _ -> {t with node=Nil } + + + let tagged_below t tc td = + match t with + | { doc = d; node=Node(NC n) } -> + let vc = Ptset.to_int_vector tc + in + let vd = Ptset.to_int_vector td + in + { t with node= norm(tagged_below d n vc vd) } + | _ -> { t with node=Nil } + + let tagged_desc_only t td = + match t with + | { doc = d; node=Node(NC n) } -> + let vd = Ptset.to_int_vector td + in + { t with node= norm(tagged_desc_only d n vd) } + | _ -> { t with node=Nil } + + + let array_find a i j = + let l = Array.length a in + let rec loop idx x y = + if x > y || idx >= l then Text.nil + else + if a.(idx) >= x then if a.(idx) > y then Text.nil else a.(idx) + else loop (idx+1) x y + in + if a.(0) > j || a.(l-1) < i then Text.nil + else loop 0 i j + + + let text_below t = + let l = Array.length !contains_array in + if l = 0 then { t with node=Nil } + else + match t with + | { doc = d; node=Node(NC n) } -> + let i,j = doc_ids t.doc n in + let id = array_find !contains_array i j + in + if id == Text.nil then + { t with node=Nil } + else + {t with node = Node(SC(id, Tree.next_sibling d (Tree.prev_doc d id))) } + | _ -> { t with node=Nil } + + let text_next t root = + let l = Array.length !contains_array in + if l = 0 then { t with node=Nil } + else + let inf = match t with + | { doc =d; node = Node(NC n) } -> snd(doc_ids d n)+1 + | { node = Node(SC(i,_)) } -> i+1 + | _ -> assert false + in + match root with + | { doc = d; node= Node (NC n) } -> + let _,j = doc_ids t.doc n in + + let id = array_find !contains_array inf j + in + if id == Text.nil then { doc = d; node= Nil } + else + {doc = d; node = Node(SC(id,Tree.next_sibling d (Tree.prev_doc d id))) } + | _ -> { t with node=Nil} + + + let subtree_tags t tag = match t with { doc = d; node = Node(NC n) } -> @@ -491,14 +644,14 @@ struct - let tagged_next t tag = +(* let tagged_next t tag = if !idx >= Array.length !tagged_desc_array then {t with node=Nil} else let r = !tagged_desc_array.(!idx) in incr idx; r - +*) let has_tagged_foll t tag = is_node (tagged_foll t tag) diff --git a/tree.mli b/tree.mli index d5d48a3..e9ef8c7 100644 --- a/tree.mli +++ b/tree.mli @@ -46,12 +46,20 @@ sig val has_tagged_foll : t -> Tag.t -> bool val tagged_desc : t -> Tag.t -> t val tagged_foll : t -> Tag.t -> t + val tagged_below : t -> Ptset.t -> Ptset.t -> t + val tagged_next : t -> Ptset.t -> Ptset.t -> t -> t + val tagged_desc_only : t -> Ptset.t -> t + val tagged_foll_only : t -> Ptset.t -> t -> t + val text_below : t -> t + val text_next : t -> t -> t val init_tagged_next : t -> Tag.t -> unit - val tagged_next : t -> Tag.t -> t val subtree_tags : t -> Tag.t -> int val is_left : t -> bool val print_id : Format.formatter -> t -> unit val test_xml_tree : Format.formatter -> Ptset.t -> t -> unit + val init_contains : t -> string -> unit + val mk_nil : t -> t + val test_jump : t -> Tag.t -> unit end module Binary : BINARY diff --git a/ulexer.ml b/ulexer.ml index 7b8972b..a6ea991 100644 --- a/ulexer.ml +++ b/ulexer.ml @@ -188,6 +188,7 @@ let parse_char lexbuf base i = let rec token = lexer | [' ' '\t'] -> token lexbuf | "text()" | "node()" | "and" | "not" | "or" + | "contains" | "contains_full" | "self" | "descendant" | "child" | "descendant-or-self" | "attribute" | "following-sibling" | "preceding-sibling" | "parent" | "ancestor" | "ancestor-or-self" | "preceding" | "following" diff --git a/unit_test.ml b/unit_test.ml index e52c533..f6cf23a 100644 --- a/unit_test.ml +++ b/unit_test.ml @@ -32,15 +32,112 @@ let doc = try Tree.Binary.parse_xml_uri Sys.argv.(1) with - | _ -> - Printf.printf "Error parsing document\n"; - exit 2 + | _ ->( + try + Tree.Binary.load Sys.argv.(1) + with + | _ -> + Printf.printf "Error parsing document\n"; + exit 2) ;; let _ = Tag.init (Tree.Binary.tag_pool doc) ;; -let tags = (Ptset.add (Tag.tag "foo") (collect_tags doc)) +let tags = (collect_tags doc) ;; - +(* let _ = Tree.Binary.test_xml_tree Format.std_formatter tags doc ;; +let _ = Printf.printf "Testing //a with jumping\n" +;; +*) +let rec test_a dir t acc ctx = + if Tree.Binary.is_node t + then + let acc = + if (Tree.Binary.tag t) == (Tag.tag "a") + then Ata.TS.cons t acc + else acc + in + let first = Tree.Binary.tagged_below t Ptset.empty (Ptset.singleton (Tag.tag "a")) + and next = Tree.Binary.tagged_next t Ptset.empty (Ptset.singleton (Tag.tag "a")) ctx + in + let _ = + Printf.printf "t is :"; + Tree.Binary.print_xml_fast stdout t; + Printf.printf " called from %s of " (if dir then "below" else "next"); + Tree.Binary.print_xml_fast stdout ctx; + if (Tree.Binary.is_node next) + then begin + Printf.printf ", Next a is %!"; + Tree.Binary.print_xml_fast stdout next; + end + else + Printf.printf ", Next a is empty!"; + print_newline(); + in + test_a false next (test_a true first acc t) t + else acc +;; + +let rec test_text dir t acc ctx = + if Tree.Binary.is_node t + then + let acc = + if (Tree.Binary.tag t) == (Tag.pcdata) + then Ata.TS.cons t acc + else acc + in + let first = Tree.Binary.text_below t + and next = Tree.Binary.text_next t ctx + in + (* + let _ = + Printf.printf "t is :"; + Tree.Binary.print_xml_fast stdout t; + Printf.printf " called from %s of " (if dir then "below" else "next"); + Tree.Binary.print_xml_fast stdout ctx; + if (Tree.Binary.is_node first) + then begin + Printf.printf "First (text) is %!"; + Tree.Binary.print_xml_fast stdout first; + end + else + Printf.printf "First (text) is empty!"; + if (Tree.Binary.is_node next) + then begin + Printf.printf ", Next (text) is %!"; + Tree.Binary.print_xml_fast stdout next; + end + else + Printf.printf ", Next (text) is empty!"; + print_newline(); + in *) + test_text false next (test_text true first acc t) ctx + else acc +;; +(* +let r = test_a true doc Ata.TS.empty doc;; +(* +let _ = Printf.printf "==> %i nodes\n" (Ata.TS.length r) +let _ = Ata.TS.iter (fun t -> Tree.Binary.print_xml_fast stdout t; print_newline();) r + +*) +let _ = Tree.Binary.init_contains doc "car" + +let r = test_text true doc Ata.TS.empty doc +let _ = Printf.printf "==> %i nodes\n" (Ata.TS.length r) +(* let _ = Ata.TS.iter (fun t -> Tree.Binary.print_xml_fast stdout t; print_newline();) r *) +;; + +*) +let time f x = + let t1 = Unix.gettimeofday () in + let r = f x in + let t2 = Unix.gettimeofday () in + let t = (1000. *.(t2 -. t1)) in + Printf.eprintf " %fms\n%!" t ; + r +;; +let _ = Printf.eprintf "Timing jump //keyword ... " +let _ = time Tree.Binary.test_jump doc (Tag.tag "keyword") diff --git a/xPath.ml b/xPath.ml index 23c5fc6..de9c056 100644 --- a/xPath.ml +++ b/xPath.ml @@ -6,7 +6,7 @@ (******************************************************************************) INCLUDE "debug.ml";; #load "pa_extend.cmo";; - +let contains = ref None module Ast = struct (* The steps are in reverse order !!!! *) @@ -153,6 +153,12 @@ step : [ | [ "." ; p = top_pred -> [(Self,TagSet.node,p)] ] | [ ".." ; p = top_pred -> [(Parent,TagSet.star,p)] ] +| [ "contains"; "(" ; s = STRING ; ")";p=top_pred -> [ + let _ = contains := Some(s) in (Child,TagSet.singleton Tag.pcdata, p)] + ] +| [ "contains_full"; "(" ; s = STRING ; ")";p=top_pred -> [ + let _ = contains := Some(s) in (Descendant,TagSet.singleton Tag.pcdata, p)] + ] | [ test = test; p = top_pred -> [(Child,test, p)] ] | [ att = ATT ; p = top_pred -> match att with @@ -221,6 +227,8 @@ type config = { st_root : Ata.state; (* state matching the root element (initial tr_parent_loop : (Ata.state,int*(Ata.transition list)) Hashtbl.t; tr : (Ata.state,int*(Ata.transition list)) Hashtbl.t; tr_aux : (Ata.state,int*(Ata.transition list)) Hashtbl.t; + mutable entry_points : (Tag.t*Ptset.t) list; + mutable contains : string option; } let dummy_conf = { st_root = -1; st_univ = -1; @@ -230,14 +238,23 @@ let dummy_conf = { st_root = -1; tr_parent_loop = Hashtbl.create 0; tr = Hashtbl.create 0; tr_aux = Hashtbl.create 0; + entry_points = []; + contains = None; } let _r = function (`Left|`Last) -> `Right | `Right -> `Left -let _l = function (`Left|`Last) -> `Left - | `Right -> `Right + | `RRight -> `LLeft + | `LLeft -> `RRight + + +let _l = + function (`Left|`Last) -> `Left + | `Right -> `Right + | `RRight -> `RRight + | `LLeft -> `LLeft open Ata.Transitions @@ -293,7 +310,7 @@ let hpop = function | (x,z::y) ::r -> z,(x,y)::r | _-> assert false -let rec compile_step ?(existential=false) conf q_src dir ctx_path step num = +let rec compile_step ?(existential=false) conf q_src dir ctx_path nrec step num = let ex = existential in let axis,test,pred = step in let is_last = dir = `Last in @@ -305,54 +322,32 @@ let rec compile_step ?(existential=false) conf q_src dir ctx_path step num = let p_st, p_anc, p_par, p_pre, p_num, p_f = compile_pred conf q_src num ctx_path dir pred q_dst in - let new_st,new_dst, new_ctx = match axis with - | Child | FollowingSibling | Descendant | DescendantOrSelf -> - let axis = - if axis = DescendantOrSelf - then - begin - or_self conf q_src (fst(vpop ctx_path)) q_dst dir test p_f (is_last && not(existential)); - Descendant - end - else axis + + | Child | Descendant -> + let left,right = + if nrec then `LLeft,`RRight + else `Left,`Right in + let t1 = ?< q_src><(test, is_last && not(ex))>=> - p_f *& (if is_last then Ata.true_ else (_l dir) ** q_dst) in + p_f *& ( if false (*is_last*) then Ata.true_ else (_l left) ** q_dst) in let _ = add_trans num conf.tr t1 in let _ = if axis=Descendant then add_trans num conf.tr_aux ( - ?< q_src><@ ((if ex then TagSet.diff TagSet.star test + ?< q_src><@ ((if ex||nrec then TagSet.diff TagSet.star test else TagSet.star),false, - if TagSet.is_finite test - then `Left(fun t -> - if (Tree.Binary.is_node t) - then - let mytag = Tree.Binary.tag t in - TagSet.exists (fun tag -> - tag == mytag || - Tree.Binary.has_tagged_desc t tag - ) - test - else true - ) - - else `True )>=> `Left ** q_src ) + `True )>=> `LLeft ** q_src ) in let t3 = ?< q_src><@ ((if ex then TagSet.diff TagSet.any test - else TagSet.any), false, - if axis=Descendant&&TagSet.is_finite test - then `True (*`Right(fun t -> - TagSet.exists (fun tag -> Tree.Binary.has_tagged_foll t tag) - test) *) - else `True )>=> - if ex then ( Ata.atom_ `Left false q_src) *& `Right ** q_src - else `Right ** q_src + else TagSet.any), false, `True )>=> + if ex then ( Ata.atom_ `Left false q_src) *& right ** q_src + else (if axis=Descendant then `RRight else `Right) ** q_src in let _ = add_trans num conf.tr_aux t3 in @@ -378,7 +373,7 @@ let rec compile_step ?(existential=false) conf q_src dir ctx_path step num = | Ancestor | AncestorOrSelf -> conf.has_backward <- true; let up_states, new_ctx = - List.map (fst) ctx_path, (vpush q_root []) + List.fold_left (fun acc (q,_) -> if q == q_root then acc else q::acc) [] ctx_path, (vpush q_root []) in let _ = if axis = AncestorOrSelf then or_self conf q_src (fst(vpop ctx_path)) q_dst dir test p_f (is_last && not(existential)); @@ -386,7 +381,7 @@ let rec compile_step ?(existential=false) conf q_src dir ctx_path step num = let fc = List.fold_left (fun f s -> ((_l dir)**s +|f)) Ata.false_ up_states in let t1 = ?< q_frm_root><(test,is_last && (not existential) )>=> - (if is_last then Ata.true_ else (_l dir) ** q_dst) *& fc in + ( (*if is_last then Ata.true_ else *) (`LLeft ) ** q_dst) *& fc in add_trans num conf.tr t1; [q_dst ], q_dst, vpush q_frm_root new_ctx @@ -409,12 +404,18 @@ let rec compile_step ?(existential=false) conf q_src dir ctx_path step num = (Ptset.elements (Ptset.union p_st (Ptset.from_list new_st)), new_dst, new_ctx) - +and is_rec = function + [] -> false + | ((axis,_,_),_)::_ -> + match axis with + Descendant | Ancestor -> true + | _ -> false + and compile_path ?(existential=false) annot_path config q_src states idx ctx_path = List.fold_left - (fun (a_st,a_dst,anc_st,par_st,pre_st,ctx_path,num,has_backward) (step,dir) -> + (fun (a_st,a_dst,anc_st,par_st,pre_st,ctx_path,num,has_backward,a_isrec) (step,dir) -> let add_states,new_dst,new_ctx = - compile_step ~existential:existential config a_dst dir ctx_path step num + compile_step ~existential:existential config a_dst dir ctx_path (is_rec a_isrec) step num in let new_states = Ptset.union (Ptset.from_list add_states) a_st in let nanc_st,npar_st,npre_st,new_bw = @@ -423,11 +424,11 @@ and compile_path ?(existential=false) annot_path config q_src states idx ctx_pat |(Parent|Ancestor|AncestorOrSelf),_,_ -> Ptset.add a_dst anc_st,par_st,pre_st,true | _ -> anc_st,par_st,pre_st,has_backward in - new_states,new_dst,nanc_st,npar_st,npre_st,new_ctx, num+1,new_bw + new_states,new_dst,nanc_st,npar_st,npre_st,new_ctx, num+1,new_bw,(match a_isrec with [] -> [] | _::r -> r) ) - (states, q_src, Ptset.empty,Ptset.empty,Ptset.empty, ctx_path,idx, false ) + (states, q_src, Ptset.empty,Ptset.empty,Ptset.empty, ctx_path,idx, false,(List.tl annot_path) ) annot_path - + and binop_ conf q_src idx ctx_path dir pred p1 p2 f ddst = let a_st1,anc_st1,par_st1,pre_st1,idx1,f1 = compile_pred conf q_src idx ctx_path dir p1 ddst in @@ -457,7 +458,7 @@ and compile_expr conf states q_src idx ctx_path dir e qdst = | Path (p) -> let q = Ata.mk_state () in let annot_path = match p with Relative(r) -> dirannot (List.rev r) | _ -> assert false in - let a_st,a_dst,anc_st,par_st,pre_st,_,idx,has_backward = + let a_st,a_dst,anc_st,par_st,pre_st,_,idx,has_backward,_ = compile_path ~existential:true annot_path conf q states idx ctx_path in let ret_dir = match annot_path with @@ -489,6 +490,7 @@ let compile path = in let steps = List.rev steps in let dirsteps = dirannot steps in + let _ = Ata.mk_state() in let config = { st_root = Ata.mk_state(); st_univ = Ata.mk_state(); final_state = Ptset.empty; @@ -497,6 +499,8 @@ let compile path = tr_parent_loop = Hashtbl.create 5; tr = Hashtbl.create 5; tr_aux = Hashtbl.create 5; + entry_points = []; + contains = None } in let q0 = Ata.mk_state() in @@ -507,21 +511,20 @@ let compile path = add_trans num config.tr_aux (mk_star config.st_from_root `Left config.st_from_root config.st_univ); add_trans num config.tr_aux (mk_step config.st_no_nil (TagSet.add Tag.pcdata TagSet.star) `Left config.st_univ config.st_univ); *) - let a_st,a_dst,anc_st,par_st,pre_st,_,_,has_backward = + let a_st,a_dst,anc_st,par_st,pre_st,_,_,has_backward,_ = compile_path dirsteps config q0 states 0 [(config.st_root,[]) ] in let fst_tr = - ?< (config.st_root) >< (TagSet.star,false) >=> - (`Left** q0) *& (if config.has_backward then `Left ** config.st_from_root else Ata.true_) + ?< (config.st_root) >< (TagSet.singleton (Tag.tag ""),false) >=> + ((if is_rec dirsteps then `LLeft else `Left)** q0) *& (if config.has_backward then `LLeft ** config.st_from_root else Ata.true_) in add_trans num config.tr fst_tr; - if config.has_backward then begin + if config.has_backward then begin add_trans num config.tr_aux - (?< (config.st_from_root) >< (TagSet.star,false) >=> `Left ** config.st_from_root +| - `Right ** config.st_from_root); + (?< (config.st_from_root) >< (TagSet.star,false) >=> `LLeft ** config.st_from_root); add_trans num config.tr_aux - (?< (config.st_from_root) >< (TagSet.cup TagSet.pcdata TagSet.attribute,false) >=> - `Right ** config.st_from_root); + (?< (config.st_from_root) >< (TagSet.any,false) >=> + `RRight ** config.st_from_root); end; let phi = Hashtbl.create 37 in @@ -542,11 +545,11 @@ let compile path = Ata.states = if has_backward then Ptset.add config.st_from_root a_st else a_st; Ata.init = Ptset.singleton config.st_root; Ata.final = Ptset.union anc_st config.final_state; - Ata.universal = Ptset.union anc_st config.final_state; + Ata.universal = Ptset.singleton a_dst; Ata.phi = phi; Ata.delta = Hashtbl.create 17; Ata.sigma = Ata.HTagSet.create 17; - },[] + },config.entry_points,!contains end diff --git a/xPath.mli b/xPath.mli index e1220c1..23235d7 100644 --- a/xPath.mli +++ b/xPath.mli @@ -35,5 +35,5 @@ sig end module Compile : sig -val compile : Ast.path -> Ata.t * (Tag.t*Ptset.t) list +val compile : Ast.path -> Ata.t * (Tag.t*Ptset.t) list * string option end