| Or of 'hcons * 'hcons
| And of 'hcons * 'hcons
| Atom of ([ `Left | `Right | `LLeft | `RRight ]*bool*State.t)
+
type 'hcons node = {
pos : 'hcons expr;
mutable neg : 'hcons;
}
external hash_const_variant : [> ] -> int = "%identity"
- module rec HNode : Hcons.S with type data = Node.t = Hcons.Make (Node)
- and Node : Hashtbl.HashedType with type t = HNode.t node =
+ module rec Node : Hcons.S with type data = Data.t = Hcons.Make (Data)
+ and Data : Hashtbl.HashedType with type t = Node.t node =
struct
- type t = HNode.t node
+ type t = Node.t node
let equal x y = x.size == y.size &&
match x.pos,y.pos with
- | False,False
- | True,True -> true
- | Or(xf1,xf2),Or(yf1,yf2)
- | And(xf1,xf2),And(yf1,yf2) -> (HNode.equal xf1 yf1) && (HNode.equal xf2 yf2)
- | Atom(d1,p1,s1), Atom(d2,p2,s2) -> d1 == d2 && (p1==p2) && s1 == s2
- | _ -> false
+ | a,b when a == b -> true
+ | Or(xf1,xf2),Or(yf1,yf2)
+ | And(xf1,xf2),And(yf1,yf2) -> (xf1 == yf1) && (xf2 == yf2)
+ | Atom(d1,p1,s1), Atom(d2,p2,s2) -> d1 == d2 && (p1==p2) && s1 == s2
+ | _ -> false
let hash f =
match f.pos with
| False -> 0
| True -> 1
- | Or (f1,f2) -> HASHINT3(PRIME2,HNode.uid f1,HNode.uid f2)
- | And (f1,f2) -> HASHINT3(PRIME3,HNode.uid f1,HNode.uid f2)
+ | Or (f1,f2) -> HASHINT3(PRIME2,f1.Node.id, f2.Node.id)
+ | And (f1,f2) -> HASHINT3(PRIME3,f1.Node.id,f2.Node.id)
| Atom(d,p,s) -> HASHINT4(PRIME4,hash_const_variant d,vb p,s)
end
- type t = HNode.t
- let hash = HNode.hash
- let uid = HNode.uid
- let equal = HNode.equal
- let expr f = (HNode.node f).pos
- let st f = (HNode.node f ).st
- let size f = (HNode.node f).size
+ type t = Node.t
+ let hash x = x.Node.key
+ let uid x = x.Node.id
+ let equal = Node.equal
+ let expr f = f.Node.node.pos
+ let st f = f.Node.node.st
+ let size f = f.Node.node.size
let prio f =
match expr f with
let cons pos neg s1 s2 size1 size2 =
- let nnode = HNode.make { pos = neg; neg = (Obj.magic 0); st = s2; size = size2 } in
- let pnode = HNode.make { pos = pos; neg = nnode ; st = s1; size = size1 }
+ let nnode = Node.make { pos = neg; neg = (Obj.magic 0); st = s2; size = size2 } in
+ let pnode = Node.make { pos = pos; neg = nnode ; st = s1; size = size1 }
in
- (HNode.node nnode).neg <- pnode; (* works because the neg field isn't taken into
+ (Node.node nnode).neg <- pnode; (* works because the neg field isn't taken into
account for hashing ! *)
pnode,nnode
| `RRight -> empty_triple,(StateSet.empty,si,si)
in fst (cons (Atom(d,p,s)) (Atom(d,not p,s)) ss ss 1 1)
- let not_ f = (HNode.node f).neg
+ let not_ f = f.Node.node.neg
let union_hex ((l1,ll1,lll1),(r1,rr1,rrr1)) ((l2,ll2,lll2),(r2,rr2,rrr2)) =
(StateSet.mem_union l1 l2 ,StateSet.mem_union ll1 ll2,StateSet.mem_union lll1 lll2),
(StateSet.mem_union r1 r2 ,StateSet.mem_union rr1 rr2,StateSet.mem_union rrr1 rrr2)
let print ppf fl =
iter (fun t -> Transition.print ppf t; Format.pp_print_newline ppf ()) fl
end
+
+module Formlistlist =
+struct
+ include Hlist.Make(Formlist)
+ let print ppf fll =
+ iter (fun fl -> Formlist.print ppf fl; Format.pp_print_newline ppf ())fll
+end
type 'a t = {
id : int;
end
-
let choose_jump tagset qtags1 qtagsn a f_nil f_t1 f_s1 f_tn f_sn f_notext f_maytext =
let tags1,hastext1,fin1 = inter_text tagset (tags a qtags1) in
let tagsn,hastextn,finn = inter_text tagset (tags a qtagsn) in
else if (hastext1||hastextn) then (`ANY,f_maytext)
else (`ANY,f_notext)
- let choose_jump_down tree a b c d =
+ let choose_jump_down tree a b c d =
choose_jump a b c d
(mk_fun (fun _ -> Tree.nil) "Tree.mk_nil")
(mk_fun (Tree.tagged_child tree) "Tree.tagged_child")
(mk_fun (Tree.select_foll_ctx tree) "Tree.select_foll_ctx")
(mk_fun (Tree.next_element_ctx tree) "Tree.node_element_ctx")
(mk_fun (Tree.next_sibling_ctx tree) "Tree.node_sibling_ctx")
-
-
- module SetTagKey =
- struct
- type t = Tag.t*SList.t
- let equal (t1,s1) (t2,s2) = t1 == t2 && s1 == s2
- let hash (t,s) = HASHINT2(t,SList.uid s)
- end
-
- module CachedTransTable = Hashtbl.Make(SetTagKey)
- let td_trans = CachedTransTable.create 4093
-
-
+
+ module SetTagKey =
+ struct
+ type t = Tag.t*SList.t
+ let equal (t1,s1) (t2,s2) = t1 == t2 && s1 == s2
+ let hash (t,s) = HASHINT2(t,s.SList.Node.id)
+ end
+
+ module CachedTransTable = Hashtbl.Make(SetTagKey)
+ let td_trans = CachedTransTable.create 4093
+
+
let empty_size n =
let rec loop acc = function 0 -> acc
| n -> loop (SList.cons StateSet.empty acc) (n-1)
in loop SList.nil n
- let merge rb rb1 rb2 mark t res1 res2 =
- if rb then
- let res1 = if rb1 then res1 else RS.empty
- and res2 = if rb2 then res2 else RS.empty
- in
- if mark then RS.cons t (RS.concat res1 res2)
- else RS.concat res1 res2
- else RS.empty
+
+ module Fold2Res = Hashtbl.Make(struct
+ type t = Formlistlist.t*SList.t*SList.t
+ let hash (f,s,t) = HASHINT3(f.Formlistlist.Node.id,
+ s.SList.Node.id,
+ t.SList.Node.id)
+ let equal (a,b,c) (d,e,f) = a==d && b == e && c == f
+ end)
+
+ let h_fold2 = Fold2Res.create BIG_H_SIZE
let top_down ?(noright=false) a tree t slist ctx slot_size =
let pempty = empty_size slot_size in
+ let rempty = Array.make slot_size RS.empty in
(* evaluation starts from the right so we put sl1,res1 at the end *)
let eval_fold2_slist fll t (sl2,res2) (sl1,res1) =
- let res = Array.copy res1 in
- let rec fold l1 l2 fll i aq =
- match fll with
- [fl] -> (* inline for speed *)
- let s1 = SList.hd l1
- and s2 = SList.hd l2 in
- let r',flags = eval_formlist s1 s2 fl in
- let _ = res.(i) <- RS.merge flags t res1.(i) res2.(i) in
- (SList.cons r' aq),res
- | fl::fll ->
- let SList.Cons(s1,ll1) = l1.SList.Node.node
- and SList.Cons(s2,ll2) = l2.SList.Node.node in
- let r',flags = eval_formlist s1 s2 fl in
- let _ = res.(i) <- RS.merge flags t res1.(i) res2.(i)
- in
- fold ll1 ll2 fll (i+1) (SList.cons r' aq)
- | _ -> aq,res
- in
- fold sl1 sl2 fll 0 SList.nil
+ let res = Array.copy rempty in
+ try
+ let r,b,btab = Fold2Res.find h_fold2 (fll,sl1,sl2) in
+ if b then for i=0 to slot_size - 1 do
+ res.(i) <- RS.merge btab.(i) t res1.(i) res2.(i);
+ done;
+ r,res
+ with
+ Not_found ->
+ let btab = Array.make slot_size (false,false,false,false) in
+ let rec fold l1 l2 fll i aq ab =
+ match fll.Formlistlist.Node.node,
+ l1.SList.Node.node,
+ l2.SList.Node.node
+ with
+ | Formlistlist.Cons(fl,fll),
+ SList.Cons(s1,ll1),
+ SList.Cons(s2,ll2) ->
+ let r',((b,_,_,_) as flags) = eval_formlist s1 s2 fl in
+ let _ = btab.(i) <- flags
+ in
+ fold ll1 ll2 fll (i+1) (SList.cons r' aq) (b||ab)
+ | _ -> aq,ab
+ in
+ let r,b = fold sl1 sl2 fll 0 SList.nil false in
+ Fold2Res.add h_fold2 (fll,sl1,sl2) (r,b,btab);
+ if b then for i=0 to slot_size - 1 do
+ res.(i) <- RS.merge btab.(i) t res1.(i) res2.(i);
+ done;
+ r,res
in
- let null_result() = (pempty,Array.make slot_size RS.empty) in
- let rec loop t slist ctx =
- if t == Tree.nil then null_result() else get_trans t slist (Tree.tag tree t) ctx
+ let null_result = (pempty,Array.copy rempty) in
+ let rec loop t slist ctx=
+ if t == Tree.nil then null_result else get_trans t slist (Tree.tag tree t) ctx
and loop_tag tag t slist ctx =
- if t == Tree.nil then null_result() else get_trans t slist tag ctx
+ if t == Tree.nil then null_result else get_trans t slist tag ctx
and loop_no_right t slist ctx =
- if t == Tree.nil then null_result() else get_trans ~noright:true t slist (Tree.tag tree t) ctx
+ if t == Tree.nil then null_result else get_trans ~noright:true t slist (Tree.tag tree t) ctx
and get_trans ?(noright=false) t slist tag ctx =
let cont =
try
)
) set (Formlist.nil,StateSet.empty,StateSet.empty,ca,da,sa,fa)
- in fl::fll_acc, (SList.cons ll lllacc), (SList.cons rr rllacc),ca,da,sa,fa)
- slist ([],SList.nil,SList.nil,StateSet.empty,StateSet.empty,StateSet.empty,StateSet.empty)
+ in (Formlistlist.cons fl fll_acc), (SList.cons ll lllacc), (SList.cons rr rllacc),ca,da,sa,fa)
+ slist (Formlistlist.nil,SList.nil,SList.nil,StateSet.empty,StateSet.empty,StateSet.empty,StateSet.empty)
in
(* Logic to chose the first and next function *)
let _,tags_below,_,tags_after = Tree.tags tree tag in
(* let _ = Printf.eprintf "Tags below %s are : \n" (Tag.to_string tag) in
let _ = Ptset.Int.iter (fun i -> Printf.eprintf "%s " (Tag.to_string i)) tags_below in
let _ = Printf.eprintf "\n%!" in *)
+ let tags_below = Ptset.Int.remove tag tags_below in
let f_kind,first = choose_jump_down tree tags_below ca da a
and n_kind,next = if noright then (`NIL, fun _ _ -> Tree.nil )
else choose_jump_next tree tags_after sa fa a in
- let empty_res = null_result() in
- let cont =
- match f_kind,n_kind with
- | `NIL,`NIL ->
- (fun _ _ -> eval_fold2_slist fl_list t empty_res empty_res )
- | _,`NIL -> (
- match f_kind with
- |`TAG(tag) ->
- (fun t _ -> eval_fold2_slist fl_list t empty_res
- (loop_tag tag (first t) llist t))
- | `ANY ->
- (fun t _ -> eval_fold2_slist fl_list t empty_res
- (loop (first t) llist t))
- | _ -> assert false)
-
- | `NIL,_ -> (
- match n_kind with
- |`TAG(tag) ->
- (fun t ctx -> eval_fold2_slist fl_list t
- (loop_tag tag (next t ctx) rlist ctx) empty_res)
-
- | `ANY ->
- (fun t ctx -> eval_fold2_slist fl_list t
- (loop (next t ctx) rlist ctx) empty_res)
-
- | _ -> assert false)
-
- | `TAG(tag1),`TAG(tag2) ->
- (fun t ctx -> eval_fold2_slist fl_list t
- (loop (next t ctx) rlist ctx)
- (loop (first t) llist t))
-
- | `TAG(tag),`ANY ->
- (fun t ctx ->
- eval_fold2_slist fl_list t
- (loop (next t ctx) rlist ctx)
- (loop_tag tag (first t) llist t))
- | `ANY,`TAG(tag) ->
- (fun t ctx ->
- eval_fold2_slist fl_list t
- (loop_tag tag (next t ctx) rlist ctx)
- (loop (first t) llist t) )
- | `ANY,`ANY ->
- (fun t ctx ->
- eval_fold2_slist fl_list t
- (loop (next t ctx) rlist ctx)
- (loop (first t) llist t) )
- | _ -> assert false
- in
- let cont = D_IF_( (fun t ctx ->
- let a,b = cont t ctx in
- register_trace tree t (slist,a,fl_list,first,next,ctx);
- (a,b)
- ) ,cont)
- in
- (CachedTransTable.add td_trans (tag,slist) cont;cont)
- in cont t ctx
-
- in
- (if noright then loop_no_right else loop) t slist ctx
-
-
+ let empty_res = null_result in
+ let cont =
+ match f_kind,n_kind with
+ | `NIL,`NIL ->
+ (fun t _ -> eval_fold2_slist fl_list t empty_res empty_res )
+ | _,`NIL -> (
+ match f_kind with
+ |`TAG(tag) ->
+ (fun t _ -> eval_fold2_slist fl_list t empty_res
+ (loop_tag tag (first t) llist t))
+ | `ANY ->
+ (fun t _ -> eval_fold2_slist fl_list t empty_res
+ (loop (first t) llist t))
+ | _ -> assert false)
+
+ | `NIL,_ -> (
+ match n_kind with
+ |`TAG(tag) ->
+ (fun t ctx -> eval_fold2_slist fl_list t
+ (loop_tag tag (next t ctx) rlist ctx) empty_res)
+
+ | `ANY ->
+ (fun t ctx -> eval_fold2_slist fl_list t
+ (loop (next t ctx) rlist ctx) empty_res)
+
+ | _ -> assert false)
+
+ | `TAG(tag1),`TAG(tag2) ->
+ (fun t ctx -> eval_fold2_slist fl_list t
+ (loop_tag tag2 (next t ctx) rlist ctx)
+ (loop_tag tag1 (first t) llist t))
+
+ | `TAG(tag),`ANY ->
+ (fun t ctx -> eval_fold2_slist fl_list t
+ (loop (next t ctx) rlist ctx)
+ (loop_tag tag (first t) llist t))
+ | `ANY,`TAG(tag) ->
+ (fun t ctx ->
+ eval_fold2_slist fl_list t
+ (loop_tag tag (next t ctx) rlist ctx)
+ (loop (first t) llist t) )
+ | `ANY,`ANY ->
+ (fun t ctx ->
+ eval_fold2_slist fl_list t
+ (loop (next t ctx) rlist ctx)
+ (loop (first t) llist t) )
+ | _ -> assert false
+ in
+ let cont = D_IF_( (fun t ctx ->
+ let a,b = cont t ctx in
+ register_trace tree t (slist,a,fl_list,first,next,ctx);
+ (a,b)
+ ) ,cont)
+ in
+ (CachedTransTable.add td_trans (tag,slist) cont;cont)
+ in cont t ctx
+
+ in
+ (if noright then loop_no_right else loop) t slist ctx
+
+
let run_top_down a tree =
let init = SList.cons a.init SList.nil in
let _,res = top_down a tree Tree.root init Tree.root 1
let disabled_gc = { Gc.get() with
Gc.max_overhead = 1000000;
Gc.space_overhead = 100 }
-
-
-
-
+let hash x = 131*x/(x-1+1)
+let test_loop tree tag =
+ let t' = Tree.tagged_desc tree tag Tree.root in
+ let f = Hashtbl.create 4096
+ in
+ let jump t _ = Tree.tagged_foll_ctx tree tag t Tree.root in
+ let g t ctx =
+ if t == Tree.nil then 0
+ else 1+ ((Hashtbl.find f (hash 101)) (jump t ctx) ctx)
+ in
+ Hashtbl.add f (hash 101) g;
+ (Hashtbl.find f (hash 101)) t' Tree.root
+let test_loop2 tree tag =
+ let t' = Tree.tagged_desc tree tag Tree.root in
+ let f = Hashtbl.create 4096
+ in
+ let jump t _ = Tree.tagged_foll_ctx tree tag t Tree.root in
+ let rec g t ctx =
+ if t == Tree.nil then 0
+ else 1+ (match (Hashtbl.find f (hash 101)) with
+ `Foo ->g (jump t ctx) ctx
+ )
+ in
+ Hashtbl.add f (hash 101) `Foo;
+ g t' Tree.root
let main v query_string output =
-
+
let _ = Tag.init (Tree.tag_pool v) in
Printf.eprintf "Parsing query : ";
let query = try
with
Ulexer.Loc.Exc_located ((x,y),e) -> Printf.eprintf "character %i-%i %s\n" x y (Printexc.to_string e);exit 1
in
- XPath.Ast.print Format.err_formatter query;
- Format.fprintf Format.err_formatter "\n%!";
- Printf.eprintf "Compiling query : ";
- let auto,ltags,contains = time (XPath.Compile.compile ~querystring:query_string) query in
- let _ = Ata.dump Format.err_formatter auto in
- let _ = Printf.eprintf "%!" in
- let jump_to =
- match contains with
- None -> (max_int,`NOTHING)
- | Some s ->
- let r = Tree.count v s
- in
- Printf.eprintf "%i documents in the TextCollection\n" (Tree.text_size v);
- Printf.eprintf "Global count is %i, using " r;
- if r < !Options.tc_threshold then begin
- Printf.eprintf "TextCollection contains\nCalling global contains : ";
- time (Tree.init_contains v) s;
- end
- else begin
- Printf.eprintf "Naive contains\nCalling global contains : ";
- time (Tree.init_naive_contains v) s
- end;(r,`CONTAINS(s))
- in
- let test_list = jump_to in
- (*
+ let _ = Printf.eprintf "Timing //keyword :" in
+ let r = time (test_loop v) (Tag.tag "keyword") in
+ let _ = Printf.eprintf "Count is %i\n%!" r in
+ let _ = Printf.eprintf "Timing //keyword 2:" in
+ let r = time (test_loop2 v) (Tag.tag "keyword") in
+ let _ = Printf.eprintf "Count is %i\n%!" r in
+ XPath.Ast.print Format.err_formatter query;
+ Format.fprintf Format.err_formatter "\n%!";
+ Printf.eprintf "Compiling query : ";
+ let auto,ltags,contains = time (XPath.Compile.compile ~querystring:query_string) query in
+ let _ = Ata.dump Format.err_formatter auto in
+ let _ = Printf.eprintf "%!" in
+ let jump_to =
+ match contains with
+ None -> (max_int,`NOTHING)
+ | Some s ->
+ let r = Tree.count v s
+ in
+ Printf.eprintf "%i documents in the TextCollection\n" (Tree.text_size v);
+ Printf.eprintf "Global count is %i, using " r;
+ if r < !Options.tc_threshold then begin
+ Printf.eprintf "TextCollection contains\nCalling global contains : ";
+ time (Tree.init_contains v) s;
+ end
+ else begin
+ Printf.eprintf "Naive contains\nCalling global contains : ";
+ time (Tree.init_naive_contains v) s
+ end;(r,`CONTAINS(s))
+ in
+ let test_list = jump_to in
+ (*
let test_list =
- if (!Options.backward) then begin
- Printf.eprintf "Finding min occurences : ";
- time
- ( List.fold_left (fun ((min_occ,kind)as acc) (tag,_) ->
+ if (!Options.backward) then begin
+ Printf.eprintf "Finding min occurences : ";
+ time
+ ( List.fold_left (fun ((min_occ,kind)as acc) (tag,_) ->
let numtags = Tree.subtree_tags v tag Tree.root in
if ((numtags < min_occ) && numtags >= 2)
then (numtags,`TAG(tag))
begin
let _ = Gc.full_major();Gc.compact() in
let _ = Printf.eprintf "%!" in
- (* let _ = Gc.set (disabled_gc) in *)
+ let _ = Gc.set (disabled_gc) in
if !Options.backward && ((snd test_list) != `NOTHING )then
let r = time (bottom_up_count auto v )(snd test_list) in