(******************************************************************************) (* SXSI : XPath evaluator *) (* Kim Nguyen (Kim.Nguyen@nicta.com.au) *) (* Copyright NICTA 2008 *) (* Distributed under the terms of the LGPL (see LICENCE) *) (******************************************************************************) INCLUDE "debug.ml";; module State = struct type t = int let mk = let i = ref ~-1 in fun () -> incr i;!i let compare p q = p - q let equal p q = p==q let hash p = p let print fmt p = Format.fprintf fmt "<%.6i>" p end module ISet : Set.S with type elt = int= struct let max = Sys.word_size - 2 type t = int type elt = int let empty = 0 let full = -1 let is_empty x = x == 0 let mem e s = ((1 lsl e) land s) != 0 let add e s = (1 lsl e) lor s let singleton e = (1 lsl e) let union a b = a lor b let inter a b = a land b let diff a b = a land (lnot b) let remove e s = (lnot (1 lsl e) land s) let compare = (-) let equal = (==) let subset a b = a land (lnot b) == 0 let cardinal s = let rec loop n s = if s == 0 then n else loop (succ n) (s - (s land (-s))) in loop 0 s (* inverse of bit i = 1 lsl i i.e. tib i = log_2(i) *) let log2 = Array.create 255 0 let () = for i = 0 to 7 do log2.(1 lsl i) <- i done (* assumption: x is a power of 2 *) let tib32 x = if x land 0xFFFF == 0 then let x = x lsr 16 in if x land 0xFF == 0 then 24 + log2.(x lsr 8) else 16 + log2.(x) else if x land 0xFF == 0 then 8 + log2.(x lsr 8) else log2.(x) let ffffffff = (0xffff lsl 16) lor 0xffff let tib64 x = if x land ffffffff == 0 then 32 + tib32 (x lsr 32) else tib32 x let tib = match Sys.word_size with 32 -> tib32 | 64 -> tib64 | _ -> assert false let min_elt s = if s == 0 then raise Not_found; tib (s land (-s)) let choose = min_elt (* TODO: improve? *) let max_elt s = if s == 0 then raise Not_found; let rec loop i = if s land i != 0 then tib i else if i = 1 then raise Not_found else loop (i lsr 1) in loop min_int let rec elements s = if s == 0 then [] else let i = s land (-s) in tib i :: elements (s - i) let rec iter f s = if s != 0 then let i = s land (-s) in f (tib i); iter f (s - i) let rec fold f s acc = if s == 0 then acc else let i = s land (-s) in fold f (s - i) (f (tib i) acc) let rec for_all p s = s == 0 || let i = s land (-s) in p (tib i) && for_all p (s - i) let rec exists p s = s != 0 && let i = s land (-s) in p (tib i) || exists p (s - i) let rec filter p s = if s == 0 then 0 else let i = s land (-s) in let s = filter p (s - i) in if p (tib i) then s + i else s let rec partition p s = if s == 0 then 0, 0 else let i = s land (-s) in let st,sf = partition p (s - i) in if p (tib i) then st + i, sf else st, sf + i let split i s = let bi = 1 lsl i in s land (bi - 1), s land bi != 0, s land (-1 lsl (i+1)) end (* module SSet = Set.Make(State)*) module SSet = ISet module Transition = struct type t = Label of State.t * TagSet.Xml.t * State.t * State.t | External of State.t * (Tree.Binary.t -> bool)*State.t * State.t let source = function Label(s,_,_,_) | External(s,_,_,_) -> s let dest1 = function Label(_,_,d,_) | External(_,_,d,_) -> d let dest2 = function Label(_,_,_,d) | External(_,_,_,d) -> d let compatible t1 t2 = State.equal (source t1) (source t2) && State.equal (dest1 t1) (dest1 t2) && State.equal (dest2 t1) (dest2 t2) let check t1 t2 = if not (compatible t1 t2) then failwith "Incompatible transitions" let cup t1 t2 = check t1 t2; match (t1,t2) with | Label(s,ts,d1,d2), Label(_,ts',_,_) -> Label(s,TagSet.Xml.cup ts ts',d1,d2) | External(s,f,d1,d2), External(_,f',_,_) -> External(s,(fun x -> (f x)||(f' x)),d1,d2) | Label(s,ts,d1,d2), External(_,f,_,_) | External(_,f,_,_), Label(s,ts,d1,d2) -> External(s,(fun x -> (TagSet.Xml.mem (Tree.Binary.tag x) ts)||f x),d1,d2) let cap t1 t2 = check t1 t2; match (t1,t2) with | Label(s,ts,d1,d2), Label(_,ts',_,_) -> Label(s,TagSet.Xml.cap ts ts',d1,d2) | External(s,f,d1,d2), External(_,f',_,_) -> External(s,(fun x -> (f x)&&(f' x)),d1,d2) | Label(s,ts,d1,d2), External(_,f,_,_) | External(_,f,_,_), Label(s,ts,d1,d2) -> External(s,(fun x -> (TagSet.Xml.mem (Tree.Binary.tag x) ts)&& f x),d1,d2) let neg = function | Label(s,ts,d1,d2) -> Label(s,TagSet.Xml.neg ts,d1,d2) | External(s,f,d1,d2) -> External(s,(fun x -> not(f x)), d1 ,d2) let can_take t = function | Label(_,ts,_,_) -> TagSet.Xml.mem (Tree.Binary.tag t) ts | External(_,f,_,_) -> f t (* Hashtbl indexed by source State *) module HT = Hashtbl.Make(State) type hashtbl = { label : (TagSet.Xml.t*State.t*State.t) HT.t; extern : ((Tree.Binary.t-> bool)*State.t*State.t) HT.t; } let empty () = { label = HT.create 17; extern = HT.create 17; } let clear h = HT.clear h.label; HT.clear h.extern let add h = function | Label(s,t,d1,d2) -> HT.add h.label s (t,d1,d2) | External(s,f,d1,d2) -> HT.add h.extern s (f,d1,d2) let find_all ?(accu=[]) ?(pred_label=fun _ _ _ _ -> true) ?(pred_extern= fun _ _ _ _ -> true) h q = List.fold_left (fun acc (t,d1,d2) -> if pred_label q t d1 d2 then Label(q,t,d1,d2) :: acc else acc) (List.fold_left (fun acc (f,d1,d2) -> if pred_extern q f d1 d2 then External(q,f,d1,d2) :: acc else acc) accu (HT.find_all h.extern q)) (HT.find_all h.label q) let find_all_dest q h = HT.fold (fun source (t,q1,q2) acc -> if (State.equal q1 q || State.equal q2 q) then Label(source,t,q1,q2)::acc else acc) h.label (HT.fold (fun source (t,q1,q2) acc -> if (State.equal q1 q || State.equal q2 q) then External(source,t,q1,q2)::acc else acc) h.extern []) let fold_state f_lab f_ext h q acc = List.fold_left (fun acc (t,d1,d2) -> f_lab acc q t d1 d2) (List.fold_left (fun acc (f,d1,d2) -> f_ext acc q f d1 d2) acc (HT.find_all h.extern q)) (HT.find_all h.label q) end module BST = Set.Make(Tree.Binary) type t = { initial : SSet.t; final : SSet.t; transitions : Transition.hashtbl; marking : SSet.t; ignore : SSet.t; mutable result : BST.t; (* Statistics *) mutable numbt : int; mutable max_states : int; contains : (string,Tree.Binary.DocIdSet.t) Hashtbl.t; } let mk () = { initial = SSet.empty; final = SSet.empty; transitions = Transition.empty(); marking = SSet.empty; ignore = SSet.empty; result = BST.empty; numbt = 0; max_states = 0; contains = Hashtbl.create 37; };; let print_tags fmt l = let l = if TagSet.Xml.is_finite l then l else (Format.fprintf fmt "* \\ "; TagSet.Xml.neg l ) in Format.fprintf fmt "{ "; ignore(TagSet.Xml.fold (fun t first -> if not first then Format.fprintf fmt " ,"; Tag.print fmt t; false) l true); Format.fprintf fmt "}" let dump fmt auto = Format.fprintf fmt "----------------- Automaton dump -------------------\n"; Format.fprintf fmt "Initial states: "; SSet.iter (fun s -> State.print fmt s; Format.fprintf fmt " ") auto.initial; Format.fprintf fmt "\n"; Format.fprintf fmt "Final states: "; SSet.iter (fun s -> State.print fmt s; Format.fprintf fmt " ") auto.final; Format.fprintf fmt "\n"; Format.fprintf fmt "Marking states: "; SSet.iter (fun s -> State.print fmt s; Format.fprintf fmt " ") auto.marking; Format.fprintf fmt "\n"; Format.fprintf fmt "Ignore states: "; SSet.iter (fun s -> State.print fmt s; Format.fprintf fmt " ") auto.ignore; Format.fprintf fmt "\n"; Format.fprintf fmt "Transitions:\n"; Transition.HT.iter (fun source (l,dest1,dest2) -> State.print fmt source; Format.fprintf fmt "-> "; print_tags fmt l; Format.fprintf fmt "("; State.print fmt dest1; Format.fprintf fmt " ,"; State.print fmt dest2; Format.fprintf fmt ")\n") auto.transitions.Transition.label; Format.fprintf fmt "----------------------------------------------------\n" module BottomUp = struct exception Fail let pr_states fmt st = SSet.iter (fun s -> State.print fmt s; Format.fprintf fmt " ") st let err = Format.err_formatter let filter_map_rev filt map l = let rec loop ((accuf,accum) as accu) = function | [] -> accu | t::r -> loop (if filt t then (t::accuf,SSet.add (map t) accum) else accu) r in loop ([],SSet.empty) l let mem s x = SSet.mem x s let rec accepting_among ?(nobrother=false) ?(strings=None) auto t r = if SSet.is_empty r then r else match strings with | Some valid_strings when Tree.Binary.DocIdSet.for_all (fun i -> not(Tree.Binary.string_below t i) ) valid_strings -> SSet.empty | _ -> ( let to_ignore = SSet.inter auto.ignore r in let r = SSet.diff r to_ignore in let res = match Tree.Binary.descr t with | Tree.Binary.Nil -> SSet.inter r auto.final | Tree.Binary.String id -> ( match strings with | None -> SSet.inter r auto.final | Some valid_strings when (Tree.Binary.DocIdSet.mem id valid_strings) -> Printf.eprintf "Selecting string '%s'\n%!" (Tree.Binary.get_string t id); SSet.inter r auto.final | _ -> SSet.empty ) | Tree.Binary.Node(_) -> let t1 = Tree.Binary.left t and t2 = Tree.Binary.right t in let transitions = SSet.fold ( fun q accu -> Transition.fold_state (fun acc q t d1 d2 -> Transition.Label(q,t,d1,d2) :: acc) (fun acc q t d1 d2 -> Transition.External(q,t,d1,d2) :: acc) auto.transitions q accu) r [] in let transitions,r1 = filter_map_rev (Transition.can_take t) Transition.dest1 transitions in let s1 = accepting_among auto t1 r1 in let transitions,r2 = filter_map_rev (fun x->SSet.mem (Transition.dest1 x) s1) Transition.dest2 transitions in let s2 = if nobrother then SSet.union (SSet.inter r auto.final) to_ignore else accepting_among auto t2 r2 in let _,s = filter_map_rev (fun x -> SSet.mem (Transition.dest2 x) s2) (Transition.source) transitions in if SSet.is_empty s then s else (if SSet.exists (mem auto.marking) s1 || SSet.exists (mem auto.marking) s2 then auto.result <- BST.add t auto.result;s) in SSet.union to_ignore res) let accept ?(strings=None) auto t = auto.result <- BST.empty; if SSet.is_empty (accepting_among ~nobrother:true ~strings:strings auto t auto.initial) then false else true end module TopDown = struct let rec accept_at auto t q = if SSet.mem q auto.ignore then true else match Tree.Binary.descr t with | Tree.Binary.Nil | Tree.Binary.String _ -> SSet.mem q auto.final | Tree.Binary.Node(_) -> let tag = Tree.Binary.tag t and t1 = Tree.Binary.left t and t2 = Tree.Binary.right t in let transitions = Transition.find_all ~pred_label:(fun _ ts _ _ -> TagSet.Xml.mem tag ts) ~pred_extern:(fun _ f _ _ -> f t) auto.transitions q in let rec iter_trans res = function [] -> res | (Transition.Label(_,_,q1,q2) | Transition.External (_,_,q1,q2))::r -> let _ = auto.numbt <- succ auto.numbt in if (accept_at auto t1 q1) && (accept_at auto t2 q2) then begin if (SSet.mem q1 auto.marking)||(SSet.mem q2 auto.marking) then begin auto.result <- BST.add t auto.result; end; iter_trans true r end else iter_trans res r in iter_trans false transitions let accept auto t = auto.numbt <- -1; SSet.exists (fun q -> P(auto.numbt <- succ auto.numbt); auto.result <- BST.empty; accept_at auto t q) auto.initial let rec run_in auto t states = if SSet.is_empty states then () else match Tree.Binary.descr t with | Tree.Binary.Nil | Tree.Binary.String _ -> () | Tree.Binary.Node(_) -> let tag = Tree.Binary.tag t and t1 = Tree.Binary.left t and t2 = Tree.Binary.right t in P(let i = SSet.cardinal states in if i > auto.max_states then auto.max_states <- i); let s1,s2 = SSet.fold (fun q acc -> if SSet.mem q auto.ignore then acc else Transition.fold_state (fun (ss1,ss2) _ ts d1 d2 -> if TagSet.Xml.mem tag ts then (SSet.add d1 ss1, SSet.add d2 ss2) else (ss1,ss2)) (fun (ss1,ss2) _ f d1 d2 -> if f t then (SSet.add d1 ss1, SSet.add d2 ss2) else (ss1,ss2)) auto.transitions q acc ) states (SSet.empty,SSet.empty) in if SSet.is_empty (SSet.inter auto.marking (SSet.union s1 s2)) then () else auto.result <- BST.add t auto.result; run_in auto t1 s1; run_in auto t2 s2 let run auto t = auto.result <- BST.empty; P(auto.numbt <- 0); run_in auto t auto.initial end