-(******************************************************************************)
-(* SXSI : XPath evaluator *)
-(* Kim Nguyen (Kim.Nguyen@nicta.com.au) *)
-(* Copyright NICTA 2008 *)
-(* Distributed under the terms of the LGPL (see LICENCE) *)
-(******************************************************************************)
-INCLUDE "debug.ml";;
-
-module State =
-struct
- type t = int
- let mk = let i = ref ~-1 in fun () -> incr i;!i
- let compare p q = p - q
- let equal p q = p==q
- let hash p = p
- let print fmt p = Format.fprintf fmt "<%.6i>" p
-end
-
-module ISet : Set.S with type elt = int=
-struct
- let max = Sys.word_size - 2
- type t = int
- type elt = int
-
- let empty = 0
- let full = -1
- let is_empty x = x == 0
- let mem e s = ((1 lsl e) land s) != 0
- let add e s = (1 lsl e) lor s
- let singleton e = (1 lsl e)
- let union a b = a lor b
- let inter a b = a land b
- let diff a b = a land (lnot b)
- let remove e s = (lnot (1 lsl e) land s)
- let compare = (-)
- let equal = (==)
- let subset a b = a land (lnot b) == 0
- let cardinal s =
- let rec loop n s =
- if s == 0 then n else loop (succ n) (s - (s land (-s)))
- in
- loop 0 s
-(* inverse of bit i = 1 lsl i i.e. tib i = log_2(i) *)
-let log2 = Array.create 255 0
-let () = for i = 0 to 7 do log2.(1 lsl i) <- i done
-
-(* assumption: x is a power of 2 *)
-let tib32 x =
- if x land 0xFFFF == 0 then
- let x = x lsr 16 in
- if x land 0xFF == 0 then 24 + log2.(x lsr 8) else 16 + log2.(x)
- else
- if x land 0xFF == 0 then 8 + log2.(x lsr 8) else log2.(x)
-
-let ffffffff = (0xffff lsl 16) lor 0xffff
-let tib64 x =
- if x land ffffffff == 0 then 32 + tib32 (x lsr 32) else tib32 x
-
-let tib =
- match Sys.word_size with 32 -> tib32 | 64 -> tib64 | _ -> assert false
-
-let min_elt s =
- if s == 0 then raise Not_found;
- tib (s land (-s))
-
-let choose = min_elt
-
-(* TODO: improve? *)
-let max_elt s =
- if s == 0 then raise Not_found;
- let rec loop i =
- if s land i != 0 then tib i
- else if i = 1 then raise Not_found else loop (i lsr 1)
- in
- loop min_int
-
-let rec elements s =
- if s == 0 then [] else let i = s land (-s) in tib i :: elements (s - i)
-
-let rec iter f s =
- if s != 0 then let i = s land (-s) in f (tib i); iter f (s - i)
-
-let rec fold f s acc =
- if s == 0 then acc else let i = s land (-s) in fold f (s - i) (f (tib i) acc)
-
-let rec for_all p s =
- s == 0 || let i = s land (-s) in p (tib i) && for_all p (s - i)
-
-let rec exists p s =
- s != 0 && let i = s land (-s) in p (tib i) || exists p (s - i)
-
-let rec filter p s =
- if s == 0 then
- 0
- else
- let i = s land (-s) in
- let s = filter p (s - i) in
- if p (tib i) then s + i else s
-
-let rec partition p s =
- if s == 0 then
- 0, 0
- else
- let i = s land (-s) in
- let st,sf = partition p (s - i) in
- if p (tib i) then st + i, sf else st, sf + i
-
-let split i s =
- let bi = 1 lsl i in
- s land (bi - 1), s land bi != 0, s land (-1 lsl (i+1))
-
-
-end
-(* module SSet = Set.Make(State)*)
-module SSet = ISet
-
-module Transition =
-struct
-
- type t = Label of State.t * TagSet.Xml.t * State.t * State.t
- | External of State.t * (Tree.Binary.t -> bool)*State.t * State.t
-
- let source = function Label(s,_,_,_) | External(s,_,_,_) -> s
- let dest1 = function Label(_,_,d,_) | External(_,_,d,_) -> d
- let dest2 = function Label(_,_,_,d) | External(_,_,_,d) -> d
-
- let compatible t1 t2 =
- State.equal (source t1) (source t2)
- && State.equal (dest1 t1) (dest1 t2)
- && State.equal (dest2 t1) (dest2 t2)
-
- let check t1 t2 =
- if not (compatible t1 t2)
- then failwith "Incompatible transitions"
-
- let cup t1 t2 =
- check t1 t2;
- match (t1,t2) with
- | Label(s,ts,d1,d2), Label(_,ts',_,_) -> Label(s,TagSet.Xml.cup ts ts',d1,d2)
- | External(s,f,d1,d2), External(_,f',_,_) -> External(s,(fun x -> (f x)||(f' x)),d1,d2)
- | Label(s,ts,d1,d2), External(_,f,_,_)
- | External(_,f,_,_), Label(s,ts,d1,d2) -> External(s,(fun x -> (TagSet.Xml.mem (Tree.Binary.tag x) ts)||f x),d1,d2)
-
- let cap t1 t2 =
- check t1 t2;
- match (t1,t2) with
- | Label(s,ts,d1,d2), Label(_,ts',_,_) -> Label(s,TagSet.Xml.cap ts ts',d1,d2)
- | External(s,f,d1,d2), External(_,f',_,_) -> External(s,(fun x -> (f x)&&(f' x)),d1,d2)
- | Label(s,ts,d1,d2), External(_,f,_,_)
- | External(_,f,_,_), Label(s,ts,d1,d2) -> External(s,(fun x -> (TagSet.Xml.mem (Tree.Binary.tag x) ts)&& f x),d1,d2)
-
- let neg = function
- | Label(s,ts,d1,d2) -> Label(s,TagSet.Xml.neg ts,d1,d2)
- | External(s,f,d1,d2) -> External(s,(fun x -> not(f x)), d1 ,d2)
-
-
- let can_take t = function
- | Label(_,ts,_,_) -> TagSet.Xml.mem (Tree.Binary.tag t) ts
- | External(_,f,_,_) -> f t
-
- (* Hashtbl indexed by source State *)
- module HT = Hashtbl.Make(State)
-
-
-
- type hashtbl = { label : (TagSet.Xml.t*State.t*State.t) HT.t;
- extern : ((Tree.Binary.t-> bool)*State.t*State.t) HT.t;
- }
-
-
- let empty () = { label = HT.create 17;
- extern = HT.create 17;
- }
-
- let clear h = HT.clear h.label; HT.clear h.extern
-
- let add h = function
- | Label(s,t,d1,d2) -> HT.add h.label s (t,d1,d2)
- | External(s,f,d1,d2) -> HT.add h.extern s (f,d1,d2)
-
- let find_all ?(accu=[]) ?(pred_label=fun _ _ _ _ -> true) ?(pred_extern= fun _ _ _ _ -> true) h q =
- List.fold_left
- (fun acc (t,d1,d2) ->
- if pred_label q t d1 d2
- then Label(q,t,d1,d2) :: acc
- else acc)
- (List.fold_left
- (fun acc (f,d1,d2) ->
- if pred_extern q f d1 d2
- then External(q,f,d1,d2) :: acc
- else acc)
- accu
- (HT.find_all h.extern q))
- (HT.find_all h.label q)
-
- let find_all_dest q h =
- HT.fold (fun source (t,q1,q2) acc ->
- if (State.equal q1 q || State.equal q2 q)
- then Label(source,t,q1,q2)::acc
- else acc) h.label
- (HT.fold (fun source (t,q1,q2) acc ->
- if (State.equal q1 q || State.equal q2 q)
- then External(source,t,q1,q2)::acc
- else acc) h.extern [])
-
-
- let fold_state f_lab f_ext h q acc =
- List.fold_left
- (fun acc (t,d1,d2) ->
- f_lab acc q t d1 d2)
- (List.fold_left
- (fun acc (f,d1,d2) ->
- f_ext acc q f d1 d2)
- acc
- (HT.find_all h.extern q))
- (HT.find_all h.label q)
-
-
-end
-module BST = Set.Make(Tree.Binary)
-
-type t = { initial : SSet.t;
- final : SSet.t;
- transitions : Transition.hashtbl;
- marking : SSet.t;
- ignore : SSet.t;
- mutable result : BST.t;
- (* Statistics *)
- mutable numbt : int;
- mutable max_states : int;
- contains : (string,Tree.Binary.DocIdSet.t) Hashtbl.t;
- }
-
-let mk () = { initial = SSet.empty;
- final = SSet.empty;
- transitions = Transition.empty();
- marking = SSet.empty;
- ignore = SSet.empty;
- result = BST.empty;
- numbt = 0;
- max_states = 0;
- contains = Hashtbl.create 37;
-
- };;
-
- let print_tags fmt l =
- let l =
- if TagSet.Xml.is_finite l then l
- else (Format.fprintf fmt "* \\ "; TagSet.Xml.neg l )
- in
- Format.fprintf fmt "{ ";
- ignore(TagSet.Xml.fold (fun t first ->
- if not first
- then Format.fprintf fmt " ,";
- Tag.print fmt t; false) l true);
- Format.fprintf fmt "}"
-
- let dump fmt auto =
- Format.fprintf fmt "----------------- Automaton dump -------------------\n";
- Format.fprintf fmt "Initial states: ";
- SSet.iter (fun s -> State.print fmt s;
- Format.fprintf fmt " ") auto.initial;
- Format.fprintf fmt "\n";
- Format.fprintf fmt "Final states: ";
- SSet.iter (fun s -> State.print fmt s;
- Format.fprintf fmt " ") auto.final;
- Format.fprintf fmt "\n";
- Format.fprintf fmt "Marking states: ";
- SSet.iter (fun s -> State.print fmt s;
- Format.fprintf fmt " ") auto.marking;
- Format.fprintf fmt "\n";
- Format.fprintf fmt "Ignore states: ";
- SSet.iter (fun s -> State.print fmt s;
- Format.fprintf fmt " ") auto.ignore;
- Format.fprintf fmt "\n";
- Format.fprintf fmt "Transitions:\n";
- Transition.HT.iter (fun source (l,dest1,dest2) ->
- State.print fmt source;
- Format.fprintf fmt "-> ";
- print_tags fmt l;
- Format.fprintf fmt "(";
- State.print fmt dest1;
- Format.fprintf fmt " ,";
- State.print fmt dest2;
- Format.fprintf fmt ")\n") auto.transitions.Transition.label;
- Format.fprintf fmt "----------------------------------------------------\n"
-
-
-
-module BottomUp =
-struct
-
- exception Fail
-
- let pr_states fmt st = SSet.iter (fun s -> State.print fmt s;
- Format.fprintf fmt " ") st
-
- let err = Format.err_formatter
- let filter_map_rev filt map l =
- let rec loop ((accuf,accum) as accu) = function
- | [] -> accu
- | t::r -> loop (if filt t then (t::accuf,SSet.add (map t) accum)
- else accu) r
- in
- loop ([],SSet.empty) l
-
- let mem s x = SSet.mem x s
-
-
- let rec accepting_among ?(nobrother=false) ?(strings=None) auto t r =
- if SSet.is_empty r then r else
- match strings with
- | Some valid_strings when Tree.Binary.DocIdSet.for_all (fun i -> not(Tree.Binary.string_below t i)
- ) valid_strings
- -> SSet.empty
- | _ -> (
-
- let to_ignore = SSet.inter auto.ignore r in
- let r = SSet.diff r to_ignore
- in
- let res =
- match Tree.Binary.descr t with
- | Tree.Binary.Nil -> SSet.inter r auto.final
- | Tree.Binary.String id -> (
- match strings with
- | None -> SSet.inter r auto.final
- | Some valid_strings when (Tree.Binary.DocIdSet.mem id valid_strings)
- -> Printf.eprintf "Selecting string '%s'\n%!" (Tree.Binary.get_string t id);
- SSet.inter r auto.final
- | _ -> SSet.empty
- )
- | Tree.Binary.Node(_) ->
- let t1 = Tree.Binary.left t
- and t2 = Tree.Binary.right t
- in
- let transitions =
- SSet.fold
- ( fun q accu ->
- Transition.fold_state
- (fun acc q t d1 d2 -> Transition.Label(q,t,d1,d2) :: acc)
- (fun acc q t d1 d2 -> Transition.External(q,t,d1,d2) :: acc)
- auto.transitions q accu) r []
- in
- let transitions,r1 =
- filter_map_rev
- (Transition.can_take t)
- Transition.dest1 transitions
- in
- let s1 = accepting_among auto t1 r1
- in
- let transitions,r2 =
- filter_map_rev
- (fun x->SSet.mem (Transition.dest1 x) s1)
- Transition.dest2 transitions
- in
- let s2 = if nobrother then SSet.union (SSet.inter r auto.final) to_ignore
- else accepting_among auto t2 r2
- in
- let _,s = filter_map_rev
- (fun x -> SSet.mem (Transition.dest2 x) s2)
- (Transition.source) transitions
- in
- if SSet.is_empty s then s
- else
- (if SSet.exists (mem auto.marking) s1 || SSet.exists (mem auto.marking) s2
- then auto.result <- BST.add t auto.result;s)
- in SSet.union to_ignore res)
-
-
- let accept ?(strings=None) auto t =
- auto.result <- BST.empty;
- if SSet.is_empty (accepting_among ~nobrother:true ~strings:strings auto t auto.initial)
- then false
- else true
-end
-
-module TopDown = struct
- let rec accept_at auto t q =
- if SSet.mem q auto.ignore then true
- else
- match Tree.Binary.descr t with
- | Tree.Binary.Nil | Tree.Binary.String _ -> SSet.mem q auto.final
- | Tree.Binary.Node(_) ->
- let tag = Tree.Binary.tag t
- and t1 = Tree.Binary.left t
- and t2 = Tree.Binary.right t
- in
- let transitions =
- Transition.find_all
- ~pred_label:(fun _ ts _ _ -> TagSet.Xml.mem tag ts)
- ~pred_extern:(fun _ f _ _ -> f t)
- auto.transitions q
- in
- let rec iter_trans res = function
- [] -> res
- | (Transition.Label(_,_,q1,q2) | Transition.External (_,_,q1,q2))::r ->
- let _ = auto.numbt <- succ auto.numbt in
- if (accept_at auto t1 q1) && (accept_at auto t2 q2)
- then
- begin
- if (SSet.mem q1 auto.marking)||(SSet.mem q2 auto.marking)
- then
- begin
- auto.result <- BST.add t auto.result;
- end;
- iter_trans true r
- end
- else
- iter_trans res r
- in iter_trans false transitions
-
-
-
-
- let accept auto t =
- auto.numbt <- -1;
- SSet.exists (fun q ->
- P(auto.numbt <- succ auto.numbt);
- auto.result <- BST.empty;
- accept_at auto t q) auto.initial
-
-
- let rec run_in auto t states =
- if SSet.is_empty states then ()
- else
- match Tree.Binary.descr t with
- | Tree.Binary.Nil | Tree.Binary.String _ -> ()
- | Tree.Binary.Node(_) ->
- let tag = Tree.Binary.tag t
- and t1 = Tree.Binary.left t
- and t2 = Tree.Binary.right t
- in
- P(let i = SSet.cardinal states in
- if i > auto.max_states then auto.max_states <- i);
- let s1,s2 =
- SSet.fold
- (fun q acc ->
- if SSet.mem q auto.ignore then acc
- else
- Transition.fold_state
- (fun (ss1,ss2) _ ts d1 d2 ->
- if TagSet.Xml.mem tag ts
- then
- (SSet.add d1 ss1,
- SSet.add d2 ss2)
- else (ss1,ss2))
- (fun (ss1,ss2) _ f d1 d2 ->
- if f t
- then
- (SSet.add d1 ss1,
- SSet.add d2 ss2)
- else (ss1,ss2)) auto.transitions q acc ) states (SSet.empty,SSet.empty)
- in
- if SSet.is_empty (SSet.inter auto.marking (SSet.union s1 s2))
- then ()
- else auto.result <- BST.add t auto.result;
- run_in auto t1 s1;
- run_in auto t2 s2
-
-
- let run auto t =
- auto.result <- BST.empty;
- P(auto.numbt <- 0);
-
- run_in auto t auto.initial
-
-end
-