X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=automaton.ml;fp=automaton.ml;h=0000000000000000000000000000000000000000;hb=83aa6cf8a120ea6681402ce42ae56631fca1ddf4;hp=0c815a4ee53a3f73580cbe73738477ad2fae57b3;hpb=4680fa5b41156d70f0fde69981f0d241184b19d9;p=SXSI%2Fxpathcomp.git diff --git a/automaton.ml b/automaton.ml deleted file mode 100644 index 0c815a4..0000000 --- a/automaton.ml +++ /dev/null @@ -1,468 +0,0 @@ -(******************************************************************************) -(* SXSI : XPath evaluator *) -(* Kim Nguyen (Kim.Nguyen@nicta.com.au) *) -(* Copyright NICTA 2008 *) -(* Distributed under the terms of the LGPL (see LICENCE) *) -(******************************************************************************) -INCLUDE "debug.ml";; - -module State = -struct - type t = int - let mk = let i = ref ~-1 in fun () -> incr i;!i - let compare p q = p - q - let equal p q = p==q - let hash p = p - let print fmt p = Format.fprintf fmt "<%.6i>" p -end - -module ISet : Set.S with type elt = int= -struct - let max = Sys.word_size - 2 - type t = int - type elt = int - - let empty = 0 - let full = -1 - let is_empty x = x == 0 - let mem e s = ((1 lsl e) land s) != 0 - let add e s = (1 lsl e) lor s - let singleton e = (1 lsl e) - let union a b = a lor b - let inter a b = a land b - let diff a b = a land (lnot b) - let remove e s = (lnot (1 lsl e) land s) - let compare = (-) - let equal = (==) - let subset a b = a land (lnot b) == 0 - let cardinal s = - let rec loop n s = - if s == 0 then n else loop (succ n) (s - (s land (-s))) - in - loop 0 s -(* inverse of bit i = 1 lsl i i.e. tib i = log_2(i) *) -let log2 = Array.create 255 0 -let () = for i = 0 to 7 do log2.(1 lsl i) <- i done - -(* assumption: x is a power of 2 *) -let tib32 x = - if x land 0xFFFF == 0 then - let x = x lsr 16 in - if x land 0xFF == 0 then 24 + log2.(x lsr 8) else 16 + log2.(x) - else - if x land 0xFF == 0 then 8 + log2.(x lsr 8) else log2.(x) - -let ffffffff = (0xffff lsl 16) lor 0xffff -let tib64 x = - if x land ffffffff == 0 then 32 + tib32 (x lsr 32) else tib32 x - -let tib = - match Sys.word_size with 32 -> tib32 | 64 -> tib64 | _ -> assert false - -let min_elt s = - if s == 0 then raise Not_found; - tib (s land (-s)) - -let choose = min_elt - -(* TODO: improve? *) -let max_elt s = - if s == 0 then raise Not_found; - let rec loop i = - if s land i != 0 then tib i - else if i = 1 then raise Not_found else loop (i lsr 1) - in - loop min_int - -let rec elements s = - if s == 0 then [] else let i = s land (-s) in tib i :: elements (s - i) - -let rec iter f s = - if s != 0 then let i = s land (-s) in f (tib i); iter f (s - i) - -let rec fold f s acc = - if s == 0 then acc else let i = s land (-s) in fold f (s - i) (f (tib i) acc) - -let rec for_all p s = - s == 0 || let i = s land (-s) in p (tib i) && for_all p (s - i) - -let rec exists p s = - s != 0 && let i = s land (-s) in p (tib i) || exists p (s - i) - -let rec filter p s = - if s == 0 then - 0 - else - let i = s land (-s) in - let s = filter p (s - i) in - if p (tib i) then s + i else s - -let rec partition p s = - if s == 0 then - 0, 0 - else - let i = s land (-s) in - let st,sf = partition p (s - i) in - if p (tib i) then st + i, sf else st, sf + i - -let split i s = - let bi = 1 lsl i in - s land (bi - 1), s land bi != 0, s land (-1 lsl (i+1)) - - -end -(* module SSet = Set.Make(State)*) -module SSet = ISet - -module Transition = -struct - - type t = Label of State.t * TagSet.Xml.t * State.t * State.t - | External of State.t * (Tree.Binary.t -> bool)*State.t * State.t - - let source = function Label(s,_,_,_) | External(s,_,_,_) -> s - let dest1 = function Label(_,_,d,_) | External(_,_,d,_) -> d - let dest2 = function Label(_,_,_,d) | External(_,_,_,d) -> d - - let compatible t1 t2 = - State.equal (source t1) (source t2) - && State.equal (dest1 t1) (dest1 t2) - && State.equal (dest2 t1) (dest2 t2) - - let check t1 t2 = - if not (compatible t1 t2) - then failwith "Incompatible transitions" - - let cup t1 t2 = - check t1 t2; - match (t1,t2) with - | Label(s,ts,d1,d2), Label(_,ts',_,_) -> Label(s,TagSet.Xml.cup ts ts',d1,d2) - | External(s,f,d1,d2), External(_,f',_,_) -> External(s,(fun x -> (f x)||(f' x)),d1,d2) - | Label(s,ts,d1,d2), External(_,f,_,_) - | External(_,f,_,_), Label(s,ts,d1,d2) -> External(s,(fun x -> (TagSet.Xml.mem (Tree.Binary.tag x) ts)||f x),d1,d2) - - let cap t1 t2 = - check t1 t2; - match (t1,t2) with - | Label(s,ts,d1,d2), Label(_,ts',_,_) -> Label(s,TagSet.Xml.cap ts ts',d1,d2) - | External(s,f,d1,d2), External(_,f',_,_) -> External(s,(fun x -> (f x)&&(f' x)),d1,d2) - | Label(s,ts,d1,d2), External(_,f,_,_) - | External(_,f,_,_), Label(s,ts,d1,d2) -> External(s,(fun x -> (TagSet.Xml.mem (Tree.Binary.tag x) ts)&& f x),d1,d2) - - let neg = function - | Label(s,ts,d1,d2) -> Label(s,TagSet.Xml.neg ts,d1,d2) - | External(s,f,d1,d2) -> External(s,(fun x -> not(f x)), d1 ,d2) - - - let can_take t = function - | Label(_,ts,_,_) -> TagSet.Xml.mem (Tree.Binary.tag t) ts - | External(_,f,_,_) -> f t - - (* Hashtbl indexed by source State *) - module HT = Hashtbl.Make(State) - - - - type hashtbl = { label : (TagSet.Xml.t*State.t*State.t) HT.t; - extern : ((Tree.Binary.t-> bool)*State.t*State.t) HT.t; - } - - - let empty () = { label = HT.create 17; - extern = HT.create 17; - } - - let clear h = HT.clear h.label; HT.clear h.extern - - let add h = function - | Label(s,t,d1,d2) -> HT.add h.label s (t,d1,d2) - | External(s,f,d1,d2) -> HT.add h.extern s (f,d1,d2) - - let find_all ?(accu=[]) ?(pred_label=fun _ _ _ _ -> true) ?(pred_extern= fun _ _ _ _ -> true) h q = - List.fold_left - (fun acc (t,d1,d2) -> - if pred_label q t d1 d2 - then Label(q,t,d1,d2) :: acc - else acc) - (List.fold_left - (fun acc (f,d1,d2) -> - if pred_extern q f d1 d2 - then External(q,f,d1,d2) :: acc - else acc) - accu - (HT.find_all h.extern q)) - (HT.find_all h.label q) - - let find_all_dest q h = - HT.fold (fun source (t,q1,q2) acc -> - if (State.equal q1 q || State.equal q2 q) - then Label(source,t,q1,q2)::acc - else acc) h.label - (HT.fold (fun source (t,q1,q2) acc -> - if (State.equal q1 q || State.equal q2 q) - then External(source,t,q1,q2)::acc - else acc) h.extern []) - - - let fold_state f_lab f_ext h q acc = - List.fold_left - (fun acc (t,d1,d2) -> - f_lab acc q t d1 d2) - (List.fold_left - (fun acc (f,d1,d2) -> - f_ext acc q f d1 d2) - acc - (HT.find_all h.extern q)) - (HT.find_all h.label q) - - -end -module BST = Set.Make(Tree.Binary) - -type t = { initial : SSet.t; - final : SSet.t; - transitions : Transition.hashtbl; - marking : SSet.t; - ignore : SSet.t; - mutable result : BST.t; - (* Statistics *) - mutable numbt : int; - mutable max_states : int; - contains : (string,Tree.Binary.DocIdSet.t) Hashtbl.t; - } - -let mk () = { initial = SSet.empty; - final = SSet.empty; - transitions = Transition.empty(); - marking = SSet.empty; - ignore = SSet.empty; - result = BST.empty; - numbt = 0; - max_states = 0; - contains = Hashtbl.create 37; - - };; - - let print_tags fmt l = - let l = - if TagSet.Xml.is_finite l then l - else (Format.fprintf fmt "* \\ "; TagSet.Xml.neg l ) - in - Format.fprintf fmt "{ "; - ignore(TagSet.Xml.fold (fun t first -> - if not first - then Format.fprintf fmt " ,"; - Tag.print fmt t; false) l true); - Format.fprintf fmt "}" - - let dump fmt auto = - Format.fprintf fmt "----------------- Automaton dump -------------------\n"; - Format.fprintf fmt "Initial states: "; - SSet.iter (fun s -> State.print fmt s; - Format.fprintf fmt " ") auto.initial; - Format.fprintf fmt "\n"; - Format.fprintf fmt "Final states: "; - SSet.iter (fun s -> State.print fmt s; - Format.fprintf fmt " ") auto.final; - Format.fprintf fmt "\n"; - Format.fprintf fmt "Marking states: "; - SSet.iter (fun s -> State.print fmt s; - Format.fprintf fmt " ") auto.marking; - Format.fprintf fmt "\n"; - Format.fprintf fmt "Ignore states: "; - SSet.iter (fun s -> State.print fmt s; - Format.fprintf fmt " ") auto.ignore; - Format.fprintf fmt "\n"; - Format.fprintf fmt "Transitions:\n"; - Transition.HT.iter (fun source (l,dest1,dest2) -> - State.print fmt source; - Format.fprintf fmt "-> "; - print_tags fmt l; - Format.fprintf fmt "("; - State.print fmt dest1; - Format.fprintf fmt " ,"; - State.print fmt dest2; - Format.fprintf fmt ")\n") auto.transitions.Transition.label; - Format.fprintf fmt "----------------------------------------------------\n" - - - -module BottomUp = -struct - - exception Fail - - let pr_states fmt st = SSet.iter (fun s -> State.print fmt s; - Format.fprintf fmt " ") st - - let err = Format.err_formatter - let filter_map_rev filt map l = - let rec loop ((accuf,accum) as accu) = function - | [] -> accu - | t::r -> loop (if filt t then (t::accuf,SSet.add (map t) accum) - else accu) r - in - loop ([],SSet.empty) l - - let mem s x = SSet.mem x s - - - let rec accepting_among ?(nobrother=false) ?(strings=None) auto t r = - if SSet.is_empty r then r else - match strings with - | Some valid_strings when Tree.Binary.DocIdSet.for_all (fun i -> not(Tree.Binary.string_below t i) - ) valid_strings - -> SSet.empty - | _ -> ( - - let to_ignore = SSet.inter auto.ignore r in - let r = SSet.diff r to_ignore - in - let res = - match Tree.Binary.descr t with - | Tree.Binary.Nil -> SSet.inter r auto.final - | Tree.Binary.String id -> ( - match strings with - | None -> SSet.inter r auto.final - | Some valid_strings when (Tree.Binary.DocIdSet.mem id valid_strings) - -> Printf.eprintf "Selecting string '%s'\n%!" (Tree.Binary.get_string t id); - SSet.inter r auto.final - | _ -> SSet.empty - ) - | Tree.Binary.Node(_) -> - let t1 = Tree.Binary.left t - and t2 = Tree.Binary.right t - in - let transitions = - SSet.fold - ( fun q accu -> - Transition.fold_state - (fun acc q t d1 d2 -> Transition.Label(q,t,d1,d2) :: acc) - (fun acc q t d1 d2 -> Transition.External(q,t,d1,d2) :: acc) - auto.transitions q accu) r [] - in - let transitions,r1 = - filter_map_rev - (Transition.can_take t) - Transition.dest1 transitions - in - let s1 = accepting_among auto t1 r1 - in - let transitions,r2 = - filter_map_rev - (fun x->SSet.mem (Transition.dest1 x) s1) - Transition.dest2 transitions - in - let s2 = if nobrother then SSet.union (SSet.inter r auto.final) to_ignore - else accepting_among auto t2 r2 - in - let _,s = filter_map_rev - (fun x -> SSet.mem (Transition.dest2 x) s2) - (Transition.source) transitions - in - if SSet.is_empty s then s - else - (if SSet.exists (mem auto.marking) s1 || SSet.exists (mem auto.marking) s2 - then auto.result <- BST.add t auto.result;s) - in SSet.union to_ignore res) - - - let accept ?(strings=None) auto t = - auto.result <- BST.empty; - if SSet.is_empty (accepting_among ~nobrother:true ~strings:strings auto t auto.initial) - then false - else true -end - -module TopDown = struct - let rec accept_at auto t q = - if SSet.mem q auto.ignore then true - else - match Tree.Binary.descr t with - | Tree.Binary.Nil | Tree.Binary.String _ -> SSet.mem q auto.final - | Tree.Binary.Node(_) -> - let tag = Tree.Binary.tag t - and t1 = Tree.Binary.left t - and t2 = Tree.Binary.right t - in - let transitions = - Transition.find_all - ~pred_label:(fun _ ts _ _ -> TagSet.Xml.mem tag ts) - ~pred_extern:(fun _ f _ _ -> f t) - auto.transitions q - in - let rec iter_trans res = function - [] -> res - | (Transition.Label(_,_,q1,q2) | Transition.External (_,_,q1,q2))::r -> - let _ = auto.numbt <- succ auto.numbt in - if (accept_at auto t1 q1) && (accept_at auto t2 q2) - then - begin - if (SSet.mem q1 auto.marking)||(SSet.mem q2 auto.marking) - then - begin - auto.result <- BST.add t auto.result; - end; - iter_trans true r - end - else - iter_trans res r - in iter_trans false transitions - - - - - let accept auto t = - auto.numbt <- -1; - SSet.exists (fun q -> - P(auto.numbt <- succ auto.numbt); - auto.result <- BST.empty; - accept_at auto t q) auto.initial - - - let rec run_in auto t states = - if SSet.is_empty states then () - else - match Tree.Binary.descr t with - | Tree.Binary.Nil | Tree.Binary.String _ -> () - | Tree.Binary.Node(_) -> - let tag = Tree.Binary.tag t - and t1 = Tree.Binary.left t - and t2 = Tree.Binary.right t - in - P(let i = SSet.cardinal states in - if i > auto.max_states then auto.max_states <- i); - let s1,s2 = - SSet.fold - (fun q acc -> - if SSet.mem q auto.ignore then acc - else - Transition.fold_state - (fun (ss1,ss2) _ ts d1 d2 -> - if TagSet.Xml.mem tag ts - then - (SSet.add d1 ss1, - SSet.add d2 ss2) - else (ss1,ss2)) - (fun (ss1,ss2) _ f d1 d2 -> - if f t - then - (SSet.add d1 ss1, - SSet.add d2 ss2) - else (ss1,ss2)) auto.transitions q acc ) states (SSet.empty,SSet.empty) - in - if SSet.is_empty (SSet.inter auto.marking (SSet.union s1 s2)) - then () - else auto.result <- BST.add t auto.result; - run_in auto t1 s1; - run_in auto t2 s2 - - - let run auto t = - auto.result <- BST.empty; - P(auto.numbt <- 0); - - run_in auto t auto.initial - -end -