1 (******************************************************************************)
2 (* SXSI : XPath evaluator *)
3 (* Kim Nguyen (Kim.Nguyen@nicta.com.au) *)
4 (* Copyright NICTA 2008 *)
5 (* Distributed under the terms of the LGPL (see LICENCE) *)
6 (******************************************************************************)
12 let mk = let i = ref ~-1 in fun () -> incr i;!i
13 let compare p q = p - q
16 let print fmt p = Format.fprintf fmt "<%.6i>" p
19 module ISet : Set.S with type elt = int=
21 let max = Sys.word_size - 2
27 let is_empty x = x == 0
28 let mem e s = ((1 lsl e) land s) != 0
29 let add e s = (1 lsl e) lor s
30 let singleton e = (1 lsl e)
31 let union a b = a lor b
32 let inter a b = a land b
33 let diff a b = a land (lnot b)
34 let remove e s = (lnot (1 lsl e) land s)
37 let subset a b = a land (lnot b) == 0
40 if s == 0 then n else loop (succ n) (s - (s land (-s)))
43 (* inverse of bit i = 1 lsl i i.e. tib i = log_2(i) *)
44 let log2 = Array.create 255 0
45 let () = for i = 0 to 7 do log2.(1 lsl i) <- i done
47 (* assumption: x is a power of 2 *)
49 if x land 0xFFFF == 0 then
51 if x land 0xFF == 0 then 24 + log2.(x lsr 8) else 16 + log2.(x)
53 if x land 0xFF == 0 then 8 + log2.(x lsr 8) else log2.(x)
55 let ffffffff = (0xffff lsl 16) lor 0xffff
57 if x land ffffffff == 0 then 32 + tib32 (x lsr 32) else tib32 x
60 match Sys.word_size with 32 -> tib32 | 64 -> tib64 | _ -> assert false
63 if s == 0 then raise Not_found;
70 if s == 0 then raise Not_found;
72 if s land i != 0 then tib i
73 else if i = 1 then raise Not_found else loop (i lsr 1)
78 if s == 0 then [] else let i = s land (-s) in tib i :: elements (s - i)
81 if s != 0 then let i = s land (-s) in f (tib i); iter f (s - i)
83 let rec fold f s acc =
84 if s == 0 then acc else let i = s land (-s) in fold f (s - i) (f (tib i) acc)
87 s == 0 || let i = s land (-s) in p (tib i) && for_all p (s - i)
90 s != 0 && let i = s land (-s) in p (tib i) || exists p (s - i)
96 let i = s land (-s) in
97 let s = filter p (s - i) in
98 if p (tib i) then s + i else s
100 let rec partition p s =
104 let i = s land (-s) in
105 let st,sf = partition p (s - i) in
106 if p (tib i) then st + i, sf else st, sf + i
110 s land (bi - 1), s land bi != 0, s land (-1 lsl (i+1))
114 (* module SSet = Set.Make(State)*)
120 type t = Label of State.t * TagSet.Xml.t * State.t * State.t
121 | External of State.t * (Tree.Binary.t -> bool)*State.t * State.t
123 let source = function Label(s,_,_,_) | External(s,_,_,_) -> s
124 let dest1 = function Label(_,_,d,_) | External(_,_,d,_) -> d
125 let dest2 = function Label(_,_,_,d) | External(_,_,_,d) -> d
127 let compatible t1 t2 =
128 State.equal (source t1) (source t2)
129 && State.equal (dest1 t1) (dest1 t2)
130 && State.equal (dest2 t1) (dest2 t2)
133 if not (compatible t1 t2)
134 then failwith "Incompatible transitions"
139 | Label(s,ts,d1,d2), Label(_,ts',_,_) -> Label(s,TagSet.Xml.cup ts ts',d1,d2)
140 | External(s,f,d1,d2), External(_,f',_,_) -> External(s,(fun x -> (f x)||(f' x)),d1,d2)
141 | Label(s,ts,d1,d2), External(_,f,_,_)
142 | External(_,f,_,_), Label(s,ts,d1,d2) -> External(s,(fun x -> (TagSet.Xml.mem (Tree.Binary.tag x) ts)||f x),d1,d2)
147 | Label(s,ts,d1,d2), Label(_,ts',_,_) -> Label(s,TagSet.Xml.cap ts ts',d1,d2)
148 | External(s,f,d1,d2), External(_,f',_,_) -> External(s,(fun x -> (f x)&&(f' x)),d1,d2)
149 | Label(s,ts,d1,d2), External(_,f,_,_)
150 | External(_,f,_,_), Label(s,ts,d1,d2) -> External(s,(fun x -> (TagSet.Xml.mem (Tree.Binary.tag x) ts)&& f x),d1,d2)
153 | Label(s,ts,d1,d2) -> Label(s,TagSet.Xml.neg ts,d1,d2)
154 | External(s,f,d1,d2) -> External(s,(fun x -> not(f x)), d1 ,d2)
157 let can_take t = function
158 | Label(_,ts,_,_) -> TagSet.Xml.mem (Tree.Binary.tag t) ts
159 | External(_,f,_,_) -> f t
161 (* Hashtbl indexed by source State *)
162 module HT = Hashtbl.Make(State)
166 type hashtbl = { label : (TagSet.Xml.t*State.t*State.t) HT.t;
167 extern : ((Tree.Binary.t-> bool)*State.t*State.t) HT.t;
171 let empty () = { label = HT.create 17;
172 extern = HT.create 17;
175 let clear h = HT.clear h.label; HT.clear h.extern
178 | Label(s,t,d1,d2) -> HT.add h.label s (t,d1,d2)
179 | External(s,f,d1,d2) -> HT.add h.extern s (f,d1,d2)
181 let find_all ?(accu=[]) ?(pred_label=fun _ _ _ _ -> true) ?(pred_extern= fun _ _ _ _ -> true) h q =
183 (fun acc (t,d1,d2) ->
184 if pred_label q t d1 d2
185 then Label(q,t,d1,d2) :: acc
188 (fun acc (f,d1,d2) ->
189 if pred_extern q f d1 d2
190 then External(q,f,d1,d2) :: acc
193 (HT.find_all h.extern q))
194 (HT.find_all h.label q)
196 let find_all_dest q h =
197 HT.fold (fun source (t,q1,q2) acc ->
198 if (State.equal q1 q || State.equal q2 q)
199 then Label(source,t,q1,q2)::acc
201 (HT.fold (fun source (t,q1,q2) acc ->
202 if (State.equal q1 q || State.equal q2 q)
203 then External(source,t,q1,q2)::acc
204 else acc) h.extern [])
207 let fold_state f_lab f_ext h q acc =
209 (fun acc (t,d1,d2) ->
212 (fun acc (f,d1,d2) ->
215 (HT.find_all h.extern q))
216 (HT.find_all h.label q)
220 module BST = Set.Make(Tree.Binary)
222 type t = { initial : SSet.t;
224 transitions : Transition.hashtbl;
227 mutable result : BST.t;
230 mutable max_states : int;
231 contains : (string,Tree.Binary.DocIdSet.t) Hashtbl.t;
234 let mk () = { initial = SSet.empty;
236 transitions = Transition.empty();
237 marking = SSet.empty;
242 contains = Hashtbl.create 37;
246 let print_tags fmt l =
248 if TagSet.Xml.is_finite l then l
249 else (Format.fprintf fmt "* \\ "; TagSet.Xml.neg l )
251 Format.fprintf fmt "{ ";
252 ignore(TagSet.Xml.fold (fun t first ->
254 then Format.fprintf fmt " ,";
255 Tag.print fmt t; false) l true);
256 Format.fprintf fmt "}"
259 Format.fprintf fmt "----------------- Automaton dump -------------------\n";
260 Format.fprintf fmt "Initial states: ";
261 SSet.iter (fun s -> State.print fmt s;
262 Format.fprintf fmt " ") auto.initial;
263 Format.fprintf fmt "\n";
264 Format.fprintf fmt "Final states: ";
265 SSet.iter (fun s -> State.print fmt s;
266 Format.fprintf fmt " ") auto.final;
267 Format.fprintf fmt "\n";
268 Format.fprintf fmt "Marking states: ";
269 SSet.iter (fun s -> State.print fmt s;
270 Format.fprintf fmt " ") auto.marking;
271 Format.fprintf fmt "\n";
272 Format.fprintf fmt "Ignore states: ";
273 SSet.iter (fun s -> State.print fmt s;
274 Format.fprintf fmt " ") auto.ignore;
275 Format.fprintf fmt "\n";
276 Format.fprintf fmt "Transitions:\n";
277 Transition.HT.iter (fun source (l,dest1,dest2) ->
278 State.print fmt source;
279 Format.fprintf fmt "-> ";
281 Format.fprintf fmt "(";
282 State.print fmt dest1;
283 Format.fprintf fmt " ,";
284 State.print fmt dest2;
285 Format.fprintf fmt ")\n") auto.transitions.Transition.label;
286 Format.fprintf fmt "----------------------------------------------------\n"
295 let pr_states fmt st = SSet.iter (fun s -> State.print fmt s;
296 Format.fprintf fmt " ") st
298 let err = Format.err_formatter
299 let filter_map_rev filt map l =
300 let rec loop ((accuf,accum) as accu) = function
302 | t::r -> loop (if filt t then (t::accuf,SSet.add (map t) accum)
305 loop ([],SSet.empty) l
307 let mem s x = SSet.mem x s
310 let rec accepting_among ?(nobrother=false) ?(strings=None) auto t r =
311 if SSet.is_empty r then r else
313 | Some valid_strings when Tree.Binary.DocIdSet.for_all (fun i -> not(Tree.Binary.string_below t i)
318 let to_ignore = SSet.inter auto.ignore r in
319 let r = SSet.diff r to_ignore
322 match Tree.Binary.descr t with
323 | Tree.Binary.Nil -> SSet.inter r auto.final
324 | Tree.Binary.String id -> (
326 | None -> SSet.inter r auto.final
327 | Some valid_strings when (Tree.Binary.DocIdSet.mem id valid_strings)
328 -> Printf.eprintf "Selecting string '%s'\n%!" (Tree.Binary.get_string t id);
329 SSet.inter r auto.final
332 | Tree.Binary.Node(_) ->
333 let t1 = Tree.Binary.left t
334 and t2 = Tree.Binary.right t
339 Transition.fold_state
340 (fun acc q t d1 d2 -> Transition.Label(q,t,d1,d2) :: acc)
341 (fun acc q t d1 d2 -> Transition.External(q,t,d1,d2) :: acc)
342 auto.transitions q accu) r []
346 (Transition.can_take t)
347 Transition.dest1 transitions
349 let s1 = accepting_among auto t1 r1
353 (fun x->SSet.mem (Transition.dest1 x) s1)
354 Transition.dest2 transitions
356 let s2 = if nobrother then SSet.union (SSet.inter r auto.final) to_ignore
357 else accepting_among auto t2 r2
359 let _,s = filter_map_rev
360 (fun x -> SSet.mem (Transition.dest2 x) s2)
361 (Transition.source) transitions
363 if SSet.is_empty s then s
365 (if SSet.exists (mem auto.marking) s1 || SSet.exists (mem auto.marking) s2
366 then auto.result <- BST.add t auto.result;s)
367 in SSet.union to_ignore res)
370 let accept ?(strings=None) auto t =
371 auto.result <- BST.empty;
372 if SSet.is_empty (accepting_among ~nobrother:true ~strings:strings auto t auto.initial)
377 module TopDown = struct
378 let rec accept_at auto t q =
379 if SSet.mem q auto.ignore then true
381 match Tree.Binary.descr t with
382 | Tree.Binary.Nil | Tree.Binary.String _ -> SSet.mem q auto.final
383 | Tree.Binary.Node(_) ->
384 let tag = Tree.Binary.tag t
385 and t1 = Tree.Binary.left t
386 and t2 = Tree.Binary.right t
390 ~pred_label:(fun _ ts _ _ -> TagSet.Xml.mem tag ts)
391 ~pred_extern:(fun _ f _ _ -> f t)
394 let rec iter_trans res = function
396 | (Transition.Label(_,_,q1,q2) | Transition.External (_,_,q1,q2))::r ->
397 let _ = auto.numbt <- succ auto.numbt in
398 if (accept_at auto t1 q1) && (accept_at auto t2 q2)
401 if (SSet.mem q1 auto.marking)||(SSet.mem q2 auto.marking)
404 auto.result <- BST.add t auto.result;
410 in iter_trans false transitions
417 SSet.exists (fun q ->
418 P(auto.numbt <- succ auto.numbt);
419 auto.result <- BST.empty;
420 accept_at auto t q) auto.initial
423 let rec run_in auto t states =
424 if SSet.is_empty states then ()
426 match Tree.Binary.descr t with
427 | Tree.Binary.Nil | Tree.Binary.String _ -> ()
428 | Tree.Binary.Node(_) ->
429 let tag = Tree.Binary.tag t
430 and t1 = Tree.Binary.left t
431 and t2 = Tree.Binary.right t
433 P(let i = SSet.cardinal states in
434 if i > auto.max_states then auto.max_states <- i);
438 if SSet.mem q auto.ignore then acc
440 Transition.fold_state
441 (fun (ss1,ss2) _ ts d1 d2 ->
442 if TagSet.Xml.mem tag ts
447 (fun (ss1,ss2) _ f d1 d2 ->
452 else (ss1,ss2)) auto.transitions q acc ) states (SSet.empty,SSet.empty)
454 if SSet.is_empty (SSet.inter auto.marking (SSet.union s1 s2))
456 else auto.result <- BST.add t auto.result;
462 auto.result <- BST.empty;
465 run_in auto t auto.initial