X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=src%2Fcompile.ml;fp=src%2Fcompile.ml;h=421279831b15257c27500784d564c475608c5b64;hb=4b52da1a20a4fe031930bb96d2ca46bec06dc529;hp=0000000000000000000000000000000000000000;hpb=a223af3254fb51c279cfbccdc18c59484fdca74e;p=SXSI%2Fxpathcomp.git diff --git a/src/compile.ml b/src/compile.ml new file mode 100644 index 0000000..4212798 --- /dev/null +++ b/src/compile.ml @@ -0,0 +1,240 @@ + +open Ata +open XPath.Ast + +type text_query = [ `Prefix | `Suffix | `Equals | `Contains ] + +type tri_state = [ `Unknown | `Yes | `No ] + +let pr_tri_state fmt v = + Format.fprintf fmt "%s" + (match v with + | `Unknown -> "`Unknown" + | `Yes -> "`Yes" + | `No -> "`No") +;; + + +type info = { + trans : Transition.t list; + states : StateSet.t; + marking_states : StateSet.t; + bottom_states : StateSet.t; + last : State.t; + bottom_up : tri_state; + text_pred : (text_query * string) list +} + +let empty_info = + { trans = []; + states = StateSet.empty; + marking_states = StateSet.empty; + bottom_states = StateSet.empty; + last = State.dummy; + bottom_up = `Unknown; + text_pred = [] + } + +open Formula.Infix + +let mk_phi top phi loop = if top then phi *& loop else phi + +let log msg v1 v2 = + let () = Format.eprintf "%a -> %a in %s\n%!" + pr_tri_state v1 + pr_tri_state v2 + msg + in v2 + +let log _ _ v = v + +let rec compile_step toplevel ((axis, test, _) as _step) state cont conf last = + let test, cont = match test with + | Simple t -> t, cont + | Complex (t, p) -> t, Formula.and_ (Formula.pred_ p) cont + in + let marking = toplevel && last in + let trans, new_cont = + match axis with + | Child -> + let loop = `Right *+ state in + let phi = mk_phi toplevel cont loop in + ( [ (Transition.make (state, test, marking, phi)); + (Transition.make (state, TagSet.any, false, loop))], + (`Left *+ state)) + + | FollowingSibling -> + let loop = `Right *+ state in + let phi = mk_phi toplevel cont loop in + ( [ (Transition.make (state, test, marking, phi)); + (Transition.make (state, TagSet.any, false, loop))], + (`Right *+ state)) + + | Descendant -> + let loopfun = if toplevel then Formula.and_ else Formula.or_ in + let loop = loopfun (`Left *+ state) (`Right *+ state) in + let phi = mk_phi toplevel cont loop in + ( [ (Transition.make (state, test, marking, phi)); + (Transition.make (state, TagSet.any, false, loop)); + (*(Transition.make (state, TagSet.any, false, `Right *+ state)) *) + ], + (`Left *+ state)) + + | _ -> assert false + in + { conf with + trans = trans@conf.trans; + states = StateSet.add state conf.states; + marking_states = + if toplevel + then StateSet.add state conf.marking_states + else conf.marking_states + }, new_cont + +and compile_step_list toplevel sl conf = + match sl with + [] -> + let state = State.make () in + let phi = `Left *+ state in + let loop = (`Left *+ state) *& (`Right *+ state) in + ( true, + phi, + { conf with + states = StateSet.add state conf.states; + bottom_states = StateSet.add state conf.bottom_states; + trans = (Transition.make (state, TagSet.any, false, loop)) :: conf.trans; + } ) + | (_, _, pred) as step :: sll -> + let state = State.make () in + let pred, conf = compile_predicate pred conf in + let last, cont, conf = compile_step_list toplevel sll conf in + let conf, new_cont = compile_step toplevel step state (pred *& cont) conf last in + let conf = if toplevel && last then {conf with last = state} else conf in + false, new_cont, conf + +and compile_predicate predicate conf = + match predicate with + | Or(p1, p2) -> + + let cont1, conf1 = compile_predicate p1 conf in + let cont2, conf2 = compile_predicate p2 conf1 in + cont1 +| cont2, { conf2 with bottom_up = `No + } + | And(p1, p2) -> + let cont1, conf1 = compile_predicate p1 conf in + let cont2, conf2 = compile_predicate p2 conf1 in + cont1 *& cont2, { conf2 with bottom_up = `No + } + | Not p -> + let cont, conf = compile_predicate p conf in + Formula.not_ cont, { conf with bottom_up = `No + } + | Expr e -> + compile_expr e conf +and append_path p s = + match p with + | Relative sl -> Relative (sl @ [s]) + | Absolute sl -> Absolute (sl @ [s]) + | AbsoluteDoS sl -> AbsoluteDoS (sl @ [s]) + +and compile_expr expr conf = + match expr with + | True -> Formula.true_, conf + | False -> Formula.false_, conf + | Path p -> + let phi, conf = compile_path false p conf in + phi, { conf with + bottom_up = let v = + match conf.bottom_up with + | `Yes -> `Yes + | _ -> `No + in v + } + | Function(fn, + [ Path(Relative + [(Self, Simple (n), Expr True)]) ; String s ]) when n == TagSet.node -> + + let f = + match fn with + | "contains" -> `Contains + | "equals" -> `Equals + | "starts-with" -> `Prefix + | "ends-with" -> `Suffix + | _ -> failwith ("Unknown function : " ^ fn) + in + let pred = Tree.mk_pred f s in + let phi, conf' = + compile_expr (Path (Relative [(Child, Complex(TagSet.pcdata, pred), Expr True)])) conf + in + phi, + { conf' with + text_pred = (f,s) :: conf'.text_pred; + bottom_up = + let v = + match conf.bottom_up with + | `Unknown -> `Yes + | _ -> `No + in v + } + | _ -> assert false + +and compile_path toplevel p conf = + let sl = + match p with + | Relative sl -> sl + | Absolute sl -> (Child, Simple (TagSet.singleton Tag.document_node), Expr True)::sl + | AbsoluteDoS sl -> + (Descendant, (Simple TagSet.node), Expr True)::sl + in + let _, cont, conf = compile_step_list toplevel sl conf in + cont, conf + +let is_topdown_loop q s = + StateSet.cardinal (StateSet.remove q s) <= 1 +let rec remove_topdown_marking trans l last = + match l with + | [] -> last :: l + | q :: ll -> + let tr_list = Hashtbl.find trans q in + if List.for_all + (fun (_, t) -> + let _, _, m, f = Transition.node t in + let (_, _, stl), (_, _, str) = Formula.st f in + not m && is_topdown_loop q stl && is_topdown_loop q str) tr_list + then remove_topdown_marking trans ll q + else last :: l + + +let compile path = + let cont, conf = compile_path true path empty_info in + let (_, _, init), _ = Formula.st cont in + let get t s = + try + Hashtbl.find t s + with + | Not_found -> [] + in + let table = Hashtbl.create 13 in + let () = + List.iter (fun tr -> + let q, ts, _, _ = Transition.node tr in + let l = get table q in + Hashtbl.replace table q ((ts, tr)::l)) conf.trans + in + let auto = { + id = Oo.id (object end); + Ata.states = conf.states; + init = init; + Ata.last = conf.last; + trans = table; + Ata.marking_states = conf.marking_states; + Ata.topdown_marking_states = conf.marking_states; + (* StateSet.from_list ( + remove_topdown_marking table + (StateSet.elements conf.marking_states) + (StateSet.min_elt init) + ); *) + Ata.bottom_states = StateSet.union conf.bottom_states conf.marking_states; + Ata.true_states = conf.bottom_states; + } + in auto, (if conf.bottom_up = `Yes then Some conf.text_pred else None)