From 83c90cb5eeebfffa05d0383430eb80e7905b46a0 Mon Sep 17 00:00:00 2001 From: Lucca Hirschi Date: Thu, 5 Jul 2012 15:27:49 +0200 Subject: [PATCH] Full implem of BU_over_Max and TP_max (to be tested) + my.xml from thesis + stuffs in formula/asta + selected_nodes in asta. --- run_tests | 16 ++-- src/asta.ml | 4 + src/asta.mli | 6 ++ src/formula.ml | 16 +++- src/formula.mli | 5 +- src/run.ml | 167 ++++++++++++++++++++++++++--------- src/run.mli | 4 + src/stateSet.ml | 10 +-- src/test.ml | 17 ++-- tests/docs/my.xml | 44 ++++++--- tests/queries/my.queries | 2 +- tests/queries/my.queries.old | 1 + 12 files changed, 221 insertions(+), 71 deletions(-) create mode 100644 tests/queries/my.queries.old diff --git a/run_tests b/run_tests index 5d5752d..13f44fe 100755 --- a/run_tests +++ b/run_tests @@ -1,8 +1,8 @@ -./test.native ./tests/docs/my.xml '/child::site/child::regions' -echo "________________________________________________________________________________" -./test.native ./tests/docs/my.xml -f ./tests/queries/Treebank.xml.queries -echo "________________________________________________________________________________" -./test.native ./tests/docs/my.xml '/descendant::listitem[not(descendant::keyword/child::emph)]/descendant::parlist' -echo "________________________________________________________________________________" -./test.native ./tests/docs/my.xml -f ./tests/queries/my.queries -./test.native ./tests/docs/my.xml -f ./tests/queries/my.queries 2> tests/results/my.result \ No newline at end of file +#./test.native ./tests/docs/my.xml '/child::site/child::regions' +#echo "________________________________________________________________________________" +#./test.native ./tests/docs/my.xml -f ./tests/queries/Treebank.xml.queries +#echo "________________________________________________________________________________" +#./test.native ./tests/docs/my.xml '/descendant::listitem[not(descendant::keyword/child::emph)]/descendant::parlist' +#echo "________________________________________________________________________________" +./test.native ./tests/docs/my.xml -f ./tests/queries/my.queries.old +#./test.native ./tests/docs/my.xml -f ./tests/queries/my.queries 2> tests/results/my.result \ No newline at end of file diff --git a/src/asta.ml b/src/asta.ml index bbbba49..3189b27 100644 --- a/src/asta.ml +++ b/src/asta.ml @@ -121,6 +121,10 @@ let init_top ast = ast.top <- (StateSet.empty) let top_states ast = StateSet.elements ast.top +let bot_states ast = ast.bottom + +let selec_states ast = ast.selec + let print fmt asta = let print_box fmt flag = let pp = Format.fprintf fmt in diff --git a/src/asta.mli b/src/asta.mli index 190f979..e3d3c0d 100644 --- a/src/asta.mli +++ b/src/asta.mli @@ -75,6 +75,12 @@ val init_top : t -> unit val top_states : t -> state list (** Give the list of top states of an ASTA *) +val bot_states : t -> StateSet.t +(** Give the list of bottom states of an ASTA *) + +val selec_states : t -> StateSet.t +(** Give the list of selecting states of an ASTA *) + val print : Format.formatter -> t -> unit (** Pretty printer *) diff --git a/src/formula.ml b/src/formula.ml index 03618c2..c75748f 100644 --- a/src/formula.ml +++ b/src/formula.ml @@ -80,7 +80,21 @@ let rec eval_form ss f = match expr f with | Or(f1,f2) -> eval_form ss f1 || eval_form ss f2 | Atom(dir, b, s) -> let set = match dir with |`Left -> fst ss | `Right -> snd ss in - StateSet.mem s set + if b then StateSet.mem s set + else not (StateSet.mem s set) + +let rec infer_form ssq ssr f = match expr f with + | False -> false + | True -> true + | And(f1,f2) -> infer_form ssq ssr f1 && infer_form ssq ssr f2 + | Or(f1,f2) -> infer_form ssq ssr f1 || infer_form ssq ssr f2 + | Atom(dir, b, s) -> + let setq, setr = match dir with + |`Left -> fst ssq, fst ssr + | `Right -> snd ssq, fst ssr in + (* WG: WE SUPPOSE THAT Q^r and Q^q are disjoint ! *) + let mem = StateSet.mem s setq || StateSet.mem s setr in + if b then mem else not mem (* End *) let rec print ?(parent=false) ppf f = diff --git a/src/formula.mli b/src/formula.mli index e108758..6c54386 100644 --- a/src/formula.mli +++ b/src/formula.mli @@ -53,7 +53,10 @@ val size : t -> int (** Syntactic size of the formula *) val eval_form : (StateSet.t * StateSet.t) -> t -> bool -(** [eval_form s_1,s_2 F] evaluates the formula [F] on [(s_1,s_2)] *) +(** [eval_form sf,sn F] evaluates the formula [F] on [(sf,sn)] *) + +val infer_form : (StateSet.t * StateSet.t) -> (StateSet.t * StateSet.t) -> t -> bool +(** [eval_form S1 S2 F] infers S1; S2 |- F *) val print : Format.formatter -> t -> unit (** Pretty printer *) diff --git a/src/run.ml b/src/run.ml index d88352b..8a7e066 100644 --- a/src/run.ml +++ b/src/run.ml @@ -17,86 +17,173 @@ module Node = struct type t = int let hash n = n - let compare n1 n2 = n1 - n2 - let equal n1 n2 = n1 = n2 + let compare = (-) + let equal = (=) end module NodeHash = Hashtbl.Make (Node) type t = (StateSet.t*StateSet.t) NodeHash.t -(** Map from node to query and recognizing states *) +(** Map from nodes to query and recognizing states *) (* Note that we do not consider the nil nodes *) exception Oracle_fail exception Over_max_fail exception Max_fail +(* Mapped sets for leaves *) +let map_leaf asta = (Asta.bot_states asta, StateSet.empty) +let empty = (StateSet.empty,StateSet.empty) + (* Build the Oracle *) let rec bu_oracle asta run tree tnode = - let init_set node = - let set = (StateSet.empty,StateSet.empty) in - NodeHash.add run node set - and node = Tree.preorder tree tnode in - if (Tree.is_leaf tree tnode) + let node = Tree.preorder tree tnode in + if Tree.is_leaf tree tnode then - if not (tnode == Tree.nil) - then - init_set node - else () + if tnode == Tree.nil + then () + else NodeHash.add run node (map_leaf asta) else - let tfnode = Tree.first_child tree tnode - and tnnode = Tree.next_sibling tree tnode in + let tfnode = Tree.first_child tree tnode (* first child *) + and tnnode = Tree.next_sibling tree tnode in (* next-sibling *) + let fnode,nnode = + (Tree.preorder tree tfnode, Tree.preorder tree tnnode) in begin bu_oracle asta run tree tfnode; bu_oracle asta run tree tnnode; - let (fnode,nnode) = - (Tree.preorder tree tfnode, Tree.preorder tree tnnode) in let q_rec n = try NodeHash.find run n - with Not_found -> (StateSet.empty,StateSet.empty) in - let (_,qf),(_,qn) = q_rec fnode,q_rec nnode in - let lab = Tree.tag tree tnode in + with Not_found -> map_leaf asta in + let (_,qfr),(_,qnr) = q_rec fnode,q_rec nnode (* computed in rec call *) + and lab = Tree.tag tree tnode in let _,list_tr = Asta.transitions_lab asta lab in (* only take reco. *) - let result_set = ref StateSet.empty in - let rec result = function - | [] -> () + let rec result set = function + | [] -> set | (q,form) :: tl -> - if Formula.eval_form (qf,qn) form - then begin - result_set := (StateSet.add q (!result_set)); - result tl; end - else result tl in - result list_tr; - NodeHash.add run node (StateSet.empty, !result_set) - (* Do not remove elt in Hahs (the old one would appear) *) + if Formula.eval_form (qfr,qnr) form + then result (StateSet.add q set) tl + else result set tl in + let result_set = result StateSet.empty list_tr in + NodeHash.add run node (StateSet.empty, result_set) end (* Build the over-approx. of the maximal run *) -let rec bu_over_max asta run tree node = - () +let rec bu_over_max asta run tree tnode = + if (Tree.is_leaf tree tnode) (* BU_oracle has already created the map *) + then + () + else + let tfnode = Tree.first_child tree tnode + and tnnode = Tree.next_sibling tree tnode in + begin + bu_over_max asta run tree tfnode; + bu_over_max asta run tree tnnode; + let (fnode,nnode) = + (Tree.preorder tree tfnode, Tree.preorder tree tnnode) + and node = Tree.preorder tree tnode in + let q_rec n = + try NodeHash.find run n + with Not_found -> map_leaf asta in + let (qfq,qfr),(qnq,qnr) = q_rec fnode,q_rec nnode in + let lab = Tree.tag tree tnode in + let list_tr,_ = Asta.transitions_lab asta lab in (* only take query st. *) + let rec result set = function + | [] -> set + | (q,form) :: tl -> + if Formula.infer_form (qfq,qnq) (qfr,qnr) form + then result (StateSet.add q set) tl + else result set tl in + let _,resultr = try NodeHash.find run node + with _ -> raise Over_max_fail in + let result_set = result StateSet.empty list_tr in + NodeHash.replace run node (result_set, resultr) + (* Never remove elt in Hash (the old one would appear) *) + end + (* Build the maximal run *) -let rec tp_max asta run tree node = - () +let rec tp_max asta run tree tnode = + if (Tree.is_leaf tree tnode) (* BU_oracle has already created the map *) + then + () + else + let node = Tree.preorder tree tnode + and tfnode = Tree.first_child tree tnode + and tnnode = Tree.next_sibling tree tnode in + let (fnode,nnode) = + (Tree.preorder tree tfnode, Tree.preorder tree tnnode) in + let q_rec n = + try NodeHash.find run n + with Not_found -> (Asta.bot_states asta,StateSet.empty) in + let (qf),(qn) = q_rec fnode,q_rec nnode in + let lab = Tree.tag tree tnode in + let list_tr,_ = Asta.transitions_lab asta lab in (* only take query. *) + let set_node,_ = try NodeHash.find run node + with _ -> raise Max_fail in + let rec result = function + | [] -> [] + | (q,form) :: tl -> + if (Formula.infer_form qf qn form) && (StateSet.mem q set_node) + then form :: (result tl) + else result tl in + let list_form = result list_tr in + let rec add_st (ql,qr) = function + | [] -> ql,qr + | f :: tl -> let sql,sqr = Formula.st f in + let ql' = StateSet.union sql ql + and qr' = StateSet.union sqr qr in + add_st (ql',qr') tl in + let ql,qr = add_st (StateSet.empty, StateSet.empty) list_form in + let qfq,qfr = try NodeHash.find run fnode + with | _ -> map_leaf asta + and qnq,qnr = try NodeHash.find run nnode + with | _ -> map_leaf asta in + begin + if Tree.is_leaf tree tfnode + then () + else NodeHash.replace run fnode (StateSet.inter qfq ql,qfr); + if Tree.is_leaf tree tnnode + then () + else NodeHash.replace run nnode (StateSet.inter qnq qr,qnr); + tp_max asta run tree tfnode; + tp_max asta run tree tnnode; + end let compute tree asta = + let flag = 2 in (* debug *) let size_tree = 10000 in (* todo *) let map = NodeHash.create size_tree in bu_oracle asta map tree (Tree.root tree); - bu_over_max asta map tree (Tree.root tree); - tp_max asta map tree (Tree.root tree); + if flag > 0 then begin + bu_over_max asta map tree (Tree.root tree); + if flag = 2 + then + tp_max asta map tree (Tree.root tree) + else () + end + else (); map +let selected_nodes tree asta = + let run = compute tree asta in + NodeHash.fold + (fun key set acc -> + if not(StateSet.is_empty + (StateSet.inter (fst set) (Asta.selec_states asta))) + then key :: acc + else acc) + run [] + let print fmt run = - let print_d_set fmt (s_1,s_2) = - Format.fprintf fmt "@[(%a,@ %a)@]" + let print_d_set fmt (s_1,s_2) = + Format.fprintf fmt "(%a,%a)" StateSet.print s_1 StateSet.print s_2 in - let print_map fmt run = + let print_map fmt run = let pp = Format.fprintf fmt in if NodeHash.length run = 0 then Format.fprintf fmt "ø" else - NodeHash.iter (fun cle set -> pp "| %i-->%a @ " cle print_d_set set) + NodeHash.iter (fun cle set -> pp "| %i->%a @ " cle print_d_set set) run in let print_box fmt run = let pp = Format.fprintf fmt in diff --git a/src/run.mli b/src/run.mli index c7c34c9..3413cd9 100644 --- a/src/run.mli +++ b/src/run.mli @@ -21,5 +21,9 @@ type t val compute : Tree.t -> Asta.t -> t (** Gives the maximal run of an ASTA on a tree *) +val selected_nodes : Tree.t -> Asta.t -> int list +(** Gives a list of the positions (preorder) of selected nodes of a tree by + an ASTA *) + val print : Format.formatter -> t -> unit (** Pretty printer *) diff --git a/src/stateSet.ml b/src/stateSet.ml index b97be4e..2a9e283 100644 --- a/src/stateSet.ml +++ b/src/stateSet.ml @@ -18,10 +18,10 @@ open Format include Ptset.Make (Hcons.PosInt) let print ppf s = - let p_set ppf s = - if is_empty s - then fprintf ppf "ø" - else + if is_empty s + then fprintf ppf "ø" + else + let p_set ppf s = (Pretty.print_list ~sep:"," (State.print)) ppf (elements s) in - fprintf ppf "{ %a }" p_set s + fprintf ppf "{%a}" p_set s diff --git a/src/test.ml b/src/test.ml index e9000c3..42dc011 100644 --- a/src/test.ml +++ b/src/test.ml @@ -14,9 +14,9 @@ (***********************************************************************) -(** use: xml_file "XPath querie" - or : xml_file -f XPath_querie_file - only the first line of XPath_querie_file is read +(** use: [./test xml_file "XPath querie"] + or : [./test xml_file -f XPath_querie_file] + only the first line of [XPath_querie_file] is read *) open Format @@ -56,6 +56,8 @@ let () = let query = query () in let asta = build_asta query in let run = compute_run doc asta in + let selected_nodes = Run.selected_nodes doc asta in + Format.pp_set_margin err_formatter 80; fprintf err_formatter "@[##### Query #####@. %a@]\n" XPath.Ast.print query; output_string stderr "\n##### Doc #####\n"; @@ -63,7 +65,12 @@ let () = output_string stderr "\n"; Asta.print err_formatter asta; Run.print err_formatter run; - output_string stderr "\n # Doc: \n"; + output_string stderr "\n # Doc with positions: \n"; Tree.print_xml_preorder stderr doc (Tree.root doc); - output_string stderr "\n"; + let rec print_selec fmt l = match l with + | [x] -> fprintf fmt "%s" (string_of_int x) + | x :: tl -> fprintf fmt "%s" ((string_of_int x)^"; ");print_selec fmt tl + | [] -> fprintf fmt "%s" "ø" in + fprintf err_formatter "@.@. # Selected nodes: {%a}@." + print_selec selected_nodes; exit 0 diff --git a/tests/docs/my.xml b/tests/docs/my.xml index 5d8fa9d..0c21bc6 100644 --- a/tests/docs/my.xml +++ b/tests/docs/my.xml @@ -1,10 +1,34 @@ - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/queries/my.queries b/tests/queries/my.queries index a578841..04ad466 100644 --- a/tests/queries/my.queries +++ b/tests/queries/my.queries @@ -1 +1 @@ -/descendant::a[descendant::c[child::e and not(descendant::f[not(descendant::e)]/descendant::g)]]/descendant::b[child::g] \ No newline at end of file +/descendant::a[descendant::c[descendant::e and not(descendant::f[not(descendant::e)]/descendant::g)]]/descendant::b[descendant::g] \ No newline at end of file diff --git a/tests/queries/my.queries.old b/tests/queries/my.queries.old new file mode 100644 index 0000000..a578841 --- /dev/null +++ b/tests/queries/my.queries.old @@ -0,0 +1 @@ +/descendant::a[descendant::c[child::e and not(descendant::f[not(descendant::e)]/descendant::g)]]/descendant::b[child::g] \ No newline at end of file -- 2.17.1