From: Huibo SHI Date: Tue, 11 Mar 2014 15:19:22 +0000 (+0100) Subject: Ajout de l'implementation preliminaire. X-Git-Url: http://git.nguyen.vg/gitweb/?p=tatoo.git;a=commitdiff_plain;h=f3a0235e4715d24d8e8b4053923d433e1d876851 Ajout de l'implementation preliminaire. --- diff --git a/src/query_tree.ml b/src/query_tree.ml new file mode 100644 index 0000000..a1b47a6 --- /dev/null +++ b/src/query_tree.ml @@ -0,0 +1,148 @@ +open Table + + + +let all_nodes tree = let root = Naive_tree.root tree in + eval_axis tree [root] (Descendant true) + +let element_by_tag tree tagset = let dom = all_nodes tree in + List.filter (fun c -> QNameSet.mem (Naive_tree.tag tree c) tagset ) dom + +let rec compile_single_path p = + let open Xpath.Ast in + match p with + | Absolute p | Relative p -> compile_step_list (List.rev p) + +and compile_step_list p = + match p with + | [] -> Start + | (a,(test,_),el) :: r -> + let qtree = compile_step_list r in + let res = Binop ( Inter,Axis (a,qtree), Tag (test) ) in + List.fold_left (fun acc e -> + Binop(Inter, acc, compile_expr e)) res el (*avant j'ai utilise une function compile_expr_list ,c'est pas genial*) + + and compile_expr (e : Xpath.Ast.expr ) = match e with + | Fun_call (f, [ e0 ]) when (QName.to_string f) = "not" -> + let qtree = compile_expr e0 in + Binop (Diff , Dom, qtree) + + | Binop (e1,op,e2) -> let qtree1 = compile_expr e1 in + let qtree2 = compile_expr e2 in + begin + match op with + | Or -> Binop (Union , qtree1,qtree2) + | And -> Binop (Inter ,qtree1,qtree2) + | _ -> failwith "Unknown operator" + end + | Path p -> compile_path_rev p + | _ -> failwith "Unknown expression" + + and compile_path_rev p = + match p with + | [] -> assert false + | [p] -> compile_single_path_rev p + | p::r -> List.fold_left (fun acc p -> Binop (Union , acc, compile_single_path_rev p) ) (compile_single_path_rev p) r + + and compile_single_path_rev p = + match p with + | Absolute p | Relative p -> compile_step_list_rev (List.rev p) + + and compile_step_list_rev p = match p with + | [] -> Dom (*assert false*) (*on fait rien , mais comment signifer ???*) + | (a,(test,_),el) :: r -> + let qtree = compile_step_list_rev r in + let res = Binop (Inter , qtree, Tag(test)) in + let qtree2 = List.fold_left (fun acc e -> + Binop(Inter, acc, compile_expr e)) res el in + let a_rev = axis_rev a in + Axis (a_rev , qtree2) + + + and axis_rev a = + let open Xpath.Ast in + match a with + Self -> Self + | Attribute -> assert false + | Child -> Parent + | Descendant b -> + if not b then (Ancestor false) + else (Ancestor true) (* true = descendant-or-self, false = descendant *) + | FollowingSibling -> PrecedingSibling + | Parent -> Child + | Ancestor b -> + if not b then (Descendant false) + else (Descendant true) (* true = ancestor-or-self, false = ancestor *) + | PrecedingSibling -> FollowingSibling + | Preceding -> Following + | Following -> Preceding + + +let compile_xpath p = match p with + | [] -> assert false + | [p] -> compile_single_path p + | p::r -> List.fold_left (fun acc p -> Binop (Union , acc, compile_single_path p) ) (compile_single_path p) r + +let comp_node t n1 n2 = (Naive_tree.preorder t n1) < (Naive_tree.preorder t n2) + + +let rec union_list t l1 l2 = + match l1,l2 with + | [],l2 -> l2 + | l1, [] -> l1 + | h1::ll1, h2::ll2 -> if (comp_node t h2 h1) then h2 :: (union_list t l1 ll2) + else if (comp_node t h1 h2) then h1::(union_list t ll1 l2) + else h1 ::(union_list t ll1 ll2) + +let rec inter_list t l1 l2 = + match l1,l2 with + | [],l2 -> [] + | l1, [] -> [] + | h1::ll1, h2::ll2 -> if (comp_node t h1 h2) then inter_list t ll1 l2 + else if (comp_node t h2 h1) then inter_list t l1 ll2 + else h1 :: (inter_list t ll1 ll2) + +let rec diff_list t l1 l2 = + match l1,l2 with + | [],l2 -> [] + | l1, [] -> l1 + | h1::ll1, h2::ll2 -> if (comp_node t h1 h2) then h1::(diff_list t ll1 l2) + else if (comp_node t h2 h1) then h2 :: (diff_list t l1 ll2) + else diff_list t ll1 ll2 + + +let do_debug = ref true + +let debug tree q l = + if !do_debug then begin + Format.fprintf Format.std_formatter "Evaluation de: "; + print_query_tree Format.std_formatter q; + Format.fprintf Format.std_formatter "\nResultat: %i" + (List.length l); + Format.pp_print_flush Format.std_formatter (); + print_node_list tree l; + Format.fprintf Format.std_formatter "\n----------------\n"; + Format.pp_print_flush Format.std_formatter (); + end + + +let rec eval_query_tree tree start q = + let resultat = + match q with + | Start -> start + | Dom -> all_nodes tree + | Tag t -> element_by_tag tree t + | Axis (a,q1) -> let ls = eval_query_tree tree start q1 in + eval_axis tree ls a + | Binop (op,q1,q2)-> begin + let ls1 = eval_query_tree tree start q1 in + let ls2 = eval_query_tree tree start q2 in + match op with + | Union -> union_list tree ls1 ls2 + | Inter -> inter_list tree ls1 ls2 + | Diff -> diff_list tree ls1 ls2 + end + in + debug tree q resultat; + resultat + diff --git a/src/query_tree.mli b/src/query_tree.mli new file mode 100644 index 0000000..9afa1ad --- /dev/null +++ b/src/query_tree.mli @@ -0,0 +1,34 @@ +val all_nodes : Naive_tree.t -> Naive_tree.node list +(** [all_nodes t] returns all the nodes in the tree [t]. + Returns an empty list if there are no nodes in the tree. + *) + +val element_by_tag : Naive_tree.t -> QNameSet.t -> Naive_tree.node list +(** [element_by_tag t tag] returns all the nodes whose tag equal to [tag] in the tree [t]. *) + +val compile_single_path : Xpath.Ast.single_path -> Table.query_tree +(** il y a un cycle si on met la definition de query_tree dans le fichier [Query_tree.ml] + [compile_single_path spath] returns un query_tree built with [spath] +*) + +val compile_xpath : Xpath.Ast.path -> Table.query_tree +(** [compile_xpath path] returns un query_tree built with [xpath] +*) + +val union_list : Naive_tree.t ->Naive_tree.node list -> Naive_tree.node list -> Naive_tree.node list +(** union two lists without duplicating +*) + +val inter_list : Naive_tree.t ->Naive_tree.node list -> Naive_tree.node list -> Naive_tree.node list +(** make a list without duplicating by using the intersection of two lists +*) + +val diff_list : Naive_tree.t ->Naive_tree.node list -> Naive_tree.node list -> Naive_tree.node list +(** difference two lists without duplicating +*) + +val eval_query_tree : Naive_tree.t -> Naive_tree.node list ->Table.query_tree -> Naive_tree.node list +(** [eval_query_tree tree start q] returns the set of nodes that evaluate by the query_tree [q]. + [start] the set of nodes departing. + [q] query_tree +*) diff --git a/src/table.ml b/src/table.ml new file mode 100644 index 0000000..f6e7749 --- /dev/null +++ b/src/table.ml @@ -0,0 +1,161 @@ +(*creer a 28/01/2014*) + +type move = Self + | Firstchild + | Nextsibling + | Revfirstchild + | Prevsibling + +type query_tree = Binop of op * query_tree * query_tree + | Axis of Xpath.Ast.axis * query_tree + | Start + | Dom + | Tag of QNameSet.t +and op = Union | Inter | Diff + +(*28/01/2014 + parametres : tree l'arbre xml + n un noeud + m move + retour :un noeud qui correspond à la relation r +*) + +let print_node_list tree l = + List.iter (fun node -> + Naive_tree.print_xml stdout tree node + ) l; + print_newline() + +let rec print_query_tree fmt q = + match q with + Dom -> Format.fprintf fmt "Dom" + | Start -> Format.fprintf fmt "Start" + | Tag t -> Format.fprintf fmt "Tag(%a)" QNameSet.print t + | Axis (a,q) -> + Format.fprintf fmt "%a(%a)" Xpath.Ast.print_axis a print_query_tree q + | Binop (op,q1,q2) -> + Format.fprintf fmt "%a(%a, %a)" + print_binop op + print_query_tree q1 + print_query_tree q2 + +and print_binop fmt o = + match o with + | Union -> Format.fprintf fmt "Union" + | Inter -> Format.fprintf fmt "Inter" + | Diff -> Format.fprintf fmt "Diff" + +let rec eval_relation tree m n = + match m with + Self -> n + | Firstchild -> Naive_tree.first_child tree n + | Nextsibling -> Naive_tree.next_sibling tree n + | Revfirstchild -> Naive_tree.parent_of_first tree n + | Prevsibling -> Naive_tree.prev_sibling tree n + +(*28/01/2014 + parametres : tree l'arbre xml + ls l'ensemble de noeuds + m move + retour : l'ensemble de noeuds qui correspondent à la relation r +*) + + +let compare_node tree a b = + compare (Naive_tree.preorder tree a ) (Naive_tree.preorder tree b ) + +let rec eval_move tree ls m = + match m with + Self -> ls + | r -> List.filter (fun n -> n != Naive_tree.nil) + (List.map (eval_relation tree r) ls) + + +(*28/01/2014 + parametres : tree l'arbre xml + ls l'ensemble de noeuds + m move + retour : l'ensemble de noeuds qui correspondent à des relations lr +*) + +and eval_star tree ls lr = + let h = Hashtbl.create 17 in + let q = Queue.create () in + List.iter ( fun e -> Queue.add e q ) ls; + while not (Queue.is_empty q ) do + let n = Queue.pop q in + if not (Hashtbl.mem h n) then begin + Hashtbl.add h n (); + List.iter ( fun r -> let m = eval_relation tree r n in + if m != Naive_tree.nil && not (Hashtbl.mem h m ) then begin + + Queue.add m q; end + ) lr + end + done; + let l = Hashtbl.fold (fun k _ acc -> k::acc) h [] in + List.sort (compare_node tree) l + +(*28/01/2014 + parametres : tree l'arbre xml + ls l'ensemble de noeuds + a axis + retour : l'ensemble de noeuds qui correspondent à l'axe +*) + +let keep_elements t l = + List.filter (fun n -> match Naive_tree.kind t n with + | Element | Text | Document -> true | _ -> false) l + +let rec eval_axis tree ls a = + let open Xpath.Ast in + let res = + (* let ls = List.sort ( fun a b -> compare (Naive_tree.preorder tree a ) (Naive_tree.preorder tree b ) ) ls in écrir dans la log!!!!!*) + match a with + Self -> ls + + | Attribute -> assert false + + | Child -> let lfc = eval_move tree ls Firstchild in + eval_star tree lfc [Nextsibling] + + | Descendant c -> let lfc = eval_move tree ls Firstchild in + let ls2 = eval_star tree lfc [Firstchild;Nextsibling] in + + (* List.merge (compare_node tree) (if c then ls else []) + (List.merge (compare_node tree) ls2 ls)*) + + if not c then ls2 + else List.merge (compare_node tree) ls2 ls + + | FollowingSibling -> let lnexts = eval_move tree ls Nextsibling in + eval_star tree lnexts [Nextsibling] + + | Parent -> let lprevs = eval_star tree ls [Prevsibling] in + eval_move tree lprevs Revfirstchild + + | Ancestor b -> let ls2 = eval_star tree ls [Revfirstchild;Prevsibling] in + let ls3 = eval_move tree ls2 Revfirstchild in + if not b then ls3 + else List.merge (compare_node tree ) ls3 ls + + | PrecedingSibling -> let ls2 = eval_star tree ls [Prevsibling] in + eval_move tree ls2 Prevsibling + + | Preceding -> let ls2 = eval_axis tree ls (Ancestor true) in + let ls3 = eval_axis tree ls2 PrecedingSibling in + eval_axis tree ls3 (Descendant true) + + | Following -> let ls2 = eval_axis tree ls (Ancestor true) in + let ls3 = eval_axis tree ls2 FollowingSibling in + eval_axis tree ls3 (Descendant true) + in + keep_elements tree res + + + + + + + + diff --git a/src/table_driver.ml b/src/table_driver.ml new file mode 100644 index 0000000..9d5b240 --- /dev/null +++ b/src/table_driver.ml @@ -0,0 +1,133 @@ +open Table +open Table_options +open Query_tree + +let parse_xpath p = + Xpath.Parser.parse (Ulexing.from_utf8_string p) + +let main () = + let () = Table_options.parse () in + let doc = + let fd, close_fd = match !Table_options.input_file with + None | Some "-" | Some "/dev/stdin" ->stdin,ignore (*qu'est-ce que c'est ignore?????*) + | Some input -> + let fd = open_in input in fd, fun() -> close_in fd + in + (*let inc = open_in Sys.argv.(1) in*) + let doc = Naive_tree.load_xml_file fd in + close_fd(); (*ca sert a fermer fd????*) + doc + in + let queries = + List.map ( fun q -> + parse_xpath q) + !Table_options.queries + in + let query_tree_list = + List.map (fun query -> compile_xpath query) queries + in + let cpt = ref 0 in + + List.iter ( fun q -> + + let res = eval_query_tree doc [ (Naive_tree.root doc) ] q in + print_node_list doc res; + Format.printf "---------------Fin %i\n!" !cpt; + incr cpt; + ) query_tree_list ; + + (* let output = + match !Options.output_file with + | None | Some "-" | Some "/dev/stdout" -> stdout + | Some f -> open_out f + in + List.iter (fun query -> + Logger.msg `STATS "Query: %a " Xpath.Ast.print_path query) queries; + List.iter (fun query_tree -> + Logger.msg `STATS "@[Query_tree: @\n%a@]" print_query_tree Format.std_formatter query_tree) query_tree_list; + + *) + exit 0 + + +let () = main () + + + + + + + +(* let query = Xpath.Parser.parse + (Ulexing.from_utf8_string Sys.argv.(2)) + in + Format.printf "La requete est: %a\n%!" + Xpath.Ast.print_path query; *) + + (* print_endline "Test 1 root.firstchild"; + let c1 = eval_move doc [ (Naive_tree.root doc) ] Firstchild in + print_node_list doc c1; + print_endline "Test 2 root.firstchild.firstchild"; + let c2 = eval_move doc c1 Firstchild in + print_node_list doc c2; + print_endline "Test 3 root.firstchild.firstchild.nextsibling"; + let c3 = eval_move doc c2 Nextsibling in + print_node_list doc c3; + print_endline "Test 4 root.firstchild.firstchild.nextsibling.firstchild"; + let c4 = eval_move doc c3 Firstchild in + print_node_list doc c4; + print_endline "Test 5 root.firstchild.firstchild.nextsibling.firstchild.ancestor false"; + let c5 = eval_axis doc c4 (Ancestor false) in + print_node_list doc c5; + print_endline "Test 6"; + let c6 = eval_move doc c5 Prevsibling in + print_node_list doc c6; + print_endline "Test 7"; + let c7 = eval_move doc c6 Revfirstchild in + print_node_list doc c7; + print_endline "Test 8 Child [root]"; + let c8 = eval_axis doc [Naive_tree.root doc] Child in + print_node_list doc c8; + print_endline "Test 9 Descendant [b]"; + let c9 = eval_axis doc c2 (Descendant false) in + print_node_list doc c9; + print_endline "Test 10 Descendant or self [b]"; + let c10 = eval_axis doc c2 (Descendant true) in + print_node_list doc c10; + print_endline "Test 11 FollowingSibling [b]"; + let c11 = eval_axis doc c2 FollowingSibling in + print_node_list doc c11; + print_endline "Test 12 Parent [b]"; + let c12 = eval_axis doc c2 Parent in + print_node_list doc c12; + print_endline "Test 13 Ancestor or self [b]"; + let c13 = eval_axis doc c2 (Ancestor true) in + print_node_list doc c13; + print_endline "Test 14 PrecdingSibling [b]"; + let c14 = eval_axis doc c2 PrecedingSibling in + print_node_list doc c14; + print_endline "Test 15 preceding [b]"; + let c15 = eval_axis doc c2 Preceding in + print_node_list doc c15; + print_endline "Test 16 Following [b]"; + let c16 = eval_axis doc c2 Following in + print_node_list doc c16; + print_endline "Test 17 tag [b]"; + let c17 = element_by_tag doc (QName.make "b") in + print_node_list doc c17; + print_endline "Test 18 all nodes"; + let c18 = all_nodes doc in + print_node_list doc c18; + print_endline "Test 19 compile_xpath"; *) + + + (*Format.printf "Le document contient: %i noeuds\n%!" (Naive_tree.size doc); + let c19 = compile_xpath (parse_xpath "/child::a[child::b or not(following::]") in + Format.printf ">> %a\n%!" print_query_tree c19; + print_endline "Test 20 eval_query_tree"; + let res = eval_query_tree doc [ (Naive_tree.root doc) ] c19 in + print_node_list doc res; + print_endline "Fin";*) + + + diff --git a/src/table_options.ml b/src/table_options.ml new file mode 100644 index 0000000..d1e08e7 --- /dev/null +++ b/src/table_options.ml @@ -0,0 +1,30 @@ +open Arg + + +let input_file : string option ref = ref None +let output_file : string option ref = ref None +let queries = ref [] + + +let set_string_option r s = r := Some s (*pas compris*) + +let specs = align [ + + "-d", String (set_string_option input_file), + " specify the input document file [default stdin]"; + "--doc", String (set_string_option input_file), " "; + "-o", String (set_string_option output_file), + " specify the output file [default stdout]"; + "--out", String (set_string_option output_file), " "; +] + +let usage_msg = + Printf.sprintf "usage: %s [options] query [query ... query]" Sys.argv.(0) + +let usage () = usage specs usage_msg + +let parse () = + parse specs (fun q -> queries := q :: !queries ) usage_msg; + match !queries with + [] -> raise (Arg.Bad "missing query") + | l -> queries := List.rev l