(***********************************************************************) (* *) (* TAToo *) (* *) (* Kim Nguyen, LRI UMR8623 *) (* Université Paris-Sud & CNRS *) (* *) (* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) (* Recherche Scientifique. All rights reserved. This file is *) (* distributed under the terms of the GNU Lesser General Public *) (* License, with the special exception on linking described in file *) (* ../LICENSE. *) (* *) (***********************************************************************) type node = { tag : QName.t; preorder : int; mutable kind : Tree.NodeKind.t; mutable data : string; mutable first_child : node; mutable next_sibling : node; mutable parent: node; } let rec nil = { tag = QName.nil; kind = Tree.NodeKind.Element; preorder = -1; data = ""; first_child = nil; next_sibling = nil; parent = nil; } let dummy_tag = QName.of_string "#dummy" let rec dummy = { tag = dummy_tag; kind = Tree.NodeKind.Element; preorder = -1; data = ""; first_child = dummy; next_sibling = dummy; parent = dummy; } type t = { root : node; size : int; by_preorder : node array; (* TODO add other intersting stuff *) } module Parser = struct type context = { mutable stack : node list; text_buffer : Buffer.t; mutable current_preorder : int; } let print_node_ptr fmt n = Format.fprintf fmt "<%s>" (if n == nil then "NIL" else if n == dummy then "DUMMY" else "NODE " ^ string_of_int n.preorder) let debug_node fmt node = Format.fprintf fmt "{ tag=%s; preorder=%i; data=%S;\ first_child=%a; next_sibling=%a; parent=%a }" (QName.to_string node.tag) node.preorder node.data print_node_ptr node.first_child print_node_ptr node.next_sibling print_node_ptr node.parent let debug_ctx fmt ctx = Format.fprintf fmt "Current context: { preorder = %i\n; stack = \n%a\n }\ \n-------------\n" ctx.current_preorder (Pretty.print_list ~sep:";\n" debug_node) ctx.stack let push n ctx = ctx.stack <- n :: ctx.stack let pop ctx = match ctx.stack with [] -> failwith "XML parse error" | e :: l -> ctx.stack <- l; e let top ctx = match ctx.stack with | [] -> failwith "XML parse error" | e :: _ -> e let next ctx = let i = ctx.current_preorder in ctx.current_preorder <- 1 + i; i let is_left n = n.next_sibling == dummy let text_string = QName.to_string QName.text let comment_string = QName.to_string QName.comment let rec start_element_handler parser_ ctx tag attr_list = do_text parser_ ctx; let parent = top ctx in let n = { tag = QName.of_string tag; kind = Tree.NodeKind.Element; preorder = next ctx; data = ""; first_child = dummy; next_sibling = dummy; parent = parent; } in if parent.first_child == dummy then parent.first_child <- n else parent.next_sibling <- n; push n ctx; List.iter (do_attribute parser_ ctx) attr_list and do_attribute parser_ ctx (att, value) = start_element_handler parser_ ctx att []; let n = top ctx in n.data <- value; n.kind <- Tree.NodeKind.Attribute; end_element_handler parser_ ctx att and consume_closing ctx n = if n.next_sibling != dummy then let _ = pop ctx in consume_closing ctx (top ctx) and end_element_handler parser_ ctx _ = do_text parser_ ctx; let node = top ctx in if node.first_child == dummy then node.first_child <- nil else begin node.next_sibling <- nil; consume_closing ctx node end and do_text parser_ ctx = if Buffer.length ctx.text_buffer != 0 then let s = Buffer.contents ctx.text_buffer in Buffer.clear ctx.text_buffer; start_element_handler parser_ ctx text_string []; let node = top ctx in node.data <- s; node.kind <- Tree.NodeKind.Text; end_element_handler parser_ ctx text_string and comment_handler parser_ ctx s = do_text parser_ ctx; start_element_handler parser_ ctx comment_string []; let node = top ctx in node.data <- s; node.kind <- Tree.NodeKind.Comment; end_element_handler parser_ ctx comment_string and processing_instruction_handler parser_ ctx tag data = do_text parser_ ctx; start_element_handler parser_ ctx tag []; let node = top ctx in node.data <- data; node.kind <- Tree.NodeKind.ProcessingInstruction; end_element_handler parser_ ctx tag let character_data_handler _parser ctx text = Buffer.add_string ctx.text_buffer text let create_parser () = let ctx = { text_buffer = Buffer.create 512; current_preorder = 0; stack = [] } in let psr = Expat.parser_create ~encoding:None in Expat.set_start_element_handler psr (start_element_handler psr ctx); Expat.set_end_element_handler psr (end_element_handler psr ctx); Expat.set_character_data_handler psr (character_data_handler psr ctx); Expat.set_comment_handler psr (comment_handler psr ctx); Expat.set_processing_instruction_handler psr (processing_instruction_handler psr ctx); push { tag = QName.document; preorder = next ctx; kind = Tree.NodeKind.Document; data = ""; first_child = dummy; next_sibling = dummy; parent = nil; } ctx; (psr, fun () -> let node = top ctx in node.next_sibling <- nil; consume_closing ctx node; Expat.final psr; let root = List.hd ctx.stack in root.next_sibling <- nil; let a = Array.make ctx.current_preorder nil in let rec loop n = if n != nil then begin a.(n.preorder) <- n; loop n.first_child; loop n.next_sibling; end in loop root; { root = root; size = ctx.current_preorder; by_preorder = a } ) let error e parser_ = let msg = Printf.sprintf "%i.%i %s" (Expat.get_current_line_number parser_) (Expat.get_current_column_number parser_) (Expat.xml_error_to_string e) in raise (Tree.Parse_error msg) let parse_string s = let parser_, finalize = create_parser () in try Expat.parse parser_ s; finalize () with Expat.Expat_error e -> error e parser_ let parse_file fd = let buffer = String.make 4096 '\000' in let parser_, finalize = create_parser () in let rec loop () = let read = input fd buffer 0 4096 in if read != 0 then let () = Expat.parse_sub parser_ buffer 0 read in loop () in try loop (); finalize () with Expat.Expat_error e -> error e parser_ end let load_xml_file = Parser.parse_file let load_xml_string = Parser.parse_string let output_escape_string out s = for i = 0 to String.length s - 1 do match s.[i] with | '<' -> output_string out "<" | '>' -> output_string out ">" | '&' -> output_string out "&" | '"' -> output_string out """ | '\'' -> output_string out "'" | c -> output_char out c done let rec print_attributes ?(sep=true) out tree_ node = if (node.kind == Tree.NodeKind.Attribute) then let tag = QName.to_string node.tag in if sep then output_char out ' '; output_string out tag; output_string out "=\""; output_escape_string out node.data; output_char out '\"'; print_attributes out tree_ node.next_sibling else node let rec print_xml out tree_ node = if node != nil then let () = let open Tree.NodeKind in match node.kind with | Node -> () | Text -> output_escape_string out node.data | Element | Document -> let tag = QName.to_string node.tag in output_char out '<'; output_string out tag; let fchild = print_attributes out tree_ node.first_child in if fchild == nil then output_string out "/>" else begin output_char out '>'; print_xml out tree_ fchild; output_string out "' end | Attribute -> ignore (print_attributes ~sep:false out tree_ node) | Comment -> output_string out "" | ProcessingInstruction -> output_string out "" in print_xml out tree_ node.next_sibling let print_xml out tree_ node = let nnode = { node with next_sibling = nil } in print_xml out tree_ nnode let root t = t.root let size t = t.size let first_child _ n = n.first_child let next_sibling _ n = n.next_sibling let parent _ n = n.parent let tag _ n = n.tag let data _ n = n.data let kind _ n = n.kind let preorder _ n = n.preorder let by_preorder t i = if i >= 0 && i < t.size then Array.unsafe_get t.by_preorder i else let e = Invalid_argument "by_preorder" in raise e let print_node fmt n = Parser.debug_node fmt n