--- /dev/null
+(***********************************************************************)
+(* *)
+(* TAToo *)
+(* *)
+(* Kim Nguyen, LRI UMR8623 *)
+(* Université Paris-Sud & CNRS *)
+(* *)
+(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *)
+(* Recherche Scientifique. All rights reserved. This file is *)
+(* distributed under the terms of the GNU Lesser General Public *)
+(* License, with the special exception on linking described in file *)
+(* ../LICENSE. *)
+(* *)
+(***********************************************************************)
+
+(*
+ Time-stamp: <Last modified on 2013-04-04 18:47:30 CEST by Kim Nguyen>
+*)
+
+type node = {
+ tag : QName.t;
+ preorder : int;
+ mutable kind : Tree.NodeKind.t;
+ mutable data : string;
+ mutable first_child : node;
+ mutable next_sibling : node;
+ mutable parent: node;
+}
+
+
+
+let rec nil = {
+ tag = QName.nil;
+ kind = Tree.NodeKind.Element;
+ preorder = -1;
+ data = "";
+ first_child = nil;
+ next_sibling = nil;
+ parent = nil;
+}
+
+let dummy_tag = QName.of_string "#dummy"
+let rec dummy = {
+ tag = dummy_tag;
+ kind = Tree.NodeKind.Element;
+ preorder = -1;
+ data = "";
+ first_child = dummy;
+ next_sibling = dummy;
+ parent = dummy;
+}
+
+
+type t = {
+ root : node;
+ size : int;
+ (* TODO add other intersting stuff *)
+}
+
+
+
+module Parser =
+struct
+
+ type context = {
+ mutable stack : node list;
+ text_buffer : Buffer.t;
+ mutable current_preorder : int;
+ }
+
+ let print_node_ptr fmt n =
+ Format.fprintf fmt "<%s>"
+ (if n == nil then "NIL" else
+ if n == dummy then "DUMMY" else
+ "NODE " ^ string_of_int n.preorder)
+
+ let debug_node fmt node =
+ Format.fprintf fmt "{ tag=%s; preorder=%i; data=%S; first_child=%a; next_sibling=%a; parent=%a }"
+ (QName.to_string node.tag)
+ node.preorder
+ node.data
+ print_node_ptr node.first_child
+ print_node_ptr node.next_sibling
+ print_node_ptr node.parent
+
+
+ let debug_ctx fmt ctx =
+ Format.fprintf fmt "Current context: { preorder = %i\n; stack = \n%a\n }\n-------------\n"
+ ctx.current_preorder
+ (Pretty.print_list ~sep:";\n" debug_node) ctx.stack
+
+
+ let push n ctx = ctx.stack <- n :: ctx.stack
+ let pop ctx =
+ match ctx.stack with
+ [] -> failwith "XML parse error"
+ | e :: l -> ctx.stack <- l; e
+
+ let top ctx = match ctx.stack with
+ | [] -> failwith "XML parse error"
+ | e :: _ -> e
+
+ let next ctx =
+ let i = ctx.current_preorder in
+ ctx.current_preorder <- 1 + i;
+ i
+
+ let is_left n = n.next_sibling == dummy
+
+
+ let text_string = QName.to_string QName.text
+ let comment_string = QName.to_string QName.comment
+
+
+ let rec start_element_handler parser_ ctx tag attr_list =
+ do_text parser_ ctx;
+ let parent = top ctx in
+ let n = { tag = QName.of_string tag;
+ kind = Tree.NodeKind.Element;
+ preorder = next ctx;
+ data = "";
+ first_child = dummy;
+ next_sibling = dummy;
+ parent = parent;
+ }
+ in
+ if parent.first_child == dummy then parent.first_child <- n
+ else parent.next_sibling <- n;
+ push n ctx;
+ List.iter (do_attribute parser_ ctx) attr_list
+
+ and do_attribute parser_ ctx (att, value) =
+ let att_tag = QName.to_string (QName.attribute (QName.of_string att)) in
+ start_element_handler parser_ ctx att_tag [];
+ let n = top ctx in
+ n.data <- value;
+ n.kind <- Tree.NodeKind.Attribute;
+ end_element_handler parser_ ctx att_tag
+
+ and consume_closing ctx n =
+ if n.next_sibling != dummy then
+ let _ = pop ctx in consume_closing ctx (top ctx)
+
+ and end_element_handler parser_ ctx _ =
+ do_text parser_ ctx;
+ let node = top ctx in
+ if node.first_child == dummy then node.first_child <- nil
+ else begin
+ node.next_sibling <- nil;
+ consume_closing ctx node
+ end
+
+ and do_text parser_ ctx =
+ if Buffer.length ctx.text_buffer != 0 then
+ let s = Buffer.contents ctx.text_buffer in
+ Buffer.clear ctx.text_buffer;
+ start_element_handler parser_ ctx text_string [];
+ let node = top ctx in
+ node.data <- s;
+ node.kind <- Tree.NodeKind.Text;
+ end_element_handler parser_ ctx text_string
+
+ and comment_handler parser_ ctx s =
+ do_text parser_ ctx;
+ start_element_handler parser_ ctx comment_string [];
+ let node = top ctx in
+ node.data <- s;
+ node.kind <- Tree.NodeKind.Comment;
+ end_element_handler parser_ ctx comment_string
+
+ and processing_instruction_handler parser_ ctx tag data =
+ do_text parser_ ctx;
+ let pi = QName.to_string
+ (QName.processing_instruction (QName.of_string tag))
+ in
+ start_element_handler parser_ ctx pi [];
+ let node = top ctx in
+ node.data <- data;
+ node.kind <- Tree.NodeKind.ProcessingInstruction;
+ end_element_handler parser_ ctx pi
+
+
+ let character_data_handler _parser ctx text =
+ Buffer.add_string ctx.text_buffer text
+
+ let create_parser () =
+ let ctx = { text_buffer = Buffer.create 512;
+ current_preorder = 0;
+ stack = [] } in
+ let psr = Expat.parser_create ~encoding:None in
+ Expat.set_start_element_handler psr (start_element_handler psr ctx);
+ Expat.set_end_element_handler psr (end_element_handler psr ctx);
+ Expat.set_character_data_handler
+ psr (character_data_handler psr ctx);
+ Expat.set_comment_handler psr (comment_handler psr ctx);
+ Expat.set_processing_instruction_handler psr
+ (processing_instruction_handler psr ctx);
+ push { tag = QName.document;
+ preorder = next ctx;
+ kind = Tree.NodeKind.Document;
+ data = "";
+ first_child = dummy;
+ next_sibling = dummy;
+ parent = nil;
+ } ctx;
+ (psr,
+ fun () ->
+ let node = top ctx in
+ node.next_sibling <- nil;
+ consume_closing ctx node;
+ match ctx.stack with
+ [ root ] ->
+ root.next_sibling <- nil;
+ { root = root;
+ size = ctx.current_preorder
+ }
+ | _ -> raise (Expat.Expat_error Expat.UNCLOSED_TOKEN)
+ )
+
+
+ let parse_string s =
+ let parser_, finalize = create_parser () in
+ Expat.parse parser_ s;
+ finalize ()
+
+ let parse_file fd =
+ let buffer = String.create 4096 in
+ let parser_, finalize = create_parser () in
+ let rec loop () =
+ let read = input fd buffer 0 4096 in
+ if read != 0 then
+ let () = Expat.parse_sub parser_ buffer 0 read in
+ loop ()
+ in loop (); finalize ()
+
+end
+
+
+let load_xml_file = Parser.parse_file
+let load_xml_string = Parser.parse_string
+
+let output_escape_string out s =
+ for i = 0 to String.length s - 1 do
+ match s.[i] with
+ | '<' -> output_string out "<"
+ | '>' -> output_string out ">"
+ | '&' -> output_string out "&"
+ | '"' -> output_string out """
+ | '\'' -> output_string out "'"
+ | c -> output_char out c
+ done
+
+
+let rec print_attributes ?(sep=true) out tree_ node =
+ if (node.kind == Tree.NodeKind.Attribute) then
+ let tag = QName.to_string (QName.remove_prefix node.tag) in
+ if sep then output_char out ' ';
+ output_string out tag;
+ output_string out "=\"";
+ output_escape_string out node.data;
+ output_char out '\"';
+ print_attributes out tree_ node.next_sibling
+ else
+ node
+
+let rec print_xml out tree_ node =
+ if node != nil then
+ let () =
+ let open Tree.NodeKind in
+ match node.kind with
+ | Node -> ()
+ | Text -> output_escape_string out node.data
+ | Element | Document ->
+ let tag = QName.to_string node.tag in
+ output_char out '<';
+ output_string out tag;
+ let fchild = print_attributes out tree_ node.first_child in
+ if fchild == nil then output_string out "/>"
+ else begin
+ output_char out '>';
+ print_xml out tree_ fchild;
+ output_string out "</";
+ output_string out tag;
+ output_char out '>'
+ end
+ | Attribute -> ignore (print_attributes ~sep:false out tree_ node)
+ | Comment ->
+ output_string out "<!--";
+ output_string out node.data;
+ output_string out "-->"
+ | ProcessingInstruction ->
+ output_string out "<?";
+ output_string out (QName.to_string (QName.remove_prefix node.tag));
+ output_char out ' ';
+ output_string out node.data;
+ output_string out "?>"
+ in
+ print_xml out tree_ node.next_sibling
+
+let print_xml out tree_ node =
+ let nnode = { node with next_sibling = nil } in print_xml out tree_ nnode
+
+let root t = t.root
+let size t = t.size
+let first_child _ n = n.first_child
+let next_sibling _ n = n.next_sibling
+let parent _ n = n.parent
+let tag _ n = n.tag
+let data _ n = n.data
+let kind _ n = n.kind
+let preorder _ n = n.preorder
+
+let print_node fmt n = Parser.debug_node fmt n