1 (***********************************************************************)
5 (* Kim Nguyen, LRI UMR8623 *)
6 (* Université Paris-Sud & CNRS *)
8 (* Copyright 2010-2012 Université Paris-Sud and Centre National de la *)
9 (* Recherche Scientifique. All rights reserved. This file is *)
10 (* distributed under the terms of the GNU Lesser General Public *)
11 (* License, with the special exception on linking described in file *)
14 (***********************************************************************)
17 Time-stamp: <Last modified on 2013-03-13 18:47:18 CET by Kim Nguyen>
24 mutable kind : Common.NodeKind.t;
25 mutable data : string;
26 mutable first_child : node;
27 mutable next_sibling : node;
35 kind = Common.NodeKind.Element;
43 let dummy_tag = QName.of_string "#dummy"
46 kind = Common.NodeKind.Element;
58 (* TODO add other intersting stuff *)
67 mutable stack : node list;
68 text_buffer : Buffer.t;
69 mutable current_preorder : int;
72 let print_node_ptr fmt n =
73 Format.fprintf fmt "<%s>"
74 (if n == nil then "NIL" else
75 if n == dummy then "DUMMY" else
76 "NODE " ^ string_of_int n.preorder)
78 let debug_node fmt node =
79 Format.fprintf fmt "{ tag=%s; preorder=%i; data=%S; first_child=%a; next_sibling=%a; parent=%a }"
80 (QName.to_string node.tag)
83 print_node_ptr node.first_child
84 print_node_ptr node.next_sibling
85 print_node_ptr node.parent
88 let debug_ctx fmt ctx =
89 Format.fprintf fmt "Current context: { preorder = %i\n; stack = \n%a\n }\n-------------\n"
91 (Pretty.print_list ~sep:";\n" debug_node) ctx.stack
94 let push n ctx = ctx.stack <- n :: ctx.stack
97 [] -> failwith "XML parse error"
98 | e :: l -> ctx.stack <- l; e
100 let top ctx = match ctx.stack with
101 | [] -> failwith "XML parse error"
105 let i = ctx.current_preorder in
106 ctx.current_preorder <- 1 + i;
109 let is_left n = n.next_sibling == dummy
112 let text_string = QName.to_string QName.text
113 let comment_string = QName.to_string QName.comment
116 let rec start_element_handler parser_ ctx tag attr_list =
118 let parent = top ctx in
119 let n = { tag = QName.of_string tag;
120 kind = Common.NodeKind.Element;
124 next_sibling = dummy;
128 if parent.first_child == dummy then parent.first_child <- n
129 else parent.next_sibling <- n;
131 List.iter (do_attribute parser_ ctx) attr_list
133 and do_attribute parser_ ctx (att, value) =
134 let att_tag = QName.to_string (QName.attribute (QName.of_string att)) in
135 start_element_handler parser_ ctx att_tag [];
138 n.kind <- Common.NodeKind.Attribute;
139 end_element_handler parser_ ctx att_tag
141 and consume_closing ctx n =
142 if n.next_sibling != dummy then
143 let _ = pop ctx in consume_closing ctx (top ctx)
145 and end_element_handler parser_ ctx _ =
147 let node = top ctx in
148 if node.first_child == dummy then node.first_child <- nil
150 node.next_sibling <- nil;
151 consume_closing ctx node
154 and do_text parser_ ctx =
155 if Buffer.length ctx.text_buffer != 0 then
156 let s = Buffer.contents ctx.text_buffer in
157 Buffer.clear ctx.text_buffer;
158 start_element_handler parser_ ctx text_string [];
159 let node = top ctx in
161 node.kind <- Common.NodeKind.Text;
162 end_element_handler parser_ ctx text_string
164 and comment_handler parser_ ctx s =
166 start_element_handler parser_ ctx comment_string [];
167 let node = top ctx in
169 node.kind <- Common.NodeKind.Comment;
170 end_element_handler parser_ ctx comment_string
172 and processing_instruction_handler parser_ ctx tag data =
174 let pi = QName.to_string
175 (QName.processing_instruction (QName.of_string tag))
177 start_element_handler parser_ ctx pi [];
178 let node = top ctx in
180 node.kind <- Common.NodeKind.ProcessingInstruction;
181 end_element_handler parser_ ctx pi
184 let character_data_handler _parser ctx text =
185 Buffer.add_string ctx.text_buffer text
187 let create_parser () =
188 let ctx = { text_buffer = Buffer.create 512;
189 current_preorder = 0;
191 let psr = Expat.parser_create ~encoding:None in
192 Expat.set_start_element_handler psr (start_element_handler psr ctx);
193 Expat.set_end_element_handler psr (end_element_handler psr ctx);
194 Expat.set_character_data_handler
195 psr (character_data_handler psr ctx);
196 Expat.set_comment_handler psr (comment_handler psr ctx);
197 Expat.set_processing_instruction_handler psr
198 (processing_instruction_handler psr ctx);
199 push { tag = QName.document;
201 kind = Common.NodeKind.Document;
204 next_sibling = dummy;
209 let node = top ctx in
210 node.next_sibling <- nil;
211 consume_closing ctx node;
214 root.next_sibling <- nil;
216 size = ctx.current_preorder
218 | _ -> raise (Expat.Expat_error Expat.UNCLOSED_TOKEN)
223 let parser_, finalize = create_parser () in
224 Expat.parse parser_ s;
228 let buffer = String.create 4096 in
229 let parser_, finalize = create_parser () in
231 let read = input fd buffer 0 4096 in
233 let () = Expat.parse_sub parser_ buffer 0 read in
235 in loop (); finalize ()
240 let load_xml_file = Parser.parse_file
241 let load_xml_string = Parser.parse_string
243 let output_escape_string out s =
244 for i = 0 to String.length s - 1 do
246 | '<' -> output_string out "<"
247 | '>' -> output_string out ">"
248 | '&' -> output_string out "&"
249 | '"' -> output_string out """
250 | '\'' -> output_string out "'"
251 | c -> output_char out c
255 let rec print_attributes ?(sep=true) out tree_ node =
256 if (node.kind == Common.NodeKind.Attribute) then
257 let tag = QName.to_string (QName.remove_prefix node.tag) in
258 if sep then output_char out ' ';
259 output_string out tag;
260 output_string out "=\"";
261 output_escape_string out node.data;
262 output_char out '\"';
263 print_attributes out tree_ node.next_sibling
267 let rec print_xml out tree_ node =
270 let open Common.NodeKind in
273 | Text -> output_escape_string out node.data
274 | Element | Document ->
275 let tag = QName.to_string node.tag in
277 output_string out tag;
278 let fchild = print_attributes out tree_ node.first_child in
279 if fchild == nil then output_string out "/>"
282 print_xml out tree_ fchild;
283 output_string out "</";
284 output_string out tag;
287 | Attribute -> ignore (print_attributes ~sep:false out tree_ node)
289 output_string out "<!--";
290 output_string out node.data;
291 output_string out "-->"
292 | ProcessingInstruction ->
293 output_string out "<?";
294 output_string out (QName.to_string (QName.remove_prefix node.tag));
296 output_string out node.data;
297 output_string out "?>"
299 print_xml out tree_ node.next_sibling
301 let print_xml out tree_ node =
302 let nnode = { node with next_sibling = nil } in print_xml out tree_ nnode
306 let first_child _ n = n.first_child
307 let next_sibling _ n = n.next_sibling
308 let parent _ n = n.parent
310 let data _ n = n.data
311 let kind _ n = n.kind
312 let preorder _ n = n.preorder
314 let print_node fmt n = Parser.debug_node fmt n