1 (***********************************************************************)
5 (* Kim Nguyen, LRI UMR8623 *)
6 (* Université Paris-Sud & CNRS *)
8 (* Copyright 2010-2012 Université Paris-Sud and Centre National de la *)
9 (* Recherche Scientifique. All rights reserved. This file is *)
10 (* distributed under the terms of the GNU Lesser General Public *)
11 (* License, with the special exception on linking described in file *)
14 (***********************************************************************)
17 Time-stamp: <Last modified on 2013-04-22 16:42:50 CEST by Kim Nguyen>
23 mutable kind : Tree.NodeKind.t;
24 mutable data : string;
25 mutable first_child : node;
26 mutable next_sibling : node;
34 kind = Tree.NodeKind.Element;
42 let dummy_tag = QName.of_string "#dummy"
45 kind = Tree.NodeKind.Element;
57 (* TODO add other intersting stuff *)
66 mutable stack : node list;
67 text_buffer : Buffer.t;
68 mutable current_preorder : int;
71 let print_node_ptr fmt n =
72 Format.fprintf fmt "<%s>"
73 (if n == nil then "NIL" else
74 if n == dummy then "DUMMY" else
75 "NODE " ^ string_of_int n.preorder)
77 let debug_node fmt node =
78 Format.fprintf fmt "{ tag=%s; preorder=%i; data=%S; first_child=%a; next_sibling=%a; parent=%a }"
79 (QName.to_string node.tag)
82 print_node_ptr node.first_child
83 print_node_ptr node.next_sibling
84 print_node_ptr node.parent
87 let debug_ctx fmt ctx =
88 Format.fprintf fmt "Current context: { preorder = %i\n; stack = \n%a\n }\n-------------\n"
90 (Pretty.print_list ~sep:";\n" debug_node) ctx.stack
93 let push n ctx = ctx.stack <- n :: ctx.stack
96 [] -> failwith "XML parse error"
97 | e :: l -> ctx.stack <- l; e
99 let top ctx = match ctx.stack with
100 | [] -> failwith "XML parse error"
104 let i = ctx.current_preorder in
105 ctx.current_preorder <- 1 + i;
108 let is_left n = n.next_sibling == dummy
111 let text_string = QName.to_string QName.text
112 let comment_string = QName.to_string QName.comment
115 let rec start_element_handler parser_ ctx tag attr_list =
117 let parent = top ctx in
118 let n = { tag = QName.of_string tag;
119 kind = Tree.NodeKind.Element;
123 next_sibling = dummy;
127 if parent.first_child == dummy then parent.first_child <- n
128 else parent.next_sibling <- n;
130 List.iter (do_attribute parser_ ctx) attr_list
132 and do_attribute parser_ ctx (att, value) =
133 let att_tag = QName.to_string (QName.attribute (QName.of_string att)) in
134 start_element_handler parser_ ctx att_tag [];
137 n.kind <- Tree.NodeKind.Attribute;
138 end_element_handler parser_ ctx att_tag
140 and consume_closing ctx n =
141 if n.next_sibling != dummy then
142 let _ = pop ctx in consume_closing ctx (top ctx)
144 and end_element_handler parser_ ctx _ =
146 let node = top ctx in
147 if node.first_child == dummy then node.first_child <- nil
149 node.next_sibling <- nil;
150 consume_closing ctx node
153 and do_text parser_ ctx =
154 if Buffer.length ctx.text_buffer != 0 then
155 let s = Buffer.contents ctx.text_buffer in
156 Buffer.clear ctx.text_buffer;
157 start_element_handler parser_ ctx text_string [];
158 let node = top ctx in
160 node.kind <- Tree.NodeKind.Text;
161 end_element_handler parser_ ctx text_string
163 and comment_handler parser_ ctx s =
165 start_element_handler parser_ ctx comment_string [];
166 let node = top ctx in
168 node.kind <- Tree.NodeKind.Comment;
169 end_element_handler parser_ ctx comment_string
171 and processing_instruction_handler parser_ ctx tag data =
173 let pi = QName.to_string
174 (QName.processing_instruction (QName.of_string tag))
176 start_element_handler parser_ ctx pi [];
177 let node = top ctx in
179 node.kind <- Tree.NodeKind.ProcessingInstruction;
180 end_element_handler parser_ ctx pi
183 let character_data_handler _parser ctx text =
184 Buffer.add_string ctx.text_buffer text
186 let create_parser () =
187 let ctx = { text_buffer = Buffer.create 512;
188 current_preorder = 0;
190 let psr = Expat.parser_create ~encoding:None in
191 Expat.set_start_element_handler psr (start_element_handler psr ctx);
192 Expat.set_end_element_handler psr (end_element_handler psr ctx);
193 Expat.set_character_data_handler
194 psr (character_data_handler psr ctx);
195 Expat.set_comment_handler psr (comment_handler psr ctx);
196 Expat.set_processing_instruction_handler psr
197 (processing_instruction_handler psr ctx);
198 push { tag = QName.document;
200 kind = Tree.NodeKind.Document;
203 next_sibling = dummy;
208 let node = top ctx in
209 node.next_sibling <- nil;
210 consume_closing ctx node;
212 let root = List.hd ctx.stack in
213 root.next_sibling <- nil;
215 size = ctx.current_preorder
219 let error e parser_ =
220 let msg = Printf.sprintf "%i.%i %s"
221 (Expat.get_current_line_number parser_)
222 (Expat.get_current_column_number parser_)
223 (Expat.xml_error_to_string e)
225 raise (Tree.Parse_error msg)
228 let parser_, finalize = create_parser () in
230 Expat.parse parser_ s;
233 Expat.Expat_error e -> error e parser_
236 let buffer = String.create 4096 in
237 let parser_, finalize = create_parser () in
239 let read = input fd buffer 0 4096 in
241 let () = Expat.parse_sub parser_ buffer 0 read in
246 Expat.Expat_error e -> error e parser_
251 let load_xml_file = Parser.parse_file
252 let load_xml_string = Parser.parse_string
254 let output_escape_string out s =
255 for i = 0 to String.length s - 1 do
257 | '<' -> output_string out "<"
258 | '>' -> output_string out ">"
259 | '&' -> output_string out "&"
260 | '"' -> output_string out """
261 | '\'' -> output_string out "'"
262 | c -> output_char out c
266 let rec print_attributes ?(sep=true) out tree_ node =
267 if (node.kind == Tree.NodeKind.Attribute) then
268 let tag = QName.to_string (QName.remove_prefix node.tag) in
269 if sep then output_char out ' ';
270 output_string out tag;
271 output_string out "=\"";
272 output_escape_string out node.data;
273 output_char out '\"';
274 print_attributes out tree_ node.next_sibling
278 let rec print_xml out tree_ node =
281 let open Tree.NodeKind in
284 | Text -> output_escape_string out node.data
285 | Element | Document ->
286 let tag = QName.to_string node.tag in
288 output_string out tag;
289 let fchild = print_attributes out tree_ node.first_child in
290 if fchild == nil then output_string out "/>"
293 print_xml out tree_ fchild;
294 output_string out "</";
295 output_string out tag;
298 | Attribute -> ignore (print_attributes ~sep:false out tree_ node)
300 output_string out "<!--";
301 output_string out node.data;
302 output_string out "-->"
303 | ProcessingInstruction ->
304 output_string out "<?";
305 output_string out (QName.to_string (QName.remove_prefix node.tag));
307 output_string out node.data;
308 output_string out "?>"
310 print_xml out tree_ node.next_sibling
312 let print_xml out tree_ node =
313 let nnode = { node with next_sibling = nil } in print_xml out tree_ nnode
317 let first_child _ n = n.first_child
318 let next_sibling _ n = n.next_sibling
319 let parent _ n = n.parent
321 let data _ n = n.data
322 let kind _ n = n.kind
323 let preorder _ n = n.preorder
325 let print_node fmt n = Parser.debug_node fmt n