1 (***********************************************************************)
5 (* Kim Nguyen, LRI UMR8623 *)
6 (* Université Paris-Sud & CNRS *)
8 (* Copyright 2010-2012 Université Paris-Sud and Centre National de la *)
9 (* Recherche Scientifique. All rights reserved. This file is *)
10 (* distributed under the terms of the GNU Lesser General Public *)
11 (* License, with the special exception on linking described in file *)
14 (***********************************************************************)
19 mutable kind : Tree.NodeKind.t;
20 mutable data : string;
21 mutable first_child : node;
22 mutable next_sibling : node;
30 kind = Tree.NodeKind.Element;
38 let dummy_tag = QName.of_string "#dummy"
41 kind = Tree.NodeKind.Element;
53 by_preorder : node array;
54 (* TODO add other intersting stuff *)
63 mutable stack : node list;
64 text_buffer : Buffer.t;
65 mutable current_preorder : int;
68 let print_node_ptr fmt n =
69 Format.fprintf fmt "<%s>"
70 (if n == nil then "NIL" else
71 if n == dummy then "DUMMY" else
72 "NODE " ^ string_of_int n.preorder)
74 let debug_node fmt node =
75 Format.fprintf fmt "{ tag=%s; preorder=%i; data=%S; first_child=%a; next_sibling=%a; parent=%a }"
76 (QName.to_string node.tag)
79 print_node_ptr node.first_child
80 print_node_ptr node.next_sibling
81 print_node_ptr node.parent
84 let debug_ctx fmt ctx =
85 Format.fprintf fmt "Current context: { preorder = %i\n; stack = \n%a\n }\n-------------\n"
87 (Pretty.print_list ~sep:";\n" debug_node) ctx.stack
90 let push n ctx = ctx.stack <- n :: ctx.stack
93 [] -> failwith "XML parse error"
94 | e :: l -> ctx.stack <- l; e
96 let top ctx = match ctx.stack with
97 | [] -> failwith "XML parse error"
101 let i = ctx.current_preorder in
102 ctx.current_preorder <- 1 + i;
105 let is_left n = n.next_sibling == dummy
108 let text_string = QName.to_string QName.text
109 let comment_string = QName.to_string QName.comment
112 let rec start_element_handler parser_ ctx tag attr_list =
114 let parent = top ctx in
115 let n = { tag = QName.of_string tag;
116 kind = Tree.NodeKind.Element;
120 next_sibling = dummy;
124 if parent.first_child == dummy then parent.first_child <- n
125 else parent.next_sibling <- n;
127 List.iter (do_attribute parser_ ctx) attr_list
129 and do_attribute parser_ ctx (att, value) =
130 let att_tag = QName.to_string (QName.attribute (QName.of_string att)) in
131 start_element_handler parser_ ctx att_tag [];
134 n.kind <- Tree.NodeKind.Attribute;
135 end_element_handler parser_ ctx att_tag
137 and consume_closing ctx n =
138 if n.next_sibling != dummy then
139 let _ = pop ctx in consume_closing ctx (top ctx)
141 and end_element_handler parser_ ctx _ =
143 let node = top ctx in
144 if node.first_child == dummy then node.first_child <- nil
146 node.next_sibling <- nil;
147 consume_closing ctx node
150 and do_text parser_ ctx =
151 if Buffer.length ctx.text_buffer != 0 then
152 let s = Buffer.contents ctx.text_buffer in
153 Buffer.clear ctx.text_buffer;
154 start_element_handler parser_ ctx text_string [];
155 let node = top ctx in
157 node.kind <- Tree.NodeKind.Text;
158 end_element_handler parser_ ctx text_string
160 and comment_handler parser_ ctx s =
162 start_element_handler parser_ ctx comment_string [];
163 let node = top ctx in
165 node.kind <- Tree.NodeKind.Comment;
166 end_element_handler parser_ ctx comment_string
168 and processing_instruction_handler parser_ ctx tag data =
170 let pi = QName.to_string
171 (QName.processing_instruction (QName.of_string tag))
173 start_element_handler parser_ ctx pi [];
174 let node = top ctx in
176 node.kind <- Tree.NodeKind.ProcessingInstruction;
177 end_element_handler parser_ ctx pi
180 let character_data_handler _parser ctx text =
181 Buffer.add_string ctx.text_buffer text
183 let create_parser () =
184 let ctx = { text_buffer = Buffer.create 512;
185 current_preorder = 0;
187 let psr = Expat.parser_create ~encoding:None in
188 Expat.set_start_element_handler psr (start_element_handler psr ctx);
189 Expat.set_end_element_handler psr (end_element_handler psr ctx);
190 Expat.set_character_data_handler
191 psr (character_data_handler psr ctx);
192 Expat.set_comment_handler psr (comment_handler psr ctx);
193 Expat.set_processing_instruction_handler psr
194 (processing_instruction_handler psr ctx);
195 push { tag = QName.document;
197 kind = Tree.NodeKind.Document;
200 next_sibling = dummy;
205 let node = top ctx in
206 node.next_sibling <- nil;
207 consume_closing ctx node;
209 let root = List.hd ctx.stack in
210 root.next_sibling <- nil;
211 let a = Array.create ctx.current_preorder nil in
222 size = ctx.current_preorder;
227 let error e parser_ =
228 let msg = Printf.sprintf "%i.%i %s"
229 (Expat.get_current_line_number parser_)
230 (Expat.get_current_column_number parser_)
231 (Expat.xml_error_to_string e)
233 raise (Tree.Parse_error msg)
236 let parser_, finalize = create_parser () in
238 Expat.parse parser_ s;
241 Expat.Expat_error e -> error e parser_
244 let buffer = String.create 4096 in
245 let parser_, finalize = create_parser () in
247 let read = input fd buffer 0 4096 in
249 let () = Expat.parse_sub parser_ buffer 0 read in
254 Expat.Expat_error e -> error e parser_
259 let load_xml_file = Parser.parse_file
260 let load_xml_string = Parser.parse_string
262 let output_escape_string out s =
263 for i = 0 to String.length s - 1 do
265 | '<' -> output_string out "<"
266 | '>' -> output_string out ">"
267 | '&' -> output_string out "&"
268 | '"' -> output_string out """
269 | '\'' -> output_string out "'"
270 | c -> output_char out c
274 let rec print_attributes ?(sep=true) out tree_ node =
275 if (node.kind == Tree.NodeKind.Attribute) then
276 let tag = QName.to_string (QName.remove_prefix node.tag) in
277 if sep then output_char out ' ';
278 output_string out tag;
279 output_string out "=\"";
280 output_escape_string out node.data;
281 output_char out '\"';
282 print_attributes out tree_ node.next_sibling
286 let rec print_xml out tree_ node =
289 let open Tree.NodeKind in
292 | Text -> output_escape_string out node.data
293 | Element | Document ->
294 let tag = QName.to_string node.tag in
296 output_string out tag;
297 let fchild = print_attributes out tree_ node.first_child in
298 if fchild == nil then output_string out "/>"
301 print_xml out tree_ fchild;
302 output_string out "</";
303 output_string out tag;
306 | Attribute -> ignore (print_attributes ~sep:false out tree_ node)
308 output_string out "<!--";
309 output_string out node.data;
310 output_string out "-->"
311 | ProcessingInstruction ->
312 output_string out "<?";
313 output_string out (QName.to_string (QName.remove_prefix node.tag));
315 output_string out node.data;
316 output_string out "?>"
318 print_xml out tree_ node.next_sibling
320 let print_xml out tree_ node =
321 let nnode = { node with next_sibling = nil } in print_xml out tree_ nnode
325 let first_child _ n = n.first_child
326 let next_sibling _ n = n.next_sibling
327 let parent _ n = n.parent
329 let data _ n = n.data
330 let kind _ n = n.kind
331 let preorder _ n = n.preorder
332 let by_preorder t i =
333 if i >= 0 && i < t.size then Array.unsafe_get t.by_preorder i
334 else let e = Invalid_argument "by_preorder" in raise e
335 let print_node fmt n = Parser.debug_node fmt n