1 (***********************************************************************)
5 (* Kim Nguyen, LRI UMR8623 *)
6 (* Université Paris-Sud & CNRS *)
8 (* Copyright 2010-2012 Université Paris-Sud and Centre National de la *)
9 (* Recherche Scientifique. All rights reserved. This file is *)
10 (* distributed under the terms of the GNU Lesser General Public *)
11 (* License, with the special exception on linking described in file *)
14 (***********************************************************************)
19 mutable data : string;
20 mutable first_child : node;
21 mutable next_sibling : node;
36 let dummy_tag = QName.of_string "#dummy"
49 (* TODO add other intersting stuff *)
58 mutable stack : node list;
59 text_buffer : Buffer.t;
60 mutable current_preorder : int;
63 let print_node_ptr fmt n =
64 Format.fprintf fmt "%s"
65 (if n == nil then "<NIL>" else
66 if n == dummy then "<DUMMY>" else
67 "<NODE " ^ string_of_int n.preorder ^ ">")
69 let debug_node fmt node =
70 Format.fprintf fmt "{ tag=%s; preorder=%i; data=%s; first_child=%a; next_sibling=%a; parent=%a }"
71 (QName.to_string node.tag)
74 print_node_ptr node.first_child
75 print_node_ptr node.next_sibling
76 print_node_ptr node.parent
79 let debug_ctx fmt ctx =
80 Format.fprintf fmt "Current context: { preorder = %i\n; stack = \n%a\n }\n-------------\n"
82 (Pretty.print_list ~sep:";\n" debug_node) ctx.stack
85 let push n ctx = ctx.stack <- n :: ctx.stack
88 [] -> failwith "XML parse error"
89 | e :: l -> ctx.stack <- l; e
91 let top ctx = match ctx.stack with
92 | [] -> failwith "XML parse error"
96 let i = ctx.current_preorder in
97 ctx.current_preorder <- 1 + i;
100 let is_left n = n.next_sibling == dummy
103 let text_string = QName.to_string QName.text
104 let attr_map_string = QName.to_string QName.attribute_map
106 let rec start_element_handler parser_ ctx tag attr_list =
108 let parent = top ctx in
109 let n = { tag = QName.of_string tag;
113 next_sibling = dummy;
117 if parent.first_child == dummy then parent.first_child <- n
118 else parent.next_sibling <- n;
123 start_element_handler parser_ ctx attr_map_string [];
124 List.iter (do_attribute parser_ ctx) attr_list;
125 end_element_handler parser_ ctx attr_map_string
127 and do_attribute parser_ ctx (att, value) =
128 let att_tag = " " ^ att in
129 start_element_handler parser_ ctx att_tag [];
130 start_element_handler parser_ ctx text_string [];
131 let n = top ctx in n.data <- value;
132 end_element_handler parser_ ctx text_string;
133 end_element_handler parser_ ctx att_tag
135 and consume_closing ctx n =
136 if n.next_sibling != dummy then
137 let _ = pop ctx in consume_closing ctx (top ctx)
139 and end_element_handler parser_ ctx tag =
141 let node = top ctx in
142 if node.first_child == dummy then node.first_child <- nil
144 node.next_sibling <- nil;
145 consume_closing ctx node
148 and do_text parser_ ctx =
149 if Buffer.length ctx.text_buffer != 0 then
150 let s = Buffer.contents ctx.text_buffer in
151 Buffer.clear ctx.text_buffer;
152 start_element_handler parser_ ctx text_string [];
153 let node = top ctx in
155 end_element_handler parser_ ctx text_string
159 let character_data_handler parser_ ctx text =
160 Buffer.add_string ctx.text_buffer text
162 let create_parser () =
163 let ctx = { text_buffer = Buffer.create 512;
164 current_preorder = 0;
166 let parser_ = Expat.parser_create ~encoding:None in
167 Expat.set_start_element_handler parser_ (start_element_handler parser_ ctx);
168 Expat.set_end_element_handler parser_ (end_element_handler parser_ ctx);
169 Expat.set_character_data_handler parser_ (character_data_handler parser_ ctx);
170 push { tag = QName.document;
174 next_sibling = dummy;
179 let node = top ctx in
180 node.next_sibling <- nil;
181 consume_closing ctx node;
184 root.next_sibling <- nil;
186 | _ -> raise (Expat.Expat_error Expat.UNCLOSED_TOKEN)
191 let parser_, finalize = create_parser () in
192 Expat.parse parser_ s;
196 let buffer = String.create 4096 in
197 let parser_, finalize = create_parser () in
199 let read = input fd buffer 0 4096 in
201 let () = Expat.parse_sub parser_ buffer 0 read in
203 in loop (); finalize ()
208 let load_xml_file = Parser.parse_file
209 let load_xml_string = Parser.parse_string
212 let output_escape_string out s =
213 for i = 0 to String.length s - 1 do
215 | '<' -> output_string out "<"
216 | '>' -> output_string out ">"
217 | '&' -> output_string out "&"
218 | '"' -> output_string out """
219 | '\'' -> output_string out "'"
220 | c -> output_char out c
223 let rec print_attributes out tree_ node =
224 if node != nil then begin
225 output_string out (QName.to_string node.tag);
226 output_string out "=\"";
227 output_escape_string out node.first_child.data;
229 print_attributes out tree_ node.next_sibling
232 let rec print_xml out tree_ node =
235 if node.tag == QName.text then
236 output_escape_string out node.data
238 let tag = QName.to_string node.tag in
240 output_string out tag;
242 if node.first_child.tag == QName.attribute_map then
244 print_attributes out tree_ node.first_child.first_child
246 node.first_child.next_sibling
250 if fchild == nil then output_string out "/>"
253 print_xml out tree_ fchild;
254 output_string out "</";
255 output_string out tag;
259 print_xml out tree_ node.next_sibling
263 let first_child _ n = n.first_child
264 let next_sibling _ n = n.next_sibling
265 let parent _ n = n.parent
266 (* Begin Lucca Hirschi *)
267 let is_leaf t n = (first_child t n == nil) && (next_sibling t n == nil)
270 let data _ n = n.data
271 let preorder _ n = n.preorder
274 let rec print_xml_preorder out tree_ node =
277 if node.tag == QName.text then
279 output_escape_string out node.data;
280 output_string out ("'"^(string_of_int(preorder tree_ node)));
283 let tag = QName.to_string node.tag in
285 output_string out tag;
286 output_string out (" '"^(string_of_int(preorder tree_ node)));
288 if node.first_child.tag == QName.attribute_map then
290 print_attributes out tree_ node.first_child.first_child
292 node.first_child.next_sibling
296 if fchild == nil then output_string out "/>"
299 print_xml_preorder out tree_ fchild;
300 output_string out "</";
301 output_string out tag;
305 print_xml_preorder out tree_ node.next_sibling