-type node = {
- tag : QName.t;
- preorder : int;
- mutable data : string;
- mutable first_child : node;
- mutable next_sibling : node;
- mutable parent: node;
-}
-
-
-
-let rec nil = {
- tag = QName.nil;
- preorder = -1;
- data = "";
- first_child = nil;
- next_sibling = nil;
- parent = nil;
-}
-
-let dummy_tag = QName.of_string "#dummy"
-let rec dummy = {
- tag = dummy_tag;
- preorder = -1;
- data = "";
- first_child = dummy;
- next_sibling = dummy;
- parent = dummy;
-}
-
-
-type t = {
- root : node;
- (* TODO add other intersting stuff *)
-}
-
-
-
-module Parser =
-struct
-
- type context = {
- mutable stack : node list;
- text_buffer : Buffer.t;
- mutable current_preorder : int;
- }
-
- let print_node_ptr fmt n =
- Format.fprintf fmt "%s"
- (if n == nil then "<NIL>" else
- if n == dummy then "<DUMMY>" else
- "<NODE " ^ string_of_int n.preorder ^ ">")
-
- let debug_node fmt node =
- Format.fprintf fmt "{ tag=%s; preorder=%i; data=%s; first_child=%a; next_sibling=%a; parent=%a }"
- (QName.to_string node.tag)
- node.preorder
- node.data
- print_node_ptr node.first_child
- print_node_ptr node.next_sibling
- print_node_ptr node.parent
-
-
- let debug_ctx fmt ctx =
- Format.fprintf fmt "Current context: { preorder = %i\n; stack = \n%a\n }\n-------------\n"
- ctx.current_preorder
- (Pretty.print_list ~sep:";\n" debug_node) ctx.stack
-
-
- let push n ctx = ctx.stack <- n :: ctx.stack
- let pop ctx =
- match ctx.stack with
- [] -> failwith "XML parse error"
- | e :: l -> ctx.stack <- l; e
-
- let top ctx = match ctx.stack with
- | [] -> failwith "XML parse error"
- | e :: _ -> e
-
- let next ctx =
- let i = ctx.current_preorder in
- ctx.current_preorder <- 1 + i;
- i
-
- let is_left n = n.next_sibling == dummy
-
-
- let text_string = QName.to_string QName.text
-
- let rec start_element_handler parser_ ctx tag attr_list =
- do_text parser_ ctx;
- let parent = top ctx in
- let n = { tag = QName.of_string tag;
- preorder = next ctx;
- data = "";
- first_child = dummy;
- next_sibling = dummy;
- parent = parent;
- }
- in
- if parent.first_child == dummy then parent.first_child <- n
- else parent.next_sibling <- n;
- push n ctx
-
- and consume_closing ctx n =
- if n.next_sibling != dummy then
- let _ = pop ctx in consume_closing ctx (top ctx)
-
- and end_element_handler parser_ ctx tag =
- do_text parser_ ctx;
- let node = top ctx in
- if nodef.first_child == dummy then node.first_child <- nil
- else begin
- node.next_sibling <- nil;
- consume_closing ctx node
- end
-
- and do_text parser_ ctx =
- if Buffer.length ctx.text_buffer != 0 then
- let s = Buffer.contents ctx.text_buffer in
- Buffer.clear ctx.text_buffer;
- start_element_handler parser_ ctx text_string [];
- let node = top ctx in
- node.data <- s;
- end_element_handler parser_ ctx text_string;
- Format.eprintf "DEBUG: %a\n\n" debug_ctx ctx
-
-
- let character_data_handler parser_ ctx text =
- Buffer.add_string ctx.text_buffer text
-
- let create_parser () =
- let ctx = { text_buffer = Buffer.create 512;
- current_preorder = 0;
- stack = [] } in
- let parser_ = Expat.parser_create ~encoding:None in
- Expat.set_start_element_handler parser_ (start_element_handler parser_ ctx);
- Expat.set_end_element_handler parser_ (end_element_handler parser_ ctx);
- Expat.set_character_data_handler parser_ (character_data_handler parser_ ctx);
- push { tag = QName.document;
- preorder = next ctx;
- data = "";
- first_child = dummy;
- next_sibling = dummy;
- parent = nil;
- } ctx;
- (parser_,
- fun () ->
- let node = top ctx in
- node.next_sibling <- nil;
- consume_closing ctx node;
- match ctx.stack with
- [ root ] ->
- root.next_sibling <- nil;
- { root = root }
- | _ -> raise (Expat.Expat_error Expat.UNCLOSED_TOKEN)
- )
-
-
- let parse_string s =
- let parser_, finalize = create_parser () in
- Expat.parse parser_ s;
- finalize ()
-
- let parse_file fd =
- let buffer = String.create 4096 in
- let parser_, finalize = create_parser () in
- let rec loop () =
- let read = input fd buffer 0 4096 in
- if read != 0 then
- let () = Expat.parse_sub parser_ buffer 0 read in
- loop ()
- in loop (); finalize ()