(***********************************************************************)
(*
- Time-stamp: <Last modified on 2013-02-07 09:59:37 CET by Kim Nguyen>
+ Time-stamp: <Last modified on 2013-03-13 18:47:18 CET by Kim Nguyen>
*)
open Utils
type node = {
tag : QName.t;
preorder : int;
+ mutable kind : Common.NodeKind.t;
mutable data : string;
mutable first_child : node;
mutable next_sibling : node;
let rec nil = {
tag = QName.nil;
+ kind = Common.NodeKind.Element;
preorder = -1;
data = "";
first_child = nil;
let dummy_tag = QName.of_string "#dummy"
let rec dummy = {
tag = dummy_tag;
+ kind = Common.NodeKind.Element;
preorder = -1;
data = "";
first_child = dummy;
type t = {
root : node;
+ size : int;
(* TODO add other intersting stuff *)
}
"NODE " ^ string_of_int n.preorder)
let debug_node fmt node =
- Format.fprintf fmt "{ tag=%s; preorder=%i; data=%s; first_child=%a; next_sibling=%a; parent=%a }"
+ Format.fprintf fmt "{ tag=%s; preorder=%i; data=%S; first_child=%a; next_sibling=%a; parent=%a }"
(QName.to_string node.tag)
node.preorder
node.data
let text_string = QName.to_string QName.text
- let attr_map_string = QName.to_string QName.attribute_map
+ let comment_string = QName.to_string QName.comment
+
let rec start_element_handler parser_ ctx tag attr_list =
do_text parser_ ctx;
let parent = top ctx in
let n = { tag = QName.of_string tag;
+ kind = Common.NodeKind.Element;
preorder = next ctx;
data = "";
first_child = dummy;
if parent.first_child == dummy then parent.first_child <- n
else parent.next_sibling <- n;
push n ctx;
- match attr_list with
- [] -> ()
- | _ ->
- start_element_handler parser_ ctx attr_map_string [];
- List.iter (do_attribute parser_ ctx) attr_list;
- end_element_handler parser_ ctx attr_map_string
+ List.iter (do_attribute parser_ ctx) attr_list
and do_attribute parser_ ctx (att, value) =
- let att_tag = " " ^ att in
+ let att_tag = QName.to_string (QName.attribute (QName.of_string att)) in
start_element_handler parser_ ctx att_tag [];
- start_element_handler parser_ ctx text_string [];
- let n = top ctx in n.data <- value;
- end_element_handler parser_ ctx text_string;
+ let n = top ctx in
+ n.data <- value;
+ n.kind <- Common.NodeKind.Attribute;
end_element_handler parser_ ctx att_tag
and consume_closing ctx n =
if n.next_sibling != dummy then
let _ = pop ctx in consume_closing ctx (top ctx)
- and end_element_handler parser_ ctx tag =
+ and end_element_handler parser_ ctx _ =
do_text parser_ ctx;
let node = top ctx in
if node.first_child == dummy then node.first_child <- nil
start_element_handler parser_ ctx text_string [];
let node = top ctx in
node.data <- s;
+ node.kind <- Common.NodeKind.Text;
end_element_handler parser_ ctx text_string
+ and comment_handler parser_ ctx s =
+ do_text parser_ ctx;
+ start_element_handler parser_ ctx comment_string [];
+ let node = top ctx in
+ node.data <- s;
+ node.kind <- Common.NodeKind.Comment;
+ end_element_handler parser_ ctx comment_string
+
+ and processing_instruction_handler parser_ ctx tag data =
+ do_text parser_ ctx;
+ let pi = QName.to_string
+ (QName.processing_instruction (QName.of_string tag))
+ in
+ start_element_handler parser_ ctx pi [];
+ let node = top ctx in
+ node.data <- data;
+ node.kind <- Common.NodeKind.ProcessingInstruction;
+ end_element_handler parser_ ctx pi
- let character_data_handler parser_ ctx text =
+ let character_data_handler _parser ctx text =
Buffer.add_string ctx.text_buffer text
let create_parser () =
let ctx = { text_buffer = Buffer.create 512;
current_preorder = 0;
stack = [] } in
- let parser_ = Expat.parser_create ~encoding:None in
- Expat.set_start_element_handler parser_ (start_element_handler parser_ ctx);
- Expat.set_end_element_handler parser_ (end_element_handler parser_ ctx);
- Expat.set_character_data_handler parser_ (character_data_handler parser_ ctx);
+ let psr = Expat.parser_create ~encoding:None in
+ Expat.set_start_element_handler psr (start_element_handler psr ctx);
+ Expat.set_end_element_handler psr (end_element_handler psr ctx);
+ Expat.set_character_data_handler
+ psr (character_data_handler psr ctx);
+ Expat.set_comment_handler psr (comment_handler psr ctx);
+ Expat.set_processing_instruction_handler psr
+ (processing_instruction_handler psr ctx);
push { tag = QName.document;
preorder = next ctx;
+ kind = Common.NodeKind.Document;
data = "";
first_child = dummy;
next_sibling = dummy;
parent = nil;
} ctx;
- (parser_,
+ (psr,
fun () ->
let node = top ctx in
node.next_sibling <- nil;
match ctx.stack with
[ root ] ->
root.next_sibling <- nil;
- { root = root }
+ { root = root;
+ size = ctx.current_preorder
+ }
| _ -> raise (Expat.Expat_error Expat.UNCLOSED_TOKEN)
)
let load_xml_file = Parser.parse_file
let load_xml_string = Parser.parse_string
-
let output_escape_string out s =
for i = 0 to String.length s - 1 do
match s.[i] with
| c -> output_char out c
done
-let rec print_attributes out tree_ node =
- if node != nil then begin
- output_string out (QName.to_string node.tag);
+
+let rec print_attributes ?(sep=true) out tree_ node =
+ if (node.kind == Common.NodeKind.Attribute) then
+ let tag = QName.to_string (QName.remove_prefix node.tag) in
+ if sep then output_char out ' ';
+ output_string out tag;
output_string out "=\"";
- output_escape_string out node.first_child.data;
- output_char out '"';
+ output_escape_string out node.data;
+ output_char out '\"';
print_attributes out tree_ node.next_sibling
- end
+ else
+ node
let rec print_xml out tree_ node =
if node != nil then
- let () =
- if node.tag == QName.text then
- output_escape_string out node.data
- else
+ let () =
+ let open Common.NodeKind in
+ match node.kind with
+ | Node -> ()
+ | Text -> output_escape_string out node.data
+ | Element | Document ->
let tag = QName.to_string node.tag in
output_char out '<';
output_string out tag;
- let fchild =
- if node.first_child.tag == QName.attribute_map then
- let () =
- print_attributes out tree_ node.first_child.first_child
- in
- node.first_child.next_sibling
- else
- node.first_child
- in
+ let fchild = print_attributes out tree_ node.first_child in
if fchild == nil then output_string out "/>"
else begin
output_char out '>';
output_string out tag;
output_char out '>'
end
- in
- print_xml out tree_ node.next_sibling
-
+ | Attribute -> ignore (print_attributes ~sep:false out tree_ node)
+ | Comment ->
+ output_string out "<!--";
+ output_string out node.data;
+ output_string out "-->"
+ | ProcessingInstruction ->
+ output_string out "<?";
+ output_string out (QName.to_string (QName.remove_prefix node.tag));
+ output_char out ' ';
+ output_string out node.data;
+ output_string out "?>"
+ in
+ print_xml out tree_ node.next_sibling
+
+let print_xml out tree_ node =
+ let nnode = { node with next_sibling = nil } in print_xml out tree_ nnode
let root t = t.root
+let size t = t.size
let first_child _ n = n.first_child
let next_sibling _ n = n.next_sibling
let parent _ n = n.parent
let tag _ n = n.tag
let data _ n = n.data
+let kind _ n = n.kind
let preorder _ n = n.preorder
+
+let print_node fmt n = Parser.debug_node fmt n