(***********************************************************************)
(*
- Time-stamp: <Last modified on 2013-03-05 16:20:32 CET by Kim Nguyen>
+ Time-stamp: <Last modified on 2013-03-13 18:47:18 CET by Kim Nguyen>
*)
open Utils
type node = {
tag : QName.t;
preorder : int;
+ mutable kind : Common.NodeKind.t;
mutable data : string;
mutable first_child : node;
mutable next_sibling : node;
let rec nil = {
tag = QName.nil;
+ kind = Common.NodeKind.Element;
preorder = -1;
data = "";
first_child = nil;
let dummy_tag = QName.of_string "#dummy"
let rec dummy = {
tag = dummy_tag;
+ kind = Common.NodeKind.Element;
preorder = -1;
data = "";
first_child = dummy;
type t = {
root : node;
+ size : int;
(* TODO add other intersting stuff *)
}
let text_string = QName.to_string QName.text
- let attr_map_string = QName.to_string QName.attribute_map
+ let comment_string = QName.to_string QName.comment
+
- let att_pref = QName.node QName.attribute_prefix
let rec start_element_handler parser_ ctx tag attr_list =
do_text parser_ ctx;
let parent = top ctx in
let n = { tag = QName.of_string tag;
+ kind = Common.NodeKind.Element;
preorder = next ctx;
data = "";
first_child = dummy;
if parent.first_child == dummy then parent.first_child <- n
else parent.next_sibling <- n;
push n ctx;
- match attr_list with
- [] -> ()
- | _ ->
- start_element_handler parser_ ctx attr_map_string [];
- List.iter (do_attribute parser_ ctx) attr_list;
- end_element_handler parser_ ctx attr_map_string
+ List.iter (do_attribute parser_ ctx) attr_list
and do_attribute parser_ ctx (att, value) =
- let att_tag = att_pref ^ att in
+ let att_tag = QName.to_string (QName.attribute (QName.of_string att)) in
start_element_handler parser_ ctx att_tag [];
- start_element_handler parser_ ctx text_string [];
- let n = top ctx in n.data <- value;
- end_element_handler parser_ ctx text_string;
+ let n = top ctx in
+ n.data <- value;
+ n.kind <- Common.NodeKind.Attribute;
end_element_handler parser_ ctx att_tag
and consume_closing ctx n =
start_element_handler parser_ ctx text_string [];
let node = top ctx in
node.data <- s;
+ node.kind <- Common.NodeKind.Text;
end_element_handler parser_ ctx text_string
+ and comment_handler parser_ ctx s =
+ do_text parser_ ctx;
+ start_element_handler parser_ ctx comment_string [];
+ let node = top ctx in
+ node.data <- s;
+ node.kind <- Common.NodeKind.Comment;
+ end_element_handler parser_ ctx comment_string
+
+ and processing_instruction_handler parser_ ctx tag data =
+ do_text parser_ ctx;
+ let pi = QName.to_string
+ (QName.processing_instruction (QName.of_string tag))
+ in
+ start_element_handler parser_ ctx pi [];
+ let node = top ctx in
+ node.data <- data;
+ node.kind <- Common.NodeKind.ProcessingInstruction;
+ end_element_handler parser_ ctx pi
let character_data_handler _parser ctx text =
let ctx = { text_buffer = Buffer.create 512;
current_preorder = 0;
stack = [] } in
- let parser_ = Expat.parser_create ~encoding:None in
- Expat.set_start_element_handler parser_ (start_element_handler parser_ ctx);
- Expat.set_end_element_handler parser_ (end_element_handler parser_ ctx);
- Expat.set_character_data_handler parser_ (character_data_handler parser_ ctx);
+ let psr = Expat.parser_create ~encoding:None in
+ Expat.set_start_element_handler psr (start_element_handler psr ctx);
+ Expat.set_end_element_handler psr (end_element_handler psr ctx);
+ Expat.set_character_data_handler
+ psr (character_data_handler psr ctx);
+ Expat.set_comment_handler psr (comment_handler psr ctx);
+ Expat.set_processing_instruction_handler psr
+ (processing_instruction_handler psr ctx);
push { tag = QName.document;
preorder = next ctx;
+ kind = Common.NodeKind.Document;
data = "";
first_child = dummy;
next_sibling = dummy;
parent = nil;
} ctx;
- (parser_,
+ (psr,
fun () ->
let node = top ctx in
node.next_sibling <- nil;
match ctx.stack with
[ root ] ->
root.next_sibling <- nil;
- { root = root }
+ { root = root;
+ size = ctx.current_preorder
+ }
| _ -> raise (Expat.Expat_error Expat.UNCLOSED_TOKEN)
)
let load_xml_file = Parser.parse_file
let load_xml_string = Parser.parse_string
-
let output_escape_string out s =
for i = 0 to String.length s - 1 do
match s.[i] with
| c -> output_char out c
done
-let rec print_attributes out tree_ node =
- if node != nil then begin
- let ntag = QName.to_string node.tag in
- output_char out ' ';
- output out ntag 1 (String.length ntag - 1);
+
+let rec print_attributes ?(sep=true) out tree_ node =
+ if (node.kind == Common.NodeKind.Attribute) then
+ let tag = QName.to_string (QName.remove_prefix node.tag) in
+ if sep then output_char out ' ';
+ output_string out tag;
output_string out "=\"";
- output_escape_string out node.first_child.data;
- output_char out '"';
+ output_escape_string out node.data;
+ output_char out '\"';
print_attributes out tree_ node.next_sibling
- end
+ else
+ node
let rec print_xml out tree_ node =
if node != nil then
let () =
- if node.tag == QName.text then
- output_escape_string out node.data
- else
- let tag = QName.to_string node.tag in
- output_char out '<';
- output_string out tag;
- let fchild =
- if node.first_child.tag == QName.attribute_map then
- let () = print_attributes out tree_ node.first_child.first_child in
- node.first_child.next_sibling
- else
- node.first_child
- in
- if fchild == nil then output_string out "/>"
- else begin
- output_char out '>';
- print_xml out tree_ fchild;
- output_string out "</";
- output_string out tag;
- output_char out '>'
- end
+ let open Common.NodeKind in
+ match node.kind with
+ | Node -> ()
+ | Text -> output_escape_string out node.data
+ | Element | Document ->
+ let tag = QName.to_string node.tag in
+ output_char out '<';
+ output_string out tag;
+ let fchild = print_attributes out tree_ node.first_child in
+ if fchild == nil then output_string out "/>"
+ else begin
+ output_char out '>';
+ print_xml out tree_ fchild;
+ output_string out "</";
+ output_string out tag;
+ output_char out '>'
+ end
+ | Attribute -> ignore (print_attributes ~sep:false out tree_ node)
+ | Comment ->
+ output_string out "<!--";
+ output_string out node.data;
+ output_string out "-->"
+ | ProcessingInstruction ->
+ output_string out "<?";
+ output_string out (QName.to_string (QName.remove_prefix node.tag));
+ output_char out ' ';
+ output_string out node.data;
+ output_string out "?>"
in
print_xml out tree_ node.next_sibling
-let print_xml out tree_ node = print_xml out tree_ { node with next_sibling = nil }
+let print_xml out tree_ node =
+ let nnode = { node with next_sibling = nil } in print_xml out tree_ nnode
+
let root t = t.root
+let size t = t.size
let first_child _ n = n.first_child
let next_sibling _ n = n.next_sibling
let parent _ n = n.parent
let tag _ n = n.tag
let data _ n = n.data
+let kind _ n = n.kind
let preorder _ n = n.preorder
let print_node fmt n = Parser.debug_node fmt n