(* ../LICENSE. *)
(* *)
(***********************************************************************)
+open Misc
-(*
- Time-stamp: <Last modified on 2013-02-05 13:56:52 CET by Kim Nguyễn>
-*)
+(** The different kind of XML nodes and utility functions *)
-type node = {
- tag : QName.t;
- preorder : int;
- mutable data : string;
- mutable first_child : node;
- mutable next_sibling : node;
- mutable parent: node;
-}
+module NodeKind =
+ struct
+ type t =
+ Document | Element | Text | Comment | Attribute
+ | ProcessingInstruction | Node
+ let to_string =
+ function
+ Document -> "document"
+ | Element -> "element"
+ | Attribute -> "attribute"
+ | Text -> "text"
+ | Comment -> "comment"
+ | ProcessingInstruction -> "processing-instruction"
+ | Node -> "node"
+ let print ppf k = Format.fprintf ppf "%s" (to_string k)
-let rec nil = {
- tag = QName.nil;
- preorder = -1;
- data = "";
- first_child = nil;
- next_sibling = nil;
- parent = nil;
-}
-let dummy_tag = QName.of_string "#dummy"
-let rec dummy = {
- tag = dummy_tag;
- preorder = -1;
- data = "";
- first_child = dummy;
- next_sibling = dummy;
- parent = dummy;
-}
+ let is_a k1 k2 =
+ k1 == Node || k2 == Node || k1 == k2
+end
+
+module NodeSummary =
+struct
+ (* Pack into an integer the result of the is_* and has_ predicates
+ for a given node *)
+ type t = int
+ let dummy = -1
+ (*
+ ...44443210
+ ...4444 -> kind
+ 3 -> has_right
+ 2 -> has_left
+ 1 -> is_right
+ 0 -> is_left
+ *)
+ let is_left (s : t) : bool =
+ s land 1 != 0
+
+ let is_right (s : t) : bool =
+ s land 0b10 != 0
+
+ let has_left (s : t) : bool =
+ s land 0b100 != 0
+
+ let has_right (s : t) : bool =
+ s land 0b1000 != 0
+
+ let kind (s : t) : NodeKind.t =
+ Obj.magic (s lsr 4)
+
+ let make is_left is_right has_left has_right kind =
+ (int_of_bool is_left) lor
+ ((int_of_bool is_right) lsl 1) lor
+ ((int_of_bool has_left) lsl 2) lor
+ ((int_of_bool has_right) lsl 3) lor
+ ((Obj.magic kind) lsl 4)
+end
-type t = {
- root : node;
- (* TODO add other intersting stuff *)
-}
+(** Signatures for trees *)
+exception Parse_error of string
-module Parser =
-struct
+module type S =
+sig
+ type node
+ (** The type of a tree node *)
- type context = {
- mutable stack : node list;
- text_buffer : Buffer.t;
- mutable current_preorder : int;
- }
-
- let print_node_ptr fmt n =
- Format.fprintf fmt "<%s>"
- (if n == nil then "NIL" else
- if n == dummy then "DUMMY" else
- "NODE " ^ string_of_int n.preorder)
-
- let debug_node fmt node =
- Format.fprintf fmt "{ tag=%s; preorder=%i; data=%s; first_child=%a; next_sibling=%a; parent=%a }"
- (QName.to_string node.tag)
- node.preorder
- node.data
- print_node_ptr node.first_child
- print_node_ptr node.next_sibling
- print_node_ptr node.parent
-
-
- let debug_ctx fmt ctx =
- Format.fprintf fmt "Current context: { preorder = %i\n; stack = \n%a\n }\n-------------\n"
- ctx.current_preorder
- (Pretty.print_list ~sep:";\n" debug_node) ctx.stack
-
-
- let push n ctx = ctx.stack <- n :: ctx.stack
- let pop ctx =
- match ctx.stack with
- [] -> failwith "XML parse error"
- | e :: l -> ctx.stack <- l; e
-
- let top ctx = match ctx.stack with
- | [] -> failwith "XML parse error"
- | e :: _ -> e
-
- let next ctx =
- let i = ctx.current_preorder in
- ctx.current_preorder <- 1 + i;
- i
-
- let is_left n = n.next_sibling == dummy
-
-
- let text_string = QName.to_string QName.text
- let attr_map_string = QName.to_string QName.attribute_map
-
- let rec start_element_handler parser_ ctx tag attr_list =
- do_text parser_ ctx;
- let parent = top ctx in
- let n = { tag = QName.of_string tag;
- preorder = next ctx;
- data = "";
- first_child = dummy;
- next_sibling = dummy;
- parent = parent;
- }
- in
- if parent.first_child == dummy then parent.first_child <- n
- else parent.next_sibling <- n;
- push n ctx;
- match attr_list with
- [] -> ()
- | _ ->
- start_element_handler parser_ ctx attr_map_string [];
- List.iter (do_attribute parser_ ctx) attr_list;
- end_element_handler parser_ ctx attr_map_string
-
- and do_attribute parser_ ctx (att, value) =
- let att_tag = " " ^ att in
- start_element_handler parser_ ctx att_tag [];
- start_element_handler parser_ ctx text_string [];
- let n = top ctx in n.data <- value;
- end_element_handler parser_ ctx text_string;
- end_element_handler parser_ ctx att_tag
-
- and consume_closing ctx n =
- if n.next_sibling != dummy then
- let _ = pop ctx in consume_closing ctx (top ctx)
-
- and end_element_handler parser_ ctx tag =
- do_text parser_ ctx;
- let node = top ctx in
- if node.first_child == dummy then node.first_child <- nil
- else begin
- node.next_sibling <- nil;
- consume_closing ctx node
- end
-
- and do_text parser_ ctx =
- if Buffer.length ctx.text_buffer != 0 then
- let s = Buffer.contents ctx.text_buffer in
- Buffer.clear ctx.text_buffer;
- start_element_handler parser_ ctx text_string [];
- let node = top ctx in
- node.data <- s;
- end_element_handler parser_ ctx text_string
-
-
-
- let character_data_handler parser_ ctx text =
- Buffer.add_string ctx.text_buffer text
-
- let create_parser () =
- let ctx = { text_buffer = Buffer.create 512;
- current_preorder = 0;
- stack = [] } in
- let parser_ = Expat.parser_create ~encoding:None in
- Expat.set_start_element_handler parser_ (start_element_handler parser_ ctx);
- Expat.set_end_element_handler parser_ (end_element_handler parser_ ctx);
- Expat.set_character_data_handler parser_ (character_data_handler parser_ ctx);
- push { tag = QName.document;
- preorder = next ctx;
- data = "";
- first_child = dummy;
- next_sibling = dummy;
- parent = nil;
- } ctx;
- (parser_,
- fun () ->
- let node = top ctx in
- node.next_sibling <- nil;
- consume_closing ctx node;
- match ctx.stack with
- [ root ] ->
- root.next_sibling <- nil;
- { root = root }
- | _ -> raise (Expat.Expat_error Expat.UNCLOSED_TOKEN)
- )
-
-
- let parse_string s =
- let parser_, finalize = create_parser () in
- Expat.parse parser_ s;
- finalize ()
-
- let parse_file fd =
- let buffer = String.create 4096 in
- let parser_, finalize = create_parser () in
- let rec loop () =
- let read = input fd buffer 0 4096 in
- if read != 0 then
- let () = Expat.parse_sub parser_ buffer 0 read in
- loop ()
- in loop (); finalize ()
+ type t
+ (** The type of trees *)
-end
+ val size : t -> int
+ (** Return the number of nodes *)
+
+ val nil : node
+ (** Nil node, denoting the first/second child of a leaf or the parent of
+ the root *)
+
+ val dummy : node
+ (** Dummy node that is guaranteed to never occur in any tree *)
+
+ val load_xml_file : in_channel -> t
+ (** Takes a file descriptor and returns the XML data stored in the
+ corresponding file. Start at the current position in the file
+ descriptor (which is not necessarily the begining of file)
+ *)
+
+ val load_xml_string : string -> t
+ (** Loads XML data stored in a string *)
+
+ val print_xml : out_channel -> t -> node -> unit
+ (** Outputs the tree as an XML document on the given output_channel *)
+ val root : t -> node
+ (** Returns the root of the tree *)
-let load_xml_file = Parser.parse_file
-let load_xml_string = Parser.parse_string
-
-
-let output_escape_string out s =
- for i = 0 to String.length s - 1 do
- match s.[i] with
- | '<' -> output_string out "<"
- | '>' -> output_string out ">"
- | '&' -> output_string out "&"
- | '"' -> output_string out """
- | '\'' -> output_string out "'"
- | c -> output_char out c
- done
-
-let rec print_attributes out tree_ node =
- if node != nil then begin
- output_string out (QName.to_string node.tag);
- output_string out "=\"";
- output_escape_string out node.first_child.data;
- output_char out '"';
- print_attributes out tree_ node.next_sibling
- end
-
-let rec print_xml out tree_ node =
- if node != nil then
- let () =
- if node.tag == QName.text then
- output_escape_string out node.data
- else
- let tag = QName.to_string node.tag in
- output_char out '<';
- output_string out tag;
- let fchild =
- if node.first_child.tag == QName.attribute_map then
- let () =
- print_attributes out tree_ node.first_child.first_child
- in
- node.first_child.next_sibling
- else
- node.first_child
- in
- if fchild == nil then output_string out "/>"
- else begin
- output_char out '>';
- print_xml out tree_ fchild;
- output_string out "</";
- output_string out tag;
- output_char out '>'
- end
- in
- print_xml out tree_ node.next_sibling
-
-
-let root t = t.root
-let first_child _ n = n.first_child
-let next_sibling _ n = n.next_sibling
-let parent _ n = n.parent
-let tag _ n = n.tag
-let data _ n = n.data
-let preorder _ n = n.preorder
+ val first_child : t -> node -> node
+ (** [first_child t n] returns the first child of node [n] in tree [t].
+ Returns [nil] if [n] is a leaf. Returns [nil] if [n == nil].
+ *)
+
+ val next_sibling : t -> node -> node
+ (** [next_sibling t n] returns the next_sibling of node [n] in tree [t].
+ Returns [nil] if [n] is the last child of a node.
+ Returns [nil] if [n == nil].
+ *)
+
+ val parent : t -> node -> node
+ (** [next_sibling t n] returns the parent of node [n] in tree [t].
+ Returns [nil] if [n] is the root of the tree.
+ Returns [nil] if [n == nil].
+ *)
+
+ val tag : t -> node -> QName.t
+ (** Returns the label of a given node *)
+
+ val data : t -> node -> string
+ (** Returns the character data associated with a node.
+ The only node having character data are those whose label is
+ QName.text, QName.cdata_section or QName.comment
+ *)
+
+ val kind : t -> node -> NodeKind.t
+ (** Returns the kind of the given node *)
+
+ val summary : t -> node -> NodeSummary.t
+ (** Returns the summary of the given node *)
+
+ val preorder : t -> node -> int
+ (** [preorder t n] returns the pre-order position of [n] in [t].
+ [preodrder t (root t) == 0] and [preorder t nil < 0].
+ *)
+
+ val by_preorder : t -> int -> node
+ (** [by_preorder t i] returns the node with preorder [i]
+ *)
+ val print_node : Format.formatter -> node -> unit
+
+end