X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=xml-tree.hpp;h=4ada586c05b4b243bd3c82a9567937ada66fae01;hb=daf6549ca274aaa8d9c0b3964c5502f0f0f08d7d;hp=4f40dd16c82102b6bc4e471d6a02fb660af1bd88;hpb=2b67e586d2ea45d87648ac3977306139110c4e78;p=SXSI%2FXMLTree.git diff --git a/xml-tree.hpp b/xml-tree.hpp index 4f40dd1..4ada586 100644 --- a/xml-tree.hpp +++ b/xml-tree.hpp @@ -3,9 +3,10 @@ #include +#include #include -#include -#include +#include +#include #include #include #include @@ -46,31 +47,143 @@ public: ~xml_tree(); //Counting functions + /** + * [size()] returns the size of the tree (number of nodes) + * Runs in O(1) + */ inline uint32_t size() const; + + /** + * [num_tags()] returns the number of distinct tags. + * Runs in O(1) + */ inline uint32_t num_tags() const; + + /** + * [subtree_size(n)] returns the size of the subtree (number of nodes) + * rooted at n. + * Runs in O(1) + */ inline uint32_t subtree_size(node_t) const; + + /** + * [subtree_tags(n, t)] returns the number of occurences of tag [t] in the + * subtree rooted at [n] + * Runs in O(1) + */ inline uint32_t subtree_tags(node_t, tag_t) const; - inline uint32_t subtree_elements(node_t, tag_t*) const; + + /** + * [subtree_elements(n)] returns the number of element nodes below [n] + * Runs in O(attribute_ids->size()+3) + */ + inline uint32_t subtree_elements(node_t) const; + + /** + * [num_children(n)] returns the number of child nodes of [n] + * (both text and elements, and including a fake <@> node if + * present). + * Runs in O(1) (?) + */ uint32_t num_children(node_t) const; + + /** + * [child_pos(n)] returns the position of [n] amongst its siblings + * Runs in O(1) (?) + */ uint32_t child_pos(node_t) const; //Node_T tests + /** + * [is_leaf(n)] returns true if [n] is a leaf (i.e. if [num_children(n)] + * returns 0 + * Runs in O(1) + */ inline bool is_leaf(node_t) const; + + /** + * [is_ancestor(n, m)] returns true if [n] is an ancestor of [m], false + * otherwise + * Runs in O(1) + */ inline bool is_ancestor(node_t, node_t) const; + + /** + * [is_right_descendant(n, m)] returns true if [m] is a descendant-or-self of + * a following-sibling of [n], false otherwise + * Runs in O(1) + */ inline bool is_right_descendant(node_t, node_t) const; + + /** + * [is_child(n, m)] returns true if [m] is a child of [n] + * Runs in O(1) + */ bool is_child(node_t, node_t) const; + + /** + * [is_first_child(n, m)] returns true if [m] is the first child of [n] + * Runs in O(1) + */ inline bool is_first_child(node_t) const; + + /** + * [is_nil(n)] returns true if [n] is equal to xml_tree::NIL + * Runs in O(1) + */ inline bool is_nil(node_t) const; + + /** + * [is_open(n)] returns true if [n], seen as an index in the + * underlying BP representation corresponds to an opening parenthesis. + * Runs in O(1) + */ inline bool is_open(node_t) const; + //Numbering functions + /** + * [depth(n)] returns the depths of node [n]. The root has depth 1. + * Runs in O(1) + */ uint32_t depth(node_t) const; + + /** + * [preorder(n)] returns the preorder of node [n]. Equivalent to calling + * rank on the underlying BP representation. + * Runs in O(1) + */ uint32_t preorder(node_t) const; + + /** + * [preorder(n)] returns the postorder of node [n]. + * Runs in O(1) + */ uint32_t postorder(node_t) const; //Tag functions + /** + * [tag(n)] returns the tag of node [n] which must be a valid node identifier + * (in particular not NIL) + * Runs in O(1) + */ inline tag_t tag(node_t) const; + + /** + * [get_tag_name_by_ref(t)] returns the string representation of tag [t] + * For elements, the string representation is the tag name itself + * Returns if [t] is not a proper tag identifier. + * Runs in O(1) + */ const char* get_tag_name_by_ref(tag_t) const; + + /** + * [register_tag(s)] returns the tag identifier for the tag represented + * by the string [s]. If no such tag exists in the document, return a + * fresh tag identifier [i]. Subsequent calls with the same [s] will return + * the same identifier. + * Runs in O(1) + */ tag_t register_tag(char *s); //Navigation functions @@ -108,6 +221,9 @@ public: SXSI::TextCollection::document_result contains(uchar const *s) const; SXSI::TextCollection::document_result less_than(uchar const *s) const; + + bool naive_contains(node_t, uchar const *s) const; + //I/O functions void save(int, char*); static xml_tree* load(int, char*, bool, int); @@ -135,6 +251,8 @@ private: //Mapping from tag_t identifiers to/from tagnames std::vector *tag_names; std::unordered_map *tag_ids; + //Set of tag ids that map to attribute nodes + std::unordered_set *attribute_ids; //Text index SXSI::TextCollection *text_collection; static_bitsequence *text_positions;