X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=xml-tree-inc.hpp;h=18c91e3dad2564b2cd5b06cfd8158d107ff1ad37;hb=HEAD;hp=6ce172432832a9f420b89b66cf9ce60fce99477d;hpb=c6266d8fd1872fad45b18d3d554410d080b65099;p=SXSI%2FXMLTree.git diff --git a/xml-tree-inc.hpp b/xml-tree-inc.hpp index 6ce1724..18c91e3 100644 --- a/xml-tree-inc.hpp +++ b/xml-tree-inc.hpp @@ -5,6 +5,8 @@ #ifndef XML_TREE_INC_HPP_ #define XML_TREE_INC_HPP_ +#include + inline uint32_t xml_tree::size() const { return tag_seq_len / 2; @@ -17,7 +19,7 @@ inline uint32_t xml_tree::num_tags() const inline uint32_t xml_tree::subtree_size(xml_tree::node_t x) const { - return bp_subtree_size(this->par, x); + return x == root() ? this->par->n/2 : bp_subtree_size(this->par, x); } inline uint32_t @@ -26,12 +28,27 @@ xml_tree::subtree_tags(xml_tree::node_t x, xml_tree::tag_t label) const xml_tree::node_t y = bp_find_close(this->par, x); if (y - x < 10) { uint32_t count = 0; - for(xml_tree::node_t i = x; i < y; i++) + for (xml_tree::node_t i = x; i <= y; ++i) count += (tag(i) == label); return count; } else { - return tags->rank(label, y) - tags->rank(label, x); - }; + return tags[label]->rank(y) - tags[label]->rank(x); + } +} + +inline uint32_t xml_tree::subtree_elements(xml_tree::node_t x) const +{ + + int32_t size = bp_subtree_size(par, x) - 1; + if (size <= 0) return 0; + uint32_t num_texts = subtree_tags(x, xml_tree::PCDATA_OPEN_TAG_ID); + uint32_t num_atts = subtree_tags(x, xml_tree::ATTRIBUTE_OPEN_TAG_ID); + uint32_t num_att_data = subtree_tags(x, xml_tree::ATTRIBUTE_DATA_OPEN_TAG_ID); + size -= num_texts; + size -= num_atts; + size -= 2*num_att_data; + return (uint32_t) size; + } inline bool xml_tree::is_leaf(xml_tree::node_t x) const @@ -39,6 +56,11 @@ inline bool xml_tree::is_leaf(xml_tree::node_t x) const return !bp_inspect(this->par, x+1); } +inline bool xml_tree::is_open(xml_tree::node_t x) const +{ + return bp_inspect(this->par, x); +} + inline bool xml_tree::is_ancestor(xml_tree::node_t x, xml_tree::node_t y) const { @@ -86,7 +108,8 @@ inline xml_tree::node_t xml_tree::parent(xml_tree::node_t x) const inline xml_tree::node_t xml_tree::first_child(node_t x) const { - return bp_first_child(this->par, x); + xml_tree::node_t result = bp_first_child(this->par, x); + return result; } inline xml_tree::node_t xml_tree::last_child(xml_tree::node_t x) const @@ -99,7 +122,8 @@ inline xml_tree::node_t xml_tree::last_child(xml_tree::node_t x) const inline xml_tree::node_t xml_tree::next_sibling(xml_tree::node_t x) const { - return bp_next_sibling(this->par, x); + xml_tree::node_t result = bp_next_sibling(this->par, x); + return result; } inline xml_tree::node_t xml_tree::prev_sibling(xml_tree::node_t x) const @@ -119,6 +143,8 @@ inline xml_tree::node_t xml_tree::first_element(xml_tree::node_t x) const case PCDATA_OPEN_TAG_ID: n = n + 2; return bp_inspect(this->par, n) ? n : xml_tree::NIL; + default: + return n; }; } @@ -133,9 +159,22 @@ inline xml_tree::node_t xml_tree::next_element(xml_tree::node_t x) const return x; } -inline xml_tree::node_t xml_tree::tagged_next(node_t x, tag_t tag) const + +inline xml_tree::node_t xml_tree::tagged_next_close(xml_tree::node_t x, + xml_tree::tag_t label) const +{ + xml_tree::node_t i=x; + for(i = x+1; i < std::min(x+100, this->par->n); i++) + if (tag(i) == label) + return i; + + return tags[label]->select_next(i); +} + +inline xml_tree::node_t xml_tree::tagged_next(xml_tree::node_t x, + xml_tree::tag_t label) const { - return this->tags->select_next(tag, x); + return tags[label]->select_next(x); } inline xml_tree::node_t @@ -149,7 +188,7 @@ xml_tree::tagged_descendant(xml_tree::node_t x, inline xml_tree::node_t xml_tree::tagged_following_before(xml_tree::node_t x, xml_tree::tag_t tag, - xml_tree::node_t limit) const + xml_tree::node_t limit) const { xml_tree::node_t close = bp_find_close(this->par, x); xml_tree::node_t s = tagged_next(close, tag); @@ -161,20 +200,58 @@ inline xml_tree::node_t xml_tree::tagged_child(xml_tree::node_t x, xml_tree::tag_t t) const { xml_tree::node_t c = first_child(x); + xml_tree::node_t result; if (is_nil(c) || tag(c) == t) return c; else - tagged_sibling(c, t); + return tagged_sibling(c, t); } inline xml_tree::node_t xml_tree::tagged_sibling(xml_tree::node_t x, xml_tree::tag_t t) const { xml_tree::node_t sibling = next_sibling(x); - while(!is_nil(sibling) && tag(sibling) != t) sibling = next_sibling(sibling); + xml_tree::tag_t stag; + while (sibling != xml_tree::NIL) { + stag = tag(sibling); + if (stag == t) + return sibling; + sibling = next_sibling(sibling); + }; return sibling; } +inline xml_tree::node_t +xml_tree::select_descendant(xml_tree::node_t x, xml_tree::tag_t *tags) const +{ + if (is_leaf(x)) return xml_tree::NIL; + auto min = xml_tree::NIL; + auto aux = xml_tree::NIL; + for(; *tags != xml_tree::NIL_TAG_ID; ++tags){ + aux = tagged_next(x, *tags); + if ((unsigned int) aux < (unsigned int) min) min = aux; + }; + return (min == xml_tree::NIL || is_ancestor(x, min)) ? min : xml_tree::NIL; +} + + +inline xml_tree::node_t +xml_tree::select_following_before(xml_tree::node_t x, xml_tree::tag_t *tags, + xml_tree::node_t limit) const +{ + auto min = xml_tree::NIL; + auto aux = xml_tree::NIL; + auto close = bp_find_close(this->par, x); + + for(; *tags != xml_tree::NIL_TAG_ID; ++tags){ + aux = tagged_next(close, *tags); + if ((unsigned int) aux < (unsigned int) min) min = aux; + } + + return (min == xml_tree::NIL || min < limit) ? min : xml_tree::NIL; +} + + xml_tree::node_t xml_tree::closing(xml_tree::node_t x) const { return bp_find_close(this->par, x); @@ -188,7 +265,8 @@ inline SXSI::TextCollection *xml_tree::get_text_collection() const inline xml_tree::node_t xml_tree::parent_node(int32_t d) const { - return (xml_tree::node_t) text_positions->select1(d + 1); + xml_tree::node_t res = text_positions->select1(d+1); + return (xml_tree::node_t) res; } inline SXSI::TextCollection::document_result