From: Kim Nguyễn Date: Mon, 15 Oct 2012 16:26:08 +0000 (+0200) Subject: Record which tag ids map to attribute nodes. Use that in subtree_element X-Git-Url: http://git.nguyen.vg/gitweb/?p=SXSI%2FXMLTree.git;a=commitdiff_plain;h=9775b1833487a525901cf968d91a9e7f193395c5 Record which tag ids map to attribute nodes. Use that in subtree_element instead of expecting the caller to pass the tag of attribute nodes. --- diff --git a/xml-tree-inc.hpp b/xml-tree-inc.hpp index de88de0..19745ed 100644 --- a/xml-tree-inc.hpp +++ b/xml-tree-inc.hpp @@ -36,16 +36,20 @@ xml_tree::subtree_tags(xml_tree::node_t x, xml_tree::tag_t label) const } } -inline uint32_t xml_tree::subtree_elements(xml_tree::node_t x, - xml_tree::tag_t *atts) const +inline uint32_t xml_tree::subtree_elements(xml_tree::node_t x) const { int32_t size = bp_subtree_size(par, x) - 1; if (size <= 0) return 0; size -= subtree_tags(x, xml_tree::PCDATA_OPEN_TAG_ID); + size -= subtree_tags(x, xml_tree::ATTRIBUTE_OPEN_TAG_ID); + size -= subtree_tags(x, xml_tree::ATTRIBUTE_DATA_OPEN_TAG_ID); if (size < 3) return (uint32_t) size; - for(; *atts != xml_tree::NIL_TAG_ID; atts++) - size -= subtree_tags(x, *atts); + std::unordered_set::iterator it; + for(it = this->attribute_ids->begin(); + it != this->attribute_ids->end(); + ++it) + size -= subtree_tags(x, *it); return (uint32_t) size; } diff --git a/xml-tree.cpp b/xml-tree.cpp index e230d9b..b33383e 100644 --- a/xml-tree.cpp +++ b/xml-tree.cpp @@ -98,10 +98,17 @@ xml_tree::xml_tree(std::vector *tags_, tag_names = new std::vector(); tag_names->resize(tag_ids->size()); - + this->attribute_ids = new std::unordered_set(); std::unordered_map::iterator val; - for(val = this->tag_ids->begin(); val != this->tag_ids->end(); ++val) + for(val = this->tag_ids->begin(); val != this->tag_ids->end(); ++val){ (*tag_names)[val->second] = val->first; + if (val->first.size() >= 3 && + val->first[0] == '<' && + val->first[1] == '@' && + val->first[2] == '>'){ + this->attribute_ids->insert(val->second); + }; + } uint32_t max_tag = tag_names->size() - 1; bit_vector *tmp_bitmap = new bit_vector(npar, 1, 0); @@ -165,6 +172,7 @@ xml_tree::~xml_tree() delete [] tag_seq; delete tag_names; delete tag_ids; + delete attribute_ids; if (text_collection) delete text_collection; if (text_positions) delete text_positions; } @@ -282,6 +290,7 @@ xml_tree* xml_tree::load(int fd, char* name, bool load_tc, int sf) tree->par = loadTree(fp); //TODO use new api tree->tag_names = new std::vector(); tree->tag_ids = new std::unordered_map(); + tree->attribute_ids = new std::unordered_set(); std::string s; int ntags; @@ -296,6 +305,9 @@ xml_tree* xml_tree::load(int fd, char* name, bool load_tc, int sf) tree->tag_names->push_back(s); tree->tag_ids->insert(std::make_pair(s, static_cast(i))); + if (s.size() >= 3 && s[0] == '<' && s[1] == '@' && s[2] == '>'){ + tree->attribute_ids->insert(static_cast(i)); + }; }; diff --git a/xml-tree.hpp b/xml-tree.hpp index bfeff5f..436e2f9 100644 --- a/xml-tree.hpp +++ b/xml-tree.hpp @@ -3,6 +3,7 @@ #include +#include #include #include #include @@ -50,7 +51,7 @@ public: inline uint32_t num_tags() const; inline uint32_t subtree_size(node_t) const; inline uint32_t subtree_tags(node_t, tag_t) const; - inline uint32_t subtree_elements(node_t, tag_t*) const; + inline uint32_t subtree_elements(node_t) const; uint32_t num_children(node_t) const; uint32_t child_pos(node_t) const; @@ -138,6 +139,8 @@ private: //Mapping from tag_t identifiers to/from tagnames std::vector *tag_names; std::unordered_map *tag_ids; + //Set of tag ids that map to attribute nodes + std::unordered_set *attribute_ids; //Text index SXSI::TextCollection *text_collection; static_bitsequence *text_positions;