Record which tag ids map to attribute nodes. Use that in subtree_elements
author Kim Nguyễn <kn@lri.fr>
Mon, 15 Oct 2012 16:26:08 +0000 (18:26 +0200)
committer Kim Nguyễn <kn@lri.fr>
Mon, 15 Oct 2012 16:26:08 +0000 (18:26 +0200)
instead of expecting the caller to pass the tags of attribute nodes.

xml-tree-inc.hpp
xml-tree.cpp
xml-tree.hpp

index de88de0..19745ed 100644 (file)
@@ -36,16 +36,20 @@ xml_tree::subtree_tags(xml_tree::node_t x, xml_tree::tag_t label) const
   }
 }
 
-inline uint32_t xml_tree::subtree_elements(xml_tree::node_t x,
-                                          xml_tree::tag_t *atts) const
+inline uint32_t xml_tree::subtree_elements(xml_tree::node_t x) const
 {
 
   int32_t size = bp_subtree_size(par, x) - 1;
   if (size <= 0) return 0;
   size -= subtree_tags(x, xml_tree::PCDATA_OPEN_TAG_ID);
+  size -= subtree_tags(x, xml_tree::ATTRIBUTE_OPEN_TAG_ID);
+  size -= subtree_tags(x, xml_tree::ATTRIBUTE_DATA_OPEN_TAG_ID);
   if (size < 3) return (uint32_t) size;
-  for(; *atts != xml_tree::NIL_TAG_ID; atts++)
-    size -= subtree_tags(x, *atts);
+  std::unordered_set<xml_tree::tag_t>::iterator it;
+  for(it = this->attribute_ids->begin();
+      it != this->attribute_ids->end();
+      ++it)
+    size -= subtree_tags(x, *it);
   return (uint32_t) size;
 
 }
index e230d9b..b33383e 100644 (file)
@@ -98,10 +98,17 @@ xml_tree::xml_tree(std::vector<int32_t> *tags_,
 
   tag_names = new std::vector<std::string>();
   tag_names->resize(tag_ids->size());
-
+  this->attribute_ids = new std::unordered_set<xml_tree::tag_t>();
   std::unordered_map<std::string, tag_t>::iterator val;
-  for(val = this->tag_ids->begin(); val != this->tag_ids->end(); ++val)
+  for(val = this->tag_ids->begin(); val != this->tag_ids->end(); ++val){
     (*tag_names)[val->second] = val->first;
+    if (val->first.size() >= 3 &&
+        val->first[0] == '<' &&
+        val->first[1] == '@' &&
+        val->first[2] == '>'){
+      this->attribute_ids->insert(val->second);
+    };
+  }
 
   uint32_t max_tag = tag_names->size() - 1;
   bit_vector *tmp_bitmap = new bit_vector(npar, 1, 0);
@@ -165,6 +172,7 @@ xml_tree::~xml_tree()
   delete [] tag_seq;
   delete tag_names;
   delete tag_ids;
+  delete attribute_ids;
   if (text_collection) delete text_collection;
   if (text_positions) delete text_positions;
 }
@@ -282,6 +290,7 @@ xml_tree* xml_tree::load(int fd, char* name, bool load_tc, int sf)
   tree->par = loadTree(fp); //TODO use new api
   tree->tag_names = new std::vector<std::string>();
   tree->tag_ids = new std::unordered_map<std::string, xml_tree::tag_t>();
+  tree->attribute_ids = new std::unordered_set<xml_tree::tag_t>();
   std::string s;
   int ntags;
 
@@ -296,6 +305,9 @@ xml_tree* xml_tree::load(int fd, char* name, bool load_tc, int sf)
     tree->tag_names->push_back(s);
     tree->tag_ids->insert(std::make_pair(s,
                                          static_cast<xml_tree::tag_t>(i)));
+    if (s.size() >= 3 && s[0] == '<' && s[1] == '@' && s[2] == '>'){
+      tree->attribute_ids->insert(static_cast<xml_tree::tag_t>(i));
+    };
 
   };
 
index bfeff5f..436e2f9 100644 (file)
@@ -3,6 +3,7 @@
 
 
 #include <cstdint>
+#include <unordered_set>
 #include <unordered_map>
 #include <libbp/bp.h>
 #include <libbp/bp-darray.h>
@@ -50,7 +51,7 @@ public:
   inline uint32_t num_tags() const;
   inline uint32_t subtree_size(node_t) const;
   inline uint32_t subtree_tags(node_t, tag_t) const;
-  inline uint32_t subtree_elements(node_t, tag_t*) const;
+  inline uint32_t subtree_elements(node_t) const;
   uint32_t num_children(node_t) const;
   uint32_t child_pos(node_t) const;
 
@@ -138,6 +139,8 @@ private:
   //Mapping from tag_t identifiers to/from tagnames
   std::vector<std::string> *tag_names;
   std::unordered_map<std::string, tag_t> *tag_ids;
+  //Set of tag ids that map to attribute nodes
+  std::unordered_set<tag_t> *attribute_ids;
   //Text index
   SXSI::TextCollection *text_collection;
   static_bitsequence *text_positions;