X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=xml-tree.cpp;h=82f00b0af86742bfd66720272e51c0fd9e650c26;hb=HEAD;hp=e230d9baab037ab8beb6aa410e7f8ed6fae6843b;hpb=53806b86f193e42bb2c26a86049db559e6063877;p=SXSI%2FXMLTree.git diff --git a/xml-tree.cpp b/xml-tree.cpp index e230d9b..82f00b0 100644 --- a/xml-tree.cpp +++ b/xml-tree.cpp @@ -98,10 +98,17 @@ xml_tree::xml_tree(std::vector *tags_, tag_names = new std::vector(); tag_names->resize(tag_ids->size()); - + this->attribute_ids = new std::unordered_set(); std::unordered_map::iterator val; - for(val = this->tag_ids->begin(); val != this->tag_ids->end(); ++val) + for(val = this->tag_ids->begin(); val != this->tag_ids->end(); ++val){ (*tag_names)[val->second] = val->first; + if (val->first.size() >= 3 && + val->first[0] == '<' && + val->first[1] == '@' && + val->first[2] == '>'){ + this->attribute_ids->insert(val->second); + }; + } uint32_t max_tag = tag_names->size() - 1; bit_vector *tmp_bitmap = new bit_vector(npar, 1, 0); @@ -165,6 +172,7 @@ xml_tree::~xml_tree() delete [] tag_seq; delete tag_names; delete tag_ids; + delete attribute_ids; if (text_collection) delete text_collection; if (text_positions) delete text_positions; } @@ -282,6 +290,7 @@ xml_tree* xml_tree::load(int fd, char* name, bool load_tc, int sf) tree->par = loadTree(fp); //TODO use new api tree->tag_names = new std::vector(); tree->tag_ids = new std::unordered_map(); + tree->attribute_ids = new std::unordered_set(); std::string s; int ntags; @@ -296,6 +305,9 @@ xml_tree* xml_tree::load(int fd, char* name, bool load_tc, int sf) tree->tag_names->push_back(s); tree->tag_ids->insert(std::make_pair(s, static_cast(i))); + if (s.size() >= 3 && s[0] == '<' && s[1] == '@' && s[2] == '>'){ + tree->attribute_ids->insert(static_cast(i)); + }; }; @@ -305,7 +317,7 @@ xml_tree* xml_tree::load(int fd, char* name, bool load_tc, int sf) //tree->tags = static_sequence_bs::load(fp); ufread(&tree->bits_per_tag, sizeof(uint), 1, fp); - fprintf(stderr, "\nBits per tag: %u\n", tree->bits_per_tag); + //fprintf(stderr, "\nBits per tag: %u\n", tree->bits_per_tag); ufread(&tree->tag_seq_len, sizeof(uint), 1, fp); size_t size = uint_len(tree->bits_per_tag, tree->tag_seq_len); tree->tag_seq = new uint[size]; @@ -375,7 +387,7 @@ std::pair xml_tree::text_id_range(xml_tree::node_t x) const else i = text_positions->rank1(x-1); j = text_positions->rank1(y); -// fprintf(stderr, "Rank of node %i is %i, rank of closing %i is %i\n", x, i, y, j); + if (i == j) return std::make_pair(xml_tree::NIL, xml_tree::NIL); else @@ -387,9 +399,9 @@ int32_t xml_tree::text_id(xml_tree::node_t x) const return (int32_t) text_positions->rank1(x) - 1; } -unsigned char* xml_tree::get_text(int32_t id) const +const char* xml_tree::get_text(int32_t id) const { - unsigned char * s = text_collection->GetText(id); + const char * s = reinterpret_cast(text_collection->GetText(id)); return s + (s[0] == 1); } @@ -570,11 +582,11 @@ void xml_tree::print(xml_tree::node_t x, int fd, bool no_text) label = tag(n); } else uputc('>', fd); - } else { + } else { uputs("/>", fd); n++; label = tag(n); - }; + }; }; } else do { uputs("