X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=xml-tree.cpp;h=21c77c4ea27ec8a531073dc3358360c7fc63d0a5;hb=refs%2Fheads%2Fsmaneth-devel;hp=0e597e33f5471bb426f305c214bf320ba0e3c77b;hpb=32f5de396df8a4072a0756bdbd9ca37ddcc42004;p=SXSI%2FXMLTree.git diff --git a/xml-tree.cpp b/xml-tree.cpp index 0e597e3..21c77c4 100644 --- a/xml-tree.cpp +++ b/xml-tree.cpp @@ -95,14 +95,20 @@ xml_tree::xml_tree(std::vector *tags_, delete parbitmap; this->tag_ids = tag_ids; + tag_names = new std::vector(); tag_names->resize(tag_ids->size()); + this->attribute_ids = new std::unordered_set(); std::unordered_map::iterator val; - //for(auto val : *(this->tag_ids)) - //(*this->tag_names)[val.second] = val.first; - for(val = this->tag_ids->begin(); val != this->tag_ids->end(); ++val) - (*this->tag_names)[val->second] = val->first; - + for(val = this->tag_ids->begin(); val != this->tag_ids->end(); ++val){ + (*tag_names)[val->second] = val->first; + if (val->first.size() >= 3 && + val->first[0] == '<' && + val->first[1] == '@' && + val->first[2] == '>'){ + this->attribute_ids->insert(val->second); + }; + } uint32_t max_tag = tag_names->size() - 1; bit_vector *tmp_bitmap = new bit_vector(npar, 1, 0); @@ -166,6 +172,7 @@ xml_tree::~xml_tree() delete [] tag_seq; delete tag_names; delete tag_ids; + delete attribute_ids; if (text_collection) delete text_collection; if (text_positions) delete text_positions; } @@ -283,6 +290,7 @@ xml_tree* xml_tree::load(int fd, char* name, bool load_tc, int sf) tree->par = loadTree(fp); //TODO use new api tree->tag_names = new std::vector(); tree->tag_ids = new std::unordered_map(); + tree->attribute_ids = new std::unordered_set(); std::string s; int ntags; @@ -297,6 +305,9 @@ xml_tree* xml_tree::load(int fd, char* name, bool load_tc, int sf) tree->tag_names->push_back(s); tree->tag_ids->insert(std::make_pair(s, static_cast(i))); + if (s.size() >= 3 && s[0] == '<' && s[1] == '@' && s[2] == '>'){ + tree->attribute_ids->insert(static_cast(i)); + }; }; @@ -388,9 +399,9 @@ int32_t xml_tree::text_id(xml_tree::node_t x) const return (int32_t) text_positions->rank1(x) - 1; } -unsigned char* xml_tree::get_text(int32_t id) const +const char* xml_tree::get_text(int32_t id) const { - unsigned char * s = text_collection->GetText(id); + const char * s = reinterpret_cast(text_collection->GetText(id)); return s + (s[0] == 1); } @@ -439,12 +450,10 @@ void xml_tree::uputc(const char c, int fd) const char * xml_tree::get_tag_name_by_ref(xml_tree::tag_t tagid) const { - - unsigned char *s; if (tagid < 0 || tagid >= tag_names->size()) return ""; - return (const char *) (*tag_names)[tagid].c_str(); + return (*tag_names)[tagid].c_str(); } xml_tree::tag_t xml_tree::register_tag(char *s) @@ -559,6 +568,7 @@ void xml_tree::print(xml_tree::node_t x, int fd, bool no_text) uputs((const char*) &(get_tag_name_by_ref(tag(n))[3]), fd); n++; uputs("=\"", fd); + current_text += (current_text[0] == 1); read = uprintf((const char*) current_text, fd); current_text += read + 1; uputc('"', fd); @@ -596,97 +606,6 @@ void xml_tree::print(xml_tree::node_t x, int fd, bool no_text) } -// void xml_tree::print(xml_tree::node_t x, int fd, bool no_text) -// { -// if (print_buffer == 0) { -// print_buffer = new std::string(BUFFER_SIZE, 0); -// print_buffer->clear(); -// print_stack = new std::vector(); -// print_stack->reserve(256); -// }; - -// xml_tree::node_t fin = bp_find_close(par, x); -// xml_tree::node_t n = x; -// xml_tree::tag_t label = tag(n); -// unsigned char * current_text; - - -// while (n <= fin) { - -// if (bp_inspect(par, n)) { -// if (label == xml_tree::PCDATA_OPEN_TAG_ID){ -// if (no_text) { -// uputs("<$/>", fd); -// } else { -// current_text = get_text(text_id(n)); -// uprintf( (const char*) (current_text + (current_text[0] == 1)), fd); - -// if (current_text && text_index_type != TextCollectionBuilder::index_type_default) -// text_collection->DeleteText(current_text); - -// n += 2; // skip closin $ -// label = tag(n); -// }; -// } else { -// uputc('<', fd); -// uput_str((*tag_names)[label], fd); -// n++; -// if (bp_inspect(par, n)) { -// print_stack->push_back((*tag_names)[label]); -// label = tag(n); -// if (label == xml_tree::ATTRIBUTE_OPEN_TAG_ID) { -// n++; -// if (no_text) uputs("><@@>", fd); - -// while (bp_inspect(par, n)) -// if (no_text) { -// uputc('<', fd); -// uputs((const char*) &(get_tag_name_by_ref(tag(n))[3]), fd); -// uputc('>', fd); -// uputs("<$@/>', fd); -// n += 4; -// } else { -// uputc(' ', fd); -// uputs((const char*) &(get_tag_name_by_ref(tag(n))[3]), fd); -// n+= 2; -// uputs("=\"", fd); -// current_text = get_text(text_id(n)); -// uprintf((const char*) (current_text + (current_text[0] == 1)), fd); -// if (current_text && text_index_type != TextCollectionBuilder::index_type_default) -// text_collection->DeleteText(current_text); -// uputc('"', fd); -// n += 2; -// }; - -// if (no_text) -// uputs("", fd); -// else uputc('>', fd); -// n++; -// label = tag(n); -// } else -// uputc('>', fd); -// } else { -// uputs("/>", fd); -// n++; -// label = tag(n); -// }; -// }; -// } else do { -// uputs("back(), fd); -// uputc('>', fd); -// print_stack->pop_back(); -// n++; -// } while (!bp_inspect(par, n) && !print_stack->empty()); -// label = tag(n); -// }; -// uputc('\n', fd); - -// } - - static inline uchar * next_char(uchar *s, size_t &numtexts) {