delete parbitmap;
this->tag_ids = tag_ids;
+
tag_names = new std::vector<std::string>();
tag_names->resize(tag_ids->size());
- for(auto val : *(this->tag_ids))
- (*this->tag_names)[val.second] = val.first;
+ this->attribute_ids = new std::unordered_set<xml_tree::tag_t>();
+ std::unordered_map<std::string, tag_t>::iterator val;
+ for(val = this->tag_ids->begin(); val != this->tag_ids->end(); ++val){
+ (*tag_names)[val->second] = val->first;
+ if (val->first.size() >= 3 &&
+ val->first[0] == '<' &&
+ val->first[1] == '@' &&
+ val->first[2] == '>'){
+ this->attribute_ids->insert(val->second);
+ };
+ }
uint32_t max_tag = tag_names->size() - 1;
bit_vector *tmp_bitmap = new bit_vector(npar, 1, 0);
delete [] tag_seq;
delete tag_names;
delete tag_ids;
+ delete attribute_ids;
if (text_collection) delete text_collection;
if (text_positions) delete text_positions;
}
tree->par = loadTree(fp); //TODO use new api
tree->tag_names = new std::vector<std::string>();
tree->tag_ids = new std::unordered_map<std::string, xml_tree::tag_t>();
+ tree->attribute_ids = new std::unordered_set<xml_tree::tag_t>();
std::string s;
int ntags;
tree->tag_names->push_back(s);
tree->tag_ids->insert(std::make_pair(s,
static_cast<xml_tree::tag_t>(i)));
+ if (s.size() >= 3 && s[0] == '<' && s[1] == '@' && s[2] == '>'){
+ tree->attribute_ids->insert(static_cast<xml_tree::tag_t>(i));
+ };
};
//tree->tags = static_sequence_bs::load(fp);
ufread(&tree->bits_per_tag, sizeof(uint), 1, fp);
- fprintf(stderr, "\nBits per tag: %u\n", tree->bits_per_tag);
+ //fprintf(stderr, "\nBits per tag: %u\n", tree->bits_per_tag);
ufread(&tree->tag_seq_len, sizeof(uint), 1, fp);
size_t size = uint_len(tree->bits_per_tag, tree->tag_seq_len);
tree->tag_seq = new uint[size];
i = 0;
else
i = text_positions->rank1(x-1);
-
- j = text_positions->rank1(y);
-// fprintf(stderr, "Rank of node %i is %i, rank of closing %i is %i\n", x, i, y, j);
+ j = text_positions->rank1(y);
+
if (i == j)
return std::make_pair(xml_tree::NIL, xml_tree::NIL);
else
return (int32_t) text_positions->rank1(x) - 1;
}
-unsigned char* xml_tree::get_text(int32_t id) const
+const char* xml_tree::get_text(int32_t id) const // Fetches text `id` from text_collection and returns it, stepping past a leading byte equal to 1 when present.
{
- unsigned char * s = text_collection->GetText(id);
+ const char * s = reinterpret_cast<const char*>(text_collection->GetText(id)); // NOTE(review): s is dereferenced on the next line without a null check -- confirm GetText never returns null.
return s + (s[0] == 1);
}
const char * xml_tree::get_tag_name_by_ref(xml_tree::tag_t tagid) const
{
-
- unsigned char *s;
if (tagid < 0 || tagid >= tag_names->size())
return "<INVALID TAG>";
- return (const char *) (*tag_names)[tagid].c_str();
+ return (*tag_names)[tagid].c_str(); // In-range ids yield the name stored in tag_names; the pointer refers to that std::string's buffer, so it is only valid while the entry is alive and unmodified.
}
xml_tree::tag_t xml_tree::register_tag(char *s)
{
- auto found = tag_ids->find(std::string(s));
+ std::unordered_map<std::string, tag_t>::iterator found;
+ found = tag_ids->find(std::string(s));
if (found == tag_ids->end())
return xml_tree::NIL_TAG_ID;
else
unsigned char * orig_text;
unsigned char * current_text;
- auto r = text_id_range(x);
+ std::pair<int32_t, int32_t> r = text_id_range(x);
if (r.first == xml_tree::NIL)
current_text = 0;
else {
uputs((const char*) &(get_tag_name_by_ref(tag(n))[3]), fd);
n++;
uputs("=\"", fd);
+ current_text += (current_text[0] == 1);
read = uprintf((const char*) current_text, fd);
current_text += read + 1;
uputc('"', fd);
label = tag(n);
} else
uputc('>', fd);
- } else {
+ } else {
uputs("/>", fd);
n++;
label = tag(n);
- };
+ };
};
} else do {
uputs("</", fd);
}
-// void xml_tree::print(xml_tree::node_t x, int fd, bool no_text)
-// {
-// if (print_buffer == 0) {
-// print_buffer = new std::string(BUFFER_SIZE, 0);
-// print_buffer->clear();
-// print_stack = new std::vector<std::string>();
-// print_stack->reserve(256);
-// };
-
-// xml_tree::node_t fin = bp_find_close(par, x);
-// xml_tree::node_t n = x;
-// xml_tree::tag_t label = tag(n);
-// unsigned char * current_text;
-
-
-// while (n <= fin) {
-
-// if (bp_inspect(par, n)) {
-// if (label == xml_tree::PCDATA_OPEN_TAG_ID){
-// if (no_text) {
-// uputs("<$/>", fd);
-// } else {
-// current_text = get_text(text_id(n));
-// uprintf( (const char*) (current_text + (current_text[0] == 1)), fd);
-
-// if (current_text && text_index_type != TextCollectionBuilder::index_type_default)
-// text_collection->DeleteText(current_text);
-
-// n += 2; // skip closin $
-// label = tag(n);
-// };
-// } else {
-// uputc('<', fd);
-// uput_str((*tag_names)[label], fd);
-// n++;
-// if (bp_inspect(par, n)) {
-// print_stack->push_back((*tag_names)[label]);
-// label = tag(n);
-// if (label == xml_tree::ATTRIBUTE_OPEN_TAG_ID) {
-// n++;
-// if (no_text) uputs("><@@>", fd);
-
-// while (bp_inspect(par, n))
-// if (no_text) {
-// uputc('<', fd);
-// uputs((const char*) &(get_tag_name_by_ref(tag(n))[3]), fd);
-// uputc('>', fd);
-// uputs("<$@/></", fd);
-// uputs((const char*) &(get_tag_name_by_ref(tag(n))[3]), fd);
-// uputc('>', fd);
-// n += 4;
-// } else {
-// uputc(' ', fd);
-// uputs((const char*) &(get_tag_name_by_ref(tag(n))[3]), fd);
-// n+= 2;
-// uputs("=\"", fd);
-// current_text = get_text(text_id(n));
-// uprintf((const char*) (current_text + (current_text[0] == 1)), fd);
-// if (current_text && text_index_type != TextCollectionBuilder::index_type_default)
-// text_collection->DeleteText(current_text);
-// uputc('"', fd);
-// n += 2;
-// };
-
-// if (no_text)
-// uputs("</@@>", fd);
-// else uputc('>', fd);
-// n++;
-// label = tag(n);
-// } else
-// uputc('>', fd);
-// } else {
-// uputs("/>", fd);
-// n++;
-// label = tag(n);
-// };
-// };
-// } else do {
-// uputs("</", fd);
-// uput_str(print_stack->back(), fd);
-// uputc('>', fd);
-// print_stack->pop_back();
-// n++;
-// } while (!bp_inspect(par, n) && !print_stack->empty());
-// label = tag(n);
-// };
-// uputc('\n', fd);
-
-// }
-
-
static inline uchar * next_char(uchar *s, size_t &numtexts)
{