inline uint32_t xml_tree::subtree_size(xml_tree::node_t x) const
{
- return bp_subtree_size(this->par, x);
+ return x == root() ? this->par->n/2 : bp_subtree_size(this->par, x);
}
inline uint32_t
count += (tag(i) == label);
return count;
} else {
- return tags->rank(label, y) - tags->rank(label, x);
+ return tags[label]->rank(y) - tags[label]->rank(x);
};
}
return x;
}
-inline xml_tree::node_t xml_tree::tagged_next(node_t x, tag_t tag) const
+
+inline xml_tree::node_t xml_tree::tagged_next_close(xml_tree::node_t x,
+ xml_tree::tag_t label) const
+{
+ xml_tree::node_t i=x;
+ for(xml_tree::node_t i = x+1; i < std::min(x+100, this->par->n); i++)
+ if (tag(i) == label)
+ return i;
+
+ return tags[label]->select_next(i);
+}
+
+inline xml_tree::node_t xml_tree::tagged_next(xml_tree::node_t x,
+ xml_tree::tag_t label) const
{
- return this->tags->select_next(tag, x);
+ return tags[label]->select_next(x);
}
inline xml_tree::node_t
inline xml_tree::node_t
xml_tree::tagged_following_before(xml_tree::node_t x,
xml_tree::tag_t tag,
- xml_tree::node_t limit) const
+ xml_tree::node_t limit) const
{
xml_tree::node_t close = bp_find_close(this->par, x);
xml_tree::node_t s = tagged_next(close, tag);
return sibling;
}
+inline xml_tree::node_t
+xml_tree::select_descendant(xml_tree::node_t x, xml_tree::tag_t *tags) const
+{
+ if (is_leaf(x)) return xml_tree::NIL;
+ auto min = xml_tree::NIL;
+ auto aux = xml_tree::NIL;
+ for(; *tags != xml_tree::NIL_TAG_ID; ++tags){
+ aux = tagged_next(x, *tags);
+ if ((unsigned int) aux < (unsigned int) min) min = aux;
+ };
+ return (min == xml_tree::NIL || is_ancestor(x, min)) ? min : xml_tree::NIL;
+}
+
+
+inline xml_tree::node_t
+xml_tree::select_following_before(xml_tree::node_t x, xml_tree::tag_t *tags,
+ xml_tree::node_t limit) const
+{
+ auto min = xml_tree::NIL;
+ auto aux = xml_tree::NIL;
+ auto close = bp_find_close(this->par, x);
+
+ for(; *tags != xml_tree::NIL_TAG_ID; ++tags){
+ aux = tagged_next(close, *tags);
+ if ((unsigned int) aux < (unsigned int) min) min = aux;
+ }
+
+ return (min == xml_tree::NIL || min < limit) ? min : xml_tree::NIL;
+}
+
+
xml_tree::node_t xml_tree::closing(xml_tree::node_t x) const
{
return bp_find_close(this->par, x);
print_buffer = 0;
}
-xml_tree::xml_tree(std::vector<int32_t> *tags,
+xml_tree::xml_tree(std::vector<int32_t> *tags_,
std::unordered_map<std::string, int32_t> *tag_ids,
bit_vector *parbitmap,
bool disable_tc,
size_t npar = parbitmap->size();
parbitmap->pack();
-
par = bp_construct(npar,
parbitmap->get_vector_ptr(),
OPT_DEGREE);
(*this->tag_names)[val.second] = val.first;
uint32_t max_tag = tag_names->size() - 1;
- static_bitsequence_builder *bmb = new static_bitsequence_builder_sdarray();
- alphabet_mapper *am = new alphabet_mapper_none();
- this->tags = new static_sequence_bs((uint32_t*)&((*tags)[0]), npar, am, bmb);
+ bit_vector *tmp_bitmap = new bit_vector(npar, 1, 0);
+ uint32_t * buff = tmp_bitmap->get_vector_ptr();
+ for(int32_t i = 0; i <= max_tag; i++){
+ uint32_t ones = 0;
+ for (size_t k = 0; k < npar; k++){
+ bool t = (i == (*tags_)[k]);
+ tmp_bitmap->set(k, t);
+ ones += t;
+ };
+
+ this->tags.push_back(new static_bitsequence_sdarray(buff, npar, ones));
+ }
+ delete tmp_bitmap;
+ delete [] buff;
+
+ //static_bitsequence_builder *bmb = new static_bitsequence_builder_sdarray();
+ //alphabet_mapper *am = new alphabet_mapper_none();
+
+// this->tags = new static_sequence_bs((uint32_t*)&((*tags)[0]), npar, am, bmb);
+
bits_per_tag = bits8(max_tag);
tag_seq_len = npar;
tag_seq = new uint32_t[uint_len(bits_per_tag, tag_seq_len)];
for(size_t i = 0; i < (size_t) tag_seq_len; i++)
- set_field(tag_seq, bits_per_tag, i, (uint32_t) (*tags)[i]);
+ set_field(tag_seq, bits_per_tag, i, (uint32_t) (*tags_)[i]);
- delete tags;
+ delete tags_;
if (disable_tc) {
text_positions = 0;
delete tc_builder;
};
-
}
xml_tree::~xml_tree()
{
bp_delete(par);
- delete tags;
+
+ for(int i = 0; i < tag_names->size(); i++){
+ delete tags[i];
+ tags[i] = 0;
+ };
+
+
delete [] tag_seq;
delete tag_names;
delete tag_ids;
-
if (text_collection) delete text_collection;
if (text_positions) delete text_positions;
}
return select_sibling(child, tags);
}
-xml_tree::node_t
-xml_tree::select_descendant(xml_tree::node_t x, xml_tree::tag_t *tags) const
-{
- if (is_leaf(x)) return xml_tree::NIL;
- auto min = xml_tree::NIL;
- auto aux = xml_tree::NIL;
- for(; *tags != xml_tree::NIL_TAG_ID; ++tags){
- aux = tagged_descendant(x, *tags);
- if ((unsigned int) aux < (unsigned int) min) min = aux;
- };
- return min;
-}
xml_tree::node_t
xml_tree::select_sibling(xml_tree::node_t x, xml_tree::tag_t *tags) const
return sibling;
}
-xml_tree::node_t
-xml_tree::select_following_before(xml_tree::node_t x, xml_tree::tag_t *tags,
- xml_tree::node_t limit) const
-{
- auto min = xml_tree::NIL;
- auto aux = xml_tree::NIL;
- for(; *tags != xml_tree::NIL_TAG_ID; ++tags){
- aux = tagged_following_before(x, *tags, limit);
- if ((unsigned int) aux < (unsigned int) min) min = aux;
- }
- return min;
-}
void xml_tree::save(int fd, char* s)
{
for (int i = 0; i < ntags;i++)
fprintf(fp, "%s\n", tag_names->at(i).c_str());
- tags->save(fp);
+ //tags->save(fp);
+ for(int i = 0; i < ntags; i++)
+ tags[i]->save(fp);
ufwrite(&bits_per_tag, sizeof(uint),1,fp);
ufwrite(&tag_seq_len, sizeof(uint),1,fp);
};
- tree->tags = static_sequence::load(fp);
+ for(int i = 0; i < ntags; i++){
+ tree->tags.push_back(static_bitsequence_sdarray::load(fp));
+ };
+
+ //tree->tags = static_sequence_bs::load(fp);
ufread(&tree->bits_per_tag, sizeof(uint), 1, fp);
fprintf(stderr, "\nBits per tag: %u\n", tree->bits_per_tag);
ufread(&tree->tag_seq_len, sizeof(uint), 1, fp);
return i;
}
-// void xml_tree::print(xml_tree::node_t x, int fd, bool no_text)
-// {
-
-// if (print_buffer == 0) {
-// print_buffer = new std::string(BUFFER_SIZE, 0);
-// print_buffer->clear();
-// print_stack = new std::vector<std::string>();
-// print_stack->reserve(256);
-// };
-
-// xml_tree::node_t fin = bp_find_close(par, x);
-// xml_tree::node_t n = x;
-// xml_tree::tag_t label = tag(n);
-// unsigned char * orig_text;
-// unsigned char * current_text;
-
-// auto r = text_id_range(x);
-// if (r.first == xml_tree::NIL)
-// current_text = 0;
-// else
-// current_text = text_collection->GetText(r.first, r.second);
-// current_text += (current_text[0] == 1);
-
-// orig_text = current_text;
-
-// size_t read = 0;
-
-// while (n <= fin) {
-
-// if (bp_inspect(par, n)) {
-// if (label == xml_tree::PCDATA_OPEN_TAG_ID){
-// if (no_text) {
-// uputs("<$/>", fd);
-// } else {
-// read = uprintf( (const char*) current_text, fd);
-// current_text += read + 1;
-// };
-// n += 2; // skip closin $
-// label = tag(n);
-// } else {
-
-// uputc('<', fd);
-// uput_str((*tag_names)[label], fd);
-// n++;
-// if (bp_inspect(par, n)) {
-// print_stack->push_back((*tag_names)[label]);
-// label = tag(n);
-// if (label == xml_tree::ATTRIBUTE_OPEN_TAG_ID) {
-// n++;
-// if (no_text) uputs("><@@>", fd);
-
-// while (bp_inspect(par, n))
-// if (no_text) {
-// uputc('<', fd);
-// uputs((const char*) &(get_tag_name_by_ref(tag(n))[3]), fd);
-// uputc('>', fd);
-// uputs("<$@/></", fd);
-// uputs((const char*) &(get_tag_name_by_ref(tag(n))[3]), fd);
-// uputc('>', fd);
-// n += 4;
-// } else {
-// uputc(' ', fd);
-// uputs((const char*) &(get_tag_name_by_ref(tag(n))[3]), fd);
-// n++;
-// uputs("=\"", fd);
-// read = uprintf((const char*) current_text, fd);
-// current_text += read + 1;
-// uputc('"', fd);
-// n += 3;
-// };
-
-// if (no_text)
-// uputs("</@@>", fd);
-// else uputc('>', fd);
-// n++;
-// label = tag(n);
-// } else
-// uputc('>', fd);
-// } else {
-// uputs("/>", fd);
-// n++;
-// label = tag(n);
-// };
-// };
-// } else do {
-// uputs("</", fd);
-// uput_str(print_stack->back(), fd);
-// uputc('>', fd);
-// print_stack->pop_back();
-// n++;
-// } while (!bp_inspect(par, n) && !print_stack->empty());
-// label = tag(n);
-// };
-// uputc('\n', fd);
-// if (orig_text && text_index_type != TextCollectionBuilder::index_type_default)
-// if (*orig_text == '\0')
-// text_collection->DeleteText(orig_text - 1);
-// else
-// text_collection->DeleteText(orig_text);
-
-// }
void xml_tree::print(xml_tree::node_t x, int fd, bool no_text)
{
- for (int i = 0; i < 50; i++)
- fprintf(stderr, "Printing text number %i: %s\n", i, get_text(i));
-
+
if (print_buffer == 0) {
print_buffer = new std::string(BUFFER_SIZE, 0);
print_buffer->clear();
xml_tree::node_t fin = bp_find_close(par, x);
xml_tree::node_t n = x;
xml_tree::tag_t label = tag(n);
+ unsigned char * orig_text;
unsigned char * current_text;
+ auto r = text_id_range(x);
+ if (r.first == xml_tree::NIL)
+ current_text = 0;
+ else
+ current_text = text_collection->GetText(r.first, r.second);
+ current_text += (current_text[0] == 1);
+
+ orig_text = current_text;
+
+ size_t read = 0;
while (n <= fin) {
if (no_text) {
uputs("<$/>", fd);
} else {
- current_text = get_text(text_id(n));
- uprintf( (const char*) (current_text + (current_text[0] == 1)), fd);
-
- if (current_text && text_index_type != TextCollectionBuilder::index_type_default)
- text_collection->DeleteText(current_text);
-
- n += 2; // skip closin $
- label = tag(n);
- };
+ read = uprintf( (const char*) current_text, fd);
+ current_text += read + 1;
+ };
+ n += 2; // skip closin $
+ label = tag(n);
} else {
- uputc('<', fd);
- uput_str((*tag_names)[label], fd);
- n++;
- if (bp_inspect(par, n)) {
- print_stack->push_back((*tag_names)[label]);
- label = tag(n);
- if (label == xml_tree::ATTRIBUTE_OPEN_TAG_ID) {
- n++;
- if (no_text) uputs("><@@>", fd);
-
- while (bp_inspect(par, n))
- if (no_text) {
- uputc('<', fd);
- uputs((const char*) &(get_tag_name_by_ref(tag(n))[3]), fd);
- uputc('>', fd);
- uputs("<$@/></", fd);
- uputs((const char*) &(get_tag_name_by_ref(tag(n))[3]), fd);
- uputc('>', fd);
- n += 4;
- } else {
- uputc(' ', fd);
- uputs((const char*) &(get_tag_name_by_ref(tag(n))[3]), fd);
- n+= 2;
- uputs("=\"", fd);
- current_text = get_text(text_id(n));
- uprintf((const char*) (current_text + (current_text[0] == 1)), fd);
- if (current_text && text_index_type != TextCollectionBuilder::index_type_default)
- text_collection->DeleteText(current_text);
- uputc('"', fd);
- n += 2;
- };
-
- if (no_text)
- uputs("</@@>", fd);
- else uputc('>', fd);
- n++;
- label = tag(n);
- } else
- uputc('>', fd);
- } else {
- uputs("/>", fd);
- n++;
- label = tag(n);
- };
- };
- } else do {
- uputs("</", fd);
- uput_str(print_stack->back(), fd);
- uputc('>', fd);
- print_stack->pop_back();
- n++;
- } while (!bp_inspect(par, n) && !print_stack->empty());
- label = tag(n);
- };
+
+ uputc('<', fd);
+ uput_str((*tag_names)[label], fd);
+ n++;
+ if (bp_inspect(par, n)) {
+ print_stack->push_back((*tag_names)[label]);
+ label = tag(n);
+ if (label == xml_tree::ATTRIBUTE_OPEN_TAG_ID) {
+ n++;
+ if (no_text) uputs("><@@>", fd);
+
+ while (bp_inspect(par, n))
+ if (no_text) {
+ uputc('<', fd);
+ uputs((const char*) &(get_tag_name_by_ref(tag(n))[3]), fd);
+ uputc('>', fd);
+ uputs("<$@/></", fd);
+ uputs((const char*) &(get_tag_name_by_ref(tag(n))[3]), fd);
+ uputc('>', fd);
+ n += 4;
+ } else {
+ uputc(' ', fd);
+ uputs((const char*) &(get_tag_name_by_ref(tag(n))[3]), fd);
+ n++;
+ uputs("=\"", fd);
+ read = uprintf((const char*) current_text, fd);
+ current_text += read + 1;
+ uputc('"', fd);
+ n += 3;
+ };
+
+ if (no_text)
+ uputs("</@@>", fd);
+ else uputc('>', fd);
+ n++;
+ label = tag(n);
+ } else
+ uputc('>', fd);
+ } else {
+ uputs("/>", fd);
+ n++;
+ label = tag(n);
+ };
+ };
+ } else do {
+ uputs("</", fd);
+ uput_str(print_stack->back(), fd);
+ uputc('>', fd);
+ print_stack->pop_back();
+ n++;
+ } while (!bp_inspect(par, n) && !print_stack->empty());
+ label = tag(n);
+ };
uputc('\n', fd);
+ if (orig_text && text_index_type != TextCollectionBuilder::index_type_default)
+ if (*orig_text == '\0')
+ text_collection->DeleteText(orig_text - 1);
+ else
+ text_collection->DeleteText(orig_text);
}
+
+// void xml_tree::print(xml_tree::node_t x, int fd, bool no_text)
+// {
+// if (print_buffer == 0) {
+// print_buffer = new std::string(BUFFER_SIZE, 0);
+// print_buffer->clear();
+// print_stack = new std::vector<std::string>();
+// print_stack->reserve(256);
+// };
+
+// xml_tree::node_t fin = bp_find_close(par, x);
+// xml_tree::node_t n = x;
+// xml_tree::tag_t label = tag(n);
+// unsigned char * current_text;
+
+
+// while (n <= fin) {
+
+// if (bp_inspect(par, n)) {
+// if (label == xml_tree::PCDATA_OPEN_TAG_ID){
+// if (no_text) {
+// uputs("<$/>", fd);
+// } else {
+// current_text = get_text(text_id(n));
+// uprintf( (const char*) (current_text + (current_text[0] == 1)), fd);
+
+// if (current_text && text_index_type != TextCollectionBuilder::index_type_default)
+// text_collection->DeleteText(current_text);
+
+// n += 2; // skip closin $
+// label = tag(n);
+// };
+// } else {
+// uputc('<', fd);
+// uput_str((*tag_names)[label], fd);
+// n++;
+// if (bp_inspect(par, n)) {
+// print_stack->push_back((*tag_names)[label]);
+// label = tag(n);
+// if (label == xml_tree::ATTRIBUTE_OPEN_TAG_ID) {
+// n++;
+// if (no_text) uputs("><@@>", fd);
+
+// while (bp_inspect(par, n))
+// if (no_text) {
+// uputc('<', fd);
+// uputs((const char*) &(get_tag_name_by_ref(tag(n))[3]), fd);
+// uputc('>', fd);
+// uputs("<$@/></", fd);
+// uputs((const char*) &(get_tag_name_by_ref(tag(n))[3]), fd);
+// uputc('>', fd);
+// n += 4;
+// } else {
+// uputc(' ', fd);
+// uputs((const char*) &(get_tag_name_by_ref(tag(n))[3]), fd);
+// n+= 2;
+// uputs("=\"", fd);
+// current_text = get_text(text_id(n));
+// uprintf((const char*) (current_text + (current_text[0] == 1)), fd);
+// if (current_text && text_index_type != TextCollectionBuilder::index_type_default)
+// text_collection->DeleteText(current_text);
+// uputc('"', fd);
+// n += 2;
+// };
+
+// if (no_text)
+// uputs("</@@>", fd);
+// else uputc('>', fd);
+// n++;
+// label = tag(n);
+// } else
+// uputc('>', fd);
+// } else {
+// uputs("/>", fd);
+// n++;
+// label = tag(n);
+// };
+// };
+// } else do {
+// uputs("</", fd);
+// uput_str(print_stack->back(), fd);
+// uputc('>', fd);
+// print_stack->pop_back();
+// n++;
+// } while (!bp_inspect(par, n) && !print_stack->empty());
+// label = tag(n);
+// };
+// uputc('\n', fd);
+
+// }