npar,
32);
//delete [] textbm;
- delete textbitmap;
+ //delete textbitmap;
this->text_index_type = idx_type;
text_collection = tc_builder->InitTextCollection();
tree->tags = static_sequence::load(fp);
ufread(&tree->bits_per_tag, sizeof(uint), 1, fp);
+ fprintf(stderr, "\nBits per tag: %u\n", tree->bits_per_tag);
ufread(&tree->tag_seq_len, sizeof(uint), 1, fp);
size_t size = uint_len(tree->bits_per_tag, tree->tag_seq_len);
tree->tag_seq = new uint[size];
return tree;
}
-uint32_t xml_tree::subtree_elements(xml_tree::node_t x) const
-{
-
- uint32_t size = bp_subtree_size(par, x);
- if (x == root()){
- x = bp_first_child(par,x);
- size = size - 1;
- };
-
- int s = x + 2*size - 1;
- int ntext =
- tags->rank(xml_tree::PCDATA_OPEN_TAG_ID, s) -
- tags->rank(xml_tree::PCDATA_OPEN_TAG_ID, x-1);
- size = size - ntext;
- xml_tree::node_t fin = bp_find_close(par, x);
- xml_tree::node_t y = tags->select_next(xml_tree::ATTRIBUTE_OPEN_TAG_ID, x);
- while (y != xml_tree::NIL && y < fin){
- size -= subtree_size(y);
- y = tags->select_next(xml_tree::ATTRIBUTE_OPEN_TAG_ID, y);
- };
- return size;
- }
uint32_t xml_tree::num_children(xml_tree::node_t x) const
{
std::pair<int32_t, int32_t> xml_tree::text_id_range(xml_tree::node_t x) const
{
int32_t i, j;
- i = text_positions->rank1(x - 1);
+ i = text_positions->rank1(x) - 1;
j = text_positions->rank1(x + 2 * bp_subtree_size(par, x) - 2);
if (i == j)
return std::make_pair(xml_tree::NIL, xml_tree::NIL);
void xml_tree::uflush_r(int fd, size_t s)
{
- if (s == 0) return;
+ if (s == 0 || print_buffer == 0 || fd <= 0) return;
size_t written;
while (1) {
written = write(fd, print_buffer->data(), s);
return i;
}
+// void xml_tree::print(xml_tree::node_t x, int fd, bool no_text)
+// {
+
+// if (print_buffer == 0) {
+// print_buffer = new std::string(BUFFER_SIZE, 0);
+// print_buffer->clear();
+// print_stack = new std::vector<std::string>();
+// print_stack->reserve(256);
+// };
+
+// xml_tree::node_t fin = bp_find_close(par, x);
+// xml_tree::node_t n = x;
+// xml_tree::tag_t label = tag(n);
+// unsigned char * orig_text;
+// unsigned char * current_text;
+
+// auto r = text_id_range(x);
+// if (r.first == xml_tree::NIL)
+// current_text = 0;
+// else
+// current_text = text_collection->GetText(r.first, r.second);
+// current_text += (current_text[0] == 1);
+
+// orig_text = current_text;
+
+// size_t read = 0;
+
+// while (n <= fin) {
+
+// if (bp_inspect(par, n)) {
+// if (label == xml_tree::PCDATA_OPEN_TAG_ID){
+// if (no_text) {
+// uputs("<$/>", fd);
+// } else {
+// read = uprintf( (const char*) current_text, fd);
+// current_text += read + 1;
+// };
+// n += 2; // skip closing $
+// label = tag(n);
+// } else {
+
+// uputc('<', fd);
+// uput_str((*tag_names)[label], fd);
+// n++;
+// if (bp_inspect(par, n)) {
+// print_stack->push_back((*tag_names)[label]);
+// label = tag(n);
+// if (label == xml_tree::ATTRIBUTE_OPEN_TAG_ID) {
+// n++;
+// if (no_text) uputs("><@@>", fd);
+
+// while (bp_inspect(par, n))
+// if (no_text) {
+// uputc('<', fd);
+// uputs((const char*) &(get_tag_name_by_ref(tag(n))[3]), fd);
+// uputc('>', fd);
+// uputs("<$@/></", fd);
+// uputs((const char*) &(get_tag_name_by_ref(tag(n))[3]), fd);
+// uputc('>', fd);
+// n += 4;
+// } else {
+// uputc(' ', fd);
+// uputs((const char*) &(get_tag_name_by_ref(tag(n))[3]), fd);
+// n++;
+// uputs("=\"", fd);
+// read = uprintf((const char*) current_text, fd);
+// current_text += read + 1;
+// uputc('"', fd);
+// n += 3;
+// };
+
+// if (no_text)
+// uputs("</@@>", fd);
+// else uputc('>', fd);
+// n++;
+// label = tag(n);
+// } else
+// uputc('>', fd);
+// } else {
+// uputs("/>", fd);
+// n++;
+// label = tag(n);
+// };
+// };
+// } else do {
+// uputs("</", fd);
+// uput_str(print_stack->back(), fd);
+// uputc('>', fd);
+// print_stack->pop_back();
+// n++;
+// } while (!bp_inspect(par, n) && !print_stack->empty());
+// label = tag(n);
+// };
+// uputc('\n', fd);
+// if (orig_text && text_index_type != TextCollectionBuilder::index_type_default)
+// if (*orig_text == '\0')
+// text_collection->DeleteText(orig_text - 1);
+// else
+// text_collection->DeleteText(orig_text);
+
+// }
void xml_tree::print(xml_tree::node_t x, int fd, bool no_text)
{
-
+ for (int i = 0; i < 50; i++)
+ fprintf(stderr, "Printing text number %i: %s\n", i, get_text(i));
+
if (print_buffer == 0) {
print_buffer = new std::string(BUFFER_SIZE, 0);
print_buffer->clear();
xml_tree::node_t fin = bp_find_close(par, x);
xml_tree::node_t n = x;
xml_tree::tag_t label = tag(n);
- unsigned char * orig_text;
unsigned char * current_text;
- auto r = text_id_range(x);
- if (r.first == xml_tree::NIL)
- current_text = 0;
- else
- current_text = get_text(r.first);
-
- orig_text = current_text;
- size_t read = 0;
while (n <= fin) {
if (no_text) {
uputs("<$/>", fd);
} else {
- read = uprintf( (const char*) current_text, fd);
- current_text += read + 1;
- };
- n += 2; // skip closin $
- label = tag(n);
- } else {
+ current_text = get_text(text_id(n));
+ uprintf( (const char*) (current_text + (current_text[0] == 1)), fd);
- uputc('<', fd);
- uput_str((*tag_names)[label], fd);
- n++;
- if (bp_inspect(par, n)) {
- print_stack->push_back((*tag_names)[label]);
- label = tag(n);
- if (label == xml_tree::ATTRIBUTE_OPEN_TAG_ID) {
- n++;
- if (no_text) uputs("><@@>", fd);
-
- while (bp_inspect(par, n))
- if (no_text) {
- uputc('<', fd);
- uputs((const char*) &(get_tag_name_by_ref(tag(n))[3]), fd);
- uputc('>', fd);
- uputs("<$@/></", fd);
- uputs((const char*) &(get_tag_name_by_ref(tag(n))[3]), fd);
- uputc('>', fd);
- n += 4;
- } else {
- uputc(' ', fd);
- uputs((const char*) &(get_tag_name_by_ref(tag(n))[3]), fd);
- n++;
- uputs("=\"", fd);
- read = uprintf((const char*) current_text, fd);
- current_text += read + 1;
- uputc('"', fd);
- n += 3;
- };
-
- if (no_text)
- uputs("</@@>", fd);
- else uputc('>', fd);
- n++;
- label = tag(n);
- } else
- uputc('>', fd);
- } else {
- uputs("/>", fd);
- n++;
- label = tag(n);
- };
- };
- } else do {
- uputs("</", fd);
- uput_str(print_stack->back(), fd);
- uputc('>', fd);
- print_stack->pop_back();
- n++;
- } while (!bp_inspect(par, n) && !print_stack->empty());
- label = tag(n);
- };
+ if (current_text && text_index_type != TextCollectionBuilder::index_type_default)
+ text_collection->DeleteText(current_text);
+
+ n += 2; // skip closing $
+ label = tag(n);
+ };
+ } else {
+ uputc('<', fd);
+ uput_str((*tag_names)[label], fd);
+ n++;
+ if (bp_inspect(par, n)) {
+ print_stack->push_back((*tag_names)[label]);
+ label = tag(n);
+ if (label == xml_tree::ATTRIBUTE_OPEN_TAG_ID) {
+ n++;
+ if (no_text) uputs("><@@>", fd);
+
+ while (bp_inspect(par, n))
+ if (no_text) {
+ uputc('<', fd);
+ uputs((const char*) &(get_tag_name_by_ref(tag(n))[3]), fd);
+ uputc('>', fd);
+ uputs("<$@/></", fd);
+ uputs((const char*) &(get_tag_name_by_ref(tag(n))[3]), fd);
+ uputc('>', fd);
+ n += 4;
+ } else {
+ uputc(' ', fd);
+ uputs((const char*) &(get_tag_name_by_ref(tag(n))[3]), fd);
+ n+= 2;
+ uputs("=\"", fd);
+ current_text = get_text(text_id(n));
+ uprintf((const char*) (current_text + (current_text[0] == 1)), fd);
+ if (current_text && text_index_type != TextCollectionBuilder::index_type_default)
+ text_collection->DeleteText(current_text);
+ uputc('"', fd);
+ n += 2;
+ };
+
+ if (no_text)
+ uputs("</@@>", fd);
+ else uputc('>', fd);
+ n++;
+ label = tag(n);
+ } else
+ uputc('>', fd);
+ } else {
+ uputs("/>", fd);
+ n++;
+ label = tag(n);
+ };
+ };
+ } else do {
+ uputs("</", fd);
+ uput_str(print_stack->back(), fd);
+ uputc('>', fd);
+ print_stack->pop_back();
+ n++;
+ } while (!bp_inspect(par, n) && !print_stack->empty());
+ label = tag(n);
+ };
uputc('\n', fd);
- if (orig_text && text_index_type != TextCollectionBuilder::index_type_default)
- if (*orig_text == '\0')
- text_collection->DeleteText(orig_text - 1);
- else
- text_collection->DeleteText(orig_text);
}