From 429f734c9f9241dfb9c587e8b333777f3540625f Mon Sep 17 00:00:00 2001 From: =?utf8?q?Kim=20Nguy=E1=BB=85n?= Date: Fri, 6 Apr 2012 14:03:00 +0200 Subject: [PATCH] Various fixes, mainly tagged_sibling/select_sibling. --- xml-tree-inc.hpp | 39 +++++++++++++++++++++++++++++++++++---- xml-tree.cpp | 37 ++++++++++++++++++++++++++----------- 2 files changed, 61 insertions(+), 15 deletions(-) diff --git a/xml-tree-inc.hpp b/xml-tree-inc.hpp index 6ce1724..cd8db71 100644 --- a/xml-tree-inc.hpp +++ b/xml-tree-inc.hpp @@ -5,6 +5,19 @@ #ifndef XML_TREE_INC_HPP_ #define XML_TREE_INC_HPP_ +#include + +#if 0 +#define ASSERT_NODE(orig, res) do { \ + if (res < -1 || res >= par->n|| (res != -1 && res < orig)) \ + fprintf(stderr, \ + "Assertion failure: original node %i, result %i, line %i\n", \ + orig, res, __LINE__); \ + } while (0) +#else +#define ASSERT_NODE(orig, res) +#endif + inline uint32_t xml_tree::size() const { return tag_seq_len / 2; @@ -86,7 +99,9 @@ inline xml_tree::node_t xml_tree::parent(xml_tree::node_t x) const inline xml_tree::node_t xml_tree::first_child(node_t x) const { - return bp_first_child(this->par, x); + xml_tree::node_t result = bp_first_child(this->par, x); + ASSERT_NODE(x, result); + return result; } inline xml_tree::node_t xml_tree::last_child(xml_tree::node_t x) const @@ -99,7 +114,9 @@ inline xml_tree::node_t xml_tree::last_child(xml_tree::node_t x) const inline xml_tree::node_t xml_tree::next_sibling(xml_tree::node_t x) const { - return bp_next_sibling(this->par, x); + xml_tree::node_t result = bp_next_sibling(this->par, x); + ASSERT_NODE(x, result); + return result; } inline xml_tree::node_t xml_tree::prev_sibling(xml_tree::node_t x) const @@ -119,6 +136,8 @@ inline xml_tree::node_t xml_tree::first_element(xml_tree::node_t x) const case PCDATA_OPEN_TAG_ID: n = n + 2; return bp_inspect(this->par, n) ? n : xml_tree::NIL; + default: + return n; }; } @@ -161,17 +180,29 @@ inline xml_tree::node_t xml_tree::tagged_child(xml_tree::node_t x, xml_tree::tag_t t) const { xml_tree::node_t c = first_child(x); + xml_tree::node_t result; if (is_nil(c) || tag(c) == t) return c; else - tagged_sibling(c, t); + return tagged_sibling(c, t); + /* ASSERT_NODE(x, result); + return result;*/ } inline xml_tree::node_t xml_tree::tagged_sibling(xml_tree::node_t x, xml_tree::tag_t t) const { xml_tree::node_t sibling = next_sibling(x); - while(!is_nil(sibling) && tag(sibling) != t) sibling = next_sibling(sibling); + xml_tree::tag_t stag; + while (sibling != xml_tree::NIL) { + stag = tag(sibling); + if (stag == t) { + ASSERT_NODE(x, sibling); + return sibling; + } + sibling = next_sibling(sibling); + }; + ASSERT_NODE(x, sibling); return sibling; } diff --git a/xml-tree.cpp b/xml-tree.cpp index 6d33878..758c1b4 100644 --- a/xml-tree.cpp +++ b/xml-tree.cpp @@ -13,6 +13,10 @@ extern "C" { using namespace SXSI; +const xml_tree::node_t xml_tree::NIL; +const xml_tree::node_t xml_tree::ROOT; + + const xml_tree::tag_t xml_tree::NIL_TAG_ID; const char* xml_tree::NIL_TAG = ""; const xml_tree::tag_t xml_tree::DOCUMENT_OPEN_TAG_ID; @@ -26,6 +30,7 @@ const char* xml_tree::ATTRIBUTE_DATA_OPEN_TAG = "<@$>"; const xml_tree::tag_t xml_tree::CLOSE_TAG_ID; const char* xml_tree::CLOSE_TAG = ""; + static int bits8 (int t ) { int r = bits(t); if (r <= 8) @@ -78,6 +83,7 @@ xml_tree::xml_tree(std::vector *tags, size_t npar = parbitmap->size(); parbitmap->pack(); + par = bp_construct(npar, parbitmap->get_vector_ptr(), OPT_DEGREE); @@ -92,7 +98,8 @@ xml_tree::xml_tree(std::vector *tags, uint32_t max_tag = tag_names->size() - 1; static_bitsequence_builder *bmb = new static_bitsequence_builder_sdarray(); alphabet_mapper *am = new alphabet_mapper_none(); - this->tags = new static_sequence_bs((uint32_t *) &tags[0], npar, am, bmb); + + this->tags = new static_sequence_bs((uint32_t*)&((*tags)[0]), npar, am, bmb); bits_per_tag = bits8(max_tag); tag_seq_len = npar; tag_seq = new uint32_t[uint_len(bits_per_tag, tag_seq_len)]; @@ -110,10 +117,15 @@ xml_tree::xml_tree(std::vector *tags, text_positions = new static_bitsequence_rrr02(textbm, npar, 32); - delete [] textbm; + //delete [] textbm; + delete textbitmap; this->text_index_type = idx_type; + fprintf(stderr, "Before!\n"); + fflush(stderr); text_collection = tc_builder->InitTextCollection(); + fprintf(stderr, "After!\n"); + fflush(stderr); delete tc_builder; }; @@ -177,15 +189,18 @@ xml_tree::select_descendant(xml_tree::node_t x, return min; } - xml_tree::node_t xml_tree::select_sibling(xml_tree::node_t x, std::unordered_set *tags) const { xml_tree::node_t sibling = next_sibling(x); - while(!is_nil(sibling) && tags->find(tag(sibling)) == tags->end()) + xml_tree::tag_t t; + while(!is_nil(sibling)) { + t = tag(sibling); + if (tags->find(t) != tags->end()) return sibling; sibling = next_sibling(sibling); - return (sibling); + }; + return sibling; } xml_tree::node_t @@ -225,10 +240,12 @@ void xml_tree::save(int fd, char* s) ufwrite(tag_seq, sizeof(uint), uint_len(bits_per_tag, tag_seq_len), fp); bool disable_tc = text_collection == 0 || text_positions == 0; - ufwrite(&disable_tc, sizeof(bool),1,fp); - text_positions->save(fp); + ufwrite(&disable_tc, sizeof(bool),1,fp); + fprintf(stderr, "whoot\n"); + fflush(stderr); if (!disable_tc) { + text_positions->save(fp); ufwrite(&text_index_type, sizeof(TextCollectionBuilder::index_type_t), 1, fp); @@ -253,8 +270,7 @@ void xml_tree::save(int fd, char* s) fclose(fp); } -//static xml_tree* load(char*, bool, int); -// void print(int, node_t, bool no_text=false); + xml_tree* xml_tree::load(int fd, char* name, bool load_tc, int sf) { FILE *fp; @@ -263,7 +279,6 @@ xml_tree* xml_tree::load(int fd, char* name, bool load_tc, int sf) int i; buffer[1023] = '\0'; fp = fdopen(fd, "r"); - xml_tree *tree = new xml_tree(); tree->par = loadTree(fp); //TODO use new api @@ -576,5 +591,5 @@ void xml_tree::print(int fd, xml_tree::node_t x, bool no_text) text_collection->DeleteText(orig_text - 1); else text_collection->DeleteText(orig_text); - + } -- 2.17.1