X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=xml-tree-builder.cpp;fp=xml-tree-builder.cpp;h=0b7effc4576429a6eef4502443fdbe354644d760;hb=c6266d8fd1872fad45b18d3d554410d080b65099;hp=0000000000000000000000000000000000000000;hpb=03125db103d98200f7a313a38db54c56748283d1;p=SXSI%2FXMLTree.git diff --git a/xml-tree-builder.cpp b/xml-tree-builder.cpp new file mode 100644 index 0000000..0b7effc --- /dev/null +++ b/xml-tree-builder.cpp @@ -0,0 +1,140 @@ +#include "xml-tree-builder.hpp" +#include +#include + +using namespace SXSI; + +xml_tree_builder::xml_tree_builder() +{ + opened = false; + par = 0; + tags = 0; + tag_ids = 0; + text_positions = 0; + disable_text_index = false; + tc_builder = 0; +} + +xml_tree_builder::~xml_tree_builder() +{ + if (opened) reset(); +} + +void xml_tree_builder::reset() +{ + delete par; + delete tags; + delete tag_ids; + if (!disable_text_index){ + delete tc_builder; + delete text_positions; + }; +} + + +int32_t xml_tree_builder::register_tag(std::string tag, int32_t id) +{ + auto found = tag_ids->find(tag); + + if (found == tag_ids->end()) { + if (id != current_tag) + throw std::runtime_error("xml-tree-builder: inconsistant tag id"); + + tag_ids->insert(std::make_pair(tag, id)); + current_tag++; + return id; + } else + return found->second; + +} + +int32_t xml_tree_builder::register_tag(std::string tag) +{ + return register_tag(tag, current_tag); +} + +void +xml_tree_builder::open_document(bool disable_text_index, + unsigned int sample_rate, + TextCollectionBuilder::index_type_t idx_type) +{ + if (opened) reset(); + opened = true; + par = new bit_vector(); + tags = new std::vector(); + current_tag = 0; + tag_ids = new std::unordered_map(); + + register_tag(xml_tree::DOCUMENT_OPEN_TAG, xml_tree::DOCUMENT_OPEN_TAG_ID); + register_tag(xml_tree::DOCUMENT_OPEN_TAG, xml_tree::DOCUMENT_OPEN_TAG_ID); + + register_tag(xml_tree::ATTRIBUTE_OPEN_TAG, xml_tree::ATTRIBUTE_OPEN_TAG_ID); + register_tag(xml_tree::ATTRIBUTE_OPEN_TAG, xml_tree::ATTRIBUTE_OPEN_TAG_ID); + + register_tag(xml_tree::PCDATA_OPEN_TAG, xml_tree::PCDATA_OPEN_TAG_ID); + register_tag(xml_tree::PCDATA_OPEN_TAG, xml_tree::PCDATA_OPEN_TAG_ID); + + register_tag(xml_tree::ATTRIBUTE_DATA_OPEN_TAG, + xml_tree::ATTRIBUTE_DATA_OPEN_TAG_ID); + + register_tag(xml_tree::ATTRIBUTE_DATA_OPEN_TAG, + xml_tree::ATTRIBUTE_DATA_OPEN_TAG_ID); + + + this->disable_text_index = disable_text_index; + if (!disable_text_index){ + tc_builder = TextCollectionBuilder::create(sample_rate, idx_type); + text_positions = new bit_vector(); + text_index_type = idx_type; + }; +} + +void xml_tree_builder::open_tag(std::string tag) +{ + int32_t id = register_tag(tag); + tags->push_back(id); + par->push_back(true); + if (!disable_text_index) text_positions->push_back(false); +} + +void xml_tree_builder::close_tag(std::string) +{ + xml_tree::tag_t t = xml_tree::CLOSE_TAG_ID; + tags->push_back(t); + par->push_back(false); + if (!disable_text_index) text_positions->push_back(false); +} + +void xml_tree_builder::text(std::string s) +{ + if (!disable_text_index){ + if (s.empty()) s = "\001"; + tc_builder->InsertText((const unsigned char *) s.c_str()); + text_positions->set(text_positions->size() - 1, true); + } +} + +xml_tree *xml_tree_builder::close_document() +{ + if (opened) { + opened = false; + auto tags_ = tags; + auto tag_ids_ = tag_ids; + auto par_ = par; + auto tc_builder_ = tc_builder; + auto text_positions_ = text_positions; + tc_builder = 0; + text_positions = 0; + tags = 0; + tag_ids = 0; + par = 0; + return new xml_tree(tags_, tag_ids_, par_, + disable_text_index, + tc_builder_, + text_index_type, + text_positions_); + }; + + throw std::runtime_error("xml_tree_builder: inconsistent parser state"); +} +