+#include "xml-tree-builder.hpp"
+#include <stdexcept>
+#include <utility>
+
+using namespace SXSI;
+
+/**
+ * Creates a builder with no document in progress.
+ *
+ * Every owned buffer pointer starts out null; they are only allocated by
+ * open_document().
+ */
+xml_tree_builder::xml_tree_builder()
+{
+  // State flags: nothing is open, text indexing is not disabled.
+  opened = false;
+  disable_text_index = false;
+
+  // Tree-structure buffers.
+  par = nullptr;
+  tags = nullptr;
+  tag_ids = nullptr;
+
+  // Text-index buffers.
+  tc_builder = nullptr;
+  text_positions = nullptr;
+}
+
+/**
+ * Frees the buffers of a document that was opened but never closed.
+ * Does nothing when no document is in progress.
+ */
+xml_tree_builder::~xml_tree_builder()
+{
+  if (!opened)
+    return;
+  reset();
+}
+
+/**
+ * Frees every buffer owned by the builder and returns it to the
+ * "no document open" state.
+ *
+ * Each pointer is nulled after deletion and `opened` is cleared, so calling
+ * reset() twice, or calling it before the destructor runs, cannot delete the
+ * same buffer a second time, and close_document() cannot hand freed memory
+ * to a new xml_tree.
+ */
+void xml_tree_builder::reset()
+{
+  delete par;
+  par = nullptr;
+
+  delete tags;
+  tags = nullptr;
+
+  delete tag_ids;
+  tag_ids = nullptr;
+
+  // The text-index buffers are null whenever indexing is disabled, and
+  // deleting a null pointer is a no-op, so no flag check is needed here.
+  delete tc_builder;
+  tc_builder = nullptr;
+
+  delete text_positions;
+  text_positions = nullptr;
+
+  opened = false;
+}
+
+
+/**
+ * Returns the id associated with `tag`, registering the tag first if it is
+ * not yet known.
+ *
+ * @param tag  tag name to look up or register.
+ * @param id   id to assign if the tag is new; ignored when the tag is
+ *             already registered.
+ * @return the stored id for a known tag, otherwise `id`.
+ * @throws std::runtime_error if the tag is new and `id` differs from the
+ *         next free id (`current_tag`).
+ */
+int32_t xml_tree_builder::register_tag(std::string tag, int32_t id)
+{
+  const auto entry = tag_ids->find(tag);
+
+  // Known tag: hand back the id it was registered with.
+  if (entry != tag_ids->end())
+    return entry->second;
+
+  // New tags must be numbered consecutively.
+  if (id != current_tag)
+    throw std::runtime_error("xml-tree-builder: inconsistant tag id");
+
+  tag_ids->emplace(tag, id);
+  ++current_tag;
+  return id;
+}
+
+/**
+ * Registers `tag` under the next free id, or returns its existing id if the
+ * tag is already known.
+ */
+int32_t xml_tree_builder::register_tag(std::string tag)
+{
+  const int32_t next_id = current_tag;
+  return register_tag(std::move(tag), next_id);
+}
+
+/**
+ * Starts a new document, discarding any document that is still open.
+ *
+ * Allocates fresh structure buffers, resets the tag counter to 0 and
+ * registers the reserved tags. When text indexing is enabled it also
+ * creates the text-collection builder and the text-position bit vector.
+ *
+ * @param disable_text_index  when true, no text-index buffers are created.
+ * @param sample_rate         forwarded to TextCollectionBuilder::create().
+ * @param idx_type            forwarded to TextCollectionBuilder::create()
+ *                            and remembered for close_document().
+ * @throws std::runtime_error from register_tag() if a reserved id does not
+ *         match the running tag counter.
+ */
+void
+xml_tree_builder::open_document(bool disable_text_index,
+                                unsigned int sample_rate,
+                                TextCollectionBuilder::index_type_t idx_type)
+{
+  if (opened) reset();
+  opened = true;
+  par = new bit_vector();
+  tags = new std::vector<int32_t>();
+  current_tag = 0;
+  tag_ids = new std::unordered_map<std::string, int32_t>();
+
+  // Reserved tags. register_tag() only accepts a new tag whose id equals the
+  // running counter, so these must be listed in increasing id order starting
+  // at 0. Each one is registered exactly once; a repeated call would just
+  // return the id already stored.
+  register_tag(xml_tree::DOCUMENT_OPEN_TAG, xml_tree::DOCUMENT_OPEN_TAG_ID);
+  register_tag(xml_tree::ATTRIBUTE_OPEN_TAG, xml_tree::ATTRIBUTE_OPEN_TAG_ID);
+  register_tag(xml_tree::PCDATA_OPEN_TAG, xml_tree::PCDATA_OPEN_TAG_ID);
+  register_tag(xml_tree::ATTRIBUTE_DATA_OPEN_TAG,
+               xml_tree::ATTRIBUTE_DATA_OPEN_TAG_ID);
+
+  this->disable_text_index = disable_text_index;
+  if (disable_text_index) {
+    // Make sure no address from a previous, already released document
+    // survives into close_document() when indexing is turned off.
+    tc_builder = nullptr;
+    text_positions = nullptr;
+  } else {
+    tc_builder = TextCollectionBuilder::create(sample_rate, idx_type);
+    text_positions = new bit_vector();
+    text_index_type = idx_type;
+  }
+}
+
+/**
+ * Records an opening tag event.
+ *
+ * Appends the tag's id (registering the tag if it is new) to the tag
+ * sequence and a `true` bit to `par`. With text indexing enabled, a
+ * `false` bit is also appended to `text_positions`.
+ */
+void xml_tree_builder::open_tag(std::string tag)
+{
+  tags->push_back(register_tag(std::move(tag)));
+  par->push_back(true);
+
+  if (disable_text_index)
+    return;
+  text_positions->push_back(false);
+}
+
+/**
+ * Records a closing tag event. The tag name is not used.
+ *
+ * Appends xml_tree::CLOSE_TAG_ID to the tag sequence and a `false` bit to
+ * `par`. With text indexing enabled, a `false` bit is also appended to
+ * `text_positions`.
+ */
+void xml_tree_builder::close_tag(std::string)
+{
+  const xml_tree::tag_t closing_id = xml_tree::CLOSE_TAG_ID;
+
+  tags->push_back(closing_id);
+  par->push_back(false);
+
+  if (disable_text_index)
+    return;
+  text_positions->push_back(false);
+}
+
+/**
+ * Attaches a text to the most recently recorded tag event.
+ *
+ * Does nothing when text indexing is disabled. Otherwise the text is
+ * inserted into the text collection and the last bit of `text_positions`
+ * is set.
+ *
+ * @param s  text content; an empty string is stored as "\001".
+ * @throws std::runtime_error if no tag event has been recorded yet, because
+ *         there is then no position to mark.
+ */
+void xml_tree_builder::text(std::string s)
+{
+  if (disable_text_index)
+    return;
+
+  // Without this check, size() - 1 would wrap around on an empty vector and
+  // index far out of range. Checked before any state is modified.
+  if (text_positions->size() == 0)
+    throw std::runtime_error("xml_tree_builder: text outside of any tag");
+
+  // NOTE(review): presumably the text collection cannot store an empty
+  // string, hence the one-byte placeholder; confirm against InsertText().
+  if (s.empty()) s = "\001";
+
+  tc_builder->InsertText(reinterpret_cast<const unsigned char *>(s.c_str()));
+  text_positions->set(text_positions->size() - 1, true);
+}
+
+/**
+ * Finishes the current document and returns the resulting tree.
+ *
+ * The builder's buffer pointers are passed to the new xml_tree and the
+ * builder's own copies are nulled, leaving the builder with no document
+ * open.
+ *
+ * @return a newly allocated xml_tree built from the accumulated buffers.
+ * @throws std::runtime_error if no document is open.
+ */
+xml_tree *xml_tree_builder::close_document()
+{
+  if (!opened)
+    throw std::runtime_error("xml_tree_builder: inconsistent parser state");
+
+  opened = false;
+
+  // Take local copies of the buffer pointers.
+  auto out_tags = tags;
+  auto out_tag_ids = tag_ids;
+  auto out_par = par;
+  auto out_tc_builder = tc_builder;
+  auto out_text_positions = text_positions;
+
+  // Null the members before the buffers are handed to the tree.
+  tc_builder = 0;
+  text_positions = 0;
+  tags = 0;
+  tag_ids = 0;
+  par = 0;
+
+  return new xml_tree(out_tags, out_tag_ids, out_par,
+                      disable_text_index,
+                      out_tc_builder,
+                      text_index_type,
+                      out_text_positions);
+}
+