1 #include "xml-tree-builder.hpp"
// Constructor.
// The only statement visible in this excerpt enables text indexing by
// default (disable_text_index = false); open_document() later overwrites the
// flag with the value supplied by its caller.
8 xml_tree_builder::xml_tree_builder()
15 disable_text_index = false;
// Destructor.
// NOTE(review): the body is not part of this excerpt, so which resources it
// releases (if any) cannot be stated from here.
19 xml_tree_builder::~xml_tree_builder()
// Resets the builder.
// NOTE(review): only part of the body is visible in this excerpt.
24 void xml_tree_builder::reset()
// open_document() allocates text_positions only when the text index is
// enabled, so it is deleted only under the same condition.
29 if (!disable_text_index){
31 delete text_positions;
// Registers `tag` under the numeric identifier `id`.
//
// tag: tag name used as the key in tag_ids.
// id:  identifier to associate with a tag that is not yet known; it must be
//      equal to current_tag.
// Throws std::runtime_error when the tag is unknown and `id` differs from
// current_tag.
// NOTE(review): the return statements and the handling of an already-known
// tag are not visible in this excerpt.
36 int32_t xml_tree_builder::register_tag(std::string tag, int32_t id)
// Look the name up first: only names that are not yet present are inserted.
38 auto found = tag_ids->find(tag);
40 if (found == tag_ids->end()) {
// The identifier of a new tag must match the builder's current_tag value.
41 if (id != current_tag)
42 throw std::runtime_error("xml-tree-builder: inconsistant tag id");
44 tag_ids->insert(std::make_pair(tag, id));
// Convenience overload: registers `tag` with current_tag as the requested
// identifier, which satisfies the id check of the two-argument overload for a
// tag that is not yet known. Returns whatever that overload returns.
52 int32_t xml_tree_builder::register_tag(std::string tag)
54 return register_tag(tag, current_tag);
// Starts a new document and allocates the builder's working buffers.
//
// disable_text_index: when true, no text collection builder and no
//                     text_positions bit vector are created.
// sample_rate:        forwarded to TextCollectionBuilder::create().
// idx_type:           forwarded to TextCollectionBuilder::create() and
//                     remembered in text_index_type.
// NOTE(review): the return type sits on a line that is not part of this
// excerpt.
58 xml_tree_builder::open_document(bool disable_text_index,
59 unsigned int sample_rate,
60 TextCollectionBuilder::index_type_t idx_type)
// Fresh containers: the par bit vector, the tags vector, and the
// tag-name -> identifier map (tag_ids).
64 par = new bit_vector();
65 tags = new std::vector<int32_t>();
67 tag_ids = new std::unordered_map<std::string, int32_t>();
// Register the built-in tags with their fixed identifiers. register_tag()
// throws if a new tag's id differs from current_tag, so each *_ID constant
// has to agree with current_tag at the moment of its call.
// NOTE(review): as shown, each of the first four built-in tags is registered
// twice with identical arguments; check whether the second call of each pair
// was meant to register a different tag (e.g. a closing counterpart).
69 register_tag(xml_tree::DOCUMENT_OPEN_TAG, xml_tree::DOCUMENT_OPEN_TAG_ID);
70 register_tag(xml_tree::DOCUMENT_OPEN_TAG, xml_tree::DOCUMENT_OPEN_TAG_ID);
72 register_tag(xml_tree::ATTRIBUTE_OPEN_TAG, xml_tree::ATTRIBUTE_OPEN_TAG_ID);
73 register_tag(xml_tree::ATTRIBUTE_OPEN_TAG, xml_tree::ATTRIBUTE_OPEN_TAG_ID);
75 register_tag(xml_tree::PCDATA_OPEN_TAG, xml_tree::PCDATA_OPEN_TAG_ID);
76 register_tag(xml_tree::PCDATA_OPEN_TAG, xml_tree::PCDATA_OPEN_TAG_ID);
78 register_tag(xml_tree::ATTRIBUTE_DATA_OPEN_TAG,
79 xml_tree::ATTRIBUTE_DATA_OPEN_TAG_ID);
81 register_tag(xml_tree::ATTRIBUTE_DATA_OPEN_TAG,
82 xml_tree::ATTRIBUTE_DATA_OPEN_TAG_ID);
84 register_tag(xml_tree::CLOSE_TAG,
85 xml_tree::CLOSE_TAG_ID);
// The parameter shadows the member of the same name, hence the explicit
// this-> when storing the caller's choice.
87 this->disable_text_index = disable_text_index;
// The text-index machinery is only set up when indexing is enabled.
88 if (!disable_text_index){
89 tc_builder = TextCollectionBuilder::create(sample_rate, idx_type);
90 text_positions = new bit_vector();
91 text_index_type = idx_type;
// Records an opening tag named `tag`.
// NOTE(review): the statements between the lines shown (e.g. what is done
// with `id`) are not visible in this excerpt.
95 void xml_tree_builder::open_tag(std::string tag)
// Obtain the identifier for this tag name; register_tag() inserts the name
// into tag_ids when it is not yet known.
97 int32_t id = register_tag(tag);
// With the text index enabled, write a false bit at index size() of
// text_positions (presumably this appends one bit — confirm against
// bit_vector::set_le).
100 if (!disable_text_index) text_positions->set_le(text_positions->size(), false);
// Records a closing tag. The tag-name parameter is unnamed and therefore
// unused.
103 void xml_tree_builder::close_tag(std::string)
// Closing events use the fixed CLOSE_TAG_ID.
// NOTE(review): the use of `t` is on a line not shown in this excerpt.
105 xml_tree::tag_t t = xml_tree::CLOSE_TAG_ID;
// A closing tag contributes a false bit to par.
107 par->push_back(false);
// With the text index enabled, write a false bit at index size() of
// text_positions (presumably this appends one bit — confirm against
// bit_vector::set_le).
108 if (!disable_text_index) text_positions->set_le(text_positions->size(), false);
// Adds the text content `s` to the text collection.
// The statements shown here run only when the text index is enabled.
111 void xml_tree_builder::text(std::string s)
113 if (!disable_text_index){
// An empty string is replaced by the single byte 0x01 before insertion
// (presumably because the text collection cannot store empty texts —
// confirm).
114 if (s.empty()) s = "\001";
// InsertText() receives only a C-string pointer, no explicit length.
115 tc_builder->InsertText((const unsigned char *) s.c_str());
// Set the bit at the last index of text_positions to true.
// NOTE(review): size() - 1 assumes text_positions is non-empty, i.e. that a
// tag event preceded this call — confirm callers guarantee it.
116 text_positions->set_le(text_positions->size() - 1, true);
// Finishes the document and returns a heap-allocated xml_tree built from the
// data accumulated by the builder.
// Throws std::runtime_error ("inconsistent parser state"); the condition
// guarding the throw is not visible in this excerpt.
// NOTE(review): ownership of the returned pointer and of the buffers passed
// to xml_tree is not visible here — presumably both transfer; confirm.
120 xml_tree *xml_tree_builder::close_document()
// Copy the member pointers into locals before handing them to xml_tree.
125 auto tag_ids_ = tag_ids;
127 auto tc_builder_ = tc_builder;
128 auto text_positions_ = text_positions;
// The argument list continues on lines that are not part of this excerpt.
134 return new xml_tree(tags_, tag_ids_, par_,
141 throw std::runtime_error("xml_tree_builder: inconsistent parser state");