1 #include "xml-tree-builder.hpp"
// Default constructor of the builder.
// NOTE(review): only one statement of the body is visible in this excerpt;
// any other member initialisation is not shown here.
7 xml_tree_builder::xml_tree_builder()
// Text indexing starts out enabled; open_document() later overwrites this
// flag with the value supplied by its caller.
14 disable_text_index = false;
// Destructor of the builder.
// NOTE(review): the body is not visible in this excerpt, so which members it
// releases cannot be stated from here — confirm against the full file.
18 xml_tree_builder::~xml_tree_builder()
// Resets the builder's state.
// In the visible part, text_positions is deleted, but only when the text
// index is enabled (disable_text_index is false).
// NOTE(review): the remaining statements of this function are elided from
// the excerpt; confirm that deleted pointers are not left dangling.
23 void xml_tree_builder::reset()
28 if (!disable_text_index){
// text_positions is only allocated by open_document() when the text index is
// enabled, hence the guard above.
30 delete text_positions;
// Registers `tag` under the numeric identifier `id`.
//
// The tag is looked up in tag_ids. When it is not present yet, `id` must be
// equal to current_tag, otherwise a std::runtime_error is thrown; the
// (tag, id) pair is then inserted into tag_ids.
//
// NOTE(review): the handling of an already-known tag, any update of
// current_tag and the return statements are elided from this excerpt.
// NOTE(review): the error message below spells "inconsistant"; it is left
// untouched here because it is runtime text.
35 int32_t xml_tree_builder::register_tag(std::string tag, int32_t id)
37 auto found = tag_ids->find(tag);
// First time this tag name is seen.
39 if (found == tag_ids->end()) {
// A new tag may only take the identifier held in current_tag.
40 if (id != current_tag)
41 throw std::runtime_error("xml-tree-builder: inconsistant tag id");
43 tag_ids->insert(std::make_pair(tag, id));
// Convenience overload: registers `tag` using the current value of
// current_tag as its identifier and returns whatever the two-argument
// overload returns.
51 int32_t xml_tree_builder::register_tag(std::string tag)
53 return register_tag(tag, current_tag);
// Starts a new document.
//
// Parameters:
//   disable_text_index  when true, no text collection builder and no
//                       text_positions bit vector are created.
//   sample_rate         forwarded to TextCollectionBuilder::create().
//   idx_type            forwarded to TextCollectionBuilder::create() and
//                       stored in text_index_type.
//
// NOTE(review): the line carrying the return type and several body lines are
// elided from this excerpt.
57 xml_tree_builder::open_document(bool disable_text_index,
58 unsigned int sample_rate,
59 TextCollectionBuilder::index_type_t idx_type)
// Fresh containers for the new document: a bit vector (par), the sequence of
// tag identifiers (tags) and the tag-name -> identifier map (tag_ids).
63 par = new bit_vector();
64 tags = new std::vector<int32_t>();
66 tag_ids = new std::unordered_map<std::string, int32_t>();
// Register the built-in tags under their fixed identifiers.
// NOTE(review): every built-in tag below is registered twice with identical
// arguments, so the second call of each pair looks up a tag that is already
// in tag_ids. Possibly the second call of each pair was meant for a different
// tag — confirm against the xml_tree constants.
68 register_tag(xml_tree::DOCUMENT_OPEN_TAG, xml_tree::DOCUMENT_OPEN_TAG_ID);
69 register_tag(xml_tree::DOCUMENT_OPEN_TAG, xml_tree::DOCUMENT_OPEN_TAG_ID);
71 register_tag(xml_tree::ATTRIBUTE_OPEN_TAG, xml_tree::ATTRIBUTE_OPEN_TAG_ID);
72 register_tag(xml_tree::ATTRIBUTE_OPEN_TAG, xml_tree::ATTRIBUTE_OPEN_TAG_ID);
74 register_tag(xml_tree::PCDATA_OPEN_TAG, xml_tree::PCDATA_OPEN_TAG_ID);
75 register_tag(xml_tree::PCDATA_OPEN_TAG, xml_tree::PCDATA_OPEN_TAG_ID);
77 register_tag(xml_tree::ATTRIBUTE_DATA_OPEN_TAG,
78 xml_tree::ATTRIBUTE_DATA_OPEN_TAG_ID);
80 register_tag(xml_tree::ATTRIBUTE_DATA_OPEN_TAG,
81 xml_tree::ATTRIBUTE_DATA_OPEN_TAG_ID);
// The parameter shadows the member of the same name, hence the explicit this->.
84 this->disable_text_index = disable_text_index;
// Text-index state is only set up when the index is enabled.
85 if (!disable_text_index){
86 tc_builder = TextCollectionBuilder::create(sample_rate, idx_type);
87 text_positions = new bit_vector();
88 text_index_type = idx_type;
// Handles an opening tag named `tag`.
// The tag's identifier is obtained through register_tag(tag).
// NOTE(review): the statements that use `id` are elided from this excerpt.
92 void xml_tree_builder::open_tag(std::string tag)
94 int32_t id = register_tag(tag);
// With the text index enabled, a false bit is appended to text_positions for
// this tag; text() may later set that last bit to true.
97 if (!disable_text_index) text_positions->push_back(false);
// Handles a closing tag. The string parameter is unnamed, so the tag name
// passed by the caller is not used.
// NOTE(review): the statement that uses `t` is elided from this excerpt.
100 void xml_tree_builder::close_tag(std::string)
// Closing tags are represented by the fixed identifier CLOSE_TAG_ID.
102 xml_tree::tag_t t = xml_tree::CLOSE_TAG_ID;
// A false bit is appended to par for the closing tag.
104 par->push_back(false);
// With the text index enabled, text_positions also receives a false bit.
105 if (!disable_text_index) text_positions->push_back(false);
// Records the text `s`. Nothing in the visible body happens when the text
// index is disabled.
108 void xml_tree_builder::text(std::string s)
110 if (!disable_text_index){
// An empty text is replaced by a one-character placeholder before insertion
// (presumably because the text collection cannot store empty strings —
// confirm against TextCollectionBuilder).
111 if (s.empty()) s = "\001";
112 tc_builder->InsertText((const unsigned char *) s.c_str());
// Sets the most recently appended bit of text_positions to true.
// NOTE(review): size() - 1 assumes text_positions is not empty, i.e. that
// open_tag()/close_tag() has been called before text() — confirm call order.
113 text_positions->set(text_positions->size() - 1, true);
// Finishes the current document and returns a newly allocated xml_tree built
// from the builder's accumulated state.
//
// The visible lines copy tag_ids, tc_builder and text_positions into locals
// before the tree is constructed. A std::runtime_error reporting an
// inconsistent parser state is thrown at the end of the function.
//
// NOTE(review): the definitions of tags_ and par_, the remaining constructor
// arguments and the condition guarding the throw are elided from this
// excerpt — confirm against the full file.
117 xml_tree *xml_tree_builder::close_document()
122 auto tag_ids_ = tag_ids;
124 auto tc_builder_ = tc_builder;
125 auto text_positions_ = text_positions;
131 return new xml_tree(tags_, tag_ids_, par_,
138 throw std::runtime_error("xml_tree_builder: inconsistent parser state");