19ce5dcebad230b02addcecce75ee7066abedb61
[SXSI/XMLTree.git] / xml-tree-builder.cpp
1 #include "xml-tree-builder.hpp"
2 #include <stdexcept>
3 #include <utility>
4
5 using namespace SXSI;
6
7 xml_tree_builder::xml_tree_builder()
8 {
9   opened = false;
10   par = 0;
11   tags = 0;
12   tag_ids = 0;
13   text_positions = 0;
14   disable_text_index = false;
15   tc_builder = 0;
16 }
17
18 xml_tree_builder::~xml_tree_builder()
19 {
20   if (opened) reset();
21 }
22
23 void xml_tree_builder::reset()
24 {
25   delete par;
26   delete tags;
27   delete tag_ids;
28   if (!disable_text_index){
29     delete tc_builder;
30     delete text_positions;
31   };
32 }
33
34
35 int32_t xml_tree_builder::register_tag(std::string tag, int32_t id)
36 {
37   auto found = tag_ids->find(tag);
38
39   if (found == tag_ids->end()) {
40     if (id != current_tag)
41       throw std::runtime_error("xml-tree-builder: inconsistant tag id");
42
43     tag_ids->insert(std::make_pair(tag, id));
44     current_tag++;
45     return id;
46   } else
47     return found->second;
48
49 }
50
51 int32_t xml_tree_builder::register_tag(std::string tag)
52 {
53   return register_tag(tag, current_tag);
54 }
55
56 void
57 xml_tree_builder::open_document(bool disable_text_index,
58                                 unsigned int sample_rate,
59                                 TextCollectionBuilder::index_type_t idx_type)
60 {
61   if (opened) reset();
62   opened = true;
63   par = new bit_vector();
64   tags = new std::vector<int32_t>();
65   current_tag = 0;
66   tag_ids = new std::unordered_map<std::string, int32_t>();
67
68   register_tag(xml_tree::DOCUMENT_OPEN_TAG, xml_tree::DOCUMENT_OPEN_TAG_ID);
69   register_tag(xml_tree::DOCUMENT_OPEN_TAG, xml_tree::DOCUMENT_OPEN_TAG_ID);
70
71   register_tag(xml_tree::ATTRIBUTE_OPEN_TAG, xml_tree::ATTRIBUTE_OPEN_TAG_ID);
72   register_tag(xml_tree::ATTRIBUTE_OPEN_TAG, xml_tree::ATTRIBUTE_OPEN_TAG_ID);
73
74   register_tag(xml_tree::PCDATA_OPEN_TAG, xml_tree::PCDATA_OPEN_TAG_ID);
75   register_tag(xml_tree::PCDATA_OPEN_TAG, xml_tree::PCDATA_OPEN_TAG_ID);
76
77   register_tag(xml_tree::ATTRIBUTE_DATA_OPEN_TAG,
78                xml_tree::ATTRIBUTE_DATA_OPEN_TAG_ID);
79
80   register_tag(xml_tree::ATTRIBUTE_DATA_OPEN_TAG,
81                xml_tree::ATTRIBUTE_DATA_OPEN_TAG_ID);
82
83
84   this->disable_text_index = disable_text_index;
85   if (!disable_text_index){
86     tc_builder = TextCollectionBuilder::create(sample_rate, idx_type);
87     text_positions = new bit_vector();
88     text_index_type = idx_type;
89   };
90 }
91
92 void xml_tree_builder::open_tag(std::string tag)
93 {
94   int32_t id = register_tag(tag);
95   tags->push_back(id);
96   par->push_back(true);
97   if (!disable_text_index) text_positions->set_le(text_positions->size(), false);
98 }
99
100 void xml_tree_builder::close_tag(std::string)
101 {
102   xml_tree::tag_t t = xml_tree::CLOSE_TAG_ID;
103   tags->push_back(t);
104   par->push_back(false);
105   if (!disable_text_index) text_positions->set_le(text_positions->size(), false);
106 }
107
108 void xml_tree_builder::text(std::string s)
109 {
110   if (!disable_text_index){
111     if (s.empty()) s = "\001";
112     tc_builder->InsertText((const unsigned char *) s.c_str());
113     text_positions->set_le(text_positions->size() - 1, true);
114   }
115 }
116
117 xml_tree *xml_tree_builder::close_document()
118 {
119   if (opened) {
120     opened = false;
121     auto tags_ = tags;
122     auto tag_ids_ = tag_ids;
123     auto par_ = par;
124     auto tc_builder_ = tc_builder;
125     auto text_positions_ = text_positions;
126     tc_builder = 0;
127     text_positions = 0;
128     tags = 0;
129     tag_ids = 0;
130     par = 0;
131     return new xml_tree(tags_, tag_ids_, par_,
132                         disable_text_index,
133                         tc_builder_,
134                         text_index_type,
135                         text_positions_);
136   };
137
138   throw std::runtime_error("xml_tree_builder: inconsistent parser state");
139 }
140