X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=xml-tree.hpp;fp=xml-tree.hpp;h=950f0ef4711d99ff730b16e7b5f25859238320eb;hb=c6266d8fd1872fad45b18d3d554410d080b65099;hp=0000000000000000000000000000000000000000;hpb=03125db103d98200f7a313a38db54c56748283d1;p=SXSI%2FXMLTree.git diff --git a/xml-tree.hpp b/xml-tree.hpp new file mode 100644 index 0000000..950f0ef --- /dev/null +++ b/xml-tree.hpp @@ -0,0 +1,159 @@ +#ifndef XML_TREE_HPP_ +#define XML_TREE_HPP_ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bit-vector.hpp" + +#undef W +#undef Wminusone +#undef WW + +#include +#include + +class xml_tree_builder; + +class xml_tree { +public: + typedef int32_t node_t; + typedef int32_t tag_t; + + static const node_t NIL = -1; + static const node_t ROOT = 0; + + static const char* NIL_TAG; + static const tag_t NIL_TAG_ID = -1; + static const char* DOCUMENT_OPEN_TAG; + static const tag_t DOCUMENT_OPEN_TAG_ID = 0; + static const char* ATTRIBUTE_OPEN_TAG; + static const tag_t ATTRIBUTE_OPEN_TAG_ID = 1; + static const char* PCDATA_OPEN_TAG; + static const tag_t PCDATA_OPEN_TAG_ID = 2; + static const char* ATTRIBUTE_DATA_OPEN_TAG; + static const tag_t ATTRIBUTE_DATA_OPEN_TAG_ID = 3; + static const char* CLOSE_TAG; + static const tag_t CLOSE_TAG_ID = 4; + + + ~xml_tree(); + + //Counting functions + inline uint32_t size() const; + inline uint32_t num_tags() const; + inline uint32_t subtree_size(node_t) const; + inline uint32_t subtree_tags(node_t, tag_t) const; + uint32_t subtree_elements(node_t) const; + uint32_t num_children(node_t) const; + uint32_t child_pos(node_t) const; + + + //Node_T tests + inline bool is_leaf(node_t) const; + inline bool is_ancestor(node_t, node_t) const; + inline bool is_right_descendant(node_t, node_t) const; + bool is_child(node_t, node_t) const; + inline bool is_first_child(node_t) const; + inline bool is_nil(node_t) const; + + uint32_t depth(node_t) const; + uint32_t preorder(node_t) const; + uint32_t postorder(node_t) const; + + //Tag functions + inline tag_t tag(node_t) const; + const char* get_tag_name_by_ref(tag_t) const; + tag_t register_tag(char *s); + + //Navigation functions + inline node_t root () const; + node_t child(node_t, uint32_t) const; + inline node_t parent(node_t) const; + inline node_t first_child(node_t) const; + inline node_t last_child(node_t) const; + inline node_t next_sibling(node_t) const; + inline node_t prev_sibling(node_t) const; + inline node_t first_element(node_t) const; + inline node_t next_element(node_t) const; + inline node_t tagged_next(node_t, tag_t) const; + inline node_t tagged_descendant(node_t, tag_t) const; + inline node_t tagged_following_before(node_t, tag_t, node_t) const; + inline node_t tagged_child(node_t, tag_t) const; + inline node_t tagged_sibling(node_t, tag_t) const; + node_t select_child(node_t, std::unordered_set*) const; + node_t select_descendant(node_t, std::unordered_set*) const; + node_t select_sibling(node_t, std::unordered_set*) const; + node_t select_following_before (node_t, + std::unordered_set*, node_t) const; + inline node_t closing(node_t) const; + + //Text functions + inline node_t parent_node(int32_t) const; + inline SXSI::TextCollection *get_text_collection() const; + std::pair text_id_range(node_t) const; + int32_t text_id(node_t) const; + unsigned char* get_text(int32_t) const; + + SXSI::TextCollection::document_result prefix(uchar const *s) const; + SXSI::TextCollection::document_result suffix(uchar const *s) const; + SXSI::TextCollection::document_result equals(uchar const *s) const; + SXSI::TextCollection::document_result contains(uchar const *s) const; + SXSI::TextCollection::document_result less_than(uchar const *s) const; + + //I/O functions + void save(int, char*); + static xml_tree* load(int, char*, bool, int); + void print(int, node_t, bool no_text=false); + void flush(int); + +private: + friend class xml_tree_builder; + xml_tree(); + xml_tree(std::vector*, + std::unordered_map*, + bit_vector*, + bool, + SXSI::TextCollectionBuilder *, + SXSI::TextCollectionBuilder::index_type_t, + bit_vector*); + + //Parenthesis sequence + bp *par; + //tag sequence + static_sequence *tags; + uint32_t *tag_seq; + uint32_t tag_seq_len; + uint32_t bits_per_tag; + //Mapping from tag_t identifiers to/from tagnames + std::vector *tag_names; + std::unordered_map *tag_ids; + //Text index + SXSI::TextCollection *text_collection; + static_bitsequence *text_positions; + SXSI::TextCollectionBuilder::index_type_t text_index_type; + //auxiliary data structures for efficient printing. + std::vector *print_stack; + std::string *print_buffer; + +#define BUFFER_SIZE (8192*2) + + void uflush(int); + void uflush_r(int, size_t); + void uput_str(std::string s, int fd); + void uputs(const char* s, int fd); + void uputc(const char c, int fd); + size_t uprintf(const char*s, int fd); +}; + +#define XML_TREE_INTERNAL__ 1 +#include "xml-tree-inc.hpp" + +#endif //XML_TREE_HPP_