#include <cstdint>
+#include <unordered_set>
#include <unordered_map>
-#include <bp/bp.h>
-#include <bp/bp-darray.h>
+#include <libbp/bp.h>
+#include <libbp/bp-darray.h>
#include <libcds/includes/basics.h>
#include <libcds/includes/static_bitsequence.h>
#include <libcds/includes/alphabet_mapper.h>
~xml_tree();
//Counting functions
+ /**
+ * [size()] returns the size of the tree (number of nodes)
+ * Runs in O(1)
+ */
inline uint32_t size() const;
+
+ /**
+ * [num_tags()] returns the number of distinct tags.
+ * Runs in O(1)
+ */
inline uint32_t num_tags() const;
+
+ /**
+ * [subtree_size(n)] returns the size of the subtree (number of nodes)
+ * rooted at n.
+ * Runs in O(1)
+ */
inline uint32_t subtree_size(node_t) const;
+
+ /**
+ * [subtree_tags(n, t)] returns the number of occurences of tag [t] in the
+ * subtree rooted at [n]
+ * Runs in O(1)
+ */
inline uint32_t subtree_tags(node_t, tag_t) const;
- inline uint32_t subtree_elements(node_t, tag_t*) const;
+
+ /**
+ * [subtree_elements(n)] returns the number of element nodes below [n]
+ * Runs in O(attribute_ids->size()+3)
+ */
+ inline uint32_t subtree_elements(node_t) const;
+
+ /**
+ * [num_children(n)] returns the number of child nodes of [n]
+ * (both text and elements, and including a fake <@> node if
+ * present).
+ * Runs in O(1) (?)
+ */
uint32_t num_children(node_t) const;
+
+ /**
+ * [child_pos(n)] returns the position of [n] amongst its siblings
+ * Runs in O(1) (?)
+ */
uint32_t child_pos(node_t) const;
//Node_T tests
+ /**
+ * [is_leaf(n)] returns true if [n] is a leaf (i.e. if [num_children(n)]
+ * returns 0
+ * Runs in O(1)
+ */
inline bool is_leaf(node_t) const;
+
+ /**
+ * [is_ancestor(n, m)] returns true if [n] is an ancestor of [m], false
+ * otherwise
+ * Runs in O(1)
+ */
inline bool is_ancestor(node_t, node_t) const;
+
+ /**
+ * [is_right_descendant(n, m)] returns true if [m] is a descendant-or-self of
+ * a following-sibling of [n], false otherwise
+ * Runs in O(1)
+ */
inline bool is_right_descendant(node_t, node_t) const;
+
+ /**
+ * [is_child(n, m)] returns true if [m] is a child of [n]
+ * Runs in O(1)
+ */
bool is_child(node_t, node_t) const;
+
+ /**
+ * [is_first_child(n, m)] returns true if [m] is the first child of [n]
+ * Runs in O(1)
+ */
inline bool is_first_child(node_t) const;
+
+ /**
+ * [is_nil(n)] returns true if [n] is equal to xml_tree::NIL
+ * Runs in O(1)
+ */
inline bool is_nil(node_t) const;
+
+ /**
+ * [is_open(n)] returns true if [n], seen as an index in the
+ * underlying BP representation corresponds to an opening parenthesis.
+ * Runs in O(1)
+ */
inline bool is_open(node_t) const;
+ //Numbering functions
+ /**
+ * [depth(n)] returns the depths of node [n]. The root has depth 1.
+ * Runs in O(1)
+ */
uint32_t depth(node_t) const;
+
+ /**
+ * [preorder(n)] returns the preorder of node [n]. Equivalent to calling
+ * rank on the underlying BP representation.
+ * Runs in O(1)
+ */
uint32_t preorder(node_t) const;
+
+ /**
+ * [preorder(n)] returns the postorder of node [n].
+ * Runs in O(1)
+ */
uint32_t postorder(node_t) const;
//Tag functions
+ /**
+ * [tag(n)] returns the tag of node [n] which must be a valid node identifier
+ * (in particular not NIL)
+ * Runs in O(1)
+ */
inline tag_t tag(node_t) const;
+
+ /**
+ * [get_tag_name_by_ref(t)] returns the string representation of tag [t]
+ * For elements, the string representation is the tag name itself
+ * Returns <!INVALID!> if [t] is not a proper tag identifier.
+ * Runs in O(1)
+ */
const char* get_tag_name_by_ref(tag_t) const;
+
+ /**
+ * [register_tag(s)] returns the tag identifier for the tag represented
+ * by the string [s]. If no such tag exists in the document, return a
+ * fresh tag identifier [i]. Subsequent calls with the same [s] will return
+ * the same identifier.
+ * Runs in O(1)
+ */
tag_t register_tag(char *s);
//Navigation functions
SXSI::TextCollection::document_result contains(uchar const *s) const;
SXSI::TextCollection::document_result less_than(uchar const *s) const;
+
+ bool naive_contains(node_t, uchar const *s) const;
+
//I/O functions
void save(int, char*);
static xml_tree* load(int, char*, bool, int);
//Mapping from tag_t identifiers to/from tagnames
std::vector<std::string> *tag_names;
std::unordered_map<std::string, tag_t> *tag_ids;
+ //Set of tag ids that map to attribute nodes
+ std::unordered_set<tag_t> *attribute_ids;
//Text index
SXSI::TextCollection *text_collection;
static_bitsequence *text_positions;