X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=src%2Ftree.ml;h=31e2f79b2c5290f84fb1a857284e5bf440ebc852;hb=7d26a502050836784010cabae6acedee8aa9a46c;hp=ec253bcdae8345849eb0c5680d4a45c897a66096;hpb=0da8c3c7c76ab06d5ccfc6ae52488d7549735059;p=SXSI%2Fxpathcomp.git diff --git a/src/tree.ml b/src/tree.ml index ec253bc..31e2f79 100644 --- a/src/tree.ml +++ b/src/tree.ml @@ -572,7 +572,7 @@ let parse_xml_string str = node_of_t (TreeBuilder.parse_string str) let size t = tree_size t.doc;; let magic_string = "SXSI_INDEX" -let version_string = "3" +let version_string = "4" let pos fd = Unix.lseek fd 0 Unix.SEEK_CUR @@ -633,7 +633,7 @@ let load ?(sample=64) ?(load_text=true) str = let _ = set_binary_mode_in in_c true in let load_table () = (let ms = input_line in_c in if ms <> magic_string then failwith "Invalid index file"); - (let vs = input_line in_c in if vs <> version_string then failwith "Invalid version file"); + (let vs = input_line in_c in if vs <> version_string then failwith "Unsupported index format"); let c = load_tag_table in_c in let s = load_tag_table in_c in let d = load_tag_table in_c in @@ -785,7 +785,7 @@ let stats tree = in traverse tree.doc root [] 0; let sumdepth = Hashtbl.fold (fun p c acc -> (List.length p) * c + acc) h 0 in - + let alltags = Ptset.Int.union tree.elements tree.attributes in Logger.print err_formatter "Statistics :@\n\ Average depth: %f@\n\ Longest path: %i@\n\ @@ -793,14 +793,16 @@ Number of distinct paths: %i@\n\ Number of nodes: %i@\n\ Number of leaves: %i@\n\ Number of pcdata/cdata nodes: %i@\n\ -Number of distinct tags: %i@\n@?" +Number of distinct tags: %i@\n\ +Largest tag id: %i@\n@?" (float_of_int sumdepth /. float_of_int !numleaves) !depth (Hashtbl.length h) (tree_subtree_size tree.doc root) !numleaves !numtexts - (Ptset.Int.cardinal (Ptset.Int.union tree.elements tree.attributes)) + (Ptset.Int.cardinal alltags) + (Ptset.Int.max_elt alltags) (* Logger.print err_formatter "Average depth: %f, number of leaves %i@\n@?" ((float_of_int a)/. (float_of_int b)) b