CAMLparam2(bitmap, nsize);
size_t bits = Long_val(nsize);
size_t bytes = (bits / (8 * sizeof(unsigned int)) + 1 ) * sizeof(unsigned int);
- fprintf(stderr, "Growing to: %lu bytes\n", (bits / (8 * sizeof(unsigned int)) + 1 ) * sizeof(unsigned int));
unsigned int * buffer = (unsigned int*) realloc((void *) bitmap, bytes);
if (buffer == NULL)
CAMLRAISEMSG("BP: cannot reallocate memory");
int f1 = Int_val(file);
int f2 = dup(f1);
FILE * fd = fdopen(f2, "a");
- fprintf(stderr, "Writing %i %p bytes\n", ((B->n+D-1)/D)*8, B );
fflush(stderr);
if (fd == NULL)
CAMLRAISEMSG("Error saving bp file");
loop 1 l
in
Hashtbl.iter (fun lhs rhs ->
- let Node( head, _ ) = lhs in
+ let Node( head, args ) = lhs in
let Node( tag1, params) = rhs in
let tag2, pos2 = pos_id2 params in
let id1 = h_find ~msg:"7" tag_to_id tag1
- and id2 = h_find ~msg:"8" tag_to_id tag2
- in
+ and id2 = h_find ~msg:"8" tag_to_id tag2 in
let rule_ = id2 lsl 27 in
let rule_ = (rule_ lor id1) lsl 2 in
let rule_ = (rule_ lor pos2) lsl 2 in
- let rule_ = rule_ lor (List.length params) in
+ let rule_ = (rule_ lor (List.length params)) lsl 2 in
+ let rule_ = rule_ lor (List.length args) in
r_array.((h_find ~msg:"9" tag_to_id head) - rules_offset ) <- rule_
) rules;
(*let l = Array.length renum_tags in *)
let load f =
let cin = open_in f in
- let pr_pos () =
- Printf.eprintf "Position: %i kiB\n" (pos_in cin / 1024)
- in
let read () = Marshal.from_channel cin in
if read () != _GRAMMAR_MAGIC then failwith "Invalid grammar file";
if read () != _GRAMMAR_VERSION then failwith "Deprecated grammar format";
- pr_pos();
let tags : int array = read () in
- pr_pos();
let rules : int array = read () in
- pr_pos();
let rules_offset : int = read () in
- pr_pos();
let tag_to_id : (string, int) Hashtbl.t = read () in
- pr_pos();
let tag_of_id : string array = read () in
- pr_pos();
let fd = Unix.descr_of_in_channel cin in
let pos = pos_in cin in
ignore(Unix.lseek fd pos Unix.SEEK_SET);
type tn_symbol = [ any_type ] Node.t
+type partial =
+ Leaf of node
+ | Node of tn_symbol * partial array
+
+
let is_nil (t : t_symbol) =
(Node.to_int t) == 4
let translate_tag _ t = if t == 4 then ~-1 else t
let to_string t tag =
if tag < Array.length t.tag_of_id then t.tag_of_id.(Tag.to_int tag)
- else "<!INVALIDTAG!>"
+ else "<!INVALID TAG!>"
let register_tag t s =
try Hashtbl.find t.tag_to_id s with
}
let start_root : node = Node.of_int 0
-let start_tag t (idx : node) =
- t.tags.(Bp.preorder_rank t.start (Node.to_int idx))
+let start_tag g (idx : node) : [<any_type] Node.t =
+ Node.of_int (g.tags.(Bp.preorder_rank g.start (Node.to_int idx)))
+
+;;
let start_first_child t (idx : node) =
- Bp.first_child t.start (Node.to_int idx)
+ Node.of_int (Bp.first_child t.start (Node.to_int idx))
let start_next_sibling t (idx : node) =
- Bp.next_sibling t.start (Node.to_int idx)
+ Node.of_int (Bp.next_sibling t.start (Node.to_int idx))
let is_non_terminal t (n : [< any_type ] Node.t) =
let n = Node.to_int n in
let is_terminal t (n : [< any_type ] Node.t) = not(is_non_terminal t n)
external terminal : [< any_type ] Node.t -> t_symbol = "%identity"
-external non_terminal : [< any_type ] Node.t -> t_symbol = "%identity"
+external non_terminal : [< any_type ] Node.t -> n_symbol = "%identity"
let tag (n : t_symbol) : Tag.t = Obj.magic n
let get_id1 (r : rhs) : tn_symbol =
Node.of_int(
- ((Node.to_int r) lsr 4) land 0x7ffffff)
+ ((Node.to_int r) lsr 6) land 0x7ffffff)
let get_id2 (r : rhs) : tn_symbol =
- Node.of_int((Node.to_int r) lsr 31)
+ Node.of_int((Node.to_int r) lsr 33)
+
+let get_rank (r : rhs) : int =
+ (Node.to_int r) land 0b11
-let get_param_pos (r : rhs) : int =
+let get_id1_rank (r : rhs) : int =
((Node.to_int r) lsr 2) land 0b11
-let num_params (r : rhs) : int =
- (Node.to_int r) land 0b11
+let get_id2_pos (r : rhs) : int =
+ ((Node.to_int r) lsr 4) land 0b11
+let get_id2_rank (r : rhs) : int =
+ get_rank r + 1 - get_id1_rank r
type t
+
+type node = [ `Start ] Node.t
+
+type n_type = [ `NonTerminal ]
+type t_type = [ `Terminal ]
+type r_type = [ `Rule ]
+type any_type = [ n_type | t_type ]
+type rhs = [ r_type ] Node.t
+
+type n_symbol = n_type Node.t
+type t_symbol = t_type Node.t
+type tn_symbol = [ any_type ] Node.t
+
+
+type partial =
+ Leaf of node
+ | Node of tn_symbol * partial array
+
+
val parse : string -> t
val save : t -> string -> unit
val load : string -> t
+val tag_operations : t -> Tag.operations
+
+val start_tag : t -> node -> tn_symbol
+val is_terminal : t -> [< any_type ] Node.t -> bool
+val is_non_terminal : t -> [< any_type ] Node.t -> bool
+external terminal : [< any_type ] Node.t -> t_symbol = "%identity"
+external non_terminal : [< any_type ] Node.t -> n_symbol = "%identity"
+val nil_symbol : t_symbol
+val tag : t_symbol -> Tag.t
+val start_first_child : t -> node -> node
+val start_next_sibling : t -> node -> node
+val get_rule : t -> n_symbol -> rhs
+val get_rank : rhs -> int
+val get_id1_rank : rhs -> int
+val get_id2_rank : rhs -> int
+val get_id2_pos : rhs -> int
+val get_id1 : rhs -> tn_symbol
+val get_id2 : rhs -> tn_symbol
let default_gc = Gc.get()
let tuned_gc = { default_gc with
Gc.minor_heap_size = 32*1024*1024;
- Gc.major_heap_increment = 8*1024*1024;
+ Gc.major_heap_increment = 8*1024*1024;
Gc.max_overhead = 1000000;
Gc.space_overhead = 100;
}
let () = Options.parse_cmdline()
;;
+let _ =
+ try
+ Printexc.record_backtrace true;
-let document =
- if Filename.check_suffix !Options.input_file ".g" then
- let g = Grammar2.parse !Options.input_file in
- let () = Grammar2.save g "/tmp/test.g" in
- let g = Grammar2.load "/tmp/test.g" in
- ignore g;
- exit 3
-
- else if Filename.check_suffix !Options.input_file ".g.bin" then
- let g = time ~msg:"Loading grammar" (Grammar.load !Options.input_file) true in
- begin
+ let document =
+ if Filename.check_suffix !Options.input_file ".g.bin" ||
+ Filename.check_suffix !Options.input_file ".g"
+ then
+ let is_index = Filename.check_suffix !Options.input_file ".g.bin" in
+ let g =
+ if is_index then
+ time ~msg:"Loading grammar" (Grammar2.load) !Options.input_file
+ else
+ let g = time ~msg:"Parsing grammar file" Grammar2.parse !Options.input_file in
+ if !Options.save_file <> "" then
+ time ~msg:"Saving index" (Grammar2.save g) !Options.save_file;
+ g
+ in
+ begin
(* Todo Factorise with main *)
- Tag.init (Grammar.tag_operations g);
- let query =
- time ~msg:"Parsing query" XPath.parse !Options.query
- in
- if !Options.verbose then begin
- Printf.eprintf "Parsed query:\n%!";
- XPath.Ast.print Format.err_formatter query;
- Format.fprintf Format.err_formatter "\n%!"
- end;
- let auto, bu_info =
- time ~msg:"Compiling query" (Compile.compile) query
- in
- if !Options.verbose then Ata.print Format.err_formatter auto;
- Gc.full_major();
- Gc.compact();
- Gc.set (tuned_gc);
- let runtime =
- let module R = ResJIT.Count in
- let module M = Runtime.Make(R) in
+ Tag.init (Grammar2.tag_operations g);
+ let query =
+ time ~msg:"Parsing query" XPath.parse !Options.query
+ in
+ if !Options.verbose then begin
+ Printf.eprintf "Parsed query:\n%!";
+ XPath.Ast.print Format.err_formatter query;
+ Format.fprintf Format.err_formatter "\n%!"
+ end;
+ let auto, bu_info =
+ time ~msg:"Compiling query" (Compile.compile) query
+ in
+ if !Options.verbose then Ata.print Format.err_formatter auto;
+ Gc.full_major();
+ Gc.compact();
+ Gc.set (tuned_gc);
+ let runtime =
+ let module R = ResJIT.Count in
+ let module M = Runtime.Make(R) in
(* mk_runtime run auto doc arg count print outfile *)
- mk_runtime M.grammar_run auto (Obj.magic g) () R.NS.length (Obj.magic R.NS.serialize) None
- in
- runtime ();
- exit 0
- end
- else if Filename.check_suffix !Options.input_file ".srx"
- then
- time
- ~msg:"Loading file"
- (Tree.load
- ~sample:!Options.sample_factor
- ~load_text:true)
- !Options.input_file
- else
- let v =
- time
- ~msg:"Parsing document"
- (Tree.parse_xml_uri)
- !Options.input_file
- in
- let () =
- if !Options.save_file <> ""
+ mk_runtime M.grammar_run auto (Obj.magic g) () R.NS.length (Obj.magic R.NS.serialize) None
+ in
+ runtime ();
+ exit 0
+ end
+ else if Filename.check_suffix !Options.input_file ".srx"
then
time
- ~msg:"Writing file to disk"
- (Tree.save v)
- !Options.save_file;
+ ~msg:"Loading file"
+ (Tree.load
+ ~sample:!Options.sample_factor
+ ~load_text:true)
+ !Options.input_file
+ else
+ let v =
+ time
+ ~msg:"Parsing document"
+ (Tree.parse_xml_uri)
+ !Options.input_file
+ in
+ let () =
+ if !Options.save_file <> ""
+ then
+ time
+ ~msg:"Writing file to disk"
+ (Tree.save v)
+ !Options.save_file;
+ in
+ v
in
- v
-in
- try
- (*Printexc.record_backtrace true; *)
main document !Options.query !Options.output_file;
if !Options.verbose then Printf.eprintf "Maximum resident set size: %s\n" (read_procmem());
Gc.full_major();
Profile.summary Format.err_formatter
with
- | Ulexer.Loc.Exc_located ((x,y),e) ->
- Printf.eprintf "character %i-%i %s\n" x y (Printexc.to_string e);
- exit 1
-
- | e ->
- output_string stderr "\n";
- flush stderr;
- Printexc.print_backtrace stderr;
- Printf.eprintf "FATAL ERROR: %s\n%!" (Printexc.to_string e);
- output_string stderr "\n";
- flush stderr;
-(* Ptset.Int.stats(); *)
- exit 2
+ | Ulexer.Loc.Exc_located ((x,y),e) ->
+ Printf.eprintf "character %i-%i %s\n" x y (Printexc.to_string e);
+ exit 1
+
+ | e ->
+ output_string stderr "\n";
+ flush stderr;
+ Printexc.print_backtrace stderr;
+ Printf.eprintf "FATAL ERROR: %s\n%!" (Printexc.to_string e);
+ output_string stderr "\n";
+ flush stderr;
+ exit 2
type result_set
val top_down_run : Ata.t -> Tree.t -> Tree.node -> result_set
val bottom_up_run : Ata.t -> Tree.t -> Compile.text_query * string -> result_set
- val grammar_run : Ata.t -> Grammar.t -> unit -> result_set
+ val grammar_run : Ata.t -> Grammar2.t -> unit -> result_set
end
(* Grammar run *)
+
external is_young : 'a array -> bool = "caml_custom_is_young" "noalloc"
external blit : 'a array -> int -> 'a array -> int -> int -> unit = "caml_custom_array_blit"
module M = Map.Make(struct type t = Grammar.n_symbol let compare = compare end)
let grammar_run auto g () =
- let start_symbol = Node.of_int 0 in
- let dummy_leaf = Grammar.Leaf (Node.nil) in
- let nil_symbol = Grammar.nil_symbol g in
+ let dummy_leaf = Grammar2.Leaf (Node.nil) in
let res_len = (StateSet.max_elt auto.states) + 1 in
let empty_slot = Array.create res_len U.NS.empty in
let nil_res = auto.bottom_states, empty_slot in
if idx < Node.null then nil_res
else if StateSet.is_empty states then empty_res
else begin
- let symbol = Grammar.get_symbol_at g start_symbol idx in
- if Grammar.is_terminal symbol then
- let symbol = Grammar.terminal symbol in
- if symbol == nil_symbol then nil_res else
- let tag = Grammar.tag symbol in
+ let symbol = Grammar2.start_tag g idx in
+ if Grammar2.is_terminal g symbol then
+ let symbol = Grammar2.terminal symbol in
+ if symbol == Grammar2.nil_symbol then nil_res else
+ let tag = Grammar2.tag symbol in
let lst, rst, trans = get_trans tag states in
- let fs = Grammar.start_first_child g idx in
+ let fs = Grammar2.start_first_child g idx in
let s1, slot1 = start_loop fs lst in
- let s2, slot2 = start_loop (Grammar.start_next_sibling g fs) rst in
+ let s2, slot2 = start_loop (Grammar2.start_next_sibling g fs) rst in
let opcode = L3JIT.find cache3 trans s1 s2 in
if opcode == L3JIT.dummy then
(L3JIT.cache_apply cache3 auto trans s1 s2) empty_slot slot1 slot2 (Obj.magic ()) (Obj.magic ())
else opcode empty_slot slot1 slot2 (Obj.magic ()) (Obj.magic())
else
- let nt = Grammar.non_terminal symbol in
- let nparam = Grammar.num_params nt in
- let child = ref (Grammar.first_child g start_symbol idx) in
+ let nt = Grammar2.non_terminal symbol in
+ let rhs = Grammar2.get_rule g nt in
+ let nparam = Grammar2.get_rank rhs in
+ let child = ref (Grammar2.start_first_child g idx) in
for i = 0 to nparam - 1 do
let c = !child in
- parameters.(i) <- Grammar.Leaf c;
- child := Grammar.next_sibling g start_symbol c;
+ parameters.(i) <- Grammar2.Leaf c;
+ child := Grammar2.start_next_sibling g c;
done;
rule_loop nt states parameters
end
- and rule_loop (t : Grammar.n_symbol) states a_param =
+ and rule_loop (t : Grammar2.n_symbol) states a_param =
incr rule_counter;
-(* log_symbol (t); *)
if !rule_counter land (4095) == 0 then begin Gc.minor() end;
- let id1 = Grammar.get_id1 g t in
- let id2 = Grammar.get_id2 g t in
- let param_pos = Grammar.get_param_pos t in
- let nparam1 = Grammar.num_children id1 in
- let nparam2 =
- if Grammar.is_terminal id2 && nil_symbol == (Grammar.terminal id2) then 0
- else Grammar.num_children id2
- in
+ let rhs = Grammar2.get_rule g t in
+ let id1 = Grammar2.get_id1 rhs in
+ let id2 = Grammar2.get_id2 rhs in
+ let param_pos = Grammar2.get_id2_pos rhs in
+ let nparam1 = Grammar2.get_id1_rank rhs in
+ let nparam2 = Grammar2.get_id2_rank rhs in
let a_param2 = if nparam2 == 0 then [||] else Array.create nparam2 dummy_leaf in
let i = param_pos - 2 in
let ip1 = i + 1 in
let offset2d = i+2 in
let offset2s = i+nparam2 + 1 in
blit a_param 0 parameters_tmp 0 (i+1);
- parameters_tmp.(ip1) <- Grammar.Node(id2, a_param2); (* id2( ... ) *)
+ parameters_tmp.(ip1) <- Grammar2.Node(id2, a_param2); (* id2( ... ) *)
blit a_param offset2s parameters_tmp offset2d (nparam1 - i - 2);
blit a_param ip1 a_param2 0 nparam2;
blit parameters_tmp 0 parameters 0 nparam1;
- if Grammar.is_non_terminal id1 then
- let id1 = Grammar.non_terminal id1 in
+ if Grammar2.is_non_terminal g id1 then
+ let id1 = Grammar2.non_terminal id1 in
rule_loop id1 states parameters
else
- let id1 = Grammar.terminal id1 in
+ let id1 = Grammar2.terminal id1 in
terminal_loop id1 states parameters
- and terminal_loop (symbol : Grammar.t_symbol) states a_param =
- if symbol == nil_symbol then nil_res else begin
+ and terminal_loop (symbol : Grammar2.t_symbol) states a_param =
+ if symbol == Grammar2.nil_symbol then nil_res else begin
(* todo factor in from start_loop *)
- let tag = Grammar.tag symbol in
+ let tag = Grammar2.tag symbol in
let lst, rst, trans = get_trans tag states in
let next = a_param.(1) in
let s1, slot1 = partial_loop a_param.(0) lst in
else
opcode empty_slot slot1 slot2 (Obj.magic()) (Obj.magic())
- (* End: TODO refactor *)
-
end
and partial_loop l states =
match l with
- | Grammar.Leaf id -> start_loop id states
- | Grammar.Node (id, a_param) ->
- let is_term = Grammar.is_terminal id in
+ | Grammar2.Leaf id -> start_loop id states
+ | Grammar2.Node (id, a_param) ->
+ let is_term = Grammar2.is_terminal g id in
if is_term then
- terminal_loop (Grammar.terminal id) states a_param
+ terminal_loop (Grammar2.terminal id) states a_param
else
- rule_loop (Grammar.non_terminal id) states a_param
+ rule_loop (Grammar2.non_terminal id) states a_param
in
let _, slot = start_loop (Node.null) auto.init in
type result_set
val top_down_run : Ata.t -> Tree.t -> Tree.node -> result_set
val bottom_up_run : Ata.t -> Tree.t -> Compile.text_query * string -> result_set
- val grammar_run : Ata.t -> Grammar.t -> unit -> result_set
+ val grammar_run : Ata.t -> Grammar2.t -> unit -> result_set
end
module Make (U : ResJIT.S) : S with type result_set = U.NS.t