_build
*.native
*.byte
+doc/*.ps
+doc/*.out
+doc/html/*
\ No newline at end of file
--- /dev/null
+# Generate the HTML documentation and the module dependency graph.
+# ocamldoc writes into the directory given with -d but does not create it,
+# and the content of doc/html is git-ignored, so make sure it exists first.
+mkdir -p html
+ocamldoc -d html/ -html -I ../_build/src/ -hide Camlp4 ../_build/src/*.ml
+# The -dot run leaves its graph in ocamldoc.out, which dot turns into PostScript.
+ocamldoc -dot -I ../_build/src/ -hide Camlp4 ../_build/src/*.ml
+dot -Tps ocamldoc.out -o dep.ps
\ No newline at end of file
--- /dev/null
+./test.native ./tests/docs/tiny.xml '/child::site/child::regions' # query passed inline as the second argument
+./test.native ./tests/docs/tiny.xml -f ./tests/queries/Treebank.xml.queries # query read from a file; only its first line is used
+./test.native ./tests/docs/tiny.xml '/descendant::listitem[not(descendant::keyword/child::emph)]/descendant::parlist' # inline query with a negated predicate
--- /dev/null
+(***********************************************************************)
+(* *)
+(* TAToo *)
+(* *)
+(* ? *)
+(* ? *)
+(* *)
+(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *)
+(* Recherche Scientifique. All rights reserved. This file is *)
+(* distributed under the terms of the GNU Lesser General Public *)
+(* License, with the special exception on linking described in file *)
+(* ../LICENSE. *)
+(* *)
+(***********************************************************************)
+
+open Format
+
+type state = State.t
+
+type label = QNameSet.t
+
+type formula = Formula.t
+
+(** Transitions of an ASTA: a source state, the set of labels the
+    transition applies to, and the formula attached to it. *)
+module Transition =
+struct
+  type t = state * label * formula
+
+  (* Lexicographic order on (state, label, formula). Having [compare] makes
+     this module a valid argument for [Set.Make]. The intermediate results
+     are integers, so they are tested with structural [<>] rather than
+     physical [!=]. *)
+  let compare (st, la, f) (st', la', f') =
+    let c = State.compare st st' in
+    if c <> 0 then c
+    else
+      let c = QNameSet.compare la la' in
+      if c <> 0 then c
+      else Formula.compare f f'
+
+  (* Projections on the three components of a transition. *)
+  let st (st, _, _) = st
+  let la (_, la, _) = la
+  let fo (_, _, f) = f
+end
+
+(* Sets of transitions, ordered by [Transition.compare]. *)
+module SetT = Set.Make(Transition)
+
+type t = { (* An ASTA: four sets of states plus a set of transitions. *)
+ reco : StateSet.t; (* presumably the recognizing states — TODO confirm *)
+ selec : StateSet.t; (* presumably the selecting states — TODO confirm *)
+ bottom : StateSet.t; (* presumably the states allowed at the leaves — TODO confirm *)
+ top : StateSet.t; (* presumably the states allowed at the root — TODO confirm *)
+ trans : SetT.t; (* the transitions; this is the only field read by [transition] and [transitions] *)
+}
+
+exception Not_found_transition (* raised by [transition] when no transition matches the requested state and label *)
+exception Transition_not_injective (* raised by [transition] when more than one transition matches the requested state and label *)
+
+(* [transition asta st lab] is the formula of the unique transition of
+   [asta] whose state compares equal to [st] and whose label set compares
+   equal to [lab].
+   Raises [Not_found_transition] when no transition matches and
+   [Transition_not_injective] when several do. *)
+let transition asta st lab =
+  let matches (s, l, _) =
+    State.compare s st = 0 && QNameSet.compare l lab = 0 in
+  match SetT.elements (SetT.filter matches asta.trans) with
+  | [only] -> Transition.fo only
+  | [] -> raise Not_found_transition
+  | _ :: _ :: _ -> raise Transition_not_injective
+
+(* [transitions asta st] lists the (label, formula) pairs of every
+   transition of [asta] whose state compares equal to [st], in the order
+   given by [SetT.elements]. *)
+let transitions asta st =
+  let from_st (s, _, _) = State.compare s st = 0 in
+  let selected = SetT.elements (SetT.filter from_st asta.trans) in
+  (* Drop the state component, which is the same for every selected transition. *)
+  List.map (fun (_, lab, phi) -> (lab, phi)) selected
+
+let print fmt asta = () (* TODO: placeholder — ignores both arguments and prints nothing yet *)
+
+let to_file out asta = () (* TODO: placeholder — ignores both arguments and writes nothing yet *)
--- /dev/null
+(***********************************************************************)
+(* *)
+(* TAToo *)
+(* *)
+(* ? *)
+(* ? *)
+(* *)
+(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *)
+(* Recherche Scientifique. All rights reserved. This file is *)
+(* distributed under the terms of the GNU Lesser General Public *)
+(* License, with the special exception on linking described in file *)
+(* ../LICENSE. *)
+(* *)
+(***********************************************************************)
+
+(** Implementation of alternating selecting tree automata (ASTA) *)
+
+type state
+(** The type of states *)
+
+type label
+(** The type of labels of the transitions *)
+
+type formula
+(** The type of transition formulae *)
+
+type t
+(** The type of ASTAs *)
+
+exception Not_found_transition
+(** Raised by {!transition} when the automaton has no transition for the
+    requested state and label *)
+
+exception Transition_not_injective
+(** Raised by {!transition} when the automaton has several transitions for
+    the requested state and label *)
+
+val transition : t -> state -> label -> formula
+(** [transition a q l] gives the formula which must hold for the current
+    state [q] and label [l].
+    @raise Not_found_transition if no transition of [a] matches [q] and [l]
+    @raise Transition_not_injective if more than one transition matches *)
+
+val transitions : t -> state -> (label*formula) list
+(** Give the list of labels and formulae from transitions for a given state.
+    The list is empty when the state has no transition. *)
+
+val print : Format.formatter -> t -> unit
+(** Describe the automaton as text.
+    NOTE: the current implementation is a stub that prints nothing. *)
+
+val to_file : out_channel -> t -> unit
+(** Outputs the description of the automaton on the given out_channel.
+    NOTE: the current implementation is a stub that writes nothing. *)
(***********************************************************************)
+(** use: xml_file "XPath query"
+ or : xml_file -f XPath_query_file
+ only the first line of XPath_query_file is read
+*)
+
module F = Formula
(* to force ocaml build to add Formula to the dependency chain even if
we don't use it yet*)
-let query = XPath.parse Sys.argv.(2)
+(* The XPath query to run. The second command-line argument is either the
+   query itself or "-f", in which case the query is read from the file named
+   by the third argument. *)
+let query =
+  let arg2 = Sys.argv.(2) in
+  if arg2 = "-f" then begin
+    let fq = open_in Sys.argv.(3) in
+    (* Release the channel even when parsing fails, then let the error
+       propagate unchanged. *)
+    let q =
+      try XPath.parse_file fq
+      with e -> close_in_noerr fq; raise e
+    in
+    close_in fq;
+    q
+  end
+  else XPath.parse_string arg2
open Format
let is_left n = n.next_sibling == dummy
- let start_element_handler parser_ ctx tag attr_list =
+
+ let text_string = QName.to_string QName.text
+ let attr_map_string = QName.to_string QName.attribute_map
+
+ let rec start_element_handler parser_ ctx tag attr_list =
+ do_text parser_ ctx;
let parent = top ctx in
let n = { tag = QName.of_string tag;
preorder = next ctx;
in
if parent.first_child == dummy then parent.first_child <- n
else parent.next_sibling <- n;
- push n ctx
-
- let rec consume_closing ctx n =
+ push n ctx;
+ match attr_list with
+ [] -> ()
+ | _ ->
+ start_element_handler parser_ ctx attr_map_string [];
+ List.iter (do_attribute parser_ ctx) attr_list;
+ end_element_handler parser_ ctx attr_map_string
+
+ and do_attribute parser_ ctx (att, value) = (* Encode one attribute [(att, value)] as an element named [" " ^ att]
+   that contains a single text node whose [data] is [value]. *)
+ let att_tag = " " ^ att in (* NOTE(review): the leading space looks deliberate — [print_attributes] writes the tag name right after the element name with no separator of its own; confirm *)
+ start_element_handler parser_ ctx att_tag [];
+ start_element_handler parser_ ctx text_string [];
+ let n = top ctx in n.data <- value; (* the node on top of the stack is the text node opened on the previous line *)
+ end_element_handler parser_ ctx text_string;
+ end_element_handler parser_ ctx att_tag
+
+ and consume_closing ctx n =
if n.next_sibling != dummy then
let _ = pop ctx in consume_closing ctx (top ctx)
-
- let end_element_handler parser_ ctx tag =
+ and end_element_handler parser_ ctx tag =
+ do_text parser_ ctx;
let node = top ctx in
if node.first_child == dummy then node.first_child <- nil
else begin
consume_closing ctx node
end
- let character_data_handler parser_ _ t text =
- Buffer.add_string t text
+ and do_text parser_ ctx = (* Flush the pending character data, if any, into a new text node. *)
+ if Buffer.length ctx.text_buffer != 0 then
+ let s = Buffer.contents ctx.text_buffer in
+ Buffer.clear ctx.text_buffer; (* must happen before the next line: [start_element_handler] begins by calling [do_text] again, which has to find the buffer empty *)
+ start_element_handler parser_ ctx text_string [];
+ let node = top ctx in (* the text node that was just pushed *)
+ node.data <- s;
+ end_element_handler parser_ ctx text_string
+
+
+
+ let character_data_handler parser_ ctx text = (* Only accumulate the text; [do_text] turns the buffer into a node later. [parser_] is unused. *)
+ Buffer.add_string ctx.text_buffer text
let create_parser () =
let ctx = { text_buffer = Buffer.create 512;
let parser_ = Expat.parser_create ~encoding:None in
Expat.set_start_element_handler parser_ (start_element_handler parser_ ctx);
Expat.set_end_element_handler parser_ (end_element_handler parser_ ctx);
+ Expat.set_character_data_handler parser_ (character_data_handler parser_ ctx);
push { tag = QName.document;
preorder = next ctx;
data = "";
let load_xml_file = Parser.parse_file
let load_xml_string = Parser.parse_string
+
+(* [output_escape_string out s] writes [s] on [out], replacing each of the
+   five characters that are special in XML (less-than, greater-than,
+   ampersand, double quote, single quote) by its predefined entity
+   reference, so that the output is valid both as character data and inside
+   a quoted attribute value. Every other character is copied unchanged. *)
+let output_escape_string out s =
+  (* Write the entity reference called [name]: an ampersand, the name, then
+     a semicolon. *)
+  let entity name =
+    output_char out '&';
+    output_string out name;
+    output_char out ';'
+  in
+  String.iter
+    (function
+      | '<' -> entity "lt"
+      | '>' -> entity "gt"
+      | '&' -> entity "amp"
+      | '"' -> entity "quot"
+      | '\'' -> entity "apos"
+      | c -> output_char out c)
+    s
+
+(* Print the attribute nodes of the sibling chain starting at [node] as
+   name="value" pairs. Each attribute node holds its name in [tag] and its
+   value in the [data] of its first child; the chain ends at [nil]. Nothing
+   is written between two pairs, so any separator has to be part of the
+   name itself. [tree_] is only passed along to the recursive call. *)
+let rec print_attributes out tree_ node =
+  if node == nil then ()
+  else begin
+    let name = QName.to_string node.tag in
+    let value = node.first_child.data in
+    output_string out name;
+    output_string out "=\"";
+    output_escape_string out value;
+    output_char out '"';
+    print_attributes out tree_ node.next_sibling
+  end
+
let rec print_xml out tree_ node =
if node != nil then
- let tag = QName.to_string node.tag in
- output_char out '<';
- output_string out tag;
- (* print attributes *)
- if node.first_child == nil then output_string out "/>"
- else begin
- output_char out '>';
- print_xml out tree_ node.first_child;
- output_string out "</";
- output_string out tag;
- output_char out '>'
- end;
+ let () = (* print [node] itself; the recursive call after this binding prints its following siblings *)
+ if node.tag == QName.text then (* a text node is written as its escaped [data], with no markup of its own *)
+ output_escape_string out node.data
+ else
+ let tag = QName.to_string node.tag in
+ output_char out '<';
+ output_string out tag;
+ let fchild =
+ if node.first_child.tag == QName.attribute_map then (* a leading attribute-map child is printed inside the start tag and then skipped as a regular child *)
+ let () =
+ print_attributes out tree_ node.first_child.first_child
+ in
+ node.first_child.next_sibling
+ else
+ node.first_child
+ in
+ if fchild == nil then output_string out "/>" (* no child left to print: emit a self-closing tag *)
+ else begin
+ output_char out '>';
+ print_xml out tree_ fchild;
+ output_string out "</";
+ output_string out tag;
+ output_char out '>'
+ end
+ in
print_xml out tree_ node.next_sibling
-
let root t = t.root
let first_child _ n = n.first_child
let next_sibling _ n = n.next_sibling
*)
- let parse = Gram.parse_string query (Ulexer.Loc.mk "<string>")
+ let parse_string = Gram.parse_string query (Ulexer.Loc.mk "<string>") (* parse a query held in a string; locations are reported under the name "<string>" *)
+ let parse_file fd = parse_string (input_line fd) (* parse only the first line read from [fd]; [input_line] raises [End_of_file] if the channel is already at its end *)
+
end
-let parse = Parser.parse
+let parse_string = Parser.parse_string (* re-export of [Parser.parse_string] *)
+let parse_file = Parser.parse_file (* re-export of [Parser.parse_file] *)
end
-val parse : string -> Ast.path
+val parse_string : string -> Ast.path (** Parse the XPath query contained in the given string *)
+val parse_file : in_channel -> Ast.path (** Parse the XPath query found on the first line read from the given channel; the rest of the channel is not read *)
<c/>
<d/>
<e>
- <f> <g/> <h/> </f>
+ <f id="1" value="2" > <g/> <h/> </f>
<i> </i>
</e>
<j> <k/> <l/> <m/> </j>
--- /dev/null
+/descendant::NP
+#%/descendant::*/child::NP
+#T03%/descendant::NP/descendant::VBZ
+T02%/descendant::S[descendant::VP and descendant::NP]/child::VP/child::PP[child::IN]/child::NP/child::VBN
+T03%/descendant::NP[descendant::JJ or descendant::CC]
+T04%/descendant::CC[ not(descendant::JJ) ]
+#%/descendant::S/child::VP/child::PP[child::NP/child::VBN]/child::IN
+T05%/descendant::NN[descendant::VBZ or descendant::IN]/child::*[descendant::NN or descendant::_QUOTE_]
+#T06%/descendant::EMPTY[descendant::VP and descendant::S]/descendant::PP[descendant::S and descendant::VGN]
\ No newline at end of file
--- /dev/null
+/descendant::promoter[contains(., "PSSM 5000 8 1 0 19 20 18 1 20 7 1 0 1 0 1 18 0 2 17 0 0 0 1 0 0 3 1 20 0 0 0 1 0 8")]
+/descendant::promoter[contains(., "PSSM 100000 12 6 19 19 20 5 0 1 20 19 20 1 1 4 0 0 0 3 10 1 0 1 0 13 13 10 1 0 0 11 0 18 0 0 0 6 1 0 0 1 0 1 10 0 0 0 0 0 5") ]
+/descendant::promoter[contains(., "PSSM 300000 14 0 1 12 6 0 0 0 1 2 6 6 1 3 0 0 0 0 7 13 3 2 0 0 4 5 10 6 3 2 12 1 0 0 0 0 0 11 3 1 1 0 3 11 0 0 0 0 10 11 12 0 0 1 1 4 7") ]
+/descendant::exon[ descendant::sequence[ contains(., "PSSM 5000 8 1 0 19 20 18 1 20 7 1 0 1 0 1 18 0 2 17 0 0 0 1 0 0 3 1 20 0 0 0 1 0 8")] ]
+/descendant::exon[ descendant::sequence[ contains( ., "PSSM 100000 12 6 19 19 20 5 0 1 20 19 20 1 1 4 0 0 0 3 10 1 0 1 0 13 13 10 1 0 0 11 0 18 0 0 0 6 1 0 0 1 0 1 10 0 0 0 0 0 5") ] ]
+/descendant::exon[ descendant::sequence[ contains(., "PSSM 300000 14 0 1 12 6 0 0 0 1 2 6 6 1 3 0 0 0 0 7 13 3 2 0 0 4 5 10 6 3 2 12 1 0 0 0 0 0 11 3 1 1 0 3 11 0 0 0 0 10 11 12 0 0 1 1 4 7") ] ]
+/descendant::*[contains(., "PSSM 5000 8 1 0 19 20 18 1 20 7 1 0 1 0 1 18 0 2 17 0 0 0 1 0 0 3 1 20 0 0 0 1 0 8")]
+/descendant::*[contains(., "PSSM 100000 12 6 19 19 20 5 0 1 20 19 20 1 1 4 0 0 0 3 10 1 0 1 0 13 13 10 1 0 0 11 0 18 0 0 0 6 1 0 0 1 0 1 10 0 0 0 0 0 5") ]
+/descendant::*[contains(., "PSSM 300000 14 0 1 12 6 0 0 0 1 2 6 6 1 3 0 0 0 0 7 13 3 2 0 0 4 5 10 6 3 2 12 1 0 0 0 0 0 11 3 1 1 0 3 11 0 0 0 0 10 11 12 0 0 1 1 4 7") ]
--- /dev/null
+/descendant::Article[ descendant::AbstractText[ contains ( . , "plus") or contains ( . , "for") ] ]%/descendant::Article/descendant::AbstractText[ . ftcontains "plus" ftor "for" ]
+/descendant::Article[ descendant::AbstractText[ contains ( . , "plus") and not(contains ( . , "for")) ] ]%/descendant::Article/descendant::AbstractText[ . ftcontains "plus" ftand ftnot "for" ]
+/descendant::MedlineCitation/child::Article/child::AuthorList/child::Author[ child::LastName[starts-with( ., "Bar")]]%/descendant::MedlineCitation/child::Article/child::AuthorList/child::Author[ child::LastName ftcontains "Bar" at start ]
+/descendant::*[ descendant::LastName[ contains( ., "Nguyen") ] ]%/descendant::*[ descendant::LastName ftcontains "Nguyen" entire content ]
+/descendant::*/descendant::*[ contains( ., "epididymis") ]%/descendant::*/descendant::*[ . ftcontains "epididymis" ]
+/descendant::*[ descendant::PublicationType[ ends-with( ., "Article") ]]%/descendant::*[ descendant::PublicationType ftcontains "Article" at end ]
+/descendant::MedlineCitation[ descendant::Country[ contains( ., "AUSTRALIA") ] ]%/descendant::MedlineCitation[ descendant::Country ftcontains "AUSTRALIA" ]
--- /dev/null
+/descendant::Article[ descendant::AbstractText[ contains ( . , "blood sample") ] ]%/descendant::Article/descendant::AbstractText[ . ftcontains "blood sample" all words ordered ]
+W2%/descendant::Article[ descendant::AbstractText[ contains ( . , "is such that") ] ]%/descendant::Article/descendant::AbstractText[ . ftcontains "is such that" all words ordered ]
+W3%/descendant::Article[ descendant::AbstractText[ contains( . , "various types of") and contains( . , "immune cells") ] ]%/descendant::Article[descendant::AbstractText[ . ftcontains "various types of" all words ordered ftand "immune cells" all words ordered ]]
+W4%/descendant::Article[ descendant::AbstractText[ contains( . , "of the bone marrow") ] ]%/descendant::Article[descendant::AbstractText[ . ftcontains "of the bone marrow" all words ordered ]]
+W5%/descendant::Article[ descendant::AbstractText[ contains( . , "cell") and not(contains( ., "blood")) ] ]%/descendant::Article[descendant::AbstractText[ . ftcontains "cell" ftand ftnot "blood" ]]
+#W5%/descendant::MedlineCitation/child::Article/child::AuthorList/child::Author[ child::LastName[starts-with( ., "Bar")]]%/descendant::MedlineCitation/child::Article/child::AuthorList/child::Author[ child::LastName ftcontains "Bar" at start ]
+#W6%/descendant::*[ descendant::LastName[ contains( ., "Nguyen") ] ]%/descendant::*[ descendant::LastName ftcontains "Nguyen" entire content ]
+#W7%/descendant::*/descendant::*[ contains( ., "epididymis") ]%/descendant::*/descendant::*[ . ftcontains "epididymis" ]
+#W8%/descendant::*[ descendant::PublicationType[ ends-with( ., "Article") ]]%/descendant::*[ descendant::PublicationType ftcontains "Article" at end ]
+#W9%/descendant::MedlineCitation[ descendant::Country[ contains( ., "AUSTRALIA") ] ]%/descendant::MedlineCitation[ descendant::Country ftcontains "AUSTRALIA" ]
--- /dev/null
+/child::site/child::closed_auctions/child::closed_auction/child::annotation/child::description/child::text/child::keyword
+X04%/descendant::listitem/descendant::keyword
+#%/descendant::closed_auction/descendant::keyword
+#%/site/closed_auctions/closed_auction/descendant::keyword
+X05%/child::site/child::closed_auctions/child::closed_auction[child::annotation/child::description/child::text/child::keyword]/child::date
+X06%/child::site/child::closed_auctions/child::closed_auction[descendant::keyword]/child::date
+X07%/child::site/child::people/child::person[child::profile/child::gender and child::profile/child::age]/child::name
+X08%/child::site/child::people/child::person[child::phone or child::homepage]/child::name
+X09%/child::site/child::people/child::person[child::address and (child::phone or child::homepage) and (child::creditcard or child::profile)]/child::name
+X10%/descendant::listitem[not(descendant::keyword/child::emph)]/descendant::parlist
+X11%/descendant::listitem[ (descendant::keyword or descendant::emph) and (descendant::emph or descendant::bold)]/child::parlist
+X12%/descendant::people[ descendant::person[not(child::address)] and descendant::person[not(child::watches)]]/child::person[child::watches]
+#%/site/regions/europe/item/mailbox/mail/text/keyword
+#%/site/closed_auctions/closed_auction/annotation/description/parlist/listitem
+#%/site/closed_auctions/closed_auction/annotation/description/parlist/listitem/parlist/listitem/*/descendant::keyword
+#%/site/regions/*/item/descendant::keyword
+#%/site/regions/*/person[ address and (phone or homepage) ]
+#%/descendant::listitem[ descendant::keyword and descendant::emph]/descendant::parlist
+#%/site/regions/*/item[ mailbox/mail/date ]/mailbox/mail
+X13%/child::*[ descendant::* ]
+X14%/descendant::*
+X15%/descendant::*/descendant::*
+X16%/descendant::*/descendant::*/descendant::*
+X17%/descendant::*/descendant::*/descendant::*/descendant::*
+#%/descendant::*/descendant::*/descendant::*/descendant::*/descendant::*/descendant::*
+#%/descendant::*/descendant::*
--- /dev/null
+/child::site/child::regions/child::*/child::item
+X03%/child::site/child::closed_auctions/child::closed_auction/child::annotation/child::description/child::text/child::keyword
+X04%/descendant::listitem/descendant::keyword
+#%/descendant::closed_auction/descendant::keyword
+#%/site/closed_auctions/closed_auction/descendant::keyword
+X05%/child::site/child::closed_auctions/child::closed_auction[child::annotation/child::description/child::text/child::keyword]/child::date
+X06%/child::site/child::closed_auctions/child::closed_auction[descendant::keyword]/child::date
+X07%/child::site/child::people/child::person[child::profile/child::gender and child::profile/child::age]/child::name
+X08%/child::site/child::people/child::person[child::phone or child::homepage]/child::name
+X09%/child::site/child::people/child::person[child::address and (child::phone or child::homepage) and (child::creditcard or child::profile)]/child::name
+X10%/descendant::listitem[not(descendant::keyword/child::emph)]/descendant::parlist
+X11%/descendant::listitem[ (descendant::keyword or descendant::emph) and (descendant::emph or descendant::bold)]/child::parlist
+X12%/descendant::people[ descendant::person[not(child::address)] and descendant::person[not(child::watches)]]/child::person[child::watches]
+#%/site/regions/europe/item/mailbox/mail/text/keyword
+#%/site/closed_auctions/closed_auction/annotation/description/parlist/listitem
+#%/site/closed_auctions/closed_auction/annotation/description/parlist/listitem/parlist/listitem/*/descendant::keyword
+#%/site/regions/*/item/descendant::keyword
+#%/site/regions/*/person[ address and (phone or homepage) ]
+#%/descendant::listitem[ descendant::keyword and descendant::emph]/descendant::parlist
+#%/site/regions/*/item[ mailbox/mail/date ]/mailbox/mail
+X13%/child::*[ descendant::* ]
+X14%/descendant::*
+X15%/descendant::*/descendant::*
+X16%/descendant::*/descendant::*/descendant::*
+X17%/descendant::*/descendant::*/descendant::*/descendant::*
+#%/descendant::*/descendant::*/descendant::*/descendant::*/descendant::*/descendant::*
+#%/descendant::*/descendant::*
--- /dev/null
+/child::site/child::closed_auctions
+X02%/child::site/child::regions/child::*/child::item
+X03%/child::site/child::closed_auctions/child::closed_auction/child::annotation/child::description/child::text/child::keyword
+X04%/descendant::listitem/descendant::keyword
+#%/descendant::closed_auction/descendant::keyword
+#%/site/closed_auctions/closed_auction/descendant::keyword
+X05%/child::site/child::closed_auctions/child::closed_auction[child::annotation/child::description/child::text/child::keyword]/child::date
+X06%/child::site/child::closed_auctions/child::closed_auction[descendant::keyword]/child::date
+X07%/child::site/child::people/child::person[child::profile/child::gender and child::profile/child::age]/child::name
+X08%/child::site/child::people/child::person[child::phone or child::homepage]/child::name
+X09%/child::site/child::people/child::person[child::address and (child::phone or child::homepage) and (child::creditcard or child::profile)]/child::name
+X10%/descendant::listitem[not(descendant::keyword/child::emph)]/descendant::parlist
+X11%/descendant::listitem[ (descendant::keyword or descendant::emph) and (descendant::emph or descendant::bold)]/child::parlist
+X12%/descendant::people[ descendant::person[not(child::address)] and descendant::person[not(child::watches)]]/child::person[child::watches]
+#%/site/regions/europe/item/mailbox/mail/text/keyword
+#%/site/closed_auctions/closed_auction/annotation/description/parlist/listitem
+#%/site/closed_auctions/closed_auction/annotation/description/parlist/listitem/parlist/listitem/*/descendant::keyword
+#%/site/regions/*/item/descendant::keyword
+#%/site/regions/*/person[ address and (phone or homepage) ]
+#%/descendant::listitem[ descendant::keyword and descendant::emph]/descendant::parlist
+#%/site/regions/*/item[ mailbox/mail/date ]/mailbox/mail
+X13%/child::*[ descendant::* ]
+X14%/descendant::*
+X15%/descendant::*/descendant::*
+X16%/descendant::*/descendant::*/descendant::*
+X17%/descendant::*/descendant::*/descendant::*/descendant::*
+#%/descendant::*/descendant::*/descendant::*/descendant::*/descendant::*/descendant::*
+#%/descendant::*/descendant::*