3 cduce --compile tiki_of_html.cd
4 cduce --run tiki_of_html.cdo --arg http://digicosme.lri.fr/innovation.html
7 type html_element = <_ ..>html_content
8 type html_content = [ (Byte | html_element)* ]
9 type space = ' ' | '\t' | '\n'
10 type content = (Byte| html_element) \ space
13 let repeat (s : Latin1) (n : Int) : Latin1 =
14 if n = 0 then s else s @ repeat s (n - 1)
16 let remove_spaces (l : html_content) : html_content =
18 | [ (x :: content | _ )* ] -> x
20 let trim_spaces (l : html_content) : html_content =
22 [ space* (x::(Byte|html_element))*? space* ] -> x
24 let tiki_of_html (level : Int) (h : html_content) : Latin1 =
25 (* Applique le pattern a chaque element de la liste.
26 l''expression de droite doit etre une liste, et le tout
28 Équivalent de List.concat (List.map f l) où f est la transformation
33 (* on ignore le header et les balises de script *)
34 <(`script|`head) ..>_ -> []
37 | <(`b|`strong) ..>l -> "__" @ tiki_of_html level l @ "__"
40 | <(`em|`i) ..>l -> "''" @ tiki_of_html level l @ "''"
43 | <table ..>l -> tiki_of_html level (remove_spaces l) @ "||"
45 | <tr ..>[ space* (l::content)* ] -> "|" @ tiki_of_html level (remove_spaces l)
47 | <(`th|`td) ..>l -> "|" @ tiki_of_html level l
50 | <img src=(url & Latin1) ..>_ ->
51 " {img src=" @ url @ "} "
54 | <a href=(url & Latin1) ..>l ->
55 "[" @ url @ "|" @ tiki_of_html level l @ "]"
57 (* Les listes on extrait uniquement la partie li
58 pour chaque li on retire les blancs après le <li>
61 | <((`ul|`ol)&tag) ..>l ->
63 let sym = if tag = `ol then "#" else "*" in
64 let prefix = repeat sym level in
66 transform remove_spaces l with
68 prefix @ tiki_of_html (level+1) (trim_spaces c) @"\n"
69 | e -> tiki_of_html (level+1) [e]
73 (* Un tag qu''on ne connait pas, on se rapplique récursivement *)
74 | <_ ..>l -> " " @ tiki_of_html level l @ " "
76 (* Un charactère, on le copie dans la sortie *)
84 match load_html url with
85 | html_content & src -> print(tiki_of_html 0 src)
86 | _ -> print "Html invalide?\n"; exit 2
88 | _ -> print "Nombre d'arguments invalides\n"; exit 1