root/trunk/contributions/xml_parser/examples.lgt

Revision 4601, 11.4 KB (checked in by pmoura, 7 weeks ago)

Added svn:mime-type property to source files (set to text/x-logtalk).

  • Property svn:mime-type set to text/x-logtalk
  • Property svn:eol-style set to native
Line 
/* Using xml.pl to solve XML Query Cases - An Example
 *
 * The following is a complete example to illustrate how the module can be used;
 * it exercises both the input and output parsing modes of xml::parse/[2,3], and
 * illustrates the use of xml::subterm/2 to access the nodes of a "document value
 * model". It's written for Quintus Prolog, but should port to other Prologs
 * easily.
 *
 * The entry-point of the program is the test/1 predicate.
 *
 * test( +QueryId ) executes a Prolog implementation of a Query from Use Case
 * "XMP": Experiences and Exemplars, in the W3C's XML Query Use Cases, which
 * "contains several example queries that illustrate requirements gathered from
 * the database and document communities".
 * <http://www.w3.org/TR/2002/WD-xmlquery-use-cases-20021115/#xmp>
 *
 * QueryId is one of q1 ... q12, selecting which of the 12 use cases is
 * executed.
 * The XML output is written to the file [QueryId].xml in the current directory.
 *
 * xml::pp/1 is used to display the resulting "document value model"
 * data-structures on the user output (stdout) stream.
 */
% test( +Query ) runs the use-case query identified by Query, writes the
% resulting XML to the file <Query>.xml in the current directory and then
% pretty-prints the result document with xml::pp/1.
test( Query ) :-
    xml_query( Query, ResultElement ),
    Document = xml([], [ResultElement]),
    % Output mode of xml::parse/2: Codes is unbound here and receives the
    % serialised form of Document.
    xml::parse( Codes, Document ),
    atom_concat( Query, '.xml', OutputFile ),
    % Write the serialised document to OutputFile.
    open( OutputFile, write, Output ),
    put_codes( Codes, Output ),
    close( Output ),
    % Show the document value model itself.
    write( 'Output XML' ), nl,
    xml::pp( Document ).
36
/* xml_query( +QueryNo, ?OutputXML ) holds when OutputXML is the XML Document
 * Value Model produced by running the example, identified by QueryNo, taken
 * from the XML Query "XMP" use case.
 */
41
% Q1: List books published by Addison-Wesley after 1991, including their year
% and title.

xml_query( q1, element(bib, [], Books) ) :-
    % Patterns matching any <title> and any <publisher> element.
    Title = element(title, _, _),
    Publisher = element(publisher, _, _),
    input_document( 'bib.xml', Bibliography ),
    findall(
        element(book, [year=Year], [Title]),
        (   xml::subterm( Bibliography, element(book, Attributes, Content) ),
            xml::subterm( Content, Publisher ),
            xml::subterm( Publisher, PublisherText ),
            text_value( PublisherText, "Addison-Wesley" ),
            % The year attribute value is converted to a number before the
            % comparison.
            list::member( year=Year, Attributes ),
            number_codes( YearNo, Year ),
            YearNo > 1991,
            xml::subterm( Content, Title )
        ),
        Books
    ).
63
% Q2: Create a flat list of all the title-author pairs, with each pair enclosed
% in a "result" element.

xml_query( q2, element(results, [], Results) ) :-
    % Patterns matching any <title>, <author> and <book> element.
    Title = element(title, _, _),
    Author = element(author, _, _),
    Book = element(book, _, _),
    input_document( 'bib.xml', Bibliography ),
    % One result per (title, author) combination found inside the same book.
    findall(
        element(result, [], [Title,Author]),
        (   xml::subterm( Bibliography, Book ),
            xml::subterm( Book, Title ),
            xml::subterm( Book, Author )
        ),
        Results
    ).
81
% Q3: For each book in the bibliography, list the title and authors, grouped
% inside a "result" element.

xml_query( q3, element(results, [], Results) ) :-
    % Patterns matching any <title>, <author> and <book> element.
    Title = element(title, _, _),
    Author = element(author, _, _),
    Book = element(book, _, _),
    input_document( 'bib.xml', Bibliography ),
    findall(
        element(result, [], [Title|Authors]),
        (   xml::subterm( Bibliography, Book ),
            xml::subterm( Book, Title ),
            % Collect every author of this book; Authors may be empty.
            findall( Author, xml::subterm(Book, Author), Authors )
        ),
        Results
    ).
99
% Q4: For each author in the bibliography, list the author's name and the
% titles of all books by that author, grouped inside a "result" element.

xml_query( q4, element(results, [], Results) ) :-
    % Patterns matching any <title>, <author> and <book> element.
    Title = element(title, _, _),
    Author = element(author, _, _),
    Book = element(book, _, _),
    input_document( 'bib.xml', Bibliography ),
    % sort/2 orders the author elements and drops duplicates.
    findall( Author, xml::subterm(Bibliography, Author), AuthorBag ),
    sort( AuthorBag, Authors ),
    findall(
        element(result, [], [Author|Titles]),
        (   list::member( Author, Authors ),
            % Titles of every book that contains this author element.
            findall(
                Title,
                (   xml::subterm( Bibliography, Book ),
                    xml::subterm( Book, Author ),
                    xml::subterm( Book, Title )
                ),
                Titles
            )
        ),
        Results
    ).
124
% Q5: For each book found at both bn.com and amazon.com, list the title of the
% book and its price from each source.

xml_query( q5, element('books-with-prices', [], BooksWithPrices) ) :-
    % Patterns matching any <title>, <book> and <entry> element.
    Title = element(title, _, _),
    Book = element(book, _, _),
    Review = element(entry, _, _),
    input_document( 'bib.xml', Bibliography ),
    input_document( 'reviews.xml', Reviews ),
    findall(
        element('book-with-prices', [], [
            Title,
            element('price-bn', [], BNPrice),
            element('price-amazon', [], AmazonPrice)
        ]),
        (   xml::subterm( Bibliography, Book ),
            xml::subterm( Book, Title ),
            % Join: the review entry must contain the very same title element.
            xml::subterm( Reviews, Review ),
            xml::subterm( Review, Title ),
            xml::subterm( Book, element(price, _, BNPrice) ),
            xml::subterm( Review, element(price, _, AmazonPrice) )
        ),
        BooksWithPrices
    ).
150
% Q6: For each book that has at least one author, list the title and first two
% authors, and an empty "et-al" element if the book has additional authors.

xml_query( q6, element(bib, [], Results) ) :-
    % Patterns matching any <title>, <author> and <book> element.
    Title = element(title, _, _),
    Author = element(author, _, _),
    Book = element(book, _, _),
    input_document( 'bib.xml', Bibliography ),
    findall(
        element(book, [], [Title,FirstAuthor|Authors]),
        (   xml::subterm( Bibliography, Book ),
            xml::subterm( Book, Title ),
            % Matching against [FirstAuthor|Others] rejects books that have
            % no author at all.
            findall( Author, xml::subterm(Book, Author), [FirstAuthor|Others] ),
            other_authors( Others, Authors )
        ),
        Results
    ).
169
% Q7: List the titles and years of all books published by Addison-Wesley after
% 1991, in alphabetic order.

xml_query( q7, element(bib, [], Books) ) :-
    % Patterns matching any <title> and any <publisher> element.
    Title = element(title, _, _),
    Publisher = element(publisher, _, _),
    input_document( 'bib.xml', Bibliography ),
    % Each result is keyed by its title element so that keysort/2 can order it.
    findall(
        Title-element(book, [year=Year], [Title]),
        (   xml::subterm( Bibliography, element(book, Attributes, Book) ),
            xml::subterm( Book, Publisher ),
            xml::subterm( Publisher, PublisherText ),
            text_value( PublisherText, "Addison-Wesley" ),
            list::member( year=Year, Attributes ),
            number_codes( YearNo, Year ),
            YearNo > 1991,
            xml::subterm( Book, Title )
        ),
        TitleBooks
    ),
    keysort( TitleBooks, SortedTitleBooks ),
    % Strip the sort keys again, keeping only the book elements.
    range( SortedTitleBooks, Books ).
193
% Q8: Find books in which the name of some element ends with the string "or"
% and the same element contains the string "Suciu" somewhere in its content.
% For each such book, return the title and the qualifying element.

xml_query( q8, element(bib, [], Books) ) :-
    % Patterns matching any <title> and <book> element, and an element of any
    % name whose name is exposed as QualifyingName.
    Title = element(title, _, _),
    Book = element(book, _, _),
    QualifyingElement = element(QualifyingName, _, _),
    % Suffix is "Suciu" followed by an unbound tail, so it matches any text
    % that starts with "Suciu".
    list::append( "Suciu", _Back, Suffix ),
    input_document( 'bib.xml', Bibliography ),
    findall(
        element(book, [], [Title,QualifyingElement]),
        (   xml::subterm( Bibliography, Book ),
            xml::subterm( Book, QualifyingElement ),
            % The element name must end in "or" ...
            atom_codes( QualifyingName, QNChars ),
            list::append( _QNPrefix, "or", QNChars ),
            % ... and some text inside the element must contain "Suciu".
            xml::subterm( QualifyingElement, TextItem ),
            text_value( TextItem, TextValue ),
            list::append( _Prefix, Suffix, TextValue ),
            xml::subterm( Book, Title )
        ),
        Books
    ).
218
% Q9: In the document "books.xml", find all section or chapter titles that
% contain the word "XML", regardless of the level of nesting.

xml_query( q9, element(results, [], Titles) ) :-
    % Pattern matching any <title> element.
    Title = element(title, _, _),
    % Suffix is "XML" followed by an unbound tail.
    list::append( "XML", _Back, Suffix ),
    input_document( 'books.xml', Books ),
    findall(
        Title,
        (   xml::subterm( Books, Title ),
            xml::subterm( Title, TextItem ),
            text_value( TextItem, TextValue ),
            % Succeeds when "XML" occurs somewhere inside TextValue.
            list::append( _Prefix, Suffix, TextValue )
        ),
        Titles
    ).
236
% Q10: In the document "prices.xml", find the minimum price for each book, in
% the form of a "minprice" element with the book title as its title attribute.

xml_query( q10, element(results, [], MinPrices) ) :-
    % Patterns matching any <title>, <price> and <book> element.
    Title = element(title, _, _),
    Price = element(price, _, _),
    Book = element(book, _, _),
    input_document( 'prices.xml', Prices ),
    % Distinct title elements; sort/2 drops the duplicates.
    findall( Title, xml::subterm(Prices, Title), TitleBag ),
    sort( TitleBag, TitleSet ),
    findall(
        element(minprice, [title=TitleString], [MinPrice]),
        (   list::member( Title, TitleSet ),
            xml::subterm( Title, TitleText ),
            text_value( TitleText, TitleString ),
            % Pair every price element for this title with its numeric value
            % so that the pairs order by value.
            findall(
                PriceValue-Price,
                (   xml::subterm( Prices, Book ),
                    xml::subterm( Book, Title ),
                    xml::subterm( Book, Price ),
                    xml::subterm( Price, PriceText ),
                    text_value( PriceText, PriceChars ),
                    number_codes( PriceValue, PriceChars )
                ),
                PriceValues
            ),
            minimum( PriceValues, PriceValue-MinPrice )
        ),
        MinPrices
    ).
267
% Q11: For each book with an author, return the book with its title and
% authors. For each book with an editor, return a reference with the book title
% and the editor's affiliation.

xml_query( q11, element(bib, [], Results) ) :-
    % Patterns matching any element of the given name.
    Title = element(title, _, _),
    Author = element(author, _, _),
    Book = element(book, _, _),
    Editor = element(editor, _, _),
    Affiliation = element(affiliation, _, _),
    input_document( 'bib.xml', Bibliography ),
    % Books that have at least one author.
    findall(
        element(book, [], [Title,FirstAuthor|Authors]),
        (   xml::subterm( Bibliography, Book ),
            xml::subterm( Book, Title ),
            findall( Author, xml::subterm(Book, Author), [FirstAuthor|Authors] )
        ),
        Books
    ),
    % References for books that have an editor with an affiliation.
    findall(
        element(reference, [], [Title,Affiliation]),
        (   xml::subterm( Bibliography, Book ),
            xml::subterm( Book, Title ),
            xml::subterm( Book, Editor ),
            xml::subterm( Editor, Affiliation )
        ),
        References
    ),
    % Books come first, references after them.
    list::append( Books, References, Results ).
299
% Q12: Find pairs of books that have different titles but the same set of
% authors (possibly in a different order).

xml_query( q12, element(bib, [], Pairs) ) :-
    % Patterns matching any <author>, <book> and <title> element; two
    % independent book/title patterns are needed for the two sides of a pair.
    Author = element(author, _, _),
    Book1 = element(book, _, _),
    Book2 = element(book, _, _),
    Title1 = element(title, _, _),
    Title2 = element(title, _, _),
    input_document( 'bib.xml', Bibliography ),
    findall(
        element('book-pair', [], [Title1,Title2]),
        (   xml::subterm( Bibliography, Book1 ),
            findall( Author, xml::subterm(Book1, Author), AuthorBag1 ),
            sort( AuthorBag1, AuthorSet ),
            xml::subterm( Bibliography, Book2 ),
            % Standard-order comparison keeps each unordered pair once and
            % excludes pairing a book with itself.
            Book2 @< Book1,
            % AuthorSet is already bound, so this sort/2 call succeeds only
            % when both books have the same set of authors.
            findall( Author, xml::subterm(Book2, Author), AuthorBag2 ),
            sort( AuthorBag2, AuthorSet ),
            xml::subterm( Book1, Title1 ),
            xml::subterm( Book2, Title2 ),
            % The query asks for books with different titles; distinct book
            % elements may still carry identical title elements.
            Title1 \== Title2
        ),
        Pairs
    ).
325
% Auxiliary Predicates
327
% other_authors( +Others, ?Listed ): Others are the authors after the first
% one. Listed holds at most the first of them, followed by an empty "et-al"
% element when further authors remain.
other_authors( [], [] ).
other_authors( [Second|Remaining], [Second|Marker] ) :-
    et_al( Remaining, Marker ).

% et_al( +Remaining, ?Marker ): Marker is [] when no authors remain, and a
% single empty "et-al" element otherwise.
et_al( [], [] ).
et_al( [_|_], [element('et-al', [], [])] ).
334
% text_value( ?Content, ?Text ): Content is a one-item list holding either
% pcdata(Text) or cdata(Text).
text_value( [Item], Text ) :-
    (   Item = pcdata(Text)
    ;   Item = cdata(Text)
    ).
337
% element_name( ?Element, ?Name ): Element is an element/3 term whose first
% argument is Name; its attributes and content are left unconstrained.
element_name( Element, Name ) :-
    Element = element(Name, _, _).
339
340
/* range( +Pairs, ?Range ) holds when Pairs is a list of Key-Datum pairs and
 * Range is the list of the data, in the same order, with the keys dropped.
 */
range( [], [] ).
range( [_-Value|MorePairs], [Value|MoreValues] ) :-
    range( MorePairs, MoreValues ).
347
/* minimum( +List, ?Min ) holds when Min is the least member of the non-empty
 * List in the standard order of terms. It fails for the empty list.
 */
minimum( [First|Rest], Min ) :-
    minimum1( Rest, First, Min ).

% minimum1( +Rest, +Least0, ?Min ): Least0 is the least member seen so far and
% Min is the least of Least0 and the members of Rest.
minimum1( [], Min, Min ).
minimum1( [Candidate|Rest], Least0, Min ) :-
    (   Candidate @< Least0 ->
        Least = Candidate
    ;   Least = Least0
    ),
    minimum1( Rest, Least, Min ).
365
/* input_document( +File, ?XML ) reads File and parses the input into the
 * "Document Value Model" XML.
 *
 * The file is read exactly once: the call to get_codes/2 is wrapped in once/1
 * so that a later failure cannot backtrack into the read and retry it on a
 * stream that has already been consumed and closed. If reading raises an
 * exception the stream is closed before the exception is passed on.
 */
input_document( File, XML ) :-
    % Read File as a list of character codes.
    open( File, read, Input ),
    catch(
        once( get_codes(Input, Codes) ),
        Error,
        ( close( Input ), throw( Error ) )
    ),
    close( Input ),
    % Parse the codes into the term XML.
    xml::parse( Codes, XML ).
376
377
% put_codes( +Codes, +Output ) writes each character code in Codes, in order,
% to the stream Output.
put_codes( [], _Stream ).
put_codes( [First|Rest], Stream ) :-
    put_code( Stream, First ),
    put_codes( Rest, Stream ).
382
383   
/* get_codes( +Input, ?Codes ) reads the stream Input up to end of file and
 * unifies Codes with the list of character codes read, in order.
 *
 * The predicate is deterministic: it leaves no choice point, so backtracking
 * into it cannot produce a truncated list from an already consumed stream.
 */
get_codes( Input, Codes ) :-
    get_code( Input, Code ),
    % get_code/2 returns -1 once the end of the stream has been reached.
    (   Code == -1 ->
        Codes = []
    ;   Codes = [Code|Rest],
        get_codes( Input, Rest )
    ).
Note: See TracBrowser for help on using the browser.