| 1 | /* Using xml.pl to solve XML Query Cases - An Example |
|---|
| 2 | * |
|---|
| 3 | * The following is a complete example to illustrate how the module can be used; |
|---|
| 4 | * it exercises both the input and output parsing modes of xml::parse/[2,3], and |
|---|
| 5 | * illustrates the use of xml::subterm/2 to access the nodes of a "document value |
|---|
| 6 | * model". It's written for Quintus Prolog, but should port to other Prologs |
|---|
| 7 | * easily. |
|---|
| 8 | * |
|---|
| 9 | * The entry-point of the program is the test/1 predicate. |
|---|
| 10 | * |
|---|
| 11 | * test( +QueryId ) executes a Prolog implementation of a Query from Use Case |
|---|
| 12 | * "XMP": Experiences and Exemplars, in the W3C's XML Query Use Cases, which |
|---|
| 13 | * "contains several example queries that illustrate requirements gathered from |
|---|
| 14 | * the database and document communities". |
|---|
| 15 | * <http://www.w3.org/TR/2002/WD-xmlquery-use-cases-20021115/#xmp> |
|---|
| 16 | * |
|---|
| 17 | * QueryId is one of q1 ... q12, |
|---|
| 18 | * selecting which of the 12 use cases is executed. |
|---|
| 19 | * The XML output is written to the file [QueryId].xml in the current directory. |
|---|
| 20 | * |
|---|
| 21 | * xml::pp/1 is used to display the resulting "document value model" |
|---|
| 22 | * data-structures on the user output (stdout) stream. |
|---|
| 23 | */ |
|---|
/* test( +Query ) runs the use-case query named Query, writes the resulting
 * element as XML text to the file [Query].xml, and then displays the same
 * document with xml::pp/1.
 */
test( Query ) :-
    xml_query( Query, Root ),
    Document = xml([], [Root]),
    % With the document term bound, xml::parse/2 is used in its output mode
    % and produces the XML text as a list of character codes.
    xml::parse( Codes, Document ),
    atom_concat( Query, '.xml', FileName ),
    % Dump the generated codes to the output file.
    open( FileName, write, Stream ),
    put_codes( Codes, Stream ),
    close( Stream ),
    % Show the document structure that was just serialised.
    write( 'Output XML' ), nl,
    xml::pp( Document ).
|---|
| 36 | |
|---|
| 37 | /* xml_query( +QueryNo, ?OutputXML ) when OutputXML is an XML Document Value Model |
|---|
| 38 | * produced by running the example, identified by QueryNo, taken from the XML Query |
|---|
| 39 | * "XMP" use case. |
|---|
| 40 | */ |
|---|
| 41 | |
|---|
% Q1: List books published by Addison-Wesley after 1991, including their year
% and title.
%
% Each reported book keeps only its year attribute and its title element.

xml_query( q1, element(bib, [], Matches) ) :-
    element_name( PublisherElement, publisher ),
    element_name( TitleElement, title ),
    input_document( 'bib.xml', Bib ),
    findall(
        element(book, [year=YearCodes], [TitleElement]),
        (
            xml::subterm( Bib, element(book, BookAttributes, BookContent) ),
            % The publisher's text must be exactly "Addison-Wesley".
            xml::subterm( BookContent, PublisherElement ),
            xml::subterm( PublisherElement, PublisherText ),
            text_value( PublisherText, "Addison-Wesley" ),
            % The year attribute is held as character codes; convert it
            % before the numeric comparison.
            list::member( year=YearCodes, BookAttributes ),
            number_codes( YearNumber, YearCodes ),
            YearNumber > 1991,
            xml::subterm( BookContent, TitleElement )
        ),
        Matches
    ).
|---|
| 63 | |
|---|
% Q2: Create a flat list of all the title-author pairs, with each pair enclosed
% in a "result" element.
%
% A book with several authors contributes one "result" per author.

xml_query( q2, element(results, [], Pairs) ) :-
    element_name( BookElement, book ),
    element_name( TitleElement, title ),
    element_name( AuthorElement, author ),
    input_document( 'bib.xml', Bib ),
    findall(
        element(result, [], [TitleElement,AuthorElement]),
        (
            xml::subterm( Bib, BookElement ),
            xml::subterm( BookElement, TitleElement ),
            xml::subterm( BookElement, AuthorElement )
        ),
        Pairs
    ).
|---|
| 81 | |
|---|
% Q3: For each book in the bibliography, list the title and authors, grouped
% inside a "result" element.
%
% The inner findall/3 gathers every author of the current book, so each
% title yields a single "result" (with no authors if the book has none).

xml_query( q3, element(results, [], Groups) ) :-
    element_name( BookElement, book ),
    element_name( TitleElement, title ),
    element_name( AuthorElement, author ),
    input_document( 'bib.xml', Bib ),
    findall(
        element(result, [], [TitleElement|BookAuthors]),
        (
            xml::subterm( Bib, BookElement ),
            xml::subterm( BookElement, TitleElement ),
            findall(
                AuthorElement,
                xml::subterm( BookElement, AuthorElement ),
                BookAuthors
            )
        ),
        Groups
    ).
|---|
| 99 | |
|---|
% Q4: For each author in the bibliography, list the author's name and the
% titles of all books by that author, grouped inside a "result" element.

xml_query( q4, element(results, [], Groups) ) :-
    element_name( BookElement, book ),
    element_name( TitleElement, title ),
    element_name( AuthorElement, author ),
    input_document( 'bib.xml', Bib ),
    % Collect every author element, then let sort/2 order them and drop
    % duplicates so that each author is reported once.
    findall( AuthorElement, xml::subterm(Bib, AuthorElement), AllAuthors ),
    sort( AllAuthors, DistinctAuthors ),
    findall(
        element(result, [], [AuthorElement|BookTitles]),
        (
            list::member( AuthorElement, DistinctAuthors ),
            % Titles of the books that contain this author element.
            findall(
                TitleElement,
                (
                    xml::subterm( Bib, BookElement ),
                    xml::subterm( BookElement, AuthorElement ),
                    xml::subterm( BookElement, TitleElement )
                ),
                BookTitles
            )
        ),
        Groups
    ).
|---|
| 124 | |
|---|
% Q5: For each book found at both bn.com and amazon.com, list the title of the
% book and its price from each source.
%
% 'bib.xml' supplies the "price-bn" content and 'reviews.xml' supplies the
% "price-amazon" content; the two documents are joined on the title element.

xml_query( q5, element('books-with-prices', [], PricedBooks) ) :-
    element_name( BookElement, book ),
    element_name( EntryElement, entry ),
    element_name( TitleElement, title ),
    input_document( 'bib.xml', Bib ),
    input_document( 'reviews.xml', ReviewDoc ),
    findall(
        element('book-with-prices', [], [
            TitleElement,
            element('price-bn', [], BNContent),
            element('price-amazon', [], AmazonContent)
        ]),
        (
            xml::subterm( Bib, BookElement ),
            xml::subterm( BookElement, TitleElement ),
            % TitleElement is bound by now, so only a review entry holding
            % the very same title element can match.
            xml::subterm( ReviewDoc, EntryElement ),
            xml::subterm( EntryElement, TitleElement ),
            xml::subterm( BookElement, element(price, _, BNContent) ),
            xml::subterm( EntryElement, element(price, _, AmazonContent) )
        ),
        PricedBooks
    ).
|---|
| 150 | |
|---|
% Q6: For each book that has at least one author, list the title and first two
% authors, and an empty "et-al" element if the book has additional authors.

xml_query( q6, element(bib, [], Entries) ) :-
    element_name( BookElement, book ),
    element_name( TitleElement, title ),
    element_name( AuthorElement, author ),
    input_document( 'bib.xml', Bib ),
    findall(
        element(book, [], [TitleElement,Lead|Followers]),
        (
            xml::subterm( Bib, BookElement ),
            xml::subterm( BookElement, TitleElement ),
            % Matching the collected authors against [Lead|Remaining]
            % rejects books that have no author at all.
            findall(
                AuthorElement,
                xml::subterm( BookElement, AuthorElement ),
                [Lead|Remaining]
            ),
            % Keep the second author, if any, and summarise the rest.
            other_authors( Remaining, Followers )
        ),
        Entries
    ).
|---|
| 169 | |
|---|
% Q7: List the titles and years of all books published by Addison-Wesley after
% 1991, in alphabetic order.
%
% The books are collected as Title-Book pairs so that keysort/2 can order
% them by title element; range/2 then discards the keys.

xml_query( q7, element(bib, [], Sorted) ) :-
    element_name( PublisherElement, publisher ),
    element_name( TitleElement, title ),
    input_document( 'bib.xml', Bib ),
    findall(
        TitleElement-element(book, [year=YearCodes], [TitleElement]),
        (
            xml::subterm( Bib, element(book, BookAttributes, BookContent) ),
            xml::subterm( BookContent, PublisherElement ),
            xml::subterm( PublisherElement, PublisherText ),
            text_value( PublisherText, "Addison-Wesley" ),
            % The year attribute is a list of codes; compare it as a number.
            list::member( year=YearCodes, BookAttributes ),
            number_codes( YearNumber, YearCodes ),
            YearNumber > 1991,
            xml::subterm( BookContent, TitleElement )
        ),
        Keyed
    ),
    keysort( Keyed, KeyedInOrder ),
    range( KeyedInOrder, Sorted ).
|---|
| 193 | |
|---|
% Q8: Find books in which the name of some element ends with the string "or"
% and the same element contains the string "Suciu" somewhere in its content.
% For each such book, return the title and the qualifying element.

xml_query( q8, element(bib, [], Matches) ) :-
    element_name( BookElement, book ),
    element_name( TitleElement, title ),
    % The element name is left unbound here: any element may qualify.
    element_name( Candidate, CandidateName ),
    % An open-ended list that begins with "Suciu"; appending an arbitrary
    % prefix to it below is the "contains" test.
    list::append( "Suciu", _Rest, FromSuciu ),
    input_document( 'bib.xml', Bib ),
    findall(
        element(book, [], [TitleElement,Candidate]),
        (
            xml::subterm( Bib, BookElement ),
            xml::subterm( BookElement, Candidate ),
            % The element's name must end in "or".
            atom_codes( CandidateName, NameCodes ),
            list::append( _NameStart, "or", NameCodes ),
            % Some text item inside the element must contain "Suciu".
            xml::subterm( Candidate, TextItem ),
            text_value( TextItem, TextCodes ),
            list::append( _Before, FromSuciu, TextCodes ),
            xml::subterm( BookElement, TitleElement )
        ),
        Matches
    ).
|---|
| 218 | |
|---|
% Q9: In the document "books.xml", find all section or chapter titles that
% contain the word "XML", regardless of the level of nesting.
%
% xml::subterm/2 is applied to the whole document, so title elements are
% found wherever they occur.

xml_query( q9, element(results, [], Matches) ) :-
    element_name( TitleElement, title ),
    % An open-ended list that begins with "XML"; see its use below.
    list::append( "XML", _Rest, FromXML ),
    input_document( 'books.xml', Document ),
    findall(
        TitleElement,
        (
            xml::subterm( Document, TitleElement ),
            xml::subterm( TitleElement, TextItem ),
            text_value( TextItem, TextCodes ),
            % Succeeds when "XML" occurs somewhere in the text.
            list::append( _Before, FromXML, TextCodes )
        ),
        Matches
    ).
|---|
| 236 | |
|---|
% Q10: In the document "prices.xml", find the minimum price for each book, in
% the form of a "minprice" element with the book title as its title attribute.

xml_query( q10, element(results, [], Cheapest) ) :-
    element_name( BookElement, book ),
    element_name( TitleElement, title ),
    element_name( PriceElement, price ),
    input_document( 'prices.xml', PriceDoc ),
    % sort/2 removes duplicate titles, giving one "minprice" per title.
    findall( TitleElement, xml::subterm(PriceDoc, TitleElement), AllTitles ),
    sort( AllTitles, DistinctTitles ),
    findall(
        element(minprice, [title=TitleCodes], [LowestPrice]),
        (
            list::member( TitleElement, DistinctTitles ),
            xml::subterm( TitleElement, TitleText ),
            text_value( TitleText, TitleCodes ),
            % Pair every price element offered for this title with its
            % numeric value, so the pairs compare by value first.
            findall(
                Amount-PriceElement,
                (
                    xml::subterm( PriceDoc, BookElement ),
                    xml::subterm( BookElement, TitleElement ),
                    xml::subterm( BookElement, PriceElement ),
                    xml::subterm( PriceElement, PriceText ),
                    text_value( PriceText, AmountCodes ),
                    number_codes( Amount, AmountCodes )
                ),
                Offers
            ),
            minimum( Offers, _LowestAmount-LowestPrice )
        ),
        Cheapest
    ).
|---|
| 267 | |
|---|
% Q11: For each book with an author, return the book with its title and
% authors. For each book with an editor, return a reference with the book
% title and the editor's affiliation.
%
% The result lists all the "book" elements first, then the "reference"
% elements.

xml_query( q11, element(bib, [], Entries) ) :-
    element_name( BookElement, book ),
    element_name( TitleElement, title ),
    element_name( AuthorElement, author ),
    element_name( EditorElement, editor ),
    element_name( AffiliationElement, affiliation ),
    input_document( 'bib.xml', Bib ),
    % Books that have at least one author.
    findall(
        element(book, [], [TitleElement,Lead|Followers]),
        (
            xml::subterm( Bib, BookElement ),
            xml::subterm( BookElement, TitleElement ),
            % The [Lead|Followers] pattern fails for an empty author list.
            findall(
                AuthorElement,
                xml::subterm( BookElement, AuthorElement ),
                [Lead|Followers]
            )
        ),
        AuthoredBooks
    ),
    % Books that have an editor with an affiliation.
    findall(
        element(reference, [], [TitleElement,AffiliationElement]),
        (
            xml::subterm( Bib, BookElement ),
            xml::subterm( BookElement, TitleElement ),
            xml::subterm( BookElement, EditorElement ),
            xml::subterm( EditorElement, AffiliationElement )
        ),
        EditedReferences
    ),
    list::append( AuthoredBooks, EditedReferences, Entries ).
|---|
| 299 | |
|---|
% Q12: Find pairs of books that have different titles but the same set of
% authors (possibly in a different order).
%
% Each qualifying pair is reported once, as a "book-pair" element holding the
% two title elements.

xml_query( q12, element(bib, [], Pairs) ) :-
    element_name( Author, author ),
    element_name( Book1, book ),
    element_name( Book2, book ),
    element_name( Title1, title ),
    element_name( Title2, title ),
    input_document( 'bib.xml', Bibliography ),
    findall(
        element('book-pair', [], [Title1,Title2]),
        (
            xml::subterm( Bibliography, Book1 ),
            findall( Author, xml::subterm(Book1, Author), AuthorBag1 ),
            % sort/2 turns the bag into an ordered set, making the
            % comparison below independent of author order.
            sort( AuthorBag1, AuthorSet ),
            xml::subterm( Bibliography, Book2 ),
            % The standard-order test stops a book being paired with itself
            % and stops each pair being reported in both orders.
            Book2 @< Book1,
            findall( Author, xml::subterm(Book2, Author), AuthorBag2 ),
            % AuthorSet is already bound, so this succeeds only when both
            % books have the same set of authors.
            sort( AuthorBag2, AuthorSet ),
            xml::subterm( Book1, Title1 ),
            xml::subterm( Book2, Title2 ),
            % Two distinct book elements can still carry the same title
            % (e.g. different editions); the query asks for different titles.
            Title1 \== Title2
        ),
        Pairs
    ).
|---|
| 325 | |
|---|
| 326 | % Auxiliary Predicates |
|---|
| 327 | |
|---|
/* other_authors( +Others, ?Listed ) when Others is a list of authors and
 * Listed is what is reported for it: nothing when Others is empty, otherwise
 * the first member of Others followed by whatever et_al/2 gives for the
 * remaining members.
 */
other_authors( [], [] ).
other_authors( [Second|Further], [Second|Marker] ) :-
    et_al( Further, Marker ).
|---|
| 331 | |
|---|
/* et_al( +Authors, ?Marker ) when Marker is the empty list if Authors is
 * empty, and otherwise a list holding a single empty "et-al" element.
 */
et_al( [], [] ).
et_al( [_First|_Rest], [EtAl] ) :-
    EtAl = element('et-al', [], []).
|---|
| 334 | |
|---|
/* text_value( ?Content, ?Text ) when Content is a one-item list whose item is
 * either pcdata(Text) or cdata(Text).
 */
text_value( [Item], Text ) :-
    (   Item = pcdata(Text)
    ;   Item = cdata(Text)
    ).
|---|
| 337 | |
|---|
/* element_name( ?Element, ?Name ) when Element is an element/3 term whose
 * first argument is Name; its attributes and content are left unconstrained.
 */
element_name( Element, Name ) :-
    Element = element(Name, _, _).
|---|
| 339 | |
|---|
| 340 | |
|---|
/* range( +Pairs, ?Range ) when Pairs is a list of Key-Datum pairs and Range
 * is the list of their data, in the same order, with the keys dropped.
 */
range( [], [] ).
range( [Pair|Pairs], [Datum|Data] ) :-
    Pair = _Key-Datum,
    range( Pairs, Data ).
|---|
| 347 | |
|---|
/* minimum( +List, ?Min ) is true if Min is the least member of List in the
 * standard order of terms. It fails for the empty list.
 */
minimum( [First|Rest], Min ) :-
    minimum1( Rest, First, Min ).

/* minimum1( +List, +Best0, ?Min ) when Min is the least of Best0 and the
 * members of List; Best0 is the least term seen so far.
 */
minimum1( [], Min, Min ).
minimum1( [Next|Rest], Best0, Min ) :-
    % Only a strictly smaller term replaces the current best; a term that
    % compares equal is identical to it, so keeping Best0 loses nothing.
    (   Next @< Best0 ->
        Best = Next
    ;   Best = Best0
    ),
    minimum1( Rest, Best, Min ).
|---|
| 365 | |
|---|
/* input_document( +File, ?XML ) when XML is the "Document Value Model" term
 * obtained by parsing the entire text of File.
 */
input_document( File, XML ) :-
    % Read the whole file into a list of character codes and release the
    % stream before any parsing starts.
    open( File, read, Stream ),
    get_codes( Stream, Text ),
    close( Stream ),
    % Text is bound here, so xml::parse/2 runs in its input mode.
    xml::parse( Text, XML ).
|---|
| 376 | |
|---|
| 377 | |
|---|
/* put_codes( +Codes, +Output ) writes each character code in Codes, in order,
 * to the stream Output.
 */
put_codes( [], _Output ).
put_codes( [First|Rest], Output ) :-
    put_code( Output, First ),
    put_codes( Rest, Output ).
|---|
| 382 | |
|---|
| 383 | |
|---|
/* get_codes( +Input, ?Codes ) when Codes is the list of character codes
 * remaining on the stream Input, read up to the end of the stream.
 *
 * The end-of-stream test is committed with an if-then-else so that the
 * predicate leaves no choice points. Were it re-entered on backtracking
 * (for example when a later goal such as a parse fails), the characters
 * already consumed could not be re-read and a truncated list would be
 * returned instead.
 */
get_codes( Input, Codes ) :-
    (   at_end_of_stream( Input ) ->
        Codes = []
    ;   get_code( Input, Code ),
        Codes = [Code|Rest],
        get_codes( Input, Rest )
    ).
|---|