| 1 | /* xml.pl : Contains parse/[2,3] a bi-directional XML parser written in |
|---|
| 2 | * Prolog. |
|---|
| 3 | * |
|---|
| 4 | * Copyright (C) 2001-2005 Binding Time Limited |
|---|
| 5 | * Copyright (C) 2005, 2006 John Fletcher |
|---|
| 6 | * |
|---|
| 7 | * Current Release: $Revision: 2.0 $ |
|---|
| 8 | * |
|---|
| 9 | * TERMS AND CONDITIONS: |
|---|
| 10 | * |
|---|
| 11 | * This program is offered free of charge, as unsupported source code. You may |
|---|
| 12 | * use it, copy it, distribute it, modify it or sell it without restriction, |
|---|
| 13 | * but entirely at your own risk. |
|---|
| 14 | * |
|---|
| 15 | */ |
|---|
| 16 | |
|---|
| 17 | :- object(xml). |
|---|
| 18 | |
|---|
| 19 | :- info([ |
|---|
| 20 | version is 2.0, |
|---|
| 21 | author is 'John Fletcher', |
|---|
| 22 | date is 2006/11/2, |
|---|
| 23 | copyright is 'Copyright (C) 2001-2005 Binding Time Limited, Copyright (C) 2005, 2006 John Fletcher', |
|---|
| 24 | license is 'The code has been placed into the public domain, to encourage the use of Prolog with XML. This program is offered free of charge, as unsupported source code. You may use it, copy it, distribute it, modify it or sell it without restriction, but entirely at your own risk.', |
|---|
| 25 | comment is 'Bi-directional XML parser.', |
|---|
| 26 | remarks is [ |
|---|
| 27 | 'On-line documentation:' - 'http://www.zen37763.zen.co.uk/xml.pl.html', |
|---|
| 28 | 'Compliance:' - 'This XML parser supports a subset of XML suitable for XML Data and Worldwide Web applications. It is neither as strict nor as comprehensive as the XML 1.0 Specification mandates. It is not as strict, because, while the specification must eliminate ambiguities, not all errors need to be regarded as faults, and some reasonable examples of real XML usage would have to be rejected if they were. It is not as comprehensive, because, where the XML specification makes provision for more or less complete DTDs to be provided as part of a document, xml.pl actions the local definition of ENTITIES only. Other DTD extensions are treated as commentary.', |
|---|
| 29 | 'Bi-directional conversions:' - 'The conversions are not completely symmetrical, in that weaker XML is accepted than can be generated. Specifically, in-bound (Chars -> Document) parsing does not require strictly well-formed XML. If Chars does not represent well-formed XML, Document is instantiated to the term malformed(<attributes>, <content>).' |
|---|
| 30 | ]]). |
|---|
| 31 | |
|---|
| 32 | :- public(parse/2). |
|---|
| 33 | :- mode(parse(+nonvar, ?nonvar), zero_or_one). |
|---|
| 34 | :- mode(parse(?nonvar, +nonvar), zero_or_one). |
|---|
| 35 | :- info(parse/2, [ |
|---|
| 36 | comment is 'Parses Chars to/from a data structure of the form xml(<atts>, <content>).', |
|---|
| 37 | argnames is ['Chars', 'Document']]). |
|---|
| 38 | |
|---|
| 39 | :- public(parse/3). |
|---|
| 40 | :- mode(parse(+nonvar, +nonvar, ?nonvar), zero_or_one). |
|---|
| 41 | :- mode(parse(+nonvar, ?nonvar, +nonvar), zero_or_one). |
|---|
| 42 | :- info(parse/3, [ |
|---|
| 43 | comment is 'Parses Chars to/from a data structure of the form xml(<atts>, <content>).', |
|---|
| 44 | argnames is ['Controls', 'Chars', 'Document']]). |
|---|
| 45 | |
|---|
| 46 | :- public(subterm/2). |
|---|
| 47 | :- mode(subterm(+nonvar, ?nonvar), zero_or_one). |
|---|
| 48 | :- info(subterm/2, [ |
|---|
| 49 | comment is 'Unifies Subterm with a sub-term of XMLTerm. Note that XMLTerm is a sub-term of itself.', |
|---|
| 50 | argnames is ['XMLTerm', 'Subterm']]). |
|---|
| 51 | |
|---|
| 52 | :- public(pp/1). |
|---|
| 53 | :- mode(pp(+nonvar), zero_or_one). |
|---|
| 54 | :- info(pp/1, [ |
|---|
| 55 | comment is 'Pretty prints a XML document on the current output stream.', |
|---|
| 56 | argnames is ['XMLDocument']]). |
|---|
| 57 | |
|---|
| 58 | :- uses(list, [append/3, member/2, select/3, valid/1::is_list/1]). |
|---|
| 59 | :- uses(term, [ground/1]). |
|---|
| 60 | |
|---|
| 61 | /* parse( {+Controls}, +?Chars, ?+Document ) parses Chars to/from a data |
|---|
| 62 | * structure of the form xml(<atts>, <content>). <atts> is a list of |
|---|
| 63 | * <atom>=<string> attributes from the (possibly implicit) XML signature of the |
|---|
| 64 | * document. <content> is a (possibly empty) list comprising occurrences of : |
|---|
| 65 | * |
|---|
| 66 | * pcdata(<string>) : Text |
|---|
| 67 | * comment(<string>) : An xml comment; |
|---|
| 68 | * element(<tag>,<atts>,<content>) : <tag>..</tag> encloses <content> |
|---|
| 69 | * : <tag /> if empty |
|---|
| 70 | * instructions(<atom>, <string>) : Processing <? <atom> <params> ?> |
|---|
| 71 | * cdata( <string> ) : <![CDATA[ <string> ]]> |
|---|
| 72 | * doctype(<atom>, <doctype id>) : DTD <!DOCTYPE .. > |
|---|
| 73 | * |
|---|
| 74 | * The conversions are not completely symmetrical, in that weaker XML is |
|---|
| 75 | * accepted than can be generated. Specifically, in-bound (Chars -> Document) |
|---|
| 76 | * does not require strictly well-formed XML. Document is instantiated to the |
|---|
| 77 | * term malformed(Attributes, Content) if Chars does not represent well-formed |
|---|
| 78 | * XML. The Content of a malformed/2 structure can contain: |
|---|
| 79 | * |
|---|
| 80 | * unparsed( <string> ) : Text which has not been parsed |
|---|
| 81 | * out_of_context( <tag> ) : <tag> is not closed |
|---|
| 82 | * |
|---|
| 83 | * in addition to the standard term types. |
|---|
| 84 | * |
|---|
| 85 | * Out-bound (Document -> Chars) parsing _does_ require that Document defines |
|---|
| 86 | * strictly well-formed XML. If an error is detected an application_error/2 |
|---|
| 87 | * exception is raised. |
|---|
| 88 | * |
|---|
| 89 | * The exception will attempt to identify the particular sub-term in |
|---|
| 90 | * error and the message will show a list of its ancestor elements in the form |
|---|
| 91 | * <tag>{(id)}* where <id> is the value of any attribute _named_ id. |
|---|
| 92 | * |
|---|
| 93 | * At this release, the Controls applying to in-bound (Chars -> Document) |
|---|
| 94 | * parsing are: |
|---|
| 95 | * |
|---|
| 96 | * extended_characters(<bool>) : Use the extended character |
|---|
| 97 | * : entities for XHTML (default true) |
|---|
| 98 | * |
|---|
| 99 | * format(<bool>) : Strip layouts when no character data |
|---|
| 100 | * : appears between elements. |
|---|
| 101 | * : (default true) |
|---|
| 102 | * |
|---|
| 103 | * remove_attribute_prefixes(<bool>) : Remove namespace prefixes from |
|---|
| 104 | * : attributes when it's the same as the |
|---|
| 105 | * : prefix of the parent element |
|---|
| 106 | * : (default false). |
|---|
| 107 | * |
|---|
| 108 | * allow_ampersand(<bool>) : Allow unescaped ampersand |
|---|
| 109 | * : characters (&) to occur in PCDATA. |
|---|
| 110 | * : (default false). |
|---|
| 111 | * |
|---|
| 112 | * [<bool> is one of 'true' or 'false'] |
|---|
| 113 | * |
|---|
| 114 | * For out-bound (Document -> Chars) parsing, the only available option is: |
|---|
| 115 | * |
|---|
| 116 | * format(<Bool>) : Indent the element content |
|---|
| 117 | * : (default true) |
|---|
| 118 | * |
|---|
| 119 | * Different DCGs for input and output are used because input parsing is |
|---|
| 120 | * more flexible than output parsing. Errors in input are recorded as part |
|---|
| 121 | * of the data structure. Output parsing throws an exception if the document |
|---|
| 122 | * is not well-formed, diagnosis tries to identify the specific culprit term. |
|---|
| 123 | */ |
|---|
| 124 | parse( Chars, Document ) :- |
|---|
| 125 | parse( [], Chars, Document ). |
|---|
| 126 | |
|---|
| 127 | parse( Controls, Chars, Document ) :- |
|---|
| 128 | ( ground( Chars ) -> |
|---|
| 129 | xml_to_document( Controls, Chars, Document ) |
|---|
| 130 | ; document_to_xml( Controls, Document, Chars ) |
|---|
| 131 | ). |
|---|
| 132 | |
|---|
| 133 | document_to_xml( Controls, Document, Chars ) :- |
|---|
| 134 | ( member( format(false), Controls ) -> |
|---|
| 135 | Format = false |
|---|
| 136 | ; Format = true |
|---|
| 137 | ), |
|---|
| 138 | ( ground( Document ), |
|---|
| 139 | document_generation(Format, Document, Chars0, [] ) -> |
|---|
| 140 | Chars = Chars0 |
|---|
| 141 | ; fault( Document, [], Culprit, Path, Message ), |
|---|
| 142 | throw( |
|---|
| 143 | application_error('XML Parse: ~s in ~q~nCulprit: ~q~nPath: ~s', |
|---|
| 144 | [Message,Document,Culprit,Path] ) |
|---|
| 145 | ) |
|---|
| 146 | ). |
|---|
| 147 | |
|---|
| 148 | /* subterm( +XMLTerm, ?Subterm ) unifies Subterm with a sub-term of Term. |
|---|
| 149 | * Note that XMLTerm is a sub-term of itself. |
|---|
| 150 | */ |
|---|
| 151 | subterm( Term, Term ). |
|---|
| 152 | subterm( xml(_Attributes, Content), Term ) :- |
|---|
| 153 | subterm( Content, Term ). |
|---|
| 154 | subterm( [H|T], Term ) :- |
|---|
| 155 | ( subterm( H, Term ) |
|---|
| 156 | ; subterm( T, Term ) |
|---|
| 157 | ). |
|---|
| 158 | subterm( element(_Name,_Attributes,Content), Term ) :- |
|---|
| 159 | subterm( Content, Term ). |
|---|
| 160 | subterm( namespace(_URI,_Prefix,Content), Term ) :- |
|---|
| 161 | subterm( Content, Term ). |
|---|
| 162 | |
|---|
| 163 | |
|---|
| 164 | /* xml_to_document( +Controls, +XML, ?Document ) translates the list of |
|---|
| 165 | * character codes XML into the Prolog term Document. Controls is a list |
|---|
| 166 | * of terms controlling the treatment of layout characters and character |
|---|
| 167 | * entities. |
|---|
| 168 | */ |
|---|
| 169 | |
|---|
| 170 | :- private(xml_to_document/3). |
|---|
| 171 | :- mode(xml_to_document(+nonvar, +nonvar, ?nonvar), zero_or_one). |
|---|
| 172 | :- info(xml_to_document/3, [ |
|---|
| 173 | comment is 'Translates the list of character codes XML into the Prolog term Document. Controls is a list of terms controlling the treatment of layout characters and character entities.', |
|---|
| 174 | argnames is ['Controls', 'XML', 'Document']]). |
|---|
| 175 | |
|---|
| 176 | xml_to_document( Controls, XML, Document ) :- |
|---|
| 177 | initial_context( Controls, Context ), |
|---|
| 178 | ( xml_declaration( Attributes0, XML, XML1 ) -> |
|---|
| 179 | Attributes = Attributes0 |
|---|
| 180 | ; otherwise -> |
|---|
| 181 | XML1 = XML, |
|---|
| 182 | Attributes = [] |
|---|
| 183 | ), |
|---|
| 184 | xml_to_document( XML1, Context, Terms, [], WellFormed ), |
|---|
| 185 | xml_to_document1( WellFormed, Attributes, Terms, Document ). |
|---|
| 186 | |
|---|
| 187 | xml_to_document1( true, Attributes, Terms, xml(Attributes, Terms) ). |
|---|
| 188 | xml_to_document1( false, Attributes, Terms, malformed(Attributes, Terms) ). |
|---|
| 189 | |
|---|
| 190 | % unparsed( +Unparsed, +Context, ?Terms, ?Residue, ?WellFormed ) |
|---|
| 191 | unparsed( Unparsed, _Context, [unparsed(Unparsed)], [], false ). |
|---|
| 192 | |
|---|
| 193 | xml_declaration( Attributes ) --> |
|---|
| 194 | spaces, |
|---|
| 195 | "<?", |
|---|
| 196 | nmtoken( xml ), |
|---|
| 197 | xml_declaration_attributes( Attributes ), |
|---|
| 198 | spaces, |
|---|
| 199 | "?>". |
|---|
| 200 | |
|---|
| 201 | xml_to_document( [], Context, Terms, [], WF ) :- |
|---|
| 202 | close_context( Context, Terms, WF ). |
|---|
| 203 | xml_to_document( [Char|Chars], Context, Terms, Residue, WF ) :- |
|---|
| 204 | ( Char =:= "<" -> |
|---|
| 205 | markup_structure( Chars, Context, Terms, Residue, WF ) |
|---|
| 206 | ; Char =:= "&" -> |
|---|
| 207 | entity_reference( Chars, Context, Terms, Residue, WF ) |
|---|
| 208 | ; Char =< " ", |
|---|
| 209 | \+ space_preserve( Context ) -> |
|---|
| 210 | layouts( Chars, Context, [Char|T], T, Terms, Residue, WF ) |
|---|
| 211 | ; void_context( Context ) -> |
|---|
| 212 | unparsed( [Char|Chars], Context, Terms, Residue, WF ) |
|---|
| 213 | ; otherwise -> |
|---|
| 214 | Terms = [pcdata([Char|Chars1])|Terms1], |
|---|
| 215 | acquire_pcdata( Chars, Context, Chars1, Terms1, Residue, WF ) |
|---|
| 216 | ). |
|---|
| 217 | |
|---|
| 218 | layouts( [], Context, _Plus, _Minus, Terms, [], WF ) :- |
|---|
| 219 | close_context( Context, Terms, WF ). |
|---|
| 220 | layouts( [Char|Chars], Context, Plus, Minus, Terms, Residue, WF ) :- |
|---|
| 221 | ( Char =:= "<" -> |
|---|
| 222 | markup_structure( Chars, Context, Terms, Residue, WF ) |
|---|
| 223 | ; Char =:= "&" -> |
|---|
| 224 | reference_in_layout( Chars, Context, Plus, Minus, Terms, Residue, WF ) |
|---|
| 225 | ; Char =< " " -> |
|---|
| 226 | Minus = [Char|Minus1], |
|---|
| 227 | layouts( Chars, Context, Plus, Minus1, Terms, Residue, WF ) |
|---|
| 228 | ; void_context( Context ) -> |
|---|
| 229 | unparsed( [Char|Chars], Context, Terms, Residue, WF ) |
|---|
| 230 | ; otherwise -> |
|---|
| 231 | Terms = [pcdata(Plus)|Terms1], |
|---|
| 232 | Minus = [Char|Chars1], |
|---|
| 233 | context_update( space_preserve, Context, true, Context1 ), |
|---|
| 234 | acquire_pcdata( Chars, Context1, Chars1, Terms1, Residue, WF ) |
|---|
| 235 | ). |
|---|
| 236 | |
|---|
| 237 | acquire_pcdata( [], Context, [], Terms, [], WF ) :- |
|---|
| 238 | close_context( Context, Terms, WF ). |
|---|
| 239 | acquire_pcdata( [Char|Chars], Context, Chars1, Terms, Residue, WF ) :- |
|---|
| 240 | ( Char =:= "<" -> |
|---|
| 241 | Chars1 = [], |
|---|
| 242 | markup_structure( Chars, Context, Terms, Residue, WF ) |
|---|
| 243 | ; Char =:= "&" -> |
|---|
| 244 | reference_in_pcdata( Chars, Context, Chars1, Terms, Residue, WF ) |
|---|
| 245 | ; otherwise -> |
|---|
| 246 | Chars1 = [Char|Chars2], |
|---|
| 247 | acquire_pcdata( Chars, Context, Chars2, Terms, Residue, WF ) |
|---|
| 248 | ). |
|---|
| 249 | |
|---|
| 250 | markup_structure( [], Context, Terms, Residue, WF ) :- |
|---|
| 251 | unparsed( "<", Context, Terms, Residue, WF ). |
|---|
| 252 | markup_structure( Chars, Context, Terms, Residue, WF ) :- |
|---|
| 253 | Chars = [Char|Chars1], |
|---|
| 254 | ( Char =:= "/" -> |
|---|
| 255 | closing_tag( Context, Chars1, Terms, Residue, WF ) |
|---|
| 256 | ; Char =:= "?" -> |
|---|
| 257 | pi_acquisition( Chars1, Context, Terms, Residue, WF ) |
|---|
| 258 | ; Char =:= "!" -> |
|---|
| 259 | declaration_acquisition( Chars1, Context, Terms, Residue, WF ) |
|---|
| 260 | ; open_tag(Tag,Context,Attributes,Type, Chars, Chars2 ) -> |
|---|
| 261 | push_tag( Tag, Chars2, Context, Attributes, Type, Terms, Residue, WF ) |
|---|
| 262 | ; otherwise -> |
|---|
| 263 | unparsed( [0'<|Chars], Context, Terms, Residue, WF ) %' |
|---|
| 264 | ). |
|---|
| 265 | |
|---|
| 266 | push_tag( Tag, Chars, Context, Attributes, Type, Terms, Residue, WF ) :- |
|---|
| 267 | new_element(Tag, Chars, Context, Attributes, Type, Term, Rest, WF0), |
|---|
| 268 | push_tag1( WF0, Context, Term, Rest, Terms, Residue, WF ). |
|---|
| 269 | |
|---|
| 270 | push_tag1( true, Context, Term, Chars, [Term|Terms], Residue, WF ) :- |
|---|
| 271 | xml_to_document( Chars, Context, Terms, Residue, WF ). |
|---|
| 272 | push_tag1( false, _Context, Term, Chars, [Term], Chars, false ). |
|---|
| 273 | |
|---|
| 274 | new_element( TagChars, Chars, Context, Attributes0, Type, Term, Residue, WF ) :- |
|---|
| 275 | namespace_attributes( Attributes0, Context, Context1, Attributes1 ), |
|---|
| 276 | ( append( NSChars, [0':|TagChars1], TagChars ), %' |
|---|
| 277 | specific_namespace( NSChars, Context1, SpecificNamespace ) -> |
|---|
| 278 | Namespace0 = SpecificNamespace |
|---|
| 279 | ; otherwise -> |
|---|
| 280 | NSChars = "", |
|---|
| 281 | TagChars1 = TagChars, |
|---|
| 282 | default_namespace( Context1, Namespace0 ) |
|---|
| 283 | ), |
|---|
| 284 | current_namespace( Context1, CurrentNamespace ), |
|---|
| 285 | ( Namespace0 == CurrentNamespace -> |
|---|
| 286 | Term = element(Tag, Attributes, Contents), |
|---|
| 287 | Context2 = Context1 |
|---|
| 288 | ; otherwise -> |
|---|
| 289 | Term = namespace( Namespace0, NSChars, |
|---|
| 290 | element(Tag, Attributes, Contents) |
|---|
| 291 | ), |
|---|
| 292 | context_update( current_namespace, Context1, Namespace0, Context2 ) |
|---|
| 293 | ), |
|---|
| 294 | input_attributes( Attributes1, Context2, Attributes ), |
|---|
| 295 | atom_codes( Tag, TagChars1 ), |
|---|
| 296 | close_tag( Type, Chars, Context2, Contents, Residue, WF ). |
|---|
| 297 | |
|---|
| 298 | close_tag( empty, Residue, _Context, [], Residue, true ). |
|---|
| 299 | close_tag( push(Tag), Chars, Context0, Contents, Residue, WF ) :- |
|---|
| 300 | context_update( element, Context0, Tag, Context1 ), |
|---|
| 301 | xml_to_document( Chars, Context1, Contents, Residue, WF ). |
|---|
| 302 | |
|---|
| 303 | pi_acquisition( Chars, Context, Terms, Residue, WellFormed ) :- |
|---|
| 304 | ( inline_instruction(Target, Processing, Chars, Rest ), |
|---|
| 305 | Target \== xml -> |
|---|
| 306 | Terms = [instructions(Target, Processing)|Terms1], |
|---|
| 307 | xml_to_document( Rest, Context, Terms1, Residue, WellFormed ) |
|---|
| 308 | ; otherwise -> |
|---|
| 309 | unparsed( [0'<,0'?|Chars], Context, Terms, Residue, WellFormed ) |
|---|
| 310 | ). |
|---|
| 311 | |
|---|
| 312 | declaration_acquisition( Chars, Context, Terms, Residue, WF ) :- |
|---|
| 313 | ( declaration_type( Chars, Type, Chars1 ), |
|---|
| 314 | declaration_parse( Type, Context, Term, Context1, Chars1, Rest ) -> |
|---|
| 315 | Terms = [Term|Terms1], |
|---|
| 316 | xml_to_document( Rest, Context1, Terms1, Residue, WF ) |
|---|
| 317 | ; otherwise -> |
|---|
| 318 | unparsed( [0'<,0'!|Chars], Context, Terms, Residue, WF ) |
|---|
| 319 | ). |
|---|
| 320 | |
|---|
| 321 | open_tag( Tag, Namespaces, Attributes, Termination ) --> |
|---|
| 322 | nmtoken_chars( Tag ), |
|---|
| 323 | attributes( Attributes, [], Namespaces ), |
|---|
| 324 | spaces, |
|---|
| 325 | open_tag_terminator( Tag, Termination ). |
|---|
| 326 | |
|---|
| 327 | open_tag_terminator( Tag, push(Tag) ) --> |
|---|
| 328 | ">". |
|---|
| 329 | open_tag_terminator( _Tag, empty ) --> |
|---|
| 330 | "/>". |
|---|
| 331 | |
|---|
| 332 | declaration_parse( comment, Namespaces, comment(Comment), Namespaces ) --> |
|---|
| 333 | comment(Comment). |
|---|
| 334 | declaration_parse( cdata, Namespaces, cdata(CData), Namespaces ) --> |
|---|
| 335 | cdata( CData ). |
|---|
| 336 | declaration_parse( doctype, Namespaces0, doctype(Name, Names), Namespaces ) --> |
|---|
| 337 | doctype( Name, Names, Namespaces0, Namespaces ), |
|---|
| 338 | spaces, |
|---|
| 339 | ">". |
|---|
| 340 | |
|---|
| 341 | inline_instruction( Target, Processing, Plus, Minus ) :- |
|---|
| 342 | nmtoken(Target, Plus, Mid0 ), |
|---|
| 343 | spaces( Mid0, Mid1 ), |
|---|
| 344 | append( Processing, [0'?,0'>|Minus], Mid1 ), |
|---|
| 345 | !. |
|---|
| 346 | |
|---|
| 347 | entity_reference_name( Reference ) --> |
|---|
| 348 | nmtoken_chars( Reference ), |
|---|
| 349 | ";". |
|---|
| 350 | |
|---|
| 351 | declaration_type( [Char1,Char2|Chars1], Class, Rest ) :- |
|---|
| 352 | Chars = [Char1,Char2|Chars1], |
|---|
| 353 | ( declaration_type1( Char1, Char2, Chars1, Class0, Residue ) -> |
|---|
| 354 | Class = Class0, |
|---|
| 355 | Rest = Residue |
|---|
| 356 | ; otherwise -> |
|---|
| 357 | Class = generic, |
|---|
| 358 | Rest = Chars |
|---|
| 359 | ). |
|---|
| 360 | |
|---|
| 361 | declaration_type1( 0'-, 0'-, Chars, comment, Chars ). |
|---|
| 362 | declaration_type1( 0'[, 0'C, Chars, cdata, Residue ) :- |
|---|
| 363 | append( "DATA[", Residue, Chars ). |
|---|
| 364 | declaration_type1( 0'D, 0'O, Chars, doctype, Residue ) :- |
|---|
| 365 | append( "CTYPE", Residue, Chars ). |
|---|
| 366 | |
|---|
| 367 | closing_tag( Context, Chars, Terms, Residue, WellFormed ) :- |
|---|
| 368 | ( closing_tag_name( Tag, Chars, Rest ), |
|---|
| 369 | current_tag( Context, Tag ) -> |
|---|
| 370 | Terms = [], |
|---|
| 371 | Residue = Rest, |
|---|
| 372 | WellFormed = true |
|---|
| 373 | ; otherwise -> |
|---|
| 374 | unparsed( [0'<,0'/|Chars], Context, Terms, Residue, WellFormed ) |
|---|
| 375 | ). |
|---|
| 376 | |
|---|
| 377 | closing_tag_name( Tag ) --> |
|---|
| 378 | nmtoken_chars( Tag ), |
|---|
| 379 | spaces, |
|---|
| 380 | ">". |
|---|
| 381 | |
|---|
| 382 | entity_reference( Chars, Context, Terms, Residue, WF ) :- |
|---|
| 383 | reference_in_layout( Chars, Context, L, L, Terms, Residue, WF ). |
|---|
| 384 | |
|---|
| 385 | reference_in_layout( Chars, Context, Plus, Minus, Terms, Residue, WF ) :- |
|---|
| 386 | ( standard_character_entity( Char, Chars, Rest ) -> |
|---|
| 387 | Minus = [Char|Chars1], |
|---|
| 388 | Terms = [pcdata(Plus)|Terms1], |
|---|
| 389 | acquire_pcdata( Rest, Context, Chars1, Terms1, Residue, WF ) |
|---|
| 390 | ; entity_reference_name( Reference, Chars, Rest ), |
|---|
| 391 | defined_entity( Reference, Context, String ) -> |
|---|
| 392 | append( String, Rest, Full ), |
|---|
| 393 | xml_to_document( Full, Context, Terms, Residue, WF ) |
|---|
| 394 | ; allow_ampersand( Context ) -> |
|---|
| 395 | Minus = [0'&|Chars1], %' |
|---|
| 396 | Terms = [pcdata(Plus)|Terms1], |
|---|
| 397 | acquire_pcdata( Chars, Context, Chars1, Terms1, Residue, WF ) |
|---|
| 398 | ; otherwise -> |
|---|
| 399 | unparsed( [0'&|Chars], Context, Terms, Residue, WF ) %' |
|---|
| 400 | ). |
|---|
| 401 | |
|---|
| 402 | reference_in_pcdata( Chars0, Context, Chars1, Terms, Residue, WF ) :- |
|---|
| 403 | ( standard_character_entity( Char, Chars0, Rest ) -> |
|---|
| 404 | Chars1 = [Char|Chars2], |
|---|
| 405 | acquire_pcdata( Rest, Context, Chars2, Terms, Residue, WF ) |
|---|
| 406 | ; entity_reference_name( Reference, Chars0, Rest ), |
|---|
| 407 | defined_entity( Reference, Context, String ) -> |
|---|
| 408 | append( String, Rest, Full ), |
|---|
| 409 | acquire_pcdata( Full, Context, Chars1, Terms, Residue, WF ) |
|---|
| 410 | ; allow_ampersand( Context ) -> |
|---|
| 411 | Chars1 = [0'&|Chars2], |
|---|
| 412 | acquire_pcdata( Chars0, Context, Chars2, Terms, Residue, WF ) |
|---|
| 413 | ; otherwise -> |
|---|
| 414 | Chars1 = [], |
|---|
| 415 | unparsed( [0'&|Chars0], Context, Terms, Residue, WF ) |
|---|
| 416 | ). |
|---|
| 417 | |
|---|
| 418 | namespace_attributes( [], Context, Context, [] ). |
|---|
| 419 | namespace_attributes( Attributes0, Context0, Context, Attributes ) :- |
|---|
| 420 | Attributes0 = [_|_], |
|---|
| 421 | append( "xmlns:", Unqualified, QualifiedNameChars ), |
|---|
| 422 | ( select( "xmlns"=Value, Attributes0, Attributes1 ) -> |
|---|
| 423 | atom_codes( URI, Value ), |
|---|
| 424 | context_update( default_namespace, Context0, URI, Context1 ), |
|---|
| 425 | namespace_attributes( Attributes1, Context1, Context, Attributes ) |
|---|
| 426 | ; select( QualifiedNameChars=Value, Attributes0, Attributes1 ) -> |
|---|
| 427 | Attributes = [QualifiedNameChars=Value|Attributes2], |
|---|
| 428 | atom_codes( URI, Value ), |
|---|
| 429 | context_update( ns_prefix(Unqualified), Context0, URI, Context1 ), |
|---|
| 430 | namespace_attributes( Attributes1, Context1, Context, Attributes2 ) |
|---|
| 431 | ; member( "xml:space"="preserve", Attributes0 ) -> |
|---|
| 432 | Attributes = Attributes0, |
|---|
| 433 | context_update( space_preserve, Context0, true, Context ) |
|---|
| 434 | ; otherwise -> |
|---|
| 435 | Context = Context0, |
|---|
| 436 | Attributes = Attributes0 |
|---|
| 437 | ). |
|---|
| 438 | |
|---|
| 439 | input_attributes( [], _Context, [] ). |
|---|
| 440 | input_attributes( [NameChars=Value|Attributes0], Context, |
|---|
| 441 | [Name=Value|Attributes] ) :- |
|---|
| 442 | ( remove_attribute_prefixes( Context ), |
|---|
| 443 | append( NSChars, [0':|NameChars1], NameChars ), %' |
|---|
| 444 | NSChars \== "xmlns", |
|---|
| 445 | specific_namespace( NSChars, Context, Namespace ), |
|---|
| 446 | current_namespace( Context, Namespace ) -> |
|---|
| 447 | atom_codes( Name, NameChars1 ) |
|---|
| 448 | ; otherwise -> |
|---|
| 449 | atom_codes( Name, NameChars ) |
|---|
| 450 | ), |
|---|
| 451 | input_attributes( Attributes0, Context, Attributes ). |
|---|
| 452 | |
|---|
| 453 | attributes( [Name=Value|Attributes], Seen, Namespaces ) --> |
|---|
| 454 | spaces, |
|---|
| 455 | nmtoken_chars( Name ), |
|---|
| 456 | {\+ member(Name, Seen)}, |
|---|
| 457 | spaces, |
|---|
| 458 | "=", |
|---|
| 459 | spaces, |
|---|
| 460 | attribute_value( Value, Namespaces ), |
|---|
| 461 | attributes( Attributes, [Name|Seen], Namespaces ). |
|---|
| 462 | attributes( [], _Seen, _Namespaces ) --> "". |
|---|
| 463 | |
|---|
| 464 | xml_declaration_attributes( [] ) --> "". |
|---|
| 465 | xml_declaration_attributes( [Name=Value|Attributes] ) --> |
|---|
| 466 | spaces, |
|---|
| 467 | nmtoken( Name ), |
|---|
| 468 | spaces, |
|---|
| 469 | "=", |
|---|
| 470 | spaces, |
|---|
| 471 | xml_string( Value ), |
|---|
| 472 | {xml_declaration_attribute_valid(Name, Value)}, |
|---|
| 473 | xml_declaration_attributes( Attributes ), |
|---|
| 474 | spaces. |
|---|
| 475 | |
|---|
| 476 | doctype( Name, External, Namespaces0, Namespaces1 ) --> |
|---|
| 477 | spaces, |
|---|
| 478 | nmtoken( Name ), |
|---|
| 479 | spaces, |
|---|
| 480 | doctype_id( External0 ), |
|---|
| 481 | spaces, |
|---|
| 482 | doctype1( Namespaces0, Literals, Namespaces1 ), |
|---|
| 483 | {doctype_extension(Literals, External0, External)}. |
|---|
| 484 | |
|---|
| 485 | doctype_extension( [], External, External ). |
|---|
| 486 | doctype_extension( [Literal|Literals], External0, External ) :- |
|---|
| 487 | extended_doctype( External0, [Literal|Literals], External ). |
|---|
| 488 | |
|---|
| 489 | extended_doctype( system(URL), Literals, system(URL,Literals) ). |
|---|
| 490 | extended_doctype( public(URN,URL), Literals, public(URN,URL,Literals) ). |
|---|
| 491 | extended_doctype( local, Literals, local(Literals) ). |
|---|
| 492 | |
|---|
| 493 | doctype1( Namespaces0, Literals, Namespaces1 ) --> |
|---|
| 494 | "[", |
|---|
| 495 | !, |
|---|
| 496 | dtd( Namespaces0, Literals, Namespaces1 ), |
|---|
| 497 | "]". |
|---|
| 498 | doctype1( Namespaces, [], Namespaces ) --> "". |
|---|
| 499 | |
|---|
| 500 | doctype_id( system(URL) ) --> |
|---|
| 501 | "SYSTEM", |
|---|
| 502 | spaces, |
|---|
| 503 | uri( URL ). |
|---|
| 504 | doctype_id( public(URN,URL) ) --> |
|---|
| 505 | "PUBLIC", |
|---|
| 506 | spaces, |
|---|
| 507 | uri( URN ), |
|---|
| 508 | spaces, |
|---|
| 509 | uri( URL ). |
|---|
| 510 | doctype_id( local ) --> "". |
|---|
| 511 | |
|---|
| 512 | dtd( Namespaces0, Literals, Namespaces1 ) --> |
|---|
| 513 | spaces, |
|---|
| 514 | "<!ENTITY", |
|---|
| 515 | !, |
|---|
| 516 | spaces, |
|---|
| 517 | nmtoken_chars( Name ), |
|---|
| 518 | spaces, |
|---|
| 519 | quote( Quote ), |
|---|
| 520 | entity_value( Quote, Namespaces0, String ), |
|---|
| 521 | spaces, |
|---|
| 522 | ">", |
|---|
| 523 | {\+ character_entity( Name, _StandardChar ), |
|---|
| 524 | % Don't allow < "e; etc. to be updated |
|---|
| 525 | context_update( entity(Name), Namespaces0, String, Namespaces2 ) |
|---|
| 526 | }, |
|---|
| 527 | dtd( Namespaces2, Literals, Namespaces1 ). |
|---|
| 528 | dtd( Namespaces0, Literals, Namespaces1 ) --> |
|---|
| 529 | spaces, |
|---|
| 530 | "<!--", |
|---|
| 531 | !, |
|---|
| 532 | dtd_comment, |
|---|
| 533 | ">", |
|---|
| 534 | dtd( Namespaces0, Literals, Namespaces1 ). |
|---|
| 535 | dtd( Namespaces0, [dtd_literal(Literal)|Literals], Namespaces1 ) --> |
|---|
| 536 | spaces, |
|---|
| 537 | "<!", |
|---|
| 538 | !, |
|---|
| 539 | dtd_literal( Literal ), |
|---|
| 540 | dtd( Namespaces0, Literals, Namespaces1 ). |
|---|
| 541 | dtd( Namespaces, [], Namespaces ) --> spaces. |
|---|
| 542 | |
|---|
| 543 | dtd_literal( [] ) --> ">", !. |
|---|
| 544 | dtd_literal( Chars ) --> |
|---|
| 545 | "--", |
|---|
| 546 | !, |
|---|
| 547 | dtd_comment, |
|---|
| 548 | dtd_literal( Chars ). |
|---|
| 549 | dtd_literal( [Char|Chars] ) --> |
|---|
| 550 | [Char], |
|---|
| 551 | dtd_literal( Chars ). |
|---|
| 552 | |
|---|
| 553 | dtd_comment( Plus, Minus ) :- |
|---|
| 554 | append( _Chars, [0'-,0'-|Minus], Plus ), |
|---|
| 555 | !. |
|---|
| 556 | |
|---|
| 557 | nmtokens( [Name|Names] ) --> |
|---|
| 558 | spaces, |
|---|
| 559 | nmtoken( Name ), |
|---|
| 560 | nmtokens( Names ). |
|---|
| 561 | nmtokens( [] ) --> []. |
|---|
| 562 | |
|---|
| 563 | entity_value( Quote, Namespaces, String, [Char|Plus], Minus ) :- |
|---|
| 564 | ( Char == Quote -> |
|---|
| 565 | String = [], |
|---|
| 566 | Minus = Plus |
|---|
| 567 | ; Char =:= "&" -> |
|---|
| 568 | reference_in_entity( Namespaces, Quote, String, Plus, Minus ) |
|---|
| 569 | ; otherwise -> |
|---|
| 570 | String = [Char|String1], |
|---|
| 571 | entity_value( Quote, Namespaces, String1, Plus, Minus ) |
|---|
| 572 | ). |
|---|
| 573 | |
|---|
| 574 | attribute_value( String, Namespaces ) --> |
|---|
| 575 | quote( Quote ), |
|---|
| 576 | attribute_leading_layouts( Quote, Namespaces, String ). |
|---|
| 577 | |
|---|
| 578 | attribute_leading_layouts( _Quote, _Namespace, [], [], [] ). |
|---|
| 579 | attribute_leading_layouts( Quote, Namespaces, String, [Char|Plus], Minus ) :- |
|---|
| 580 | ( Char == Quote -> |
|---|
| 581 | String = [], |
|---|
| 582 | Minus = Plus |
|---|
| 583 | ; Char =:= "&" -> |
|---|
| 584 | ref_in_attribute_layout( Namespaces, Quote, String, Plus, Minus ) |
|---|
| 585 | ; Char > 32, Char \== 160 -> |
|---|
| 586 | String = [Char|String1], |
|---|
| 587 | attribute_layouts( Quote, Namespaces, false, String1, Plus, Minus ) |
|---|
| 588 | ; otherwise -> |
|---|
| 589 | attribute_leading_layouts( Quote, Namespaces, String, Plus, Minus ) |
|---|
| 590 | ). |
|---|
| 591 | |
|---|
| 592 | attribute_layouts( _Quote, _Namespaces, _Layout, [], [], [] ). |
|---|
| 593 | attribute_layouts( Quote, Namespaces, Layout, String, [Char|Plus], Minus ) :- |
|---|
| 594 | ( Char == Quote -> |
|---|
| 595 | String = [], |
|---|
| 596 | Minus = Plus |
|---|
| 597 | ; Char =:= "&" -> |
|---|
| 598 | reference_in_value( Namespaces, Quote, Layout, String, Plus, Minus ) |
|---|
| 599 | ; Char > 32, Char \== 160 -> |
|---|
| 600 | ( Layout == true -> |
|---|
| 601 | String = [0' ,Char|String1] %' |
|---|
| 602 | ; otherwise -> |
|---|
| 603 | String = [Char|String1] |
|---|
| 604 | ), |
|---|
| 605 | attribute_layouts( Quote, Namespaces, false, String1, Plus, Minus ) |
|---|
| 606 | ; otherwise -> |
|---|
| 607 | attribute_layouts( Quote, Namespaces, true, String, Plus, Minus ) |
|---|
| 608 | ). |
|---|
| 609 | |
|---|
| 610 | ref_in_attribute_layout( NS, Quote, String, Plus, Minus ) :- |
|---|
| 611 | ( standard_character_entity( Char, Plus, Mid ) -> |
|---|
| 612 | String = [Char|String1], |
|---|
| 613 | attribute_layouts( Quote, NS, false, String1, Mid, Minus ) |
|---|
| 614 | ; entity_reference_name( Name, Plus, Suffix ), |
|---|
| 615 | defined_entity( Name, NS, Text ) -> |
|---|
| 616 | append( Text, Suffix, Mid ), |
|---|
| 617 | attribute_leading_layouts( Quote, NS, String, Mid, Minus ) |
|---|
| 618 | ; otherwise -> % Just & is okay in a value |
|---|
| 619 | String = [0'&|String1], %' |
|---|
| 620 | attribute_layouts( Quote, NS, false, String1, Plus, Minus ) |
|---|
| 621 | ). |
|---|
| 622 | |
|---|
| 623 | reference_in_value( Namespaces, Quote, Layout, String, Plus, Minus ) :- |
|---|
| 624 | ( standard_character_entity( Char, Plus, Mid ) -> |
|---|
| 625 | ( Layout == true -> |
|---|
| 626 | String = [0' ,Char|String1] %' |
|---|
| 627 | ; otherwise -> |
|---|
| 628 | String = [Char|String1] |
|---|
| 629 | ), |
|---|
| 630 | Layout1 = false |
|---|
| 631 | ; entity_reference_name( Name, Plus, Suffix ), |
|---|
| 632 | defined_entity( Name, Namespaces, Text ) -> |
|---|
| 633 | String = String1, |
|---|
| 634 | append( Text, Suffix, Mid ), |
|---|
| 635 | Layout1 = Layout |
|---|
| 636 | ; otherwise -> % Just & is okay in a value |
|---|
| 637 | Mid = Plus, |
|---|
| 638 | String = [0'&|String1], %' |
|---|
| 639 | Layout1 = false |
|---|
| 640 | ), |
|---|
| 641 | attribute_layouts( Quote, Namespaces, Layout1, String1, Mid, Minus ). |
|---|
| 642 | |
|---|
| 643 | /* References are resolved backwards in Entity defintions so that |
|---|
| 644 | * circularity is avoided. |
|---|
| 645 | */ |
|---|
% reference_in_entity( +Namespaces, +Quote, ?String, +Plus, -Minus ) deals
% with a reference met inside an entity value and then resumes scanning with
% entity_value/5.
%   - A character entity is only recognised, not decoded: a literal "&" is
%     emitted and the input is left untouched, so the entity text itself is
%     copied by the subsequent scan.
%   - A defined entity is replaced by putting its text back in front of the
%     remaining input.
% There is no fall-back branch: any other reference makes the predicate fail.
reference_in_entity( Namespaces, Quote, String, Plus, Minus ) :-
    (   standard_character_entity( _SomeChar, Plus, _AfterEntity ) ->
        String = [0'&|Tail], % ' Character entities are unparsed
        Remaining = Plus
    ;   entity_reference_name( EntityName, Plus, AfterName ),
        defined_entity( EntityName, Namespaces, Replacement ) ->
        String = Tail,
        append( Replacement, AfterName, Remaining )
    ),
    entity_value( Quote, Namespaces, Tail, Remaining, Minus ).
| 656 | |
|---|
% standard_character_entity( ?Char ) parses the body of a character reference
% or of one of the named entities listed in character_entity/2, up to and
% including the terminating ";" (the callers are expected to have consumed
% the leading "&" - confirm at the call sites). Char is the character code
% denoted by the reference. The clauses are tried in this order:
%   "#x" HexDigits ";"    hexadecimal character reference
%   "#"  Digits    ";"    decimal character reference (at least one digit)
%   Name ";"              named entity, looked up in character_entity/2
standard_character_entity( Char ) -->
    "#x", hex_character_reference( Char ), ";".
standard_character_entity( Char ) -->
    "#", digit( Digit ), digits( Digits ), ";",
    % digit//1 and digits//1 deliver character codes, so the conversion must
    % be number_codes/2; ISO number_chars/2 expects one-character atoms and
    % raises a type error on a code list in strict systems.
    {number_codes( Char, [Digit|Digits] )}.
standard_character_entity( C ) -->
    chars( String ),
    ";",
    !,  % commit to the text up to the first ";" before the table lookup
    {character_entity( String, C )}.
| 667 | |
|---|
% uri( ?URI ) parses a quoted value: an opening quote accepted by quote//1,
% then every code up to (not including) the next occurrence of that same
% quote, which is consumed. URI is the list of codes between the quotes.
uri( URI ) -->
    quote( Delimiter ),
    uri1( Delimiter, URI ).

% uri1( +Delimiter, ?Codes ) collects codes until the closing Delimiter.
uri1( Delimiter, [] ) -->
    quote( Delimiter ),
    !.
uri1( Delimiter, [Code|Codes] ) -->
    [Code],
    uri1( Delimiter, Codes ).
| 678 | |
|---|
% comment( ?Chars, +Plus, -Minus ) splits Plus at the first "-->": Chars is
% the text before it and Minus the text after it. Only the first split is
% returned.
comment( Chars, Plus, Minus ) :-
    once( append( Chars, [0'-,0'-,0'>|Minus], Plus ) ). %'
| 682 | |
|---|
% cdata( ?Chars, +Plus, -Minus ) splits Plus at the first "]]>": Chars is the
% text before it and Minus the text after it. Only the first split is
% returned.
cdata( Chars, Plus, Minus ) :-
    once( append( Chars, [0'],0'],0'>|Minus], Plus ) ). %'
| 686 | % Syntax Components |
|---|
| 687 | |
|---|
% hex_character_reference( ?Code ) parses one or more hexadecimal digits
% (either letter case) and unifies Code with their value. At least one digit
% is required: an empty digit sequence (as in "&#x;") is rejected rather than
% being read as code 0.
hex_character_reference( Code ) -->
    hex_digit_char( First ),
    hex_character_reference1( First, Code ).

% hex_character_reference1( +Current, ?Code ) consumes as many further hex
% digits as are available, shifting the accumulated value left by four bits
% for each one. The cut makes the match greedy.
hex_character_reference1( Current, Code ) -->
    hex_digit_char( Value ),
    !,
    {New is (Current << 4) + Value},
    hex_character_reference1( New, Code ).
hex_character_reference1( Code, Code ) --> "".

% hex_digit_char( ?Value ) parses a single hexadecimal digit.
hex_digit_char( 0 ) --> "0".
hex_digit_char( 1 ) --> "1".
hex_digit_char( 2 ) --> "2".
hex_digit_char( 3 ) --> "3".
hex_digit_char( 4 ) --> "4".
hex_digit_char( 5 ) --> "5".
hex_digit_char( 6 ) --> "6".
hex_digit_char( 7 ) --> "7".
hex_digit_char( 8 ) --> "8".
hex_digit_char( 9 ) --> "9".
hex_digit_char( 10 ) --> "A".
hex_digit_char( 11 ) --> "B".
hex_digit_char( 12 ) --> "C".
hex_digit_char( 13 ) --> "D".
hex_digit_char( 14 ) --> "E".
hex_digit_char( 15 ) --> "F".
hex_digit_char( 10 ) --> "a".
hex_digit_char( 11 ) --> "b".
hex_digit_char( 12 ) --> "c".
hex_digit_char( 13 ) --> "d".
hex_digit_char( 14 ) --> "e".
hex_digit_char( 15 ) --> "f".
| 720 | |
|---|
% quote( ?Code ) parses a single quotation mark and returns its code.
quote( 34 ) --> [34].   % double quote
quote( 39 ) --> [39].   % single quote (apostrophe)
| 725 | |
|---|
% spaces( +Plus, -Minus ) skips leading layout: Minus is Plus without its
% leading codes that are =< 32 (space and the control characters).
spaces( [], [] ).
spaces( [Code|Codes], Remainder ) :-
    (   Code =< 32 ->
        spaces( Codes, Remainder )
    ;   otherwise ->
        Remainder = [Code|Codes]
    ).
| 733 | |
|---|
% nmtoken( ?Name ) parses a name token and returns it as an atom.
nmtoken( Name ) -->
    nmtoken_chars( Codes ),
    {atom_codes( Name, Codes )}.

% nmtoken_chars( ?Codes ) parses one code accepted by nmtoken_first/1
% followed by the longest run of codes accepted by nmtoken_char/1.
nmtoken_chars( [First|Rest] ) -->
    [First],
    {nmtoken_first( First )},
    nmtoken_chars_tail( Rest ).

% nmtoken_chars_tail( ?Codes ) greedily collects nmtoken_char/1 codes.
nmtoken_chars_tail( [Code|Codes] ) -->
    [Code],
    {nmtoken_char( Code )},
    !,
    nmtoken_chars_tail( Codes ).
nmtoken_chars_tail( [] ) --> [].

% nmtoken_first( ?Code ) holds for the codes that may start a name token:
% colon, underscore, or any code in the alphabet/1 table.
nmtoken_first( 0': ).
nmtoken_first( 0'_ ).
nmtoken_first( Code ) :- alphabet( Code ).
| 754 | |
|---|
% nmtoken_char( ?Code ) holds for the codes allowed after the first character
% of a name token: ASCII letters, decimal digits, and the four punctuation
% characters ".", "-", "_" and ":". Clause order is a-z, A-Z, 0-9, then the
% punctuation.

% Lower-case letters
nmtoken_char(0'a). nmtoken_char(0'b). nmtoken_char(0'c). nmtoken_char(0'd). nmtoken_char(0'e).
nmtoken_char(0'f). nmtoken_char(0'g). nmtoken_char(0'h). nmtoken_char(0'i). nmtoken_char(0'j).
nmtoken_char(0'k). nmtoken_char(0'l). nmtoken_char(0'm). nmtoken_char(0'n). nmtoken_char(0'o).
nmtoken_char(0'p). nmtoken_char(0'q). nmtoken_char(0'r). nmtoken_char(0's). nmtoken_char(0't).
nmtoken_char(0'u). nmtoken_char(0'v). nmtoken_char(0'w). nmtoken_char(0'x). nmtoken_char(0'y).
nmtoken_char(0'z).

% Upper-case letters
nmtoken_char(0'A). nmtoken_char(0'B). nmtoken_char(0'C). nmtoken_char(0'D). nmtoken_char(0'E).
nmtoken_char(0'F). nmtoken_char(0'G). nmtoken_char(0'H). nmtoken_char(0'I). nmtoken_char(0'J).
nmtoken_char(0'K). nmtoken_char(0'L). nmtoken_char(0'M). nmtoken_char(0'N). nmtoken_char(0'O).
nmtoken_char(0'P). nmtoken_char(0'Q). nmtoken_char(0'R). nmtoken_char(0'S). nmtoken_char(0'T).
nmtoken_char(0'U). nmtoken_char(0'V). nmtoken_char(0'W). nmtoken_char(0'X). nmtoken_char(0'Y).
nmtoken_char(0'Z).

% Decimal digits
nmtoken_char(0'0). nmtoken_char(0'1). nmtoken_char(0'2). nmtoken_char(0'3). nmtoken_char(0'4).
nmtoken_char(0'5). nmtoken_char(0'6). nmtoken_char(0'7). nmtoken_char(0'8). nmtoken_char(0'9).

% Punctuation
nmtoken_char( 0'. ).
nmtoken_char( 0'- ).
nmtoken_char( 0'_ ).
nmtoken_char( 0': ).
| 821 | |
|---|
% xml_string( ?String ) parses a quoted string: an opening quote accepted by
% quote//1, then every code up to (not including) the next occurrence of the
% same quote, which is consumed. String is the list of codes in between.
xml_string( String ) -->
    quote( Delimiter ),
    xml_string1( Delimiter, String ).

% xml_string1( +Delimiter, ?Codes ) collects codes until the closing
% Delimiter.
xml_string1( Delimiter, [] ) -->
    quote( Delimiter ),
    !.
xml_string1( Delimiter, [Code|Codes] ) -->
    [Code],
    xml_string1( Delimiter, Codes ).
| 832 | |
|---|
% alphabet( ?Code ) holds for the codes of the ASCII letters. Clause order is
% a-z followed by A-Z.

% Lower-case letters
alphabet(0'a). alphabet(0'b). alphabet(0'c). alphabet(0'd). alphabet(0'e). alphabet(0'f).
alphabet(0'g). alphabet(0'h). alphabet(0'i). alphabet(0'j). alphabet(0'k). alphabet(0'l).
alphabet(0'm). alphabet(0'n). alphabet(0'o). alphabet(0'p). alphabet(0'q). alphabet(0'r).
alphabet(0's). alphabet(0't). alphabet(0'u). alphabet(0'v). alphabet(0'w). alphabet(0'x).
alphabet(0'y). alphabet(0'z).

% Upper-case letters
alphabet(0'A). alphabet(0'B). alphabet(0'C). alphabet(0'D). alphabet(0'E). alphabet(0'F).
alphabet(0'G). alphabet(0'H). alphabet(0'I). alphabet(0'J). alphabet(0'K). alphabet(0'L).
alphabet(0'M). alphabet(0'N). alphabet(0'O). alphabet(0'P). alphabet(0'Q). alphabet(0'R).
alphabet(0'S). alphabet(0'T). alphabet(0'U). alphabet(0'V). alphabet(0'W). alphabet(0'X).
alphabet(0'Y). alphabet(0'Z).
| 885 | |
|---|
% digit( ?Code ) parses a single decimal digit and returns its code.
digit( Code ) -->
    [Code],
    {digit_table( Code )}.

% digit_table( ?Code ) holds for the codes of the decimal digits, in order.
digit_table(0'0). digit_table(0'1). digit_table(0'2). digit_table(0'3). digit_table(0'4).
digit_table(0'5). digit_table(0'6). digit_table(0'7). digit_table(0'8). digit_table(0'9).

% digits( ?Codes ) parses a possibly empty run of decimal digits. The longest
% run is the first solution; shorter prefixes are available on backtracking.
digits( [Code|Codes] ) -->
    digit( Code ),
    digits( Codes ).
digits( [] ) --> [].
| 903 | |
|---|
% character_entity( ?Name, ?Code ) maps the name of each predefined entity
% to the code of the character it denotes.
character_entity( "quot", 34 ).     % double quote
character_entity( "amp",  38 ).     % ampersand
character_entity( "lt",   60 ).     % less-than sign
character_entity( "gt",   62 ).     % greater-than sign
character_entity( "apos", 39 ).     % apostrophe
| 909 | |
|---|
| 910 | /* pp( +XMLDocument ) "pretty prints" XMLDocument on the current |
|---|
| 911 | * output stream. |
|---|
| 912 | */ |
|---|
% pp( +XMLDocument ) writes XMLDocument - either xml(Attributes, Content) or
% malformed(Attributes, Content) - to the current output stream as an
% indented term followed by " )." and a newline. Any other term fails.
pp( Document ) :-
    pp_root( Document, Opening, Attributes, Content ),
    write( Opening ),
    pp_attributes( Attributes, 0 ),
    pp_comma,
    nl,
    pp_list( Content, s(0) ),
    write( ' ).' ),
    nl.

% pp_root( ?Document, ?Opening, ?Attributes, ?Content ) lists the two
% document wrappers together with the text that opens each one.
pp_root( xml(Attributes, Content), 'xml( ', Attributes, Content ).
pp_root( malformed(Attributes, Content), 'malformed( ', Attributes, Content ).
| 921 | |
|---|
% pp_indented( +Term, +Indent ) writes one item of a document to the current
% output stream. Indent is a successor-notation depth (0, s(0), s(s(0)), ...)
% that is handed to pp_indent/1. There is one clause per kind of item; a term
% matching none of them makes the predicate fail.

% Lists: the empty list is written directly, others through pp_list/2.
pp_indented( [], Indent ) :-
    pp_indent( Indent), write( '[]' ).
pp_indented( List, Indent ) :-
    List = [_|_],
    pp_indent( Indent ),
    pp_list( List, Indent ).
pp_indented( comment(Text), Indent ) :-
    pp_indent( Indent ), write( 'comment(' ), pp_string(Text), write( ')' ).
% A namespace wraps a single element, which is written one level deeper.
pp_indented( namespace(URI,Prefix,Element), Indent ) :-
    pp_indent( Indent ),
    write( 'namespace( ' ), writeq( URI ), pp_comma_sp,
    pp_string( Prefix ), pp_comma, nl,
    pp_indented( Element, s(Indent) ), nl,
    pp_indent( s(Indent) ), write( ')' ).
% Attributes and contents of an element are written one level deeper.
pp_indented( element(Tag,Attributes,Contents), Indent ) :-
    pp_indent( Indent ), write( 'element( ' ), writeq( Tag ), pp_comma, nl,
    pp_attributes( Attributes, s(Indent) ), pp_comma, nl,
    pp_list( Contents, s(Indent) ), write( ' )' ).
pp_indented( instructions(Target, Processing), Indent ) :-
    pp_indent( Indent ), write( 'instructions( ' ), writeq( Target ), pp_comma_sp,
    pp_string(Processing), write( ')' ).
% The doctype identifier is one of the public/system/local terms below.
pp_indented( doctype(Name, DoctypeId), Indent ) :-
    pp_indent( Indent ), write( 'doctype( ' ), writeq( Name ), pp_comma_sp,
    pp_indented( DoctypeId, s(Indent) ), %'
    write( ' )' ).
pp_indented( cdata(CData), Indent ) :-
    pp_indent( Indent ), write( 'cdata(' ), pp_string(CData), write( ')' ).
pp_indented( pcdata(PCData), Indent ) :-
    pp_indent( Indent ), write( 'pcdata(' ), pp_string(PCData), write( ')' ).
% Doctype identifiers are written inline, without leading indentation.
pp_indented( public(URN,URL), _Indent ) :-
    write( 'public(' ), pp_string(URN), pp_comma_sp,
    pp_string(URL), write( ')' ).
pp_indented( public(URN,URL,Literals), Indent ) :-
    write( 'public(' ), pp_string(URN), pp_comma_sp,
    % The comma between the URL and the literals list is required for the
    % output to be a well-formed term; it matches the system/2 clause below.
    pp_string(URL), pp_comma_sp,
    pp_list( Literals, s(Indent) ), write( ')' ).
pp_indented( system(URL), _Indent ) :-
    write( 'system(' ), pp_string(URL), write( ')' ).
pp_indented( system(URL,Literals), Indent ) :-
    write( 'system(' ), pp_string(URL), pp_comma_sp,
    pp_list( Literals, s(Indent) ), write( ')' ).
pp_indented( local, _Indent ) :-
    write( local ).
pp_indented( local(Literals), Indent ) :-
    write( 'local(' ), nl,
    pp_list( Literals, s(Indent) ), write( ')' ).
pp_indented( dtd_literal(String), Indent ) :-
    pp_indent( Indent ), write( 'dtd_literal(' ), pp_string(String), write( ')' ).
% Error items are flagged in the output with a SYNTAX ERROR comment.
pp_indented( out_of_context(Tag), Indent ) :-
    pp_indent( Indent ), write( '/* SYNTAX ERROR */ out_of_context( ' ),
    writeq( Tag ), write( ' )' ).
pp_indented( unparsed(String), Indent ) :-
    pp_indent( Indent ), write( '/* SYNTAX ERROR */ unparsed( ' ),
    pp_string(String), write( ' )' ).
| 975 | |
|---|
% pp_list( +List, +Indent ) writes List at depth Indent. The empty list is
% written as []; otherwise the brackets get a line each and the items, one
% per line and separated by commas, are written with pp_indented/2.
pp_list( [], Indent ) :-
    pp_indent( Indent ),
    write( [] ).
pp_list( [First|Others], Indent ) :-
    pp_indent( Indent ),
    write( '[' ),
    nl,
    pp_indented( First, Indent ),
    pp_list1( Others, Indent ),
    pp_indent( Indent ),
    write( ']' ).

% pp_list1( +Items, +Indent ) writes the items after the first, each one
% preceded by a comma and a newline, and ends the last item's line.
pp_list1( [], _Indent ) :-
    nl.
pp_list1( [Item|Items], Indent ) :-
    pp_comma,
    nl,
    pp_indented( Item, Indent ),
    pp_list1( Items, Indent ).
| 990 | |
|---|
% pp_attributes( +Attributes, +Indent ) writes a list of Name=Value pairs on
% one line, at depth Indent, as [Name=Value, ...].
pp_attributes( [], Indent ) :-
    pp_indent( Indent ),
    write( [] ).
pp_attributes( [First|Others], Indent ) :-
    pp_indent( Indent ),
    write( '[' ),
    pp_attributes1( Others, First ),
    write( ']' ).

% pp_attributes1( +Remaining, +Current ) writes Current and then the
% Remaining attributes, with ", " between consecutive attributes.
pp_attributes1( [], Name=Value ) :-
    pp_name( Name ),
    pp_string( Value ).
pp_attributes1( [Next|Others], Name=Value ) :-
    pp_name( Name ),
    pp_string( Value ),
    pp_comma_sp,
    pp_attributes1( Others, Next ).


% pp_name( +Name ) writes an attribute name followed by "=". A name listed in
% possible_operator/1 is written unquoted inside parentheses; any other name
% is written with writeq/1.
pp_name( Name ) :-
    (   possible_operator( Name ) ->
        write( '(' ), write( Name ), write( ')=' )
    ;   otherwise ->
        writeq( Name ), write( '=' )
    ).
| 1011 | |
|---|
% possible_operator( ?Atom ) lists the atoms that pp_name/1 writes inside
% parentheses (presumably because one Prolog system or another declares them
% as operators - the table itself does not say). The entries are in
% alphabetical order, grouped here by initial letter.
possible_operator( (abolish) ). possible_operator( (attribute) ).
possible_operator( (check_advice) ). possible_operator( (compile_command) ).
possible_operator( (delay) ). possible_operator( (demon) ). possible_operator( (discontiguous) ).
possible_operator( (div) ). possible_operator( (do) ). possible_operator( (document_export) ).
possible_operator( (document_import) ). possible_operator( (dy) ). possible_operator( (dynamic) ).
possible_operator( (edb) ). possible_operator( (eexport) ). possible_operator( (else) ).
possible_operator( (except) ). possible_operator( (export) ).
possible_operator( (foreign_pred) ). possible_operator( (from) ). possible_operator( (from_chars) ).
possible_operator( (from_file) ). possible_operator( (from_stream) ).
possible_operator( (global) ).
possible_operator( (help) ). possible_operator( (hilog) ).
possible_operator( (if) ). possible_operator( (import) ). possible_operator( (index) ).
possible_operator( (initialization) ). possible_operator( (is) ).
possible_operator( (listing) ). possible_operator( (local) ). possible_operator( (locked) ).
possible_operator( (meta_predicate) ). possible_operator( (mod) ). possible_operator( (mode) ).
possible_operator( (module_transparent) ). possible_operator( (multifile) ).
possible_operator( (namic) ). possible_operator( (nocheck_advice) ).
possible_operator( (nospy) ). possible_operator( (not) ).
possible_operator( (of) ). possible_operator( (once) ). possible_operator( (onto_chars) ).
possible_operator( (onto_file) ). possible_operator( (onto_stream) ).
possible_operator( (parallel) ). possible_operator( (public) ).
possible_operator( (r) ). possible_operator( (rem) ).
possible_operator( (skipped) ). possible_operator( (spy) ).
possible_operator( (table) ). possible_operator( (then) ). possible_operator( (thread_local) ).
possible_operator( (ti) ). possible_operator( (ti_off) ). possible_operator( (traceable) ).
possible_operator( (unskipped) ). possible_operator( (untraceable) ).
possible_operator( (use_subsumptive_tabling) ). possible_operator( (use_variant_tabling) ).
possible_operator( (volatile) ).
possible_operator( (with) ). possible_operator( (with_input_from_chars) ).
possible_operator( (with_output_to_chars) ).
possible_operator( (xor) ).
| 1081 | |
|---|
% pp_indent( +Indent ) writes one ' ' for every s/1 wrapper of Indent, a
% successor-notation depth: 0, s(0), s(s(0)), ...
pp_indent( 0 ).
pp_indent( s(Depth) ) :-
    put_char( ' ' ),
    pp_indent( Depth ).

% pp_comma writes a comma with no following space.
pp_comma :-
    put_char( ',' ).

% pp_comma_sp writes a comma followed by one space.
pp_comma_sp :-
    write( ', ' ).
| 1092 | |
|---|
| 1093 | |
|---|
| 1094 | % Entity and Namespace map operations: these maps are usually quite small, so |
|---|
| 1095 | % a linear list lookup is okay. They could be substituted by a logarithmic |
|---|
| 1096 | % data structure - in extremis. |
|---|
| 1097 | |
|---|
| 1098 | |
|---|
| 1099 | /* empty_map( ?Map ) is true if Map is a null map. |
|---|
| 1100 | */ |
|---|
| 1101 | :- private(empty_map/1). |
|---|
| 1102 | :- mode(empty_map(?nonvar), zero_or_one). |
|---|
| 1103 | :- info(empty_map/1, [ |
|---|
| 1104 | comment is 'True if Map is a null map.', |
|---|
| 1105 | argnames is ['Map']]). |
|---|
| 1106 | |
|---|
% The null map is the empty list: map_store/4 builds maps as lists of
% Key-Data pairs kept in standard order of their keys.
empty_map( [] ).
| 1108 | |
|---|
| 1109 | /* map_member( +Key, +Map, ?Data ) is true if Map is an ordered map structure |
|---|
| 1110 | * which records the pair Key-Data. Key must be ground. |
|---|
| 1111 | */ |
|---|
| 1112 | :- private(map_member/3). |
|---|
| 1113 | :- mode(map_member(+nonvar, +nonvar, ?nonvar), zero_or_one). |
|---|
| 1114 | :- info(map_member/3, [ |
|---|
| 1115 | comment is 'True if Map is a ordered map structure which records the pair Key-Data. Key must be ground.', |
|---|
| 1116 | argnames is ['Key', 'Map', 'Data']]). |
|---|
| 1117 | |
|---|
% Walk the ordered pair list: stop with the data on an identical key, keep
% going while the sought key sorts after the current one, and fail as soon as
% it sorts before it (or when the list is exhausted).
map_member( Key, [EntryKey-EntryData|Entries], Data ) :-
    compare( Order, Key, EntryKey ),
    (   Order == (=) ->
        Data = EntryData
    ;   Order == (>) ->
        map_member( Key, Entries, Data )
    ).
| 1124 | |
|---|
| 1125 | /* map_store( +Map0, +Key, +Data, ?Map1 ) is true if Map0 is an ordered map |
|---|
| 1126 | * structure, Key must be ground, and Map1 is identical to Map0 except that |
|---|
| 1127 | * the pair Key-Data is recorded by Map1. |
|---|
| 1128 | */ |
|---|
| 1129 | :- private(map_store/4). |
|---|
| 1130 | :- mode(map_store(+nonvar, +nonvar, +nonvar, ?nonvar), zero_or_one). |
|---|
| 1131 | :- info(map_store/4, [ |
|---|
| 1132 | comment is 'True if Map0 is an ordered map structure, Key must be ground, and Map1 is identical to Map0 except that the pair Key-Data is recorded by Map1.', |
|---|
| 1133 | argnames is ['Map0', 'Key', 'Data', 'Map1']]). |
|---|
| 1134 | |
|---|
% Insert or replace Key-Data while keeping the pair list in standard order of
% keys: an identical key has its data replaced, a smaller key is inserted in
% front of the current entry, and a larger key is stored further down.
map_store( [], Key, Data, [Key-Data] ).
map_store( [EntryKey-EntryData|Entries], Key, Data, Map ) :-
    compare( Order, Key, EntryKey ),
    (   Order == (=) ->
        Map = [Key-Data|Entries]
    ;   Order == (<) ->
        Map = [Key-Data,EntryKey-EntryData|Entries]
    ;   otherwise ->    % >
        Map = [EntryKey-EntryData|Stored],
        map_store( Entries, Key, Data, Stored )
    ).
| 1145 | |
|---|
| 1146 | /* context(?Element, ?PreserveSpace, ?CurrentNS, ?DefaultNS, ?Entities, ?Namespaces ) |
|---|
| 1147 | * is an ADT hiding the "state" arguments for XML Acquisition |
|---|
| 1148 | */ |
|---|
| 1149 | initial_context( |
|---|
| 1150 | Controls, |
|---|
| 1151 | context(void,PreserveSpace,'','',Entities,Empty, |
|---|
| 1152 | RemoveAttributePrefixes,AllowAmpersand) |
|---|
| 1153 | ) :- |
|---|
| 1154 | empty_map( Empty ), |
|---|
| 1155 | ( member( extended_characters(false), Controls ) -> |
|---|
| 1156 | Entities = Empty |
|---|
| 1157 | ; otherwise -> |
|---|
| 1158 | extended_character_entities(Entities) |
|---|
| 1159 | ), |
|---|
| 1160 | ( member( format(false), Controls ) -> |
|---|
| 1161 | PreserveSpace = true |
|---|
| 1162 | ; otherwise -> |
|---|
| 1163 | PreserveSpace = false |
|---|
| 1164 | ), |
|---|
| 1165 | ( member( remove_attribute_prefixes(true), Controls ) -> |
|---|
| 1166 | RemoveAttributePrefixes = true |
|---|
| 1167 | ; otherwise -> |
|---|
| 1168 | RemoveAttributePrefixes = false |
|---|
| 1169 | ), |
|---|
| 1170 | ( member( allow_ampersand(true), Controls ) -> |
|---|
| 1171 | AllowAmpersand = true |
|---|
| 1172 | ; otherwise -> |
|---|
| 1173 | AllowAmpersand = false |
|---|
| 1174 | |
|---|