module languages/xml/syntax/XML-STRICT

%% Transliteration of the XML v1.0 standard.
%% Rule numbers refer to the corresponding rule in the standard.
%%
%% Known issues:
%% - No support for Unicode.
%% - The Prolog element becomes ambiguous when the doctype declaration
%%   is missing.
%%
%% Layout of this module:
%% - Every production is written on its own line because a line comment
%%   extends to the end of the physical line.
%% - The whole grammar is defined as lexical syntax, so white space is
%%   significant and is matched explicitly through the sort S.
%% - Only the sorts listed in the exported "sorts" section are visible to
%%   importing modules; all remaining sorts are hidden.

exports
  context-free start-symbols
    Document

  sorts
    Document Element Content AttValue Prolog Attribute

hiddens
  sorts
    AttDef AttType %AttValue% AttlistDecl %Attribute% CDSect CP Char CharData
    CharRef Children Choice Comment ConditionalSect ContentSpec Dash
    DefaultDecl Digit DocTypeDecl ETag ElementDecl EmptyElementTag EncName
    EncodingDecl EntityDecl EntityDef EntityRef EntityValue EnumeratedType
    Enumeration Eq ExtParsedEnt ExtPe ExtSubset ExtSubsetDecl ExternalId
    GEDecl ISO639Code IanaCode %Ignore IgnoreSect IgnoreSectContents%
    IncludeSect LangCode LanguageId Letter MarkUpDecl Misc Mixed NDataDecl
    Name NameChar Names Nmtoken Nmtokens NotationDecl NotationType PEDecl
    PEDef PEReference PI PITarget %Prolog% PubidChar PubidLiteral PublicID
    Reference S SDDecl STag Seq StringType SubCode SystemLiteral TextDecl
    TokenizedType UserCode VersionInfo VersionNum XMLDecl

exports
  lexical syntax

    %% ---- Document -------------------------------------------------------
    %% The trailing Misc* of rule [1] is intentionally left commented out.
    %[ 1]% Prolog Element %Misc*% -> Document

    %% ---- Characters and white space -------------------------------------
    %[ 2]% ~[] -> Char
    %[ 3]% [\ \n\r\t]+ -> S

    %% ASCII-only stand-ins for the Letter and Digit classes (no Unicode).
    [a-zA-Z] -> Letter
    [0-9] -> Digit

    %% ---- Names and tokens -----------------------------------------------
    %[ 4]% Letter | Digit | [\.\-\_\:] -> NameChar
    %[ 5]% (Letter | "_" | ":") NameChar* -> Name
    %[ 6]% Name (S Name)* -> Names
    %[ 7]% NameChar+ -> Nmtoken
    %[ 8]% Nmtoken (S Nmtoken)* -> Nmtokens

    %% ---- Literals -------------------------------------------------------
    %[ 9]% "\"" (~[\%\&\"] | PEReference | Reference)* "\"" -> EntityValue
    %[ 9]% "'" (~[\%\&\'] | PEReference | Reference)* "'" -> EntityValue
    %[10]% "\"" (~[\<\&\"] | Reference)* "\"" -> AttValue
    %[10]% "'" (~[\<\&\'] | Reference)* "'" -> AttValue
    %[11]% ("\"" ~[\"]* "\"") | ("'" ~[\']* "'") -> SystemLiteral
    %[12]% ("\"" PubidChar* "\"") | ("'" PubidChar* "'") -> PubidLiteral
    %% NOTE(review): rule [13] of the standard lists space, carriage return
    %% and line feed; this class has a tab and no carriage return - confirm.
    %[13]% [\ \n\t] | [ a-zA-Z0-9] | [\-\'\(\)\+\,\.\/\:\=\?\;\!\*\#\@\$\_\%] -> PubidChar

    %% ---- Character data -------------------------------------------------
    %% Character data must not contain the CDATA-section close delimiter,
    %% which the reject production below filters out.
    %% NOTE(review): the reject production requires at least one character
    %% in front of "]]>", whereas the standard allows none - confirm.
    %[14]% ~[\<\&]+ -> CharData {avoid}
           ~[\<\&]+ "]]>" ~[\<\&]* -> CharData {reject}

    %% ---- Comments -------------------------------------------------------
    %% NOTE(review): the comment body was rebuilt from rule [15] of the
    %% standard (no "--" inside a comment) - confirm against upstream.
    %[15]% "<!--" (~[\-] | (Dash ~[\-]))* "-->" -> Comment
           "-" -> Dash

    %% ---- Processing instructions ----------------------------------------
    %[16]% "<?" PITarget (S Char*)? "?>" -> PI
    %% Any name is a valid target, except "xml" in any letter case.
    %[17]% Name -> PITarget
           [xX][mM][lL] -> PITarget {reject}

    %% ---- CDATA sections -------------------------------------------------
    %[18-21]% "<![CDATA[" (~[\]] | "]" | "]]")* "]]>" -> CDSect

    %% ---- Prolog and document type declaration ---------------------------
    %[22]% XMLDecl? Misc* (DocTypeDecl Misc*)? -> Prolog
    %[23]% "<?xml" VersionInfo EncodingDecl? SDDecl? S? "?>" -> XMLDecl
    %[24]% S "version" Eq (("\"" VersionNum "\"") | ("'" VersionNum "'") ) -> VersionInfo {prefer} %% over general Attribute
    %[25]% S? "=" S? -> Eq
    %[26]% ([a-zA-Z0-9\_\.\:] | "-")+ -> VersionNum
    %[27]% Comment | PI | S -> Misc
    %[28]% "<!DOCTYPE" S Name (S ExternalId)? S? ("[" (MarkUpDecl | PEReference | S)* "]" S? )? ">" -> DocTypeDecl
    %[29]% ElementDecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment -> MarkUpDecl

    %% ---- External subset ------------------------------------------------
    %[30]% TextDecl? ExtSubsetDecl -> ExtSubset
    %[31]% (MarkUpDecl | ConditionalSect | PEReference | S)* -> ExtSubsetDecl

    %% ---- Standalone document declaration --------------------------------
    %[32]% S "standalone" Eq (("'" ("yes" | "no") "'") | ("\"" ("yes" | "no") "\"")) -> SDDecl

    %% ---- Language identification ----------------------------------------
    %[33]% LangCode ("-" SubCode)* -> LanguageId
    %[34]% ISO639Code | IanaCode | UserCode -> LangCode
    %[35]% [a-zA-Z] [a-zA-Z] -> ISO639Code
    %[36]% [iI] "-" [a-zA-Z]+ -> IanaCode
    %[37]% [xX] "-" [a-zA-Z]+ -> UserCode
    %[38]% [a-zA-Z]+ -> SubCode

    %% ---- Elements, tags and content -------------------------------------
    %[39]% EmptyElementTag | (STag Content* ETag) -> Element
    %[40]% "<" Name (S Attribute)* S? ">" -> STag
    %[41]% Name Eq AttValue -> Attribute %%{avoid} %% clash with ASF equation
    %[42]% "</" Name S? ">" -> ETag
    %[43]% (Element | CharData | Reference | CDSect | PI | Comment) -> Content {prefer} %% and avoid amb with rule [39]
    %[44]% "<" Name (S Attribute)* S? "/>" -> EmptyElementTag

    %% ---- Element type declarations --------------------------------------
    %[45]% "<!ELEMENT" S Name S ContentSpec S?">" -> ElementDecl
    %[46]% "EMPTY" | "ANY" | Mixed | Children -> ContentSpec
    %[47]% (Choice | Seq) ("?" | "*" | "+")? -> Children
    %[48]% (Name | Choice | Seq) ("?" | "*" | "+")? -> CP
    %[49]% "(" S? CP (S? "|" S? CP)* S? ")" -> Choice
    %[50]% "(" S? CP (S? "," S? CP)* S? ")" -> Seq
    %[51]% ( "(" S? "#PCDATA" (S? "|" S? Name)* S? ")*" ) | ( "(" S? "#PCDATA" S? ")" ) -> Mixed

    %% ---- Attribute-list declarations ------------------------------------
    %[52]% "<!ATTLIST" S Name AttDef* S? ">" -> AttlistDecl
    %[53]% S Name S AttType S DefaultDecl -> AttDef
    %[54]% StringType | TokenizedType | EnumeratedType -> AttType
    %[55]% "CDATA" -> StringType
    %[56]% "ID" | "IDREF" | "IDREFS" | "ENTITY" | "ENTITIES" | "NMTOKEN" | "NMTOKENS" -> TokenizedType
    %[57]% NotationType | Enumeration -> EnumeratedType
    %[58]% "NOTATION" S "(" S? Name (S? "|" S? Name)* S? ")" -> NotationType
    %[59]% "(" S? Nmtoken (S? "|" S? Nmtoken)* S? ")" -> Enumeration
    %[60]% "#REQUIRED" | "#IMPLIED" | (("#FIXED" S)? AttValue) -> DefaultDecl

    %% ---- Conditional sections -------------------------------------------
    %% Only INCLUDE sections are supported; the IGNORE rules [63]-[65] are
    %% kept below as comments.
    %[61]% IncludeSect %| IgnoreSect % -> ConditionalSect
    %[62]% "<![" S? "INCLUDE" S? "[" ExtSubsetDecl "]]>" -> IncludeSect
    %% %[63]% "<![" S? "IGNORE" S? "[" IgnoreSectContents* "]]>" -> IgnoreSect
    %% %[64]% Ignore ("<![" IgnoreSectContents "]]>" Ignore)* -> IgnoreSectContents
    %% %[65]% Char* -> Ignore {avoid}

    %% ---- Character and entity references --------------------------------
    %[66]% ("&#" [0-9]+ ";") | ("&#x" [0-9a-fA-F]+ ";") -> CharRef
    %[67]% EntityRef | CharRef -> Reference
    %[68]% "&" Name ";" -> EntityRef
    %[69]% "%" Name ";" -> PEReference

    %% ---- Entity declarations --------------------------------------------
    %[70]% GEDecl | PEDecl -> EntityDecl
    %[71]% "<!ENTITY" S Name S EntityDef S? ">" -> GEDecl
    %[72]% "<!ENTITY" S "%" S Name S PEDef S? ">" -> PEDecl
    %[73]% EntityValue | (ExternalId NDataDecl?) -> EntityDef
    %[74]% EntityValue | ExternalId -> PEDef
    %[75]% "SYSTEM" S SystemLiteral -> ExternalId
           "PUBLIC" S PubidLiteral S SystemLiteral -> ExternalId
    %[76]% S "NDATA" S Name -> NDataDecl

    %% ---- Parsed entities and encoding declaration -----------------------
    %[77]% "<?xml" VersionInfo? EncodingDecl S? "?>" -> TextDecl
    %[78]% TextDecl? Content* -> ExtParsedEnt
    %[79]% TextDecl? ExtSubsetDecl -> ExtPe
    %[80]% S "encoding" Eq (("\"" EncName "\"") | ("'" EncName "'")) -> EncodingDecl
    %[81]% [A-Za-z] ([A-Za-z0-9\.\_] | "-")* -> EncName

    %% ---- Notation declarations ------------------------------------------
    %[82]% "<!NOTATION" S Name S (ExternalId | PublicID) S? ">" -> NotationDecl
    %[83]% "PUBLIC" S PubidLiteral -> PublicID

  lexical restrictions
    %% Enforce a longest match for S, Name and CharData
    S -/- [\ \n\r\t]
    S? -/- [\ \n\r\t]
    Name -/- [a-zA-Z0-9\.\-\_\:]
    CharData -/- ~[\<\&]
    %% A Dash may not be directly followed by another "-".
    Dash -/- [\-]