module languages/xml/syntax/XML

%% Transliteration of the XML v1.0 standard.
%% Rule numbers refer to the corresponding rule in the standard.

%% Known issues:
%% - No support for Unicode
%% - The Prolog element becomes ambiguous when the doctype declaration is missing.

%% Some rules are still commented out since ambiguity issues have not yet been resolved.

exports
  context-free start-symbols Document

  sorts
    AttDef AttType AttValue AttValueD AttValueS AttlistDecl Attribute
    CDSect CP Char CharData CharRef Children Choice Comment ConditionalSect
    Content ContentSpec
    Dash DefaultDecl Digit Document DocTypeDecl
    Element ETag ElementDecl EmptyElementTag EncName EncodingDecl EntityDecl
    EntityDef EntityRef EntityValue EntityValueD EntityValueS EnumeratedType
    Enumeration Eq %ExtParsedEnt ExtPe% ExtSubset ExtSubsetDecl ExternalId
    GEDecl
    ISO639Code IanaCode %Ignore IgnoreSect IgnoreSectContents% IncludeSect
    LangCode LanguageId Letter
    MarkUpDecl Misc Mixed
    NDataDecl Name NameChar Names Nmtoken Nmtokens NonEmptyElementTag
    NotationDecl NotationType
    PEDecl PEDef PEReference PI PITarget Prolog PubidChar PubidLiteral
    PubidLiteralD PubidLiteralS PublicID
    Reference
    S SDDecl STag Seq StringType SubCode SystemLiteral SystemLiteralD
    SystemLiteralS
    TextDecl TokenizedType
    UserCode
    VersionInfo VersionNum
    XMLDecl

  sorts
    %% Auxiliary sorts for the benefit of lexical equations
    EV-CharD EV-CharS AV-CharD AV-CharS SL-CharD SL-CharS

exports
  context-free syntax
    %[ 1]% Prolog Element %Misc*% -> Document {avoid} %% Element being parsed as Document

  lexical syntax
    %[ 2]% ~[]            -> Char
    %[ 3]% [\ \t\n\r]+    -> S      {cons("xml-s")}
    %[ 3]% [\ \t\n\r]     -> LAYOUT {cons("xml-layout")}

           [a-zA-Z]       -> Letter
           [0-9]          -> Digit
    %[ 4]% Letter | Digit | [\.\-\_\:]       -> NameChar
    %[ 5]% (Letter | "_" | ":") NameChar*    -> Name {category("MetaKeyword")}

  context-free syntax
    %[ 6]% Name* -> Names

  lexical syntax
    %[ 7]% NameChar+ -> Nmtoken

  context-free syntax
    %[ 8]% Nmtoken* -> Nmtokens

  lexical syntax
    %% Quoted literals come in a double-quoted (…D) and a single-quoted (…S)
    %% variant; each variant's character sort excludes its own delimiter.
           ~[\%\&\"] -> EV-CharD
           ~[\%\&\'] -> EV-CharS
    %[ 9]% [\"] ( EV-CharD | PEReference | Reference)* [\"] -> EntityValueD
    %[ 9]% [\'] ( EV-CharS | PEReference | Reference)* [\'] -> EntityValueS
           EntityValueS | EntityValueD                      -> EntityValue

           ~[\<\&\"] -> AV-CharD
           ~[\<\&\'] -> AV-CharS
    %[10]% [\"] (AV-CharD | Reference)* [\"] -> AttValueD
    %[10]% [\'] (AV-CharS | Reference)* [\'] -> AttValueS
           AttValueS | AttValueD             -> AttValue

           ~[\"] -> SL-CharD
           ~[\'] -> SL-CharS
    %[11]% [\"] SL-CharD* [\"]             -> SystemLiteralD
           [\'] SL-CharS* [\']             -> SystemLiteralS
           SystemLiteralS | SystemLiteralD -> SystemLiteral

    %[12]% [\"] PubidChar* [\"]            -> PubidLiteralD
           [\'] PubidChar* [\']            -> PubidLiteralS
           PubidLiteralS | PubidLiteralD   -> PubidLiteral

    %[13]% [\ \n\t] | [ a-zA-Z0-9] | [\-\'\(\)\+\,\.\/\:\=\?\;\!\*\#\@\$\_\%] -> PubidChar

    %[14]% ~[\<\&]+                   -> CharData {avoid}
           ~[\<\&]* "]]>" ~[\<\&]*    -> CharData {reject}

    %% Rule [15] of the standard: a comment body may not contain "--".
    %% A Dash must therefore be followed by a non-dash character.
    %[15]% "<!--" (~[\-] | (Dash ~[\-]))* "-->" -> Comment {category("Comment")}
           "-" -> Dash

    %[16]% "<?" PITarget (S Char*)? "?>" -> PI

    %% Any name is a PI target except (case-insensitively) "xml".
    %[17]% Name            -> PITarget
           [xX][mM][lL]    -> PITarget {reject}

    %[18-21]% "<![CDATA[" (~[\]] | "]" | "]]")* "]]>" -> CDSect

  context-free syntax
    %[22]% XMLDecl? Misc* (DocTypeDecl Misc*)? -> Prolog

    %[23]% "<?xml" VersionInfo EncodingDecl? SDDecl? "?>" -> XMLDecl

    %[24]% "version" Eq (("\"" VersionNum "\"") | ("'" VersionNum "'") ) -> VersionInfo {prefer} %% over general Attribute

  lexical syntax
    %[25]% S? "=" S?              -> Eq
    %[26]% [a-zA-Z0-9\_\.\:\-]+   -> VersionNum

  context-free syntax
    %[27]% Comment | PI -> Misc

    %[28]% "<!DOCTYPE" Name ExternalId? ("[" (MarkUpDecl | PEReference)* "]" )? ">" -> DocTypeDecl

    %[29]% ElementDecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment -> MarkUpDecl

    %[30]% TextDecl? ExtSubsetDecl -> ExtSubset

    %[31]% (MarkUpDecl | ConditionalSect | PEReference)* -> ExtSubsetDecl

    %[32]% "standalone" Eq (("'" ("yes" | "no") "'") | ("\"" ("yes" | "no") "\"")) -> SDDecl

  lexical syntax
    %[33]% LangCode ("-" SubCode)*          -> LanguageId
    %[34]% ISO639Code | IanaCode | UserCode -> LangCode
    %[35]% [a-zA-Z] [a-zA-Z]                -> ISO639Code
    %[36]% [iI] "-" [a-zA-Z]+               -> IanaCode
    %[37]% [xX] "-" [a-zA-Z]+               -> UserCode
    %[38]% [a-zA-Z]+                        -> SubCode

  context-free syntax
    %[39]% EmptyElementTag | NonEmptyElementTag -> Element {prefer}
           STag Content* ETag                   -> NonEmptyElementTag

    %[40]% "<" Name Attribute* ">" -> STag
    %[41]% Name Eq AttValue        -> Attribute
    %[42]% "</" Name ">"           -> ETag
    %[43]% (Element | CharData | Reference | CDSect | PI | Comment) -> Content
    %[44]% "<" Name Attribute* "/>" -> EmptyElementTag

  context-free syntax
    %[45]% "<!ELEMENT" Name ContentSpec ">"          -> ElementDecl
    %[46]% "EMPTY" | "ANY" | Mixed | Children        -> ContentSpec
    %[47]% (Choice | Seq) ("?" | "*" | "+")?         -> Children
    %[48]% (Name | Choice | Seq) ("?" | "*" | "+")?  -> CP
    %[49]% "(" {CP "|"}+ ")"                         -> Choice
    %[50]% "(" {CP ","}+ ")"                         -> Seq
    %[51]% ( "(" "#PCDATA" ("|" Name)* ")*" ) | ( "(" "#PCDATA" ")" ) -> Mixed

    %[52]% "<!ATTLIST" S Name AttDef* ">"            -> AttlistDecl
    %[53]% Name AttType DefaultDecl                  -> AttDef
    %[54]% StringType | TokenizedType | EnumeratedType -> AttType
    %[55]% "CDATA"                                   -> StringType
    %[56]% "ID" | "IDREF" | "IDREFS" | "ENTITY" | "ENTITIES" | "NMTOKEN" | "NMTOKENS" -> TokenizedType
    %[57]% NotationType | Enumeration                -> EnumeratedType
    %[58]% "NOTATION" "(" {Name "|"}+ ")"            -> NotationType
    %[59]% "(" {Nmtoken "|"}+ ")"                    -> Enumeration
    %[60]% "#REQUIRED" | "#IMPLIED" | ("#FIXED"? AttValue) -> DefaultDecl

    %[61]% IncludeSect %| IgnoreSect% -> ConditionalSect
    %[62]% "<![" S? "INCLUDE" S? "[" ExtSubsetDecl "]]>" -> IncludeSect
    %% %[63]% "<![" S? "IGNORE" S? "[" IgnoreSectContents* "]]>" -> IgnoreSect
    %% %[64]% Ignore ("<![" IgnoreSectContents "]]>" Ignore)* -> IgnoreSectContents
    %% %[65]% Char+ -> Ignore {avoid}

  lexical syntax
    %[66]% ("&#" [0-9]+ ";") | ("&#x" [0-9a-fA-F]+ ";") -> CharRef
    %[67]% EntityRef | CharRef -> Reference
    %[68]% "&" Name ";"        -> EntityRef
    %[69]% "%" Name ";"        -> PEReference

  context-free syntax
    %[70]% GEDecl | PEDecl                     -> EntityDecl
    %[71]% "<!ENTITY" Name EntityDef ">"       -> GEDecl
    %[72]% "<!ENTITY" "%" Name PEDef ">"       -> PEDecl
    %[73]% EntityValue | (ExternalId NDataDecl?) -> EntityDef
    %[74]% EntityValue | ExternalId            -> PEDef
    %[75]% "SYSTEM" SystemLiteral              -> ExternalId
           "PUBLIC" PubidLiteral SystemLiteral -> ExternalId
    %[76]% "NDATA" Name                        -> NDataDecl

    %[77]% "<?xml" VersionInfo? EncodingDecl "?>" -> TextDecl
    %% %[78]% TextDecl? Content* -> ExtParsedEnt
    %% %[79]% TextDecl? ExtSubsetDecl -> ExtPe
    %[80]% "encoding" Eq (("\"" EncName "\"") | ("'" EncName "'")) -> EncodingDecl

  lexical syntax
    %[81]% [A-Za-z] ([A-Za-z0-9\.\_] | "-")* -> EncName

  context-free syntax
    %[82]% "<!NOTATION" Name (ExternalId | PublicID) ">" -> NotationDecl
    %[83]% "PUBLIC" PubidLiteral                         -> PublicID

  lexical restrictions
    %% Enforce longest match
    S        -/- [\ \t\n\r]
    S?       -/- [\ \t\n\r]
    Name     -/- [a-zA-Z0-9\.\-\_\:]
    CharData -/- ~[\<\&]
    Dash     -/- [\-]

  context-free restrictions
    %% Enforce longest match
    LAYOUT? -/- [\ \t\n\r]