%%
%% Double quoted string literals are more difficult to parse because there are certain
%% escapes that should be performed. In order to mimic this, a double quoted string is
%% parsed as a list of things that can be in a double quoted string. These things are
%% the escapes like '\n' or '\01'. The variables that can be in a string enclosed in
%% curly braces are also parsed separately. Everything that is not special but just a
%% part of the string is parsed as a Literal.
%%
%% @author Eric Bouwers
%% @author Martin Bravenboer
module languages/php/common/literals/DoubleQuotedStringLiterals
imports
  languages/php/common/literals/VariableLiterals
  languages/php/common/literals/NumberLiterals

exports

  sorts DoubleQuotedPart ConstantEncapsedString DQContent DQList DoubleQuotedPartSpecial
  syntax
  %% A double quoted string is a list of parts.
    "\"" DQContent "\"" -> <ConstantEncapsedString-CF> {cons("DoubleQuoted")}

  %% The content is either a single optional literal part, or a list of special
  %% parts with an optional literal part before and after it.
    <DoubleQuotedPart?-CF> DQList <DoubleQuotedPart?-CF> -> DQContent {cons("DQContent")}
    <DoubleQuotedPart?-CF>                               -> DQContent {cons("DQContent")}

  %% Special parts are chained with an optional literal part between them.
    DQList <DoubleQuotedPart?-CF> DQList -> DQList {cons("DQContent"), left}
    DoubleQuotedPartSpecial              -> DQList

  %% Parts that can be in a string
    <DoubleQuotedLit-LEX> -> <DoubleQuotedPart-CF> {cons("Literal")}
    Escape                -> DoubleQuotedPartSpecial
    HexaCharacter         -> DoubleQuotedPartSpecial
    OctaCharacter         -> DoubleQuotedPartSpecial
    EscapeVariable        -> DoubleQuotedPartSpecial
    BracedVariable        -> DoubleQuotedPartSpecial

  %% Literals. Notice that a slash or the dollar should only be parsed separately if
  %% they actually escape something. If they do not escape anything, the characters must
  %% appear in the literal.
  sorts SlashCharLit DollarCharLit CurlyBracketLit DoubleQuotedLit
  lexical syntax
    (~[\"\\\$\{] | SlashCharLit | DollarCharLit | CurlyBracketLit)+ -> DoubleQuotedLit
    "\\" -> SlashCharLit
    "$"  -> DollarCharLit
    "{"  -> CurlyBracketLit

  %% Take the longest literal
  lexical restrictions
    DoubleQuotedLit -/- ~[\"\\\$\{]
    DoubleQuotedLit -/- [\$] . ~[a-zA-Z0-9\_\{]
    DoubleQuotedLit -/- [\{] . ~[\$]

  %% A literal slash may not be followed by something that it can escape. This
  %% makes sure that the escapes are parsed to separate strings. Otherwise the
  %% escapes would just appear as part of the literal.
  %% NOTE(review): the two- and three-character follow restrictions below look
  %% subsumed by the shorter restriction preceding them; kept unchanged.
  lexical restrictions
    SlashCharLit -/- [\\\$\"rnt\']
    SlashCharLit -/- [x] . [0-9A-Fa-f]
    SlashCharLit -/- [x] . [0-9A-Fa-f] . [0-9A-Fa-f]
    SlashCharLit -/- [0-7]
    SlashCharLit -/- [0-7] . [0-7]
    SlashCharLit -/- [0-7] . [0-7] . [0-7]

  %% Same holds for the dollar. The dollar represents a variable and this
  %% should be parsed. A dollar with characters after it is not a part of the
  %% literal.
  lexical restrictions
    DollarCharLit -/- [a-zA-Z\_]
    DollarCharLit -/- [\{]

  %% A curly open bracket is evaluated differently if it is followed by a dollar.
  lexical restrictions
    CurlyBracketLit -/- [\$]

  %% Basic escapes
  %% NOTE(review): the character class includes \' -- confirm against the PHP
  %% manual whether a backslash-quote is really an escape inside double quotes.
  sorts Escape
  syntax
    "\\" [\\\$\"rnt\'] -> Escape {cons("Escape")}

  %% Hexa escapes. A character encoded by its hexadecimal value (one or two digits).
  sorts HexaCharacterOne HexaCharacterTwo HexaCharacter
  syntax
    "\\" "x" [0-9A-Fa-f]            -> HexaCharacterOne {cons("HexaChar")}
    "\\" "x" [0-9A-Fa-f][0-9A-Fa-f] -> HexaCharacterTwo {cons("HexaChar")}

    HexaCharacterOne -> HexaCharacter
    HexaCharacterTwo -> HexaCharacter

  %% Prefer the two-digit form: a one-digit escape may not be followed by a hex digit.
  restrictions
    HexaCharacterOne -/- [0-9A-Fa-f]

  %% Octa escapes. A character encoded by one to three octal digits.
  sorts OctaCharacterOne OctaCharacterTwo OctaCharacterThree OctaCharacter
  syntax
    "\\" [0-7]           -> OctaCharacterOne   {cons("OctaChar")}
    "\\" [0-7][0-7]      -> OctaCharacterTwo   {cons("OctaChar")}
    "\\" [0-7][0-7][0-7] -> OctaCharacterThree {cons("OctaChar")}

    OctaCharacterOne   -> OctaCharacter
    OctaCharacterTwo   -> OctaCharacter
    OctaCharacterThree -> OctaCharacter

  %% Prefer the longest form: a shorter escape may not be followed by an octal digit.
  restrictions
    OctaCharacterOne -/- [0-7]
    OctaCharacterTwo -/- [0-7]

  %% Variable escapes. Partly a rebuild of VariableLiterals, but other cases are too general.
  %% Only one-dimensional arrays and single object access are allowed in simple syntax.
  %% This is only for simple syntax.
  %% EmbeddedArrayVariable is declared here because it is defined and used below.
  sorts EscapeVariable TVariable String CompoundVariable EscapeSimpleVariable
        EmbeddedString EmbeddedArrayVariable
  syntax
    EscapeSimpleVariable -> EscapeVariable
    <TVariable-CF>       -> EscapeSimpleVariable

    "${" <String-LEX> "}"            -> EscapeVariable {cons("VariableBraced")}
    <TVariable-CF> "->" <String-LEX> -> EscapeVariable {cons("ObjectAccess")}

    <TVariable-CF> "[" <String-LEX> "]"          -> EscapeVariable {cons("ArrayAccess")}
    "${" <String-LEX> "[" <String-LEX> "]" "}"   -> EscapeVariable {cons("BracedArrayAccess")}

    "'" <String-LEX> "'" -> EmbeddedString        {cons("EmbeddedString")}
    "${" <String-LEX>    -> EmbeddedArrayVariable {cons("EmbeddedArrayVariable")}

  %% Embedded String can only occur when the array access is braced
    EmbeddedArrayVariable "[" EmbeddedString "]" "}" -> EscapeVariable {cons("BracedArrayAccess")}

    <TVariable-CF> "[" <CompoundVariable-CF> "]"            -> EscapeVariable {cons("ArrayAccess")}
    EmbeddedArrayVariable "[" <CompoundVariable-CF> "]" "}" -> EscapeVariable {cons("BracedArrayAccess")}

    <TVariable-CF> "[" <LNumber-CF> "]"            -> EscapeVariable {cons("ArrayAccess")}
    EmbeddedArrayVariable "[" <LNumber-CF> "]" "}" -> EscapeVariable {cons("BracedArrayAccess")}

  %% A plain variable may not be followed by '[' or '->'; those inputs must be
  %% parsed by the array access and object access productions above.
  restrictions
    EscapeSimpleVariable -/- [\[]
    EscapeSimpleVariable -/- [\-].[\>]

  %% Variable escapes in complex syntax
  sorts BracedVariable
  syntax
    "{" <Variable-CF> "}" -> BracedVariable {cons("Braced")}