From 295997ae77dc2c04e94f4360d304dcc05a588e9e Mon Sep 17 00:00:00 2001 From: Rintaro Ishizaki Date: Wed, 19 Mar 2025 15:29:03 -0700 Subject: [PATCH 1/3] [Parser] Add 'atContextualKeywordPrefixedSyntax' method Unified disambiguation method for contextual-keyword prefixed syntax. --- Sources/SwiftParser/Expressions.swift | 46 +--- Sources/SwiftParser/Statements.swift | 196 +++++++++++------- .../SwiftParserTest/ThenStatementTests.swift | 23 +- 3 files changed, 130 insertions(+), 135 deletions(-) diff --git a/Sources/SwiftParser/Expressions.swift b/Sources/SwiftParser/Expressions.swift index 1aad77b9558..b93dbf3d08d 100644 --- a/Sources/SwiftParser/Expressions.swift +++ b/Sources/SwiftParser/Expressions.swift @@ -381,20 +381,6 @@ extension Parser { } } - /// Whether the current token is a valid contextual exprssion modifier like - /// `copy`, `consume`. - /// - /// `copy` etc. are only contextually a keyword if they are followed by an - /// identifier or keyword on the same line. We do this to ensure that we do - /// not break any copy functions defined by users. - private mutating func atContextualExpressionModifier() -> Bool { - return self.peek( - isAt: TokenSpec(.identifier, allowAtStartOfLine: false), - TokenSpec(.dollarIdentifier, allowAtStartOfLine: false), - TokenSpec(.self, allowAtStartOfLine: false) - ) - } - /// Parse an expression sequence element. mutating func parseSequenceExpressionElement( flavor: ExprFlavor, @@ -445,27 +431,7 @@ extension Parser { ) ) case (.unsafe, let handle)?: - if self.peek().isAtStartOfLine - // Closing paired syntax - || self.peek(isAt: .rightParen, .rightSquare, .rightBrace) - // Assignment - || self.peek(isAt: .equal) - // As an argument label or in a list context. - || self.peek(isAt: .colon, .comma) - // Start of a closure in a context where it should be interpreted as - // being part of a statement. - || (flavor == .stmtCondition && self.peek(isAt: .leftBrace)) - // Avoid treating as an "unsafe" expression when there is no trivia - // following the "unsafe" and the following token could either be a - // postfix expression or a subexpression: - // - Member access vs. leading . - // - Call vs. tuple expression. - // - Subscript vs. array or dictionary expression - || (self.peek(isAt: .period, .leftParen, .leftSquare) && self.peek().leadingTriviaByteLength == 0 - && self.currentToken.trailingTriviaByteLength == 0) - // End of file - || self.peek(isAt: .endOfFile) - { + if !atContextualKeywordPrefixedSyntax(exprFlavor: flavor, acceptClosure: true, preferPostfixExpr: false) { break EXPR_PREFIX } @@ -486,7 +452,7 @@ extension Parser { assert(self.experimentalFeatures.contains(.oldOwnershipOperatorSpellings)) fallthrough case (.borrow, let handle)?: - if !atContextualExpressionModifier() { + if !atContextualKeywordPrefixedSyntax(exprFlavor: flavor) { break EXPR_PREFIX } let borrowTok = self.eat(handle) @@ -503,7 +469,7 @@ extension Parser { ) case (.copy, let handle)?: - if !atContextualExpressionModifier() { + if !atContextualKeywordPrefixedSyntax(exprFlavor: flavor) { break EXPR_PREFIX } @@ -524,7 +490,7 @@ extension Parser { assert(self.experimentalFeatures.contains(.oldOwnershipOperatorSpellings)) fallthrough case (.consume, let handle)?: - if !atContextualExpressionModifier() { + if !atContextualKeywordPrefixedSyntax(exprFlavor: flavor) { break EXPR_PREFIX } @@ -546,7 +512,7 @@ extension Parser { return RawExprSyntax(parsePackExpansionExpr(repeatHandle: handle, flavor: flavor, pattern: pattern)) case (.each, let handle)?: - if !atContextualExpressionModifier() { + if !atContextualKeywordPrefixedSyntax(exprFlavor: flavor) { break EXPR_PREFIX } @@ -561,7 +527,7 @@ extension Parser { ) case (.any, _)?: - if !atContextualExpressionModifier() && !self.peek().isContextualPunctuator("~") { + if !atContextualKeywordPrefixedSyntax(exprFlavor: flavor) && !self.peek().isContextualPunctuator("~") { break EXPR_PREFIX } diff --git a/Sources/SwiftParser/Statements.swift b/Sources/SwiftParser/Statements.swift index 403fc19fa59..f3dd00c65ae 100644 --- a/Sources/SwiftParser/Statements.swift +++ b/Sources/SwiftParser/Statements.swift @@ -900,6 +900,119 @@ extension Parser { } } +extension TokenConsumer { + /// Disambiguate the word at the cursor looks like a keyword-prefixed syntax. + /// + /// - Parameters: + /// - exprFlavor: The expression context. When using this function for a statement, e.g. 'yield', + /// use `.basic`. + /// - acceptClosure: When the next token is '{' and it looks like a closure, use this value as the result. + /// - preferPostfixExpr: When the next token is '.', '(', or '[' and there is a space between the word, + /// use `!preferPostfixExpr` as the result. + mutating func atContextualKeywordPrefixedSyntax( + exprFlavor: Parser.ExprFlavor, + acceptClosure: Bool = false, + preferPostfixExpr: Bool = true + ) -> Bool { + let next = peek() + + // The next token must be at the same line. + if next.isAtStartOfLine { + return false + } + + switch next.rawTokenKind { + + case .identifier, .dollarIdentifier, .wildcard: + // E.g. foo + return true + + case .integerLiteral, .floatLiteral, + .stringQuote, .multilineStringQuote, .singleQuote, .rawStringPoundDelimiter, + .regexSlash, .regexPoundDelimiter: + // E.g. 1 + return true + + case .prefixAmpersand, .prefixOperator, .atSign, .backslash, .pound: + // E.g. ! + return true + + case .keyword: + switch Keyword(next.tokenText) { + case .as, .is, .in: + // E.g. is + return false + default: + // Other lexer-classified keywords are identifier-like. + // E.g. self + return true + } + + case .binaryOperator, .equal, .arrow, .infixQuestionMark: + // E.g. != + return false + case .postfixOperator, .postfixQuestionMark, .exclamationMark, .ellipsis: + // E.g. ++ + return false + case .rightBrace, .rightParen, .rightSquare: + // E.g. ] + return false + case .colon, .comma: + // E.g. , + return false + case .semicolon, .endOfFile, .poundElse, .poundElseif, .poundEndif: + return false + + case .leftAngle, .rightAngle: + // Lexer never produce these token kinds. + return false + + case .stringSegment, .regexLiteralPattern: + // Calling this function inside a string/regex literal? + return false + + case .backtick, .poundAvailable, .poundUnavailable, + .poundSourceLocation, .poundIf, .shebang, .unknown: + // These are invalid for both cases + // E.g. #available + return false + + case .period, .leftParen, .leftSquare: + // These are truly ambiguous. They can be both start of postfix expression + // suffix or start of primary expression: + // + // - Member access vs. implicit member expression + // - Call vs. tuple expression + // - Subscript vs. collection literal + // + let hasSpace = (next.leadingTriviaByteLength + currentToken.trailingTriviaByteLength) != 0 + if !hasSpace { + // No space, the word is an decl-ref expression + return false + } + return !preferPostfixExpr + + case .leftBrace: + // E.g. { ... } + // Trailing closure is also ambiguous: + // + // - Trailing closure vs. immediately-invoked closure + // + // Checking whitespace between the word cannot help this because people + // usually put a space before trailing closures. Even though that is source + // breaking, we prefer parsing it as a keyword if the syntax accepts + // immediately-invoked closure patterns. E.g. 'unsafe { ... }()' + if !acceptClosure { + return false + } + return self.withLookahead { + $0.consumeAnyToken() + return $0.atValidTrailingClosure(flavor: exprFlavor) + } + } + } +} + // MARK: Lookahead extension Parser.Lookahead { @@ -949,91 +1062,16 @@ extension Parser.Lookahead { // FIXME: 'repeat' followed by '{' could be a pack expansion // with a closure pattern. return self.peek().rawTokenKind == .leftBrace - case .yield?: - switch self.peek().rawTokenKind { - case .prefixAmpersand: - // "yield &" always denotes a yield statement. - return true - case .leftParen: - // "yield (", by contrast, must be disambiguated with additional - // context. We always consider it an apply expression of a function - // called `yield` for the purposes of the parse. - return false - case .binaryOperator: - // 'yield &= x' treats yield as an identifier. - return false - default: - // "yield" followed immediately by any other token is likely a - // yield statement of some singular expression. - return !self.peek().isAtStartOfLine - } - case .discard?: - let next = peek() - // The thing to be discarded must be on the same line as `discard`. - if next.isAtStartOfLine { - return false - } - switch next.rawTokenKind { - case .identifier, .keyword: - // Since some identifiers like "self" are classified as keywords, - // we want to recognize those too, to handle "discard self". We also - // accept any identifier since we want to emit a nice error message - // later on during type checking. - return true - default: - // any other token following "discard" means it's not the statement. - // For example, could be the function call "discard()". - return false - } - - case .then: - return atStartOfThenStatement(preferExpr: preferExpr) + case .yield?, .discard?: + return atContextualKeywordPrefixedSyntax(exprFlavor: .basic, preferPostfixExpr: true) + case .then?: + return atContextualKeywordPrefixedSyntax(exprFlavor: .basic, preferPostfixExpr: false) case nil: return false } } - /// Whether we're currently at a `then` token that should be parsed as a - /// `then` statement. - mutating func atStartOfThenStatement(preferExpr: Bool) -> Bool { - guard self.at(.keyword(.then)) else { - return false - } - - // If we prefer an expr and aren't at the start of a newline, then don't - // parse a ThenStmt. - if preferExpr && !self.atStartOfLine { - return false - } - - // If 'then' is followed by a binary or postfix operator, prefer to parse as - // an expr. - if peek(isAtAnyIn: BinaryOperatorLike.self) != nil || peek(isAtAnyIn: PostfixOperatorLike.self) != nil { - return false - } - - switch PrepareForKeywordMatch(peek()) { - case TokenSpec(.is), TokenSpec(.as): - // Treat 'is' and 'as' like the binary operator case, and parse as an - // expr. - return false - - case .leftBrace: - // This is a trailing closure. - return false - - case .leftParen, .leftSquare, .period: - // These are handled based on whether there is trivia between the 'then' - // and the token. If so, it's a 'then' statement. Otherwise it should - // be treated as an expression, e.g `then(...)`, `then[...]`, `then.foo`. - return !self.currentToken.trailingTriviaText.isEmpty || !peek().leadingTriviaText.isEmpty - default: - break - } - return true - } - /// Returns whether the parser's current position is the start of a switch case, /// given that we're in the middle of a switch already. mutating func atStartOfSwitchCase(allowRecovery: Bool = false) -> Bool { diff --git a/Tests/SwiftParserTest/ThenStatementTests.swift b/Tests/SwiftParserTest/ThenStatementTests.swift index 0fa963a18b0..4bd75a86901 100644 --- a/Tests/SwiftParserTest/ThenStatementTests.swift +++ b/Tests/SwiftParserTest/ThenStatementTests.swift @@ -297,13 +297,7 @@ final class ThenStatementTests: ParserTestCase { """ then1️⃣ """, - diagnostics: [ - DiagnosticSpec( - message: "expected expression in 'then' statement", - fixIts: ["insert expression"] - ) - ], - fixedSource: "then <#expression#>" + substructure: DeclReferenceExprSyntax(baseName: .identifier("then")) ) } @@ -312,13 +306,7 @@ final class ThenStatementTests: ParserTestCase { """ then1️⃣; """, - diagnostics: [ - DiagnosticSpec( - message: "expected expression in 'then' statement", - fixIts: ["insert expression"] - ) - ], - fixedSource: "then <#expression#>;" + substructure: DeclReferenceExprSyntax(baseName: .identifier("then")) ) } @@ -342,7 +330,7 @@ final class ThenStatementTests: ParserTestCase { then 0 """, - substructure: ThenStmtSyntax(expression: IntegerLiteralExprSyntax(0)) + substructure: DeclReferenceExprSyntax(baseName: .identifier("then")) ) } @@ -685,7 +673,10 @@ final class ThenStatementTests: ParserTestCase { then .foo """, - substructure: ThenStmtSyntax(expression: MemberAccessExprSyntax(name: .identifier("foo"))) + substructure: MemberAccessExprSyntax( + base: DeclReferenceExprSyntax(baseName: .identifier("then")), + name: .identifier("foo") + ) ) } From 5bf42176a98e834560128a2f0cff8937a652dffb Mon Sep 17 00:00:00 2001 From: Rintaro Ishizaki Date: Thu, 20 Mar 2025 04:52:32 -0700 Subject: [PATCH 2/3] [Parser] Explicit keyword allow-list for contextual keyword prefixed --- Sources/SwiftParser/Statements.swift | 17 +++++++++++------ Tests/SwiftParserTest/StatementTests.swift | 11 ++++++----- Tests/SwiftParserTest/ThenStatementTests.swift | 4 ++-- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/Sources/SwiftParser/Statements.swift b/Sources/SwiftParser/Statements.swift index f3dd00c65ae..1eda318b1da 100644 --- a/Sources/SwiftParser/Statements.swift +++ b/Sources/SwiftParser/Statements.swift @@ -938,14 +938,19 @@ extension TokenConsumer { return true case .keyword: + // Some lexer-classified keywords can start expressions. switch Keyword(next.tokenText) { - case .as, .is, .in: - // E.g. is - return false - default: - // Other lexer-classified keywords are identifier-like. - // E.g. self + case .Any, .Self, .self, .super, .`init`, .true, .false, .nil: + return true + case .repeat, .try: + return true + case .if, .switch: return true + case .do where self.experimentalFeatures.contains(.doExpressions): + return true + + default: + return false } case .binaryOperator, .equal, .arrow, .infixQuestionMark: diff --git a/Tests/SwiftParserTest/StatementTests.swift b/Tests/SwiftParserTest/StatementTests.swift index 42e12f6785c..39872342bbe 100644 --- a/Tests/SwiftParserTest/StatementTests.swift +++ b/Tests/SwiftParserTest/StatementTests.swift @@ -609,21 +609,22 @@ final class StatementTests: ParserTestCase { assertParse( """ - discard 1️⃣case + discard1️⃣ 2️⃣case """, diagnostics: [ DiagnosticSpec( locationMarker: "1️⃣", - message: "expected expression in 'discard' statement", - fixIts: ["insert expression"] + message: "consecutive statements on a line must be separated by newline or ';'", + fixIts: ["insert newline", "insert ';'"] ), DiagnosticSpec( - locationMarker: "1️⃣", + locationMarker: "2️⃣", message: "'case' can only appear inside a 'switch' statement or 'enum' declaration" ), ], fixedSource: """ - discard <#expression#>case + discard + case """ ) diff --git a/Tests/SwiftParserTest/ThenStatementTests.swift b/Tests/SwiftParserTest/ThenStatementTests.swift index 4bd75a86901..0ecf3c77f22 100644 --- a/Tests/SwiftParserTest/ThenStatementTests.swift +++ b/Tests/SwiftParserTest/ThenStatementTests.swift @@ -295,7 +295,7 @@ final class ThenStatementTests: ParserTestCase { func testThenStmt22() { assertParse( """ - then1️⃣ + then """, substructure: DeclReferenceExprSyntax(baseName: .identifier("then")) ) @@ -304,7 +304,7 @@ final class ThenStatementTests: ParserTestCase { func testThenStmt23() { assertParse( """ - then1️⃣; + then; """, substructure: DeclReferenceExprSyntax(baseName: .identifier("then")) ) From ebf79c3244119fc78115488481456ddc59550e68 Mon Sep 17 00:00:00 2001 From: Rintaro Ishizaki Date: Fri, 21 Mar 2025 09:49:55 -0700 Subject: [PATCH 3/3] [Parser] Add 'allowNextLineOperand' to atContextualKeywordPrefixedSyntax Just like 'return', 'then' statement should accept operand on the next line. --- Sources/SwiftParser/Statements.swift | 35 ++++++++++++------- .../SwiftParserTest/ThenStatementTests.swift | 7 ++-- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/Sources/SwiftParser/Statements.swift b/Sources/SwiftParser/Statements.swift index 1eda318b1da..b6834a69af5 100644 --- a/Sources/SwiftParser/Statements.swift +++ b/Sources/SwiftParser/Statements.swift @@ -909,15 +909,17 @@ extension TokenConsumer { /// - acceptClosure: When the next token is '{' and it looks like a closure, use this value as the result. /// - preferPostfixExpr: When the next token is '.', '(', or '[' and there is a space between the word, /// use `!preferPostfixExpr` as the result. + /// - allowNextLineOperand: Whether the keyword-prefixed syntax accepts the operand on the next line. mutating func atContextualKeywordPrefixedSyntax( exprFlavor: Parser.ExprFlavor, acceptClosure: Bool = false, - preferPostfixExpr: Bool = true + preferPostfixExpr: Bool = true, + allowNextLineOperand: Bool = false ) -> Bool { let next = peek() // The next token must be at the same line. - if next.isAtStartOfLine { + if next.isAtStartOfLine && !allowNextLineOperand { return false } @@ -990,12 +992,13 @@ extension TokenConsumer { // - Call vs. tuple expression // - Subscript vs. collection literal // - let hasSpace = (next.leadingTriviaByteLength + currentToken.trailingTriviaByteLength) != 0 - if !hasSpace { - // No space, the word is an decl-ref expression + if preferPostfixExpr { return false } - return !preferPostfixExpr + + // If there's no space between the tokens, consider it's an expression. + // Otherwise, it looks like a keyword followed by an expression. + return (next.leadingTriviaByteLength + currentToken.trailingTriviaByteLength) != 0 case .leftBrace: // E.g. { ... } @@ -1003,13 +1006,14 @@ extension TokenConsumer { // // - Trailing closure vs. immediately-invoked closure // - // Checking whitespace between the word cannot help this because people - // usually put a space before trailing closures. Even though that is source - // breaking, we prefer parsing it as a keyword if the syntax accepts - // immediately-invoked closure patterns. E.g. 'unsafe { ... }()' if !acceptClosure { return false } + + // Checking whitespace between the word cannot help this because people + // usually put a space before trailing closures. Even though that is source + // breaking, we prefer parsing it as a keyword if the syntax accepts + // expressions starting with a closure. E.g. 'unsafe { ... }()' return self.withLookahead { $0.consumeAnyToken() return $0.atValidTrailingClosure(flavor: exprFlavor) @@ -1068,9 +1072,16 @@ extension Parser.Lookahead { // with a closure pattern. return self.peek().rawTokenKind == .leftBrace case .yield?, .discard?: - return atContextualKeywordPrefixedSyntax(exprFlavor: .basic, preferPostfixExpr: true) + return atContextualKeywordPrefixedSyntax( + exprFlavor: .basic, + preferPostfixExpr: true + ) case .then?: - return atContextualKeywordPrefixedSyntax(exprFlavor: .basic, preferPostfixExpr: false) + return atContextualKeywordPrefixedSyntax( + exprFlavor: .basic, + preferPostfixExpr: false, + allowNextLineOperand: !preferExpr + ) case nil: return false diff --git a/Tests/SwiftParserTest/ThenStatementTests.swift b/Tests/SwiftParserTest/ThenStatementTests.swift index 0ecf3c77f22..eeb093d58cf 100644 --- a/Tests/SwiftParserTest/ThenStatementTests.swift +++ b/Tests/SwiftParserTest/ThenStatementTests.swift @@ -330,7 +330,7 @@ final class ThenStatementTests: ParserTestCase { then 0 """, - substructure: DeclReferenceExprSyntax(baseName: .identifier("then")) + substructure: ThenStmtSyntax(expression: IntegerLiteralExprSyntax(0)) ) } @@ -673,10 +673,7 @@ final class ThenStatementTests: ParserTestCase { then .foo """, - substructure: MemberAccessExprSyntax( - base: DeclReferenceExprSyntax(baseName: .identifier("then")), - name: .identifier("foo") - ) + substructure: ThenStmtSyntax(expression: MemberAccessExprSyntax(name: .identifier("foo"))) ) }