From bfac69297fb425d0739bea10d3096933f63b55bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Sat, 8 Nov 2025 22:24:29 +0200 Subject: [PATCH] [llvm-rc] Don't interpret integer literals as octal numbers in rc.exe mode (#166915) It turns out that rc.exe doesn't interpret integer literals as octal numbers - but GNU windres does. Previously, llvm-rc did interpret them as octal. Fix the issue by stripping away the leading zeros during tokenization. The alternative (which would be somewhat cleaner, as visible in tokenizer.test) would be to retain them in the RCToken object, but strip them out before calling StringRef::getAsInteger. Another alternative would be to handle the radix detection locally in llvm-rc code and not rely on getAsInteger to autodetect it. Both of those solutions require propagating the IsWindres flag so that it is available within RCToken, or at least when calling RCToken::intValue(). Fixes: https://github.com/llvm/llvm-project/issues/144723 (cherry picked from commit 3673cc7a4222c6b60d8bb287ca048efa37f61e3b) --- .../tools/llvm-rc/Inputs/octal-in-range.rc | 4 ++ .../llvm-rc/Inputs/octal-out-of-range.rc | 4 ++ llvm/test/tools/llvm-rc/Inputs/tokens.rc | 2 +- llvm/test/tools/llvm-rc/octal.test | 38 ++++++++++++++++ llvm/test/tools/llvm-rc/tokenizer.test | 5 ++- llvm/tools/llvm-rc/ResourceScriptToken.cpp | 44 +++++++++++++++---- llvm/tools/llvm-rc/ResourceScriptToken.h | 2 +- .../tools/llvm-rc/ResourceScriptTokenList.def | 2 +- llvm/tools/llvm-rc/llvm-rc.cpp | 3 +- 9 files changed, 90 insertions(+), 14 deletions(-) create mode 100644 llvm/test/tools/llvm-rc/Inputs/octal-in-range.rc create mode 100644 llvm/test/tools/llvm-rc/Inputs/octal-out-of-range.rc create mode 100644 llvm/test/tools/llvm-rc/octal.test diff --git a/llvm/test/tools/llvm-rc/Inputs/octal-in-range.rc b/llvm/test/tools/llvm-rc/Inputs/octal-in-range.rc new file mode 100644 index 0000000000000..8327ef9be9f5c --- /dev/null +++ b/llvm/test/tools/llvm-rc/Inputs/octal-in-range.rc @@ -0,0 
+1,4 @@ +1 VERSIONINFO +FILEVERSION 0010,0010,0010,0010 +BEGIN +END diff --git a/llvm/test/tools/llvm-rc/Inputs/octal-out-of-range.rc b/llvm/test/tools/llvm-rc/Inputs/octal-out-of-range.rc new file mode 100644 index 0000000000000..ce520f245a48d --- /dev/null +++ b/llvm/test/tools/llvm-rc/Inputs/octal-out-of-range.rc @@ -0,0 +1,4 @@ +1 VERSIONINFO +FILEVERSION 9,08,09,1 +BEGIN +END diff --git a/llvm/test/tools/llvm-rc/Inputs/tokens.rc b/llvm/test/tools/llvm-rc/Inputs/tokens.rc index 20f77912477d9..caf01aeff45fe 100644 --- a/llvm/test/tools/llvm-rc/Inputs/tokens.rc +++ b/llvm/test/tools/llvm-rc/Inputs/tokens.rc @@ -1,4 +1,4 @@ -1 + 2 - 3214L & 0x120894 032173 2|&~+(-7){0xabcdef 0xABCDEFl} Begin End +1 + 2 - 3214L & 0x120894 032173 -0042 009 2|&~+(-7){0xabcdef 0xABCDEFl} Begin End 1*3/4 He11o LLVM identifier-with-dashes diff --git a/llvm/test/tools/llvm-rc/octal.test b/llvm/test/tools/llvm-rc/octal.test new file mode 100644 index 0000000000000..686c1fcf1608e --- /dev/null +++ b/llvm/test/tools/llvm-rc/octal.test @@ -0,0 +1,38 @@ +; RUN: llvm-rc -no-preprocess /FO %t.in-range-rc.res -- %p/Inputs/octal-in-range.rc +; RUN: llvm-readobj %t.in-range-rc.res | FileCheck %s --check-prefix=IN-RANGE-RC +; RUN: llvm-windres --no-preprocess %p/Inputs/octal-in-range.rc %t.in-range-windres.res +; RUN: llvm-readobj %t.in-range-windres.res | FileCheck %s --check-prefix=IN-RANGE-WINDRES + +; IN-RANGE-RC: Data: ( +; IN-RANGE-RC-NEXT: 0000: 5C003400 00005600 53005F00 56004500 |\.4...V.S._.V.E.| +; IN-RANGE-RC-NEXT: 0010: 52005300 49004F00 4E005F00 49004E00 |R.S.I.O.N._.I.N.| +; IN-RANGE-RC-NEXT: 0020: 46004F00 00000000 BD04EFFE 00000100 |F.O.............| +; IN-RANGE-RC-NEXT: 0030: 0A000A00 0A000A00 00000000 00000000 |................| +; IN-RANGE-RC-NEXT: 0040: 00000000 00000000 00000000 00000000 |................| +; IN-RANGE-RC-NEXT: 0050: 00000000 00000000 00000000 |............| +; IN-RANGE-RC-NEXT: ) + +; IN-RANGE-WINDRES: Data: ( +; IN-RANGE-WINDRES-NEXT: 0000: 5C003400 00005600 
53005F00 56004500 |\.4...V.S._.V.E.| +; IN-RANGE-WINDRES-NEXT: 0010: 52005300 49004F00 4E005F00 49004E00 |R.S.I.O.N._.I.N.| +; IN-RANGE-WINDRES-NEXT: 0020: 46004F00 00000000 BD04EFFE 00000100 |F.O.............| +; IN-RANGE-WINDRES-NEXT: 0030: 08000800 08000800 00000000 00000000 |................| +; IN-RANGE-WINDRES-NEXT: 0040: 00000000 00000000 00000000 00000000 |................| +; IN-RANGE-WINDRES-NEXT: 0050: 00000000 00000000 00000000 |............| +; IN-RANGE-WINDRES-NEXT: ) + +; RUN: llvm-rc -no-preprocess /FO %t.out-of-range-rc.res -- %p/Inputs/octal-out-of-range.rc +; RUN: llvm-readobj %t.out-of-range-rc.res | FileCheck %s --check-prefix=OUT-OF-RANGE-RC +; RUN: not llvm-windres --no-preprocess %p/Inputs/octal-out-of-range.rc %t.out-of-range-windres.res 2>&1 | FileCheck %s --check-prefix OUT-OF-RANGE-WINDRES + +; OUT-OF-RANGE-RC: Data: ( +; OUT-OF-RANGE-RC-NEXT: 0000: 5C003400 00005600 53005F00 56004500 |\.4...V.S._.V.E.| +; OUT-OF-RANGE-RC-NEXT: 0010: 52005300 49004F00 4E005F00 49004E00 |R.S.I.O.N._.I.N.| +; OUT-OF-RANGE-RC-NEXT: 0020: 46004F00 00000000 BD04EFFE 00000100 |F.O.............| +; OUT-OF-RANGE-RC-NEXT: 0030: 08000900 01000900 00000000 00000000 |................| +; OUT-OF-RANGE-RC-NEXT: 0040: 00000000 00000000 00000000 00000000 |................| +; OUT-OF-RANGE-RC-NEXT: 0050: 00000000 00000000 00000000 |............| +; OUT-OF-RANGE-RC-NEXT: ) + + +; OUT-OF-RANGE-WINDRES: llvm-rc: Error parsing file: Integer invalid or too large: 08 diff --git a/llvm/test/tools/llvm-rc/tokenizer.test b/llvm/test/tools/llvm-rc/tokenizer.test index 3062e2bf64629..953b0ca8c1b57 100644 --- a/llvm/test/tools/llvm-rc/tokenizer.test +++ b/llvm/test/tools/llvm-rc/tokenizer.test @@ -9,7 +9,10 @@ ; CHECK-NEXT: Int: 3214L; int value = 3214 ; CHECK-NEXT: Amp: & ; CHECK-NEXT: Int: 0x120894; int value = 1181844 -; CHECK-NEXT: Int: 032173; int value = 13435 +; CHECK-NEXT: Int: 32173; int value = 32173 +; CHECK-NEXT: Minus: - +; CHECK-NEXT: Int: 42; int value = 42 +; 
CHECK-NEXT: Int: 9; int value = 9 ; CHECK-NEXT: Int: 2; int value = 2 ; CHECK-NEXT: Pipe: | ; CHECK-NEXT: Amp: & diff --git a/llvm/tools/llvm-rc/ResourceScriptToken.cpp b/llvm/tools/llvm-rc/ResourceScriptToken.cpp index 0070037e63e6a..046a1bf78daef 100644 --- a/llvm/tools/llvm-rc/ResourceScriptToken.cpp +++ b/llvm/tools/llvm-rc/ResourceScriptToken.cpp @@ -26,11 +26,11 @@ using namespace llvm; using Kind = RCToken::Kind; // Checks if Representation is a correct description of an RC integer. -// It should be a 32-bit unsigned integer, either decimal, octal (0[0-7]+), -// or hexadecimal (0x[0-9a-f]+). It might be followed by a single 'L' -// character (that is the difference between our representation and -// StringRef's one). If Representation is correct, 'true' is returned and -// the return value is put back in Num. +// It should be a 32-bit unsigned integer, either decimal or hexadecimal +// (0x[0-9a-f]+). For Windres mode, it can also be octal (0[0-7]+). +// It might be followed by a single 'L' character (that is the difference +// between our representation and StringRef's one). If Representation is +// correct, 'true' is returned and the return value is put back in Num. static bool rcGetAsInteger(StringRef Representation, uint32_t &Num) { size_t Length = Representation.size(); if (Length == 0) @@ -95,7 +95,8 @@ namespace { class Tokenizer { public: - Tokenizer(StringRef Input) : Data(Input), DataLength(Input.size()), Pos(0) {} + Tokenizer(StringRef Input, bool IsWindres) + : Data(Input), DataLength(Input.size()), Pos(0), IsWindres(IsWindres) {} Expected<std::vector<RCToken>> run(); @@ -128,6 +129,7 @@ class Tokenizer { // character. 
bool canStartInt() const; bool canContinueInt() const; + void trimIntString(StringRef &Str) const; bool canStartString() const; @@ -153,6 +155,7 @@ class Tokenizer { StringRef Data; size_t DataLength, Pos; + bool IsWindres; }; void Tokenizer::skipCurrentLine() { @@ -187,7 +190,12 @@ Expected<std::vector<RCToken>> Tokenizer::run() { if (TokenKind == Kind::LineComment || TokenKind == Kind::StartComment) continue; - RCToken Token(TokenKind, Data.take_front(Pos).drop_front(TokenStart)); + StringRef Contents = Data.take_front(Pos).drop_front(TokenStart); + + if (TokenKind == Kind::Int) + trimIntString(Contents); + + RCToken Token(TokenKind, Contents); if (TokenKind == Kind::Identifier) { processIdentifier(Token); } else if (TokenKind == Kind::Int) { @@ -366,12 +374,30 @@ void Tokenizer::processIdentifier(RCToken &Token) const { Token = RCToken(Kind::BlockEnd, Name); } +void Tokenizer::trimIntString(StringRef &Str) const { + if (!IsWindres) { + // For compatibility with rc.exe, strip leading zeros that make the + // integer literal interpreted as octal. + // + // We do rely on StringRef::getAsInteger for autodetecting between + // decimal and hexadecimal literals, but we want to avoid interpreting + // literals as octal. + // + // This omits the leading zeros from the RCToken's value string entirely, + // which also has a visible effect when dumping the tokenizer output. + // Alternatively, we could store the IsWindres flag in RCToken and defer + // the trimming to RCToken::intValue. 
+ while (Str.size() >= 2 && Str[0] == '0' && std::isdigit(Str[1])) + Str = Str.drop_front(1); + } +} + } // anonymous namespace namespace llvm { -Expected<std::vector<RCToken>> tokenizeRC(StringRef Input) { - return Tokenizer(Input).run(); +Expected<std::vector<RCToken>> tokenizeRC(StringRef Input, bool IsWindres) { + return Tokenizer(Input, IsWindres).run(); } } // namespace llvm diff --git a/llvm/tools/llvm-rc/ResourceScriptToken.h b/llvm/tools/llvm-rc/ResourceScriptToken.h index 3dcdfafd2d576..50ef8e4b00f53 100644 --- a/llvm/tools/llvm-rc/ResourceScriptToken.h +++ b/llvm/tools/llvm-rc/ResourceScriptToken.h @@ -76,7 +76,7 @@ class RCToken { // Tokens returned by this function hold only references to the parts // of the Input. Memory buffer containing Input cannot be freed, // modified or reallocated. -Expected<std::vector<RCToken>> tokenizeRC(StringRef Input); +Expected<std::vector<RCToken>> tokenizeRC(StringRef Input, bool IsWindres); } // namespace llvm diff --git a/llvm/tools/llvm-rc/ResourceScriptTokenList.def b/llvm/tools/llvm-rc/ResourceScriptTokenList.def index 6ee13b2815d35..98af23c649577 100644 --- a/llvm/tools/llvm-rc/ResourceScriptTokenList.def +++ b/llvm/tools/llvm-rc/ResourceScriptTokenList.def @@ -14,7 +14,7 @@ // Long tokens. They might consist of more than one character. TOKEN(Invalid) // Invalid token. Should not occur in a valid script. -TOKEN(Int) // Integer (decimal, octal or hexadecimal). +TOKEN(Int) // Integer (decimal or hexadecimal, and possibly octal for windres). TOKEN(String) // String value. TOKEN(Identifier) // Script identifier (resource name or type). TOKEN(LineComment) // Beginning of single-line comment. 
diff --git a/llvm/tools/llvm-rc/llvm-rc.cpp b/llvm/tools/llvm-rc/llvm-rc.cpp index 73621543848a1..530c6b59d9460 100644 --- a/llvm/tools/llvm-rc/llvm-rc.cpp +++ b/llvm/tools/llvm-rc/llvm-rc.cpp @@ -619,7 +619,8 @@ void doRc(std::string Src, std::string Dest, RcOptions &Opts, StringRef Contents = FileContents->getBuffer(); std::string FilteredContents = filterCppOutput(Contents); - std::vector<RCToken> Tokens = ExitOnErr(tokenizeRC(FilteredContents)); + std::vector<RCToken> Tokens = + ExitOnErr(tokenizeRC(FilteredContents, Opts.IsWindres)); if (Opts.BeVerbose) { const Twine TokenNames[] = {