diff --git a/llvm/test/tools/llvm-rc/Inputs/octal-in-range.rc b/llvm/test/tools/llvm-rc/Inputs/octal-in-range.rc new file mode 100644 index 0000000000000..8327ef9be9f5c --- /dev/null +++ b/llvm/test/tools/llvm-rc/Inputs/octal-in-range.rc @@ -0,0 +1,4 @@ +1 VERSIONINFO +FILEVERSION 0010,0010,0010,0010 +BEGIN +END diff --git a/llvm/test/tools/llvm-rc/Inputs/octal-out-of-range.rc b/llvm/test/tools/llvm-rc/Inputs/octal-out-of-range.rc new file mode 100644 index 0000000000000..ce520f245a48d --- /dev/null +++ b/llvm/test/tools/llvm-rc/Inputs/octal-out-of-range.rc @@ -0,0 +1,4 @@ +1 VERSIONINFO +FILEVERSION 9,08,09,1 +BEGIN +END diff --git a/llvm/test/tools/llvm-rc/Inputs/tokens.rc b/llvm/test/tools/llvm-rc/Inputs/tokens.rc index 20f77912477d9..caf01aeff45fe 100644 --- a/llvm/test/tools/llvm-rc/Inputs/tokens.rc +++ b/llvm/test/tools/llvm-rc/Inputs/tokens.rc @@ -1,4 +1,4 @@ -1 + 2 - 3214L & 0x120894 032173 2|&~+(-7){0xabcdef 0xABCDEFl} Begin End +1 + 2 - 3214L & 0x120894 032173 -0042 009 2|&~+(-7){0xabcdef 0xABCDEFl} Begin End 1*3/4 He11o LLVM identifier-with-dashes diff --git a/llvm/test/tools/llvm-rc/octal.test b/llvm/test/tools/llvm-rc/octal.test new file mode 100644 index 0000000000000..686c1fcf1608e --- /dev/null +++ b/llvm/test/tools/llvm-rc/octal.test @@ -0,0 +1,38 @@ +; RUN: llvm-rc -no-preprocess /FO %t.in-range-rc.res -- %p/Inputs/octal-in-range.rc +; RUN: llvm-readobj %t.in-range-rc.res | FileCheck %s --check-prefix=IN-RANGE-RC +; RUN: llvm-windres --no-preprocess %p/Inputs/octal-in-range.rc %t.in-range-windres.res +; RUN: llvm-readobj %t.in-range-windres.res | FileCheck %s --check-prefix=IN-RANGE-WINDRES + +; IN-RANGE-RC: Data: ( +; IN-RANGE-RC-NEXT: 0000: 5C003400 00005600 53005F00 56004500 |\.4...V.S._.V.E.| +; IN-RANGE-RC-NEXT: 0010: 52005300 49004F00 4E005F00 49004E00 |R.S.I.O.N._.I.N.| +; IN-RANGE-RC-NEXT: 0020: 46004F00 00000000 BD04EFFE 00000100 |F.O.............| +; IN-RANGE-RC-NEXT: 0030: 0A000A00 0A000A00 00000000 00000000 |................| +; IN-RANGE-RC-NEXT: 0040: 00000000 00000000 00000000 00000000 |................| +; IN-RANGE-RC-NEXT: 0050: 00000000 00000000 00000000 |............| +; IN-RANGE-RC-NEXT: ) + +; IN-RANGE-WINDRES: Data: ( +; IN-RANGE-WINDRES-NEXT: 0000: 5C003400 00005600 53005F00 56004500 |\.4...V.S._.V.E.| +; IN-RANGE-WINDRES-NEXT: 0010: 52005300 49004F00 4E005F00 49004E00 |R.S.I.O.N._.I.N.| +; IN-RANGE-WINDRES-NEXT: 0020: 46004F00 00000000 BD04EFFE 00000100 |F.O.............| +; IN-RANGE-WINDRES-NEXT: 0030: 08000800 08000800 00000000 00000000 |................| +; IN-RANGE-WINDRES-NEXT: 0040: 00000000 00000000 00000000 00000000 |................| +; IN-RANGE-WINDRES-NEXT: 0050: 00000000 00000000 00000000 |............| +; IN-RANGE-WINDRES-NEXT: ) + +; RUN: llvm-rc -no-preprocess /FO %t.out-of-range-rc.res -- %p/Inputs/octal-out-of-range.rc +; RUN: llvm-readobj %t.out-of-range-rc.res | FileCheck %s --check-prefix=OUT-OF-RANGE-RC +; RUN: not llvm-windres --no-preprocess %p/Inputs/octal-out-of-range.rc %t.out-of-range-windres.res 2>&1 | FileCheck %s --check-prefix OUT-OF-RANGE-WINDRES + +; OUT-OF-RANGE-RC: Data: ( +; OUT-OF-RANGE-RC-NEXT: 0000: 5C003400 00005600 53005F00 56004500 |\.4...V.S._.V.E.| +; OUT-OF-RANGE-RC-NEXT: 0010: 52005300 49004F00 4E005F00 49004E00 |R.S.I.O.N._.I.N.| +; OUT-OF-RANGE-RC-NEXT: 0020: 46004F00 00000000 BD04EFFE 00000100 |F.O.............| +; OUT-OF-RANGE-RC-NEXT: 0030: 08000900 01000900 00000000 00000000 |................| +; OUT-OF-RANGE-RC-NEXT: 0040: 00000000 00000000 00000000 00000000 |................| +; OUT-OF-RANGE-RC-NEXT: 0050: 00000000 00000000 00000000 |............| +; OUT-OF-RANGE-RC-NEXT: ) + + +; OUT-OF-RANGE-WINDRES: llvm-rc: Error parsing file: Integer invalid or too large: 08 diff --git a/llvm/test/tools/llvm-rc/tokenizer.test b/llvm/test/tools/llvm-rc/tokenizer.test index 3062e2bf64629..953b0ca8c1b57 100644 --- a/llvm/test/tools/llvm-rc/tokenizer.test +++ b/llvm/test/tools/llvm-rc/tokenizer.test @@ -9,7 +9,10 @@ ; CHECK-NEXT: Int: 3214L; int value = 3214 ; CHECK-NEXT: Amp: & ; CHECK-NEXT: Int: 0x120894; int value = 1181844 -; CHECK-NEXT: Int: 032173; int value = 13435 +; CHECK-NEXT: Int: 32173; int value = 32173 +; CHECK-NEXT: Minus: - +; CHECK-NEXT: Int: 42; int value = 42 +; CHECK-NEXT: Int: 9; int value = 9 ; CHECK-NEXT: Int: 2; int value = 2 ; CHECK-NEXT: Pipe: | ; CHECK-NEXT: Amp: & diff --git a/llvm/tools/llvm-rc/ResourceScriptToken.cpp b/llvm/tools/llvm-rc/ResourceScriptToken.cpp index 0070037e63e6a..046a1bf78daef 100644 --- a/llvm/tools/llvm-rc/ResourceScriptToken.cpp +++ b/llvm/tools/llvm-rc/ResourceScriptToken.cpp @@ -26,11 +26,11 @@ using namespace llvm; using Kind = RCToken::Kind; // Checks if Representation is a correct description of an RC integer. -// It should be a 32-bit unsigned integer, either decimal, octal (0[0-7]+), -// or hexadecimal (0x[0-9a-f]+). It might be followed by a single 'L' -// character (that is the difference between our representation and -// StringRef's one). If Representation is correct, 'true' is returned and -// the return value is put back in Num. +// It should be a 32-bit unsigned integer, either decimal or hexadecimal +// (0x[0-9a-f]+). For Windres mode, it can also be octal (0[0-7]+). +// It might be followed by a single 'L' character (that is the difference +// between our representation and StringRef's one). If Representation is +// correct, 'true' is returned and the return value is put back in Num. static bool rcGetAsInteger(StringRef Representation, uint32_t &Num) { size_t Length = Representation.size(); if (Length == 0) @@ -95,7 +95,8 @@ namespace { class Tokenizer { public: - Tokenizer(StringRef Input) : Data(Input), DataLength(Input.size()), Pos(0) {} + Tokenizer(StringRef Input, bool IsWindres) + : Data(Input), DataLength(Input.size()), Pos(0), IsWindres(IsWindres) {} Expected> run(); @@ -128,6 +129,7 @@ class Tokenizer { // character. bool canStartInt() const; bool canContinueInt() const; + void trimIntString(StringRef &Str) const; bool canStartString() const; @@ -153,6 +155,7 @@ class Tokenizer { StringRef Data; size_t DataLength, Pos; + bool IsWindres; }; void Tokenizer::skipCurrentLine() { @@ -187,7 +190,12 @@ Expected> Tokenizer::run() { if (TokenKind == Kind::LineComment || TokenKind == Kind::StartComment) continue; - RCToken Token(TokenKind, Data.take_front(Pos).drop_front(TokenStart)); + StringRef Contents = Data.take_front(Pos).drop_front(TokenStart); + + if (TokenKind == Kind::Int) + trimIntString(Contents); + + RCToken Token(TokenKind, Contents); if (TokenKind == Kind::Identifier) { processIdentifier(Token); } else if (TokenKind == Kind::Int) { @@ -366,12 +374,30 @@ void Tokenizer::processIdentifier(RCToken &Token) const { Token = RCToken(Kind::BlockEnd, Name); } +void Tokenizer::trimIntString(StringRef &Str) const { + if (!IsWindres) { + // For compatibility with rc.exe, strip leading zeros that make the + // integer literal interpreted as octal. + // + // We do rely on Stringref::getAsInteger for autodetecting between + // decimal and hexadecimal literals, but we want to avoid interpreting + // literals as octal. + // + // This omits the leading zeros from the RCToken's value string entirely, + // which also has a visible effect when dumping the tokenizer output. + // Alternatively, we could store the IsWindres flag in RCToken and defer + // the trimming to RCToken::intValue. + while (Str.size() >= 2 && Str[0] == '0' && std::isdigit(Str[1])) + Str = Str.drop_front(1); + } +} + } // anonymous namespace namespace llvm { -Expected> tokenizeRC(StringRef Input) { - return Tokenizer(Input).run(); +Expected> tokenizeRC(StringRef Input, bool IsWindres) { + return Tokenizer(Input, IsWindres).run(); } } // namespace llvm diff --git a/llvm/tools/llvm-rc/ResourceScriptToken.h b/llvm/tools/llvm-rc/ResourceScriptToken.h index 3dcdfafd2d576..50ef8e4b00f53 100644 --- a/llvm/tools/llvm-rc/ResourceScriptToken.h +++ b/llvm/tools/llvm-rc/ResourceScriptToken.h @@ -76,7 +76,7 @@ class RCToken { // Tokens returned by this function hold only references to the parts // of the Input. Memory buffer containing Input cannot be freed, // modified or reallocated. -Expected> tokenizeRC(StringRef Input); +Expected> tokenizeRC(StringRef Input, bool IsWindres); } // namespace llvm diff --git a/llvm/tools/llvm-rc/ResourceScriptTokenList.def b/llvm/tools/llvm-rc/ResourceScriptTokenList.def index 6ee13b2815d35..98af23c649577 100644 --- a/llvm/tools/llvm-rc/ResourceScriptTokenList.def +++ b/llvm/tools/llvm-rc/ResourceScriptTokenList.def @@ -14,7 +14,7 @@ // Long tokens. They might consist of more than one character. TOKEN(Invalid) // Invalid token. Should not occur in a valid script. -TOKEN(Int) // Integer (decimal, octal or hexadecimal). +TOKEN(Int) // Integer (decimal or hexadecimal, and possibly octal for windres). TOKEN(String) // String value. TOKEN(Identifier) // Script identifier (resource name or type). TOKEN(LineComment) // Beginning of single-line comment. diff --git a/llvm/tools/llvm-rc/llvm-rc.cpp b/llvm/tools/llvm-rc/llvm-rc.cpp index f623342366515..38bf03f51227b 100644 --- a/llvm/tools/llvm-rc/llvm-rc.cpp +++ b/llvm/tools/llvm-rc/llvm-rc.cpp @@ -619,7 +619,8 @@ void doRc(std::string Src, std::string Dest, RcOptions &Opts, StringRef Contents = FileContents->getBuffer(); std::string FilteredContents = filterCppOutput(Contents); - std::vector Tokens = ExitOnErr(tokenizeRC(FilteredContents)); + std::vector Tokens = + ExitOnErr(tokenizeRC(FilteredContents, Opts.IsWindres)); if (Opts.BeVerbose) { const Twine TokenNames[] = {