Skip to content

Commit 19376c5

Browse files
authored
Implement the RFC 3492 Punycode encoding (#2080)
Signed-off-by: Juan Cruz Viotti <jv@jviotti.com>
1 parent c9fbf2d commit 19376c5

File tree

14 files changed

+1227
-0
lines changed

14 files changed

+1227
-0
lines changed

.github/workflows/website-build.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ jobs:
2020
-DSOURCEMETA_CORE_LANG_PROCESS:BOOL=OFF
2121
-DSOURCEMETA_CORE_LANG_PARALLEL:BOOL=OFF
2222
-DSOURCEMETA_CORE_LANG_NUMERIC:BOOL=OFF
23+
-DSOURCEMETA_CORE_PUNYCODE:BOOL=OFF
2324
-DSOURCEMETA_CORE_TIME:BOOL=OFF
2425
-DSOURCEMETA_CORE_UUID:BOOL=OFF
2526
-DSOURCEMETA_CORE_REGEX:BOOL=OFF

.github/workflows/website-deploy.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ jobs:
3030
-DSOURCEMETA_CORE_LANG_PROCESS:BOOL=OFF
3131
-DSOURCEMETA_CORE_LANG_PARALLEL:BOOL=OFF
3232
-DSOURCEMETA_CORE_LANG_NUMERIC:BOOL=OFF
33+
-DSOURCEMETA_CORE_PUNYCODE:BOOL=OFF
3334
-DSOURCEMETA_CORE_TIME:BOOL=OFF
3435
-DSOURCEMETA_CORE_UUID:BOOL=OFF
3536
-DSOURCEMETA_CORE_REGEX:BOOL=OFF

CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ option(SOURCEMETA_CORE_LANG_IO "Build the Sourcemeta Core language I/O library"
77
option(SOURCEMETA_CORE_LANG_PROCESS "Build the Sourcemeta Core language Process library" ON)
88
option(SOURCEMETA_CORE_LANG_PARALLEL "Build the Sourcemeta Core language parallel library" ON)
99
option(SOURCEMETA_CORE_LANG_NUMERIC "Build the Sourcemeta Core language numeric library" ON)
10+
option(SOURCEMETA_CORE_PUNYCODE "Build the Sourcemeta Core Punycode library" ON)
1011
option(SOURCEMETA_CORE_TIME "Build the Sourcemeta Core time library" ON)
1112
option(SOURCEMETA_CORE_UUID "Build the Sourcemeta Core UUID library" ON)
1213
option(SOURCEMETA_CORE_MD5 "Build the Sourcemeta Core MD5 library" ON)
@@ -74,6 +75,10 @@ if(SOURCEMETA_CORE_LANG_NUMERIC)
7475
add_subdirectory(src/lang/numeric)
7576
endif()
7677

78+
if(SOURCEMETA_CORE_PUNYCODE)
79+
add_subdirectory(src/core/punycode)
80+
endif()
81+
7782
if(SOURCEMETA_CORE_TIME)
7883
add_subdirectory(src/core/time)
7984
endif()
@@ -183,6 +188,10 @@ if(SOURCEMETA_CORE_TESTS)
183188
add_subdirectory(test/numeric)
184189
endif()
185190

191+
if(SOURCEMETA_CORE_PUNYCODE)
192+
add_subdirectory(test/punycode)
193+
endif()
194+
186195
if(SOURCEMETA_CORE_TIME)
187196
add_subdirectory(test/time)
188197
endif()

config.cmake.in

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ if(NOT SOURCEMETA_CORE_COMPONENTS)
88
list(APPEND SOURCEMETA_CORE_COMPONENTS process)
99
list(APPEND SOURCEMETA_CORE_COMPONENTS parallel)
1010
list(APPEND SOURCEMETA_CORE_COMPONENTS numeric)
11+
list(APPEND SOURCEMETA_CORE_COMPONENTS punycode)
1112
list(APPEND SOURCEMETA_CORE_COMPONENTS time)
1213
list(APPEND SOURCEMETA_CORE_COMPONENTS uuid)
1314
list(APPEND SOURCEMETA_CORE_COMPONENTS md5)
@@ -38,6 +39,8 @@ foreach(component ${SOURCEMETA_CORE_COMPONENTS})
3839
elseif(component STREQUAL "numeric")
3940
find_dependency(mpdecimal CONFIG)
4041
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_numeric.cmake")
42+
elseif(component STREQUAL "punycode")
43+
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_punycode.cmake")
4144
elseif(component STREQUAL "time")
4245
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_time.cmake")
4346
elseif(component STREQUAL "uuid")

src/core/punycode/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME punycode
2+
PRIVATE_HEADERS error.h
3+
SOURCES punycode.cc utf8.h)
4+
5+
if(SOURCEMETA_CORE_INSTALL)
6+
sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME punycode)
7+
endif()
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
#ifndef SOURCEMETA_CORE_PUNYCODE_H_
2+
#define SOURCEMETA_CORE_PUNYCODE_H_
3+
4+
#ifndef SOURCEMETA_CORE_PUNYCODE_EXPORT
5+
#include <sourcemeta/core/punycode_export.h>
6+
#endif
7+
8+
// NOLINTBEGIN(misc-include-cleaner)
9+
#include <sourcemeta/core/punycode_error.h>
10+
// NOLINTEND(misc-include-cleaner)
11+
12+
#include <istream> // std::istream
13+
#include <ostream> // std::ostream
14+
#include <string> // std::string, std::u32string
15+
#include <string_view> // std::string_view, std::u32string_view
16+
17+
/// @defgroup punycode Punycode
18+
/// @brief An implementation of RFC 3492 Punycode.
19+
///
20+
/// This functionality is included as follows:
21+
///
22+
/// ```cpp
23+
/// #include <sourcemeta/core/punycode.h>
24+
/// ```
25+
26+
namespace sourcemeta::core {
27+
28+
// See https://www.rfc-editor.org/rfc/rfc3492
29+
30+
/// @ingroup punycode
31+
/// Encode Unicode code points (UTF-32) to Punycode. For example:
32+
///
33+
/// ```cpp
34+
/// #include <sourcemeta/core/punycode.h>
35+
/// #include <cassert>
36+
///
37+
/// const std::u32string input{0x0048, 0x0065, 0x006C, 0x006C, 0x006F,
38+
/// 0x002D, 0x305D, 0x308C, 0x305E, 0x308C,
39+
/// 0x306E, 0x5834, 0x6240};
40+
/// assert(sourcemeta::core::utf32_to_punycode(input) ==
41+
/// "Hello--193eia8l8lb9392jlk1a");
42+
/// ```
43+
///
44+
/// Note that stream-based overloads for UTF-32 are not provided
45+
/// because the C++ standard library does not define the required locale facets
46+
/// (`std::ctype<char32_t>`) for `std::basic_istream<char32_t>` and
47+
/// `std::basic_ostream<char32_t>` to function properly.
48+
SOURCEMETA_CORE_PUNYCODE_EXPORT
49+
auto utf32_to_punycode(std::u32string_view input) -> std::string;
50+
51+
/// @ingroup punycode
52+
/// Encode UTF-8 to Punycode using streams. For example:
53+
///
54+
/// ```cpp
55+
/// #include <sourcemeta/core/punycode.h>
56+
/// #include <sstream>
57+
/// #include <cassert>
58+
///
59+
/// std::istringstream input{"M\xC3\xBCnchen"};
60+
/// std::ostringstream output;
61+
/// sourcemeta::core::utf8_to_punycode(input, output);
62+
/// assert(output.str() == "Mnchen-3ya");
63+
/// ```
64+
SOURCEMETA_CORE_PUNYCODE_EXPORT
65+
auto utf8_to_punycode(std::istream &input, std::ostream &output) -> void;
66+
67+
/// @ingroup punycode
68+
/// Encode UTF-8 to Punycode. For example:
69+
///
70+
/// ```cpp
71+
/// #include <sourcemeta/core/punycode.h>
72+
/// #include <cassert>
73+
///
74+
/// assert(sourcemeta::core::utf8_to_punycode("M\xC3\xBCnchen") ==
75+
/// "Mnchen-3ya");
76+
/// ```
77+
SOURCEMETA_CORE_PUNYCODE_EXPORT
78+
auto utf8_to_punycode(std::string_view input) -> std::string;
79+
80+
/// @ingroup punycode
81+
/// Decode Punycode to Unicode code points (UTF-32). For example:
82+
///
83+
/// ```cpp
84+
/// #include <sourcemeta/core/punycode.h>
85+
/// #include <cassert>
86+
///
87+
/// const std::u32string expected{0x0048, 0x0065, 0x006C, 0x006C, 0x006F,
88+
/// 0x002D, 0x305D, 0x308C, 0x305E, 0x308C,
89+
/// 0x306E, 0x5834, 0x6240};
90+
/// assert(sourcemeta::core::punycode_to_utf32("Hello--193eia8l8lb9392jlk1a")
91+
/// ==
92+
/// expected);
93+
/// ```
94+
///
95+
/// Note that stream-based overloads for UTF-32 are not provided
96+
/// because the C++ standard library does not define the required locale facets
97+
/// (`std::ctype<char32_t>`) for `std::basic_istream<char32_t>` and
98+
/// `std::basic_ostream<char32_t>` to function properly.
99+
SOURCEMETA_CORE_PUNYCODE_EXPORT
100+
auto punycode_to_utf32(std::string_view input) -> std::u32string;
101+
102+
/// @ingroup punycode
103+
/// Decode Punycode to UTF-8 using streams. For example:
104+
///
105+
/// ```cpp
106+
/// #include <sourcemeta/core/punycode.h>
107+
/// #include <sstream>
108+
/// #include <cassert>
109+
///
110+
/// std::istringstream input{"Mnchen-3ya"};
111+
/// std::ostringstream output;
112+
/// sourcemeta::core::punycode_to_utf8(input, output);
113+
/// assert(output.str() == "M\xC3\xBCnchen");
114+
/// ```
115+
SOURCEMETA_CORE_PUNYCODE_EXPORT
116+
auto punycode_to_utf8(std::istream &input, std::ostream &output) -> void;
117+
118+
/// @ingroup punycode
119+
/// Decode Punycode to UTF-8. For example:
120+
///
121+
/// ```cpp
122+
/// #include <sourcemeta/core/punycode.h>
123+
/// #include <cassert>
124+
///
125+
/// assert(sourcemeta::core::punycode_to_utf8("Mnchen-3ya") ==
126+
/// "M\xC3\xBCnchen");
127+
/// ```
128+
SOURCEMETA_CORE_PUNYCODE_EXPORT
129+
auto punycode_to_utf8(std::string_view input) -> std::string;
130+
131+
} // namespace sourcemeta::core
132+
133+
#endif
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#ifndef SOURCEMETA_CORE_PUNYCODE_ERROR_H_
2+
#define SOURCEMETA_CORE_PUNYCODE_ERROR_H_
3+
4+
#ifndef SOURCEMETA_CORE_PUNYCODE_EXPORT
5+
#include <sourcemeta/core/punycode_export.h>
6+
#endif
7+
8+
#include <exception> // std::exception
9+
#include <string> // std::string
10+
#include <utility> // std::move
11+
12+
namespace sourcemeta::core {
13+
14+
// Exporting symbols that depend on the standard C++ library is considered
15+
// safe.
16+
// https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN
17+
#if defined(_MSC_VER)
18+
#pragma warning(disable : 4251 4275)
19+
#endif
20+
21+
/// @ingroup punycode
/// An exception type for the Punycode module. It owns a descriptive message
/// and exposes it through the standard `what()` interface.
22+
class SOURCEMETA_CORE_PUNYCODE_EXPORT PunycodeError : public std::exception {
23+
public:
24+
/// Create an error from a message. The string is taken by value and moved
/// into the object, so passing a temporary does not incur an extra copy.
PunycodeError(std::string message) : message_{std::move(message)} {}
25+
26+
/// Return the stored message as a null-terminated string. The pointer
/// refers to storage owned by this exception object.
[[nodiscard]] auto what() const noexcept -> const char * override {
27+
return this->message_.c_str();
28+
}
29+
30+
private:
31+
// The message handed back by `what()`
std::string message_;
32+
};
33+
34+
#if defined(_MSC_VER)
35+
#pragma warning(default : 4251 4275)
36+
#endif
37+
38+
} // namespace sourcemeta::core
39+
40+
#endif

0 commit comments

Comments
 (0)