Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/ada.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "ada/character_sets-inl.h"
#include "ada/checkers-inl.h"
#include "ada/common_defs.h"
#include "ada/ada_data_url.h"
#include "ada/log.h"
#include "ada/encoding_type.h"
#include "ada/helpers.h"
Expand Down
33 changes: 33 additions & 0 deletions include/ada/ada_data_url.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#ifndef ADA_DATA_URL_H
#define ADA_DATA_URL_H

#include <string_view>

namespace ada::data_url {
// Result of processing a data: URL.
// https://fetch.spec.whatwg.org/#data-url-struct
struct data_url {
  // Set to false by parse_data_url when the input cannot be processed.
  bool is_valid = true;
  // Body of the data: URL, as filled in by parse_data_url.
  std::string body{};
  // MIME type essence (e.g. "application/octet-stream") of the data: URL.
  std::string essence{};

  // Construction and destruction.
  data_url() = default;
  ~data_url() = default;

  // Copy operations.
  data_url(const data_url &) = default;
  data_url &operator=(const data_url &) = default;

  // Move operations.
  data_url(data_url &&) noexcept = default;
  data_url &operator=(data_url &&) noexcept = default;
};

// Parses `data_url` as a URL and processes it as a data: URL.
// The returned struct has is_valid == false when the input does not parse as
// a URL, when its protocol is not "data:", or when no ',' separates the MIME
// type from the body.
ada::data_url::data_url parse_data_url(std::string_view data_url);

// Scans `input` from `position` for the next occurrence of `c`.
// On return `position` is the index of that occurrence, or input.length()
// when there is none. Intended to return the run of characters between the
// starting position and that point.
std::string collect_sequence_of_code_points(char c, const std::string& input, size_t& position);

// Predicate used by removeASCIIWhiteSpace to decide which characters to strip.
bool isASCIIWhiteSpace(char c);

// Returns a copy of `input` with whitespace (as classified by
// isASCIIWhiteSpace) stripped; `leading` and `trailing` select which end(s)
// of the string are processed.
std::string removeASCIIWhiteSpace(const std::string& input, bool leading, bool trailing);

// Reports whether `input` ends with a ';' followed by an ASCII
// case-insensitive "base64" marker.
// NOTE(review): declared `static` in a header but defined in
// src/ada_data_url.cpp; this appears to rely on src/ada.cpp including every
// .cpp into a single translation unit -- confirm before including this header
// from other translation units.
static constexpr bool is_base64(std::string_view input);

}

#endif // ADA_DATA_URL_H
2 changes: 2 additions & 0 deletions include/ada/serializers.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ std::string ipv6(const std::array<uint16_t, 8>& address) noexcept;
*/
std::string ipv4(uint64_t address) noexcept;

std::string url_serializer(const ada::url& url, bool excludeFragment) noexcept;

} // namespace ada::serializers

#endif // ADA_SERIALIZERS_H
3 changes: 2 additions & 1 deletion src/ada.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@
#include "parser.cpp"
#include "url_components.cpp"
#include "url_aggregator.cpp"
#include "ada_c.cpp"
#include "ada_c.cpp"
#include "ada_data_url.cpp"
132 changes: 132 additions & 0 deletions src/ada_data_url.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
#include <string_view>
#include <cctype>

#include "ada.h"

namespace ada::data_url {

// Processes a data: URL following the steps of
// https://fetch.spec.whatwg.org/#data-url-processor.
//
// @param data_url  textual URL to process.
// @return a data_url struct. `is_valid` is false when the input does not parse
//         as a URL, its scheme is not "data", or it has no ',' separating the
//         MIME type from the body. Otherwise `body` holds the percent-decoded
//         body and `essence` the MIME type essence (defaulting to
//         "text/plain" when the URL carries no MIME type).
//
// Base64 bodies are not decoded yet (see the TODO at step 11.2); they are
// returned percent-decoded but otherwise verbatim.
ada::data_url::data_url parse_data_url(std::string_view data_url) {
  auto out = ada::data_url::data_url();

  auto url = ada::parse<ada::url>(data_url, nullptr);

  // 1. Assert: dataURL's scheme is "data".
  if (!url || url->get_protocol() != "data:") {
    out.is_valid = false;
    return out;
  }

  // 2. Let input be the result of running the URL serializer on dataURL with
  //    exclude fragment set to true.
  url->set_hash({});
  auto input = url->get_href();

  // 3. Remove the leading "data:" from input.
  input.erase(0, 5);

  // 4. Let position point at the start of input.
  size_t position = 0;

  // 5. Let mimeType be the result of collecting a sequence of code points that
  //    are not equal to U+002C (,), given position.
  auto mimetype = collect_sequence_of_code_points(',', input, position);

  // 6. Strip leading and trailing ASCII whitespace from mimeType.
  mimetype = removeASCIIWhiteSpace(mimetype, true, true);

  // 7. If position is past the end of input, then return failure.
  if (position >= input.length()) {
    out.is_valid = false;
    return out;
  }

  // 8. Advance position by 1 (skipping the ',').
  position++;

  // 9. Let encodedBody be the remainder of input.
  const std::string encoded_body = input.substr(position);

  // 10. Let body be the percent-decoding of encodedBody.
  //     The body is stored unconditionally, not only for base64 payloads.
  out.body = ada::unicode::percent_decode(encoded_body, encoded_body.find('%'));

  // 11. If mimeType ends with U+003B (;), followed by zero or more U+0020
  //     SPACE, followed by an ASCII case-insensitive match for "base64", then:
  if (is_base64(mimetype)) {
    // 11.1. Let stringBody be the isomorphic decode of body.
    // 11.2. Set body to the forgiving-base64 decode of stringBody.
    // 11.3. If body is failure, then return failure.
    // TODO: forgiving-base64 decoding is not implemented; out.body keeps the
    // percent-decoded (still base64-encoded) payload.

    // 11.4. Remove the last 6 code points from mimeType.
    // 11.5. Remove trailing U+0020 SPACE code points from mimeType, if any.
    // 11.6. Remove the last U+003B (;) from mimeType.
    // is_base64() guarantees a ';' exists in mimeType and that everything
    // after the last one is spaces + "base64", so erasing from that ';'
    // performs all three steps. The index must be computed on mimeType (not
    // on input), otherwise a ';' inside the body would yield an out-of-range
    // offset.
    mimetype.erase(mimetype.find_last_of(';'));
  }

  // 12. If mimeType starts with ";", then prepend "text/plain" to mimeType.
  if (!mimetype.empty() && mimetype.front() == ';') {
    mimetype = "text/plain" + mimetype;
  }

  // 13./14. The essence is the "type/subtype" part, i.e. everything before the
  //         first parameter separator. When there is no MIME type at all the
  //         spec falls back to text/plain;charset=US-ASCII.
  // TODO: run a full MIME type parser here (validation, lowercasing,
  // parameters) once one is available.
  out.essence = mimetype.substr(0, mimetype.find(';'));
  if (out.essence.empty()) {
    out.essence = "text/plain";
  }

  return out;
}

// Collects the characters of `input` starting at `position` up to, but not
// including, the next occurrence of `c`.
//
// @param c         delimiter that terminates the collected sequence.
// @param input     string being scanned.
// @param position  in: index to start from; out: index of the delimiter, or
//                  input.length() when the delimiter does not occur.
// @return the collected characters (empty when `position` is already at or
//         past the end of `input`, or points at the delimiter).
std::string collect_sequence_of_code_points(char c, const std::string& input, size_t& position) {
  const size_t start = position;

  // Nothing left to collect; also keeps substr() from throwing when the
  // caller's position is beyond the end of input.
  if (start >= input.length()) {
    position = input.length();
    return {};
  }

  const size_t delimiter = input.find(c, start);
  if (delimiter == std::string::npos) {
    position = input.length();
    return input.substr(start);
  }

  position = delimiter;
  // substr() takes a count, not an end index.
  return input.substr(start, delimiter - start);
}

// Reports whether `c` is ASCII whitespace as defined by
// https://infra.spec.whatwg.org/#ascii-whitespace: U+0009 TAB, U+000A LF,
// U+000C FF, U+000D CR, or U+0020 SPACE.
bool isASCIIWhiteSpace(char c) {
  return c == '\t' || c == '\n' || c == '\f' || c == '\r' || c == ' ';
}

// Returns a copy of `input` with ASCII whitespace stripped from its ends.
//
// @param input     string to strip.
// @param leading   when true, whitespace at the front is removed.
// @param trailing  when true, whitespace at the back is removed.
// @return the stripped copy; empty when `input` is empty or, with both flags
//         set, consists solely of whitespace.
std::string removeASCIIWhiteSpace(const std::string& input, bool leading, bool trailing) {
  size_t first = 0;
  size_t last = input.length();  // one past the last character to keep

  if (leading) {
    while (first < last && isASCIIWhiteSpace(input[first])) {
      ++first;
    }
  }

  if (trailing) {
    // Inspect the character *before* `last`; input[last] is the terminator.
    while (last > first && isASCIIWhiteSpace(input[last - 1])) {
      --last;
    }
  }

  // substr() takes a count, not an end index.
  return input.substr(first, last - first);
}

// Reports whether a MIME type string carries the base64 marker described in
// step 11 of https://fetch.spec.whatwg.org/#data-url-processor: it must end
// with U+003B (;), followed by zero or more U+0020 SPACE, followed by an
// ASCII case-insensitive match for "base64".
//
// @param input  MIME type portion of a data: URL.
// @return true when the marker is present at the end of `input`.
static constexpr bool is_base64(std::string_view input) {
  const auto last_semicolon = input.rfind(';');
  if (last_semicolon == std::string_view::npos) {
    return false;
  }

  auto tail = input.substr(last_semicolon + 1);

  // Skip the optional run of spaces between ';' and "base64".
  while (!tail.empty() && tail.front() == ' ') {
    tail.remove_prefix(1);
  }

  // OR-ing with 0x20 folds ASCII upper-case letters onto lower-case ones.
  return tail.size() == 6 && (tail[0] | 0x20) == 'b' &&
         (tail[1] | 0x20) == 'a' && (tail[2] | 0x20) == 's' &&
         (tail[3] | 0x20) == 'e' && tail[4] == '6' && tail[5] == '4';
}

}
18 changes: 18 additions & 0 deletions src/serializers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,22 @@ std::string ipv4(const uint64_t address) noexcept {
return output;
}

// Serializes `url` to its href, optionally without the fragment.
//
// @param url              URL to serialize.
// @param excludeFragment  when true, the fragment (the text returned by
//                         get_hash(), or a lone trailing '#') is dropped from
//                         the result.
// @return the serialized URL.
std::string url_serializer(const ada::url& url, bool excludeFragment) noexcept {
  std::string result = url.get_href();
  if (!excludeFragment) {
    return result;
  }

  const size_t fragment_size = url.has_hash() ? url.get_hash().size() : 0;

  if (fragment_size != 0) {
    // Cut the fragment text off the end of the href.
    result = result.substr(0, result.length() - fragment_size);
  } else if (result.ends_with('#')) {
    // No fragment text, but the href still carries the bare delimiter.
    result.pop_back();
  }

  return result;
}

} // namespace ada::serializers
7 changes: 7 additions & 0 deletions tests/basic_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -462,4 +462,11 @@ TYPED_TEST(basic_tests, negativeport) {
auto url = ada::parse<TypeParam>("https://www.google.com");
ASSERT_FALSE(url->set_port("-1"));
SUCCEED();
}

// Processes a base64-flagged data: URL and checks the reported essence and
// body. The body is compared against the still-encoded payload because
// parse_data_url does not base64-decode yet.
TYPED_TEST(basic_tests, data_url) {
  constexpr std::string_view input =
      "data:application/octet-stream;base64,YWJj";

  const auto parsed = ada::data_url::parse_data_url(input);

  ASSERT_TRUE(parsed.is_valid);
  ASSERT_EQ(parsed.essence, "application/octet-stream");
  ASSERT_EQ(parsed.body, "YWJj");
}
Loading