summary refs log tree commit diff
path: root/vendor/utfcpp/utf8
diff options
context:
space:
mode:
author Peter Powell <petpow@saberuk.com> 2019-04-25 00:50:19 +0100
committer Peter Powell <petpow@saberuk.com> 2019-04-25 19:24:35 +0100
commit c13d0744fee3b1411d238d45ad66eef8b104f72e (patch)
tree 04b7e41578a3c1d5d7d36eda6628409b218dca3b /vendor/utfcpp/utf8
parent f2712eaf0c191575a55576217a88e4a7af3b8865 (diff)
Update vendored utfcpp library to commit ad27c7d5e0.
Diffstat (limited to 'vendor/utfcpp/utf8')
-rw-r--r-- vendor/utfcpp/utf8/checked.h 29
-rw-r--r-- vendor/utfcpp/utf8/core.h 25
-rw-r--r-- vendor/utfcpp/utf8/cpp11.h 103
-rw-r--r-- vendor/utfcpp/utf8/unchecked.h 29
4 files changed, 138 insertions, 48 deletions
diff --git a/vendor/utfcpp/utf8/checked.h b/vendor/utfcpp/utf8/checked.h
index 2aef5838d..c31861e0a 100644
--- a/vendor/utfcpp/utf8/checked.h
+++ b/vendor/utfcpp/utf8/checked.h
@@ -107,7 +107,9 @@ namespace utf8
*out++ = *it;
break;
case internal::NOT_ENOUGH_ROOM:
- throw not_enough_room();
+ out = utf8::append (replacement, out);
+ start = end;
+ break;
case internal::INVALID_LEAD:
out = utf8::append (replacement, out);
++start;
@@ -174,23 +176,19 @@ namespace utf8
return utf8::peek_next(it, end);
}
- /// Deprecated in versions that include "prior"
- template <typename octet_iterator>
- uint32_t previous(octet_iterator& it, octet_iterator pass_start)
- {
- octet_iterator end = it;
- while (utf8::internal::is_trail(*(--it)))
- if (it == pass_start)
- throw invalid_utf8(*it); // error - no lead byte in the sequence
- octet_iterator temp = it;
- return utf8::next(temp, end);
- }
-
template <typename octet_iterator, typename distance_type>
void advance (octet_iterator& it, distance_type n, octet_iterator end)
{
- for (distance_type i = 0; i < n; ++i)
- utf8::next(it, end);
+ const distance_type zero(0);
+ if (n < zero) {
+ // backward
+ for (distance_type i = n; i < zero; ++i)
+ utf8::prior(it, end);
+ } else {
+ // forward
+ for (distance_type i = zero; i < n; ++i)
+ utf8::next(it, end);
+ }
}
template <typename octet_iterator>
@@ -324,4 +322,3 @@ namespace utf8
#endif //header guard
-
diff --git a/vendor/utfcpp/utf8/core.h b/vendor/utfcpp/utf8/core.h
index ae0f367db..e007ca17d 100644
--- a/vendor/utfcpp/utf8/core.h
+++ b/vendor/utfcpp/utf8/core.h
@@ -49,8 +49,8 @@ namespace internal
const uint16_t LEAD_SURROGATE_MAX = 0xdbffu;
const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u;
const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu;
- const uint16_t LEAD_OFFSET = LEAD_SURROGATE_MIN - (0x10000 >> 10);
- const uint32_t SURROGATE_OFFSET = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN;
+ const uint16_t LEAD_OFFSET = 0xd7c0u; // LEAD_SURROGATE_MIN - (0x10000 >> 10)
+ const uint32_t SURROGATE_OFFSET = 0xfca02400u; // 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN
// Maximum valid value for a Unicode code point
const uint32_t CODE_POINT_MAX = 0x0010ffffu;
@@ -142,7 +142,7 @@ namespace internal
if (!utf8::internal::is_trail(*it))
return INCOMPLETE_SEQUENCE;
-
+
return UTF8_OK;
}
@@ -165,7 +165,7 @@ namespace internal
{
if (it == end)
return NOT_ENOUGH_ROOM;
-
+
code_point = utf8::internal::mask8(*it);
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
@@ -222,7 +222,7 @@ namespace internal
template <typename octet_iterator>
utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point)
{
- if (it == end)
+ if (it == end)
return NOT_ENOUGH_ROOM;
// Save the original value of it so we can go back in case of failure
@@ -237,7 +237,7 @@ namespace internal
// Get trail octets and calculate the code point
utf_error err = UTF8_OK;
switch (length) {
- case 0:
+ case 0:
return INVALID_LEAD;
case 1:
err = utf8::internal::get_sequence_1(it, end, cp);
@@ -313,18 +313,7 @@ namespace internal
((it != end) && (utf8::internal::mask8(*it++)) == bom[1]) &&
((it != end) && (utf8::internal::mask8(*it)) == bom[2])
);
- }
-
- //Deprecated in release 2.3
- template <typename octet_iterator>
- inline bool is_bom (octet_iterator it)
- {
- return (
- (utf8::internal::mask8(*it++)) == bom[0] &&
- (utf8::internal::mask8(*it++)) == bom[1] &&
- (utf8::internal::mask8(*it)) == bom[2]
- );
- }
+ }
} // namespace utf8
#endif // header guard
diff --git a/vendor/utfcpp/utf8/cpp11.h b/vendor/utfcpp/utf8/cpp11.h
new file mode 100644
index 000000000..d93961b04
--- /dev/null
+++ b/vendor/utfcpp/utf8/cpp11.h
@@ -0,0 +1,103 @@
+// Copyright 2018 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_a184c22c_d012_11e8_a8d5_f2801f1b9fd1
+#define UTF8_FOR_CPP_a184c22c_d012_11e8_a8d5_f2801f1b9fd1
+
+#include "checked.h"
+#include <string>
+
+namespace utf8
+{
+
+ inline void append(char32_t cp, std::string& s)
+ {
+ append(uint32_t(cp), std::back_inserter(s));
+ }
+
+ inline std::string utf16to8(const std::u16string& s)
+ {
+ std::string result;
+ utf16to8(s.begin(), s.end(), std::back_inserter(result));
+ return result;
+ }
+
+ inline std::u16string utf8to16(const std::string& s)
+ {
+ std::u16string result;
+ utf8to16(s.begin(), s.end(), std::back_inserter(result));
+ return result;
+ }
+
+ inline std::string utf32to8(const std::u32string& s)
+ {
+ std::string result;
+ utf32to8(s.begin(), s.end(), std::back_inserter(result));
+ return result;
+ }
+
+ inline std::u32string utf8to32(const std::string& s)
+ {
+ std::u32string result;
+ utf8to32(s.begin(), s.end(), std::back_inserter(result));
+ return result;
+ }
+
+ inline std::size_t find_invalid(const std::string& s)
+ {
+ std::string::const_iterator invalid = find_invalid(s.begin(), s.end());
+ return (invalid == s.end()) ? std::string::npos : (invalid - s.begin());
+ }
+
+ inline bool is_valid(const std::string& s)
+ {
+ return is_valid(s.begin(), s.end());
+ }
+
+ inline std::string replace_invalid(const std::string& s, char32_t replacement)
+ {
+ std::string result;
+ replace_invalid(s.begin(), s.end(), std::back_inserter(result), replacement);
+ return result;
+ }
+
+ inline std::string replace_invalid(const std::string& s)
+ {
+ std::string result;
+ replace_invalid(s.begin(), s.end(), std::back_inserter(result));
+ return result;
+ }
+
+ inline bool starts_with_bom(const std::string& s)
+ {
+ return starts_with_bom(s.begin(), s.end());
+ }
+
+} // namespace utf8
+
+#endif // header guard
+
diff --git a/vendor/utfcpp/utf8/unchecked.h b/vendor/utfcpp/utf8/unchecked.h
index cb2427166..c78419f69 100644
--- a/vendor/utfcpp/utf8/unchecked.h
+++ b/vendor/utfcpp/utf8/unchecked.h
@@ -38,7 +38,7 @@ namespace utf8
octet_iterator append(uint32_t cp, octet_iterator result)
{
if (cp < 0x80) // one octet
- *(result++) = static_cast<uint8_t>(cp);
+ *(result++) = static_cast<uint8_t>(cp);
else if (cp < 0x800) { // two octets
*(result++) = static_cast<uint8_t>((cp >> 6) | 0xc0);
*(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
@@ -85,13 +85,13 @@ namespace utf8
break;
}
++it;
- return cp;
+ return cp;
}
template <typename octet_iterator>
uint32_t peek_next(octet_iterator it)
{
- return utf8::unchecked::next(it);
+ return utf8::unchecked::next(it);
}
template <typename octet_iterator>
@@ -102,18 +102,19 @@ namespace utf8
return utf8::unchecked::next(temp);
}
- // Deprecated in versions that include prior, but only for the sake of consistency (see utf8::previous)
- template <typename octet_iterator>
- inline uint32_t previous(octet_iterator& it)
- {
- return utf8::unchecked::prior(it);
- }
-
template <typename octet_iterator, typename distance_type>
void advance (octet_iterator& it, distance_type n)
{
- for (distance_type i = 0; i < n; ++i)
- utf8::unchecked::next(it);
+ const distance_type zero(0);
+ if (n < zero) {
+ // backward
+ for (distance_type i = n; i < zero; ++i)
+ utf8::unchecked::prior(it);
+ } else {
+ // forward
+ for (distance_type i = zero; i < n; ++i)
+ utf8::unchecked::next(it);
+ }
}
template <typename octet_iterator>
@@ -128,7 +129,7 @@ namespace utf8
template <typename u16bit_iterator, typename octet_iterator>
octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
- {
+ {
while (start != end) {
uint32_t cp = utf8::internal::mask16(*start++);
// Take care of surrogate pairs first
@@ -138,7 +139,7 @@ namespace utf8
}
result = utf8::unchecked::append(cp, result);
}
- return result;
+ return result;
}
template <typename u16bit_iterator, typename octet_iterator>