Last active
April 5, 2022 05:50
-
-
Save qis/58fd0e0326909a89a8622a338420c3c2 to your computer and use it in GitHub Desktop.
Parse ISO 8601 date time strings in C++ and AVX2.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// if(MSVC) | |
// set(AVX2_FLAGS "/arch:AVX2") | |
// else() | |
// set(AVX2_FLAGS "-march=native -mavx2") | |
// endif() | |
// | |
// option(ENABLE_AVX2 "Enable AVX2 support" OFF) | |
// if(ENABLE_AVX2) | |
// if(CMAKE_CROSSCOMPILING) | |
// set(AVX2 TRUE) | |
// else() | |
// include(CheckCXXSourceRuns) | |
// set(CMAKE_REQUIRED_FLAGS "${AVX2_FLAGS}") | |
// check_cxx_source_runs(" | |
// #include <immintrin.h> | |
// int main() { | |
// const auto src = _mm256_set_epi64x( | |
// 0x0002000200020002ULL, 0x0002000200020002ULL, | |
// 0x0002000200020002ULL, 0x0002000200020002ULL); | |
// const auto sub = _mm256_set_epi64x( | |
// 0x0001000100010001ULL, 0x0001000100010001ULL, | |
// 0x0001000100010001ULL, 0x0001000100010001ULL); | |
// const auto dst = _mm256_sub_epi16(src, sub); | |
// alignas(32) short str[16]; | |
// _mm256_store_si256(reinterpret_cast<__m256i*>(str), dst); | |
// for (auto c : str) { | |
// if (c != 1) { | |
// return -1; | |
// } | |
// } | |
// }" AVX2) | |
// endif() | |
// if(AVX2) | |
// set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX2_FLAGS}") | |
// add_definitions(-DICE_AVX2=1) | |
// endif() | |
// endif() | |
#pragma once | |
#include <date/date.h> | |
#include <array> | |
#include <string_view> | |
#include <system_error> | |
#include <cstddef> | |
#include <cstdint> | |
#if defined(ICE_AVX2) || defined(__INTELLISENSE__) | |
#include <immintrin.h> | |
#include <emmintrin.h> | |
#endif | |
namespace ice { | |
// clang-format off | |
// Tells whether `c` is one of the ASCII decimal digit characters '0' through '9'.
constexpr bool is_digit(char c) noexcept {
  return !(c < '0' || c > '9');
}
// Converts exactly N characters starting at `s` into a value of type T,
// interpreting each character as a decimal digit (most significant first).
//
// Nothing is validated: characters outside '0'..'9' simply produce a
// meaningless result, and the caller must make sure N characters are readable.
template <typename T, std::size_t N>
constexpr T parse_unsigned_unchecked(const char* s) noexcept {
  T value = 0;
  for (const char* p = s; p != s + N; ++p) {
    // Shift the accumulated value one decimal place, then append the digit.
    value = static_cast<T>(value * 10 + static_cast<T>(*p) - static_cast<T>('0'));
  }
  return value;
}
template <typename T, std::size_t N> | |
inline T parse_unsigned(const char* s) { | |
constexpr T zero = '0'; | |
T result = 0; | |
for (std::size_t i = 0; i < N; i++) { | |
if (!is_digit(s[i])) { | |
throw std::system_error(std::make_error_code(std::errc::invalid_argument)); | |
} | |
result = static_cast<T>((result << 1) + (result << 3) + static_cast<T>(s[i]) - zero); | |
} | |
return result; | |
} | |
// 0 ns | |
template <std::size_t N> | |
constexpr date::sys_time<std::chrono::milliseconds> parse_date_unchecked(const char (&s)[N]) noexcept { | |
static_assert(N > 23u, "date string must be at least 23 characters"); | |
const auto yr = date::year{ parse_unsigned_unchecked<int, 4>(s) }; | |
const auto mo = date::month{ parse_unsigned_unchecked<unsigned, 2>(s + 5) }; | |
const auto dy = date::day{ parse_unsigned_unchecked<unsigned, 2>(s + 8) }; | |
const auto hr = std::chrono::hours{ parse_unsigned_unchecked<int, 2>(s + 11) }; | |
const auto mi = std::chrono::minutes{ parse_unsigned_unchecked<int, 2>(s + 14) }; | |
const auto sc = std::chrono::seconds{ parse_unsigned_unchecked<long long, 2>(s + 17) }; | |
const auto ms = std::chrono::milliseconds{ parse_unsigned_unchecked<long long, 3>(s + 20) }; | |
return date::sys_days{ yr / mo / dy } + hr + mi + sc + ms; | |
} | |
// Parses the characters of `sv` as fixed-width fields:
//   offset 0: 4-digit year, 5: 2-digit month, 8: 2-digit day,
//   offset 11: 2-digit hour, 14: 2-digit minute, 17: 2-digit second,
//   offset 20: 3-digit millisecond.
// The characters between the fields are never read and nothing is validated.
//
// sv.size() is not consulted: the caller must guarantee that at least 23
// characters (indices 0..22) are readable at sv.data().
//
// Author's timing note: 12 ns
constexpr date::sys_time<std::chrono::milliseconds> parse_date_unchecked(std::string_view sv) noexcept {
  const char* const text = sv.data();
  const date::year y{ parse_unsigned_unchecked<int, 4>(text) };
  const date::month m{ parse_unsigned_unchecked<unsigned, 2>(text + 5) };
  const date::day d{ parse_unsigned_unchecked<unsigned, 2>(text + 8) };
  const std::chrono::hours hh{ parse_unsigned_unchecked<int, 2>(text + 11) };
  const std::chrono::minutes mm{ parse_unsigned_unchecked<int, 2>(text + 14) };
  const std::chrono::seconds ss{ parse_unsigned_unchecked<long long, 2>(text + 17) };
  const std::chrono::milliseconds frac{ parse_unsigned_unchecked<long long, 3>(text + 20) };
  return date::sys_days{ y / m / d } + hh + mm + ss + frac;
}
// Parses a "YYYY-MM-DD hh:mm:ss.mmm" timestamp without throwing.
//
// Parameters:
//   sv - text to parse; only the first 23 characters are examined, anything
//        after index 22 is ignored.
//   ec - cleared on entry; set to std::errc::message_size when `sv` holds
//        fewer than 23 characters, or to std::errc::invalid_argument when a
//        digit position holds a non-digit or a separator ('-', '-', ' ', ':',
//        ':', '.') is not where it is expected.
//
// Returns the parsed time point, or a value-initialized time point when `ec`
// is set. This function does not range-check the field values itself.
//
// Author's timing note (taken before the fixes below, not re-measured): 22 ns
inline date::sys_time<std::chrono::milliseconds> parse_date(std::string_view sv, std::error_code& ec) noexcept {
  ec.clear();
  if (sv.size() < 23u) {
    ec = std::make_error_code(std::errc::message_size);
    return {};
  }
  // Both day digits (indices 8 and 9) must be validated; checking index 8
  // twice would let a non-digit at index 9 slip through.
  if (!is_digit(sv[0]) || !is_digit(sv[1]) || !is_digit(sv[2]) || !is_digit(sv[3]) || sv[4] != '-' ||
      !is_digit(sv[5]) || !is_digit(sv[6]) || sv[7] != '-' ||
      !is_digit(sv[8]) || !is_digit(sv[9]) || sv[10] != ' ' ||
      !is_digit(sv[11]) || !is_digit(sv[12]) || sv[13] != ':' ||
      !is_digit(sv[14]) || !is_digit(sv[15]) || sv[16] != ':' ||
      !is_digit(sv[17]) || !is_digit(sv[18]) || sv[19] != '.' ||
      !is_digit(sv[20]) || !is_digit(sv[21]) || !is_digit(sv[22])) {
    ec = std::make_error_code(std::errc::invalid_argument);
    return {};
  }
  // Forward the view itself. Passing sv.data() would construct a new
  // std::string_view from a bare pointer, which scans for a NUL terminator
  // and can read past the end of a view that is not NUL-terminated.
  return parse_date_unchecked(sv);
}
// Parses a "YYYY-MM-DD hh:mm:ss.mmm" timestamp, reporting failures by exception.
//
// Only the first 23 characters of `sv` are examined.
//
// Throws std::system_error with
//   - std::errc::message_size     when `sv` holds fewer than 23 characters,
//   - std::errc::invalid_argument when a separator is misplaced or a digit
//                                 position holds a non-digit.
//
// Author's timing note: 20 ns
inline date::sys_time<std::chrono::milliseconds> parse_date(std::string_view sv) {
  if (sv.size() < 23u) {
    throw std::system_error(std::make_error_code(std::errc::message_size));
  }
  const bool separators_ok =
    sv[4] == '-' && sv[7] == '-' && sv[10] == ' ' &&
    sv[13] == ':' && sv[16] == ':' && sv[19] == '.';
  if (!separators_ok) {
    throw std::system_error(std::make_error_code(std::errc::invalid_argument));
  }
  // Each field is digit-checked by parse_unsigned while it is converted.
  const char* const text = sv.data();
  const date::year y{ parse_unsigned<int, 4>(text) };
  const date::month m{ parse_unsigned<unsigned, 2>(text + 5) };
  const date::day d{ parse_unsigned<unsigned, 2>(text + 8) };
  const std::chrono::hours hh{ parse_unsigned<int, 2>(text + 11) };
  const std::chrono::minutes mm{ parse_unsigned<int, 2>(text + 14) };
  const std::chrono::seconds ss{ parse_unsigned<long long, 2>(text + 17) };
  const std::chrono::milliseconds frac{ parse_unsigned<long long, 3>(text + 20) };
  return date::sys_days{ y / m / d } + hh + mm + ss + frac;
}
#if defined(ICE_AVX2) || defined(__INTELLISENSE__) | |
// AVX2 variant of the unchecked parser for "YYYY-MM-DD hh:mm:ss.mmm" text.
//
// Sixteen of the seventeen digit characters are processed in one 256-bit
// register; the last millisecond digit (index 22) is added in scalar code.
// Nothing is validated and sv.size() is not consulted, so the caller must
// guarantee that indices 0..22 are readable.
//
// Author's timing note: 29 ns
inline date::sys_time<std::chrono::milliseconds> parse_date_avx2_unchecked(std::string_view sv) noexcept {
  // One 16-bit lane per digit character, lowest lane first, separators skipped.
  const __m256i chars = _mm256_setr_epi16(
    sv[0], sv[1], sv[2], sv[3], sv[5], sv[6], sv[8], sv[9],
    sv[11], sv[12], sv[14], sv[15], sv[17], sv[18], sv[20], sv[21]);

  // Character code -> digit value.
  const __m256i digits = _mm256_subs_epi16(chars, _mm256_set1_epi16('0'));

  // Decimal weight of every lane, lowest lane first:
  // year (1000, 100, 10, 1), then month/day/hour/minute/second as (10, 1)
  // pairs, then the first two millisecond digits (100, 10).
  const __m256i weights = _mm256_setr_epi16(
    1000, 100, 10, 1, 10, 1, 10, 1,
    10, 1, 10, 1, 10, 1, 100, 10);
  const __m256i scaled = _mm256_mullo_epi16(digits, weights);

  // Pairwise sums within each 128-bit half: lanes 0..3 of each half of the
  // result hold the four pair sums of that half of `scaled`.
  const __m256i sums = _mm256_hadd_epi16(scaled, scaled);

  alignas(32) std::uint16_t lanes[16];
  _mm256_store_si256(reinterpret_cast<__m256i*>(lanes), sums);

  const date::year y{ static_cast<int>(lanes[0] + lanes[1]) };
  const date::month m{ static_cast<unsigned>(lanes[2]) };
  const date::day d{ static_cast<unsigned>(lanes[3]) };
  const std::chrono::hours hh{ static_cast<int>(lanes[8]) };
  const std::chrono::minutes mm{ static_cast<int>(lanes[9]) };
  const std::chrono::seconds ss{ static_cast<long long>(lanes[10]) };
  // The units digit of the milliseconds did not fit in the register.
  const std::chrono::milliseconds frac{
    static_cast<long long>(lanes[11] + static_cast<std::uint16_t>(sv[22] - '0'))
  };
  return date::sys_days{ y / m / d } + hh + mm + ss + frac;
}
// AVX2 variant of the non-throwing parser for "YYYY-MM-DD hh:mm:ss.mmm" text.
//
// Parameters:
//   sv - text to parse; only the first 23 characters are examined.
//   ec - cleared on entry; set to std::errc::message_size when `sv` holds
//        fewer than 23 characters, or to std::errc::invalid_argument when a
//        separator is misplaced or a digit position holds a non-digit.
//
// Returns the parsed time point, or a value-initialized time point when `ec`
// is set.
//
// Author's timing note: 27 ns
inline date::sys_time<std::chrono::milliseconds> parse_date_avx2(std::string_view sv, std::error_code& ec) noexcept {
  ec.clear();
  if (sv.size() < 23u) {
    ec = std::make_error_code(std::errc::message_size);
    return {};
  }

  // Separators and the last millisecond digit are checked in scalar code; the
  // other sixteen digits are range-checked in the vector register below.
  const bool scalar_ok =
    sv[4] == '-' && sv[7] == '-' && sv[10] == ' ' &&
    sv[13] == ':' && sv[16] == ':' && sv[19] == '.' && is_digit(sv[22]);
  if (!scalar_ok) {
    ec = std::make_error_code(std::errc::invalid_argument);
    return {};
  }

  // One 16-bit lane per digit character, lowest lane first, separators skipped.
  const __m256i chars = _mm256_setr_epi16(
    sv[0], sv[1], sv[2], sv[3], sv[5], sv[6], sv[8], sv[9],
    sv[11], sv[12], sv[14], sv[15], sv[17], sv[18], sv[20], sv[21]);

  // Any lane above '9' or below '0' marks the input as invalid.
  const __m256i zero_char = _mm256_set1_epi16('0');
  const __m256i nine_char = _mm256_set1_epi16('9');
  const __m256i out_of_range = _mm256_or_si256(
    _mm256_cmpgt_epi16(chars, nine_char),
    _mm256_cmpgt_epi16(zero_char, chars));
  if (_mm256_movemask_epi8(out_of_range) != 0) {
    ec = std::make_error_code(std::errc::invalid_argument);
    return {};
  }

  // Character code -> digit value.
  const __m256i digits = _mm256_subs_epi16(chars, zero_char);

  // Decimal weight of every lane, lowest lane first:
  // year (1000, 100, 10, 1), then month/day/hour/minute/second as (10, 1)
  // pairs, then the first two millisecond digits (100, 10).
  const __m256i weights = _mm256_setr_epi16(
    1000, 100, 10, 1, 10, 1, 10, 1,
    10, 1, 10, 1, 10, 1, 100, 10);
  const __m256i scaled = _mm256_mullo_epi16(digits, weights);

  // Pairwise sums within each 128-bit half: lanes 0..3 of each half of the
  // result hold the four pair sums of that half of `scaled`.
  const __m256i sums = _mm256_hadd_epi16(scaled, scaled);

  alignas(32) std::uint16_t lanes[16];
  _mm256_store_si256(reinterpret_cast<__m256i*>(lanes), sums);

  const date::year y{ static_cast<int>(lanes[0] + lanes[1]) };
  const date::month m{ static_cast<unsigned>(lanes[2]) };
  const date::day d{ static_cast<unsigned>(lanes[3]) };
  const std::chrono::hours hh{ static_cast<int>(lanes[8]) };
  const std::chrono::minutes mm{ static_cast<int>(lanes[9]) };
  const std::chrono::seconds ss{ static_cast<long long>(lanes[10]) };
  // The units digit of the milliseconds did not fit in the register.
  const std::chrono::milliseconds frac{
    static_cast<long long>(lanes[11] + static_cast<std::uint16_t>(sv[22] - '0'))
  };
  return date::sys_days{ y / m / d } + hh + mm + ss + frac;
}
// AVX2 variant of the throwing parser for "YYYY-MM-DD hh:mm:ss.mmm" text.
//
// Only the first 23 characters of `sv` are examined.
//
// Throws std::system_error with
//   - std::errc::message_size     when `sv` holds fewer than 23 characters,
//   - std::errc::invalid_argument when a separator is misplaced or a digit
//                                 position holds a non-digit.
//
// Author's timing note: 32 ns
inline date::sys_time<std::chrono::milliseconds> parse_date_avx2(std::string_view sv) {
  if (sv.size() < 23u) {
    throw std::system_error(std::make_error_code(std::errc::message_size));
  }

  // Separators and the last millisecond digit are checked in scalar code; the
  // other sixteen digits are range-checked in the vector register below.
  const bool scalar_ok =
    sv[4] == '-' && sv[7] == '-' && sv[10] == ' ' &&
    sv[13] == ':' && sv[16] == ':' && sv[19] == '.' && is_digit(sv[22]);
  if (!scalar_ok) {
    throw std::system_error(std::make_error_code(std::errc::invalid_argument));
  }

  // One 16-bit lane per digit character, lowest lane first, separators skipped.
  const __m256i chars = _mm256_setr_epi16(
    sv[0], sv[1], sv[2], sv[3], sv[5], sv[6], sv[8], sv[9],
    sv[11], sv[12], sv[14], sv[15], sv[17], sv[18], sv[20], sv[21]);

  // Any lane above '9' or below '0' marks the input as invalid.
  const __m256i zero_char = _mm256_set1_epi16('0');
  const __m256i nine_char = _mm256_set1_epi16('9');
  const __m256i out_of_range = _mm256_or_si256(
    _mm256_cmpgt_epi16(chars, nine_char),
    _mm256_cmpgt_epi16(zero_char, chars));
  if (_mm256_movemask_epi8(out_of_range) != 0) {
    throw std::system_error(std::make_error_code(std::errc::invalid_argument));
  }

  // Character code -> digit value.
  const __m256i digits = _mm256_subs_epi16(chars, zero_char);

  // Decimal weight of every lane, lowest lane first:
  // year (1000, 100, 10, 1), then month/day/hour/minute/second as (10, 1)
  // pairs, then the first two millisecond digits (100, 10).
  const __m256i weights = _mm256_setr_epi16(
    1000, 100, 10, 1, 10, 1, 10, 1,
    10, 1, 10, 1, 10, 1, 100, 10);
  const __m256i scaled = _mm256_mullo_epi16(digits, weights);

  // Pairwise sums within each 128-bit half: lanes 0..3 of each half of the
  // result hold the four pair sums of that half of `scaled`.
  const __m256i sums = _mm256_hadd_epi16(scaled, scaled);

  alignas(32) std::uint16_t lanes[16];
  _mm256_store_si256(reinterpret_cast<__m256i*>(lanes), sums);

  const date::year y{ static_cast<int>(lanes[0] + lanes[1]) };
  const date::month m{ static_cast<unsigned>(lanes[2]) };
  const date::day d{ static_cast<unsigned>(lanes[3]) };
  const std::chrono::hours hh{ static_cast<int>(lanes[8]) };
  const std::chrono::minutes mm{ static_cast<int>(lanes[9]) };
  const std::chrono::seconds ss{ static_cast<long long>(lanes[10]) };
  // The units digit of the milliseconds did not fit in the register.
  const std::chrono::milliseconds frac{
    static_cast<long long>(lanes[11] + static_cast<std::uint16_t>(sv[22] - '0'))
  };
  return date::sys_days{ y / m / d } + hh + mm + ss + frac;
}
#endif | |
// clang-format on | |
} // namespace ice |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment