Skip to content

Instantly share code, notes, and snippets.

@fire-eggs
Last active July 15, 2020 20:17
Show Gist options
  • Save fire-eggs/efe75c4e059338a23d3554ac61102453 to your computer and use it in GitHub Desktop.
Save fire-eggs/efe75c4e059338a23d3554ac61102453 to your computer and use it in GitHub Desktop.
Timing test for alternative implementations of fl_filename_isdir.
#define _CRT_SECURE_NO_WARNINGS
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <string>

#include <windows.h>
#include <fileapi.h> // GetFileAttributes
#define FL_PATH_MAX 2048
/*
 * Decode the single UTF-8 sequence that starts at p and return its code point.
 *
 * p    points at the first (lead) byte of the sequence.
 * end  if non-null, marks one past the last readable byte; a sequence whose
 *      trailing bytes would sit at or beyond end is treated as malformed.
 *      If null, no bounds checks are made.
 * len  if non-null, receives the number of bytes consumed: 1 to 4 for a
 *      valid sequence, always 1 for a malformed one.
 *
 * A malformed sequence yields 0xfffd (REPLACEMENT CHARACTER), or the raw lead
 * byte when ERRORS_TO_ISO8859_1 is enabled.
 *
 * Layout note: two of the #if sections below contain the closing brace of the
 * branch that precedes them. The brace right after each #endif then closes
 * whichever branch is open, so the braces balance whether or not the macro is
 * enabled.
 */
unsigned fl_utf8decode(const char* p, const char* end, int* len)
{
/* Lead byte, read as unsigned so the range comparisons below are well defined. */
unsigned char c = *(const unsigned char*)p;
if (c < 0x80) {
/* Single-byte (ASCII) character. */
if (len) *len = 1;
return c;
#if ERRORS_TO_CP1252
}
else if (c < 0xa0) {
/* Optional: translate bytes 0x80..0x9f through the cp1252 table
   (the table itself is not declared in the visible source). */
if (len) *len = 1;
return cp1252[c - 0x80];
#endif
}
else if (c < 0xc2) {
/* Bytes below 0xc2 cannot start a multi-byte sequence: 0x80..0xbf are
   continuation bytes and 0xc0/0xc1 could only begin overlong encodings. */
goto FAIL;
}
/* Every remaining form needs a second byte of the shape 10xxxxxx. */
if ((end && p + 1 >= end) || (p[1] & 0xc0) != 0x80) goto FAIL;
if (c < 0xe0) {
/* Two-byte sequence: 5 payload bits from the lead, 6 from the trail. */
if (len) *len = 2;
return
((p[0] & 0x1f) << 6) +
((p[1] & 0x3f));
}
else if (c == 0xe0) {
/* A second byte below 0xa0 would be an overlong form of a value < 0x800. */
if (((const unsigned char*)p)[1] < 0xa0) goto FAIL;
goto UTF8_3;
#if STRICT_RFC3629
}
else if (c == 0xed) {
/* RFC 3629 says surrogate chars are illegal. */
if (((const unsigned char*)p)[1] >= 0xa0) goto FAIL;
goto UTF8_3;
}
else if (c == 0xef) {
/* 0xfffe and 0xffff are also illegal characters */
/* NOTE: p[2] is read here before the bounds check at UTF8_3 has run. */
if (((const unsigned char*)p)[1] == 0xbf &&
((const unsigned char*)p)[2] >= 0xbe) goto FAIL;
goto UTF8_3;
#endif
}
else if (c < 0xf0) {
UTF8_3:
/* Three-byte sequence: the third byte must exist and be a continuation byte. */
if ((end && p + 2 >= end) || (p[2] & 0xc0) != 0x80) goto FAIL;
if (len) *len = 3;
return
((p[0] & 0x0f) << 12) +
((p[1] & 0x3f) << 6) +
((p[2] & 0x3f));
}
else if (c == 0xf0) {
/* A second byte below 0x90 would be an overlong form of a value < 0x10000. */
if (((const unsigned char*)p)[1] < 0x90) goto FAIL;
goto UTF8_4;
}
else if (c < 0xf4) {
UTF8_4:
/* Four-byte sequence: third and fourth bytes must exist and be continuation bytes. */
if ((end && p + 3 >= end) || (p[2] & 0xc0) != 0x80 || (p[3] & 0xc0) != 0x80) goto FAIL;
/* Stored before the strict check below; FAIL resets it to 1 if that check rejects. */
if (len) *len = 4;
#if STRICT_RFC3629
/* RFC 3629 says all codes ending in fffe or ffff are illegal: */
if ((p[1] & 0xf) == 0xf &&
((const unsigned char*)p)[2] == 0xbf &&
((const unsigned char*)p)[3] >= 0xbe) goto FAIL;
#endif
return
((p[0] & 0x07) << 18) +
((p[1] & 0x3f) << 12) +
((p[2] & 0x3f) << 6) +
((p[3] & 0x3f));
}
else if (c == 0xf4) {
if (((const unsigned char*)p)[1] > 0x8f) goto FAIL; /* after 0x10ffff */
goto UTF8_4;
}
else {
/* Lead bytes above 0xf4 fall through to here; every other rejection jumps here. */
FAIL:
/* Consume exactly one byte so the caller can resynchronise on the next one. */
if (len) *len = 1;
#if ERRORS_TO_ISO8859_1
return c;
#else
return 0xfffd; /* Unicode REPLACEMENT CHARACTER */
#endif
}
}
/*
 * Convert srclen bytes of UTF-8 at src into UTF-16 code units in dst.
 *
 * dst / dstlen  output buffer and its capacity in 16-bit units, including the
 *               terminating zero. With dstlen == 0 nothing is written (dst may
 *               be null) and the call only measures.
 *
 * Returns the number of UTF-16 units the complete conversion needs, not
 * counting the terminator. A result >= dstlen means the output was truncated;
 * whenever dstlen is nonzero the output is still zero-terminated.
 */
unsigned fl_utf8toUtf16(const char* src, unsigned srclen,
                        unsigned short* dst, unsigned dstlen)
{
  const char* cur = src;
  const char* const stop = src + srclen;
  unsigned units = 0;

  if (dstlen != 0) {
    while (true) {
      if (cur >= stop) {
        /* Everything fit: terminate and report the exact length. */
        dst[units] = 0;
        return units;
      }

      if ((*cur & 0x80) == 0) {
        /* Plain ASCII maps to one unit unchanged. */
        dst[units] = *cur;
        ++cur;
      } else {
        int consumed;
        const unsigned cp = fl_utf8decode(cur, stop, &consumed);
        cur += consumed;

        if (cp >= 0x10000) {
          /* Needs a surrogate pair; stop if pair plus terminator cannot fit. */
          if (units + 2 >= dstlen) {
            dst[units] = 0;
            units += 2;
            break;
          }
          dst[units] = (((cp - 0x10000u) >> 10) & 0x3ff) | 0xd800;
          ++units;
          dst[units] = (cp & 0x3ff) | 0xdc00;
        } else {
          dst[units] = static_cast<unsigned short>(cp);
        }
      }

      ++units;
      if (units == dstlen) {
        /* Buffer exhausted: the last unit written gives way to the terminator. */
        dst[units - 1] = 0;
        break;
      }
    }
  }

  /* Output is full (or absent): keep counting what the remainder would need. */
  for (; cur < stop; ++units) {
    if ((*cur & 0x80) == 0) {
      ++cur;
      continue;
    }
    int consumed;
    const unsigned cp = fl_utf8decode(cur, stop, &consumed);
    cur += consumed;
    if (cp >= 0x10000) ++units; /* second half of a surrogate pair */
  }
  return units;
}
// Heap buffer that holds the most recent conversion; callers in this file pass
// it to utf8_to_wchar() so it is reused and regrown instead of reallocated
// from scratch on every call.
static wchar_t* wbuf = nullptr;

/*
 * Convert a UTF-8 string to a zero-terminated UTF-16 string stored in wbuf.
 *
 * utf8  source text; a null pointer is treated as an empty string.
 * wbuf  in/out heap buffer (null or obtained from malloc/realloc). It is
 *       resized as needed and updated in place.
 * lg    number of bytes of utf8 to convert, or a negative value to use
 *       strlen(utf8).
 *
 * Returns wbuf on success. If the buffer cannot be grown, the old buffer is
 * released, wbuf is set to null and null is returned, so a stale conversion
 * can never be mistaken for the new one.
 *
 * NOTE(review): the cast to unsigned short* assumes wchar_t is 16 bits wide,
 * as on Windows - confirm before using this elsewhere.
 */
static wchar_t* utf8_to_wchar(const char* utf8, wchar_t*& wbuf, int lg = -1) {
  unsigned len = 0;
  if (utf8)
    len = (lg >= 0) ? static_cast<unsigned>(lg) : static_cast<unsigned>(strlen(utf8));

  // First pass only measures: with dstlen == 0 nothing is written.
  const unsigned capacity = fl_utf8toUtf16(utf8, len, nullptr, 0) + 1;

  // Keep the old pointer until realloc succeeds; assigning its result straight
  // to wbuf would leak the old block and leave a null to be written through.
  wchar_t* grown = static_cast<wchar_t*>(realloc(wbuf, sizeof(wchar_t) * capacity));
  if (!grown) {
    free(wbuf);
    wbuf = nullptr;
    return nullptr;
  }
  wbuf = grown;

  const unsigned written =
      fl_utf8toUtf16(utf8, len, reinterpret_cast<unsigned short*>(wbuf), capacity);
  wbuf[written] = 0;
  return wbuf;
}
int isdirGFAW(const char* n)
{
utf8_to_wchar(n, wbuf, (int)strlen(n));
DWORD res = GetFileAttributesW(wbuf);
return (res & FILE_ATTRIBUTE_DIRECTORY);
}
// Returns 1 when c is a path separator ('/' or '\\'), otherwise 0.
inline int isdirsep(char c) {
  switch (c) {
    case '/':
    case '\\':
      return 1;
    default:
      return 0;
  }
}
/*
 * Directory test built on _stat(), with path fix-ups applied first.
 *
 * n  path to test.
 *
 * Returns 1 when _stat() succeeds on the adjusted path and reports a
 * directory, otherwise 0.
 */
int isdirORIG(const char* n)
{
  struct _stat s;
  char fn[FL_PATH_MAX];
  const int length = static_cast<int>(strlen(n));

  // This workaround brought to you by the fine folks at Microsoft!
  // (read lots of sarcasm in that...)
  // Paths too long for fn are passed to _stat() unmodified.
  if (length < static_cast<int>(sizeof(fn) - 1)) {
    // isalpha() is undefined for negative arguments, and a plain char holding
    // a UTF-8 lead byte is negative where char is signed, hence the cast.
    const bool driveOnly =
        length < 4 &&
        isalpha(static_cast<unsigned char>(n[0])) && n[1] == ':' &&
        (isdirsep(n[2]) || !n[2]);

    if (driveOnly) {
      // Always use D:/ for drive letters
      fn[0] = n[0];
      strcpy(fn + 1, ":/");
      n = fn;
    }
    else if (length > 0 && isdirsep(n[length - 1])) {
      // Strip trailing slash from name...
      const int trimmed = length - 1;
      memcpy(fn, n, trimmed);
      fn[trimmed] = '\0';
      n = fn;
    }
  }

  return !_stat(n, &s) && (s.st_mode & _S_IFDIR);
}
// Text file listing the paths to probe, one path per line.
const char* filename = "Z:/maid_filtered.txt";

/*
 * Time how long func takes to classify every path listed in `filename`.
 *
 * func  the is-directory implementation under test; it is called once per
 *       line of the list file with that line (line ending removed).
 *
 * Returns the elapsed time in microseconds, covering both reading the list
 * and the calls to func, or -1 if the list file cannot be opened.
 */
long long testOne(int (*func)(const char *))
{
  char buffer[FL_PATH_MAX];
  FILE* fptr = fopen(filename, "r");
  if (!fptr)
    return -1LL;

  const auto start = std::chrono::high_resolution_clock::now();
  while (fgets(buffer, static_cast<int>(sizeof(buffer)), fptr) != NULL)
  {
    // Remove the line ending only if one is present: an unterminated final
    // line must keep its last character, and a zero-length read must not
    // index buffer[-1].
    size_t len = strlen(buffer);
    while (len > 0 && (buffer[len - 1] == '\n' || buffer[len - 1] == '\r'))
      buffer[--len] = '\0';

    // Probe the path just read, not the list file itself.
    (void)(*func)(buffer);
  }
  const auto stop = std::chrono::high_resolution_clock::now();
  fclose(fptr);

  return std::chrono::duration_cast<std::chrono::microseconds>(stop - start).count();
}
int main()
{
long long res2 = testOne(isdirGFAW);
double res2secs = res2 / 1000.0 / 1000.0;
printf("GFAW time: %lld (%.2f seconds)\n", res2, res2secs);
long long res1 = testOne(isdirORIG);
double res1secs = res1 / 1000.0 / 1000.0;
printf("Original time: %lld (%.2f seconds)\n", res1, res1secs);
}
@fire-eggs
Copy link
Author

My unit test version is over here.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment