Forked from kongtomorrow/gist:3e35d926ec17d9b621c1
Last active
August 29, 2015 14:10
-
-
Save stephentyrone/228144648ee250a4ba07 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#import <Foundation/Foundation.h>
#import <mach/mach_time.h>
#import <simd/simd.h>
#include <string.h>
/// Converts a duration measured in mach absolute-time ticks into seconds.
///
/// The tick-to-second scale is derived from mach_timebase_info() (which
/// describes ticks as a numer/denom ratio of nanoseconds) and cached in a
/// function-local static after the first successful lookup. While the lookup
/// has not succeeded the cached scale stays 0.0, so the function returns 0.0
/// and tries the lookup again on the next call.
///
/// @param machTimeInterval Elapsed ticks, e.g. the difference between two
///        mach_absolute_time() readings.
/// @return The same interval expressed in seconds.
NSTimeInterval SecondsFromMachTimeInterval(uint64_t machTimeInterval) {
    static double secondsPerTick = 0.0;

    const BOOL scaleIsCached = (secondsPerTick != 0.0);
    if (!scaleIsCached) {
        mach_timebase_info_data_t timebase;
        const kern_return_t status = mach_timebase_info(&timebase);
        if (status == KERN_SUCCESS) {
            // numer/denom yields nanoseconds per tick; step down to
            // microseconds first and then to seconds.
            const double nanosecondsPerTick = (double) timebase.numer / (double) timebase.denom;
            const double microsecondsPerTick = nanosecondsPerTick / 1000;
            secondsPerTick = microsecondsPerTick / 1000000;
        }
    }

    return secondsPerTick * machTimeInterval;
}
/// Measures how long it takes to invoke a block a given number of times.
///
/// @param blockToTime The block to run; it is called back-to-back with no
///        work in between other than the loop bookkeeping.
/// @param repCount How many times to call the block. Zero or a negative
///        count runs the block zero times.
/// @return Total elapsed wall time, in seconds, for all invocations together
///         (not the per-call average).
NSTimeInterval TimeToPerformBlock(void (^blockToTime)(void), long repCount) {
    const uint64_t startTicks = mach_absolute_time();

    long remaining = repCount;
    while (remaining > 0) {
        blockToTime();
        remaining--;
    }

    const uint64_t endTicks = mach_absolute_time();
    const uint64_t elapsedTicks = endTicks - startTicks;
    return SecondsFromMachTimeInterval(elapsedTicks);
}
/// Logs one benchmark result as a left-aligned label followed by the elapsed
/// time in seconds.
///
/// The label is padded with spaces to an 80-character column so successive
/// results line up; labels that are already 80 characters or longer are
/// printed in full. The label is formatted with %@ rather than by passing
/// -UTF8String bytes to %s, because %s in NSString/NSLog formats decodes the
/// bytes using the system default encoding and can garble non-ASCII labels.
///
/// @param seconds Elapsed time to report.
/// @param identifier Human-readable label for the measurement; nil is
///        printed as "(null)".
void logMachTimeInterval_withIdentifier_(NSTimeInterval seconds, NSString *identifier) {
    static const NSUInteger kLabelColumnWidth = 80;

    NSString *label = identifier ?: @"(null)";
    if (label.length < kLabelColumnWidth) {
        // Only pad: stringByPaddingToLength: would truncate a longer label.
        label = [label stringByPaddingToLength:kLabelColumnWidth
                                    withString:@" "
                               startingAtIndex:0];
    }
    NSLog(@"%@ %g seconds\n", label, seconds);
}
/// Times a block and logs the result under the given label.
///
/// Marked noinline so each benchmark goes through a real call to this
/// function rather than being folded into its caller.
///
/// @param identifier Label printed next to the measurement.
/// @param repCount Number of times the block is invoked while timing.
/// @param blockToTime The work to measure.
__attribute__((noinline)) void PresentTimeToPerformBlockWithIdentifier(NSString *identifier, long repCount, void (^blockToTime)(void)) {
    const NSTimeInterval elapsedSeconds = TimeToPerformBlock(blockToTime, repCount);
    logMachTimeInterval_withIdentifier_(elapsedSeconds, identifier);
}
#if __LP64__
/// Loads 8 bytes from an address with no alignment guarantee. Going through
/// memcpy avoids dereferencing a misaligned / type-punned uint64_t pointer;
/// compilers lower a fixed-size memcpy like this to a single load.
static inline uint64_t CFBytesLoadUnaligned64(const uint8_t *p) {
    uint64_t word;
    memcpy(&word, p, sizeof word);
    return word;
}
#endif

/// Loads 4 bytes from an address with no alignment guarantee (see the 64-bit
/// variant for why memcpy is used).
static inline uint32_t CFBytesLoadUnaligned32(const uint8_t *p) {
    uint32_t word;
    memcpy(&word, p, sizeof word);
    return word;
}

/// Scalar check that every byte in a buffer is 7-bit ASCII (high bit clear).
///
/// The buffer is scanned a machine word at a time, testing the high bit of
/// every byte in the word with a single mask. On 64-bit targets the scan is
/// unrolled to 32, then 16, then 8 bytes per step; the remainder is handled
/// 4 bytes and finally 1 byte at a time. Only bytes inside [bytes, bytes+len)
/// are read.
///
/// @param bytes Start of the buffer; no alignment is required.
/// @param len Number of bytes to examine. Zero or a negative length is
///        treated as an empty range.
/// @return true if no examined byte has its high bit set, false otherwise.
__attribute__((noinline)) Boolean __CFBytesInASCII(const uint8_t *bytes, CFIndex len) {
#if __LP64__
    static const uint64_t kHighBits64 = 0x8080808080808080ULL;

    /* A bit of unrolling; go by 32s, 16s, and 8s first */
    while (len >= 32) {
        // OR the high bits of four words together and branch once per 32
        // bytes; cheaper than a conditional after every load.
        uint64_t hiBits = CFBytesLoadUnaligned64(bytes) & kHighBits64;
        hiBits |= CFBytesLoadUnaligned64(bytes + 8) & kHighBits64;
        hiBits |= CFBytesLoadUnaligned64(bytes + 16) & kHighBits64;
        hiBits |= CFBytesLoadUnaligned64(bytes + 24) & kHighBits64;
        if (hiBits) return false;
        bytes += 32;
        len -= 32;
    }
    while (len >= 16) {
        uint64_t hiBits = CFBytesLoadUnaligned64(bytes) & kHighBits64;
        hiBits |= CFBytesLoadUnaligned64(bytes + 8) & kHighBits64;
        if (hiBits) return false;
        bytes += 16;
        len -= 16;
    }
    while (len >= 8) {
        if (CFBytesLoadUnaligned64(bytes) & kHighBits64) return false;
        bytes += 8;
        len -= 8;
    }
#endif
    /* Go by 4s */
    while (len >= 4) {
        if (CFBytesLoadUnaligned32(bytes) & 0x80808080U) return false;
        bytes += 4;
        len -= 4;
    }
    /* Handle the rest one byte at a time. The explicit > 0 test keeps a
       negative length from turning into an unbounded scan. */
    while (len > 0) {
        if (*bytes++ & 0x80) return false;
        len--;
    }
    return true;
}
/// Returns the index of the lowest lane of `x` whose high (sign) bit is set.
///
/// The sign bits of all 32 lanes are gathered into a 32-bit mask (one AVX2
/// movemask, or two SSE movemasks combined when AVX2 is unavailable) and the
/// position of the lowest set bit is returned. The mask is assembled in
/// unsigned arithmetic so that shifting the upper half into bit 31 never
/// shifts a signed int into its sign bit.
///
/// Uses x86 SSE/AVX intrinsics. The result is undefined when no lane has its
/// high bit set (count-trailing-zeros of 0), so callers must check first.
///
/// @param x A 32-lane byte vector.
/// @return Lane index in [0, 31] of the first lane with its high bit set.
static inline CFIndex firstSetIndex(vector_char32 x) {
#if defined __AVX2__
    const unsigned int laneMask = (unsigned int)_mm256_movemask_epi8(x);
#else
    const unsigned int upperMask = (unsigned int)_mm_movemask_epi8(x.hi);
    const unsigned int lowerMask = (unsigned int)_mm_movemask_epi8(x.lo);
    const unsigned int laneMask = (upperMask << 16) | lowerMask;
#endif
    return __builtin_ctz(laneMask);
}
/// Vectorized check that every byte in a buffer is 7-bit ASCII.
///
/// The buffer is scanned in 32-byte vectors loaded from 32-byte-aligned
/// addresses. Because the loads are aligned rather than starting at `bytes`,
/// the first load can include bytes before the buffer and the last load can
/// include bytes after it; the former are masked to zero and the latter are
/// excluded by the final index-versus-length comparison.
///
/// @param bytes Start of the buffer; no alignment is required.
/// @param len Number of bytes to examine.
/// @return true if no byte inside the buffer has its high bit set.
__attribute__((noinline)) Boolean __CFBytesInASCII_vec(const uint8_t *bytes, CFIndex len) {
    // An empty range is trivially ASCII; return before touching memory.
    if (len == 0) return true;

    // Source table for the lead-in mask: 32 zero bytes followed by 32 bytes
    // of all ones. A 32-byte window starting at offset (32 - k) therefore
    // begins with k zero bytes and is all ones after that.
    static const uint8_t maskSource[64] = {
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    };

    // Round the start address down to a 32-byte boundary and remember how
    // many bytes in front of the buffer that first vector will include.
    const uintptr_t startAddress = (uintptr_t)bytes;
    const vector_char32 *block = (const vector_char32 *)(startAddress & -32);
    const size_t leadIn = bytes - (const uint8_t *)block;

    // Unaligned (packed) load of the mask window that clears the lead-in lanes.
    const vector_char32 leadInMask = *(const packed_char32 *)&maskSource[32 - leadIn];

    // First vector, with the bytes that precede the buffer forced to zero.
    // From here on `len` counts from the aligned address, so it grows by the
    // lead-in.
    vector_char32 lanes = *block & leadInMask;
    len += leadIn;

    // Walk aligned vectors until one contains a lane with its high bit set or
    // the buffer is exhausted. The last load may extend past the end of the
    // buffer, but an aligned 32-byte load stays within a single page, so it
    // never touches a page the buffer itself does not occupy.
    for (;;) {
        if (vector_any(lanes)) break;
        if (len <= 32) return true;
        len -= 32;
        block++;
        lanes = *block;
    }

    // Some lane in this vector has its high bit set. That only counts if the
    // lane lies inside the buffer, i.e. its index is below the remaining
    // length; a hit at or past the end belongs to over-read bytes.
    const CFIndex firstHighLane = firstSetIndex(lanes);
    return firstHighLane >= len;
}
/// Running sum of every predicate result produced by the timed blocks.
/// Presumably kept as a global so the compiler must actually perform each
/// timed call; it is never printed.
long total = 0;

/// Number of times each timed block is invoked per measurement.
long repeatCount = 10000;

/// Benchmarks the scalar and vector ASCII checks against three inputs:
/// a short ASCII string, the system word list, and the word list prefixed
/// with a non-ASCII character (so both checks can bail out immediately).
///
/// @return 0 on success, 1 if the word list could not be read.
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        NSString *const wordsPath = @"/usr/share/dict/words";

        // Read the word list once and fail loudly if it is unavailable;
        // otherwise -UTF8String on nil would hand NULL to strlen().
        NSError *readError = nil;
        NSString *words = [NSString stringWithContentsOfFile:wordsPath
                                                    encoding:NSUTF8StringEncoding
                                                       error:&readError];
        if (words == nil) {
            NSLog(@"Could not read %@: %@", wordsPath, readError);
            return 1;
        }

        const uint8_t *shortStringBytes = (const uint8_t *)[@"hello!" UTF8String];
        CFIndex shortLen = (CFIndex)strlen((const char *)shortStringBytes);

        const uint8_t *longStringBytes = (const uint8_t *)[words UTF8String];
        CFIndex longLen = (CFIndex)strlen((const char *)longStringBytes);

        // Leading non-ASCII character: the very first bytes fail the check.
        NSString *earlyOutWords = [@"ü" stringByAppendingString:words];
        const uint8_t *longEarlyOutStringBytes = (const uint8_t *)[earlyOutWords UTF8String];
        CFIndex longEarlyLen = (CFIndex)strlen((const char *)longEarlyOutStringBytes);

        NSLog(@"shortLen: %ld longLen:%ld", (long)shortLen, (long)longLen);

        PresentTimeToPerformBlockWithIdentifier(@"short no-vec", repeatCount, ^{
            total += __CFBytesInASCII(shortStringBytes, shortLen);
        });
        PresentTimeToPerformBlockWithIdentifier(@"short vec", repeatCount, ^{
            total += __CFBytesInASCII_vec(shortStringBytes, shortLen);
        });
        PresentTimeToPerformBlockWithIdentifier(@"long no-vec", repeatCount, ^{
            total += __CFBytesInASCII(longStringBytes, longLen);
        });
        PresentTimeToPerformBlockWithIdentifier(@"long vec", repeatCount, ^{
            total += __CFBytesInASCII_vec(longStringBytes, longLen);
        });
        PresentTimeToPerformBlockWithIdentifier(@"long early-out no-vec", repeatCount, ^{
            total += __CFBytesInASCII(longEarlyOutStringBytes, longEarlyLen);
        });
        PresentTimeToPerformBlockWithIdentifier(@"long early-out vec", repeatCount, ^{
            total += __CFBytesInASCII_vec(longEarlyOutStringBytes, longEarlyLen);
        });
    }
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment