Skip to content

Instantly share code, notes, and snippets.

@jdiego
Forked from tcbrindle/Tokenizer_Comparison.cpp
Created February 5, 2016 00:59
Show Gist options
  • Save jdiego/08411cd726bdb385791e to your computer and use it in GitHub Desktop.
Save jdiego/08411cd726bdb385791e to your computer and use it in GitHub Desktop.
#include <algorithm>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <istream>
#include <iterator>
#include <sstream>
#include <string>
#include <vector>

#include <boost/tokenizer.hpp>
using namespace std;
// Minimal stopwatch built on std::chrono::high_resolution_clock.
// The clock starts when the object is constructed and can be restarted
// with reset().
struct timer {
    using clock = std::chrono::high_resolution_clock;

    // Restarts the measurement from the current instant.
    void reset() { start_ = clock::now(); }

    // Returns the time since construction (or the last reset()), converted
    // to DurationType; nanoseconds unless another duration type is requested.
    template <class DurationType = std::chrono::nanoseconds>
    DurationType elapsed() const {
        const auto now = clock::now();
        return std::chrono::duration_cast<DurationType>(now - start_);
    }

private:
    // Instant the current measurement started.
    clock::time_point start_ = clock::now();
};
// 4 statements
void DoBoost( std::ofstream& cout, std::string& text )
{
boost::char_separator<char> sep(" \n\t\r\f");
boost::tokenizer<boost::char_separator<char>> tokens(text, sep);
for (const auto& t : tokens) {
cout << t ;
}
}
// Splits `str` on whitespace using stream extraction, buffers every token in
// a vector, and then writes the buffered tokens to `cout` with nothing
// between them.
void DoIterator(std::ofstream& cout, std::string& str )
{
    using TokenReader = std::istream_iterator<std::string>;

    // Wrap the input in a stream so operator>> does the whitespace splitting.
    std::stringstream source(str);
    TokenReader first(source);
    TokenReader last;

    // Materialize all tokens up front before any output happens.
    std::vector<std::string> tokens(first, last);

    // Emit the collected tokens in order.
    for (const std::string& token : tokens) {
        cout << token;
    }
}
// Splits `str` on whitespace using stream extraction and writes each token
// to `cout` as soon as it is read, without buffering the tokens first.
void DoIteratorCorrectly(std::ofstream& cout, std::string& str )
{
    using TokenReader = std::istream_iterator<std::string>;

    // Wrap the input in a stream so operator>> does the whitespace splitting.
    std::stringstream source(str);

    // A default-constructed stream iterator is the end-of-stream sentinel.
    TokenReader finished;
    for (TokenReader token(source); token != finished; ++token) {
        cout << *token;
    }
}
// Tokenizes `str` on the characters " \n\t\r\f" with std::strtok and writes
// every token to `cout` back-to-back.
//
// `str` itself is left untouched: strtok overwrites delimiters with '\0', so
// the work happens on a private copy. Tokenizing stops at the first '\0'
// byte in the input. std::strtok keeps its position in hidden static state,
// so this function must not be interleaved with other strtok users.
void DoStrtok(std::ofstream& cout, std::string& str)
{
    static const char kDelims[] = " \n\t\r\f";

    // An owning std::string replaces the malloc/strcpy/free sequence: there
    // is no unchecked allocation result and the buffer is released on every
    // exit path.
    std::string scratch(str);

    // &scratch[0] is a valid, NUL-terminated mutable buffer even when the
    // string is empty.
    for (char* tok = std::strtok(&scratch[0], kDelims); tok != nullptr;
         tok = std::strtok(nullptr, kDelims)) {
        cout << tok;
    }
}
// Returns true when `tst` is one of the token delimiters: space, newline,
// tab, carriage return or form feed. The NUL character is not a delimiter.
static bool IsDelim( char tst )
{
    // Hand-rolled linear scan over the NUL-terminated delimiter list.
    for( const char* delim = " \n\t\r\f"; *delim != '\0'; ++delim )
    {
        if( *delim == tst )
            return true;
    }
    return false;
}
// std::find-based equivalent of the hand-rolled delimiter check: returns
// true when `tst` is space, newline, tab, carriage return or form feed.
// Kept for comparison only; the original author measured this variant at
// roughly 25% slower than the manual loop.
static bool IsDelim_STDFind( char tst )
{
    static const char kDelims[] = " \n\t\r\f";
    // Stop one short of the array end so the literal's terminating NUL is
    // not searched as if it were a delimiter.
    const char* const last = kDelims + ( sizeof(kDelims) - 1 );
    return std::find( kDelims, last, tst ) != last;
}
// Hand-written in-place tokenizer: splits `str` on the characters accepted
// by IsDelim and writes every token to `cout` back-to-back.
//
// `str` itself is left untouched; delimiters are overwritten with '\0' in a
// private copy so each token can be streamed as a C string. Tokenizing stops
// at the first '\0' byte in the input.
void DoJoshsWay( std::ofstream& cout, std::string& str)
{
    // An owning std::string replaces the malloc/strcpy/free sequence: there
    // is no unchecked allocation result and the buffer is released on every
    // exit path. &scratch[0] is NUL-terminated even for an empty string.
    std::string scratch(str);
    char* p = &scratch[0];

    // skip leading delimiters
    while( *p && IsDelim(*p) )
        ++p;

    while( *p )
    {
        // note start of token
        char* const pTok = p;

        // advance to the delimiter (or terminator) that ends this token
        do
        {
            ++p;
        } while( *p && !IsDelim(*p) );

        if( *p == '\0' )
        {
            // The token runs up to the end of the input and is already
            // NUL-terminated. Stop here: advancing again would step past
            // the terminator and read outside the buffer.
            cout << pTok;
            break;
        }

        // clobber trailing delimiter with null
        *p = '\0';
        cout << pTok; // send the token

        // skip the null just written, and any subsequent delimiters
        do
        {
            ++p;
        } while( *p && IsDelim(*p) );
    }
}
// Splits the range [first, last) at every element that equals one of the
// delimiters in [d_first, d_last) and calls binary_op(token_begin, token_end)
// once per token, in order.
//
// - Adjacent delimiters (and a leading delimiter) yield empty tokens, i.e.
//   calls where token_begin == token_end.
// - A trailing delimiter does NOT yield a final empty token, and an empty
//   input range yields no calls at all.
// - Despite its name, InputIt must be a multi-pass (forward) iterator: the
//   start of each token is handed to binary_op after the search has already
//   walked past it.
//
// The standard algorithms are called with explicit std:: qualification so the
// template also works for raw-pointer ranges and does not depend on a
// using-directive or argument-dependent lookup at the point of use.
template <class InputIt, class ForwardIt, class BinOp>
void for_each_token(InputIt first, InputIt last,
                    ForwardIt d_first, ForwardIt d_last,
                    BinOp binary_op)
{
    while (first != last) {
        const auto pos = std::find_first_of(first, last, d_first, d_last);
        binary_op(first, pos);
        if (pos == last) break;   // no delimiter left: that was the last token
        first = std::next(pos);   // resume just past the delimiter
    }
}
// Splits `str` on the characters " \n\t\r\f" with for_each_token and writes
// every non-empty token to `cout` back-to-back. Empty tokens (produced by
// runs of adjacent delimiters) are skipped.
void DoTristansWay(std::ofstream& cout, std::string str)
{
    constexpr char delims[] = " \n\t\r\f";
    // The string literal's terminating '\0' is the last element of the
    // array. The delimiter range stops one short of std::end so that NUL is
    // not treated as a delimiter and embedded NUL bytes stay in the output.
    for_each_token(std::cbegin(str), std::cend(str),
                   std::begin(delims), std::end(delims) - 1,
                   [&cout] (auto first, auto second) {
                       if (first != second) cout << std::string(first, second);
                   });
}
int main()
{
std::ifstream t("crytek_sponza.obj");
std::stringstream str;
str << t.rdbuf();
std::string text = str.str();
FILE* fp = fopen("times.csv", "w");
fprintf(fp,"boost,iterators,iterators_right,strtok,josh,tristan\n");
for( size_t i=0; i<5; i++ )
{
std::chrono::nanoseconds tm;
{
timer t;
std::ofstream out_boost("out_boost.txt");
DoBoost( out_boost, text );
tm = t.elapsed();
}
fprintf(fp,"%lld,", tm.count() );
{
timer t;
std::ofstream out_iter("out_iter.txt");
DoIterator( out_iter, text );
tm = t.elapsed();
}
fprintf(fp,"%lld,", tm.count() );
{
timer t;
std::ofstream out_split("out_iter_right.txt");
DoIteratorCorrectly( out_split, text );
tm = t.elapsed();
}
fprintf(fp,"%lld,", tm.count() );
{
timer t;
std::ofstream out_strtok("out_strtok.txt");
DoStrtok( out_strtok, text );
tm = t.elapsed();
}
fprintf(fp,"%lld,", tm.count() );
{
timer t;
std::ofstream out_josh("out_josh.txt");
DoJoshsWay( out_josh, text );
tm = t.elapsed();
}
fprintf(fp,"%lld,", tm.count() );
{
timer t;
std::ofstream out_tristan("out_tristan.txt");
DoTristansWay( out_tristan, text );
tm = t.elapsed();
}
fprintf(fp,"%lld,", tm.count() );
fprintf(fp,"\n");
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment