Skip to content

Instantly share code, notes, and snippets.

@Enet4
Created November 7, 2019 20:13
Show Gist options
  • Save Enet4/fff96752ce8c74b51967e1de76ac434e to your computer and use it in GitHub Desktop.
Save Enet4/fff96752ce8c74b51967e1de76ac434e to your computer and use it in GitHub Desktop.
// Best real time: 0.80 s
#include <clocale>
#include <cwctype>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include "utf8.h"
using namespace std;
// Returns true when `line` contains nothing but whitespace code points.
// An empty line is considered blank.
//
// `line` is decoded one code point at a time with utf8::next (from utf8.h),
// which is expected to advance the iterator past each decoded code point;
// callers should pass text that has already been validated as UTF-8.
// Whitespace classification is delegated to std::iswspace, so the result
// depends on the LC_CTYPE category of the current C locale.
bool is_blank(const std::string& line) {
    auto it = line.begin();
    const auto end = line.end();
    while (it != end) {
        const auto code_point = utf8::next(it, end);
        // Unicode defines no whitespace outside the Basic Multilingual Plane.
        // Rejecting those code points up front also keeps them from being
        // truncated into an unrelated (possibly whitespace) value on
        // platforms where wint_t is only 16 bits wide.
        if (code_point > 0xFFFF) {
            return false;
        }
        if (!std::iswspace(static_cast<std::wint_t>(code_point))) {
            return false;
        }
    }
    return true;
}
// Counts the blank lines (empty or whitespace-only) of "biggest.txt" and
// prints the total on standard output.
//
// Exit status:
//   0  the whole file was processed and the count was printed
//   1  the file could not be opened, or reading it failed part-way through
//  -1  a line is not valid UTF-8 (its number and text go to standard error)
int main() {
    // is_blank() classifies code points with std::iswspace, which consults
    // the C locale selected here. Without a UTF-8 locale only the whitespace
    // known to the default "C" locale may be recognised, so warn but continue.
    if (std::setlocale(LC_ALL, "en_US.UTF8") == nullptr) {
        std::cerr << "Warning: could not select locale en_US.UTF8; "
                     "non-ASCII whitespace may not be recognised\n";
    }

    const char* test_file_path = "biggest.txt";

    // Open the test file (contains UTF-8 encoded text)
    std::ifstream fs8(test_file_path);
    if (!fs8.is_open()) {
        std::cerr << "Could not open " << test_file_path << '\n';
        return 1;  // failing to open the input is an error, not a success
    }

    unsigned long long line_count = 1;   // 1-based number of the current line
    unsigned long long blank_lines = 0;  // lines made only of whitespace
    std::string line;

    // Validate every line before decoding it, then classify it.
    while (std::getline(fs8, line)) {
        if (!utf8::is_valid(line.begin(), line.end())) {
            std::cerr << "Invalid UTF-8 encoding detected at line "
                      << line_count << '\n';
            std::cerr << '\t' << line << '\n';
            return -1;
        }
        if (is_blank(line)) {
            ++blank_lines;
        }
        ++line_count;
    }

    // getline() stops on end-of-file and on I/O errors alike; only the former
    // means the count is complete.
    if (fs8.bad()) {
        std::cerr << "Error while reading " << test_file_path << '\n';
        return 1;
    }

    std::cout << blank_lines << '\n';
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment