Last active
August 29, 2015 13:59
-
-
Save CaptainJH/10847192 to your computer and use it in GitHub Desktop.
Read all the files in a given folder and parse them
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Reads the next non-empty line from a C stdio stream.
///
/// Bytes are consumed one at a time with fgetc(). Both '\n' and '\r' end a
/// line and are never stored, so "\r\n" pairs and runs of blank lines are
/// skipped: the function only returns once it has collected at least one
/// character or the stream is exhausted.
///
/// @param f    Stream to read from. A null pointer is treated as an
///             exhausted stream.
/// @param line Receives the line content without its terminator. It is
///             cleared on entry, so it is empty when false is returned.
/// @return true if a non-empty line was stored in `line`; false if the end of
///         the stream (or a read error) was reached before any character was
///         collected.
bool getline(FILE* f, std::string& line)
{
    line.clear();
    if (f == nullptr)
        return false;

    for (;;)
    {
        // fgetc() returns an int: either an unsigned char value or EOF.
        // Keeping the result in an int is what lets a 0xFF data byte be told
        // apart from EOF; storing it in a char first loses that distinction.
        const int c = fgetc(f);
        if (c == EOF)
            return !line.empty();

        if (c == '\n' || c == '\r')
        {
            // A terminator only ends the line once something was collected;
            // otherwise it belongs to a blank line and is skipped.
            if (!line.empty())
                return true;
            continue;
        }

        line.push_back(static_cast<char>(c));
    }
}
/// how to use | |
//auto ifile = fopen(dir.c_str(), "rb"); | |
//while (getline(ifile, line)) | |
//{ | |
//} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm>
#include <exception>
#include <fstream>
#include <iostream>
#include <iterator>
#include <regex>
#include <set>
#include <stdexcept>
#include <string>
#include <vector>

#include <boost/filesystem.hpp>

#include "pystring.h"
void CollectFiles(const std::string& path, std::vector<std::string>& v) | |
{ | |
boost::filesystem::path dir(path); | |
boost::filesystem::directory_iterator it_end; | |
for (boost::filesystem::directory_iterator it_dir(dir); it_dir != it_end; ++it_dir) | |
{ | |
if (boost::filesystem::is_directory(it_dir->path())) | |
{ | |
CollectFiles(it_dir->path().string(), v); | |
} | |
else if (it_dir->path().extension() == L".csv") | |
{ | |
if (it_dir->path().filename() != L"miss.csv") | |
v.push_back(it_dir->path().string()); | |
} | |
} | |
} | |
int main() | |
{ | |
std::cout << "Device Report Check " << std::endl; | |
const std::string DeviceReportRoot = "F:\\UserReport"; | |
std::vector<std::string> v; | |
CollectFiles(DeviceReportRoot, v); | |
std::set<unsigned long long> QQ_Set; | |
std::for_each(v.begin(), v.end(), [&QQ_Set](const std::string& p){ | |
// how to read file content in one line! | |
std::string text((std::istreambuf_iterator<char>(std::ifstream(p))), | |
std::istreambuf_iterator<char>()); | |
std::vector<std::string> lines; | |
pystring::splitlines(text, lines); | |
std::for_each(lines.begin(), lines.end(), [&QQ_Set](const std::string& s){ | |
std::vector<std::string> blocks; | |
pystring::split(s, blocks, ","); | |
if (blocks.size() >= 2) | |
{ | |
const std::string& qq = blocks[1]; | |
std::smatch m; | |
auto found = std::regex_search(qq, m, std::regex("[0-9]+")); | |
if (found) | |
{ | |
auto qq_id = std::stoull(m.str()); | |
if (QQ_Set.find(qq_id) == QQ_Set.end()) | |
QQ_Set.insert(qq_id); | |
} | |
} | |
}); | |
}); | |
std::cout << "Found " << QQ_Set.size() << " unique ids" << std::endl; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
使用之前的方法读文件,如果文件中出现 '\0',则会立即中止。对于一些自己无法控制内容的文件,会有潜在的风险,这时只能使用最原始的 C 的读文件的方法