Skip to content

Instantly share code, notes, and snippets.

@0smr
Last active June 17, 2022 13:17
Show Gist options
  • Save 0smr/9f5e273dc678da8dad9ea3dcf43dc1bb to your computer and use it in GitHub Desktop.
Save 0smr/9f5e273dc678da8dad9ea3dcf43dc1bb to your computer and use it in GitHub Desktop.
Read a UTF-8 file, replace all of its notation characters (punctuation, symbols and digits) with spaces, and write the result to a new file. (The file includes Persian text.)
#include <algorithm>
#include <codecvt>
#include <fstream>
#include <iostream>
#include <locale>
#include <sstream>
#include <stdexcept>
#include <string>
namespace
{
/// Replaces, in place, every character of `text` that occurs in `notations`
/// with a single space. The text is scanned once, regardless of how many
/// notation characters there are.
void replaceNotationsWithSpaces(std::wstring& text, const std::wstring& notations)
{
    std::replace_if(
        text.begin(), text.end(),
        [&notations](wchar_t ch) { return notations.find(ch) != std::wstring::npos; },
        L' ');
}
} // namespace

/// Reads "path_to_source.txt" as UTF-8, replaces every notation character
/// (ASCII punctuation/symbols and digits, Persian digits and punctuation)
/// with a space, and writes the result as UTF-8 to "path_to_dest.txt".
///
/// @return 0 on success; 1 if the source cannot be opened or read, is not
///         valid UTF-8, or the destination cannot be opened or written.
int main()
{
    // NOTE: std::wstring_convert / <codecvt> are deprecated since C++17; they
    // are kept here because the standard library offers no replacement yet.
    using Utf8Converter = std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>>;

    // All of these are BMP code points, so each one is a single UTF-16 code
    // unit and can never be mistaken for half of a surrogate pair.
    const std::wstring notations{L",-_+#@$%^&|*?=<>@~()[]{};:'!/\\\".0123456789۰۱۲۳۴۵۶۷۸۹،٪؟×؛«»"};

    // Binary mode on both ends keeps the bytes (including line endings) intact.
    std::ifstream textFile{"path_to_source.txt", std::ios::binary};
    if (!textFile)
    {
        std::cerr << "error: cannot open source file 'path_to_source.txt'\n";
        return 1;
    }

    // Read the whole file content into memory.
    std::stringstream strStream{};
    strStream << textFile.rdbuf();
    if (textFile.bad())
    {
        std::cerr << "error: failed while reading 'path_to_source.txt'\n";
        return 1;
    }
    textFile.close();
    const std::string strfileContent = strStream.str();

    // Decode, clean and re-encode with the SAME converter. Decoding as UTF-16
    // but encoding through a different (UCS-2/UCS-4) facet would corrupt or
    // reject characters outside the BMP, which are stored as surrogate pairs.
    std::string cleanedContent{};
    try
    {
        Utf8Converter converter;
        std::wstring wideStrfileContent = converter.from_bytes(strfileContent);
        replaceNotationsWithSpaces(wideStrfileContent, notations);
        cleanedContent = converter.to_bytes(wideStrfileContent);
    }
    catch (const std::range_error&)
    {
        // wstring_convert reports a failed conversion by throwing range_error.
        std::cerr << "error: 'path_to_source.txt' is not valid UTF-8\n";
        return 1;
    }

    // The destination is opened only now, so an unreadable or invalid source
    // never truncates an existing output file.
    std::ofstream frequencyFile{"path_to_dest.txt", std::ios::binary};
    if (!frequencyFile)
    {
        std::cerr << "error: cannot open destination file 'path_to_dest.txt'\n";
        return 1;
    }
    frequencyFile << cleanedContent;
    frequencyFile.close();
    if (!frequencyFile)
    {
        std::cerr << "error: failed while writing 'path_to_dest.txt'\n";
        return 1;
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment