Last active
June 17, 2022 13:17
-
-
Save 0smr/9f5e273dc678da8dad9ea3dcf43dc1bb to your computer and use it in GitHub Desktop.
Read a UTF-8 file, replace all of its notation characters (punctuation marks and digits) with spaces, and write the result to a new file. (The file includes Persian text.)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm>
#include <codecvt>
#include <fstream>
#include <iostream>
#include <locale>
#include <sstream>
#include <stdexcept>
#include <string>
/**
 * Reads "path_to_source.txt" as UTF-8, replaces every character listed in
 * `notations` (ASCII punctuation and digits, Persian digits and punctuation)
 * with a single space, and writes the result as UTF-8 to "path_to_dest.txt".
 *
 * The destination file is only opened (and therefore truncated) once the
 * input has been read and converted successfully.
 *
 * @return 0 on success; 1 if the source cannot be opened, the text cannot be
 *         converted from/to UTF-8, or the destination cannot be written.
 */
int main()
{
    std::ifstream textFile {"path_to_source.txt"};
    if (!textFile)
    {
        std::cerr << "cannot open path_to_source.txt for reading\n";
        return 1;
    }

    // Read the whole file into a narrow string holding the raw UTF-8 bytes.
    std::stringstream strStream {};
    strStream << textFile.rdbuf();
    const std::string strfileContent = strStream.str();
    textFile.close();

    // Every character that must be blanked out. All of them are single
    // wchar_t code units, so an element-wise comparison is sufficient.
    const std::wstring notations{L",-_+#@$%^&|*?=<>@~()[]{};:'!/\\\".0123456789۰۱۲۳۴۵۶۷۸۹،٪؟×؛«»"};

    // The same converter is used for decoding and encoding so that whatever
    // from_bytes() produces (including surrogate pairs for code points
    // outside the BMP) is turned back into well-formed UTF-8 by to_bytes().
    // NOTE: <codecvt> is deprecated since C++17 but still available.
    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;

    std::string utf8Output {};
    try
    {
        std::wstring wideStrfileContent = converter.from_bytes(strfileContent);

        // Single pass over the text: replace a character when it occurs
        // anywhere in the notation list.
        std::replace_if(wideStrfileContent.begin(), wideStrfileContent.end(),
                        [&notations](wchar_t ch)
                        {
                            return notations.find(ch) != std::wstring::npos;
                        },
                        L' ');

        utf8Output = converter.to_bytes(wideStrfileContent);
    }
    catch (const std::range_error &)
    {
        // wstring_convert reports malformed input by throwing range_error.
        std::cerr << "UTF-8 conversion of path_to_source.txt failed\n";
        return 1;
    }

    std::ofstream frequencyFile {"path_to_dest.txt"};
    if (!frequencyFile)
    {
        std::cerr << "cannot open path_to_dest.txt for writing\n";
        return 1;
    }

    frequencyFile << utf8Output;
    frequencyFile.close();
    if (!frequencyFile)
    {
        std::cerr << "failed to write path_to_dest.txt\n";
        return 1;
    }

    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment