-
-
Save mrm8488/a987bb54dab15435c2fd41b505173a3a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class CustomIterableDatasetv1(IterableDataset):
    """Stream ``(text, label)`` pairs lazily from a comma-separated text file.

    Only the file name is kept on the instance; the file is opened anew each
    time the dataset is iterated and read one line at a time, so its contents
    never have to be held in memory all at once.

    Each non-blank line is expected to look like ``<text>,<label>``. The label
    is whatever follows the *last* comma, so the text itself may contain
    commas.

    NOTE(review): if this is fed to a ``DataLoader`` with several workers,
    every worker would iterate the whole file independently -- confirm that
    the caller shards or uses a single worker.
    """

    def __init__(self, filename):
        """Remember which file to stream from.

        Args:
            filename: Path of the text file; passed unchanged to ``open``.
        """
        super().__init__()
        # Store only the path -- the contents are read lazily in __iter__.
        self.filename = filename

    def preprocess(self, text: str) -> str:
        """Return *text* lower-cased with surrounding whitespace removed."""
        return text.lower().strip()

    def line_mapper(self, line: str) -> tuple:
        """Split one raw line into a ``(preprocessed_text, label)`` tuple.

        Args:
            line: A single line of the file, with or without its line ending.

        Returns:
            A 2-tuple of the preprocessed text and the label string.

        Raises:
            ValueError: If *line* contains no comma at all.
        """
        # Drop the line terminator first so it does not end up in the label.
        record = line.rstrip("\r\n")
        # Split on the LAST comma only: commas inside the text are preserved
        # instead of breaking the two-value unpacking.
        text, label = record.rsplit(",", 1)
        return self.preprocess(text), label

    def __iter__(self):
        """Yield ``(text, label)`` tuples, one per non-blank line of the file.

        Implemented as a generator so that the ``with`` block closes the file
        once iteration finishes (or the generator is closed or collected),
        rather than leaving the handle open.
        """
        with open(self.filename) as source:
            for line in source:
                # Skip empty lines (e.g. a trailing newline at end of file);
                # they carry no record and would fail the split.
                if not line.strip():
                    continue
                yield self.line_mapper(line)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment