This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from torch.utils.data import Dataset | |
| import pandas as pd | |
| from .review_vectorizer import ReviewVectorizer | |
| class ReviewDataset(Dataset): | |
| def __init__(self, review_df, vectorizer): | |
| """ | |
| Args: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import collections | |
| import re | |
| import pandas as pd | |
| import numpy as np | |
| import argparse | |
# Command-line interface: create the parser that the script's options hang off.
parser = argparse.ArgumentParser(
    description='Split DataSet Arguments.',
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import torch | |
| import torch.nn as nn | |
| class Perceptron(nn.Module): | |
| """ A perceptron is one linear Layer""" | |
| def __init__(self, input_dim: int): | |
| """ | |
| :param input_dim (int): size of inputs features | |
| """ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Aljazeera():
    """Settings and state holders for the aljazeera.net RSS source.

    Everything here is a class-level attribute; no methods are visible in
    this excerpt.
    """

    # Address of the RSS feed for this source.
    url = 'https://www.aljazeera.net/aljazeerarss/a7c186be-1baa-4bd4-9d80-a84db769f779/73d0e1b4-532f-45ef-b135-bfdff8b8cab9'
    # Root address of the site.
    # NOTE(review): presumably used to resolve relative article links - confirm.
    url_base = 'https://www.aljazeera.net'
    # Label identifying this source.
    name = 'aljazeera.net'
    # Browser identification string keyed by the standard "User-Agent" header
    # name. The earlier key was spelled 'use-agent', which is not an HTTP
    # header, so the string would not have been sent as the user agent.
    # NOTE(review): presumably passed as request headers - confirm at call site.
    ua = {
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36"}
    # Nothing fetched yet.
    feed = None
    # NOTE(review): this list lives on the class, so it is shared by every
    # instance that appends to it without rebinding - confirm that is intended.
    articles = []
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import threading
from urllib.request import urlopen

import xmltodict
| class RssFeed(threading.Thread): | |
| def __init__(self, url): | |
| threading.Thread.__init__(self) | |
| self.url = url | |
| name = "undefined" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import regex as re | |
| from langdetect import detect | |
| import logging | |
| import os | |
# Absolute directory of this module; the log file location is derived from it.
dir_base = os.path.dirname(os.path.abspath(__file__))

# Send log records to ../logs/ArabicTextCleaner.log relative to this module.
# filemode='w' starts the log file afresh each time the module is loaded.
logging.basicConfig(
    format='%(name)s - %(levelname)s - %(message)s',
    filemode='w',
    filename=dir_base + '/../logs/ArabicTextCleaner.log',
)