Skip to content

Instantly share code, notes, and snippets.

View arhouati's full-sized avatar
🎯
Focusing

Abdelkader Rhouati arhouati

🎯
Focusing
View GitHub Profile
from torch.utils.data import Dataset
import pandas as pd
from .review_vectorizer import ReviewVectorizer
class ReviewDataset(Dataset):
def __init__(self, review_df, vectorizer):
"""
Args:
@arhouati
arhouati / dataset_split.py
Last active April 11, 2021 14:38
This script splits Yelp's dataset into three subsets: Train, Valid, and Test
import collections
import re
import pandas as pd
import numpy as np
import argparse
# set up arguments
parser = argparse.ArgumentParser(description='Split DataSet Arguments.')
import torch
import torch.nn as nn
class Perceptron(nn.Module):
""" A perceptron is one linear Layer"""
def __init__(self, input_dim: int):
"""
:param input_dim (int): size of inputs features
"""
class Aljazeera():
    """Static settings and shared state for the aljazeera.net source.

    Only class-level attributes are defined here; how they are consumed
    is not visible in this snippet.
    """

    # Feed endpoint (the path contains "aljazeerarss"; presumably an RSS
    # feed -- confirm against the code that fetches it).
    url = 'https://www.aljazeera.net/aljazeerarss/a7c186be-1baa-4bd4-9d80-a84db769f779/73d0e1b4-532f-45ef-b135-bfdff8b8cab9'
    # Site root URL.
    url_base = 'https://www.aljazeera.net'
    # Identifier for this source.
    name = 'aljazeera.net'
    # HTTP request headers. The key must be "User-Agent": the previous
    # spelling "use-agent" is not a header any server recognises, so the
    # browser identification string was never actually applied.
    ua = {
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36",
    }
    # No feed loaded yet.
    feed = None
    # NOTE: class-level list, so it is shared by every instance unless an
    # instance rebinds `articles` itself.
    articles = []
import xmltodict
from urllib.request import urlopen
class RssFeed(threading.Thread):
def __init__(self, url):
threading.Thread.__init__(self)
self.url = url
name = "undefined"
import regex as re
from langdetect import detect
import logging
import os
dir_base = os.path.dirname(os.path.abspath(__file__))
logging.basicConfig(filename=dir_base + '/../logs/ArabicTextCleaner.log', filemode='w', format='%(name)s - %(levelname)s - %(message)s')