Created
March 11, 2022 13:21
-
-
Save berendgort/c967aece2c1d47c639d05c857e74b0bf to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class BinanceProcessor:
    """Download Binance kline (candlestick) data and shape it for modelling.

    The typical entry point is :meth:`run`, which downloads bars for a list
    of symbols, drops incomplete rows, adds technical-indicator columns and
    returns a single long-format ``DataFrame`` with one row per
    (time, ticker) pair and a ``tic`` column identifying the symbol.
    """

    def __init__(self, api_key_binance, api_secret_binance):
        """Store the credentials and create the Binance API client.

        Args:
            api_key_binance: Binance API key.
            api_secret_binance: Binance API secret.
        """
        self.binance_api_key = api_key_binance  # Enter your own API-key here
        self.binance_api_secret = api_secret_binance  # Enter your own API-secret here
        self.binance_client = Client(api_key=api_key_binance, api_secret=api_secret_binance)

    def run(self, ticker_list, start_date, end_date, time_interval,
            technical_indicator_list, if_vix):
        """Run the full pipeline and return the processed DataFrame.

        Args:
            ticker_list: Symbols to download, in the order they should appear.
            start_date: Start of the range, forwarded to the Binance client.
            end_date: End of the range, forwarded to the Binance client.
            time_interval: Kline interval, forwarded to the Binance client.
            technical_indicator_list: Forwarded to
                :meth:`add_technical_indicator` (which currently ignores it).
            if_vix: When truthy, :meth:`add_vix` is applied (a no-op here).

        Returns:
            The processed ``DataFrame``, indexed by its ``time`` column.
        """
        data = self.download_data(ticker_list, start_date, end_date, time_interval)
        data = self.clean_data(data)
        data = self.add_technical_indicator(data, technical_indicator_list)
        data.index = data['time']
        if if_vix:
            data = self.add_vix(data)

        # The arrays are not returned; the conversion is kept only as a
        # consistency check (all indicator columns present, equal row counts
        # per ticker). The original also zeroed NaNs in the discarded
        # tech array, which had no effect on the returned frame.
        self.df_to_array(data, if_vix)
        return data

    # main functions
    def download_data(self, ticker_list, start_date, end_date,
                      time_interval):
        """Download bars for every ticker and stack them into one DataFrame.

        The arguments are also stored on the instance as ``start_time``,
        ``end_time``, ``interval`` and ``ticker_list``.

        Returns:
            A ``DataFrame`` with the columns produced by
            :meth:`get_binance_bars` plus a ``tic`` column; an empty
            ``DataFrame`` when ``ticker_list`` is empty.
        """
        self.start_time = start_date
        self.end_time = end_date
        self.interval = time_interval
        self.ticker_list = ticker_list

        frames = []
        for symbol in ticker_list:
            bars = self.get_binance_bars(self.start_time, self.end_time,
                                         self.interval, symbol=symbol)
            # Drop the last bar (presumably because the most recent candle
            # may still be open -- confirm) and any rows with missing values.
            bars = bars.iloc[:-1].dropna()
            # ``assign`` returns a new frame, so no write-to-a-slice warning.
            frames.append(bars.assign(tic=symbol))

        # ``DataFrame.append`` was removed in pandas 2.0; concatenate once.
        # ``pd.concat`` rejects an empty list, so handle that explicitly.
        if not frames:
            return pd.DataFrame()
        return pd.concat(frames)

    def clean_data(self, df):
        """Return ``df`` without rows that contain missing values."""
        return df.dropna()

    def add_technical_indicator(self, df, tech_indicator_list):
        """Add the default indicator columns to each ticker's rows.

        ``tech_indicator_list`` is accepted for interface compatibility but
        is not used: a fixed default list is stored on
        ``self.tech_indicator_list`` instead. The actual feature computation
        is delegated to ``get_features_for_each_coin``, which is defined
        outside this class.

        Returns:
            The per-ticker results stacked in order of first appearance; an
            empty ``DataFrame`` when ``df`` has no tickers.
        """
        # print('Adding self-defined technical indicators is NOT supported yet.')
        # print('Use default: MACD, RSI, CCI, DX.')
        self.tech_indicator_list = ['open', 'high', 'low', 'close', 'volume',
                                    'macd', 'macd_signal', 'macd_hist',
                                    'rsi', 'cci', 'dx']

        frames = []
        for tic in df.tic.unique():
            # Work on a copy so the helper cannot modify the caller's frame.
            coin_df = df[df.tic == tic].copy()
            frames.append(get_features_for_each_coin(coin_df))

        if not frames:
            return pd.DataFrame()
        return pd.concat(frames)

    def add_turbulence(self, df):
        """Print a notice and return ``df`` unchanged (not supported)."""
        print('Turbulence not supported yet. Return original DataFrame.')
        return df

    def add_vix(self, df):
        """Print a notice and return ``df`` unchanged (not applicable)."""
        print('VIX is not applicable for cryptocurrencies. Return original DataFrame')
        return df

    def df_to_array(self, df, if_vix):
        """Convert the long-format frame into side-by-side NumPy arrays.

        Each ticker contributes one ``close`` column to the price array and
        ``len(self.tech_indicator_list)`` columns to the tech array, stacked
        horizontally in order of first appearance in ``df.tic``.

        Args:
            df: Frame with ``tic``, ``close``, ``time`` and every column
                named in ``self.tech_indicator_list``.
            if_vix: Unused; kept for interface compatibility.

        Returns:
            ``(price_array, tech_array, turbulence_array, time_array)``
            where ``turbulence_array`` is always an empty array and
            ``time_array`` holds the ``time`` values of the first ticker.

        Raises:
            ValueError: If ``df`` contains no tickers, or (from NumPy) if
                tickers have different numbers of rows.
        """
        unique_ticker = df.tic.unique()
        if len(unique_ticker) == 0:
            raise ValueError('df_to_array requires at least one ticker in df.')

        price_columns = []
        tech_columns = []
        for tic in unique_ticker:
            rows = df[df.tic == tic]
            price_columns.append(rows[['close']].values)
            tech_columns.append(rows[self.tech_indicator_list].values)

        price_array = np.hstack(price_columns)
        tech_array = np.hstack(tech_columns)

        # Take the time axis from the first ticker present in the frame
        # rather than ``self.ticker_list[0]``, which only exists after
        # ``download_data`` has run and may name a ticker with no rows.
        time_array = df[df.tic == unique_ticker[0]]['time'].values

        assert price_array.shape[0] == tech_array.shape[0]
        return price_array, tech_array, np.array([]), time_array

    # helper functions
    def stringify_dates(self, date: datetime):
        """Return ``date`` as a string of whole milliseconds since the epoch."""
        return str(int(date.timestamp() * 1000))

    def get_binance_bars(self, start_date, end_date, kline_size, symbol):
        """Fetch historical klines for one symbol as a numeric DataFrame.

        Args:
            start_date: Start of the range, forwarded to the client.
            end_date: End of the range, forwarded to the client.
            kline_size: Kline interval, forwarded to the client.
            symbol: Trading pair symbol.

        Returns:
            A ``DataFrame`` with columns ``timestamp``, ``open``, ``high``,
            ``low``, ``close``, ``volume`` (all coerced to numeric, with
            unparseable values becoming NaN) and ``time`` (a naive local
            ``datetime`` derived from the millisecond ``timestamp``).
        """
        klines = self.binance_client.get_historical_klines(symbol, kline_size,
                                                           start_date, end_date)
        bars = pd.DataFrame(
            klines,
            columns=['timestamp', 'open', 'high', 'low', 'close', 'volume',
                     'close_time', 'quote_av', 'trades', 'tb_base_av',
                     'tb_quote_av', 'ignore'],
        )
        bars = bars.drop(
            labels=['close_time', 'quote_av', 'trades', 'tb_base_av',
                    'tb_quote_av', 'ignore'],
            axis=1,
        )
        bars = bars.apply(pd.to_numeric, errors='coerce')
        bars['time'] = [datetime.fromtimestamp(ms / 1000.0) for ms in bars.timestamp]
        # NOTE: the original called ``drop(labels=["timestamp"])`` here but
        # discarded the result, so ``timestamp`` has always been part of the
        # returned frame. It is kept to preserve the returned columns.
        return bars.reset_index(drop=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment