Last active: February 15, 2019, 19:28
Save ozydingo/f88581400e0ec010887ecf3b5c0d3137 to your computer and use it in GitHub Desktop.
SageMaker getting started tutorial
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf-8 | |
# In[1]: | |
# import libraries | |
import boto3, re, sys, math, json, os, sagemaker, urllib.request | |
from sagemaker import get_execution_role | |
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
from IPython.display import Image | |
from IPython.display import display | |
from time import gmtime, strftime | |
from sagemaker.predictor import csv_serializer | |
# Define IAM role
# The execution role grants this notebook/SageMaker access to AWS resources.
role = get_execution_role()
prefix = 'sagemaker/DEMO-xgboost-dm'

# Region -> ECR image URI: each region hosts its own managed XGBoost container.
containers = {
    'us-west-2': '433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:latest',
    'us-east-1': '811284229777.dkr.ecr.us-east-1.amazonaws.com/xgboost:latest',
    'us-east-2': '825641698319.dkr.ecr.us-east-2.amazonaws.com/xgboost:latest',
    'eu-west-1': '685385470294.dkr.ecr.eu-west-1.amazonaws.com/xgboost:latest',
}  # each region has its XGBoost container

# Detect the region this instance runs in and pick the matching container.
my_region = boto3.session.Session().region_name  # set the region of the instance
print("Success - the MySageMakerInstance is in the " + my_region
      + " region. You will use the " + containers[my_region]
      + " container for your SageMaker endpoint.")
# In[2]:
# Create s3 resources
# Create the bucket that will hold training data and model artifacts.
bucket_name = 'andrew-brain-power-sagemaker-demo'  # <--- change this variable to a unique name for your bucket
s3 = boto3.resource('s3')
try:
    # us-east-1 is the default location and must not receive an explicit
    # LocationConstraint; every other region requires one.
    if my_region == 'us-east-1':
        s3.create_bucket(Bucket=bucket_name)
    else:
        s3.create_bucket(
            Bucket=bucket_name,
            CreateBucketConfiguration={'LocationConstraint': my_region},
        )
    print('S3 bucket created successfully')
except Exception as e:
    # Best-effort for the tutorial: report the failure and continue
    # (e.g. the bucket may already exist).
    print('S3 error: ', e)
# In[3]:
# Download data
# Fetch the prepared tutorial dataset, then load it into a DataFrame.
try:
    urllib.request.urlretrieve("https://d1.awsstatic.com/tmt/build-train-deploy-machine-learning-model-sagemaker/bank_clean.27f01fbbdf43271788427f3682996ae29ceca05d.csv", "bank_clean.csv")
    print('Success: downloaded bank_clean.csv.')
except Exception as e:
    print('Data load error: ', e)

try:
    # First CSV column is the row index.
    model_data = pd.read_csv('./bank_clean.csv', index_col=0)
    print('Success: Data loaded into dataframe.')
except Exception as e:
    print('Data load error: ', e)
# In[12]:
# Split data into test, train sets
# Shuffle reproducibly (fixed seed), then take the first 70% as training data.
shuffled = model_data.sample(frac=1, random_state=1729)
cut = int(0.7 * len(model_data))
train_data, test_data = np.split(shuffled, [cut])
print(train_data.shape, test_data.shape)
print(train_data.describe())
# In[13]:
# Format and upload data to s3
# Put the label column (y_yes) first and drop the one-hot complement (y_no);
# the CSV is written without header or index, then staged under <prefix>/train.
features = train_data.drop(['y_no', 'y_yes'], axis=1)
pd.concat([train_data['y_yes'], features], axis=1).to_csv('train.csv', index=False, header=False)
boto3.Session().resource('s3').Bucket(bucket_name).Object(os.path.join(prefix, 'train/train.csv')).upload_file('train.csv')
# Channel definition pointing the training job at the uploaded CSV.
s3_input_train = sagemaker.s3_input(s3_data='s3://{}/{}/train'.format(bucket_name, prefix), content_type='csv')
# In[14]:
# Create sagemaker session
# Configure the XGBoost estimator: container image, IAM role, one m4.xlarge
# training instance, and the S3 location for the resulting model artifacts.
sess = sagemaker.Session()
xgb = sagemaker.estimator.Estimator(
    containers[my_region],
    role,
    train_instance_count=1,
    train_instance_type='ml.m4.xlarge',
    output_path='s3://{}/{}/output'.format(bucket_name, prefix),
    sagemaker_session=sess,
)
xgb.set_hyperparameters(
    max_depth=5,
    eta=0.2,
    gamma=4,
    min_child_weight=6,
    subsample=0.8,
    silent=0,
    objective='binary:logistic',  # binary classifier, probability output
    num_round=100,
)

# In[15]:
# fit the model
# Launches the training job against the 'train' channel; blocks until done.
xgb.fit({'train': s3_input_train})
# In[16]:
# deploy a prediction server
# Stand up a real-time inference endpoint backed by the trained model.
xgb_predictor = xgb.deploy(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
)
# In[17]:
# Prepare test data, predict
# Fix: DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in
# 1.0 -- to_numpy() is the documented replacement with identical output.
test_data_array = test_data.drop(['y_no', 'y_yes'], axis=1).to_numpy()  # load the data into an array
xgb_predictor.content_type = 'text/csv'  # set the data type for an inference
xgb_predictor.serializer = csv_serializer  # set the serializer type
predictions = xgb_predictor.predict(test_data_array).decode('utf-8')  # predict!
# The endpoint returns one comma-separated string of probabilities.
# predictions[1:] skips the leading '0' of the first value (".03" still
# parses as 0.03) -- kept for behavior compatibility with the original.
# Fix: np.fromstring(..., sep=',') is deprecated; parse explicitly instead.
predictions_array = np.array(predictions[1:].split(','), dtype=np.float64)
print(predictions_array.shape)
# In[32]:
# Validate results
# Cross-tabulate observed labels against rounded predicted probabilities to
# build a 2x2 confusion matrix, then print a formatted summary.
cm = pd.crosstab(
    index=test_data['y_yes'],
    columns=np.round(predictions_array),
    rownames=['Observed'],
    colnames=['Predicted'],
)
tn = cm.iloc[0, 0]
fp = cm.iloc[0, 1]
fn = cm.iloc[1, 0]
tp = cm.iloc[1, 1]
p = (tp + tn) / (tp + tn + fp + fn) * 100  # overall accuracy, percent
print("\n{0:<20}{1:<4.1f}%\n".format("Overall Classification Rate: ", p))
print("{0:<15}{1:<15}{2:>8}".format("Predicted", "No Purchase", "Purchase"))
print("Observed")
# NOTE(review): percentages below are normalized per predicted column.
print("{0:<15}{1:<2.0f}% ({2:<}){3:>6.0f}% ({4:<})".format("No Purchase", tn / (tn + fn) * 100, tn, fp / (tp + fp) * 100, fp))
print("{0:<16}{1:<1.0f}% ({2:<}){3:>7.0f}% ({4:<}) \n".format("Purchase", fn / (tn + fn) * 100, fn, tp / (tp + fp) * 100, tp))
# In[ ]:
### Clean up resources:
# Tear down the inference endpoint and empty the bucket so the demo stops
# incurring charges.
sagemaker.Session().delete_endpoint(xgb_predictor.endpoint)
bucket_to_delete = boto3.resource('s3').Bucket(bucket_name)
bucket_to_delete.objects.all().delete()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.