Using HTM for time-series predictions using multiple inputs
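The script below assumes the input CSV is in NuPIC's file format: a header row of field names, a second row of field types, and a third row of special flags, with "T" marking the timestamp field. An illustrative input (not the author's actual data) might look like:

timestamp,col1,col2
datetime,float,float
T,,
2017-01-01 00:00:00,1.0,2.0
2017-01-01 01:00:00,1.5,2.5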
import pandas as pd
from nupic.data.inference_shifter import InferenceShifter
from nupic.swarming import permutations_runner
# on NuPIC releases older than 1.0, use nupic.frameworks.opf.modelfactory instead
from nupic.frameworks.opf.model_factory import ModelFactory

input_file_name = "/tmp/input.csv"
df = pd.read_csv(input_file_name, sep=',')
input_file_num_rows = len(df)
output_column = "col2"
swarm_size = "medium"  # one of "small", "medium", or "large"
cpu_count = 4  # change this to the number of cpu cores on your system
SWARM_CONFIG = {
    "includedFields": [],  # populated from the CSV's header below
    "streamDef": {
        "info": output_column,
        "version": 1,
        "streams": [
            {
                "info": output_column,
                "source": "file://" + input_file_name,
                "columns": ["*"],
                "last_record": input_file_num_rows
            }
        ]
    },
    # TemporalAnomaly models yield multi-step predictions plus an anomaly score
    "inferenceType": "TemporalAnomaly",
    "inferenceArgs": {
        "predictionSteps": [1, 2, 3, 4, 5],  # predict 1 through 5 steps ahead
        "predictedField": output_column
    },
    "iterationCount": -1,  # -1 means iterate over all records
    "swarmSize": swarm_size
}
# Add some additional parameters to the swarm's configuration.
# I don't remember now why I did it this way; it saved me time on something, if I recall :-)
SWARM_CONFIG["streamDef"]["info"] = output_column
SWARM_CONFIG["streamDef"]["streams"][0]["info"] = output_column
SWARM_CONFIG["streamDef"]["streams"][0]["source"] = "file://" + input_file_name
SWARM_CONFIG["streamDef"]["streams"][0]["last_record"] = input_file_num_rows
SWARM_CONFIG["inferenceArgs"]["predictedField"] = output_column
# Add the input file's columns as includedFields in SWARM_CONFIG.
# With a CSV in NuPIC's file format, the first row pandas reads after the
# header is the field-type row, so df.iloc[0, i] is the field's type
# (e.g. "float" or "datetime").
for i in range(len(df.columns)):
    SWARM_CONFIG["includedFields"].append(
        {"fieldName": df.columns[i], "fieldType": df.iloc[0, i]})
# Rebuild the model by swarming over the data. runWithConfig blocks until the
# swarm finishes and returns the best model's parameters.
model_params = permutations_runner.runWithConfig(
    SWARM_CONFIG, {"maxWorkers": cpu_count, "overwrite": True})
# create a new model from those parameters
model = ModelFactory.create(model_params)
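# A hedged sketch (my addition, not the original author's code): persist the
# swarm's best params so later runs can skip the swarm entirely. This assumes
# the params dict holds only JSON-serializable values.
import json
with open("/tmp/model_params.json", "w") as f:
    json.dump(model_params, f, indent=2)
# on a later run: model_params = json.load(open("/tmp/model_params.json"))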
# enable inferential predictions on the new model
model.enableInference({"predictedField": output_column})
shifter = InferenceShifter()
results = None
# Make the predictions and do something with the results.
# df.iloc[2:] skips the NuPIC type and flag rows; the first CSV column is
# assumed to hold the timestamp.
for _, row_data in df.iloc[2:].iterrows():
    time_stamp = pd.to_datetime(row_data[df.columns[0]]).to_pydatetime()
    results = model.run({
        df.columns[0]: time_stamp,
        output_column: float(row_data[output_column]),
    })
    # shift the results so each prediction lines up with the row it predicted
    last_result = shifter.shift(results)
    anomaly_score = last_result.inferences["anomalyScore"]
    predicted_value = last_result.inferences["multiStepBestPredictions"][1]
    last_inferences = last_result.inferences["multiStepPredictions"][1]
    # ... insert code here to calculate and store statistics using the predicted values
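# A minimal sketch of the statistics step left as a TODO above (my addition,
# not the original author's code). If each iteration appends predicted_value
# and the actual value to two lists, a mean absolute error is one simple score.
# The None filter drops the first predictions, which the shifter leaves empty.
def mean_absolute_error(actuals, predictions):
    """Average absolute difference between aligned actual/predicted values."""
    pairs = [(a, p) for a, p in zip(actuals, predictions) if p is not None]
    return sum(abs(a - p) for a, p in pairs) / float(len(pairs))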