Created
August 29, 2014 02:54
-
-
Save hiromu/d5ed079e98caea3b008c to your computer and use it in GitHub Desktop.
機械学習コンテストサンプルソース
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <float.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define TRAIN_SET 500 /* rows read from train.csv */
#define TEST_SET 500  /* rows read from test.csv */
#define FEATURE 6     /* integer features per row */
#define K 3           /* number of neighbours that vote in knn() */
#define SA 10000      /* iterations of the random weight search in main() */
#define FOLD 2        /* first TRAIN_SET / FOLD rows fit, the rest validate */
#define RANGE 10      /* weight step is drawn from [0, RANGE) */

int train[TRAIN_SET][FEATURE], label[TRAIN_SET], test[TEST_SET][FEATURE], result[TEST_SET];

/*
 * Divide num by den (den > 0) and round to the nearest integer, with halves
 * rounded away from zero. Done in integers so the result is exact.
 */
static int round_div(long long num, long long den)
{
    if (num >= 0)
        return (int)((2 * num + den) / (2 * den));
    return (int)-((-2 * num + den) / (2 * den));
}

/*
 * Weighted k-nearest-neighbour prediction.
 *
 * For every row of test_set, finds the (up to) K rows of train_set with the
 * smallest weighted squared distance
 *     sum over f of weight[f] * (train_set[j][f] - test_set[i][f])^2
 * and stores the rounded mean of their labels in test_label[i].
 *
 * train_set / train_label : train_size rows and their labels
 * test_set                : test_size rows to classify
 * test_label              : output, test_size entries
 * weight                  : FEATURE per-feature weights
 *
 * When train_size is smaller than K, only the available rows vote; when it
 * is 0, test_label[i] is set to 0. Rows at equal distance are ranked in
 * training order (earlier rows first).
 */
void knn(int train_set[][FEATURE], int train_label[], int train_size, int test_set[][FEATURE], int test_label[], int test_size, int weight[])
{
    int i, j, k, m, filled, neighbors[K];
    long long sum;
    double distance, delta, dists[K];

    for (i = 0; i < test_size; i++) {
        /* dists[0..filled-1] is kept sorted ascending, neighbors[] in step. */
        filled = 0;

        for (j = 0; j < train_size; j++) {
            /* The square root is omitted: it does not change the ranking. */
            distance = 0;
            for (k = 0; k < FEATURE; k++) {
                delta = (double)train_set[j][k] - test_set[i][k];
                distance += delta * delta * weight[k];
            }

            /* Find the first slot holding a strictly larger distance. */
            for (k = 0; k < filled && dists[k] <= distance; k++)
                ;
            if (k == K)
                continue; /* farther than all K current neighbours */

            /* Shift the tail down one slot, dropping the farthest if full. */
            if (filled < K)
                filled++;
            for (m = filled - 1; m > k; m--) {
                dists[m] = dists[m - 1];
                neighbors[m] = neighbors[m - 1];
            }
            dists[k] = distance;
            neighbors[k] = train_label[j];
        }

        sum = 0;
        for (j = 0; j < filled; j++)
            sum += neighbors[j];
        test_label[i] = filled > 0 ? round_div(sum, filled) : 0;
    }
}
int main(void) | |
{ | |
int i, j, k, idx, range, old, diff, sum, weight[FEATURE] = {1}; | |
freopen("train.csv", "r", stdin); | |
for (i = 0; i < TRAIN_SET; i++) { | |
for (j = 0; j < FEATURE; j++) | |
scanf("%d,", &train[i][j]); | |
scanf("%d", &label[i]); | |
} | |
knn(train, label, TRAIN_SET / FOLD, &train[TRAIN_SET / FOLD], result, TRAIN_SET - TRAIN_SET / FOLD, weight); | |
diff = 0; | |
for (i = 0; i < TRAIN_SET - TRAIN_SET / FOLD; i++) | |
diff += abs(label[TRAIN_SET / FOLD + i] - result[i]); | |
srand(time(NULL)); | |
for (i = 0; i < SA; i++) { | |
idx = rand() % FEATURE; | |
range = rand() % RANGE; | |
old = weight[idx]; | |
if (weight[idx] < range || rand() % 2 == 0) | |
weight[idx] += range; | |
else | |
weight[idx] -= range; | |
knn(train, label, TRAIN_SET / FOLD, &train[TRAIN_SET / FOLD], result, TRAIN_SET - TRAIN_SET / FOLD, weight); | |
sum = 0; | |
for (j = 0; j < TRAIN_SET - TRAIN_SET / FOLD; j++) | |
sum += abs(label[TRAIN_SET / FOLD + j] - result[j]); | |
if (sum > diff) | |
weight[idx] = old; | |
} | |
freopen("test.csv", "r", stdin); | |
for (i = 0; i < TEST_SET; i++) { | |
for (j = 0; j < FEATURE - 1; j++) | |
scanf("%d,", &test[i][j]); | |
scanf("%d", &test[i][j]); | |
} | |
knn(train, label, TRAIN_SET, test, result, TEST_SET, weight); | |
for (i = 0; i < TEST_SET; i++) | |
printf("%d¥n", result[i]); | |
return 0; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <float.h> | |
#include <limits.h> | |
#include <math.h> | |
#include <stdio.h> | |
#define TRAIN_SET 1000 /* rows read from train.csv */
#define TEST_SET 1000  /* rows read from test.csv */
#define FEATURE 19     /* integer features per row */
#define K 3            /* number of neighbours that vote */

/* Training rows and their labels, filled by main() from train.csv. */
int train[TRAIN_SET][FEATURE], label[TRAIN_SET];
int main(void) | |
{ | |
int i, j, k, sum, distance; | |
int test[FEATURE], dists[K], neighbors[K]; | |
freopen("train.csv", "r", stdin); | |
for (i = 0; i < TRAIN_SET; i++) { | |
for (j = 0; j < FEATURE; j++) | |
scanf("%d,", &train[i][j]); | |
scanf("%d", &label[i]); | |
} | |
freopen("test.csv", "r", stdin); | |
for (i = 0; i < TEST_SET; i++) { | |
for (j = 0; j < FEATURE - 1; j++) | |
scanf("%d,", &test[j]); | |
scanf("%d", &test[j]); | |
for (j = 0; j < K; j++) | |
dists[j] = INT_MAX; | |
for (j = 0; j < TRAIN_SET; j++) { | |
distance = 0; | |
for (k = 0; k < FEATURE; k++) | |
if (train[j][k] != test[k]) | |
distance += 1; | |
for (k = 0; k < K; k++) { | |
if (distance < dists[k]) { | |
dists[k] = distance; | |
neighbors[k] = label[j]; | |
} | |
} | |
} | |
sum = 0; | |
for (j = 0; j < K; j++) | |
sum += neighbors[j]; | |
printf("%d¥n", (int)round((double)sum / K)); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment