Skip to content

Instantly share code, notes, and snippets.

@kittisak-phetrungnapha
Created February 17, 2019 16:42
Show Gist options
  • Save kittisak-phetrungnapha/5a5bb96089bfa35e23f240684cc7cb99 to your computer and use it in GitHub Desktop.
Save kittisak-phetrungnapha/5a5bb96089bfa35e23f240684cc7cb99 to your computer and use it in GitHub Desktop.
def model_cv(splits, X, y, pipeline, description, category):
    """Run stratified K-fold cross-validation and export the averaged report.

    For every fold the pipeline is fitted on the training rows and used to
    predict the held-out rows; the per-fold accuracy and classification
    report are collected. The reports are then handed to the averaging
    helper that matches ``category``, the mean accuracy is added to the
    result, and the whole thing is passed to ``report_to_csv``.

    Args:
        splits: Number of folds given to ``StratifiedKFold``.
        X: Feature rows. May be a pandas DataFrame/Series (rows are taken
            through ``.iloc``) or any container that supports indexing with
            an integer array, such as a NumPy array.
        y: Target labels, indexed the same way as ``X``.
        pipeline: Estimator exposing ``fit`` (returning the fitted model)
            and ``predict``.
        description: Label printed before the run and forwarded to
            ``report_to_csv``.
        category: One of ``'bug'``, ``'feature'``, ``'ux'`` or ``'rating'``;
            selects the report-averaging helper.

    Raises:
        ValueError: If ``category`` is not one of the values listed above.
    """
    print(description)
    print()

    # StratifiedKFold.split yields integer row *positions*. Plain
    # ``X[train]`` on a pandas DataFrame treats them as column labels and
    # fails with "KeyError: '[...] not in index'", so select rows through
    # ``.iloc`` whenever the object provides it. Objects without ``.iloc``
    # (e.g. NumPy arrays) are indexed directly, exactly as before.
    X_rows = getattr(X, 'iloc', X)
    y_rows = getattr(y, 'iloc', y)

    kfold = StratifiedKFold(n_splits=splits, shuffle=True, random_state=42)
    reports = []
    fold_accuracies = []
    for train, test in kfold.split(X, y):
        model_fit = pipeline.fit(X_rows[train], y_rows[train])
        expected = y_rows[test]
        prediction = model_fit.predict(X_rows[test])
        fold_accuracies.append(accuracy_score(expected, prediction))
        reports.append(classification_report(expected, prediction, output_dict=True))

    # Each category has its own averaging helper for the per-fold reports.
    if category == 'bug':
        report_avg_dict = bug_report_avg(reports)
    elif category == 'feature':
        report_avg_dict = feature_report_avg(reports)
    elif category == 'ux':
        report_avg_dict = ux_report_avg(reports)
    elif category == 'rating':
        report_avg_dict = rating_report_avg(reports)
    else:
        raise ValueError('Category must be Bug, Feature, UX, or Rating.')

    mean_accuracy = np.mean(fold_accuracies)
    print('Accuracy:', mean_accuracy)

    # Export to .csv
    report_avg_dict['accuracy'] = {' ': mean_accuracy}
    report_to_csv(report_avg_dict, description)
@kittisak-phetrungnapha
Copy link
Author

# Prepare pipeline: three feature branches are combined by a FeatureUnion
# and fed into a Multinomial Naive Bayes classifier.

# Text branch: TextSelector keyed on features[0], followed by TF-IDF.
texts = Pipeline(steps=[
    ('selector', TextSelector(key=features[0])),
    ('tfidf', TfidfVectorizer()),
])

# Rating branch: NumberSelector keyed on features[1], used without scaling.
ratings = Pipeline(steps=[
    ('selector', NumberSelector(key=features[1])),
])

# Sentiment branch: NumberSelector keyed on features[2], then min-max scaled.
sentiments = Pipeline(steps=[
    ('selector', NumberSelector(key=features[2])),
    ('scaler', MinMaxScaler()),
])

# Concatenate the output of the three branches into one feature matrix.
feats = FeatureUnion(transformer_list=[
    ('texts', texts),
    ('ratings', ratings),
    ('sentiments', sentiments),
])

# Full model: combined features followed by the classifier.
pipeline = Pipeline(steps=[
    ('features', feats),
    ('classifier', MultinomialNB()),
])
KeyError: '[  1   2   4   6   7   8  10  11  12  13  14  16  17  18  19  20  21  23\n  24  26  27  28  29  31  32  34  35  36  37  38  40  41  43  44  47  48\n  49  50  51  52  53  54  58  59  60  61  62  64  65  66  67  68  69  70\n  71  74  75  79  80  81  83  84  85  86  87  88  89  91  92  95  96  97\n  98  99 100 102 103 104 105 106 107 110 111 112 113 115 116 117 118 120\n 121 122 123 125 127 128 129 130 131 133 134 135 136 138 139 140 141 142\n 143 144 146 147 148 149 150 151 154 156 157 158 159 160 161 162 163 164\n 165 166 167 168 169 170 171 172 173 174 175 177 178 179 181 182 183 184\n 185 186 187 188 189 190 191 192 196 197 198 199 200 201 202 203 204 205\n 206 207 209 210 211 212 213 214 215 216 217 221 222 223 224 226 227 228\n 230 232 234 235 236 238 240 241 242 243 244 245 246 247 248 251 252 253\n 254 257 258 260 262 263 264 266 267 268 269 270 272 273 274 275 276 277\n 279 280 282 283 284 285 287 288 290 291 292 293 294 296 297 299 300 303\n 304 305 306 307 308 309 310 314 315 316 317 318 320 321 322 323 324 325\n 326 327 330 331 335 336 337 339 340 341 342 343 344 345 347 348 351 352\n 353 354 355 356 358 359 360 361 362 363 366 367 368 369 371 372 373 374\n 376 377 378 379 381 383 384 385 386 387 389 390 391 392 394 395 396 397\n 398 399 400 402 403 404 405 406 407 410 412 413 414 415 416 417 418 419\n 420 421 422 423 424 425 426 427 428 429 430 431 433 434 435 437 438 439\n 440 441 442 443 444 445 446 447 448 452 453 454 455 456 457 458 459 460\n 461 462 463 465 466 467 468 469 470 471 472 473 477 478 479 480 482 483\n 484 486 488 490 491 492 494 496 497 498 499 500 501 502 503 504 507 508\n 509 510 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527\n 528 529 530 531 532 533 535 536 537 538 539 540 541 543 545 546 547 549\n 550 551 552 554 555 556 557 558 559 560 562 563 564 565 568 569 571 572\n 573 574 575 576 577 578 579 580 581 584 585 586 587 588 589 591 592 593\n 594 595 596 597 599 600 601 602 603 604 606 608 609 610 611 612 
614 615\n 616 617 618 619 620 621 623 624 625 626 627 628 629 630 631 632 633 634\n 635 636 637 638 639 640 641 642 643 644 645 646 647 649 650 651 652 653\n 654 655 657 659 660 661 663 664 665 666 668 669 670 671 672 673 674 676\n 677 678 679 682 683 685 686 687 688 689 690 691 692 693 694 695 698 699\n 700 701 702 703 705 706 707 708 709 710 711 713 714 715 716 717 718 720\n 722 723 724 725 726 728 729 730 731 732 733 734 735 737 738 739] not in index'

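Hint: the data has 740 rows (index 0 to 739), so the numbers listed in the KeyError above are row positions rather than column labels.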

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment