import time
import numpy as np
from sklearn.neighbors import NearestCentroid
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn import decomposition

def run(x_train, y_train, x_test, y_test, clf):
    """Fit ``clf``, score it on the test data, and print score and timings.

    Args:
        x_train: Training samples, passed unchanged to ``clf.fit``.
        y_train: Training labels, passed unchanged to ``clf.fit``.
        x_test: Test samples, passed unchanged to ``clf.score``.
        y_test: Test labels, passed unchanged to ``clf.score``.
        clf: Any object exposing ``fit(x, y)`` and ``score(x, y)``.

    Returns:
        A tuple ``(score, train_seconds, test_seconds)`` holding the same
        values that are printed, so callers can collect results if they wish.
    """
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring durations; time.time() follows the wall clock and can jump
    # when the system time is adjusted.
    start = time.perf_counter()
    clf.fit(x_train, y_train)
    e_train = time.perf_counter() - start

    start = time.perf_counter()
    score = clf.score(x_test, y_test)
    e_test = time.perf_counter() - start

    print("Ocena = %0.4f (czas, uczący=%8.3f, testowy=%8.3f)" % (score, e_train, e_test))
    return score, e_train, e_test

def train(x_train, y_train, x_test, y_test):
    """Benchmark every classifier configuration on one train/test split.

    For each configuration the label is printed without a trailing newline,
    then a fresh classifier is built and passed to ``run`` together with the
    data, so the result appears on the same output line as the label.

    Args:
        x_train: Training samples forwarded to ``run``.
        y_train: Training labels forwarded to ``run``.
        x_test: Test samples forwarded to ``run``.
        y_test: Test labels forwarded to ``run``.
    """
    # (label, factory) pairs. Factories defer construction so that each
    # classifier is created only after its label has been printed.
    experiments = (
        ("    Najbliższy centroid          : ",
         lambda: NearestCentroid()),
        ("    Klasyfikator k-NN (k=3)     : ",
         lambda: KNeighborsClassifier(n_neighbors=3)),
        ("    Klasyfikator k-NN (k=7)     : ",
         lambda: KNeighborsClassifier(n_neighbors=7)),
        ("    Naiwny klasyfikator Bayesa (gaussowski)    : ",
         lambda: GaussianNB()),
        ("    Drzewo decyzyjne             : ",
         lambda: DecisionTreeClassifier()),
        ("    Las losowy (drzewa=  5) : ",
         lambda: RandomForestClassifier(n_estimators=5)),
        ("    Las losowy (drzewa= 50) : ",
         lambda: RandomForestClassifier(n_estimators=50)),
        ("    Las losowy (drzewa=500) : ",
         lambda: RandomForestClassifier(n_estimators=500)),
        ("    Las losowy (drzewa=1000): ",
         lambda: RandomForestClassifier(n_estimators=1000)),
        ("    LinearSVM (C=0.01)        : ",
         lambda: LinearSVC(C=0.01)),
        ("    LinearSVM (C=0.1)         : ",
         lambda: LinearSVC(C=0.1)),
        ("    LinearSVM (C=1.0)         : ",
         lambda: LinearSVC(C=1.0)),
        ("    LinearSVM (C=10.0)        : ",
         lambda: LinearSVC(C=10.0)),
    )

    for label, make_clf in experiments:
        print(label, end='')
        run(x_train, y_train, x_test, y_test, make_clf())

def main():
    """Load the data set and benchmark the classifiers on four input variants.

    The variants, in order, are: the vectors as loaded (cast to float64),
    the vectors divided by 256, the vectors standardized per feature, and
    the first 15 principal components of the standardized vectors. Each
    variant is announced with a header line and then passed to ``train``.
    """
    train_vectors = np.load("../data/mnist/mnist_train_vectors.npy").astype("float64")
    train_labels = np.load("../data/mnist/mnist_train_labels.npy")
    test_vectors = np.load("../data/mnist/mnist_test_vectors.npy").astype("float64")
    test_labels = np.load("../data/mnist/mnist_test_labels.npy")

    # Variant 1: values exactly as stored on disk.
    print("Modele wytrenowane na nieprzetworzonych obrazach [0,255]:")
    train(train_vectors, train_labels, test_vectors, test_labels)

    # Variant 2: every value divided by 256.
    print("Modele wytrenowane na nieprzetworzonych obrazach [0,1):")
    train(train_vectors / 256.0, train_labels, test_vectors / 256.0, test_labels)

    # Variant 3: per-feature standardization. The statistics come from the
    # training set only and are reused for the test set; the small epsilon
    # keeps the divisor non-zero for features with zero deviation.
    feature_mean = train_vectors.mean(axis=0)
    feature_std = train_vectors.std(axis=0) + 1e-8
    train_standardized = (train_vectors - feature_mean) / feature_std
    test_standardized = (test_vectors - feature_mean) / feature_std

    print("Modele wytrenowane na znormalizowanych obrazach:")
    train(train_standardized, train_labels, test_standardized, test_labels)

    # Variant 4: project onto 15 principal components fitted on the
    # standardized training data.
    projector = decomposition.PCA(n_components=15).fit(train_standardized)
    train_projected = projector.transform(train_standardized)
    test_projected = projector.transform(test_standardized)

    print("Modele wytrenowane na pierwszych 15 głównych składowych znormalizowanych obrazów:")
    train(train_projected, train_labels, test_projected, test_labels)

# Run the benchmark only when executed as a script, so importing this module
# (e.g. to reuse run/train) does not trigger data loading and training.
if __name__ == "__main__":
    main()

