import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Train and evaluate a logistic-regression classifier on the breast-cancer
# recurrence data, then predict the class of a single hand-written record.

# The data file has no header row, so column names are supplied explicitly;
# 'Class' is the target column.
column_names = [
    'Class', 'age', 'menopause', 'tumor-size', 'inv-nodes',
    'node-caps', 'deg-malig', 'breast', 'breast-quad', 'irradiat',
]
data = pd.read_csv("UCI/breast-cancer.data", header=None, names=column_names)

# Encode the target as 0/1. Plain column assignment replaces the column, so
# it really ends up with an integer dtype; writing through
# `data.loc[:, 'Class'] = ...` sets values in place and does not reliably
# change the dtype of the existing text column.
data['Class'] = (
    data['Class'].map({'no-recurrence-events': 0,
                       'recurrence-events':    1}).astype(int))

X = data.drop(columns=['Class'])
y = data['Class']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Features to be one-hot encoded. This list names every feature column, so
# the standardising branch below currently receives no columns.
cechy_kat = ['age', 'menopause', 'tumor-size', 'inv-nodes', 'node-caps', 'deg-malig', 'breast', 'breast-quad', 'irradiat']

std_transformer = Pipeline(steps=[('scaler', StandardScaler())])

one_hot_transformer = Pipeline(steps=[
    # handle_unknown='ignore': a category not seen during fit is encoded as
    # all zeros at predict time instead of raising an error.
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

preproc = ColumnTransformer(
    transformers=[
        # Boolean mask selecting every column that is NOT listed in cechy_kat.
        ('stand',   std_transformer, ~X.columns.isin(cechy_kat)),
        ('kategor', one_hot_transformer, cechy_kat)
    ])

# Preprocessing and classifier are chained so that the same transformations
# are applied in fit() and predict().
pipeline = (
    Pipeline(steps=[('preprocesor', preproc),
                    ('classifier', LogisticRegression())]))

pipeline.fit(X_train, y_train)

y_pred = pipeline.predict(X_test)
# Compute the metrics on the held-out test set
print("Metryki wydajności:")
print("Dokładność (accuracy):", accuracy_score(y_test, y_pred))
print("Precyzja (precision):", precision_score(y_test, y_pred))
print("Czułość (recall):", recall_score(y_test, y_pred))
print("F1-score:", f1_score(y_test, y_pred))

# A single example record with the same feature columns as X.
# NOTE(review): 'deg-malig' is given as an int here; this presumably matches
# the dtype read_csv infers for that column -- confirm, otherwise the encoder
# treats the value as an unknown category.
pacjentka = pd.DataFrame({
    'age': ['30-39'],
    'menopause': ['lt40'],
    'tumor-size': ['0-4'],
    'inv-nodes': ['0-2'],
    'node-caps': ['no'],
    'deg-malig': [2],
    'breast': ['left'],
    'breast-quad': ['left_low'],
    'irradiat': ['no']
})

predykcja = pipeline.predict(pacjentka)
print("Predykcja dla pojedynczej obserwacji:", predykcja)