import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Fit a logistic-regression classifier that predicts the 'zamoznosc' column of
# the survey spreadsheet, report accuracy on a held-out split, and classify
# one hand-written record.

# --- Data --------------------------------------------------------------------
# Row 0 of the sheet supplies the column names.
data = pd.read_excel('ankieta.xlsx', header=0)

# 'zamoznosc' is the target; every other column stays in the feature frame.
y = data['zamoznosc']
X = data.drop(columns='zamoznosc')

# --- Preprocessing -----------------------------------------------------------
# Only the columns named in these two lists are routed to a transformer.
cechy_kat = ['rejon', 'wyksztalcenie', 'preferencje_polit']  # one-hot encoded
cechy_num = ['wiek', 'konta_net']  # standardized

# handle_unknown='ignore' lets transform() accept a category that was not
# present when the encoder was fitted instead of raising.
one_hot_transformer = Pipeline([
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])
std_transf = Pipeline([
    ('scaler', StandardScaler()),
])

preproc = ColumnTransformer([
    ('kategor', one_hot_transformer, cechy_kat),
    ('stand', std_transf, cechy_num),
])

# --- Model -------------------------------------------------------------------
# Preprocessing and classifier live in one pipeline, so the encoder and the
# scaler are fitted on the training split only.
pipeline = Pipeline([
    ('preprocesor', preproc),
    ('classifier', LogisticRegression(max_iter=1000)),
])

# --- Training and hold-out evaluation ----------------------------------------
# 20% of the rows are held out; the fixed random_state keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the fitted pipeline, so predict() can be chained onto it.
predictions = pipeline.fit(X_train, y_train).predict(X_test)

accuracy = accuracy_score(y_test, predictions)
print("Dokładność klasyfikacji:", accuracy)

# --- Single-record prediction ------------------------------------------------
# A one-row frame using the same feature column names as the training data;
# the transformers pick columns by name, so their order here is irrelevant.
test_value = pd.DataFrame({
    'wiek': [30],
    'rejon': ['Polska_A'],
    'wyksztalcenie': ['podstawowe'],
    'preferencje_polit': ['lewica'],
    'konta_net': [2],
})

wyniki = pipeline.predict(test_value)
print("Predykcja dla pojedynczej wartości:", wyniki[0])
