import os
import yaml
import logging
import re
import datetime
from google.cloud import bigquery
from google.cloud.bigquery import LoadJobConfig
from google.cloud.bigquery import SchemaField
import google.cloud.logging

# Set up Cloud Logging for this module.
# See https://cloud.google.com/logging/docs/setup/python
client = google.cloud.logging.Client()
# NOTE(review): the returned handler is discarded here - confirm this call
# is still needed next to setup_logging().
client.get_default_handler()
client.setup_logging()

# Load config.yaml into the module-level ``config`` mapping.
config_file = "config.yaml"

# Start from an empty mapping so that ``config`` is always bound, even when
# the file is missing or cannot be parsed (both cases are logged below).
config = {}

if os.path.isfile(config_file):
    with open(config_file, "r") as stream:
        try:
            # An empty YAML document loads as None; keep the empty mapping
            # in that case so later key lookups behave uniformly.
            config = yaml.safe_load(stream) or {}
        except yaml.YAMLError as exc:
            logging.error(exc)
else:
    logging.error("config.yaml needs to be added")

# Build a list of SchemaField objects from the schema section of config.yaml
def create_schema(schema_config):
    """Build a list of SchemaField objects from configured column entries.

    Args:
        schema_config: iterable of mappings, one per column. Each entry must
            provide string values under 'name' and 'type'. 'mode' defaults
            to 'NULLABLE' and 'description' to '' when absent.

    Returns:
        list of SchemaField in the order of ``schema_config``. Processing
        stops at the first invalid entry (which is logged), so the result
        may contain fewer fields than the input.
    """
    schema = []
    for scheme in schema_config:
        name = scheme.get('name')
        field_type = scheme.get('type')
        mode = scheme.get('mode', 'NULLABLE')
        description = scheme.get('description', '')

        # Explicit checks rather than ``assert`` so validation is not
        # stripped under ``python -O``. A missing 'name' or 'type' yields
        # None here and is reported through the same path as a wrong type.
        values = (name, field_type, mode, description)
        if not all(isinstance(value, str) for value in values):
            logging.error(
                'Błąd w schemacie: name {} - type {} - mode - {} description {}'.format(name, field_type, mode, description))
            break

        schema.append(SchemaField(name=name,
                                  field_type=field_type,
                                  mode=mode,
                                  description=description))

    logging.debug('Utworzono SCHEMA {}'.format(schema))

    return schema


    
def make_tbl_name(table_id, schema=False):
    """Derive the destination table name from a date-suffixed import name.

    The text before the first '_20' in ``table_id`` is the base table name,
    which is also the key used to look up the schema. The digits that follow
    are appended after a '$' separator.

    Args:
        table_id: name derived from the imported file, e.g. 'sales_20190131'.
        schema: when True, return only the base name.

    Returns:
        The base name when ``schema`` is true, otherwise '<base>$<digits>',
        where <digits> is made of up to four two-digit groups found from the
        first '_20' onwards ('sales_20190131' -> 'sales$20190131').

    NOTE(review): when ``table_id`` contains no '_20' the suffix is empty and
    the result ends in a bare '$' - confirm how undated files should be
    handled.
    """
    name, marker, remainder = table_id.partition('_20')

    if schema:
        return name

    # Scan only the text from the date marker onwards, so digits that are
    # part of the base name (e.g. 'ga360_sessions') cannot leak into the
    # suffix. The '20' consumed by the marker is put back first.
    dated_part = '20' + remainder if marker else ''
    suffix = ''.join(re.findall(r'\d\d', dated_part)[:4])

    return name + '$' + suffix
    

def query_schema(table_id, job_config):
    """Apply the configured schema for ``table_id`` to a load job config.

    The schema is looked up in the module-level ``config`` under
    ``config['schema'][<base table name>]['fields']``. When no entry exists
    for the table, schema auto-detection is enabled instead.

    Args:
        table_id: import name; its base name (see make_tbl_name) is the
            lookup key.
        job_config: load job configuration object to update in place.

    Returns:
        The same ``job_config`` object, with either ``autodetect`` enabled or
        ``schema`` and the CSV parsing options set.
    """
    schema_name = make_tbl_name(table_id, schema=True)

    logging.info('Szukanie schema_name: {} for import: {}'.format(schema_name, table_id))

    # A config with no 'schema' section (or an empty one) is treated the same
    # as a missing entry for this table, instead of raising on the lookup.
    schemas = (config or {}).get('schema') or {}

    # Without a configured schema, fall back to auto-detection; this is
    # recommended only for development tables.
    if schema_name not in schemas:
        logging.info('No config found. Using auto detection of schema')
        job_config.autodetect = True
        return job_config

    logging.info('Znaleziono schemat dla ' + schema_name)

    schema_config = schemas[schema_name]['fields']

    job_config.schema = create_schema(schema_config)

    # Standard CSV loading behaviour is defined here.
    job_config.quote_character = '"'
    job_config.skip_leading_rows = 1
    job_config.field_delimiter = ','
    job_config.allow_quoted_newlines = True

    return job_config

def load_gcs_bq(uri, table_id, project, dataset_id):
    """Submit a BigQuery load job for a CSV file stored in Cloud Storage.

    Args:
        uri: ``gs://`` URI of the file to load.
        table_id: import name used to pick the schema and to derive the
            destination table name.
        project: project passed to the BigQuery client.
        dataset_id: dataset that holds the destination table.

    Returns:
        None. The load job is submitted, but this function neither waits
        for it nor inspects its result.
    """
    bq_client = bigquery.Client(project=project)
    target_dataset = bq_client.dataset(dataset_id)

    # Base import settings; change them to suit your own import needs.
    load_config = LoadJobConfig()
    load_config.time_partitioning = bigquery.TimePartitioning()
    load_config.encoding = bigquery.Encoding.UTF_8
    load_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
    load_config.source_format = bigquery.SourceFormat.CSV

    # Add either the configured schema or auto-detection.
    load_config = query_schema(table_id, load_config)

    destination = target_dataset.table(make_tbl_name(table_id))

    # API request
    bq_client.load_table_from_uri(
        uri,
        destination,
        location='EU',
        job_config=load_config,
    )



def gcs_to_bq(data, context):
    """Background Cloud Function that can be triggered by Cloud Storage.

    Builds the URI of the file named in the event and loads it into BigQuery.

    Args:
        data (dict): Cloud Functions event payload; the 'name' and 'bucket'
            keys are read.
        context (google.cloud.functions.Context): metadata of the triggering
            event (not used in the body).
    Returns:
        None; output is written to the Stackdriver logs.
    """
    object_name = data['name']
    project = config['project']
    dataset_id = config['datasetid']

    # Guard clause: an empty object name means there is no file to import.
    if not object_name:
        logging.info('Nie ma nic do załadowania')
        return

    # The import name is the file name without its final extension, with
    # any remaining dots turned into underscores.
    file_name = os.path.basename(object_name)
    stem, _extension = os.path.splitext(file_name)
    table_id = stem.replace('.', '_')

    uri = 'gs://{}/{}'.format(data['bucket'], object_name)

    load_gcs_bq(uri, table_id, project, dataset_id)