from airflow.decorators import dag, task
from airflow.datasets import Dataset
from datetime import datetime
import re
import csv
import os
import random

# Sales CSV written by the first task of the DAG, and the Dataset built from its path.
sales_data_path = "/airflow-sandbox/datasets/sales_data.csv"
sales_dataset = Dataset(sales_data_path)

# File that receives the computed averages, and the Dataset built from its path.
avg_order_path = "/airflow-sandbox/datasets/avg_order_data.csv"
avg_order_dataset = Dataset(avg_order_path)

@dag(dag_id="inlets_and_outlets_tf", start_date=None, schedule=None, tags=["r8", "taskflow"])
def inlets_and_outlets_tf():
    """Define a two-task DAG that generates sales data and averages it.

    ``generate_sales_data`` writes a small CSV of pseudo-random orders to
    ``sales_data_path``; ``calculate_avg`` then reads a CSV, computes the mean
    of its ``order_value`` column and appends that number to
    ``avg_order_path``. The tasks declare the module-level datasets as their
    inlets/outlets and run strictly in that order.
    """

    @task(outlets=[sales_dataset])
    def generate_sales_data():
        """Write ten reproducible pseudo-random sales rows to ``sales_data_path``.

        The file is overwritten on every run. Columns: date, order_id,
        product, price, quantity, order_value (price * quantity, rounded to
        two decimals).
        """
        # A private generator seeded with 42 yields the same sequence as
        # seeding the module-level RNG, without reseeding shared global state.
        rng = random.Random(42)
        headers = ["date", "order_id", "product", "price", "quantity", "order_value"]
        products = ["Laptop", "Telefon", "Tablet", "Słuchawki", "Monitor"]

        # Make sure the target directory exists so a fresh sandbox does not
        # fail with FileNotFoundError on the first run.
        os.makedirs(os.path.dirname(sales_data_path), exist_ok=True)

        # Explicit UTF-8: the product list contains non-ASCII text, which the
        # locale's default encoding is not guaranteed to be able to encode.
        with open(sales_data_path, "w", newline="", encoding="utf-8") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=headers)
            writer.writeheader()

            for _ in range(10):
                # The order of the rng calls below determines the generated
                # values; keep it stable to keep the output reproducible.
                day = rng.randint(1, 10)
                date = f"2023-05-{day:02d}"
                order_id = rng.randint(1, 300)
                product = rng.choice(products)
                price = round(rng.uniform(50, 2000), 2)
                quantity = rng.randint(1, 5)
                order_value = round(price * quantity, 2)

                writer.writerow({
                    "date": date,
                    "order_id": order_id,
                    "product": product,
                    "price": price,
                    "quantity": quantity,
                    "order_value": order_value,
                })

    @task(inlets=[sales_dataset], outlets=[avg_order_dataset])
    def calculate_avg(**context):
        """Append the mean ``order_value`` of the upstream CSV to ``avg_order_path``.

        Args:
            **context: Airflow task context; only ``context["ti"]`` is used.

        Raises:
            ValueError: If the CSV contains no data rows, so no average exists.
        """
        ti = context["ti"]
        # The input path is taken from the first element of the upstream
        # task's "pipeline_outlets" XCom rather than from the module constant.
        # NOTE(review): presumably Airflow publishes a task's outlets under
        # this key and the element is usable as a path by open() -- confirm
        # against the Airflow version in use.
        data_path = ti.xcom_pull(key="pipeline_outlets", task_ids="generate_sales_data")[0]

        total_sum = 0
        total_rows = 0

        # Read with the same explicit encoding the producer task writes with.
        with open(data_path, "r", newline="", encoding="utf-8") as csvfile:
            for row in csv.DictReader(csvfile):
                total_sum += float(row["order_value"])
                total_rows += 1

        if total_rows == 0:
            # Fail with a descriptive error instead of a bare ZeroDivisionError.
            raise ValueError(
                f"No sales rows found in {data_path!r}; cannot compute the average order value"
            )

        result = total_sum / total_rows

        # Append mode: the file accumulates one average per run.
        with open(avg_order_path, "a", newline="", encoding="utf-8") as file:
            file.write(f"{result}\n")

    # Instantiate both tasks and make the average depend on the generated data.
    t_generate_task = generate_sales_data()
    t_calculate_task = calculate_avg()
    t_generate_task >> t_calculate_task

# Call the @dag-decorated function at import time; per Airflow's TaskFlow
# convention this call is what actually builds the DAG object.
inlets_and_outlets_tf()