import base64
import io
import logging
import re
from datetime import datetime

import openpyxl
import pandas as pd
from django.conf import settings
from django.db import models
from openai import OpenAI
from openpyxl_image_loader import SheetImageLoader
from PIL import Image
from rest_framework import serializers

# Module-level OpenAI client, created once at import time with the API key
# read from Django settings (settings.OPENAI_API_KEY). upload_pdf_to_openai
# below sends its requests through this instance.
client = OpenAI(api_key=settings.OPENAI_API_KEY)

def upload_pdf_to_openai(file_path):
    """Upload the file at ``file_path`` to OpenAI and return its file id.

    The file is opened in binary mode and sent through the module-level
    ``client`` with ``purpose="assistants"``. Any exception raised while
    opening or uploading is reported with ``print`` and ``None`` is returned
    instead of an id.
    """
    try:
        with open(file_path, "rb") as pdf_handle:
            file_id = client.files.create(file=pdf_handle, purpose="assistants").id
    except Exception as err:
        print(f"Failed to upload file to OpenAI: {err}")
        file_id = None
    return file_id


def parse_visual_excel_sheet(file_path):
    """Parse a visual report workbook into metadata and table rows.

    Reads the active worksheet of the workbook at ``file_path``. Report
    metadata is taken from fixed cells (C1, D3-D8, A10). Row 11 is treated as
    the table header and every later row that is not completely empty becomes
    one dict keyed by header text. Header cells without a value are named
    ``Column_<letter>``; ``datetime`` values are rendered as ``YYYY-MM-DD``
    and ``None`` values as ``""``.

    Each row dict additionally receives:

    * ``issue_number`` - the value of the "Number"/"#" column when truthy,
      otherwise the first truthy value under a header named
      number/issue/id/# (case-insensitive), otherwise ``row_<row number>``.
    * ``Picture_base64`` - a ``data:image/png;base64,...`` URI, only when a
      "Picture" column exists and an image is found at that row's cell.
      Extraction errors are printed and the key is simply omitted.

    Returns:
        dict: ``{'metadata': {...}, 'table_data': [row_dict, ...]}``.
    """
    wb = openpyxl.load_workbook(file_path)
    ws = wb.active

    image_loader = SheetImageLoader(ws)

    metadata = {
        'report_title': ws['C1'].value,
        'model_name': ws['D3'].value,
        'checker': ws['D4'].value,
        'organization': ws['D5'].value,
        'date': ws['D6'].value,
        'full_villa_model': ws['D7'].value,
        'villa_mep': ws['D8'].value,
        'report_subtitle': ws['A10'].value
    }

    header_row = 11

    column_info = []
    for i, cell in enumerate(ws[header_row]):
        # Merged header cells are MergedCell objects, which expose `column`
        # but not `column_letter`; derive the letter from the index so a
        # merged header does not raise AttributeError.
        column_letter = openpyxl.utils.get_column_letter(cell.column)
        if cell.value:
            header_value = str(cell.value).strip()
        else:
            header_value = f"Column_{column_letter}"
        column_info.append({
            'index': i,
            'letter': column_letter,
            'value': header_value
        })

    headers = [col['value'] for col in column_info]
    print("All headers:", headers)

    picture_column = None
    number_column = None

    # When several headers match, the right-most one wins.
    for col in column_info:
        if col['value'] == "Picture":
            picture_column = col
        elif col['value'] in ["Number", " Number", "#"]:
            number_column = col

    table_data = []

    for current_row in range(header_row + 1, ws.max_row + 1):
        row_values = [cell.value for cell in ws[current_row]]

        if all(value is None or value == "" for value in row_values):
            continue

        row_dict = {}
        for col in column_info:
            i = col['index']
            header = col['value']

            if i >= len(row_values):
                row_dict[header] = ""
                continue

            value = row_values[i]
            if isinstance(value, datetime):
                row_dict[header] = value.strftime('%Y-%m-%d')
            elif value is None:
                row_dict[header] = ""
            else:
                row_dict[header] = value

        if number_column:
            number_value = row_dict.get(number_column['value'])
            if number_value:
                row_dict['issue_number'] = number_value

        if 'issue_number' not in row_dict:
            # Resolve the fallback before touching the dict so it is never
            # mutated while being iterated.
            fallback = next(
                (
                    value
                    for key, value in row_dict.items()
                    if key.lower() in ['number', 'issue', 'id', '#'] and value
                ),
                None,
            )
            if fallback is None:
                fallback = f"row_{current_row}"
            row_dict['issue_number'] = fallback

        if picture_column:
            cell_coord = f"{picture_column['letter']}{current_row}"

            try:
                if image_loader.image_in(cell_coord):
                    img = image_loader.get(cell_coord)

                    buffered = io.BytesIO()
                    img.save(buffered, format="PNG")
                    img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')

                    row_dict['Picture_base64'] = f"data:image/png;base64,{img_base64}"
            except Exception as e:
                # Best-effort: a broken image must not abort the whole parse.
                print(f"Error extracting image from cell {cell_coord}: {e}")

        table_data.append(row_dict)

    return {
        'metadata': metadata,
        'table_data': table_data
    }




def parse_excel_sheet(file_path):
    """Parse the first sheet of a compliance workbook into structured data.

    The sheet is read without a header row, so all positions below are
    zero-based ``(row, column)`` indexes into the raw grid:

    * metadata: rows 3-6 column 1, and row 7 column 0
    * summary: rows 9-10 column 0, and row 11 columns 4-5
    * section header: row 13 column 0; the rows after it are scanned for
      subsection headers and numbered items.

    A row is a subsection header when column 0 is empty or ``#``, column 1 is
    non-blank text and columns 4-6 (where present) are empty. A row is an item
    when a subsection is open, columns 0 and 1 are filled and column 0 is
    numeric (numeric strings are converted to ``int``). Other rows are
    ignored.

    Returns:
        list: ``[{"metadata": ...}, {"summary": ...}, {"sections": [...]}]``.
    """
    # ExcelFile holds the workbook open; the context manager releases the
    # handle as soon as the sheet has been loaded into a DataFrame.
    with pd.ExcelFile(file_path) as xls:
        df = xls.parse(xls.sheet_names[0], header=None)

    metadata = {
        "model_name": df.iloc[3, 1],
        "checker": df.iloc[4, 1],
        "organization": df.iloc[5, 1],
        "date": df.iloc[6, 1],
        "consultant_villa": df.iloc[7, 0],
    }

    summary = {
        "summary_title": df.iloc[9, 0],
        "total_compliance_text": df.iloc[10, 0],
        "total_non_compliant": df.iloc[11, 4],
        "total_compliant": df.iloc[11, 5],
    }

    sections = []
    current_section = {
        "header": df.iloc[13, 0],
        "subsections": []
    }
    sections.append(current_section)

    current_subsection = None

    for idx in range(14, len(df)):
        row = df.iloc[idx]

        # Rows with nothing in the first two columns carry no structure.
        if pd.isna(row[0]) and pd.isna(row[1]):
            continue

        if ((pd.isna(row[0]) or row[0] == '#' or str(row[0]).strip() == '#') and
            isinstance(row[1], str) and row[1].strip() and
            (pd.isna(row[4]) if len(row) > 4 else True) and
            (pd.isna(row[5]) if len(row) > 5 else True) and
            (pd.isna(row[6]) if len(row) > 6 else True)):

            current_subsection = {
                "subheader": row[1].strip(),
                "reference": None if pd.isna(row[3]) else row[3],
                "items": []
            }
            current_section["subsections"].append(current_subsection)
            continue

        if current_subsection is not None and not pd.isna(row[0]) and not pd.isna(row[1]):
            item_number = row[0]
            if isinstance(row[0], str) and row[0].strip().isdigit():
                item_number = int(row[0].strip())

            # Only numbered rows are items; this skips column-title rows
            # such as "#" / "Requirement".
            if isinstance(item_number, (int, float)):
                item = {
                    "number": item_number,
                    "requirement": row[1],
                    "reference": None if pd.isna(row[3]) else row[3],
                    "applicable": not pd.isna(row[4]) if len(row) > 4 else None,
                    "non_compliant": not pd.isna(row[5]) if len(row) > 5 else None,
                    "compliant": not pd.isna(row[6]) if len(row) > 6 else None,
                }
                current_subsection["items"].append(item)

    return [
        {"metadata": metadata},
        {"summary": summary},
        {"sections": sections}
    ]


class APIFileURLModelSerializer(serializers.ModelSerializer):
    """ModelSerializer base that serializes file/image fields as API URLs.

    Every ``FileField``/``ImageField`` on ``Meta.model`` is replaced by a
    ``SerializerMethodField`` whose ``get_<field>`` method is attached to the
    serializer instance at construction time.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        def make_getter(name):
            # Factory so each getter captures its own field name.
            def getter(instance):
                return self._get_file_url(instance, name)
            return getter

        for model_field in self.Meta.model._meta.fields:
            if not isinstance(model_field, (models.FileField, models.ImageField)):
                continue
            name = model_field.name
            # Swap in a method field and attach its get_<name> method
            self.fields[name] = serializers.SerializerMethodField()
            setattr(self, f'get_{name}', make_getter(name))

    def _get_file_url(self, obj, field_name):
        """Return the absolute URL of ``obj.<field_name>`` rebased under ``/api/``.

        Returns ``None`` when the field is empty/missing or when no
        ``request`` is present in the serializer context.
        """
        request = self.context.get('request')
        stored_file = getattr(obj, field_name, None)
        if not (stored_file and request):
            return None

        absolute = request.build_absolute_uri(stored_file.url)
        site_root = request.build_absolute_uri('/')
        api_root = request.build_absolute_uri('/api/')
        return absolute.replace(site_root, api_root)


def normalize_key(key):
    """Turn a free-form label into a lowercase ``snake_case``-style key.

    The text is lowercased, the separators ``' - '``, ``'-'`` and ``' '`` are
    replaced (in that order) by underscores, and every character other than
    ``a-z``, ``0-9`` and ``_`` is then removed.
    """
    normalized = key.lower()
    for separator in (' - ', '-', ' '):
        normalized = normalized.replace(separator, '_')
    return re.sub(r'[^a-z0-9_]', '', normalized)


def parse_visual_excel_simple(file_path):
    """
    Helper-based variant of parse_visual_excel_sheet.

    Loads the active worksheet of the workbook at ``file_path``, reads the
    report metadata from fixed cells, treats row 11 as the table header and
    turns every later non-empty row into a dict (with an embedded picture
    when one is found).

    Returns a dict with the keys ``metadata`` and ``table_data``.
    """
    sheet = openpyxl.load_workbook(file_path).active
    image_loader = SheetImageLoader(sheet)

    # Metadata lives in fixed cells above the table
    metadata_cells = (
        ('report_title', 'C1'),
        ('model_name', 'D3'),
        ('checker', 'D4'),
        ('organization', 'D5'),
        ('date', 'D6'),
        ('full_villa_model', 'D7'),
        ('villa_mep', 'D8'),
        ('report_subtitle', 'A10'),
    )
    metadata = {
        field: _get_cell_value(sheet, cell_ref)
        for field, cell_ref in metadata_cells
    }

    # Row 11 holds the table header; data rows follow it
    header_row = 11
    headers = _extract_headers(sheet, header_row)
    picture_col, number_col = _find_special_columns(headers)

    data_rows = range(header_row + 1, sheet.max_row + 1)
    table_data = [
        _process_table_row(sheet, row_num, headers, picture_col, number_col, image_loader)
        for row_num in data_rows
        if not _is_empty_row(sheet, row_num)
    ]

    return {
        'metadata': metadata,
        'table_data': table_data
    }


def parse_excel_simple(file_path):
    """
    Helper-based variant of parse_excel_sheet.

    Reads the workbook at ``file_path`` without a header row and returns a
    three-element list: ``{"metadata": ...}``, ``{"summary": ...}`` and
    ``{"sections": ...}``. Cells that are missing or empty yield ``None``.
    """
    df = pd.read_excel(file_path, header=None)

    # (key, row, column) positions in the raw grid, zero-based
    metadata_positions = (
        ("model_name", 3, 1),
        ("checker", 4, 1),
        ("organization", 5, 1),
        ("date", 6, 1),
        ("consultant_villa", 7, 0),
    )
    summary_positions = (
        ("summary_title", 9, 0),
        ("total_compliance_text", 10, 0),
        ("total_non_compliant", 11, 4),
        ("total_compliant", 11, 5),
    )

    metadata = {
        name: _safe_get_cell(df, r, c) for name, r, c in metadata_positions
    }
    summary = {
        name: _safe_get_cell(df, r, c) for name, r, c in summary_positions
    }

    # The section header sits on row 13; its content follows
    sections = _parse_sections(df, start_row=13)

    return [
        {"metadata": metadata},
        {"summary": summary},
        {"sections": sections}
    ]


# Helper functions for simplified Excel parsing

def _get_cell_value(sheet, cell_ref):
    """Return the value stored at ``sheet[cell_ref]``.

    The value is passed through unchanged, so a cell whose value is ``None``
    yields ``None``.
    """
    # The former `x if x is not None else None` was a no-op; return directly.
    return sheet[cell_ref].value


def _extract_headers(sheet, header_row):
    """Return the header texts of ``sheet[header_row]`` as a list of strings.

    Truthy cell values are converted with ``str`` and stripped; falsy values
    (``None``, ``""``, ``0``) become ``""`` so list positions keep matching
    column positions.
    """
    return [
        str(cell.value).strip() if cell.value else ""
        for cell in sheet[header_row]
    ]


def _find_special_columns(headers):
    """Locate the picture and number columns in ``headers``.

    Returns a ``(picture_col, number_col)`` tuple of zero-based indexes; an
    entry is ``None`` when no header matches. If several headers match, the
    last one wins.
    """
    number_names = ("Number", " Number", "#")

    picture_hits = [pos for pos, name in enumerate(headers) if name == "Picture"]
    number_hits = [pos for pos, name in enumerate(headers) if name in number_names]

    last_picture = picture_hits[-1] if picture_hits else None
    last_number = number_hits[-1] if number_hits else None
    return last_picture, last_number


def _is_empty_row(sheet, row_num):
    """Return ``True`` when every cell of ``sheet[row_num]`` is ``None`` or ``""``."""
    for cell in sheet[row_num]:
        content = cell.value
        if content is None or content == "":
            continue
        # First cell with real content settles it
        return False
    return True


def _process_table_row(sheet, row_num, headers, picture_col, number_col, image_loader):
    """Build the dict for one table row, keyed by header text.

    Columns whose header is empty are skipped. ``datetime`` values are
    formatted as ``YYYY-MM-DD`` and ``None`` becomes ``""``. When
    ``picture_col`` is not ``None`` and an image is found for the row, it is
    stored under ``Picture_base64``. ``number_col`` is accepted but not used
    by this function.
    """
    cell_values = [cell.value for cell in sheet[row_num]]
    record = {}

    # zip stops at the shorter sequence, so headers beyond the row are skipped
    for header, value in zip(headers, cell_values):
        if not header:
            continue
        if isinstance(value, datetime):
            record[header] = value.strftime('%Y-%m-%d')
        elif value is None:
            record[header] = ""
        else:
            record[header] = value

    if picture_col is None:
        return record

    # Attach the picture only when one was actually extracted
    encoded_image = _extract_image(sheet, row_num, picture_col, image_loader)
    if encoded_image:
        record['Picture_base64'] = encoded_image

    return record


def _extract_image(sheet, row_num, picture_col, image_loader):
    """Return the image at the picture cell of ``row_num`` as base64 PNG text.

    ``picture_col`` is a zero-based column index; it is converted to a column
    letter to build the cell coordinate queried on ``image_loader``.
    ``sheet`` is accepted for signature compatibility but is not read here.

    Returns:
        The PNG bytes base64-encoded as ``str`` (without a ``data:`` URI
        prefix), or ``None`` when the cell has no image or extraction fails.
    """
    column_letter = openpyxl.utils.get_column_letter(picture_col + 1)
    cell_coord = f"{column_letter}{row_num}"

    try:
        if not image_loader.image_in(cell_coord):
            return None
        img = image_loader.get(cell_coord)
        buffered = io.BytesIO()
        img.save(buffered, format="PNG")
    except Exception as exc:
        # Still best-effort (one bad image must not abort the parse), but
        # record the failure instead of discarding it silently.
        logging.getLogger(__name__).warning(
            "Error extracting image from cell %s: %s", cell_coord, exc
        )
        return None

    return base64.b64encode(buffered.getvalue()).decode('utf-8')


def _safe_get_cell(df, row, col):
    """Return ``df.iloc[row, col]``, or ``None`` if it is missing or out of range.

    Positions outside the frame (``IndexError``/``KeyError``) and values that
    ``pd.isna`` reports as missing both yield ``None``.
    """
    try:
        cell = df.iloc[row, col]
    except (IndexError, KeyError):
        return None
    return None if pd.isna(cell) else cell


def _parse_sections(df, start_row):
    """Build the section structure from ``df`` beginning at ``start_row``.

    ``start_row`` supplies the header of the single section (column 0). Each
    following row is either skipped (first two columns empty), opens a new
    subsection, or is appended as an item to the subsection currently open.

    Returns a one-element list containing the section dict with the keys
    ``header`` and ``subsections``.
    """
    section = {
        "header": _safe_get_cell(df, start_row, 0),
        "subsections": []
    }
    open_subsection = None

    for position in range(start_row + 1, len(df)):
        row = df.iloc[position]

        # Nothing in the first two columns: not a structural row
        if pd.isna(row[0]) and pd.isna(row[1]):
            continue

        if _is_subsection_header(row):
            # A header row opens a new subsection for the rows below it
            open_subsection = {
                "subheader": row[1].strip(),
                "reference": None if pd.isna(row[3]) else row[3],
                "items": []
            }
            section["subsections"].append(open_subsection)
        elif open_subsection and _is_valid_item(row):
            open_subsection["items"].append(_create_item(row))

    return [section]


def _is_subsection_header(row):
    """Return ``True`` when ``row`` is a subsection header, else ``False``.

    A header row has an empty or ``#`` marker in column 0, non-blank text in
    column 1, and nothing in columns 4-6. Columns beyond the row's length are
    treated as empty. Always returns a real ``bool``.
    """
    marker = row[0]
    # str(...).strip() also covers the plain '#' case
    if not (pd.isna(marker) or str(marker).strip() == '#'):
        return False

    title = row[1]
    if not isinstance(title, str) or not title.strip():
        return False

    # Status columns must be blank; items fill them, headers do not
    return all(pd.isna(row[col]) for col in (4, 5, 6) if len(row) > col)


def _is_valid_item(row):
    """Return ``True`` when ``row`` is a numbered requirement item.

    Columns 0 and 1 must both be filled, and column 0 must be numeric: either
    an ``int``/``float`` or a string of digits. This matches the item check in
    ``parse_excel_sheet``, so column-title rows such as ``#`` /
    ``Requirement`` are not mistaken for items.
    """
    number = row[0]
    if pd.isna(number) or pd.isna(row[1]):
        return False
    if isinstance(number, str):
        # Digit strings are converted to int when the item is built
        return number.strip().isdigit()
    return isinstance(number, (int, float))


def _create_item(row):
    """Convert an item row into its dictionary form.

    Column 0 is the item number (digit strings are converted to ``int``),
    column 1 the requirement text and column 3 the reference (``None`` when
    missing). ``applicable``, ``non_compliant`` and ``compliant`` report
    whether columns 4, 5 and 6 hold a value, or ``None`` when the row is too
    short to have that column.
    """
    raw_number = row[0]
    is_digit_text = isinstance(raw_number, str) and raw_number.strip().isdigit()
    number = int(raw_number.strip()) if is_digit_text else raw_number

    def has_value(position):
        # None means the column does not exist in this row
        if len(row) > position:
            return not pd.isna(row[position])
        return None

    return {
        "number": number,
        "requirement": row[1],
        "reference": None if pd.isna(row[3]) else row[3],
        "applicable": has_value(4),
        "non_compliant": has_value(5),
        "compliant": has_value(6),
    }
