import os
import json
from PIL import Image
from tqdm import tqdm
from transformers import AutoModel, AutoTokenizer
import torch
from datasets import load_dataset
# Attributes that are checked for a region once its subject was found present.
ATTRIBUTES = ("color", "texture", "shape")


def is_affirmative(answer):
    """Return True when the model reply contains "yes" in any letter case."""
    return "yes" in answer.lower()


def scale_bbox(bbox, src_size, dst_size):
    """Rescale an ``(x1, y1, x2, y2)`` box between two image sizes.

    Args:
        bbox: Box coordinates expressed in the source image's pixel space.
        src_size: ``(width, height)`` of the image the box was annotated on.
        dst_size: ``(width, height)`` of the image the box is applied to.

    Returns:
        A tuple of four ints; each coordinate is truncated toward zero.
    """
    x1, y1, x2, y2 = bbox
    src_w, src_h = src_size
    dst_w, dst_h = dst_size
    return (
        int(x1 / src_w * dst_w),
        int(y1 / src_h * dst_h),
        int(x2 / src_w * dst_w),
        int(y2 / src_h * dst_h),
    )


def sibling_path(root, name):
    """Return the path of ``name`` placed next to the directory ``root``.

    Only the last path component is swapped, so a parent directory that
    happens to contain the same text as the last component is left alone.
    """
    return os.path.join(os.path.dirname(os.path.normpath(root)), name)


def average_score(total, count):
    """Return ``total / count`` rounded to 4 decimals, or 0.0 if count is 0."""
    if not count:
        return 0.0
    return round(total / count, 4)


def ask_yes_no(model, tokenizer, image, question):
    """Send one image + question to ``model.chat`` and parse a yes/no reply."""
    msgs = [{'role': 'user', 'content': [image, question]}]
    answer = model.chat(
        image=None,
        msgs=msgs,
        tokenizer=tokenizer,
        seed=42
    )
    return is_affirmative(answer)


def score_region(model, tokenizer, cropped_img, description, detail_description):
    """Score one cropped region on presence and on each attribute.

    The attribute questions are only asked when the presence question was
    answered with yes; otherwise every score stays 0.0.

    Returns:
        A dict with the keys ``spatial``, ``color``, ``texture`` and
        ``shape``, each holding 0.0 or 1.0.
    """
    scores = {"spatial": 0.0, "color": 0.0, "texture": 0.0, "shape": 0.0}

    question = f'Is the subject "{description}" present in the image? Strictly answer with "Yes" or "No", without any irrelevant words.'
    if not ask_yes_no(model, tokenizer, cropped_img, question):
        return scores
    scores["spatial"] = 1.0

    for attribute in ATTRIBUTES:
        attribute_question = f'Is the subject in "{description}" in the image consistent with the {attribute} described in the detailed description: "{detail_description}"? Strictly answer with "Yes" or "No", without any irrelevant words. If the {attribute} is not mentioned in the detailed description, the answer is "Yes".'
        if ask_yes_no(model, tokenizer, cropped_img, attribute_question):
            scores[attribute] = 1.0
    return scores


def main():
    """Run the region-level VQA evaluation and write the score summary.

    For every benchmark case the generated image is cropped to each annotated
    box, each crop is scored with the vision-language model, and per-image
    totals are stored in a JSON file. Images that cannot be opened are skipped
    and listed in a log file instead of aborting the run. Finally the JSON file
    is read back and aggregated into a plain-text score report.
    """
    model_id = "openbmb/MiniCPM-V-2_6"
    model = AutoModel.from_pretrained(
        model_id,
        trust_remote_code=True,
        attn_implementation='sdpa',  # sdpa or flash_attention_2, no eager
        torch_dtype=torch.bfloat16,
    )
    model = model.eval().cuda()
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

    # evaluation
    benchmark_repo = 'HuiZhang0812/CreatiDesign_benchmark'  # huggingface repo of benchmark
    benchmark = load_dataset(benchmark_repo, split="test")
    gen_root = "outputs/CreatiDesign_benchmark/images"
    print("processing:", gen_root)

    # All outputs live next to the directory holding the generated images.
    save_json_path = sibling_path(gen_root, "minicpm-vqa.json")
    temp_root = sibling_path(gen_root, "images-perarea")
    skipped_files_log = sibling_path(gen_root, "skipped_files.log")
    score_save_path = sibling_path(gen_root, "minicpm-vqa-score.txt")
    os.makedirs(temp_root, exist_ok=True)

    skipped_files = []
    image_stats = {}

    for case in tqdm(benchmark):
        json_data = json.loads(case["metadata"])
        case_info = json_data["img_info"]
        file_name = f"{case_info['img_id']}.jpg"
        generated_img_path = os.path.join(gen_root, file_name)
        object_annotations = json_data["object_annotations"]

        # A missing or unreadable image must not abort the whole evaluation;
        # it is left out of the statistics and reported in the skip log.
        try:
            with Image.open(generated_img_path) as opened:
                img = opened.convert("RGB")
        except OSError as err:
            skipped_files.append(f"{generated_img_path}: {err}")
            continue

        # Boxes are given relative to the annotated image size, which may
        # differ from the size of the generated image.
        annotated_size = (case_info["img_width"], case_info["img_height"])

        temp_save_root = os.path.join(temp_root, os.path.splitext(file_name)[0])
        os.makedirs(temp_save_root, exist_ok=True)

        image_totals = {"spatial": 0, "color": 0, "texture": 0, "shape": 0}
        for i, annotation in enumerate(object_annotations):
            cropped_img = img.crop(scale_bbox(annotation["bbox"], annotated_size, img.size))

            # '/' is removed so the class name can be used as a file name.
            description = annotation["class_name"].replace('/', '')
            detail_description = annotation["bbox_detail_description"].replace('/', '')

            # The index keeps crops of same-named regions from overwriting
            # each other.
            cropped_img_path = os.path.join(temp_save_root, f'{i}_{description}.jpg')
            cropped_img.save(cropped_img_path)

            region_scores = score_region(
                model, tokenizer, cropped_img, description, detail_description
            )
            for score_name, score in region_scores.items():
                image_totals[score_name] += score

        # Store image stats
        image_stats[os.path.basename(file_name)] = {
            "bbox_count": len(object_annotations),
            "score_spatial": image_totals["spatial"],
            "score_color": image_totals["color"],
            "score_texture": image_totals["texture"],
            "score_shape": image_totals["shape"],
        }

        # Periodic checkpoint so a crash does not lose all finished images.
        if len(image_stats) % 50 == 0:
            with open(save_json_path, 'w', encoding='utf-8') as json_file:
                json.dump(image_stats, json_file, indent=4)

    # Save the image_stats dictionary to a JSON file
    with open(save_json_path, 'w', encoding='utf-8') as json_file:
        json.dump(image_stats, json_file, indent=4)

    print(f"Image statistics saved to {save_json_path}")

    if skipped_files:
        with open(skipped_files_log, 'w', encoding='utf-8') as log_file:
            log_file.writelines(f"{entry}\n" for entry in skipped_files)
        print(f"Skipped {len(skipped_files)} images, see {skipped_files_log}")

    # Read the JSON file containing image statistics
    with open(save_json_path, "r", encoding="utf-8") as f:
        saved_stats = json.load(f)

    score_names = ("spatial", "color", "texture", "shape")
    total_num = 0
    totals = dict.fromkeys(score_names, 0)
    miss_match = 0

    for key, value in saved_stats.items():
        bbox_count = value["bbox_count"]
        image_scores = [value[f"score_{score_name}"] for score_name in score_names]

        total_num += bbox_count
        for score_name, score in zip(score_names, image_scores):
            totals[score_name] += score

        # Report every image in which at least one box failed at least one check.
        if any(score != bbox_count for score in image_scores):
            print(key, bbox_count, *image_scores)
            miss_match += 1

    print(miss_match)

    # save total_score_spatial,total_score_color,total_score_texture,total_score_shape
    with open(score_save_path, "w", encoding="utf-8") as f:
        f.write(f"Total number of bbox: {total_num}\n")
        for score_name in score_names:
            total = totals[score_name]
            f.write(f"Total score of {score_name}: {total}; Average score of {score_name}: {average_score(total, total_num)}\n")


if __name__ == "__main__":
    main()