mehran-sarmadi commited on
Commit
7a21cbf
Β·
1 Parent(s): 5354f37

Add yandexgpt 5 lite

Browse files
Files changed (31) hide show
  1. __pycache__/about.cpython-312.pyc +0 -0
  2. __pycache__/submission.cpython-312.pyc +0 -0
  3. leaderboard/__pycache__/__init__.cpython-312.pyc +0 -0
  4. leaderboard/__pycache__/leaderboard.cpython-312.pyc +0 -0
  5. leaderboard/boards_data/MMLU.jsonl +1 -0
  6. leaderboard/boards_data/all.jsonl +1 -0
  7. leaderboard/boards_data/extractive-qa_PQuAD.jsonl +36 -35
  8. leaderboard/boards_data/ifeval.jsonl +4 -3
  9. leaderboard/boards_data/keyword-extraction_SynKeywords.jsonl +36 -35
  10. leaderboard/boards_data/mt_bench.jsonl +1 -0
  11. leaderboard/boards_data/ner_arman.jsonl +36 -35
  12. leaderboard/boards_data/nli_farstail.jsonl +36 -35
  13. leaderboard/boards_data/paraphrase-detection_FarsiParaphraseDetection.jsonl +36 -35
  14. leaderboard/boards_data/paraphrase-detection_parsinlu.jsonl +36 -35
  15. leaderboard/boards_data/persian_csr.jsonl +1 -0
  16. leaderboard/boards_data/persian_nlg.jsonl +1 -0
  17. leaderboard/boards_data/persian_nlu.jsonl +1 -0
  18. leaderboard/boards_data/question-generation_PersianQA.jsonl +36 -35
  19. leaderboard/boards_data/sentiment-analysis_deepsentipers.jsonl +36 -35
  20. leaderboard/boards_data/sts_FarSICK.jsonl +36 -35
  21. leaderboard/boards_data/sts_SynPerSTS.jsonl +36 -35
  22. leaderboard/boards_data/summarization_PnSummary.jsonl +36 -35
  23. leaderboard/boards_data/summarization_SamSUM-fa.jsonl +36 -35
  24. leaderboard/boards_data/tone-classification_SynTone.jsonl +36 -35
  25. leaderboard/boards_data/topic-classification_sid.jsonl +36 -35
  26. leaderboard/boards_data/translation-ar2fa_ar2fa.jsonl +36 -35
  27. leaderboard/boards_data/translation-en2fa_en2fa.jsonl +36 -35
  28. leaderboard/boards_data/translation-fa2ar_fa2ar.jsonl +36 -35
  29. leaderboard/boards_data/translation-fa2en_fa2en.jsonl +36 -35
  30. leaderboard/leaderboard_config.yaml +6 -0
  31. submission.py +42 -2
__pycache__/about.cpython-312.pyc ADDED
Binary file (8.61 kB). View file
 
__pycache__/submission.cpython-312.pyc ADDED
Binary file (11 kB). View file
 
leaderboard/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (181 Bytes). View file
 
leaderboard/__pycache__/leaderboard.cpython-312.pyc ADDED
Binary file (40.9 kB). View file
 
leaderboard/boards_data/MMLU.jsonl CHANGED
@@ -37,6 +37,7 @@
37
  {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https_google.com","parameters_count":"35000000000","source_type":"Open-Source","acc":0.4046741323,"cinema_acc":0.425,"emergency_number_acc":0.3,"foods_acc":0.52,"games_acc":0.5,"herbal_drugs_acc":0.5,"places_acc":0.7523809524,"poetry_acc":0.325,"politicians_acc":0.5,"popular_people_acc":0.6307692308,"Government_law_acc":0.8043478261,"proverbs_acc":0.59,"religous_acc":0.5111111111,"social_manners_acc":0.595505618,"souvenirs_acc":0.34,"sports_acc":0.4285714286,"GPK_acc":0.5583250249,"SPK_acc":0.4311055366,"UPK_acc":0.3659694598}
38
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4020000000","source_type":"Open-Source","acc":0.4025179106,"cinema_acc":0.425,"emergency_number_acc":0.2,"foods_acc":0.49,"games_acc":0.4,"herbal_drugs_acc":0.425,"places_acc":0.4095238095,"poetry_acc":0.4,"politicians_acc":0.5,"popular_people_acc":0.4974358974,"Government_law_acc":0.6739130435,"proverbs_acc":0.48,"religous_acc":0.5111111111,"social_manners_acc":0.5617977528,"souvenirs_acc":0.26,"sports_acc":0.4126984127,"GPK_acc":0.4656031904,"SPK_acc":0.4341515857,"UPK_acc":0.3717438727}
39
  {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4300000000","source_type":"Open-Source","acc":0.3825554705,"cinema_acc":0.4875,"emergency_number_acc":0.2,"foods_acc":0.46,"games_acc":0.7,"herbal_drugs_acc":0.475,"places_acc":0.5523809524,"poetry_acc":0.525,"politicians_acc":0.5,"popular_people_acc":0.5076923077,"Government_law_acc":0.7608695652,"proverbs_acc":0.54,"religous_acc":0.4444444444,"social_manners_acc":0.6292134831,"souvenirs_acc":0.48,"sports_acc":0.4285714286,"GPK_acc":0.5224327019,"SPK_acc":0.4135459595,"UPK_acc":0.3423585269}
 
40
  {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"1000000000","source_type":"Open-Source","acc":0.2830214927,"cinema_acc":0.5125,"emergency_number_acc":0.2,"foods_acc":0.51,"games_acc":0.25,"herbal_drugs_acc":0.5,"places_acc":0.3904761905,"poetry_acc":0.525,"politicians_acc":0.8,"popular_people_acc":0.5179487179,"Government_law_acc":0.347826087,"proverbs_acc":0.34,"religous_acc":0.3333333333,"social_manners_acc":0.2696629213,"souvenirs_acc":0.24,"sports_acc":0.4603174603,"GPK_acc":0.4267198405,"SPK_acc":0.2777280057,"UPK_acc":0.268317721}
41
  {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https_google.com","parameters_count":"3210000000","source_type":"Open-Source","acc":0.278430827,"cinema_acc":0.25,"emergency_number_acc":0.0,"foods_acc":0.16,"games_acc":0.3,"herbal_drugs_acc":0.125,"places_acc":0.4380952381,"poetry_acc":0.075,"politicians_acc":0.0,"popular_people_acc":0.1948717949,"Government_law_acc":0.5652173913,"proverbs_acc":0.2,"religous_acc":0.2444444444,"social_manners_acc":0.3146067416,"souvenirs_acc":0.28,"sports_acc":0.2063492063,"GPK_acc":0.2452642074,"SPK_acc":0.2999462462,"UPK_acc":0.2672911587}
42
  {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https_google.com","parameters_count":"7250000000","source_type":"Open-Source","acc":0.2553383877,"cinema_acc":0.375,"emergency_number_acc":0.1,"foods_acc":0.47,"games_acc":0.15,"herbal_drugs_acc":0.425,"places_acc":0.4285714286,"poetry_acc":0.425,"politicians_acc":0.45,"popular_people_acc":0.4051282051,"Government_law_acc":0.4782608696,"proverbs_acc":0.13,"religous_acc":0.4,"social_manners_acc":0.3707865169,"souvenirs_acc":0.12,"sports_acc":0.3333333333,"GPK_acc":0.3599202393,"SPK_acc":0.2727109837,"UPK_acc":0.2294366739}
 
37
  {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https_google.com","parameters_count":"35000000000","source_type":"Open-Source","acc":0.4046741323,"cinema_acc":0.425,"emergency_number_acc":0.3,"foods_acc":0.52,"games_acc":0.5,"herbal_drugs_acc":0.5,"places_acc":0.7523809524,"poetry_acc":0.325,"politicians_acc":0.5,"popular_people_acc":0.6307692308,"Government_law_acc":0.8043478261,"proverbs_acc":0.59,"religous_acc":0.5111111111,"social_manners_acc":0.595505618,"souvenirs_acc":0.34,"sports_acc":0.4285714286,"GPK_acc":0.5583250249,"SPK_acc":0.4311055366,"UPK_acc":0.3659694598}
38
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4020000000","source_type":"Open-Source","acc":0.4025179106,"cinema_acc":0.425,"emergency_number_acc":0.2,"foods_acc":0.49,"games_acc":0.4,"herbal_drugs_acc":0.425,"places_acc":0.4095238095,"poetry_acc":0.4,"politicians_acc":0.5,"popular_people_acc":0.4974358974,"Government_law_acc":0.6739130435,"proverbs_acc":0.48,"religous_acc":0.5111111111,"social_manners_acc":0.5617977528,"souvenirs_acc":0.26,"sports_acc":0.4126984127,"GPK_acc":0.4656031904,"SPK_acc":0.4341515857,"UPK_acc":0.3717438727}
39
  {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4300000000","source_type":"Open-Source","acc":0.3825554705,"cinema_acc":0.4875,"emergency_number_acc":0.2,"foods_acc":0.46,"games_acc":0.7,"herbal_drugs_acc":0.475,"places_acc":0.5523809524,"poetry_acc":0.525,"politicians_acc":0.5,"popular_people_acc":0.5076923077,"Government_law_acc":0.7608695652,"proverbs_acc":0.54,"religous_acc":0.4444444444,"social_manners_acc":0.6292134831,"souvenirs_acc":0.48,"sports_acc":0.4285714286,"GPK_acc":0.5224327019,"SPK_acc":0.4135459595,"UPK_acc":0.3423585269}
40
+ {"Model Name":"yandexgpt-lite","thinking_method":"❌","model_url":"https_google.com","parameters_count":"8000000000","source_type":"Open-Source","acc":0.352994366,"cinema_acc":0.375,"emergency_number_acc":0.2,"foods_acc":0.41,"games_acc":0.55,"herbal_drugs_acc":0.325,"places_acc":0.3428571429,"poetry_acc":0.55,"politicians_acc":0.35,"popular_people_acc":0.4666666667,"Government_law_acc":0.3260869565,"proverbs_acc":0.49,"religous_acc":0.4,"social_manners_acc":0.6853932584,"souvenirs_acc":0.2,"sports_acc":0.3174603175,"GPK_acc":0.4247258225,"SPK_acc":0.3635549185,"UPK_acc":0.3361991531}
41
  {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"1000000000","source_type":"Open-Source","acc":0.2830214927,"cinema_acc":0.5125,"emergency_number_acc":0.2,"foods_acc":0.51,"games_acc":0.25,"herbal_drugs_acc":0.5,"places_acc":0.3904761905,"poetry_acc":0.525,"politicians_acc":0.8,"popular_people_acc":0.5179487179,"Government_law_acc":0.347826087,"proverbs_acc":0.34,"religous_acc":0.3333333333,"social_manners_acc":0.2696629213,"souvenirs_acc":0.24,"sports_acc":0.4603174603,"GPK_acc":0.4267198405,"SPK_acc":0.2777280057,"UPK_acc":0.268317721}
42
  {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https_google.com","parameters_count":"3210000000","source_type":"Open-Source","acc":0.278430827,"cinema_acc":0.25,"emergency_number_acc":0.0,"foods_acc":0.16,"games_acc":0.3,"herbal_drugs_acc":0.125,"places_acc":0.4380952381,"poetry_acc":0.075,"politicians_acc":0.0,"popular_people_acc":0.1948717949,"Government_law_acc":0.5652173913,"proverbs_acc":0.2,"religous_acc":0.2444444444,"social_manners_acc":0.3146067416,"souvenirs_acc":0.28,"sports_acc":0.2063492063,"GPK_acc":0.2452642074,"SPK_acc":0.2999462462,"UPK_acc":0.2672911587}
43
  {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https_google.com","parameters_count":"7250000000","source_type":"Open-Source","acc":0.2553383877,"cinema_acc":0.375,"emergency_number_acc":0.1,"foods_acc":0.47,"games_acc":0.15,"herbal_drugs_acc":0.425,"places_acc":0.4285714286,"poetry_acc":0.425,"politicians_acc":0.45,"popular_people_acc":0.4051282051,"Government_law_acc":0.4782608696,"proverbs_acc":0.13,"religous_acc":0.4,"social_manners_acc":0.3707865169,"souvenirs_acc":0.12,"sports_acc":0.3333333333,"GPK_acc":0.3599202393,"SPK_acc":0.2727109837,"UPK_acc":0.2294366739}
leaderboard/boards_data/all.jsonl CHANGED
@@ -37,6 +37,7 @@
37
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","Average":0.4914,"Persian IFEval":0.6801242236,"Persian MT-Bench":0.6741666667,"PerMMLU":0.4979475405,"PerCoR":0.5272636318,"Persian NLU":0.456845738,"Persian NLG":0.112015688}
38
  {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https_google.com","parameters_count":"35000000000","source_type":"Open-Source","Average":0.4813,"Persian IFEval":0.5790251108,"Persian MT-Bench":0.6090833333,"PerMMLU":0.4046741323,"PerCoR":0.6,"Persian NLU":0.531045981,"Persian NLG":0.1641995602}
39
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4020000000","source_type":"Open-Source","Average":0.4791,"Persian IFEval":0.7577092511,"Persian MT-Bench":0.5599462366,"PerMMLU":0.4025179106,"PerCoR":0.5033,"Persian NLU":0.5121418762,"Persian NLG":0.1389297212}
 
40
  {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"1000000000","source_type":"Open-Source","Average":0.3252,"Persian IFEval":0.5447870778,"Persian MT-Bench":0.4333333333,"PerMMLU":0.2830214927,"PerCoR":0.2599,"Persian NLU":0.3619547874,"Persian NLG":0.0682994522}
41
  {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https_google.com","parameters_count":"7250000000","source_type":"Open-Source","Average":0.3039,"Persian IFEval":0.4405286344,"Persian MT-Bench":0.3398268398,"PerMMLU":0.2553383877,"PerCoR":0.3015,"Persian NLU":0.3916645306,"Persian NLG":0.0944140383}
42
  {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https_google.com","parameters_count":"3210000000","source_type":"Open-Source","Average":0.2815,"Persian IFEval":0.5330396476,"Persian MT-Bench":0.3756410256,"PerMMLU":0.278430827,"PerCoR":0.2521,"Persian NLU":0.1368924446,"Persian NLG":0.1129755187}
 
37
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","Average":0.4914,"Persian IFEval":0.6801242236,"Persian MT-Bench":0.6741666667,"PerMMLU":0.4979475405,"PerCoR":0.5272636318,"Persian NLU":0.456845738,"Persian NLG":0.112015688}
38
  {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https_google.com","parameters_count":"35000000000","source_type":"Open-Source","Average":0.4813,"Persian IFEval":0.5790251108,"Persian MT-Bench":0.6090833333,"PerMMLU":0.4046741323,"PerCoR":0.6,"Persian NLU":0.531045981,"Persian NLG":0.1641995602}
39
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4020000000","source_type":"Open-Source","Average":0.4791,"Persian IFEval":0.7577092511,"Persian MT-Bench":0.5599462366,"PerMMLU":0.4025179106,"PerCoR":0.5033,"Persian NLU":0.5121418762,"Persian NLG":0.1389297212}
40
+ {"Model Name":"yandexgpt-lite","thinking_method":"❌","model_url":"https_google.com","parameters_count":"8000000000","source_type":"Open-Source","Average":0.4233,"Persian IFEval":0.6035242291,"Persian MT-Bench":0.5278333333,"PerMMLU":0.352994366,"PerCoR":0.3756,"Persian NLU":0.5761104945,"Persian NLG":0.1035446324}
41
  {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"1000000000","source_type":"Open-Source","Average":0.3252,"Persian IFEval":0.5447870778,"Persian MT-Bench":0.4333333333,"PerMMLU":0.2830214927,"PerCoR":0.2599,"Persian NLU":0.3619547874,"Persian NLG":0.0682994522}
42
  {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https_google.com","parameters_count":"7250000000","source_type":"Open-Source","Average":0.3039,"Persian IFEval":0.4405286344,"Persian MT-Bench":0.3398268398,"PerMMLU":0.2553383877,"PerCoR":0.3015,"Persian NLU":0.3916645306,"Persian NLG":0.0944140383}
43
  {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https_google.com","parameters_count":"3210000000","source_type":"Open-Source","Average":0.2815,"Persian IFEval":0.5330396476,"Persian MT-Bench":0.3756410256,"PerMMLU":0.278430827,"PerCoR":0.2521,"Persian NLU":0.1368924446,"Persian NLG":0.1129755187}
leaderboard/boards_data/extractive-qa_PQuAD.jsonl CHANGED
@@ -1,44 +1,45 @@
1
- {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":21.8957345972,"extractive-qa_PQuAD_f1":0.5899280585,"nlu_score":0.7143086066}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":42.0853080569,"extractive-qa_PQuAD_f1":0.747356805,"nlu_score":0.628506628}
3
- {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":29.8578199052,"extractive-qa_PQuAD_f1":0.6483891649,"nlu_score":0.6241793507}
4
- {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":51.6587677725,"extractive-qa_PQuAD_f1":0.7997294818,"nlu_score":0.6297634971}
 
 
 
5
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":20.8530805687,"extractive-qa_PQuAD_f1":0.5742991219,"nlu_score":0.6470954618}
6
- {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":42.9383886256,"extractive-qa_PQuAD_f1":0.7674489336,"nlu_score":0.7144353486}
7
- {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":41.6113744076,"extractive-qa_PQuAD_f1":0.7625286761,"nlu_score":0.6749652797}
8
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":38.5781990521,"extractive-qa_PQuAD_f1":0.7377983931,"nlu_score":0.6458443785}
9
- {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":null,"extractive-qa_PQuAD_f1":null,"nlu_score":0.6552152029}
10
- {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":16.2085308057,"extractive-qa_PQuAD_f1":0.5540542726,"nlu_score":0.6758278127}
11
- {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":37.4407582938,"extractive-qa_PQuAD_f1":0.7121215175,"nlu_score":0.699116864}
12
- {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":41.2322274882,"extractive-qa_PQuAD_f1":0.7631630756,"nlu_score":0.7178891542}
13
- {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":24.9289099526,"extractive-qa_PQuAD_f1":0.5952537387,"nlu_score":0.6898261633}
14
- {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":11.9431279621,"extractive-qa_PQuAD_f1":0.5054306037,"nlu_score":0.6460328733}
15
- {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":12.0379146919,"extractive-qa_PQuAD_f1":0.5152644082,"nlu_score":0.6714091535}
16
- {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":16.5876777251,"extractive-qa_PQuAD_f1":0.5291313789,"nlu_score":0.4086928082}
17
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":1.5165876777,"extractive-qa_PQuAD_f1":0.3221621809,"nlu_score":0.3749414991}
18
- {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":13.0805687204,"extractive-qa_PQuAD_f1":0.5111951184,"nlu_score":0.5661558794}
 
 
19
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":null,"extractive-qa_PQuAD_f1":null,"nlu_score":0.456845738}
 
20
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":7.0142180095,"extractive-qa_PQuAD_f1":0.4986764425,"nlu_score":0.6752949557}
 
 
 
21
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":20.4739336493,"extractive-qa_PQuAD_f1":0.5660677645,"nlu_score":0.5121418762}
22
- {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":15.6398104265,"extractive-qa_PQuAD_f1":0.4797901431,"nlu_score":0.3619547874}
23
- {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":0.8530805687,"extractive-qa_PQuAD_f1":0.3570972648,"nlu_score":0.3928685253}
 
24
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":38.2938388626,"extractive-qa_PQuAD_f1":0.7091014157,"nlu_score":0.6800109206}
25
- {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":29.0995260664,"extractive-qa_PQuAD_f1":0.6500014945,"nlu_score":0.6833497104}
 
26
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":45.5924170616,"extractive-qa_PQuAD_f1":0.7918102773,"nlu_score":0.7207167537}
27
- {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":7.2037914692,"extractive-qa_PQuAD_f1":0.4722142546,"nlu_score":0.6459120734}
28
- {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":5.8767772512,"extractive-qa_PQuAD_f1":0.4459269248,"nlu_score":0.4824528512}
29
- {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":37.4407582938,"extractive-qa_PQuAD_f1":0.6861140935,"nlu_score":0.7050532433}
30
- {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":45.1184834123,"extractive-qa_PQuAD_f1":0.7795163265,"nlu_score":0.6944128198}
31
- {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":33.2701421801,"extractive-qa_PQuAD_f1":0.6885320288,"nlu_score":0.6914202844}
32
- {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":41.990521327,"extractive-qa_PQuAD_f1":0.7401025641,"nlu_score":0.531045981}
33
- {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":21.990521327,"extractive-qa_PQuAD_f1":0.5849945641,"nlu_score":0.5241296095}
34
- {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":21.5165876777,"extractive-qa_PQuAD_f1":0.6052090568,"nlu_score":0.6262096694}
35
- {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":23.317535545,"extractive-qa_PQuAD_f1":0.6069022938,"nlu_score":0.6443219619}
36
- {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":1.8957345972,"extractive-qa_PQuAD_f1":0.4954484984,"nlu_score":0.5968415875}
37
- {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":0.4739336493,"extractive-qa_PQuAD_f1":0.3440209421,"nlu_score":0.3916645306}
38
- {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":17.5355450237,"extractive-qa_PQuAD_f1":0.5641459437,"nlu_score":0.7146808531}
39
- {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":36.7772511848,"extractive-qa_PQuAD_f1":0.7059801524,"nlu_score":0.6361186163}
40
- {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":1.4218009479,"extractive-qa_PQuAD_f1":0.6109462131,"nlu_score":0.6255818412}
41
- {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":2.3696682464,"extractive-qa_PQuAD_f1":0.4003473594,"nlu_score":0.1368924446}
42
- {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":0.663507109,"extractive-qa_PQuAD_f1":0.3378125221,"nlu_score":0.046805056}
43
- {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":49.5734597156,"extractive-qa_PQuAD_f1":0.7803597788,"nlu_score":0.6992555201}
44
- {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":0.7582938389,"extractive-qa_PQuAD_f1":0.2123754836,"nlu_score":0.0372987683}
 
1
+ {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":21.5165876777,"extractive-qa_PQuAD_f1":0.6052090568,"nlu_score":0.6262096694}
2
+ {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":41.990521327,"extractive-qa_PQuAD_f1":0.7401025641,"nlu_score":0.531045981}
3
+ {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":37.4407582938,"extractive-qa_PQuAD_f1":0.6861140935,"nlu_score":0.7050532433}
4
+ {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":36.7772511848,"extractive-qa_PQuAD_f1":0.7059801524,"nlu_score":0.6361186163}
5
+ {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":16.5876777251,"extractive-qa_PQuAD_f1":0.5291313789,"nlu_score":0.4086928082}
6
+ {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":11.9431279621,"extractive-qa_PQuAD_f1":0.5054306037,"nlu_score":0.6460328733}
7
+ {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":12.0379146919,"extractive-qa_PQuAD_f1":0.5152644082,"nlu_score":0.6714091535}
8
+ {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":45.1184834123,"extractive-qa_PQuAD_f1":0.7795163265,"nlu_score":0.6944128198}
9
+ {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":23.317535545,"extractive-qa_PQuAD_f1":0.6069022938,"nlu_score":0.6443219619}
10
+ {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":49.5734597156,"extractive-qa_PQuAD_f1":0.7803597788,"nlu_score":0.6992555201}
11
+ {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":41.6113744076,"extractive-qa_PQuAD_f1":0.7625286761,"nlu_score":0.6749652797}
12
+ {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":1.4218009479,"extractive-qa_PQuAD_f1":0.6109462131,"nlu_score":0.6255818412}
13
+ {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":0.4739336493,"extractive-qa_PQuAD_f1":0.3440209421,"nlu_score":0.3916645306}
14
+ {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":7.2037914692,"extractive-qa_PQuAD_f1":0.4722142546,"nlu_score":0.6459120734}
15
+ {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":2.3696682464,"extractive-qa_PQuAD_f1":0.4003473594,"nlu_score":0.1368924446}
16
+ {"Model Name":"yandexgpt-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":33.2701421801,"extractive-qa_PQuAD_f1":0.6371704755,"nlu_score":0.5761104945}
17
+ {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":13.0805687204,"extractive-qa_PQuAD_f1":0.5111951184,"nlu_score":0.5661558794}
18
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":42.0853080569,"extractive-qa_PQuAD_f1":0.747356805,"nlu_score":0.628506628}
19
+ {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":37.4407582938,"extractive-qa_PQuAD_f1":0.7121215175,"nlu_score":0.699116864}
20
+ {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":0.8530805687,"extractive-qa_PQuAD_f1":0.3570972648,"nlu_score":0.3928685253}
21
+ {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":1.8957345972,"extractive-qa_PQuAD_f1":0.4954484984,"nlu_score":0.5968415875}
22
+ {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":17.5355450237,"extractive-qa_PQuAD_f1":0.5641459437,"nlu_score":0.7146808531}
23
+ {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":21.8957345972,"extractive-qa_PQuAD_f1":0.5899280585,"nlu_score":0.7143086066}
24
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":20.8530805687,"extractive-qa_PQuAD_f1":0.5742991219,"nlu_score":0.6470954618}
 
 
25
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":38.5781990521,"extractive-qa_PQuAD_f1":0.7377983931,"nlu_score":0.6458443785}
26
+ {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":29.8578199052,"extractive-qa_PQuAD_f1":0.6483891649,"nlu_score":0.6241793507}
27
+ {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":0.7582938389,"extractive-qa_PQuAD_f1":0.2123754836,"nlu_score":0.0372987683}
 
 
 
 
 
 
28
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":1.5165876777,"extractive-qa_PQuAD_f1":0.3221621809,"nlu_score":0.3749414991}
29
+ {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":16.2085308057,"extractive-qa_PQuAD_f1":0.5540542726,"nlu_score":0.6758278127}
30
+ {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":5.8767772512,"extractive-qa_PQuAD_f1":0.4459269248,"nlu_score":0.4824528512}
31
+ {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":33.2701421801,"extractive-qa_PQuAD_f1":0.6885320288,"nlu_score":0.6914202844}
32
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":null,"extractive-qa_PQuAD_f1":null,"nlu_score":0.456845738}
33
+ {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":41.2322274882,"extractive-qa_PQuAD_f1":0.7631630756,"nlu_score":0.7178891542}
34
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":7.0142180095,"extractive-qa_PQuAD_f1":0.4986764425,"nlu_score":0.6752949557}
35
+ {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":21.990521327,"extractive-qa_PQuAD_f1":0.5849945641,"nlu_score":0.5241296095}
36
+ {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":0.663507109,"extractive-qa_PQuAD_f1":0.3378125221,"nlu_score":0.046805056}
37
+ {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":29.0995260664,"extractive-qa_PQuAD_f1":0.6500014945,"nlu_score":0.6833497104}
38
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":20.4739336493,"extractive-qa_PQuAD_f1":0.5660677645,"nlu_score":0.5121418762}
39
+ {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":24.9289099526,"extractive-qa_PQuAD_f1":0.5952537387,"nlu_score":0.6898261633}
40
+ {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":42.9383886256,"extractive-qa_PQuAD_f1":0.7674489336,"nlu_score":0.7144353486}
41
+ {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":null,"extractive-qa_PQuAD_f1":null,"nlu_score":0.6552152029}
42
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":38.2938388626,"extractive-qa_PQuAD_f1":0.7091014157,"nlu_score":0.6800109206}
43
+ {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":15.6398104265,"extractive-qa_PQuAD_f1":0.4797901431,"nlu_score":0.3619547874}
44
+ {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":51.6587677725,"extractive-qa_PQuAD_f1":0.7997294818,"nlu_score":0.6297634971}
45
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":45.5924170616,"extractive-qa_PQuAD_f1":0.7918102773,"nlu_score":0.7207167537}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leaderboard/boards_data/ifeval.jsonl CHANGED
@@ -15,8 +15,8 @@
15
  {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.772428884,"strict_instruction_accuracy":0.8340675477,"loose_prompt_accuracy":0.7986870897,"loose_instruction_accuracy":0.8575624082,"strict_combination_category":0.6923076923,"strict_detectable_content_category":0.847826087,"strict_detectable_format_category":0.8775510204,"strict_keywords_category":0.8344827586,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.7154471545,"strict_punctuation_category":0.9180327869,"strict_startend_category":0.9523809524,"loose_combination_category":0.7230769231,"loose_detectable_content_category":0.847826087,"loose_detectable_format_category":0.8843537415,"loose_keywords_category":0.8827586207,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.756097561,"loose_punctuation_category":0.9344262295,"loose_startend_category":0.9523809524}
16
  {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"30500000000","source_type":"Open-Source","strict_prompt_accuracy":0.7662037037,"strict_instruction_accuracy":0.8325508607,"loose_prompt_accuracy":0.8078703704,"loose_instruction_accuracy":0.8638497653,"strict_combination_category":0.6349206349,"strict_detectable_content_category":0.8837209302,"strict_detectable_format_category":0.9136690647,"strict_keywords_category":0.7954545455,"strict_language_category":0.9655172414,"strict_length_constraints_category":0.7192982456,"strict_punctuation_category":0.9655172414,"strict_startend_category":0.9180327869,"loose_combination_category":0.7936507937,"loose_detectable_content_category":0.8837209302,"loose_detectable_format_category":0.928057554,"loose_keywords_category":0.8257575758,"loose_language_category":0.9655172414,"loose_length_constraints_category":0.7543859649,"loose_punctuation_category":0.9655172414,"loose_startend_category":0.9180327869}
17
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","strict_prompt_accuracy":0.7702407002,"strict_instruction_accuracy":0.8311306902,"loose_prompt_accuracy":0.8205689278,"loose_instruction_accuracy":0.8693098385,"strict_combination_category":0.7846153846,"strict_detectable_content_category":1.0,"strict_detectable_format_category":0.8503401361,"strict_keywords_category":0.8,"strict_language_category":1.0,"strict_length_constraints_category":0.6666666667,"strict_punctuation_category":0.9672131148,"strict_startend_category":0.8888888889,"loose_combination_category":0.8153846154,"loose_detectable_content_category":1.0,"loose_detectable_format_category":0.8639455782,"loose_keywords_category":0.8551724138,"loose_language_category":1.0,"loose_length_constraints_category":0.7479674797,"loose_punctuation_category":0.9836065574,"loose_startend_category":0.9365079365}
18
- {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"27400000000","source_type":"Open-Source","strict_prompt_accuracy":0.7636761488,"strict_instruction_accuracy":0.8296622614,"loose_prompt_accuracy":0.8052516411,"loose_instruction_accuracy":0.8634361233,"strict_combination_category":0.7230769231,"strict_detectable_content_category":0.8913043478,"strict_detectable_format_category":0.8095238095,"strict_keywords_category":0.7931034483,"strict_language_category":1.0,"strict_length_constraints_category":0.7886178862,"strict_punctuation_category":0.9836065574,"strict_startend_category":0.873015873,"loose_combination_category":0.7230769231,"loose_detectable_content_category":0.8913043478,"loose_detectable_format_category":0.8095238095,"loose_keywords_category":0.875862069,"loose_language_category":1.0,"loose_length_constraints_category":0.8780487805,"loose_punctuation_category":0.9836065574,"loose_startend_category":0.873015873}
19
  {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.772428884,"strict_instruction_accuracy":0.8296622614,"loose_prompt_accuracy":0.8140043764,"loose_instruction_accuracy":0.8649045521,"strict_combination_category":0.8,"strict_detectable_content_category":0.9782608696,"strict_detectable_format_category":0.8911564626,"strict_keywords_category":0.7793103448,"strict_language_category":1.0,"strict_length_constraints_category":0.6829268293,"strict_punctuation_category":0.8360655738,"strict_startend_category":0.9206349206,"loose_combination_category":0.8153846154,"loose_detectable_content_category":0.9782608696,"loose_detectable_format_category":0.9115646259,"loose_keywords_category":0.8344827586,"loose_language_category":1.0,"loose_length_constraints_category":0.7479674797,"loose_punctuation_category":0.8852459016,"loose_startend_category":0.9365079365}
 
20
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.761487965,"strict_instruction_accuracy":0.825256975,"loose_prompt_accuracy":0.7877461707,"loose_instruction_accuracy":0.8516886931,"strict_combination_category":0.7384615385,"strict_detectable_content_category":0.9565217391,"strict_detectable_format_category":0.8571428571,"strict_keywords_category":0.8413793103,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.6097560976,"strict_punctuation_category":1.0,"strict_startend_category":0.8888888889,"loose_combination_category":0.7538461538,"loose_detectable_content_category":0.9565217391,"loose_detectable_format_category":0.8775510204,"loose_keywords_category":0.875862069,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.6666666667,"loose_punctuation_category":1.0,"loose_startend_category":0.9206349206}
21
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https_google.com","parameters_count":"120000000000","source_type":"Open-Source","strict_prompt_accuracy":0.7488986784,"strict_instruction_accuracy":0.8195266272,"loose_prompt_accuracy":0.795154185,"loose_instruction_accuracy":0.8565088757,"strict_combination_category":0.7230769231,"strict_detectable_content_category":0.9565217391,"strict_detectable_format_category":0.8843537415,"strict_keywords_category":0.829787234,"strict_language_category":1.0,"strict_length_constraints_category":0.7786885246,"strict_punctuation_category":0.5573770492,"strict_startend_category":0.8888888889,"loose_combination_category":0.7384615385,"loose_detectable_content_category":0.9565217391,"loose_detectable_format_category":0.9047619048,"loose_keywords_category":0.8794326241,"loose_language_category":1.0,"loose_length_constraints_category":0.8360655738,"loose_punctuation_category":0.6557377049,"loose_startend_category":0.9047619048}
22
  {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"12200000000","source_type":"Open-Source","strict_prompt_accuracy":0.7396061269,"strict_instruction_accuracy":0.8149779736,"loose_prompt_accuracy":0.7877461707,"loose_instruction_accuracy":0.8516886931,"strict_combination_category":0.6923076923,"strict_detectable_content_category":0.9130434783,"strict_detectable_format_category":0.8911564626,"strict_keywords_category":0.7586206897,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.7479674797,"strict_punctuation_category":0.8032786885,"strict_startend_category":0.8888888889,"loose_combination_category":0.6923076923,"loose_detectable_content_category":0.9130434783,"loose_detectable_format_category":0.8911564626,"loose_keywords_category":0.8344827586,"loose_language_category":1.0,"loose_length_constraints_category":0.8292682927,"loose_punctuation_category":0.8360655738,"loose_startend_category":0.9047619048}
@@ -24,18 +24,19 @@
24
  {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"32800000000","source_type":"Open-Source","strict_prompt_accuracy":0.7342342342,"strict_instruction_accuracy":0.803030303,"loose_prompt_accuracy":0.786036036,"loose_instruction_accuracy":0.846969697,"strict_combination_category":0.625,"strict_detectable_content_category":0.9111111111,"strict_detectable_format_category":0.8951048951,"strict_keywords_category":0.7785714286,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.6140350877,"strict_punctuation_category":0.9333333333,"strict_startend_category":0.8888888889,"loose_combination_category":0.6875,"loose_detectable_content_category":0.9111111111,"loose_detectable_format_category":0.9090909091,"loose_keywords_category":0.85,"loose_language_category":1.0,"loose_length_constraints_category":0.6666666667,"loose_punctuation_category":0.95,"loose_startend_category":0.9682539683}
25
  {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.7308533917,"strict_instruction_accuracy":0.8017621145,"loose_prompt_accuracy":0.772428884,"loose_instruction_accuracy":0.8355359765,"strict_combination_category":0.7384615385,"strict_detectable_content_category":0.8913043478,"strict_detectable_format_category":0.843537415,"strict_keywords_category":0.7379310345,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.6666666667,"strict_punctuation_category":0.9016393443,"strict_startend_category":0.9365079365,"loose_combination_category":0.7538461538,"loose_detectable_content_category":0.8913043478,"loose_detectable_format_category":0.8775510204,"loose_keywords_category":0.7793103448,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.7398373984,"loose_punctuation_category":0.9344262295,"loose_startend_category":0.9365079365}
26
  {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https_google.com","parameters_count":"109000000000","source_type":"Open-Source","strict_prompt_accuracy":0.7083333333,"strict_instruction_accuracy":0.7897058824,"loose_prompt_accuracy":0.7280701754,"loose_instruction_accuracy":0.8058823529,"strict_combination_category":0.5076923077,"strict_detectable_content_category":0.847826087,"strict_detectable_format_category":0.8775510204,"strict_keywords_category":0.7916666667,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.674796748,"strict_punctuation_category":0.868852459,"strict_startend_category":0.8888888889,"loose_combination_category":0.5230769231,"loose_detectable_content_category":0.847826087,"loose_detectable_format_category":0.8843537415,"loose_keywords_category":0.8263888889,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.7073170732,"loose_punctuation_category":0.868852459,"loose_startend_category":0.8888888889}
27
- {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.6827133479,"strict_instruction_accuracy":0.7577092511,"loose_prompt_accuracy":0.7199124726,"loose_instruction_accuracy":0.7885462555,"strict_combination_category":0.5846153846,"strict_detectable_content_category":0.7608695652,"strict_detectable_format_category":0.8027210884,"strict_keywords_category":0.7793103448,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.6829268293,"strict_punctuation_category":0.7704918033,"strict_startend_category":0.8095238095,"loose_combination_category":0.6153846154,"loose_detectable_content_category":0.7608695652,"loose_detectable_format_category":0.8095238095,"loose_keywords_category":0.8344827586,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.7154471545,"loose_punctuation_category":0.8360655738,"loose_startend_category":0.8412698413}
28
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4020000000","source_type":"Open-Source","strict_prompt_accuracy":0.6717724289,"strict_instruction_accuracy":0.7577092511,"loose_prompt_accuracy":0.7264770241,"loose_instruction_accuracy":0.798825257,"strict_combination_category":0.5384615385,"strict_detectable_content_category":0.8913043478,"strict_detectable_format_category":0.9047619048,"strict_keywords_category":0.7172413793,"strict_language_category":0.8387096774,"strict_length_constraints_category":0.6666666667,"strict_punctuation_category":0.8196721311,"strict_startend_category":0.7142857143,"loose_combination_category":0.5692307692,"loose_detectable_content_category":0.8913043478,"loose_detectable_format_category":0.9115646259,"loose_keywords_category":0.7931034483,"loose_language_category":0.8709677419,"loose_length_constraints_category":0.7235772358,"loose_punctuation_category":0.8852459016,"loose_startend_category":0.746031746}
 
29
  {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https_google.com","parameters_count":"24000000000","source_type":"Open-Source","strict_prompt_accuracy":0.6644144144,"strict_instruction_accuracy":0.7526555387,"loose_prompt_accuracy":0.7274774775,"loose_instruction_accuracy":0.7996965099,"strict_combination_category":0.6153846154,"strict_detectable_content_category":0.8444444444,"strict_detectable_format_category":0.8014184397,"strict_keywords_category":0.7642857143,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.641025641,"strict_punctuation_category":0.7333333333,"strict_startend_category":0.8166666667,"loose_combination_category":0.6615384615,"loose_detectable_content_category":0.8444444444,"loose_detectable_format_category":0.8156028369,"loose_keywords_category":0.8071428571,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.7606837607,"loose_punctuation_category":0.8,"loose_startend_category":0.85}
30
  {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https_google.com","parameters_count":"20000000000","source_type":"Open-Source","strict_prompt_accuracy":0.6527472527,"strict_instruction_accuracy":0.7507374631,"loose_prompt_accuracy":0.7076923077,"loose_instruction_accuracy":0.7890855457,"strict_combination_category":0.6984126984,"strict_detectable_content_category":0.7608695652,"strict_detectable_format_category":0.8287671233,"strict_keywords_category":0.7862068966,"strict_language_category":0.935483871,"strict_length_constraints_category":0.6585365854,"strict_punctuation_category":0.4918032787,"strict_startend_category":0.873015873,"loose_combination_category":0.7777777778,"loose_detectable_content_category":0.7608695652,"loose_detectable_format_category":0.8356164384,"loose_keywords_category":0.8344827586,"loose_language_category":0.935483871,"loose_length_constraints_category":0.7317073171,"loose_punctuation_category":0.5409836066,"loose_startend_category":0.8888888889}
31
  {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"7850000000","source_type":"Open-Source","strict_prompt_accuracy":0.6674418605,"strict_instruction_accuracy":0.7503924647,"loose_prompt_accuracy":0.7139534884,"loose_instruction_accuracy":0.7912087912,"strict_combination_category":0.5396825397,"strict_detectable_content_category":0.7435897436,"strict_detectable_format_category":0.8581560284,"strict_keywords_category":0.7067669173,"strict_language_category":1.0,"strict_length_constraints_category":0.6754385965,"strict_punctuation_category":0.8644067797,"strict_startend_category":0.7192982456,"loose_combination_category":0.5873015873,"loose_detectable_content_category":0.7435897436,"loose_detectable_format_category":0.8723404255,"loose_keywords_category":0.7518796992,"loose_language_category":1.0,"loose_length_constraints_category":0.7719298246,"loose_punctuation_category":0.8813559322,"loose_startend_category":0.7719298246}
32
  {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","strict_prompt_accuracy":0.6542669584,"strict_instruction_accuracy":0.7474302496,"loose_prompt_accuracy":0.7089715536,"loose_instruction_accuracy":0.7900146843,"strict_combination_category":0.5846153846,"strict_detectable_content_category":0.8260869565,"strict_detectable_format_category":0.8775510204,"strict_keywords_category":0.6689655172,"strict_language_category":0.9032258065,"strict_length_constraints_category":0.6666666667,"strict_punctuation_category":0.7213114754,"strict_startend_category":0.8412698413,"loose_combination_category":0.6461538462,"loose_detectable_content_category":0.8260869565,"loose_detectable_format_category":0.8843537415,"loose_keywords_category":0.7655172414,"loose_language_category":0.9032258065,"loose_length_constraints_category":0.6991869919,"loose_punctuation_category":0.7868852459,"loose_startend_category":0.873015873}
33
- {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.6498905908,"strict_instruction_accuracy":0.7444933921,"loose_prompt_accuracy":0.7111597374,"loose_instruction_accuracy":0.7973568282,"strict_combination_category":0.3692307692,"strict_detectable_content_category":0.8913043478,"strict_detectable_format_category":0.7891156463,"strict_keywords_category":0.7172413793,"strict_language_category":1.0,"strict_length_constraints_category":0.6585365854,"strict_punctuation_category":0.8524590164,"strict_startend_category":0.9206349206,"loose_combination_category":0.4769230769,"loose_detectable_content_category":0.8913043478,"loose_detectable_format_category":0.7959183673,"loose_keywords_category":0.7586206897,"loose_language_category":1.0,"loose_length_constraints_category":0.7886178862,"loose_punctuation_category":0.9180327869,"loose_startend_category":0.9523809524}
34
  {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4300000000","source_type":"Open-Source","strict_prompt_accuracy":0.6652078775,"strict_instruction_accuracy":0.7444933921,"loose_prompt_accuracy":0.7177242888,"loose_instruction_accuracy":0.7914831131,"strict_combination_category":0.5076923077,"strict_detectable_content_category":0.8913043478,"strict_detectable_format_category":0.8299319728,"strict_keywords_category":0.7172413793,"strict_language_category":1.0,"strict_length_constraints_category":0.6829268293,"strict_punctuation_category":0.7540983607,"strict_startend_category":0.7301587302,"loose_combination_category":0.5538461538,"loose_detectable_content_category":0.8913043478,"loose_detectable_format_category":0.8367346939,"loose_keywords_category":0.7931034483,"loose_language_category":1.0,"loose_length_constraints_category":0.7804878049,"loose_punctuation_category":0.8032786885,"loose_startend_category":0.7619047619}
 
35
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https_google.com","parameters_count":"70600000000","source_type":"Open-Source","strict_prompt_accuracy":0.6208425721,"strict_instruction_accuracy":0.7125925926,"loose_prompt_accuracy":0.6518847007,"loose_instruction_accuracy":0.7392592593,"strict_combination_category":0.65625,"strict_detectable_content_category":0.8043478261,"strict_detectable_format_category":0.8333333333,"strict_keywords_category":0.7342657343,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.5365853659,"strict_punctuation_category":0.7213114754,"strict_startend_category":0.5873015873,"loose_combination_category":0.65625,"loose_detectable_content_category":0.8043478261,"loose_detectable_format_category":0.8402777778,"loose_keywords_category":0.7692307692,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.593495935,"loose_punctuation_category":0.7540983607,"loose_startend_category":0.6349206349}
36
  {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https_google.com","parameters_count":"104000000000","source_type":"Open-Source","strict_prompt_accuracy":0.6114790287,"strict_instruction_accuracy":0.7007407407,"loose_prompt_accuracy":0.6379690949,"loose_instruction_accuracy":0.7274074074,"strict_combination_category":0.5846153846,"strict_detectable_content_category":0.8,"strict_detectable_format_category":0.801369863,"strict_keywords_category":0.7872340426,"strict_language_category":0.6774193548,"strict_length_constraints_category":0.6422764228,"strict_punctuation_category":0.6229508197,"strict_startend_category":0.5238095238,"loose_combination_category":0.5846153846,"loose_detectable_content_category":0.8,"loose_detectable_format_category":0.8287671233,"loose_keywords_category":0.8156028369,"loose_language_category":0.6774193548,"loose_length_constraints_category":0.7154471545,"loose_punctuation_category":0.6393442623,"loose_startend_category":0.5238095238}
37
  {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https_google.com","parameters_count":"32300000000","source_type":"Open-Source","strict_prompt_accuracy":0.6126914661,"strict_instruction_accuracy":0.6989720999,"loose_prompt_accuracy":0.6805251641,"loose_instruction_accuracy":0.7547723935,"strict_combination_category":0.5230769231,"strict_detectable_content_category":0.7608695652,"strict_detectable_format_category":0.8571428571,"strict_keywords_category":0.7172413793,"strict_language_category":1.0,"strict_length_constraints_category":0.593495935,"strict_punctuation_category":0.2950819672,"strict_startend_category":0.873015873,"loose_combination_category":0.6307692308,"loose_detectable_content_category":0.7608695652,"loose_detectable_format_category":0.8979591837,"loose_keywords_category":0.7379310345,"loose_language_category":1.0,"loose_length_constraints_category":0.7317073171,"loose_punctuation_category":0.3606557377,"loose_startend_category":0.8888888889}
38
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","strict_prompt_accuracy":0.5898617512,"strict_instruction_accuracy":0.6801242236,"loose_prompt_accuracy":0.6520737327,"loose_instruction_accuracy":0.7313664596,"strict_combination_category":0.606557377,"strict_detectable_content_category":0.9285714286,"strict_detectable_format_category":0.8028169014,"strict_keywords_category":0.7338129496,"strict_language_category":0.9032258065,"strict_length_constraints_category":0.5840707965,"strict_punctuation_category":0.1964285714,"strict_startend_category":0.6833333333,"loose_combination_category":0.6393442623,"loose_detectable_content_category":0.9285714286,"loose_detectable_format_category":0.8169014085,"loose_keywords_category":0.7625899281,"loose_language_category":0.9032258065,"loose_length_constraints_category":0.6814159292,"loose_punctuation_category":0.3392857143,"loose_startend_category":0.7833333333}
 
39
  {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https_google.com","parameters_count":"35000000000","source_type":"Open-Source","strict_prompt_accuracy":0.4835164835,"strict_instruction_accuracy":0.5790251108,"loose_prompt_accuracy":0.5384615385,"loose_instruction_accuracy":0.6203840473,"strict_combination_category":0.3384615385,"strict_detectable_content_category":0.6444444444,"strict_detectable_format_category":0.7414965986,"strict_keywords_category":0.7062937063,"strict_language_category":0.6451612903,"strict_length_constraints_category":0.6260162602,"strict_punctuation_category":0.1333333333,"strict_startend_category":0.4126984127,"loose_combination_category":0.4,"loose_detectable_content_category":0.6444444444,"loose_detectable_format_category":0.7551020408,"loose_keywords_category":0.7412587413,"loose_language_category":0.6451612903,"loose_length_constraints_category":0.6910569106,"loose_punctuation_category":0.2166666667,"loose_startend_category":0.4761904762}
40
  {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"1000000000","source_type":"Open-Source","strict_prompt_accuracy":0.4376367615,"strict_instruction_accuracy":0.5447870778,"loose_prompt_accuracy":0.4748358862,"loose_instruction_accuracy":0.5814977974,"strict_combination_category":0.1384615385,"strict_detectable_content_category":0.7608695652,"strict_detectable_format_category":0.768707483,"strict_keywords_category":0.5862068966,"strict_language_category":0.8709677419,"strict_length_constraints_category":0.5853658537,"strict_punctuation_category":0.1803278689,"strict_startend_category":0.3015873016,"loose_combination_category":0.1384615385,"loose_detectable_content_category":0.7608695652,"loose_detectable_format_category":0.768707483,"loose_keywords_category":0.6413793103,"loose_language_category":0.9032258065,"loose_length_constraints_category":0.6829268293,"loose_punctuation_category":0.2295081967,"loose_startend_category":0.3174603175}
41
  {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https_google.com","parameters_count":"3210000000","source_type":"Open-Source","strict_prompt_accuracy":0.4245076586,"strict_instruction_accuracy":0.5330396476,"loose_prompt_accuracy":0.4792122538,"loose_instruction_accuracy":0.5814977974,"strict_combination_category":0.2461538462,"strict_detectable_content_category":0.5217391304,"strict_detectable_format_category":0.6258503401,"strict_keywords_category":0.6137931034,"strict_language_category":0.5806451613,"strict_length_constraints_category":0.5772357724,"strict_punctuation_category":0.5409836066,"strict_startend_category":0.3174603175,"loose_combination_category":0.3230769231,"loose_detectable_content_category":0.5217391304,"loose_detectable_format_category":0.6462585034,"loose_keywords_category":0.6551724138,"loose_language_category":0.5806451613,"loose_length_constraints_category":0.674796748,"loose_punctuation_category":0.606557377,"loose_startend_category":0.3650793651}
 
15
  {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.772428884,"strict_instruction_accuracy":0.8340675477,"loose_prompt_accuracy":0.7986870897,"loose_instruction_accuracy":0.8575624082,"strict_combination_category":0.6923076923,"strict_detectable_content_category":0.847826087,"strict_detectable_format_category":0.8775510204,"strict_keywords_category":0.8344827586,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.7154471545,"strict_punctuation_category":0.9180327869,"strict_startend_category":0.9523809524,"loose_combination_category":0.7230769231,"loose_detectable_content_category":0.847826087,"loose_detectable_format_category":0.8843537415,"loose_keywords_category":0.8827586207,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.756097561,"loose_punctuation_category":0.9344262295,"loose_startend_category":0.9523809524}
16
  {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"30500000000","source_type":"Open-Source","strict_prompt_accuracy":0.7662037037,"strict_instruction_accuracy":0.8325508607,"loose_prompt_accuracy":0.8078703704,"loose_instruction_accuracy":0.8638497653,"strict_combination_category":0.6349206349,"strict_detectable_content_category":0.8837209302,"strict_detectable_format_category":0.9136690647,"strict_keywords_category":0.7954545455,"strict_language_category":0.9655172414,"strict_length_constraints_category":0.7192982456,"strict_punctuation_category":0.9655172414,"strict_startend_category":0.9180327869,"loose_combination_category":0.7936507937,"loose_detectable_content_category":0.8837209302,"loose_detectable_format_category":0.928057554,"loose_keywords_category":0.8257575758,"loose_language_category":0.9655172414,"loose_length_constraints_category":0.7543859649,"loose_punctuation_category":0.9655172414,"loose_startend_category":0.9180327869}
17
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","strict_prompt_accuracy":0.7702407002,"strict_instruction_accuracy":0.8311306902,"loose_prompt_accuracy":0.8205689278,"loose_instruction_accuracy":0.8693098385,"strict_combination_category":0.7846153846,"strict_detectable_content_category":1.0,"strict_detectable_format_category":0.8503401361,"strict_keywords_category":0.8,"strict_language_category":1.0,"strict_length_constraints_category":0.6666666667,"strict_punctuation_category":0.9672131148,"strict_startend_category":0.8888888889,"loose_combination_category":0.8153846154,"loose_detectable_content_category":1.0,"loose_detectable_format_category":0.8639455782,"loose_keywords_category":0.8551724138,"loose_language_category":1.0,"loose_length_constraints_category":0.7479674797,"loose_punctuation_category":0.9836065574,"loose_startend_category":0.9365079365}
 
18
  {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.772428884,"strict_instruction_accuracy":0.8296622614,"loose_prompt_accuracy":0.8140043764,"loose_instruction_accuracy":0.8649045521,"strict_combination_category":0.8,"strict_detectable_content_category":0.9782608696,"strict_detectable_format_category":0.8911564626,"strict_keywords_category":0.7793103448,"strict_language_category":1.0,"strict_length_constraints_category":0.6829268293,"strict_punctuation_category":0.8360655738,"strict_startend_category":0.9206349206,"loose_combination_category":0.8153846154,"loose_detectable_content_category":0.9782608696,"loose_detectable_format_category":0.9115646259,"loose_keywords_category":0.8344827586,"loose_language_category":1.0,"loose_length_constraints_category":0.7479674797,"loose_punctuation_category":0.8852459016,"loose_startend_category":0.9365079365}
19
+ {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"27400000000","source_type":"Open-Source","strict_prompt_accuracy":0.7636761488,"strict_instruction_accuracy":0.8296622614,"loose_prompt_accuracy":0.8052516411,"loose_instruction_accuracy":0.8634361233,"strict_combination_category":0.7230769231,"strict_detectable_content_category":0.8913043478,"strict_detectable_format_category":0.8095238095,"strict_keywords_category":0.7931034483,"strict_language_category":1.0,"strict_length_constraints_category":0.7886178862,"strict_punctuation_category":0.9836065574,"strict_startend_category":0.873015873,"loose_combination_category":0.7230769231,"loose_detectable_content_category":0.8913043478,"loose_detectable_format_category":0.8095238095,"loose_keywords_category":0.875862069,"loose_language_category":1.0,"loose_length_constraints_category":0.8780487805,"loose_punctuation_category":0.9836065574,"loose_startend_category":0.873015873}
20
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.761487965,"strict_instruction_accuracy":0.825256975,"loose_prompt_accuracy":0.7877461707,"loose_instruction_accuracy":0.8516886931,"strict_combination_category":0.7384615385,"strict_detectable_content_category":0.9565217391,"strict_detectable_format_category":0.8571428571,"strict_keywords_category":0.8413793103,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.6097560976,"strict_punctuation_category":1.0,"strict_startend_category":0.8888888889,"loose_combination_category":0.7538461538,"loose_detectable_content_category":0.9565217391,"loose_detectable_format_category":0.8775510204,"loose_keywords_category":0.875862069,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.6666666667,"loose_punctuation_category":1.0,"loose_startend_category":0.9206349206}
21
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https_google.com","parameters_count":"120000000000","source_type":"Open-Source","strict_prompt_accuracy":0.7488986784,"strict_instruction_accuracy":0.8195266272,"loose_prompt_accuracy":0.795154185,"loose_instruction_accuracy":0.8565088757,"strict_combination_category":0.7230769231,"strict_detectable_content_category":0.9565217391,"strict_detectable_format_category":0.8843537415,"strict_keywords_category":0.829787234,"strict_language_category":1.0,"strict_length_constraints_category":0.7786885246,"strict_punctuation_category":0.5573770492,"strict_startend_category":0.8888888889,"loose_combination_category":0.7384615385,"loose_detectable_content_category":0.9565217391,"loose_detectable_format_category":0.9047619048,"loose_keywords_category":0.8794326241,"loose_language_category":1.0,"loose_length_constraints_category":0.8360655738,"loose_punctuation_category":0.6557377049,"loose_startend_category":0.9047619048}
22
  {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"12200000000","source_type":"Open-Source","strict_prompt_accuracy":0.7396061269,"strict_instruction_accuracy":0.8149779736,"loose_prompt_accuracy":0.7877461707,"loose_instruction_accuracy":0.8516886931,"strict_combination_category":0.6923076923,"strict_detectable_content_category":0.9130434783,"strict_detectable_format_category":0.8911564626,"strict_keywords_category":0.7586206897,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.7479674797,"strict_punctuation_category":0.8032786885,"strict_startend_category":0.8888888889,"loose_combination_category":0.6923076923,"loose_detectable_content_category":0.9130434783,"loose_detectable_format_category":0.8911564626,"loose_keywords_category":0.8344827586,"loose_language_category":1.0,"loose_length_constraints_category":0.8292682927,"loose_punctuation_category":0.8360655738,"loose_startend_category":0.9047619048}
 
24
  {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"32800000000","source_type":"Open-Source","strict_prompt_accuracy":0.7342342342,"strict_instruction_accuracy":0.803030303,"loose_prompt_accuracy":0.786036036,"loose_instruction_accuracy":0.846969697,"strict_combination_category":0.625,"strict_detectable_content_category":0.9111111111,"strict_detectable_format_category":0.8951048951,"strict_keywords_category":0.7785714286,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.6140350877,"strict_punctuation_category":0.9333333333,"strict_startend_category":0.8888888889,"loose_combination_category":0.6875,"loose_detectable_content_category":0.9111111111,"loose_detectable_format_category":0.9090909091,"loose_keywords_category":0.85,"loose_language_category":1.0,"loose_length_constraints_category":0.6666666667,"loose_punctuation_category":0.95,"loose_startend_category":0.9682539683}
25
  {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.7308533917,"strict_instruction_accuracy":0.8017621145,"loose_prompt_accuracy":0.772428884,"loose_instruction_accuracy":0.8355359765,"strict_combination_category":0.7384615385,"strict_detectable_content_category":0.8913043478,"strict_detectable_format_category":0.843537415,"strict_keywords_category":0.7379310345,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.6666666667,"strict_punctuation_category":0.9016393443,"strict_startend_category":0.9365079365,"loose_combination_category":0.7538461538,"loose_detectable_content_category":0.8913043478,"loose_detectable_format_category":0.8775510204,"loose_keywords_category":0.7793103448,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.7398373984,"loose_punctuation_category":0.9344262295,"loose_startend_category":0.9365079365}
26
  {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https_google.com","parameters_count":"109000000000","source_type":"Open-Source","strict_prompt_accuracy":0.7083333333,"strict_instruction_accuracy":0.7897058824,"loose_prompt_accuracy":0.7280701754,"loose_instruction_accuracy":0.8058823529,"strict_combination_category":0.5076923077,"strict_detectable_content_category":0.847826087,"strict_detectable_format_category":0.8775510204,"strict_keywords_category":0.7916666667,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.674796748,"strict_punctuation_category":0.868852459,"strict_startend_category":0.8888888889,"loose_combination_category":0.5230769231,"loose_detectable_content_category":0.847826087,"loose_detectable_format_category":0.8843537415,"loose_keywords_category":0.8263888889,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.7073170732,"loose_punctuation_category":0.868852459,"loose_startend_category":0.8888888889}
 
27
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4020000000","source_type":"Open-Source","strict_prompt_accuracy":0.6717724289,"strict_instruction_accuracy":0.7577092511,"loose_prompt_accuracy":0.7264770241,"loose_instruction_accuracy":0.798825257,"strict_combination_category":0.5384615385,"strict_detectable_content_category":0.8913043478,"strict_detectable_format_category":0.9047619048,"strict_keywords_category":0.7172413793,"strict_language_category":0.8387096774,"strict_length_constraints_category":0.6666666667,"strict_punctuation_category":0.8196721311,"strict_startend_category":0.7142857143,"loose_combination_category":0.5692307692,"loose_detectable_content_category":0.8913043478,"loose_detectable_format_category":0.9115646259,"loose_keywords_category":0.7931034483,"loose_language_category":0.8709677419,"loose_length_constraints_category":0.7235772358,"loose_punctuation_category":0.8852459016,"loose_startend_category":0.746031746}
28
+ {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.6827133479,"strict_instruction_accuracy":0.7577092511,"loose_prompt_accuracy":0.7199124726,"loose_instruction_accuracy":0.7885462555,"strict_combination_category":0.5846153846,"strict_detectable_content_category":0.7608695652,"strict_detectable_format_category":0.8027210884,"strict_keywords_category":0.7793103448,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.6829268293,"strict_punctuation_category":0.7704918033,"strict_startend_category":0.8095238095,"loose_combination_category":0.6153846154,"loose_detectable_content_category":0.7608695652,"loose_detectable_format_category":0.8095238095,"loose_keywords_category":0.8344827586,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.7154471545,"loose_punctuation_category":0.8360655738,"loose_startend_category":0.8412698413}
29
  {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https_google.com","parameters_count":"24000000000","source_type":"Open-Source","strict_prompt_accuracy":0.6644144144,"strict_instruction_accuracy":0.7526555387,"loose_prompt_accuracy":0.7274774775,"loose_instruction_accuracy":0.7996965099,"strict_combination_category":0.6153846154,"strict_detectable_content_category":0.8444444444,"strict_detectable_format_category":0.8014184397,"strict_keywords_category":0.7642857143,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.641025641,"strict_punctuation_category":0.7333333333,"strict_startend_category":0.8166666667,"loose_combination_category":0.6615384615,"loose_detectable_content_category":0.8444444444,"loose_detectable_format_category":0.8156028369,"loose_keywords_category":0.8071428571,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.7606837607,"loose_punctuation_category":0.8,"loose_startend_category":0.85}
30
  {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https_google.com","parameters_count":"20000000000","source_type":"Open-Source","strict_prompt_accuracy":0.6527472527,"strict_instruction_accuracy":0.7507374631,"loose_prompt_accuracy":0.7076923077,"loose_instruction_accuracy":0.7890855457,"strict_combination_category":0.6984126984,"strict_detectable_content_category":0.7608695652,"strict_detectable_format_category":0.8287671233,"strict_keywords_category":0.7862068966,"strict_language_category":0.935483871,"strict_length_constraints_category":0.6585365854,"strict_punctuation_category":0.4918032787,"strict_startend_category":0.873015873,"loose_combination_category":0.7777777778,"loose_detectable_content_category":0.7608695652,"loose_detectable_format_category":0.8356164384,"loose_keywords_category":0.8344827586,"loose_language_category":0.935483871,"loose_length_constraints_category":0.7317073171,"loose_punctuation_category":0.5409836066,"loose_startend_category":0.8888888889}
31
  {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"7850000000","source_type":"Open-Source","strict_prompt_accuracy":0.6674418605,"strict_instruction_accuracy":0.7503924647,"loose_prompt_accuracy":0.7139534884,"loose_instruction_accuracy":0.7912087912,"strict_combination_category":0.5396825397,"strict_detectable_content_category":0.7435897436,"strict_detectable_format_category":0.8581560284,"strict_keywords_category":0.7067669173,"strict_language_category":1.0,"strict_length_constraints_category":0.6754385965,"strict_punctuation_category":0.8644067797,"strict_startend_category":0.7192982456,"loose_combination_category":0.5873015873,"loose_detectable_content_category":0.7435897436,"loose_detectable_format_category":0.8723404255,"loose_keywords_category":0.7518796992,"loose_language_category":1.0,"loose_length_constraints_category":0.7719298246,"loose_punctuation_category":0.8813559322,"loose_startend_category":0.7719298246}
32
  {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","strict_prompt_accuracy":0.6542669584,"strict_instruction_accuracy":0.7474302496,"loose_prompt_accuracy":0.7089715536,"loose_instruction_accuracy":0.7900146843,"strict_combination_category":0.5846153846,"strict_detectable_content_category":0.8260869565,"strict_detectable_format_category":0.8775510204,"strict_keywords_category":0.6689655172,"strict_language_category":0.9032258065,"strict_length_constraints_category":0.6666666667,"strict_punctuation_category":0.7213114754,"strict_startend_category":0.8412698413,"loose_combination_category":0.6461538462,"loose_detectable_content_category":0.8260869565,"loose_detectable_format_category":0.8843537415,"loose_keywords_category":0.7655172414,"loose_language_category":0.9032258065,"loose_length_constraints_category":0.6991869919,"loose_punctuation_category":0.7868852459,"loose_startend_category":0.873015873}
 
33
  {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4300000000","source_type":"Open-Source","strict_prompt_accuracy":0.6652078775,"strict_instruction_accuracy":0.7444933921,"loose_prompt_accuracy":0.7177242888,"loose_instruction_accuracy":0.7914831131,"strict_combination_category":0.5076923077,"strict_detectable_content_category":0.8913043478,"strict_detectable_format_category":0.8299319728,"strict_keywords_category":0.7172413793,"strict_language_category":1.0,"strict_length_constraints_category":0.6829268293,"strict_punctuation_category":0.7540983607,"strict_startend_category":0.7301587302,"loose_combination_category":0.5538461538,"loose_detectable_content_category":0.8913043478,"loose_detectable_format_category":0.8367346939,"loose_keywords_category":0.7931034483,"loose_language_category":1.0,"loose_length_constraints_category":0.7804878049,"loose_punctuation_category":0.8032786885,"loose_startend_category":0.7619047619}
34
+ {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.6498905908,"strict_instruction_accuracy":0.7444933921,"loose_prompt_accuracy":0.7111597374,"loose_instruction_accuracy":0.7973568282,"strict_combination_category":0.3692307692,"strict_detectable_content_category":0.8913043478,"strict_detectable_format_category":0.7891156463,"strict_keywords_category":0.7172413793,"strict_language_category":1.0,"strict_length_constraints_category":0.6585365854,"strict_punctuation_category":0.8524590164,"strict_startend_category":0.9206349206,"loose_combination_category":0.4769230769,"loose_detectable_content_category":0.8913043478,"loose_detectable_format_category":0.7959183673,"loose_keywords_category":0.7586206897,"loose_language_category":1.0,"loose_length_constraints_category":0.7886178862,"loose_punctuation_category":0.9180327869,"loose_startend_category":0.9523809524}
35
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https_google.com","parameters_count":"70600000000","source_type":"Open-Source","strict_prompt_accuracy":0.6208425721,"strict_instruction_accuracy":0.7125925926,"loose_prompt_accuracy":0.6518847007,"loose_instruction_accuracy":0.7392592593,"strict_combination_category":0.65625,"strict_detectable_content_category":0.8043478261,"strict_detectable_format_category":0.8333333333,"strict_keywords_category":0.7342657343,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.5365853659,"strict_punctuation_category":0.7213114754,"strict_startend_category":0.5873015873,"loose_combination_category":0.65625,"loose_detectable_content_category":0.8043478261,"loose_detectable_format_category":0.8402777778,"loose_keywords_category":0.7692307692,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.593495935,"loose_punctuation_category":0.7540983607,"loose_startend_category":0.6349206349}
36
  {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https_google.com","parameters_count":"104000000000","source_type":"Open-Source","strict_prompt_accuracy":0.6114790287,"strict_instruction_accuracy":0.7007407407,"loose_prompt_accuracy":0.6379690949,"loose_instruction_accuracy":0.7274074074,"strict_combination_category":0.5846153846,"strict_detectable_content_category":0.8,"strict_detectable_format_category":0.801369863,"strict_keywords_category":0.7872340426,"strict_language_category":0.6774193548,"strict_length_constraints_category":0.6422764228,"strict_punctuation_category":0.6229508197,"strict_startend_category":0.5238095238,"loose_combination_category":0.5846153846,"loose_detectable_content_category":0.8,"loose_detectable_format_category":0.8287671233,"loose_keywords_category":0.8156028369,"loose_language_category":0.6774193548,"loose_length_constraints_category":0.7154471545,"loose_punctuation_category":0.6393442623,"loose_startend_category":0.5238095238}
37
  {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https_google.com","parameters_count":"32300000000","source_type":"Open-Source","strict_prompt_accuracy":0.6126914661,"strict_instruction_accuracy":0.6989720999,"loose_prompt_accuracy":0.6805251641,"loose_instruction_accuracy":0.7547723935,"strict_combination_category":0.5230769231,"strict_detectable_content_category":0.7608695652,"strict_detectable_format_category":0.8571428571,"strict_keywords_category":0.7172413793,"strict_language_category":1.0,"strict_length_constraints_category":0.593495935,"strict_punctuation_category":0.2950819672,"strict_startend_category":0.873015873,"loose_combination_category":0.6307692308,"loose_detectable_content_category":0.7608695652,"loose_detectable_format_category":0.8979591837,"loose_keywords_category":0.7379310345,"loose_language_category":1.0,"loose_length_constraints_category":0.7317073171,"loose_punctuation_category":0.3606557377,"loose_startend_category":0.8888888889}
38
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","strict_prompt_accuracy":0.5898617512,"strict_instruction_accuracy":0.6801242236,"loose_prompt_accuracy":0.6520737327,"loose_instruction_accuracy":0.7313664596,"strict_combination_category":0.606557377,"strict_detectable_content_category":0.9285714286,"strict_detectable_format_category":0.8028169014,"strict_keywords_category":0.7338129496,"strict_language_category":0.9032258065,"strict_length_constraints_category":0.5840707965,"strict_punctuation_category":0.1964285714,"strict_startend_category":0.6833333333,"loose_combination_category":0.6393442623,"loose_detectable_content_category":0.9285714286,"loose_detectable_format_category":0.8169014085,"loose_keywords_category":0.7625899281,"loose_language_category":0.9032258065,"loose_length_constraints_category":0.6814159292,"loose_punctuation_category":0.3392857143,"loose_startend_category":0.7833333333}
39
+ {"Model Name":"yandexgpt-lite","thinking_method":"❌","model_url":"https_google.com","parameters_count":"8000000000","source_type":"Open-Source","strict_prompt_accuracy":0.5185995624,"strict_instruction_accuracy":0.6035242291,"loose_prompt_accuracy":0.5623632385,"loose_instruction_accuracy":0.6446402349,"strict_combination_category":0.4153846154,"strict_detectable_content_category":0.5434782609,"strict_detectable_format_category":0.8095238095,"strict_keywords_category":0.6689655172,"strict_language_category":0.935483871,"strict_length_constraints_category":0.5203252033,"strict_punctuation_category":0.393442623,"strict_startend_category":0.4126984127,"loose_combination_category":0.4615384615,"loose_detectable_content_category":0.5434782609,"loose_detectable_format_category":0.8095238095,"loose_keywords_category":0.7034482759,"loose_language_category":0.935483871,"loose_length_constraints_category":0.593495935,"loose_punctuation_category":0.4918032787,"loose_startend_category":0.4920634921}
40
  {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https_google.com","parameters_count":"35000000000","source_type":"Open-Source","strict_prompt_accuracy":0.4835164835,"strict_instruction_accuracy":0.5790251108,"loose_prompt_accuracy":0.5384615385,"loose_instruction_accuracy":0.6203840473,"strict_combination_category":0.3384615385,"strict_detectable_content_category":0.6444444444,"strict_detectable_format_category":0.7414965986,"strict_keywords_category":0.7062937063,"strict_language_category":0.6451612903,"strict_length_constraints_category":0.6260162602,"strict_punctuation_category":0.1333333333,"strict_startend_category":0.4126984127,"loose_combination_category":0.4,"loose_detectable_content_category":0.6444444444,"loose_detectable_format_category":0.7551020408,"loose_keywords_category":0.7412587413,"loose_language_category":0.6451612903,"loose_length_constraints_category":0.6910569106,"loose_punctuation_category":0.2166666667,"loose_startend_category":0.4761904762}
41
  {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"1000000000","source_type":"Open-Source","strict_prompt_accuracy":0.4376367615,"strict_instruction_accuracy":0.5447870778,"loose_prompt_accuracy":0.4748358862,"loose_instruction_accuracy":0.5814977974,"strict_combination_category":0.1384615385,"strict_detectable_content_category":0.7608695652,"strict_detectable_format_category":0.768707483,"strict_keywords_category":0.5862068966,"strict_language_category":0.8709677419,"strict_length_constraints_category":0.5853658537,"strict_punctuation_category":0.1803278689,"strict_startend_category":0.3015873016,"loose_combination_category":0.1384615385,"loose_detectable_content_category":0.7608695652,"loose_detectable_format_category":0.768707483,"loose_keywords_category":0.6413793103,"loose_language_category":0.9032258065,"loose_length_constraints_category":0.6829268293,"loose_punctuation_category":0.2295081967,"loose_startend_category":0.3174603175}
42
  {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https_google.com","parameters_count":"3210000000","source_type":"Open-Source","strict_prompt_accuracy":0.4245076586,"strict_instruction_accuracy":0.5330396476,"loose_prompt_accuracy":0.4792122538,"loose_instruction_accuracy":0.5814977974,"strict_combination_category":0.2461538462,"strict_detectable_content_category":0.5217391304,"strict_detectable_format_category":0.6258503401,"strict_keywords_category":0.6137931034,"strict_language_category":0.5806451613,"strict_length_constraints_category":0.5772357724,"strict_punctuation_category":0.5409836066,"strict_startend_category":0.3174603175,"loose_combination_category":0.3230769231,"loose_detectable_content_category":0.5217391304,"loose_detectable_format_category":0.6462585034,"loose_keywords_category":0.6551724138,"loose_language_category":0.5806451613,"loose_length_constraints_category":0.674796748,"loose_punctuation_category":0.606557377,"loose_startend_category":0.3650793651}
leaderboard/boards_data/keyword-extraction_SynKeywords.jsonl CHANGED
@@ -1,44 +1,45 @@
1
- {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.2115068728,"keyword-extraction_SynKeywords_precision_mean":0.1912410205,"keyword-extraction_SynKeywords_recall_mean":0.2483695652,"nlu_score":0.7143086066}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1297217026,"keyword-extraction_SynKeywords_precision_mean":0.1052290945,"keyword-extraction_SynKeywords_recall_mean":0.1816123188,"nlu_score":0.628506628}
3
- {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.229921048,"keyword-extraction_SynKeywords_precision_mean":0.21147343,"keyword-extraction_SynKeywords_recall_mean":0.2634963768,"nlu_score":0.6241793507}
4
- {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0860842686,"keyword-extraction_SynKeywords_precision_mean":0.0757882818,"keyword-extraction_SynKeywords_recall_mean":0.1065217391,"nlu_score":0.6297634971}
 
 
 
5
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1791474425,"keyword-extraction_SynKeywords_precision_mean":0.151358639,"keyword-extraction_SynKeywords_recall_mean":0.2382246377,"nlu_score":0.6470954618}
6
- {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1547160783,"keyword-extraction_SynKeywords_precision_mean":0.1275089966,"keyword-extraction_SynKeywords_recall_mean":0.2111413043,"nlu_score":0.7144353486}
7
- {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1186662307,"keyword-extraction_SynKeywords_precision_mean":0.1013265485,"keyword-extraction_SynKeywords_recall_mean":0.1581521739,"nlu_score":0.6749652797}
8
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1543389439,"keyword-extraction_SynKeywords_precision_mean":0.1301371778,"keyword-extraction_SynKeywords_recall_mean":0.2038949275,"nlu_score":0.6458443785}
9
- {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2645652392,"keyword-extraction_SynKeywords_precision_mean":0.2349391249,"keyword-extraction_SynKeywords_recall_mean":0.3166666667,"nlu_score":0.6552152029}
10
- {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1217550899,"keyword-extraction_SynKeywords_precision_mean":0.1020894964,"keyword-extraction_SynKeywords_recall_mean":0.1608695652,"nlu_score":0.6758278127}
11
- {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.3352048805,"keyword-extraction_SynKeywords_precision_mean":0.2914121808,"keyword-extraction_SynKeywords_recall_mean":0.4166666667,"nlu_score":0.699116864}
12
- {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1876880333,"keyword-extraction_SynKeywords_precision_mean":0.1557665099,"keyword-extraction_SynKeywords_recall_mean":0.2577898551,"nlu_score":0.7178891542}
13
- {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2384077673,"keyword-extraction_SynKeywords_precision_mean":0.2041836259,"keyword-extraction_SynKeywords_recall_mean":0.3015398551,"nlu_score":0.6898261633}
14
- {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.233766167,"keyword-extraction_SynKeywords_precision_mean":0.1893302534,"keyword-extraction_SynKeywords_recall_mean":0.3297101449,"nlu_score":0.6460328733}
15
- {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1369232983,"keyword-extraction_SynKeywords_precision_mean":0.1117212542,"keyword-extraction_SynKeywords_recall_mean":0.1863224638,"nlu_score":0.6714091535}
16
- {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0169533238,"keyword-extraction_SynKeywords_precision_mean":0.015422274,"keyword-extraction_SynKeywords_recall_mean":0.0206521739,"nlu_score":0.4086928082}
17
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.2568096145,"keyword-extraction_SynKeywords_precision_mean":0.2483731877,"keyword-extraction_SynKeywords_recall_mean":0.2765873016,"nlu_score":0.3749414991}
18
- {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1942845429,"keyword-extraction_SynKeywords_precision_mean":0.168197784,"keyword-extraction_SynKeywords_recall_mean":0.2451992754,"nlu_score":0.5661558794}
 
 
19
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1409784417,"keyword-extraction_SynKeywords_precision_mean":0.1216706248,"keyword-extraction_SynKeywords_recall_mean":0.1832427536,"nlu_score":0.456845738}
 
20
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.261926093,"keyword-extraction_SynKeywords_precision_mean":0.2173028298,"keyword-extraction_SynKeywords_recall_mean":0.3492753623,"nlu_score":0.6752949557}
 
 
 
21
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2175605056,"keyword-extraction_SynKeywords_precision_mean":0.1768294437,"keyword-extraction_SynKeywords_recall_mean":0.3029891304,"nlu_score":0.5121418762}
22
- {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2097414246,"keyword-extraction_SynKeywords_precision_mean":0.1802822781,"keyword-extraction_SynKeywords_recall_mean":0.2621376812,"nlu_score":0.3619547874}
23
- {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2199224821,"keyword-extraction_SynKeywords_precision_mean":0.1924904051,"keyword-extraction_SynKeywords_recall_mean":0.2695652174,"nlu_score":0.3928685253}
 
24
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2876907753,"keyword-extraction_SynKeywords_precision_mean":0.2733133111,"keyword-extraction_SynKeywords_recall_mean":0.322192029,"nlu_score":0.6800109206}
25
- {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1856116909,"keyword-extraction_SynKeywords_precision_mean":0.157770465,"keyword-extraction_SynKeywords_recall_mean":0.2412137681,"nlu_score":0.6833497104}
 
26
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1555238683,"keyword-extraction_SynKeywords_precision_mean":0.1317069998,"keyword-extraction_SynKeywords_recall_mean":0.2076992754,"nlu_score":0.7207167537}
27
- {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.2783400189,"keyword-extraction_SynKeywords_precision_mean":0.2250927598,"keyword-extraction_SynKeywords_recall_mean":0.3842391304,"nlu_score":0.6459120734}
28
- {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1901147976,"keyword-extraction_SynKeywords_precision_mean":0.1676428493,"keyword-extraction_SynKeywords_recall_mean":0.2307065217,"nlu_score":0.4824528512}
29
- {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.205802897,"keyword-extraction_SynKeywords_precision_mean":0.1860666658,"keyword-extraction_SynKeywords_recall_mean":0.2421195652,"nlu_score":0.7050532433}
30
- {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1986622723,"keyword-extraction_SynKeywords_precision_mean":0.1812999953,"keyword-extraction_SynKeywords_recall_mean":0.2295289855,"nlu_score":0.6944128198}
31
- {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1470867235,"keyword-extraction_SynKeywords_precision_mean":0.1387418439,"keyword-extraction_SynKeywords_recall_mean":0.1666666667,"nlu_score":0.6914202844}
32
- {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0012077295,"keyword-extraction_SynKeywords_precision_mean":0.0013586957,"keyword-extraction_SynKeywords_recall_mean":0.0010869565,"nlu_score":0.531045981}
33
- {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.0888958039,"keyword-extraction_SynKeywords_precision_mean":0.0717122112,"keyword-extraction_SynKeywords_recall_mean":0.1270833333,"nlu_score":0.5241296095}
34
- {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1690961,"keyword-extraction_SynKeywords_precision_mean":0.1495665943,"keyword-extraction_SynKeywords_recall_mean":0.2049818841,"nlu_score":0.6262096694}
35
- {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2375719719,"keyword-extraction_SynKeywords_precision_mean":0.1924020695,"keyword-extraction_SynKeywords_recall_mean":0.329076087,"nlu_score":0.6443219619}
36
- {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2080704644,"keyword-extraction_SynKeywords_precision_mean":0.1673321849,"keyword-extraction_SynKeywords_recall_mean":0.294384058,"nlu_score":0.5968415875}
37
- {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2510623051,"keyword-extraction_SynKeywords_precision_mean":0.1899292026,"keyword-extraction_SynKeywords_recall_mean":0.4099637681,"nlu_score":0.3916645306}
38
- {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.2160808904,"keyword-extraction_SynKeywords_precision_mean":0.1901842722,"keyword-extraction_SynKeywords_recall_mean":0.2683876812,"nlu_score":0.7146808531}
39
- {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.3267802104,"keyword-extraction_SynKeywords_precision_mean":0.2985915051,"keyword-extraction_SynKeywords_recall_mean":0.3825181159,"nlu_score":0.6361186163}
40
- {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2600489896,"keyword-extraction_SynKeywords_precision_mean":0.2150796745,"keyword-extraction_SynKeywords_recall_mean":0.3497282609,"nlu_score":0.6255818412}
41
- {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0224485659,"keyword-extraction_SynKeywords_precision_mean":0.0230331263,"keyword-extraction_SynKeywords_recall_mean":0.022826087,"nlu_score":0.1368924446}
42
- {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0,"keyword-extraction_SynKeywords_precision_mean":0.0,"keyword-extraction_SynKeywords_recall_mean":0.0,"nlu_score":0.046805056}
43
- {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1559734933,"keyword-extraction_SynKeywords_precision_mean":0.1449240072,"keyword-extraction_SynKeywords_recall_mean":0.1766304348,"nlu_score":0.6992555201}
44
- {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0,"keyword-extraction_SynKeywords_precision_mean":0.0,"keyword-extraction_SynKeywords_recall_mean":0.0,"nlu_score":0.0372987683}
 
1
+ {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1690961,"keyword-extraction_SynKeywords_precision_mean":0.1495665943,"keyword-extraction_SynKeywords_recall_mean":0.2049818841,"nlu_score":0.6262096694}
2
+ {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0012077295,"keyword-extraction_SynKeywords_precision_mean":0.0013586957,"keyword-extraction_SynKeywords_recall_mean":0.0010869565,"nlu_score":0.531045981}
3
+ {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.205802897,"keyword-extraction_SynKeywords_precision_mean":0.1860666658,"keyword-extraction_SynKeywords_recall_mean":0.2421195652,"nlu_score":0.7050532433}
4
+ {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.3267802104,"keyword-extraction_SynKeywords_precision_mean":0.2985915051,"keyword-extraction_SynKeywords_recall_mean":0.3825181159,"nlu_score":0.6361186163}
5
+ {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0169533238,"keyword-extraction_SynKeywords_precision_mean":0.015422274,"keyword-extraction_SynKeywords_recall_mean":0.0206521739,"nlu_score":0.4086928082}
6
+ {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.233766167,"keyword-extraction_SynKeywords_precision_mean":0.1893302534,"keyword-extraction_SynKeywords_recall_mean":0.3297101449,"nlu_score":0.6460328733}
7
+ {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1369232983,"keyword-extraction_SynKeywords_precision_mean":0.1117212542,"keyword-extraction_SynKeywords_recall_mean":0.1863224638,"nlu_score":0.6714091535}
8
+ {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1986622723,"keyword-extraction_SynKeywords_precision_mean":0.1812999953,"keyword-extraction_SynKeywords_recall_mean":0.2295289855,"nlu_score":0.6944128198}
9
+ {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2375719719,"keyword-extraction_SynKeywords_precision_mean":0.1924020695,"keyword-extraction_SynKeywords_recall_mean":0.329076087,"nlu_score":0.6443219619}
10
+ {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1559734933,"keyword-extraction_SynKeywords_precision_mean":0.1449240072,"keyword-extraction_SynKeywords_recall_mean":0.1766304348,"nlu_score":0.6992555201}
11
+ {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1186662307,"keyword-extraction_SynKeywords_precision_mean":0.1013265485,"keyword-extraction_SynKeywords_recall_mean":0.1581521739,"nlu_score":0.6749652797}
12
+ {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2600489896,"keyword-extraction_SynKeywords_precision_mean":0.2150796745,"keyword-extraction_SynKeywords_recall_mean":0.3497282609,"nlu_score":0.6255818412}
13
+ {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2510623051,"keyword-extraction_SynKeywords_precision_mean":0.1899292026,"keyword-extraction_SynKeywords_recall_mean":0.4099637681,"nlu_score":0.3916645306}
14
+ {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.2783400189,"keyword-extraction_SynKeywords_precision_mean":0.2250927598,"keyword-extraction_SynKeywords_recall_mean":0.3842391304,"nlu_score":0.6459120734}
15
+ {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0224485659,"keyword-extraction_SynKeywords_precision_mean":0.0230331263,"keyword-extraction_SynKeywords_recall_mean":0.022826087,"nlu_score":0.1368924446}
16
+ {"Model Name":"yandexgpt-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2983432135,"keyword-extraction_SynKeywords_precision_mean":0.2444159026,"keyword-extraction_SynKeywords_recall_mean":0.4067934783,"nlu_score":0.5761104945}
17
+ {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1942845429,"keyword-extraction_SynKeywords_precision_mean":0.168197784,"keyword-extraction_SynKeywords_recall_mean":0.2451992754,"nlu_score":0.5661558794}
18
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1297217026,"keyword-extraction_SynKeywords_precision_mean":0.1052290945,"keyword-extraction_SynKeywords_recall_mean":0.1816123188,"nlu_score":0.628506628}
19
+ {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.3352048805,"keyword-extraction_SynKeywords_precision_mean":0.2914121808,"keyword-extraction_SynKeywords_recall_mean":0.4166666667,"nlu_score":0.699116864}
20
+ {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2199224821,"keyword-extraction_SynKeywords_precision_mean":0.1924904051,"keyword-extraction_SynKeywords_recall_mean":0.2695652174,"nlu_score":0.3928685253}
21
+ {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2080704644,"keyword-extraction_SynKeywords_precision_mean":0.1673321849,"keyword-extraction_SynKeywords_recall_mean":0.294384058,"nlu_score":0.5968415875}
22
+ {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.2160808904,"keyword-extraction_SynKeywords_precision_mean":0.1901842722,"keyword-extraction_SynKeywords_recall_mean":0.2683876812,"nlu_score":0.7146808531}
23
+ {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.2115068728,"keyword-extraction_SynKeywords_precision_mean":0.1912410205,"keyword-extraction_SynKeywords_recall_mean":0.2483695652,"nlu_score":0.7143086066}
24
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1791474425,"keyword-extraction_SynKeywords_precision_mean":0.151358639,"keyword-extraction_SynKeywords_recall_mean":0.2382246377,"nlu_score":0.6470954618}
 
 
25
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1543389439,"keyword-extraction_SynKeywords_precision_mean":0.1301371778,"keyword-extraction_SynKeywords_recall_mean":0.2038949275,"nlu_score":0.6458443785}
26
+ {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.229921048,"keyword-extraction_SynKeywords_precision_mean":0.21147343,"keyword-extraction_SynKeywords_recall_mean":0.2634963768,"nlu_score":0.6241793507}
27
+ {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0,"keyword-extraction_SynKeywords_precision_mean":0.0,"keyword-extraction_SynKeywords_recall_mean":0.0,"nlu_score":0.0372987683}
 
 
 
 
 
 
28
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.2568096145,"keyword-extraction_SynKeywords_precision_mean":0.2483731877,"keyword-extraction_SynKeywords_recall_mean":0.2765873016,"nlu_score":0.3749414991}
29
+ {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1217550899,"keyword-extraction_SynKeywords_precision_mean":0.1020894964,"keyword-extraction_SynKeywords_recall_mean":0.1608695652,"nlu_score":0.6758278127}
30
+ {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1901147976,"keyword-extraction_SynKeywords_precision_mean":0.1676428493,"keyword-extraction_SynKeywords_recall_mean":0.2307065217,"nlu_score":0.4824528512}
31
+ {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1470867235,"keyword-extraction_SynKeywords_precision_mean":0.1387418439,"keyword-extraction_SynKeywords_recall_mean":0.1666666667,"nlu_score":0.6914202844}
32
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1409784417,"keyword-extraction_SynKeywords_precision_mean":0.1216706248,"keyword-extraction_SynKeywords_recall_mean":0.1832427536,"nlu_score":0.456845738}
33
+ {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1876880333,"keyword-extraction_SynKeywords_precision_mean":0.1557665099,"keyword-extraction_SynKeywords_recall_mean":0.2577898551,"nlu_score":0.7178891542}
34
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.261926093,"keyword-extraction_SynKeywords_precision_mean":0.2173028298,"keyword-extraction_SynKeywords_recall_mean":0.3492753623,"nlu_score":0.6752949557}
35
+ {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.0888958039,"keyword-extraction_SynKeywords_precision_mean":0.0717122112,"keyword-extraction_SynKeywords_recall_mean":0.1270833333,"nlu_score":0.5241296095}
36
+ {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0,"keyword-extraction_SynKeywords_precision_mean":0.0,"keyword-extraction_SynKeywords_recall_mean":0.0,"nlu_score":0.046805056}
37
+ {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1856116909,"keyword-extraction_SynKeywords_precision_mean":0.157770465,"keyword-extraction_SynKeywords_recall_mean":0.2412137681,"nlu_score":0.6833497104}
38
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2175605056,"keyword-extraction_SynKeywords_precision_mean":0.1768294437,"keyword-extraction_SynKeywords_recall_mean":0.3029891304,"nlu_score":0.5121418762}
39
+ {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2384077673,"keyword-extraction_SynKeywords_precision_mean":0.2041836259,"keyword-extraction_SynKeywords_recall_mean":0.3015398551,"nlu_score":0.6898261633}
40
+ {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1547160783,"keyword-extraction_SynKeywords_precision_mean":0.1275089966,"keyword-extraction_SynKeywords_recall_mean":0.2111413043,"nlu_score":0.7144353486}
41
+ {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2645652392,"keyword-extraction_SynKeywords_precision_mean":0.2349391249,"keyword-extraction_SynKeywords_recall_mean":0.3166666667,"nlu_score":0.6552152029}
42
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2876907753,"keyword-extraction_SynKeywords_precision_mean":0.2733133111,"keyword-extraction_SynKeywords_recall_mean":0.322192029,"nlu_score":0.6800109206}
43
+ {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2097414246,"keyword-extraction_SynKeywords_precision_mean":0.1802822781,"keyword-extraction_SynKeywords_recall_mean":0.2621376812,"nlu_score":0.3619547874}
44
+ {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0860842686,"keyword-extraction_SynKeywords_precision_mean":0.0757882818,"keyword-extraction_SynKeywords_recall_mean":0.1065217391,"nlu_score":0.6297634971}
45
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1555238683,"keyword-extraction_SynKeywords_precision_mean":0.1317069998,"keyword-extraction_SynKeywords_recall_mean":0.2076992754,"nlu_score":0.7207167537}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leaderboard/boards_data/mt_bench.jsonl CHANGED
@@ -37,6 +37,7 @@
37
  {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","score_w_mean":6.5991189427,"score_mean":6.6075268817,"writing_score_w_mean":6.375,"writing_score_mean":6.2037037037,"roleplay_score_w_mean":6.5263157895,"roleplay_score_mean":6.2738095238,"reasoning_score_w_mean":6.2142857143,"reasoning_score_mean":6.1944444444,"math_score_w_mean":8.652173913,"math_score_mean":8.45,"coding_score_w_mean":5.95,"coding_score_mean":5.95,"extraction_score_w_mean":7.15,"extraction_score_mean":7.15,"stem_score_w_mean":7.3,"stem_score_mean":7.3,"humanities_score_w_mean":7.5555555556,"humanities_score_mean":7.5555555556,"persian_general_knowledge_score_w_mean":1.08,"persian_general_knowledge_score_mean":1.1111111111,"chatbot_rag_score_w_mean":9.2333333333,"chatbot_rag_score_mean":9.25}
38
  {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https_google.com","parameters_count":"35000000000","source_type":"Open-Source","score_w_mean":6.0816326531,"score_mean":6.0908333333,"writing_score_w_mean":7.8148148148,"writing_score_mean":7.925,"roleplay_score_w_mean":6.6071428571,"roleplay_score_mean":6.275,"reasoning_score_w_mean":3.8666666667,"reasoning_score_mean":3.7416666667,"math_score_w_mean":3.652173913,"math_score_mean":3.475,"coding_score_w_mean":5.2,"coding_score_mean":5.2,"extraction_score_w_mean":5.95,"extraction_score_mean":5.95,"stem_score_w_mean":6.85,"stem_score_mean":6.85,"humanities_score_w_mean":8.45,"humanities_score_mean":8.45,"persian_general_knowledge_score_w_mean":3.2962962963,"persian_general_knowledge_score_mean":3.8583333333,"chatbot_rag_score_w_mean":9.2,"chatbot_rag_score_mean":9.1833333333}
39
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4020000000","source_type":"Open-Source","score_w_mean":5.6592920354,"score_mean":5.5994623656,"writing_score_w_mean":5.5416666667,"writing_score_mean":5.1944444444,"roleplay_score_w_mean":5.4230769231,"roleplay_score_mean":4.962962963,"reasoning_score_w_mean":3.6923076923,"reasoning_score_mean":3.9351851852,"math_score_w_mean":7.9565217391,"math_score_mean":7.9416666667,"coding_score_w_mean":6.3888888889,"coding_score_mean":6.3888888889,"extraction_score_w_mean":5.85,"extraction_score_mean":5.85,"stem_score_w_mean":4.85,"stem_score_mean":4.85,"humanities_score_w_mean":5.6111111111,"humanities_score_mean":5.6111111111,"persian_general_knowledge_score_w_mean":1.0476190476,"persian_general_knowledge_score_mean":1.0416666667,"chatbot_rag_score_w_mean":9.1333333333,"chatbot_rag_score_mean":9.1166666667}
 
40
  {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"1000000000","source_type":"Open-Source","score_w_mean":4.3829787234,"score_mean":4.3333333333,"writing_score_w_mean":5.5185185185,"writing_score_mean":5.3083333333,"roleplay_score_w_mean":5.6428571429,"roleplay_score_mean":5.2916666667,"reasoning_score_w_mean":2.7666666667,"reasoning_score_mean":2.925,"math_score_w_mean":4.2857142857,"math_score_mean":4.4444444444,"coding_score_w_mean":3.35,"coding_score_mean":3.35,"extraction_score_w_mean":2.3333333333,"extraction_score_mean":2.3333333333,"stem_score_w_mean":4.6875,"stem_score_mean":4.6875,"humanities_score_w_mean":7.1111111111,"humanities_score_mean":7.1111111111,"persian_general_knowledge_score_w_mean":1.2222222222,"persian_general_knowledge_score_mean":1.2166666667,"chatbot_rag_score_w_mean":6.8333333333,"chatbot_rag_score_mean":6.825}
41
  {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https_google.com","parameters_count":"3210000000","source_type":"Open-Source","score_w_mean":3.9224806202,"score_mean":3.7564102564,"writing_score_w_mean":3.8333333333,"writing_score_mean":3.7261904762,"roleplay_score_w_mean":2.8571428571,"roleplay_score_mean":2.7333333333,"reasoning_score_w_mean":null,"reasoning_score_mean":null,"math_score_w_mean":null,"math_score_mean":null,"coding_score_w_mean":null,"coding_score_mean":null,"extraction_score_w_mean":4.1,"extraction_score_mean":4.1,"stem_score_w_mean":3.8125,"stem_score_mean":3.8125,"humanities_score_w_mean":2.6428571429,"humanities_score_mean":2.6428571429,"persian_general_knowledge_score_w_mean":1.1111111111,"persian_general_knowledge_score_mean":1.15,"chatbot_rag_score_w_mean":7.6,"chatbot_rag_score_mean":7.4583333333}
42
  {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https_google.com","parameters_count":"7250000000","source_type":"Open-Source","score_w_mean":3.6885245902,"score_mean":3.3982683983,"writing_score_w_mean":3.8461538462,"writing_score_mean":3.1166666667,"roleplay_score_w_mean":1.9375,"roleplay_score_mean":1.8333333333,"reasoning_score_w_mean":2.2173913043,"reasoning_score_mean":2.125,"math_score_w_mean":2.8260869565,"math_score_mean":2.675,"coding_score_w_mean":4.1666666667,"coding_score_mean":4.1666666667,"extraction_score_w_mean":3.2777777778,"extraction_score_mean":3.2777777778,"stem_score_w_mean":1.6666666667,"stem_score_mean":1.6666666667,"humanities_score_w_mean":2.125,"humanities_score_mean":2.125,"persian_general_knowledge_score_w_mean":1.0,"persian_general_knowledge_score_mean":1.0,"chatbot_rag_score_w_mean":9.2,"chatbot_rag_score_mean":9.1333333333}
 
37
  {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","score_w_mean":6.5991189427,"score_mean":6.6075268817,"writing_score_w_mean":6.375,"writing_score_mean":6.2037037037,"roleplay_score_w_mean":6.5263157895,"roleplay_score_mean":6.2738095238,"reasoning_score_w_mean":6.2142857143,"reasoning_score_mean":6.1944444444,"math_score_w_mean":8.652173913,"math_score_mean":8.45,"coding_score_w_mean":5.95,"coding_score_mean":5.95,"extraction_score_w_mean":7.15,"extraction_score_mean":7.15,"stem_score_w_mean":7.3,"stem_score_mean":7.3,"humanities_score_w_mean":7.5555555556,"humanities_score_mean":7.5555555556,"persian_general_knowledge_score_w_mean":1.08,"persian_general_knowledge_score_mean":1.1111111111,"chatbot_rag_score_w_mean":9.2333333333,"chatbot_rag_score_mean":9.25}
38
  {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https_google.com","parameters_count":"35000000000","source_type":"Open-Source","score_w_mean":6.0816326531,"score_mean":6.0908333333,"writing_score_w_mean":7.8148148148,"writing_score_mean":7.925,"roleplay_score_w_mean":6.6071428571,"roleplay_score_mean":6.275,"reasoning_score_w_mean":3.8666666667,"reasoning_score_mean":3.7416666667,"math_score_w_mean":3.652173913,"math_score_mean":3.475,"coding_score_w_mean":5.2,"coding_score_mean":5.2,"extraction_score_w_mean":5.95,"extraction_score_mean":5.95,"stem_score_w_mean":6.85,"stem_score_mean":6.85,"humanities_score_w_mean":8.45,"humanities_score_mean":8.45,"persian_general_knowledge_score_w_mean":3.2962962963,"persian_general_knowledge_score_mean":3.8583333333,"chatbot_rag_score_w_mean":9.2,"chatbot_rag_score_mean":9.1833333333}
39
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4020000000","source_type":"Open-Source","score_w_mean":5.6592920354,"score_mean":5.5994623656,"writing_score_w_mean":5.5416666667,"writing_score_mean":5.1944444444,"roleplay_score_w_mean":5.4230769231,"roleplay_score_mean":4.962962963,"reasoning_score_w_mean":3.6923076923,"reasoning_score_mean":3.9351851852,"math_score_w_mean":7.9565217391,"math_score_mean":7.9416666667,"coding_score_w_mean":6.3888888889,"coding_score_mean":6.3888888889,"extraction_score_w_mean":5.85,"extraction_score_mean":5.85,"stem_score_w_mean":4.85,"stem_score_mean":4.85,"humanities_score_w_mean":5.6111111111,"humanities_score_mean":5.6111111111,"persian_general_knowledge_score_w_mean":1.0476190476,"persian_general_knowledge_score_mean":1.0416666667,"chatbot_rag_score_w_mean":9.1333333333,"chatbot_rag_score_mean":9.1166666667}
40
+ {"Model Name":"yandexgpt-lite","thinking_method":"❌","model_url":"https_google.com","parameters_count":"8000000000","source_type":"Open-Source","score_w_mean":5.2326530612,"score_mean":5.2783333333,"writing_score_w_mean":4.6666666667,"writing_score_mean":4.675,"roleplay_score_w_mean":4.7142857143,"roleplay_score_mean":4.5166666667,"reasoning_score_w_mean":3.4666666667,"reasoning_score_mean":3.5916666667,"math_score_w_mean":7.6956521739,"math_score_mean":7.7833333333,"coding_score_w_mean":4.2,"coding_score_mean":4.2,"extraction_score_w_mean":6.05,"extraction_score_mean":6.05,"stem_score_w_mean":5.95,"stem_score_mean":5.95,"humanities_score_w_mean":5.7,"humanities_score_mean":5.7,"persian_general_knowledge_score_w_mean":1.2222222222,"persian_general_knowledge_score_mean":1.2666666667,"chatbot_rag_score_w_mean":9.0666666667,"chatbot_rag_score_mean":9.05}
41
  {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"1000000000","source_type":"Open-Source","score_w_mean":4.3829787234,"score_mean":4.3333333333,"writing_score_w_mean":5.5185185185,"writing_score_mean":5.3083333333,"roleplay_score_w_mean":5.6428571429,"roleplay_score_mean":5.2916666667,"reasoning_score_w_mean":2.7666666667,"reasoning_score_mean":2.925,"math_score_w_mean":4.2857142857,"math_score_mean":4.4444444444,"coding_score_w_mean":3.35,"coding_score_mean":3.35,"extraction_score_w_mean":2.3333333333,"extraction_score_mean":2.3333333333,"stem_score_w_mean":4.6875,"stem_score_mean":4.6875,"humanities_score_w_mean":7.1111111111,"humanities_score_mean":7.1111111111,"persian_general_knowledge_score_w_mean":1.2222222222,"persian_general_knowledge_score_mean":1.2166666667,"chatbot_rag_score_w_mean":6.8333333333,"chatbot_rag_score_mean":6.825}
42
  {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https_google.com","parameters_count":"3210000000","source_type":"Open-Source","score_w_mean":3.9224806202,"score_mean":3.7564102564,"writing_score_w_mean":3.8333333333,"writing_score_mean":3.7261904762,"roleplay_score_w_mean":2.8571428571,"roleplay_score_mean":2.7333333333,"reasoning_score_w_mean":null,"reasoning_score_mean":null,"math_score_w_mean":null,"math_score_mean":null,"coding_score_w_mean":null,"coding_score_mean":null,"extraction_score_w_mean":4.1,"extraction_score_mean":4.1,"stem_score_w_mean":3.8125,"stem_score_mean":3.8125,"humanities_score_w_mean":2.6428571429,"humanities_score_mean":2.6428571429,"persian_general_knowledge_score_w_mean":1.1111111111,"persian_general_knowledge_score_mean":1.15,"chatbot_rag_score_w_mean":7.6,"chatbot_rag_score_mean":7.4583333333}
43
  {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https_google.com","parameters_count":"7250000000","source_type":"Open-Source","score_w_mean":3.6885245902,"score_mean":3.3982683983,"writing_score_w_mean":3.8461538462,"writing_score_mean":3.1166666667,"roleplay_score_w_mean":1.9375,"roleplay_score_mean":1.8333333333,"reasoning_score_w_mean":2.2173913043,"reasoning_score_mean":2.125,"math_score_w_mean":2.8260869565,"math_score_mean":2.675,"coding_score_w_mean":4.1666666667,"coding_score_mean":4.1666666667,"extraction_score_w_mean":3.2777777778,"extraction_score_mean":3.2777777778,"stem_score_w_mean":1.6666666667,"stem_score_mean":1.6666666667,"humanities_score_w_mean":2.125,"humanities_score_mean":2.125,"persian_general_knowledge_score_w_mean":1.0,"persian_general_knowledge_score_mean":1.0,"chatbot_rag_score_w_mean":9.2,"chatbot_rag_score_mean":9.1333333333}
leaderboard/boards_data/ner_arman.jsonl CHANGED
@@ -1,44 +1,45 @@
1
- {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.578306047,"ner_arman_precision_mean":0.5583631307,"ner_arman_recall_mean":0.6250099325,"nlu_score":0.7143086066}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","ner_arman_f1_mean":0.1239012808,"ner_arman_precision_mean":0.1171036949,"ner_arman_recall_mean":0.1388160509,"nlu_score":0.628506628}
3
- {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","ner_arman_f1_mean":0.3839211973,"ner_arman_precision_mean":0.3292326466,"ner_arman_recall_mean":0.5049662296,"nlu_score":0.6241793507}
4
- {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","ner_arman_f1_mean":0.0031613599,"ner_arman_precision_mean":0.0024235201,"ner_arman_recall_mean":0.0047675805,"nlu_score":0.6297634971}
 
 
 
5
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","ner_arman_f1_mean":0.2955295201,"ner_arman_precision_mean":0.2736080368,"ner_arman_recall_mean":0.3462455304,"nlu_score":0.6470954618}
6
- {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.5087496797,"ner_arman_precision_mean":0.4749602702,"ner_arman_recall_mean":0.5891338896,"nlu_score":0.7144353486}
7
- {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.5238146835,"ner_arman_precision_mean":0.511958681,"ner_arman_recall_mean":0.5638855781,"nlu_score":0.6749652797}
8
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","ner_arman_f1_mean":0.090559262,"ner_arman_precision_mean":0.0812673818,"ner_arman_recall_mean":0.1104290822,"nlu_score":0.6458443785}
9
- {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","ner_arman_f1_mean":0.5030535945,"ner_arman_precision_mean":0.4617288155,"ner_arman_recall_mean":0.5952522845,"nlu_score":0.6552152029}
10
- {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.3097820535,"ner_arman_precision_mean":0.2833333333,"ner_arman_recall_mean":0.3710568137,"nlu_score":0.6758278127}
11
- {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","ner_arman_f1_mean":0.4764396046,"ner_arman_precision_mean":0.4205999205,"ner_arman_recall_mean":0.5997417561,"nlu_score":0.699116864}
12
- {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.5179514235,"ner_arman_precision_mean":0.478894943,"ner_arman_recall_mean":0.6093762416,"nlu_score":0.7178891542}
13
- {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","ner_arman_f1_mean":0.5091463761,"ner_arman_precision_mean":0.4719705999,"ner_arman_recall_mean":0.5898887565,"nlu_score":0.6898261633}
14
- {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","ner_arman_f1_mean":0.492138652,"ner_arman_precision_mean":0.4553833929,"ner_arman_recall_mean":0.5783671037,"nlu_score":0.6460328733}
15
- {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","ner_arman_f1_mean":0.4408498401,"ner_arman_precision_mean":0.4206197855,"ner_arman_recall_mean":0.487067938,"nlu_score":0.6714091535}
16
- {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","ner_arman_f1_mean":0.1349481402,"ner_arman_precision_mean":0.1235087122,"ner_arman_recall_mean":0.1586809694,"nlu_score":0.4086928082}
17
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.0134154417,"ner_arman_precision_mean":0.0131505761,"ner_arman_recall_mean":0.0147993643,"nlu_score":0.3749414991}
18
- {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","ner_arman_f1_mean":0.028185021,"ner_arman_precision_mean":0.0278440732,"ner_arman_recall_mean":0.0304295943,"nlu_score":0.5661558794}
 
 
19
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","ner_arman_f1_mean":0.1404403172,"ner_arman_precision_mean":0.1243629037,"ner_arman_recall_mean":0.1826181963,"nlu_score":0.456845738}
 
20
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","ner_arman_f1_mean":0.4737820913,"ner_arman_precision_mean":0.4382598331,"ner_arman_recall_mean":0.5517481128,"nlu_score":0.6752949557}
 
 
 
21
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","ner_arman_f1_mean":0.3426542402,"ner_arman_precision_mean":0.3283122387,"ner_arman_recall_mean":0.3950735002,"nlu_score":0.5121418762}
22
- {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","ner_arman_f1_mean":0.0,"ner_arman_precision_mean":0.0,"ner_arman_recall_mean":0.0,"nlu_score":0.3619547874}
23
- {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","ner_arman_f1_mean":0.369949366,"ner_arman_precision_mean":0.3251050003,"ner_arman_recall_mean":0.4785061581,"nlu_score":0.3928685253}
 
24
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","ner_arman_f1_mean":0.46241695,"ner_arman_precision_mean":0.4338001589,"ner_arman_recall_mean":0.5298768375,"nlu_score":0.6800109206}
25
- {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.3636093611,"ner_arman_precision_mean":0.3377433453,"ner_arman_recall_mean":0.4240365515,"nlu_score":0.6833497104}
 
26
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.4897596643,"ner_arman_precision_mean":0.4627021965,"ner_arman_recall_mean":0.5499602702,"nlu_score":0.7207167537}
27
- {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.0374396958,"ner_arman_precision_mean":0.0342669845,"ner_arman_recall_mean":0.0448549861,"nlu_score":0.6459120734}
28
- {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","ner_arman_f1_mean":0.492822101,"ner_arman_precision_mean":0.4530827367,"ner_arman_recall_mean":0.580708035,"nlu_score":0.4824528512}
29
- {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.5838038137,"ner_arman_precision_mean":0.5621374652,"ner_arman_recall_mean":0.6348629321,"nlu_score":0.7050532433}
30
- {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.6048141039,"ner_arman_precision_mean":0.5945967422,"ner_arman_recall_mean":0.6437822805,"nlu_score":0.6944128198}
31
- {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.5777782078,"ner_arman_precision_mean":0.5722089789,"ner_arman_recall_mean":0.6065156933,"nlu_score":0.6914202844}
32
- {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","ner_arman_f1_mean":0.0,"ner_arman_precision_mean":0.0,"ner_arman_recall_mean":0.0,"nlu_score":0.531045981}
33
- {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.3918497805,"ner_arman_precision_mean":0.3656932857,"ner_arman_recall_mean":0.4707191101,"nlu_score":0.5241296095}
34
- {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.4520824626,"ner_arman_precision_mean":0.4047789318,"ner_arman_recall_mean":0.5640246325,"nlu_score":0.6262096694}
35
- {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","ner_arman_f1_mean":0.4131203057,"ner_arman_precision_mean":0.3882314547,"ner_arman_recall_mean":0.4742947954,"nlu_score":0.6443219619}
36
- {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","ner_arman_f1_mean":0.1587859697,"ner_arman_precision_mean":0.1553465009,"ner_arman_recall_mean":0.1764799364,"nlu_score":0.5968415875}
37
- {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","ner_arman_f1_mean":0.1625858448,"ner_arman_precision_mean":0.158174414,"ner_arman_recall_mean":0.1884982122,"nlu_score":0.3916645306}
38
- {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.5492720496,"ner_arman_precision_mean":0.5296185936,"ner_arman_recall_mean":0.5959078268,"nlu_score":0.7146808531}
39
- {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","ner_arman_f1_mean":0.247080201,"ner_arman_precision_mean":0.2176003178,"ner_arman_recall_mean":0.3168653159,"nlu_score":0.6361186163}
40
- {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","ner_arman_f1_mean":0.5000495531,"ner_arman_precision_mean":0.4607965832,"ner_arman_recall_mean":0.5927493047,"nlu_score":0.6255818412}
41
- {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","ner_arman_f1_mean":0.0638846321,"ner_arman_precision_mean":0.0494466201,"ner_arman_recall_mean":0.1084425904,"nlu_score":0.1368924446}
42
- {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","ner_arman_f1_mean":0.0,"ner_arman_precision_mean":0.0,"ner_arman_recall_mean":0.0,"nlu_score":0.046805056}
43
- {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.4666381162,"ner_arman_precision_mean":0.4301038651,"ner_arman_recall_mean":0.5461462058,"nlu_score":0.6992555201}
44
- {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","ner_arman_f1_mean":0.0,"ner_arman_precision_mean":0.0,"ner_arman_recall_mean":0.0,"nlu_score":0.0372987683}
 
1
+ {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.4520824626,"ner_arman_precision_mean":0.4047789318,"ner_arman_recall_mean":0.5640246325,"nlu_score":0.6262096694}
2
+ {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","ner_arman_f1_mean":0.0,"ner_arman_precision_mean":0.0,"ner_arman_recall_mean":0.0,"nlu_score":0.531045981}
3
+ {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.5838038137,"ner_arman_precision_mean":0.5621374652,"ner_arman_recall_mean":0.6348629321,"nlu_score":0.7050532433}
4
+ {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","ner_arman_f1_mean":0.247080201,"ner_arman_precision_mean":0.2176003178,"ner_arman_recall_mean":0.3168653159,"nlu_score":0.6361186163}
5
+ {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","ner_arman_f1_mean":0.1349481402,"ner_arman_precision_mean":0.1235087122,"ner_arman_recall_mean":0.1586809694,"nlu_score":0.4086928082}
6
+ {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","ner_arman_f1_mean":0.492138652,"ner_arman_precision_mean":0.4553833929,"ner_arman_recall_mean":0.5783671037,"nlu_score":0.6460328733}
7
+ {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","ner_arman_f1_mean":0.4408498401,"ner_arman_precision_mean":0.4206197855,"ner_arman_recall_mean":0.487067938,"nlu_score":0.6714091535}
8
+ {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.6048141039,"ner_arman_precision_mean":0.5945967422,"ner_arman_recall_mean":0.6437822805,"nlu_score":0.6944128198}
9
+ {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","ner_arman_f1_mean":0.4131203057,"ner_arman_precision_mean":0.3882314547,"ner_arman_recall_mean":0.4742947954,"nlu_score":0.6443219619}
10
+ {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.4666381162,"ner_arman_precision_mean":0.4301038651,"ner_arman_recall_mean":0.5461462058,"nlu_score":0.6992555201}
11
+ {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.5238146835,"ner_arman_precision_mean":0.511958681,"ner_arman_recall_mean":0.5638855781,"nlu_score":0.6749652797}
12
+ {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","ner_arman_f1_mean":0.5000495531,"ner_arman_precision_mean":0.4607965832,"ner_arman_recall_mean":0.5927493047,"nlu_score":0.6255818412}
13
+ {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","ner_arman_f1_mean":0.1625858448,"ner_arman_precision_mean":0.158174414,"ner_arman_recall_mean":0.1884982122,"nlu_score":0.3916645306}
14
+ {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.0374396958,"ner_arman_precision_mean":0.0342669845,"ner_arman_recall_mean":0.0448549861,"nlu_score":0.6459120734}
15
+ {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","ner_arman_f1_mean":0.0638846321,"ner_arman_precision_mean":0.0494466201,"ner_arman_recall_mean":0.1084425904,"nlu_score":0.1368924446}
16
+ {"Model Name":"yandexgpt-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8000000000","source_type":"Open-Source","ner_arman_f1_mean":0.3603427165,"ner_arman_precision_mean":0.3233256712,"ner_arman_recall_mean":0.4685538339,"nlu_score":0.5761104945}
17
+ {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","ner_arman_f1_mean":0.028185021,"ner_arman_precision_mean":0.0278440732,"ner_arman_recall_mean":0.0304295943,"nlu_score":0.5661558794}
18
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","ner_arman_f1_mean":0.1239012808,"ner_arman_precision_mean":0.1171036949,"ner_arman_recall_mean":0.1388160509,"nlu_score":0.628506628}
19
+ {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","ner_arman_f1_mean":0.4764396046,"ner_arman_precision_mean":0.4205999205,"ner_arman_recall_mean":0.5997417561,"nlu_score":0.699116864}
20
+ {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","ner_arman_f1_mean":0.369949366,"ner_arman_precision_mean":0.3251050003,"ner_arman_recall_mean":0.4785061581,"nlu_score":0.3928685253}
21
+ {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","ner_arman_f1_mean":0.1587859697,"ner_arman_precision_mean":0.1553465009,"ner_arman_recall_mean":0.1764799364,"nlu_score":0.5968415875}
22
+ {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.5492720496,"ner_arman_precision_mean":0.5296185936,"ner_arman_recall_mean":0.5959078268,"nlu_score":0.7146808531}
23
+ {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.578306047,"ner_arman_precision_mean":0.5583631307,"ner_arman_recall_mean":0.6250099325,"nlu_score":0.7143086066}
24
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","ner_arman_f1_mean":0.2955295201,"ner_arman_precision_mean":0.2736080368,"ner_arman_recall_mean":0.3462455304,"nlu_score":0.6470954618}
 
 
25
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","ner_arman_f1_mean":0.090559262,"ner_arman_precision_mean":0.0812673818,"ner_arman_recall_mean":0.1104290822,"nlu_score":0.6458443785}
26
+ {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","ner_arman_f1_mean":0.3839211973,"ner_arman_precision_mean":0.3292326466,"ner_arman_recall_mean":0.5049662296,"nlu_score":0.6241793507}
27
+ {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","ner_arman_f1_mean":0.0,"ner_arman_precision_mean":0.0,"ner_arman_recall_mean":0.0,"nlu_score":0.0372987683}
 
 
 
 
 
 
28
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.0134154417,"ner_arman_precision_mean":0.0131505761,"ner_arman_recall_mean":0.0147993643,"nlu_score":0.3749414991}
29
+ {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.3097820535,"ner_arman_precision_mean":0.2833333333,"ner_arman_recall_mean":0.3710568137,"nlu_score":0.6758278127}
30
+ {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","ner_arman_f1_mean":0.492822101,"ner_arman_precision_mean":0.4530827367,"ner_arman_recall_mean":0.580708035,"nlu_score":0.4824528512}
31
+ {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.5777782078,"ner_arman_precision_mean":0.5722089789,"ner_arman_recall_mean":0.6065156933,"nlu_score":0.6914202844}
32
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","ner_arman_f1_mean":0.1404403172,"ner_arman_precision_mean":0.1243629037,"ner_arman_recall_mean":0.1826181963,"nlu_score":0.456845738}
33
+ {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.5179514235,"ner_arman_precision_mean":0.478894943,"ner_arman_recall_mean":0.6093762416,"nlu_score":0.7178891542}
34
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","ner_arman_f1_mean":0.4737820913,"ner_arman_precision_mean":0.4382598331,"ner_arman_recall_mean":0.5517481128,"nlu_score":0.6752949557}
35
+ {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.3918497805,"ner_arman_precision_mean":0.3656932857,"ner_arman_recall_mean":0.4707191101,"nlu_score":0.5241296095}
36
+ {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","ner_arman_f1_mean":0.0,"ner_arman_precision_mean":0.0,"ner_arman_recall_mean":0.0,"nlu_score":0.046805056}
37
+ {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.3636093611,"ner_arman_precision_mean":0.3377433453,"ner_arman_recall_mean":0.4240365515,"nlu_score":0.6833497104}
38
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","ner_arman_f1_mean":0.3426542402,"ner_arman_precision_mean":0.3283122387,"ner_arman_recall_mean":0.3950735002,"nlu_score":0.5121418762}
39
+ {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","ner_arman_f1_mean":0.5091463761,"ner_arman_precision_mean":0.4719705999,"ner_arman_recall_mean":0.5898887565,"nlu_score":0.6898261633}
40
+ {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.5087496797,"ner_arman_precision_mean":0.4749602702,"ner_arman_recall_mean":0.5891338896,"nlu_score":0.7144353486}
41
+ {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","ner_arman_f1_mean":0.5030535945,"ner_arman_precision_mean":0.4617288155,"ner_arman_recall_mean":0.5952522845,"nlu_score":0.6552152029}
42
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","ner_arman_f1_mean":0.46241695,"ner_arman_precision_mean":0.4338001589,"ner_arman_recall_mean":0.5298768375,"nlu_score":0.6800109206}
43
+ {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","ner_arman_f1_mean":0.0,"ner_arman_precision_mean":0.0,"ner_arman_recall_mean":0.0,"nlu_score":0.3619547874}
44
+ {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","ner_arman_f1_mean":0.0031613599,"ner_arman_precision_mean":0.0024235201,"ner_arman_recall_mean":0.0047675805,"nlu_score":0.6297634971}
45
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.4897596643,"ner_arman_precision_mean":0.4627021965,"ner_arman_recall_mean":0.5499602702,"nlu_score":0.7207167537}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leaderboard/boards_data/nli_farstail.jsonl CHANGED
@@ -1,44 +1,45 @@
1
- {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.868286445,"nli_farstail_precision_modified":0.8795611895,"nli_farstail_recall_modified":0.8694171245,"nli_farstail_fscore_modified":0.8680818161,"nli_farstail_acc":0.868286445,"nli_farstail_precision":0.8795611895,"nli_farstail_recall":0.8694171245,"nli_farstail_fscore":0.8680818161,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.7143086066}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.7384910486,"nli_farstail_precision_modified":0.7662350641,"nli_farstail_recall_modified":0.7395626513,"nli_farstail_fscore_modified":0.7354972179,"nli_farstail_acc":0.7399103139,"nli_farstail_precision":0.7677076491,"nli_farstail_recall":0.7409839761,"nli_farstail_fscore":0.7369107296,"nli_farstail_valid_output_ratio":0.9980818414,"nlu_score":0.628506628}
3
- {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6361892583,"nli_farstail_precision_modified":0.6743240456,"nli_farstail_recall_modified":0.6374538968,"nli_farstail_fscore_modified":0.621131875,"nli_farstail_acc":0.6370038412,"nli_farstail_precision":0.6751874567,"nli_farstail_recall":0.638270099,"nli_farstail_fscore":0.6219271782,"nli_farstail_valid_output_ratio":0.9987212276,"nlu_score":0.6241793507}
4
- {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6898976982,"nli_farstail_precision_modified":0.752223151,"nli_farstail_recall_modified":0.691698665,"nli_farstail_fscore_modified":0.6834607357,"nli_farstail_acc":0.6898976982,"nli_farstail_precision":0.752223151,"nli_farstail_recall":0.691698665,"nli_farstail_fscore":0.6834607357,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6297634971}
 
 
 
5
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.7570332481,"nli_farstail_precision_modified":0.7721152176,"nli_farstail_recall_modified":0.7583616595,"nli_farstail_fscore_modified":0.7534282542,"nli_farstail_acc":0.7570332481,"nli_farstail_precision":0.7721152176,"nli_farstail_recall":0.7583616595,"nli_farstail_fscore":0.7534282542,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6470954618}
6
- {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.8542199488,"nli_farstail_precision_modified":0.8634293173,"nli_farstail_recall_modified":0.8539318442,"nli_farstail_fscore_modified":0.8536753404,"nli_farstail_acc":0.8553137004,"nli_farstail_precision":0.8645348606,"nli_farstail_recall":0.8550252268,"nli_farstail_fscore":0.8547683946,"nli_farstail_valid_output_ratio":0.9987212276,"nlu_score":0.7144353486}
7
- {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.7653452685,"nli_farstail_precision_modified":0.7847248212,"nli_farstail_recall_modified":0.7659002614,"nli_farstail_fscore_modified":0.7629523234,"nli_farstail_acc":0.7653452685,"nli_farstail_precision":0.7847248212,"nli_farstail_recall":0.7659002614,"nli_farstail_fscore":0.7629523234,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6749652797}
8
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.7685421995,"nli_farstail_precision_modified":0.7858737557,"nli_farstail_recall_modified":0.7699859437,"nli_farstail_fscore_modified":0.7654192549,"nli_farstail_acc":0.7685421995,"nli_farstail_precision":0.7858737557,"nli_farstail_recall":0.7699859437,"nli_farstail_fscore":0.7654192549,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6458443785}
9
- {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","nli_farstail_acc_modified":0.7078005115,"nli_farstail_precision_modified":0.7418983007,"nli_farstail_recall_modified":0.70995102,"nli_farstail_fscore_modified":0.6987179454,"nli_farstail_acc":0.7091607944,"nli_farstail_precision":0.7433241143,"nli_farstail_recall":0.7113154358,"nli_farstail_fscore":0.700060773,"nli_farstail_valid_output_ratio":0.9980818414,"nlu_score":0.6552152029}
10
- {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.8069053708,"nli_farstail_precision_modified":0.8274480721,"nli_farstail_recall_modified":0.8078020735,"nli_farstail_fscore_modified":0.8055860349,"nli_farstail_acc":0.8069053708,"nli_farstail_precision":0.8274480721,"nli_farstail_recall":0.8078020735,"nli_farstail_fscore":0.8055860349,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6758278127}
11
- {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6534526854,"nli_farstail_precision_modified":0.7279477253,"nli_farstail_recall_modified":0.6559403118,"nli_farstail_fscore_modified":0.6402480245,"nli_farstail_acc":0.6534526854,"nli_farstail_precision":0.7279477253,"nli_farstail_recall":0.6559403118,"nli_farstail_fscore":0.6402480245,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.699116864}
12
- {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.8132992327,"nli_farstail_precision_modified":0.8243913951,"nli_farstail_recall_modified":0.8129377693,"nli_farstail_fscore_modified":0.8112364262,"nli_farstail_acc":0.8180064309,"nli_farstail_precision":0.8291627923,"nli_farstail_recall":0.8176428754,"nli_farstail_fscore":0.8159316853,"nli_farstail_valid_output_ratio":0.9942455243,"nlu_score":0.7178891542}
13
- {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6918158568,"nli_farstail_precision_modified":0.76120773,"nli_farstail_recall_modified":0.696633339,"nli_farstail_fscore_modified":0.6637995215,"nli_farstail_acc":0.6918158568,"nli_farstail_precision":0.76120773,"nli_farstail_recall":0.696633339,"nli_farstail_fscore":0.6637995215,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6898261633}
14
- {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","nli_farstail_acc_modified":0.7007672634,"nli_farstail_precision_modified":0.7596784307,"nli_farstail_recall_modified":0.7039816989,"nli_farstail_fscore_modified":0.6834876952,"nli_farstail_acc":0.7007672634,"nli_farstail_precision":0.7596784307,"nli_farstail_recall":0.7039816989,"nli_farstail_fscore":0.6834876952,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6460328733}
15
- {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","nli_farstail_acc_modified":0.716112532,"nli_farstail_precision_modified":0.7794942647,"nli_farstail_recall_modified":0.7185458002,"nli_farstail_fscore_modified":0.7094139725,"nli_farstail_acc":0.716112532,"nli_farstail_precision":0.7794942647,"nli_farstail_recall":0.7185458002,"nli_farstail_fscore":0.7094139725,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6714091535}
16
- {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6822250639,"nli_farstail_precision_modified":0.7615681175,"nli_farstail_recall_modified":0.6809278738,"nli_farstail_fscore_modified":0.6476118203,"nli_farstail_acc":0.7033618985,"nli_farstail_precision":0.7851631746,"nli_farstail_recall":0.7020245185,"nli_farstail_fscore":0.6676762603,"nli_farstail_valid_output_ratio":0.9699488491,"nlu_score":0.4086928082}
17
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.0249360614,"nli_farstail_precision_modified":0.0160900081,"nli_farstail_recall_modified":0.0174126172,"nli_farstail_fscore_modified":0.0167242212,"nli_farstail_acc":0.9069767442,"nli_farstail_precision":0.5852272727,"nli_farstail_recall":0.6333333333,"nli_farstail_fscore":0.6082949309,"nli_farstail_valid_output_ratio":0.0274936061,"nlu_score":0.3749414991}
18
- {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.7461636829,"nli_farstail_precision_modified":0.8279044878,"nli_farstail_recall_modified":0.7431719278,"nli_farstail_fscore_modified":0.7484099134,"nli_farstail_acc":0.7461636829,"nli_farstail_precision":0.8279044878,"nli_farstail_recall":0.7431719278,"nli_farstail_fscore":0.7484099134,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.5661558794}
 
 
19
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","nli_farstail_acc_modified":0.618286445,"nli_farstail_precision_modified":0.6499905475,"nli_farstail_recall_modified":0.6180562888,"nli_farstail_fscore_modified":0.612547215,"nli_farstail_acc":0.6254851229,"nli_farstail_precision":0.6575583547,"nli_farstail_recall":0.625252287,"nli_farstail_fscore":0.6196790713,"nli_farstail_valid_output_ratio":0.9884910486,"nlu_score":0.456845738}
 
20
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.7405509289,"nli_farstail_precision_modified":0.79378989,"nli_farstail_recall_modified":0.7441180803,"nli_farstail_fscore_modified":0.7266455427,"nli_farstail_acc":0.7405509289,"nli_farstail_precision":0.79378989,"nli_farstail_recall":0.7441180803,"nli_farstail_fscore":0.7266455427,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6752949557}
 
 
 
21
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","nli_farstail_acc_modified":0.1726342711,"nli_farstail_precision_modified":0.1856398147,"nli_farstail_recall_modified":0.156398243,"nli_farstail_fscore_modified":0.1549950666,"nli_farstail_acc":0.7277628032,"nli_farstail_precision":0.7825894076,"nli_farstail_recall":0.6593176606,"nli_farstail_fscore":0.6534023831,"nli_farstail_valid_output_ratio":0.2372122762,"nlu_score":0.5121418762}
22
- {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.3433503836,"nli_farstail_precision_modified":0.5618320225,"nli_farstail_recall_modified":0.3440157631,"nli_farstail_fscore_modified":0.279029917,"nli_farstail_acc":0.3435700576,"nli_farstail_precision":0.56219148,"nli_farstail_recall":0.3442358627,"nli_farstail_fscore":0.279208439,"nli_farstail_valid_output_ratio":0.9993606138,"nlu_score":0.3619547874}
23
- {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","nli_farstail_acc_modified":0.378516624,"nli_farstail_precision_modified":0.4433198503,"nli_farstail_recall_modified":0.3422920715,"nli_farstail_fscore_modified":0.347492956,"nli_farstail_acc":0.6932084309,"nli_farstail_precision":0.8118878757,"nli_farstail_recall":0.626867447,"nli_farstail_fscore":0.636392252,"nli_farstail_valid_output_ratio":0.5460358056,"nlu_score":0.3928685253}
 
24
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","nli_farstail_acc_modified":0.726342711,"nli_farstail_precision_modified":0.8062451443,"nli_farstail_recall_modified":0.7314466615,"nli_farstail_fscore_modified":0.6980605986,"nli_farstail_acc":0.726342711,"nli_farstail_precision":0.8062451443,"nli_farstail_recall":0.7314466615,"nli_farstail_fscore":0.6980605986,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6800109206}
25
- {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.7410485934,"nli_farstail_precision_modified":0.7633275849,"nli_farstail_recall_modified":0.7423464162,"nli_farstail_fscore_modified":0.7375659033,"nli_farstail_acc":0.7410485934,"nli_farstail_precision":0.7633275849,"nli_farstail_recall":0.7423464162,"nli_farstail_fscore":0.7375659033,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6833497104}
 
26
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.8689258312,"nli_farstail_precision_modified":0.8809296764,"nli_farstail_recall_modified":0.8678121788,"nli_farstail_fscore_modified":0.8682707156,"nli_farstail_acc":0.8689258312,"nli_farstail_precision":0.8809296764,"nli_farstail_recall":0.8678121788,"nli_farstail_fscore":0.8682707156,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.7207167537}
27
- {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.7647058824,"nli_farstail_precision_modified":0.7814499507,"nli_farstail_recall_modified":0.7670439826,"nli_farstail_fscore_modified":0.7573199649,"nli_farstail_acc":0.7656850192,"nli_farstail_precision":0.7824505269,"nli_farstail_recall":0.7680261132,"nli_farstail_fscore":0.7582896447,"nli_farstail_valid_output_ratio":0.9987212276,"nlu_score":0.6459120734}
28
- {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.5051150895,"nli_farstail_precision_modified":0.5526701994,"nli_farstail_recall_modified":0.4383899815,"nli_farstail_fscore_modified":0.4569544839,"nli_farstail_acc":0.8085977482,"nli_farstail_precision":0.8847248637,"nli_farstail_recall":0.7017829387,"nli_farstail_fscore":0.7315013438,"nli_farstail_valid_output_ratio":0.6246803069,"nlu_score":0.4824528512}
29
- {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.6943734015,"nli_farstail_precision_modified":0.7566862174,"nli_farstail_recall_modified":0.698049667,"nli_farstail_fscore_modified":0.679445114,"nli_farstail_acc":0.6943734015,"nli_farstail_precision":0.7566862174,"nli_farstail_recall":0.698049667,"nli_farstail_fscore":0.679445114,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.7050532433}
30
- {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.7333759591,"nli_farstail_precision_modified":0.7691251939,"nli_farstail_recall_modified":0.7368411575,"nli_farstail_fscore_modified":0.7229770101,"nli_farstail_acc":0.7333759591,"nli_farstail_precision":0.7691251939,"nli_farstail_recall":0.7368411575,"nli_farstail_fscore":0.7229770101,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6944128198}
31
- {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.6617647059,"nli_farstail_precision_modified":0.7729519221,"nli_farstail_recall_modified":0.6672320962,"nli_farstail_fscore_modified":0.6191223906,"nli_farstail_acc":0.6617647059,"nli_farstail_precision":0.7729519221,"nli_farstail_recall":0.6672320962,"nli_farstail_fscore":0.6191223906,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6914202844}
32
- {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.537084399,"nli_farstail_precision_modified":0.7429511025,"nli_farstail_recall_modified":0.5428343437,"nli_farstail_fscore_modified":0.4522202373,"nli_farstail_acc":0.537084399,"nli_farstail_precision":0.7429511025,"nli_farstail_recall":0.5428343437,"nli_farstail_fscore":0.4522202373,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.531045981}
33
- {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.3177749361,"nli_farstail_precision_modified":0.3674330881,"nli_farstail_recall_modified":0.3157646078,"nli_farstail_fscore_modified":0.300349348,"nli_farstail_acc":0.4737845567,"nli_farstail_precision":0.5478220684,"nli_farstail_recall":0.4707872704,"nli_farstail_fscore":0.447803985,"nli_farstail_valid_output_ratio":0.6707161125,"nlu_score":0.5241296095}
34
- {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.7001278772,"nli_farstail_precision_modified":0.7089877668,"nli_farstail_recall_modified":0.701635311,"nli_farstail_fscore_modified":0.6963810855,"nli_farstail_acc":0.7001278772,"nli_farstail_precision":0.7089877668,"nli_farstail_recall":0.701635311,"nli_farstail_fscore":0.6963810855,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6262096694}
35
- {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6943734015,"nli_farstail_precision_modified":0.7191142829,"nli_farstail_recall_modified":0.6963118909,"nli_farstail_fscore_modified":0.6882373091,"nli_farstail_acc":0.6943734015,"nli_farstail_precision":0.7191142829,"nli_farstail_recall":0.6963118909,"nli_farstail_fscore":0.6882373091,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6443219619}
36
- {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6592071611,"nli_farstail_precision_modified":0.7292371837,"nli_farstail_recall_modified":0.6555663858,"nli_farstail_fscore_modified":0.6172863539,"nli_farstail_acc":0.6621708414,"nli_farstail_precision":0.7325157067,"nli_farstail_recall":0.6585136977,"nli_farstail_fscore":0.6200615655,"nli_farstail_valid_output_ratio":0.9955242967,"nlu_score":0.5968415875}
37
- {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","nli_farstail_acc_modified":0.5421994885,"nli_farstail_precision_modified":0.5647711826,"nli_farstail_recall_modified":0.5444660793,"nli_farstail_fscore_modified":0.5273172992,"nli_farstail_acc":0.5449871465,"nli_farstail_precision":0.5676748904,"nli_farstail_recall":0.5472653908,"nli_farstail_fscore":0.5300284421,"nli_farstail_valid_output_ratio":0.9948849105,"nlu_score":0.3916645306}
38
- {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.8254475703,"nli_farstail_precision_modified":0.8384109819,"nli_farstail_recall_modified":0.8264814456,"nli_farstail_fscore_modified":0.8238714462,"nli_farstail_acc":0.8254475703,"nli_farstail_precision":0.8384109819,"nli_farstail_recall":0.8264814456,"nli_farstail_fscore":0.8238714462,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.7146808531}
39
- {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6854219949,"nli_farstail_precision_modified":0.7452254514,"nli_farstail_recall_modified":0.6884495258,"nli_farstail_fscore_modified":0.6690112082,"nli_farstail_acc":0.6858605246,"nli_farstail_precision":0.7457022432,"nli_farstail_recall":0.6888899926,"nli_farstail_fscore":0.6694392384,"nli_farstail_valid_output_ratio":0.9993606138,"nlu_score":0.6361186163}
40
- {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6086956522,"nli_farstail_precision_modified":0.6940003558,"nli_farstail_recall_modified":0.6092669096,"nli_farstail_fscore_modified":0.5908473619,"nli_farstail_acc":0.6110397946,"nli_farstail_precision":0.6966730144,"nli_farstail_recall":0.611613252,"nli_farstail_fscore":0.593122769,"nli_farstail_valid_output_ratio":0.9961636829,"nlu_score":0.6255818412}
41
- {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","nli_farstail_acc_modified":0.0,"nli_farstail_precision_modified":0.0,"nli_farstail_recall_modified":0.0,"nli_farstail_fscore_modified":0.0,"nli_farstail_acc":0.0,"nli_farstail_precision":0.0,"nli_farstail_recall":0.0,"nli_farstail_fscore":0.0,"nli_farstail_valid_output_ratio":0.0,"nlu_score":0.1368924446}
42
- {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","nli_farstail_acc_modified":0.0,"nli_farstail_precision_modified":0.0,"nli_farstail_recall_modified":0.0,"nli_farstail_fscore_modified":0.0,"nli_farstail_acc":0.0,"nli_farstail_precision":0.0,"nli_farstail_recall":0.0,"nli_farstail_fscore":0.0,"nli_farstail_valid_output_ratio":0.0,"nlu_score":0.046805056}
43
- {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.7677543186,"nli_farstail_precision_modified":0.8124976099,"nli_farstail_recall_modified":0.77106749,"nli_farstail_fscore_modified":0.7600055287,"nli_farstail_acc":0.7677543186,"nli_farstail_precision":0.8124976099,"nli_farstail_recall":0.77106749,"nli_farstail_fscore":0.7600055287,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6992555201}
44
- {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","nli_farstail_acc_modified":0.0,"nli_farstail_precision_modified":0.0,"nli_farstail_recall_modified":0.0,"nli_farstail_fscore_modified":0.0,"nli_farstail_acc":0.0,"nli_farstail_precision":0.0,"nli_farstail_recall":0.0,"nli_farstail_fscore":0.0,"nli_farstail_valid_output_ratio":0.0,"nlu_score":0.0372987683}
 
1
+ {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.7001278772,"nli_farstail_precision_modified":0.7089877668,"nli_farstail_recall_modified":0.701635311,"nli_farstail_fscore_modified":0.6963810855,"nli_farstail_acc":0.7001278772,"nli_farstail_precision":0.7089877668,"nli_farstail_recall":0.701635311,"nli_farstail_fscore":0.6963810855,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6262096694}
2
+ {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.537084399,"nli_farstail_precision_modified":0.7429511025,"nli_farstail_recall_modified":0.5428343437,"nli_farstail_fscore_modified":0.4522202373,"nli_farstail_acc":0.537084399,"nli_farstail_precision":0.7429511025,"nli_farstail_recall":0.5428343437,"nli_farstail_fscore":0.4522202373,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.531045981}
3
+ {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.6943734015,"nli_farstail_precision_modified":0.7566862174,"nli_farstail_recall_modified":0.698049667,"nli_farstail_fscore_modified":0.679445114,"nli_farstail_acc":0.6943734015,"nli_farstail_precision":0.7566862174,"nli_farstail_recall":0.698049667,"nli_farstail_fscore":0.679445114,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.7050532433}
4
+ {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6854219949,"nli_farstail_precision_modified":0.7452254514,"nli_farstail_recall_modified":0.6884495258,"nli_farstail_fscore_modified":0.6690112082,"nli_farstail_acc":0.6858605246,"nli_farstail_precision":0.7457022432,"nli_farstail_recall":0.6888899926,"nli_farstail_fscore":0.6694392384,"nli_farstail_valid_output_ratio":0.9993606138,"nlu_score":0.6361186163}
5
+ {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6822250639,"nli_farstail_precision_modified":0.7615681175,"nli_farstail_recall_modified":0.6809278738,"nli_farstail_fscore_modified":0.6476118203,"nli_farstail_acc":0.7033618985,"nli_farstail_precision":0.7851631746,"nli_farstail_recall":0.7020245185,"nli_farstail_fscore":0.6676762603,"nli_farstail_valid_output_ratio":0.9699488491,"nlu_score":0.4086928082}
6
+ {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","nli_farstail_acc_modified":0.7007672634,"nli_farstail_precision_modified":0.7596784307,"nli_farstail_recall_modified":0.7039816989,"nli_farstail_fscore_modified":0.6834876952,"nli_farstail_acc":0.7007672634,"nli_farstail_precision":0.7596784307,"nli_farstail_recall":0.7039816989,"nli_farstail_fscore":0.6834876952,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6460328733}
7
+ {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","nli_farstail_acc_modified":0.716112532,"nli_farstail_precision_modified":0.7794942647,"nli_farstail_recall_modified":0.7185458002,"nli_farstail_fscore_modified":0.7094139725,"nli_farstail_acc":0.716112532,"nli_farstail_precision":0.7794942647,"nli_farstail_recall":0.7185458002,"nli_farstail_fscore":0.7094139725,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6714091535}
8
+ {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.7333759591,"nli_farstail_precision_modified":0.7691251939,"nli_farstail_recall_modified":0.7368411575,"nli_farstail_fscore_modified":0.7229770101,"nli_farstail_acc":0.7333759591,"nli_farstail_precision":0.7691251939,"nli_farstail_recall":0.7368411575,"nli_farstail_fscore":0.7229770101,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6944128198}
9
+ {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6943734015,"nli_farstail_precision_modified":0.7191142829,"nli_farstail_recall_modified":0.6963118909,"nli_farstail_fscore_modified":0.6882373091,"nli_farstail_acc":0.6943734015,"nli_farstail_precision":0.7191142829,"nli_farstail_recall":0.6963118909,"nli_farstail_fscore":0.6882373091,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6443219619}
10
+ {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.7677543186,"nli_farstail_precision_modified":0.8124976099,"nli_farstail_recall_modified":0.77106749,"nli_farstail_fscore_modified":0.7600055287,"nli_farstail_acc":0.7677543186,"nli_farstail_precision":0.8124976099,"nli_farstail_recall":0.77106749,"nli_farstail_fscore":0.7600055287,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6992555201}
11
+ {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.7653452685,"nli_farstail_precision_modified":0.7847248212,"nli_farstail_recall_modified":0.7659002614,"nli_farstail_fscore_modified":0.7629523234,"nli_farstail_acc":0.7653452685,"nli_farstail_precision":0.7847248212,"nli_farstail_recall":0.7659002614,"nli_farstail_fscore":0.7629523234,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6749652797}
12
+ {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6086956522,"nli_farstail_precision_modified":0.6940003558,"nli_farstail_recall_modified":0.6092669096,"nli_farstail_fscore_modified":0.5908473619,"nli_farstail_acc":0.6110397946,"nli_farstail_precision":0.6966730144,"nli_farstail_recall":0.611613252,"nli_farstail_fscore":0.593122769,"nli_farstail_valid_output_ratio":0.9961636829,"nlu_score":0.6255818412}
13
+ {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","nli_farstail_acc_modified":0.5421994885,"nli_farstail_precision_modified":0.5647711826,"nli_farstail_recall_modified":0.5444660793,"nli_farstail_fscore_modified":0.5273172992,"nli_farstail_acc":0.5449871465,"nli_farstail_precision":0.5676748904,"nli_farstail_recall":0.5472653908,"nli_farstail_fscore":0.5300284421,"nli_farstail_valid_output_ratio":0.9948849105,"nlu_score":0.3916645306}
14
+ {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.7647058824,"nli_farstail_precision_modified":0.7814499507,"nli_farstail_recall_modified":0.7670439826,"nli_farstail_fscore_modified":0.7573199649,"nli_farstail_acc":0.7656850192,"nli_farstail_precision":0.7824505269,"nli_farstail_recall":0.7680261132,"nli_farstail_fscore":0.7582896447,"nli_farstail_valid_output_ratio":0.9987212276,"nlu_score":0.6459120734}
15
+ {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","nli_farstail_acc_modified":0.0,"nli_farstail_precision_modified":0.0,"nli_farstail_recall_modified":0.0,"nli_farstail_fscore_modified":0.0,"nli_farstail_acc":0.0,"nli_farstail_precision":0.0,"nli_farstail_recall":0.0,"nli_farstail_fscore":0.0,"nli_farstail_valid_output_ratio":0.0,"nlu_score":0.1368924446}
16
+ {"Model Name":"yandexgpt-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.4047314578,"nli_farstail_precision_modified":0.4394843848,"nli_farstail_recall_modified":0.4043652091,"nli_farstail_fscore_modified":0.3862934414,"nli_farstail_acc":0.5954844779,"nli_farstail_precision":0.6466167242,"nli_farstail_recall":0.5949456133,"nli_farstail_fscore":0.5683564838,"nli_farstail_valid_output_ratio":0.6796675192,"nlu_score":0.5761104945}
17
+ {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.7461636829,"nli_farstail_precision_modified":0.8279044878,"nli_farstail_recall_modified":0.7431719278,"nli_farstail_fscore_modified":0.7484099134,"nli_farstail_acc":0.7461636829,"nli_farstail_precision":0.8279044878,"nli_farstail_recall":0.7431719278,"nli_farstail_fscore":0.7484099134,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.5661558794}
18
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.7384910486,"nli_farstail_precision_modified":0.7662350641,"nli_farstail_recall_modified":0.7395626513,"nli_farstail_fscore_modified":0.7354972179,"nli_farstail_acc":0.7399103139,"nli_farstail_precision":0.7677076491,"nli_farstail_recall":0.7409839761,"nli_farstail_fscore":0.7369107296,"nli_farstail_valid_output_ratio":0.9980818414,"nlu_score":0.628506628}
19
+ {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6534526854,"nli_farstail_precision_modified":0.7279477253,"nli_farstail_recall_modified":0.6559403118,"nli_farstail_fscore_modified":0.6402480245,"nli_farstail_acc":0.6534526854,"nli_farstail_precision":0.7279477253,"nli_farstail_recall":0.6559403118,"nli_farstail_fscore":0.6402480245,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.699116864}
20
+ {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","nli_farstail_acc_modified":0.378516624,"nli_farstail_precision_modified":0.4433198503,"nli_farstail_recall_modified":0.3422920715,"nli_farstail_fscore_modified":0.347492956,"nli_farstail_acc":0.6932084309,"nli_farstail_precision":0.8118878757,"nli_farstail_recall":0.626867447,"nli_farstail_fscore":0.636392252,"nli_farstail_valid_output_ratio":0.5460358056,"nlu_score":0.3928685253}
21
+ {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6592071611,"nli_farstail_precision_modified":0.7292371837,"nli_farstail_recall_modified":0.6555663858,"nli_farstail_fscore_modified":0.6172863539,"nli_farstail_acc":0.6621708414,"nli_farstail_precision":0.7325157067,"nli_farstail_recall":0.6585136977,"nli_farstail_fscore":0.6200615655,"nli_farstail_valid_output_ratio":0.9955242967,"nlu_score":0.5968415875}
22
+ {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.8254475703,"nli_farstail_precision_modified":0.8384109819,"nli_farstail_recall_modified":0.8264814456,"nli_farstail_fscore_modified":0.8238714462,"nli_farstail_acc":0.8254475703,"nli_farstail_precision":0.8384109819,"nli_farstail_recall":0.8264814456,"nli_farstail_fscore":0.8238714462,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.7146808531}
23
+ {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.868286445,"nli_farstail_precision_modified":0.8795611895,"nli_farstail_recall_modified":0.8694171245,"nli_farstail_fscore_modified":0.8680818161,"nli_farstail_acc":0.868286445,"nli_farstail_precision":0.8795611895,"nli_farstail_recall":0.8694171245,"nli_farstail_fscore":0.8680818161,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.7143086066}
24
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.7570332481,"nli_farstail_precision_modified":0.7721152176,"nli_farstail_recall_modified":0.7583616595,"nli_farstail_fscore_modified":0.7534282542,"nli_farstail_acc":0.7570332481,"nli_farstail_precision":0.7721152176,"nli_farstail_recall":0.7583616595,"nli_farstail_fscore":0.7534282542,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6470954618}
 
 
25
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.7685421995,"nli_farstail_precision_modified":0.7858737557,"nli_farstail_recall_modified":0.7699859437,"nli_farstail_fscore_modified":0.7654192549,"nli_farstail_acc":0.7685421995,"nli_farstail_precision":0.7858737557,"nli_farstail_recall":0.7699859437,"nli_farstail_fscore":0.7654192549,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6458443785}
26
+ {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6361892583,"nli_farstail_precision_modified":0.6743240456,"nli_farstail_recall_modified":0.6374538968,"nli_farstail_fscore_modified":0.621131875,"nli_farstail_acc":0.6370038412,"nli_farstail_precision":0.6751874567,"nli_farstail_recall":0.638270099,"nli_farstail_fscore":0.6219271782,"nli_farstail_valid_output_ratio":0.9987212276,"nlu_score":0.6241793507}
27
+ {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","nli_farstail_acc_modified":0.0,"nli_farstail_precision_modified":0.0,"nli_farstail_recall_modified":0.0,"nli_farstail_fscore_modified":0.0,"nli_farstail_acc":0.0,"nli_farstail_precision":0.0,"nli_farstail_recall":0.0,"nli_farstail_fscore":0.0,"nli_farstail_valid_output_ratio":0.0,"nlu_score":0.0372987683}
 
 
 
 
 
 
28
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.0249360614,"nli_farstail_precision_modified":0.0160900081,"nli_farstail_recall_modified":0.0174126172,"nli_farstail_fscore_modified":0.0167242212,"nli_farstail_acc":0.9069767442,"nli_farstail_precision":0.5852272727,"nli_farstail_recall":0.6333333333,"nli_farstail_fscore":0.6082949309,"nli_farstail_valid_output_ratio":0.0274936061,"nlu_score":0.3749414991}
29
+ {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.8069053708,"nli_farstail_precision_modified":0.8274480721,"nli_farstail_recall_modified":0.8078020735,"nli_farstail_fscore_modified":0.8055860349,"nli_farstail_acc":0.8069053708,"nli_farstail_precision":0.8274480721,"nli_farstail_recall":0.8078020735,"nli_farstail_fscore":0.8055860349,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6758278127}
30
+ {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.5051150895,"nli_farstail_precision_modified":0.5526701994,"nli_farstail_recall_modified":0.4383899815,"nli_farstail_fscore_modified":0.4569544839,"nli_farstail_acc":0.8085977482,"nli_farstail_precision":0.8847248637,"nli_farstail_recall":0.7017829387,"nli_farstail_fscore":0.7315013438,"nli_farstail_valid_output_ratio":0.6246803069,"nlu_score":0.4824528512}
31
+ {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.6617647059,"nli_farstail_precision_modified":0.7729519221,"nli_farstail_recall_modified":0.6672320962,"nli_farstail_fscore_modified":0.6191223906,"nli_farstail_acc":0.6617647059,"nli_farstail_precision":0.7729519221,"nli_farstail_recall":0.6672320962,"nli_farstail_fscore":0.6191223906,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6914202844}
32
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","nli_farstail_acc_modified":0.618286445,"nli_farstail_precision_modified":0.6499905475,"nli_farstail_recall_modified":0.6180562888,"nli_farstail_fscore_modified":0.612547215,"nli_farstail_acc":0.6254851229,"nli_farstail_precision":0.6575583547,"nli_farstail_recall":0.625252287,"nli_farstail_fscore":0.6196790713,"nli_farstail_valid_output_ratio":0.9884910486,"nlu_score":0.456845738}
33
+ {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.8132992327,"nli_farstail_precision_modified":0.8243913951,"nli_farstail_recall_modified":0.8129377693,"nli_farstail_fscore_modified":0.8112364262,"nli_farstail_acc":0.8180064309,"nli_farstail_precision":0.8291627923,"nli_farstail_recall":0.8176428754,"nli_farstail_fscore":0.8159316853,"nli_farstail_valid_output_ratio":0.9942455243,"nlu_score":0.7178891542}
34
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.7405509289,"nli_farstail_precision_modified":0.79378989,"nli_farstail_recall_modified":0.7441180803,"nli_farstail_fscore_modified":0.7266455427,"nli_farstail_acc":0.7405509289,"nli_farstail_precision":0.79378989,"nli_farstail_recall":0.7441180803,"nli_farstail_fscore":0.7266455427,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6752949557}
35
+ {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.3177749361,"nli_farstail_precision_modified":0.3674330881,"nli_farstail_recall_modified":0.3157646078,"nli_farstail_fscore_modified":0.300349348,"nli_farstail_acc":0.4737845567,"nli_farstail_precision":0.5478220684,"nli_farstail_recall":0.4707872704,"nli_farstail_fscore":0.447803985,"nli_farstail_valid_output_ratio":0.6707161125,"nlu_score":0.5241296095}
36
+ {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","nli_farstail_acc_modified":0.0,"nli_farstail_precision_modified":0.0,"nli_farstail_recall_modified":0.0,"nli_farstail_fscore_modified":0.0,"nli_farstail_acc":0.0,"nli_farstail_precision":0.0,"nli_farstail_recall":0.0,"nli_farstail_fscore":0.0,"nli_farstail_valid_output_ratio":0.0,"nlu_score":0.046805056}
37
+ {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.7410485934,"nli_farstail_precision_modified":0.7633275849,"nli_farstail_recall_modified":0.7423464162,"nli_farstail_fscore_modified":0.7375659033,"nli_farstail_acc":0.7410485934,"nli_farstail_precision":0.7633275849,"nli_farstail_recall":0.7423464162,"nli_farstail_fscore":0.7375659033,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6833497104}
38
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","nli_farstail_acc_modified":0.1726342711,"nli_farstail_precision_modified":0.1856398147,"nli_farstail_recall_modified":0.156398243,"nli_farstail_fscore_modified":0.1549950666,"nli_farstail_acc":0.7277628032,"nli_farstail_precision":0.7825894076,"nli_farstail_recall":0.6593176606,"nli_farstail_fscore":0.6534023831,"nli_farstail_valid_output_ratio":0.2372122762,"nlu_score":0.5121418762}
39
+ {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6918158568,"nli_farstail_precision_modified":0.76120773,"nli_farstail_recall_modified":0.696633339,"nli_farstail_fscore_modified":0.6637995215,"nli_farstail_acc":0.6918158568,"nli_farstail_precision":0.76120773,"nli_farstail_recall":0.696633339,"nli_farstail_fscore":0.6637995215,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6898261633}
40
+ {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.8542199488,"nli_farstail_precision_modified":0.8634293173,"nli_farstail_recall_modified":0.8539318442,"nli_farstail_fscore_modified":0.8536753404,"nli_farstail_acc":0.8553137004,"nli_farstail_precision":0.8645348606,"nli_farstail_recall":0.8550252268,"nli_farstail_fscore":0.8547683946,"nli_farstail_valid_output_ratio":0.9987212276,"nlu_score":0.7144353486}
41
+ {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","nli_farstail_acc_modified":0.7078005115,"nli_farstail_precision_modified":0.7418983007,"nli_farstail_recall_modified":0.70995102,"nli_farstail_fscore_modified":0.6987179454,"nli_farstail_acc":0.7091607944,"nli_farstail_precision":0.7433241143,"nli_farstail_recall":0.7113154358,"nli_farstail_fscore":0.700060773,"nli_farstail_valid_output_ratio":0.9980818414,"nlu_score":0.6552152029}
42
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","nli_farstail_acc_modified":0.726342711,"nli_farstail_precision_modified":0.8062451443,"nli_farstail_recall_modified":0.7314466615,"nli_farstail_fscore_modified":0.6980605986,"nli_farstail_acc":0.726342711,"nli_farstail_precision":0.8062451443,"nli_farstail_recall":0.7314466615,"nli_farstail_fscore":0.6980605986,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6800109206}
43
+ {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.3433503836,"nli_farstail_precision_modified":0.5618320225,"nli_farstail_recall_modified":0.3440157631,"nli_farstail_fscore_modified":0.279029917,"nli_farstail_acc":0.3435700576,"nli_farstail_precision":0.56219148,"nli_farstail_recall":0.3442358627,"nli_farstail_fscore":0.279208439,"nli_farstail_valid_output_ratio":0.9993606138,"nlu_score":0.3619547874}
44
+ {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6898976982,"nli_farstail_precision_modified":0.752223151,"nli_farstail_recall_modified":0.691698665,"nli_farstail_fscore_modified":0.6834607357,"nli_farstail_acc":0.6898976982,"nli_farstail_precision":0.752223151,"nli_farstail_recall":0.691698665,"nli_farstail_fscore":0.6834607357,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6297634971}
45
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.8689258312,"nli_farstail_precision_modified":0.8809296764,"nli_farstail_recall_modified":0.8678121788,"nli_farstail_fscore_modified":0.8682707156,"nli_farstail_acc":0.8689258312,"nli_farstail_precision":0.8809296764,"nli_farstail_recall":0.8678121788,"nli_farstail_fscore":0.8682707156,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.7207167537}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leaderboard/boards_data/paraphrase-detection_FarsiParaphraseDetection.jsonl CHANGED
@@ -1,44 +1,45 @@
1
- {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.877394636,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9117647059,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8567164179,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8679892098,"paraphrase-detection_FarsiParaphraseDetection_acc":0.877394636,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9117647059,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8567164179,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8679892098,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.7143086066}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8135376756,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8710840658,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7844142715,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.7937868553,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8240620957,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8823529412,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7945619335,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8040557668,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9872286079,"nlu_score":0.628506628}
3
- {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8697318008,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9057190558,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8481376599,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8593214965,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8697318008,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9057190558,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8481376599,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8593214965,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6241793507}
4
- {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6297634971}
 
 
 
5
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8544061303,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8985765125,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8298507463,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8410463708,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8544061303,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8985765125,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8298507463,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8410463708,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6470954618}
6
- {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8378033206,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8876229723,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8118030537,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8221408221,"paraphrase-detection_FarsiParaphraseDetection_acc":0.841025641,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8910369069,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8149253731,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8253029022,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9961685824,"nlu_score":0.7144353486}
7
- {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8212005109,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.880952381,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7910447761,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8003569607,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8212005109,"paraphrase-detection_FarsiParaphraseDetection_precision":0.880952381,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7910447761,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8003569607,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6749652797}
8
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.840357599,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8909249564,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8134328358,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8241066053,"paraphrase-detection_FarsiParaphraseDetection_acc":0.840357599,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8909249564,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8134328358,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8241066053,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6458443785}
9
- {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6552152029}
10
- {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8595146871,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9014336918,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8358208955,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8471137461,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8595146871,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9014336918,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8358208955,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8471137461,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6758278127}
11
- {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8812260536,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9126075915,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8615704957,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8725766572,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8812260536,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9126075915,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8615704957,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8725766572,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.699116864}
12
- {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8441890166,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8908159199,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8186193857,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8296597975,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8485237484,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8953900709,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8228228228,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8339199248,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9948914432,"nlu_score":0.7178891542}
13
- {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.9029374202,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9262452107,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8869436301,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.897066353,"paraphrase-detection_FarsiParaphraseDetection_acc":0.9029374202,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9262452107,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8869436301,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.897066353,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6898261633}
14
- {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8390804598,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8902439024,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8119402985,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8225473409,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8390804598,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8902439024,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8119402985,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8225473409,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6460328733}
15
- {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8952745849,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9226415094,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8776119403,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8883498185,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8952745849,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9226415094,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8776119403,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8883498185,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6714091535}
16
- {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8965517241,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9234404537,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8791044776,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8897859599,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8965517241,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9234404537,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8791044776,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8897859599,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.4086928082}
17
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8556832695,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8960759725,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8314059789,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8433777185,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8634020619,"paraphrase-detection_FarsiParaphraseDetection_precision":0.904159132,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8389057751,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8509855072,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9910600255,"nlu_score":0.3749414991}
18
- {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.680715198,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8209169054,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.6268656716,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.5933059088,"paraphrase-detection_FarsiParaphraseDetection_acc":0.680715198,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8209169054,"paraphrase-detection_FarsiParaphraseDetection_recall":0.6268656716,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.5933059088,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.5661558794}
 
 
19
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.3627075351,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.3434772816,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.3695590113,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.353505411,"paraphrase-detection_FarsiParaphraseDetection_acc":0.9220779221,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8731906218,"paraphrase-detection_FarsiParaphraseDetection_recall":0.9394957983,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8986842105,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.3933588761,"nlu_score":0.456845738}
 
20
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.846743295,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8943661972,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8208955224,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8318539728,"paraphrase-detection_FarsiParaphraseDetection_acc":0.846743295,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8943661972,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8208955224,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8318539728,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6752949557}
 
 
 
21
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.5823754789,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.6860358387,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.5643977685,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.5413041169,"paraphrase-detection_FarsiParaphraseDetection_acc":0.6940639269,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8176043557,"paraphrase-detection_FarsiParaphraseDetection_recall":0.6726384365,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.6451158653,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.8390804598,"nlu_score":0.5121418762}
22
- {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.5095785441,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.6043173519,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.5590051972,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.4734880854,"paraphrase-detection_FarsiParaphraseDetection_acc":0.5095785441,"paraphrase-detection_FarsiParaphraseDetection_precision":0.6043173519,"paraphrase-detection_FarsiParaphraseDetection_recall":0.5590051972,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.4734880854,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.3619547874}
23
- {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.908045977,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9307692308,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8925373134,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.9026092485,"paraphrase-detection_FarsiParaphraseDetection_acc":0.908045977,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9307692308,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8925373134,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.9026092485,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.3928685253}
 
24
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.877394636,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9117647059,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8567164179,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8679892098,"paraphrase-detection_FarsiParaphraseDetection_acc":0.877394636,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9117647059,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8567164179,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8679892098,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6800109206}
25
- {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8326947637,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8868739206,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8044776119,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.81470067,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8326947637,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8868739206,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8044776119,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.81470067,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6833497104}
 
26
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8659003831,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9050632911,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8432835821,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8546320936,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8659003831,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9050632911,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8432835821,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8546320936,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.7207167537}
27
- {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8607918263,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9021543986,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8373134328,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8486231942,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8607918263,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9021543986,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8373134328,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8486231942,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6459120734}
28
- {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8876117497,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9179104478,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8686567164,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8796831783,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8876117497,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9179104478,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8686567164,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8796831783,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.4824528512}
29
- {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8888888889,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9186915888,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8701492537,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8811336459,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8888888889,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9186915888,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8701492537,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8811336459,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.7050532433}
30
- {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8212005109,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.880952381,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7910447761,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8003569607,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8212005109,"paraphrase-detection_FarsiParaphraseDetection_precision":0.880952381,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7910447761,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8003569607,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6944128198}
31
- {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8939974457,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9218455744,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.876119403,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8869113391,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8939974457,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9218455744,"paraphrase-detection_FarsiParaphraseDetection_recall":0.876119403,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8869113391,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6914202844}
32
- {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.9169859515,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9233165065,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.9095332885,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.9144938271,"paraphrase-detection_FarsiParaphraseDetection_acc":0.925257732,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9316453926,"paraphrase-detection_FarsiParaphraseDetection_recall":0.9177378414,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.9227431271,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9910600255,"nlu_score":0.531045981}
33
- {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8237547893,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8607726326,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8027155145,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8121248229,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8509234828,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8891622313,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8291903006,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8389099424,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9680715198,"nlu_score":0.5241296095}
34
- {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8045977011,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8727121464,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.771641791,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.779104351,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8045977011,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8727121464,"paraphrase-detection_FarsiParaphraseDetection_recall":0.771641791,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.779104351,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6262096694}
35
- {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8275862069,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.882206336,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7988839286,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8087040083,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8275862069,"paraphrase-detection_FarsiParaphraseDetection_precision":0.882206336,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7988839286,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8087040083,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6443219619}
36
- {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.7982120051,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8696369637,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7641791045,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.7707505633,"paraphrase-detection_FarsiParaphraseDetection_acc":0.7982120051,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8696369637,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7641791045,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.7707505633,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.5968415875}
37
- {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8722860792,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9057423702,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8514992004,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8624887603,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8722860792,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9057423702,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8514992004,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8624887603,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.3916645306}
38
- {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8799489144,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9132841328,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8597014925,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8709280303,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8799489144,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9132841328,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8597014925,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8709280303,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.7146808531}
39
- {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.9131545338,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9329661667,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8988839286,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.9083655739,"paraphrase-detection_FarsiParaphraseDetection_acc":0.9131545338,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9329661667,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8988839286,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.9083655739,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6361186163}
40
- {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.7777777778,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8343364681,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7575227312,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.7618590799,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8152610442,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8745454545,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7940298507,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.7985751802,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9540229885,"nlu_score":0.6255818412}
41
- {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.5210727969,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.2605363985,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.4559386973,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.3315917799,"paraphrase-detection_FarsiParaphraseDetection_acc":0.5714285714,"paraphrase-detection_FarsiParaphraseDetection_precision":0.2857142857,"paraphrase-detection_FarsiParaphraseDetection_recall":0.5,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.3636363636,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9118773946,"nlu_score":0.1368924446}
42
- {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.091954023,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.0459770115,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.0791826309,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.0581749941,"paraphrase-detection_FarsiParaphraseDetection_acc":0.5806451613,"paraphrase-detection_FarsiParaphraseDetection_precision":0.2903225806,"paraphrase-detection_FarsiParaphraseDetection_recall":0.5,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.3673469388,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.1583652618,"nlu_score":0.046805056}
43
- {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8556832695,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8992869875,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8313432836,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8425676986,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8556832695,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8992869875,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8313432836,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8425676986,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6992555201}
44
- {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.0,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.0,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.0,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.0,"paraphrase-detection_FarsiParaphraseDetection_acc":0.0,"paraphrase-detection_FarsiParaphraseDetection_precision":0.0,"paraphrase-detection_FarsiParaphraseDetection_recall":0.0,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.0,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.0,"nlu_score":0.0372987683}
 
1
+ {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8045977011,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8727121464,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.771641791,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.779104351,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8045977011,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8727121464,"paraphrase-detection_FarsiParaphraseDetection_recall":0.771641791,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.779104351,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6262096694}
2
+ {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.9169859515,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9233165065,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.9095332885,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.9144938271,"paraphrase-detection_FarsiParaphraseDetection_acc":0.925257732,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9316453926,"paraphrase-detection_FarsiParaphraseDetection_recall":0.9177378414,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.9227431271,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9910600255,"nlu_score":0.531045981}
3
+ {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8888888889,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9186915888,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8701492537,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8811336459,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8888888889,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9186915888,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8701492537,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8811336459,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.7050532433}
4
+ {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.9131545338,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9329661667,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8988839286,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.9083655739,"paraphrase-detection_FarsiParaphraseDetection_acc":0.9131545338,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9329661667,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8988839286,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.9083655739,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6361186163}
5
+ {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8965517241,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9234404537,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8791044776,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8897859599,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8965517241,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9234404537,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8791044776,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8897859599,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.4086928082}
6
+ {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8390804598,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8902439024,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8119402985,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8225473409,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8390804598,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8902439024,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8119402985,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8225473409,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6460328733}
7
+ {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8952745849,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9226415094,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8776119403,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8883498185,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8952745849,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9226415094,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8776119403,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8883498185,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6714091535}
8
+ {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8212005109,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.880952381,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7910447761,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8003569607,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8212005109,"paraphrase-detection_FarsiParaphraseDetection_precision":0.880952381,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7910447761,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8003569607,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6944128198}
9
+ {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8275862069,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.882206336,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7988839286,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8087040083,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8275862069,"paraphrase-detection_FarsiParaphraseDetection_precision":0.882206336,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7988839286,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8087040083,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6443219619}
10
+ {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8556832695,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8992869875,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8313432836,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8425676986,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8556832695,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8992869875,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8313432836,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8425676986,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6992555201}
11
+ {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8212005109,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.880952381,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7910447761,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8003569607,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8212005109,"paraphrase-detection_FarsiParaphraseDetection_precision":0.880952381,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7910447761,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8003569607,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6749652797}
12
+ {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.7777777778,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8343364681,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7575227312,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.7618590799,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8152610442,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8745454545,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7940298507,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.7985751802,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9540229885,"nlu_score":0.6255818412}
13
+ {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8722860792,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9057423702,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8514992004,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8624887603,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8722860792,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9057423702,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8514992004,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8624887603,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.3916645306}
14
+ {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8607918263,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9021543986,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8373134328,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8486231942,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8607918263,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9021543986,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8373134328,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8486231942,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6459120734}
15
+ {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.5210727969,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.2605363985,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.4559386973,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.3315917799,"paraphrase-detection_FarsiParaphraseDetection_acc":0.5714285714,"paraphrase-detection_FarsiParaphraseDetection_precision":0.2857142857,"paraphrase-detection_FarsiParaphraseDetection_recall":0.5,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.3636363636,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9118773946,"nlu_score":0.1368924446}
16
+ {"Model Name":"yandexgpt-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.779054917,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8592259336,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7428737538,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.7463134032,"paraphrase-detection_FarsiParaphraseDetection_acc":0.781049936,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8614262561,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7447761194,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.7482245771,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9974457216,"nlu_score":0.5761104945}
17
+ {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.680715198,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8209169054,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.6268656716,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.5933059088,"paraphrase-detection_FarsiParaphraseDetection_acc":0.680715198,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8209169054,"paraphrase-detection_FarsiParaphraseDetection_recall":0.6268656716,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.5933059088,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.5661558794}
18
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8135376756,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8710840658,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7844142715,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.7937868553,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8240620957,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8823529412,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7945619335,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8040557668,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9872286079,"nlu_score":0.628506628}
19
+ {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8812260536,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9126075915,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8615704957,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8725766572,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8812260536,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9126075915,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8615704957,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8725766572,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.699116864}
20
+ {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.908045977,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9307692308,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8925373134,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.9026092485,"paraphrase-detection_FarsiParaphraseDetection_acc":0.908045977,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9307692308,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8925373134,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.9026092485,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.3928685253}
21
+ {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.7982120051,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8696369637,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7641791045,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.7707505633,"paraphrase-detection_FarsiParaphraseDetection_acc":0.7982120051,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8696369637,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7641791045,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.7707505633,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.5968415875}
22
+ {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8799489144,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9132841328,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8597014925,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8709280303,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8799489144,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9132841328,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8597014925,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8709280303,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.7146808531}
23
+ {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.877394636,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9117647059,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8567164179,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8679892098,"paraphrase-detection_FarsiParaphraseDetection_acc":0.877394636,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9117647059,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8567164179,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8679892098,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.7143086066}
24
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8544061303,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8985765125,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8298507463,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8410463708,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8544061303,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8985765125,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8298507463,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8410463708,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6470954618}
 
 
25
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.840357599,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8909249564,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8134328358,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8241066053,"paraphrase-detection_FarsiParaphraseDetection_acc":0.840357599,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8909249564,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8134328358,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8241066053,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6458443785}
26
+ {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8697318008,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9057190558,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8481376599,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8593214965,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8697318008,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9057190558,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8481376599,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8593214965,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6241793507}
27
+ {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.0,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.0,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.0,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.0,"paraphrase-detection_FarsiParaphraseDetection_acc":0.0,"paraphrase-detection_FarsiParaphraseDetection_precision":0.0,"paraphrase-detection_FarsiParaphraseDetection_recall":0.0,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.0,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.0,"nlu_score":0.0372987683}
 
 
 
 
 
 
28
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8556832695,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8960759725,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8314059789,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8433777185,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8634020619,"paraphrase-detection_FarsiParaphraseDetection_precision":0.904159132,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8389057751,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8509855072,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9910600255,"nlu_score":0.3749414991}
29
+ {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8595146871,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9014336918,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8358208955,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8471137461,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8595146871,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9014336918,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8358208955,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8471137461,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6758278127}
30
+ {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8876117497,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9179104478,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8686567164,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8796831783,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8876117497,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9179104478,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8686567164,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8796831783,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.4824528512}
31
+ {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8939974457,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9218455744,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.876119403,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8869113391,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8939974457,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9218455744,"paraphrase-detection_FarsiParaphraseDetection_recall":0.876119403,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8869113391,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6914202844}
32
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.3627075351,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.3434772816,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.3695590113,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.353505411,"paraphrase-detection_FarsiParaphraseDetection_acc":0.9220779221,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8731906218,"paraphrase-detection_FarsiParaphraseDetection_recall":0.9394957983,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8986842105,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.3933588761,"nlu_score":0.456845738}
33
+ {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8441890166,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8908159199,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8186193857,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8296597975,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8485237484,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8953900709,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8228228228,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8339199248,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9948914432,"nlu_score":0.7178891542}
34
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.846743295,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8943661972,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8208955224,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8318539728,"paraphrase-detection_FarsiParaphraseDetection_acc":0.846743295,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8943661972,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8208955224,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8318539728,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6752949557}
35
+ {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8237547893,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8607726326,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8027155145,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8121248229,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8509234828,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8891622313,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8291903006,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8389099424,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9680715198,"nlu_score":0.5241296095}
36
+ {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.091954023,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.0459770115,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.0791826309,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.0581749941,"paraphrase-detection_FarsiParaphraseDetection_acc":0.5806451613,"paraphrase-detection_FarsiParaphraseDetection_precision":0.2903225806,"paraphrase-detection_FarsiParaphraseDetection_recall":0.5,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.3673469388,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.1583652618,"nlu_score":0.046805056}
37
+ {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8326947637,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8868739206,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8044776119,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.81470067,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8326947637,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8868739206,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8044776119,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.81470067,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6833497104}
38
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.5823754789,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.6860358387,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.5643977685,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.5413041169,"paraphrase-detection_FarsiParaphraseDetection_acc":0.6940639269,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8176043557,"paraphrase-detection_FarsiParaphraseDetection_recall":0.6726384365,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.6451158653,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.8390804598,"nlu_score":0.5121418762}
39
+ {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.9029374202,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9262452107,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8869436301,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.897066353,"paraphrase-detection_FarsiParaphraseDetection_acc":0.9029374202,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9262452107,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8869436301,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.897066353,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6898261633}
40
+ {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8378033206,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8876229723,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8118030537,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8221408221,"paraphrase-detection_FarsiParaphraseDetection_acc":0.841025641,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8910369069,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8149253731,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8253029022,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9961685824,"nlu_score":0.7144353486}
41
+ {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6552152029}
42
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.877394636,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9117647059,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8567164179,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8679892098,"paraphrase-detection_FarsiParaphraseDetection_acc":0.877394636,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9117647059,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8567164179,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8679892098,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6800109206}
43
+ {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.5095785441,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.6043173519,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.5590051972,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.4734880854,"paraphrase-detection_FarsiParaphraseDetection_acc":0.5095785441,"paraphrase-detection_FarsiParaphraseDetection_precision":0.6043173519,"paraphrase-detection_FarsiParaphraseDetection_recall":0.5590051972,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.4734880854,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.3619547874}
44
+ {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6297634971}
45
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8659003831,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9050632911,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8432835821,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8546320936,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8659003831,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9050632911,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8432835821,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8546320936,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.7207167537}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leaderboard/boards_data/paraphrase-detection_parsinlu.jsonl CHANGED
@@ -1,44 +1,45 @@
1
- {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.844,"paraphrase-detection_parsinlu_precision_modified":0.8671683358,"paraphrase-detection_parsinlu_recall_modified":0.8248878009,"paraphrase-detection_parsinlu_fscore_modified":0.8334216056,"paraphrase-detection_parsinlu_acc":0.844,"paraphrase-detection_parsinlu_precision":0.8671683358,"paraphrase-detection_parsinlu_recall":0.8248878009,"paraphrase-detection_parsinlu_fscore":0.8334216056,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.7143086066}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.796,"paraphrase-detection_parsinlu_precision_modified":0.8275307297,"paraphrase-detection_parsinlu_recall_modified":0.7707807282,"paraphrase-detection_parsinlu_fscore_modified":0.7792394123,"paraphrase-detection_parsinlu_acc":0.8139059305,"paraphrase-detection_parsinlu_precision":0.8461459404,"paraphrase-detection_parsinlu_recall":0.788119354,"paraphrase-detection_parsinlu_fscore":0.7967683152,"paraphrase-detection_parsinlu_valid_output_ratio":0.978,"nlu_score":0.628506628}
3
- {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.838,"paraphrase-detection_parsinlu_precision_modified":0.8416530278,"paraphrase-detection_parsinlu_recall_modified":0.8270501836,"paraphrase-detection_parsinlu_fscore_modified":0.8316645261,"paraphrase-detection_parsinlu_acc":0.838,"paraphrase-detection_parsinlu_precision":0.8416530278,"paraphrase-detection_parsinlu_recall":0.8270501836,"paraphrase-detection_parsinlu_fscore":0.8316645261,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6241793507}
4
- {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.872,"paraphrase-detection_parsinlu_precision_modified":0.872593554,"paraphrase-detection_parsinlu_recall_modified":0.8654426765,"paraphrase-detection_parsinlu_fscore_modified":0.8682824025,"paraphrase-detection_parsinlu_acc":0.872,"paraphrase-detection_parsinlu_precision":0.872593554,"paraphrase-detection_parsinlu_recall":0.8654426765,"paraphrase-detection_parsinlu_fscore":0.8682824025,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6297634971}
 
 
 
5
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.834,"paraphrase-detection_parsinlu_precision_modified":0.8710205993,"paraphrase-detection_parsinlu_recall_modified":0.8104039168,"paraphrase-detection_parsinlu_fscore_modified":0.8196585245,"paraphrase-detection_parsinlu_acc":0.834,"paraphrase-detection_parsinlu_precision":0.8710205993,"paraphrase-detection_parsinlu_recall":0.8104039168,"paraphrase-detection_parsinlu_fscore":0.8196585245,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6470954618}
6
- {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.812,"paraphrase-detection_parsinlu_precision_modified":0.8560421519,"paraphrase-detection_parsinlu_recall_modified":0.7853480321,"paraphrase-detection_parsinlu_fscore_modified":0.7936411239,"paraphrase-detection_parsinlu_acc":0.8152610442,"paraphrase-detection_parsinlu_precision":0.8594800722,"paraphrase-detection_parsinlu_recall":0.7885020403,"paraphrase-detection_parsinlu_fscore":0.7968284376,"paraphrase-detection_parsinlu_valid_output_ratio":0.996,"nlu_score":0.7144353486}
7
- {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.78,"paraphrase-detection_parsinlu_precision_modified":0.8453499806,"paraphrase-detection_parsinlu_recall_modified":0.7464708282,"paraphrase-detection_parsinlu_fscore_modified":0.7504718351,"paraphrase-detection_parsinlu_acc":0.78,"paraphrase-detection_parsinlu_precision":0.8453499806,"paraphrase-detection_parsinlu_recall":0.7464708282,"paraphrase-detection_parsinlu_fscore":0.7504718351,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6749652797}
8
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.792,"paraphrase-detection_parsinlu_precision_modified":0.8370839341,"paraphrase-detection_parsinlu_recall_modified":0.7632802938,"paraphrase-detection_parsinlu_fscore_modified":0.7695689166,"paraphrase-detection_parsinlu_acc":0.792,"paraphrase-detection_parsinlu_precision":0.8370839341,"paraphrase-detection_parsinlu_recall":0.7632802938,"paraphrase-detection_parsinlu_fscore":0.7695689166,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6458443785}
9
- {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.866,"paraphrase-detection_parsinlu_precision_modified":0.8771198269,"paraphrase-detection_parsinlu_recall_modified":0.852753978,"paraphrase-detection_parsinlu_fscore_modified":0.8595688134,"paraphrase-detection_parsinlu_acc":0.866,"paraphrase-detection_parsinlu_precision":0.8771198269,"paraphrase-detection_parsinlu_recall":0.852753978,"paraphrase-detection_parsinlu_fscore":0.8595688134,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6552152029}
10
- {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.826,"paraphrase-detection_parsinlu_precision_modified":0.861267166,"paraphrase-detection_parsinlu_recall_modified":0.802243982,"paraphrase-detection_parsinlu_fscore_modified":0.8109673691,"paraphrase-detection_parsinlu_acc":0.826,"paraphrase-detection_parsinlu_precision":0.861267166,"paraphrase-detection_parsinlu_recall":0.802243982,"paraphrase-detection_parsinlu_fscore":0.8109673691,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6758278127}
11
- {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.878,"paraphrase-detection_parsinlu_precision_modified":0.8823390152,"paraphrase-detection_parsinlu_recall_modified":0.8689922481,"paraphrase-detection_parsinlu_fscore_modified":0.8736280355,"paraphrase-detection_parsinlu_acc":0.878,"paraphrase-detection_parsinlu_precision":0.8823390152,"paraphrase-detection_parsinlu_recall":0.8689922481,"paraphrase-detection_parsinlu_fscore":0.8736280355,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.699116864}
12
- {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.856,"paraphrase-detection_parsinlu_precision_modified":0.8688030303,"paraphrase-detection_parsinlu_recall_modified":0.8427539068,"paraphrase-detection_parsinlu_fscore_modified":0.8496988866,"paraphrase-detection_parsinlu_acc":0.8681541582,"paraphrase-detection_parsinlu_precision":0.881138976,"paraphrase-detection_parsinlu_recall":0.8547199866,"paraphrase-detection_parsinlu_fscore":0.8617635767,"paraphrase-detection_parsinlu_valid_output_ratio":0.986,"nlu_score":0.7178891542}
13
- {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.876,"paraphrase-detection_parsinlu_precision_modified":0.8806824921,"paraphrase-detection_parsinlu_recall_modified":0.8666666667,"paraphrase-detection_parsinlu_fscore_modified":0.871456768,"paraphrase-detection_parsinlu_acc":0.876,"paraphrase-detection_parsinlu_precision":0.8806824921,"paraphrase-detection_parsinlu_recall":0.8666666667,"paraphrase-detection_parsinlu_fscore":0.871456768,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6898261633}
14
- {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.854,"paraphrase-detection_parsinlu_precision_modified":0.8742015099,"paraphrase-detection_parsinlu_recall_modified":0.8365157079,"paraphrase-detection_parsinlu_fscore_modified":0.8449177639,"paraphrase-detection_parsinlu_acc":0.854,"paraphrase-detection_parsinlu_precision":0.8742015099,"paraphrase-detection_parsinlu_recall":0.8365157079,"paraphrase-detection_parsinlu_fscore":0.8449177639,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6460328733}
15
- {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.876,"paraphrase-detection_parsinlu_precision_modified":0.8735210118,"paraphrase-detection_parsinlu_recall_modified":0.8735210118,"paraphrase-detection_parsinlu_fscore_modified":0.8735210118,"paraphrase-detection_parsinlu_acc":0.876,"paraphrase-detection_parsinlu_precision":0.8735210118,"paraphrase-detection_parsinlu_recall":0.8735210118,"paraphrase-detection_parsinlu_fscore":0.8735210118,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6714091535}
16
- {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.86,"paraphrase-detection_parsinlu_precision_modified":0.8618019568,"paraphrase-detection_parsinlu_recall_modified":0.8516299393,"paraphrase-detection_parsinlu_fscore_modified":0.8553599196,"paraphrase-detection_parsinlu_acc":0.8617234469,"paraphrase-detection_parsinlu_precision":0.8635290148,"paraphrase-detection_parsinlu_recall":0.8533366126,"paraphrase-detection_parsinlu_fscore":0.8570740678,"paraphrase-detection_parsinlu_valid_output_ratio":0.998,"nlu_score":0.4086928082}
17
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.744,"paraphrase-detection_parsinlu_precision_modified":0.7517357255,"paraphrase-detection_parsinlu_recall_modified":0.7357334824,"paraphrase-detection_parsinlu_fscore_modified":0.7395,"paraphrase-detection_parsinlu_acc":0.8416289593,"paraphrase-detection_parsinlu_precision":0.85037978,"paraphrase-detection_parsinlu_recall":0.832277695,"paraphrase-detection_parsinlu_fscore":0.8365384615,"paraphrase-detection_parsinlu_valid_output_ratio":0.884,"nlu_score":0.3749414991}
18
- {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.66,"paraphrase-detection_parsinlu_precision_modified":0.7933390651,"paraphrase-detection_parsinlu_recall_modified":0.6057935537,"paraphrase-detection_parsinlu_fscore_modified":0.5625411726,"paraphrase-detection_parsinlu_acc":0.66,"paraphrase-detection_parsinlu_precision":0.7933390651,"paraphrase-detection_parsinlu_recall":0.6057935537,"paraphrase-detection_parsinlu_fscore":0.5625411726,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.5661558794}
 
 
19
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.274,"paraphrase-detection_parsinlu_precision_modified":0.2627667984,"paraphrase-detection_parsinlu_recall_modified":0.2313142857,"paraphrase-detection_parsinlu_fscore_modified":0.2414462011,"paraphrase-detection_parsinlu_acc":0.85625,"paraphrase-detection_parsinlu_precision":0.8211462451,"paraphrase-detection_parsinlu_recall":0.7228571429,"paraphrase-detection_parsinlu_fscore":0.7545193783,"paraphrase-detection_parsinlu_valid_output_ratio":0.32,"nlu_score":0.456845738}
 
20
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.764,"paraphrase-detection_parsinlu_precision_modified":0.8406006212,"paraphrase-detection_parsinlu_recall_modified":0.7272949816,"paraphrase-detection_parsinlu_fscore_modified":0.7279551449,"paraphrase-detection_parsinlu_acc":0.764,"paraphrase-detection_parsinlu_precision":0.8406006212,"paraphrase-detection_parsinlu_recall":0.7272949816,"paraphrase-detection_parsinlu_fscore":0.7279551449,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6752949557}
 
 
 
21
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.672,"paraphrase-detection_parsinlu_precision_modified":0.7868195779,"paraphrase-detection_parsinlu_recall_modified":0.6265486372,"paraphrase-detection_parsinlu_fscore_modified":0.5973050157,"paraphrase-detection_parsinlu_acc":0.6801619433,"paraphrase-detection_parsinlu_precision":0.796376091,"paraphrase-detection_parsinlu_recall":0.6341585396,"paraphrase-detection_parsinlu_fscore":0.6045597325,"paraphrase-detection_parsinlu_valid_output_ratio":0.988,"nlu_score":0.5121418762}
22
- {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.576,"paraphrase-detection_parsinlu_precision_modified":0.6352777945,"paraphrase-detection_parsinlu_recall_modified":0.6097919217,"paraphrase-detection_parsinlu_fscore_modified":0.5654241624,"paraphrase-detection_parsinlu_acc":0.576,"paraphrase-detection_parsinlu_precision":0.6352777945,"paraphrase-detection_parsinlu_recall":0.6097919217,"paraphrase-detection_parsinlu_fscore":0.5654241624,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.3619547874}
23
- {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.864,"paraphrase-detection_parsinlu_precision_modified":0.8615596015,"paraphrase-detection_parsinlu_recall_modified":0.8607099143,"paraphrase-detection_parsinlu_fscore_modified":0.8611201882,"paraphrase-detection_parsinlu_acc":0.864,"paraphrase-detection_parsinlu_precision":0.8615596015,"paraphrase-detection_parsinlu_recall":0.8607099143,"paraphrase-detection_parsinlu_fscore":0.8611201882,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.3928685253}
 
24
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.828,"paraphrase-detection_parsinlu_precision_modified":0.8504577445,"paraphrase-detection_parsinlu_recall_modified":0.807996736,"paraphrase-detection_parsinlu_fscore_modified":0.8159372646,"paraphrase-detection_parsinlu_acc":0.828,"paraphrase-detection_parsinlu_precision":0.8504577445,"paraphrase-detection_parsinlu_recall":0.807996736,"paraphrase-detection_parsinlu_fscore":0.8159372646,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6800109206}
25
- {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.804,"paraphrase-detection_parsinlu_precision_modified":0.8556500813,"paraphrase-detection_parsinlu_recall_modified":0.7749490004,"paraphrase-detection_parsinlu_fscore_modified":0.7822570611,"paraphrase-detection_parsinlu_acc":0.804,"paraphrase-detection_parsinlu_precision":0.8556500813,"paraphrase-detection_parsinlu_recall":0.7749490004,"paraphrase-detection_parsinlu_fscore":0.7822570611,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6833497104}
 
26
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.85,"paraphrase-detection_parsinlu_precision_modified":0.8811252269,"paraphrase-detection_parsinlu_recall_modified":0.8290085679,"paraphrase-detection_parsinlu_fscore_modified":0.8385784573,"paraphrase-detection_parsinlu_acc":0.85,"paraphrase-detection_parsinlu_precision":0.8811252269,"paraphrase-detection_parsinlu_recall":0.8290085679,"paraphrase-detection_parsinlu_fscore":0.8385784573,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.7207167537}
27
- {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.838,"paraphrase-detection_parsinlu_precision_modified":0.8576776974,"paraphrase-detection_parsinlu_recall_modified":0.819624643,"paraphrase-detection_parsinlu_fscore_modified":0.8275649186,"paraphrase-detection_parsinlu_acc":0.838,"paraphrase-detection_parsinlu_precision":0.8576776974,"paraphrase-detection_parsinlu_recall":0.819624643,"paraphrase-detection_parsinlu_fscore":0.8275649186,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6459120734}
28
- {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.88,"paraphrase-detection_parsinlu_precision_modified":0.8945054945,"paraphrase-detection_parsinlu_recall_modified":0.8661770706,"paraphrase-detection_parsinlu_fscore_modified":0.8738965952,"paraphrase-detection_parsinlu_acc":0.88,"paraphrase-detection_parsinlu_precision":0.8945054945,"paraphrase-detection_parsinlu_recall":0.8661770706,"paraphrase-detection_parsinlu_fscore":0.8738965952,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.4824528512}
29
- {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.868,"paraphrase-detection_parsinlu_precision_modified":0.8786613063,"paraphrase-detection_parsinlu_recall_modified":0.8550795594,"paraphrase-detection_parsinlu_fscore_modified":0.8617882093,"paraphrase-detection_parsinlu_acc":0.868,"paraphrase-detection_parsinlu_precision":0.8786613063,"paraphrase-detection_parsinlu_recall":0.8550795594,"paraphrase-detection_parsinlu_fscore":0.8617882093,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.7050532433}
30
- {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.802,"paraphrase-detection_parsinlu_precision_modified":0.854490281,"paraphrase-detection_parsinlu_recall_modified":0.772623419,"paraphrase-detection_parsinlu_fscore_modified":0.7797249009,"paraphrase-detection_parsinlu_acc":0.802,"paraphrase-detection_parsinlu_precision":0.854490281,"paraphrase-detection_parsinlu_recall":0.772623419,"paraphrase-detection_parsinlu_fscore":0.7797249009,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6944128198}
31
- {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.888,"paraphrase-detection_parsinlu_precision_modified":0.8885737506,"paraphrase-detection_parsinlu_recall_modified":0.8823337413,"paraphrase-detection_parsinlu_fscore_modified":0.8849043884,"paraphrase-detection_parsinlu_acc":0.888,"paraphrase-detection_parsinlu_precision":0.8885737506,"paraphrase-detection_parsinlu_recall":0.8823337413,"paraphrase-detection_parsinlu_fscore":0.8849043884,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6914202844}
32
- {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.778,"paraphrase-detection_parsinlu_precision_modified":0.8054432653,"paraphrase-detection_parsinlu_recall_modified":0.7971712985,"paraphrase-detection_parsinlu_fscore_modified":0.7776855183,"paraphrase-detection_parsinlu_acc":0.7842741935,"paraphrase-detection_parsinlu_precision":0.8119387755,"paraphrase-detection_parsinlu_recall":0.8036000993,"paraphrase-detection_parsinlu_fscore":0.7839571757,"paraphrase-detection_parsinlu_valid_output_ratio":0.992,"nlu_score":0.531045981}
33
- {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.764,"paraphrase-detection_parsinlu_precision_modified":0.7656475469,"paraphrase-detection_parsinlu_recall_modified":0.7528414743,"paraphrase-detection_parsinlu_fscore_modified":0.756438131,"paraphrase-detection_parsinlu_acc":0.7925311203,"paraphrase-detection_parsinlu_precision":0.7942401938,"paraphrase-detection_parsinlu_recall":0.7809558862,"paraphrase-detection_parsinlu_fscore":0.7846868579,"paraphrase-detection_parsinlu_valid_output_ratio":0.964,"nlu_score":0.5241296095}
34
- {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.792,"paraphrase-detection_parsinlu_precision_modified":0.8426666667,"paraphrase-detection_parsinlu_recall_modified":0.7621379029,"paraphrase-detection_parsinlu_fscore_modified":0.7682709447,"paraphrase-detection_parsinlu_acc":0.792,"paraphrase-detection_parsinlu_precision":0.8426666667,"paraphrase-detection_parsinlu_recall":0.7621379029,"paraphrase-detection_parsinlu_fscore":0.7682709447,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6262096694}
35
- {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.8,"paraphrase-detection_parsinlu_precision_modified":0.8306007718,"paraphrase-detection_parsinlu_recall_modified":0.7754385965,"paraphrase-detection_parsinlu_fscore_modified":0.7824535756,"paraphrase-detection_parsinlu_acc":0.8,"paraphrase-detection_parsinlu_precision":0.8306007718,"paraphrase-detection_parsinlu_recall":0.7754385965,"paraphrase-detection_parsinlu_fscore":0.7824535756,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6443219619}
36
- {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.798,"paraphrase-detection_parsinlu_precision_modified":0.8383696273,"paraphrase-detection_parsinlu_recall_modified":0.7708282334,"paraphrase-detection_parsinlu_fscore_modified":0.7777278949,"paraphrase-detection_parsinlu_acc":0.798,"paraphrase-detection_parsinlu_precision":0.8383696273,"paraphrase-detection_parsinlu_recall":0.7708282334,"paraphrase-detection_parsinlu_fscore":0.7777278949,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.5968415875}
37
- {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.634,"paraphrase-detection_parsinlu_precision_modified":0.7237713267,"paraphrase-detection_parsinlu_recall_modified":0.6720930233,"paraphrase-detection_parsinlu_fscore_modified":0.6220242152,"paraphrase-detection_parsinlu_acc":0.634,"paraphrase-detection_parsinlu_precision":0.7237713267,"paraphrase-detection_parsinlu_recall":0.6720930233,"paraphrase-detection_parsinlu_fscore":0.6220242152,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.3916645306}
38
- {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.848,"paraphrase-detection_parsinlu_precision_modified":0.8717792656,"paraphrase-detection_parsinlu_recall_modified":0.8289677683,"paraphrase-detection_parsinlu_fscore_modified":0.8376928465,"paraphrase-detection_parsinlu_acc":0.848,"paraphrase-detection_parsinlu_precision":0.8717792656,"paraphrase-detection_parsinlu_recall":0.8289677683,"paraphrase-detection_parsinlu_fscore":0.8376928465,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.7146808531}
39
- {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.826,"paraphrase-detection_parsinlu_precision_modified":0.8267131595,"paraphrase-detection_parsinlu_recall_modified":0.8165238678,"paraphrase-detection_parsinlu_fscore_modified":0.8200389709,"paraphrase-detection_parsinlu_acc":0.826,"paraphrase-detection_parsinlu_precision":0.8267131595,"paraphrase-detection_parsinlu_recall":0.8165238678,"paraphrase-detection_parsinlu_fscore":0.8200389709,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6361186163}
40
- {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.824,"paraphrase-detection_parsinlu_precision_modified":0.8599831541,"paraphrase-detection_parsinlu_recall_modified":0.7999184007,"paraphrase-detection_parsinlu_fscore_modified":0.8085591465,"paraphrase-detection_parsinlu_acc":0.824,"paraphrase-detection_parsinlu_precision":0.8599831541,"paraphrase-detection_parsinlu_recall":0.7999184007,"paraphrase-detection_parsinlu_fscore":0.8085591465,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6255818412}
41
- {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.57,"paraphrase-detection_parsinlu_precision_modified":0.285,"paraphrase-detection_parsinlu_recall_modified":0.5,"paraphrase-detection_parsinlu_fscore_modified":0.3630573248,"paraphrase-detection_parsinlu_acc":0.57,"paraphrase-detection_parsinlu_precision":0.285,"paraphrase-detection_parsinlu_recall":0.5,"paraphrase-detection_parsinlu_fscore":0.3630573248,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.1368924446}
42
- {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.072,"paraphrase-detection_parsinlu_precision_modified":0.036,"paraphrase-detection_parsinlu_recall_modified":0.066,"paraphrase-detection_parsinlu_fscore_modified":0.0465882353,"paraphrase-detection_parsinlu_acc":0.5454545455,"paraphrase-detection_parsinlu_precision":0.2727272727,"paraphrase-detection_parsinlu_recall":0.5,"paraphrase-detection_parsinlu_fscore":0.3529411765,"paraphrase-detection_parsinlu_valid_output_ratio":0.132,"nlu_score":0.046805056}
43
- {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.814,"paraphrase-detection_parsinlu_precision_modified":0.8488669447,"paraphrase-detection_parsinlu_recall_modified":0.7894328845,"paraphrase-detection_parsinlu_fscore_modified":0.7974306363,"paraphrase-detection_parsinlu_acc":0.814,"paraphrase-detection_parsinlu_precision":0.8488669447,"paraphrase-detection_parsinlu_recall":0.7894328845,"paraphrase-detection_parsinlu_fscore":0.7974306363,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6992555201}
44
- {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.0,"paraphrase-detection_parsinlu_precision_modified":0.0,"paraphrase-detection_parsinlu_recall_modified":0.0,"paraphrase-detection_parsinlu_fscore_modified":0.0,"paraphrase-detection_parsinlu_acc":0.0,"paraphrase-detection_parsinlu_precision":0.0,"paraphrase-detection_parsinlu_recall":0.0,"paraphrase-detection_parsinlu_fscore":0.0,"paraphrase-detection_parsinlu_valid_output_ratio":0.0,"nlu_score":0.0372987683}
 
1
+ {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.792,"paraphrase-detection_parsinlu_precision_modified":0.8426666667,"paraphrase-detection_parsinlu_recall_modified":0.7621379029,"paraphrase-detection_parsinlu_fscore_modified":0.7682709447,"paraphrase-detection_parsinlu_acc":0.792,"paraphrase-detection_parsinlu_precision":0.8426666667,"paraphrase-detection_parsinlu_recall":0.7621379029,"paraphrase-detection_parsinlu_fscore":0.7682709447,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6262096694}
2
+ {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.778,"paraphrase-detection_parsinlu_precision_modified":0.8054432653,"paraphrase-detection_parsinlu_recall_modified":0.7971712985,"paraphrase-detection_parsinlu_fscore_modified":0.7776855183,"paraphrase-detection_parsinlu_acc":0.7842741935,"paraphrase-detection_parsinlu_precision":0.8119387755,"paraphrase-detection_parsinlu_recall":0.8036000993,"paraphrase-detection_parsinlu_fscore":0.7839571757,"paraphrase-detection_parsinlu_valid_output_ratio":0.992,"nlu_score":0.531045981}
3
+ {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.868,"paraphrase-detection_parsinlu_precision_modified":0.8786613063,"paraphrase-detection_parsinlu_recall_modified":0.8550795594,"paraphrase-detection_parsinlu_fscore_modified":0.8617882093,"paraphrase-detection_parsinlu_acc":0.868,"paraphrase-detection_parsinlu_precision":0.8786613063,"paraphrase-detection_parsinlu_recall":0.8550795594,"paraphrase-detection_parsinlu_fscore":0.8617882093,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.7050532433}
4
+ {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.826,"paraphrase-detection_parsinlu_precision_modified":0.8267131595,"paraphrase-detection_parsinlu_recall_modified":0.8165238678,"paraphrase-detection_parsinlu_fscore_modified":0.8200389709,"paraphrase-detection_parsinlu_acc":0.826,"paraphrase-detection_parsinlu_precision":0.8267131595,"paraphrase-detection_parsinlu_recall":0.8165238678,"paraphrase-detection_parsinlu_fscore":0.8200389709,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6361186163}
5
+ {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.86,"paraphrase-detection_parsinlu_precision_modified":0.8618019568,"paraphrase-detection_parsinlu_recall_modified":0.8516299393,"paraphrase-detection_parsinlu_fscore_modified":0.8553599196,"paraphrase-detection_parsinlu_acc":0.8617234469,"paraphrase-detection_parsinlu_precision":0.8635290148,"paraphrase-detection_parsinlu_recall":0.8533366126,"paraphrase-detection_parsinlu_fscore":0.8570740678,"paraphrase-detection_parsinlu_valid_output_ratio":0.998,"nlu_score":0.4086928082}
6
+ {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.854,"paraphrase-detection_parsinlu_precision_modified":0.8742015099,"paraphrase-detection_parsinlu_recall_modified":0.8365157079,"paraphrase-detection_parsinlu_fscore_modified":0.8449177639,"paraphrase-detection_parsinlu_acc":0.854,"paraphrase-detection_parsinlu_precision":0.8742015099,"paraphrase-detection_parsinlu_recall":0.8365157079,"paraphrase-detection_parsinlu_fscore":0.8449177639,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6460328733}
7
+ {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.876,"paraphrase-detection_parsinlu_precision_modified":0.8735210118,"paraphrase-detection_parsinlu_recall_modified":0.8735210118,"paraphrase-detection_parsinlu_fscore_modified":0.8735210118,"paraphrase-detection_parsinlu_acc":0.876,"paraphrase-detection_parsinlu_precision":0.8735210118,"paraphrase-detection_parsinlu_recall":0.8735210118,"paraphrase-detection_parsinlu_fscore":0.8735210118,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6714091535}
8
+ {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.802,"paraphrase-detection_parsinlu_precision_modified":0.854490281,"paraphrase-detection_parsinlu_recall_modified":0.772623419,"paraphrase-detection_parsinlu_fscore_modified":0.7797249009,"paraphrase-detection_parsinlu_acc":0.802,"paraphrase-detection_parsinlu_precision":0.854490281,"paraphrase-detection_parsinlu_recall":0.772623419,"paraphrase-detection_parsinlu_fscore":0.7797249009,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6944128198}
9
+ {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.8,"paraphrase-detection_parsinlu_precision_modified":0.8306007718,"paraphrase-detection_parsinlu_recall_modified":0.7754385965,"paraphrase-detection_parsinlu_fscore_modified":0.7824535756,"paraphrase-detection_parsinlu_acc":0.8,"paraphrase-detection_parsinlu_precision":0.8306007718,"paraphrase-detection_parsinlu_recall":0.7754385965,"paraphrase-detection_parsinlu_fscore":0.7824535756,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6443219619}
10
+ {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.814,"paraphrase-detection_parsinlu_precision_modified":0.8488669447,"paraphrase-detection_parsinlu_recall_modified":0.7894328845,"paraphrase-detection_parsinlu_fscore_modified":0.7974306363,"paraphrase-detection_parsinlu_acc":0.814,"paraphrase-detection_parsinlu_precision":0.8488669447,"paraphrase-detection_parsinlu_recall":0.7894328845,"paraphrase-detection_parsinlu_fscore":0.7974306363,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6992555201}
11
+ {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.78,"paraphrase-detection_parsinlu_precision_modified":0.8453499806,"paraphrase-detection_parsinlu_recall_modified":0.7464708282,"paraphrase-detection_parsinlu_fscore_modified":0.7504718351,"paraphrase-detection_parsinlu_acc":0.78,"paraphrase-detection_parsinlu_precision":0.8453499806,"paraphrase-detection_parsinlu_recall":0.7464708282,"paraphrase-detection_parsinlu_fscore":0.7504718351,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6749652797}
12
+ {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.824,"paraphrase-detection_parsinlu_precision_modified":0.8599831541,"paraphrase-detection_parsinlu_recall_modified":0.7999184007,"paraphrase-detection_parsinlu_fscore_modified":0.8085591465,"paraphrase-detection_parsinlu_acc":0.824,"paraphrase-detection_parsinlu_precision":0.8599831541,"paraphrase-detection_parsinlu_recall":0.7999184007,"paraphrase-detection_parsinlu_fscore":0.8085591465,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6255818412}
13
+ {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.634,"paraphrase-detection_parsinlu_precision_modified":0.7237713267,"paraphrase-detection_parsinlu_recall_modified":0.6720930233,"paraphrase-detection_parsinlu_fscore_modified":0.6220242152,"paraphrase-detection_parsinlu_acc":0.634,"paraphrase-detection_parsinlu_precision":0.7237713267,"paraphrase-detection_parsinlu_recall":0.6720930233,"paraphrase-detection_parsinlu_fscore":0.6220242152,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.3916645306}
14
+ {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.838,"paraphrase-detection_parsinlu_precision_modified":0.8576776974,"paraphrase-detection_parsinlu_recall_modified":0.819624643,"paraphrase-detection_parsinlu_fscore_modified":0.8275649186,"paraphrase-detection_parsinlu_acc":0.838,"paraphrase-detection_parsinlu_precision":0.8576776974,"paraphrase-detection_parsinlu_recall":0.819624643,"paraphrase-detection_parsinlu_fscore":0.8275649186,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6459120734}
15
+ {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.57,"paraphrase-detection_parsinlu_precision_modified":0.285,"paraphrase-detection_parsinlu_recall_modified":0.5,"paraphrase-detection_parsinlu_fscore_modified":0.3630573248,"paraphrase-detection_parsinlu_acc":0.57,"paraphrase-detection_parsinlu_precision":0.285,"paraphrase-detection_parsinlu_recall":0.5,"paraphrase-detection_parsinlu_fscore":0.3630573248,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.1368924446}
16
+ {"Model Name":"yandexgpt-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.728,"paraphrase-detection_parsinlu_precision_modified":0.7567556588,"paraphrase-detection_parsinlu_recall_modified":0.7050900303,"paraphrase-detection_parsinlu_fscore_modified":0.7106739724,"paraphrase-detection_parsinlu_acc":0.7895878525,"paraphrase-detection_parsinlu_precision":0.8207762026,"paraphrase-detection_parsinlu_recall":0.7647397292,"paraphrase-detection_parsinlu_fscore":0.7707960655,"paraphrase-detection_parsinlu_valid_output_ratio":0.922,"nlu_score":0.5761104945}
17
+ {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.66,"paraphrase-detection_parsinlu_precision_modified":0.7933390651,"paraphrase-detection_parsinlu_recall_modified":0.6057935537,"paraphrase-detection_parsinlu_fscore_modified":0.5625411726,"paraphrase-detection_parsinlu_acc":0.66,"paraphrase-detection_parsinlu_precision":0.7933390651,"paraphrase-detection_parsinlu_recall":0.6057935537,"paraphrase-detection_parsinlu_fscore":0.5625411726,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.5661558794}
18
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.796,"paraphrase-detection_parsinlu_precision_modified":0.8275307297,"paraphrase-detection_parsinlu_recall_modified":0.7707807282,"paraphrase-detection_parsinlu_fscore_modified":0.7792394123,"paraphrase-detection_parsinlu_acc":0.8139059305,"paraphrase-detection_parsinlu_precision":0.8461459404,"paraphrase-detection_parsinlu_recall":0.788119354,"paraphrase-detection_parsinlu_fscore":0.7967683152,"paraphrase-detection_parsinlu_valid_output_ratio":0.978,"nlu_score":0.628506628}
19
+ {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.878,"paraphrase-detection_parsinlu_precision_modified":0.8823390152,"paraphrase-detection_parsinlu_recall_modified":0.8689922481,"paraphrase-detection_parsinlu_fscore_modified":0.8736280355,"paraphrase-detection_parsinlu_acc":0.878,"paraphrase-detection_parsinlu_precision":0.8823390152,"paraphrase-detection_parsinlu_recall":0.8689922481,"paraphrase-detection_parsinlu_fscore":0.8736280355,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.699116864}
20
+ {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.864,"paraphrase-detection_parsinlu_precision_modified":0.8615596015,"paraphrase-detection_parsinlu_recall_modified":0.8607099143,"paraphrase-detection_parsinlu_fscore_modified":0.8611201882,"paraphrase-detection_parsinlu_acc":0.864,"paraphrase-detection_parsinlu_precision":0.8615596015,"paraphrase-detection_parsinlu_recall":0.8607099143,"paraphrase-detection_parsinlu_fscore":0.8611201882,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.3928685253}
21
+ {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.798,"paraphrase-detection_parsinlu_precision_modified":0.8383696273,"paraphrase-detection_parsinlu_recall_modified":0.7708282334,"paraphrase-detection_parsinlu_fscore_modified":0.7777278949,"paraphrase-detection_parsinlu_acc":0.798,"paraphrase-detection_parsinlu_precision":0.8383696273,"paraphrase-detection_parsinlu_recall":0.7708282334,"paraphrase-detection_parsinlu_fscore":0.7777278949,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.5968415875}
22
+ {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.848,"paraphrase-detection_parsinlu_precision_modified":0.8717792656,"paraphrase-detection_parsinlu_recall_modified":0.8289677683,"paraphrase-detection_parsinlu_fscore_modified":0.8376928465,"paraphrase-detection_parsinlu_acc":0.848,"paraphrase-detection_parsinlu_precision":0.8717792656,"paraphrase-detection_parsinlu_recall":0.8289677683,"paraphrase-detection_parsinlu_fscore":0.8376928465,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.7146808531}
23
+ {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.844,"paraphrase-detection_parsinlu_precision_modified":0.8671683358,"paraphrase-detection_parsinlu_recall_modified":0.8248878009,"paraphrase-detection_parsinlu_fscore_modified":0.8334216056,"paraphrase-detection_parsinlu_acc":0.844,"paraphrase-detection_parsinlu_precision":0.8671683358,"paraphrase-detection_parsinlu_recall":0.8248878009,"paraphrase-detection_parsinlu_fscore":0.8334216056,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.7143086066}
24
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.834,"paraphrase-detection_parsinlu_precision_modified":0.8710205993,"paraphrase-detection_parsinlu_recall_modified":0.8104039168,"paraphrase-detection_parsinlu_fscore_modified":0.8196585245,"paraphrase-detection_parsinlu_acc":0.834,"paraphrase-detection_parsinlu_precision":0.8710205993,"paraphrase-detection_parsinlu_recall":0.8104039168,"paraphrase-detection_parsinlu_fscore":0.8196585245,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6470954618}
 
 
25
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.792,"paraphrase-detection_parsinlu_precision_modified":0.8370839341,"paraphrase-detection_parsinlu_recall_modified":0.7632802938,"paraphrase-detection_parsinlu_fscore_modified":0.7695689166,"paraphrase-detection_parsinlu_acc":0.792,"paraphrase-detection_parsinlu_precision":0.8370839341,"paraphrase-detection_parsinlu_recall":0.7632802938,"paraphrase-detection_parsinlu_fscore":0.7695689166,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6458443785}
26
+ {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.838,"paraphrase-detection_parsinlu_precision_modified":0.8416530278,"paraphrase-detection_parsinlu_recall_modified":0.8270501836,"paraphrase-detection_parsinlu_fscore_modified":0.8316645261,"paraphrase-detection_parsinlu_acc":0.838,"paraphrase-detection_parsinlu_precision":0.8416530278,"paraphrase-detection_parsinlu_recall":0.8270501836,"paraphrase-detection_parsinlu_fscore":0.8316645261,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6241793507}
27
+ {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.0,"paraphrase-detection_parsinlu_precision_modified":0.0,"paraphrase-detection_parsinlu_recall_modified":0.0,"paraphrase-detection_parsinlu_fscore_modified":0.0,"paraphrase-detection_parsinlu_acc":0.0,"paraphrase-detection_parsinlu_precision":0.0,"paraphrase-detection_parsinlu_recall":0.0,"paraphrase-detection_parsinlu_fscore":0.0,"paraphrase-detection_parsinlu_valid_output_ratio":0.0,"nlu_score":0.0372987683}
 
 
 
 
 
 
28
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.744,"paraphrase-detection_parsinlu_precision_modified":0.7517357255,"paraphrase-detection_parsinlu_recall_modified":0.7357334824,"paraphrase-detection_parsinlu_fscore_modified":0.7395,"paraphrase-detection_parsinlu_acc":0.8416289593,"paraphrase-detection_parsinlu_precision":0.85037978,"paraphrase-detection_parsinlu_recall":0.832277695,"paraphrase-detection_parsinlu_fscore":0.8365384615,"paraphrase-detection_parsinlu_valid_output_ratio":0.884,"nlu_score":0.3749414991}
29
+ {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.826,"paraphrase-detection_parsinlu_precision_modified":0.861267166,"paraphrase-detection_parsinlu_recall_modified":0.802243982,"paraphrase-detection_parsinlu_fscore_modified":0.8109673691,"paraphrase-detection_parsinlu_acc":0.826,"paraphrase-detection_parsinlu_precision":0.861267166,"paraphrase-detection_parsinlu_recall":0.802243982,"paraphrase-detection_parsinlu_fscore":0.8109673691,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6758278127}
30
+ {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.88,"paraphrase-detection_parsinlu_precision_modified":0.8945054945,"paraphrase-detection_parsinlu_recall_modified":0.8661770706,"paraphrase-detection_parsinlu_fscore_modified":0.8738965952,"paraphrase-detection_parsinlu_acc":0.88,"paraphrase-detection_parsinlu_precision":0.8945054945,"paraphrase-detection_parsinlu_recall":0.8661770706,"paraphrase-detection_parsinlu_fscore":0.8738965952,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.4824528512}
31
+ {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.888,"paraphrase-detection_parsinlu_precision_modified":0.8885737506,"paraphrase-detection_parsinlu_recall_modified":0.8823337413,"paraphrase-detection_parsinlu_fscore_modified":0.8849043884,"paraphrase-detection_parsinlu_acc":0.888,"paraphrase-detection_parsinlu_precision":0.8885737506,"paraphrase-detection_parsinlu_recall":0.8823337413,"paraphrase-detection_parsinlu_fscore":0.8849043884,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6914202844}
32
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.274,"paraphrase-detection_parsinlu_precision_modified":0.2627667984,"paraphrase-detection_parsinlu_recall_modified":0.2313142857,"paraphrase-detection_parsinlu_fscore_modified":0.2414462011,"paraphrase-detection_parsinlu_acc":0.85625,"paraphrase-detection_parsinlu_precision":0.8211462451,"paraphrase-detection_parsinlu_recall":0.7228571429,"paraphrase-detection_parsinlu_fscore":0.7545193783,"paraphrase-detection_parsinlu_valid_output_ratio":0.32,"nlu_score":0.456845738}
33
+ {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.856,"paraphrase-detection_parsinlu_precision_modified":0.8688030303,"paraphrase-detection_parsinlu_recall_modified":0.8427539068,"paraphrase-detection_parsinlu_fscore_modified":0.8496988866,"paraphrase-detection_parsinlu_acc":0.8681541582,"paraphrase-detection_parsinlu_precision":0.881138976,"paraphrase-detection_parsinlu_recall":0.8547199866,"paraphrase-detection_parsinlu_fscore":0.8617635767,"paraphrase-detection_parsinlu_valid_output_ratio":0.986,"nlu_score":0.7178891542}
34
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.764,"paraphrase-detection_parsinlu_precision_modified":0.8406006212,"paraphrase-detection_parsinlu_recall_modified":0.7272949816,"paraphrase-detection_parsinlu_fscore_modified":0.7279551449,"paraphrase-detection_parsinlu_acc":0.764,"paraphrase-detection_parsinlu_precision":0.8406006212,"paraphrase-detection_parsinlu_recall":0.7272949816,"paraphrase-detection_parsinlu_fscore":0.7279551449,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6752949557}
35
+ {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.764,"paraphrase-detection_parsinlu_precision_modified":0.7656475469,"paraphrase-detection_parsinlu_recall_modified":0.7528414743,"paraphrase-detection_parsinlu_fscore_modified":0.756438131,"paraphrase-detection_parsinlu_acc":0.7925311203,"paraphrase-detection_parsinlu_precision":0.7942401938,"paraphrase-detection_parsinlu_recall":0.7809558862,"paraphrase-detection_parsinlu_fscore":0.7846868579,"paraphrase-detection_parsinlu_valid_output_ratio":0.964,"nlu_score":0.5241296095}
36
+ {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.072,"paraphrase-detection_parsinlu_precision_modified":0.036,"paraphrase-detection_parsinlu_recall_modified":0.066,"paraphrase-detection_parsinlu_fscore_modified":0.0465882353,"paraphrase-detection_parsinlu_acc":0.5454545455,"paraphrase-detection_parsinlu_precision":0.2727272727,"paraphrase-detection_parsinlu_recall":0.5,"paraphrase-detection_parsinlu_fscore":0.3529411765,"paraphrase-detection_parsinlu_valid_output_ratio":0.132,"nlu_score":0.046805056}
37
+ {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.804,"paraphrase-detection_parsinlu_precision_modified":0.8556500813,"paraphrase-detection_parsinlu_recall_modified":0.7749490004,"paraphrase-detection_parsinlu_fscore_modified":0.7822570611,"paraphrase-detection_parsinlu_acc":0.804,"paraphrase-detection_parsinlu_precision":0.8556500813,"paraphrase-detection_parsinlu_recall":0.7749490004,"paraphrase-detection_parsinlu_fscore":0.7822570611,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6833497104}
38
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.672,"paraphrase-detection_parsinlu_precision_modified":0.7868195779,"paraphrase-detection_parsinlu_recall_modified":0.6265486372,"paraphrase-detection_parsinlu_fscore_modified":0.5973050157,"paraphrase-detection_parsinlu_acc":0.6801619433,"paraphrase-detection_parsinlu_precision":0.796376091,"paraphrase-detection_parsinlu_recall":0.6341585396,"paraphrase-detection_parsinlu_fscore":0.6045597325,"paraphrase-detection_parsinlu_valid_output_ratio":0.988,"nlu_score":0.5121418762}
39
+ {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.876,"paraphrase-detection_parsinlu_precision_modified":0.8806824921,"paraphrase-detection_parsinlu_recall_modified":0.8666666667,"paraphrase-detection_parsinlu_fscore_modified":0.871456768,"paraphrase-detection_parsinlu_acc":0.876,"paraphrase-detection_parsinlu_precision":0.8806824921,"paraphrase-detection_parsinlu_recall":0.8666666667,"paraphrase-detection_parsinlu_fscore":0.871456768,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6898261633}
40
+ {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.812,"paraphrase-detection_parsinlu_precision_modified":0.8560421519,"paraphrase-detection_parsinlu_recall_modified":0.7853480321,"paraphrase-detection_parsinlu_fscore_modified":0.7936411239,"paraphrase-detection_parsinlu_acc":0.8152610442,"paraphrase-detection_parsinlu_precision":0.8594800722,"paraphrase-detection_parsinlu_recall":0.7885020403,"paraphrase-detection_parsinlu_fscore":0.7968284376,"paraphrase-detection_parsinlu_valid_output_ratio":0.996,"nlu_score":0.7144353486}
41
+ {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.866,"paraphrase-detection_parsinlu_precision_modified":0.8771198269,"paraphrase-detection_parsinlu_recall_modified":0.852753978,"paraphrase-detection_parsinlu_fscore_modified":0.8595688134,"paraphrase-detection_parsinlu_acc":0.866,"paraphrase-detection_parsinlu_precision":0.8771198269,"paraphrase-detection_parsinlu_recall":0.852753978,"paraphrase-detection_parsinlu_fscore":0.8595688134,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6552152029}
42
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.828,"paraphrase-detection_parsinlu_precision_modified":0.8504577445,"paraphrase-detection_parsinlu_recall_modified":0.807996736,"paraphrase-detection_parsinlu_fscore_modified":0.8159372646,"paraphrase-detection_parsinlu_acc":0.828,"paraphrase-detection_parsinlu_precision":0.8504577445,"paraphrase-detection_parsinlu_recall":0.807996736,"paraphrase-detection_parsinlu_fscore":0.8159372646,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6800109206}
43
+ {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.576,"paraphrase-detection_parsinlu_precision_modified":0.6352777945,"paraphrase-detection_parsinlu_recall_modified":0.6097919217,"paraphrase-detection_parsinlu_fscore_modified":0.5654241624,"paraphrase-detection_parsinlu_acc":0.576,"paraphrase-detection_parsinlu_precision":0.6352777945,"paraphrase-detection_parsinlu_recall":0.6097919217,"paraphrase-detection_parsinlu_fscore":0.5654241624,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.3619547874}
44
+ {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.872,"paraphrase-detection_parsinlu_precision_modified":0.872593554,"paraphrase-detection_parsinlu_recall_modified":0.8654426765,"paraphrase-detection_parsinlu_fscore_modified":0.8682824025,"paraphrase-detection_parsinlu_acc":0.872,"paraphrase-detection_parsinlu_precision":0.872593554,"paraphrase-detection_parsinlu_recall":0.8654426765,"paraphrase-detection_parsinlu_fscore":0.8682824025,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6297634971}
45
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.85,"paraphrase-detection_parsinlu_precision_modified":0.8811252269,"paraphrase-detection_parsinlu_recall_modified":0.8290085679,"paraphrase-detection_parsinlu_fscore_modified":0.8385784573,"paraphrase-detection_parsinlu_acc":0.85,"paraphrase-detection_parsinlu_precision":0.8811252269,"paraphrase-detection_parsinlu_recall":0.8290085679,"paraphrase-detection_parsinlu_fscore":0.8385784573,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.7207167537}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leaderboard/boards_data/persian_csr.jsonl CHANGED
@@ -37,6 +37,7 @@
37
  {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.5193635545,"acc_strict":0.5192634844,"donyaeeqtesad_acc":0.5377643505,"isna_acc":0.4784313725,"ninisite_article_acc":0.5609756098,"virgool_4_acc":0.5407854985,"khabaronline_acc":0.456,"digiato_acc":0.5209205021,"doctoreto_acc":0.55,"sarzamindownload_acc":0.5555555556,"hamgardi_acc":0.4896755162,"bigbangpage_acc":0.5350318471,"wiki_ahlolbait_acc":0.6052631579,"virgool_3_acc":0.528358209,"virgool_2_acc":0.5412844037,"virgool_1_acc":0.5316455696,"hamshahrionline_acc":0.5328947368,"tabnak_acc":0.5394190871,"alibaba_acc":0.5653594771,"digikala_mag_acc":0.5210420842,"yjc_acc":0.4482758621,"beytoote_acc":0.512465374,"asriran_acc":0.4927536232,"ecoiran_acc":0.4698412698,"hawzah_acc":0.5056179775,"zoomit_acc":0.5563380282,"wikipedia_acc":0.580952381,"namnak_acc":0.4904632153,"khodro45_acc":0.6029411765,"fidibo_acc":0.5594713656,"newmiind_acc":0.4826388889,"taaghche_acc":0.4580645161,"motamem_acc":0.6,"varzesh3_acc":0.4832214765,"mehrnews_acc":0.4717741935,"tasnim_acc":0.5250965251,"magerta_acc":0.5084033613,"radiokodak_book_acc":0.3913043478,"vipofilm_acc":0.6153846154,"wikishia_acc":0.5454545455,"voolak_acc":0.3953488372,"farsroid_acc":0.5526315789,"parsiday_acc":0.4083333333,"soft98_acc":0.5,"ninisite_discussion_acc":0.4}
38
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4020000000","source_type":"Open-Source","acc":0.5033,"acc_strict":0.5033,"donyaeeqtesad_acc":0.4954682779,"isna_acc":0.48046875,"ninisite_article_acc":0.4810810811,"virgool_4_acc":0.5256797583,"khabaronline_acc":0.504,"digiato_acc":0.5073068894,"doctoreto_acc":0.615,"sarzamindownload_acc":0.4901960784,"hamgardi_acc":0.4601769912,"bigbangpage_acc":0.5414012739,"wiki_ahlolbait_acc":0.5197368421,"virgool_3_acc":0.5731343284,"virgool_2_acc":0.5565749235,"virgool_1_acc":0.5094936709,"hamshahrionline_acc":0.4655737705,"tabnak_acc":0.5145228216,"alibaba_acc":0.5098039216,"digikala_mag_acc":0.5230460922,"yjc_acc":0.5114942529,"beytoote_acc":0.4764542936,"asriran_acc":0.4782608696,"ecoiran_acc":0.4253968254,"hawzah_acc":0.5028089888,"zoomit_acc":0.5328638498,"wikipedia_acc":0.6047619048,"namnak_acc":0.4795640327,"khodro45_acc":0.6102941176,"fidibo_acc":0.550660793,"newmiind_acc":0.4895833333,"taaghche_acc":0.5064102564,"motamem_acc":0.5894736842,"varzesh3_acc":0.3913043478,"mehrnews_acc":0.439516129,"tasnim_acc":0.4807692308,"magerta_acc":0.5546218487,"radiokodak_book_acc":0.347826087,"vipofilm_acc":0.5384615385,"wikishia_acc":0.5454545455,"voolak_acc":0.488372093,"farsroid_acc":0.5263157895,"parsiday_acc":0.3083333333,"soft98_acc":0.7,"ninisite_discussion_acc":0.1}
39
  {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4300000000","source_type":"Open-Source","acc":0.4832,"acc_strict":0.4832,"donyaeeqtesad_acc":0.498489426,"isna_acc":0.44140625,"ninisite_article_acc":0.4486486486,"virgool_4_acc":0.5075528701,"khabaronline_acc":0.504,"digiato_acc":0.4822546973,"doctoreto_acc":0.5,"sarzamindownload_acc":0.4117647059,"hamgardi_acc":0.5250737463,"bigbangpage_acc":0.5031847134,"wiki_ahlolbait_acc":0.5197368421,"virgool_3_acc":0.4985074627,"virgool_2_acc":0.4495412844,"virgool_1_acc":0.5063291139,"hamshahrionline_acc":0.5344262295,"tabnak_acc":0.4605809129,"alibaba_acc":0.5032679739,"digikala_mag_acc":0.4729458918,"yjc_acc":0.4482758621,"beytoote_acc":0.4903047091,"asriran_acc":0.4734299517,"ecoiran_acc":0.419047619,"hawzah_acc":0.4831460674,"zoomit_acc":0.5305164319,"wikipedia_acc":0.5666666667,"namnak_acc":0.4931880109,"khodro45_acc":0.5294117647,"fidibo_acc":0.4801762115,"newmiind_acc":0.4479166667,"taaghche_acc":0.4230769231,"motamem_acc":0.6421052632,"varzesh3_acc":0.4515050167,"mehrnews_acc":0.4072580645,"tasnim_acc":0.5,"magerta_acc":0.4453781513,"radiokodak_book_acc":0.5217391304,"vipofilm_acc":0.5384615385,"wikishia_acc":0.6363636364,"voolak_acc":0.4651162791,"farsroid_acc":0.4736842105,"parsiday_acc":0.35,"soft98_acc":0.9,"ninisite_discussion_acc":0.5}
 
40
  {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https_google.com","parameters_count":"7250000000","source_type":"Open-Source","acc":0.3015,"acc_strict":0.3011,"donyaeeqtesad_acc":0.2990936556,"isna_acc":0.29296875,"ninisite_article_acc":0.2864864865,"virgool_4_acc":0.2839879154,"khabaronline_acc":0.276,"digiato_acc":0.2922755741,"doctoreto_acc":0.345,"sarzamindownload_acc":0.3267973856,"hamgardi_acc":0.3215339233,"bigbangpage_acc":0.2547770701,"wiki_ahlolbait_acc":0.2894736842,"virgool_3_acc":0.3313432836,"virgool_2_acc":0.3058103976,"virgool_1_acc":0.3512658228,"hamshahrionline_acc":0.262295082,"tabnak_acc":0.3278008299,"alibaba_acc":0.3235294118,"digikala_mag_acc":0.3206412826,"yjc_acc":0.2816091954,"beytoote_acc":0.2991689751,"asriran_acc":0.3188405797,"ecoiran_acc":0.2698412698,"hawzah_acc":0.3174157303,"zoomit_acc":0.3028169014,"wikipedia_acc":0.3380952381,"namnak_acc":0.2888283379,"khodro45_acc":0.3308823529,"fidibo_acc":0.3259911894,"newmiind_acc":0.2916666667,"taaghche_acc":0.2371794872,"motamem_acc":0.4,"varzesh3_acc":0.2240802676,"mehrnews_acc":0.2459677419,"tasnim_acc":0.3346153846,"magerta_acc":0.3361344538,"radiokodak_book_acc":0.3043478261,"vipofilm_acc":0.4615384615,"wikishia_acc":0.2727272727,"voolak_acc":0.4418604651,"farsroid_acc":0.1578947368,"parsiday_acc":0.2083333333,"soft98_acc":0.7,"ninisite_discussion_acc":0.1}
41
  {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"1000000000","source_type":"Open-Source","acc":0.2599,"acc_strict":0.2599,"donyaeeqtesad_acc":0.2719033233,"isna_acc":0.25,"ninisite_article_acc":0.2567567568,"virgool_4_acc":0.3202416918,"khabaronline_acc":0.276,"digiato_acc":0.2630480167,"doctoreto_acc":0.245,"sarzamindownload_acc":0.2418300654,"hamgardi_acc":0.2979351032,"bigbangpage_acc":0.2484076433,"wiki_ahlolbait_acc":0.2631578947,"virgool_3_acc":0.2507462687,"virgool_2_acc":0.247706422,"virgool_1_acc":0.2594936709,"hamshahrionline_acc":0.2852459016,"tabnak_acc":0.2489626556,"alibaba_acc":0.2712418301,"digikala_mag_acc":0.2705410822,"yjc_acc":0.275862069,"beytoote_acc":0.2603878116,"asriran_acc":0.2608695652,"ecoiran_acc":0.2634920635,"hawzah_acc":0.2724719101,"zoomit_acc":0.2511737089,"wikipedia_acc":0.2857142857,"namnak_acc":0.2098092643,"khodro45_acc":0.2720588235,"fidibo_acc":0.2466960352,"newmiind_acc":0.2222222222,"taaghche_acc":0.25,"motamem_acc":0.2947368421,"varzesh3_acc":0.2441471572,"mehrnews_acc":0.25,"tasnim_acc":0.2692307692,"magerta_acc":0.2352941176,"radiokodak_book_acc":0.3043478261,"vipofilm_acc":0.3846153846,"wikishia_acc":0.1515151515,"voolak_acc":0.2558139535,"farsroid_acc":0.2631578947,"parsiday_acc":0.1916666667,"soft98_acc":0.1,"ninisite_discussion_acc":0.4}
42
  {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https_google.com","parameters_count":"3210000000","source_type":"Open-Source","acc":0.2521,"acc_strict":0.2517,"donyaeeqtesad_acc":0.2779456193,"isna_acc":0.29296875,"ninisite_article_acc":0.2594594595,"virgool_4_acc":0.2235649547,"khabaronline_acc":0.2,"digiato_acc":0.24217119,"doctoreto_acc":0.24,"sarzamindownload_acc":0.2352941176,"hamgardi_acc":0.2684365782,"bigbangpage_acc":0.2802547771,"wiki_ahlolbait_acc":0.2368421053,"virgool_3_acc":0.2298507463,"virgool_2_acc":0.2599388379,"virgool_1_acc":0.2689873418,"hamshahrionline_acc":0.2327868852,"tabnak_acc":0.2697095436,"alibaba_acc":0.2124183007,"digikala_mag_acc":0.246492986,"yjc_acc":0.2586206897,"beytoote_acc":0.2631578947,"asriran_acc":0.2898550725,"ecoiran_acc":0.2603174603,"hawzah_acc":0.2556179775,"zoomit_acc":0.2887323944,"wikipedia_acc":0.2238095238,"namnak_acc":0.2561307902,"khodro45_acc":0.25,"fidibo_acc":0.2202643172,"newmiind_acc":0.25,"taaghche_acc":0.2692307692,"motamem_acc":0.2842105263,"varzesh3_acc":0.2107023411,"mehrnews_acc":0.2338709677,"tasnim_acc":0.2307692308,"magerta_acc":0.3235294118,"radiokodak_book_acc":0.1739130435,"vipofilm_acc":0.4615384615,"wikishia_acc":0.3333333333,"voolak_acc":0.2790697674,"farsroid_acc":0.2368421053,"parsiday_acc":0.1833333333,"soft98_acc":0.3,"ninisite_discussion_acc":0.5}
 
37
  {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.5193635545,"acc_strict":0.5192634844,"donyaeeqtesad_acc":0.5377643505,"isna_acc":0.4784313725,"ninisite_article_acc":0.5609756098,"virgool_4_acc":0.5407854985,"khabaronline_acc":0.456,"digiato_acc":0.5209205021,"doctoreto_acc":0.55,"sarzamindownload_acc":0.5555555556,"hamgardi_acc":0.4896755162,"bigbangpage_acc":0.5350318471,"wiki_ahlolbait_acc":0.6052631579,"virgool_3_acc":0.528358209,"virgool_2_acc":0.5412844037,"virgool_1_acc":0.5316455696,"hamshahrionline_acc":0.5328947368,"tabnak_acc":0.5394190871,"alibaba_acc":0.5653594771,"digikala_mag_acc":0.5210420842,"yjc_acc":0.4482758621,"beytoote_acc":0.512465374,"asriran_acc":0.4927536232,"ecoiran_acc":0.4698412698,"hawzah_acc":0.5056179775,"zoomit_acc":0.5563380282,"wikipedia_acc":0.580952381,"namnak_acc":0.4904632153,"khodro45_acc":0.6029411765,"fidibo_acc":0.5594713656,"newmiind_acc":0.4826388889,"taaghche_acc":0.4580645161,"motamem_acc":0.6,"varzesh3_acc":0.4832214765,"mehrnews_acc":0.4717741935,"tasnim_acc":0.5250965251,"magerta_acc":0.5084033613,"radiokodak_book_acc":0.3913043478,"vipofilm_acc":0.6153846154,"wikishia_acc":0.5454545455,"voolak_acc":0.3953488372,"farsroid_acc":0.5526315789,"parsiday_acc":0.4083333333,"soft98_acc":0.5,"ninisite_discussion_acc":0.4}
38
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4020000000","source_type":"Open-Source","acc":0.5033,"acc_strict":0.5033,"donyaeeqtesad_acc":0.4954682779,"isna_acc":0.48046875,"ninisite_article_acc":0.4810810811,"virgool_4_acc":0.5256797583,"khabaronline_acc":0.504,"digiato_acc":0.5073068894,"doctoreto_acc":0.615,"sarzamindownload_acc":0.4901960784,"hamgardi_acc":0.4601769912,"bigbangpage_acc":0.5414012739,"wiki_ahlolbait_acc":0.5197368421,"virgool_3_acc":0.5731343284,"virgool_2_acc":0.5565749235,"virgool_1_acc":0.5094936709,"hamshahrionline_acc":0.4655737705,"tabnak_acc":0.5145228216,"alibaba_acc":0.5098039216,"digikala_mag_acc":0.5230460922,"yjc_acc":0.5114942529,"beytoote_acc":0.4764542936,"asriran_acc":0.4782608696,"ecoiran_acc":0.4253968254,"hawzah_acc":0.5028089888,"zoomit_acc":0.5328638498,"wikipedia_acc":0.6047619048,"namnak_acc":0.4795640327,"khodro45_acc":0.6102941176,"fidibo_acc":0.550660793,"newmiind_acc":0.4895833333,"taaghche_acc":0.5064102564,"motamem_acc":0.5894736842,"varzesh3_acc":0.3913043478,"mehrnews_acc":0.439516129,"tasnim_acc":0.4807692308,"magerta_acc":0.5546218487,"radiokodak_book_acc":0.347826087,"vipofilm_acc":0.5384615385,"wikishia_acc":0.5454545455,"voolak_acc":0.488372093,"farsroid_acc":0.5263157895,"parsiday_acc":0.3083333333,"soft98_acc":0.7,"ninisite_discussion_acc":0.1}
39
  {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4300000000","source_type":"Open-Source","acc":0.4832,"acc_strict":0.4832,"donyaeeqtesad_acc":0.498489426,"isna_acc":0.44140625,"ninisite_article_acc":0.4486486486,"virgool_4_acc":0.5075528701,"khabaronline_acc":0.504,"digiato_acc":0.4822546973,"doctoreto_acc":0.5,"sarzamindownload_acc":0.4117647059,"hamgardi_acc":0.5250737463,"bigbangpage_acc":0.5031847134,"wiki_ahlolbait_acc":0.5197368421,"virgool_3_acc":0.4985074627,"virgool_2_acc":0.4495412844,"virgool_1_acc":0.5063291139,"hamshahrionline_acc":0.5344262295,"tabnak_acc":0.4605809129,"alibaba_acc":0.5032679739,"digikala_mag_acc":0.4729458918,"yjc_acc":0.4482758621,"beytoote_acc":0.4903047091,"asriran_acc":0.4734299517,"ecoiran_acc":0.419047619,"hawzah_acc":0.4831460674,"zoomit_acc":0.5305164319,"wikipedia_acc":0.5666666667,"namnak_acc":0.4931880109,"khodro45_acc":0.5294117647,"fidibo_acc":0.4801762115,"newmiind_acc":0.4479166667,"taaghche_acc":0.4230769231,"motamem_acc":0.6421052632,"varzesh3_acc":0.4515050167,"mehrnews_acc":0.4072580645,"tasnim_acc":0.5,"magerta_acc":0.4453781513,"radiokodak_book_acc":0.5217391304,"vipofilm_acc":0.5384615385,"wikishia_acc":0.6363636364,"voolak_acc":0.4651162791,"farsroid_acc":0.4736842105,"parsiday_acc":0.35,"soft98_acc":0.9,"ninisite_discussion_acc":0.5}
40
+ {"Model Name":"yandexgpt-lite","thinking_method":"❌","model_url":"https_google.com","parameters_count":"8000000000","source_type":"Open-Source","acc":0.3756,"acc_strict":0.3756,"donyaeeqtesad_acc":0.3021148036,"isna_acc":0.296875,"ninisite_article_acc":0.4216216216,"virgool_4_acc":0.4894259819,"khabaronline_acc":0.336,"digiato_acc":0.3736951983,"doctoreto_acc":0.405,"sarzamindownload_acc":0.3921568627,"hamgardi_acc":0.3805309735,"bigbangpage_acc":0.4904458599,"wiki_ahlolbait_acc":0.2697368421,"virgool_3_acc":0.4358208955,"virgool_2_acc":0.3944954128,"virgool_1_acc":0.3955696203,"hamshahrionline_acc":0.393442623,"tabnak_acc":0.3692946058,"alibaba_acc":0.4019607843,"digikala_mag_acc":0.4028056112,"yjc_acc":0.3275862069,"beytoote_acc":0.4265927978,"asriran_acc":0.3381642512,"ecoiran_acc":0.2793650794,"hawzah_acc":0.2808988764,"zoomit_acc":0.4389671362,"wikipedia_acc":0.4142857143,"namnak_acc":0.4114441417,"khodro45_acc":0.4044117647,"fidibo_acc":0.4185022026,"newmiind_acc":0.3923611111,"taaghche_acc":0.3653846154,"motamem_acc":0.5052631579,"varzesh3_acc":0.27090301,"mehrnews_acc":0.3306451613,"tasnim_acc":0.2769230769,"magerta_acc":0.3655462185,"radiokodak_book_acc":0.2608695652,"vipofilm_acc":0.5384615385,"wikishia_acc":0.2727272727,"voolak_acc":0.3953488372,"farsroid_acc":0.3421052632,"parsiday_acc":0.2666666667,"soft98_acc":0.5,"ninisite_discussion_acc":0.5}
41
  {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https_google.com","parameters_count":"7250000000","source_type":"Open-Source","acc":0.3015,"acc_strict":0.3011,"donyaeeqtesad_acc":0.2990936556,"isna_acc":0.29296875,"ninisite_article_acc":0.2864864865,"virgool_4_acc":0.2839879154,"khabaronline_acc":0.276,"digiato_acc":0.2922755741,"doctoreto_acc":0.345,"sarzamindownload_acc":0.3267973856,"hamgardi_acc":0.3215339233,"bigbangpage_acc":0.2547770701,"wiki_ahlolbait_acc":0.2894736842,"virgool_3_acc":0.3313432836,"virgool_2_acc":0.3058103976,"virgool_1_acc":0.3512658228,"hamshahrionline_acc":0.262295082,"tabnak_acc":0.3278008299,"alibaba_acc":0.3235294118,"digikala_mag_acc":0.3206412826,"yjc_acc":0.2816091954,"beytoote_acc":0.2991689751,"asriran_acc":0.3188405797,"ecoiran_acc":0.2698412698,"hawzah_acc":0.3174157303,"zoomit_acc":0.3028169014,"wikipedia_acc":0.3380952381,"namnak_acc":0.2888283379,"khodro45_acc":0.3308823529,"fidibo_acc":0.3259911894,"newmiind_acc":0.2916666667,"taaghche_acc":0.2371794872,"motamem_acc":0.4,"varzesh3_acc":0.2240802676,"mehrnews_acc":0.2459677419,"tasnim_acc":0.3346153846,"magerta_acc":0.3361344538,"radiokodak_book_acc":0.3043478261,"vipofilm_acc":0.4615384615,"wikishia_acc":0.2727272727,"voolak_acc":0.4418604651,"farsroid_acc":0.1578947368,"parsiday_acc":0.2083333333,"soft98_acc":0.7,"ninisite_discussion_acc":0.1}
42
  {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"1000000000","source_type":"Open-Source","acc":0.2599,"acc_strict":0.2599,"donyaeeqtesad_acc":0.2719033233,"isna_acc":0.25,"ninisite_article_acc":0.2567567568,"virgool_4_acc":0.3202416918,"khabaronline_acc":0.276,"digiato_acc":0.2630480167,"doctoreto_acc":0.245,"sarzamindownload_acc":0.2418300654,"hamgardi_acc":0.2979351032,"bigbangpage_acc":0.2484076433,"wiki_ahlolbait_acc":0.2631578947,"virgool_3_acc":0.2507462687,"virgool_2_acc":0.247706422,"virgool_1_acc":0.2594936709,"hamshahrionline_acc":0.2852459016,"tabnak_acc":0.2489626556,"alibaba_acc":0.2712418301,"digikala_mag_acc":0.2705410822,"yjc_acc":0.275862069,"beytoote_acc":0.2603878116,"asriran_acc":0.2608695652,"ecoiran_acc":0.2634920635,"hawzah_acc":0.2724719101,"zoomit_acc":0.2511737089,"wikipedia_acc":0.2857142857,"namnak_acc":0.2098092643,"khodro45_acc":0.2720588235,"fidibo_acc":0.2466960352,"newmiind_acc":0.2222222222,"taaghche_acc":0.25,"motamem_acc":0.2947368421,"varzesh3_acc":0.2441471572,"mehrnews_acc":0.25,"tasnim_acc":0.2692307692,"magerta_acc":0.2352941176,"radiokodak_book_acc":0.3043478261,"vipofilm_acc":0.3846153846,"wikishia_acc":0.1515151515,"voolak_acc":0.2558139535,"farsroid_acc":0.2631578947,"parsiday_acc":0.1916666667,"soft98_acc":0.1,"ninisite_discussion_acc":0.4}
43
  {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https_google.com","parameters_count":"3210000000","source_type":"Open-Source","acc":0.2521,"acc_strict":0.2517,"donyaeeqtesad_acc":0.2779456193,"isna_acc":0.29296875,"ninisite_article_acc":0.2594594595,"virgool_4_acc":0.2235649547,"khabaronline_acc":0.2,"digiato_acc":0.24217119,"doctoreto_acc":0.24,"sarzamindownload_acc":0.2352941176,"hamgardi_acc":0.2684365782,"bigbangpage_acc":0.2802547771,"wiki_ahlolbait_acc":0.2368421053,"virgool_3_acc":0.2298507463,"virgool_2_acc":0.2599388379,"virgool_1_acc":0.2689873418,"hamshahrionline_acc":0.2327868852,"tabnak_acc":0.2697095436,"alibaba_acc":0.2124183007,"digikala_mag_acc":0.246492986,"yjc_acc":0.2586206897,"beytoote_acc":0.2631578947,"asriran_acc":0.2898550725,"ecoiran_acc":0.2603174603,"hawzah_acc":0.2556179775,"zoomit_acc":0.2887323944,"wikipedia_acc":0.2238095238,"namnak_acc":0.2561307902,"khodro45_acc":0.25,"fidibo_acc":0.2202643172,"newmiind_acc":0.25,"taaghche_acc":0.2692307692,"motamem_acc":0.2842105263,"varzesh3_acc":0.2107023411,"mehrnews_acc":0.2338709677,"tasnim_acc":0.2307692308,"magerta_acc":0.3235294118,"radiokodak_book_acc":0.1739130435,"vipofilm_acc":0.4615384615,"wikishia_acc":0.3333333333,"voolak_acc":0.2790697674,"farsroid_acc":0.2368421053,"parsiday_acc":0.1833333333,"soft98_acc":0.3,"ninisite_discussion_acc":0.5}
leaderboard/boards_data/persian_nlg.jsonl CHANGED
@@ -34,6 +34,7 @@
34
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1669255457,"question-generation_PersianQA_rougeL_recall":0.2952488346,"question-generation_PersianQA_rougeL_f1_score":0.2007786564},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.111865825,"translation-en2fa_en2fa_epoque_bleu":0.2732051575,"translation-en2fa_en2fa_mizan_bleu":0.0868500698,"translation-en2fa_en2fa_quran_bleu":0.0529852068,"translation-en2fa_en2fa_sahife_bleu":0.0502293343,"translation-en2fa_en2fa_nahj_bleu":0.0322064942,"translation-en2fa_en2fa_tep_bleu":0.0404507778},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1137219495,"summarization_SamSUM-fa_rougeL_recall":0.3496708707,"summarization_SamSUM-fa_rougeL_f1_score":0.1628971148},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.1249013271,"translation-fa2en_fa2en_tep_bleu":0.0564543942,"translation-fa2en_fa2en_mizan_bleu":0.0739643668,"translation-fa2en_fa2en_quran_bleu":0.0677317381,"translation-fa2en_fa2en_epoque_bleu":0.3111968032,"translation-fa2en_fa2en_nahj_bleu":0.0523541092,"translation-fa2en_fa2en_sahife_bleu":0.0479821907},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0439654732,"translation-ar2fa_ar2fa_sahife_bleu":0.0313283708,"translation-ar2fa_ar2fa_nahj_bleu":0.0308641232,"translation-ar2fa_ar2fa_quran_bleu":0.0690488581},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.0833149314,"summarization_PnSummary_rougeL_recall":0.4027758903,"summarization_PnSummary_rougeL_f1_score":0.1338404051},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0058610145,"translation-fa2ar_fa2ar_nahj_bleu":0.0063998692,"translation-fa2ar_fa2ar_sahife_bleu":0.0068172489,"translation-fa2ar_fa2ar_quran_bleu":0.0043275898},"nlg_score":0.112015688}
35
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.0870939736,"question-generation_PersianQA_rougeL_recall":0.3600941065,"question-generation_PersianQA_rougeL_f1_score":0.1336375958},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.056370201,"translation-en2fa_en2fa_epoque_bleu":0.1154392548,"translation-en2fa_en2fa_mizan_bleu":0.0484324583,"translation-en2fa_en2fa_quran_bleu":0.0612465488,"translation-en2fa_en2fa_sahife_bleu":0.0466818991,"translation-en2fa_en2fa_nahj_bleu":0.0218444477,"translation-en2fa_en2fa_tep_bleu":0.0118186665},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1772724525,"summarization_SamSUM-fa_rougeL_recall":0.341583677,"summarization_SamSUM-fa_rougeL_f1_score":0.2233271064},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.0691353117,"translation-fa2en_fa2en_tep_bleu":0.0320908261,"translation-fa2en_fa2en_mizan_bleu":0.0535229905,"translation-fa2en_fa2en_quran_bleu":0.0800143919,"translation-fa2en_fa2en_epoque_bleu":0.133977443,"translation-fa2en_fa2en_nahj_bleu":0.0362958954,"translation-fa2en_fa2en_sahife_bleu":0.0393317574},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0581992714,"translation-ar2fa_ar2fa_sahife_bleu":0.0540221076,"translation-ar2fa_ar2fa_nahj_bleu":0.0233017704,"translation-ar2fa_ar2fa_quran_bleu":0.095529061},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.132916538,"summarization_PnSummary_rougeL_recall":0.3579358655,"summarization_PnSummary_rougeL_f1_score":0.1887379797},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0331262129,"translation-fa2ar_fa2ar_nahj_bleu":0.0202107323,"translation-fa2ar_fa2ar_sahife_bleu":0.0280883311,"translation-fa2ar_fa2ar_quran_bleu":0.0510795752},"nlg_score":0.1089333827}
36
  {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"27400000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.2041596361,"question-generation_PersianQA_rougeL_recall":0.3456815337,"question-generation_PersianQA_rougeL_f1_score":0.2459732807},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.0439502467,"translation-en2fa_en2fa_epoque_bleu":0.0932804064,"translation-en2fa_en2fa_mizan_bleu":0.0446467932,"translation-en2fa_en2fa_quran_bleu":0.0435800727,"translation-en2fa_en2fa_sahife_bleu":0.0197005921,"translation-en2fa_en2fa_nahj_bleu":0.0132822652,"translation-en2fa_en2fa_tep_bleu":0.0087342692},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1588367988,"summarization_SamSUM-fa_rougeL_recall":0.3735722635,"summarization_SamSUM-fa_rougeL_f1_score":0.2131671502},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.0199585579,"translation-fa2en_fa2en_tep_bleu":0.0097804397,"translation-fa2en_fa2en_mizan_bleu":0.0144809896,"translation-fa2en_fa2en_quran_bleu":0.0259691427,"translation-fa2en_fa2en_epoque_bleu":0.0345304173,"translation-fa2en_fa2en_nahj_bleu":0.0150589625,"translation-fa2en_fa2en_sahife_bleu":0.0157047184},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0465792583,"translation-ar2fa_ar2fa_sahife_bleu":0.023795336,"translation-ar2fa_ar2fa_nahj_bleu":0.0121091058,"translation-ar2fa_ar2fa_quran_bleu":0.1021098256},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1124574222,"summarization_PnSummary_rougeL_recall":0.3717393409,"summarization_PnSummary_rougeL_f1_score":0.1673025553},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0100630648,"translation-fa2ar_fa2ar_nahj_bleu":0.0071647909,"translation-fa2ar_fa2ar_sahife_bleu":0.0101185743,"translation-fa2ar_fa2ar_quran_bleu":0.0129058292},"nlg_score":0.1067134448}
 
37
  {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4300000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1704020873,"question-generation_PersianQA_rougeL_recall":0.3000756202,"question-generation_PersianQA_rougeL_f1_score":0.2079039891},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.0366912467,"translation-en2fa_en2fa_epoque_bleu":0.0623359898,"translation-en2fa_en2fa_mizan_bleu":0.0442763597,"translation-en2fa_en2fa_quran_bleu":0.0309309044,"translation-en2fa_en2fa_sahife_bleu":0.0330663757,"translation-en2fa_en2fa_nahj_bleu":0.0124767847,"translation-en2fa_en2fa_tep_bleu":0.0116612774},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1638274716,"summarization_SamSUM-fa_rougeL_recall":0.3535878882,"summarization_SamSUM-fa_rougeL_f1_score":0.2134854664},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.016856047,"translation-fa2en_fa2en_tep_bleu":0.0085125001,"translation-fa2en_fa2en_mizan_bleu":0.013661635,"translation-fa2en_fa2en_quran_bleu":0.0181666202,"translation-fa2en_fa2en_epoque_bleu":0.0301282339,"translation-fa2en_fa2en_nahj_bleu":0.0122360126,"translation-fa2en_fa2en_sahife_bleu":0.0110323989},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0186923531,"translation-ar2fa_ar2fa_sahife_bleu":0.0174521967,"translation-ar2fa_ar2fa_nahj_bleu":0.0097734226,"translation-ar2fa_ar2fa_quran_bleu":0.0284054936},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1095844839,"summarization_PnSummary_rougeL_recall":0.3735331299,"summarization_PnSummary_rougeL_f1_score":0.1645385252},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0067928767,"translation-fa2ar_fa2ar_nahj_bleu":0.0056689454,"translation-fa2ar_fa2ar_sahife_bleu":0.009024465,"translation-fa2ar_fa2ar_quran_bleu":0.0056852198},"nlg_score":0.0949943578}
38
  {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https_google.com","parameters_count":"7250000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1127092702,"question-generation_PersianQA_rougeL_recall":0.2982763168,"question-generation_PersianQA_rougeL_f1_score":0.1525970768},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.0472831089,"translation-en2fa_en2fa_epoque_bleu":0.0950858392,"translation-en2fa_en2fa_mizan_bleu":0.0348348322,"translation-en2fa_en2fa_quran_bleu":0.0417444578,"translation-en2fa_en2fa_sahife_bleu":0.044168541,"translation-en2fa_en2fa_nahj_bleu":0.0239185439,"translation-en2fa_en2fa_tep_bleu":0.0188699837},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1180795687,"summarization_SamSUM-fa_rougeL_recall":0.3922712004,"summarization_SamSUM-fa_rougeL_f1_score":0.170765794},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.0901939948,"translation-fa2en_fa2en_tep_bleu":0.0521908916,"translation-fa2en_fa2en_mizan_bleu":0.0828690879,"translation-fa2en_fa2en_quran_bleu":0.0756298248,"translation-fa2en_fa2en_epoque_bleu":0.1645619674,"translation-fa2en_fa2en_nahj_bleu":0.048616237,"translation-fa2en_fa2en_sahife_bleu":0.0518842318},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0352516229,"translation-ar2fa_ar2fa_sahife_bleu":0.031818336,"translation-ar2fa_ar2fa_nahj_bleu":0.0219225394,"translation-ar2fa_ar2fa_quran_bleu":0.0513475391},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.0921640152,"summarization_PnSummary_rougeL_recall":0.4401953868,"summarization_PnSummary_rougeL_f1_score":0.1480945013},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0167121698,"translation-fa2ar_fa2ar_nahj_bleu":0.0182214992,"translation-fa2ar_fa2ar_sahife_bleu":0.0203567578,"translation-fa2ar_fa2ar_quran_bleu":0.0115582526},"nlg_score":0.0944140383}
39
  {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"7850000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1365997259,"question-generation_PersianQA_rougeL_recall":0.3257934111,"question-generation_PersianQA_rougeL_f1_score":0.1803398036},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.0600629844,"translation-en2fa_en2fa_epoque_bleu":0.1538216141,"translation-en2fa_en2fa_mizan_bleu":0.0548001026,"translation-en2fa_en2fa_quran_bleu":0.0217436833,"translation-en2fa_en2fa_sahife_bleu":0.0205645274,"translation-en2fa_en2fa_nahj_bleu":0.0172427415,"translation-en2fa_en2fa_tep_bleu":0.0093260061},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1468362777,"summarization_SamSUM-fa_rougeL_recall":0.3858089513,"summarization_SamSUM-fa_rougeL_f1_score":0.2041300257},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.0196181945,"translation-fa2en_fa2en_tep_bleu":0.0101636027,"translation-fa2en_fa2en_mizan_bleu":0.0153753718,"translation-fa2en_fa2en_quran_bleu":0.0231110679,"translation-fa2en_fa2en_epoque_bleu":0.0359429205,"translation-fa2en_fa2en_nahj_bleu":0.0119451943,"translation-fa2en_fa2en_sahife_bleu":0.0117936527},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0226935201,"translation-ar2fa_ar2fa_sahife_bleu":0.0196359142,"translation-ar2fa_ar2fa_nahj_bleu":0.010693835,"translation-ar2fa_ar2fa_quran_bleu":0.0371508269},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1072486168,"summarization_PnSummary_rougeL_recall":0.3966587345,"summarization_PnSummary_rougeL_f1_score":0.1627029568},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0086214593,"translation-fa2ar_fa2ar_nahj_bleu":0.006894051,"translation-fa2ar_fa2ar_sahife_bleu":0.009695506,"translation-fa2ar_fa2ar_quran_bleu":0.0092748209},"nlg_score":0.0940241349}
 
34
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1669255457,"question-generation_PersianQA_rougeL_recall":0.2952488346,"question-generation_PersianQA_rougeL_f1_score":0.2007786564},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.111865825,"translation-en2fa_en2fa_epoque_bleu":0.2732051575,"translation-en2fa_en2fa_mizan_bleu":0.0868500698,"translation-en2fa_en2fa_quran_bleu":0.0529852068,"translation-en2fa_en2fa_sahife_bleu":0.0502293343,"translation-en2fa_en2fa_nahj_bleu":0.0322064942,"translation-en2fa_en2fa_tep_bleu":0.0404507778},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1137219495,"summarization_SamSUM-fa_rougeL_recall":0.3496708707,"summarization_SamSUM-fa_rougeL_f1_score":0.1628971148},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.1249013271,"translation-fa2en_fa2en_tep_bleu":0.0564543942,"translation-fa2en_fa2en_mizan_bleu":0.0739643668,"translation-fa2en_fa2en_quran_bleu":0.0677317381,"translation-fa2en_fa2en_epoque_bleu":0.3111968032,"translation-fa2en_fa2en_nahj_bleu":0.0523541092,"translation-fa2en_fa2en_sahife_bleu":0.0479821907},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0439654732,"translation-ar2fa_ar2fa_sahife_bleu":0.0313283708,"translation-ar2fa_ar2fa_nahj_bleu":0.0308641232,"translation-ar2fa_ar2fa_quran_bleu":0.0690488581},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.0833149314,"summarization_PnSummary_rougeL_recall":0.4027758903,"summarization_PnSummary_rougeL_f1_score":0.1338404051},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0058610145,"translation-fa2ar_fa2ar_nahj_bleu":0.0063998692,"translation-fa2ar_fa2ar_sahife_bleu":0.0068172489,"translation-fa2ar_fa2ar_quran_bleu":0.0043275898},"nlg_score":0.112015688}
35
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.0870939736,"question-generation_PersianQA_rougeL_recall":0.3600941065,"question-generation_PersianQA_rougeL_f1_score":0.1336375958},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.056370201,"translation-en2fa_en2fa_epoque_bleu":0.1154392548,"translation-en2fa_en2fa_mizan_bleu":0.0484324583,"translation-en2fa_en2fa_quran_bleu":0.0612465488,"translation-en2fa_en2fa_sahife_bleu":0.0466818991,"translation-en2fa_en2fa_nahj_bleu":0.0218444477,"translation-en2fa_en2fa_tep_bleu":0.0118186665},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1772724525,"summarization_SamSUM-fa_rougeL_recall":0.341583677,"summarization_SamSUM-fa_rougeL_f1_score":0.2233271064},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.0691353117,"translation-fa2en_fa2en_tep_bleu":0.0320908261,"translation-fa2en_fa2en_mizan_bleu":0.0535229905,"translation-fa2en_fa2en_quran_bleu":0.0800143919,"translation-fa2en_fa2en_epoque_bleu":0.133977443,"translation-fa2en_fa2en_nahj_bleu":0.0362958954,"translation-fa2en_fa2en_sahife_bleu":0.0393317574},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0581992714,"translation-ar2fa_ar2fa_sahife_bleu":0.0540221076,"translation-ar2fa_ar2fa_nahj_bleu":0.0233017704,"translation-ar2fa_ar2fa_quran_bleu":0.095529061},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.132916538,"summarization_PnSummary_rougeL_recall":0.3579358655,"summarization_PnSummary_rougeL_f1_score":0.1887379797},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0331262129,"translation-fa2ar_fa2ar_nahj_bleu":0.0202107323,"translation-fa2ar_fa2ar_sahife_bleu":0.0280883311,"translation-fa2ar_fa2ar_quran_bleu":0.0510795752},"nlg_score":0.1089333827}
36
  {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"27400000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.2041596361,"question-generation_PersianQA_rougeL_recall":0.3456815337,"question-generation_PersianQA_rougeL_f1_score":0.2459732807},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.0439502467,"translation-en2fa_en2fa_epoque_bleu":0.0932804064,"translation-en2fa_en2fa_mizan_bleu":0.0446467932,"translation-en2fa_en2fa_quran_bleu":0.0435800727,"translation-en2fa_en2fa_sahife_bleu":0.0197005921,"translation-en2fa_en2fa_nahj_bleu":0.0132822652,"translation-en2fa_en2fa_tep_bleu":0.0087342692},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1588367988,"summarization_SamSUM-fa_rougeL_recall":0.3735722635,"summarization_SamSUM-fa_rougeL_f1_score":0.2131671502},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.0199585579,"translation-fa2en_fa2en_tep_bleu":0.0097804397,"translation-fa2en_fa2en_mizan_bleu":0.0144809896,"translation-fa2en_fa2en_quran_bleu":0.0259691427,"translation-fa2en_fa2en_epoque_bleu":0.0345304173,"translation-fa2en_fa2en_nahj_bleu":0.0150589625,"translation-fa2en_fa2en_sahife_bleu":0.0157047184},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0465792583,"translation-ar2fa_ar2fa_sahife_bleu":0.023795336,"translation-ar2fa_ar2fa_nahj_bleu":0.0121091058,"translation-ar2fa_ar2fa_quran_bleu":0.1021098256},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1124574222,"summarization_PnSummary_rougeL_recall":0.3717393409,"summarization_PnSummary_rougeL_f1_score":0.1673025553},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0100630648,"translation-fa2ar_fa2ar_nahj_bleu":0.0071647909,"translation-fa2ar_fa2ar_sahife_bleu":0.0101185743,"translation-fa2ar_fa2ar_quran_bleu":0.0129058292},"nlg_score":0.1067134448}
37
+ {"Model Name":"yandexgpt-lite","thinking_method":"❌","model_url":"https_google.com","parameters_count":"8000000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.0901454704,"question-generation_PersianQA_rougeL_recall":0.1786518031,"question-generation_PersianQA_rougeL_f1_score":0.1132710233},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.1155717032,"translation-en2fa_en2fa_epoque_bleu":0.2832842339,"translation-en2fa_en2fa_mizan_bleu":0.0985255619,"translation-en2fa_en2fa_quran_bleu":0.0278717932,"translation-en2fa_en2fa_sahife_bleu":0.0436185703,"translation-en2fa_en2fa_nahj_bleu":0.0358442369,"translation-en2fa_en2fa_tep_bleu":0.0458078328},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1006745403,"summarization_SamSUM-fa_rougeL_recall":0.2023535874,"summarization_SamSUM-fa_rougeL_f1_score":0.1269407774},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.1854877882,"translation-fa2en_fa2en_tep_bleu":0.1041087436,"translation-fa2en_fa2en_mizan_bleu":0.1522436467,"translation-fa2en_fa2en_quran_bleu":0.1409628655,"translation-fa2en_fa2en_epoque_bleu":0.391167392,"translation-fa2en_fa2en_nahj_bleu":0.0609865725,"translation-fa2en_fa2en_sahife_bleu":0.0800799314},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0316527189,"translation-ar2fa_ar2fa_sahife_bleu":0.0206548583,"translation-ar2fa_ar2fa_nahj_bleu":0.0436607335,"translation-ar2fa_ar2fa_quran_bleu":0.0312429658},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.0725893065,"summarization_PnSummary_rougeL_recall":0.3020007897,"summarization_PnSummary_rougeL_f1_score":0.1133719008},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0385165149,"translation-fa2ar_fa2ar_nahj_bleu":0.0272291934,"translation-fa2ar_fa2ar_sahife_bleu":0.0471613083,"translation-fa2ar_fa2ar_quran_bleu":0.0411590431},"nlg_score":0.1035446324}
38
  {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4300000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1704020873,"question-generation_PersianQA_rougeL_recall":0.3000756202,"question-generation_PersianQA_rougeL_f1_score":0.2079039891},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.0366912467,"translation-en2fa_en2fa_epoque_bleu":0.0623359898,"translation-en2fa_en2fa_mizan_bleu":0.0442763597,"translation-en2fa_en2fa_quran_bleu":0.0309309044,"translation-en2fa_en2fa_sahife_bleu":0.0330663757,"translation-en2fa_en2fa_nahj_bleu":0.0124767847,"translation-en2fa_en2fa_tep_bleu":0.0116612774},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1638274716,"summarization_SamSUM-fa_rougeL_recall":0.3535878882,"summarization_SamSUM-fa_rougeL_f1_score":0.2134854664},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.016856047,"translation-fa2en_fa2en_tep_bleu":0.0085125001,"translation-fa2en_fa2en_mizan_bleu":0.013661635,"translation-fa2en_fa2en_quran_bleu":0.0181666202,"translation-fa2en_fa2en_epoque_bleu":0.0301282339,"translation-fa2en_fa2en_nahj_bleu":0.0122360126,"translation-fa2en_fa2en_sahife_bleu":0.0110323989},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0186923531,"translation-ar2fa_ar2fa_sahife_bleu":0.0174521967,"translation-ar2fa_ar2fa_nahj_bleu":0.0097734226,"translation-ar2fa_ar2fa_quran_bleu":0.0284054936},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1095844839,"summarization_PnSummary_rougeL_recall":0.3735331299,"summarization_PnSummary_rougeL_f1_score":0.1645385252},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0067928767,"translation-fa2ar_fa2ar_nahj_bleu":0.0056689454,"translation-fa2ar_fa2ar_sahife_bleu":0.009024465,"translation-fa2ar_fa2ar_quran_bleu":0.0056852198},"nlg_score":0.0949943578}
39
  {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https_google.com","parameters_count":"7250000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1127092702,"question-generation_PersianQA_rougeL_recall":0.2982763168,"question-generation_PersianQA_rougeL_f1_score":0.1525970768},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.0472831089,"translation-en2fa_en2fa_epoque_bleu":0.0950858392,"translation-en2fa_en2fa_mizan_bleu":0.0348348322,"translation-en2fa_en2fa_quran_bleu":0.0417444578,"translation-en2fa_en2fa_sahife_bleu":0.044168541,"translation-en2fa_en2fa_nahj_bleu":0.0239185439,"translation-en2fa_en2fa_tep_bleu":0.0188699837},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1180795687,"summarization_SamSUM-fa_rougeL_recall":0.3922712004,"summarization_SamSUM-fa_rougeL_f1_score":0.170765794},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.0901939948,"translation-fa2en_fa2en_tep_bleu":0.0521908916,"translation-fa2en_fa2en_mizan_bleu":0.0828690879,"translation-fa2en_fa2en_quran_bleu":0.0756298248,"translation-fa2en_fa2en_epoque_bleu":0.1645619674,"translation-fa2en_fa2en_nahj_bleu":0.048616237,"translation-fa2en_fa2en_sahife_bleu":0.0518842318},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0352516229,"translation-ar2fa_ar2fa_sahife_bleu":0.031818336,"translation-ar2fa_ar2fa_nahj_bleu":0.0219225394,"translation-ar2fa_ar2fa_quran_bleu":0.0513475391},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.0921640152,"summarization_PnSummary_rougeL_recall":0.4401953868,"summarization_PnSummary_rougeL_f1_score":0.1480945013},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0167121698,"translation-fa2ar_fa2ar_nahj_bleu":0.0182214992,"translation-fa2ar_fa2ar_sahife_bleu":0.0203567578,"translation-fa2ar_fa2ar_quran_bleu":0.0115582526},"nlg_score":0.0944140383}
40
  {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"7850000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1365997259,"question-generation_PersianQA_rougeL_recall":0.3257934111,"question-generation_PersianQA_rougeL_f1_score":0.1803398036},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.0600629844,"translation-en2fa_en2fa_epoque_bleu":0.1538216141,"translation-en2fa_en2fa_mizan_bleu":0.0548001026,"translation-en2fa_en2fa_quran_bleu":0.0217436833,"translation-en2fa_en2fa_sahife_bleu":0.0205645274,"translation-en2fa_en2fa_nahj_bleu":0.0172427415,"translation-en2fa_en2fa_tep_bleu":0.0093260061},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1468362777,"summarization_SamSUM-fa_rougeL_recall":0.3858089513,"summarization_SamSUM-fa_rougeL_f1_score":0.2041300257},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.0196181945,"translation-fa2en_fa2en_tep_bleu":0.0101636027,"translation-fa2en_fa2en_mizan_bleu":0.0153753718,"translation-fa2en_fa2en_quran_bleu":0.0231110679,"translation-fa2en_fa2en_epoque_bleu":0.0359429205,"translation-fa2en_fa2en_nahj_bleu":0.0119451943,"translation-fa2en_fa2en_sahife_bleu":0.0117936527},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0226935201,"translation-ar2fa_ar2fa_sahife_bleu":0.0196359142,"translation-ar2fa_ar2fa_nahj_bleu":0.010693835,"translation-ar2fa_ar2fa_quran_bleu":0.0371508269},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1072486168,"summarization_PnSummary_rougeL_recall":0.3966587345,"summarization_PnSummary_rougeL_f1_score":0.1627029568},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0086214593,"translation-fa2ar_fa2ar_nahj_bleu":0.006894051,"translation-fa2ar_fa2ar_sahife_bleu":0.009695506,"translation-fa2ar_fa2ar_quran_bleu":0.0092748209},"nlg_score":0.0940241349}
leaderboard/boards_data/persian_nlu.jsonl CHANGED
@@ -28,6 +28,7 @@
28
  {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"30500000000","source_type":"Open-Source","sentiment-analysis_deepsentipers":{"sentiment-analysis_deepsentipers_acc_modified":0.7502696872,"sentiment-analysis_deepsentipers_precision_modified":0.7162099301,"sentiment-analysis_deepsentipers_recall_modified":0.7649004728,"sentiment-analysis_deepsentipers_fscore_modified":0.716460892,"sentiment-analysis_deepsentipers_acc":0.7502696872,"sentiment-analysis_deepsentipers_precision":0.7162099301,"sentiment-analysis_deepsentipers_recall":0.7649004728,"sentiment-analysis_deepsentipers_fscore":0.716460892,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0},"sts_SynPerSTS":{"sts_SynPerSTS_corrcoef_modified":0.9421493238,"sts_SynPerSTS_corrcoef":0.9421493238,"sts_SynPerSTS_valid_output_ratio":1.0},"ner_arman":{"ner_arman_f1_mean":0.5000495531,"ner_arman_precision_mean":0.4607965832,"ner_arman_recall_mean":0.5927493047},"keyword-extraction_SynKeywords":{"keyword-extraction_SynKeywords_f1_mean":0.2600489896,"keyword-extraction_SynKeywords_precision_mean":0.2150796745,"keyword-extraction_SynKeywords_recall_mean":0.3497282609},"tone-classification_SynTone":{"tone-classification_SynTone_acc_modified":0.347826087,"tone-classification_SynTone_precision_modified":0.4069335674,"tone-classification_SynTone_recall_modified":0.3420272309,"tone-classification_SynTone_fscore_modified":0.3323819164,"tone-classification_SynTone_acc":0.5333333333,"tone-classification_SynTone_precision":0.6239648033,"tone-classification_SynTone_recall":0.5244417541,"tone-classification_SynTone_fscore":0.5096522718,"tone-classification_SynTone_valid_output_ratio":0.652173913},"sts_FarSICK":{"sts_FarSICK_corrcoef_modified":0.8643540763,"sts_FarSICK_corrcoef":0.8643540763,"sts_FarSICK_valid_output_ratio":1.0},"paraphrase-detection_FarsiParaphraseDetection":{"paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.7777777778,"paraphrase-detection_Fars
iParaphraseDetection_precision_modified":0.8343364681,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7575227312,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.7618590799,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8152610442,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8745454545,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7940298507,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.7985751802,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9540229885},"nli_farstail":{"nli_farstail_acc_modified":0.6086956522,"nli_farstail_precision_modified":0.6940003558,"nli_farstail_recall_modified":0.6092669096,"nli_farstail_fscore_modified":0.5908473619,"nli_farstail_acc":0.6110397946,"nli_farstail_precision":0.6966730144,"nli_farstail_recall":0.611613252,"nli_farstail_fscore":0.593122769,"nli_farstail_valid_output_ratio":0.9961636829},"paraphrase-detection_parsinlu":{"paraphrase-detection_parsinlu_acc_modified":0.824,"paraphrase-detection_parsinlu_precision_modified":0.8599831541,"paraphrase-detection_parsinlu_recall_modified":0.7999184007,"paraphrase-detection_parsinlu_fscore_modified":0.8085591465,"paraphrase-detection_parsinlu_acc":0.824,"paraphrase-detection_parsinlu_precision":0.8599831541,"paraphrase-detection_parsinlu_recall":0.7999184007,"paraphrase-detection_parsinlu_fscore":0.8085591465,"paraphrase-detection_parsinlu_valid_output_ratio":1.0},"extractive-qa_PQuAD":{"extractive-qa_PQuAD_exact_match":1.4218009479,"extractive-qa_PQuAD_f1":0.6109462131},"topic-classification_sid":{"topic-classification_sid_acc_modified":0.586,"topic-classification_sid_precision_modified":0.5883032084,"topic-classification_sid_recall_modified":0.4720717732,"topic-classification_sid_fscore_modified":0.4937437004,"topic-classification_sid_acc":0.586,"topic-classification_sid_precision":0.5883032084,"topic-classification_sid_recall":0.4720717732,"topic-classification_sid_fscore":0.4937437004
,"topic-classification_sid_valid_output_ratio":1.0},"nlu_score":0.6255818412}
29
  {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4300000000","source_type":"Open-Source","sentiment-analysis_deepsentipers":{"sentiment-analysis_deepsentipers_acc_modified":0.6817691478,"sentiment-analysis_deepsentipers_precision_modified":0.6546744642,"sentiment-analysis_deepsentipers_recall_modified":0.7378694789,"sentiment-analysis_deepsentipers_fscore_modified":0.6356142977,"sentiment-analysis_deepsentipers_acc":0.6817691478,"sentiment-analysis_deepsentipers_precision":0.6546744642,"sentiment-analysis_deepsentipers_recall":0.7378694789,"sentiment-analysis_deepsentipers_fscore":0.6356142977,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0},"sts_SynPerSTS":{"sts_SynPerSTS_corrcoef_modified":0.9198771683,"sts_SynPerSTS_corrcoef":0.9198771683,"sts_SynPerSTS_valid_output_ratio":1.0},"ner_arman":{"ner_arman_f1_mean":0.3839211973,"ner_arman_precision_mean":0.3292326466,"ner_arman_recall_mean":0.5049662296},"keyword-extraction_SynKeywords":{"keyword-extraction_SynKeywords_f1_mean":0.229921048,"keyword-extraction_SynKeywords_precision_mean":0.21147343,"keyword-extraction_SynKeywords_recall_mean":0.2634963768},"tone-classification_SynTone":{"tone-classification_SynTone_acc_modified":0.5031055901,"tone-classification_SynTone_precision_modified":0.5228364877,"tone-classification_SynTone_recall_modified":0.5168736971,"tone-classification_SynTone_fscore_modified":0.4644759375,"tone-classification_SynTone_acc":0.5094339623,"tone-classification_SynTone_precision":0.5294130473,"tone-classification_SynTone_recall":0.523375253,"tone-classification_SynTone_fscore":0.4703184021,"tone-classification_SynTone_valid_output_ratio":0.9875776398},"sts_FarSICK":{"sts_FarSICK_corrcoef_modified":0.8471466571,"sts_FarSICK_corrcoef":0.8471466571,"sts_FarSICK_valid_output_ratio":1.0},"paraphrase-detection_FarsiParaphraseDetection":{"paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8697318008,"paraphrase-detection_Farsi
ParaphraseDetection_precision_modified":0.9057190558,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8481376599,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8593214965,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8697318008,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9057190558,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8481376599,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8593214965,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0},"nli_farstail":{"nli_farstail_acc_modified":0.6361892583,"nli_farstail_precision_modified":0.6743240456,"nli_farstail_recall_modified":0.6374538968,"nli_farstail_fscore_modified":0.621131875,"nli_farstail_acc":0.6370038412,"nli_farstail_precision":0.6751874567,"nli_farstail_recall":0.638270099,"nli_farstail_fscore":0.6219271782,"nli_farstail_valid_output_ratio":0.9987212276},"paraphrase-detection_parsinlu":{"paraphrase-detection_parsinlu_acc_modified":0.838,"paraphrase-detection_parsinlu_precision_modified":0.8416530278,"paraphrase-detection_parsinlu_recall_modified":0.8270501836,"paraphrase-detection_parsinlu_fscore_modified":0.8316645261,"paraphrase-detection_parsinlu_acc":0.838,"paraphrase-detection_parsinlu_precision":0.8416530278,"paraphrase-detection_parsinlu_recall":0.8270501836,"paraphrase-detection_parsinlu_fscore":0.8316645261,"paraphrase-detection_parsinlu_valid_output_ratio":1.0},"extractive-qa_PQuAD":{"extractive-qa_PQuAD_exact_match":29.8578199052,"extractive-qa_PQuAD_f1":0.6483891649},"topic-classification_sid":{"topic-classification_sid_acc_modified":0.562,"topic-classification_sid_precision_modified":0.4846723602,"topic-classification_sid_recall_modified":0.454656985,"topic-classification_sid_fscore_modified":0.424509489,"topic-classification_sid_acc":0.5928270042,"topic-classification_sid_precision":0.511257764,"topic-classification_sid_recall":0.4795959757,"topic-classification_sid_fscore":0.4477948196,"top
ic-classification_sid_valid_output_ratio":0.948},"nlu_score":0.6241793507}
30
  {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","sentiment-analysis_deepsentipers":{"sentiment-analysis_deepsentipers_acc_modified":0.7411003236,"sentiment-analysis_deepsentipers_precision_modified":0.7010084925,"sentiment-analysis_deepsentipers_recall_modified":0.7529009939,"sentiment-analysis_deepsentipers_fscore_modified":0.7172781226,"sentiment-analysis_deepsentipers_acc":0.7411003236,"sentiment-analysis_deepsentipers_precision":0.7010084925,"sentiment-analysis_deepsentipers_recall":0.7529009939,"sentiment-analysis_deepsentipers_fscore":0.7172781226,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0},"sts_SynPerSTS":{"sts_SynPerSTS_corrcoef_modified":0.9555087155,"sts_SynPerSTS_corrcoef":0.9555087155,"sts_SynPerSTS_valid_output_ratio":1.0},"ner_arman":{"ner_arman_f1_mean":0.1587859697,"ner_arman_precision_mean":0.1553465009,"ner_arman_recall_mean":0.1764799364},"keyword-extraction_SynKeywords":{"keyword-extraction_SynKeywords_f1_mean":0.2080704644,"keyword-extraction_SynKeywords_precision_mean":0.1673321849,"keyword-extraction_SynKeywords_recall_mean":0.294384058},"tone-classification_SynTone":{"tone-classification_SynTone_acc_modified":0.6397515528,"tone-classification_SynTone_precision_modified":0.5483185514,"tone-classification_SynTone_recall_modified":0.590333248,"tone-classification_SynTone_fscore_modified":0.530467546,"tone-classification_SynTone_acc":0.6397515528,"tone-classification_SynTone_precision":0.5483185514,"tone-classification_SynTone_recall":0.590333248,"tone-classification_SynTone_fscore":0.530467546,"tone-classification_SynTone_valid_output_ratio":1.0},"sts_FarSICK":{"sts_FarSICK_corrcoef_modified":0.8497629768,"sts_FarSICK_corrcoef":0.8497629768,"sts_FarSICK_valid_output_ratio":1.0},"paraphrase-detection_FarsiParaphraseDetection":{"paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.7982120051,"paraphrase-detection_FarsiParaphraseDetec
tion_precision_modified":0.8696369637,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7641791045,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.7707505633,"paraphrase-detection_FarsiParaphraseDetection_acc":0.7982120051,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8696369637,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7641791045,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.7707505633,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0},"nli_farstail":{"nli_farstail_acc_modified":0.6592071611,"nli_farstail_precision_modified":0.7292371837,"nli_farstail_recall_modified":0.6555663858,"nli_farstail_fscore_modified":0.6172863539,"nli_farstail_acc":0.6621708414,"nli_farstail_precision":0.7325157067,"nli_farstail_recall":0.6585136977,"nli_farstail_fscore":0.6200615655,"nli_farstail_valid_output_ratio":0.9955242967},"paraphrase-detection_parsinlu":{"paraphrase-detection_parsinlu_acc_modified":0.798,"paraphrase-detection_parsinlu_precision_modified":0.8383696273,"paraphrase-detection_parsinlu_recall_modified":0.7708282334,"paraphrase-detection_parsinlu_fscore_modified":0.7777278949,"paraphrase-detection_parsinlu_acc":0.798,"paraphrase-detection_parsinlu_precision":0.8383696273,"paraphrase-detection_parsinlu_recall":0.7708282334,"paraphrase-detection_parsinlu_fscore":0.7777278949,"paraphrase-detection_parsinlu_valid_output_ratio":1.0},"extractive-qa_PQuAD":{"extractive-qa_PQuAD_exact_match":1.8957345972,"extractive-qa_PQuAD_f1":0.4954484984},"topic-classification_sid":{"topic-classification_sid_acc_modified":0.56,"topic-classification_sid_precision_modified":0.5309838171,"topic-classification_sid_recall_modified":0.4706044677,"topic-classification_sid_fscore_modified":0.484170357,"topic-classification_sid_acc":0.5702647658,"topic-classification_sid_precision":0.5407167181,"topic-classification_sid_recall":0.4792306188,"topic-classification_sid_fscore":0.4930451701,"topic-classifica
tion_sid_valid_output_ratio":0.982},"nlu_score":0.5968415875}
 
31
  {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https_google.com","parameters_count":"24000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers":{"sentiment-analysis_deepsentipers_acc_modified":0.5949298813,"sentiment-analysis_deepsentipers_precision_modified":0.6633283768,"sentiment-analysis_deepsentipers_recall_modified":0.6739827327,"sentiment-analysis_deepsentipers_fscore_modified":0.597099001,"sentiment-analysis_deepsentipers_acc":0.5958941113,"sentiment-analysis_deepsentipers_precision":0.6644034633,"sentiment-analysis_deepsentipers_recall":0.6750750872,"sentiment-analysis_deepsentipers_fscore":0.5980667466,"sentiment-analysis_deepsentipers_valid_output_ratio":0.998381877},"sts_SynPerSTS":{"sts_SynPerSTS_corrcoef_modified":0.9424987971,"sts_SynPerSTS_corrcoef":0.9424987971,"sts_SynPerSTS_valid_output_ratio":1.0},"ner_arman":{"ner_arman_f1_mean":0.028185021,"ner_arman_precision_mean":0.0278440732,"ner_arman_recall_mean":0.0304295943},"keyword-extraction_SynKeywords":{"keyword-extraction_SynKeywords_f1_mean":0.1942845429,"keyword-extraction_SynKeywords_precision_mean":0.168197784,"keyword-extraction_SynKeywords_recall_mean":0.2451992754},"tone-classification_SynTone":{"tone-classification_SynTone_acc_modified":0.7763975155,"tone-classification_SynTone_precision_modified":0.6774948824,"tone-classification_SynTone_recall_modified":0.67683866,"tone-classification_SynTone_fscore_modified":0.668356732,"tone-classification_SynTone_acc":0.7911392405,"tone-classification_SynTone_precision":0.6903587093,"tone-classification_SynTone_recall":0.689690027,"tone-classification_SynTone_fscore":0.6810470497,"tone-classification_SynTone_valid_output_ratio":0.9813664596},"sts_FarSICK":{"sts_FarSICK_corrcoef_modified":0.8346099969,"sts_FarSICK_corrcoef":0.8346099969,"sts_FarSICK_valid_output_ratio":1.0},"paraphrase-detection_FarsiParaphraseDetection":{"paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.680715198,
"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8209169054,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.6268656716,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.5933059088,"paraphrase-detection_FarsiParaphraseDetection_acc":0.680715198,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8209169054,"paraphrase-detection_FarsiParaphraseDetection_recall":0.6268656716,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.5933059088,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0},"nli_farstail":{"nli_farstail_acc_modified":0.7461636829,"nli_farstail_precision_modified":0.8279044878,"nli_farstail_recall_modified":0.7431719278,"nli_farstail_fscore_modified":0.7484099134,"nli_farstail_acc":0.7461636829,"nli_farstail_precision":0.8279044878,"nli_farstail_recall":0.7431719278,"nli_farstail_fscore":0.7484099134,"nli_farstail_valid_output_ratio":1.0},"paraphrase-detection_parsinlu":{"paraphrase-detection_parsinlu_acc_modified":0.66,"paraphrase-detection_parsinlu_precision_modified":0.7933390651,"paraphrase-detection_parsinlu_recall_modified":0.6057935537,"paraphrase-detection_parsinlu_fscore_modified":0.5625411726,"paraphrase-detection_parsinlu_acc":0.66,"paraphrase-detection_parsinlu_precision":0.7933390651,"paraphrase-detection_parsinlu_recall":0.6057935537,"paraphrase-detection_parsinlu_fscore":0.5625411726,"paraphrase-detection_parsinlu_valid_output_ratio":1.0},"extractive-qa_PQuAD":{"extractive-qa_PQuAD_exact_match":13.0805687204,"extractive-qa_PQuAD_f1":0.5111951184},"topic-classification_sid":{"topic-classification_sid_acc_modified":0.656,"topic-classification_sid_precision_modified":0.5819241823,"topic-classification_sid_recall_modified":0.5649560499,"topic-classification_sid_fscore_modified":0.5472284688,"topic-classification_sid_acc":0.7038626609,"topic-classification_sid_precision":0.6243821699,"topic-classification_sid_recall":0.6061760192,"topic-classification_sid_fscor
e":0.5871550095,"topic-classification_sid_valid_output_ratio":0.932},"nlu_score":0.5661558794}
32
  {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https_google.com","parameters_count":"35000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers":{"sentiment-analysis_deepsentipers_acc_modified":0.6154261057,"sentiment-analysis_deepsentipers_precision_modified":0.6519864557,"sentiment-analysis_deepsentipers_recall_modified":0.6762525877,"sentiment-analysis_deepsentipers_fscore_modified":0.5290317996,"sentiment-analysis_deepsentipers_acc":0.6154261057,"sentiment-analysis_deepsentipers_precision":0.6519864557,"sentiment-analysis_deepsentipers_recall":0.6762525877,"sentiment-analysis_deepsentipers_fscore":0.5290317996,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0},"sts_SynPerSTS":{"sts_SynPerSTS_corrcoef_modified":0.9009001164,"sts_SynPerSTS_corrcoef":0.9009001164,"sts_SynPerSTS_valid_output_ratio":1.0},"ner_arman":{"ner_arman_f1_mean":0.0,"ner_arman_precision_mean":0.0,"ner_arman_recall_mean":0.0},"keyword-extraction_SynKeywords":{"keyword-extraction_SynKeywords_f1_mean":0.0012077295,"keyword-extraction_SynKeywords_precision_mean":0.0013586957,"keyword-extraction_SynKeywords_recall_mean":0.0010869565},"tone-classification_SynTone":{"tone-classification_SynTone_acc_modified":0.7453416149,"tone-classification_SynTone_precision_modified":0.5770588432,"tone-classification_SynTone_recall_modified":0.5070698686,"tone-classification_SynTone_fscore_modified":0.5221093948,"tone-classification_SynTone_acc":0.7547169811,"tone-classification_SynTone_precision":0.584317445,"tone-classification_SynTone_recall":0.513448106,"tone-classification_SynTone_fscore":0.5286768085,"tone-classification_SynTone_valid_output_ratio":0.9875776398},"sts_FarSICK":{"sts_FarSICK_corrcoef_modified":0.8375953381,"sts_FarSICK_corrcoef":0.8375953381,"sts_FarSICK_valid_output_ratio":1.0},"paraphrase-detection_FarsiParaphraseDetection":{"paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.9169859515,"paraphrase-detection_FarsiParaphraseDetection
_precision_modified":0.9233165065,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.9095332885,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.9144938271,"paraphrase-detection_FarsiParaphraseDetection_acc":0.925257732,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9316453926,"paraphrase-detection_FarsiParaphraseDetection_recall":0.9177378414,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.9227431271,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9910600255},"nli_farstail":{"nli_farstail_acc_modified":0.537084399,"nli_farstail_precision_modified":0.7429511025,"nli_farstail_recall_modified":0.5428343437,"nli_farstail_fscore_modified":0.4522202373,"nli_farstail_acc":0.537084399,"nli_farstail_precision":0.7429511025,"nli_farstail_recall":0.5428343437,"nli_farstail_fscore":0.4522202373,"nli_farstail_valid_output_ratio":1.0},"paraphrase-detection_parsinlu":{"paraphrase-detection_parsinlu_acc_modified":0.778,"paraphrase-detection_parsinlu_precision_modified":0.8054432653,"paraphrase-detection_parsinlu_recall_modified":0.7971712985,"paraphrase-detection_parsinlu_fscore_modified":0.7776855183,"paraphrase-detection_parsinlu_acc":0.7842741935,"paraphrase-detection_parsinlu_precision":0.8119387755,"paraphrase-detection_parsinlu_recall":0.8036000993,"paraphrase-detection_parsinlu_fscore":0.7839571757,"paraphrase-detection_parsinlu_valid_output_ratio":0.992},"extractive-qa_PQuAD":{"extractive-qa_PQuAD_exact_match":41.990521327,"extractive-qa_PQuAD_f1":0.7401025641},"topic-classification_sid":{"topic-classification_sid_acc_modified":0.412,"topic-classification_sid_precision_modified":0.3819473808,"topic-classification_sid_recall_modified":0.2194110821,"topic-classification_sid_fscore_modified":0.166159266,"topic-classification_sid_acc":0.4735632184,"topic-classification_sid_precision":0.439019978,"topic-classification_sid_recall":0.2521966461,"topic-classification_sid_fscore":0.190987662,"topic-classific
ation_sid_valid_output_ratio":0.87},"nlu_score":0.531045981}
33
  {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","sentiment-analysis_deepsentipers":{"sentiment-analysis_deepsentipers_acc_modified":0.5604099245,"sentiment-analysis_deepsentipers_precision_modified":0.6546929453,"sentiment-analysis_deepsentipers_recall_modified":0.63956688,"sentiment-analysis_deepsentipers_fscore_modified":0.4401917985,"sentiment-analysis_deepsentipers_acc":0.5625338387,"sentiment-analysis_deepsentipers_precision":0.6571741855,"sentiment-analysis_deepsentipers_recall":0.6419907934,"sentiment-analysis_deepsentipers_fscore":0.4418600945,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9962243797},"sts_SynPerSTS":{"sts_SynPerSTS_corrcoef_modified":0.8301701871,"sts_SynPerSTS_corrcoef":0.8318338548,"sts_SynPerSTS_valid_output_ratio":0.998},"ner_arman":{"ner_arman_f1_mean":0.3918497805,"ner_arman_precision_mean":0.3656932857,"ner_arman_recall_mean":0.4707191101},"keyword-extraction_SynKeywords":{"keyword-extraction_SynKeywords_f1_mean":0.0888958039,"keyword-extraction_SynKeywords_precision_mean":0.0717122112,"keyword-extraction_SynKeywords_recall_mean":0.1270833333},"tone-classification_SynTone":{"tone-classification_SynTone_acc_modified":0.6645962733,"tone-classification_SynTone_precision_modified":0.5738861003,"tone-classification_SynTone_recall_modified":0.5372045516,"tone-classification_SynTone_fscore_modified":0.5004137071,"tone-classification_SynTone_acc":0.6729559748,"tone-classification_SynTone_precision":0.5811047933,"tone-classification_SynTone_recall":0.5439618416,"tone-classification_SynTone_fscore":0.5067082191,"tone-classification_SynTone_valid_output_ratio":0.9875776398},"sts_FarSICK":{"sts_FarSICK_corrcoef_modified":0.7973824106,"sts_FarSICK_corrcoef":0.8005847496,"sts_FarSICK_valid_output_ratio":0.996},"paraphrase-detection_FarsiParaphraseDetection":{"paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8237547
893,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8607726326,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8027155145,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8121248229,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8509234828,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8891622313,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8291903006,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8389099424,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9680715198},"nli_farstail":{"nli_farstail_acc_modified":0.3177749361,"nli_farstail_precision_modified":0.3674330881,"nli_farstail_recall_modified":0.3157646078,"nli_farstail_fscore_modified":0.300349348,"nli_farstail_acc":0.4737845567,"nli_farstail_precision":0.5478220684,"nli_farstail_recall":0.4707872704,"nli_farstail_fscore":0.447803985,"nli_farstail_valid_output_ratio":0.6707161125},"paraphrase-detection_parsinlu":{"paraphrase-detection_parsinlu_acc_modified":0.764,"paraphrase-detection_parsinlu_precision_modified":0.7656475469,"paraphrase-detection_parsinlu_recall_modified":0.7528414743,"paraphrase-detection_parsinlu_fscore_modified":0.756438131,"paraphrase-detection_parsinlu_acc":0.7925311203,"paraphrase-detection_parsinlu_precision":0.7942401938,"paraphrase-detection_parsinlu_recall":0.7809558862,"paraphrase-detection_parsinlu_fscore":0.7846868579,"paraphrase-detection_parsinlu_valid_output_ratio":0.964},"extractive-qa_PQuAD":{"extractive-qa_PQuAD_exact_match":21.990521327,"extractive-qa_PQuAD_f1":0.5849945641},"topic-classification_sid":{"topic-classification_sid_acc_modified":0.394,"topic-classification_sid_precision_modified":0.5177096689,"topic-classification_sid_recall_modified":0.297966245,"topic-classification_sid_fscore_modified":0.2626151509,"topic-classification_sid_acc":0.4368070953,"topic-classification_sid_precision":0.5739575043,"topic-classification_sid_recall":0.3303395178,"t
opic-classification_sid_fscore":0.2911476174,"topic-classification_sid_valid_output_ratio":0.902},"nlu_score":0.5241296095}
 
28
  {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"30500000000","source_type":"Open-Source","sentiment-analysis_deepsentipers":{"sentiment-analysis_deepsentipers_acc_modified":0.7502696872,"sentiment-analysis_deepsentipers_precision_modified":0.7162099301,"sentiment-analysis_deepsentipers_recall_modified":0.7649004728,"sentiment-analysis_deepsentipers_fscore_modified":0.716460892,"sentiment-analysis_deepsentipers_acc":0.7502696872,"sentiment-analysis_deepsentipers_precision":0.7162099301,"sentiment-analysis_deepsentipers_recall":0.7649004728,"sentiment-analysis_deepsentipers_fscore":0.716460892,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0},"sts_SynPerSTS":{"sts_SynPerSTS_corrcoef_modified":0.9421493238,"sts_SynPerSTS_corrcoef":0.9421493238,"sts_SynPerSTS_valid_output_ratio":1.0},"ner_arman":{"ner_arman_f1_mean":0.5000495531,"ner_arman_precision_mean":0.4607965832,"ner_arman_recall_mean":0.5927493047},"keyword-extraction_SynKeywords":{"keyword-extraction_SynKeywords_f1_mean":0.2600489896,"keyword-extraction_SynKeywords_precision_mean":0.2150796745,"keyword-extraction_SynKeywords_recall_mean":0.3497282609},"tone-classification_SynTone":{"tone-classification_SynTone_acc_modified":0.347826087,"tone-classification_SynTone_precision_modified":0.4069335674,"tone-classification_SynTone_recall_modified":0.3420272309,"tone-classification_SynTone_fscore_modified":0.3323819164,"tone-classification_SynTone_acc":0.5333333333,"tone-classification_SynTone_precision":0.6239648033,"tone-classification_SynTone_recall":0.5244417541,"tone-classification_SynTone_fscore":0.5096522718,"tone-classification_SynTone_valid_output_ratio":0.652173913},"sts_FarSICK":{"sts_FarSICK_corrcoef_modified":0.8643540763,"sts_FarSICK_corrcoef":0.8643540763,"sts_FarSICK_valid_output_ratio":1.0},"paraphrase-detection_FarsiParaphraseDetection":{"paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.7777777778,"paraphrase-detection_Fars
iParaphraseDetection_precision_modified":0.8343364681,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7575227312,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.7618590799,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8152610442,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8745454545,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7940298507,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.7985751802,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9540229885},"nli_farstail":{"nli_farstail_acc_modified":0.6086956522,"nli_farstail_precision_modified":0.6940003558,"nli_farstail_recall_modified":0.6092669096,"nli_farstail_fscore_modified":0.5908473619,"nli_farstail_acc":0.6110397946,"nli_farstail_precision":0.6966730144,"nli_farstail_recall":0.611613252,"nli_farstail_fscore":0.593122769,"nli_farstail_valid_output_ratio":0.9961636829},"paraphrase-detection_parsinlu":{"paraphrase-detection_parsinlu_acc_modified":0.824,"paraphrase-detection_parsinlu_precision_modified":0.8599831541,"paraphrase-detection_parsinlu_recall_modified":0.7999184007,"paraphrase-detection_parsinlu_fscore_modified":0.8085591465,"paraphrase-detection_parsinlu_acc":0.824,"paraphrase-detection_parsinlu_precision":0.8599831541,"paraphrase-detection_parsinlu_recall":0.7999184007,"paraphrase-detection_parsinlu_fscore":0.8085591465,"paraphrase-detection_parsinlu_valid_output_ratio":1.0},"extractive-qa_PQuAD":{"extractive-qa_PQuAD_exact_match":1.4218009479,"extractive-qa_PQuAD_f1":0.6109462131},"topic-classification_sid":{"topic-classification_sid_acc_modified":0.586,"topic-classification_sid_precision_modified":0.5883032084,"topic-classification_sid_recall_modified":0.4720717732,"topic-classification_sid_fscore_modified":0.4937437004,"topic-classification_sid_acc":0.586,"topic-classification_sid_precision":0.5883032084,"topic-classification_sid_recall":0.4720717732,"topic-classification_sid_fscore":0.4937437004
,"topic-classification_sid_valid_output_ratio":1.0},"nlu_score":0.6255818412}
29
  {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4300000000","source_type":"Open-Source","sentiment-analysis_deepsentipers":{"sentiment-analysis_deepsentipers_acc_modified":0.6817691478,"sentiment-analysis_deepsentipers_precision_modified":0.6546744642,"sentiment-analysis_deepsentipers_recall_modified":0.7378694789,"sentiment-analysis_deepsentipers_fscore_modified":0.6356142977,"sentiment-analysis_deepsentipers_acc":0.6817691478,"sentiment-analysis_deepsentipers_precision":0.6546744642,"sentiment-analysis_deepsentipers_recall":0.7378694789,"sentiment-analysis_deepsentipers_fscore":0.6356142977,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0},"sts_SynPerSTS":{"sts_SynPerSTS_corrcoef_modified":0.9198771683,"sts_SynPerSTS_corrcoef":0.9198771683,"sts_SynPerSTS_valid_output_ratio":1.0},"ner_arman":{"ner_arman_f1_mean":0.3839211973,"ner_arman_precision_mean":0.3292326466,"ner_arman_recall_mean":0.5049662296},"keyword-extraction_SynKeywords":{"keyword-extraction_SynKeywords_f1_mean":0.229921048,"keyword-extraction_SynKeywords_precision_mean":0.21147343,"keyword-extraction_SynKeywords_recall_mean":0.2634963768},"tone-classification_SynTone":{"tone-classification_SynTone_acc_modified":0.5031055901,"tone-classification_SynTone_precision_modified":0.5228364877,"tone-classification_SynTone_recall_modified":0.5168736971,"tone-classification_SynTone_fscore_modified":0.4644759375,"tone-classification_SynTone_acc":0.5094339623,"tone-classification_SynTone_precision":0.5294130473,"tone-classification_SynTone_recall":0.523375253,"tone-classification_SynTone_fscore":0.4703184021,"tone-classification_SynTone_valid_output_ratio":0.9875776398},"sts_FarSICK":{"sts_FarSICK_corrcoef_modified":0.8471466571,"sts_FarSICK_corrcoef":0.8471466571,"sts_FarSICK_valid_output_ratio":1.0},"paraphrase-detection_FarsiParaphraseDetection":{"paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8697318008,"paraphrase-detection_Farsi
ParaphraseDetection_precision_modified":0.9057190558,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8481376599,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8593214965,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8697318008,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9057190558,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8481376599,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8593214965,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0},"nli_farstail":{"nli_farstail_acc_modified":0.6361892583,"nli_farstail_precision_modified":0.6743240456,"nli_farstail_recall_modified":0.6374538968,"nli_farstail_fscore_modified":0.621131875,"nli_farstail_acc":0.6370038412,"nli_farstail_precision":0.6751874567,"nli_farstail_recall":0.638270099,"nli_farstail_fscore":0.6219271782,"nli_farstail_valid_output_ratio":0.9987212276},"paraphrase-detection_parsinlu":{"paraphrase-detection_parsinlu_acc_modified":0.838,"paraphrase-detection_parsinlu_precision_modified":0.8416530278,"paraphrase-detection_parsinlu_recall_modified":0.8270501836,"paraphrase-detection_parsinlu_fscore_modified":0.8316645261,"paraphrase-detection_parsinlu_acc":0.838,"paraphrase-detection_parsinlu_precision":0.8416530278,"paraphrase-detection_parsinlu_recall":0.8270501836,"paraphrase-detection_parsinlu_fscore":0.8316645261,"paraphrase-detection_parsinlu_valid_output_ratio":1.0},"extractive-qa_PQuAD":{"extractive-qa_PQuAD_exact_match":29.8578199052,"extractive-qa_PQuAD_f1":0.6483891649},"topic-classification_sid":{"topic-classification_sid_acc_modified":0.562,"topic-classification_sid_precision_modified":0.4846723602,"topic-classification_sid_recall_modified":0.454656985,"topic-classification_sid_fscore_modified":0.424509489,"topic-classification_sid_acc":0.5928270042,"topic-classification_sid_precision":0.511257764,"topic-classification_sid_recall":0.4795959757,"topic-classification_sid_fscore":0.4477948196,"top
ic-classification_sid_valid_output_ratio":0.948},"nlu_score":0.6241793507}
30
  {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","sentiment-analysis_deepsentipers":{"sentiment-analysis_deepsentipers_acc_modified":0.7411003236,"sentiment-analysis_deepsentipers_precision_modified":0.7010084925,"sentiment-analysis_deepsentipers_recall_modified":0.7529009939,"sentiment-analysis_deepsentipers_fscore_modified":0.7172781226,"sentiment-analysis_deepsentipers_acc":0.7411003236,"sentiment-analysis_deepsentipers_precision":0.7010084925,"sentiment-analysis_deepsentipers_recall":0.7529009939,"sentiment-analysis_deepsentipers_fscore":0.7172781226,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0},"sts_SynPerSTS":{"sts_SynPerSTS_corrcoef_modified":0.9555087155,"sts_SynPerSTS_corrcoef":0.9555087155,"sts_SynPerSTS_valid_output_ratio":1.0},"ner_arman":{"ner_arman_f1_mean":0.1587859697,"ner_arman_precision_mean":0.1553465009,"ner_arman_recall_mean":0.1764799364},"keyword-extraction_SynKeywords":{"keyword-extraction_SynKeywords_f1_mean":0.2080704644,"keyword-extraction_SynKeywords_precision_mean":0.1673321849,"keyword-extraction_SynKeywords_recall_mean":0.294384058},"tone-classification_SynTone":{"tone-classification_SynTone_acc_modified":0.6397515528,"tone-classification_SynTone_precision_modified":0.5483185514,"tone-classification_SynTone_recall_modified":0.590333248,"tone-classification_SynTone_fscore_modified":0.530467546,"tone-classification_SynTone_acc":0.6397515528,"tone-classification_SynTone_precision":0.5483185514,"tone-classification_SynTone_recall":0.590333248,"tone-classification_SynTone_fscore":0.530467546,"tone-classification_SynTone_valid_output_ratio":1.0},"sts_FarSICK":{"sts_FarSICK_corrcoef_modified":0.8497629768,"sts_FarSICK_corrcoef":0.8497629768,"sts_FarSICK_valid_output_ratio":1.0},"paraphrase-detection_FarsiParaphraseDetection":{"paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.7982120051,"paraphrase-detection_FarsiParaphraseDetec
tion_precision_modified":0.8696369637,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7641791045,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.7707505633,"paraphrase-detection_FarsiParaphraseDetection_acc":0.7982120051,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8696369637,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7641791045,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.7707505633,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0},"nli_farstail":{"nli_farstail_acc_modified":0.6592071611,"nli_farstail_precision_modified":0.7292371837,"nli_farstail_recall_modified":0.6555663858,"nli_farstail_fscore_modified":0.6172863539,"nli_farstail_acc":0.6621708414,"nli_farstail_precision":0.7325157067,"nli_farstail_recall":0.6585136977,"nli_farstail_fscore":0.6200615655,"nli_farstail_valid_output_ratio":0.9955242967},"paraphrase-detection_parsinlu":{"paraphrase-detection_parsinlu_acc_modified":0.798,"paraphrase-detection_parsinlu_precision_modified":0.8383696273,"paraphrase-detection_parsinlu_recall_modified":0.7708282334,"paraphrase-detection_parsinlu_fscore_modified":0.7777278949,"paraphrase-detection_parsinlu_acc":0.798,"paraphrase-detection_parsinlu_precision":0.8383696273,"paraphrase-detection_parsinlu_recall":0.7708282334,"paraphrase-detection_parsinlu_fscore":0.7777278949,"paraphrase-detection_parsinlu_valid_output_ratio":1.0},"extractive-qa_PQuAD":{"extractive-qa_PQuAD_exact_match":1.8957345972,"extractive-qa_PQuAD_f1":0.4954484984},"topic-classification_sid":{"topic-classification_sid_acc_modified":0.56,"topic-classification_sid_precision_modified":0.5309838171,"topic-classification_sid_recall_modified":0.4706044677,"topic-classification_sid_fscore_modified":0.484170357,"topic-classification_sid_acc":0.5702647658,"topic-classification_sid_precision":0.5407167181,"topic-classification_sid_recall":0.4792306188,"topic-classification_sid_fscore":0.4930451701,"topic-classifica
tion_sid_valid_output_ratio":0.982},"nlu_score":0.5968415875}
31
+ {"Model Name":"yandexgpt-lite","thinking_method":"❌","model_url":"https_google.com","parameters_count":"8000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers":{"sentiment-analysis_deepsentipers_acc_modified":0.6531823085,"sentiment-analysis_deepsentipers_precision_modified":0.6360048559,"sentiment-analysis_deepsentipers_recall_modified":0.6171894526,"sentiment-analysis_deepsentipers_fscore_modified":0.6143157593,"sentiment-analysis_deepsentipers_acc":0.6631982475,"sentiment-analysis_deepsentipers_precision":0.6457573947,"sentiment-analysis_deepsentipers_recall":0.6266534749,"sentiment-analysis_deepsentipers_fscore":0.6237357162,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9848975189},"sts_SynPerSTS":{"sts_SynPerSTS_corrcoef_modified":0.884204514,"sts_SynPerSTS_corrcoef":0.9307415936,"sts_SynPerSTS_valid_output_ratio":0.95},"ner_arman":{"ner_arman_f1_mean":0.3603427165,"ner_arman_precision_mean":0.3233256712,"ner_arman_recall_mean":0.4685538339},"keyword-extraction_SynKeywords":{"keyword-extraction_SynKeywords_f1_mean":0.2983432135,"keyword-extraction_SynKeywords_precision_mean":0.2444159026,"keyword-extraction_SynKeywords_recall_mean":0.4067934783},"tone-classification_SynTone":{"tone-classification_SynTone_acc_modified":0.5714285714,"tone-classification_SynTone_precision_modified":0.4591341311,"tone-classification_SynTone_recall_modified":0.3918521789,"tone-classification_SynTone_fscore_modified":0.4053320024,"tone-classification_SynTone_acc":0.6174496644,"tone-classification_SynTone_precision":0.4961113765,"tone-classification_SynTone_recall":0.4234107436,"tone-classification_SynTone_fscore":0.4379761905,"tone-classification_SynTone_valid_output_ratio":0.9254658385},"sts_FarSICK":{"sts_FarSICK_corrcoef_modified":0.8139555874,"sts_FarSICK_corrcoef":0.8155867609,"sts_FarSICK_valid_output_ratio":0.998},"paraphrase-detection_FarsiParaphraseDetection":{"paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.779054917,"paraphrase-
detection_FarsiParaphraseDetection_precision_modified":0.8592259336,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7428737538,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.7463134032,"paraphrase-detection_FarsiParaphraseDetection_acc":0.781049936,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8614262561,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7447761194,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.7482245771,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9974457216},"nli_farstail":{"nli_farstail_acc_modified":0.4047314578,"nli_farstail_precision_modified":0.4394843848,"nli_farstail_recall_modified":0.4043652091,"nli_farstail_fscore_modified":0.3862934414,"nli_farstail_acc":0.5954844779,"nli_farstail_precision":0.6466167242,"nli_farstail_recall":0.5949456133,"nli_farstail_fscore":0.5683564838,"nli_farstail_valid_output_ratio":0.6796675192},"paraphrase-detection_parsinlu":{"paraphrase-detection_parsinlu_acc_modified":0.728,"paraphrase-detection_parsinlu_precision_modified":0.7567556588,"paraphrase-detection_parsinlu_recall_modified":0.7050900303,"paraphrase-detection_parsinlu_fscore_modified":0.7106739724,"paraphrase-detection_parsinlu_acc":0.7895878525,"paraphrase-detection_parsinlu_precision":0.8207762026,"paraphrase-detection_parsinlu_recall":0.7647397292,"paraphrase-detection_parsinlu_fscore":0.7707960655,"paraphrase-detection_parsinlu_valid_output_ratio":0.922},"extractive-qa_PQuAD":{"extractive-qa_PQuAD_exact_match":33.2701421801,"extractive-qa_PQuAD_f1":0.6371704755},"topic-classification_sid":{"topic-classification_sid_acc_modified":0.542,"topic-classification_sid_precision_modified":0.5260067354,"topic-classification_sid_recall_modified":0.4616060981,"topic-classification_sid_fscore_modified":0.4802703537,"topic-classification_sid_acc":0.5599173554,"topic-classification_sid_precision":0.5433953878,"topic-classification_sid_recall":0.4768658038,"topic-classif
ication_sid_fscore":0.4961470596,"topic-classification_sid_valid_output_ratio":0.968},"nlu_score":0.5761104945}
32
  {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https_google.com","parameters_count":"24000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers":{"sentiment-analysis_deepsentipers_acc_modified":0.5949298813,"sentiment-analysis_deepsentipers_precision_modified":0.6633283768,"sentiment-analysis_deepsentipers_recall_modified":0.6739827327,"sentiment-analysis_deepsentipers_fscore_modified":0.597099001,"sentiment-analysis_deepsentipers_acc":0.5958941113,"sentiment-analysis_deepsentipers_precision":0.6644034633,"sentiment-analysis_deepsentipers_recall":0.6750750872,"sentiment-analysis_deepsentipers_fscore":0.5980667466,"sentiment-analysis_deepsentipers_valid_output_ratio":0.998381877},"sts_SynPerSTS":{"sts_SynPerSTS_corrcoef_modified":0.9424987971,"sts_SynPerSTS_corrcoef":0.9424987971,"sts_SynPerSTS_valid_output_ratio":1.0},"ner_arman":{"ner_arman_f1_mean":0.028185021,"ner_arman_precision_mean":0.0278440732,"ner_arman_recall_mean":0.0304295943},"keyword-extraction_SynKeywords":{"keyword-extraction_SynKeywords_f1_mean":0.1942845429,"keyword-extraction_SynKeywords_precision_mean":0.168197784,"keyword-extraction_SynKeywords_recall_mean":0.2451992754},"tone-classification_SynTone":{"tone-classification_SynTone_acc_modified":0.7763975155,"tone-classification_SynTone_precision_modified":0.6774948824,"tone-classification_SynTone_recall_modified":0.67683866,"tone-classification_SynTone_fscore_modified":0.668356732,"tone-classification_SynTone_acc":0.7911392405,"tone-classification_SynTone_precision":0.6903587093,"tone-classification_SynTone_recall":0.689690027,"tone-classification_SynTone_fscore":0.6810470497,"tone-classification_SynTone_valid_output_ratio":0.9813664596},"sts_FarSICK":{"sts_FarSICK_corrcoef_modified":0.8346099969,"sts_FarSICK_corrcoef":0.8346099969,"sts_FarSICK_valid_output_ratio":1.0},"paraphrase-detection_FarsiParaphraseDetection":{"paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.680715198,
"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8209169054,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.6268656716,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.5933059088,"paraphrase-detection_FarsiParaphraseDetection_acc":0.680715198,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8209169054,"paraphrase-detection_FarsiParaphraseDetection_recall":0.6268656716,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.5933059088,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0},"nli_farstail":{"nli_farstail_acc_modified":0.7461636829,"nli_farstail_precision_modified":0.8279044878,"nli_farstail_recall_modified":0.7431719278,"nli_farstail_fscore_modified":0.7484099134,"nli_farstail_acc":0.7461636829,"nli_farstail_precision":0.8279044878,"nli_farstail_recall":0.7431719278,"nli_farstail_fscore":0.7484099134,"nli_farstail_valid_output_ratio":1.0},"paraphrase-detection_parsinlu":{"paraphrase-detection_parsinlu_acc_modified":0.66,"paraphrase-detection_parsinlu_precision_modified":0.7933390651,"paraphrase-detection_parsinlu_recall_modified":0.6057935537,"paraphrase-detection_parsinlu_fscore_modified":0.5625411726,"paraphrase-detection_parsinlu_acc":0.66,"paraphrase-detection_parsinlu_precision":0.7933390651,"paraphrase-detection_parsinlu_recall":0.6057935537,"paraphrase-detection_parsinlu_fscore":0.5625411726,"paraphrase-detection_parsinlu_valid_output_ratio":1.0},"extractive-qa_PQuAD":{"extractive-qa_PQuAD_exact_match":13.0805687204,"extractive-qa_PQuAD_f1":0.5111951184},"topic-classification_sid":{"topic-classification_sid_acc_modified":0.656,"topic-classification_sid_precision_modified":0.5819241823,"topic-classification_sid_recall_modified":0.5649560499,"topic-classification_sid_fscore_modified":0.5472284688,"topic-classification_sid_acc":0.7038626609,"topic-classification_sid_precision":0.6243821699,"topic-classification_sid_recall":0.6061760192,"topic-classification_sid_fscor
e":0.5871550095,"topic-classification_sid_valid_output_ratio":0.932},"nlu_score":0.5661558794}
33
  {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https_google.com","parameters_count":"35000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers":{"sentiment-analysis_deepsentipers_acc_modified":0.6154261057,"sentiment-analysis_deepsentipers_precision_modified":0.6519864557,"sentiment-analysis_deepsentipers_recall_modified":0.6762525877,"sentiment-analysis_deepsentipers_fscore_modified":0.5290317996,"sentiment-analysis_deepsentipers_acc":0.6154261057,"sentiment-analysis_deepsentipers_precision":0.6519864557,"sentiment-analysis_deepsentipers_recall":0.6762525877,"sentiment-analysis_deepsentipers_fscore":0.5290317996,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0},"sts_SynPerSTS":{"sts_SynPerSTS_corrcoef_modified":0.9009001164,"sts_SynPerSTS_corrcoef":0.9009001164,"sts_SynPerSTS_valid_output_ratio":1.0},"ner_arman":{"ner_arman_f1_mean":0.0,"ner_arman_precision_mean":0.0,"ner_arman_recall_mean":0.0},"keyword-extraction_SynKeywords":{"keyword-extraction_SynKeywords_f1_mean":0.0012077295,"keyword-extraction_SynKeywords_precision_mean":0.0013586957,"keyword-extraction_SynKeywords_recall_mean":0.0010869565},"tone-classification_SynTone":{"tone-classification_SynTone_acc_modified":0.7453416149,"tone-classification_SynTone_precision_modified":0.5770588432,"tone-classification_SynTone_recall_modified":0.5070698686,"tone-classification_SynTone_fscore_modified":0.5221093948,"tone-classification_SynTone_acc":0.7547169811,"tone-classification_SynTone_precision":0.584317445,"tone-classification_SynTone_recall":0.513448106,"tone-classification_SynTone_fscore":0.5286768085,"tone-classification_SynTone_valid_output_ratio":0.9875776398},"sts_FarSICK":{"sts_FarSICK_corrcoef_modified":0.8375953381,"sts_FarSICK_corrcoef":0.8375953381,"sts_FarSICK_valid_output_ratio":1.0},"paraphrase-detection_FarsiParaphraseDetection":{"paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.9169859515,"paraphrase-detection_FarsiParaphraseDetection
_precision_modified":0.9233165065,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.9095332885,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.9144938271,"paraphrase-detection_FarsiParaphraseDetection_acc":0.925257732,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9316453926,"paraphrase-detection_FarsiParaphraseDetection_recall":0.9177378414,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.9227431271,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9910600255},"nli_farstail":{"nli_farstail_acc_modified":0.537084399,"nli_farstail_precision_modified":0.7429511025,"nli_farstail_recall_modified":0.5428343437,"nli_farstail_fscore_modified":0.4522202373,"nli_farstail_acc":0.537084399,"nli_farstail_precision":0.7429511025,"nli_farstail_recall":0.5428343437,"nli_farstail_fscore":0.4522202373,"nli_farstail_valid_output_ratio":1.0},"paraphrase-detection_parsinlu":{"paraphrase-detection_parsinlu_acc_modified":0.778,"paraphrase-detection_parsinlu_precision_modified":0.8054432653,"paraphrase-detection_parsinlu_recall_modified":0.7971712985,"paraphrase-detection_parsinlu_fscore_modified":0.7776855183,"paraphrase-detection_parsinlu_acc":0.7842741935,"paraphrase-detection_parsinlu_precision":0.8119387755,"paraphrase-detection_parsinlu_recall":0.8036000993,"paraphrase-detection_parsinlu_fscore":0.7839571757,"paraphrase-detection_parsinlu_valid_output_ratio":0.992},"extractive-qa_PQuAD":{"extractive-qa_PQuAD_exact_match":41.990521327,"extractive-qa_PQuAD_f1":0.7401025641},"topic-classification_sid":{"topic-classification_sid_acc_modified":0.412,"topic-classification_sid_precision_modified":0.3819473808,"topic-classification_sid_recall_modified":0.2194110821,"topic-classification_sid_fscore_modified":0.166159266,"topic-classification_sid_acc":0.4735632184,"topic-classification_sid_precision":0.439019978,"topic-classification_sid_recall":0.2521966461,"topic-classification_sid_fscore":0.190987662,"topic-classific
ation_sid_valid_output_ratio":0.87},"nlu_score":0.531045981}
34
  {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","sentiment-analysis_deepsentipers":{"sentiment-analysis_deepsentipers_acc_modified":0.5604099245,"sentiment-analysis_deepsentipers_precision_modified":0.6546929453,"sentiment-analysis_deepsentipers_recall_modified":0.63956688,"sentiment-analysis_deepsentipers_fscore_modified":0.4401917985,"sentiment-analysis_deepsentipers_acc":0.5625338387,"sentiment-analysis_deepsentipers_precision":0.6571741855,"sentiment-analysis_deepsentipers_recall":0.6419907934,"sentiment-analysis_deepsentipers_fscore":0.4418600945,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9962243797},"sts_SynPerSTS":{"sts_SynPerSTS_corrcoef_modified":0.8301701871,"sts_SynPerSTS_corrcoef":0.8318338548,"sts_SynPerSTS_valid_output_ratio":0.998},"ner_arman":{"ner_arman_f1_mean":0.3918497805,"ner_arman_precision_mean":0.3656932857,"ner_arman_recall_mean":0.4707191101},"keyword-extraction_SynKeywords":{"keyword-extraction_SynKeywords_f1_mean":0.0888958039,"keyword-extraction_SynKeywords_precision_mean":0.0717122112,"keyword-extraction_SynKeywords_recall_mean":0.1270833333},"tone-classification_SynTone":{"tone-classification_SynTone_acc_modified":0.6645962733,"tone-classification_SynTone_precision_modified":0.5738861003,"tone-classification_SynTone_recall_modified":0.5372045516,"tone-classification_SynTone_fscore_modified":0.5004137071,"tone-classification_SynTone_acc":0.6729559748,"tone-classification_SynTone_precision":0.5811047933,"tone-classification_SynTone_recall":0.5439618416,"tone-classification_SynTone_fscore":0.5067082191,"tone-classification_SynTone_valid_output_ratio":0.9875776398},"sts_FarSICK":{"sts_FarSICK_corrcoef_modified":0.7973824106,"sts_FarSICK_corrcoef":0.8005847496,"sts_FarSICK_valid_output_ratio":0.996},"paraphrase-detection_FarsiParaphraseDetection":{"paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8237547
893,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8607726326,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8027155145,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8121248229,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8509234828,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8891622313,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8291903006,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8389099424,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9680715198},"nli_farstail":{"nli_farstail_acc_modified":0.3177749361,"nli_farstail_precision_modified":0.3674330881,"nli_farstail_recall_modified":0.3157646078,"nli_farstail_fscore_modified":0.300349348,"nli_farstail_acc":0.4737845567,"nli_farstail_precision":0.5478220684,"nli_farstail_recall":0.4707872704,"nli_farstail_fscore":0.447803985,"nli_farstail_valid_output_ratio":0.6707161125},"paraphrase-detection_parsinlu":{"paraphrase-detection_parsinlu_acc_modified":0.764,"paraphrase-detection_parsinlu_precision_modified":0.7656475469,"paraphrase-detection_parsinlu_recall_modified":0.7528414743,"paraphrase-detection_parsinlu_fscore_modified":0.756438131,"paraphrase-detection_parsinlu_acc":0.7925311203,"paraphrase-detection_parsinlu_precision":0.7942401938,"paraphrase-detection_parsinlu_recall":0.7809558862,"paraphrase-detection_parsinlu_fscore":0.7846868579,"paraphrase-detection_parsinlu_valid_output_ratio":0.964},"extractive-qa_PQuAD":{"extractive-qa_PQuAD_exact_match":21.990521327,"extractive-qa_PQuAD_f1":0.5849945641},"topic-classification_sid":{"topic-classification_sid_acc_modified":0.394,"topic-classification_sid_precision_modified":0.5177096689,"topic-classification_sid_recall_modified":0.297966245,"topic-classification_sid_fscore_modified":0.2626151509,"topic-classification_sid_acc":0.4368070953,"topic-classification_sid_precision":0.5739575043,"topic-classification_sid_recall":0.3303395178,"t
opic-classification_sid_fscore":0.2911476174,"topic-classification_sid_valid_output_ratio":0.902},"nlu_score":0.5241296095}
leaderboard/boards_data/question-generation_PersianQA.jsonl CHANGED
@@ -1,44 +1,45 @@
1
- {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.1693490122,"question-generation_PersianQA_rougeL_recall":0.3886090827,"question-generation_PersianQA_rougeL_f1_score":0.227277052,"nlg_score":0.1779340777}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1877254615,"question-generation_PersianQA_rougeL_recall":0.3036923298,"question-generation_PersianQA_rougeL_f1_score":0.2215402117,"nlg_score":0.1334687319}
3
- {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1704020873,"question-generation_PersianQA_rougeL_recall":0.3000756202,"question-generation_PersianQA_rougeL_f1_score":0.2079039891,"nlg_score":0.0949943578}
4
- {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2149535143,"question-generation_PersianQA_rougeL_recall":0.3019561885,"question-generation_PersianQA_rougeL_f1_score":0.2405115465,"nlg_score":0.1880477876}
 
 
 
5
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1641763339,"question-generation_PersianQA_rougeL_recall":0.3222474527,"question-generation_PersianQA_rougeL_f1_score":0.2021059918,"nlg_score":0.1430866672}
6
- {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2602516122,"question-generation_PersianQA_rougeL_recall":0.3803807526,"question-generation_PersianQA_rougeL_f1_score":0.2967852302,"nlg_score":0.181552926}
7
- {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2221178335,"question-generation_PersianQA_rougeL_recall":0.335306645,"question-generation_PersianQA_rougeL_f1_score":0.2552875817,"nlg_score":0.1643361642}
8
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2140114675,"question-generation_PersianQA_rougeL_recall":0.3382796762,"question-generation_PersianQA_rougeL_f1_score":0.2507426631,"nlg_score":0.1538910531}
9
- {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1365997259,"question-generation_PersianQA_rougeL_recall":0.3257934111,"question-generation_PersianQA_rougeL_f1_score":0.1803398036,"nlg_score":0.0940241349}
10
- {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2576021626,"question-generation_PersianQA_rougeL_recall":0.3924501003,"question-generation_PersianQA_rougeL_f1_score":0.2985826349,"nlg_score":0.194675133}
11
- {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2572991833,"question-generation_PersianQA_rougeL_recall":0.3740225235,"question-generation_PersianQA_rougeL_f1_score":0.2927586837,"nlg_score":0.1196804312}
12
- {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.154927725,"question-generation_PersianQA_rougeL_recall":0.279873372,"question-generation_PersianQA_rougeL_f1_score":0.1873687458,"nlg_score":0.1631530657}
13
- {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2041596361,"question-generation_PersianQA_rougeL_recall":0.3456815337,"question-generation_PersianQA_rougeL_f1_score":0.2459732807,"nlg_score":0.1067134448}
14
- {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1987198912,"question-generation_PersianQA_rougeL_recall":0.3431437262,"question-generation_PersianQA_rougeL_f1_score":0.2419384398,"nlg_score":0.16056333}
15
- {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2281053588,"question-generation_PersianQA_rougeL_recall":0.370933314,"question-generation_PersianQA_rougeL_f1_score":0.273363418,"nlg_score":0.1679338638}
16
- {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1572445395,"question-generation_PersianQA_rougeL_recall":0.2651515671,"question-generation_PersianQA_rougeL_f1_score":0.1889377754,"nlg_score":0.1567965528}
17
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.0870939736,"question-generation_PersianQA_rougeL_recall":0.3600941065,"question-generation_PersianQA_rougeL_f1_score":0.1336375958,"nlg_score":0.1089333827}
18
- {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1520819517,"question-generation_PersianQA_rougeL_recall":0.26324767,"question-generation_PersianQA_rougeL_f1_score":0.1843401988,"nlg_score":0.1319091735}
 
 
19
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1669255457,"question-generation_PersianQA_rougeL_recall":0.2952488346,"question-generation_PersianQA_rougeL_f1_score":0.2007786564,"nlg_score":0.112015688}
 
20
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1939037413,"question-generation_PersianQA_rougeL_recall":0.4070822245,"question-generation_PersianQA_rougeL_f1_score":0.2439578999,"nlg_score":0.0934094344}
 
 
 
21
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1744197112,"question-generation_PersianQA_rougeL_recall":0.2697024508,"question-generation_PersianQA_rougeL_f1_score":0.2017710943,"nlg_score":0.1389297212}
22
- {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.045673941,"question-generation_PersianQA_rougeL_recall":0.0991932753,"question-generation_PersianQA_rougeL_f1_score":0.0576169145,"nlg_score":0.0682994522}
23
- {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.0980160864,"question-generation_PersianQA_rougeL_recall":0.347983913,"question-generation_PersianQA_rougeL_f1_score":0.1443872083,"nlg_score":0.1196400535}
 
24
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2447184183,"question-generation_PersianQA_rougeL_recall":0.3388367288,"question-generation_PersianQA_rougeL_f1_score":0.269297654,"nlg_score":0.2010896964}
25
- {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2438951227,"question-generation_PersianQA_rougeL_recall":0.3687301621,"question-generation_PersianQA_rougeL_f1_score":0.2816187853,"nlg_score":0.1901206806}
 
26
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2126001437,"question-generation_PersianQA_rougeL_recall":0.3731677121,"question-generation_PersianQA_rougeL_f1_score":0.2603121806,"nlg_score":0.1764906292}
27
- {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.1942536013,"question-generation_PersianQA_rougeL_recall":0.3435531442,"question-generation_PersianQA_rougeL_f1_score":0.2369359061,"nlg_score":0.1810678527}
28
- {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2171998078,"question-generation_PersianQA_rougeL_recall":0.3938560893,"question-generation_PersianQA_rougeL_f1_score":0.268371521,"nlg_score":0.1137933652}
29
- {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.3141052553,"question-generation_PersianQA_rougeL_recall":0.4102615831,"question-generation_PersianQA_rougeL_f1_score":0.3441804021,"nlg_score":0.178231145}
30
- {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.3121385499,"question-generation_PersianQA_rougeL_recall":0.4162991047,"question-generation_PersianQA_rougeL_f1_score":0.3445136596,"nlg_score":0.1368740087}
31
- {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2782492277,"question-generation_PersianQA_rougeL_recall":0.3823213358,"question-generation_PersianQA_rougeL_f1_score":0.3109786075,"nlg_score":0.1659339021}
32
- {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1546246184,"question-generation_PersianQA_rougeL_recall":0.253394795,"question-generation_PersianQA_rougeL_f1_score":0.1829113647,"nlg_score":0.1641995602}
33
- {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.1015709066,"question-generation_PersianQA_rougeL_recall":0.2942260719,"question-generation_PersianQA_rougeL_f1_score":0.1433273231,"nlg_score":0.1417778788}
34
- {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.1622159789,"question-generation_PersianQA_rougeL_recall":0.302597472,"question-generation_PersianQA_rougeL_f1_score":0.2021048057,"nlg_score":0.1665903777}
35
- {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1300043841,"question-generation_PersianQA_rougeL_recall":0.2706972572,"question-generation_PersianQA_rougeL_f1_score":0.1652440214,"nlg_score":0.1324031203}
36
- {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1965366702,"question-generation_PersianQA_rougeL_recall":0.340760284,"question-generation_PersianQA_rougeL_f1_score":0.2388923895,"nlg_score":0.1557270864}
37
- {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1127092702,"question-generation_PersianQA_rougeL_recall":0.2982763168,"question-generation_PersianQA_rougeL_f1_score":0.1525970768,"nlg_score":0.0944140383}
38
- {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2275858051,"question-generation_PersianQA_rougeL_recall":0.3654754607,"question-generation_PersianQA_rougeL_f1_score":0.2679025722,"nlg_score":0.18964968}
39
- {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1342253144,"question-generation_PersianQA_rougeL_recall":0.4100317735,"question-generation_PersianQA_rougeL_f1_score":0.18410589,"nlg_score":0.0880621978}
40
- {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1996840686,"question-generation_PersianQA_rougeL_recall":0.3393114266,"question-generation_PersianQA_rougeL_f1_score":0.2417040176,"nlg_score":0.164118288}
41
- {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1264186031,"question-generation_PersianQA_rougeL_recall":0.2582953109,"question-generation_PersianQA_rougeL_f1_score":0.1600835412,"nlg_score":0.1129755187}
42
- {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.0567952998,"question-generation_PersianQA_rougeL_recall":0.2105979358,"question-generation_PersianQA_rougeL_f1_score":0.0793499521,"nlg_score":0.0823387318}
43
- {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2978290521,"question-generation_PersianQA_rougeL_recall":0.4184808562,"question-generation_PersianQA_rougeL_f1_score":0.3324485723,"nlg_score":0.1151518212}
44
- {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.0427303601,"question-generation_PersianQA_rougeL_recall":0.044781684,"question-generation_PersianQA_rougeL_f1_score":0.0291125751,"nlg_score":0.0509841903}
 
1
+ {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.1622159789,"question-generation_PersianQA_rougeL_recall":0.302597472,"question-generation_PersianQA_rougeL_f1_score":0.2021048057,"nlg_score":0.1665903777}
2
+ {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1546246184,"question-generation_PersianQA_rougeL_recall":0.253394795,"question-generation_PersianQA_rougeL_f1_score":0.1829113647,"nlg_score":0.1641995602}
3
+ {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.3141052553,"question-generation_PersianQA_rougeL_recall":0.4102615831,"question-generation_PersianQA_rougeL_f1_score":0.3441804021,"nlg_score":0.178231145}
4
+ {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1342253144,"question-generation_PersianQA_rougeL_recall":0.4100317735,"question-generation_PersianQA_rougeL_f1_score":0.18410589,"nlg_score":0.0880621978}
5
+ {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1572445395,"question-generation_PersianQA_rougeL_recall":0.2651515671,"question-generation_PersianQA_rougeL_f1_score":0.1889377754,"nlg_score":0.1567965528}
6
+ {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1987198912,"question-generation_PersianQA_rougeL_recall":0.3431437262,"question-generation_PersianQA_rougeL_f1_score":0.2419384398,"nlg_score":0.16056333}
7
+ {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2281053588,"question-generation_PersianQA_rougeL_recall":0.370933314,"question-generation_PersianQA_rougeL_f1_score":0.273363418,"nlg_score":0.1679338638}
8
+ {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.3121385499,"question-generation_PersianQA_rougeL_recall":0.4162991047,"question-generation_PersianQA_rougeL_f1_score":0.3445136596,"nlg_score":0.1368740087}
9
+ {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1300043841,"question-generation_PersianQA_rougeL_recall":0.2706972572,"question-generation_PersianQA_rougeL_f1_score":0.1652440214,"nlg_score":0.1324031203}
10
+ {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2978290521,"question-generation_PersianQA_rougeL_recall":0.4184808562,"question-generation_PersianQA_rougeL_f1_score":0.3324485723,"nlg_score":0.1151518212}
11
+ {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2221178335,"question-generation_PersianQA_rougeL_recall":0.335306645,"question-generation_PersianQA_rougeL_f1_score":0.2552875817,"nlg_score":0.1643361642}
12
+ {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1996840686,"question-generation_PersianQA_rougeL_recall":0.3393114266,"question-generation_PersianQA_rougeL_f1_score":0.2417040176,"nlg_score":0.164118288}
13
+ {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1127092702,"question-generation_PersianQA_rougeL_recall":0.2982763168,"question-generation_PersianQA_rougeL_f1_score":0.1525970768,"nlg_score":0.0944140383}
14
+ {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.1942536013,"question-generation_PersianQA_rougeL_recall":0.3435531442,"question-generation_PersianQA_rougeL_f1_score":0.2369359061,"nlg_score":0.1810678527}
15
+ {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1264186031,"question-generation_PersianQA_rougeL_recall":0.2582953109,"question-generation_PersianQA_rougeL_f1_score":0.1600835412,"nlg_score":0.1129755187}
16
+ {"Model Name":"yandexgpt-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.0901454704,"question-generation_PersianQA_rougeL_recall":0.1786518031,"question-generation_PersianQA_rougeL_f1_score":0.1132710233,"nlg_score":0.1035446324}
17
+ {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1520819517,"question-generation_PersianQA_rougeL_recall":0.26324767,"question-generation_PersianQA_rougeL_f1_score":0.1843401988,"nlg_score":0.1319091735}
18
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1877254615,"question-generation_PersianQA_rougeL_recall":0.3036923298,"question-generation_PersianQA_rougeL_f1_score":0.2215402117,"nlg_score":0.1334687319}
19
+ {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2572991833,"question-generation_PersianQA_rougeL_recall":0.3740225235,"question-generation_PersianQA_rougeL_f1_score":0.2927586837,"nlg_score":0.1196804312}
20
+ {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.0980160864,"question-generation_PersianQA_rougeL_recall":0.347983913,"question-generation_PersianQA_rougeL_f1_score":0.1443872083,"nlg_score":0.1196400535}
21
+ {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1965366702,"question-generation_PersianQA_rougeL_recall":0.340760284,"question-generation_PersianQA_rougeL_f1_score":0.2388923895,"nlg_score":0.1557270864}
22
+ {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2275858051,"question-generation_PersianQA_rougeL_recall":0.3654754607,"question-generation_PersianQA_rougeL_f1_score":0.2679025722,"nlg_score":0.18964968}
23
+ {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.1693490122,"question-generation_PersianQA_rougeL_recall":0.3886090827,"question-generation_PersianQA_rougeL_f1_score":0.227277052,"nlg_score":0.1779340777}
24
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1641763339,"question-generation_PersianQA_rougeL_recall":0.3222474527,"question-generation_PersianQA_rougeL_f1_score":0.2021059918,"nlg_score":0.1430866672}
 
 
25
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2140114675,"question-generation_PersianQA_rougeL_recall":0.3382796762,"question-generation_PersianQA_rougeL_f1_score":0.2507426631,"nlg_score":0.1538910531}
26
+ {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1704020873,"question-generation_PersianQA_rougeL_recall":0.3000756202,"question-generation_PersianQA_rougeL_f1_score":0.2079039891,"nlg_score":0.0949943578}
27
+ {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.0427303601,"question-generation_PersianQA_rougeL_recall":0.044781684,"question-generation_PersianQA_rougeL_f1_score":0.0291125751,"nlg_score":0.0509841903}
 
 
 
 
 
 
28
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.0870939736,"question-generation_PersianQA_rougeL_recall":0.3600941065,"question-generation_PersianQA_rougeL_f1_score":0.1336375958,"nlg_score":0.1089333827}
29
+ {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2576021626,"question-generation_PersianQA_rougeL_recall":0.3924501003,"question-generation_PersianQA_rougeL_f1_score":0.2985826349,"nlg_score":0.194675133}
30
+ {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2171998078,"question-generation_PersianQA_rougeL_recall":0.3938560893,"question-generation_PersianQA_rougeL_f1_score":0.268371521,"nlg_score":0.1137933652}
31
+ {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2782492277,"question-generation_PersianQA_rougeL_recall":0.3823213358,"question-generation_PersianQA_rougeL_f1_score":0.3109786075,"nlg_score":0.1659339021}
32
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1669255457,"question-generation_PersianQA_rougeL_recall":0.2952488346,"question-generation_PersianQA_rougeL_f1_score":0.2007786564,"nlg_score":0.112015688}
33
+ {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.154927725,"question-generation_PersianQA_rougeL_recall":0.279873372,"question-generation_PersianQA_rougeL_f1_score":0.1873687458,"nlg_score":0.1631530657}
34
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1939037413,"question-generation_PersianQA_rougeL_recall":0.4070822245,"question-generation_PersianQA_rougeL_f1_score":0.2439578999,"nlg_score":0.0934094344}
35
+ {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.1015709066,"question-generation_PersianQA_rougeL_recall":0.2942260719,"question-generation_PersianQA_rougeL_f1_score":0.1433273231,"nlg_score":0.1417778788}
36
+ {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.0567952998,"question-generation_PersianQA_rougeL_recall":0.2105979358,"question-generation_PersianQA_rougeL_f1_score":0.0793499521,"nlg_score":0.0823387318}
37
+ {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2438951227,"question-generation_PersianQA_rougeL_recall":0.3687301621,"question-generation_PersianQA_rougeL_f1_score":0.2816187853,"nlg_score":0.1901206806}
38
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1744197112,"question-generation_PersianQA_rougeL_recall":0.2697024508,"question-generation_PersianQA_rougeL_f1_score":0.2017710943,"nlg_score":0.1389297212}
39
+ {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2041596361,"question-generation_PersianQA_rougeL_recall":0.3456815337,"question-generation_PersianQA_rougeL_f1_score":0.2459732807,"nlg_score":0.1067134448}
40
+ {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2602516122,"question-generation_PersianQA_rougeL_recall":0.3803807526,"question-generation_PersianQA_rougeL_f1_score":0.2967852302,"nlg_score":0.181552926}
41
+ {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1365997259,"question-generation_PersianQA_rougeL_recall":0.3257934111,"question-generation_PersianQA_rougeL_f1_score":0.1803398036,"nlg_score":0.0940241349}
42
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2447184183,"question-generation_PersianQA_rougeL_recall":0.3388367288,"question-generation_PersianQA_rougeL_f1_score":0.269297654,"nlg_score":0.2010896964}
43
+ {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.045673941,"question-generation_PersianQA_rougeL_recall":0.0991932753,"question-generation_PersianQA_rougeL_f1_score":0.0576169145,"nlg_score":0.0682994522}
44
+ {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2149535143,"question-generation_PersianQA_rougeL_recall":0.3019561885,"question-generation_PersianQA_rougeL_f1_score":0.2405115465,"nlg_score":0.1880477876}
45
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2126001437,"question-generation_PersianQA_rougeL_recall":0.3731677121,"question-generation_PersianQA_rougeL_f1_score":0.2603121806,"nlg_score":0.1764906292}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leaderboard/boards_data/sentiment-analysis_deepsentipers.jsonl CHANGED
@@ -1,44 +1,45 @@
1
- {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.8058409951,"sentiment-analysis_deepsentipers_precision_modified":0.7717795715,"sentiment-analysis_deepsentipers_recall_modified":0.8211827366,"sentiment-analysis_deepsentipers_fscore_modified":0.7889064935,"sentiment-analysis_deepsentipers_acc":0.8062770563,"sentiment-analysis_deepsentipers_precision":0.7721972011,"sentiment-analysis_deepsentipers_recall":0.8216270995,"sentiment-analysis_deepsentipers_fscore":0.7893333909,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9994591671,"nlu_score":0.7143086066}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7421790723,"sentiment-analysis_deepsentipers_precision_modified":0.705605232,"sentiment-analysis_deepsentipers_recall_modified":0.7565637786,"sentiment-analysis_deepsentipers_fscore_modified":0.7108099837,"sentiment-analysis_deepsentipers_acc":0.7674288901,"sentiment-analysis_deepsentipers_precision":0.7296107642,"sentiment-analysis_deepsentipers_recall":0.7823029813,"sentiment-analysis_deepsentipers_fscore":0.7349925877,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9670981661,"nlu_score":0.628506628}
3
- {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.6817691478,"sentiment-analysis_deepsentipers_precision_modified":0.6546744642,"sentiment-analysis_deepsentipers_recall_modified":0.7378694789,"sentiment-analysis_deepsentipers_fscore_modified":0.6356142977,"sentiment-analysis_deepsentipers_acc":0.6817691478,"sentiment-analysis_deepsentipers_precision":0.6546744642,"sentiment-analysis_deepsentipers_recall":0.7378694789,"sentiment-analysis_deepsentipers_fscore":0.6356142977,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6241793507}
4
- {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.738403452,"sentiment-analysis_deepsentipers_precision_modified":0.706763853,"sentiment-analysis_deepsentipers_recall_modified":0.7658510846,"sentiment-analysis_deepsentipers_fscore_modified":0.726373242,"sentiment-analysis_deepsentipers_acc":0.738403452,"sentiment-analysis_deepsentipers_precision":0.706763853,"sentiment-analysis_deepsentipers_recall":0.7658510846,"sentiment-analysis_deepsentipers_fscore":0.726373242,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6297634971}
 
 
 
5
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7659115426,"sentiment-analysis_deepsentipers_precision_modified":0.726363779,"sentiment-analysis_deepsentipers_recall_modified":0.795958415,"sentiment-analysis_deepsentipers_fscore_modified":0.7384201215,"sentiment-analysis_deepsentipers_acc":0.7659115426,"sentiment-analysis_deepsentipers_precision":0.726363779,"sentiment-analysis_deepsentipers_recall":0.795958415,"sentiment-analysis_deepsentipers_fscore":0.7384201215,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6470954618}
6
- {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.806364617,"sentiment-analysis_deepsentipers_precision_modified":0.7705568258,"sentiment-analysis_deepsentipers_recall_modified":0.8234753765,"sentiment-analysis_deepsentipers_fscore_modified":0.7802386366,"sentiment-analysis_deepsentipers_acc":0.806364617,"sentiment-analysis_deepsentipers_precision":0.7705568258,"sentiment-analysis_deepsentipers_recall":0.8234753765,"sentiment-analysis_deepsentipers_fscore":0.7802386366,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.7144353486}
7
- {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7885652643,"sentiment-analysis_deepsentipers_precision_modified":0.757057239,"sentiment-analysis_deepsentipers_recall_modified":0.8134053732,"sentiment-analysis_deepsentipers_fscore_modified":0.7618040556,"sentiment-analysis_deepsentipers_acc":0.7911255411,"sentiment-analysis_deepsentipers_precision":0.7595152171,"sentiment-analysis_deepsentipers_recall":0.8160462998,"sentiment-analysis_deepsentipers_fscore":0.7642774453,"sentiment-analysis_deepsentipers_valid_output_ratio":0.996763754,"nlu_score":0.6749652797}
8
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7842502697,"sentiment-analysis_deepsentipers_precision_modified":0.7475186413,"sentiment-analysis_deepsentipers_recall_modified":0.8040239865,"sentiment-analysis_deepsentipers_fscore_modified":0.7603028067,"sentiment-analysis_deepsentipers_acc":0.7842502697,"sentiment-analysis_deepsentipers_precision":0.7475186413,"sentiment-analysis_deepsentipers_recall":0.8040239865,"sentiment-analysis_deepsentipers_fscore":0.7603028067,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6458443785}
9
- {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7518878101,"sentiment-analysis_deepsentipers_precision_modified":0.7333601788,"sentiment-analysis_deepsentipers_recall_modified":0.7850018857,"sentiment-analysis_deepsentipers_fscore_modified":0.7214827861,"sentiment-analysis_deepsentipers_acc":0.752293578,"sentiment-analysis_deepsentipers_precision":0.7337559479,"sentiment-analysis_deepsentipers_recall":0.785425524,"sentiment-analysis_deepsentipers_fscore":0.7218721454,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9994606257,"nlu_score":0.6552152029}
10
- {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.8047464941,"sentiment-analysis_deepsentipers_precision_modified":0.7661826532,"sentiment-analysis_deepsentipers_recall_modified":0.8089861144,"sentiment-analysis_deepsentipers_fscore_modified":0.7830417049,"sentiment-analysis_deepsentipers_acc":0.8047464941,"sentiment-analysis_deepsentipers_precision":0.7661826532,"sentiment-analysis_deepsentipers_recall":0.8089861144,"sentiment-analysis_deepsentipers_fscore":0.7830417049,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6758278127}
11
- {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7664509169,"sentiment-analysis_deepsentipers_precision_modified":0.7235774595,"sentiment-analysis_deepsentipers_recall_modified":0.785720049,"sentiment-analysis_deepsentipers_fscore_modified":0.7440236575,"sentiment-analysis_deepsentipers_acc":0.766864544,"sentiment-analysis_deepsentipers_precision":0.7239679492,"sentiment-analysis_deepsentipers_recall":0.786144075,"sentiment-analysis_deepsentipers_fscore":0.7444251813,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9994606257,"nlu_score":0.699116864}
12
- {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.783171521,"sentiment-analysis_deepsentipers_precision_modified":0.7433314894,"sentiment-analysis_deepsentipers_recall_modified":0.8047725112,"sentiment-analysis_deepsentipers_fscore_modified":0.759109397,"sentiment-analysis_deepsentipers_acc":0.7835941716,"sentiment-analysis_deepsentipers_precision":0.7437326397,"sentiment-analysis_deepsentipers_recall":0.805206819,"sentiment-analysis_deepsentipers_fscore":0.7595190621,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9994606257,"nlu_score":0.7178891542}
13
- {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7556634304,"sentiment-analysis_deepsentipers_precision_modified":0.7100962569,"sentiment-analysis_deepsentipers_recall_modified":0.796296032,"sentiment-analysis_deepsentipers_fscore_modified":0.7198160026,"sentiment-analysis_deepsentipers_acc":0.7556634304,"sentiment-analysis_deepsentipers_precision":0.7100962569,"sentiment-analysis_deepsentipers_recall":0.796296032,"sentiment-analysis_deepsentipers_fscore":0.7198160026,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6898261633}
14
- {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7459546926,"sentiment-analysis_deepsentipers_precision_modified":0.696002467,"sentiment-analysis_deepsentipers_recall_modified":0.7725731976,"sentiment-analysis_deepsentipers_fscore_modified":0.7160207999,"sentiment-analysis_deepsentipers_acc":0.7459546926,"sentiment-analysis_deepsentipers_precision":0.696002467,"sentiment-analysis_deepsentipers_recall":0.7725731976,"sentiment-analysis_deepsentipers_fscore":0.7160207999,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6460328733}
15
- {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7820927724,"sentiment-analysis_deepsentipers_precision_modified":0.7592820571,"sentiment-analysis_deepsentipers_recall_modified":0.7768252647,"sentiment-analysis_deepsentipers_fscore_modified":0.7562669975,"sentiment-analysis_deepsentipers_acc":0.7820927724,"sentiment-analysis_deepsentipers_precision":0.7592820571,"sentiment-analysis_deepsentipers_recall":0.7768252647,"sentiment-analysis_deepsentipers_fscore":0.7562669975,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6714091535}
16
- {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.715210356,"sentiment-analysis_deepsentipers_precision_modified":0.6703215936,"sentiment-analysis_deepsentipers_recall_modified":0.7384303354,"sentiment-analysis_deepsentipers_fscore_modified":0.6873376696,"sentiment-analysis_deepsentipers_acc":0.7651471437,"sentiment-analysis_deepsentipers_precision":0.7171241976,"sentiment-analysis_deepsentipers_recall":0.7899883681,"sentiment-analysis_deepsentipers_fscore":0.7353283551,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9347357066,"nlu_score":0.4086928082}
17
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7521691974,"sentiment-analysis_deepsentipers_precision_modified":0.7149147283,"sentiment-analysis_deepsentipers_recall_modified":0.7661218172,"sentiment-analysis_deepsentipers_fscore_modified":0.7340307684,"sentiment-analysis_deepsentipers_acc":0.7550353838,"sentiment-analysis_deepsentipers_precision":0.7176389542,"sentiment-analysis_deepsentipers_recall":0.7690411709,"sentiment-analysis_deepsentipers_fscore":0.7368278372,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9962039046,"nlu_score":0.3749414991}
18
- {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.5949298813,"sentiment-analysis_deepsentipers_precision_modified":0.6633283768,"sentiment-analysis_deepsentipers_recall_modified":0.6739827327,"sentiment-analysis_deepsentipers_fscore_modified":0.597099001,"sentiment-analysis_deepsentipers_acc":0.5958941113,"sentiment-analysis_deepsentipers_precision":0.6644034633,"sentiment-analysis_deepsentipers_recall":0.6750750872,"sentiment-analysis_deepsentipers_fscore":0.5980667466,"sentiment-analysis_deepsentipers_valid_output_ratio":0.998381877,"nlu_score":0.5661558794}
 
 
19
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7038834951,"sentiment-analysis_deepsentipers_precision_modified":0.6558175114,"sentiment-analysis_deepsentipers_recall_modified":0.7195323034,"sentiment-analysis_deepsentipers_fscore_modified":0.6634075099,"sentiment-analysis_deepsentipers_acc":0.7038834951,"sentiment-analysis_deepsentipers_precision":0.6558175114,"sentiment-analysis_deepsentipers_recall":0.7195323034,"sentiment-analysis_deepsentipers_fscore":0.6634075099,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.456845738}
 
20
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7669902913,"sentiment-analysis_deepsentipers_precision_modified":0.7411642414,"sentiment-analysis_deepsentipers_recall_modified":0.7990679398,"sentiment-analysis_deepsentipers_fscore_modified":0.7346216275,"sentiment-analysis_deepsentipers_acc":0.7669902913,"sentiment-analysis_deepsentipers_precision":0.7411642414,"sentiment-analysis_deepsentipers_recall":0.7990679398,"sentiment-analysis_deepsentipers_fscore":0.7346216275,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6752949557}
 
 
 
21
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.6855447681,"sentiment-analysis_deepsentipers_precision_modified":0.6408552737,"sentiment-analysis_deepsentipers_recall_modified":0.7180772523,"sentiment-analysis_deepsentipers_fscore_modified":0.6446920024,"sentiment-analysis_deepsentipers_acc":0.6855447681,"sentiment-analysis_deepsentipers_precision":0.6408552737,"sentiment-analysis_deepsentipers_recall":0.7180772523,"sentiment-analysis_deepsentipers_fscore":0.6446920024,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.5121418762}
22
- {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.5199568501,"sentiment-analysis_deepsentipers_precision_modified":0.4907692439,"sentiment-analysis_deepsentipers_recall_modified":0.5047701764,"sentiment-analysis_deepsentipers_fscore_modified":0.4457895794,"sentiment-analysis_deepsentipers_acc":0.571767497,"sentiment-analysis_deepsentipers_precision":0.5396715174,"sentiment-analysis_deepsentipers_recall":0.5550675605,"sentiment-analysis_deepsentipers_fscore":0.4902098934,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9093851133,"nlu_score":0.3619547874}
23
- {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7642934196,"sentiment-analysis_deepsentipers_precision_modified":0.7287131406,"sentiment-analysis_deepsentipers_recall_modified":0.7801104156,"sentiment-analysis_deepsentipers_fscore_modified":0.7434018552,"sentiment-analysis_deepsentipers_acc":0.7951739618,"sentiment-analysis_deepsentipers_precision":0.7581560958,"sentiment-analysis_deepsentipers_recall":0.8116300284,"sentiment-analysis_deepsentipers_fscore":0.7734382938,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9611650485,"nlu_score":0.3928685253}
 
24
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7324703344,"sentiment-analysis_deepsentipers_precision_modified":0.7023773257,"sentiment-analysis_deepsentipers_recall_modified":0.7824931708,"sentiment-analysis_deepsentipers_fscore_modified":0.6905624385,"sentiment-analysis_deepsentipers_acc":0.7324703344,"sentiment-analysis_deepsentipers_precision":0.7023773257,"sentiment-analysis_deepsentipers_recall":0.7824931708,"sentiment-analysis_deepsentipers_fscore":0.6905624385,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6800109206}
25
- {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7869471413,"sentiment-analysis_deepsentipers_precision_modified":0.7486325068,"sentiment-analysis_deepsentipers_recall_modified":0.811119619,"sentiment-analysis_deepsentipers_fscore_modified":0.7669134988,"sentiment-analysis_deepsentipers_acc":0.7869471413,"sentiment-analysis_deepsentipers_precision":0.7486325068,"sentiment-analysis_deepsentipers_recall":0.811119619,"sentiment-analysis_deepsentipers_fscore":0.7669134988,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6833497104}
 
26
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.806364617,"sentiment-analysis_deepsentipers_precision_modified":0.7700109372,"sentiment-analysis_deepsentipers_recall_modified":0.8303259501,"sentiment-analysis_deepsentipers_fscore_modified":0.7817187645,"sentiment-analysis_deepsentipers_acc":0.806364617,"sentiment-analysis_deepsentipers_precision":0.7700109372,"sentiment-analysis_deepsentipers_recall":0.8303259501,"sentiment-analysis_deepsentipers_fscore":0.7817187645,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.7207167537}
27
- {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7713052859,"sentiment-analysis_deepsentipers_precision_modified":0.7288724929,"sentiment-analysis_deepsentipers_recall_modified":0.803675275,"sentiment-analysis_deepsentipers_fscore_modified":0.753174206,"sentiment-analysis_deepsentipers_acc":0.7713052859,"sentiment-analysis_deepsentipers_precision":0.7288724929,"sentiment-analysis_deepsentipers_recall":0.803675275,"sentiment-analysis_deepsentipers_fscore":0.753174206,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6459120734}
28
- {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.8047464941,"sentiment-analysis_deepsentipers_precision_modified":0.7692351798,"sentiment-analysis_deepsentipers_recall_modified":0.8125606487,"sentiment-analysis_deepsentipers_fscore_modified":0.7842327246,"sentiment-analysis_deepsentipers_acc":0.8047464941,"sentiment-analysis_deepsentipers_precision":0.7692351798,"sentiment-analysis_deepsentipers_recall":0.8125606487,"sentiment-analysis_deepsentipers_fscore":0.7842327246,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.4824528512}
29
- {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7740021575,"sentiment-analysis_deepsentipers_precision_modified":0.7295627073,"sentiment-analysis_deepsentipers_recall_modified":0.7969121455,"sentiment-analysis_deepsentipers_fscore_modified":0.7492633779,"sentiment-analysis_deepsentipers_acc":0.7744198597,"sentiment-analysis_deepsentipers_precision":0.729956427,"sentiment-analysis_deepsentipers_recall":0.7973422114,"sentiment-analysis_deepsentipers_fscore":0.7496677294,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9994606257,"nlu_score":0.7050532433}
30
- {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7610571737,"sentiment-analysis_deepsentipers_precision_modified":0.7412673027,"sentiment-analysis_deepsentipers_recall_modified":0.7880284687,"sentiment-analysis_deepsentipers_fscore_modified":0.7263322065,"sentiment-analysis_deepsentipers_acc":0.7610571737,"sentiment-analysis_deepsentipers_precision":0.7412673027,"sentiment-analysis_deepsentipers_recall":0.7880284687,"sentiment-analysis_deepsentipers_fscore":0.7263322065,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6944128198}
31
- {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7610571737,"sentiment-analysis_deepsentipers_precision_modified":0.7233450689,"sentiment-analysis_deepsentipers_recall_modified":0.7938691015,"sentiment-analysis_deepsentipers_fscore_modified":0.7265888673,"sentiment-analysis_deepsentipers_acc":0.7622906537,"sentiment-analysis_deepsentipers_precision":0.7245174272,"sentiment-analysis_deepsentipers_recall":0.7951557613,"sentiment-analysis_deepsentipers_fscore":0.727766483,"sentiment-analysis_deepsentipers_valid_output_ratio":0.998381877,"nlu_score":0.6914202844}
32
- {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.6154261057,"sentiment-analysis_deepsentipers_precision_modified":0.6519864557,"sentiment-analysis_deepsentipers_recall_modified":0.6762525877,"sentiment-analysis_deepsentipers_fscore_modified":0.5290317996,"sentiment-analysis_deepsentipers_acc":0.6154261057,"sentiment-analysis_deepsentipers_precision":0.6519864557,"sentiment-analysis_deepsentipers_recall":0.6762525877,"sentiment-analysis_deepsentipers_fscore":0.5290317996,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.531045981}
33
- {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.5604099245,"sentiment-analysis_deepsentipers_precision_modified":0.6546929453,"sentiment-analysis_deepsentipers_recall_modified":0.63956688,"sentiment-analysis_deepsentipers_fscore_modified":0.4401917985,"sentiment-analysis_deepsentipers_acc":0.5625338387,"sentiment-analysis_deepsentipers_precision":0.6571741855,"sentiment-analysis_deepsentipers_recall":0.6419907934,"sentiment-analysis_deepsentipers_fscore":0.4418600945,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9962243797,"nlu_score":0.5241296095}
34
- {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7448759439,"sentiment-analysis_deepsentipers_precision_modified":0.7197594162,"sentiment-analysis_deepsentipers_recall_modified":0.7773395601,"sentiment-analysis_deepsentipers_fscore_modified":0.7035572334,"sentiment-analysis_deepsentipers_acc":0.7448759439,"sentiment-analysis_deepsentipers_precision":0.7197594162,"sentiment-analysis_deepsentipers_recall":0.7773395601,"sentiment-analysis_deepsentipers_fscore":0.7035572334,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6262096694}
35
- {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7696871629,"sentiment-analysis_deepsentipers_precision_modified":0.731208086,"sentiment-analysis_deepsentipers_recall_modified":0.7918265983,"sentiment-analysis_deepsentipers_fscore_modified":0.7382534961,"sentiment-analysis_deepsentipers_acc":0.7696871629,"sentiment-analysis_deepsentipers_precision":0.731208086,"sentiment-analysis_deepsentipers_recall":0.7918265983,"sentiment-analysis_deepsentipers_fscore":0.7382534961,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6443219619}
36
- {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7411003236,"sentiment-analysis_deepsentipers_precision_modified":0.7010084925,"sentiment-analysis_deepsentipers_recall_modified":0.7529009939,"sentiment-analysis_deepsentipers_fscore_modified":0.7172781226,"sentiment-analysis_deepsentipers_acc":0.7411003236,"sentiment-analysis_deepsentipers_precision":0.7010084925,"sentiment-analysis_deepsentipers_recall":0.7529009939,"sentiment-analysis_deepsentipers_fscore":0.7172781226,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.5968415875}
37
- {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.3737864078,"sentiment-analysis_deepsentipers_precision_modified":0.4303148768,"sentiment-analysis_deepsentipers_recall_modified":0.3053254234,"sentiment-analysis_deepsentipers_fscore_modified":0.2934454786,"sentiment-analysis_deepsentipers_acc":0.6209677419,"sentiment-analysis_deepsentipers_precision":0.7148779405,"sentiment-analysis_deepsentipers_recall":0.5072341711,"sentiment-analysis_deepsentipers_fscore":0.4874981338,"sentiment-analysis_deepsentipers_valid_output_ratio":0.6019417476,"nlu_score":0.3916645306}
38
- {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7988133765,"sentiment-analysis_deepsentipers_precision_modified":0.7580375513,"sentiment-analysis_deepsentipers_recall_modified":0.8108044611,"sentiment-analysis_deepsentipers_fscore_modified":0.7757714496,"sentiment-analysis_deepsentipers_acc":0.7988133765,"sentiment-analysis_deepsentipers_precision":0.7580375513,"sentiment-analysis_deepsentipers_recall":0.8108044611,"sentiment-analysis_deepsentipers_fscore":0.7757714496,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.7146808531}
39
- {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.6278317152,"sentiment-analysis_deepsentipers_precision_modified":0.5954545705,"sentiment-analysis_deepsentipers_recall_modified":0.6239967818,"sentiment-analysis_deepsentipers_fscore_modified":0.6073033689,"sentiment-analysis_deepsentipers_acc":0.7288666249,"sentiment-analysis_deepsentipers_precision":0.691279132,"sentiment-analysis_deepsentipers_recall":0.7244145482,"sentiment-analysis_deepsentipers_fscore":0.7050347188,"sentiment-analysis_deepsentipers_valid_output_ratio":0.8613807983,"nlu_score":0.6361186163}
40
- {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7502696872,"sentiment-analysis_deepsentipers_precision_modified":0.7162099301,"sentiment-analysis_deepsentipers_recall_modified":0.7649004728,"sentiment-analysis_deepsentipers_fscore_modified":0.716460892,"sentiment-analysis_deepsentipers_acc":0.7502696872,"sentiment-analysis_deepsentipers_precision":0.7162099301,"sentiment-analysis_deepsentipers_recall":0.7649004728,"sentiment-analysis_deepsentipers_fscore":0.716460892,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6255818412}
41
- {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.3149946063,"sentiment-analysis_deepsentipers_precision_modified":0.6011059335,"sentiment-analysis_deepsentipers_recall_modified":0.4700288555,"sentiment-analysis_deepsentipers_fscore_modified":0.3135968578,"sentiment-analysis_deepsentipers_acc":0.3155051324,"sentiment-analysis_deepsentipers_precision":0.6020801732,"sentiment-analysis_deepsentipers_recall":0.4707906527,"sentiment-analysis_deepsentipers_fscore":0.3141051185,"sentiment-analysis_deepsentipers_valid_output_ratio":0.998381877,"nlu_score":0.1368924446}
42
- {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.0787486516,"sentiment-analysis_deepsentipers_precision_modified":0.1370950606,"sentiment-analysis_deepsentipers_recall_modified":0.1281381117,"sentiment-analysis_deepsentipers_fscore_modified":0.0722798642,"sentiment-analysis_deepsentipers_acc":0.2106782107,"sentiment-analysis_deepsentipers_precision":0.3667737986,"sentiment-analysis_deepsentipers_recall":0.3428110522,"sentiment-analysis_deepsentipers_fscore":0.1933721042,"sentiment-analysis_deepsentipers_valid_output_ratio":0.3737864078,"nlu_score":0.046805056}
43
- {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7939590076,"sentiment-analysis_deepsentipers_precision_modified":0.7547931347,"sentiment-analysis_deepsentipers_recall_modified":0.8064164986,"sentiment-analysis_deepsentipers_fscore_modified":0.7679289467,"sentiment-analysis_deepsentipers_acc":0.7939590076,"sentiment-analysis_deepsentipers_precision":0.7547931347,"sentiment-analysis_deepsentipers_recall":0.8064164986,"sentiment-analysis_deepsentipers_fscore":0.7679289467,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6992555201}
44
- {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.0021574973,"sentiment-analysis_deepsentipers_precision_modified":0.0009588877,"sentiment-analysis_deepsentipers_recall_modified":0.0021574973,"sentiment-analysis_deepsentipers_fscore_modified":0.0013276906,"sentiment-analysis_deepsentipers_acc":0.3333333333,"sentiment-analysis_deepsentipers_precision":0.1481481481,"sentiment-analysis_deepsentipers_recall":0.3333333333,"sentiment-analysis_deepsentipers_fscore":0.2051282051,"sentiment-analysis_deepsentipers_valid_output_ratio":0.0064724919,"nlu_score":0.0372987683}
 
1
+ {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7448759439,"sentiment-analysis_deepsentipers_precision_modified":0.7197594162,"sentiment-analysis_deepsentipers_recall_modified":0.7773395601,"sentiment-analysis_deepsentipers_fscore_modified":0.7035572334,"sentiment-analysis_deepsentipers_acc":0.7448759439,"sentiment-analysis_deepsentipers_precision":0.7197594162,"sentiment-analysis_deepsentipers_recall":0.7773395601,"sentiment-analysis_deepsentipers_fscore":0.7035572334,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6262096694}
2
+ {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.6154261057,"sentiment-analysis_deepsentipers_precision_modified":0.6519864557,"sentiment-analysis_deepsentipers_recall_modified":0.6762525877,"sentiment-analysis_deepsentipers_fscore_modified":0.5290317996,"sentiment-analysis_deepsentipers_acc":0.6154261057,"sentiment-analysis_deepsentipers_precision":0.6519864557,"sentiment-analysis_deepsentipers_recall":0.6762525877,"sentiment-analysis_deepsentipers_fscore":0.5290317996,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.531045981}
3
+ {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7740021575,"sentiment-analysis_deepsentipers_precision_modified":0.7295627073,"sentiment-analysis_deepsentipers_recall_modified":0.7969121455,"sentiment-analysis_deepsentipers_fscore_modified":0.7492633779,"sentiment-analysis_deepsentipers_acc":0.7744198597,"sentiment-analysis_deepsentipers_precision":0.729956427,"sentiment-analysis_deepsentipers_recall":0.7973422114,"sentiment-analysis_deepsentipers_fscore":0.7496677294,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9994606257,"nlu_score":0.7050532433}
4
+ {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.6278317152,"sentiment-analysis_deepsentipers_precision_modified":0.5954545705,"sentiment-analysis_deepsentipers_recall_modified":0.6239967818,"sentiment-analysis_deepsentipers_fscore_modified":0.6073033689,"sentiment-analysis_deepsentipers_acc":0.7288666249,"sentiment-analysis_deepsentipers_precision":0.691279132,"sentiment-analysis_deepsentipers_recall":0.7244145482,"sentiment-analysis_deepsentipers_fscore":0.7050347188,"sentiment-analysis_deepsentipers_valid_output_ratio":0.8613807983,"nlu_score":0.6361186163}
5
+ {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.715210356,"sentiment-analysis_deepsentipers_precision_modified":0.6703215936,"sentiment-analysis_deepsentipers_recall_modified":0.7384303354,"sentiment-analysis_deepsentipers_fscore_modified":0.6873376696,"sentiment-analysis_deepsentipers_acc":0.7651471437,"sentiment-analysis_deepsentipers_precision":0.7171241976,"sentiment-analysis_deepsentipers_recall":0.7899883681,"sentiment-analysis_deepsentipers_fscore":0.7353283551,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9347357066,"nlu_score":0.4086928082}
6
+ {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7459546926,"sentiment-analysis_deepsentipers_precision_modified":0.696002467,"sentiment-analysis_deepsentipers_recall_modified":0.7725731976,"sentiment-analysis_deepsentipers_fscore_modified":0.7160207999,"sentiment-analysis_deepsentipers_acc":0.7459546926,"sentiment-analysis_deepsentipers_precision":0.696002467,"sentiment-analysis_deepsentipers_recall":0.7725731976,"sentiment-analysis_deepsentipers_fscore":0.7160207999,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6460328733}
7
+ {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7820927724,"sentiment-analysis_deepsentipers_precision_modified":0.7592820571,"sentiment-analysis_deepsentipers_recall_modified":0.7768252647,"sentiment-analysis_deepsentipers_fscore_modified":0.7562669975,"sentiment-analysis_deepsentipers_acc":0.7820927724,"sentiment-analysis_deepsentipers_precision":0.7592820571,"sentiment-analysis_deepsentipers_recall":0.7768252647,"sentiment-analysis_deepsentipers_fscore":0.7562669975,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6714091535}
8
+ {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7610571737,"sentiment-analysis_deepsentipers_precision_modified":0.7412673027,"sentiment-analysis_deepsentipers_recall_modified":0.7880284687,"sentiment-analysis_deepsentipers_fscore_modified":0.7263322065,"sentiment-analysis_deepsentipers_acc":0.7610571737,"sentiment-analysis_deepsentipers_precision":0.7412673027,"sentiment-analysis_deepsentipers_recall":0.7880284687,"sentiment-analysis_deepsentipers_fscore":0.7263322065,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6944128198}
9
+ {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7696871629,"sentiment-analysis_deepsentipers_precision_modified":0.731208086,"sentiment-analysis_deepsentipers_recall_modified":0.7918265983,"sentiment-analysis_deepsentipers_fscore_modified":0.7382534961,"sentiment-analysis_deepsentipers_acc":0.7696871629,"sentiment-analysis_deepsentipers_precision":0.731208086,"sentiment-analysis_deepsentipers_recall":0.7918265983,"sentiment-analysis_deepsentipers_fscore":0.7382534961,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6443219619}
10
+ {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7939590076,"sentiment-analysis_deepsentipers_precision_modified":0.7547931347,"sentiment-analysis_deepsentipers_recall_modified":0.8064164986,"sentiment-analysis_deepsentipers_fscore_modified":0.7679289467,"sentiment-analysis_deepsentipers_acc":0.7939590076,"sentiment-analysis_deepsentipers_precision":0.7547931347,"sentiment-analysis_deepsentipers_recall":0.8064164986,"sentiment-analysis_deepsentipers_fscore":0.7679289467,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6992555201}
11
+ {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7885652643,"sentiment-analysis_deepsentipers_precision_modified":0.757057239,"sentiment-analysis_deepsentipers_recall_modified":0.8134053732,"sentiment-analysis_deepsentipers_fscore_modified":0.7618040556,"sentiment-analysis_deepsentipers_acc":0.7911255411,"sentiment-analysis_deepsentipers_precision":0.7595152171,"sentiment-analysis_deepsentipers_recall":0.8160462998,"sentiment-analysis_deepsentipers_fscore":0.7642774453,"sentiment-analysis_deepsentipers_valid_output_ratio":0.996763754,"nlu_score":0.6749652797}
12
+ {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7502696872,"sentiment-analysis_deepsentipers_precision_modified":0.7162099301,"sentiment-analysis_deepsentipers_recall_modified":0.7649004728,"sentiment-analysis_deepsentipers_fscore_modified":0.716460892,"sentiment-analysis_deepsentipers_acc":0.7502696872,"sentiment-analysis_deepsentipers_precision":0.7162099301,"sentiment-analysis_deepsentipers_recall":0.7649004728,"sentiment-analysis_deepsentipers_fscore":0.716460892,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6255818412}
13
+ {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.3737864078,"sentiment-analysis_deepsentipers_precision_modified":0.4303148768,"sentiment-analysis_deepsentipers_recall_modified":0.3053254234,"sentiment-analysis_deepsentipers_fscore_modified":0.2934454786,"sentiment-analysis_deepsentipers_acc":0.6209677419,"sentiment-analysis_deepsentipers_precision":0.7148779405,"sentiment-analysis_deepsentipers_recall":0.5072341711,"sentiment-analysis_deepsentipers_fscore":0.4874981338,"sentiment-analysis_deepsentipers_valid_output_ratio":0.6019417476,"nlu_score":0.3916645306}
14
+ {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7713052859,"sentiment-analysis_deepsentipers_precision_modified":0.7288724929,"sentiment-analysis_deepsentipers_recall_modified":0.803675275,"sentiment-analysis_deepsentipers_fscore_modified":0.753174206,"sentiment-analysis_deepsentipers_acc":0.7713052859,"sentiment-analysis_deepsentipers_precision":0.7288724929,"sentiment-analysis_deepsentipers_recall":0.803675275,"sentiment-analysis_deepsentipers_fscore":0.753174206,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6459120734}
15
+ {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.3149946063,"sentiment-analysis_deepsentipers_precision_modified":0.6011059335,"sentiment-analysis_deepsentipers_recall_modified":0.4700288555,"sentiment-analysis_deepsentipers_fscore_modified":0.3135968578,"sentiment-analysis_deepsentipers_acc":0.3155051324,"sentiment-analysis_deepsentipers_precision":0.6020801732,"sentiment-analysis_deepsentipers_recall":0.4707906527,"sentiment-analysis_deepsentipers_fscore":0.3141051185,"sentiment-analysis_deepsentipers_valid_output_ratio":0.998381877,"nlu_score":0.1368924446}
16
+ {"Model Name":"yandexgpt-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.6531823085,"sentiment-analysis_deepsentipers_precision_modified":0.6360048559,"sentiment-analysis_deepsentipers_recall_modified":0.6171894526,"sentiment-analysis_deepsentipers_fscore_modified":0.6143157593,"sentiment-analysis_deepsentipers_acc":0.6631982475,"sentiment-analysis_deepsentipers_precision":0.6457573947,"sentiment-analysis_deepsentipers_recall":0.6266534749,"sentiment-analysis_deepsentipers_fscore":0.6237357162,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9848975189,"nlu_score":0.5761104945}
17
+ {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.5949298813,"sentiment-analysis_deepsentipers_precision_modified":0.6633283768,"sentiment-analysis_deepsentipers_recall_modified":0.6739827327,"sentiment-analysis_deepsentipers_fscore_modified":0.597099001,"sentiment-analysis_deepsentipers_acc":0.5958941113,"sentiment-analysis_deepsentipers_precision":0.6644034633,"sentiment-analysis_deepsentipers_recall":0.6750750872,"sentiment-analysis_deepsentipers_fscore":0.5980667466,"sentiment-analysis_deepsentipers_valid_output_ratio":0.998381877,"nlu_score":0.5661558794}
18
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7421790723,"sentiment-analysis_deepsentipers_precision_modified":0.705605232,"sentiment-analysis_deepsentipers_recall_modified":0.7565637786,"sentiment-analysis_deepsentipers_fscore_modified":0.7108099837,"sentiment-analysis_deepsentipers_acc":0.7674288901,"sentiment-analysis_deepsentipers_precision":0.7296107642,"sentiment-analysis_deepsentipers_recall":0.7823029813,"sentiment-analysis_deepsentipers_fscore":0.7349925877,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9670981661,"nlu_score":0.628506628}
19
+ {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7664509169,"sentiment-analysis_deepsentipers_precision_modified":0.7235774595,"sentiment-analysis_deepsentipers_recall_modified":0.785720049,"sentiment-analysis_deepsentipers_fscore_modified":0.7440236575,"sentiment-analysis_deepsentipers_acc":0.766864544,"sentiment-analysis_deepsentipers_precision":0.7239679492,"sentiment-analysis_deepsentipers_recall":0.786144075,"sentiment-analysis_deepsentipers_fscore":0.7444251813,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9994606257,"nlu_score":0.699116864}
20
+ {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7642934196,"sentiment-analysis_deepsentipers_precision_modified":0.7287131406,"sentiment-analysis_deepsentipers_recall_modified":0.7801104156,"sentiment-analysis_deepsentipers_fscore_modified":0.7434018552,"sentiment-analysis_deepsentipers_acc":0.7951739618,"sentiment-analysis_deepsentipers_precision":0.7581560958,"sentiment-analysis_deepsentipers_recall":0.8116300284,"sentiment-analysis_deepsentipers_fscore":0.7734382938,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9611650485,"nlu_score":0.3928685253}
21
+ {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7411003236,"sentiment-analysis_deepsentipers_precision_modified":0.7010084925,"sentiment-analysis_deepsentipers_recall_modified":0.7529009939,"sentiment-analysis_deepsentipers_fscore_modified":0.7172781226,"sentiment-analysis_deepsentipers_acc":0.7411003236,"sentiment-analysis_deepsentipers_precision":0.7010084925,"sentiment-analysis_deepsentipers_recall":0.7529009939,"sentiment-analysis_deepsentipers_fscore":0.7172781226,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.5968415875}
22
+ {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7988133765,"sentiment-analysis_deepsentipers_precision_modified":0.7580375513,"sentiment-analysis_deepsentipers_recall_modified":0.8108044611,"sentiment-analysis_deepsentipers_fscore_modified":0.7757714496,"sentiment-analysis_deepsentipers_acc":0.7988133765,"sentiment-analysis_deepsentipers_precision":0.7580375513,"sentiment-analysis_deepsentipers_recall":0.8108044611,"sentiment-analysis_deepsentipers_fscore":0.7757714496,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.7146808531}
23
+ {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.8058409951,"sentiment-analysis_deepsentipers_precision_modified":0.7717795715,"sentiment-analysis_deepsentipers_recall_modified":0.8211827366,"sentiment-analysis_deepsentipers_fscore_modified":0.7889064935,"sentiment-analysis_deepsentipers_acc":0.8062770563,"sentiment-analysis_deepsentipers_precision":0.7721972011,"sentiment-analysis_deepsentipers_recall":0.8216270995,"sentiment-analysis_deepsentipers_fscore":0.7893333909,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9994591671,"nlu_score":0.7143086066}
24
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7659115426,"sentiment-analysis_deepsentipers_precision_modified":0.726363779,"sentiment-analysis_deepsentipers_recall_modified":0.795958415,"sentiment-analysis_deepsentipers_fscore_modified":0.7384201215,"sentiment-analysis_deepsentipers_acc":0.7659115426,"sentiment-analysis_deepsentipers_precision":0.726363779,"sentiment-analysis_deepsentipers_recall":0.795958415,"sentiment-analysis_deepsentipers_fscore":0.7384201215,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6470954618}
 
 
25
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7842502697,"sentiment-analysis_deepsentipers_precision_modified":0.7475186413,"sentiment-analysis_deepsentipers_recall_modified":0.8040239865,"sentiment-analysis_deepsentipers_fscore_modified":0.7603028067,"sentiment-analysis_deepsentipers_acc":0.7842502697,"sentiment-analysis_deepsentipers_precision":0.7475186413,"sentiment-analysis_deepsentipers_recall":0.8040239865,"sentiment-analysis_deepsentipers_fscore":0.7603028067,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6458443785}
26
+ {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.6817691478,"sentiment-analysis_deepsentipers_precision_modified":0.6546744642,"sentiment-analysis_deepsentipers_recall_modified":0.7378694789,"sentiment-analysis_deepsentipers_fscore_modified":0.6356142977,"sentiment-analysis_deepsentipers_acc":0.6817691478,"sentiment-analysis_deepsentipers_precision":0.6546744642,"sentiment-analysis_deepsentipers_recall":0.7378694789,"sentiment-analysis_deepsentipers_fscore":0.6356142977,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6241793507}
27
+ {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.0021574973,"sentiment-analysis_deepsentipers_precision_modified":0.0009588877,"sentiment-analysis_deepsentipers_recall_modified":0.0021574973,"sentiment-analysis_deepsentipers_fscore_modified":0.0013276906,"sentiment-analysis_deepsentipers_acc":0.3333333333,"sentiment-analysis_deepsentipers_precision":0.1481481481,"sentiment-analysis_deepsentipers_recall":0.3333333333,"sentiment-analysis_deepsentipers_fscore":0.2051282051,"sentiment-analysis_deepsentipers_valid_output_ratio":0.0064724919,"nlu_score":0.0372987683}
 
 
 
 
 
 
28
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7521691974,"sentiment-analysis_deepsentipers_precision_modified":0.7149147283,"sentiment-analysis_deepsentipers_recall_modified":0.7661218172,"sentiment-analysis_deepsentipers_fscore_modified":0.7340307684,"sentiment-analysis_deepsentipers_acc":0.7550353838,"sentiment-analysis_deepsentipers_precision":0.7176389542,"sentiment-analysis_deepsentipers_recall":0.7690411709,"sentiment-analysis_deepsentipers_fscore":0.7368278372,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9962039046,"nlu_score":0.3749414991}
29
+ {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.8047464941,"sentiment-analysis_deepsentipers_precision_modified":0.7661826532,"sentiment-analysis_deepsentipers_recall_modified":0.8089861144,"sentiment-analysis_deepsentipers_fscore_modified":0.7830417049,"sentiment-analysis_deepsentipers_acc":0.8047464941,"sentiment-analysis_deepsentipers_precision":0.7661826532,"sentiment-analysis_deepsentipers_recall":0.8089861144,"sentiment-analysis_deepsentipers_fscore":0.7830417049,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6758278127}
30
+ {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.8047464941,"sentiment-analysis_deepsentipers_precision_modified":0.7692351798,"sentiment-analysis_deepsentipers_recall_modified":0.8125606487,"sentiment-analysis_deepsentipers_fscore_modified":0.7842327246,"sentiment-analysis_deepsentipers_acc":0.8047464941,"sentiment-analysis_deepsentipers_precision":0.7692351798,"sentiment-analysis_deepsentipers_recall":0.8125606487,"sentiment-analysis_deepsentipers_fscore":0.7842327246,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.4824528512}
31
+ {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7610571737,"sentiment-analysis_deepsentipers_precision_modified":0.7233450689,"sentiment-analysis_deepsentipers_recall_modified":0.7938691015,"sentiment-analysis_deepsentipers_fscore_modified":0.7265888673,"sentiment-analysis_deepsentipers_acc":0.7622906537,"sentiment-analysis_deepsentipers_precision":0.7245174272,"sentiment-analysis_deepsentipers_recall":0.7951557613,"sentiment-analysis_deepsentipers_fscore":0.727766483,"sentiment-analysis_deepsentipers_valid_output_ratio":0.998381877,"nlu_score":0.6914202844}
32
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7038834951,"sentiment-analysis_deepsentipers_precision_modified":0.6558175114,"sentiment-analysis_deepsentipers_recall_modified":0.7195323034,"sentiment-analysis_deepsentipers_fscore_modified":0.6634075099,"sentiment-analysis_deepsentipers_acc":0.7038834951,"sentiment-analysis_deepsentipers_precision":0.6558175114,"sentiment-analysis_deepsentipers_recall":0.7195323034,"sentiment-analysis_deepsentipers_fscore":0.6634075099,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.456845738}
33
+ {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.783171521,"sentiment-analysis_deepsentipers_precision_modified":0.7433314894,"sentiment-analysis_deepsentipers_recall_modified":0.8047725112,"sentiment-analysis_deepsentipers_fscore_modified":0.759109397,"sentiment-analysis_deepsentipers_acc":0.7835941716,"sentiment-analysis_deepsentipers_precision":0.7437326397,"sentiment-analysis_deepsentipers_recall":0.805206819,"sentiment-analysis_deepsentipers_fscore":0.7595190621,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9994606257,"nlu_score":0.7178891542}
34
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7669902913,"sentiment-analysis_deepsentipers_precision_modified":0.7411642414,"sentiment-analysis_deepsentipers_recall_modified":0.7990679398,"sentiment-analysis_deepsentipers_fscore_modified":0.7346216275,"sentiment-analysis_deepsentipers_acc":0.7669902913,"sentiment-analysis_deepsentipers_precision":0.7411642414,"sentiment-analysis_deepsentipers_recall":0.7990679398,"sentiment-analysis_deepsentipers_fscore":0.7346216275,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6752949557}
35
+ {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.5604099245,"sentiment-analysis_deepsentipers_precision_modified":0.6546929453,"sentiment-analysis_deepsentipers_recall_modified":0.63956688,"sentiment-analysis_deepsentipers_fscore_modified":0.4401917985,"sentiment-analysis_deepsentipers_acc":0.5625338387,"sentiment-analysis_deepsentipers_precision":0.6571741855,"sentiment-analysis_deepsentipers_recall":0.6419907934,"sentiment-analysis_deepsentipers_fscore":0.4418600945,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9962243797,"nlu_score":0.5241296095}
36
+ {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.0787486516,"sentiment-analysis_deepsentipers_precision_modified":0.1370950606,"sentiment-analysis_deepsentipers_recall_modified":0.1281381117,"sentiment-analysis_deepsentipers_fscore_modified":0.0722798642,"sentiment-analysis_deepsentipers_acc":0.2106782107,"sentiment-analysis_deepsentipers_precision":0.3667737986,"sentiment-analysis_deepsentipers_recall":0.3428110522,"sentiment-analysis_deepsentipers_fscore":0.1933721042,"sentiment-analysis_deepsentipers_valid_output_ratio":0.3737864078,"nlu_score":0.046805056}
37
+ {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7869471413,"sentiment-analysis_deepsentipers_precision_modified":0.7486325068,"sentiment-analysis_deepsentipers_recall_modified":0.811119619,"sentiment-analysis_deepsentipers_fscore_modified":0.7669134988,"sentiment-analysis_deepsentipers_acc":0.7869471413,"sentiment-analysis_deepsentipers_precision":0.7486325068,"sentiment-analysis_deepsentipers_recall":0.811119619,"sentiment-analysis_deepsentipers_fscore":0.7669134988,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6833497104}
38
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.6855447681,"sentiment-analysis_deepsentipers_precision_modified":0.6408552737,"sentiment-analysis_deepsentipers_recall_modified":0.7180772523,"sentiment-analysis_deepsentipers_fscore_modified":0.6446920024,"sentiment-analysis_deepsentipers_acc":0.6855447681,"sentiment-analysis_deepsentipers_precision":0.6408552737,"sentiment-analysis_deepsentipers_recall":0.7180772523,"sentiment-analysis_deepsentipers_fscore":0.6446920024,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.5121418762}
39
+ {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7556634304,"sentiment-analysis_deepsentipers_precision_modified":0.7100962569,"sentiment-analysis_deepsentipers_recall_modified":0.796296032,"sentiment-analysis_deepsentipers_fscore_modified":0.7198160026,"sentiment-analysis_deepsentipers_acc":0.7556634304,"sentiment-analysis_deepsentipers_precision":0.7100962569,"sentiment-analysis_deepsentipers_recall":0.796296032,"sentiment-analysis_deepsentipers_fscore":0.7198160026,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6898261633}
40
+ {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.806364617,"sentiment-analysis_deepsentipers_precision_modified":0.7705568258,"sentiment-analysis_deepsentipers_recall_modified":0.8234753765,"sentiment-analysis_deepsentipers_fscore_modified":0.7802386366,"sentiment-analysis_deepsentipers_acc":0.806364617,"sentiment-analysis_deepsentipers_precision":0.7705568258,"sentiment-analysis_deepsentipers_recall":0.8234753765,"sentiment-analysis_deepsentipers_fscore":0.7802386366,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.7144353486}
41
+ {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7518878101,"sentiment-analysis_deepsentipers_precision_modified":0.7333601788,"sentiment-analysis_deepsentipers_recall_modified":0.7850018857,"sentiment-analysis_deepsentipers_fscore_modified":0.7214827861,"sentiment-analysis_deepsentipers_acc":0.752293578,"sentiment-analysis_deepsentipers_precision":0.7337559479,"sentiment-analysis_deepsentipers_recall":0.785425524,"sentiment-analysis_deepsentipers_fscore":0.7218721454,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9994606257,"nlu_score":0.6552152029}
42
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7324703344,"sentiment-analysis_deepsentipers_precision_modified":0.7023773257,"sentiment-analysis_deepsentipers_recall_modified":0.7824931708,"sentiment-analysis_deepsentipers_fscore_modified":0.6905624385,"sentiment-analysis_deepsentipers_acc":0.7324703344,"sentiment-analysis_deepsentipers_precision":0.7023773257,"sentiment-analysis_deepsentipers_recall":0.7824931708,"sentiment-analysis_deepsentipers_fscore":0.6905624385,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6800109206}
43
+ {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.5199568501,"sentiment-analysis_deepsentipers_precision_modified":0.4907692439,"sentiment-analysis_deepsentipers_recall_modified":0.5047701764,"sentiment-analysis_deepsentipers_fscore_modified":0.4457895794,"sentiment-analysis_deepsentipers_acc":0.571767497,"sentiment-analysis_deepsentipers_precision":0.5396715174,"sentiment-analysis_deepsentipers_recall":0.5550675605,"sentiment-analysis_deepsentipers_fscore":0.4902098934,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9093851133,"nlu_score":0.3619547874}
44
+ {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.738403452,"sentiment-analysis_deepsentipers_precision_modified":0.706763853,"sentiment-analysis_deepsentipers_recall_modified":0.7658510846,"sentiment-analysis_deepsentipers_fscore_modified":0.726373242,"sentiment-analysis_deepsentipers_acc":0.738403452,"sentiment-analysis_deepsentipers_precision":0.706763853,"sentiment-analysis_deepsentipers_recall":0.7658510846,"sentiment-analysis_deepsentipers_fscore":0.726373242,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6297634971}
45
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.806364617,"sentiment-analysis_deepsentipers_precision_modified":0.7700109372,"sentiment-analysis_deepsentipers_recall_modified":0.8303259501,"sentiment-analysis_deepsentipers_fscore_modified":0.7817187645,"sentiment-analysis_deepsentipers_acc":0.806364617,"sentiment-analysis_deepsentipers_precision":0.7700109372,"sentiment-analysis_deepsentipers_recall":0.8303259501,"sentiment-analysis_deepsentipers_fscore":0.7817187645,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.7207167537}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leaderboard/boards_data/sts_FarSICK.jsonl CHANGED
@@ -1,44 +1,45 @@
1
- {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8606070195,"sts_FarSICK_corrcoef":0.8606070195,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7143086066}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8438423833,"sts_FarSICK_corrcoef":0.8438423833,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.628506628}
3
- {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8471466571,"sts_FarSICK_corrcoef":0.8471466571,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6241793507}
4
- {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8332013424,"sts_FarSICK_corrcoef":0.8332013424,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6297634971}
 
 
 
5
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8410343894,"sts_FarSICK_corrcoef":0.8410343894,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6470954618}
6
- {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8521163575,"sts_FarSICK_corrcoef":0.8521163575,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7144353486}
7
- {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8404353896,"sts_FarSICK_corrcoef":0.8404353896,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6749652797}
8
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8412365875,"sts_FarSICK_corrcoef":0.8412365875,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6458443785}
9
- {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8602460091,"sts_FarSICK_corrcoef":0.8602460091,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6552152029}
10
- {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8624442565,"sts_FarSICK_corrcoef":0.8624442565,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6758278127}
11
- {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8767598269,"sts_FarSICK_corrcoef":0.8767598269,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.699116864}
12
- {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8569357778,"sts_FarSICK_corrcoef":0.8569357778,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7178891542}
13
- {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8796836219,"sts_FarSICK_corrcoef":0.8796836219,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6898261633}
14
- {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8509006434,"sts_FarSICK_corrcoef":0.8509006434,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6460328733}
15
- {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8550824218,"sts_FarSICK_corrcoef":0.8550824218,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6714091535}
16
- {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.0982656524,"sts_FarSICK_corrcoef":0.9633887492,"sts_FarSICK_valid_output_ratio":0.102,"nlu_score":0.4086928082}
17
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.2533632205,"sts_FarSICK_corrcoef":0.8617796616,"sts_FarSICK_valid_output_ratio":0.294,"nlu_score":0.3749414991}
18
- {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8346099969,"sts_FarSICK_corrcoef":0.8346099969,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.5661558794}
 
 
19
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8020636156,"sts_FarSICK_corrcoef":0.8020636156,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.456845738}
 
20
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8641781993,"sts_FarSICK_corrcoef":0.8641781993,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6752949557}
 
 
 
21
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8363152655,"sts_FarSICK_corrcoef":0.8430597434,"sts_FarSICK_valid_output_ratio":0.992,"nlu_score":0.5121418762}
22
- {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.6678492429,"sts_FarSICK_corrcoef":0.6913553239,"sts_FarSICK_valid_output_ratio":0.966,"nlu_score":0.3619547874}
23
- {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.0,"sts_FarSICK_corrcoef":0.0,"sts_FarSICK_valid_output_ratio":0.0,"nlu_score":0.3928685253}
 
24
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8274969834,"sts_FarSICK_corrcoef":0.8274969834,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6800109206}
25
- {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.86471356,"sts_FarSICK_corrcoef":0.86471356,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6833497104}
 
26
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8663758584,"sts_FarSICK_corrcoef":0.8663758584,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7207167537}
27
- {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8671704383,"sts_FarSICK_corrcoef":0.8671704383,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6459120734}
28
- {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.0,"sts_FarSICK_corrcoef":0.0,"sts_FarSICK_valid_output_ratio":0.058,"nlu_score":0.4824528512}
29
- {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8492628764,"sts_FarSICK_corrcoef":0.8492628764,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7050532433}
30
- {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8367188896,"sts_FarSICK_corrcoef":0.8367188896,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6944128198}
31
- {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8461251715,"sts_FarSICK_corrcoef":0.8461251715,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6914202844}
32
- {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8375953381,"sts_FarSICK_corrcoef":0.8375953381,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.531045981}
33
- {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.7973824106,"sts_FarSICK_corrcoef":0.8005847496,"sts_FarSICK_valid_output_ratio":0.996,"nlu_score":0.5241296095}
34
- {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8720703866,"sts_FarSICK_corrcoef":0.8720703866,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6262096694}
35
- {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.852083892,"sts_FarSICK_corrcoef":0.852083892,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6443219619}
36
- {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8497629768,"sts_FarSICK_corrcoef":0.8497629768,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.5968415875}
37
- {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.5531047251,"sts_FarSICK_corrcoef":0.8039312865,"sts_FarSICK_valid_output_ratio":0.688,"nlu_score":0.3916645306}
38
- {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8612153956,"sts_FarSICK_corrcoef":0.8612153956,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7146808531}
39
- {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8547994421,"sts_FarSICK_corrcoef":0.8547994421,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6361186163}
40
- {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8643540763,"sts_FarSICK_corrcoef":0.8643540763,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6255818412}
41
- {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.0,"sts_FarSICK_corrcoef":0.0,"sts_FarSICK_valid_output_ratio":0.0,"nlu_score":0.1368924446}
42
- {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.0,"sts_FarSICK_corrcoef":0.0,"sts_FarSICK_valid_output_ratio":0.018,"nlu_score":0.046805056}
43
- {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8357730413,"sts_FarSICK_corrcoef":0.8357730413,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6992555201}
44
- {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.0,"sts_FarSICK_corrcoef":0.0,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.0372987683}
 
1
+ {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8720703866,"sts_FarSICK_corrcoef":0.8720703866,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6262096694}
2
+ {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8375953381,"sts_FarSICK_corrcoef":0.8375953381,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.531045981}
3
+ {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8492628764,"sts_FarSICK_corrcoef":0.8492628764,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7050532433}
4
+ {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8547994421,"sts_FarSICK_corrcoef":0.8547994421,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6361186163}
5
+ {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.0982656524,"sts_FarSICK_corrcoef":0.9633887492,"sts_FarSICK_valid_output_ratio":0.102,"nlu_score":0.4086928082}
6
+ {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8509006434,"sts_FarSICK_corrcoef":0.8509006434,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6460328733}
7
+ {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8550824218,"sts_FarSICK_corrcoef":0.8550824218,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6714091535}
8
+ {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8367188896,"sts_FarSICK_corrcoef":0.8367188896,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6944128198}
9
+ {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.852083892,"sts_FarSICK_corrcoef":0.852083892,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6443219619}
10
+ {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8357730413,"sts_FarSICK_corrcoef":0.8357730413,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6992555201}
11
+ {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8404353896,"sts_FarSICK_corrcoef":0.8404353896,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6749652797}
12
+ {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8643540763,"sts_FarSICK_corrcoef":0.8643540763,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6255818412}
13
+ {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.5531047251,"sts_FarSICK_corrcoef":0.8039312865,"sts_FarSICK_valid_output_ratio":0.688,"nlu_score":0.3916645306}
14
+ {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8671704383,"sts_FarSICK_corrcoef":0.8671704383,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6459120734}
15
+ {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.0,"sts_FarSICK_corrcoef":0.0,"sts_FarSICK_valid_output_ratio":0.0,"nlu_score":0.1368924446}
16
+ {"Model Name":"yandexgpt-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8139555874,"sts_FarSICK_corrcoef":0.8155867609,"sts_FarSICK_valid_output_ratio":0.998,"nlu_score":0.5761104945}
17
+ {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8346099969,"sts_FarSICK_corrcoef":0.8346099969,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.5661558794}
18
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8438423833,"sts_FarSICK_corrcoef":0.8438423833,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.628506628}
19
+ {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8767598269,"sts_FarSICK_corrcoef":0.8767598269,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.699116864}
20
+ {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.0,"sts_FarSICK_corrcoef":0.0,"sts_FarSICK_valid_output_ratio":0.0,"nlu_score":0.3928685253}
21
+ {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8497629768,"sts_FarSICK_corrcoef":0.8497629768,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.5968415875}
22
+ {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8612153956,"sts_FarSICK_corrcoef":0.8612153956,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7146808531}
23
+ {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8606070195,"sts_FarSICK_corrcoef":0.8606070195,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7143086066}
24
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8410343894,"sts_FarSICK_corrcoef":0.8410343894,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6470954618}
 
 
25
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8412365875,"sts_FarSICK_corrcoef":0.8412365875,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6458443785}
26
+ {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8471466571,"sts_FarSICK_corrcoef":0.8471466571,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6241793507}
27
+ {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.0,"sts_FarSICK_corrcoef":0.0,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.0372987683}
 
 
 
 
 
 
28
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.2533632205,"sts_FarSICK_corrcoef":0.8617796616,"sts_FarSICK_valid_output_ratio":0.294,"nlu_score":0.3749414991}
29
+ {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8624442565,"sts_FarSICK_corrcoef":0.8624442565,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6758278127}
30
+ {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.0,"sts_FarSICK_corrcoef":0.0,"sts_FarSICK_valid_output_ratio":0.058,"nlu_score":0.4824528512}
31
+ {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8461251715,"sts_FarSICK_corrcoef":0.8461251715,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6914202844}
32
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8020636156,"sts_FarSICK_corrcoef":0.8020636156,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.456845738}
33
+ {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8569357778,"sts_FarSICK_corrcoef":0.8569357778,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7178891542}
34
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8641781993,"sts_FarSICK_corrcoef":0.8641781993,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6752949557}
35
+ {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.7973824106,"sts_FarSICK_corrcoef":0.8005847496,"sts_FarSICK_valid_output_ratio":0.996,"nlu_score":0.5241296095}
36
+ {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.0,"sts_FarSICK_corrcoef":0.0,"sts_FarSICK_valid_output_ratio":0.018,"nlu_score":0.046805056}
37
+ {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.86471356,"sts_FarSICK_corrcoef":0.86471356,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6833497104}
38
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8363152655,"sts_FarSICK_corrcoef":0.8430597434,"sts_FarSICK_valid_output_ratio":0.992,"nlu_score":0.5121418762}
39
+ {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8796836219,"sts_FarSICK_corrcoef":0.8796836219,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6898261633}
40
+ {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8521163575,"sts_FarSICK_corrcoef":0.8521163575,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7144353486}
41
+ {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8602460091,"sts_FarSICK_corrcoef":0.8602460091,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6552152029}
42
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8274969834,"sts_FarSICK_corrcoef":0.8274969834,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6800109206}
43
+ {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.6678492429,"sts_FarSICK_corrcoef":0.6913553239,"sts_FarSICK_valid_output_ratio":0.966,"nlu_score":0.3619547874}
44
+ {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8332013424,"sts_FarSICK_corrcoef":0.8332013424,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6297634971}
45
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8663758584,"sts_FarSICK_corrcoef":0.8663758584,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7207167537}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leaderboard/boards_data/sts_SynPerSTS.jsonl CHANGED
@@ -1,44 +1,45 @@
1
- {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9639002346,"sts_SynPerSTS_corrcoef":0.9639002346,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.7143086066}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9496068485,"sts_SynPerSTS_corrcoef":0.9496068485,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.628506628}
3
- {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9198771683,"sts_SynPerSTS_corrcoef":0.9198771683,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6241793507}
4
- {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9445586775,"sts_SynPerSTS_corrcoef":0.9445586775,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6297634971}
 
 
 
5
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9475374949,"sts_SynPerSTS_corrcoef":0.9475374949,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6470954618}
6
- {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9405179912,"sts_SynPerSTS_corrcoef":0.9405179912,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.7144353486}
7
- {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9417676956,"sts_SynPerSTS_corrcoef":0.9417676956,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6749652797}
8
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9492493767,"sts_SynPerSTS_corrcoef":0.9492493767,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6458443785}
9
- {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9587180795,"sts_SynPerSTS_corrcoef":0.9587180795,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6552152029}
10
- {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9620104912,"sts_SynPerSTS_corrcoef":0.9620104912,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6758278127}
11
- {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.950218242,"sts_SynPerSTS_corrcoef":0.950218242,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.699116864}
12
- {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9548003491,"sts_SynPerSTS_corrcoef":0.9548003491,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.7178891542}
13
- {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9583160478,"sts_SynPerSTS_corrcoef":0.9583160478,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6898261633}
14
- {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9603899338,"sts_SynPerSTS_corrcoef":0.9603899338,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6460328733}
15
- {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.961773092,"sts_SynPerSTS_corrcoef":0.961773092,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6714091535}
16
- {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.0975200826,"sts_SynPerSTS_corrcoef":0.9752008261,"sts_SynPerSTS_valid_output_ratio":0.1,"nlu_score":0.4086928082}
17
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.3217761614,"sts_SynPerSTS_corrcoef":0.8791698399,"sts_SynPerSTS_valid_output_ratio":0.366,"nlu_score":0.3749414991}
18
- {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9424987971,"sts_SynPerSTS_corrcoef":0.9424987971,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.5661558794}
 
 
19
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.8942713775,"sts_SynPerSTS_corrcoef":0.8942713775,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.456845738}
 
20
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9550693345,"sts_SynPerSTS_corrcoef":0.9550693345,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6752949557}
 
 
 
21
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9175164452,"sts_SynPerSTS_corrcoef":0.9324354118,"sts_SynPerSTS_valid_output_ratio":0.984,"nlu_score":0.5121418762}
22
- {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.7233454448,"sts_SynPerSTS_corrcoef":0.8037171609,"sts_SynPerSTS_valid_output_ratio":0.9,"nlu_score":0.3619547874}
23
- {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.0,"sts_SynPerSTS_corrcoef":0.0,"sts_SynPerSTS_valid_output_ratio":0.0,"nlu_score":0.3928685253}
 
24
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9409955022,"sts_SynPerSTS_corrcoef":0.9409955022,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6800109206}
25
- {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9622305784,"sts_SynPerSTS_corrcoef":0.9622305784,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6833497104}
 
26
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9509371488,"sts_SynPerSTS_corrcoef":0.9509371488,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.7207167537}
27
- {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9590342543,"sts_SynPerSTS_corrcoef":0.9590342543,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6459120734}
28
- {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.1337524795,"sts_SynPerSTS_corrcoef":0.8057378284,"sts_SynPerSTS_valid_output_ratio":0.166,"nlu_score":0.4824528512}
29
- {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9665566215,"sts_SynPerSTS_corrcoef":0.9665566215,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.7050532433}
30
- {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9401486573,"sts_SynPerSTS_corrcoef":0.9401486573,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6944128198}
31
- {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9551219626,"sts_SynPerSTS_corrcoef":0.9551219626,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6914202844}
32
- {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9009001164,"sts_SynPerSTS_corrcoef":0.9009001164,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.531045981}
33
- {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.8301701871,"sts_SynPerSTS_corrcoef":0.8318338548,"sts_SynPerSTS_valid_output_ratio":0.998,"nlu_score":0.5241296095}
34
- {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.930027057,"sts_SynPerSTS_corrcoef":0.930027057,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6262096694}
35
- {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9438739105,"sts_SynPerSTS_corrcoef":0.9438739105,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6443219619}
36
- {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9555087155,"sts_SynPerSTS_corrcoef":0.9555087155,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.5968415875}
37
- {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.4432810096,"sts_SynPerSTS_corrcoef":0.7363471921,"sts_SynPerSTS_valid_output_ratio":0.602,"nlu_score":0.3916645306}
38
- {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9586779662,"sts_SynPerSTS_corrcoef":0.9586779662,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.7146808531}
39
- {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9581074422,"sts_SynPerSTS_corrcoef":0.9581074422,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6361186163}
40
- {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9421493238,"sts_SynPerSTS_corrcoef":0.9421493238,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6255818412}
41
- {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.0,"sts_SynPerSTS_corrcoef":0.0,"sts_SynPerSTS_valid_output_ratio":0.0,"nlu_score":0.1368924446}
42
- {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.0,"sts_SynPerSTS_corrcoef":0.0,"sts_SynPerSTS_valid_output_ratio":0.02,"nlu_score":0.046805056}
43
- {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9446265171,"sts_SynPerSTS_corrcoef":0.9446265171,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6992555201}
44
- {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.0,"sts_SynPerSTS_corrcoef":0.0,"sts_SynPerSTS_valid_output_ratio":0.994,"nlu_score":0.0372987683}
 
1
+ {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.930027057,"sts_SynPerSTS_corrcoef":0.930027057,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6262096694}
2
+ {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9009001164,"sts_SynPerSTS_corrcoef":0.9009001164,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.531045981}
3
+ {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9665566215,"sts_SynPerSTS_corrcoef":0.9665566215,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.7050532433}
4
+ {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9581074422,"sts_SynPerSTS_corrcoef":0.9581074422,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6361186163}
5
+ {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.0975200826,"sts_SynPerSTS_corrcoef":0.9752008261,"sts_SynPerSTS_valid_output_ratio":0.1,"nlu_score":0.4086928082}
6
+ {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9603899338,"sts_SynPerSTS_corrcoef":0.9603899338,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6460328733}
7
+ {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.961773092,"sts_SynPerSTS_corrcoef":0.961773092,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6714091535}
8
+ {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9401486573,"sts_SynPerSTS_corrcoef":0.9401486573,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6944128198}
9
+ {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9438739105,"sts_SynPerSTS_corrcoef":0.9438739105,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6443219619}
10
+ {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9446265171,"sts_SynPerSTS_corrcoef":0.9446265171,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6992555201}
11
+ {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9417676956,"sts_SynPerSTS_corrcoef":0.9417676956,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6749652797}
12
+ {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9421493238,"sts_SynPerSTS_corrcoef":0.9421493238,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6255818412}
13
+ {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.4432810096,"sts_SynPerSTS_corrcoef":0.7363471921,"sts_SynPerSTS_valid_output_ratio":0.602,"nlu_score":0.3916645306}
14
+ {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9590342543,"sts_SynPerSTS_corrcoef":0.9590342543,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6459120734}
15
+ {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.0,"sts_SynPerSTS_corrcoef":0.0,"sts_SynPerSTS_valid_output_ratio":0.0,"nlu_score":0.1368924446}
16
+ {"Model Name":"yandexgpt-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.884204514,"sts_SynPerSTS_corrcoef":0.9307415936,"sts_SynPerSTS_valid_output_ratio":0.95,"nlu_score":0.5761104945}
17
+ {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9424987971,"sts_SynPerSTS_corrcoef":0.9424987971,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.5661558794}
18
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9496068485,"sts_SynPerSTS_corrcoef":0.9496068485,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.628506628}
19
+ {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.950218242,"sts_SynPerSTS_corrcoef":0.950218242,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.699116864}
20
+ {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.0,"sts_SynPerSTS_corrcoef":0.0,"sts_SynPerSTS_valid_output_ratio":0.0,"nlu_score":0.3928685253}
21
+ {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9555087155,"sts_SynPerSTS_corrcoef":0.9555087155,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.5968415875}
22
+ {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9586779662,"sts_SynPerSTS_corrcoef":0.9586779662,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.7146808531}
23
+ {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9639002346,"sts_SynPerSTS_corrcoef":0.9639002346,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.7143086066}
24
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9475374949,"sts_SynPerSTS_corrcoef":0.9475374949,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6470954618}
 
 
25
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9492493767,"sts_SynPerSTS_corrcoef":0.9492493767,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6458443785}
26
+ {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9198771683,"sts_SynPerSTS_corrcoef":0.9198771683,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6241793507}
27
+ {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.0,"sts_SynPerSTS_corrcoef":0.0,"sts_SynPerSTS_valid_output_ratio":0.994,"nlu_score":0.0372987683}
 
 
 
 
 
 
28
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.3217761614,"sts_SynPerSTS_corrcoef":0.8791698399,"sts_SynPerSTS_valid_output_ratio":0.366,"nlu_score":0.3749414991}
29
+ {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9620104912,"sts_SynPerSTS_corrcoef":0.9620104912,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6758278127}
30
+ {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.1337524795,"sts_SynPerSTS_corrcoef":0.8057378284,"sts_SynPerSTS_valid_output_ratio":0.166,"nlu_score":0.4824528512}
31
+ {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9551219626,"sts_SynPerSTS_corrcoef":0.9551219626,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6914202844}
32
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.8942713775,"sts_SynPerSTS_corrcoef":0.8942713775,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.456845738}
33
+ {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9548003491,"sts_SynPerSTS_corrcoef":0.9548003491,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.7178891542}
34
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9550693345,"sts_SynPerSTS_corrcoef":0.9550693345,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6752949557}
35
+ {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.8301701871,"sts_SynPerSTS_corrcoef":0.8318338548,"sts_SynPerSTS_valid_output_ratio":0.998,"nlu_score":0.5241296095}
36
+ {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.0,"sts_SynPerSTS_corrcoef":0.0,"sts_SynPerSTS_valid_output_ratio":0.02,"nlu_score":0.046805056}
37
+ {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9622305784,"sts_SynPerSTS_corrcoef":0.9622305784,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6833497104}
38
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9175164452,"sts_SynPerSTS_corrcoef":0.9324354118,"sts_SynPerSTS_valid_output_ratio":0.984,"nlu_score":0.5121418762}
39
+ {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9583160478,"sts_SynPerSTS_corrcoef":0.9583160478,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6898261633}
40
+ {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9405179912,"sts_SynPerSTS_corrcoef":0.9405179912,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.7144353486}
41
+ {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9587180795,"sts_SynPerSTS_corrcoef":0.9587180795,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6552152029}
42
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9409955022,"sts_SynPerSTS_corrcoef":0.9409955022,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6800109206}
43
+ {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.7233454448,"sts_SynPerSTS_corrcoef":0.8037171609,"sts_SynPerSTS_valid_output_ratio":0.9,"nlu_score":0.3619547874}
44
+ {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9445586775,"sts_SynPerSTS_corrcoef":0.9445586775,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6297634971}
45
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9509371488,"sts_SynPerSTS_corrcoef":0.9509371488,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.7207167537}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leaderboard/boards_data/summarization_PnSummary.jsonl CHANGED
@@ -1,44 +1,45 @@
1
- {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1188323392,"summarization_PnSummary_rougeL_recall":0.3948447809,"summarization_PnSummary_rougeL_f1_score":0.1786530476,"nlg_score":0.1779340777}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0796516779,"summarization_PnSummary_rougeL_recall":0.3573917363,"summarization_PnSummary_rougeL_f1_score":0.1263677591,"nlg_score":0.1334687319}
3
- {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1095844839,"summarization_PnSummary_rougeL_recall":0.3735331299,"summarization_PnSummary_rougeL_f1_score":0.1645385252,"nlg_score":0.0949943578}
4
- {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1227039295,"summarization_PnSummary_rougeL_recall":0.4315497639,"summarization_PnSummary_rougeL_f1_score":0.1856517383,"nlg_score":0.1880477876}
 
 
 
5
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0809192383,"summarization_PnSummary_rougeL_recall":0.3794442922,"summarization_PnSummary_rougeL_f1_score":0.1297840236,"nlg_score":0.1430866672}
6
- {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.0917305447,"summarization_PnSummary_rougeL_recall":0.3893845098,"summarization_PnSummary_rougeL_f1_score":0.1447284086,"nlg_score":0.181552926}
7
- {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.0830986853,"summarization_PnSummary_rougeL_recall":0.3565850313,"summarization_PnSummary_rougeL_f1_score":0.1308633101,"nlg_score":0.1643361642}
8
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0838678728,"summarization_PnSummary_rougeL_recall":0.3842899041,"summarization_PnSummary_rougeL_f1_score":0.1338531153,"nlg_score":0.1538910531}
9
- {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1072486168,"summarization_PnSummary_rougeL_recall":0.3966587345,"summarization_PnSummary_rougeL_f1_score":0.1627029568,"nlg_score":0.0940241349}
10
- {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1234743619,"summarization_PnSummary_rougeL_recall":0.376111826,"summarization_PnSummary_rougeL_f1_score":0.1808600563,"nlg_score":0.194675133}
11
- {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1156871575,"summarization_PnSummary_rougeL_recall":0.3630716995,"summarization_PnSummary_rougeL_f1_score":0.1697348346,"nlg_score":0.1196804312}
12
- {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.0968971247,"summarization_PnSummary_rougeL_recall":0.3847676963,"summarization_PnSummary_rougeL_f1_score":0.1505040282,"nlg_score":0.1631530657}
13
- {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1124574222,"summarization_PnSummary_rougeL_recall":0.3717393409,"summarization_PnSummary_rougeL_f1_score":0.1673025553,"nlg_score":0.1067134448}
14
- {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1123870374,"summarization_PnSummary_rougeL_recall":0.4032007327,"summarization_PnSummary_rougeL_f1_score":0.17115848,"nlg_score":0.16056333}
15
- {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1089978489,"summarization_PnSummary_rougeL_recall":0.3936021933,"summarization_PnSummary_rougeL_f1_score":0.1662525669,"nlg_score":0.1679338638}
16
- {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1377053282,"summarization_PnSummary_rougeL_recall":0.380073051,"summarization_PnSummary_rougeL_f1_score":0.1928750247,"nlg_score":0.1567965528}
17
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.132916538,"summarization_PnSummary_rougeL_recall":0.3579358655,"summarization_PnSummary_rougeL_f1_score":0.1887379797,"nlg_score":0.1089333827}
18
- {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1029257145,"summarization_PnSummary_rougeL_recall":0.4347811424,"summarization_PnSummary_rougeL_f1_score":0.1621438757,"nlg_score":0.1319091735}
 
 
19
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0833149314,"summarization_PnSummary_rougeL_recall":0.4027758903,"summarization_PnSummary_rougeL_f1_score":0.1338404051,"nlg_score":0.112015688}
 
20
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0811186656,"summarization_PnSummary_rougeL_recall":0.3940089293,"summarization_PnSummary_rougeL_f1_score":0.1316106196,"nlg_score":0.0934094344}
 
 
 
21
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1067208324,"summarization_PnSummary_rougeL_recall":0.4109136551,"summarization_PnSummary_rougeL_f1_score":0.1648475797,"nlg_score":0.1389297212}
22
- {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.111370713,"summarization_PnSummary_rougeL_recall":0.3732014316,"summarization_PnSummary_rougeL_f1_score":0.1661125342,"nlg_score":0.0682994522}
23
- {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1094933227,"summarization_PnSummary_rougeL_recall":0.3979476995,"summarization_PnSummary_rougeL_f1_score":0.1674664883,"nlg_score":0.1196400535}
 
24
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.2237919051,"summarization_PnSummary_rougeL_recall":0.3532978852,"summarization_PnSummary_rougeL_f1_score":0.2484855426,"nlg_score":0.2010896964}
25
- {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1156493376,"summarization_PnSummary_rougeL_recall":0.403347998,"summarization_PnSummary_rougeL_f1_score":0.1750055649,"nlg_score":0.1901206806}
 
26
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.0950296419,"summarization_PnSummary_rougeL_recall":0.3412128078,"summarization_PnSummary_rougeL_f1_score":0.1438085772,"nlg_score":0.1764906292}
27
- {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1160048958,"summarization_PnSummary_rougeL_recall":0.3980422927,"summarization_PnSummary_rougeL_f1_score":0.1751797476,"nlg_score":0.1810678527}
28
- {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1116612356,"summarization_PnSummary_rougeL_recall":0.420922163,"summarization_PnSummary_rougeL_f1_score":0.1723099731,"nlg_score":0.1137933652}
29
- {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1228424623,"summarization_PnSummary_rougeL_recall":0.3750771332,"summarization_PnSummary_rougeL_f1_score":0.1793201723,"nlg_score":0.178231145}
30
- {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1070140366,"summarization_PnSummary_rougeL_recall":0.4357356292,"summarization_PnSummary_rougeL_f1_score":0.1672508999,"nlg_score":0.1368740087}
31
- {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1191404443,"summarization_PnSummary_rougeL_recall":0.365434541,"summarization_PnSummary_rougeL_f1_score":0.1744092468,"nlg_score":0.1659339021}
32
- {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1298447221,"summarization_PnSummary_rougeL_recall":0.3548911672,"summarization_PnSummary_rougeL_f1_score":0.1841564462,"nlg_score":0.1641995602}
33
- {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.092858834,"summarization_PnSummary_rougeL_recall":0.3502845677,"summarization_PnSummary_rougeL_f1_score":0.142482969,"nlg_score":0.1417778788}
34
- {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1120916238,"summarization_PnSummary_rougeL_recall":0.3610411286,"summarization_PnSummary_rougeL_f1_score":0.1660826543,"nlg_score":0.1665903777}
35
- {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0812584951,"summarization_PnSummary_rougeL_recall":0.3855275637,"summarization_PnSummary_rougeL_f1_score":0.1306097421,"nlg_score":0.1324031203}
36
- {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.109255822,"summarization_PnSummary_rougeL_recall":0.3979273385,"summarization_PnSummary_rougeL_f1_score":0.1669061111,"nlg_score":0.1557270864}
37
- {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0921640152,"summarization_PnSummary_rougeL_recall":0.4401953868,"summarization_PnSummary_rougeL_f1_score":0.1480945013,"nlg_score":0.0944140383}
38
- {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1134979628,"summarization_PnSummary_rougeL_recall":0.3909794734,"summarization_PnSummary_rougeL_f1_score":0.1716841943,"nlg_score":0.18964968}
39
- {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0849469928,"summarization_PnSummary_rougeL_recall":0.3820724231,"summarization_PnSummary_rougeL_f1_score":0.1359575611,"nlg_score":0.0880621978}
40
- {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1212751301,"summarization_PnSummary_rougeL_recall":0.3923323141,"summarization_PnSummary_rougeL_f1_score":0.1804727387,"nlg_score":0.164118288}
41
- {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.151465786,"summarization_PnSummary_rougeL_recall":0.3775823327,"summarization_PnSummary_rougeL_f1_score":0.203395452,"nlg_score":0.1129755187}
42
- {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.2061378815,"summarization_PnSummary_rougeL_recall":0.2755376589,"summarization_PnSummary_rougeL_f1_score":0.2192316506,"nlg_score":0.0823387318}
43
- {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1091020199,"summarization_PnSummary_rougeL_recall":0.3487472682,"summarization_PnSummary_rougeL_f1_score":0.1614333679,"nlg_score":0.1151518212}
44
- {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0819226263,"summarization_PnSummary_rougeL_recall":0.4333005471,"summarization_PnSummary_rougeL_f1_score":0.1274251245,"nlg_score":0.0509841903}
 
1
+ {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1120916238,"summarization_PnSummary_rougeL_recall":0.3610411286,"summarization_PnSummary_rougeL_f1_score":0.1660826543,"nlg_score":0.1665903777}
2
+ {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1298447221,"summarization_PnSummary_rougeL_recall":0.3548911672,"summarization_PnSummary_rougeL_f1_score":0.1841564462,"nlg_score":0.1641995602}
3
+ {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1228424623,"summarization_PnSummary_rougeL_recall":0.3750771332,"summarization_PnSummary_rougeL_f1_score":0.1793201723,"nlg_score":0.178231145}
4
+ {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0849469928,"summarization_PnSummary_rougeL_recall":0.3820724231,"summarization_PnSummary_rougeL_f1_score":0.1359575611,"nlg_score":0.0880621978}
5
+ {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1377053282,"summarization_PnSummary_rougeL_recall":0.380073051,"summarization_PnSummary_rougeL_f1_score":0.1928750247,"nlg_score":0.1567965528}
6
+ {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1123870374,"summarization_PnSummary_rougeL_recall":0.4032007327,"summarization_PnSummary_rougeL_f1_score":0.17115848,"nlg_score":0.16056333}
7
+ {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1089978489,"summarization_PnSummary_rougeL_recall":0.3936021933,"summarization_PnSummary_rougeL_f1_score":0.1662525669,"nlg_score":0.1679338638}
8
+ {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1070140366,"summarization_PnSummary_rougeL_recall":0.4357356292,"summarization_PnSummary_rougeL_f1_score":0.1672508999,"nlg_score":0.1368740087}
9
+ {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0812584951,"summarization_PnSummary_rougeL_recall":0.3855275637,"summarization_PnSummary_rougeL_f1_score":0.1306097421,"nlg_score":0.1324031203}
10
+ {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1091020199,"summarization_PnSummary_rougeL_recall":0.3487472682,"summarization_PnSummary_rougeL_f1_score":0.1614333679,"nlg_score":0.1151518212}
11
+ {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.0830986853,"summarization_PnSummary_rougeL_recall":0.3565850313,"summarization_PnSummary_rougeL_f1_score":0.1308633101,"nlg_score":0.1643361642}
12
+ {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1212751301,"summarization_PnSummary_rougeL_recall":0.3923323141,"summarization_PnSummary_rougeL_f1_score":0.1804727387,"nlg_score":0.164118288}
13
+ {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0921640152,"summarization_PnSummary_rougeL_recall":0.4401953868,"summarization_PnSummary_rougeL_f1_score":0.1480945013,"nlg_score":0.0944140383}
14
+ {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1160048958,"summarization_PnSummary_rougeL_recall":0.3980422927,"summarization_PnSummary_rougeL_f1_score":0.1751797476,"nlg_score":0.1810678527}
15
+ {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.151465786,"summarization_PnSummary_rougeL_recall":0.3775823327,"summarization_PnSummary_rougeL_f1_score":0.203395452,"nlg_score":0.1129755187}
16
+ {"Model Name":"yandexgpt-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0725893065,"summarization_PnSummary_rougeL_recall":0.3020007897,"summarization_PnSummary_rougeL_f1_score":0.1133719008,"nlg_score":0.1035446324}
17
+ {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1029257145,"summarization_PnSummary_rougeL_recall":0.4347811424,"summarization_PnSummary_rougeL_f1_score":0.1621438757,"nlg_score":0.1319091735}
18
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0796516779,"summarization_PnSummary_rougeL_recall":0.3573917363,"summarization_PnSummary_rougeL_f1_score":0.1263677591,"nlg_score":0.1334687319}
19
+ {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1156871575,"summarization_PnSummary_rougeL_recall":0.3630716995,"summarization_PnSummary_rougeL_f1_score":0.1697348346,"nlg_score":0.1196804312}
20
+ {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1094933227,"summarization_PnSummary_rougeL_recall":0.3979476995,"summarization_PnSummary_rougeL_f1_score":0.1674664883,"nlg_score":0.1196400535}
21
+ {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.109255822,"summarization_PnSummary_rougeL_recall":0.3979273385,"summarization_PnSummary_rougeL_f1_score":0.1669061111,"nlg_score":0.1557270864}
22
+ {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1134979628,"summarization_PnSummary_rougeL_recall":0.3909794734,"summarization_PnSummary_rougeL_f1_score":0.1716841943,"nlg_score":0.18964968}
23
+ {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1188323392,"summarization_PnSummary_rougeL_recall":0.3948447809,"summarization_PnSummary_rougeL_f1_score":0.1786530476,"nlg_score":0.1779340777}
24
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0809192383,"summarization_PnSummary_rougeL_recall":0.3794442922,"summarization_PnSummary_rougeL_f1_score":0.1297840236,"nlg_score":0.1430866672}
 
 
25
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0838678728,"summarization_PnSummary_rougeL_recall":0.3842899041,"summarization_PnSummary_rougeL_f1_score":0.1338531153,"nlg_score":0.1538910531}
26
+ {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1095844839,"summarization_PnSummary_rougeL_recall":0.3735331299,"summarization_PnSummary_rougeL_f1_score":0.1645385252,"nlg_score":0.0949943578}
27
+ {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0819226263,"summarization_PnSummary_rougeL_recall":0.4333005471,"summarization_PnSummary_rougeL_f1_score":0.1274251245,"nlg_score":0.0509841903}
 
 
 
 
 
 
28
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.132916538,"summarization_PnSummary_rougeL_recall":0.3579358655,"summarization_PnSummary_rougeL_f1_score":0.1887379797,"nlg_score":0.1089333827}
29
+ {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1234743619,"summarization_PnSummary_rougeL_recall":0.376111826,"summarization_PnSummary_rougeL_f1_score":0.1808600563,"nlg_score":0.194675133}
30
+ {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1116612356,"summarization_PnSummary_rougeL_recall":0.420922163,"summarization_PnSummary_rougeL_f1_score":0.1723099731,"nlg_score":0.1137933652}
31
+ {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1191404443,"summarization_PnSummary_rougeL_recall":0.365434541,"summarization_PnSummary_rougeL_f1_score":0.1744092468,"nlg_score":0.1659339021}
32
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0833149314,"summarization_PnSummary_rougeL_recall":0.4027758903,"summarization_PnSummary_rougeL_f1_score":0.1338404051,"nlg_score":0.112015688}
33
+ {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.0968971247,"summarization_PnSummary_rougeL_recall":0.3847676963,"summarization_PnSummary_rougeL_f1_score":0.1505040282,"nlg_score":0.1631530657}
34
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0811186656,"summarization_PnSummary_rougeL_recall":0.3940089293,"summarization_PnSummary_rougeL_f1_score":0.1316106196,"nlg_score":0.0934094344}
35
+ {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.092858834,"summarization_PnSummary_rougeL_recall":0.3502845677,"summarization_PnSummary_rougeL_f1_score":0.142482969,"nlg_score":0.1417778788}
36
+ {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.2061378815,"summarization_PnSummary_rougeL_recall":0.2755376589,"summarization_PnSummary_rougeL_f1_score":0.2192316506,"nlg_score":0.0823387318}
37
+ {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1156493376,"summarization_PnSummary_rougeL_recall":0.403347998,"summarization_PnSummary_rougeL_f1_score":0.1750055649,"nlg_score":0.1901206806}
38
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1067208324,"summarization_PnSummary_rougeL_recall":0.4109136551,"summarization_PnSummary_rougeL_f1_score":0.1648475797,"nlg_score":0.1389297212}
39
+ {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1124574222,"summarization_PnSummary_rougeL_recall":0.3717393409,"summarization_PnSummary_rougeL_f1_score":0.1673025553,"nlg_score":0.1067134448}
40
+ {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.0917305447,"summarization_PnSummary_rougeL_recall":0.3893845098,"summarization_PnSummary_rougeL_f1_score":0.1447284086,"nlg_score":0.181552926}
41
+ {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1072486168,"summarization_PnSummary_rougeL_recall":0.3966587345,"summarization_PnSummary_rougeL_f1_score":0.1627029568,"nlg_score":0.0940241349}
42
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.2237919051,"summarization_PnSummary_rougeL_recall":0.3532978852,"summarization_PnSummary_rougeL_f1_score":0.2484855426,"nlg_score":0.2010896964}
43
+ {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.111370713,"summarization_PnSummary_rougeL_recall":0.3732014316,"summarization_PnSummary_rougeL_f1_score":0.1661125342,"nlg_score":0.0682994522}
44
+ {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1227039295,"summarization_PnSummary_rougeL_recall":0.4315497639,"summarization_PnSummary_rougeL_f1_score":0.1856517383,"nlg_score":0.1880477876}
45
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.0950296419,"summarization_PnSummary_rougeL_recall":0.3412128078,"summarization_PnSummary_rougeL_f1_score":0.1438085772,"nlg_score":0.1764906292}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leaderboard/boards_data/summarization_SamSUM-fa.jsonl CHANGED
@@ -1,44 +1,45 @@
1
- {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1538512444,"summarization_SamSUM-fa_rougeL_recall":0.3849531288,"summarization_SamSUM-fa_rougeL_f1_score":0.2115502707,"nlg_score":0.1779340777}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1076131004,"summarization_SamSUM-fa_rougeL_recall":0.354952604,"summarization_SamSUM-fa_rougeL_f1_score":0.1578241504,"nlg_score":0.1334687319}
3
- {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1638274716,"summarization_SamSUM-fa_rougeL_recall":0.3535878882,"summarization_SamSUM-fa_rougeL_f1_score":0.2134854664,"nlg_score":0.0949943578}
4
- {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1591262985,"summarization_SamSUM-fa_rougeL_recall":0.4163090512,"summarization_SamSUM-fa_rougeL_f1_score":0.2208876443,"nlg_score":0.1880477876}
 
 
 
5
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1172226058,"summarization_SamSUM-fa_rougeL_recall":0.3645840814,"summarization_SamSUM-fa_rougeL_f1_score":0.1704408388,"nlg_score":0.1430866672}
6
- {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1553547312,"summarization_SamSUM-fa_rougeL_recall":0.3357735524,"summarization_SamSUM-fa_rougeL_f1_score":0.2045988783,"nlg_score":0.181552926}
7
- {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1387162197,"summarization_SamSUM-fa_rougeL_recall":0.3472256524,"summarization_SamSUM-fa_rougeL_f1_score":0.1899415698,"nlg_score":0.1643361642}
8
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1197038513,"summarization_SamSUM-fa_rougeL_recall":0.3691802463,"summarization_SamSUM-fa_rougeL_f1_score":0.1737939492,"nlg_score":0.1538910531}
9
- {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1468362777,"summarization_SamSUM-fa_rougeL_recall":0.3858089513,"summarization_SamSUM-fa_rougeL_f1_score":0.2041300257,"nlg_score":0.0940241349}
10
- {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1681357159,"summarization_SamSUM-fa_rougeL_recall":0.3567938895,"summarization_SamSUM-fa_rougeL_f1_score":0.2189693454,"nlg_score":0.194675133}
11
- {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1658145118,"summarization_SamSUM-fa_rougeL_recall":0.3677760479,"summarization_SamSUM-fa_rougeL_f1_score":0.2189237562,"nlg_score":0.1196804312}
12
- {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1508002877,"summarization_SamSUM-fa_rougeL_recall":0.3371058997,"summarization_SamSUM-fa_rougeL_f1_score":0.1999301574,"nlg_score":0.1631530657}
13
- {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1588367988,"summarization_SamSUM-fa_rougeL_recall":0.3735722635,"summarization_SamSUM-fa_rougeL_f1_score":0.2131671502,"nlg_score":0.1067134448}
14
- {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1469468837,"summarization_SamSUM-fa_rougeL_recall":0.3743807014,"summarization_SamSUM-fa_rougeL_f1_score":0.2022859929,"nlg_score":0.16056333}
15
- {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1514618398,"summarization_SamSUM-fa_rougeL_recall":0.3683020708,"summarization_SamSUM-fa_rougeL_f1_score":0.2063212948,"nlg_score":0.1679338638}
16
- {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1539866165,"summarization_SamSUM-fa_rougeL_recall":0.341409574,"summarization_SamSUM-fa_rougeL_f1_score":0.2007085976,"nlg_score":0.1567965528}
17
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1772724525,"summarization_SamSUM-fa_rougeL_recall":0.341583677,"summarization_SamSUM-fa_rougeL_f1_score":0.2233271064,"nlg_score":0.1089333827}
18
- {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.147286408,"summarization_SamSUM-fa_rougeL_recall":0.4066657958,"summarization_SamSUM-fa_rougeL_f1_score":0.2072278176,"nlg_score":0.1319091735}
 
 
19
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1137219495,"summarization_SamSUM-fa_rougeL_recall":0.3496708707,"summarization_SamSUM-fa_rougeL_f1_score":0.1628971148,"nlg_score":0.112015688}
 
20
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.0893319419,"summarization_SamSUM-fa_rougeL_recall":0.3701712252,"summarization_SamSUM-fa_rougeL_f1_score":0.1392333016,"nlg_score":0.0934094344}
 
 
 
21
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1429609514,"summarization_SamSUM-fa_rougeL_recall":0.397717388,"summarization_SamSUM-fa_rougeL_f1_score":0.2013136641,"nlg_score":0.1389297212}
22
- {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1340334866,"summarization_SamSUM-fa_rougeL_recall":0.3184206946,"summarization_SamSUM-fa_rougeL_f1_score":0.179098961,"nlg_score":0.0682994522}
23
- {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1338082958,"summarization_SamSUM-fa_rougeL_recall":0.397938928,"summarization_SamSUM-fa_rougeL_f1_score":0.1933390916,"nlg_score":0.1196400535}
 
24
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1632163927,"summarization_SamSUM-fa_rougeL_recall":0.387510969,"summarization_SamSUM-fa_rougeL_f1_score":0.2157634129,"nlg_score":0.2010896964}
25
- {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.171454009,"summarization_SamSUM-fa_rougeL_recall":0.3692597258,"summarization_SamSUM-fa_rougeL_f1_score":0.2248722593,"nlg_score":0.1901206806}
 
26
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1495202929,"summarization_SamSUM-fa_rougeL_recall":0.3342802415,"summarization_SamSUM-fa_rougeL_f1_score":0.1977642173,"nlg_score":0.1764906292}
27
- {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1578034675,"summarization_SamSUM-fa_rougeL_recall":0.3902121243,"summarization_SamSUM-fa_rougeL_f1_score":0.2156396673,"nlg_score":0.1810678527}
28
- {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.141334615,"summarization_SamSUM-fa_rougeL_recall":0.4016833546,"summarization_SamSUM-fa_rougeL_f1_score":0.2005260444,"nlg_score":0.1137933652}
29
- {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1808561992,"summarization_SamSUM-fa_rougeL_recall":0.414509553,"summarization_SamSUM-fa_rougeL_f1_score":0.2406998552,"nlg_score":0.178231145}
30
- {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1525978605,"summarization_SamSUM-fa_rougeL_recall":0.3945587249,"summarization_SamSUM-fa_rougeL_f1_score":0.209852471,"nlg_score":0.1368740087}
31
- {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1810410242,"summarization_SamSUM-fa_rougeL_recall":0.4016180552,"summarization_SamSUM-fa_rougeL_f1_score":0.2380560527,"nlg_score":0.1659339021}
32
- {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1944265929,"summarization_SamSUM-fa_rougeL_recall":0.3761499249,"summarization_SamSUM-fa_rougeL_f1_score":0.242617187,"nlg_score":0.1641995602}
33
- {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1389828817,"summarization_SamSUM-fa_rougeL_recall":0.3358609298,"summarization_SamSUM-fa_rougeL_f1_score":0.188323236,"nlg_score":0.1417778788}
34
- {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.16175156,"summarization_SamSUM-fa_rougeL_recall":0.3477483743,"summarization_SamSUM-fa_rougeL_f1_score":0.209834706,"nlg_score":0.1665903777}
35
- {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1171953889,"summarization_SamSUM-fa_rougeL_recall":0.3802417903,"summarization_SamSUM-fa_rougeL_f1_score":0.1720175761,"nlg_score":0.1324031203}
36
- {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1463365551,"summarization_SamSUM-fa_rougeL_recall":0.3856017289,"summarization_SamSUM-fa_rougeL_f1_score":0.2024070197,"nlg_score":0.1557270864}
37
- {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1180795687,"summarization_SamSUM-fa_rougeL_recall":0.3922712004,"summarization_SamSUM-fa_rougeL_f1_score":0.170765794,"nlg_score":0.0944140383}
38
- {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.165108522,"summarization_SamSUM-fa_rougeL_recall":0.3982318891,"summarization_SamSUM-fa_rougeL_f1_score":0.2240082992,"nlg_score":0.18964968}
39
- {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1081719649,"summarization_SamSUM-fa_rougeL_recall":0.3726803698,"summarization_SamSUM-fa_rougeL_f1_score":0.1606804283,"nlg_score":0.0880621978}
40
- {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1523824175,"summarization_SamSUM-fa_rougeL_recall":0.3838683519,"summarization_SamSUM-fa_rougeL_f1_score":0.2083553767,"nlg_score":0.164118288}
41
- {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1385750553,"summarization_SamSUM-fa_rougeL_recall":0.3133561002,"summarization_SamSUM-fa_rougeL_f1_score":0.1819150852,"nlg_score":0.1129755187}
42
- {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1565749742,"summarization_SamSUM-fa_rougeL_recall":0.2642298658,"summarization_SamSUM-fa_rougeL_f1_score":0.1759907012,"nlg_score":0.0823387318}
43
- {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1579878887,"summarization_SamSUM-fa_rougeL_recall":0.3549924347,"summarization_SamSUM-fa_rougeL_f1_score":0.2083528945,"nlg_score":0.1151518212}
44
- {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1154145475,"summarization_SamSUM-fa_rougeL_recall":0.2402163683,"summarization_SamSUM-fa_rougeL_f1_score":0.1388504604,"nlg_score":0.0509841903}
 
1
+ {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.16175156,"summarization_SamSUM-fa_rougeL_recall":0.3477483743,"summarization_SamSUM-fa_rougeL_f1_score":0.209834706,"nlg_score":0.1665903777}
2
+ {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1944265929,"summarization_SamSUM-fa_rougeL_recall":0.3761499249,"summarization_SamSUM-fa_rougeL_f1_score":0.242617187,"nlg_score":0.1641995602}
3
+ {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1808561992,"summarization_SamSUM-fa_rougeL_recall":0.414509553,"summarization_SamSUM-fa_rougeL_f1_score":0.2406998552,"nlg_score":0.178231145}
4
+ {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1081719649,"summarization_SamSUM-fa_rougeL_recall":0.3726803698,"summarization_SamSUM-fa_rougeL_f1_score":0.1606804283,"nlg_score":0.0880621978}
5
+ {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1539866165,"summarization_SamSUM-fa_rougeL_recall":0.341409574,"summarization_SamSUM-fa_rougeL_f1_score":0.2007085976,"nlg_score":0.1567965528}
6
+ {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1469468837,"summarization_SamSUM-fa_rougeL_recall":0.3743807014,"summarization_SamSUM-fa_rougeL_f1_score":0.2022859929,"nlg_score":0.16056333}
7
+ {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1514618398,"summarization_SamSUM-fa_rougeL_recall":0.3683020708,"summarization_SamSUM-fa_rougeL_f1_score":0.2063212948,"nlg_score":0.1679338638}
8
+ {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1525978605,"summarization_SamSUM-fa_rougeL_recall":0.3945587249,"summarization_SamSUM-fa_rougeL_f1_score":0.209852471,"nlg_score":0.1368740087}
9
+ {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1171953889,"summarization_SamSUM-fa_rougeL_recall":0.3802417903,"summarization_SamSUM-fa_rougeL_f1_score":0.1720175761,"nlg_score":0.1324031203}
10
+ {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1579878887,"summarization_SamSUM-fa_rougeL_recall":0.3549924347,"summarization_SamSUM-fa_rougeL_f1_score":0.2083528945,"nlg_score":0.1151518212}
11
+ {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1387162197,"summarization_SamSUM-fa_rougeL_recall":0.3472256524,"summarization_SamSUM-fa_rougeL_f1_score":0.1899415698,"nlg_score":0.1643361642}
12
+ {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1523824175,"summarization_SamSUM-fa_rougeL_recall":0.3838683519,"summarization_SamSUM-fa_rougeL_f1_score":0.2083553767,"nlg_score":0.164118288}
13
+ {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1180795687,"summarization_SamSUM-fa_rougeL_recall":0.3922712004,"summarization_SamSUM-fa_rougeL_f1_score":0.170765794,"nlg_score":0.0944140383}
14
+ {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1578034675,"summarization_SamSUM-fa_rougeL_recall":0.3902121243,"summarization_SamSUM-fa_rougeL_f1_score":0.2156396673,"nlg_score":0.1810678527}
15
+ {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1385750553,"summarization_SamSUM-fa_rougeL_recall":0.3133561002,"summarization_SamSUM-fa_rougeL_f1_score":0.1819150852,"nlg_score":0.1129755187}
16
+ {"Model Name":"yandexgpt-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1006745403,"summarization_SamSUM-fa_rougeL_recall":0.2023535874,"summarization_SamSUM-fa_rougeL_f1_score":0.1269407774,"nlg_score":0.1035446324}
17
+ {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.147286408,"summarization_SamSUM-fa_rougeL_recall":0.4066657958,"summarization_SamSUM-fa_rougeL_f1_score":0.2072278176,"nlg_score":0.1319091735}
18
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1076131004,"summarization_SamSUM-fa_rougeL_recall":0.354952604,"summarization_SamSUM-fa_rougeL_f1_score":0.1578241504,"nlg_score":0.1334687319}
19
+ {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1658145118,"summarization_SamSUM-fa_rougeL_recall":0.3677760479,"summarization_SamSUM-fa_rougeL_f1_score":0.2189237562,"nlg_score":0.1196804312}
20
+ {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1338082958,"summarization_SamSUM-fa_rougeL_recall":0.397938928,"summarization_SamSUM-fa_rougeL_f1_score":0.1933390916,"nlg_score":0.1196400535}
21
+ {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1463365551,"summarization_SamSUM-fa_rougeL_recall":0.3856017289,"summarization_SamSUM-fa_rougeL_f1_score":0.2024070197,"nlg_score":0.1557270864}
22
+ {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.165108522,"summarization_SamSUM-fa_rougeL_recall":0.3982318891,"summarization_SamSUM-fa_rougeL_f1_score":0.2240082992,"nlg_score":0.18964968}
23
+ {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1538512444,"summarization_SamSUM-fa_rougeL_recall":0.3849531288,"summarization_SamSUM-fa_rougeL_f1_score":0.2115502707,"nlg_score":0.1779340777}
24
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1172226058,"summarization_SamSUM-fa_rougeL_recall":0.3645840814,"summarization_SamSUM-fa_rougeL_f1_score":0.1704408388,"nlg_score":0.1430866672}
 
 
25
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1197038513,"summarization_SamSUM-fa_rougeL_recall":0.3691802463,"summarization_SamSUM-fa_rougeL_f1_score":0.1737939492,"nlg_score":0.1538910531}
26
+ {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1638274716,"summarization_SamSUM-fa_rougeL_recall":0.3535878882,"summarization_SamSUM-fa_rougeL_f1_score":0.2134854664,"nlg_score":0.0949943578}
27
+ {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1154145475,"summarization_SamSUM-fa_rougeL_recall":0.2402163683,"summarization_SamSUM-fa_rougeL_f1_score":0.1388504604,"nlg_score":0.0509841903}
 
 
 
 
 
 
28
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1772724525,"summarization_SamSUM-fa_rougeL_recall":0.341583677,"summarization_SamSUM-fa_rougeL_f1_score":0.2233271064,"nlg_score":0.1089333827}
29
+ {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1681357159,"summarization_SamSUM-fa_rougeL_recall":0.3567938895,"summarization_SamSUM-fa_rougeL_f1_score":0.2189693454,"nlg_score":0.194675133}
30
+ {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.141334615,"summarization_SamSUM-fa_rougeL_recall":0.4016833546,"summarization_SamSUM-fa_rougeL_f1_score":0.2005260444,"nlg_score":0.1137933652}
31
+ {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1810410242,"summarization_SamSUM-fa_rougeL_recall":0.4016180552,"summarization_SamSUM-fa_rougeL_f1_score":0.2380560527,"nlg_score":0.1659339021}
32
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1137219495,"summarization_SamSUM-fa_rougeL_recall":0.3496708707,"summarization_SamSUM-fa_rougeL_f1_score":0.1628971148,"nlg_score":0.112015688}
33
+ {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1508002877,"summarization_SamSUM-fa_rougeL_recall":0.3371058997,"summarization_SamSUM-fa_rougeL_f1_score":0.1999301574,"nlg_score":0.1631530657}
34
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.0893319419,"summarization_SamSUM-fa_rougeL_recall":0.3701712252,"summarization_SamSUM-fa_rougeL_f1_score":0.1392333016,"nlg_score":0.0934094344}
35
+ {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1389828817,"summarization_SamSUM-fa_rougeL_recall":0.3358609298,"summarization_SamSUM-fa_rougeL_f1_score":0.188323236,"nlg_score":0.1417778788}
36
+ {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1565749742,"summarization_SamSUM-fa_rougeL_recall":0.2642298658,"summarization_SamSUM-fa_rougeL_f1_score":0.1759907012,"nlg_score":0.0823387318}
37
+ {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.171454009,"summarization_SamSUM-fa_rougeL_recall":0.3692597258,"summarization_SamSUM-fa_rougeL_f1_score":0.2248722593,"nlg_score":0.1901206806}
38
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1429609514,"summarization_SamSUM-fa_rougeL_recall":0.397717388,"summarization_SamSUM-fa_rougeL_f1_score":0.2013136641,"nlg_score":0.1389297212}
39
+ {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1588367988,"summarization_SamSUM-fa_rougeL_recall":0.3735722635,"summarization_SamSUM-fa_rougeL_f1_score":0.2131671502,"nlg_score":0.1067134448}
40
+ {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1553547312,"summarization_SamSUM-fa_rougeL_recall":0.3357735524,"summarization_SamSUM-fa_rougeL_f1_score":0.2045988783,"nlg_score":0.181552926}
41
+ {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1468362777,"summarization_SamSUM-fa_rougeL_recall":0.3858089513,"summarization_SamSUM-fa_rougeL_f1_score":0.2041300257,"nlg_score":0.0940241349}
42
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1632163927,"summarization_SamSUM-fa_rougeL_recall":0.387510969,"summarization_SamSUM-fa_rougeL_f1_score":0.2157634129,"nlg_score":0.2010896964}
43
+ {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1340334866,"summarization_SamSUM-fa_rougeL_recall":0.3184206946,"summarization_SamSUM-fa_rougeL_f1_score":0.179098961,"nlg_score":0.0682994522}
44
+ {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1591262985,"summarization_SamSUM-fa_rougeL_recall":0.4163090512,"summarization_SamSUM-fa_rougeL_f1_score":0.2208876443,"nlg_score":0.1880477876}
45
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1495202929,"summarization_SamSUM-fa_rougeL_recall":0.3342802415,"summarization_SamSUM-fa_rougeL_f1_score":0.1977642173,"nlg_score":0.1764906292}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leaderboard/boards_data/tone-classification_SynTone.jsonl CHANGED
@@ -1,44 +1,45 @@
1
- {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8198757764,"tone-classification_SynTone_precision_modified":0.8180456965,"tone-classification_SynTone_recall_modified":0.5977640757,"tone-classification_SynTone_fscore_modified":0.6364434216,"tone-classification_SynTone_acc":0.8859060403,"tone-classification_SynTone_precision":0.8839285714,"tone-classification_SynTone_recall":0.6459061489,"tone-classification_SynTone_fscore":0.68770061,"tone-classification_SynTone_valid_output_ratio":0.9254658385,"nlu_score":0.7143086066}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.801242236,"tone-classification_SynTone_precision_modified":0.6325567597,"tone-classification_SynTone_recall_modified":0.5106323908,"tone-classification_SynTone_fscore_modified":0.5211970678,"tone-classification_SynTone_acc":0.8164556962,"tone-classification_SynTone_precision":0.6445673311,"tone-classification_SynTone_recall":0.5203279425,"tone-classification_SynTone_fscore":0.5310932146,"tone-classification_SynTone_valid_output_ratio":0.9813664596,"nlu_score":0.628506628}
3
- {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.5031055901,"tone-classification_SynTone_precision_modified":0.5228364877,"tone-classification_SynTone_recall_modified":0.5168736971,"tone-classification_SynTone_fscore_modified":0.4644759375,"tone-classification_SynTone_acc":0.5094339623,"tone-classification_SynTone_precision":0.5294130473,"tone-classification_SynTone_recall":0.523375253,"tone-classification_SynTone_fscore":0.4703184021,"tone-classification_SynTone_valid_output_ratio":0.9875776398,"nlu_score":0.6241793507}
4
- {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.6770186335,"tone-classification_SynTone_precision_modified":0.6041666667,"tone-classification_SynTone_recall_modified":0.5499765318,"tone-classification_SynTone_fscore_modified":0.5393404488,"tone-classification_SynTone_acc":0.6770186335,"tone-classification_SynTone_precision":0.6041666667,"tone-classification_SynTone_recall":0.5499765318,"tone-classification_SynTone_fscore":0.5393404488,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6297634971}
 
 
 
5
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.8198757764,"tone-classification_SynTone_precision_modified":0.7421490148,"tone-classification_SynTone_recall_modified":0.5782002048,"tone-classification_SynTone_fscore_modified":0.5758960402,"tone-classification_SynTone_acc":0.8198757764,"tone-classification_SynTone_precision":0.7421490148,"tone-classification_SynTone_recall":0.5782002048,"tone-classification_SynTone_fscore":0.5758960402,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6470954618}
6
- {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8944099379,"tone-classification_SynTone_precision_modified":0.8288135593,"tone-classification_SynTone_recall_modified":0.6730414747,"tone-classification_SynTone_fscore_modified":0.7238560859,"tone-classification_SynTone_acc":0.8944099379,"tone-classification_SynTone_precision":0.8288135593,"tone-classification_SynTone_recall":0.6730414747,"tone-classification_SynTone_fscore":0.7238560859,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.7144353486}
7
- {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8447204969,"tone-classification_SynTone_precision_modified":0.6820625483,"tone-classification_SynTone_recall_modified":0.5681634349,"tone-classification_SynTone_fscore_modified":0.579448271,"tone-classification_SynTone_acc":0.85,"tone-classification_SynTone_precision":0.6863254393,"tone-classification_SynTone_recall":0.5717144564,"tone-classification_SynTone_fscore":0.5830698227,"tone-classification_SynTone_valid_output_ratio":0.9937888199,"nlu_score":0.6749652797}
8
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.8447204969,"tone-classification_SynTone_precision_modified":0.7777640248,"tone-classification_SynTone_recall_modified":0.5874594641,"tone-classification_SynTone_fscore_modified":0.5990203378,"tone-classification_SynTone_acc":0.8447204969,"tone-classification_SynTone_precision":0.7777640248,"tone-classification_SynTone_recall":0.5874594641,"tone-classification_SynTone_fscore":0.5990203378,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6458443785}
9
- {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.5776397516,"tone-classification_SynTone_precision_modified":0.4784671979,"tone-classification_SynTone_recall_modified":0.4996211806,"tone-classification_SynTone_fscore_modified":0.4487276462,"tone-classification_SynTone_acc":0.5849056604,"tone-classification_SynTone_precision":0.4844856532,"tone-classification_SynTone_recall":0.5059057238,"tone-classification_SynTone_fscore":0.4543720191,"tone-classification_SynTone_valid_output_ratio":0.9875776398,"nlu_score":0.6552152029}
10
- {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.9130434783,"tone-classification_SynTone_precision_modified":0.8396595026,"tone-classification_SynTone_recall_modified":0.7058371736,"tone-classification_SynTone_fscore_modified":0.748745873,"tone-classification_SynTone_acc":0.9130434783,"tone-classification_SynTone_precision":0.8396595026,"tone-classification_SynTone_recall":0.7058371736,"tone-classification_SynTone_fscore":0.748745873,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6758278127}
11
- {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.8260869565,"tone-classification_SynTone_precision_modified":0.7005172378,"tone-classification_SynTone_recall_modified":0.6637288786,"tone-classification_SynTone_fscore_modified":0.6775611485,"tone-classification_SynTone_acc":0.8260869565,"tone-classification_SynTone_precision":0.7005172378,"tone-classification_SynTone_recall":0.6637288786,"tone-classification_SynTone_fscore":0.6775611485,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.699116864}
12
- {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8881987578,"tone-classification_SynTone_precision_modified":0.7920536322,"tone-classification_SynTone_recall_modified":0.7547299027,"tone-classification_SynTone_fscore_modified":0.7717575296,"tone-classification_SynTone_acc":0.8881987578,"tone-classification_SynTone_precision":0.7920536322,"tone-classification_SynTone_recall":0.7547299027,"tone-classification_SynTone_fscore":0.7717575296,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.7178891542}
13
- {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.7888198758,"tone-classification_SynTone_precision_modified":0.6529418051,"tone-classification_SynTone_recall_modified":0.7187467998,"tone-classification_SynTone_fscore_modified":0.6745690521,"tone-classification_SynTone_acc":0.7888198758,"tone-classification_SynTone_precision":0.6529418051,"tone-classification_SynTone_recall":0.7187467998,"tone-classification_SynTone_fscore":0.6745690521,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6898261633}
14
- {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.4968944099,"tone-classification_SynTone_precision_modified":0.5363835928,"tone-classification_SynTone_recall_modified":0.5772956136,"tone-classification_SynTone_fscore_modified":0.4755414981,"tone-classification_SynTone_acc":0.4968944099,"tone-classification_SynTone_precision":0.5363835928,"tone-classification_SynTone_recall":0.5772956136,"tone-classification_SynTone_fscore":0.4755414981,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6460328733}
15
- {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.8074534161,"tone-classification_SynTone_precision_modified":0.6799424424,"tone-classification_SynTone_recall_modified":0.6338304318,"tone-classification_SynTone_fscore_modified":0.6490392995,"tone-classification_SynTone_acc":0.8074534161,"tone-classification_SynTone_precision":0.6799424424,"tone-classification_SynTone_recall":0.6338304318,"tone-classification_SynTone_fscore":0.6490392995,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6714091535}
16
- {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.0310559006,"tone-classification_SynTone_precision_modified":0.0155279503,"tone-classification_SynTone_recall_modified":0.0186335404,"tone-classification_SynTone_fscore_modified":0.0169395822,"tone-classification_SynTone_acc":0.8333333333,"tone-classification_SynTone_precision":0.4166666667,"tone-classification_SynTone_recall":0.5,"tone-classification_SynTone_fscore":0.4545454545,"tone-classification_SynTone_valid_output_ratio":0.0372670807,"nlu_score":0.4086928082}
17
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.0,"tone-classification_SynTone_precision_modified":0.0,"tone-classification_SynTone_recall_modified":0.0,"tone-classification_SynTone_fscore_modified":0.0,"tone-classification_SynTone_acc":0.0,"tone-classification_SynTone_precision":0.0,"tone-classification_SynTone_recall":0.0,"tone-classification_SynTone_fscore":0.0,"tone-classification_SynTone_valid_output_ratio":0.0,"nlu_score":0.3749414991}
18
- {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.7763975155,"tone-classification_SynTone_precision_modified":0.6774948824,"tone-classification_SynTone_recall_modified":0.67683866,"tone-classification_SynTone_fscore_modified":0.668356732,"tone-classification_SynTone_acc":0.7911392405,"tone-classification_SynTone_precision":0.6903587093,"tone-classification_SynTone_recall":0.689690027,"tone-classification_SynTone_fscore":0.6810470497,"tone-classification_SynTone_valid_output_ratio":0.9813664596,"nlu_score":0.5661558794}
 
 
19
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.1552795031,"tone-classification_SynTone_precision_modified":0.1130928528,"tone-classification_SynTone_recall_modified":0.2155401891,"tone-classification_SynTone_fscore_modified":0.1465713376,"tone-classification_SynTone_acc":0.3424657534,"tone-classification_SynTone_precision":0.2494239631,"tone-classification_SynTone_recall":0.4753694581,"tone-classification_SynTone_fscore":0.3232600733,"tone-classification_SynTone_valid_output_ratio":0.4534161491,"nlu_score":0.456845738}
 
20
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.8260869565,"tone-classification_SynTone_precision_modified":0.7339379455,"tone-classification_SynTone_recall_modified":0.6971283495,"tone-classification_SynTone_fscore_modified":0.7109712868,"tone-classification_SynTone_acc":0.8260869565,"tone-classification_SynTone_precision":0.7339379455,"tone-classification_SynTone_recall":0.6971283495,"tone-classification_SynTone_fscore":0.7109712868,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6752949557}
 
 
 
21
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.6583850932,"tone-classification_SynTone_precision_modified":0.541342126,"tone-classification_SynTone_recall_modified":0.5629697742,"tone-classification_SynTone_fscore_modified":0.5384255059,"tone-classification_SynTone_acc":0.6666666667,"tone-classification_SynTone_precision":0.5481514609,"tone-classification_SynTone_recall":0.570051155,"tone-classification_SynTone_fscore":0.5451981537,"tone-classification_SynTone_valid_output_ratio":0.9875776398,"nlu_score":0.5121418762}
22
- {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.0248447205,"tone-classification_SynTone_precision_modified":0.0258799172,"tone-classification_SynTone_recall_modified":0.0266193434,"tone-classification_SynTone_fscore_modified":0.0174833218,"tone-classification_SynTone_acc":0.4,"tone-classification_SynTone_precision":0.4166666667,"tone-classification_SynTone_recall":0.4285714286,"tone-classification_SynTone_fscore":0.2814814815,"tone-classification_SynTone_valid_output_ratio":0.0621118012,"nlu_score":0.3619547874}
23
- {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.0,"tone-classification_SynTone_precision_modified":0.0,"tone-classification_SynTone_recall_modified":0.0,"tone-classification_SynTone_fscore_modified":0.0,"tone-classification_SynTone_acc":0.0,"tone-classification_SynTone_precision":0.0,"tone-classification_SynTone_recall":0.0,"tone-classification_SynTone_fscore":0.0,"tone-classification_SynTone_valid_output_ratio":0.0,"nlu_score":0.3928685253}
 
24
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.6832298137,"tone-classification_SynTone_precision_modified":0.6414593698,"tone-classification_SynTone_recall_modified":0.7386456733,"tone-classification_SynTone_fscore_modified":0.6520765046,"tone-classification_SynTone_acc":0.6832298137,"tone-classification_SynTone_precision":0.6414593698,"tone-classification_SynTone_recall":0.7386456733,"tone-classification_SynTone_fscore":0.6520765046,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6800109206}
25
- {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.9130434783,"tone-classification_SynTone_precision_modified":0.7790862291,"tone-classification_SynTone_recall_modified":0.747838795,"tone-classification_SynTone_fscore_modified":0.7624467793,"tone-classification_SynTone_acc":0.9130434783,"tone-classification_SynTone_precision":0.7790862291,"tone-classification_SynTone_recall":0.747838795,"tone-classification_SynTone_fscore":0.7624467793,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6833497104}
 
26
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8881987578,"tone-classification_SynTone_precision_modified":0.7743534483,"tone-classification_SynTone_recall_modified":0.6258725892,"tone-classification_SynTone_fscore_modified":0.6586899664,"tone-classification_SynTone_acc":0.8881987578,"tone-classification_SynTone_precision":0.7743534483,"tone-classification_SynTone_recall":0.6258725892,"tone-classification_SynTone_fscore":0.6586899664,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.7207167537}
27
- {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.850931677,"tone-classification_SynTone_precision_modified":0.7478203083,"tone-classification_SynTone_recall_modified":0.6482356204,"tone-classification_SynTone_fscore_modified":0.6776329308,"tone-classification_SynTone_acc":0.850931677,"tone-classification_SynTone_precision":0.7478203083,"tone-classification_SynTone_recall":0.6482356204,"tone-classification_SynTone_fscore":0.6776329308,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6459120734}
28
- {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.6335403727,"tone-classification_SynTone_precision_modified":0.5112987647,"tone-classification_SynTone_recall_modified":0.4686215462,"tone-classification_SynTone_fscore_modified":0.4864069611,"tone-classification_SynTone_acc":0.8947368421,"tone-classification_SynTone_precision":0.7220973783,"tone-classification_SynTone_recall":0.6618251661,"tone-classification_SynTone_fscore":0.6869431644,"tone-classification_SynTone_valid_output_ratio":0.7080745342,"nlu_score":0.4824528512}
29
- {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.7763975155,"tone-classification_SynTone_precision_modified":0.6667611945,"tone-classification_SynTone_recall_modified":0.6423579109,"tone-classification_SynTone_fscore_modified":0.6405012061,"tone-classification_SynTone_acc":0.7763975155,"tone-classification_SynTone_precision":0.6667611945,"tone-classification_SynTone_recall":0.6423579109,"tone-classification_SynTone_fscore":0.6405012061,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.7050532433}
30
- {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.850931677,"tone-classification_SynTone_precision_modified":0.7233493732,"tone-classification_SynTone_recall_modified":0.6079611555,"tone-classification_SynTone_fscore_modified":0.6452957705,"tone-classification_SynTone_acc":0.8616352201,"tone-classification_SynTone_precision":0.7324481074,"tone-classification_SynTone_recall":0.6156084656,"tone-classification_SynTone_fscore":0.6534126984,"tone-classification_SynTone_valid_output_ratio":0.9875776398,"nlu_score":0.6944128198}
31
- {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8260869565,"tone-classification_SynTone_precision_modified":0.7210630279,"tone-classification_SynTone_recall_modified":0.6034242192,"tone-classification_SynTone_fscore_modified":0.6394308021,"tone-classification_SynTone_acc":0.8260869565,"tone-classification_SynTone_precision":0.7210630279,"tone-classification_SynTone_recall":0.6034242192,"tone-classification_SynTone_fscore":0.6394308021,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6914202844}
32
- {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.7453416149,"tone-classification_SynTone_precision_modified":0.5770588432,"tone-classification_SynTone_recall_modified":0.5070698686,"tone-classification_SynTone_fscore_modified":0.5221093948,"tone-classification_SynTone_acc":0.7547169811,"tone-classification_SynTone_precision":0.584317445,"tone-classification_SynTone_recall":0.513448106,"tone-classification_SynTone_fscore":0.5286768085,"tone-classification_SynTone_valid_output_ratio":0.9875776398,"nlu_score":0.531045981}
33
- {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.6645962733,"tone-classification_SynTone_precision_modified":0.5738861003,"tone-classification_SynTone_recall_modified":0.5372045516,"tone-classification_SynTone_fscore_modified":0.5004137071,"tone-classification_SynTone_acc":0.6729559748,"tone-classification_SynTone_precision":0.5811047933,"tone-classification_SynTone_recall":0.5439618416,"tone-classification_SynTone_fscore":0.5067082191,"tone-classification_SynTone_valid_output_ratio":0.9875776398,"nlu_score":0.5241296095}
34
- {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8322981366,"tone-classification_SynTone_precision_modified":0.7291450859,"tone-classification_SynTone_recall_modified":0.5849377027,"tone-classification_SynTone_fscore_modified":0.6077296942,"tone-classification_SynTone_acc":0.8322981366,"tone-classification_SynTone_precision":0.7291450859,"tone-classification_SynTone_recall":0.5849377027,"tone-classification_SynTone_fscore":0.6077296942,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6262096694}
35
- {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.7577639752,"tone-classification_SynTone_precision_modified":0.4790444894,"tone-classification_SynTone_recall_modified":0.4595494112,"tone-classification_SynTone_fscore_modified":0.4397211112,"tone-classification_SynTone_acc":0.7577639752,"tone-classification_SynTone_precision":0.4790444894,"tone-classification_SynTone_recall":0.4595494112,"tone-classification_SynTone_fscore":0.4397211112,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6443219619}
36
- {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.6397515528,"tone-classification_SynTone_precision_modified":0.5483185514,"tone-classification_SynTone_recall_modified":0.590333248,"tone-classification_SynTone_fscore_modified":0.530467546,"tone-classification_SynTone_acc":0.6397515528,"tone-classification_SynTone_precision":0.5483185514,"tone-classification_SynTone_recall":0.590333248,"tone-classification_SynTone_fscore":0.530467546,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.5968415875}
37
- {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.2919254658,"tone-classification_SynTone_precision_modified":0.193413297,"tone-classification_SynTone_recall_modified":0.2361166008,"tone-classification_SynTone_fscore_modified":0.1873840673,"tone-classification_SynTone_acc":0.5802469136,"tone-classification_SynTone_precision":0.3844387755,"tone-classification_SynTone_recall":0.4693181818,"tone-classification_SynTone_fscore":0.3724547511,"tone-classification_SynTone_valid_output_ratio":0.5031055901,"nlu_score":0.3916645306}
38
- {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.9068322981,"tone-classification_SynTone_precision_modified":0.8215992694,"tone-classification_SynTone_recall_modified":0.7311721283,"tone-classification_SynTone_fscore_modified":0.7679761027,"tone-classification_SynTone_acc":0.9068322981,"tone-classification_SynTone_precision":0.8215992694,"tone-classification_SynTone_recall":0.7311721283,"tone-classification_SynTone_fscore":0.7679761027,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.7146808531}
39
- {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.4347826087,"tone-classification_SynTone_precision_modified":0.3863322077,"tone-classification_SynTone_recall_modified":0.383431452,"tone-classification_SynTone_fscore_modified":0.3837887153,"tone-classification_SynTone_acc":0.7865168539,"tone-classification_SynTone_precision":0.6988706228,"tone-classification_SynTone_recall":0.6936231884,"tone-classification_SynTone_fscore":0.6942694738,"tone-classification_SynTone_valid_output_ratio":0.5527950311,"nlu_score":0.6361186163}
40
- {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.347826087,"tone-classification_SynTone_precision_modified":0.4069335674,"tone-classification_SynTone_recall_modified":0.3420272309,"tone-classification_SynTone_fscore_modified":0.3323819164,"tone-classification_SynTone_acc":0.5333333333,"tone-classification_SynTone_precision":0.6239648033,"tone-classification_SynTone_recall":0.5244417541,"tone-classification_SynTone_fscore":0.5096522718,"tone-classification_SynTone_valid_output_ratio":0.652173913,"nlu_score":0.6255818412}
41
- {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.0,"tone-classification_SynTone_precision_modified":0.0,"tone-classification_SynTone_recall_modified":0.0,"tone-classification_SynTone_fscore_modified":0.0,"tone-classification_SynTone_acc":0.0,"tone-classification_SynTone_precision":0.0,"tone-classification_SynTone_recall":0.0,"tone-classification_SynTone_fscore":0.0,"tone-classification_SynTone_valid_output_ratio":0.0,"nlu_score":0.1368924446}
42
- {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.0,"tone-classification_SynTone_precision_modified":0.0,"tone-classification_SynTone_recall_modified":0.0,"tone-classification_SynTone_fscore_modified":0.0,"tone-classification_SynTone_acc":0.0,"tone-classification_SynTone_precision":0.0,"tone-classification_SynTone_recall":0.0,"tone-classification_SynTone_fscore":0.0,"tone-classification_SynTone_valid_output_ratio":0.0062111801,"nlu_score":0.046805056}
43
- {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8757763975,"tone-classification_SynTone_precision_modified":0.7230276907,"tone-classification_SynTone_recall_modified":0.6373933265,"tone-classification_SynTone_fscore_modified":0.669281794,"tone-classification_SynTone_acc":0.8757763975,"tone-classification_SynTone_precision":0.7230276907,"tone-classification_SynTone_recall":0.6373933265,"tone-classification_SynTone_fscore":0.669281794,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6992555201}
44
- {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.6583850932,"tone-classification_SynTone_precision_modified":0.1645962733,"tone-classification_SynTone_recall_modified":0.2391304348,"tone-classification_SynTone_fscore_modified":0.1949832776,"tone-classification_SynTone_acc":0.6883116883,"tone-classification_SynTone_precision":0.1720779221,"tone-classification_SynTone_recall":0.25,"tone-classification_SynTone_fscore":0.2038461538,"tone-classification_SynTone_valid_output_ratio":0.9565217391,"nlu_score":0.0372987683}
 
1
+ {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8322981366,"tone-classification_SynTone_precision_modified":0.7291450859,"tone-classification_SynTone_recall_modified":0.5849377027,"tone-classification_SynTone_fscore_modified":0.6077296942,"tone-classification_SynTone_acc":0.8322981366,"tone-classification_SynTone_precision":0.7291450859,"tone-classification_SynTone_recall":0.5849377027,"tone-classification_SynTone_fscore":0.6077296942,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6262096694}
2
+ {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.7453416149,"tone-classification_SynTone_precision_modified":0.5770588432,"tone-classification_SynTone_recall_modified":0.5070698686,"tone-classification_SynTone_fscore_modified":0.5221093948,"tone-classification_SynTone_acc":0.7547169811,"tone-classification_SynTone_precision":0.584317445,"tone-classification_SynTone_recall":0.513448106,"tone-classification_SynTone_fscore":0.5286768085,"tone-classification_SynTone_valid_output_ratio":0.9875776398,"nlu_score":0.531045981}
3
+ {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.7763975155,"tone-classification_SynTone_precision_modified":0.6667611945,"tone-classification_SynTone_recall_modified":0.6423579109,"tone-classification_SynTone_fscore_modified":0.6405012061,"tone-classification_SynTone_acc":0.7763975155,"tone-classification_SynTone_precision":0.6667611945,"tone-classification_SynTone_recall":0.6423579109,"tone-classification_SynTone_fscore":0.6405012061,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.7050532433}
4
+ {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.4347826087,"tone-classification_SynTone_precision_modified":0.3863322077,"tone-classification_SynTone_recall_modified":0.383431452,"tone-classification_SynTone_fscore_modified":0.3837887153,"tone-classification_SynTone_acc":0.7865168539,"tone-classification_SynTone_precision":0.6988706228,"tone-classification_SynTone_recall":0.6936231884,"tone-classification_SynTone_fscore":0.6942694738,"tone-classification_SynTone_valid_output_ratio":0.5527950311,"nlu_score":0.6361186163}
5
+ {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.0310559006,"tone-classification_SynTone_precision_modified":0.0155279503,"tone-classification_SynTone_recall_modified":0.0186335404,"tone-classification_SynTone_fscore_modified":0.0169395822,"tone-classification_SynTone_acc":0.8333333333,"tone-classification_SynTone_precision":0.4166666667,"tone-classification_SynTone_recall":0.5,"tone-classification_SynTone_fscore":0.4545454545,"tone-classification_SynTone_valid_output_ratio":0.0372670807,"nlu_score":0.4086928082}
6
+ {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.4968944099,"tone-classification_SynTone_precision_modified":0.5363835928,"tone-classification_SynTone_recall_modified":0.5772956136,"tone-classification_SynTone_fscore_modified":0.4755414981,"tone-classification_SynTone_acc":0.4968944099,"tone-classification_SynTone_precision":0.5363835928,"tone-classification_SynTone_recall":0.5772956136,"tone-classification_SynTone_fscore":0.4755414981,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6460328733}
7
+ {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.8074534161,"tone-classification_SynTone_precision_modified":0.6799424424,"tone-classification_SynTone_recall_modified":0.6338304318,"tone-classification_SynTone_fscore_modified":0.6490392995,"tone-classification_SynTone_acc":0.8074534161,"tone-classification_SynTone_precision":0.6799424424,"tone-classification_SynTone_recall":0.6338304318,"tone-classification_SynTone_fscore":0.6490392995,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6714091535}
8
+ {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.850931677,"tone-classification_SynTone_precision_modified":0.7233493732,"tone-classification_SynTone_recall_modified":0.6079611555,"tone-classification_SynTone_fscore_modified":0.6452957705,"tone-classification_SynTone_acc":0.8616352201,"tone-classification_SynTone_precision":0.7324481074,"tone-classification_SynTone_recall":0.6156084656,"tone-classification_SynTone_fscore":0.6534126984,"tone-classification_SynTone_valid_output_ratio":0.9875776398,"nlu_score":0.6944128198}
9
+ {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.7577639752,"tone-classification_SynTone_precision_modified":0.4790444894,"tone-classification_SynTone_recall_modified":0.4595494112,"tone-classification_SynTone_fscore_modified":0.4397211112,"tone-classification_SynTone_acc":0.7577639752,"tone-classification_SynTone_precision":0.4790444894,"tone-classification_SynTone_recall":0.4595494112,"tone-classification_SynTone_fscore":0.4397211112,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6443219619}
10
+ {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8757763975,"tone-classification_SynTone_precision_modified":0.7230276907,"tone-classification_SynTone_recall_modified":0.6373933265,"tone-classification_SynTone_fscore_modified":0.669281794,"tone-classification_SynTone_acc":0.8757763975,"tone-classification_SynTone_precision":0.7230276907,"tone-classification_SynTone_recall":0.6373933265,"tone-classification_SynTone_fscore":0.669281794,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6992555201}
11
+ {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8447204969,"tone-classification_SynTone_precision_modified":0.6820625483,"tone-classification_SynTone_recall_modified":0.5681634349,"tone-classification_SynTone_fscore_modified":0.579448271,"tone-classification_SynTone_acc":0.85,"tone-classification_SynTone_precision":0.6863254393,"tone-classification_SynTone_recall":0.5717144564,"tone-classification_SynTone_fscore":0.5830698227,"tone-classification_SynTone_valid_output_ratio":0.9937888199,"nlu_score":0.6749652797}
12
+ {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.347826087,"tone-classification_SynTone_precision_modified":0.4069335674,"tone-classification_SynTone_recall_modified":0.3420272309,"tone-classification_SynTone_fscore_modified":0.3323819164,"tone-classification_SynTone_acc":0.5333333333,"tone-classification_SynTone_precision":0.6239648033,"tone-classification_SynTone_recall":0.5244417541,"tone-classification_SynTone_fscore":0.5096522718,"tone-classification_SynTone_valid_output_ratio":0.652173913,"nlu_score":0.6255818412}
13
+ {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.2919254658,"tone-classification_SynTone_precision_modified":0.193413297,"tone-classification_SynTone_recall_modified":0.2361166008,"tone-classification_SynTone_fscore_modified":0.1873840673,"tone-classification_SynTone_acc":0.5802469136,"tone-classification_SynTone_precision":0.3844387755,"tone-classification_SynTone_recall":0.4693181818,"tone-classification_SynTone_fscore":0.3724547511,"tone-classification_SynTone_valid_output_ratio":0.5031055901,"nlu_score":0.3916645306}
14
+ {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.850931677,"tone-classification_SynTone_precision_modified":0.7478203083,"tone-classification_SynTone_recall_modified":0.6482356204,"tone-classification_SynTone_fscore_modified":0.6776329308,"tone-classification_SynTone_acc":0.850931677,"tone-classification_SynTone_precision":0.7478203083,"tone-classification_SynTone_recall":0.6482356204,"tone-classification_SynTone_fscore":0.6776329308,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6459120734}
15
+ {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.0,"tone-classification_SynTone_precision_modified":0.0,"tone-classification_SynTone_recall_modified":0.0,"tone-classification_SynTone_fscore_modified":0.0,"tone-classification_SynTone_acc":0.0,"tone-classification_SynTone_precision":0.0,"tone-classification_SynTone_recall":0.0,"tone-classification_SynTone_fscore":0.0,"tone-classification_SynTone_valid_output_ratio":0.0,"nlu_score":0.1368924446}
16
+ {"Model Name":"yandexgpt-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.5714285714,"tone-classification_SynTone_precision_modified":0.4591341311,"tone-classification_SynTone_recall_modified":0.3918521789,"tone-classification_SynTone_fscore_modified":0.4053320024,"tone-classification_SynTone_acc":0.6174496644,"tone-classification_SynTone_precision":0.4961113765,"tone-classification_SynTone_recall":0.4234107436,"tone-classification_SynTone_fscore":0.4379761905,"tone-classification_SynTone_valid_output_ratio":0.9254658385,"nlu_score":0.5761104945}
17
+ {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.7763975155,"tone-classification_SynTone_precision_modified":0.6774948824,"tone-classification_SynTone_recall_modified":0.67683866,"tone-classification_SynTone_fscore_modified":0.668356732,"tone-classification_SynTone_acc":0.7911392405,"tone-classification_SynTone_precision":0.6903587093,"tone-classification_SynTone_recall":0.689690027,"tone-classification_SynTone_fscore":0.6810470497,"tone-classification_SynTone_valid_output_ratio":0.9813664596,"nlu_score":0.5661558794}
18
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.801242236,"tone-classification_SynTone_precision_modified":0.6325567597,"tone-classification_SynTone_recall_modified":0.5106323908,"tone-classification_SynTone_fscore_modified":0.5211970678,"tone-classification_SynTone_acc":0.8164556962,"tone-classification_SynTone_precision":0.6445673311,"tone-classification_SynTone_recall":0.5203279425,"tone-classification_SynTone_fscore":0.5310932146,"tone-classification_SynTone_valid_output_ratio":0.9813664596,"nlu_score":0.628506628}
19
+ {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.8260869565,"tone-classification_SynTone_precision_modified":0.7005172378,"tone-classification_SynTone_recall_modified":0.6637288786,"tone-classification_SynTone_fscore_modified":0.6775611485,"tone-classification_SynTone_acc":0.8260869565,"tone-classification_SynTone_precision":0.7005172378,"tone-classification_SynTone_recall":0.6637288786,"tone-classification_SynTone_fscore":0.6775611485,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.699116864}
20
+ {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.0,"tone-classification_SynTone_precision_modified":0.0,"tone-classification_SynTone_recall_modified":0.0,"tone-classification_SynTone_fscore_modified":0.0,"tone-classification_SynTone_acc":0.0,"tone-classification_SynTone_precision":0.0,"tone-classification_SynTone_recall":0.0,"tone-classification_SynTone_fscore":0.0,"tone-classification_SynTone_valid_output_ratio":0.0,"nlu_score":0.3928685253}
21
+ {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.6397515528,"tone-classification_SynTone_precision_modified":0.5483185514,"tone-classification_SynTone_recall_modified":0.590333248,"tone-classification_SynTone_fscore_modified":0.530467546,"tone-classification_SynTone_acc":0.6397515528,"tone-classification_SynTone_precision":0.5483185514,"tone-classification_SynTone_recall":0.590333248,"tone-classification_SynTone_fscore":0.530467546,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.5968415875}
22
+ {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.9068322981,"tone-classification_SynTone_precision_modified":0.8215992694,"tone-classification_SynTone_recall_modified":0.7311721283,"tone-classification_SynTone_fscore_modified":0.7679761027,"tone-classification_SynTone_acc":0.9068322981,"tone-classification_SynTone_precision":0.8215992694,"tone-classification_SynTone_recall":0.7311721283,"tone-classification_SynTone_fscore":0.7679761027,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.7146808531}
23
+ {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8198757764,"tone-classification_SynTone_precision_modified":0.8180456965,"tone-classification_SynTone_recall_modified":0.5977640757,"tone-classification_SynTone_fscore_modified":0.6364434216,"tone-classification_SynTone_acc":0.8859060403,"tone-classification_SynTone_precision":0.8839285714,"tone-classification_SynTone_recall":0.6459061489,"tone-classification_SynTone_fscore":0.68770061,"tone-classification_SynTone_valid_output_ratio":0.9254658385,"nlu_score":0.7143086066}
24
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.8198757764,"tone-classification_SynTone_precision_modified":0.7421490148,"tone-classification_SynTone_recall_modified":0.5782002048,"tone-classification_SynTone_fscore_modified":0.5758960402,"tone-classification_SynTone_acc":0.8198757764,"tone-classification_SynTone_precision":0.7421490148,"tone-classification_SynTone_recall":0.5782002048,"tone-classification_SynTone_fscore":0.5758960402,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6470954618}
 
 
25
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.8447204969,"tone-classification_SynTone_precision_modified":0.7777640248,"tone-classification_SynTone_recall_modified":0.5874594641,"tone-classification_SynTone_fscore_modified":0.5990203378,"tone-classification_SynTone_acc":0.8447204969,"tone-classification_SynTone_precision":0.7777640248,"tone-classification_SynTone_recall":0.5874594641,"tone-classification_SynTone_fscore":0.5990203378,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6458443785}
26
+ {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.5031055901,"tone-classification_SynTone_precision_modified":0.5228364877,"tone-classification_SynTone_recall_modified":0.5168736971,"tone-classification_SynTone_fscore_modified":0.4644759375,"tone-classification_SynTone_acc":0.5094339623,"tone-classification_SynTone_precision":0.5294130473,"tone-classification_SynTone_recall":0.523375253,"tone-classification_SynTone_fscore":0.4703184021,"tone-classification_SynTone_valid_output_ratio":0.9875776398,"nlu_score":0.6241793507}
27
+ {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.6583850932,"tone-classification_SynTone_precision_modified":0.1645962733,"tone-classification_SynTone_recall_modified":0.2391304348,"tone-classification_SynTone_fscore_modified":0.1949832776,"tone-classification_SynTone_acc":0.6883116883,"tone-classification_SynTone_precision":0.1720779221,"tone-classification_SynTone_recall":0.25,"tone-classification_SynTone_fscore":0.2038461538,"tone-classification_SynTone_valid_output_ratio":0.9565217391,"nlu_score":0.0372987683}
 
 
 
 
 
 
28
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.0,"tone-classification_SynTone_precision_modified":0.0,"tone-classification_SynTone_recall_modified":0.0,"tone-classification_SynTone_fscore_modified":0.0,"tone-classification_SynTone_acc":0.0,"tone-classification_SynTone_precision":0.0,"tone-classification_SynTone_recall":0.0,"tone-classification_SynTone_fscore":0.0,"tone-classification_SynTone_valid_output_ratio":0.0,"nlu_score":0.3749414991}
29
+ {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.9130434783,"tone-classification_SynTone_precision_modified":0.8396595026,"tone-classification_SynTone_recall_modified":0.7058371736,"tone-classification_SynTone_fscore_modified":0.748745873,"tone-classification_SynTone_acc":0.9130434783,"tone-classification_SynTone_precision":0.8396595026,"tone-classification_SynTone_recall":0.7058371736,"tone-classification_SynTone_fscore":0.748745873,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6758278127}
30
+ {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.6335403727,"tone-classification_SynTone_precision_modified":0.5112987647,"tone-classification_SynTone_recall_modified":0.4686215462,"tone-classification_SynTone_fscore_modified":0.4864069611,"tone-classification_SynTone_acc":0.8947368421,"tone-classification_SynTone_precision":0.7220973783,"tone-classification_SynTone_recall":0.6618251661,"tone-classification_SynTone_fscore":0.6869431644,"tone-classification_SynTone_valid_output_ratio":0.7080745342,"nlu_score":0.4824528512}
31
+ {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8260869565,"tone-classification_SynTone_precision_modified":0.7210630279,"tone-classification_SynTone_recall_modified":0.6034242192,"tone-classification_SynTone_fscore_modified":0.6394308021,"tone-classification_SynTone_acc":0.8260869565,"tone-classification_SynTone_precision":0.7210630279,"tone-classification_SynTone_recall":0.6034242192,"tone-classification_SynTone_fscore":0.6394308021,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6914202844}
32
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.1552795031,"tone-classification_SynTone_precision_modified":0.1130928528,"tone-classification_SynTone_recall_modified":0.2155401891,"tone-classification_SynTone_fscore_modified":0.1465713376,"tone-classification_SynTone_acc":0.3424657534,"tone-classification_SynTone_precision":0.2494239631,"tone-classification_SynTone_recall":0.4753694581,"tone-classification_SynTone_fscore":0.3232600733,"tone-classification_SynTone_valid_output_ratio":0.4534161491,"nlu_score":0.456845738}
33
+ {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8881987578,"tone-classification_SynTone_precision_modified":0.7920536322,"tone-classification_SynTone_recall_modified":0.7547299027,"tone-classification_SynTone_fscore_modified":0.7717575296,"tone-classification_SynTone_acc":0.8881987578,"tone-classification_SynTone_precision":0.7920536322,"tone-classification_SynTone_recall":0.7547299027,"tone-classification_SynTone_fscore":0.7717575296,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.7178891542}
34
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.8260869565,"tone-classification_SynTone_precision_modified":0.7339379455,"tone-classification_SynTone_recall_modified":0.6971283495,"tone-classification_SynTone_fscore_modified":0.7109712868,"tone-classification_SynTone_acc":0.8260869565,"tone-classification_SynTone_precision":0.7339379455,"tone-classification_SynTone_recall":0.6971283495,"tone-classification_SynTone_fscore":0.7109712868,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6752949557}
35
+ {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.6645962733,"tone-classification_SynTone_precision_modified":0.5738861003,"tone-classification_SynTone_recall_modified":0.5372045516,"tone-classification_SynTone_fscore_modified":0.5004137071,"tone-classification_SynTone_acc":0.6729559748,"tone-classification_SynTone_precision":0.5811047933,"tone-classification_SynTone_recall":0.5439618416,"tone-classification_SynTone_fscore":0.5067082191,"tone-classification_SynTone_valid_output_ratio":0.9875776398,"nlu_score":0.5241296095}
36
+ {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.0,"tone-classification_SynTone_precision_modified":0.0,"tone-classification_SynTone_recall_modified":0.0,"tone-classification_SynTone_fscore_modified":0.0,"tone-classification_SynTone_acc":0.0,"tone-classification_SynTone_precision":0.0,"tone-classification_SynTone_recall":0.0,"tone-classification_SynTone_fscore":0.0,"tone-classification_SynTone_valid_output_ratio":0.0062111801,"nlu_score":0.046805056}
37
+ {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.9130434783,"tone-classification_SynTone_precision_modified":0.7790862291,"tone-classification_SynTone_recall_modified":0.747838795,"tone-classification_SynTone_fscore_modified":0.7624467793,"tone-classification_SynTone_acc":0.9130434783,"tone-classification_SynTone_precision":0.7790862291,"tone-classification_SynTone_recall":0.747838795,"tone-classification_SynTone_fscore":0.7624467793,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6833497104}
38
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.6583850932,"tone-classification_SynTone_precision_modified":0.541342126,"tone-classification_SynTone_recall_modified":0.5629697742,"tone-classification_SynTone_fscore_modified":0.5384255059,"tone-classification_SynTone_acc":0.6666666667,"tone-classification_SynTone_precision":0.5481514609,"tone-classification_SynTone_recall":0.570051155,"tone-classification_SynTone_fscore":0.5451981537,"tone-classification_SynTone_valid_output_ratio":0.9875776398,"nlu_score":0.5121418762}
39
+ {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.7888198758,"tone-classification_SynTone_precision_modified":0.6529418051,"tone-classification_SynTone_recall_modified":0.7187467998,"tone-classification_SynTone_fscore_modified":0.6745690521,"tone-classification_SynTone_acc":0.7888198758,"tone-classification_SynTone_precision":0.6529418051,"tone-classification_SynTone_recall":0.7187467998,"tone-classification_SynTone_fscore":0.6745690521,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6898261633}
40
+ {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8944099379,"tone-classification_SynTone_precision_modified":0.8288135593,"tone-classification_SynTone_recall_modified":0.6730414747,"tone-classification_SynTone_fscore_modified":0.7238560859,"tone-classification_SynTone_acc":0.8944099379,"tone-classification_SynTone_precision":0.8288135593,"tone-classification_SynTone_recall":0.6730414747,"tone-classification_SynTone_fscore":0.7238560859,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.7144353486}
41
+ {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.5776397516,"tone-classification_SynTone_precision_modified":0.4784671979,"tone-classification_SynTone_recall_modified":0.4996211806,"tone-classification_SynTone_fscore_modified":0.4487276462,"tone-classification_SynTone_acc":0.5849056604,"tone-classification_SynTone_precision":0.4844856532,"tone-classification_SynTone_recall":0.5059057238,"tone-classification_SynTone_fscore":0.4543720191,"tone-classification_SynTone_valid_output_ratio":0.9875776398,"nlu_score":0.6552152029}
42
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.6832298137,"tone-classification_SynTone_precision_modified":0.6414593698,"tone-classification_SynTone_recall_modified":0.7386456733,"tone-classification_SynTone_fscore_modified":0.6520765046,"tone-classification_SynTone_acc":0.6832298137,"tone-classification_SynTone_precision":0.6414593698,"tone-classification_SynTone_recall":0.7386456733,"tone-classification_SynTone_fscore":0.6520765046,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6800109206}
43
+ {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.0248447205,"tone-classification_SynTone_precision_modified":0.0258799172,"tone-classification_SynTone_recall_modified":0.0266193434,"tone-classification_SynTone_fscore_modified":0.0174833218,"tone-classification_SynTone_acc":0.4,"tone-classification_SynTone_precision":0.4166666667,"tone-classification_SynTone_recall":0.4285714286,"tone-classification_SynTone_fscore":0.2814814815,"tone-classification_SynTone_valid_output_ratio":0.0621118012,"nlu_score":0.3619547874}
44
+ {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.6770186335,"tone-classification_SynTone_precision_modified":0.6041666667,"tone-classification_SynTone_recall_modified":0.5499765318,"tone-classification_SynTone_fscore_modified":0.5393404488,"tone-classification_SynTone_acc":0.6770186335,"tone-classification_SynTone_precision":0.6041666667,"tone-classification_SynTone_recall":0.5499765318,"tone-classification_SynTone_fscore":0.5393404488,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6297634971}
45
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8881987578,"tone-classification_SynTone_precision_modified":0.7743534483,"tone-classification_SynTone_recall_modified":0.6258725892,"tone-classification_SynTone_fscore_modified":0.6586899664,"tone-classification_SynTone_acc":0.8881987578,"tone-classification_SynTone_precision":0.7743534483,"tone-classification_SynTone_recall":0.6258725892,"tone-classification_SynTone_fscore":0.6586899664,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.7207167537}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leaderboard/boards_data/topic-classification_sid.jsonl CHANGED
@@ -1,44 +1,45 @@
1
- {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.792,"topic-classification_sid_precision_modified":0.662532811,"topic-classification_sid_recall_modified":0.6635073397,"topic-classification_sid_fscore_modified":0.6583038933,"topic-classification_sid_acc":0.792,"topic-classification_sid_precision":0.662532811,"topic-classification_sid_recall":0.6635073397,"topic-classification_sid_fscore":0.6583038933,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.7143086066}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.674,"topic-classification_sid_precision_modified":0.583557125,"topic-classification_sid_recall_modified":0.5945763405,"topic-classification_sid_fscore_modified":0.5786133505,"topic-classification_sid_acc":0.6962809917,"topic-classification_sid_precision":0.6028482696,"topic-classification_sid_recall":0.6142317567,"topic-classification_sid_fscore":0.5977410646,"topic-classification_sid_valid_output_ratio":0.968,"nlu_score":0.628506628}
3
- {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.562,"topic-classification_sid_precision_modified":0.4846723602,"topic-classification_sid_recall_modified":0.454656985,"topic-classification_sid_fscore_modified":0.424509489,"topic-classification_sid_acc":0.5928270042,"topic-classification_sid_precision":0.511257764,"topic-classification_sid_recall":0.4795959757,"topic-classification_sid_fscore":0.4477948196,"topic-classification_sid_valid_output_ratio":0.948,"nlu_score":0.6241793507}
4
- {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.682,"topic-classification_sid_precision_modified":0.5730049986,"topic-classification_sid_recall_modified":0.5717337985,"topic-classification_sid_fscore_modified":0.5606248332,"topic-classification_sid_acc":0.7239915074,"topic-classification_sid_precision":0.6082855612,"topic-classification_sid_recall":0.6069360918,"topic-classification_sid_fscore":0.595143135,"topic-classification_sid_valid_output_ratio":0.942,"nlu_score":0.6297634971}
 
 
 
5
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.648,"topic-classification_sid_precision_modified":0.6016092111,"topic-classification_sid_recall_modified":0.5643209905,"topic-classification_sid_fscore_modified":0.5520527995,"topic-classification_sid_acc":0.648,"topic-classification_sid_precision":0.6016092111,"topic-classification_sid_recall":0.5643209905,"topic-classification_sid_fscore":0.5520527995,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6470954618}
6
- {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.77,"topic-classification_sid_precision_modified":0.6850986976,"topic-classification_sid_recall_modified":0.6830888647,"topic-classification_sid_fscore_modified":0.6616877857,"topic-classification_sid_acc":0.77,"topic-classification_sid_precision":0.6850986976,"topic-classification_sid_recall":0.6830888647,"topic-classification_sid_fscore":0.6616877857,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.7144353486}
7
- {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.704,"topic-classification_sid_precision_modified":0.5896102708,"topic-classification_sid_recall_modified":0.5884196886,"topic-classification_sid_fscore_modified":0.5823719558,"topic-classification_sid_acc":0.704,"topic-classification_sid_precision":0.5896102708,"topic-classification_sid_recall":0.5884196886,"topic-classification_sid_fscore":0.5823719558,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6749652797}
8
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.722,"topic-classification_sid_precision_modified":0.6456808245,"topic-classification_sid_recall_modified":0.6300099701,"topic-classification_sid_fscore_modified":0.6126876785,"topic-classification_sid_acc":0.722,"topic-classification_sid_precision":0.6456808245,"topic-classification_sid_recall":0.6300099701,"topic-classification_sid_fscore":0.6126876785,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6458443785}
9
- {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.458,"topic-classification_sid_precision_modified":0.3928192753,"topic-classification_sid_recall_modified":0.4072039595,"topic-classification_sid_fscore_modified":0.3544902396,"topic-classification_sid_acc":0.6378830084,"topic-classification_sid_precision":0.5471020547,"topic-classification_sid_recall":0.5671364338,"topic-classification_sid_fscore":0.4937189966,"topic-classification_sid_valid_output_ratio":0.718,"nlu_score":0.6552152029}
10
- {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.748,"topic-classification_sid_precision_modified":0.6428566774,"topic-classification_sid_recall_modified":0.6633522535,"topic-classification_sid_fscore_modified":0.628605048,"topic-classification_sid_acc":0.748,"topic-classification_sid_precision":0.6428566774,"topic-classification_sid_recall":0.6633522535,"topic-classification_sid_fscore":0.628605048,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6758278127}
11
- {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.658,"topic-classification_sid_precision_modified":0.6006169042,"topic-classification_sid_recall_modified":0.5559595512,"topic-classification_sid_fscore_modified":0.5315039094,"topic-classification_sid_acc":0.6659919028,"topic-classification_sid_precision":0.6079118464,"topic-classification_sid_recall":0.5627120963,"topic-classification_sid_fscore":0.5379594225,"topic-classification_sid_valid_output_ratio":0.988,"nlu_score":0.699116864}
12
- {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.698,"topic-classification_sid_precision_modified":0.6469803488,"topic-classification_sid_recall_modified":0.6081573421,"topic-classification_sid_fscore_modified":0.5947799999,"topic-classification_sid_acc":0.698,"topic-classification_sid_precision":0.6469803488,"topic-classification_sid_recall":0.6081573421,"topic-classification_sid_fscore":0.5947799999,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.7178891542}
13
- {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.702,"topic-classification_sid_precision_modified":0.6070538637,"topic-classification_sid_recall_modified":0.5902772191,"topic-classification_sid_fscore_modified":0.5805725473,"topic-classification_sid_acc":0.7076612903,"topic-classification_sid_precision":0.6119494594,"topic-classification_sid_recall":0.5950375192,"topic-classification_sid_fscore":0.585254584,"topic-classification_sid_valid_output_ratio":0.992,"nlu_score":0.6898261633}
14
- {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.636,"topic-classification_sid_precision_modified":0.6248180645,"topic-classification_sid_recall_modified":0.5200071748,"topic-classification_sid_fscore_modified":0.5212205085,"topic-classification_sid_acc":0.636,"topic-classification_sid_precision":0.6248180645,"topic-classification_sid_recall":0.5200071748,"topic-classification_sid_fscore":0.5212205085,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6460328733}
15
- {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.742,"topic-classification_sid_precision_modified":0.6167254178,"topic-classification_sid_recall_modified":0.6125584783,"topic-classification_sid_fscore_modified":0.5990165281,"topic-classification_sid_acc":0.7449799197,"topic-classification_sid_precision":0.6192022267,"topic-classification_sid_recall":0.6150185525,"topic-classification_sid_fscore":0.6014222169,"topic-classification_sid_valid_output_ratio":0.996,"nlu_score":0.6714091535}
16
- {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.62,"topic-classification_sid_precision_modified":0.5570257373,"topic-classification_sid_recall_modified":0.5619595288,"topic-classification_sid_fscore_modified":0.5217673603,"topic-classification_sid_acc":0.6540084388,"topic-classification_sid_precision":0.5875798917,"topic-classification_sid_recall":0.5927843131,"topic-classification_sid_fscore":0.5503875108,"topic-classification_sid_valid_output_ratio":0.948,"nlu_score":0.4086928082}
17
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.75,"topic-classification_sid_precision_modified":0.6327306402,"topic-classification_sid_recall_modified":0.6347455045,"topic-classification_sid_fscore_modified":0.6231971632,"topic-classification_sid_acc":0.75,"topic-classification_sid_precision":0.6327306402,"topic-classification_sid_recall":0.6347455045,"topic-classification_sid_fscore":0.6231971632,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.3749414991}
18
- {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.656,"topic-classification_sid_precision_modified":0.5819241823,"topic-classification_sid_recall_modified":0.5649560499,"topic-classification_sid_fscore_modified":0.5472284688,"topic-classification_sid_acc":0.7038626609,"topic-classification_sid_precision":0.6243821699,"topic-classification_sid_recall":0.6061760192,"topic-classification_sid_fscore":0.5871550095,"topic-classification_sid_valid_output_ratio":0.932,"nlu_score":0.5661558794}
 
 
19
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.664,"topic-classification_sid_precision_modified":0.5946530353,"topic-classification_sid_recall_modified":0.5719701311,"topic-classification_sid_fscore_modified":0.5732259537,"topic-classification_sid_acc":0.6720647773,"topic-classification_sid_precision":0.6018755418,"topic-classification_sid_recall":0.5789171368,"topic-classification_sid_fscore":0.5801882122,"topic-classification_sid_valid_output_ratio":0.988,"nlu_score":0.456845738}
 
20
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.77,"topic-classification_sid_precision_modified":0.6561573641,"topic-classification_sid_recall_modified":0.6752129415,"topic-classification_sid_fscore_modified":0.6425647774,"topic-classification_sid_acc":0.77,"topic-classification_sid_precision":0.6561573641,"topic-classification_sid_recall":0.6752129415,"topic-classification_sid_fscore":0.6425647774,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6752949557}
 
 
 
21
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.368,"topic-classification_sid_precision_modified":0.6014683953,"topic-classification_sid_recall_modified":0.2970267773,"topic-classification_sid_fscore_modified":0.2767247094,"topic-classification_sid_acc":0.3739837398,"topic-classification_sid_precision":0.6112483692,"topic-classification_sid_recall":0.301856481,"topic-classification_sid_fscore":0.2812242981,"topic-classification_sid_valid_output_ratio":0.984,"nlu_score":0.5121418762}
22
- {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.38,"topic-classification_sid_precision_modified":0.2019117794,"topic-classification_sid_recall_modified":0.1756256003,"topic-classification_sid_fscore_modified":0.1195613397,"topic-classification_sid_acc":0.3830645161,"topic-classification_sid_precision":0.2035401003,"topic-classification_sid_recall":0.1770419358,"topic-classification_sid_fscore":0.1205255441,"topic-classification_sid_valid_output_ratio":0.992,"nlu_score":0.3619547874}
23
- {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.628,"topic-classification_sid_precision_modified":0.5459969989,"topic-classification_sid_recall_modified":0.52365232,"topic-classification_sid_fscore_modified":0.5199604173,"topic-classification_sid_acc":0.6840958606,"topic-classification_sid_precision":0.5947679727,"topic-classification_sid_recall":0.5704273638,"topic-classification_sid_fscore":0.5664056834,"topic-classification_sid_valid_output_ratio":0.918,"nlu_score":0.3928685253}
 
24
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.594,"topic-classification_sid_precision_modified":0.5608272475,"topic-classification_sid_recall_modified":0.5329233323,"topic-classification_sid_fscore_modified":0.527792484,"topic-classification_sid_acc":0.6359743041,"topic-classification_sid_precision":0.6004574384,"topic-classification_sid_recall":0.5705817263,"topic-classification_sid_fscore":0.5650883126,"topic-classification_sid_valid_output_ratio":0.934,"nlu_score":0.6800109206}
25
- {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.734,"topic-classification_sid_precision_modified":0.6555308571,"topic-classification_sid_recall_modified":0.6460010178,"topic-classification_sid_fscore_modified":0.6267962167,"topic-classification_sid_acc":0.734,"topic-classification_sid_precision":0.6555308571,"topic-classification_sid_recall":0.6460010178,"topic-classification_sid_fscore":0.6267962167,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6833497104}
 
26
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.812,"topic-classification_sid_precision_modified":0.682958996,"topic-classification_sid_recall_modified":0.6932034561,"topic-classification_sid_fscore_modified":0.6715874758,"topic-classification_sid_acc":0.812,"topic-classification_sid_precision":0.682958996,"topic-classification_sid_recall":0.6932034561,"topic-classification_sid_fscore":0.6715874758,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.7207167537}
27
- {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.704,"topic-classification_sid_precision_modified":0.6679390306,"topic-classification_sid_recall_modified":0.6259469635,"topic-classification_sid_fscore_modified":0.6265189311,"topic-classification_sid_acc":0.7054108216,"topic-classification_sid_precision":0.6692775858,"topic-classification_sid_recall":0.6272013662,"topic-classification_sid_fscore":0.62777448,"topic-classification_sid_valid_output_ratio":0.998,"nlu_score":0.6459120734}
28
- {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.618,"topic-classification_sid_precision_modified":0.6284518226,"topic-classification_sid_recall_modified":0.548003418,"topic-classification_sid_fscore_modified":0.5631911176,"topic-classification_sid_acc":0.618,"topic-classification_sid_precision":0.6284518226,"topic-classification_sid_recall":0.548003418,"topic-classification_sid_fscore":0.5631911176,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.4824528512}
29
- {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.784,"topic-classification_sid_precision_modified":0.6819447861,"topic-classification_sid_recall_modified":0.6518325808,"topic-classification_sid_fscore_modified":0.6519138204,"topic-classification_sid_acc":0.7903225806,"topic-classification_sid_precision":0.6874443408,"topic-classification_sid_recall":0.6570892952,"topic-classification_sid_fscore":0.6571711899,"topic-classification_sid_valid_output_ratio":0.992,"nlu_score":0.7050532433}
30
- {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.728,"topic-classification_sid_precision_modified":0.6159857721,"topic-classification_sid_recall_modified":0.6134167814,"topic-classification_sid_fscore_modified":0.60399392,"topic-classification_sid_acc":0.728,"topic-classification_sid_precision":0.6159857721,"topic-classification_sid_recall":0.6134167814,"topic-classification_sid_fscore":0.60399392,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6944128198}
31
- {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.74,"topic-classification_sid_precision_modified":0.6308563241,"topic-classification_sid_recall_modified":0.6658780758,"topic-classification_sid_fscore_modified":0.634021247,"topic-classification_sid_acc":0.7505070994,"topic-classification_sid_precision":0.6398137161,"topic-classification_sid_recall":0.6753327341,"topic-classification_sid_fscore":0.6430235771,"topic-classification_sid_valid_output_ratio":0.986,"nlu_score":0.6914202844}
32
- {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.412,"topic-classification_sid_precision_modified":0.3819473808,"topic-classification_sid_recall_modified":0.2194110821,"topic-classification_sid_fscore_modified":0.166159266,"topic-classification_sid_acc":0.4735632184,"topic-classification_sid_precision":0.439019978,"topic-classification_sid_recall":0.2521966461,"topic-classification_sid_fscore":0.190987662,"topic-classification_sid_valid_output_ratio":0.87,"nlu_score":0.531045981}
33
- {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.394,"topic-classification_sid_precision_modified":0.5177096689,"topic-classification_sid_recall_modified":0.297966245,"topic-classification_sid_fscore_modified":0.2626151509,"topic-classification_sid_acc":0.4368070953,"topic-classification_sid_precision":0.5739575043,"topic-classification_sid_recall":0.3303395178,"topic-classification_sid_fscore":0.2911476174,"topic-classification_sid_valid_output_ratio":0.902,"nlu_score":0.5241296095}
34
- {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.462,"topic-classification_sid_precision_modified":0.4918469172,"topic-classification_sid_recall_modified":0.3261812324,"topic-classification_sid_fscore_modified":0.304777991,"topic-classification_sid_acc":0.5191011236,"topic-classification_sid_precision":0.5526369856,"topic-classification_sid_recall":0.3664957667,"topic-classification_sid_fscore":0.3424471809,"topic-classification_sid_valid_output_ratio":0.89,"nlu_score":0.6262096694}
35
- {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.674,"topic-classification_sid_precision_modified":0.5851704149,"topic-classification_sid_recall_modified":0.5936032958,"topic-classification_sid_fscore_modified":0.5766197062,"topic-classification_sid_acc":0.6821862348,"topic-classification_sid_precision":0.5922777479,"topic-classification_sid_recall":0.6008130524,"topic-classification_sid_fscore":0.5836231844,"topic-classification_sid_valid_output_ratio":0.988,"nlu_score":0.6443219619}
36
- {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.56,"topic-classification_sid_precision_modified":0.5309838171,"topic-classification_sid_recall_modified":0.4706044677,"topic-classification_sid_fscore_modified":0.484170357,"topic-classification_sid_acc":0.5702647658,"topic-classification_sid_precision":0.5407167181,"topic-classification_sid_recall":0.4792306188,"topic-classification_sid_fscore":0.4930451701,"topic-classification_sid_valid_output_ratio":0.982,"nlu_score":0.5968415875}
37
- {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.078,"topic-classification_sid_precision_modified":0.1626278832,"topic-classification_sid_recall_modified":0.0869379377,"topic-classification_sid_fscore_modified":0.061595189,"topic-classification_sid_acc":0.1211180124,"topic-classification_sid_precision":0.2525277689,"topic-classification_sid_recall":0.1349967977,"topic-classification_sid_fscore":0.0956447035,"topic-classification_sid_valid_output_ratio":0.644,"nlu_score":0.3916645306}
38
- {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.756,"topic-classification_sid_precision_modified":0.6530505866,"topic-classification_sid_recall_modified":0.6684817133,"topic-classification_sid_fscore_modified":0.6358572631,"topic-classification_sid_acc":0.756,"topic-classification_sid_precision":0.6530505866,"topic-classification_sid_recall":0.6684817133,"topic-classification_sid_fscore":0.6358572631,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.7146808531}
39
- {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.608,"topic-classification_sid_precision_modified":0.5971774069,"topic-classification_sid_recall_modified":0.5095088497,"topic-classification_sid_fscore_modified":0.5160494942,"topic-classification_sid_acc":0.6333333333,"topic-classification_sid_precision":0.6220597988,"topic-classification_sid_recall":0.5307383851,"topic-classification_sid_fscore":0.5375515565,"topic-classification_sid_valid_output_ratio":0.96,"nlu_score":0.6361186163}
40
- {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.586,"topic-classification_sid_precision_modified":0.5883032084,"topic-classification_sid_recall_modified":0.4720717732,"topic-classification_sid_fscore_modified":0.4937437004,"topic-classification_sid_acc":0.586,"topic-classification_sid_precision":0.5883032084,"topic-classification_sid_recall":0.4720717732,"topic-classification_sid_fscore":0.4937437004,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6255818412}
41
- {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.018,"topic-classification_sid_precision_modified":0.1357154412,"topic-classification_sid_recall_modified":0.1259808206,"topic-classification_sid_fscore_modified":0.0108903706,"topic-classification_sid_acc":0.0184804928,"topic-classification_sid_precision":0.1393382353,"topic-classification_sid_recall":0.1293437584,"topic-classification_sid_fscore":0.0111810786,"topic-classification_sid_valid_output_ratio":0.974,"nlu_score":0.1368924446}
42
- {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.0,"topic-classification_sid_precision_modified":0.0,"topic-classification_sid_recall_modified":0.0,"topic-classification_sid_fscore_modified":0.0,"topic-classification_sid_acc":0.0,"topic-classification_sid_precision":0.0,"topic-classification_sid_recall":0.0,"topic-classification_sid_fscore":0.0,"topic-classification_sid_valid_output_ratio":0.0,"nlu_score":0.046805056}
43
- {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.802,"topic-classification_sid_precision_modified":0.674947982,"topic-classification_sid_recall_modified":0.6917120865,"topic-classification_sid_fscore_modified":0.6712251699,"topic-classification_sid_acc":0.802,"topic-classification_sid_precision":0.674947982,"topic-classification_sid_recall":0.6917120865,"topic-classification_sid_fscore":0.6712251699,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6992555201}
44
- {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.006,"topic-classification_sid_precision_modified":0.001,"topic-classification_sid_recall_modified":0.004,"topic-classification_sid_fscore_modified":0.0016,"topic-classification_sid_acc":0.25,"topic-classification_sid_precision":0.0416666667,"topic-classification_sid_recall":0.1666666667,"topic-classification_sid_fscore":0.0666666667,"topic-classification_sid_valid_output_ratio":0.024,"nlu_score":0.0372987683}
 
1
+ {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.462,"topic-classification_sid_precision_modified":0.4918469172,"topic-classification_sid_recall_modified":0.3261812324,"topic-classification_sid_fscore_modified":0.304777991,"topic-classification_sid_acc":0.5191011236,"topic-classification_sid_precision":0.5526369856,"topic-classification_sid_recall":0.3664957667,"topic-classification_sid_fscore":0.3424471809,"topic-classification_sid_valid_output_ratio":0.89,"nlu_score":0.6262096694}
2
+ {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.412,"topic-classification_sid_precision_modified":0.3819473808,"topic-classification_sid_recall_modified":0.2194110821,"topic-classification_sid_fscore_modified":0.166159266,"topic-classification_sid_acc":0.4735632184,"topic-classification_sid_precision":0.439019978,"topic-classification_sid_recall":0.2521966461,"topic-classification_sid_fscore":0.190987662,"topic-classification_sid_valid_output_ratio":0.87,"nlu_score":0.531045981}
3
+ {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.784,"topic-classification_sid_precision_modified":0.6819447861,"topic-classification_sid_recall_modified":0.6518325808,"topic-classification_sid_fscore_modified":0.6519138204,"topic-classification_sid_acc":0.7903225806,"topic-classification_sid_precision":0.6874443408,"topic-classification_sid_recall":0.6570892952,"topic-classification_sid_fscore":0.6571711899,"topic-classification_sid_valid_output_ratio":0.992,"nlu_score":0.7050532433}
4
+ {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.608,"topic-classification_sid_precision_modified":0.5971774069,"topic-classification_sid_recall_modified":0.5095088497,"topic-classification_sid_fscore_modified":0.5160494942,"topic-classification_sid_acc":0.6333333333,"topic-classification_sid_precision":0.6220597988,"topic-classification_sid_recall":0.5307383851,"topic-classification_sid_fscore":0.5375515565,"topic-classification_sid_valid_output_ratio":0.96,"nlu_score":0.6361186163}
5
+ {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.62,"topic-classification_sid_precision_modified":0.5570257373,"topic-classification_sid_recall_modified":0.5619595288,"topic-classification_sid_fscore_modified":0.5217673603,"topic-classification_sid_acc":0.6540084388,"topic-classification_sid_precision":0.5875798917,"topic-classification_sid_recall":0.5927843131,"topic-classification_sid_fscore":0.5503875108,"topic-classification_sid_valid_output_ratio":0.948,"nlu_score":0.4086928082}
6
+ {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.636,"topic-classification_sid_precision_modified":0.6248180645,"topic-classification_sid_recall_modified":0.5200071748,"topic-classification_sid_fscore_modified":0.5212205085,"topic-classification_sid_acc":0.636,"topic-classification_sid_precision":0.6248180645,"topic-classification_sid_recall":0.5200071748,"topic-classification_sid_fscore":0.5212205085,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6460328733}
7
+ {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.742,"topic-classification_sid_precision_modified":0.6167254178,"topic-classification_sid_recall_modified":0.6125584783,"topic-classification_sid_fscore_modified":0.5990165281,"topic-classification_sid_acc":0.7449799197,"topic-classification_sid_precision":0.6192022267,"topic-classification_sid_recall":0.6150185525,"topic-classification_sid_fscore":0.6014222169,"topic-classification_sid_valid_output_ratio":0.996,"nlu_score":0.6714091535}
8
+ {"Model Name":"gemini-2.5-flash","thinking_method":"���️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.728,"topic-classification_sid_precision_modified":0.6159857721,"topic-classification_sid_recall_modified":0.6134167814,"topic-classification_sid_fscore_modified":0.60399392,"topic-classification_sid_acc":0.728,"topic-classification_sid_precision":0.6159857721,"topic-classification_sid_recall":0.6134167814,"topic-classification_sid_fscore":0.60399392,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6944128198}
9
+ {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.674,"topic-classification_sid_precision_modified":0.5851704149,"topic-classification_sid_recall_modified":0.5936032958,"topic-classification_sid_fscore_modified":0.5766197062,"topic-classification_sid_acc":0.6821862348,"topic-classification_sid_precision":0.5922777479,"topic-classification_sid_recall":0.6008130524,"topic-classification_sid_fscore":0.5836231844,"topic-classification_sid_valid_output_ratio":0.988,"nlu_score":0.6443219619}
10
+ {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.802,"topic-classification_sid_precision_modified":0.674947982,"topic-classification_sid_recall_modified":0.6917120865,"topic-classification_sid_fscore_modified":0.6712251699,"topic-classification_sid_acc":0.802,"topic-classification_sid_precision":0.674947982,"topic-classification_sid_recall":0.6917120865,"topic-classification_sid_fscore":0.6712251699,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6992555201}
11
+ {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.704,"topic-classification_sid_precision_modified":0.5896102708,"topic-classification_sid_recall_modified":0.5884196886,"topic-classification_sid_fscore_modified":0.5823719558,"topic-classification_sid_acc":0.704,"topic-classification_sid_precision":0.5896102708,"topic-classification_sid_recall":0.5884196886,"topic-classification_sid_fscore":0.5823719558,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6749652797}
12
+ {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.586,"topic-classification_sid_precision_modified":0.5883032084,"topic-classification_sid_recall_modified":0.4720717732,"topic-classification_sid_fscore_modified":0.4937437004,"topic-classification_sid_acc":0.586,"topic-classification_sid_precision":0.5883032084,"topic-classification_sid_recall":0.4720717732,"topic-classification_sid_fscore":0.4937437004,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6255818412}
13
+ {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.078,"topic-classification_sid_precision_modified":0.1626278832,"topic-classification_sid_recall_modified":0.0869379377,"topic-classification_sid_fscore_modified":0.061595189,"topic-classification_sid_acc":0.1211180124,"topic-classification_sid_precision":0.2525277689,"topic-classification_sid_recall":0.1349967977,"topic-classification_sid_fscore":0.0956447035,"topic-classification_sid_valid_output_ratio":0.644,"nlu_score":0.3916645306}
14
+ {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.704,"topic-classification_sid_precision_modified":0.6679390306,"topic-classification_sid_recall_modified":0.6259469635,"topic-classification_sid_fscore_modified":0.6265189311,"topic-classification_sid_acc":0.7054108216,"topic-classification_sid_precision":0.6692775858,"topic-classification_sid_recall":0.6272013662,"topic-classification_sid_fscore":0.62777448,"topic-classification_sid_valid_output_ratio":0.998,"nlu_score":0.6459120734}
15
+ {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.018,"topic-classification_sid_precision_modified":0.1357154412,"topic-classification_sid_recall_modified":0.1259808206,"topic-classification_sid_fscore_modified":0.0108903706,"topic-classification_sid_acc":0.0184804928,"topic-classification_sid_precision":0.1393382353,"topic-classification_sid_recall":0.1293437584,"topic-classification_sid_fscore":0.0111810786,"topic-classification_sid_valid_output_ratio":0.974,"nlu_score":0.1368924446}
16
+ {"Model Name":"yandexgpt-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.542,"topic-classification_sid_precision_modified":0.5260067354,"topic-classification_sid_recall_modified":0.4616060981,"topic-classification_sid_fscore_modified":0.4802703537,"topic-classification_sid_acc":0.5599173554,"topic-classification_sid_precision":0.5433953878,"topic-classification_sid_recall":0.4768658038,"topic-classification_sid_fscore":0.4961470596,"topic-classification_sid_valid_output_ratio":0.968,"nlu_score":0.5761104945}
17
+ {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.656,"topic-classification_sid_precision_modified":0.5819241823,"topic-classification_sid_recall_modified":0.5649560499,"topic-classification_sid_fscore_modified":0.5472284688,"topic-classification_sid_acc":0.7038626609,"topic-classification_sid_precision":0.6243821699,"topic-classification_sid_recall":0.6061760192,"topic-classification_sid_fscore":0.5871550095,"topic-classification_sid_valid_output_ratio":0.932,"nlu_score":0.5661558794}
18
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.674,"topic-classification_sid_precision_modified":0.583557125,"topic-classification_sid_recall_modified":0.5945763405,"topic-classification_sid_fscore_modified":0.5786133505,"topic-classification_sid_acc":0.6962809917,"topic-classification_sid_precision":0.6028482696,"topic-classification_sid_recall":0.6142317567,"topic-classification_sid_fscore":0.5977410646,"topic-classification_sid_valid_output_ratio":0.968,"nlu_score":0.628506628}
19
+ {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.658,"topic-classification_sid_precision_modified":0.6006169042,"topic-classification_sid_recall_modified":0.5559595512,"topic-classification_sid_fscore_modified":0.5315039094,"topic-classification_sid_acc":0.6659919028,"topic-classification_sid_precision":0.6079118464,"topic-classification_sid_recall":0.5627120963,"topic-classification_sid_fscore":0.5379594225,"topic-classification_sid_valid_output_ratio":0.988,"nlu_score":0.699116864}
20
+ {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.628,"topic-classification_sid_precision_modified":0.5459969989,"topic-classification_sid_recall_modified":0.52365232,"topic-classification_sid_fscore_modified":0.5199604173,"topic-classification_sid_acc":0.6840958606,"topic-classification_sid_precision":0.5947679727,"topic-classification_sid_recall":0.5704273638,"topic-classification_sid_fscore":0.5664056834,"topic-classification_sid_valid_output_ratio":0.918,"nlu_score":0.3928685253}
21
+ {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.56,"topic-classification_sid_precision_modified":0.5309838171,"topic-classification_sid_recall_modified":0.4706044677,"topic-classification_sid_fscore_modified":0.484170357,"topic-classification_sid_acc":0.5702647658,"topic-classification_sid_precision":0.5407167181,"topic-classification_sid_recall":0.4792306188,"topic-classification_sid_fscore":0.4930451701,"topic-classification_sid_valid_output_ratio":0.982,"nlu_score":0.5968415875}
22
+ {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.756,"topic-classification_sid_precision_modified":0.6530505866,"topic-classification_sid_recall_modified":0.6684817133,"topic-classification_sid_fscore_modified":0.6358572631,"topic-classification_sid_acc":0.756,"topic-classification_sid_precision":0.6530505866,"topic-classification_sid_recall":0.6684817133,"topic-classification_sid_fscore":0.6358572631,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.7146808531}
23
+ {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.792,"topic-classification_sid_precision_modified":0.662532811,"topic-classification_sid_recall_modified":0.6635073397,"topic-classification_sid_fscore_modified":0.6583038933,"topic-classification_sid_acc":0.792,"topic-classification_sid_precision":0.662532811,"topic-classification_sid_recall":0.6635073397,"topic-classification_sid_fscore":0.6583038933,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.7143086066}
24
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.648,"topic-classification_sid_precision_modified":0.6016092111,"topic-classification_sid_recall_modified":0.5643209905,"topic-classification_sid_fscore_modified":0.5520527995,"topic-classification_sid_acc":0.648,"topic-classification_sid_precision":0.6016092111,"topic-classification_sid_recall":0.5643209905,"topic-classification_sid_fscore":0.5520527995,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6470954618}
 
 
25
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.722,"topic-classification_sid_precision_modified":0.6456808245,"topic-classification_sid_recall_modified":0.6300099701,"topic-classification_sid_fscore_modified":0.6126876785,"topic-classification_sid_acc":0.722,"topic-classification_sid_precision":0.6456808245,"topic-classification_sid_recall":0.6300099701,"topic-classification_sid_fscore":0.6126876785,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6458443785}
26
+ {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.562,"topic-classification_sid_precision_modified":0.4846723602,"topic-classification_sid_recall_modified":0.454656985,"topic-classification_sid_fscore_modified":0.424509489,"topic-classification_sid_acc":0.5928270042,"topic-classification_sid_precision":0.511257764,"topic-classification_sid_recall":0.4795959757,"topic-classification_sid_fscore":0.4477948196,"topic-classification_sid_valid_output_ratio":0.948,"nlu_score":0.6241793507}
27
+ {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.006,"topic-classification_sid_precision_modified":0.001,"topic-classification_sid_recall_modified":0.004,"topic-classification_sid_fscore_modified":0.0016,"topic-classification_sid_acc":0.25,"topic-classification_sid_precision":0.0416666667,"topic-classification_sid_recall":0.1666666667,"topic-classification_sid_fscore":0.0666666667,"topic-classification_sid_valid_output_ratio":0.024,"nlu_score":0.0372987683}
 
 
 
 
 
 
28
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.75,"topic-classification_sid_precision_modified":0.6327306402,"topic-classification_sid_recall_modified":0.6347455045,"topic-classification_sid_fscore_modified":0.6231971632,"topic-classification_sid_acc":0.75,"topic-classification_sid_precision":0.6327306402,"topic-classification_sid_recall":0.6347455045,"topic-classification_sid_fscore":0.6231971632,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.3749414991}
29
+ {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.748,"topic-classification_sid_precision_modified":0.6428566774,"topic-classification_sid_recall_modified":0.6633522535,"topic-classification_sid_fscore_modified":0.628605048,"topic-classification_sid_acc":0.748,"topic-classification_sid_precision":0.6428566774,"topic-classification_sid_recall":0.6633522535,"topic-classification_sid_fscore":0.628605048,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6758278127}
30
+ {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.618,"topic-classification_sid_precision_modified":0.6284518226,"topic-classification_sid_recall_modified":0.548003418,"topic-classification_sid_fscore_modified":0.5631911176,"topic-classification_sid_acc":0.618,"topic-classification_sid_precision":0.6284518226,"topic-classification_sid_recall":0.548003418,"topic-classification_sid_fscore":0.5631911176,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.4824528512}
31
+ {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.74,"topic-classification_sid_precision_modified":0.6308563241,"topic-classification_sid_recall_modified":0.6658780758,"topic-classification_sid_fscore_modified":0.634021247,"topic-classification_sid_acc":0.7505070994,"topic-classification_sid_precision":0.6398137161,"topic-classification_sid_recall":0.6753327341,"topic-classification_sid_fscore":0.6430235771,"topic-classification_sid_valid_output_ratio":0.986,"nlu_score":0.6914202844}
32
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.664,"topic-classification_sid_precision_modified":0.5946530353,"topic-classification_sid_recall_modified":0.5719701311,"topic-classification_sid_fscore_modified":0.5732259537,"topic-classification_sid_acc":0.6720647773,"topic-classification_sid_precision":0.6018755418,"topic-classification_sid_recall":0.5789171368,"topic-classification_sid_fscore":0.5801882122,"topic-classification_sid_valid_output_ratio":0.988,"nlu_score":0.456845738}
33
+ {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.698,"topic-classification_sid_precision_modified":0.6469803488,"topic-classification_sid_recall_modified":0.6081573421,"topic-classification_sid_fscore_modified":0.5947799999,"topic-classification_sid_acc":0.698,"topic-classification_sid_precision":0.6469803488,"topic-classification_sid_recall":0.6081573421,"topic-classification_sid_fscore":0.5947799999,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.7178891542}
34
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.77,"topic-classification_sid_precision_modified":0.6561573641,"topic-classification_sid_recall_modified":0.6752129415,"topic-classification_sid_fscore_modified":0.6425647774,"topic-classification_sid_acc":0.77,"topic-classification_sid_precision":0.6561573641,"topic-classification_sid_recall":0.6752129415,"topic-classification_sid_fscore":0.6425647774,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6752949557}
35
+ {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.394,"topic-classification_sid_precision_modified":0.5177096689,"topic-classification_sid_recall_modified":0.297966245,"topic-classification_sid_fscore_modified":0.2626151509,"topic-classification_sid_acc":0.4368070953,"topic-classification_sid_precision":0.5739575043,"topic-classification_sid_recall":0.3303395178,"topic-classification_sid_fscore":0.2911476174,"topic-classification_sid_valid_output_ratio":0.902,"nlu_score":0.5241296095}
36
+ {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.0,"topic-classification_sid_precision_modified":0.0,"topic-classification_sid_recall_modified":0.0,"topic-classification_sid_fscore_modified":0.0,"topic-classification_sid_acc":0.0,"topic-classification_sid_precision":0.0,"topic-classification_sid_recall":0.0,"topic-classification_sid_fscore":0.0,"topic-classification_sid_valid_output_ratio":0.0,"nlu_score":0.046805056}
37
+ {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.734,"topic-classification_sid_precision_modified":0.6555308571,"topic-classification_sid_recall_modified":0.6460010178,"topic-classification_sid_fscore_modified":0.6267962167,"topic-classification_sid_acc":0.734,"topic-classification_sid_precision":0.6555308571,"topic-classification_sid_recall":0.6460010178,"topic-classification_sid_fscore":0.6267962167,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6833497104}
38
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.368,"topic-classification_sid_precision_modified":0.6014683953,"topic-classification_sid_recall_modified":0.2970267773,"topic-classification_sid_fscore_modified":0.2767247094,"topic-classification_sid_acc":0.3739837398,"topic-classification_sid_precision":0.6112483692,"topic-classification_sid_recall":0.301856481,"topic-classification_sid_fscore":0.2812242981,"topic-classification_sid_valid_output_ratio":0.984,"nlu_score":0.5121418762}
39
+ {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.702,"topic-classification_sid_precision_modified":0.6070538637,"topic-classification_sid_recall_modified":0.5902772191,"topic-classification_sid_fscore_modified":0.5805725473,"topic-classification_sid_acc":0.7076612903,"topic-classification_sid_precision":0.6119494594,"topic-classification_sid_recall":0.5950375192,"topic-classification_sid_fscore":0.585254584,"topic-classification_sid_valid_output_ratio":0.992,"nlu_score":0.6898261633}
40
+ {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.77,"topic-classification_sid_precision_modified":0.6850986976,"topic-classification_sid_recall_modified":0.6830888647,"topic-classification_sid_fscore_modified":0.6616877857,"topic-classification_sid_acc":0.77,"topic-classification_sid_precision":0.6850986976,"topic-classification_sid_recall":0.6830888647,"topic-classification_sid_fscore":0.6616877857,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.7144353486}
41
+ {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.458,"topic-classification_sid_precision_modified":0.3928192753,"topic-classification_sid_recall_modified":0.4072039595,"topic-classification_sid_fscore_modified":0.3544902396,"topic-classification_sid_acc":0.6378830084,"topic-classification_sid_precision":0.5471020547,"topic-classification_sid_recall":0.5671364338,"topic-classification_sid_fscore":0.4937189966,"topic-classification_sid_valid_output_ratio":0.718,"nlu_score":0.6552152029}
42
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.594,"topic-classification_sid_precision_modified":0.5608272475,"topic-classification_sid_recall_modified":0.5329233323,"topic-classification_sid_fscore_modified":0.527792484,"topic-classification_sid_acc":0.6359743041,"topic-classification_sid_precision":0.6004574384,"topic-classification_sid_recall":0.5705817263,"topic-classification_sid_fscore":0.5650883126,"topic-classification_sid_valid_output_ratio":0.934,"nlu_score":0.6800109206}
43
+ {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.38,"topic-classification_sid_precision_modified":0.2019117794,"topic-classification_sid_recall_modified":0.1756256003,"topic-classification_sid_fscore_modified":0.1195613397,"topic-classification_sid_acc":0.3830645161,"topic-classification_sid_precision":0.2035401003,"topic-classification_sid_recall":0.1770419358,"topic-classification_sid_fscore":0.1205255441,"topic-classification_sid_valid_output_ratio":0.992,"nlu_score":0.3619547874}
44
+ {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.682,"topic-classification_sid_precision_modified":0.5730049986,"topic-classification_sid_recall_modified":0.5717337985,"topic-classification_sid_fscore_modified":0.5606248332,"topic-classification_sid_acc":0.7239915074,"topic-classification_sid_precision":0.6082855612,"topic-classification_sid_recall":0.6069360918,"topic-classification_sid_fscore":0.595143135,"topic-classification_sid_valid_output_ratio":0.942,"nlu_score":0.6297634971}
45
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.812,"topic-classification_sid_precision_modified":0.682958996,"topic-classification_sid_recall_modified":0.6932034561,"topic-classification_sid_fscore_modified":0.6715874758,"topic-classification_sid_acc":0.812,"topic-classification_sid_precision":0.682958996,"topic-classification_sid_recall":0.6932034561,"topic-classification_sid_fscore":0.6715874758,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.7207167537}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leaderboard/boards_data/translation-ar2fa_ar2fa.jsonl CHANGED
@@ -1,44 +1,45 @@
1
- {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1471879954,"translation-ar2fa_ar2fa_sahife_bleu":0.1294214814,"translation-ar2fa_ar2fa_nahj_bleu":0.0642841927,"translation-ar2fa_ar2fa_quran_bleu":0.2437131219,"nlg_score":0.1779340777}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0704966071,"translation-ar2fa_ar2fa_sahife_bleu":0.0675578984,"translation-ar2fa_ar2fa_nahj_bleu":0.042116411,"translation-ar2fa_ar2fa_quran_bleu":0.1003965021,"nlg_score":0.1334687319}
3
- {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0186923531,"translation-ar2fa_ar2fa_sahife_bleu":0.0174521967,"translation-ar2fa_ar2fa_nahj_bleu":0.0097734226,"translation-ar2fa_ar2fa_quran_bleu":0.0284054936,"nlg_score":0.0949943578}
4
- {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.1414109272,"translation-ar2fa_ar2fa_sahife_bleu":0.136408042,"translation-ar2fa_ar2fa_nahj_bleu":0.0653197648,"translation-ar2fa_ar2fa_quran_bleu":0.2187004167,"nlg_score":0.1880477876}
 
 
 
5
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.1034307333,"translation-ar2fa_ar2fa_sahife_bleu":0.0990571463,"translation-ar2fa_ar2fa_nahj_bleu":0.0554677739,"translation-ar2fa_ar2fa_quran_bleu":0.1533691318,"nlg_score":0.1430866672}
6
- {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1334075162,"translation-ar2fa_ar2fa_sahife_bleu":0.1143867102,"translation-ar2fa_ar2fa_nahj_bleu":0.063272709,"translation-ar2fa_ar2fa_quran_bleu":0.2190563892,"nlg_score":0.181552926}
7
- {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1183647194,"translation-ar2fa_ar2fa_sahife_bleu":0.0905358622,"translation-ar2fa_ar2fa_nahj_bleu":0.0522591914,"translation-ar2fa_ar2fa_quran_bleu":0.2089938281,"nlg_score":0.1643361642}
8
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.1064381612,"translation-ar2fa_ar2fa_sahife_bleu":0.089727948,"translation-ar2fa_ar2fa_nahj_bleu":0.0552602241,"translation-ar2fa_ar2fa_quran_bleu":0.1717674145,"nlg_score":0.1538910531}
9
- {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0226935201,"translation-ar2fa_ar2fa_sahife_bleu":0.0196359142,"translation-ar2fa_ar2fa_nahj_bleu":0.010693835,"translation-ar2fa_ar2fa_quran_bleu":0.0371508269,"nlg_score":0.0940241349}
10
- {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1534130086,"translation-ar2fa_ar2fa_sahife_bleu":0.1250461134,"translation-ar2fa_ar2fa_nahj_bleu":0.0624466634,"translation-ar2fa_ar2fa_quran_bleu":0.2681979318,"nlg_score":0.194675133}
11
- {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.032619077,"translation-ar2fa_ar2fa_sahife_bleu":0.0333185867,"translation-ar2fa_ar2fa_nahj_bleu":0.0106299838,"translation-ar2fa_ar2fa_quran_bleu":0.0528092057,"nlg_score":0.1196804312}
12
- {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.13132367,"translation-ar2fa_ar2fa_sahife_bleu":0.109076205,"translation-ar2fa_ar2fa_nahj_bleu":0.0659265093,"translation-ar2fa_ar2fa_quran_bleu":0.2156984377,"nlg_score":0.1631530657}
13
- {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0465792583,"translation-ar2fa_ar2fa_sahife_bleu":0.023795336,"translation-ar2fa_ar2fa_nahj_bleu":0.0121091058,"translation-ar2fa_ar2fa_quran_bleu":0.1021098256,"nlg_score":0.1067134448}
14
- {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0910450298,"translation-ar2fa_ar2fa_sahife_bleu":0.0862679894,"translation-ar2fa_ar2fa_nahj_bleu":0.0558129824,"translation-ar2fa_ar2fa_quran_bleu":0.1292925153,"nlg_score":0.16056333}
15
- {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0985860814,"translation-ar2fa_ar2fa_sahife_bleu":0.0857687109,"translation-ar2fa_ar2fa_nahj_bleu":0.0622600203,"translation-ar2fa_ar2fa_quran_bleu":0.1459132099,"nlg_score":0.1679338638}
16
- {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0997661237,"translation-ar2fa_ar2fa_sahife_bleu":0.0880416079,"translation-ar2fa_ar2fa_nahj_bleu":0.0425251453,"translation-ar2fa_ar2fa_quran_bleu":0.165869569,"nlg_score":0.1567965528}
17
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.0581992714,"translation-ar2fa_ar2fa_sahife_bleu":0.0540221076,"translation-ar2fa_ar2fa_nahj_bleu":0.0233017704,"translation-ar2fa_ar2fa_quran_bleu":0.095529061,"nlg_score":0.1089333827}
18
- {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0594554675,"translation-ar2fa_ar2fa_sahife_bleu":0.0539986603,"translation-ar2fa_ar2fa_nahj_bleu":0.035240584,"translation-ar2fa_ar2fa_quran_bleu":0.0879164142,"nlg_score":0.1319091735}
 
 
19
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0439654732,"translation-ar2fa_ar2fa_sahife_bleu":0.0313283708,"translation-ar2fa_ar2fa_nahj_bleu":0.0308641232,"translation-ar2fa_ar2fa_quran_bleu":0.0690488581,"nlg_score":0.112015688}
 
20
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0302818033,"translation-ar2fa_ar2fa_sahife_bleu":0.0272381325,"translation-ar2fa_ar2fa_nahj_bleu":0.0129029913,"translation-ar2fa_ar2fa_quran_bleu":0.0498353456,"nlg_score":0.0934094344}
 
 
 
21
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0636385541,"translation-ar2fa_ar2fa_sahife_bleu":0.0557180428,"translation-ar2fa_ar2fa_nahj_bleu":0.0539968488,"translation-ar2fa_ar2fa_quran_bleu":0.0807186853,"nlg_score":0.1389297212}
22
- {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.01007549,"translation-ar2fa_ar2fa_sahife_bleu":0.0116017776,"translation-ar2fa_ar2fa_nahj_bleu":0.0067782437,"translation-ar2fa_ar2fa_quran_bleu":0.0116815864,"nlg_score":0.0682994522}
23
- {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0608470749,"translation-ar2fa_ar2fa_sahife_bleu":0.0636783644,"translation-ar2fa_ar2fa_nahj_bleu":0.0258604511,"translation-ar2fa_ar2fa_quran_bleu":0.091253078,"nlg_score":0.1196400535}
 
24
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.1302111402,"translation-ar2fa_ar2fa_sahife_bleu":0.1104606951,"translation-ar2fa_ar2fa_nahj_bleu":0.0742081609,"translation-ar2fa_ar2fa_quran_bleu":0.2031644157,"nlg_score":0.2010896964}
25
- {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.143500711,"translation-ar2fa_ar2fa_sahife_bleu":0.1221294429,"translation-ar2fa_ar2fa_nahj_bleu":0.069521493,"translation-ar2fa_ar2fa_quran_bleu":0.235152236,"nlg_score":0.1901206806}
 
26
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1371181815,"translation-ar2fa_ar2fa_sahife_bleu":0.1148434226,"translation-ar2fa_ar2fa_nahj_bleu":0.0635817712,"translation-ar2fa_ar2fa_quran_bleu":0.2292525303,"nlg_score":0.1764906292}
27
- {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1397574972,"translation-ar2fa_ar2fa_sahife_bleu":0.1273211367,"translation-ar2fa_ar2fa_nahj_bleu":0.0658485892,"translation-ar2fa_ar2fa_quran_bleu":0.2224073202,"nlg_score":0.1810678527}
28
- {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0396780688,"translation-ar2fa_ar2fa_sahife_bleu":0.0355366473,"translation-ar2fa_ar2fa_nahj_bleu":0.0160671452,"translation-ar2fa_ar2fa_quran_bleu":0.0662498677,"nlg_score":0.1137933652}
29
- {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.15661924,"translation-ar2fa_ar2fa_sahife_bleu":0.1122809429,"translation-ar2fa_ar2fa_nahj_bleu":0.0629397909,"translation-ar2fa_ar2fa_quran_bleu":0.2899530138,"nlg_score":0.178231145}
30
- {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.0772188097,"translation-ar2fa_ar2fa_sahife_bleu":0.0610321929,"translation-ar2fa_ar2fa_nahj_bleu":0.0273061824,"translation-ar2fa_ar2fa_quran_bleu":0.1408224224,"nlg_score":0.1368740087}
31
- {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1430472928,"translation-ar2fa_ar2fa_sahife_bleu":0.1326018858,"translation-ar2fa_ar2fa_nahj_bleu":0.0532180128,"translation-ar2fa_ar2fa_quran_bleu":0.2388305158,"nlg_score":0.1659339021}
32
- {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.1144863268,"translation-ar2fa_ar2fa_sahife_bleu":0.1190971594,"translation-ar2fa_ar2fa_nahj_bleu":0.0648109303,"translation-ar2fa_ar2fa_quran_bleu":0.157067121,"nlg_score":0.1641995602}
33
- {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.0997358382,"translation-ar2fa_ar2fa_sahife_bleu":0.0861966287,"translation-ar2fa_ar2fa_nahj_bleu":0.0459362946,"translation-ar2fa_ar2fa_quran_bleu":0.164384614,"nlg_score":0.1417778788}
34
- {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1315367808,"translation-ar2fa_ar2fa_sahife_bleu":0.1063921688,"translation-ar2fa_ar2fa_nahj_bleu":0.0642188893,"translation-ar2fa_ar2fa_quran_bleu":0.2206333896,"nlg_score":0.1665903777}
35
- {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0807053257,"translation-ar2fa_ar2fa_sahife_bleu":0.0691470022,"translation-ar2fa_ar2fa_nahj_bleu":0.0461328234,"translation-ar2fa_ar2fa_quran_bleu":0.1251075264,"nlg_score":0.1324031203}
36
- {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0821020713,"translation-ar2fa_ar2fa_sahife_bleu":0.0730469461,"translation-ar2fa_ar2fa_nahj_bleu":0.0579031327,"translation-ar2fa_ar2fa_quran_bleu":0.1141461882,"nlg_score":0.1557270864}
37
- {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0352516229,"translation-ar2fa_ar2fa_sahife_bleu":0.031818336,"translation-ar2fa_ar2fa_nahj_bleu":0.0219225394,"translation-ar2fa_ar2fa_quran_bleu":0.0513475391,"nlg_score":0.0944140383}
38
- {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1542520457,"translation-ar2fa_ar2fa_sahife_bleu":0.1283925803,"translation-ar2fa_ar2fa_nahj_bleu":0.0660434951,"translation-ar2fa_ar2fa_quran_bleu":0.2639096342,"nlg_score":0.18964968}
39
- {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0334933514,"translation-ar2fa_ar2fa_sahife_bleu":0.0313812328,"translation-ar2fa_ar2fa_nahj_bleu":0.013862611,"translation-ar2fa_ar2fa_quran_bleu":0.0542546733,"nlg_score":0.0880621978}
40
- {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.090408759,"translation-ar2fa_ar2fa_sahife_bleu":0.0778953352,"translation-ar2fa_ar2fa_nahj_bleu":0.0610049198,"translation-ar2fa_ar2fa_quran_bleu":0.13085583,"nlg_score":0.164118288}
41
- {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0381647164,"translation-ar2fa_ar2fa_sahife_bleu":0.0517672982,"translation-ar2fa_ar2fa_nahj_bleu":0.0235396776,"translation-ar2fa_ar2fa_quran_bleu":0.0384559215,"nlg_score":0.1129755187}
42
- {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0056487667,"translation-ar2fa_ar2fa_sahife_bleu":0.0084650778,"translation-ar2fa_ar2fa_nahj_bleu":0.0073044047,"translation-ar2fa_ar2fa_quran_bleu":0.0012595996,"nlg_score":0.0823387318}
43
- {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.0228295116,"translation-ar2fa_ar2fa_sahife_bleu":0.024915354,"translation-ar2fa_ar2fa_nahj_bleu":0.0099254821,"translation-ar2fa_ar2fa_quran_bleu":0.0326798966,"nlg_score":0.1151518212}
44
- {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.015748738,"translation-ar2fa_ar2fa_sahife_bleu":0.0222158178,"translation-ar2fa_ar2fa_nahj_bleu":0.0107622927,"translation-ar2fa_ar2fa_quran_bleu":0.0140187813,"nlg_score":0.0509841903}
 
1
+ {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1315367808,"translation-ar2fa_ar2fa_sahife_bleu":0.1063921688,"translation-ar2fa_ar2fa_nahj_bleu":0.0642188893,"translation-ar2fa_ar2fa_quran_bleu":0.2206333896,"nlg_score":0.1665903777}
2
+ {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.1144863268,"translation-ar2fa_ar2fa_sahife_bleu":0.1190971594,"translation-ar2fa_ar2fa_nahj_bleu":0.0648109303,"translation-ar2fa_ar2fa_quran_bleu":0.157067121,"nlg_score":0.1641995602}
3
+ {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.15661924,"translation-ar2fa_ar2fa_sahife_bleu":0.1122809429,"translation-ar2fa_ar2fa_nahj_bleu":0.0629397909,"translation-ar2fa_ar2fa_quran_bleu":0.2899530138,"nlg_score":0.178231145}
4
+ {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0334933514,"translation-ar2fa_ar2fa_sahife_bleu":0.0313812328,"translation-ar2fa_ar2fa_nahj_bleu":0.013862611,"translation-ar2fa_ar2fa_quran_bleu":0.0542546733,"nlg_score":0.0880621978}
5
+ {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0997661237,"translation-ar2fa_ar2fa_sahife_bleu":0.0880416079,"translation-ar2fa_ar2fa_nahj_bleu":0.0425251453,"translation-ar2fa_ar2fa_quran_bleu":0.165869569,"nlg_score":0.1567965528}
6
+ {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0910450298,"translation-ar2fa_ar2fa_sahife_bleu":0.0862679894,"translation-ar2fa_ar2fa_nahj_bleu":0.0558129824,"translation-ar2fa_ar2fa_quran_bleu":0.1292925153,"nlg_score":0.16056333}
7
+ {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0985860814,"translation-ar2fa_ar2fa_sahife_bleu":0.0857687109,"translation-ar2fa_ar2fa_nahj_bleu":0.0622600203,"translation-ar2fa_ar2fa_quran_bleu":0.1459132099,"nlg_score":0.1679338638}
8
+ {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.0772188097,"translation-ar2fa_ar2fa_sahife_bleu":0.0610321929,"translation-ar2fa_ar2fa_nahj_bleu":0.0273061824,"translation-ar2fa_ar2fa_quran_bleu":0.1408224224,"nlg_score":0.1368740087}
9
+ {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0807053257,"translation-ar2fa_ar2fa_sahife_bleu":0.0691470022,"translation-ar2fa_ar2fa_nahj_bleu":0.0461328234,"translation-ar2fa_ar2fa_quran_bleu":0.1251075264,"nlg_score":0.1324031203}
10
+ {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.0228295116,"translation-ar2fa_ar2fa_sahife_bleu":0.024915354,"translation-ar2fa_ar2fa_nahj_bleu":0.0099254821,"translation-ar2fa_ar2fa_quran_bleu":0.0326798966,"nlg_score":0.1151518212}
11
+ {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1183647194,"translation-ar2fa_ar2fa_sahife_bleu":0.0905358622,"translation-ar2fa_ar2fa_nahj_bleu":0.0522591914,"translation-ar2fa_ar2fa_quran_bleu":0.2089938281,"nlg_score":0.1643361642}
12
+ {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.090408759,"translation-ar2fa_ar2fa_sahife_bleu":0.0778953352,"translation-ar2fa_ar2fa_nahj_bleu":0.0610049198,"translation-ar2fa_ar2fa_quran_bleu":0.13085583,"nlg_score":0.164118288}
13
+ {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0352516229,"translation-ar2fa_ar2fa_sahife_bleu":0.031818336,"translation-ar2fa_ar2fa_nahj_bleu":0.0219225394,"translation-ar2fa_ar2fa_quran_bleu":0.0513475391,"nlg_score":0.0944140383}
14
+ {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1397574972,"translation-ar2fa_ar2fa_sahife_bleu":0.1273211367,"translation-ar2fa_ar2fa_nahj_bleu":0.0658485892,"translation-ar2fa_ar2fa_quran_bleu":0.2224073202,"nlg_score":0.1810678527}
15
+ {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0381647164,"translation-ar2fa_ar2fa_sahife_bleu":0.0517672982,"translation-ar2fa_ar2fa_nahj_bleu":0.0235396776,"translation-ar2fa_ar2fa_quran_bleu":0.0384559215,"nlg_score":0.1129755187}
16
+ {"Model Name":"yandexgpt-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0316527189,"translation-ar2fa_ar2fa_sahife_bleu":0.0206548583,"translation-ar2fa_ar2fa_nahj_bleu":0.0436607335,"translation-ar2fa_ar2fa_quran_bleu":0.0312429658,"nlg_score":0.1035446324}
17
+ {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0594554675,"translation-ar2fa_ar2fa_sahife_bleu":0.0539986603,"translation-ar2fa_ar2fa_nahj_bleu":0.035240584,"translation-ar2fa_ar2fa_quran_bleu":0.0879164142,"nlg_score":0.1319091735}
18
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0704966071,"translation-ar2fa_ar2fa_sahife_bleu":0.0675578984,"translation-ar2fa_ar2fa_nahj_bleu":0.042116411,"translation-ar2fa_ar2fa_quran_bleu":0.1003965021,"nlg_score":0.1334687319}
19
+ {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.032619077,"translation-ar2fa_ar2fa_sahife_bleu":0.0333185867,"translation-ar2fa_ar2fa_nahj_bleu":0.0106299838,"translation-ar2fa_ar2fa_quran_bleu":0.0528092057,"nlg_score":0.1196804312}
20
+ {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0608470749,"translation-ar2fa_ar2fa_sahife_bleu":0.0636783644,"translation-ar2fa_ar2fa_nahj_bleu":0.0258604511,"translation-ar2fa_ar2fa_quran_bleu":0.091253078,"nlg_score":0.1196400535}
21
+ {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0821020713,"translation-ar2fa_ar2fa_sahife_bleu":0.0730469461,"translation-ar2fa_ar2fa_nahj_bleu":0.0579031327,"translation-ar2fa_ar2fa_quran_bleu":0.1141461882,"nlg_score":0.1557270864}
22
+ {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1542520457,"translation-ar2fa_ar2fa_sahife_bleu":0.1283925803,"translation-ar2fa_ar2fa_nahj_bleu":0.0660434951,"translation-ar2fa_ar2fa_quran_bleu":0.2639096342,"nlg_score":0.18964968}
23
+ {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1471879954,"translation-ar2fa_ar2fa_sahife_bleu":0.1294214814,"translation-ar2fa_ar2fa_nahj_bleu":0.0642841927,"translation-ar2fa_ar2fa_quran_bleu":0.2437131219,"nlg_score":0.1779340777}
24
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.1034307333,"translation-ar2fa_ar2fa_sahife_bleu":0.0990571463,"translation-ar2fa_ar2fa_nahj_bleu":0.0554677739,"translation-ar2fa_ar2fa_quran_bleu":0.1533691318,"nlg_score":0.1430866672}
 
 
25
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.1064381612,"translation-ar2fa_ar2fa_sahife_bleu":0.089727948,"translation-ar2fa_ar2fa_nahj_bleu":0.0552602241,"translation-ar2fa_ar2fa_quran_bleu":0.1717674145,"nlg_score":0.1538910531}
26
+ {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0186923531,"translation-ar2fa_ar2fa_sahife_bleu":0.0174521967,"translation-ar2fa_ar2fa_nahj_bleu":0.0097734226,"translation-ar2fa_ar2fa_quran_bleu":0.0284054936,"nlg_score":0.0949943578}
27
+ {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.015748738,"translation-ar2fa_ar2fa_sahife_bleu":0.0222158178,"translation-ar2fa_ar2fa_nahj_bleu":0.0107622927,"translation-ar2fa_ar2fa_quran_bleu":0.0140187813,"nlg_score":0.0509841903}
 
 
 
 
 
 
28
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.0581992714,"translation-ar2fa_ar2fa_sahife_bleu":0.0540221076,"translation-ar2fa_ar2fa_nahj_bleu":0.0233017704,"translation-ar2fa_ar2fa_quran_bleu":0.095529061,"nlg_score":0.1089333827}
29
+ {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1534130086,"translation-ar2fa_ar2fa_sahife_bleu":0.1250461134,"translation-ar2fa_ar2fa_nahj_bleu":0.0624466634,"translation-ar2fa_ar2fa_quran_bleu":0.2681979318,"nlg_score":0.194675133}
30
+ {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0396780688,"translation-ar2fa_ar2fa_sahife_bleu":0.0355366473,"translation-ar2fa_ar2fa_nahj_bleu":0.0160671452,"translation-ar2fa_ar2fa_quran_bleu":0.0662498677,"nlg_score":0.1137933652}
31
+ {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1430472928,"translation-ar2fa_ar2fa_sahife_bleu":0.1326018858,"translation-ar2fa_ar2fa_nahj_bleu":0.0532180128,"translation-ar2fa_ar2fa_quran_bleu":0.2388305158,"nlg_score":0.1659339021}
32
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0439654732,"translation-ar2fa_ar2fa_sahife_bleu":0.0313283708,"translation-ar2fa_ar2fa_nahj_bleu":0.0308641232,"translation-ar2fa_ar2fa_quran_bleu":0.0690488581,"nlg_score":0.112015688}
33
+ {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.13132367,"translation-ar2fa_ar2fa_sahife_bleu":0.109076205,"translation-ar2fa_ar2fa_nahj_bleu":0.0659265093,"translation-ar2fa_ar2fa_quran_bleu":0.2156984377,"nlg_score":0.1631530657}
34
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0302818033,"translation-ar2fa_ar2fa_sahife_bleu":0.0272381325,"translation-ar2fa_ar2fa_nahj_bleu":0.0129029913,"translation-ar2fa_ar2fa_quran_bleu":0.0498353456,"nlg_score":0.0934094344}
35
+ {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.0997358382,"translation-ar2fa_ar2fa_sahife_bleu":0.0861966287,"translation-ar2fa_ar2fa_nahj_bleu":0.0459362946,"translation-ar2fa_ar2fa_quran_bleu":0.164384614,"nlg_score":0.1417778788}
36
+ {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0056487667,"translation-ar2fa_ar2fa_sahife_bleu":0.0084650778,"translation-ar2fa_ar2fa_nahj_bleu":0.0073044047,"translation-ar2fa_ar2fa_quran_bleu":0.0012595996,"nlg_score":0.0823387318}
37
+ {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.143500711,"translation-ar2fa_ar2fa_sahife_bleu":0.1221294429,"translation-ar2fa_ar2fa_nahj_bleu":0.069521493,"translation-ar2fa_ar2fa_quran_bleu":0.235152236,"nlg_score":0.1901206806}
38
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0636385541,"translation-ar2fa_ar2fa_sahife_bleu":0.0557180428,"translation-ar2fa_ar2fa_nahj_bleu":0.0539968488,"translation-ar2fa_ar2fa_quran_bleu":0.0807186853,"nlg_score":0.1389297212}
39
+ {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0465792583,"translation-ar2fa_ar2fa_sahife_bleu":0.023795336,"translation-ar2fa_ar2fa_nahj_bleu":0.0121091058,"translation-ar2fa_ar2fa_quran_bleu":0.1021098256,"nlg_score":0.1067134448}
40
+ {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1334075162,"translation-ar2fa_ar2fa_sahife_bleu":0.1143867102,"translation-ar2fa_ar2fa_nahj_bleu":0.063272709,"translation-ar2fa_ar2fa_quran_bleu":0.2190563892,"nlg_score":0.181552926}
41
+ {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0226935201,"translation-ar2fa_ar2fa_sahife_bleu":0.0196359142,"translation-ar2fa_ar2fa_nahj_bleu":0.010693835,"translation-ar2fa_ar2fa_quran_bleu":0.0371508269,"nlg_score":0.0940241349}
42
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.1302111402,"translation-ar2fa_ar2fa_sahife_bleu":0.1104606951,"translation-ar2fa_ar2fa_nahj_bleu":0.0742081609,"translation-ar2fa_ar2fa_quran_bleu":0.2031644157,"nlg_score":0.2010896964}
43
+ {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.01007549,"translation-ar2fa_ar2fa_sahife_bleu":0.0116017776,"translation-ar2fa_ar2fa_nahj_bleu":0.0067782437,"translation-ar2fa_ar2fa_quran_bleu":0.0116815864,"nlg_score":0.0682994522}
44
+ {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.1414109272,"translation-ar2fa_ar2fa_sahife_bleu":0.136408042,"translation-ar2fa_ar2fa_nahj_bleu":0.0653197648,"translation-ar2fa_ar2fa_quran_bleu":0.2187004167,"nlg_score":0.1880477876}
45
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1371181815,"translation-ar2fa_ar2fa_sahife_bleu":0.1148434226,"translation-ar2fa_ar2fa_nahj_bleu":0.0635817712,"translation-ar2fa_ar2fa_quran_bleu":0.2292525303,"nlg_score":0.1764906292}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leaderboard/boards_data/translation-en2fa_en2fa.jsonl CHANGED
@@ -1,44 +1,45 @@
1
- {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1799534349,"translation-en2fa_en2fa_epoque_bleu":0.4004213933,"translation-en2fa_en2fa_mizan_bleu":0.1703393716,"translation-en2fa_en2fa_quran_bleu":0.1225698669,"translation-en2fa_en2fa_sahife_bleu":0.0832764011,"translation-en2fa_en2fa_nahj_bleu":0.0439108113,"translation-en2fa_en2fa_tep_bleu":0.0595417592,"nlg_score":0.1779340777}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1482164359,"translation-en2fa_en2fa_epoque_bleu":0.3332313032,"translation-en2fa_en2fa_mizan_bleu":0.1348649993,"translation-en2fa_en2fa_quran_bleu":0.0798910499,"translation-en2fa_en2fa_sahife_bleu":0.0724923326,"translation-en2fa_en2fa_nahj_bleu":0.0425031053,"translation-en2fa_en2fa_tep_bleu":0.0570157331,"nlg_score":0.1334687319}
3
- {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0366912467,"translation-en2fa_en2fa_epoque_bleu":0.0623359898,"translation-en2fa_en2fa_mizan_bleu":0.0442763597,"translation-en2fa_en2fa_quran_bleu":0.0309309044,"translation-en2fa_en2fa_sahife_bleu":0.0330663757,"translation-en2fa_en2fa_nahj_bleu":0.0124767847,"translation-en2fa_en2fa_tep_bleu":0.0116612774,"nlg_score":0.0949943578}
4
- {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.2018158808,"translation-en2fa_en2fa_epoque_bleu":0.4332944681,"translation-en2fa_en2fa_mizan_bleu":0.1925182751,"translation-en2fa_en2fa_quran_bleu":0.1530925462,"translation-en2fa_en2fa_sahife_bleu":0.1026499453,"translation-en2fa_en2fa_nahj_bleu":0.051968827,"translation-en2fa_en2fa_tep_bleu":0.0708487287,"nlg_score":0.1880477876}
 
 
 
5
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.164906114,"translation-en2fa_en2fa_epoque_bleu":0.3663484194,"translation-en2fa_en2fa_mizan_bleu":0.1564961694,"translation-en2fa_en2fa_quran_bleu":0.100726536,"translation-en2fa_en2fa_sahife_bleu":0.0786865541,"translation-en2fa_en2fa_nahj_bleu":0.045865321,"translation-en2fa_en2fa_tep_bleu":0.0567120347,"nlg_score":0.1430866672}
6
- {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1757431213,"translation-en2fa_en2fa_epoque_bleu":0.3745398253,"translation-en2fa_en2fa_mizan_bleu":0.1640890656,"translation-en2fa_en2fa_quran_bleu":0.1377843747,"translation-en2fa_en2fa_sahife_bleu":0.0895949257,"translation-en2fa_en2fa_nahj_bleu":0.0437585905,"translation-en2fa_en2fa_tep_bleu":0.0679088622,"nlg_score":0.181552926}
7
- {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1700971031,"translation-en2fa_en2fa_epoque_bleu":0.3619925896,"translation-en2fa_en2fa_mizan_bleu":0.1638764762,"translation-en2fa_en2fa_quran_bleu":0.1169026899,"translation-en2fa_en2fa_sahife_bleu":0.0785107337,"translation-en2fa_en2fa_nahj_bleu":0.0463016599,"translation-en2fa_en2fa_tep_bleu":0.0704424388,"nlg_score":0.1643361642}
8
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1732020657,"translation-en2fa_en2fa_epoque_bleu":0.3837758669,"translation-en2fa_en2fa_mizan_bleu":0.1680666593,"translation-en2fa_en2fa_quran_bleu":0.1005711034,"translation-en2fa_en2fa_sahife_bleu":0.0761731989,"translation-en2fa_en2fa_nahj_bleu":0.0436218334,"translation-en2fa_en2fa_tep_bleu":0.0641477759,"nlg_score":0.1538910531}
9
- {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0600629844,"translation-en2fa_en2fa_epoque_bleu":0.1538216141,"translation-en2fa_en2fa_mizan_bleu":0.0548001026,"translation-en2fa_en2fa_quran_bleu":0.0217436833,"translation-en2fa_en2fa_sahife_bleu":0.0205645274,"translation-en2fa_en2fa_nahj_bleu":0.0172427415,"translation-en2fa_en2fa_tep_bleu":0.0093260061,"nlg_score":0.0940241349}
10
- {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1974288311,"translation-en2fa_en2fa_epoque_bleu":0.4102902123,"translation-en2fa_en2fa_mizan_bleu":0.1898606624,"translation-en2fa_en2fa_quran_bleu":0.1638084791,"translation-en2fa_en2fa_sahife_bleu":0.1095493859,"translation-en2fa_en2fa_nahj_bleu":0.0487097316,"translation-en2fa_en2fa_tep_bleu":0.0737497745,"nlg_score":0.194675133}
11
- {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.08817485,"translation-en2fa_en2fa_epoque_bleu":0.1886801725,"translation-en2fa_en2fa_mizan_bleu":0.0879987558,"translation-en2fa_en2fa_quran_bleu":0.0657922023,"translation-en2fa_en2fa_sahife_bleu":0.0296141618,"translation-en2fa_en2fa_nahj_bleu":0.0192266597,"translation-en2fa_en2fa_tep_bleu":0.0366296874,"nlg_score":0.1196804312}
12
- {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.173929966,"translation-en2fa_en2fa_epoque_bleu":0.3719958476,"translation-en2fa_en2fa_mizan_bleu":0.1618238918,"translation-en2fa_en2fa_quran_bleu":0.1364597091,"translation-en2fa_en2fa_sahife_bleu":0.0887355174,"translation-en2fa_en2fa_nahj_bleu":0.0458341529,"translation-en2fa_en2fa_tep_bleu":0.0650194522,"nlg_score":0.1631530657}
13
- {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0439502467,"translation-en2fa_en2fa_epoque_bleu":0.0932804064,"translation-en2fa_en2fa_mizan_bleu":0.0446467932,"translation-en2fa_en2fa_quran_bleu":0.0435800727,"translation-en2fa_en2fa_sahife_bleu":0.0197005921,"translation-en2fa_en2fa_nahj_bleu":0.0132822652,"translation-en2fa_en2fa_tep_bleu":0.0087342692,"nlg_score":0.1067134448}
14
- {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1535253787,"translation-en2fa_en2fa_epoque_bleu":0.3553678809,"translation-en2fa_en2fa_mizan_bleu":0.1285441922,"translation-en2fa_en2fa_quran_bleu":0.0857809616,"translation-en2fa_en2fa_sahife_bleu":0.0787025343,"translation-en2fa_en2fa_nahj_bleu":0.0404850935,"translation-en2fa_en2fa_tep_bleu":0.0586129062,"nlg_score":0.16056333}
15
- {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1623218856,"translation-en2fa_en2fa_epoque_bleu":0.3677879105,"translation-en2fa_en2fa_mizan_bleu":0.147599732,"translation-en2fa_en2fa_quran_bleu":0.0938457658,"translation-en2fa_en2fa_sahife_bleu":0.0698903005,"translation-en2fa_en2fa_nahj_bleu":0.0435129812,"translation-en2fa_en2fa_tep_bleu":0.0620337306,"nlg_score":0.1679338638}
16
- {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1694667296,"translation-en2fa_en2fa_epoque_bleu":0.3980975238,"translation-en2fa_en2fa_mizan_bleu":0.1400810731,"translation-en2fa_en2fa_quran_bleu":0.1173019123,"translation-en2fa_en2fa_sahife_bleu":0.0758825134,"translation-en2fa_en2fa_nahj_bleu":0.0380397952,"translation-en2fa_en2fa_tep_bleu":0.05711699,"nlg_score":0.1567965528}
17
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.056370201,"translation-en2fa_en2fa_epoque_bleu":0.1154392548,"translation-en2fa_en2fa_mizan_bleu":0.0484324583,"translation-en2fa_en2fa_quran_bleu":0.0612465488,"translation-en2fa_en2fa_sahife_bleu":0.0466818991,"translation-en2fa_en2fa_nahj_bleu":0.0218444477,"translation-en2fa_en2fa_tep_bleu":0.0118186665,"nlg_score":0.1089333827}
18
- {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1458447378,"translation-en2fa_en2fa_epoque_bleu":0.3541508677,"translation-en2fa_en2fa_mizan_bleu":0.1259468635,"translation-en2fa_en2fa_quran_bleu":0.0887225632,"translation-en2fa_en2fa_sahife_bleu":0.0672732746,"translation-en2fa_en2fa_nahj_bleu":0.0407327793,"translation-en2fa_en2fa_tep_bleu":0.0293172873,"nlg_score":0.1319091735}
 
 
19
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.111865825,"translation-en2fa_en2fa_epoque_bleu":0.2732051575,"translation-en2fa_en2fa_mizan_bleu":0.0868500698,"translation-en2fa_en2fa_quran_bleu":0.0529852068,"translation-en2fa_en2fa_sahife_bleu":0.0502293343,"translation-en2fa_en2fa_nahj_bleu":0.0322064942,"translation-en2fa_en2fa_tep_bleu":0.0404507778,"nlg_score":0.112015688}
 
20
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0438887609,"translation-en2fa_en2fa_epoque_bleu":0.0714603918,"translation-en2fa_en2fa_mizan_bleu":0.0595250407,"translation-en2fa_en2fa_quran_bleu":0.0428487402,"translation-en2fa_en2fa_sahife_bleu":0.0258372032,"translation-en2fa_en2fa_nahj_bleu":0.0133722454,"translation-en2fa_en2fa_tep_bleu":0.0142899909,"nlg_score":0.0934094344}
 
 
 
21
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1217211215,"translation-en2fa_en2fa_epoque_bleu":0.2916268514,"translation-en2fa_en2fa_mizan_bleu":0.091925603,"translation-en2fa_en2fa_quran_bleu":0.065498518,"translation-en2fa_en2fa_sahife_bleu":0.0612237455,"translation-en2fa_en2fa_nahj_bleu":0.0385824628,"translation-en2fa_en2fa_tep_bleu":0.0453883692,"nlg_score":0.1389297212}
22
- {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0460704645,"translation-en2fa_en2fa_epoque_bleu":0.1309820272,"translation-en2fa_en2fa_mizan_bleu":0.0316650952,"translation-en2fa_en2fa_quran_bleu":0.0134401079,"translation-en2fa_en2fa_sahife_bleu":0.0141114981,"translation-en2fa_en2fa_nahj_bleu":0.0127654414,"translation-en2fa_en2fa_tep_bleu":0.0065463218,"nlg_score":0.0682994522}
23
- {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0951102411,"translation-en2fa_en2fa_epoque_bleu":0.2204131973,"translation-en2fa_en2fa_mizan_bleu":0.0772021612,"translation-en2fa_en2fa_quran_bleu":0.0914129011,"translation-en2fa_en2fa_sahife_bleu":0.0555605793,"translation-en2fa_en2fa_nahj_bleu":0.0296371925,"translation-en2fa_en2fa_tep_bleu":0.0145962694,"nlg_score":0.1196400535}
 
24
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.2019260724,"translation-en2fa_en2fa_epoque_bleu":0.4752747269,"translation-en2fa_en2fa_mizan_bleu":0.165706346,"translation-en2fa_en2fa_quran_bleu":0.1194336982,"translation-en2fa_en2fa_sahife_bleu":0.0819129449,"translation-en2fa_en2fa_nahj_bleu":0.0545857968,"translation-en2fa_en2fa_tep_bleu":0.0782996247,"nlg_score":0.2010896964}
25
- {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1979467916,"translation-en2fa_en2fa_epoque_bleu":0.4460981632,"translation-en2fa_en2fa_mizan_bleu":0.1745376389,"translation-en2fa_en2fa_quran_bleu":0.137406774,"translation-en2fa_en2fa_sahife_bleu":0.091586235,"translation-en2fa_en2fa_nahj_bleu":0.0490159552,"translation-en2fa_en2fa_tep_bleu":0.072776086,"nlg_score":0.1901206806}
 
26
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1745395927,"translation-en2fa_en2fa_epoque_bleu":0.3494300897,"translation-en2fa_en2fa_mizan_bleu":0.1701139909,"translation-en2fa_en2fa_quran_bleu":0.1446863433,"translation-en2fa_en2fa_sahife_bleu":0.0993817781,"translation-en2fa_en2fa_nahj_bleu":0.0486286065,"translation-en2fa_en2fa_tep_bleu":0.0750157717,"nlg_score":0.1764906292}
27
- {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.2014428857,"translation-en2fa_en2fa_epoque_bleu":0.4710672433,"translation-en2fa_en2fa_mizan_bleu":0.1830885263,"translation-en2fa_en2fa_quran_bleu":0.1141518863,"translation-en2fa_en2fa_sahife_bleu":0.0806159411,"translation-en2fa_en2fa_nahj_bleu":0.0504089542,"translation-en2fa_en2fa_tep_bleu":0.0648627292,"nlg_score":0.1810678527}
28
- {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0434570993,"translation-en2fa_en2fa_epoque_bleu":0.0864683098,"translation-en2fa_en2fa_mizan_bleu":0.0372126642,"translation-en2fa_en2fa_quran_bleu":0.0580568111,"translation-en2fa_en2fa_sahife_bleu":0.0311184796,"translation-en2fa_en2fa_nahj_bleu":0.0145031404,"translation-en2fa_en2fa_tep_bleu":0.0105327687,"nlg_score":0.1137933652}
29
- {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1718324934,"translation-en2fa_en2fa_epoque_bleu":0.364783925,"translation-en2fa_en2fa_mizan_bleu":0.1532613543,"translation-en2fa_en2fa_quran_bleu":0.1620975016,"translation-en2fa_en2fa_sahife_bleu":0.0967871625,"translation-en2fa_en2fa_nahj_bleu":0.0457580774,"translation-en2fa_en2fa_tep_bleu":0.05756103,"nlg_score":0.178231145}
30
- {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.0650794176,"translation-en2fa_en2fa_epoque_bleu":0.1194375779,"translation-en2fa_en2fa_mizan_bleu":0.0640649978,"translation-en2fa_en2fa_quran_bleu":0.0926514743,"translation-en2fa_en2fa_sahife_bleu":0.0392464347,"translation-en2fa_en2fa_nahj_bleu":0.022322883,"translation-en2fa_en2fa_tep_bleu":0.0184227674,"nlg_score":0.1368740087}
31
- {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1750457687,"translation-en2fa_en2fa_epoque_bleu":0.3740903807,"translation-en2fa_en2fa_mizan_bleu":0.1593083308,"translation-en2fa_en2fa_quran_bleu":0.1325582833,"translation-en2fa_en2fa_sahife_bleu":0.1002994879,"translation-en2fa_en2fa_nahj_bleu":0.0501235873,"translation-en2fa_en2fa_tep_bleu":0.0652393013,"nlg_score":0.1659339021}
32
- {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1506934102,"translation-en2fa_en2fa_epoque_bleu":0.2951032905,"translation-en2fa_en2fa_mizan_bleu":0.1500681264,"translation-en2fa_en2fa_quran_bleu":0.1104277702,"translation-en2fa_en2fa_sahife_bleu":0.092222972,"translation-en2fa_en2fa_nahj_bleu":0.0497623005,"translation-en2fa_en2fa_tep_bleu":0.0692905167,"nlg_score":0.1641995602}
33
- {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1625473606,"translation-en2fa_en2fa_epoque_bleu":0.3728148632,"translation-en2fa_en2fa_mizan_bleu":0.1426041304,"translation-en2fa_en2fa_quran_bleu":0.1020345657,"translation-en2fa_en2fa_sahife_bleu":0.0766062054,"translation-en2fa_en2fa_nahj_bleu":0.0472751057,"translation-en2fa_en2fa_tep_bleu":0.0539804032,"nlg_score":0.1417778788}
34
- {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1831593088,"translation-en2fa_en2fa_epoque_bleu":0.4052150706,"translation-en2fa_en2fa_mizan_bleu":0.1692823494,"translation-en2fa_en2fa_quran_bleu":0.1400476579,"translation-en2fa_en2fa_sahife_bleu":0.0812805634,"translation-en2fa_en2fa_nahj_bleu":0.048146149,"translation-en2fa_en2fa_tep_bleu":0.0610881446,"nlg_score":0.1665903777}
35
- {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1645954959,"translation-en2fa_en2fa_epoque_bleu":0.369137779,"translation-en2fa_en2fa_mizan_bleu":0.1567335186,"translation-en2fa_en2fa_quran_bleu":0.0945388771,"translation-en2fa_en2fa_sahife_bleu":0.0697896731,"translation-en2fa_en2fa_nahj_bleu":0.0419654732,"translation-en2fa_en2fa_tep_bleu":0.0607239891,"nlg_score":0.1324031203}
36
- {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1550276898,"translation-en2fa_en2fa_epoque_bleu":0.3721582216,"translation-en2fa_en2fa_mizan_bleu":0.1231599039,"translation-en2fa_en2fa_quran_bleu":0.0882213453,"translation-en2fa_en2fa_sahife_bleu":0.0725213197,"translation-en2fa_en2fa_nahj_bleu":0.0424186358,"translation-en2fa_en2fa_tep_bleu":0.0528718634,"nlg_score":0.1557270864}
37
- {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0472831089,"translation-en2fa_en2fa_epoque_bleu":0.0950858392,"translation-en2fa_en2fa_mizan_bleu":0.0348348322,"translation-en2fa_en2fa_quran_bleu":0.0417444578,"translation-en2fa_en2fa_sahife_bleu":0.044168541,"translation-en2fa_en2fa_nahj_bleu":0.0239185439,"translation-en2fa_en2fa_tep_bleu":0.0188699837,"nlg_score":0.0944140383}
38
- {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.2099911906,"translation-en2fa_en2fa_epoque_bleu":0.4805793807,"translation-en2fa_en2fa_mizan_bleu":0.1904867707,"translation-en2fa_en2fa_quran_bleu":0.1412389522,"translation-en2fa_en2fa_sahife_bleu":0.0861059288,"translation-en2fa_en2fa_nahj_bleu":0.0528683421,"translation-en2fa_en2fa_tep_bleu":0.0688528109,"nlg_score":0.18964968}
39
- {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0409401575,"translation-en2fa_en2fa_epoque_bleu":0.0902479461,"translation-en2fa_en2fa_mizan_bleu":0.0327725294,"translation-en2fa_en2fa_quran_bleu":0.0443958388,"translation-en2fa_en2fa_sahife_bleu":0.0278897851,"translation-en2fa_en2fa_nahj_bleu":0.0148027555,"translation-en2fa_en2fa_tep_bleu":0.0071499459,"nlg_score":0.0880621978}
40
- {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1617787549,"translation-en2fa_en2fa_epoque_bleu":0.3821529147,"translation-en2fa_en2fa_mizan_bleu":0.1337537913,"translation-en2fa_en2fa_quran_bleu":0.0860909143,"translation-en2fa_en2fa_sahife_bleu":0.0770506908,"translation-en2fa_en2fa_nahj_bleu":0.0441728515,"translation-en2fa_en2fa_tep_bleu":0.0587014819,"nlg_score":0.164118288}
41
- {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1074044673,"translation-en2fa_en2fa_epoque_bleu":0.2689676347,"translation-en2fa_en2fa_mizan_bleu":0.0784179406,"translation-en2fa_en2fa_quran_bleu":0.0573255404,"translation-en2fa_en2fa_sahife_bleu":0.0534655564,"translation-en2fa_en2fa_nahj_bleu":0.0373749355,"translation-en2fa_en2fa_tep_bleu":0.0279497965,"nlg_score":0.1129755187}
42
- {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0373710986,"translation-en2fa_en2fa_epoque_bleu":0.0773774592,"translation-en2fa_en2fa_mizan_bleu":0.034234366,"translation-en2fa_en2fa_quran_bleu":0.0258474786,"translation-en2fa_en2fa_sahife_bleu":0.0240302635,"translation-en2fa_en2fa_nahj_bleu":0.0149718554,"translation-en2fa_en2fa_tep_bleu":0.0146400693,"nlg_score":0.0823387318}
43
- {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.0281307344,"translation-en2fa_en2fa_epoque_bleu":0.0581118569,"translation-en2fa_en2fa_mizan_bleu":0.0252951725,"translation-en2fa_en2fa_quran_bleu":0.0299637537,"translation-en2fa_en2fa_sahife_bleu":0.0215091569,"translation-en2fa_en2fa_nahj_bleu":0.0081971706,"translation-en2fa_en2fa_tep_bleu":0.0063339652,"nlg_score":0.1151518212}
44
- {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0012922175,"translation-en2fa_en2fa_epoque_bleu":0.0034993945,"translation-en2fa_en2fa_mizan_bleu":0.0,"translation-en2fa_en2fa_quran_bleu":0.0,"translation-en2fa_en2fa_sahife_bleu":0.0,"translation-en2fa_en2fa_nahj_bleu":0.0,"translation-en2fa_en2fa_tep_bleu":0.0020451278,"nlg_score":0.0509841903}
 
1
+ {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1831593088,"translation-en2fa_en2fa_epoque_bleu":0.4052150706,"translation-en2fa_en2fa_mizan_bleu":0.1692823494,"translation-en2fa_en2fa_quran_bleu":0.1400476579,"translation-en2fa_en2fa_sahife_bleu":0.0812805634,"translation-en2fa_en2fa_nahj_bleu":0.048146149,"translation-en2fa_en2fa_tep_bleu":0.0610881446,"nlg_score":0.1665903777}
2
+ {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1506934102,"translation-en2fa_en2fa_epoque_bleu":0.2951032905,"translation-en2fa_en2fa_mizan_bleu":0.1500681264,"translation-en2fa_en2fa_quran_bleu":0.1104277702,"translation-en2fa_en2fa_sahife_bleu":0.092222972,"translation-en2fa_en2fa_nahj_bleu":0.0497623005,"translation-en2fa_en2fa_tep_bleu":0.0692905167,"nlg_score":0.1641995602}
3
+ {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1718324934,"translation-en2fa_en2fa_epoque_bleu":0.364783925,"translation-en2fa_en2fa_mizan_bleu":0.1532613543,"translation-en2fa_en2fa_quran_bleu":0.1620975016,"translation-en2fa_en2fa_sahife_bleu":0.0967871625,"translation-en2fa_en2fa_nahj_bleu":0.0457580774,"translation-en2fa_en2fa_tep_bleu":0.05756103,"nlg_score":0.178231145}
4
+ {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0409401575,"translation-en2fa_en2fa_epoque_bleu":0.0902479461,"translation-en2fa_en2fa_mizan_bleu":0.0327725294,"translation-en2fa_en2fa_quran_bleu":0.0443958388,"translation-en2fa_en2fa_sahife_bleu":0.0278897851,"translation-en2fa_en2fa_nahj_bleu":0.0148027555,"translation-en2fa_en2fa_tep_bleu":0.0071499459,"nlg_score":0.0880621978}
5
+ {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1694667296,"translation-en2fa_en2fa_epoque_bleu":0.3980975238,"translation-en2fa_en2fa_mizan_bleu":0.1400810731,"translation-en2fa_en2fa_quran_bleu":0.1173019123,"translation-en2fa_en2fa_sahife_bleu":0.0758825134,"translation-en2fa_en2fa_nahj_bleu":0.0380397952,"translation-en2fa_en2fa_tep_bleu":0.05711699,"nlg_score":0.1567965528}
6
+ {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1535253787,"translation-en2fa_en2fa_epoque_bleu":0.3553678809,"translation-en2fa_en2fa_mizan_bleu":0.1285441922,"translation-en2fa_en2fa_quran_bleu":0.0857809616,"translation-en2fa_en2fa_sahife_bleu":0.0787025343,"translation-en2fa_en2fa_nahj_bleu":0.0404850935,"translation-en2fa_en2fa_tep_bleu":0.0586129062,"nlg_score":0.16056333}
7
+ {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1623218856,"translation-en2fa_en2fa_epoque_bleu":0.3677879105,"translation-en2fa_en2fa_mizan_bleu":0.147599732,"translation-en2fa_en2fa_quran_bleu":0.0938457658,"translation-en2fa_en2fa_sahife_bleu":0.0698903005,"translation-en2fa_en2fa_nahj_bleu":0.0435129812,"translation-en2fa_en2fa_tep_bleu":0.0620337306,"nlg_score":0.1679338638}
8
+ {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.0650794176,"translation-en2fa_en2fa_epoque_bleu":0.1194375779,"translation-en2fa_en2fa_mizan_bleu":0.0640649978,"translation-en2fa_en2fa_quran_bleu":0.0926514743,"translation-en2fa_en2fa_sahife_bleu":0.0392464347,"translation-en2fa_en2fa_nahj_bleu":0.022322883,"translation-en2fa_en2fa_tep_bleu":0.0184227674,"nlg_score":0.1368740087}
9
+ {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1645954959,"translation-en2fa_en2fa_epoque_bleu":0.369137779,"translation-en2fa_en2fa_mizan_bleu":0.1567335186,"translation-en2fa_en2fa_quran_bleu":0.0945388771,"translation-en2fa_en2fa_sahife_bleu":0.0697896731,"translation-en2fa_en2fa_nahj_bleu":0.0419654732,"translation-en2fa_en2fa_tep_bleu":0.0607239891,"nlg_score":0.1324031203}
10
+ {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.0281307344,"translation-en2fa_en2fa_epoque_bleu":0.0581118569,"translation-en2fa_en2fa_mizan_bleu":0.0252951725,"translation-en2fa_en2fa_quran_bleu":0.0299637537,"translation-en2fa_en2fa_sahife_bleu":0.0215091569,"translation-en2fa_en2fa_nahj_bleu":0.0081971706,"translation-en2fa_en2fa_tep_bleu":0.0063339652,"nlg_score":0.1151518212}
11
+ {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1700971031,"translation-en2fa_en2fa_epoque_bleu":0.3619925896,"translation-en2fa_en2fa_mizan_bleu":0.1638764762,"translation-en2fa_en2fa_quran_bleu":0.1169026899,"translation-en2fa_en2fa_sahife_bleu":0.0785107337,"translation-en2fa_en2fa_nahj_bleu":0.0463016599,"translation-en2fa_en2fa_tep_bleu":0.0704424388,"nlg_score":0.1643361642}
12
+ {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1617787549,"translation-en2fa_en2fa_epoque_bleu":0.3821529147,"translation-en2fa_en2fa_mizan_bleu":0.1337537913,"translation-en2fa_en2fa_quran_bleu":0.0860909143,"translation-en2fa_en2fa_sahife_bleu":0.0770506908,"translation-en2fa_en2fa_nahj_bleu":0.0441728515,"translation-en2fa_en2fa_tep_bleu":0.0587014819,"nlg_score":0.164118288}
13
+ {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0472831089,"translation-en2fa_en2fa_epoque_bleu":0.0950858392,"translation-en2fa_en2fa_mizan_bleu":0.0348348322,"translation-en2fa_en2fa_quran_bleu":0.0417444578,"translation-en2fa_en2fa_sahife_bleu":0.044168541,"translation-en2fa_en2fa_nahj_bleu":0.0239185439,"translation-en2fa_en2fa_tep_bleu":0.0188699837,"nlg_score":0.0944140383}
14
+ {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.2014428857,"translation-en2fa_en2fa_epoque_bleu":0.4710672433,"translation-en2fa_en2fa_mizan_bleu":0.1830885263,"translation-en2fa_en2fa_quran_bleu":0.1141518863,"translation-en2fa_en2fa_sahife_bleu":0.0806159411,"translation-en2fa_en2fa_nahj_bleu":0.0504089542,"translation-en2fa_en2fa_tep_bleu":0.0648627292,"nlg_score":0.1810678527}
15
+ {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1074044673,"translation-en2fa_en2fa_epoque_bleu":0.2689676347,"translation-en2fa_en2fa_mizan_bleu":0.0784179406,"translation-en2fa_en2fa_quran_bleu":0.0573255404,"translation-en2fa_en2fa_sahife_bleu":0.0534655564,"translation-en2fa_en2fa_nahj_bleu":0.0373749355,"translation-en2fa_en2fa_tep_bleu":0.0279497965,"nlg_score":0.1129755187}
16
+ {"Model Name":"yandexgpt-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1155717032,"translation-en2fa_en2fa_epoque_bleu":0.2832842339,"translation-en2fa_en2fa_mizan_bleu":0.0985255619,"translation-en2fa_en2fa_quran_bleu":0.0278717932,"translation-en2fa_en2fa_sahife_bleu":0.0436185703,"translation-en2fa_en2fa_nahj_bleu":0.0358442369,"translation-en2fa_en2fa_tep_bleu":0.0458078328,"nlg_score":0.1035446324}
17
+ {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1458447378,"translation-en2fa_en2fa_epoque_bleu":0.3541508677,"translation-en2fa_en2fa_mizan_bleu":0.1259468635,"translation-en2fa_en2fa_quran_bleu":0.0887225632,"translation-en2fa_en2fa_sahife_bleu":0.0672732746,"translation-en2fa_en2fa_nahj_bleu":0.0407327793,"translation-en2fa_en2fa_tep_bleu":0.0293172873,"nlg_score":0.1319091735}
18
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1482164359,"translation-en2fa_en2fa_epoque_bleu":0.3332313032,"translation-en2fa_en2fa_mizan_bleu":0.1348649993,"translation-en2fa_en2fa_quran_bleu":0.0798910499,"translation-en2fa_en2fa_sahife_bleu":0.0724923326,"translation-en2fa_en2fa_nahj_bleu":0.0425031053,"translation-en2fa_en2fa_tep_bleu":0.0570157331,"nlg_score":0.1334687319}
19
+ {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.08817485,"translation-en2fa_en2fa_epoque_bleu":0.1886801725,"translation-en2fa_en2fa_mizan_bleu":0.0879987558,"translation-en2fa_en2fa_quran_bleu":0.0657922023,"translation-en2fa_en2fa_sahife_bleu":0.0296141618,"translation-en2fa_en2fa_nahj_bleu":0.0192266597,"translation-en2fa_en2fa_tep_bleu":0.0366296874,"nlg_score":0.1196804312}
20
+ {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0951102411,"translation-en2fa_en2fa_epoque_bleu":0.2204131973,"translation-en2fa_en2fa_mizan_bleu":0.0772021612,"translation-en2fa_en2fa_quran_bleu":0.0914129011,"translation-en2fa_en2fa_sahife_bleu":0.0555605793,"translation-en2fa_en2fa_nahj_bleu":0.0296371925,"translation-en2fa_en2fa_tep_bleu":0.0145962694,"nlg_score":0.1196400535}
21
+ {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1550276898,"translation-en2fa_en2fa_epoque_bleu":0.3721582216,"translation-en2fa_en2fa_mizan_bleu":0.1231599039,"translation-en2fa_en2fa_quran_bleu":0.0882213453,"translation-en2fa_en2fa_sahife_bleu":0.0725213197,"translation-en2fa_en2fa_nahj_bleu":0.0424186358,"translation-en2fa_en2fa_tep_bleu":0.0528718634,"nlg_score":0.1557270864}
22
+ {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.2099911906,"translation-en2fa_en2fa_epoque_bleu":0.4805793807,"translation-en2fa_en2fa_mizan_bleu":0.1904867707,"translation-en2fa_en2fa_quran_bleu":0.1412389522,"translation-en2fa_en2fa_sahife_bleu":0.0861059288,"translation-en2fa_en2fa_nahj_bleu":0.0528683421,"translation-en2fa_en2fa_tep_bleu":0.0688528109,"nlg_score":0.18964968}
23
+ {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1799534349,"translation-en2fa_en2fa_epoque_bleu":0.4004213933,"translation-en2fa_en2fa_mizan_bleu":0.1703393716,"translation-en2fa_en2fa_quran_bleu":0.1225698669,"translation-en2fa_en2fa_sahife_bleu":0.0832764011,"translation-en2fa_en2fa_nahj_bleu":0.0439108113,"translation-en2fa_en2fa_tep_bleu":0.0595417592,"nlg_score":0.1779340777}
24
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.164906114,"translation-en2fa_en2fa_epoque_bleu":0.3663484194,"translation-en2fa_en2fa_mizan_bleu":0.1564961694,"translation-en2fa_en2fa_quran_bleu":0.100726536,"translation-en2fa_en2fa_sahife_bleu":0.0786865541,"translation-en2fa_en2fa_nahj_bleu":0.045865321,"translation-en2fa_en2fa_tep_bleu":0.0567120347,"nlg_score":0.1430866672}
 
 
25
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1732020657,"translation-en2fa_en2fa_epoque_bleu":0.3837758669,"translation-en2fa_en2fa_mizan_bleu":0.1680666593,"translation-en2fa_en2fa_quran_bleu":0.1005711034,"translation-en2fa_en2fa_sahife_bleu":0.0761731989,"translation-en2fa_en2fa_nahj_bleu":0.0436218334,"translation-en2fa_en2fa_tep_bleu":0.0641477759,"nlg_score":0.1538910531}
26
+ {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0366912467,"translation-en2fa_en2fa_epoque_bleu":0.0623359898,"translation-en2fa_en2fa_mizan_bleu":0.0442763597,"translation-en2fa_en2fa_quran_bleu":0.0309309044,"translation-en2fa_en2fa_sahife_bleu":0.0330663757,"translation-en2fa_en2fa_nahj_bleu":0.0124767847,"translation-en2fa_en2fa_tep_bleu":0.0116612774,"nlg_score":0.0949943578}
27
+ {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0012922175,"translation-en2fa_en2fa_epoque_bleu":0.0034993945,"translation-en2fa_en2fa_mizan_bleu":0.0,"translation-en2fa_en2fa_quran_bleu":0.0,"translation-en2fa_en2fa_sahife_bleu":0.0,"translation-en2fa_en2fa_nahj_bleu":0.0,"translation-en2fa_en2fa_tep_bleu":0.0020451278,"nlg_score":0.0509841903}
 
 
 
 
 
 
28
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.056370201,"translation-en2fa_en2fa_epoque_bleu":0.1154392548,"translation-en2fa_en2fa_mizan_bleu":0.0484324583,"translation-en2fa_en2fa_quran_bleu":0.0612465488,"translation-en2fa_en2fa_sahife_bleu":0.0466818991,"translation-en2fa_en2fa_nahj_bleu":0.0218444477,"translation-en2fa_en2fa_tep_bleu":0.0118186665,"nlg_score":0.1089333827}
29
+ {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1974288311,"translation-en2fa_en2fa_epoque_bleu":0.4102902123,"translation-en2fa_en2fa_mizan_bleu":0.1898606624,"translation-en2fa_en2fa_quran_bleu":0.1638084791,"translation-en2fa_en2fa_sahife_bleu":0.1095493859,"translation-en2fa_en2fa_nahj_bleu":0.0487097316,"translation-en2fa_en2fa_tep_bleu":0.0737497745,"nlg_score":0.194675133}
30
+ {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0434570993,"translation-en2fa_en2fa_epoque_bleu":0.0864683098,"translation-en2fa_en2fa_mizan_bleu":0.0372126642,"translation-en2fa_en2fa_quran_bleu":0.0580568111,"translation-en2fa_en2fa_sahife_bleu":0.0311184796,"translation-en2fa_en2fa_nahj_bleu":0.0145031404,"translation-en2fa_en2fa_tep_bleu":0.0105327687,"nlg_score":0.1137933652}
31
+ {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1750457687,"translation-en2fa_en2fa_epoque_bleu":0.3740903807,"translation-en2fa_en2fa_mizan_bleu":0.1593083308,"translation-en2fa_en2fa_quran_bleu":0.1325582833,"translation-en2fa_en2fa_sahife_bleu":0.1002994879,"translation-en2fa_en2fa_nahj_bleu":0.0501235873,"translation-en2fa_en2fa_tep_bleu":0.0652393013,"nlg_score":0.1659339021}
32
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.111865825,"translation-en2fa_en2fa_epoque_bleu":0.2732051575,"translation-en2fa_en2fa_mizan_bleu":0.0868500698,"translation-en2fa_en2fa_quran_bleu":0.0529852068,"translation-en2fa_en2fa_sahife_bleu":0.0502293343,"translation-en2fa_en2fa_nahj_bleu":0.0322064942,"translation-en2fa_en2fa_tep_bleu":0.0404507778,"nlg_score":0.112015688}
33
+ {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.173929966,"translation-en2fa_en2fa_epoque_bleu":0.3719958476,"translation-en2fa_en2fa_mizan_bleu":0.1618238918,"translation-en2fa_en2fa_quran_bleu":0.1364597091,"translation-en2fa_en2fa_sahife_bleu":0.0887355174,"translation-en2fa_en2fa_nahj_bleu":0.0458341529,"translation-en2fa_en2fa_tep_bleu":0.0650194522,"nlg_score":0.1631530657}
34
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0438887609,"translation-en2fa_en2fa_epoque_bleu":0.0714603918,"translation-en2fa_en2fa_mizan_bleu":0.0595250407,"translation-en2fa_en2fa_quran_bleu":0.0428487402,"translation-en2fa_en2fa_sahife_bleu":0.0258372032,"translation-en2fa_en2fa_nahj_bleu":0.0133722454,"translation-en2fa_en2fa_tep_bleu":0.0142899909,"nlg_score":0.0934094344}
35
+ {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1625473606,"translation-en2fa_en2fa_epoque_bleu":0.3728148632,"translation-en2fa_en2fa_mizan_bleu":0.1426041304,"translation-en2fa_en2fa_quran_bleu":0.1020345657,"translation-en2fa_en2fa_sahife_bleu":0.0766062054,"translation-en2fa_en2fa_nahj_bleu":0.0472751057,"translation-en2fa_en2fa_tep_bleu":0.0539804032,"nlg_score":0.1417778788}
36
+ {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0373710986,"translation-en2fa_en2fa_epoque_bleu":0.0773774592,"translation-en2fa_en2fa_mizan_bleu":0.034234366,"translation-en2fa_en2fa_quran_bleu":0.0258474786,"translation-en2fa_en2fa_sahife_bleu":0.0240302635,"translation-en2fa_en2fa_nahj_bleu":0.0149718554,"translation-en2fa_en2fa_tep_bleu":0.0146400693,"nlg_score":0.0823387318}
37
+ {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1979467916,"translation-en2fa_en2fa_epoque_bleu":0.4460981632,"translation-en2fa_en2fa_mizan_bleu":0.1745376389,"translation-en2fa_en2fa_quran_bleu":0.137406774,"translation-en2fa_en2fa_sahife_bleu":0.091586235,"translation-en2fa_en2fa_nahj_bleu":0.0490159552,"translation-en2fa_en2fa_tep_bleu":0.072776086,"nlg_score":0.1901206806}
38
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1217211215,"translation-en2fa_en2fa_epoque_bleu":0.2916268514,"translation-en2fa_en2fa_mizan_bleu":0.091925603,"translation-en2fa_en2fa_quran_bleu":0.065498518,"translation-en2fa_en2fa_sahife_bleu":0.0612237455,"translation-en2fa_en2fa_nahj_bleu":0.0385824628,"translation-en2fa_en2fa_tep_bleu":0.0453883692,"nlg_score":0.1389297212}
39
+ {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0439502467,"translation-en2fa_en2fa_epoque_bleu":0.0932804064,"translation-en2fa_en2fa_mizan_bleu":0.0446467932,"translation-en2fa_en2fa_quran_bleu":0.0435800727,"translation-en2fa_en2fa_sahife_bleu":0.0197005921,"translation-en2fa_en2fa_nahj_bleu":0.0132822652,"translation-en2fa_en2fa_tep_bleu":0.0087342692,"nlg_score":0.1067134448}
40
+ {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1757431213,"translation-en2fa_en2fa_epoque_bleu":0.3745398253,"translation-en2fa_en2fa_mizan_bleu":0.1640890656,"translation-en2fa_en2fa_quran_bleu":0.1377843747,"translation-en2fa_en2fa_sahife_bleu":0.0895949257,"translation-en2fa_en2fa_nahj_bleu":0.0437585905,"translation-en2fa_en2fa_tep_bleu":0.0679088622,"nlg_score":0.181552926}
41
+ {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0600629844,"translation-en2fa_en2fa_epoque_bleu":0.1538216141,"translation-en2fa_en2fa_mizan_bleu":0.0548001026,"translation-en2fa_en2fa_quran_bleu":0.0217436833,"translation-en2fa_en2fa_sahife_bleu":0.0205645274,"translation-en2fa_en2fa_nahj_bleu":0.0172427415,"translation-en2fa_en2fa_tep_bleu":0.0093260061,"nlg_score":0.0940241349}
42
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.2019260724,"translation-en2fa_en2fa_epoque_bleu":0.4752747269,"translation-en2fa_en2fa_mizan_bleu":0.165706346,"translation-en2fa_en2fa_quran_bleu":0.1194336982,"translation-en2fa_en2fa_sahife_bleu":0.0819129449,"translation-en2fa_en2fa_nahj_bleu":0.0545857968,"translation-en2fa_en2fa_tep_bleu":0.0782996247,"nlg_score":0.2010896964}
43
+ {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0460704645,"translation-en2fa_en2fa_epoque_bleu":0.1309820272,"translation-en2fa_en2fa_mizan_bleu":0.0316650952,"translation-en2fa_en2fa_quran_bleu":0.0134401079,"translation-en2fa_en2fa_sahife_bleu":0.0141114981,"translation-en2fa_en2fa_nahj_bleu":0.0127654414,"translation-en2fa_en2fa_tep_bleu":0.0065463218,"nlg_score":0.0682994522}
44
+ {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.2018158808,"translation-en2fa_en2fa_epoque_bleu":0.4332944681,"translation-en2fa_en2fa_mizan_bleu":0.1925182751,"translation-en2fa_en2fa_quran_bleu":0.1530925462,"translation-en2fa_en2fa_sahife_bleu":0.1026499453,"translation-en2fa_en2fa_nahj_bleu":0.051968827,"translation-en2fa_en2fa_tep_bleu":0.0708487287,"nlg_score":0.1880477876}
45
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1745395927,"translation-en2fa_en2fa_epoque_bleu":0.3494300897,"translation-en2fa_en2fa_mizan_bleu":0.1701139909,"translation-en2fa_en2fa_quran_bleu":0.1446863433,"translation-en2fa_en2fa_sahife_bleu":0.0993817781,"translation-en2fa_en2fa_nahj_bleu":0.0486286065,"translation-en2fa_en2fa_tep_bleu":0.0750157717,"nlg_score":0.1764906292}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leaderboard/boards_data/translation-fa2ar_fa2ar.jsonl CHANGED
@@ -1,44 +1,45 @@
1
- {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0761269879,"translation-fa2ar_fa2ar_nahj_bleu":0.0321440801,"translation-fa2ar_fa2ar_sahife_bleu":0.0613632957,"translation-fa2ar_fa2ar_quran_bleu":0.134873588,"nlg_score":0.1779340777}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0287298887,"translation-fa2ar_fa2ar_nahj_bleu":0.025061663,"translation-fa2ar_fa2ar_sahife_bleu":0.0355711393,"translation-fa2ar_fa2ar_quran_bleu":0.0255568639,"nlg_score":0.1334687319}
3
- {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0067928767,"translation-fa2ar_fa2ar_nahj_bleu":0.0056689454,"translation-fa2ar_fa2ar_sahife_bleu":0.009024465,"translation-fa2ar_fa2ar_quran_bleu":0.0056852198,"nlg_score":0.0949943578}
4
- {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0922998074,"translation-fa2ar_fa2ar_nahj_bleu":0.0511154919,"translation-fa2ar_fa2ar_sahife_bleu":0.0589808221,"translation-fa2ar_fa2ar_quran_bleu":0.1668031083,"nlg_score":0.1880477876}
 
 
 
5
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0365793496,"translation-fa2ar_fa2ar_nahj_bleu":0.0236205074,"translation-fa2ar_fa2ar_sahife_bleu":0.0445733535,"translation-fa2ar_fa2ar_quran_bleu":0.0415441879,"nlg_score":0.1430866672}
6
- {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0720575119,"translation-fa2ar_fa2ar_nahj_bleu":0.037394526,"translation-fa2ar_fa2ar_sahife_bleu":0.0636064419,"translation-fa2ar_fa2ar_quran_bleu":0.1151715676,"nlg_score":0.181552926}
7
- {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0564204856,"translation-fa2ar_fa2ar_nahj_bleu":0.0345882932,"translation-fa2ar_fa2ar_sahife_bleu":0.0554604649,"translation-fa2ar_fa2ar_quran_bleu":0.0792126988,"nlg_score":0.1643361642}
8
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0381651941,"translation-fa2ar_fa2ar_nahj_bleu":0.0246058927,"translation-fa2ar_fa2ar_sahife_bleu":0.0402564081,"translation-fa2ar_fa2ar_quran_bleu":0.0496332815,"nlg_score":0.1538910531}
9
- {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0086214593,"translation-fa2ar_fa2ar_nahj_bleu":0.006894051,"translation-fa2ar_fa2ar_sahife_bleu":0.009695506,"translation-fa2ar_fa2ar_quran_bleu":0.0092748209,"nlg_score":0.0940241349}
10
- {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0827618418,"translation-fa2ar_fa2ar_nahj_bleu":0.038434531,"translation-fa2ar_fa2ar_sahife_bleu":0.0781455938,"translation-fa2ar_fa2ar_quran_bleu":0.1317054007,"nlg_score":0.194675133}
11
- {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0098333287,"translation-fa2ar_fa2ar_nahj_bleu":0.0072190824,"translation-fa2ar_fa2ar_sahife_bleu":0.0110570977,"translation-fa2ar_fa2ar_quran_bleu":0.0112238061,"nlg_score":0.1196804312}
12
- {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0556821534,"translation-fa2ar_fa2ar_nahj_bleu":0.0341198889,"translation-fa2ar_fa2ar_sahife_bleu":0.0596078948,"translation-fa2ar_fa2ar_quran_bleu":0.0733186765,"nlg_score":0.1631530657}
13
- {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0100630648,"translation-fa2ar_fa2ar_nahj_bleu":0.0071647909,"translation-fa2ar_fa2ar_sahife_bleu":0.0101185743,"translation-fa2ar_fa2ar_quran_bleu":0.0129058292,"nlg_score":0.1067134448}
14
- {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0494411806,"translation-fa2ar_fa2ar_nahj_bleu":0.0369805868,"translation-fa2ar_fa2ar_sahife_bleu":0.0567654991,"translation-fa2ar_fa2ar_quran_bleu":0.0545774559,"nlg_score":0.16056333}
15
- {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0507003531,"translation-fa2ar_fa2ar_nahj_bleu":0.0316047659,"translation-fa2ar_fa2ar_sahife_bleu":0.0534488007,"translation-fa2ar_fa2ar_quran_bleu":0.0670474926,"nlg_score":0.1679338638}
16
- {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0548753777,"translation-fa2ar_fa2ar_nahj_bleu":0.0318327001,"translation-fa2ar_fa2ar_sahife_bleu":0.0567893259,"translation-fa2ar_fa2ar_quran_bleu":0.076004107,"nlg_score":0.1567965528}
17
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0331262129,"translation-fa2ar_fa2ar_nahj_bleu":0.0202107323,"translation-fa2ar_fa2ar_sahife_bleu":0.0280883311,"translation-fa2ar_fa2ar_quran_bleu":0.0510795752,"nlg_score":0.1089333827}
18
- {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0192357288,"translation-fa2ar_fa2ar_nahj_bleu":0.0151369319,"translation-fa2ar_fa2ar_sahife_bleu":0.0245784397,"translation-fa2ar_fa2ar_quran_bleu":0.0179918148,"nlg_score":0.1319091735}
 
 
19
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0058610145,"translation-fa2ar_fa2ar_nahj_bleu":0.0063998692,"translation-fa2ar_fa2ar_sahife_bleu":0.0068172489,"translation-fa2ar_fa2ar_quran_bleu":0.0043275898,"nlg_score":0.112015688}
 
20
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0198691873,"translation-fa2ar_fa2ar_nahj_bleu":0.0113771734,"translation-fa2ar_fa2ar_sahife_bleu":0.0154846482,"translation-fa2ar_fa2ar_quran_bleu":0.0327457404,"nlg_score":0.0934094344}
 
 
 
21
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0351351131,"translation-fa2ar_fa2ar_nahj_bleu":0.0313503027,"translation-fa2ar_fa2ar_sahife_bleu":0.042075565,"translation-fa2ar_fa2ar_quran_bleu":0.0319794715,"nlg_score":0.1389297212}
22
- {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0045158138,"translation-fa2ar_fa2ar_nahj_bleu":0.004600061,"translation-fa2ar_fa2ar_sahife_bleu":0.0052362431,"translation-fa2ar_fa2ar_quran_bleu":0.0037111373,"nlg_score":0.0682994522}
23
- {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0318976507,"translation-fa2ar_fa2ar_nahj_bleu":0.0222927973,"translation-fa2ar_fa2ar_sahife_bleu":0.0296757253,"translation-fa2ar_fa2ar_quran_bleu":0.0437244293,"nlg_score":0.1196400535}
 
24
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0860361971,"translation-fa2ar_fa2ar_nahj_bleu":0.0440530096,"translation-fa2ar_fa2ar_sahife_bleu":0.0833828112,"translation-fa2ar_fa2ar_quran_bleu":0.1306727704,"nlg_score":0.2010896964}
25
- {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0689994984,"translation-fa2ar_fa2ar_nahj_bleu":0.0397020785,"translation-fa2ar_fa2ar_sahife_bleu":0.0751264317,"translation-fa2ar_fa2ar_quran_bleu":0.092169985,"nlg_score":0.1901206806}
 
26
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0848646543,"translation-fa2ar_fa2ar_nahj_bleu":0.0378800509,"translation-fa2ar_fa2ar_sahife_bleu":0.0810757988,"translation-fa2ar_fa2ar_quran_bleu":0.1356381134,"nlg_score":0.1764906292}
27
- {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0652599666,"translation-fa2ar_fa2ar_nahj_bleu":0.0373134355,"translation-fa2ar_fa2ar_sahife_bleu":0.0688517527,"translation-fa2ar_fa2ar_quran_bleu":0.0896147118,"nlg_score":0.1810678527}
28
- {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0235629204,"translation-fa2ar_fa2ar_nahj_bleu":0.0123939624,"translation-fa2ar_fa2ar_sahife_bleu":0.0198538447,"translation-fa2ar_fa2ar_quran_bleu":0.0384409541,"nlg_score":0.1137933652}
29
- {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.079257203,"translation-fa2ar_fa2ar_nahj_bleu":0.0338415847,"translation-fa2ar_fa2ar_sahife_bleu":0.0570744002,"translation-fa2ar_fa2ar_quran_bleu":0.146855624,"nlg_score":0.178231145}
30
- {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0382543341,"translation-fa2ar_fa2ar_nahj_bleu":0.032191006,"translation-fa2ar_fa2ar_sahife_bleu":0.028980881,"translation-fa2ar_fa2ar_quran_bleu":0.0535911152,"nlg_score":0.1368740087}
31
- {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0341529902,"translation-fa2ar_fa2ar_nahj_bleu":0.0198079243,"translation-fa2ar_fa2ar_sahife_bleu":0.041930434,"translation-fa2ar_fa2ar_quran_bleu":0.0407206123,"nlg_score":0.1659339021}
32
- {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0852951832,"translation-fa2ar_fa2ar_nahj_bleu":0.0464072569,"translation-fa2ar_fa2ar_sahife_bleu":0.0713426227,"translation-fa2ar_fa2ar_quran_bleu":0.1381356701,"nlg_score":0.1641995602}
33
- {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0461577329,"translation-fa2ar_fa2ar_nahj_bleu":0.0338017443,"translation-fa2ar_fa2ar_sahife_bleu":0.0494633957,"translation-fa2ar_fa2ar_quran_bleu":0.0552080587,"nlg_score":0.1417778788}
34
- {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0568324844,"translation-fa2ar_fa2ar_nahj_bleu":0.03267488,"translation-fa2ar_fa2ar_sahife_bleu":0.0579381183,"translation-fa2ar_fa2ar_quran_bleu":0.0798844549,"nlg_score":0.1665903777}
35
- {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.031724694,"translation-fa2ar_fa2ar_nahj_bleu":0.0284960627,"translation-fa2ar_fa2ar_sahife_bleu":0.0368209736,"translation-fa2ar_fa2ar_quran_bleu":0.0298570456,"nlg_score":0.1324031203}
36
- {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0423318046,"translation-fa2ar_fa2ar_nahj_bleu":0.0329089717,"translation-fa2ar_fa2ar_sahife_bleu":0.0445101244,"translation-fa2ar_fa2ar_quran_bleu":0.0495763178,"nlg_score":0.1557270864}
37
- {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0167121698,"translation-fa2ar_fa2ar_nahj_bleu":0.0182214992,"translation-fa2ar_fa2ar_sahife_bleu":0.0203567578,"translation-fa2ar_fa2ar_quran_bleu":0.0115582526,"nlg_score":0.0944140383}
38
- {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0656699851,"translation-fa2ar_fa2ar_nahj_bleu":0.0347167128,"translation-fa2ar_fa2ar_sahife_bleu":0.0732417084,"translation-fa2ar_fa2ar_quran_bleu":0.0890515341,"nlg_score":0.18964968}
39
- {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0198485582,"translation-fa2ar_fa2ar_nahj_bleu":0.0111873845,"translation-fa2ar_fa2ar_sahife_bleu":0.015856468,"translation-fa2ar_fa2ar_quran_bleu":0.032501822,"nlg_score":0.0880621978}
40
- {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0483297895,"translation-fa2ar_fa2ar_nahj_bleu":0.0310247441,"translation-fa2ar_fa2ar_sahife_bleu":0.0512375201,"translation-fa2ar_fa2ar_quran_bleu":0.0627271043,"nlg_score":0.164118288}
41
- {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0136530376,"translation-fa2ar_fa2ar_nahj_bleu":0.0110489285,"translation-fa2ar_fa2ar_sahife_bleu":0.0135009036,"translation-fa2ar_fa2ar_quran_bleu":0.0164092807,"nlg_score":0.1129755187}
42
- {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0164489799,"translation-fa2ar_fa2ar_nahj_bleu":0.0152537955,"translation-fa2ar_fa2ar_sahife_bleu":0.0220286512,"translation-fa2ar_fa2ar_quran_bleu":0.012064493,"nlg_score":0.0823387318}
43
- {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0179244872,"translation-fa2ar_fa2ar_nahj_bleu":0.0097189051,"translation-fa2ar_fa2ar_sahife_bleu":0.0109662672,"translation-fa2ar_fa2ar_quran_bleu":0.0330882891,"nlg_score":0.1151518212}
44
- {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.006166788,"translation-fa2ar_fa2ar_nahj_bleu":0.0057707517,"translation-fa2ar_fa2ar_sahife_bleu":0.0059119596,"translation-fa2ar_fa2ar_quran_bleu":0.0068176525,"nlg_score":0.0509841903}
 
1
+ {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0568324844,"translation-fa2ar_fa2ar_nahj_bleu":0.03267488,"translation-fa2ar_fa2ar_sahife_bleu":0.0579381183,"translation-fa2ar_fa2ar_quran_bleu":0.0798844549,"nlg_score":0.1665903777}
2
+ {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0852951832,"translation-fa2ar_fa2ar_nahj_bleu":0.0464072569,"translation-fa2ar_fa2ar_sahife_bleu":0.0713426227,"translation-fa2ar_fa2ar_quran_bleu":0.1381356701,"nlg_score":0.1641995602}
3
+ {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.079257203,"translation-fa2ar_fa2ar_nahj_bleu":0.0338415847,"translation-fa2ar_fa2ar_sahife_bleu":0.0570744002,"translation-fa2ar_fa2ar_quran_bleu":0.146855624,"nlg_score":0.178231145}
4
+ {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0198485582,"translation-fa2ar_fa2ar_nahj_bleu":0.0111873845,"translation-fa2ar_fa2ar_sahife_bleu":0.015856468,"translation-fa2ar_fa2ar_quran_bleu":0.032501822,"nlg_score":0.0880621978}
5
+ {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0548753777,"translation-fa2ar_fa2ar_nahj_bleu":0.0318327001,"translation-fa2ar_fa2ar_sahife_bleu":0.0567893259,"translation-fa2ar_fa2ar_quran_bleu":0.076004107,"nlg_score":0.1567965528}
6
+ {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0494411806,"translation-fa2ar_fa2ar_nahj_bleu":0.0369805868,"translation-fa2ar_fa2ar_sahife_bleu":0.0567654991,"translation-fa2ar_fa2ar_quran_bleu":0.0545774559,"nlg_score":0.16056333}
7
+ {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0507003531,"translation-fa2ar_fa2ar_nahj_bleu":0.0316047659,"translation-fa2ar_fa2ar_sahife_bleu":0.0534488007,"translation-fa2ar_fa2ar_quran_bleu":0.0670474926,"nlg_score":0.1679338638}
8
+ {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0382543341,"translation-fa2ar_fa2ar_nahj_bleu":0.032191006,"translation-fa2ar_fa2ar_sahife_bleu":0.028980881,"translation-fa2ar_fa2ar_quran_bleu":0.0535911152,"nlg_score":0.1368740087}
9
+ {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.031724694,"translation-fa2ar_fa2ar_nahj_bleu":0.0284960627,"translation-fa2ar_fa2ar_sahife_bleu":0.0368209736,"translation-fa2ar_fa2ar_quran_bleu":0.0298570456,"nlg_score":0.1324031203}
10
+ {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0179244872,"translation-fa2ar_fa2ar_nahj_bleu":0.0097189051,"translation-fa2ar_fa2ar_sahife_bleu":0.0109662672,"translation-fa2ar_fa2ar_quran_bleu":0.0330882891,"nlg_score":0.1151518212}
11
+ {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0564204856,"translation-fa2ar_fa2ar_nahj_bleu":0.0345882932,"translation-fa2ar_fa2ar_sahife_bleu":0.0554604649,"translation-fa2ar_fa2ar_quran_bleu":0.0792126988,"nlg_score":0.1643361642}
12
+ {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0483297895,"translation-fa2ar_fa2ar_nahj_bleu":0.0310247441,"translation-fa2ar_fa2ar_sahife_bleu":0.0512375201,"translation-fa2ar_fa2ar_quran_bleu":0.0627271043,"nlg_score":0.164118288}
13
+ {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0167121698,"translation-fa2ar_fa2ar_nahj_bleu":0.0182214992,"translation-fa2ar_fa2ar_sahife_bleu":0.0203567578,"translation-fa2ar_fa2ar_quran_bleu":0.0115582526,"nlg_score":0.0944140383}
14
+ {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0652599666,"translation-fa2ar_fa2ar_nahj_bleu":0.0373134355,"translation-fa2ar_fa2ar_sahife_bleu":0.0688517527,"translation-fa2ar_fa2ar_quran_bleu":0.0896147118,"nlg_score":0.1810678527}
15
+ {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0136530376,"translation-fa2ar_fa2ar_nahj_bleu":0.0110489285,"translation-fa2ar_fa2ar_sahife_bleu":0.0135009036,"translation-fa2ar_fa2ar_quran_bleu":0.0164092807,"nlg_score":0.1129755187}
16
+ {"Model Name":"yandexgpt-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0385165149,"translation-fa2ar_fa2ar_nahj_bleu":0.0272291934,"translation-fa2ar_fa2ar_sahife_bleu":0.0471613083,"translation-fa2ar_fa2ar_quran_bleu":0.0411590431,"nlg_score":0.1035446324}
17
+ {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0192357288,"translation-fa2ar_fa2ar_nahj_bleu":0.0151369319,"translation-fa2ar_fa2ar_sahife_bleu":0.0245784397,"translation-fa2ar_fa2ar_quran_bleu":0.0179918148,"nlg_score":0.1319091735}
18
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0287298887,"translation-fa2ar_fa2ar_nahj_bleu":0.025061663,"translation-fa2ar_fa2ar_sahife_bleu":0.0355711393,"translation-fa2ar_fa2ar_quran_bleu":0.0255568639,"nlg_score":0.1334687319}
19
+ {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0098333287,"translation-fa2ar_fa2ar_nahj_bleu":0.0072190824,"translation-fa2ar_fa2ar_sahife_bleu":0.0110570977,"translation-fa2ar_fa2ar_quran_bleu":0.0112238061,"nlg_score":0.1196804312}
20
+ {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0318976507,"translation-fa2ar_fa2ar_nahj_bleu":0.0222927973,"translation-fa2ar_fa2ar_sahife_bleu":0.0296757253,"translation-fa2ar_fa2ar_quran_bleu":0.0437244293,"nlg_score":0.1196400535}
21
+ {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0423318046,"translation-fa2ar_fa2ar_nahj_bleu":0.0329089717,"translation-fa2ar_fa2ar_sahife_bleu":0.0445101244,"translation-fa2ar_fa2ar_quran_bleu":0.0495763178,"nlg_score":0.1557270864}
22
+ {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0656699851,"translation-fa2ar_fa2ar_nahj_bleu":0.0347167128,"translation-fa2ar_fa2ar_sahife_bleu":0.0732417084,"translation-fa2ar_fa2ar_quran_bleu":0.0890515341,"nlg_score":0.18964968}
23
+ {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0761269879,"translation-fa2ar_fa2ar_nahj_bleu":0.0321440801,"translation-fa2ar_fa2ar_sahife_bleu":0.0613632957,"translation-fa2ar_fa2ar_quran_bleu":0.134873588,"nlg_score":0.1779340777}
24
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0365793496,"translation-fa2ar_fa2ar_nahj_bleu":0.0236205074,"translation-fa2ar_fa2ar_sahife_bleu":0.0445733535,"translation-fa2ar_fa2ar_quran_bleu":0.0415441879,"nlg_score":0.1430866672}
 
 
25
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0381651941,"translation-fa2ar_fa2ar_nahj_bleu":0.0246058927,"translation-fa2ar_fa2ar_sahife_bleu":0.0402564081,"translation-fa2ar_fa2ar_quran_bleu":0.0496332815,"nlg_score":0.1538910531}
26
+ {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0067928767,"translation-fa2ar_fa2ar_nahj_bleu":0.0056689454,"translation-fa2ar_fa2ar_sahife_bleu":0.009024465,"translation-fa2ar_fa2ar_quran_bleu":0.0056852198,"nlg_score":0.0949943578}
27
+ {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.006166788,"translation-fa2ar_fa2ar_nahj_bleu":0.0057707517,"translation-fa2ar_fa2ar_sahife_bleu":0.0059119596,"translation-fa2ar_fa2ar_quran_bleu":0.0068176525,"nlg_score":0.0509841903}
 
 
 
 
 
 
28
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0331262129,"translation-fa2ar_fa2ar_nahj_bleu":0.0202107323,"translation-fa2ar_fa2ar_sahife_bleu":0.0280883311,"translation-fa2ar_fa2ar_quran_bleu":0.0510795752,"nlg_score":0.1089333827}
29
+ {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0827618418,"translation-fa2ar_fa2ar_nahj_bleu":0.038434531,"translation-fa2ar_fa2ar_sahife_bleu":0.0781455938,"translation-fa2ar_fa2ar_quran_bleu":0.1317054007,"nlg_score":0.194675133}
30
+ {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0235629204,"translation-fa2ar_fa2ar_nahj_bleu":0.0123939624,"translation-fa2ar_fa2ar_sahife_bleu":0.0198538447,"translation-fa2ar_fa2ar_quran_bleu":0.0384409541,"nlg_score":0.1137933652}
31
+ {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0341529902,"translation-fa2ar_fa2ar_nahj_bleu":0.0198079243,"translation-fa2ar_fa2ar_sahife_bleu":0.041930434,"translation-fa2ar_fa2ar_quran_bleu":0.0407206123,"nlg_score":0.1659339021}
32
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0058610145,"translation-fa2ar_fa2ar_nahj_bleu":0.0063998692,"translation-fa2ar_fa2ar_sahife_bleu":0.0068172489,"translation-fa2ar_fa2ar_quran_bleu":0.0043275898,"nlg_score":0.112015688}
33
+ {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0556821534,"translation-fa2ar_fa2ar_nahj_bleu":0.0341198889,"translation-fa2ar_fa2ar_sahife_bleu":0.0596078948,"translation-fa2ar_fa2ar_quran_bleu":0.0733186765,"nlg_score":0.1631530657}
34
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0198691873,"translation-fa2ar_fa2ar_nahj_bleu":0.0113771734,"translation-fa2ar_fa2ar_sahife_bleu":0.0154846482,"translation-fa2ar_fa2ar_quran_bleu":0.0327457404,"nlg_score":0.0934094344}
35
+ {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0461577329,"translation-fa2ar_fa2ar_nahj_bleu":0.0338017443,"translation-fa2ar_fa2ar_sahife_bleu":0.0494633957,"translation-fa2ar_fa2ar_quran_bleu":0.0552080587,"nlg_score":0.1417778788}
36
+ {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0164489799,"translation-fa2ar_fa2ar_nahj_bleu":0.0152537955,"translation-fa2ar_fa2ar_sahife_bleu":0.0220286512,"translation-fa2ar_fa2ar_quran_bleu":0.012064493,"nlg_score":0.0823387318}
37
+ {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0689994984,"translation-fa2ar_fa2ar_nahj_bleu":0.0397020785,"translation-fa2ar_fa2ar_sahife_bleu":0.0751264317,"translation-fa2ar_fa2ar_quran_bleu":0.092169985,"nlg_score":0.1901206806}
38
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0351351131,"translation-fa2ar_fa2ar_nahj_bleu":0.0313503027,"translation-fa2ar_fa2ar_sahife_bleu":0.042075565,"translation-fa2ar_fa2ar_quran_bleu":0.0319794715,"nlg_score":0.1389297212}
39
+ {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0100630648,"translation-fa2ar_fa2ar_nahj_bleu":0.0071647909,"translation-fa2ar_fa2ar_sahife_bleu":0.0101185743,"translation-fa2ar_fa2ar_quran_bleu":0.0129058292,"nlg_score":0.1067134448}
40
+ {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0720575119,"translation-fa2ar_fa2ar_nahj_bleu":0.037394526,"translation-fa2ar_fa2ar_sahife_bleu":0.0636064419,"translation-fa2ar_fa2ar_quran_bleu":0.1151715676,"nlg_score":0.181552926}
41
+ {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0086214593,"translation-fa2ar_fa2ar_nahj_bleu":0.006894051,"translation-fa2ar_fa2ar_sahife_bleu":0.009695506,"translation-fa2ar_fa2ar_quran_bleu":0.0092748209,"nlg_score":0.0940241349}
42
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0860361971,"translation-fa2ar_fa2ar_nahj_bleu":0.0440530096,"translation-fa2ar_fa2ar_sahife_bleu":0.0833828112,"translation-fa2ar_fa2ar_quran_bleu":0.1306727704,"nlg_score":0.2010896964}
43
+ {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0045158138,"translation-fa2ar_fa2ar_nahj_bleu":0.004600061,"translation-fa2ar_fa2ar_sahife_bleu":0.0052362431,"translation-fa2ar_fa2ar_quran_bleu":0.0037111373,"nlg_score":0.0682994522}
44
+ {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0922998074,"translation-fa2ar_fa2ar_nahj_bleu":0.0511154919,"translation-fa2ar_fa2ar_sahife_bleu":0.0589808221,"translation-fa2ar_fa2ar_quran_bleu":0.1668031083,"nlg_score":0.1880477876}
45
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0848646543,"translation-fa2ar_fa2ar_nahj_bleu":0.0378800509,"translation-fa2ar_fa2ar_sahife_bleu":0.0810757988,"translation-fa2ar_fa2ar_quran_bleu":0.1356381134,"nlg_score":0.1764906292}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leaderboard/boards_data/translation-fa2en_fa2en.jsonl CHANGED
@@ -1,44 +1,45 @@
1
- {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2247897554,"translation-fa2en_fa2en_tep_bleu":0.1341840946,"translation-fa2en_fa2en_mizan_bleu":0.1909021288,"translation-fa2en_fa2en_quran_bleu":0.1740971535,"translation-fa2en_fa2en_epoque_bleu":0.4544315204,"translation-fa2en_fa2en_nahj_bleu":0.0877235615,"translation-fa2en_fa2en_sahife_bleu":0.0975791022,"nlg_score":0.1779340777}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.1811060704,"translation-fa2en_fa2en_tep_bleu":0.1180786789,"translation-fa2en_fa2en_mizan_bleu":0.1503794353,"translation-fa2en_fa2en_quran_bleu":0.1042682142,"translation-fa2en_fa2en_epoque_bleu":0.3794274854,"translation-fa2en_fa2en_nahj_bleu":0.0641545233,"translation-fa2en_fa2en_sahife_bleu":0.0772362522,"nlg_score":0.1334687319}
3
- {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.016856047,"translation-fa2en_fa2en_tep_bleu":0.0085125001,"translation-fa2en_fa2en_mizan_bleu":0.013661635,"translation-fa2en_fa2en_quran_bleu":0.0181666202,"translation-fa2en_fa2en_epoque_bleu":0.0301282339,"translation-fa2en_fa2en_nahj_bleu":0.0122360126,"translation-fa2en_fa2en_sahife_bleu":0.0110323989,"nlg_score":0.0949943578}
4
- {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2337569687,"translation-fa2en_fa2en_tep_bleu":0.1386371644,"translation-fa2en_fa2en_mizan_bleu":0.2129637469,"translation-fa2en_fa2en_quran_bleu":0.1702102457,"translation-fa2en_fa2en_epoque_bleu":0.478211182,"translation-fa2en_fa2en_nahj_bleu":0.083013513,"translation-fa2en_fa2en_sahife_bleu":0.072000292,"nlg_score":0.1880477876}
 
 
 
5
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.1943596188,"translation-fa2en_fa2en_tep_bleu":0.1061304132,"translation-fa2en_fa2en_mizan_bleu":0.1627839168,"translation-fa2en_fa2en_quran_bleu":0.1169494078,"translation-fa2en_fa2en_epoque_bleu":0.4173115022,"translation-fa2en_fa2en_nahj_bleu":0.081839623,"translation-fa2en_fa2en_sahife_bleu":0.0756905933,"nlg_score":0.1430866672}
6
- {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2435498156,"translation-fa2en_fa2en_tep_bleu":0.1656898075,"translation-fa2en_fa2en_mizan_bleu":0.2055420364,"translation-fa2en_fa2en_quran_bleu":0.1726910304,"translation-fa2en_fa2en_epoque_bleu":0.4912890145,"translation-fa2en_fa2en_nahj_bleu":0.0882784037,"translation-fa2en_fa2en_sahife_bleu":0.0952319793,"nlg_score":0.181552926}
7
- {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2293783795,"translation-fa2en_fa2en_tep_bleu":0.1348246647,"translation-fa2en_fa2en_mizan_bleu":0.1880942935,"translation-fa2en_fa2en_quran_bleu":0.1642751236,"translation-fa2en_fa2en_epoque_bleu":0.4821448205,"translation-fa2en_fa2en_nahj_bleu":0.0857659109,"translation-fa2en_fa2en_sahife_bleu":0.0914041173,"nlg_score":0.1643361642}
8
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2010422229,"translation-fa2en_fa2en_tep_bleu":0.1296290178,"translation-fa2en_fa2en_mizan_bleu":0.1687085372,"translation-fa2en_fa2en_quran_bleu":0.1258778791,"translation-fa2en_fa2en_epoque_bleu":0.4180918256,"translation-fa2en_fa2en_nahj_bleu":0.0766886466,"translation-fa2en_fa2en_sahife_bleu":0.07624077,"nlg_score":0.1538910531}
9
- {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0196181945,"translation-fa2en_fa2en_tep_bleu":0.0101636027,"translation-fa2en_fa2en_mizan_bleu":0.0153753718,"translation-fa2en_fa2en_quran_bleu":0.0231110679,"translation-fa2en_fa2en_epoque_bleu":0.0359429205,"translation-fa2en_fa2en_nahj_bleu":0.0119451943,"translation-fa2en_fa2en_sahife_bleu":0.0117936527,"nlg_score":0.0940241349}
10
- {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2307102128,"translation-fa2en_fa2en_tep_bleu":0.1527807458,"translation-fa2en_fa2en_mizan_bleu":0.1927067243,"translation-fa2en_fa2en_quran_bleu":0.1628198329,"translation-fa2en_fa2en_epoque_bleu":0.4676472481,"translation-fa2en_fa2en_nahj_bleu":0.0810494281,"translation-fa2en_fa2en_sahife_bleu":0.1009417344,"nlg_score":0.194675133}
11
- {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0257184881,"translation-fa2en_fa2en_tep_bleu":0.011593122,"translation-fa2en_fa2en_mizan_bleu":0.0215328963,"translation-fa2en_fa2en_quran_bleu":0.0262056878,"translation-fa2en_fa2en_epoque_bleu":0.047221295,"translation-fa2en_fa2en_nahj_bleu":0.0178557856,"translation-fa2en_fa2en_sahife_bleu":0.0169922826,"nlg_score":0.1196804312}
12
- {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.243332739,"translation-fa2en_fa2en_tep_bleu":0.1612382623,"translation-fa2en_fa2en_mizan_bleu":0.2044715619,"translation-fa2en_fa2en_quran_bleu":0.1659066062,"translation-fa2en_fa2en_epoque_bleu":0.4955763662,"translation-fa2en_fa2en_nahj_bleu":0.0879393077,"translation-fa2en_fa2en_sahife_bleu":0.1007280453,"nlg_score":0.1631530657}
13
- {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0199585579,"translation-fa2en_fa2en_tep_bleu":0.0097804397,"translation-fa2en_fa2en_mizan_bleu":0.0144809896,"translation-fa2en_fa2en_quran_bleu":0.0259691427,"translation-fa2en_fa2en_epoque_bleu":0.0345304173,"translation-fa2en_fa2en_nahj_bleu":0.0150589625,"translation-fa2en_fa2en_sahife_bleu":0.0157047184,"nlg_score":0.1067134448}
14
- {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2145488085,"translation-fa2en_fa2en_tep_bleu":0.1307272464,"translation-fa2en_fa2en_mizan_bleu":0.1697754862,"translation-fa2en_fa2en_quran_bleu":0.1552415558,"translation-fa2en_fa2en_epoque_bleu":0.4513682579,"translation-fa2en_fa2en_nahj_bleu":0.0842673472,"translation-fa2en_fa2en_sahife_bleu":0.0853787118,"nlg_score":0.16056333}
15
- {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.217991447,"translation-fa2en_fa2en_tep_bleu":0.1271542443,"translation-fa2en_fa2en_mizan_bleu":0.1728081337,"translation-fa2en_fa2en_quran_bleu":0.158860515,"translation-fa2en_fa2en_epoque_bleu":0.4572670962,"translation-fa2en_fa2en_nahj_bleu":0.0902445729,"translation-fa2en_fa2en_sahife_bleu":0.0945000287,"nlg_score":0.1679338638}
16
- {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.1909462413,"translation-fa2en_fa2en_tep_bleu":0.1038996524,"translation-fa2en_fa2en_mizan_bleu":0.1513900262,"translation-fa2en_fa2en_quran_bleu":0.129609905,"translation-fa2en_fa2en_epoque_bleu":0.4266734606,"translation-fa2en_fa2en_nahj_bleu":0.0619630431,"translation-fa2en_fa2en_sahife_bleu":0.0584029483,"nlg_score":0.1567965528}
17
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.0691353117,"translation-fa2en_fa2en_tep_bleu":0.0320908261,"translation-fa2en_fa2en_mizan_bleu":0.0535229905,"translation-fa2en_fa2en_quran_bleu":0.0800143919,"translation-fa2en_fa2en_epoque_bleu":0.133977443,"translation-fa2en_fa2en_nahj_bleu":0.0362958954,"translation-fa2en_fa2en_sahife_bleu":0.0393317574,"nlg_score":0.1089333827}
18
- {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.1451163884,"translation-fa2en_fa2en_tep_bleu":0.0393307601,"translation-fa2en_fa2en_mizan_bleu":0.1009347025,"translation-fa2en_fa2en_quran_bleu":0.0929688918,"translation-fa2en_fa2en_epoque_bleu":0.3660914464,"translation-fa2en_fa2en_nahj_bleu":0.0536507876,"translation-fa2en_fa2en_sahife_bleu":0.05038339,"nlg_score":0.1319091735}
 
 
19
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.1249013271,"translation-fa2en_fa2en_tep_bleu":0.0564543942,"translation-fa2en_fa2en_mizan_bleu":0.0739643668,"translation-fa2en_fa2en_quran_bleu":0.0677317381,"translation-fa2en_fa2en_epoque_bleu":0.3111968032,"translation-fa2en_fa2en_nahj_bleu":0.0523541092,"translation-fa2en_fa2en_sahife_bleu":0.0479821907,"nlg_score":0.112015688}
 
20
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0450244679,"translation-fa2en_fa2en_tep_bleu":0.0166138322,"translation-fa2en_fa2en_mizan_bleu":0.0478141187,"translation-fa2en_fa2en_quran_bleu":0.0426202225,"translation-fa2en_fa2en_epoque_bleu":0.0802277942,"translation-fa2en_fa2en_nahj_bleu":0.0252662094,"translation-fa2en_fa2en_sahife_bleu":0.0268950031,"nlg_score":0.0934094344}
 
 
 
21
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.1840809218,"translation-fa2en_fa2en_tep_bleu":0.1011436783,"translation-fa2en_fa2en_mizan_bleu":0.149157222,"translation-fa2en_fa2en_quran_bleu":0.1377761662,"translation-fa2en_fa2en_epoque_bleu":0.3802946233,"translation-fa2en_fa2en_nahj_bleu":0.0851756367,"translation-fa2en_fa2en_sahife_bleu":0.0857201524,"nlg_score":0.1389297212}
22
- {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0146059874,"translation-fa2en_fa2en_tep_bleu":0.0065306354,"translation-fa2en_fa2en_mizan_bleu":0.0119363121,"translation-fa2en_fa2en_quran_bleu":0.0152281808,"translation-fa2en_fa2en_epoque_bleu":0.0274143056,"translation-fa2en_fa2en_nahj_bleu":0.0094070307,"translation-fa2en_fa2en_sahife_bleu":0.0093811964,"nlg_score":0.0682994522}
23
- {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.14443262,"translation-fa2en_fa2en_tep_bleu":0.0636878051,"translation-fa2en_fa2en_mizan_bleu":0.1045784226,"translation-fa2en_fa2en_quran_bleu":0.1065169191,"translation-fa2en_fa2en_epoque_bleu":0.3331896819,"translation-fa2en_fa2en_nahj_bleu":0.0573420672,"translation-fa2en_fa2en_sahife_bleu":0.0526154809,"nlg_score":0.1196400535}
 
24
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2559078555,"translation-fa2en_fa2en_tep_bleu":0.1687480056,"translation-fa2en_fa2en_mizan_bleu":0.2113676707,"translation-fa2en_fa2en_quran_bleu":0.2008290856,"translation-fa2en_fa2en_epoque_bleu":0.5099219192,"translation-fa2en_fa2en_nahj_bleu":0.0984185664,"translation-fa2en_fa2en_sahife_bleu":0.1125739279,"nlg_score":0.2010896964}
25
- {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2389011537,"translation-fa2en_fa2en_tep_bleu":0.1431825698,"translation-fa2en_fa2en_mizan_bleu":0.2056729072,"translation-fa2en_fa2en_quran_bleu":0.1776018574,"translation-fa2en_fa2en_epoque_bleu":0.4842161688,"translation-fa2en_fa2en_nahj_bleu":0.0886384727,"translation-fa2en_fa2en_sahife_bleu":0.1045044839,"nlg_score":0.1901206806}
 
26
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2370270006,"translation-fa2en_fa2en_tep_bleu":0.1858400323,"translation-fa2en_fa2en_mizan_bleu":0.2008132758,"translation-fa2en_fa2en_quran_bleu":0.1727292787,"translation-fa2en_fa2en_epoque_bleu":0.4507197199,"translation-fa2en_fa2en_nahj_bleu":0.0893284136,"translation-fa2en_fa2en_sahife_bleu":0.1038607373,"nlg_score":0.1764906292}
27
- {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2332592983,"translation-fa2en_fa2en_tep_bleu":0.1497847918,"translation-fa2en_fa2en_mizan_bleu":0.1972270386,"translation-fa2en_fa2en_quran_bleu":0.1725699648,"translation-fa2en_fa2en_epoque_bleu":0.4678973942,"translation-fa2en_fa2en_nahj_bleu":0.090543674,"translation-fa2en_fa2en_sahife_bleu":0.1008380909,"nlg_score":0.1810678527}
28
- {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0486479291,"translation-fa2en_fa2en_tep_bleu":0.023223206,"translation-fa2en_fa2en_mizan_bleu":0.0397123038,"translation-fa2en_fa2en_quran_bleu":0.0471874873,"translation-fa2en_fa2en_epoque_bleu":0.0972392875,"translation-fa2en_fa2en_nahj_bleu":0.0246695639,"translation-fa2en_fa2en_sahife_bleu":0.0238899949,"nlg_score":0.1137933652}
29
- {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.0757086487,"translation-fa2en_fa2en_tep_bleu":0.0316922994,"translation-fa2en_fa2en_mizan_bleu":0.0530331645,"translation-fa2en_fa2en_quran_bleu":0.1028139165,"translation-fa2en_fa2en_epoque_bleu":0.157367237,"translation-fa2en_fa2en_nahj_bleu":0.0336372263,"translation-fa2en_fa2en_sahife_bleu":0.0279485156,"nlg_score":0.178231145}
30
- {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.0559484689,"translation-fa2en_fa2en_tep_bleu":0.0190401646,"translation-fa2en_fa2en_mizan_bleu":0.0369126121,"translation-fa2en_fa2en_quran_bleu":0.0401048971,"translation-fa2en_fa2en_epoque_bleu":0.1381975553,"translation-fa2en_fa2en_nahj_bleu":0.0232788817,"translation-fa2en_fa2en_sahife_bleu":0.017477039,"nlg_score":0.1368740087}
31
- {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.0858473556,"translation-fa2en_fa2en_tep_bleu":0.0381837678,"translation-fa2en_fa2en_mizan_bleu":0.0750320212,"translation-fa2en_fa2en_quran_bleu":0.0986486354,"translation-fa2en_fa2en_epoque_bleu":0.1513689047,"translation-fa2en_fa2en_nahj_bleu":0.0568182224,"translation-fa2en_fa2en_sahife_bleu":0.0570620784,"nlg_score":0.1659339021}
32
- {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.1892370035,"translation-fa2en_fa2en_tep_bleu":0.1290684643,"translation-fa2en_fa2en_mizan_bleu":0.1721408901,"translation-fa2en_fa2en_quran_bleu":0.1736791408,"translation-fa2en_fa2en_epoque_bleu":0.346100597,"translation-fa2en_fa2en_nahj_bleu":0.0776400174,"translation-fa2en_fa2en_sahife_bleu":0.08279759,"nlg_score":0.1641995602}
33
- {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2098706917,"translation-fa2en_fa2en_tep_bleu":0.1127439575,"translation-fa2en_fa2en_mizan_bleu":0.1700828916,"translation-fa2en_fa2en_quran_bleu":0.1573224172,"translation-fa2en_fa2en_epoque_bleu":0.4464135788,"translation-fa2en_fa2en_nahj_bleu":0.080502837,"translation-fa2en_fa2en_sahife_bleu":0.0868680665,"nlg_score":0.1417778788}
34
- {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2165819036,"translation-fa2en_fa2en_tep_bleu":0.13491043,"translation-fa2en_fa2en_mizan_bleu":0.1810957829,"translation-fa2en_fa2en_quran_bleu":0.164168601,"translation-fa2en_fa2en_epoque_bleu":0.4383628208,"translation-fa2en_fa2en_nahj_bleu":0.0942939662,"translation-fa2en_fa2en_sahife_bleu":0.0827637394,"nlg_score":0.1665903777}
35
- {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.1819249869,"translation-fa2en_fa2en_tep_bleu":0.1112770149,"translation-fa2en_fa2en_mizan_bleu":0.1462998812,"translation-fa2en_fa2en_quran_bleu":0.1190152961,"translation-fa2en_fa2en_epoque_bleu":0.3772244447,"translation-fa2en_fa2en_nahj_bleu":0.0773225294,"translation-fa2en_fa2en_sahife_bleu":0.086248601,"nlg_score":0.1324031203}
36
- {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2024225184,"translation-fa2en_fa2en_tep_bleu":0.1163127945,"translation-fa2en_fa2en_mizan_bleu":0.1649009947,"translation-fa2en_fa2en_quran_bleu":0.1513328968,"translation-fa2en_fa2en_epoque_bleu":0.4171232399,"translation-fa2en_fa2en_nahj_bleu":0.0857999462,"translation-fa2en_fa2en_sahife_bleu":0.0929479364,"nlg_score":0.1557270864}
37
- {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0901939948,"translation-fa2en_fa2en_tep_bleu":0.0521908916,"translation-fa2en_fa2en_mizan_bleu":0.0828690879,"translation-fa2en_fa2en_quran_bleu":0.0756298248,"translation-fa2en_fa2en_epoque_bleu":0.1645619674,"translation-fa2en_fa2en_nahj_bleu":0.048616237,"translation-fa2en_fa2en_sahife_bleu":0.0518842318,"nlg_score":0.0944140383}
38
- {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.234039473,"translation-fa2en_fa2en_tep_bleu":0.1597644653,"translation-fa2en_fa2en_mizan_bleu":0.1946759365,"translation-fa2en_fa2en_quran_bleu":0.1638938233,"translation-fa2en_fa2en_epoque_bleu":0.474760879,"translation-fa2en_fa2en_nahj_bleu":0.0825458621,"translation-fa2en_fa2en_sahife_bleu":0.0952634494,"nlg_score":0.18964968}
39
- {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0414094379,"translation-fa2en_fa2en_tep_bleu":0.019539618,"translation-fa2en_fa2en_mizan_bleu":0.0346087447,"translation-fa2en_fa2en_quran_bleu":0.0396858881,"translation-fa2en_fa2en_epoque_bleu":0.0798341141,"translation-fa2en_fa2en_nahj_bleu":0.0244191809,"translation-fa2en_fa2en_sahife_bleu":0.0231626908,"nlg_score":0.0880621978}
40
- {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2177785793,"translation-fa2en_fa2en_tep_bleu":0.1189948472,"translation-fa2en_fa2en_mizan_bleu":0.1793626928,"translation-fa2en_fa2en_quran_bleu":0.1718006478,"translation-fa2en_fa2en_epoque_bleu":0.4500382308,"translation-fa2en_fa2en_nahj_bleu":0.0836776138,"translation-fa2en_fa2en_sahife_bleu":0.1034067477,"nlg_score":0.164118288}
41
- {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0862123314,"translation-fa2en_fa2en_tep_bleu":0.0334491641,"translation-fa2en_fa2en_mizan_bleu":0.0758837027,"translation-fa2en_fa2en_quran_bleu":0.0892296624,"translation-fa2en_fa2en_epoque_bleu":0.1688644918,"translation-fa2en_fa2en_nahj_bleu":0.042819328,"translation-fa2en_fa2en_sahife_bleu":0.0473482715,"nlg_score":0.1129755187}
42
- {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0423299736,"translation-fa2en_fa2en_tep_bleu":0.0124774953,"translation-fa2en_fa2en_mizan_bleu":0.0314077643,"translation-fa2en_fa2en_quran_bleu":0.0294898862,"translation-fa2en_fa2en_epoque_bleu":0.1006673489,"translation-fa2en_fa2en_nahj_bleu":0.0117672852,"translation-fa2en_fa2en_sahife_bleu":0.0246608556,"nlg_score":0.0823387318}
43
- {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.0349431803,"translation-fa2en_fa2en_tep_bleu":0.017140489,"translation-fa2en_fa2en_mizan_bleu":0.0284546797,"translation-fa2en_fa2en_quran_bleu":0.0300397279,"translation-fa2en_fa2en_epoque_bleu":0.0720425155,"translation-fa2en_fa2en_nahj_bleu":0.0166649152,"translation-fa2en_fa2en_sahife_bleu":0.0158679919,"nlg_score":0.1151518212}
44
- {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0382934287,"translation-fa2en_fa2en_tep_bleu":0.0179577911,"translation-fa2en_fa2en_mizan_bleu":0.0358622368,"translation-fa2en_fa2en_quran_bleu":0.0370982403,"translation-fa2en_fa2en_epoque_bleu":0.0649389754,"translation-fa2en_fa2en_nahj_bleu":0.0222483795,"translation-fa2en_fa2en_sahife_bleu":0.0341438816,"nlg_score":0.0509841903}
 
1
+ {"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2165819036,"translation-fa2en_fa2en_tep_bleu":0.13491043,"translation-fa2en_fa2en_mizan_bleu":0.1810957829,"translation-fa2en_fa2en_quran_bleu":0.164168601,"translation-fa2en_fa2en_epoque_bleu":0.4383628208,"translation-fa2en_fa2en_nahj_bleu":0.0942939662,"translation-fa2en_fa2en_sahife_bleu":0.0827637394,"nlg_score":0.1665903777}
2
+ {"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.1892370035,"translation-fa2en_fa2en_tep_bleu":0.1290684643,"translation-fa2en_fa2en_mizan_bleu":0.1721408901,"translation-fa2en_fa2en_quran_bleu":0.1736791408,"translation-fa2en_fa2en_epoque_bleu":0.346100597,"translation-fa2en_fa2en_nahj_bleu":0.0776400174,"translation-fa2en_fa2en_sahife_bleu":0.08279759,"nlg_score":0.1641995602}
3
+ {"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.0757086487,"translation-fa2en_fa2en_tep_bleu":0.0316922994,"translation-fa2en_fa2en_mizan_bleu":0.0530331645,"translation-fa2en_fa2en_quran_bleu":0.1028139165,"translation-fa2en_fa2en_epoque_bleu":0.157367237,"translation-fa2en_fa2en_nahj_bleu":0.0336372263,"translation-fa2en_fa2en_sahife_bleu":0.0279485156,"nlg_score":0.178231145}
4
+ {"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0414094379,"translation-fa2en_fa2en_tep_bleu":0.019539618,"translation-fa2en_fa2en_mizan_bleu":0.0346087447,"translation-fa2en_fa2en_quran_bleu":0.0396858881,"translation-fa2en_fa2en_epoque_bleu":0.0798341141,"translation-fa2en_fa2en_nahj_bleu":0.0244191809,"translation-fa2en_fa2en_sahife_bleu":0.0231626908,"nlg_score":0.0880621978}
5
+ {"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.1909462413,"translation-fa2en_fa2en_tep_bleu":0.1038996524,"translation-fa2en_fa2en_mizan_bleu":0.1513900262,"translation-fa2en_fa2en_quran_bleu":0.129609905,"translation-fa2en_fa2en_epoque_bleu":0.4266734606,"translation-fa2en_fa2en_nahj_bleu":0.0619630431,"translation-fa2en_fa2en_sahife_bleu":0.0584029483,"nlg_score":0.1567965528}
6
+ {"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2145488085,"translation-fa2en_fa2en_tep_bleu":0.1307272464,"translation-fa2en_fa2en_mizan_bleu":0.1697754862,"translation-fa2en_fa2en_quran_bleu":0.1552415558,"translation-fa2en_fa2en_epoque_bleu":0.4513682579,"translation-fa2en_fa2en_nahj_bleu":0.0842673472,"translation-fa2en_fa2en_sahife_bleu":0.0853787118,"nlg_score":0.16056333}
7
+ {"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.217991447,"translation-fa2en_fa2en_tep_bleu":0.1271542443,"translation-fa2en_fa2en_mizan_bleu":0.1728081337,"translation-fa2en_fa2en_quran_bleu":0.158860515,"translation-fa2en_fa2en_epoque_bleu":0.4572670962,"translation-fa2en_fa2en_nahj_bleu":0.0902445729,"translation-fa2en_fa2en_sahife_bleu":0.0945000287,"nlg_score":0.1679338638}
8
+ {"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.0559484689,"translation-fa2en_fa2en_tep_bleu":0.0190401646,"translation-fa2en_fa2en_mizan_bleu":0.0369126121,"translation-fa2en_fa2en_quran_bleu":0.0401048971,"translation-fa2en_fa2en_epoque_bleu":0.1381975553,"translation-fa2en_fa2en_nahj_bleu":0.0232788817,"translation-fa2en_fa2en_sahife_bleu":0.017477039,"nlg_score":0.1368740087}
9
+ {"Model Name":"gpt-oss-20b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.1819249869,"translation-fa2en_fa2en_tep_bleu":0.1112770149,"translation-fa2en_fa2en_mizan_bleu":0.1462998812,"translation-fa2en_fa2en_quran_bleu":0.1190152961,"translation-fa2en_fa2en_epoque_bleu":0.3772244447,"translation-fa2en_fa2en_nahj_bleu":0.0773225294,"translation-fa2en_fa2en_sahife_bleu":0.086248601,"nlg_score":0.1324031203}
10
+ {"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.0349431803,"translation-fa2en_fa2en_tep_bleu":0.017140489,"translation-fa2en_fa2en_mizan_bleu":0.0284546797,"translation-fa2en_fa2en_quran_bleu":0.0300397279,"translation-fa2en_fa2en_epoque_bleu":0.0720425155,"translation-fa2en_fa2en_nahj_bleu":0.0166649152,"translation-fa2en_fa2en_sahife_bleu":0.0158679919,"nlg_score":0.1151518212}
11
+ {"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2293783795,"translation-fa2en_fa2en_tep_bleu":0.1348246647,"translation-fa2en_fa2en_mizan_bleu":0.1880942935,"translation-fa2en_fa2en_quran_bleu":0.1642751236,"translation-fa2en_fa2en_epoque_bleu":0.4821448205,"translation-fa2en_fa2en_nahj_bleu":0.0857659109,"translation-fa2en_fa2en_sahife_bleu":0.0914041173,"nlg_score":0.1643361642}
12
+ {"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2177785793,"translation-fa2en_fa2en_tep_bleu":0.1189948472,"translation-fa2en_fa2en_mizan_bleu":0.1793626928,"translation-fa2en_fa2en_quran_bleu":0.1718006478,"translation-fa2en_fa2en_epoque_bleu":0.4500382308,"translation-fa2en_fa2en_nahj_bleu":0.0836776138,"translation-fa2en_fa2en_sahife_bleu":0.1034067477,"nlg_score":0.164118288}
13
+ {"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0901939948,"translation-fa2en_fa2en_tep_bleu":0.0521908916,"translation-fa2en_fa2en_mizan_bleu":0.0828690879,"translation-fa2en_fa2en_quran_bleu":0.0756298248,"translation-fa2en_fa2en_epoque_bleu":0.1645619674,"translation-fa2en_fa2en_nahj_bleu":0.048616237,"translation-fa2en_fa2en_sahife_bleu":0.0518842318,"nlg_score":0.0944140383}
14
+ {"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2332592983,"translation-fa2en_fa2en_tep_bleu":0.1497847918,"translation-fa2en_fa2en_mizan_bleu":0.1972270386,"translation-fa2en_fa2en_quran_bleu":0.1725699648,"translation-fa2en_fa2en_epoque_bleu":0.4678973942,"translation-fa2en_fa2en_nahj_bleu":0.090543674,"translation-fa2en_fa2en_sahife_bleu":0.1008380909,"nlg_score":0.1810678527}
15
+ {"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0862123314,"translation-fa2en_fa2en_tep_bleu":0.0334491641,"translation-fa2en_fa2en_mizan_bleu":0.0758837027,"translation-fa2en_fa2en_quran_bleu":0.0892296624,"translation-fa2en_fa2en_epoque_bleu":0.1688644918,"translation-fa2en_fa2en_nahj_bleu":0.042819328,"translation-fa2en_fa2en_sahife_bleu":0.0473482715,"nlg_score":0.1129755187}
16
+ {"Model Name":"yandexgpt-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.1854877882,"translation-fa2en_fa2en_tep_bleu":0.1041087436,"translation-fa2en_fa2en_mizan_bleu":0.1522436467,"translation-fa2en_fa2en_quran_bleu":0.1409628655,"translation-fa2en_fa2en_epoque_bleu":0.391167392,"translation-fa2en_fa2en_nahj_bleu":0.0609865725,"translation-fa2en_fa2en_sahife_bleu":0.0800799314,"nlg_score":0.1035446324}
17
+ {"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.1451163884,"translation-fa2en_fa2en_tep_bleu":0.0393307601,"translation-fa2en_fa2en_mizan_bleu":0.1009347025,"translation-fa2en_fa2en_quran_bleu":0.0929688918,"translation-fa2en_fa2en_epoque_bleu":0.3660914464,"translation-fa2en_fa2en_nahj_bleu":0.0536507876,"translation-fa2en_fa2en_sahife_bleu":0.05038339,"nlg_score":0.1319091735}
18
  {"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.1811060704,"translation-fa2en_fa2en_tep_bleu":0.1180786789,"translation-fa2en_fa2en_mizan_bleu":0.1503794353,"translation-fa2en_fa2en_quran_bleu":0.1042682142,"translation-fa2en_fa2en_epoque_bleu":0.3794274854,"translation-fa2en_fa2en_nahj_bleu":0.0641545233,"translation-fa2en_fa2en_sahife_bleu":0.0772362522,"nlg_score":0.1334687319}
19
+ {"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0257184881,"translation-fa2en_fa2en_tep_bleu":0.011593122,"translation-fa2en_fa2en_mizan_bleu":0.0215328963,"translation-fa2en_fa2en_quran_bleu":0.0262056878,"translation-fa2en_fa2en_epoque_bleu":0.047221295,"translation-fa2en_fa2en_nahj_bleu":0.0178557856,"translation-fa2en_fa2en_sahife_bleu":0.0169922826,"nlg_score":0.1196804312}
20
+ {"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.14443262,"translation-fa2en_fa2en_tep_bleu":0.0636878051,"translation-fa2en_fa2en_mizan_bleu":0.1045784226,"translation-fa2en_fa2en_quran_bleu":0.1065169191,"translation-fa2en_fa2en_epoque_bleu":0.3331896819,"translation-fa2en_fa2en_nahj_bleu":0.0573420672,"translation-fa2en_fa2en_sahife_bleu":0.0526154809,"nlg_score":0.1196400535}
21
+ {"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2024225184,"translation-fa2en_fa2en_tep_bleu":0.1163127945,"translation-fa2en_fa2en_mizan_bleu":0.1649009947,"translation-fa2en_fa2en_quran_bleu":0.1513328968,"translation-fa2en_fa2en_epoque_bleu":0.4171232399,"translation-fa2en_fa2en_nahj_bleu":0.0857999462,"translation-fa2en_fa2en_sahife_bleu":0.0929479364,"nlg_score":0.1557270864}
22
+ {"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.234039473,"translation-fa2en_fa2en_tep_bleu":0.1597644653,"translation-fa2en_fa2en_mizan_bleu":0.1946759365,"translation-fa2en_fa2en_quran_bleu":0.1638938233,"translation-fa2en_fa2en_epoque_bleu":0.474760879,"translation-fa2en_fa2en_nahj_bleu":0.0825458621,"translation-fa2en_fa2en_sahife_bleu":0.0952634494,"nlg_score":0.18964968}
23
+ {"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2247897554,"translation-fa2en_fa2en_tep_bleu":0.1341840946,"translation-fa2en_fa2en_mizan_bleu":0.1909021288,"translation-fa2en_fa2en_quran_bleu":0.1740971535,"translation-fa2en_fa2en_epoque_bleu":0.4544315204,"translation-fa2en_fa2en_nahj_bleu":0.0877235615,"translation-fa2en_fa2en_sahife_bleu":0.0975791022,"nlg_score":0.1779340777}
24
  {"Model Name":"gpt-oss-120b-low-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.1943596188,"translation-fa2en_fa2en_tep_bleu":0.1061304132,"translation-fa2en_fa2en_mizan_bleu":0.1627839168,"translation-fa2en_fa2en_quran_bleu":0.1169494078,"translation-fa2en_fa2en_epoque_bleu":0.4173115022,"translation-fa2en_fa2en_nahj_bleu":0.081839623,"translation-fa2en_fa2en_sahife_bleu":0.0756905933,"nlg_score":0.1430866672}
 
 
25
  {"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2010422229,"translation-fa2en_fa2en_tep_bleu":0.1296290178,"translation-fa2en_fa2en_mizan_bleu":0.1687085372,"translation-fa2en_fa2en_quran_bleu":0.1258778791,"translation-fa2en_fa2en_epoque_bleu":0.4180918256,"translation-fa2en_fa2en_nahj_bleu":0.0766886466,"translation-fa2en_fa2en_sahife_bleu":0.07624077,"nlg_score":0.1538910531}
26
+ {"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.016856047,"translation-fa2en_fa2en_tep_bleu":0.0085125001,"translation-fa2en_fa2en_mizan_bleu":0.013661635,"translation-fa2en_fa2en_quran_bleu":0.0181666202,"translation-fa2en_fa2en_epoque_bleu":0.0301282339,"translation-fa2en_fa2en_nahj_bleu":0.0122360126,"translation-fa2en_fa2en_sahife_bleu":0.0110323989,"nlg_score":0.0949943578}
27
+ {"Model Name":"gemma-3-270m-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"268000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0382934287,"translation-fa2en_fa2en_tep_bleu":0.0179577911,"translation-fa2en_fa2en_mizan_bleu":0.0358622368,"translation-fa2en_fa2en_quran_bleu":0.0370982403,"translation-fa2en_fa2en_epoque_bleu":0.0649389754,"translation-fa2en_fa2en_nahj_bleu":0.0222483795,"translation-fa2en_fa2en_sahife_bleu":0.0341438816,"nlg_score":0.0509841903}
 
 
 
 
 
 
28
  {"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.0691353117,"translation-fa2en_fa2en_tep_bleu":0.0320908261,"translation-fa2en_fa2en_mizan_bleu":0.0535229905,"translation-fa2en_fa2en_quran_bleu":0.0800143919,"translation-fa2en_fa2en_epoque_bleu":0.133977443,"translation-fa2en_fa2en_nahj_bleu":0.0362958954,"translation-fa2en_fa2en_sahife_bleu":0.0393317574,"nlg_score":0.1089333827}
29
+ {"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2307102128,"translation-fa2en_fa2en_tep_bleu":0.1527807458,"translation-fa2en_fa2en_mizan_bleu":0.1927067243,"translation-fa2en_fa2en_quran_bleu":0.1628198329,"translation-fa2en_fa2en_epoque_bleu":0.4676472481,"translation-fa2en_fa2en_nahj_bleu":0.0810494281,"translation-fa2en_fa2en_sahife_bleu":0.1009417344,"nlg_score":0.194675133}
30
+ {"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0486479291,"translation-fa2en_fa2en_tep_bleu":0.023223206,"translation-fa2en_fa2en_mizan_bleu":0.0397123038,"translation-fa2en_fa2en_quran_bleu":0.0471874873,"translation-fa2en_fa2en_epoque_bleu":0.0972392875,"translation-fa2en_fa2en_nahj_bleu":0.0246695639,"translation-fa2en_fa2en_sahife_bleu":0.0238899949,"nlg_score":0.1137933652}
31
+ {"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.0858473556,"translation-fa2en_fa2en_tep_bleu":0.0381837678,"translation-fa2en_fa2en_mizan_bleu":0.0750320212,"translation-fa2en_fa2en_quran_bleu":0.0986486354,"translation-fa2en_fa2en_epoque_bleu":0.1513689047,"translation-fa2en_fa2en_nahj_bleu":0.0568182224,"translation-fa2en_fa2en_sahife_bleu":0.0570620784,"nlg_score":0.1659339021}
32
  {"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.1249013271,"translation-fa2en_fa2en_tep_bleu":0.0564543942,"translation-fa2en_fa2en_mizan_bleu":0.0739643668,"translation-fa2en_fa2en_quran_bleu":0.0677317381,"translation-fa2en_fa2en_epoque_bleu":0.3111968032,"translation-fa2en_fa2en_nahj_bleu":0.0523541092,"translation-fa2en_fa2en_sahife_bleu":0.0479821907,"nlg_score":0.112015688}
33
+ {"Model Name":"gpt-5-mini-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.243332739,"translation-fa2en_fa2en_tep_bleu":0.1612382623,"translation-fa2en_fa2en_mizan_bleu":0.2044715619,"translation-fa2en_fa2en_quran_bleu":0.1659066062,"translation-fa2en_fa2en_epoque_bleu":0.4955763662,"translation-fa2en_fa2en_nahj_bleu":0.0879393077,"translation-fa2en_fa2en_sahife_bleu":0.1007280453,"nlg_score":0.1631530657}
34
  {"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0450244679,"translation-fa2en_fa2en_tep_bleu":0.0166138322,"translation-fa2en_fa2en_mizan_bleu":0.0478141187,"translation-fa2en_fa2en_quran_bleu":0.0426202225,"translation-fa2en_fa2en_epoque_bleu":0.0802277942,"translation-fa2en_fa2en_nahj_bleu":0.0252662094,"translation-fa2en_fa2en_sahife_bleu":0.0268950031,"nlg_score":0.0934094344}
35
+ {"Model Name":"gpt-5-nano-minimal-reasoning","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2098706917,"translation-fa2en_fa2en_tep_bleu":0.1127439575,"translation-fa2en_fa2en_mizan_bleu":0.1700828916,"translation-fa2en_fa2en_quran_bleu":0.1573224172,"translation-fa2en_fa2en_epoque_bleu":0.4464135788,"translation-fa2en_fa2en_nahj_bleu":0.080502837,"translation-fa2en_fa2en_sahife_bleu":0.0868680665,"nlg_score":0.1417778788}
36
+ {"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0423299736,"translation-fa2en_fa2en_tep_bleu":0.0124774953,"translation-fa2en_fa2en_mizan_bleu":0.0314077643,"translation-fa2en_fa2en_quran_bleu":0.0294898862,"translation-fa2en_fa2en_epoque_bleu":0.1006673489,"translation-fa2en_fa2en_nahj_bleu":0.0117672852,"translation-fa2en_fa2en_sahife_bleu":0.0246608556,"nlg_score":0.0823387318}
37
+ {"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2389011537,"translation-fa2en_fa2en_tep_bleu":0.1431825698,"translation-fa2en_fa2en_mizan_bleu":0.2056729072,"translation-fa2en_fa2en_quran_bleu":0.1776018574,"translation-fa2en_fa2en_epoque_bleu":0.4842161688,"translation-fa2en_fa2en_nahj_bleu":0.0886384727,"translation-fa2en_fa2en_sahife_bleu":0.1045044839,"nlg_score":0.1901206806}
38
  {"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.1840809218,"translation-fa2en_fa2en_tep_bleu":0.1011436783,"translation-fa2en_fa2en_mizan_bleu":0.149157222,"translation-fa2en_fa2en_quran_bleu":0.1377761662,"translation-fa2en_fa2en_epoque_bleu":0.3802946233,"translation-fa2en_fa2en_nahj_bleu":0.0851756367,"translation-fa2en_fa2en_sahife_bleu":0.0857201524,"nlg_score":0.1389297212}
39
+ {"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0199585579,"translation-fa2en_fa2en_tep_bleu":0.0097804397,"translation-fa2en_fa2en_mizan_bleu":0.0144809896,"translation-fa2en_fa2en_quran_bleu":0.0259691427,"translation-fa2en_fa2en_epoque_bleu":0.0345304173,"translation-fa2en_fa2en_nahj_bleu":0.0150589625,"translation-fa2en_fa2en_sahife_bleu":0.0157047184,"nlg_score":0.1067134448}
40
+ {"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2435498156,"translation-fa2en_fa2en_tep_bleu":0.1656898075,"translation-fa2en_fa2en_mizan_bleu":0.2055420364,"translation-fa2en_fa2en_quran_bleu":0.1726910304,"translation-fa2en_fa2en_epoque_bleu":0.4912890145,"translation-fa2en_fa2en_nahj_bleu":0.0882784037,"translation-fa2en_fa2en_sahife_bleu":0.0952319793,"nlg_score":0.181552926}
41
+ {"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0196181945,"translation-fa2en_fa2en_tep_bleu":0.0101636027,"translation-fa2en_fa2en_mizan_bleu":0.0153753718,"translation-fa2en_fa2en_quran_bleu":0.0231110679,"translation-fa2en_fa2en_epoque_bleu":0.0359429205,"translation-fa2en_fa2en_nahj_bleu":0.0119451943,"translation-fa2en_fa2en_sahife_bleu":0.0117936527,"nlg_score":0.0940241349}
42
  {"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2559078555,"translation-fa2en_fa2en_tep_bleu":0.1687480056,"translation-fa2en_fa2en_mizan_bleu":0.2113676707,"translation-fa2en_fa2en_quran_bleu":0.2008290856,"translation-fa2en_fa2en_epoque_bleu":0.5099219192,"translation-fa2en_fa2en_nahj_bleu":0.0984185664,"translation-fa2en_fa2en_sahife_bleu":0.1125739279,"nlg_score":0.2010896964}
43
+ {"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0146059874,"translation-fa2en_fa2en_tep_bleu":0.0065306354,"translation-fa2en_fa2en_mizan_bleu":0.0119363121,"translation-fa2en_fa2en_quran_bleu":0.0152281808,"translation-fa2en_fa2en_epoque_bleu":0.0274143056,"translation-fa2en_fa2en_nahj_bleu":0.0094070307,"translation-fa2en_fa2en_sahife_bleu":0.0093811964,"nlg_score":0.0682994522}
44
+ {"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2337569687,"translation-fa2en_fa2en_tep_bleu":0.1386371644,"translation-fa2en_fa2en_mizan_bleu":0.2129637469,"translation-fa2en_fa2en_quran_bleu":0.1702102457,"translation-fa2en_fa2en_epoque_bleu":0.478211182,"translation-fa2en_fa2en_nahj_bleu":0.083013513,"translation-fa2en_fa2en_sahife_bleu":0.072000292,"nlg_score":0.1880477876}
45
  {"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2370270006,"translation-fa2en_fa2en_tep_bleu":0.1858400323,"translation-fa2en_fa2en_mizan_bleu":0.2008132758,"translation-fa2en_fa2en_quran_bleu":0.1727292787,"translation-fa2en_fa2en_epoque_bleu":0.4507197199,"translation-fa2en_fa2en_nahj_bleu":0.0893284136,"translation-fa2en_fa2en_sahife_bleu":0.1038607373,"nlg_score":0.1764906292}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leaderboard/leaderboard_config.yaml CHANGED
@@ -292,6 +292,12 @@ model_display_configs:
292
  url: "https://developers.googleblog.com/en/introducing-gemma-3-270m/"
293
  thinking: "❌" # Sure
294
 
 
 
 
 
 
 
295
  # Add one entry for each model whose display name or URL you want to customize.
296
  # If a model ID from your data is not listed here, its raw ID will be used as its name.
297
 
 
292
  url: "https://developers.googleblog.com/en/introducing-gemma-3-270m/"
293
  thinking: "❌" # Sure
294
 
295
+ "yandexgpt-lite":
296
+ display_name: "YandexGPT-5 Lite"
297
+ url: "https://yandex.cloud/en/services/yandexgpt"
298
+ thinking: "❌" # Sure
299
+
300
+
301
  # Add one entry for each model whose display name or URL you want to customize.
302
  # If a model ID from your data is not listed here, its raw ID will be used as its name.
303
 
submission.py CHANGED
@@ -7,8 +7,48 @@ import pandas as pd
7
  import io
8
  import logging
9
 
10
- from huggingface_hub import HfApi, HfFolder, hf_hub_download
11
- from huggingface_hub.utils import HfHubHTTPError, EntryNotFoundError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  logging.basicConfig(
14
  level=logging.INFO,
 
7
  import io
8
  import logging
9
 
10
# Compatibility-safe imports for huggingface_hub.
# Newer releases removed HfFolder, so a minimal shim is defined when it is
# absent. If huggingface_hub itself is missing, the ImportError raised by the
# import below propagates unchanged so the failure stays visible.
# NOTE(review): the previous revision also imported HfHubHTTPError and
# EntryNotFoundError from huggingface_hub.utils; confirm nothing later in this
# module still references them.
from huggingface_hub import HfApi, hf_hub_download  # preferred imports

try:
    # Older versions still provide HfFolder.
    from huggingface_hub import HfFolder  # type: ignore
except ImportError:
    import os

    class HfFolder:
        """Minimal stand-in for ``HfFolder`` exposing ``get_token``/``save_token``."""

        @staticmethod
        def get_token():
            """Return a Hugging Face token, or ``None`` when none can be found.

            Lookup order:
              1. ``huggingface_hub.get_token()`` when the installed version
                 provides it.
              2. ``token`` / ``_token`` on a freshly constructed ``HfApi``.
              3. The ``HF_TOKEN`` then ``HUGGINGFACE_TOKEN`` environment
                 variables.
            """
            try:
                from huggingface_hub import get_token as hub_get_token
            except ImportError:
                hub_get_token = None
            if hub_get_token is not None:
                token = hub_get_token()
                if token:
                    return token

            try:
                api = HfApi()
                token = getattr(api, "token", None) or getattr(api, "_token", None)
                if token:
                    return token
            except Exception:
                # Best-effort probe: record the failure instead of hiding it,
                # then fall through to the environment variables.
                logging.getLogger(__name__).debug(
                    "Could not read a token from HfApi", exc_info=True
                )
            return os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN")

        @staticmethod
        def save_token(token: str):
            """Store ``token`` for later use by this process (best effort).

            Calls ``HfApi.login(token=...)`` when the installed ``HfApi``
            offers a ``login`` method; otherwise, or if that call fails, the
            token is exported as ``HF_TOKEN`` in ``os.environ`` (in-memory,
            current process only).
            """
            try:
                login = getattr(HfApi(), "login", None)
                if login is not None:
                    login(token=token)
                    return
            except Exception:
                # Deliberately non-fatal: fall back to the environment variable.
                logging.getLogger(__name__).debug(
                    "HfApi.login failed; falling back to HF_TOKEN", exc_info=True
                )
            # Fallback: export to os.environ for the current process.
            os.environ["HF_TOKEN"] = token
52
 
53
  logging.basicConfig(
54
  level=logging.INFO,