korayaggul committed on
Commit
f509ff5
·
verified ·
1 Parent(s): 7983f5c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +135 -38
app.py CHANGED
@@ -1,46 +1,143 @@
1
- import gradio as gr
2
  import json
 
 
 
 
3
  from transformers import pipeline
4
 
5
- # Load the model
6
- quality_model = pipeline("text-classification", model="snorkelai/instruction-response-quality")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
def analyze_json(file):
    """Score every Q&A record in an uploaded JSON file and return annotated JSON.

    Each record is formatted as "Q: <question>" / "A: <answer>" on two lines and
    passed to the module-level ``quality_model`` pipeline. The rounded score of
    the first prediction is bucketed into ``"high"`` (> 0.75), ``"medium"``
    (> 0.4) or ``"low"`` and stored on the record under ``"quality"``.

    Args:
        file: Uploaded file object; its ``.name`` attribute is used as the path
            of the JSON document to read. The document may be a list of objects
            or a single object.

    Returns:
        A JSON string (2-space indent) containing the list of records, each
        with an added ``"quality": {"label": ..., "score": ...}`` entry.
    """
    # Close the handle deterministically and decode as UTF-8 instead of the
    # platform default, so non-ASCII text loads the same everywhere.
    with open(file.name, encoding="utf-8") as handle:
        data = json.load(handle)

    records = data if isinstance(data, list) else [data]
    results = []

    for item in records:
        question = item.get("question")
        answer = item.get("answer")
        # A JSON null would otherwise be sent to the model as the text "None".
        question = "" if question is None else question
        answer = "" if answer is None else answer
        text = f"Q: {question}\nA: {answer}"

        # Get the quality score from the model.
        pred = quality_model(text, truncation=True)[0]
        score = round(pred["score"], 3)

        # Derive the quality level from the (rounded) score.
        if score > 0.75:
            quality = "high"
        elif score > 0.4:
            quality = "medium"
        else:
            quality = "low"

        # Attach the quality verdict to the original record.
        item["quality"] = {
            "label": quality,
            "score": score
        }
        results.append(item)

    return json.dumps(results, indent=2)
37
-
38
# Single-function UI: one JSON upload in, analyze_json's JSON string out as plain text.
- demo = gr.Interface(
39
- fn=analyze_json,
40
- inputs=gr.File(file_types=[".json"], label="Upload JSON file"),
41
- outputs="text",
42
- title="Q&A Quality Evaluator"
43
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
  if __name__ == "__main__":
46
  demo.launch()
 
 
1
  import json
2
+ import tempfile
3
+ from typing import List, Dict, Any
4
+
5
+ import gradio as gr
6
  from transformers import pipeline
7
 
8
+ # --- Eager init: load the model once, at import time, when the Space starts
9
+ quality_clf = pipeline("text-classification", model="snorkelai/instruction-response-quality")
10
+
11
def score_item(item: Dict[str, Any]) -> Dict[str, Any]:
    """Score a single Q&A record and attach a ``quality`` field to it.

    The record is formatted as "Q: <question>" / "A: <answer>" on two lines and
    passed to the module-level ``quality_clf`` pipeline. The ``score`` of the
    first prediction is bucketed into a label:

    * ``score > 0.75`` -> ``"high"``
    * ``score > 0.40`` -> ``"medium"``
    * otherwise        -> ``"low"``

    Args:
        item: Record with optional ``"question"`` and ``"answer"`` keys. A
            missing or ``None`` value is treated as an empty string.

    Returns:
        The same ``item`` object, mutated in place so that ``item["quality"]``
        is ``{"label": <str>, "score": <score rounded to 3 decimals>}``.
    """
    question = item.get("question")
    answer = item.get("answer")
    # A JSON null would otherwise be sent to the model as the literal "None".
    question = "" if question is None else question
    answer = "" if answer is None else answer
    text = f"Q: {question}\nA: {answer}"

    pred = quality_clf(text, truncation=True)[0]
    score = float(pred["score"])

    # Bucket on the unrounded score; rounding is for presentation only.
    if score > 0.75:
        label = "high"
    elif score > 0.40:
        label = "medium"
    else:
        label = "low"

    item["quality"] = {"label": label, "score": round(score, 3)}
    return item
26
+
27
def _tidy_text(value: Any, endings: str, closer: str) -> Any:
    """Strip, capitalise and terminate a text field.

    ``None`` becomes ``""``; non-string values are returned untouched because
    there is no sensible textual clean-up for them. An empty (or
    whitespace-only) string stays empty.
    """
    if value is None:
        return ""
    if not isinstance(value, str):
        return value
    text = value.strip()
    if not text:
        return text
    if text[0].islower():
        text = text[0].upper() + text[1:]
    if text[-1] not in endings:
        text += closer
    return text


def improve_item(item: Dict[str, Any], target: str = "medium") -> Dict[str, Any]:
    """Apply light, rule-based clean-up to a Q&A record (no LLM involved).

    Rules, applied to non-empty string fields only:

    * question: trim whitespace, capitalise the first letter, ensure it ends
      with ``?``;
    * answer: trim whitespace, capitalise the first letter, ensure it ends
      with one of ``. ! ?``;
    * answers shorter than five words get a fixed clarifying sentence appended.

    Args:
        item: Record with optional ``"question"``, ``"answer"`` and
            ``"quality"`` keys.
        target: Passing ``"none"`` disables the clean-up; any other value
            enables it.

    Returns:
        The same ``item`` object. It is returned unchanged when its quality
        label is ``"high"`` or ``target == "none"``; otherwise its
        ``"question"`` and ``"answer"`` entries are rewritten in place
        (missing or ``None`` values become ``""``).
    """
    # `or {}` also covers an explicit `"quality": null` in the input.
    quality = item.get("quality") or {}
    if quality.get("label") == "high" or target == "none":
        return item

    question = _tidy_text(item.get("question"), endings="?", closer="?")
    answer = _tidy_text(item.get("answer"), endings=".!?", closer=".")

    # Minimal expansion of very short answers. An empty answer is left empty,
    # like every other rule here, rather than replaced by filler text.
    if isinstance(answer, str) and answer and len(answer.split()) < 5:
        answer = answer + " This answer has been clarified for brevity and precision."

    item["question"] = question
    item["answer"] = answer
    return item
59
+
60
def process_json(
    file,
    auto_improve: bool,
    improve_threshold: str
):
    """Score an uploaded JSON file of Q&A records and optionally clean up weak ones.

    Args:
        file: Upload coming from ``gr.File``. Either an object exposing the
            temp-file path as ``.name`` or the path itself as a string
            (which of the two is passed depends on the Gradio version).
        auto_improve: When true, records whose label is selected by
            ``improve_threshold`` are passed through ``improve_item`` and then
            scored again so the effect is visible.
        improve_threshold: ``"low_only"`` or ``"low_and_medium"``; any other
            value (e.g. ``"none"``) selects no records.

    Returns:
        A 3-tuple ``(summary_rows, preview_json, download_path)``:

        * ``summary_rows`` - one ``[id, quality_label, quality_score,
          question_preview]`` row per record, in the column order of the
          summary table headers;
        * ``preview_json`` - pretty-printed JSON of the first 50 records, with
          a trailing note when more exist;
        * ``download_path`` - path of a temp ``.json`` file holding all records.

    Raises:
        gr.Error: If no file was uploaded, the file is not valid UTF-8 JSON, or
            it contains entries that are not JSON objects.
    """
    if file is None:
        raise gr.Error("Please upload a JSON file first.")

    # Accept both a file-like wrapper (``.name``) and a plain path string.
    path = getattr(file, "name", file)

    # Load the JSON content (a list or a single object is supported).
    # ``utf-8-sig`` reads plain UTF-8 and also tolerates a leading BOM, which
    # ``json`` would otherwise reject.
    try:
        with open(path, encoding="utf-8-sig") as handle:
            data = json.load(handle)
    except (UnicodeDecodeError, json.JSONDecodeError) as err:
        raise gr.Error(f"Could not parse the uploaded file as JSON: {err}") from err

    items: List[Dict[str, Any]] = data if isinstance(data, list) else [data]
    if not all(isinstance(it, dict) for it in items):
        raise gr.Error("Expected a JSON object or a list of JSON objects.")

    # Score shallow copies so the loaded data itself is not mutated.
    scored: List[Dict[str, Any]] = [score_item(dict(it)) for it in items]

    # Improvement is optional.
    if auto_improve:
        labels_to_improve = {
            "low_only": {"low"},
            "low_and_medium": {"low", "medium"},
        }.get(improve_threshold, set())

        for pos, record in enumerate(scored):
            if record["quality"]["label"] in labels_to_improve:
                # Re-score after the clean-up so the difference is visible.
                scored[pos] = score_item(improve_item(record))

    # Compact view for the summary table (falls back to the index when a
    # record has no "id"). Rows are plain lists in header order, which is
    # the list-of-rows shape gr.Dataframe documents for its value.
    summary = [
        [
            record.get("id", idx),
            record["quality"]["label"],
            record["quality"]["score"],
            str(record.get("question") or "")[:120],
        ]
        for idx, record in enumerate(scored)
    ]

    # Build the downloadable JSON. The encoding is explicit because
    # ensure_ascii=False writes non-ASCII characters verbatim.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".json", mode="w", encoding="utf-8"
    ) as tmp:
        json.dump(scored, tmp, indent=2, ensure_ascii=False)
        download_path = tmp.name

    # The preview shows only the first 50 items to keep the Code box readable.
    pretty = json.dumps(scored[:50], indent=2, ensure_ascii=False)
    if len(scored) > 50:
        pretty += "\n\n// NOTE: Showing first 50 items. Download full file below."

    return summary, pretty, download_path
113
+
114
# UI layout: upload row, options row, run button, then three result tabs.
+ with gr.Blocks(title="Q&A Quality Evaluator", theme=gr.themes.Soft()) as demo:
115
+ gr.Markdown("## Q&A Quality Evaluator\nUpload your JSON, score quality, and (optionally) auto-improve low items.")
116
+
117
+ with gr.Row():
118
+ inp_file = gr.File(file_types=[".json"], label="Upload JSON (list of objects)")
119
+
120
+ with gr.Row():
121
+ auto_switch = gr.Checkbox(label="Auto-improve low-quality items (light rules, no LLM)", value=False)
122
+ improve_sel = gr.Radio(choices=["low_only", "low_and_medium", "none"], value="low_only",
123
+ label="Improve threshold")
124
+
125
+ run_btn = gr.Button("Score (and Improve)")
126
+
127
+ gr.Markdown("### Results")
128
+ with gr.Tab("Summary Table"):
129
+ out_table = gr.Dataframe(headers=["id", "quality_label", "quality_score", "question_preview"], wrap=True, height=400)
130
+ with gr.Tab("Preview JSON"):
131
+ # A large window: lines=30 gives a comfortable view
132
+ out_code = gr.Code(language="json", label="Preview (first 50 items)", interactive=False, lines=30)
133
+ with gr.Tab("Download"):
134
+ out_file = gr.File(label="Download full scored JSON")
135
+
136
# The three outputs are listed in the order process_json returns them.
+ run_btn.click(
137
+ process_json,
138
+ inputs=[inp_file, auto_switch, improve_sel],
139
+ outputs=[out_table, out_code, out_file]
140
+ )
141
 
142
  if __name__ == "__main__":
143
  demo.launch()