# NOTE: "Spaces: Sleeping" page chrome from the Hugging Face Spaces listing was
# captured with this source; it is not part of the application code.
# Standard library
import json
import re
import tempfile
from typing import Any, Dict, List, Tuple

# Third-party
import gradio as gr
from transformers import pipeline
# ---------------------------
# Minimal sample (id, question, answer only)
# ---------------------------
SAMPLE_JSON_MIN = """[
{
"id": "ex-001",
"question": "question",
"answer": "answer"
},
{
"id": "ex-002",
"question": "question",
"answer": "answer"
},
{
"id": "ex-003",
"question": "question",
"answer": "answer"
}
]"""


def download_minimal_sample_json():
    """Write the minimal sample JSON to a temp file and return its path.

    The file is created with delete=False so Gradio can serve it after
    this function returns.
    """
    tmp = tempfile.NamedTemporaryFile(
        delete=False, suffix=".json", mode="w", encoding="utf-8"
    )
    try:
        tmp.write(SAMPLE_JSON_MIN)
    finally:
        tmp.close()
    return tmp.name
| # --------------------------- | |
| # Helpers (formatting & menu-path) | |
| # --------------------------- | |
| def _normalize_ws(s: str) -> str: | |
| return re.sub(r"\s+", " ", (s or "").strip()) | |
| def _sentence_case(s: str) -> str: | |
| s = _normalize_ws(s) | |
| if not s: | |
| return s | |
| # single lowercase 'i' -> 'I' | |
| s = re.sub(r"\bi\b", "I", s) | |
| if s[0].islower(): | |
| s = s[0].upper() + s[1:] | |
| if s[-1] not in ".!?": | |
| s += "." | |
| return s | |
| def _join_path(section: str | None, option: str | None) -> str | None: | |
| section = (section or "").strip() | |
| option = (option or "").strip() | |
| if section and option: | |
| return f"{section} > {option}" | |
| if option: | |
| return option | |
| if section: | |
| return section | |
| return None | |
# ---------------------------
# Model: reward/quality (Transformers-compatible)
# ---------------------------
# OpenAssistant reward model: scores a "Human: ... Assistant: ..." exchange.
MODEL_ID = "OpenAssistant/reward-model-deberta-v3-large-v2"

try:
    # Loaded once at import time; downloads weights on first run.
    quality_pipe = pipeline(
        task="text-classification",
        model=MODEL_ID,
        tokenizer=MODEL_ID,
        function_to_apply="none"  # regression score: raw logit, no softmax
    )
    MODEL_READY = True
    LOAD_ERR = ""
except Exception as e:
    # Keep the app usable without the model; score_pair falls back to a
    # heuristic and the UI surfaces LOAD_ERR as a warning.
    MODEL_READY = False
    LOAD_ERR = str(e)
# ---------------------------
# Scoring & labeling
# ---------------------------
def score_pair(question: str, answer: str) -> float:
    """Reward score (higher = better). If model not ready, use light heuristic."""
    if not MODEL_READY:
        # Heuristic fallback, range 0.3–0.7: reward a question mark, a
        # reasonably long answer, and terminal punctuation.
        base = 0.3
        if question.strip().endswith("?"):
            base += 0.1
        if len(answer.split()) >= 6:
            base += 0.2
        if answer.strip().endswith((".", "!", "?")):
            base += 0.1
        return base
    # Prompt format expected by the OpenAssistant reward model.
    text = f"Human: {question}\nAssistant: {answer}"
    # Scores may be negative (raw regression output, function_to_apply="none").
    out = quality_pipe(text, truncation=True)[0]  # top_k=1 default
    return float(out["score"])
def label_mapper_from_distribution(scores: List[float]):
    """
    Build a score->label function from the score distribution itself
    (reward scores may be negative, so fixed thresholds don't work):
      low   : below the 33rd percentile
      medium: 33rd–66th percentile
      high  : at or above the 66th percentile
    An empty distribution labels everything 'medium'.
    """
    if not scores:
        return lambda s: "medium"

    ordered = sorted(scores)
    last = len(ordered) - 1

    def _at_percentile(p: float) -> float:
        # Nearest-rank percentile via index rounding.
        if last == 0:
            return ordered[0]
        return ordered[int(round((p / 100) * last))]

    low_th = _at_percentile(33)
    high_th = _at_percentile(66)

    def mapper(score: float) -> str:
        if score >= high_th:
            return "high"
        if score >= low_th:
            return "medium"
        return "low"

    return mapper
| # --------------------------- | |
| # Smart rewrite helpers | |
| # --------------------------- | |
| def _extract_module(item: Dict[str, Any], q_text: str) -> str | None: | |
| """ | |
| Extract uppercase 3+ letter 'module-like' token from question/context, e.g., CUSTOMERS. | |
| """ | |
| ctx = f"{item.get('context','')} {q_text}" | |
| m = re.search(r"\b([A-Z]{3,})\b", ctx) | |
| return m.group(1) if m else None | |
| def _roles_from_answer(ans: str) -> List[str]: | |
| """ | |
| Pull a role list from the answer; Title Case; drop empties. | |
| """ | |
| parts = re.split(r",| and ", ans or "", flags=re.IGNORECASE) | |
| roles = [] | |
| for p in parts: | |
| t = p.strip(" .") | |
| if not t: | |
| continue | |
| t = " ".join(w.capitalize() for w in t.split()) | |
| roles.append(t) | |
| return [r for r in roles if r] | |
| # --------------------------- | |
| # Rewriter (WHO/WHERE/WHAT/HOW aware) | |
| # --------------------------- | |
| def improve_smart(item: Dict[str, Any]) -> Dict[str, Any]: | |
| """ | |
| LLM-free safe rewrite: | |
| - WHO + MODULE: use roles pattern. (Only here!) | |
| - WHERE: produce menu-path sentence (e.g., Settings > Inventory Parameters). | |
| - WHAT: definition/purpose or 'allows searching by …' sentence. | |
| - HOW: short procedural sentence. | |
| - Else: grammar/format normalization. | |
| """ | |
| q = _normalize_ws(item.get("question") or "") | |
| a = _normalize_ws(item.get("answer") or "") | |
| meta = item.get("metadata") or {} | |
| qtype = (meta.get("question_type") or "").lower() | |
| orig_q = _normalize_ws(item.get("original_question") or "") | |
| orig_a = _normalize_ws(item.get("original_answer") or "") | |
| base = orig_a or a # prefer original to keep semantics | |
| module = _extract_module(item, q) # e.g., CUSTOMERS | |
| # -- WHO: ONLY here we use roles pattern | |
| if qtype == "who" and module: | |
| roles = _roles_from_answer(base) | |
| new_q = f"Which roles are authorized to access the {module} module in DealerTIQ?" | |
| if roles: | |
| if len(roles) == 1: | |
| roles_str = roles[0] | |
| elif len(roles) == 2: | |
| roles_str = " and ".join(roles) | |
| else: | |
| roles_str = ", ".join(roles[:-1]) + f", and {roles[-1]}" | |
| new_a = f"Authorized roles include {roles_str}." | |
| else: | |
| new_a = _sentence_case(base) if base else _sentence_case(a) | |
| if item.get("context"): | |
| item["context"] = f"DealerTIQ — {module} module" | |
| item["question"] = _sentence_case(new_q[:-1] + "?") | |
| item["answer"] = _sentence_case(new_a) | |
| return item | |
| # -- WHERE: menu path sentence | |
| if qtype == "where": | |
| text = base or a | |
| # "under the Settings section" | |
| m_sec = re.search(r"under the\s+([A-Za-z ]+?)\s+section", text or "", flags=re.IGNORECASE) | |
| section = m_sec.group(1).strip().title() if m_sec else None | |
| # quoted option: "Inventory Parameters" | |
| quotes = re.findall(r'"([^"]+)"', text or "") | |
| option = quotes[0].strip() if quotes else None | |
| path = _join_path(section, option) | |
| target = option or (module and f"{module} module") or "page" | |
| new_q = f"Where is the {target} located in DealerTIQ?" | |
| new_a = f"It is located under {path} in the left navigation menu." if path else (text or "It is available in the left navigation menu.") | |
| item["question"] = _sentence_case(new_q[:-1] + "?") | |
| item["answer"] = _sentence_case(new_a) | |
| return item | |
| # -- WHAT: definition/purpose or 'allows searching by …' | |
| if qtype == "what": | |
| text = base or a | |
| if re.search(r"allows\s+search(ing)?\s+by", text or "", flags=re.IGNORECASE): | |
| m = re.search(r"such as\s+(.+)", text or "", flags=re.IGNORECASE) | |
| if m: | |
| feats = m.group(1).strip().rstrip(".") | |
| new_q = f"What can you search for in {module or 'this module'}?" | |
| new_a = f"It allows searching by criteria such as {feats}." | |
| else: | |
| new_q = orig_q or q or "What can you search for in this module?" | |
| new_a = text or "It allows searching by multiple criteria." | |
| else: | |
| if re.search(r"\b(configure|configuration|settings)\b", text or "", flags=re.IGNORECASE): | |
| target = module or "Inventory Parameters" | |
| new_q = f"What is configured in the {target}?" | |
| new_a = text or "It configures related settings and rules." | |
| else: | |
| new_q = orig_q or q or "What is the purpose of this module?" | |
| new_a = text or "It provides the core functionality for this area." | |
| item["question"] = _sentence_case(new_q[:-1] + "?") | |
| item["answer"] = _sentence_case(new_a) | |
| return item | |
| # -- HOW: short procedure | |
| if qtype == "how": | |
| text = base or a | |
| quotes = re.findall(r'"([^"]+)"', text or "") # "Add Channel", "Inventory Parameters" | |
| m_sec = re.search(r"under the\s+([A-Za-z ]+?)\s+section", text or "", flags=re.IGNORECASE) | |
| section = m_sec.group(1).strip().title() if m_sec else None | |
| path = _join_path(section, quotes[0] if quotes else None) | |
| new_q = orig_q or q or f"How do I perform this action in {module or 'the module'}?" | |
| steps = _normalize_ws(text or "") | |
| steps = re.sub(r"\bclick on\b", "select", steps, flags=re.IGNORECASE) | |
| if path and "left navigation" not in steps.lower(): | |
| steps = f"Go to {path} in the left navigation menu, then {steps[0].lower() + steps[1:]}" if steps else f"Go to {path} in the left navigation menu." | |
| item["question"] = _sentence_case(new_q[:-1] + "?") if not new_q.endswith("?") else _sentence_case(new_q) | |
| item["answer"] = _sentence_case(steps or "Follow the on-screen instructions to complete the action.") | |
| return item | |
| # -- Fallback: grammar/format normalization only | |
| if q and not q.endswith("?"): | |
| q += "?" | |
| item["question"] = _sentence_case(q) | |
| item["answer"] = _sentence_case(base or a) | |
| return item | |
| # --------------------------- | |
| # Pipeline | |
| # --------------------------- | |
| def process_json(file) -> Tuple[List[Dict[str, Any]], str, str, str]: | |
| """ | |
| Input: JSON (list or single object) | |
| Steps: | |
| 1) First scoring pass for all items | |
| 2) Label via distribution thresholds (low/medium/high) | |
| 3) Auto-rewrite items labeled 'low' (improve_smart) | |
| 4) Rescore & write quality_before / quality_after | |
| Output: | |
| - Summary (Dataframe) | |
| - Preview JSON (first 50) | |
| - Download JSON path | |
| - Warn/Info | |
| """ | |
| data = json.load(open(file.name)) | |
| items: List[Dict[str, Any]] = data if isinstance(data, list) else [data] | |
| # 1) First scoring pass | |
| first = [] | |
| scores = [] | |
| for raw in items: | |
| it = dict(raw) | |
| s = score_pair(it.get("question",""), it.get("answer","")) | |
| it["quality_before"] = {"score": round(s, 3)} | |
| first.append(it) | |
| scores.append(s) | |
| # 2) Dynamic label function | |
| to_label = label_mapper_from_distribution(scores) | |
| # 3) Label, rewrite if 'low', then rescore | |
| processed = [] | |
| for it in first: | |
| base_label = to_label(it["quality_before"]["score"]) | |
| it["quality_before"]["label"] = base_label | |
| if base_label == "low": | |
| it = improve_smart(it) | |
| s2 = score_pair(it.get("question",""), it.get("answer","")) | |
| it["quality_after"] = { | |
| "score": round(s2, 3), | |
| "label": to_label(s2) | |
| } | |
| processed.append(it) | |
| # Summary table | |
| summary = [] | |
| for idx, it in enumerate(processed): | |
| qb = it.get("quality_before", {}) | |
| qa = it.get("quality_after") | |
| summary.append({ | |
| "id": it.get("id", idx), | |
| "before_label": qb.get("label"), | |
| "before_score": qb.get("score"), | |
| "after_label": qa.get("label") if qa else None, | |
| "after_score": qa.get("score") if qa else None, | |
| "question_preview": (it.get("question") or "")[:120] | |
| }) | |
| # Downloadable JSON | |
| tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="w", encoding="utf-8") | |
| json.dump(processed, tmp, indent=2, ensure_ascii=False) | |
| tmp.flush(); tmp.close() | |
| # Preview | |
| preview = json.dumps(processed[:50], indent=2, ensure_ascii=False) | |
| if len(processed) > 50: | |
| preview += "\n\n// NOTE: Showing first 50 items. Download full file below." | |
| warn = "" | |
| if not MODEL_READY: | |
| warn = f"Warning: model '{MODEL_ID}' could not be loaded; heuristic scoring used. Error: {LOAD_ERR}" | |
| return summary, preview, tmp.name, warn | |
# ---------------------------
# UI
# ---------------------------
with gr.Blocks(title="Q&A Quality Upgrader", theme=gr.themes.Soft()) as demo:
    gr.Markdown("## Q&A Quality Upgrader\nUpload your JSON. Low-quality items will be auto-rewritten and rescored.")

    # ---- Minimal sample accordion (show + download) ----
    with gr.Accordion("Minimal sample JSON (only id, question, answer)", open=False):
        gr.Markdown("Upload a JSON **array of objects** with the following schema:")
        gr.Code(value=SAMPLE_JSON_MIN, language="json", lines=18, label="Minimal JSON example")
        sample_btn = gr.Button("Download minimal sample.json")
        sample_file = gr.File(label="minimal-sample.json")
        # Writes the sample to a temp file and offers it for download.
        sample_btn.click(fn=download_minimal_sample_json, outputs=sample_file)

    # ---- Upload & Run ----
    inp = gr.File(file_types=[".json"], label="Upload JSON (list of objects)")
    run = gr.Button("Run")

    # Result tabs: summary table, JSON preview (first 50), full-file download.
    with gr.Tab("Summary"):
        tbl = gr.Dataframe(headers=["id","before_label","before_score","after_label","after_score","question_preview"])
    with gr.Tab("Preview JSON"):
        code = gr.Code(language="json", lines=34, label="Preview (first 50 items)")
    with gr.Tab("Download"):
        dfile = gr.File(label="Download full JSON")
    # Shows the model-load warning when heuristic scoring is in effect.
    warnbox = gr.Markdown("")

    # Wire the pipeline; output order must match process_json's return tuple.
    run.click(process_json, inputs=[inp], outputs=[tbl, code, dfile, warnbox])

if __name__ == "__main__":
    demo.launch()