Spaces:

eaglelandsonce
/

BertScorer

Running

App Files Community

eaglelandsonce committed 26 days ago

Commit

c0663ad

verified ·

1 Parent(s): 70a4bd3

Create app.py

Browse files

Files changed (1) hide show

app.py +104 -0

app.py ADDED Viewed

	@@ -0,0 +1,104 @@

# Install these first if needed:
#   pip install gradio bert-score transformers
import gradio as gr
from bert_score import score
def compute_bertscore(candidate, reference, model_type, lang, rescale_with_baseline):
    """Score one candidate text against one reference text with BERTScore.

    Args:
        candidate: Generated or paraphrased text to evaluate.
        reference: Ground-truth text to compare against.
        model_type: Name of the embedding model passed to ``score``.
        lang: Language code passed to ``score``.
        rescale_with_baseline: Passed through to ``score`` unchanged.

    Returns:
        A ``(precision, recall, f1)`` tuple of strings, each formatted to
        four decimal places. If either text is missing or only whitespace,
        ``("—", "—", <validation message>)`` is returned instead, so the
        message occupies the third slot.
    """
    # A cleared field may arrive as None rather than ""; treat both as
    # empty instead of crashing on None.strip().
    candidate = candidate or ""
    reference = reference or ""
    if not candidate.strip() or not reference.strip():
        return "—", "—", "Please enter BOTH reference and candidate text."

    # score() takes parallel lists of strings, so wrap the single pair.
    precision, recall, f1 = score(
        [candidate],
        [reference],
        lang=lang,
        model_type=model_type,
        rescale_with_baseline=rescale_with_baseline,
    )

    # One pair in, so each result holds exactly one value at index 0.
    return tuple(f"{metric[0].item():.4f}" for metric in (precision, recall, f1))
# UI definition: components created inside the context belong to `demo`.
with gr.Blocks() as demo:
    # Blank lines separate the heading, the two paragraphs and the list so
    # they render as distinct Markdown blocks instead of one run-on paragraph.
    gr.Markdown(
        """
    # 🔍 BERTScore Demo

    **BERTScore** evaluates the quality of generated text by comparing contextualized
    embeddings from models like BERT against a reference text.

    Unlike n-gram metrics (e.g., BLEU), BERTScore focuses on **semantic similarity**
    and is often better at capturing whether **meaning is preserved**.

    1. Enter a **reference** text (ground truth).
    2. Enter a **candidate** text (model output or paraphrase).
    3. Click **Compute BERTScore**.
    """
    )

    # Text inputs, shown side by side (reference on the left).
    with gr.Row():
        reference_input = gr.Textbox(
            label="Reference Text (Ground Truth)",
            lines=5,
            placeholder="e.g., The quick brown fox jumps over the lazy dog.",
        )
        candidate_input = gr.Textbox(
            label="Candidate Text (Generated/Paraphrased)",
            lines=5,
            placeholder="e.g., A fast brown fox leaps over a sleepy dog.",
        )

    # Scoring options.
    with gr.Row():
        model_type = gr.Dropdown(
            label="Embedding Model",
            choices=[
                "microsoft/deberta-large-mnli",
                "bert-base-uncased",
                "roberta-large",
                # The three models above are English models; this one gives
                # the non-English language choices below a suitable encoder.
                "bert-base-multilingual-cased",
            ],
            value="microsoft/deberta-large-mnli",
            info="Recommended: microsoft/deberta-large-mnli for English",
        )
        lang = gr.Dropdown(
            label="Language Code",
            choices=["en", "de", "fr", "es", "zh"],
            value="en",
            info="Language of the texts (ISO code).",
        )
        rescale_with_baseline = gr.Checkbox(
            label="Rescale with Baseline (recommended for comparing scores)",
            value=True,
        )

    compute_button = gr.Button("Compute BERTScore", variant="primary")

    # Read-only result boxes; "—" is the placeholder before the first run.
    with gr.Row():
        precision_output = gr.Textbox(
            label="Precision", interactive=False, value="—"
        )
        recall_output = gr.Textbox(
            label="Recall", interactive=False, value="—"
        )
        f1_output = gr.Textbox(
            label="F1 (Main BERTScore Metric)", interactive=False, value="—"
        )

    # Inputs are passed positionally, so their order must match
    # compute_bertscore's parameters: candidate first, then reference, even
    # though the reference box is displayed first.
    compute_button.click(
        fn=compute_bertscore,
        inputs=[
            candidate_input,
            reference_input,
            model_type,
            lang,
            rescale_with_baseline,
        ],
        outputs=[precision_output, recall_output, f1_output],
    )
# Launch the app only when this file is executed as a script, so importing
# the module does not start a server.
if __name__ == "__main__":
    demo.launch()