eaglelandsonce committed on
Commit c0663ad · verified · 1 Parent(s): 70a4bd3

Create app.py

Files changed (1)
app.py +104 -0
app.py ADDED
@@ -0,0 +1,104 @@
+ # Install these first if needed:
+ # pip install gradio bert-score transformers
+
+ import gradio as gr
+ from bert_score import score
+
+
+ def compute_bertscore(candidate, reference, model_type, lang, rescale_with_baseline):
+     if not candidate.strip() or not reference.strip():
+         return "—", "—", "Please enter BOTH reference and candidate text."
+
+     # BERTScore expects lists of strings
+     cands = [candidate]
+     refs = [reference]
+
+     P, R, F1 = score(
+         cands,
+         refs,
+         lang=lang,
+         model_type=model_type,
+         rescale_with_baseline=rescale_with_baseline,
+     )
+
+     precision = f"{P[0].item():.4f}"
+     recall = f"{R[0].item():.4f}"
+     f1 = f"{F1[0].item():.4f}"
+
+     return precision, recall, f1
+
+
+ with gr.Blocks() as demo:
+     gr.Markdown(
+         """
+ # 🔍 BERTScore Demo
+
+ **BERTScore** evaluates the quality of generated text by comparing contextualized
+ embeddings from models like BERT against a reference text.
+
+ Unlike n-gram metrics (e.g., BLEU), BERTScore focuses on **semantic similarity**
+ and is often better at capturing whether **meaning is preserved**.
+
+ 1. Enter a **reference** text (ground truth).
+ 2. Enter a **candidate** text (model output or paraphrase).
+ 3. Click **Compute BERTScore**.
+ """
+     )
+
+     with gr.Row():
+         reference_input = gr.Textbox(
+             label="Reference Text (Ground Truth)",
+             lines=5,
+             placeholder="e.g., The quick brown fox jumps over the lazy dog.",
+         )
+         candidate_input = gr.Textbox(
+             label="Candidate Text (Generated/Paraphrased)",
+             lines=5,
+             placeholder="e.g., A fast brown fox leaps over a sleepy dog.",
+         )
+
+     with gr.Row():
+         model_type = gr.Dropdown(
+             label="Embedding Model",
+             choices=[
+                 "microsoft/deberta-large-mnli",
+                 "bert-base-uncased",
+                 "roberta-large",
+             ],
+             value="microsoft/deberta-large-mnli",
+             info="Recommended: microsoft/deberta-large-mnli for English",
+         )
+
+         lang = gr.Dropdown(
+             label="Language Code",
+             choices=["en", "de", "fr", "es", "zh"],
+             value="en",
+             info="Language of the texts (ISO code).",
+         )
+
+         rescale_with_baseline = gr.Checkbox(
+             label="Rescale with Baseline (recommended for comparing scores)",
+             value=True,
+         )
+
+     compute_button = gr.Button("Compute BERTScore", variant="primary")
+
+     with gr.Row():
+         precision_output = gr.Textbox(
+             label="Precision", interactive=False, value="—"
+         )
+         recall_output = gr.Textbox(
+             label="Recall", interactive=False, value="—"
+         )
+         f1_output = gr.Textbox(
+             label="F1 (Main BERTScore Metric)", interactive=False, value="—"
+         )
+
+     compute_button.click(
+         fn=compute_bertscore,
+         inputs=[candidate_input, reference_input, model_type, lang, rescale_with_baseline],
+         outputs=[precision_output, recall_output, f1_output],
+     )
+
+ if __name__ == "__main__":
+     demo.launch()
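
For a quick sanity check without launching the Gradio UI, the same bert_score call can be run from a plain Python script. The sketch below mirrors the arguments the compute_bertscore callback forwards to score(); the file name quick_check.py and the example sentence pair are illustrative only and are not part of this commit.

# quick_check.py: standalone sanity check (illustrative, not part of this commit)
from bert_score import score

# Arbitrary example pair; swap in your own reference/candidate texts.
refs = ["The quick brown fox jumps over the lazy dog."]
cands = ["A fast brown fox leaps over a sleepy dog."]

# Same arguments the Gradio callback passes to bert_score.score;
# P, R, F1 come back as one-element torch tensors for this single pair.
P, R, F1 = score(
    cands,
    refs,
    lang="en",
    model_type="microsoft/deberta-large-mnli",
    rescale_with_baseline=True,
)

print(f"Precision: {P[0].item():.4f}")
print(f"Recall:    {R[0].item():.4f}")
print(f"F1:        {F1[0].item():.4f}")

With rescale_with_baseline=True, scores are linearly rescaled against a per-language baseline, so they spread over a wider range and are easier to compare across sentence pairs; raw, unrescaled BERTScore values tend to cluster near the upper end of [0, 1].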