LLDDWW Claude committed
Commit 63c2769 · 1 Parent(s): dcb7540

perf: switch to Gemma-2-2B for faster inference


- Replace MedGemma-4B with Gemma-2-2B (2x smaller, much faster)
- Reduce max_new_tokens from 1536 to 768
- Add timing logs to track OCR and analysis performance
- Target: <30s total processing time

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <[email protected]>
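
For context, the model swap and the smaller generation budget described above boil down to the load-and-generate pattern below. This is a minimal, hedged sketch rather than the full app.py: the BitsAndBytesConfig/device_map arguments are assumptions inferred from the "8bit quantization" log message (the diff only shows the torch_dtype line of the from_pretrained() call), and the prompt text is a placeholder.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

MED_MODEL_ID = "google/gemma-2-2b-it"

# Load the 2B instruction-tuned model; 8-bit quantization via bitsandbytes is an
# assumption here, since the visible hunk only shows torch_dtype=torch.bfloat16.
tokenizer = AutoTokenizer.from_pretrained(MED_MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MED_MODEL_ID,
    torch_dtype=torch.bfloat16,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",
)

# Generate with the reduced token budget from this commit (1536 -> 768).
prompt = "The following is text extracted from a medication bag or prescription: ..."  # placeholder
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=768,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    )
print(tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True))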

Files changed (1)
  1. app.py +16 -6
app.py CHANGED
@@ -17,8 +17,8 @@ HF_TOKEN = os.getenv("HF_TOKEN")
 if HF_TOKEN:
     login(token=HF_TOKEN.strip())
 
-# Medication info analysis model ID (medical specialist model)
-MED_MODEL_ID = "google/medgemma-4b-it"
+# Medication info analysis model ID (lightweight model for fast inference)
+MED_MODEL_ID = "google/gemma-2-2b-it"
 
 # Global model variables (loaded only once)
 OCR_READER = None
@@ -35,7 +35,7 @@ def load_models():
         print("✅ EasyOCR loaded!")
 
     if MED_MODEL is None:
-        print("🔄 Loading MedGemma-4B for medical analysis (8bit quantization)...")
+        print("🔄 Loading Gemma-2-2B for medical analysis (8bit quantization)...")
         MED_MODEL = AutoModelForCausalLM.from_pretrained(
            MED_MODEL_ID,
            torch_dtype=torch.bfloat16,
@@ -69,10 +69,14 @@ def _extract_json_block(text: str) -> Optional[str]:
 @spaces.GPU(duration=120)
 def analyze_medication_image(image: Image.Image) -> Tuple[str, str]:
     """Extract text from the image via OCR, then analyze the medication information"""
+    import time
     try:
         # Step 1: OCR - extract text quickly with EasyOCR
+        start_time = time.time()
         img_array = np.array(image)
         ocr_results = OCR_READER.readtext(img_array)
+        ocr_time = time.time() - start_time
+        print(f"⏱️ OCR took {ocr_time:.2f}s")
 
         if not ocr_results:
             return "Could not find any text.", ""
@@ -82,6 +86,7 @@ def analyze_medication_image(image: Image.Image) -> Tuple[str, str]:
         ocr_text = "\n".join([text for _, text, _ in ocr_results])
 
         # Step 2: Medication info analysis - provide medical information with MedGemma
+        analysis_start = time.time()
 
         analysis_prompt = f"""The following is text extracted from a medication bag or prescription:
 
@@ -116,7 +121,7 @@ def analyze_medication_image(image: Image.Image) -> Tuple[str, str]:
         with torch.no_grad():
             outputs = MED_MODEL.generate(
                 **inputs,
-                max_new_tokens=1536,
+                max_new_tokens=768,
                 temperature=0.7,
                 top_p=0.9,
                 do_sample=True
@@ -124,6 +129,11 @@ def analyze_medication_image(image: Image.Image) -> Tuple[str, str]:
 
         analysis_text = MED_TOKENIZER.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
 
+        analysis_time = time.time() - analysis_start
+        total_time = time.time() - start_time
+        print(f"⏱️ Medical analysis took {analysis_time:.2f}s")
+        print(f"⏱️ Total processing time: {total_time:.2f}s")
+
         return ocr_text.strip(), analysis_text.strip()
 
     except Exception as e:
@@ -363,8 +373,8 @@ with gr.Blocks(theme=gr.themes.Soft(), css=CUSTOM_CSS) as demo:
     - This information is AI-generated, so it may not be accurate
 
     **🤖 Tech stack**
-    - EasyOCR (Korean + English, ultra-fast OCR - under 1 second!)
-    - Google MedGemma-4B-IT (8bit quantization, medical specialist model)
+    - EasyOCR (Korean + English, ultra-fast OCR)
+    - Google Gemma-2-2B-IT (8bit quantization, fast medical information analysis)
 
     **🔑 Setup**
     - Add `HF_TOKEN` under Settings → Repository secrets in Hugging Face Spaces
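
The timing logs added in this commit follow a simple wall-clock pattern that can be reproduced standalone. Below is a minimal sketch; run_ocr and run_analysis are hypothetical placeholders standing in for the real OCR_READER.readtext(...) and MED_MODEL.generate(...) calls.

import time

def run_ocr():           # placeholder for OCR_READER.readtext(img_array)
    time.sleep(0.5)

def run_analysis():      # placeholder for MED_MODEL.generate(**inputs, ...)
    time.sleep(2.0)

start_time = time.time()
run_ocr()
ocr_time = time.time() - start_time
print(f"⏱️ OCR took {ocr_time:.2f}s")

analysis_start = time.time()
run_analysis()
analysis_time = time.time() - analysis_start
total_time = time.time() - start_time
print(f"⏱️ Medical analysis took {analysis_time:.2f}s")
print(f"⏱️ Total processing time: {total_time:.2f}s")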