Spaces:
Runtime error
Runtime error
| import PyPDF2 | |
| import json | |
# Path of the medical reference book (PDF) that the extraction step reads.
pdf_path = "gale_of_medicin.pdf"
def extract_text(pdf_path) -> str:
    """Return the concatenated text of every page of the PDF at *pdf_path*.

    Pages are processed in document order and each page's text is followed
    by a single newline.

    Args:
        pdf_path: Path of the PDF file; it is opened in binary mode.

    Returns:
        One string containing the text of all pages.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        # Join once instead of growing a string with ``+=`` per page.
        # ``or ""`` is defensive: if a page yields no text object at all,
        # it contributes an empty line instead of raising TypeError.
        # The join must run while the file is still open, because pages are
        # read from the underlying stream.
        return "".join(
            f"{page.extract_text() or ''}\n" for page in reader.pages
        )
# Pull the full text out of the PDF, wrap it in a single prompt/response
# record, and write that one-element dataset as pretty-printed UTF-8 JSON.
text_data = extract_text(pdf_path)
dataset = [
    {
        "prompt": "Medical Query",
        "response": text_data,
    },
]
with open("medical_dataset.json", mode="w", encoding="utf-8") as f:
    # ensure_ascii=False keeps non-ASCII characters readable in the output.
    json.dump(dataset, f, indent=4, ensure_ascii=False)
print("β Extracted text saved as `medical_dataset.json`")