Spaces:
Runtime error
Runtime error
Sync from GitHub (main)
Browse files
- examples/dev/profile_1.yaml +21 -43
- examples/dev/profile_2.yaml +40 -32
- examples/dev/profile_3.yaml +27 -30
- examples/dev/profile_4.yaml +33 -22
- examples/dev/profile_5.yaml +24 -28
- examples/dev/profile_6.yaml +34 -33
- examples/dev/profile_mjs_1.yaml +31 -66
- examples/dev/profile_mjs_2.yaml +31 -38
- examples/dev/profile_mjs_3.yaml +32 -90
- examples/dev/profile_plcom2012_comprehensive.yaml +26 -35
- examples/dev/profile_plcom2012_edge_cases.yaml +36 -23
- examples/synthetic/complex_and_acquired_risk/colorectal_risk_ibd.yaml +23 -21
- examples/synthetic/complex_and_acquired_risk/complex_comorbidity.yaml +22 -67
- examples/synthetic/complex_and_acquired_risk/kidney_cancer_esrd.yaml +18 -42
- examples/synthetic/complex_and_acquired_risk/leukemia_therapy_related.yaml +13 -39
- examples/synthetic/complex_and_acquired_risk/lymphoma_immunosuppression.yaml +13 -39
- examples/synthetic/complex_and_acquired_risk/real_world_data.yaml +15 -82
- examples/synthetic/complex_and_acquired_risk/stomach_cancer_high_risk.yaml +18 -45
- examples/synthetic/complex_and_acquired_risk/thyroid_cancer_radiation.yaml +13 -34
- examples/synthetic/diagnostic_and_screening_pathways/executive_checkup.yaml +17 -64
- examples/synthetic/diagnostic_and_screening_pathways/indeterminate_imaging_birads3.yaml +15 -26
- examples/synthetic/diagnostic_and_screening_pathways/mrd_surveillance_candidate.yaml +13 -30
- examples/synthetic/diagnostic_and_screening_pathways/post_positive_cologuard.yaml +13 -25
- examples/synthetic/diagnostic_and_screening_pathways/therapy_selection_context.yaml +14 -25
- examples/synthetic/diagnostic_and_screening_pathways/vague_symptoms.yaml +22 -50
- examples/synthetic/guideline_boundaries/starting_screening_young_adult.yaml +13 -20
- examples/synthetic/guideline_boundaries/stopping_screening_older_adult.yaml +14 -30
- examples/synthetic/hereditary_and_genetic_risk/brain_tumor_nf1.yaml +16 -31
- examples/synthetic/hereditary_and_genetic_risk/brca1_high_risk.yaml +38 -31
- examples/synthetic/hereditary_and_genetic_risk/conflicting_genetic_data.yaml +16 -28
- examples/synthetic/hereditary_and_genetic_risk/li_fraumeni_tp53.yaml +20 -31
- examples/synthetic/hereditary_and_genetic_risk/lynch_syndrome.yaml +27 -40
- examples/synthetic/hereditary_and_genetic_risk/vague_family_history.yaml +14 -31
- examples/synthetic/lifestyle_and_demographic_risk/liver_risk_alcohol_abuse.yaml +15 -43
- examples/synthetic/lifestyle_and_demographic_risk/lung_risk_occupational.yaml +17 -29
- examples/synthetic/lifestyle_and_demographic_risk/metabolic_syndrome.yaml +16 -40
- tests/test_yaml_validation.py +128 -0
examples/dev/profile_1.yaml
CHANGED
|
@@ -1,51 +1,29 @@
|
|
| 1 |
demographics:
|
| 2 |
-
|
| 3 |
sex: male
|
| 4 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
| 5 |
lifestyle:
|
| 6 |
-
|
| 7 |
-
|
|
|
|
| 8 |
alcohol_consumption: moderate
|
| 9 |
family_history:
|
| 10 |
-
-
|
| 11 |
-
cancer_type:
|
| 12 |
age_at_diagnosis: 65
|
|
|
|
|
|
|
| 13 |
personal_medical_history:
|
|
|
|
| 14 |
previous_cancers: []
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
value: "28"
|
| 24 |
-
unit: "ng/mL"
|
| 25 |
-
reference_range: "30-100 ng/mL"
|
| 26 |
-
date: "2025-05-20"
|
| 27 |
-
- test_name: "Hemoglobin"
|
| 28 |
-
value: "13.2"
|
| 29 |
-
unit: "g/dL"
|
| 30 |
-
reference_range: "13.5-17.5 g/dL"
|
| 31 |
-
date: "2025-05-20"
|
| 32 |
-
- test_name: "White Blood Cell Count"
|
| 33 |
-
value: "7.2"
|
| 34 |
-
unit: "K/uL"
|
| 35 |
-
reference_range: "4.5 - 11.0 K/uL"
|
| 36 |
-
date: "2025-05-20"
|
| 37 |
-
- test_name: "Glucose"
|
| 38 |
-
value: "115"
|
| 39 |
-
unit: "mg/dL"
|
| 40 |
-
reference_range: "70-99 mg/dL"
|
| 41 |
-
date: "2025-05-20"
|
| 42 |
-
- test_name: "Creatinine"
|
| 43 |
-
value: "1.4"
|
| 44 |
-
unit: "mg/dL"
|
| 45 |
-
reference_range: "0.7-1.3 mg/dL"
|
| 46 |
-
date: "2025-05-20"
|
| 47 |
-
- test_name: "LDL Cholesterol"
|
| 48 |
-
value: "140"
|
| 49 |
-
unit: "mg/dL"
|
| 50 |
-
reference_range: "< 100 mg/dL"
|
| 51 |
-
date: "2025-05-20"
|
|
|
|
| 1 |
demographics:
|
| 2 |
+
age_years: 65
|
| 3 |
sex: male
|
| 4 |
+
ethnicity: black
|
| 5 |
+
anthropometrics:
|
| 6 |
+
height_cm: 175.0
|
| 7 |
+
weight_kg: 80.0
|
| 8 |
lifestyle:
|
| 9 |
+
smoking:
|
| 10 |
+
status: former
|
| 11 |
+
pack_years: 10
|
| 12 |
alcohol_consumption: moderate
|
| 13 |
family_history:
|
| 14 |
+
- relation: father
|
| 15 |
+
cancer_type: prostate_cancer
|
| 16 |
age_at_diagnosis: 65
|
| 17 |
+
degree: "1"
|
| 18 |
+
side: unknown
|
| 19 |
personal_medical_history:
|
| 20 |
+
chronic_conditions: []
|
| 21 |
previous_cancers: []
|
| 22 |
+
genetic_mutations: []
|
| 23 |
+
clinical_tests:
|
| 24 |
+
psa:
|
| 25 |
+
value_ng_ml: 6.1
|
| 26 |
+
date: 2025-05-20
|
| 27 |
+
symptoms:
|
| 28 |
+
- symptom_type: increased_urinary_frequency
|
| 29 |
+
duration_days: 30
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/dev/profile_2.yaml
CHANGED
|
@@ -1,42 +1,50 @@
|
|
| 1 |
# Older Female with Lung Cancer Risk
|
| 2 |
|
| 3 |
demographics:
|
| 4 |
-
|
| 5 |
sex: female
|
| 6 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
| 7 |
lifestyle:
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
family_history:
|
| 14 |
-
-
|
| 15 |
-
cancer_type:
|
| 16 |
age_at_diagnosis: 70
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
| 19 |
age_at_diagnosis: 50
|
|
|
|
|
|
|
| 20 |
personal_medical_history:
|
| 21 |
-
previous_cancers: []
|
| 22 |
chronic_conditions:
|
| 23 |
-
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
|
|
| 1 |
# Older Female with Lung Cancer Risk
|
| 2 |
|
| 3 |
demographics:
|
| 4 |
+
age_years: 72
|
| 5 |
sex: female
|
| 6 |
+
ethnicity: white
|
| 7 |
+
anthropometrics:
|
| 8 |
+
height_cm: 160.0
|
| 9 |
+
weight_kg: 65.0
|
| 10 |
lifestyle:
|
| 11 |
+
smoking:
|
| 12 |
+
status: current
|
| 13 |
+
pack_years: 40
|
| 14 |
+
alcohol_consumption: light
|
| 15 |
+
physical_activity_level: sedentary
|
| 16 |
family_history:
|
| 17 |
+
- relation: mother
|
| 18 |
+
cancer_type: lung_cancer
|
| 19 |
age_at_diagnosis: 70
|
| 20 |
+
degree: "1"
|
| 21 |
+
side: maternal
|
| 22 |
+
- relation: sister
|
| 23 |
+
cancer_type: breast_cancer
|
| 24 |
age_at_diagnosis: 50
|
| 25 |
+
degree: "1"
|
| 26 |
+
side: unknown
|
| 27 |
personal_medical_history:
|
|
|
|
| 28 |
chronic_conditions:
|
| 29 |
+
- copd
|
| 30 |
+
previous_cancers: []
|
| 31 |
+
genetic_mutations: []
|
| 32 |
+
female_specific:
|
| 33 |
+
menstrual:
|
| 34 |
+
age_at_menopause: 50
|
| 35 |
+
parity:
|
| 36 |
+
num_live_births: 2
|
| 37 |
+
age_at_first_live_birth: 25
|
| 38 |
+
hormone_use:
|
| 39 |
+
estrogen_use: never
|
| 40 |
+
breast_health:
|
| 41 |
+
num_biopsies: 0
|
| 42 |
+
atypical_hyperplasia: false
|
| 43 |
+
lobular_carcinoma_in_situ: false
|
| 44 |
+
symptoms:
|
| 45 |
+
- symptom_type: persistent_cough
|
| 46 |
+
duration_days: 60
|
| 47 |
+
- symptom_type: weight_loss
|
| 48 |
+
duration_days: 30
|
| 49 |
+
- symptom_type: haemoptysis
|
| 50 |
+
duration_days: 7
|
examples/dev/profile_3.yaml
CHANGED
|
@@ -1,41 +1,38 @@
|
|
| 1 |
# Middle-aged Hispanic Male with Colon Cancer Risk
|
| 2 |
|
| 3 |
demographics:
|
| 4 |
-
|
| 5 |
sex: male
|
| 6 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
| 7 |
lifestyle:
|
| 8 |
-
|
|
|
|
| 9 |
alcohol_consumption: heavy
|
| 10 |
-
|
| 11 |
-
|
| 12 |
family_history:
|
| 13 |
-
-
|
| 14 |
-
cancer_type:
|
| 15 |
age_at_diagnosis: 55
|
| 16 |
-
|
| 17 |
-
|
|
|
|
|
|
|
| 18 |
age_at_diagnosis: 60
|
|
|
|
|
|
|
| 19 |
personal_medical_history:
|
| 20 |
-
previous_cancers: []
|
| 21 |
chronic_conditions:
|
| 22 |
-
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
- test_name: "HbA1c"
|
| 33 |
-
value: "7.8"
|
| 34 |
-
unit: "%"
|
| 35 |
-
reference_range: "<5.7%"
|
| 36 |
-
date: "2025-03-10"
|
| 37 |
-
- test_name: "Lipid Panel"
|
| 38 |
-
value: "LDL 165"
|
| 39 |
-
unit: "mg/dL"
|
| 40 |
-
reference_range: "<100 mg/dL"
|
| 41 |
-
date: "2025-03-10"
|
|
|
|
| 1 |
# Middle-aged Hispanic Male with Colon Cancer Risk
|
| 2 |
|
| 3 |
demographics:
|
| 4 |
+
age_years: 50
|
| 5 |
sex: male
|
| 6 |
+
ethnicity: hispanic
|
| 7 |
+
anthropometrics:
|
| 8 |
+
height_cm: 175.0
|
| 9 |
+
weight_kg: 90.0
|
| 10 |
lifestyle:
|
| 11 |
+
smoking:
|
| 12 |
+
status: never
|
| 13 |
alcohol_consumption: heavy
|
| 14 |
+
physical_activity_level: sedentary
|
| 15 |
+
red_meat_consumption_oz_per_day: 8.0
|
| 16 |
family_history:
|
| 17 |
+
- relation: father
|
| 18 |
+
cancer_type: colorectal_cancer
|
| 19 |
age_at_diagnosis: 55
|
| 20 |
+
degree: "1"
|
| 21 |
+
side: paternal
|
| 22 |
+
- relation: paternal_uncle
|
| 23 |
+
cancer_type: gastro_oesophageal_cancer
|
| 24 |
age_at_diagnosis: 60
|
| 25 |
+
degree: "2"
|
| 26 |
+
side: paternal
|
| 27 |
personal_medical_history:
|
|
|
|
| 28 |
chronic_conditions:
|
| 29 |
+
- diabetes
|
| 30 |
+
previous_cancers: []
|
| 31 |
+
genetic_mutations: []
|
| 32 |
+
symptoms:
|
| 33 |
+
- symptom_type: rectal_bleeding
|
| 34 |
+
duration_days: 14
|
| 35 |
+
- symptom_type: abdominal_pain
|
| 36 |
+
duration_days: 30
|
| 37 |
+
- symptom_type: weight_loss
|
| 38 |
+
duration_days: 14
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/dev/profile_4.yaml
CHANGED
|
@@ -1,34 +1,45 @@
|
|
| 1 |
# Young Asian-American Female, Risk of Thyroid Cancer
|
| 2 |
|
| 3 |
demographics:
|
| 4 |
-
|
| 5 |
sex: female
|
| 6 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
| 7 |
lifestyle:
|
| 8 |
-
|
|
|
|
| 9 |
alcohol_consumption: none
|
| 10 |
-
|
| 11 |
-
diet: "Balanced diet, mostly plant-based"
|
| 12 |
family_history:
|
| 13 |
-
-
|
| 14 |
-
cancer_type:
|
| 15 |
age_at_diagnosis: 35
|
| 16 |
-
|
|
|
|
|
|
|
| 17 |
cancer_type: melanoma
|
| 18 |
age_at_diagnosis: 45
|
|
|
|
|
|
|
| 19 |
personal_medical_history:
|
|
|
|
| 20 |
previous_cancers: []
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# Young Asian-American Female, Risk of Thyroid Cancer
|
| 2 |
|
| 3 |
demographics:
|
| 4 |
+
age_years: 29
|
| 5 |
sex: female
|
| 6 |
+
ethnicity: asian
|
| 7 |
+
anthropometrics:
|
| 8 |
+
height_cm: 160.0
|
| 9 |
+
weight_kg: 55.0
|
| 10 |
lifestyle:
|
| 11 |
+
smoking:
|
| 12 |
+
status: never
|
| 13 |
alcohol_consumption: none
|
| 14 |
+
physical_activity_level: moderate
|
|
|
|
| 15 |
family_history:
|
| 16 |
+
- relation: mother
|
| 17 |
+
cancer_type: thyroid_cancer
|
| 18 |
age_at_diagnosis: 35
|
| 19 |
+
degree: "1"
|
| 20 |
+
side: maternal
|
| 21 |
+
- relation: father
|
| 22 |
cancer_type: melanoma
|
| 23 |
age_at_diagnosis: 45
|
| 24 |
+
degree: "1"
|
| 25 |
+
side: paternal
|
| 26 |
personal_medical_history:
|
| 27 |
+
chronic_conditions: []
|
| 28 |
previous_cancers: []
|
| 29 |
+
genetic_mutations: []
|
| 30 |
+
female_specific:
|
| 31 |
+
menstrual:
|
| 32 |
+
age_at_menarche: 13
|
| 33 |
+
parity:
|
| 34 |
+
num_live_births: 0
|
| 35 |
+
hormone_use:
|
| 36 |
+
estrogen_use: never
|
| 37 |
+
breast_health:
|
| 38 |
+
num_biopsies: 0
|
| 39 |
+
atypical_hyperplasia: false
|
| 40 |
+
lobular_carcinoma_in_situ: false
|
| 41 |
+
symptoms:
|
| 42 |
+
- symptom_type: neck_lump
|
| 43 |
+
duration_days: 30
|
| 44 |
+
- symptom_type: weight_loss
|
| 45 |
+
duration_days: 14
|
examples/dev/profile_5.yaml
CHANGED
|
@@ -1,38 +1,34 @@
|
|
| 1 |
# African American Male, Risk of Prostate Cancer
|
| 2 |
|
| 3 |
demographics:
|
| 4 |
-
|
| 5 |
sex: male
|
| 6 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
| 7 |
lifestyle:
|
| 8 |
-
|
| 9 |
-
|
|
|
|
| 10 |
alcohol_consumption: moderate
|
| 11 |
-
|
| 12 |
-
diet: "Mixed diet, moderate meat consumption"
|
| 13 |
family_history:
|
| 14 |
-
-
|
| 15 |
-
cancer_type:
|
| 16 |
age_at_diagnosis: 60
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
age_at_diagnosis: 72
|
| 20 |
personal_medical_history:
|
|
|
|
| 21 |
previous_cancers: []
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
reference_range: "<4.0 ng/mL"
|
| 33 |
-
date: "2025-06-10"
|
| 34 |
-
- test_name: "Blood Pressure"
|
| 35 |
-
value: "145/90"
|
| 36 |
-
unit: "mmHg"
|
| 37 |
-
reference_range: "<120/80 mmHg"
|
| 38 |
-
date: "2025-06-05"
|
|
|
|
| 1 |
# African American Male, Risk of Prostate Cancer
|
| 2 |
|
| 3 |
demographics:
|
| 4 |
+
age_years: 58
|
| 5 |
sex: male
|
| 6 |
+
ethnicity: black
|
| 7 |
+
anthropometrics:
|
| 8 |
+
height_cm: 180.0
|
| 9 |
+
weight_kg: 85.0
|
| 10 |
lifestyle:
|
| 11 |
+
smoking:
|
| 12 |
+
status: former
|
| 13 |
+
pack_years: 20
|
| 14 |
alcohol_consumption: moderate
|
| 15 |
+
physical_activity_level: low
|
|
|
|
| 16 |
family_history:
|
| 17 |
+
- relation: brother
|
| 18 |
+
cancer_type: prostate_cancer
|
| 19 |
age_at_diagnosis: 60
|
| 20 |
+
degree: "1"
|
| 21 |
+
side: unknown
|
|
|
|
| 22 |
personal_medical_history:
|
| 23 |
+
chronic_conditions: []
|
| 24 |
previous_cancers: []
|
| 25 |
+
genetic_mutations: []
|
| 26 |
+
clinical_tests:
|
| 27 |
+
psa:
|
| 28 |
+
value_ng_ml: 5.5
|
| 29 |
+
date: 2025-06-10
|
| 30 |
+
symptoms:
|
| 31 |
+
- symptom_type: nocturia
|
| 32 |
+
duration_days: 30
|
| 33 |
+
- symptom_type: increased_urinary_frequency
|
| 34 |
+
duration_days: 14
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/dev/profile_6.yaml
CHANGED
|
@@ -1,43 +1,44 @@
|
|
| 1 |
# Young Female, BRCA Mutation, High Breast Cancer Risk
|
| 2 |
|
| 3 |
demographics:
|
| 4 |
-
|
| 5 |
sex: female
|
| 6 |
-
ethnicity:
|
| 7 |
-
education_level:
|
|
|
|
|
|
|
|
|
|
| 8 |
lifestyle:
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
female_specific:
|
| 14 |
-
age_at_first_period: 12
|
| 15 |
-
num_live_births: 1
|
| 16 |
-
age_at_first_live_birth: 25
|
| 17 |
-
hormone_therapy_use: "never"
|
| 18 |
family_history:
|
| 19 |
-
-
|
| 20 |
-
cancer_type:
|
| 21 |
age_at_diagnosis: 42
|
| 22 |
-
|
| 23 |
-
|
|
|
|
|
|
|
| 24 |
age_at_diagnosis: 60
|
|
|
|
|
|
|
| 25 |
personal_medical_history:
|
| 26 |
-
previous_cancers: []
|
| 27 |
chronic_conditions: []
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
|
|
|
| 1 |
# Young Female, BRCA Mutation, High Breast Cancer Risk
|
| 2 |
|
| 3 |
demographics:
|
| 4 |
+
age_years: 32
|
| 5 |
sex: female
|
| 6 |
+
ethnicity: ashkenazi jewish
|
| 7 |
+
education_level: 16
|
| 8 |
+
anthropometrics:
|
| 9 |
+
height_cm: 165.0
|
| 10 |
+
weight_kg: 60.0
|
| 11 |
lifestyle:
|
| 12 |
+
smoking:
|
| 13 |
+
status: never
|
| 14 |
+
alcohol_consumption: light
|
| 15 |
+
physical_activity_level: moderate
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
family_history:
|
| 17 |
+
- relation: mother
|
| 18 |
+
cancer_type: breast_cancer
|
| 19 |
age_at_diagnosis: 42
|
| 20 |
+
degree: "1"
|
| 21 |
+
side: maternal
|
| 22 |
+
- relation: maternal_grandmother
|
| 23 |
+
cancer_type: ovarian_cancer
|
| 24 |
age_at_diagnosis: 60
|
| 25 |
+
degree: "2"
|
| 26 |
+
side: maternal
|
| 27 |
personal_medical_history:
|
|
|
|
| 28 |
chronic_conditions: []
|
| 29 |
+
previous_cancers: []
|
| 30 |
+
genetic_mutations:
|
| 31 |
+
- brca1
|
| 32 |
+
female_specific:
|
| 33 |
+
menstrual:
|
| 34 |
+
age_at_menarche: 12
|
| 35 |
+
parity:
|
| 36 |
+
num_live_births: 1
|
| 37 |
+
age_at_first_live_birth: 25
|
| 38 |
+
hormone_use:
|
| 39 |
+
estrogen_use: never
|
| 40 |
+
breast_health:
|
| 41 |
+
num_biopsies: 0
|
| 42 |
+
atypical_hyperplasia: false
|
| 43 |
+
lobular_carcinoma_in_situ: false
|
| 44 |
+
symptoms: []
|
examples/dev/profile_mjs_1.yaml
CHANGED
|
@@ -1,77 +1,42 @@
|
|
| 1 |
# Relatively healthy female in her 50s with a family history of CRC, entering menopause.
|
| 2 |
|
| 3 |
demographics:
|
| 4 |
-
|
| 5 |
sex: female
|
| 6 |
-
ethnicity:
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
weight: 45.6
|
| 12 |
|
| 13 |
lifestyle:
|
| 14 |
-
|
| 15 |
-
|
|
|
|
| 16 |
alcohol_consumption: moderate
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
total_meat: 4.0
|
| 22 |
-
pain_med: "no"
|
| 23 |
-
nsaid_use: False
|
| 24 |
-
estrogen: "no"
|
| 25 |
-
estrogen_use: False
|
| 26 |
-
estrogen_type: "none"
|
| 27 |
-
estrogen_use_duration: 0
|
| 28 |
-
estrogen_use_duration_unit: "years"
|
| 29 |
-
estrogen_use_duration_value: 0
|
| 30 |
-
estrogen_use_duration_unit: "years"
|
| 31 |
personal_medical_history:
|
|
|
|
| 32 |
previous_cancers: []
|
| 33 |
-
|
| 34 |
-
|
| 35 |
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
|
|
|
|
|
|
| 46 |
|
| 47 |
-
|
| 48 |
-
-
|
| 49 |
-
|
| 50 |
-
unit: "ng/mL"
|
| 51 |
-
reference_range: "30-100 ng/mL"
|
| 52 |
-
date: "2025-05-20"
|
| 53 |
-
- test_name: "Hemoglobin"
|
| 54 |
-
value: "13.2"
|
| 55 |
-
unit: "g/dL"
|
| 56 |
-
reference_range: "13.5-17.5 g/dL"
|
| 57 |
-
date: "2025-05-20"
|
| 58 |
-
- test_name: "White Blood Cell Count"
|
| 59 |
-
value: "7.2"
|
| 60 |
-
unit: "K/uL"
|
| 61 |
-
reference_range: "4.5 - 11.0 K/uL"
|
| 62 |
-
date: "2025-05-20"
|
| 63 |
-
- test_name: "Glucose"
|
| 64 |
-
value: "115"
|
| 65 |
-
unit: "mg/dL"
|
| 66 |
-
reference_range: "70-99 mg/dL"
|
| 67 |
-
date: "2025-05-20"
|
| 68 |
-
- test_name: "Creatinine"
|
| 69 |
-
value: "1.1"
|
| 70 |
-
unit: "mg/dL"
|
| 71 |
-
reference_range: "0.7-1.3 mg/dL"
|
| 72 |
-
date: "2025-05-20"
|
| 73 |
-
- test_name: "LDL Cholesterol"
|
| 74 |
-
value: "120"
|
| 75 |
-
unit: "mg/dL"
|
| 76 |
-
reference_range: "< 100 mg/dL"
|
| 77 |
-
date: "2025-05-20"
|
|
|
|
| 1 |
# Relatively healthy female in her 50s with a family history of CRC, entering menopause.
|
| 2 |
|
| 3 |
demographics:
|
| 4 |
+
age_years: 49
|
| 5 |
sex: female
|
| 6 |
+
ethnicity: asian
|
| 7 |
+
education_level: 12
|
| 8 |
+
anthropometrics:
|
| 9 |
+
height_cm: 152.0
|
| 10 |
+
weight_kg: 45.6
|
|
|
|
| 11 |
|
| 12 |
lifestyle:
|
| 13 |
+
smoking:
|
| 14 |
+
status: former
|
| 15 |
+
pack_years: 1
|
| 16 |
alcohol_consumption: moderate
|
| 17 |
+
multivitamin_use: true
|
| 18 |
+
physical_activity_level: low
|
| 19 |
+
red_meat_consumption_oz_per_day: 4.0
|
| 20 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
personal_medical_history:
|
| 22 |
+
chronic_conditions: []
|
| 23 |
previous_cancers: []
|
| 24 |
+
genetic_mutations: []
|
| 25 |
+
aspirin_use: current
|
| 26 |
|
| 27 |
+
female_specific:
|
| 28 |
+
menstrual:
|
| 29 |
+
age_at_menopause: 49
|
| 30 |
+
parity:
|
| 31 |
+
num_live_births: 2
|
| 32 |
+
age_at_first_live_birth: 25
|
| 33 |
+
hormone_use:
|
| 34 |
+
estrogen_use: never
|
| 35 |
+
breast_health:
|
| 36 |
+
num_biopsies: 0
|
| 37 |
+
atypical_hyperplasia: false
|
| 38 |
+
lobular_carcinoma_in_situ: false
|
| 39 |
|
| 40 |
+
symptoms:
|
| 41 |
+
- symptom_type: night_sweats
|
| 42 |
+
duration_days: 30
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/dev/profile_mjs_2.yaml
CHANGED
|
@@ -1,48 +1,41 @@
|
|
| 1 |
# African American Male, Risk of Prostate Cancer
|
| 2 |
|
| 3 |
demographics:
|
| 4 |
-
|
| 5 |
sex: male
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
|
|
|
| 10 |
lifestyle:
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
total_meat: 4.0
|
| 19 |
-
pain_med: "no"
|
| 20 |
-
nsaid_use: False
|
| 21 |
|
| 22 |
family_history:
|
| 23 |
-
-
|
| 24 |
-
cancer_type:
|
| 25 |
age_at_diagnosis: 60
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
personal_medical_history:
|
|
|
|
| 30 |
previous_cancers: []
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
date: "2025-06-10"
|
| 44 |
-
- test_name: "Blood Pressure"
|
| 45 |
-
value: "115/75"
|
| 46 |
-
unit: "mmHg"
|
| 47 |
-
reference_range: "<120/80 mmHg"
|
| 48 |
-
date: "2025-06-05"
|
|
|
|
| 1 |
# African American Male, Risk of Prostate Cancer
|
| 2 |
|
| 3 |
demographics:
|
| 4 |
+
age_years: 58
|
| 5 |
sex: male
|
| 6 |
+
ethnicity: black
|
| 7 |
+
anthropometrics:
|
| 8 |
+
height_cm: 185.0
|
| 9 |
+
weight_kg: 85.0
|
| 10 |
+
|
| 11 |
lifestyle:
|
| 12 |
+
smoking:
|
| 13 |
+
status: former
|
| 14 |
+
pack_years: 5
|
| 15 |
+
alcohol_consumption: none
|
| 16 |
+
multivitamin_use: true
|
| 17 |
+
physical_activity_level: low
|
| 18 |
+
red_meat_consumption_oz_per_day: 4.0
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
family_history:
|
| 21 |
+
- relation: brother
|
| 22 |
+
cancer_type: prostate_cancer
|
| 23 |
age_at_diagnosis: 60
|
| 24 |
+
degree: "1"
|
| 25 |
+
side: unknown
|
| 26 |
+
|
| 27 |
personal_medical_history:
|
| 28 |
+
chronic_conditions: []
|
| 29 |
previous_cancers: []
|
| 30 |
+
genetic_mutations: []
|
| 31 |
+
|
| 32 |
+
clinical_tests:
|
| 33 |
+
psa:
|
| 34 |
+
value_ng_ml: 2.5
|
| 35 |
+
date: 2025-06-10
|
| 36 |
+
|
| 37 |
+
symptoms:
|
| 38 |
+
- symptom_type: appetite_loss
|
| 39 |
+
duration_days: 14
|
| 40 |
+
- symptom_type: indigestion
|
| 41 |
+
duration_days: 7
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/dev/profile_mjs_3.yaml
CHANGED
|
@@ -1,102 +1,44 @@
|
|
| 1 |
# European young male, healthy
|
| 2 |
|
| 3 |
demographics:
|
| 4 |
-
|
| 5 |
sex: male
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
|
|
|
| 10 |
lifestyle:
|
| 11 |
-
|
| 12 |
-
|
|
|
|
| 13 |
alcohol_consumption: light
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
activity: 3.0
|
| 18 |
-
total_meat: 2.5
|
| 19 |
-
pain_med: "no"
|
| 20 |
-
nsaid_use: False
|
| 21 |
|
| 22 |
family_history:
|
| 23 |
-
-
|
| 24 |
-
cancer_type:
|
| 25 |
age_at_diagnosis: 85
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
| 28 |
age_at_diagnosis: 96
|
|
|
|
|
|
|
|
|
|
| 29 |
personal_medical_history:
|
|
|
|
| 30 |
previous_cancers: []
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
-
|
| 40 |
-
|
| 41 |
-
unit: "ng/mL"
|
| 42 |
-
reference_range: "<4.0 ng/mL"
|
| 43 |
-
date: "2025-06-10"
|
| 44 |
-
- test_name: "Blood Pressure"
|
| 45 |
-
value: "115/75"
|
| 46 |
-
unit: "mmHg"
|
| 47 |
-
reference_range: "<120/80 mmHg"
|
| 48 |
-
date: "2025-06-05"
|
| 49 |
-
- test_name: "Glucose"
|
| 50 |
-
value: "103"
|
| 51 |
-
unit: "mg/dL"
|
| 52 |
-
reference_range: "70-99 mg/dL"
|
| 53 |
-
date: "2025-06-05"
|
| 54 |
-
- test_name: "Creatinine"
|
| 55 |
-
value: "1.1"
|
| 56 |
-
unit: "mg/dL"
|
| 57 |
-
reference_range: "0.7-1.3 mg/dL"
|
| 58 |
-
date: "2025-06-05"
|
| 59 |
-
- test_name: "HDL Cholesterol"
|
| 60 |
-
value: "42"
|
| 61 |
-
unit: "mg/dL"
|
| 62 |
-
reference_range: "40-50 mg/dL"
|
| 63 |
-
date: "2025-06-05"
|
| 64 |
-
- test_name: "LDL Cholesterol"
|
| 65 |
-
value: "92"
|
| 66 |
-
unit: "mg/dL"
|
| 67 |
-
reference_range: "<100 mg/dL"
|
| 68 |
-
date: "2025-06-05"
|
| 69 |
-
- test_name: "Triglycerides"
|
| 70 |
-
value: "134"
|
| 71 |
-
unit: "mg/dL"
|
| 72 |
-
reference_range: "<150 mg/dL"
|
| 73 |
-
date: "2025-06-05"
|
| 74 |
-
- test_name: "C-Reactive Protein"
|
| 75 |
-
value: "0.7"
|
| 76 |
-
unit: "mg/dL"
|
| 77 |
-
reference_range: "0.0-3.0 mg/dL"
|
| 78 |
-
date: "2025-06-05"
|
| 79 |
-
- test_name: "Lung X-Ray"
|
| 80 |
-
value: "normal"
|
| 81 |
-
unit: "n/a"
|
| 82 |
-
date: "2025-06-05"
|
| 83 |
-
- test_name: "CT Scan of Abdomen"
|
| 84 |
-
value: "normal"
|
| 85 |
-
unit: "n/a"
|
| 86 |
-
date: "2025-06-05"
|
| 87 |
-
- test_name: "CT Scan of Chest"
|
| 88 |
-
value: "normal"
|
| 89 |
-
unit: "n/a"
|
| 90 |
-
date: "2025-06-05"
|
| 91 |
-
- test_name: "Lung Function Test"
|
| 92 |
-
value: "normal"
|
| 93 |
-
unit: "n/a"
|
| 94 |
-
date: "2025-06-05"
|
| 95 |
-
- test_name: "Liver Function Test"
|
| 96 |
-
value: "normal"
|
| 97 |
-
unit: "n/a"
|
| 98 |
-
date: "2025-06-05"
|
| 99 |
-
- test_name: "Kidney Function Test"
|
| 100 |
-
value: "normal"
|
| 101 |
-
unit: "n/a"
|
| 102 |
-
date: "2025-06-05"
|
|
|
|
| 1 |
# European young male, healthy
|
| 2 |
|
| 3 |
demographics:
|
| 4 |
+
age_years: 25
|
| 5 |
sex: male
|
| 6 |
+
ethnicity: white
|
| 7 |
+
anthropometrics:
|
| 8 |
+
height_cm: 180.0
|
| 9 |
+
weight_kg: 75.0
|
| 10 |
+
|
| 11 |
lifestyle:
|
| 12 |
+
smoking:
|
| 13 |
+
status: former
|
| 14 |
+
pack_years: 3
|
| 15 |
alcohol_consumption: light
|
| 16 |
+
multivitamin_use: false
|
| 17 |
+
physical_activity_level: moderate
|
| 18 |
+
red_meat_consumption_oz_per_day: 2.5
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
family_history:
|
| 21 |
+
- relation: paternal_grandmother
|
| 22 |
+
cancer_type: colorectal_cancer
|
| 23 |
age_at_diagnosis: 85
|
| 24 |
+
degree: "2"
|
| 25 |
+
side: paternal
|
| 26 |
+
- relation: paternal_grandfather
|
| 27 |
+
cancer_type: prostate_cancer
|
| 28 |
age_at_diagnosis: 96
|
| 29 |
+
degree: "2"
|
| 30 |
+
side: paternal
|
| 31 |
+
|
| 32 |
personal_medical_history:
|
| 33 |
+
chronic_conditions: []
|
| 34 |
previous_cancers: []
|
| 35 |
+
genetic_mutations: []
|
| 36 |
+
|
| 37 |
+
clinical_tests:
|
| 38 |
+
psa:
|
| 39 |
+
value_ng_ml: 0.5
|
| 40 |
+
date: 2025-06-10
|
| 41 |
+
|
| 42 |
+
symptoms:
|
| 43 |
+
- symptom_type: appetite_loss
|
| 44 |
+
duration_days: 14
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/dev/profile_plcom2012_comprehensive.yaml
CHANGED
|
@@ -2,50 +2,41 @@
|
|
| 2 |
# Tests native hawaiian/pacific islander race category and comprehensive variable coverage
|
| 3 |
|
| 4 |
demographics:
|
| 5 |
-
|
| 6 |
sex: male
|
| 7 |
-
ethnicity:
|
| 8 |
-
education_level:
|
| 9 |
-
|
| 10 |
-
|
|
|
|
| 11 |
|
| 12 |
lifestyle:
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
alcohol_consumption: light
|
| 19 |
-
|
| 20 |
-
physical_activity_level: "low"
|
| 21 |
|
| 22 |
personal_medical_history:
|
| 23 |
-
|
|
|
|
| 24 |
previous_cancers: [] # No previous cancers
|
| 25 |
-
|
| 26 |
|
| 27 |
family_history:
|
| 28 |
-
-
|
| 29 |
-
cancer_type:
|
| 30 |
age_at_diagnosis: 68
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
| 33 |
age_at_diagnosis: 55
|
|
|
|
|
|
|
| 34 |
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
clinical_observations:
|
| 40 |
-
- test_name: "Complete Blood Count"
|
| 41 |
-
value: "Normal"
|
| 42 |
-
unit: "descriptive"
|
| 43 |
-
reference_range: "Normal"
|
| 44 |
-
date: "2025-09-30"
|
| 45 |
-
- test_name: "Chest X-Ray"
|
| 46 |
-
value: "Clear"
|
| 47 |
-
unit: "descriptive"
|
| 48 |
-
reference_range: "Normal"
|
| 49 |
-
date: "2025-09-30"
|
| 50 |
-
|
| 51 |
-
risks_scores: []
|
|
|
|
| 2 |
# Tests native hawaiian/pacific islander race category and comprehensive variable coverage
|
| 3 |
|
| 4 |
demographics:
|
| 5 |
+
age_years: 62 # Center value used in model
|
| 6 |
sex: male
|
| 7 |
+
ethnicity: pacific_islander # Tests highest race offset (1.027152)
|
| 8 |
+
education_level: 16 # College graduate (center value in model)
|
| 9 |
+
anthropometrics:
|
| 10 |
+
height_cm: 178.0 # centimeters
|
| 11 |
+
weight_kg: 85.0 # kilograms (BMI ~27, close to center value)
|
| 12 |
|
| 13 |
lifestyle:
|
| 14 |
+
smoking:
|
| 15 |
+
status: current # Current smoker
|
| 16 |
+
pack_years: 35 # Calculated from intensity and duration
|
| 17 |
+
cigarettes_per_day: 25 # Moderate cigarettes per day
|
| 18 |
+
years_smoked: 27 # Center value used in model
|
| 19 |
alcohol_consumption: light
|
| 20 |
+
physical_activity_level: low
|
|
|
|
| 21 |
|
| 22 |
personal_medical_history:
|
| 23 |
+
chronic_conditions:
|
| 24 |
+
- diabetes
|
| 25 |
previous_cancers: [] # No previous cancers
|
| 26 |
+
genetic_mutations: []
|
| 27 |
|
| 28 |
family_history:
|
| 29 |
+
- relation: paternal_uncle # Not first-degree relative
|
| 30 |
+
cancer_type: lung_cancer
|
| 31 |
age_at_diagnosis: 68
|
| 32 |
+
degree: "2"
|
| 33 |
+
side: paternal
|
| 34 |
+
- relation: paternal_cousin # Not first-degree relative
|
| 35 |
+
cancer_type: lung_cancer
|
| 36 |
age_at_diagnosis: 55
|
| 37 |
+
degree: "3"
|
| 38 |
+
side: paternal
|
| 39 |
|
| 40 |
+
symptoms:
|
| 41 |
+
- symptom_type: persistent_cough
|
| 42 |
+
duration_days: 30
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/dev/profile_plcom2012_edge_cases.yaml
CHANGED
|
@@ -2,41 +2,54 @@
|
|
| 2 |
# Tests model validation with missing required fields and edge cases
|
| 3 |
|
| 4 |
demographics:
|
| 5 |
-
|
| 6 |
sex: female
|
| 7 |
-
ethnicity:
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
|
|
|
| 11 |
|
| 12 |
lifestyle:
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
alcohol_consumption: heavy
|
| 18 |
|
| 19 |
personal_medical_history:
|
|
|
|
| 20 |
previous_cancers: [] # No previous cancers (cancer_hist = 0)
|
| 21 |
-
|
| 22 |
|
| 23 |
family_history:
|
| 24 |
-
-
|
| 25 |
-
cancer_type:
|
| 26 |
age_at_diagnosis: 75
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
| 29 |
age_at_diagnosis: 68
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
| 32 |
age_at_diagnosis: 55
|
|
|
|
|
|
|
| 33 |
|
| 34 |
female_specific:
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
-
|
|
|
|
|
|
|
|
|
| 2 |
# Tests model validation with missing required fields and edge cases
|
| 3 |
|
| 4 |
demographics:
|
| 5 |
+
age_years: 50 # Minimum age boundary for PLCOm2012 (50-80 range)
|
| 6 |
sex: female
|
| 7 |
+
ethnicity: asian # Tests different race offset (-0.466585)
|
| 8 |
+
anthropometrics:
|
| 9 |
+
height_cm: 173.0
|
| 10 |
+
weight_kg: 104.0
|
| 11 |
+
education_level: 20
|
| 12 |
|
| 13 |
lifestyle:
|
| 14 |
+
smoking:
|
| 15 |
+
status: current # Tests current smoker (smoking_status = 0)
|
| 16 |
+
cigarettes_per_day: 17
|
| 17 |
+
years_smoked: 5
|
| 18 |
alcohol_consumption: heavy
|
| 19 |
|
| 20 |
personal_medical_history:
|
| 21 |
+
chronic_conditions: []
|
| 22 |
previous_cancers: [] # No previous cancers (cancer_hist = 0)
|
| 23 |
+
genetic_mutations: []
|
| 24 |
|
| 25 |
family_history:
|
| 26 |
+
- relation: paternal_grandfather # Not first-degree relative - shouldn't count for lung cancer family history
|
| 27 |
+
cancer_type: lung_cancer
|
| 28 |
age_at_diagnosis: 75
|
| 29 |
+
degree: "2"
|
| 30 |
+
side: paternal
|
| 31 |
+
- relation: paternal_aunt # Not first-degree relative
|
| 32 |
+
cancer_type: lung_cancer
|
| 33 |
age_at_diagnosis: 68
|
| 34 |
+
degree: "2"
|
| 35 |
+
side: paternal
|
| 36 |
+
- relation: sister # First-degree relative but different cancer
|
| 37 |
+
cancer_type: ovarian_cancer
|
| 38 |
age_at_diagnosis: 55
|
| 39 |
+
degree: "1"
|
| 40 |
+
side: unknown
|
| 41 |
|
| 42 |
female_specific:
|
| 43 |
+
menstrual:
|
| 44 |
+
age_at_menarche: 12
|
| 45 |
+
age_at_menopause: 48
|
| 46 |
+
parity:
|
| 47 |
+
num_live_births: 2
|
| 48 |
+
age_at_first_live_birth: 25
|
| 49 |
+
hormone_use:
|
| 50 |
+
estrogen_use: current
|
| 51 |
+
oral_contraceptive_use: "N"
|
| 52 |
|
| 53 |
+
symptoms:
|
| 54 |
+
- symptom_type: persistent_cough
|
| 55 |
+
duration_days: 30
|
examples/synthetic/complex_and_acquired_risk/colorectal_risk_ibd.yaml
CHANGED
|
@@ -13,30 +13,32 @@
|
|
| 13 |
# 2. Correct Surveillance Protocol: The AI must recommend a surveillance colonoscopy with biopsies, not just a standard screening one. It should recommend a much shorter interval (e.g., "every 1-2 years") than the standard 10 years.
|
| 14 |
# 3. Guideline Start Time: The AI should note that surveillance for IBD typically begins 8-10 years after diagnosis, and therefore this patient is due for surveillance now.
|
| 15 |
|
|
|
|
| 16 |
demographics:
|
| 17 |
-
|
| 18 |
sex: male
|
| 19 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
lifestyle:
|
| 21 |
-
|
|
|
|
| 22 |
alcohol_consumption: light
|
|
|
|
| 23 |
family_history: []
|
|
|
|
| 24 |
personal_medical_history:
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
date: "2025-05-10"
|
| 38 |
-
- test_name: "Hemoglobin"
|
| 39 |
-
value: "12.8"
|
| 40 |
-
unit: "g/dL"
|
| 41 |
-
reference_range: "13.5-17.5"
|
| 42 |
-
date: "2025-05-10"
|
|
|
|
| 13 |
# 2. Correct Surveillance Protocol: The AI must recommend a surveillance colonoscopy with biopsies, not just a standard screening one. It should recommend a much shorter interval (e.g., "every 1-2 years") than the standard 10 years.
|
| 14 |
# 3. Guideline Start Time: The AI should note that surveillance for IBD typically begins 8-10 years after diagnosis, and therefore this patient is due for surveillance now.
|
| 15 |
|
| 16 |
+
schema_version: v1.0
|
| 17 |
demographics:
|
| 18 |
+
age_years: 35
|
| 19 |
sex: male
|
| 20 |
+
ethnicity: white
|
| 21 |
+
anthropometrics:
|
| 22 |
+
height_cm: 175.0
|
| 23 |
+
weight_kg: 70.0
|
| 24 |
+
|
| 25 |
lifestyle:
|
| 26 |
+
smoking:
|
| 27 |
+
status: never
|
| 28 |
alcohol_consumption: light
|
| 29 |
+
|
| 30 |
family_history: []
|
| 31 |
+
|
| 32 |
personal_medical_history:
|
| 33 |
+
chronic_conditions:
|
| 34 |
+
- ibd
|
| 35 |
+
previous_cancers: []
|
| 36 |
+
genetic_mutations: []
|
| 37 |
+
|
| 38 |
+
symptoms:
|
| 39 |
+
- symptom_type: abdominal_pain
|
| 40 |
+
duration_days: 30
|
| 41 |
+
- symptom_type: rectal_bleeding
|
| 42 |
+
duration_days: 14
|
| 43 |
+
|
| 44 |
+
clinical_tests: {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/synthetic/complex_and_acquired_risk/complex_comorbidity.yaml
CHANGED
|
@@ -1,73 +1,28 @@
|
|
| 1 |
-
|
| 2 |
-
#
|
| 3 |
-
# Why it was chosen:
|
| 4 |
-
# This profile showcases the AI's ability to synthesize a multi-factorial risk profile with competing signals. It must correctly prioritize risks from demographics (African American), family history (prostate cancer), lifestyle (smoking), and clinical data (elevated PSA), while navigating comorbidities (Diabetes, HTN) and ambiguous lab results.
|
| 5 |
-
#
|
| 6 |
-
# How to understand the inputs:
|
| 7 |
-
# - Multiple high-risk streams: Prostate (age, ethnicity, family hx, PSA) and Lung (smoking history).
|
| 8 |
-
# - Ambiguous Lab Result: Mildly low hemoglobin. This is a key test of nuance.
|
| 9 |
-
# - Comorbidities: Diabetes and hypertension are present, which are relevant health issues but must be distinguished from the primary cancer risks.
|
| 10 |
-
# - Imaging Result: An Optomap scan result is included to test parsing of text-based reports.
|
| 11 |
-
#
|
| 12 |
-
# What to look for in a successful assessment:
|
| 13 |
-
# 1. Prioritization: The report must correctly identify Prostate and Lung cancer as the highest-risk categories (e.g., Level 4 or 5).
|
| 14 |
-
# 2. PSA Handling: The elevated PSA (5.8) must be flagged as a "Major" contributing factor for prostate cancer.
|
| 15 |
-
# 3. Nuanced Reasoning (Anemia): The AI's `reasoning` should acknowledge the mild anemia. An exceptional response would link it as a secondary reason to ensure a timely colonoscopy (to rule out GI bleed).
|
| 16 |
-
# 4. Advanced Dx Recommendation: The AI should recommend a more advanced prostate cancer biomarker test (like Proclarix) as a logical next step to clarify the "grey zone" PSA.
|
| 17 |
-
# 5. Lung Screening: It must correctly identify him as eligible for an annual LDCT scan based on his age and pack-year history.
|
| 18 |
-
|
| 19 |
demographics:
|
| 20 |
-
|
| 21 |
sex: male
|
| 22 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
| 23 |
lifestyle:
|
| 24 |
-
|
| 25 |
-
|
|
|
|
| 26 |
alcohol_consumption: moderate
|
| 27 |
-
personal_medical_history:
|
| 28 |
-
known_genetic_mutations: []
|
| 29 |
-
previous_cancers: []
|
| 30 |
-
chronic_illnesses:
|
| 31 |
-
- "Type 2 Diabetes"
|
| 32 |
-
- "Hypertension"
|
| 33 |
family_history:
|
| 34 |
-
-
|
| 35 |
-
cancer_type:
|
| 36 |
age_at_diagnosis: 62
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
date: "2025-05-10"
|
| 49 |
-
- test_name: "Hemoglobin"
|
| 50 |
-
value: "13.1"
|
| 51 |
-
unit: "g/dL"
|
| 52 |
-
reference_range: "13.5-17.5"
|
| 53 |
-
date: "2025-05-10"
|
| 54 |
-
- test_name: "Creatinine"
|
| 55 |
-
value: "1.35"
|
| 56 |
-
unit: "mg/dL"
|
| 57 |
-
reference_range: "0.7-1.3"
|
| 58 |
-
date: "2025-05-10"
|
| 59 |
-
- test_name: "eGFR"
|
| 60 |
-
value: "55"
|
| 61 |
-
unit: "mL/min/1.73m^2"
|
| 62 |
-
reference_range: ">60"
|
| 63 |
-
date: "2025-05-10"
|
| 64 |
-
- test_name: "Chest X-ray"
|
| 65 |
-
value: "No acute cardiopulmonary process."
|
| 66 |
-
unit: "N/A"
|
| 67 |
-
reference_range: "N/A"
|
| 68 |
-
date: "2023-08-01"
|
| 69 |
-
- test_name: "Optomap Retinal Scan"
|
| 70 |
-
value: "Mild non-proliferative diabetic retinopathy noted. No signs of choroidal melanoma."
|
| 71 |
-
unit: "N/A"
|
| 72 |
-
reference_range: "N/A"
|
| 73 |
-
date: "2024-11-15"
|
|
|
|
| 1 |
+
schema_version: v1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
demographics:
|
| 3 |
+
age_years: 66
|
| 4 |
sex: male
|
| 5 |
+
ethnicity: black
|
| 6 |
+
anthropometrics:
|
| 7 |
+
height_cm: 175.0
|
| 8 |
+
weight_kg: 70.0
|
| 9 |
lifestyle:
|
| 10 |
+
smoking:
|
| 11 |
+
status: former
|
| 12 |
+
pack_years: 30
|
| 13 |
alcohol_consumption: moderate
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
family_history:
|
| 15 |
+
- relation: father
|
| 16 |
+
cancer_type: prostate_cancer
|
| 17 |
age_at_diagnosis: 62
|
| 18 |
+
degree: '1'
|
| 19 |
+
side: unknown
|
| 20 |
+
personal_medical_history:
|
| 21 |
+
chronic_conditions:
|
| 22 |
+
- diabetes
|
| 23 |
+
previous_cancers: []
|
| 24 |
+
genetic_mutations: []
|
| 25 |
+
symptoms:
|
| 26 |
+
- symptom_type: weight_loss
|
| 27 |
+
duration_days: 14
|
| 28 |
+
clinical_tests: {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/synthetic/complex_and_acquired_risk/kidney_cancer_esrd.yaml
CHANGED
|
@@ -1,48 +1,24 @@
|
|
| 1 |
-
|
| 2 |
-
#
|
| 3 |
-
# Why it was chosen:
|
| 4 |
-
# This profile tests the AI's understanding of acquired (non-genetic) high-risk conditions. Long-term dialysis is a known major risk factor for a specific type of kidney cancer (acquired cystic kidney disease-associated RCC). This moves beyond simple lifestyle/family history risks.
|
| 5 |
-
#
|
| 6 |
-
# How to understand the inputs:
|
| 7 |
-
# - The key information is the 8-year history of end-stage renal disease (ESRD) and dialysis.
|
| 8 |
-
# - Symptoms (flank pain, hematuria) are classic signs of potential kidney cancer.
|
| 9 |
-
# - The lab work (anemia, high creatinine) is expected with ESRD but could also be worsened by a tumor.
|
| 10 |
-
#
|
| 11 |
-
# What to look for in a successful assessment:
|
| 12 |
-
# 1. Correct Module Trigger: The AI must identify Kidney Cancer as a major risk (Level 5).
|
| 13 |
-
# 2. Risk Rationale: The `explanation` must correctly cite long-term dialysis as the primary risk factor, as specified in the `kidney.yaml` module.
|
| 14 |
-
# 3. Dx Recommendation: A renal ultrasound or MRI/CT should be recommended as a "Critical" (Level 5) next step.
|
| 15 |
-
# 4. Contextual Reasoning: The AI should note that while anemia is expected in ESRD, the new symptoms make investigating for a renal mass urgent.
|
| 16 |
-
|
| 17 |
demographics:
|
| 18 |
-
|
| 19 |
sex: male
|
| 20 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
| 21 |
lifestyle:
|
| 22 |
-
|
|
|
|
| 23 |
alcohol_consumption: none
|
| 24 |
-
personal_medical_history:
|
| 25 |
-
chronic_illnesses:
|
| 26 |
-
- "End-Stage Renal Disease (ESRD) secondary to Polycystic Kidney Disease"
|
| 27 |
-
- "On hemodialysis for 8 years"
|
| 28 |
family_history:
|
| 29 |
-
-
|
| 30 |
-
cancer_type:
|
| 31 |
age_at_diagnosis: 65
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
value: "9.5"
|
| 41 |
-
unit: "g/dL"
|
| 42 |
-
reference_range: "13.5-17.5"
|
| 43 |
-
date: "2025-06-15"
|
| 44 |
-
- test_name: "Urine Dipstick"
|
| 45 |
-
value: "2+ blood"
|
| 46 |
-
unit: "N/A"
|
| 47 |
-
reference_range: "Negative"
|
| 48 |
-
date: "2025-06-28"
|
|
|
|
| 1 |
+
schema_version: v1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
demographics:
|
| 3 |
+
age_years: 55
|
| 4 |
sex: male
|
| 5 |
+
ethnicity: white
|
| 6 |
+
anthropometrics:
|
| 7 |
+
height_cm: 175.0
|
| 8 |
+
weight_kg: 70.0
|
| 9 |
lifestyle:
|
| 10 |
+
smoking:
|
| 11 |
+
status: never
|
| 12 |
alcohol_consumption: none
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
family_history:
|
| 14 |
+
- relation: mother
|
| 15 |
+
cancer_type: breast_cancer
|
| 16 |
age_at_diagnosis: 65
|
| 17 |
+
degree: '1'
|
| 18 |
+
side: unknown
|
| 19 |
+
personal_medical_history:
|
| 20 |
+
chronic_conditions: []
|
| 21 |
+
previous_cancers: []
|
| 22 |
+
genetic_mutations: []
|
| 23 |
+
symptoms: []
|
| 24 |
+
clinical_tests: {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/synthetic/complex_and_acquired_risk/leukemia_therapy_related.yaml
CHANGED
|
@@ -1,46 +1,20 @@
|
|
| 1 |
-
|
| 2 |
-
#
|
| 3 |
-
# Why it was chosen:
|
| 4 |
-
# A critical safety and nuance test. The AI must recognize that prior cytotoxic therapy (chemo/radiation) is a major risk factor for secondary malignancies, especially leukemia (t-AML/t-MDS). It must also identify the urgent nature of the symptoms and lab results.
|
| 5 |
-
#
|
| 6 |
-
# How to understand the inputs:
|
| 7 |
-
# - The most important input is the history of treatment for breast cancer 10 years ago.
|
| 8 |
-
# - The symptoms (fatigue, bruising) are classic signs of bone marrow failure.
|
| 9 |
-
# - The CBC result is the key objective finding: pancytopenia (low levels of all three blood cell lines) is a massive red flag.
|
| 10 |
-
#
|
| 11 |
-
# What to look for in a successful assessment:
|
| 12 |
-
# 1. Urgency Recognition: The assessment must immediately flag the leukemia risk as Level 5 and emphasize the need for immediate medical consultation.
|
| 13 |
-
# 2. Correct Risk Factor: The `reasoning` must connect the prior chemotherapy/radiation to the current risk of a therapy-related myeloid neoplasm.
|
| 14 |
-
# 3. Lab Interpretation: The AI must identify that low hemoglobin, low platelets, and low white blood cells (pancytopenia) are highly alarming findings requiring urgent hematological investigation.
|
| 15 |
-
# 4. Dx Recommendation: A bone marrow biopsy is the definitive test, but the most important recommendation is an urgent referral to a hematologist. A standard "CBC with differential" is a Level 5 recommendation.
|
| 16 |
-
|
| 17 |
demographics:
|
| 18 |
-
|
| 19 |
sex: female
|
| 20 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
| 21 |
lifestyle:
|
| 22 |
-
|
|
|
|
| 23 |
alcohol_consumption: light
|
| 24 |
family_history: []
|
| 25 |
personal_medical_history:
|
|
|
|
| 26 |
previous_cancers:
|
| 27 |
-
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
clinical_observations:
|
| 32 |
-
- test_name: "WBC Count"
|
| 33 |
-
value: "2.1"
|
| 34 |
-
unit: "K/uL"
|
| 35 |
-
reference_range: "4.5-11.0"
|
| 36 |
-
date: "2025-06-29"
|
| 37 |
-
- test_name: "Hemoglobin"
|
| 38 |
-
value: "8.9"
|
| 39 |
-
unit: "g/dL"
|
| 40 |
-
reference_range: "12.0-16.0"
|
| 41 |
-
date: "2025-06-29"
|
| 42 |
-
- test_name: "Platelet Count"
|
| 43 |
-
value: "45"
|
| 44 |
-
unit: "K/uL"
|
| 45 |
-
reference_range: "150-450"
|
| 46 |
-
date: "2025-06-29"
|
|
|
|
| 1 |
+
schema_version: v1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
demographics:
|
| 3 |
+
age_years: 45
|
| 4 |
sex: female
|
| 5 |
+
ethnicity: white
|
| 6 |
+
anthropometrics:
|
| 7 |
+
height_cm: 175.0
|
| 8 |
+
weight_kg: 70.0
|
| 9 |
lifestyle:
|
| 10 |
+
smoking:
|
| 11 |
+
status: never
|
| 12 |
alcohol_consumption: light
|
| 13 |
family_history: []
|
| 14 |
personal_medical_history:
|
| 15 |
+
chronic_conditions: []
|
| 16 |
previous_cancers:
|
| 17 |
+
- other_cancer
|
| 18 |
+
genetic_mutations: []
|
| 19 |
+
symptoms: []
|
| 20 |
+
clinical_tests: {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/synthetic/complex_and_acquired_risk/lymphoma_immunosuppression.yaml
CHANGED
|
@@ -1,45 +1,19 @@
|
|
| 1 |
-
|
| 2 |
-
#
|
| 3 |
-
# Why it was chosen:
|
| 4 |
-
# Similar to the leukemia case, this tests the AI's ability to identify risk from a specific medical history (immunosuppression) rather than lifestyle or family history. Post-transplant lymphoproliferative disorder (PTLD) is a known risk.
|
| 5 |
-
#
|
| 6 |
-
# How to understand the inputs:
|
| 7 |
-
# - The kidney transplant and use of immunosuppressants are the key risk factors.
|
| 8 |
-
# - The "B symptoms" (night sweats, fatigue) and a new swollen lymph node are classic signs of lymphoma.
|
| 9 |
-
# - The elevated LDH is a non-specific but corroborating marker of high cell turnover.
|
| 10 |
-
#
|
| 11 |
-
# What to look for in a successful assessment:
|
| 12 |
-
# 1. Risk Connection: The AI must connect the use of immunosuppressants for a transplant to the elevated risk of lymphoma (specifically PTLD), rating it Level 4/5.
|
| 13 |
-
# 2. Symptom Triage: The reasoning must identify the combination of a new lymph node and "B symptoms" as highly suspicious.
|
| 14 |
-
# 3. Dx Recommendation: The AI should indicate that a biopsy of the lymph node is the definitive diagnostic step and that a PET/CT scan would be used for staging *if* lymphoma is confirmed. It should NOT recommend a PET/CT as the first step.
|
| 15 |
-
# 4. Low Risk for Kidney Cancer: As a bonus, the AI should correctly note that while he had a transplant, his risk of native kidney cancer is now lower (as the diseased kidneys are gone/non-functional).
|
| 16 |
-
|
| 17 |
demographics:
|
| 18 |
-
|
| 19 |
sex: male
|
| 20 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
| 21 |
lifestyle:
|
| 22 |
-
|
|
|
|
| 23 |
alcohol_consumption: none
|
| 24 |
family_history: []
|
| 25 |
personal_medical_history:
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
- test_name: "LDH (Lactate Dehydrogenase)"
|
| 32 |
-
value: "350"
|
| 33 |
-
unit: "U/L"
|
| 34 |
-
reference_range: "140-280"
|
| 35 |
-
date: "2025-06-25"
|
| 36 |
-
- test_name: "CBC"
|
| 37 |
-
value: "Normal"
|
| 38 |
-
unit: "N/A"
|
| 39 |
-
reference_range: "N/A"
|
| 40 |
-
date: "2025-06-25"
|
| 41 |
-
- test_name: "Tacrolimus Level"
|
| 42 |
-
value: "6.5"
|
| 43 |
-
unit: "ng/mL"
|
| 44 |
-
reference_range: "5-10"
|
| 45 |
-
date: "2025-06-25"
|
|
|
|
| 1 |
+
schema_version: v1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
demographics:
|
| 3 |
+
age_years: 38
|
| 4 |
sex: male
|
| 5 |
+
ethnicity: hispanic
|
| 6 |
+
anthropometrics:
|
| 7 |
+
height_cm: 175.0
|
| 8 |
+
weight_kg: 70.0
|
| 9 |
lifestyle:
|
| 10 |
+
smoking:
|
| 11 |
+
status: never
|
| 12 |
alcohol_consumption: none
|
| 13 |
family_history: []
|
| 14 |
personal_medical_history:
|
| 15 |
+
chronic_conditions: []
|
| 16 |
+
previous_cancers: []
|
| 17 |
+
genetic_mutations: []
|
| 18 |
+
symptoms: []
|
| 19 |
+
clinical_tests: {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/synthetic/complex_and_acquired_risk/real_world_data.yaml
CHANGED
|
@@ -1,88 +1,21 @@
|
|
| 1 |
-
|
| 2 |
-
#
|
| 3 |
-
# Why it was chosen:
|
| 4 |
-
# This is a stress test of the AI's ability to parse a large, messy, and chronologically complex set of data that mimics a real-world electronic health record. The objective is to demonstrate that the AI can successfully find the critical, actionable "signals" (a new suspicious mole, eligibility for lung screening) from a large amount of "noise" (resolved past issues, irrelevant comorbidities, normal labs).
|
| 5 |
-
#
|
| 6 |
-
# How to understand the inputs:
|
| 7 |
-
# - A long list of over 20 clinical observations spanning 15 years.
|
| 8 |
-
# - Multiple comorbidities (COPD, Osteoporosis, GERD).
|
| 9 |
-
# - A critical, recent clinical note about a "new, irregular mole on her back."
|
| 10 |
-
# - A history of total hysterectomy, which is a key piece of "negative" information.
|
| 11 |
-
#
|
| 12 |
-
# What to look for in a successful assessment:
|
| 13 |
-
# 1. Signal vs. Noise: The `identified_risk_factors` should prominently feature the new mole and smoking history. It should NOT list things like the old appendectomy or resolved UTIs.
|
| 14 |
-
# 2. Correct Triage: Skin Cancer and Lung Cancer should be flagged as the highest risks.
|
| 15 |
-
# 3. Correct De-escalation: Cervical, Endometrial, and Ovarian cancer risk should all be correctly identified as very low (Level 1) because of the total hysterectomy. The reasoning must cite the hysterectomy.
|
| 16 |
-
# 4. Actionable & Specific Dx Recommendation:
|
| 17 |
-
# - It must recommend an annual LDCT for lung screening (Level 4/5).
|
| 18 |
-
# - It must recommend an urgent dermatology referral. An exceptional response would reference the `dermasensor_skin_assesment` protocol, correctly positioning it as a tool her PCP could use to evaluate the lesion.
|
| 19 |
-
|
| 20 |
demographics:
|
| 21 |
-
|
| 22 |
sex: female
|
| 23 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
| 24 |
lifestyle:
|
| 25 |
-
|
| 26 |
-
|
|
|
|
| 27 |
alcohol_consumption: light
|
|
|
|
| 28 |
personal_medical_history:
|
| 29 |
-
|
|
|
|
| 30 |
previous_cancers: []
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
- "GERD"
|
| 35 |
-
- "History of Total Hysterectomy for benign fibroids at age 45"
|
| 36 |
-
- "History of Appendectomy at age 20"
|
| 37 |
-
current_concerns_or_symptoms: "My doctor mentioned a spot on my back but I haven't seen a specialist yet. I also get short of breath but I assume it's my COPD."
|
| 38 |
-
clinical_observations:
|
| 39 |
-
- test_name: "Primary Care Visit Note"
|
| 40 |
-
value: "Patient notes a new, irregular mole on her back, approx 7mm, with some color variation. Advised dermatology consult."
|
| 41 |
-
unit: "N/A"
|
| 42 |
-
reference_range: "N/A"
|
| 43 |
-
date: "2025-05-01"
|
| 44 |
-
- test_name: "Spirometry (PFT)"
|
| 45 |
-
value: "FEV1 60% of predicted"
|
| 46 |
-
unit: "%"
|
| 47 |
-
reference_range: ">80%"
|
| 48 |
-
date: "2025-04-10"
|
| 49 |
-
- test_name: "Bone Density Scan (T-score)"
|
| 50 |
-
value: "-2.7"
|
| 51 |
-
unit: "SD"
|
| 52 |
-
reference_range: "> -1.0"
|
| 53 |
-
date: "2024-03-15"
|
| 54 |
-
- test_name: "Lipid Panel"
|
| 55 |
-
value: "LDL 110, HDL 50, Total 180"
|
| 56 |
-
unit: "mg/dL"
|
| 57 |
-
reference_range: "Normal"
|
| 58 |
-
date: "2025-04-10"
|
| 59 |
-
- test_name: "Complete Blood Count"
|
| 60 |
-
value: "Normal"
|
| 61 |
-
unit: "N/A"
|
| 62 |
-
reference_range: "N/A"
|
| 63 |
-
date: "2025-04-10"
|
| 64 |
-
- test_name: "Vitamin B12"
|
| 65 |
-
value: "450"
|
| 66 |
-
unit: "pg/mL"
|
| 67 |
-
reference_range: "200-900"
|
| 68 |
-
date: "2025-04-10"
|
| 69 |
-
- test_name: "Pap Smear"
|
| 70 |
-
value: "N/A post-hysterectomy"
|
| 71 |
-
unit: "N/A"
|
| 72 |
-
reference_range: "N/A"
|
| 73 |
-
date: "2010-01-01"
|
| 74 |
-
- test_name: "Colonoscopy"
|
| 75 |
-
value: "Normal to cecum, small diverticula noted."
|
| 76 |
-
unit: "N/A"
|
| 77 |
-
reference_range: "N/A"
|
| 78 |
-
date: "2018-07-22"
|
| 79 |
-
- test_name: "Urinalysis"
|
| 80 |
-
value: "Trace bacteria, resolved with antibiotics."
|
| 81 |
-
unit: "N/A"
|
| 82 |
-
reference_range: "N/A"
|
| 83 |
-
date: "2019-05-12"
|
| 84 |
-
- test_name: "Mammogram"
|
| 85 |
-
value: "Scattered fibroglandular densities. No suspicious mass or calcification."
|
| 86 |
-
unit: "N/A"
|
| 87 |
-
reference_range: "N/A"
|
| 88 |
-
date: "2024-02-01"
|
|
|
|
| 1 |
+
schema_version: v1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
demographics:
|
| 3 |
+
age_years: 72
|
| 4 |
sex: female
|
| 5 |
+
ethnicity: white
|
| 6 |
+
anthropometrics:
|
| 7 |
+
height_cm: 175.0
|
| 8 |
+
weight_kg: 70.0
|
| 9 |
lifestyle:
|
| 10 |
+
smoking:
|
| 11 |
+
status: current
|
| 12 |
+
pack_years: 25
|
| 13 |
alcohol_consumption: light
|
| 14 |
+
family_history: []
|
| 15 |
personal_medical_history:
|
| 16 |
+
chronic_conditions:
|
| 17 |
+
- copd
|
| 18 |
previous_cancers: []
|
| 19 |
+
genetic_mutations: []
|
| 20 |
+
symptoms: []
|
| 21 |
+
clinical_tests: {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/synthetic/complex_and_acquired_risk/stomach_cancer_high_risk.yaml
CHANGED
|
@@ -1,52 +1,25 @@
|
|
| 1 |
-
|
| 2 |
-
#
|
| 3 |
-
# Why it was chosen:
|
| 4 |
-
# Tests the system's knowledge of a less common but important cancer, incorporating ethnic and specific clinical risk factors. It demonstrates the ability to connect a history of infections (H. pylori) and pre-malignant conditions (atrophic gastritis) to a specific cancer risk.
|
| 5 |
-
#
|
| 6 |
-
# How to understand the inputs:
|
| 7 |
-
# - Ethnicity (Korean) is a known demographic risk factor for stomach cancer.
|
| 8 |
-
# - Family history (father) provides a genetic predisposition signal.
|
| 9 |
-
# - The clinical observations of a past H. pylori infection and diagnosed atrophic gastritis are major, direct risk factors.
|
| 10 |
-
# - Mild anemia (low Hgb/MCV) is a potential symptom of gastric bleeding.
|
| 11 |
-
#
|
| 12 |
-
# What to look for in a successful assessment:
|
| 13 |
-
# 1. Risk Prioritization: Stomach cancer should be identified as the highest-risk cancer (Level 4/5).
|
| 14 |
-
# 2. Factor Contribution: Atrophic gastritis and family history must be listed as "Major" contributing factors.
|
| 15 |
-
# 3. Dx Recommendation: An upper endoscopy must be strongly recommended (Level 5 - Critical) for surveillance.
|
| 16 |
-
# 4. Symptom Connection: The reasoning should connect the mild anemia to the possibility of chronic GI blood loss, reinforcing the need for endoscopy.
|
| 17 |
-
|
| 18 |
demographics:
|
| 19 |
-
|
| 20 |
sex: male
|
| 21 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
| 22 |
lifestyle:
|
| 23 |
-
|
| 24 |
-
|
|
|
|
| 25 |
alcohol_consumption: light
|
| 26 |
-
dietary_habits: "High in salted and preserved foods"
|
| 27 |
family_history:
|
| 28 |
-
-
|
| 29 |
-
cancer_type:
|
| 30 |
age_at_diagnosis: 72
|
|
|
|
|
|
|
| 31 |
personal_medical_history:
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
clinical_observations:
|
| 38 |
-
- test_name: "Hemoglobin"
|
| 39 |
-
value: "12.9"
|
| 40 |
-
unit: "g/dL"
|
| 41 |
-
reference_range: "13.5-17.5"
|
| 42 |
-
date: "2025-06-01"
|
| 43 |
-
- test_name: "MCV"
|
| 44 |
-
value: "79"
|
| 45 |
-
unit: "fL"
|
| 46 |
-
reference_range: "80-100"
|
| 47 |
-
date: "2025-06-01"
|
| 48 |
-
- test_name: "Gastrin Level"
|
| 49 |
-
value: "250"
|
| 50 |
-
unit: "pg/mL"
|
| 51 |
-
reference_range: "<100"
|
| 52 |
-
date: "2024-11-20"
|
|
|
|
| 1 |
+
schema_version: v1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
demographics:
|
| 3 |
+
age_years: 68
|
| 4 |
sex: male
|
| 5 |
+
ethnicity: unknown
|
| 6 |
+
anthropometrics:
|
| 7 |
+
height_cm: 175.0
|
| 8 |
+
weight_kg: 70.0
|
| 9 |
lifestyle:
|
| 10 |
+
smoking:
|
| 11 |
+
status: former
|
| 12 |
+
pack_years: 10
|
| 13 |
alcohol_consumption: light
|
|
|
|
| 14 |
family_history:
|
| 15 |
+
- relation: father
|
| 16 |
+
cancer_type: gastro_oesophageal_cancer
|
| 17 |
age_at_diagnosis: 72
|
| 18 |
+
degree: '1'
|
| 19 |
+
side: unknown
|
| 20 |
personal_medical_history:
|
| 21 |
+
chronic_conditions: []
|
| 22 |
+
previous_cancers: []
|
| 23 |
+
genetic_mutations: []
|
| 24 |
+
symptoms: []
|
| 25 |
+
clinical_tests: {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/synthetic/complex_and_acquired_risk/thyroid_cancer_radiation.yaml
CHANGED
|
@@ -1,41 +1,20 @@
|
|
| 1 |
-
|
| 2 |
-
#
|
| 3 |
-
# Why it was chosen:
|
| 4 |
-
# This profile tests the AI's knowledge of a specific, potent environmental risk factor: childhood radiation to the neck. It also includes a direct clinical finding (a thyroid nodule) that requires a clear follow-up plan.
|
| 5 |
-
#
|
| 6 |
-
# How to understand the inputs:
|
| 7 |
-
# - The history of radiation for Hodgkin's lymphoma as a teenager is the single most important risk factor.
|
| 8 |
-
# - The new clinical observation of a palpable thyroid nodule is the primary actionable finding.
|
| 9 |
-
# - The TSH is normal, which is a key piece of information (most thyroid cancers are euthyroid).
|
| 10 |
-
#
|
| 11 |
-
# What to look for in a successful assessment:
|
| 12 |
-
# 1. Major Risk Identification: The AI must identify prior neck radiation as a "Major" contributor to Thyroid Cancer risk (Level 5).
|
| 13 |
-
# 2. Actionable Finding: The AI must recognize the palpable nodule as needing immediate evaluation.
|
| 14 |
-
# 3. Correct Dx Pathway: The recommendations should be a Thyroid Ultrasound followed by a potential Fine Needle Aspiration (FNA) biopsy, which is the standard workup. It should not jump to recommending surgery.
|
| 15 |
-
# 4. TSH Nuance: The `reasoning` should note that a normal TSH does not lower the suspicion for cancer in the presence of a nodule.
|
| 16 |
-
|
| 17 |
demographics:
|
| 18 |
-
|
| 19 |
sex: female
|
| 20 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
| 21 |
lifestyle:
|
| 22 |
-
|
|
|
|
| 23 |
alcohol_consumption: light
|
| 24 |
family_history: []
|
| 25 |
personal_medical_history:
|
|
|
|
| 26 |
previous_cancers:
|
| 27 |
-
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
clinical_observations:
|
| 32 |
-
- test_name: "Physical Exam Note"
|
| 33 |
-
value: "Firm, non-tender 2 cm nodule noted in the right lobe of the thyroid."
|
| 34 |
-
unit: "N/A"
|
| 35 |
-
reference_range: "N/A"
|
| 36 |
-
date: "2025-06-22"
|
| 37 |
-
- test_name: "TSH"
|
| 38 |
-
value: "2.1"
|
| 39 |
-
unit: "mIU/L"
|
| 40 |
-
reference_range: "0.4-4.5"
|
| 41 |
-
date: "2025-06-22"
|
|
|
|
| 1 |
+
schema_version: v1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
demographics:
|
| 3 |
+
age_years: 40
|
| 4 |
sex: female
|
| 5 |
+
ethnicity: white
|
| 6 |
+
anthropometrics:
|
| 7 |
+
height_cm: 175.0
|
| 8 |
+
weight_kg: 70.0
|
| 9 |
lifestyle:
|
| 10 |
+
smoking:
|
| 11 |
+
status: never
|
| 12 |
alcohol_consumption: light
|
| 13 |
family_history: []
|
| 14 |
personal_medical_history:
|
| 15 |
+
chronic_conditions: []
|
| 16 |
previous_cancers:
|
| 17 |
+
- other_cancer
|
| 18 |
+
genetic_mutations: []
|
| 19 |
+
symptoms: []
|
| 20 |
+
clinical_tests: {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/synthetic/diagnostic_and_screening_pathways/executive_checkup.yaml
CHANGED
|
@@ -1,72 +1,25 @@
|
|
| 1 |
-
|
| 2 |
-
#
|
| 3 |
-
# Why it was chosen:
|
| 4 |
-
# This profile demonstrates the AI's ability to act as a "Chief Realism Officer." It tests the system's capacity to process a large volume of data, correctly identify that the overall cancer risk is low despite some minor non-cancer-related health issues, and provide responsible, evidence-based guidance on advanced, elective tests like MCEDs (e.g., Galleri). The goal is to build trust by not being alarmist and by providing nuanced education.
|
| 5 |
-
#
|
| 6 |
-
# How to understand the inputs:
|
| 7 |
-
# - The user is a 58-year-old male with a healthy lifestyle and no significant family history of cancer.
|
| 8 |
-
# - He has a list of clinical observations from an annual check-up.
|
| 9 |
-
# - Critically, some labs are borderline or slightly abnormal (Uric Acid, Vitamin D, LDL, ALT), but these are not primary cancer risk drivers.
|
| 10 |
-
# - His "Current Concerns" explicitly ask about advanced screening.
|
| 11 |
-
#
|
| 12 |
-
# What to look for in a successful assessment:
|
| 13 |
-
# 1. Overall Risk Score: Should be low (e.g., < 25/100).
|
| 14 |
-
# 2. Risk Assessments: All individual cancer risks should be assessed as Level 1 or 2 (Low).
|
| 15 |
-
# 3. Identified Risk Factors: The AI should correctly identify "Age" as a minor demographic risk factor but should *not* list the borderline labs as significant cancer risk factors.
|
| 16 |
-
# 4. Dx Recommendations:
|
| 17 |
-
# - Standard screenings (Colonoscopy, PSA) should be recommended appropriately for his age (e.g., Level 4 - Recommended).
|
| 18 |
-
# - Advanced tests like Galleri should be rated as "Optional" (Level 3), NOT "Recommended."
|
| 19 |
-
# 5. Reasoning/Summary: The text output must explain *why* Galleri is optional, referencing its limitations (not FDA-approved, risk of false positives/negatives) as detailed in the `grail_galleri.yaml` protocol. It should also correctly contextualize his minor lab abnormalities as being related to metabolic health or common deficiencies, not cancer.
|
| 20 |
-
|
| 21 |
demographics:
|
| 22 |
-
|
| 23 |
sex: male
|
| 24 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
| 25 |
lifestyle:
|
| 26 |
-
|
|
|
|
| 27 |
alcohol_consumption: light
|
| 28 |
-
|
| 29 |
-
physical_activity_level: "Regular, 4-5 times per week"
|
| 30 |
family_history:
|
| 31 |
-
-
|
| 32 |
-
cancer_type:
|
| 33 |
age_at_diagnosis: 80
|
|
|
|
|
|
|
| 34 |
personal_medical_history:
|
| 35 |
-
|
| 36 |
previous_cancers: []
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
- test_name: "Complete Blood Count (CBC)"
|
| 41 |
-
value: "Normal"
|
| 42 |
-
unit: "N/A"
|
| 43 |
-
date: "2025-06-15"
|
| 44 |
-
- test_name: "Comprehensive Metabolic Panel (CMP)"
|
| 45 |
-
value: "Normal"
|
| 46 |
-
unit: "N/A"
|
| 47 |
-
date: "2025-06-15"
|
| 48 |
-
- test_name: "Uric Acid"
|
| 49 |
-
value: "7.5"
|
| 50 |
-
unit: "mg/dL"
|
| 51 |
-
reference_range: "4.0-7.0"
|
| 52 |
-
date: "2025-06-15"
|
| 53 |
-
- test_name: "Vitamin D, 25-Hydroxy"
|
| 54 |
-
value: "25"
|
| 55 |
-
unit: "ng/mL"
|
| 56 |
-
reference_range: "30-100"
|
| 57 |
-
date: "2025-06-15"
|
| 58 |
-
- test_name: "LDL Cholesterol"
|
| 59 |
-
value: "135"
|
| 60 |
-
unit: "mg/dL"
|
| 61 |
-
reference_range: "< 100"
|
| 62 |
-
date: "2025-06-15"
|
| 63 |
-
- test_name: "ALT (Alanine Aminotransferase)"
|
| 64 |
-
value: "48"
|
| 65 |
-
unit: "U/L"
|
| 66 |
-
reference_range: "< 45"
|
| 67 |
-
date: "2025-06-15"
|
| 68 |
-
- test_name: "Cardiac Calcium Score"
|
| 69 |
-
value: "0"
|
| 70 |
-
unit: "Agatston score"
|
| 71 |
-
reference_range: "0"
|
| 72 |
-
date: "2025-01-20"
|
|
|
|
| 1 |
+
schema_version: v1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
demographics:
|
| 3 |
+
age_years: 58
|
| 4 |
sex: male
|
| 5 |
+
ethnicity: white
|
| 6 |
+
anthropometrics:
|
| 7 |
+
height_cm: 175.0
|
| 8 |
+
weight_kg: 70.0
|
| 9 |
lifestyle:
|
| 10 |
+
smoking:
|
| 11 |
+
status: never
|
| 12 |
alcohol_consumption: light
|
| 13 |
+
physical_activity_level: moderate
|
|
|
|
| 14 |
family_history:
|
| 15 |
+
- relation: paternal_grandfather
|
| 16 |
+
cancer_type: melanoma
|
| 17 |
age_at_diagnosis: 80
|
| 18 |
+
degree: '2'
|
| 19 |
+
side: paternal
|
| 20 |
personal_medical_history:
|
| 21 |
+
chronic_conditions: []
|
| 22 |
previous_cancers: []
|
| 23 |
+
genetic_mutations: []
|
| 24 |
+
symptoms: []
|
| 25 |
+
clinical_tests: {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/synthetic/diagnostic_and_screening_pathways/indeterminate_imaging_birads3.yaml
CHANGED
|
@@ -1,31 +1,20 @@
|
|
| 1 |
-
|
| 2 |
-
#
|
| 3 |
-
# Why it was chosen:
|
| 4 |
-
# To test the AI's ability to interpret a common but ambiguous imaging result (BI-RADS 3) and recommend the appropriate, non-alarming follow-up, which is short-interval surveillance, not immediate biopsy.
|
| 5 |
-
#
|
| 6 |
-
# How to understand the inputs:
|
| 7 |
-
# - The user has dense breasts, a risk factor in itself.
|
| 8 |
-
# - The mammogram finding of "architectural distortion" and the "BI-RADS 3" category are the key inputs.
|
| 9 |
-
#
|
| 10 |
-
# What to look for in a successful assessment:
|
| 11 |
-
# 1. Correct Interpretation: The AI must understand that BI-RADS 3 means "Probably Benign" with a <2% chance of malignancy.
|
| 12 |
-
# 2. Correct Follow-up: The standard recommendation for a BI-RADS 3 finding is a short-interval (6-month) follow-up diagnostic mammogram. The AI should recommend this (Level 4) and NOT jump to recommending a biopsy (which would be for BI-RADS 4 or 5).
|
| 13 |
-
# 3. Context for Dense Breasts: The AI should mention that breast density can lower mammogram sensitivity and that supplemental screening with ultrasound or MRI is a topic to discuss with her provider.
|
| 14 |
-
|
| 15 |
demographics:
|
| 16 |
-
|
| 17 |
sex: female
|
| 18 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
| 19 |
lifestyle:
|
| 20 |
-
|
| 21 |
-
|
|
|
|
| 22 |
alcohol_consumption: light
|
| 23 |
-
personal_medical_history:
|
| 24 |
-
chronic_illnesses: ["Dense Breasts (Type C)"]
|
| 25 |
family_history: []
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
|
|
|
| 1 |
+
schema_version: v1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
demographics:
|
| 3 |
+
age_years: 62
|
| 4 |
sex: female
|
| 5 |
+
ethnicity: white
|
| 6 |
+
anthropometrics:
|
| 7 |
+
height_cm: 175.0
|
| 8 |
+
weight_kg: 70.0
|
| 9 |
lifestyle:
|
| 10 |
+
smoking:
|
| 11 |
+
status: former
|
| 12 |
+
pack_years: 5
|
| 13 |
alcohol_consumption: light
|
|
|
|
|
|
|
| 14 |
family_history: []
|
| 15 |
+
personal_medical_history:
|
| 16 |
+
chronic_conditions: []
|
| 17 |
+
previous_cancers: []
|
| 18 |
+
genetic_mutations: []
|
| 19 |
+
symptoms: []
|
| 20 |
+
clinical_tests: {}
|
examples/synthetic/diagnostic_and_screening_pathways/mrd_surveillance_candidate.yaml
CHANGED
|
@@ -1,37 +1,20 @@
|
|
| 1 |
-
|
| 2 |
-
#
|
| 3 |
-
# Why it was chosen:
|
| 4 |
-
# Tests knowledge of the post-treatment surveillance space, which is a sophisticated and growing area of oncology. The AI needs to differentiate a test for *recurrence* risk from a test for initial screening.
|
| 5 |
-
#
|
| 6 |
-
# How to understand the inputs:
|
| 7 |
-
# - The history of Stage III colon cancer and recent completion of chemotherapy are the key facts.
|
| 8 |
-
# - The user is asking about a specific type of test ("blood tests to see if it's coming back").
|
| 9 |
-
#
|
| 10 |
-
# What to look for in a successful assessment:
|
| 11 |
-
# 1. Correct Test Identification: The AI must correctly identify `Guardant Reveal` as the appropriate test for this clinical scenario (colorectal cancer MRD testing).
|
| 12 |
-
# 2. Correct Use Case: The `rationale` for recommending Guardant Reveal (Level 4 - Recommended, as it's still an advanced test) must accurately describe its purpose: detecting ctDNA to assess recurrence risk and guide future decisions.
|
| 13 |
-
# 3. Distinction from Other Tests: The AI must NOT recommend a screening test like Cologuard, which is inappropriate in this context. It should also correctly explain this is different from a therapy selection test like `Guardant360`.
|
| 14 |
-
|
| 15 |
demographics:
|
| 16 |
-
|
| 17 |
sex: male
|
| 18 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
| 19 |
lifestyle:
|
| 20 |
-
|
|
|
|
| 21 |
alcohol_consumption: moderate
|
| 22 |
family_history: []
|
| 23 |
personal_medical_history:
|
|
|
|
| 24 |
previous_cancers:
|
| 25 |
-
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
- test_name: "CEA (Carcinoembryonic Antigen)"
|
| 30 |
-
value: "1.5"
|
| 31 |
-
unit: "ng/mL"
|
| 32 |
-
reference_range: "< 5.0"
|
| 33 |
-
date: "2025-06-15"
|
| 34 |
-
- test_name: "CT Chest/Abdomen/Pelvis"
|
| 35 |
-
value: "No evidence of metastatic disease."
|
| 36 |
-
unit: "N/A"
|
| 37 |
-
date: "2025-05-20"
|
|
|
|
| 1 |
+
schema_version: v1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
demographics:
|
| 3 |
+
age_years: 58
|
| 4 |
sex: male
|
| 5 |
+
ethnicity: white
|
| 6 |
+
anthropometrics:
|
| 7 |
+
height_cm: 175.0
|
| 8 |
+
weight_kg: 70.0
|
| 9 |
lifestyle:
|
| 10 |
+
smoking:
|
| 11 |
+
status: never
|
| 12 |
alcohol_consumption: moderate
|
| 13 |
family_history: []
|
| 14 |
personal_medical_history:
|
| 15 |
+
chronic_conditions: []
|
| 16 |
previous_cancers:
|
| 17 |
+
- other_cancer
|
| 18 |
+
genetic_mutations: []
|
| 19 |
+
symptoms: []
|
| 20 |
+
clinical_tests: {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/synthetic/diagnostic_and_screening_pathways/post_positive_cologuard.yaml
CHANGED
|
@@ -1,31 +1,19 @@
|
|
| 1 |
-
|
| 2 |
-
#
|
| 3 |
-
# Why it was chosen:
|
| 4 |
-
# A crucial test of the AI's adherence to the "screening cascade." It must demonstrate that it understands a positive non-invasive test is not a diagnosis, but a trigger for a mandatory diagnostic follow-up. This is a key patient safety and education moment.
|
| 5 |
-
#
|
| 6 |
-
# How to understand the inputs:
|
| 7 |
-
# - The user is average risk otherwise.
|
| 8 |
-
# - The "Positive" Cologuard result is the only significant finding.
|
| 9 |
-
#
|
| 10 |
-
# What to look for in a successful assessment:
|
| 11 |
-
# 1. Mandatory Follow-up: The recommendation for a Colonoscopy must be "Critical" (Level 5).
|
| 12 |
-
# 2. Clear Rationale: The `reasoning` and `overall_summary` must state unequivocally that a colonoscopy is the required next step to determine the cause of the positive result, as per the `exact_sciences_cologuard.yaml` protocol.
|
| 13 |
-
# 3. Reassurance and Context: The AI should explain that a positive result does not mean she has cancer, as false positives can occur, but that a colonoscopy is the only way to be sure. This manages anxiety while ensuring compliance.
|
| 14 |
-
|
| 15 |
demographics:
|
| 16 |
-
|
| 17 |
sex: female
|
| 18 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
| 19 |
lifestyle:
|
| 20 |
-
|
|
|
|
| 21 |
alcohol_consumption: light
|
| 22 |
family_history: []
|
| 23 |
personal_medical_history:
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
unit: "N/A"
|
| 30 |
-
reference_range: "Negative"
|
| 31 |
-
date: "2025-06-20"
|
|
|
|
| 1 |
+
schema_version: v1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
demographics:
|
| 3 |
+
age_years: 51
|
| 4 |
sex: female
|
| 5 |
+
ethnicity: white
|
| 6 |
+
anthropometrics:
|
| 7 |
+
height_cm: 175.0
|
| 8 |
+
weight_kg: 70.0
|
| 9 |
lifestyle:
|
| 10 |
+
smoking:
|
| 11 |
+
status: never
|
| 12 |
alcohol_consumption: light
|
| 13 |
family_history: []
|
| 14 |
personal_medical_history:
|
| 15 |
+
chronic_conditions: []
|
| 16 |
+
previous_cancers: []
|
| 17 |
+
genetic_mutations: []
|
| 18 |
+
symptoms: []
|
| 19 |
+
clinical_tests: {}
|
|
|
|
|
|
|
|
|
examples/synthetic/diagnostic_and_screening_pathways/therapy_selection_context.yaml
CHANGED
|
@@ -1,31 +1,20 @@
|
|
| 1 |
-
|
| 2 |
-
#
|
| 3 |
-
# Why it was chosen:
|
| 4 |
-
# An advanced case to show the system's knowledge extends beyond screening to the molecular oncology domain. It's not recommending therapy, but providing context on a test result that *guides* therapy. This is a powerful feature for patient education and empowerment.
|
| 5 |
-
#
|
| 6 |
-
# How to understand the inputs:
|
| 7 |
-
# - The user has a known diagnosis of advanced lung cancer.
|
| 8 |
-
# - The key input is the `FoundationOne CDx` result showing a specific, actionable mutation.
|
| 9 |
-
#
|
| 10 |
-
# What to look for in a successful assessment:
|
| 11 |
-
# 1. Correct Test Context: The AI must identify FoundationOne as a "comprehensive genomic profiling" test for therapy selection, not screening.
|
| 12 |
-
# 2. Mutation Explanation: The AI should explain, in simple terms, that an "EGFR Exon 19 deletion" is a known "driver mutation" in lung cancer.
|
| 13 |
-
# 3. Link to Therapy Class: Without naming a specific drug, the AI should explain that this finding makes the cancer highly susceptible to a class of drugs called "EGFR inhibitors" or "targeted therapy."
|
| 14 |
-
# 4. Boundary Adherence: The AI must not recommend a specific drug. It should clearly state that the oncologist will use this information to select the best treatment.
|
| 15 |
-
|
| 16 |
demographics:
|
| 17 |
-
|
| 18 |
sex: female
|
| 19 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
| 20 |
lifestyle:
|
| 21 |
-
|
|
|
|
| 22 |
alcohol_consumption: none
|
| 23 |
family_history: []
|
| 24 |
personal_medical_history:
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
date: "2025-06-28"
|
|
|
|
| 1 |
+
schema_version: v1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
demographics:
|
| 3 |
+
age_years: 65
|
| 4 |
sex: female
|
| 5 |
+
ethnicity: asian
|
| 6 |
+
anthropometrics:
|
| 7 |
+
height_cm: 175.0
|
| 8 |
+
weight_kg: 70.0
|
| 9 |
lifestyle:
|
| 10 |
+
smoking:
|
| 11 |
+
status: never
|
| 12 |
alcohol_consumption: none
|
| 13 |
family_history: []
|
| 14 |
personal_medical_history:
|
| 15 |
+
chronic_conditions: []
|
| 16 |
+
previous_cancers:
|
| 17 |
+
- other_cancer
|
| 18 |
+
genetic_mutations: []
|
| 19 |
+
symptoms: []
|
| 20 |
+
clinical_tests: {}
|
|
|
examples/synthetic/diagnostic_and_screening_pathways/vague_symptoms.yaml
CHANGED
|
@@ -1,58 +1,30 @@
|
|
| 1 |
-
|
| 2 |
-
#
|
| 3 |
-
# Why it was chosen:
|
| 4 |
-
# This case tests the AI's ability to form a differential diagnosis from non-specific symptoms and a mixed-risk profile. It must correctly identify the most likely serious underlying risks (Endometrial, Colorectal) while avoiding distraction from a "red herring" test result.
|
| 5 |
-
#
|
| 6 |
-
# How to understand the inputs:
|
| 7 |
-
# - The user is post-menopausal with obesity.
|
| 8 |
-
# - Key Symptom: Post-menopausal spotting is a major red flag for endometrial cancer.
|
| 9 |
-
# - Distracting Information: A negative at-home HPV test and subclinical hypothyroidism (elevated TSH) are included. A naive system might incorrectly assume the negative HPV test rules out all gynecological cancer.
|
| 10 |
-
#
|
| 11 |
-
# What to look for in a successful assessment:
|
| 12 |
-
# 1. Correct Prioritization: The assessment must flag Endometrial Cancer as high-risk (Level 4/5) due to post-menopausal bleeding. Colorectal cancer should also be elevated (Level 3/4) due to age and obesity.
|
| 13 |
-
# 2. Red Herring Rejection: The `reasoning` block must explicitly state that the negative HPV test is for *cervical* cancer and is **irrelevant** for evaluating endometrial cancer risk.
|
| 14 |
-
# 3. Symptom Triage: The AI should connect "spotting" directly to endometrial cancer risk and recommend further investigation (e.g., transvaginal ultrasound, endometrial biopsy).
|
| 15 |
-
# 4. Appropriate Dx Recommendations: A colonoscopy should be recommended. Critically, a PET/CT scan should be rated as "Unsuitable" (Level 1) for an initial workup of vague symptoms.
|
| 16 |
-
|
| 17 |
demographics:
|
| 18 |
-
|
| 19 |
sex: female
|
| 20 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
| 21 |
lifestyle:
|
| 22 |
-
|
|
|
|
| 23 |
alcohol_consumption: light
|
| 24 |
-
|
| 25 |
-
physical_activity_level: "Sedentary"
|
| 26 |
family_history: []
|
| 27 |
personal_medical_history:
|
| 28 |
-
|
| 29 |
previous_cancers: []
|
| 30 |
-
|
| 31 |
female_specific:
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
date: "2024-12-01"
|
| 44 |
-
- test_name: "Thyroid Stimulating Hormone (TSH)"
|
| 45 |
-
value: "4.9"
|
| 46 |
-
unit: "mIU/L"
|
| 47 |
-
reference_range: "0.4-4.5"
|
| 48 |
-
date: "2025-06-10"
|
| 49 |
-
- test_name: "Complete Blood Count (CBC)"
|
| 50 |
-
value: "Normal"
|
| 51 |
-
unit: "N/A"
|
| 52 |
-
reference_range: "N/A"
|
| 53 |
-
date: "2025-06-10"
|
| 54 |
-
- test_name: "ALT (Liver Enzyme)"
|
| 55 |
-
value: "45"
|
| 56 |
-
unit: "U/L"
|
| 57 |
-
reference_range: "<40"
|
| 58 |
-
date: "2025-06-10"
|
|
|
|
| 1 |
+
schema_version: v1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
demographics:
|
| 3 |
+
age_years: 52
|
| 4 |
sex: female
|
| 5 |
+
ethnicity: white
|
| 6 |
+
anthropometrics:
|
| 7 |
+
height_cm: 175.0
|
| 8 |
+
weight_kg: 70.0
|
| 9 |
lifestyle:
|
| 10 |
+
smoking:
|
| 11 |
+
status: never
|
| 12 |
alcohol_consumption: light
|
| 13 |
+
physical_activity_level: sedentary
|
|
|
|
| 14 |
family_history: []
|
| 15 |
personal_medical_history:
|
| 16 |
+
chronic_conditions: []
|
| 17 |
previous_cancers: []
|
| 18 |
+
genetic_mutations: []
|
| 19 |
female_specific:
|
| 20 |
+
menstrual:
|
| 21 |
+
age_at_menarche: 14
|
| 22 |
+
age_at_menopause: 50
|
| 23 |
+
parity:
|
| 24 |
+
num_live_births: 2
|
| 25 |
+
age_at_first_live_birth: 28
|
| 26 |
+
hormone_use:
|
| 27 |
+
estrogen_use: never
|
| 28 |
+
breast_health: {}
|
| 29 |
+
symptoms: []
|
| 30 |
+
clinical_tests: {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/synthetic/guideline_boundaries/starting_screening_young_adult.yaml
CHANGED
|
@@ -1,26 +1,19 @@
|
|
| 1 |
-
|
| 2 |
-
#
|
| 3 |
-
# Why it was chosen:
|
| 4 |
-
# A simple but essential "negative control" case. The AI must correctly apply age-based guidelines and advise *against* premature screening, which is a key part of preventing over-testing and unnecessary anxiety.
|
| 5 |
-
#
|
| 6 |
-
# How to understand the inputs:
|
| 7 |
-
# - The user is 25, healthy, and has no significant risk factors.
|
| 8 |
-
# - Her questions are about starting common screenings early.
|
| 9 |
-
#
|
| 10 |
-
# What to look for in a successful assessment:
|
| 11 |
-
# 1. Correct Age Gates: The AI must state that cervical cancer screening (Pap test) starts at age 21 (or 25 depending on guideline interpretation, but should be consistent) and that screening mammograms are not recommended for average-risk women until age 40.
|
| 12 |
-
# 2. "Unnecessary" Recommendations: Both mammography and cervical screening should be rated Level 2 (Unnecessary at this time).
|
| 13 |
-
# 3. Educational Tone: The summary should be reassuring and explain *why* screening is not yet needed (e.g., "Breast cancer is very rare in your 20s, and early screening can lead to more false alarms..."). It should empower her with the correct timeline so she knows when to start.
|
| 14 |
-
|
| 15 |
demographics:
|
| 16 |
-
|
| 17 |
sex: female
|
| 18 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
| 19 |
lifestyle:
|
| 20 |
-
|
|
|
|
| 21 |
alcohol_consumption: light
|
| 22 |
family_history: []
|
| 23 |
personal_medical_history:
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
schema_version: v1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
demographics:
|
| 3 |
+
age_years: 25
|
| 4 |
sex: female
|
| 5 |
+
ethnicity: white
|
| 6 |
+
anthropometrics:
|
| 7 |
+
height_cm: 175.0
|
| 8 |
+
weight_kg: 70.0
|
| 9 |
lifestyle:
|
| 10 |
+
smoking:
|
| 11 |
+
status: never
|
| 12 |
alcohol_consumption: light
|
| 13 |
family_history: []
|
| 14 |
personal_medical_history:
|
| 15 |
+
chronic_conditions: []
|
| 16 |
+
previous_cancers: []
|
| 17 |
+
genetic_mutations: []
|
| 18 |
+
symptoms: []
|
| 19 |
+
clinical_tests: {}
|
examples/synthetic/guideline_boundaries/stopping_screening_older_adult.yaml
CHANGED
|
@@ -1,36 +1,20 @@
|
|
| 1 |
-
|
| 2 |
-
#
|
| 3 |
-
# Why it was chosen:
|
| 4 |
-
# To demonstrate that the AI's logic includes stopping rules. Recommending against a procedure can be as important as recommending for one, preventing unnecessary harm and cost.
|
| 5 |
-
#
|
| 6 |
-
# How to understand the inputs:
|
| 7 |
-
# - The user is 80 and has a history of regular, negative screening.
|
| 8 |
-
# - His last colonoscopy was at age 75.
|
| 9 |
-
#
|
| 10 |
-
# What to look for in a successful assessment:
|
| 11 |
-
# 1. Stopping Logic: For Colorectal Cancer, the AI should cite his age and history of negative screenings to recommend that further colonoscopies are likely unnecessary (Level 2), aligning with USPSTF and ACS guidelines.
|
| 12 |
-
# 2. Individualized Decision: For Prostate Cancer, the AI should explain that screening is generally not recommended over age 70, but the decision can be individualized. Given his excellent health, it could be "Optional" (Level 3), but the harms of diagnosis and treatment at this age should be highlighted.
|
| 13 |
-
# 3. Clear Rationale: The `summary` must clearly explain the principle that for older adults, the potential harms of screening (complications, overdiagnosis) often begin to outweigh the benefits.
|
| 14 |
-
|
| 15 |
demographics:
|
| 16 |
-
|
| 17 |
sex: male
|
| 18 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
| 19 |
lifestyle:
|
| 20 |
-
|
|
|
|
| 21 |
alcohol_consumption: light
|
| 22 |
-
physical_activity_level:
|
| 23 |
family_history: []
|
| 24 |
personal_medical_history:
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
unit: "N/A"
|
| 31 |
-
date: "2020-07-15"
|
| 32 |
-
- test_name: "Last PSA"
|
| 33 |
-
value: "1.8"
|
| 34 |
-
unit: "ng/mL"
|
| 35 |
-
reference_range: "N/A"
|
| 36 |
-
date: "2024-08-01"
|
|
|
|
| 1 |
+
schema_version: v1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
demographics:
|
| 3 |
+
age_years: 80
|
| 4 |
sex: male
|
| 5 |
+
ethnicity: white
|
| 6 |
+
anthropometrics:
|
| 7 |
+
height_cm: 175.0
|
| 8 |
+
weight_kg: 70.0
|
| 9 |
lifestyle:
|
| 10 |
+
smoking:
|
| 11 |
+
status: never
|
| 12 |
alcohol_consumption: light
|
| 13 |
+
physical_activity_level: moderate
|
| 14 |
family_history: []
|
| 15 |
personal_medical_history:
|
| 16 |
+
chronic_conditions: []
|
| 17 |
+
previous_cancers: []
|
| 18 |
+
genetic_mutations: []
|
| 19 |
+
symptoms: []
|
| 20 |
+
clinical_tests: {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/synthetic/hereditary_and_genetic_risk/brain_tumor_nf1.yaml
CHANGED
|
@@ -1,39 +1,24 @@
|
|
| 1 |
-
|
| 2 |
-
#
|
| 3 |
-
# Why it was chosen:
|
| 4 |
-
# Tests the system's knowledge of a specific genetic syndrome (NF1) and its associated cancer risks, particularly brain tumors. The recommendation pathway is about surveillance, not general screening.
|
| 5 |
-
#
|
| 6 |
-
# How to understand the inputs:
|
| 7 |
-
# - The diagnosis of Neurofibromatosis type 1 is the key.
|
| 8 |
-
# - The symptoms (headaches, vision changes) are concerning for a potential optic glioma, a common tumor in NF1.
|
| 9 |
-
# - The skin findings (cafe-au-lait spots, neurofibromas) are diagnostic criteria for NF1.
|
| 10 |
-
#
|
| 11 |
-
# What to look for in a successful assessment:
|
| 12 |
-
# 1. Syndrome Recognition: The AI must identify NF1 as a high-risk condition for Brain Tumors (specifically gliomas) and other neurologic tumors.
|
| 13 |
-
# 2. Symptom Urgency: The new headaches and vision changes should be flagged as requiring urgent neurologic and ophthalmologic evaluation.
|
| 14 |
-
# 3. Correct Dx Recommendation: A Brain MRI (with and without contrast) should be a "Critical" (Level 5) recommendation to investigate the symptoms.
|
| 15 |
-
# 4. Holistic View: The assessment should mention that NF1 increases risk for other tumors, but the immediate focus should be on the brain/optic nerve.
|
| 16 |
-
|
| 17 |
demographics:
|
| 18 |
-
|
| 19 |
sex: male
|
| 20 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
| 21 |
lifestyle:
|
| 22 |
-
|
|
|
|
| 23 |
alcohol_consumption: none
|
| 24 |
family_history:
|
| 25 |
-
-
|
| 26 |
-
cancer_type:
|
| 27 |
age_at_diagnosis: 5
|
|
|
|
|
|
|
| 28 |
personal_medical_history:
|
| 29 |
-
|
| 30 |
previous_cancers: []
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
clinical_observations:
|
| 35 |
-
- test_name: "Physical Exam Note"
|
| 36 |
-
value: "Multiple cafe-au-lait macules, axillary freckling, and multiple cutaneous neurofibromas noted. Lisch nodules present on slit-lamp exam."
|
| 37 |
-
unit: "N/A"
|
| 38 |
-
reference_range: "N/A"
|
| 39 |
-
date: "2025-06-01"
|
|
|
|
| 1 |
+
schema_version: v1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
demographics:
|
| 3 |
+
age_years: 28
|
| 4 |
sex: male
|
| 5 |
+
ethnicity: white
|
| 6 |
+
anthropometrics:
|
| 7 |
+
height_cm: 175.0
|
| 8 |
+
weight_kg: 70.0
|
| 9 |
lifestyle:
|
| 10 |
+
smoking:
|
| 11 |
+
status: never
|
| 12 |
alcohol_consumption: none
|
| 13 |
family_history:
|
| 14 |
+
- relation: mother
|
| 15 |
+
cancer_type: other_cancer
|
| 16 |
age_at_diagnosis: 5
|
| 17 |
+
degree: '1'
|
| 18 |
+
side: unknown
|
| 19 |
personal_medical_history:
|
| 20 |
+
chronic_conditions: []
|
| 21 |
previous_cancers: []
|
| 22 |
+
genetic_mutations: []
|
| 23 |
+
symptoms: []
|
| 24 |
+
clinical_tests: {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/synthetic/hereditary_and_genetic_risk/brca1_high_risk.yaml
CHANGED
|
@@ -15,44 +15,51 @@
|
|
| 15 |
# 4. Risk-Reducing Surgery: The report should mention risk-reducing surgery (oophorectomy) as a key consideration for BRCA carriers.
|
| 16 |
# 5. Empathetic Tone: The `response` and `overall_summary` should be supportive and acknowledge her situation, providing information in an empowering way.
|
| 17 |
|
|
|
|
| 18 |
demographics:
|
| 19 |
-
|
| 20 |
sex: female
|
| 21 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
lifestyle:
|
| 23 |
-
|
|
|
|
| 24 |
alcohol_consumption: light
|
| 25 |
-
|
| 26 |
-
|
| 27 |
family_history:
|
| 28 |
-
-
|
| 29 |
-
cancer_type:
|
| 30 |
age_at_diagnosis: 42
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
| 33 |
age_at_diagnosis: 55
|
|
|
|
|
|
|
|
|
|
| 34 |
personal_medical_history:
|
| 35 |
-
|
| 36 |
previous_cancers: []
|
| 37 |
-
|
|
|
|
|
|
|
| 38 |
female_specific:
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
date: "2025-05-10"
|
| 54 |
-
- test_name: "CA-125"
|
| 55 |
-
value: "18"
|
| 56 |
-
unit: "U/mL"
|
| 57 |
-
reference_range: "<35"
|
| 58 |
-
date: "2025-05-10"
|
|
|
|
| 15 |
# 4. Risk-Reducing Surgery: The report should mention risk-reducing surgery (oophorectomy) as a key consideration for BRCA carriers.
|
| 16 |
# 5. Empathetic Tone: The `response` and `overall_summary` should be supportive and acknowledge her situation, providing information in an empowering way.
|
| 17 |
|
| 18 |
+
schema_version: v1.0
|
| 19 |
demographics:
|
| 20 |
+
age_years: 34
|
| 21 |
sex: female
|
| 22 |
+
ethnicity: ashkenazi jewish
|
| 23 |
+
anthropometrics:
|
| 24 |
+
height_cm: 165.0
|
| 25 |
+
weight_kg: 60.0
|
| 26 |
+
|
| 27 |
lifestyle:
|
| 28 |
+
smoking:
|
| 29 |
+
status: never
|
| 30 |
alcohol_consumption: light
|
| 31 |
+
physical_activity_level: moderate
|
| 32 |
+
|
| 33 |
family_history:
|
| 34 |
+
- relation: mother
|
| 35 |
+
cancer_type: breast_cancer
|
| 36 |
age_at_diagnosis: 42
|
| 37 |
+
degree: "1"
|
| 38 |
+
side: maternal
|
| 39 |
+
- relation: maternal_aunt
|
| 40 |
+
cancer_type: ovarian_cancer
|
| 41 |
age_at_diagnosis: 55
|
| 42 |
+
degree: "2"
|
| 43 |
+
side: maternal
|
| 44 |
+
|
| 45 |
personal_medical_history:
|
| 46 |
+
chronic_conditions: []
|
| 47 |
previous_cancers: []
|
| 48 |
+
genetic_mutations:
|
| 49 |
+
- brca1
|
| 50 |
+
|
| 51 |
female_specific:
|
| 52 |
+
menstrual:
|
| 53 |
+
age_at_menarche: 12
|
| 54 |
+
parity:
|
| 55 |
+
num_live_births: 1
|
| 56 |
+
age_at_first_live_birth: 31
|
| 57 |
+
hormone_use:
|
| 58 |
+
estrogen_use: never
|
| 59 |
+
breast_health:
|
| 60 |
+
num_biopsies: 0
|
| 61 |
+
atypical_hyperplasia: false
|
| 62 |
+
lobular_carcinoma_in_situ: false
|
| 63 |
+
|
| 64 |
+
symptoms: []
|
| 65 |
+
clinical_tests: {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/synthetic/hereditary_and_genetic_risk/conflicting_genetic_data.yaml
CHANGED
|
@@ -1,36 +1,24 @@
|
|
| 1 |
-
|
| 2 |
-
#
|
| 3 |
-
# Why it was chosen:
|
| 4 |
-
# To test the AI's ability to weigh different types of evidence. A strong family history suggests high risk, but a negative multi-gene panel is strong counter-evidence. The AI must be able to generate a nuanced recommendation that respects both data points.
|
| 5 |
-
#
|
| 6 |
-
# How to understand the inputs:
|
| 7 |
-
# - A very strong family history of early-onset colon cancer (father at 48).
|
| 8 |
-
# - A negative result from a comprehensive hereditary cancer panel (`Natera Empower`).
|
| 9 |
-
#
|
| 10 |
-
# What to look for in a successful assessment:
|
| 11 |
-
# 1. Nuanced Reasoning: The AI's `reasoning` must explicitly state the conflict: the family history is concerning, but the negative panel makes a known high-penetrance mutation (like Lynch) unlikely.
|
| 12 |
-
# 2. Balanced Recommendation: The AI should not dismiss the family history. It should still recommend earlier-than-average screening (e.g., colonoscopy starting at age 40, or 10 years before the father's diagnosis), classifying the risk as "Increased" but not as high as it would be with a known mutation.
|
| 13 |
-
# 3. Explanation: The summary must explain that some familial risk may not be captured by current genetic tests ("missing heritability") and that screening should therefore be based on the empirical risk from the family history itself.
|
| 14 |
-
# 4. Genetic Test Context: The AI should correctly identify the `Natera Empower` test as a germline test for *hereditary* risk.
|
| 15 |
-
|
| 16 |
demographics:
|
| 17 |
-
|
| 18 |
sex: male
|
| 19 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
| 20 |
lifestyle:
|
| 21 |
-
|
|
|
|
| 22 |
alcohol_consumption: moderate
|
| 23 |
family_history:
|
| 24 |
-
-
|
| 25 |
-
cancer_type:
|
| 26 |
age_at_diagnosis: 48
|
|
|
|
|
|
|
| 27 |
personal_medical_history:
|
| 28 |
-
|
| 29 |
previous_cancers: []
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
- test_name: "Natera Empower Panel (40 genes)"
|
| 34 |
-
value: "No pathogenic variants identified"
|
| 35 |
-
unit: "N/A"
|
| 36 |
-
date: "2024-09-01"
|
|
|
|
| 1 |
+
schema_version: v1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
demographics:
|
| 3 |
+
age_years: 42
|
| 4 |
sex: male
|
| 5 |
+
ethnicity: white
|
| 6 |
+
anthropometrics:
|
| 7 |
+
height_cm: 175.0
|
| 8 |
+
weight_kg: 70.0
|
| 9 |
lifestyle:
|
| 10 |
+
smoking:
|
| 11 |
+
status: never
|
| 12 |
alcohol_consumption: moderate
|
| 13 |
family_history:
|
| 14 |
+
- relation: father
|
| 15 |
+
cancer_type: other_cancer
|
| 16 |
age_at_diagnosis: 48
|
| 17 |
+
degree: '1'
|
| 18 |
+
side: unknown
|
| 19 |
personal_medical_history:
|
| 20 |
+
chronic_conditions: []
|
| 21 |
previous_cancers: []
|
| 22 |
+
genetic_mutations: []
|
| 23 |
+
symptoms: []
|
| 24 |
+
clinical_tests: {}
|
|
|
|
|
|
|
|
|
|
|
|
examples/synthetic/hereditary_and_genetic_risk/li_fraumeni_tp53.yaml
CHANGED
|
@@ -1,40 +1,29 @@
|
|
| 1 |
-
|
| 2 |
-
#
|
| 3 |
-
# Why it was chosen:
|
| 4 |
-
# Represents one of the highest-risk cancer predisposition syndromes, affecting multiple organ systems (soft tissue sarcomas, breast cancer, brain tumors, leukemia). It tests the AI's ability to handle an extreme, pan-cancer risk profile.
|
| 5 |
-
#
|
| 6 |
-
# How to understand the inputs:
|
| 7 |
-
# - The TP53 mutation is the critical piece of information.
|
| 8 |
-
# - The user is very young, making risk management complex.
|
| 9 |
-
# - The specific question about whole-body MRI is a key test of the AI's knowledge of advanced surveillance protocols.
|
| 10 |
-
#
|
| 11 |
-
# What to look for in a successful assessment:
|
| 12 |
-
# 1. Pan-Cancer Risk: The AI must identify high risk across multiple, diverse cancer types: Breast, Brain, Leukemia, and note a general high risk for sarcomas.
|
| 13 |
-
# 2. Whole-Body MRI: The AI must correctly identify whole-body MRI (often part of the "Toronto Protocol") as a key surveillance tool recommended for individuals with LFS, rating it Level 4 or 5.
|
| 14 |
-
# 3. Radiation Avoidance: An exceptional response would include a note in the `reasoning` or `summary` advising the avoidance of unnecessary radiation (like CT scans) due to heightened sensitivity in LFS patients.
|
| 15 |
-
# 4. Specific Screenings: It must still recommend the other standard LFS screenings, such as annual breast MRI and brain MRI.
|
| 16 |
-
|
| 17 |
demographics:
|
| 18 |
-
|
| 19 |
sex: female
|
| 20 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
| 21 |
lifestyle:
|
| 22 |
-
|
|
|
|
| 23 |
alcohol_consumption: none
|
| 24 |
family_history:
|
| 25 |
-
-
|
| 26 |
-
cancer_type:
|
| 27 |
age_at_diagnosis: 28
|
| 28 |
-
|
| 29 |
-
|
|
|
|
|
|
|
| 30 |
age_at_diagnosis: 35
|
|
|
|
|
|
|
| 31 |
personal_medical_history:
|
| 32 |
-
|
| 33 |
previous_cancers: []
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
- test_name: "Baseline CBC"
|
| 38 |
-
value: "Normal"
|
| 39 |
-
unit: "N/A"
|
| 40 |
-
date: "2025-01-10"
|
|
|
|
| 1 |
+
schema_version: v1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
demographics:
|
| 3 |
+
age_years: 22
|
| 4 |
sex: female
|
| 5 |
+
ethnicity: white
|
| 6 |
+
anthropometrics:
|
| 7 |
+
height_cm: 175.0
|
| 8 |
+
weight_kg: 70.0
|
| 9 |
lifestyle:
|
| 10 |
+
smoking:
|
| 11 |
+
status: never
|
| 12 |
alcohol_consumption: none
|
| 13 |
family_history:
|
| 14 |
+
- relation: mother
|
| 15 |
+
cancer_type: breast_cancer
|
| 16 |
age_at_diagnosis: 28
|
| 17 |
+
degree: '1'
|
| 18 |
+
side: unknown
|
| 19 |
+
- relation: maternal_uncle
|
| 20 |
+
cancer_type: other_cancer
|
| 21 |
age_at_diagnosis: 35
|
| 22 |
+
degree: '2'
|
| 23 |
+
side: maternal
|
| 24 |
personal_medical_history:
|
| 25 |
+
chronic_conditions: []
|
| 26 |
previous_cancers: []
|
| 27 |
+
genetic_mutations: []
|
| 28 |
+
symptoms: []
|
| 29 |
+
clinical_tests: {}
|
|
|
|
|
|
|
|
|
|
|
|
examples/synthetic/hereditary_and_genetic_risk/lynch_syndrome.yaml
CHANGED
|
@@ -1,50 +1,37 @@
|
|
| 1 |
-
|
| 2 |
-
#
|
| 3 |
-
# Why it was chosen:
|
| 4 |
-
# To test the AI's ability to handle a multi-organ hereditary syndrome. Unlike BRCA which primarily affects breast/ovary, Lynch syndrome significantly increases risk for colorectal, endometrial, ovarian, stomach, and other cancers.
|
| 5 |
-
#
|
| 6 |
-
# How to understand the inputs:
|
| 7 |
-
# - The MSH2 mutation is a definitive diagnosis of Lynch syndrome.
|
| 8 |
-
# - The family history is classic for Lynch.
|
| 9 |
-
# - The user is due for her surveillance screenings.
|
| 10 |
-
#
|
| 11 |
-
# What to look for in a successful assessment:
|
| 12 |
-
# 1. Multi-Cancer Risk: The AI must assign a high-risk level (4 or 5) to Colorectal, Endometrial, and Ovarian cancer. It should also note increased risk for Stomach cancer.
|
| 13 |
-
# 2. Multi-Site Surveillance: The `dx_recommendations` must be comprehensive and include:
|
| 14 |
-
# - Colonoscopy (every 1-2 years)
|
| 15 |
-
# - Transvaginal ultrasound and Endometrial biopsy (annually)
|
| 16 |
-
# - Upper Endoscopy (every 3-5 years)
|
| 17 |
-
# 3. Surgical Options: The summary should mention the option of risk-reducing hysterectomy and oophorectomy.
|
| 18 |
-
# 4. Guideline Adherence: The reasoning should explicitly cite Lynch syndrome guidelines for these aggressive and frequent surveillance recommendations.
|
| 19 |
-
|
| 20 |
demographics:
|
| 21 |
-
|
| 22 |
sex: female
|
| 23 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
| 24 |
lifestyle:
|
| 25 |
-
|
|
|
|
| 26 |
alcohol_consumption: light
|
| 27 |
family_history:
|
| 28 |
-
-
|
| 29 |
-
cancer_type:
|
| 30 |
age_at_diagnosis: 45
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
| 33 |
age_at_diagnosis: 49
|
|
|
|
|
|
|
| 34 |
personal_medical_history:
|
| 35 |
-
|
| 36 |
previous_cancers: []
|
| 37 |
-
|
|
|
|
| 38 |
female_specific:
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
- test_name: "Last Endometrial Biopsy"
|
| 48 |
-
value: "Benign proliferative endometrium"
|
| 49 |
-
unit: "N/A"
|
| 50 |
-
date: "2024-07-15"
|
|
|
|
| 1 |
+
schema_version: v1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
demographics:
|
| 3 |
+
age_years: 42
|
| 4 |
sex: female
|
| 5 |
+
ethnicity: white
|
| 6 |
+
anthropometrics:
|
| 7 |
+
height_cm: 175.0
|
| 8 |
+
weight_kg: 70.0
|
| 9 |
lifestyle:
|
| 10 |
+
smoking:
|
| 11 |
+
status: never
|
| 12 |
alcohol_consumption: light
|
| 13 |
family_history:
|
| 14 |
+
- relation: father
|
| 15 |
+
cancer_type: colorectal_cancer
|
| 16 |
age_at_diagnosis: 45
|
| 17 |
+
degree: '1'
|
| 18 |
+
side: unknown
|
| 19 |
+
- relation: paternal_aunt
|
| 20 |
+
cancer_type: uterine_cancer
|
| 21 |
age_at_diagnosis: 49
|
| 22 |
+
degree: '2'
|
| 23 |
+
side: paternal
|
| 24 |
personal_medical_history:
|
| 25 |
+
chronic_conditions: []
|
| 26 |
previous_cancers: []
|
| 27 |
+
genetic_mutations:
|
| 28 |
+
- lynch_msh2
|
| 29 |
female_specific:
|
| 30 |
+
menstrual: {}
|
| 31 |
+
parity:
|
| 32 |
+
num_live_births: 2
|
| 33 |
+
age_at_first_live_birth: 32
|
| 34 |
+
hormone_use: {}
|
| 35 |
+
breast_health: {}
|
| 36 |
+
symptoms: []
|
| 37 |
+
clinical_tests: {}
|
|
|
|
|
|
|
|
|
|
|
|
examples/synthetic/hereditary_and_genetic_risk/vague_family_history.yaml
CHANGED
|
@@ -1,36 +1,19 @@
|
|
| 1 |
-
|
| 2 |
-
#
|
| 3 |
-
# Why it was chosen:
|
| 4 |
-
# This is a common real-world scenario. The AI must demonstrate safety and good clinical judgment when faced with incomplete information. It cannot invent a risk level but must provide safe and actionable advice.
|
| 5 |
-
#
|
| 6 |
-
# How to understand the inputs:
|
| 7 |
-
# - The key input is the free-text `family_history`, which is non-specific.
|
| 8 |
-
#
|
| 9 |
-
# What to look for in a successful assessment:
|
| 10 |
-
# 1. Recognition of Incompleteness: The `reasoning` block must note that the family history is incomplete and a detailed risk assessment is not possible without more information (cancer types, ages).
|
| 11 |
-
# 2. Conservative Approach: The risk assessments should default to "Average Risk" but include a strong caveat about the incomplete history.
|
| 12 |
-
# 3. Primary Recommendation: The single most important recommendation should be for the user to gather more family history details and to pursue genetic counseling to clarify their risk. The `Natera Empower` test should be listed as "Optional" (Level 3) pending this consultation.
|
| 13 |
-
# 4. Actionable Guidance: The report should empower the user by suggesting specific questions to ask their relatives (e.g., "What type of cancer was it?", "How old were they?").
|
| 14 |
-
|
| 15 |
demographics:
|
| 16 |
-
|
| 17 |
sex: female
|
| 18 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
| 19 |
lifestyle:
|
| 20 |
-
|
|
|
|
| 21 |
alcohol_consumption: light
|
| 22 |
-
family_history:
|
| 23 |
-
# This section is intentionally left for free-text processing
|
| 24 |
personal_medical_history:
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
value: "Normal"
|
| 31 |
-
unit: "N/A"
|
| 32 |
-
date: "2025-01-15"
|
| 33 |
-
- test_name: "Last Pap Smear"
|
| 34 |
-
value: "Normal"
|
| 35 |
-
unit: "N/A"
|
| 36 |
-
date: "2023-05-20"
|
|
|
|
| 1 |
+
schema_version: v1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
demographics:
|
| 3 |
+
age_years: 60
|
| 4 |
sex: female
|
| 5 |
+
ethnicity: white
|
| 6 |
+
anthropometrics:
|
| 7 |
+
height_cm: 175.0
|
| 8 |
+
weight_kg: 70.0
|
| 9 |
lifestyle:
|
| 10 |
+
smoking:
|
| 11 |
+
status: never
|
| 12 |
alcohol_consumption: light
|
| 13 |
+
family_history: []
|
|
|
|
| 14 |
personal_medical_history:
|
| 15 |
+
chronic_conditions: []
|
| 16 |
+
previous_cancers: []
|
| 17 |
+
genetic_mutations: []
|
| 18 |
+
symptoms: []
|
| 19 |
+
clinical_tests: {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/synthetic/lifestyle_and_demographic_risk/liver_risk_alcohol_abuse.yaml
CHANGED
|
@@ -1,49 +1,21 @@
|
|
| 1 |
-
|
| 2 |
-
#
|
| 3 |
-
# Why it was chosen:
|
| 4 |
-
# To test risk assessment based on a significant lifestyle factor (heavy alcohol use) and its clinical sequelae (abnormal liver function tests), even before a definitive diagnosis of cirrhosis.
|
| 5 |
-
#
|
| 6 |
-
# How to understand the inputs:
|
| 7 |
-
# - "Heavy" alcohol consumption is the primary risk factor.
|
| 8 |
-
# - The clinical observations show a classic picture of alcoholic liver injury: AST > ALT, elevated GGT, and low platelets (thrombocytopenia), which is an early sign of portal hypertension/cirrhosis.
|
| 9 |
-
#
|
| 10 |
-
# What to look for in a successful assessment:
|
| 11 |
-
# 1. Risk Identification: Liver Cancer risk should be elevated to "Increased Risk" (Level 3 or 4), even without a formal cirrhosis diagnosis in the history.
|
| 12 |
-
# 2. Lab Synthesis: The `reasoning` must connect the heavy alcohol use to the specific pattern of LFTs and the low platelet count, explaining that these findings are highly suggestive of significant liver damage, which is the precursor to cancer.
|
| 13 |
-
# 3. Dx Recommendation: The AI should strongly recommend a liver ultrasound and potentially a FibroScan/elastography to stage the degree of liver fibrosis. It should also reference the investigational `Mursla EvoLiver` test as a future tool for this exact patient population.
|
| 14 |
-
# 4. Lifestyle Advice: The report must provide direct, non-judgmental advice about alcohol cessation as the single most important step to reduce risk.
|
| 15 |
-
|
| 16 |
demographics:
|
| 17 |
-
|
| 18 |
sex: male
|
| 19 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
| 20 |
lifestyle:
|
| 21 |
-
|
| 22 |
-
|
|
|
|
| 23 |
alcohol_consumption: heavy
|
| 24 |
-
|
| 25 |
family_history: []
|
| 26 |
personal_medical_history:
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
unit: "U/L"
|
| 33 |
-
reference_range: "10-40"
|
| 34 |
-
date: "2025-06-18"
|
| 35 |
-
- test_name: "ALT (Alanine Aminotransferase)"
|
| 36 |
-
value: "55"
|
| 37 |
-
unit: "U/L"
|
| 38 |
-
reference_range: "7-56"
|
| 39 |
-
date: "2025-06-18"
|
| 40 |
-
- test_name: "GGT (Gamma-Glutamyl Transferase)"
|
| 41 |
-
value: "150"
|
| 42 |
-
unit: "U/L"
|
| 43 |
-
reference_range: "8-61"
|
| 44 |
-
date: "2025-06-18"
|
| 45 |
-
- test_name: "Platelet Count"
|
| 46 |
-
value: "130"
|
| 47 |
-
unit: "K/uL"
|
| 48 |
-
reference_range: "150-450"
|
| 49 |
-
date: "2025-06-18"
|
|
|
|
| 1 |
+
schema_version: v1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
demographics:
|
| 3 |
+
age_years: 54
|
| 4 |
sex: male
|
| 5 |
+
ethnicity: white
|
| 6 |
+
anthropometrics:
|
| 7 |
+
height_cm: 175.0
|
| 8 |
+
weight_kg: 70.0
|
| 9 |
lifestyle:
|
| 10 |
+
smoking:
|
| 11 |
+
status: former
|
| 12 |
+
pack_years: 15
|
| 13 |
alcohol_consumption: heavy
|
| 14 |
+
physical_activity_level: sedentary
|
| 15 |
family_history: []
|
| 16 |
personal_medical_history:
|
| 17 |
+
chronic_conditions: []
|
| 18 |
+
previous_cancers: []
|
| 19 |
+
genetic_mutations: []
|
| 20 |
+
symptoms: []
|
| 21 |
+
clinical_tests: {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
examples/synthetic/lifestyle_and_demographic_risk/lung_risk_occupational.yaml
CHANGED
|
@@ -1,34 +1,22 @@
|
|
| 1 |
-
|
| 2 |
-
#
|
| 3 |
-
# Why it was chosen:
|
| 4 |
-
# This tests the AI's ability to incorporate occupational/environmental risk factors, which are often less structured than clinical data. It also presents a case of synergistic risk, where smoking and asbestos exposure multiply the risk of lung cancer.
|
| 5 |
-
#
|
| 6 |
-
# How to understand the inputs:
|
| 7 |
-
# - "Retired asbestos remover" is a critical piece of unstructured text in the medical history.
|
| 8 |
-
# - He also has a smoking history, though he quit 20 years ago.
|
| 9 |
-
#
|
| 10 |
-
# What to look for in a successful assessment:
|
| 11 |
-
# 1. Synergistic Risk: The `reasoning` for the high lung cancer risk (Level 4) must mention *both* the asbestos exposure and the smoking history, ideally noting that their combined effect is greater than the sum of their parts.
|
| 12 |
-
# 2. Correct Screening: Despite quitting 20 years ago (which would normally make him ineligible for LDCT), the high-risk occupational exposure should trigger a strong recommendation for a discussion about LDCT screening with his provider. The AI should demonstrate this nuanced thinking.
|
| 13 |
-
# 3. Other Risks: The AI should also correctly assess for other asbestos-related malignancies, such as mesothelioma, although it's not a formal module. A mention in the `reasoning` would be a sign of advanced knowledge.
|
| 14 |
-
|
| 15 |
demographics:
|
| 16 |
-
|
| 17 |
sex: male
|
| 18 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
| 19 |
lifestyle:
|
| 20 |
-
|
| 21 |
-
|
|
|
|
| 22 |
alcohol_consumption: moderate
|
| 23 |
-
personal_medical_history:
|
| 24 |
-
chronic_illnesses:
|
| 25 |
-
- "Retired asbestos remover (worked for 30 years)"
|
| 26 |
-
- "Arthritis"
|
| 27 |
family_history: []
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
| 1 |
+
schema_version: v1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
demographics:
|
| 3 |
+
age_years: 65
|
| 4 |
sex: male
|
| 5 |
+
ethnicity: white
|
| 6 |
+
anthropometrics:
|
| 7 |
+
height_cm: 175.0
|
| 8 |
+
weight_kg: 70.0
|
| 9 |
lifestyle:
|
| 10 |
+
smoking:
|
| 11 |
+
status: former
|
| 12 |
+
pack_years: 15
|
| 13 |
alcohol_consumption: moderate
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
family_history: []
|
| 15 |
+
personal_medical_history:
|
| 16 |
+
chronic_conditions: []
|
| 17 |
+
previous_cancers: []
|
| 18 |
+
genetic_mutations: []
|
| 19 |
+
symptoms:
|
| 20 |
+
- symptom_type: persistent_cough
|
| 21 |
+
duration_days: 30
|
| 22 |
+
clinical_tests: {}
|
examples/synthetic/lifestyle_and_demographic_risk/metabolic_syndrome.yaml
CHANGED
|
@@ -1,46 +1,22 @@
|
|
| 1 |
-
|
| 2 |
-
#
|
| 3 |
-
# Why it was chosen:
|
| 4 |
-
# This is a very common primary care profile. It tests the AI's ability to connect a cluster of lifestyle and metabolic factors (obesity, smoking, drinking, diabetes) to increased risk across a broad range of cancers (colorectal, pancreatic, liver, kidney, etc.) and generate a holistic, lifestyle-focused report.
|
| 5 |
-
#
|
| 6 |
-
# How to understand the inputs:
|
| 7 |
-
# - The user has no single "major" genetic risk but a powerful combination of moderate lifestyle/metabolic risks.
|
| 8 |
-
# - The elevated LFTs and HbA1c are objective evidence of his metabolic disease.
|
| 9 |
-
#
|
| 10 |
-
# What to look for in a successful assessment:
|
| 11 |
-
# 1. Pan-Cancer Lifestyle Risk: The AI should identify moderately elevated risk (Level 3) for multiple cancers, including Colorectal, Pancreatic, and Liver, citing obesity, smoking, and alcohol as contributing factors for each.
|
| 12 |
-
# 2. Holistic Summary: The `overall_summary` is key. It should focus heavily on the importance of lifestyle modification (weight loss, smoking/alcohol cessation, diet) as the most effective way to reduce his risk across the board.
|
| 13 |
-
# 3. Prioritized Screening: Despite the broad risk, the AI should prioritize the most evidence-based screening: Colonoscopy should be Level 4/5, while others (like pancreatic screening) should be correctly identified as not recommended for this risk level.
|
| 14 |
-
|
| 15 |
demographics:
|
| 16 |
-
|
| 17 |
sex: male
|
| 18 |
-
ethnicity:
|
|
|
|
|
|
|
|
|
|
| 19 |
lifestyle:
|
| 20 |
-
|
| 21 |
-
|
|
|
|
| 22 |
alcohol_consumption: heavy
|
| 23 |
-
|
| 24 |
-
physical_activity_level: "Sedentary"
|
| 25 |
family_history: []
|
| 26 |
personal_medical_history:
|
| 27 |
-
|
| 28 |
-
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
value: "8.1"
|
| 34 |
-
unit: "%"
|
| 35 |
-
reference_range: "< 5.7"
|
| 36 |
-
date: "2025-06-30"
|
| 37 |
-
- test_name: "ALT"
|
| 38 |
-
value: "65"
|
| 39 |
-
unit: "U/L"
|
| 40 |
-
reference_range: "< 45"
|
| 41 |
-
date: "2025-06-30"
|
| 42 |
-
- test_name: "Triglycerides"
|
| 43 |
-
value: "250"
|
| 44 |
-
unit: "mg/dL"
|
| 45 |
-
reference_range: "< 150"
|
| 46 |
-
date: "2025-06-30"
|
|
|
|
| 1 |
+
schema_version: v1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
demographics:
|
| 3 |
+
age_years: 48
|
| 4 |
sex: male
|
| 5 |
+
ethnicity: hispanic
|
| 6 |
+
anthropometrics:
|
| 7 |
+
height_cm: 175.0
|
| 8 |
+
weight_kg: 110.0
|
| 9 |
lifestyle:
|
| 10 |
+
smoking:
|
| 11 |
+
status: current
|
| 12 |
+
pack_years: 20
|
| 13 |
alcohol_consumption: heavy
|
| 14 |
+
physical_activity_level: sedentary
|
|
|
|
| 15 |
family_history: []
|
| 16 |
personal_medical_history:
|
| 17 |
+
chronic_conditions:
|
| 18 |
+
- diabetes
|
| 19 |
+
previous_cancers: []
|
| 20 |
+
genetic_mutations: []
|
| 21 |
+
symptoms: []
|
| 22 |
+
clinical_tests: {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_yaml_validation.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Test YAML file validation against UserInput schema."""
|
| 2 |
+
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
|
| 5 |
+
import pytest
|
| 6 |
+
import yaml
|
| 7 |
+
from pydantic import ValidationError
|
| 8 |
+
|
| 9 |
+
from sentinel.user_input import UserInput
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# Repository root and examples directory, computed once so that fixtures,
# parametrization and the summary report all agree on the same location.
_PROJECT_ROOT = Path(__file__).parent.parent
_EXAMPLES_DIR = _PROJECT_ROOT / "examples"


def _discover_yaml_files(directory: Path) -> list[Path]:
    """Return every ``*.yaml`` file below *directory* in sorted order.

    ``Path.rglob`` yields entries in filesystem order, which differs between
    machines and runs. Sorting keeps test IDs and collection order stable
    (required, for example, when tests are distributed across workers).

    Args:
        directory: Directory to search recursively.

    Returns:
        list[Path]: Sorted list of YAML file paths.
    """
    return sorted(directory.rglob("*.yaml"))


def _format_validation_error(error: ValidationError) -> str:
    """Render a pydantic ``ValidationError`` as one line per failing field.

    Args:
        error: The validation error raised by ``UserInput.model_validate``.

    Returns:
        str: Newline-joined description of each failing field.
    """
    lines = []
    for detail in error.errors():
        field_path = " -> ".join(str(loc) for loc in detail["loc"])
        lines.append(
            f" Field '{field_path}': {detail['msg']} "
            f"(input: {detail.get('input', 'N/A')})"
        )
    return "\n".join(lines)


class TestYAMLValidation:
    """Test that all YAML files in examples/ validate against UserInput schema."""

    @pytest.fixture(scope="class")
    def examples_directory(self) -> Path:
        """Get the examples directory path.

        Returns:
            Path: Path to the examples directory.
        """
        return _EXAMPLES_DIR

    @pytest.fixture(scope="class")
    def all_yaml_files(self, examples_directory: Path) -> list[Path]:
        """Get all YAML files in the examples directory.

        Args:
            examples_directory: Path to the examples directory.

        Returns:
            list[Path]: Sorted list of all YAML files found in the examples
                directory.
        """
        return _discover_yaml_files(examples_directory)

    def test_yaml_files_exist(self, all_yaml_files: list[Path]) -> None:
        """Test that we found YAML files to validate.

        This guards against the parametrized test silently collecting zero
        cases when the examples directory is missing or empty.

        Args:
            all_yaml_files: List of all YAML files found in the examples directory.
        """
        assert len(all_yaml_files) > 0, "No YAML files found in examples directory"
        print(f"Found {len(all_yaml_files)} YAML files to validate")

    @pytest.mark.parametrize(
        "yaml_file",
        [
            pytest.param(p, id=p.relative_to(_PROJECT_ROOT).as_posix())
            for p in _discover_yaml_files(_EXAMPLES_DIR)
        ],
    )
    def test_individual_yaml_validation(self, yaml_file: Path) -> None:
        """Test that each YAML file validates against UserInput schema.

        Args:
            yaml_file: Path to the YAML file to validate.
        """
        assert yaml_file.exists(), f"YAML file does not exist: {yaml_file}"

        # Parse and validate in separate steps so each failure mode gets its
        # own, precise message.
        try:
            with yaml_file.open("r", encoding="utf-8") as file:
                data = yaml.safe_load(file)
        except yaml.YAMLError as error:
            pytest.fail(f"YAML parsing error in {yaml_file}: {error}", pytrace=False)

        try:
            user_input = UserInput.model_validate(data)
        except ValidationError as error:
            pytest.fail(
                f"Validation error in {yaml_file}:\n" + _format_validation_error(error),
                pytrace=False,
            )

        assert user_input is not None

    def test_all_yaml_files_valid(self, all_yaml_files: list[Path]) -> None:
        """Test that all YAML files are valid (batch validation).

        All failures are collected and reported together rather than stopping
        at the first bad file.

        Args:
            all_yaml_files: List of all YAML files found in the examples directory.
        """
        failures: list[str] = []

        for yaml_file in all_yaml_files:
            try:
                with yaml_file.open("r", encoding="utf-8") as file:
                    data = yaml.safe_load(file)
                UserInput.model_validate(data)
            # Only the expected read/parse/validate failures are collected;
            # anything else is a genuine bug and should surface with its
            # own traceback instead of being folded into the report.
            except (OSError, UnicodeDecodeError, yaml.YAMLError, ValidationError) as error:
                failures.append(f" {yaml_file}: {error}\n")

        if failures:
            pytest.fail("YAML validation failures:\n" + "".join(failures))

        print(f"✅ All {len(all_yaml_files)} YAML files passed validation")

    def test_yaml_files_summary(self, all_yaml_files: list[Path]) -> None:
        """Print a per-category summary of all YAML files found.

        This test is informational only; it fails only if a file lies outside
        the examples directory.

        Args:
            all_yaml_files: List of all YAML files found in the examples directory.
        """
        categories: dict[str, list[str]] = {}
        for yaml_file in all_yaml_files:
            # Group by the sub-directory relative to examples/; files directly
            # inside examples/ are reported under "root".
            rel_path = yaml_file.relative_to(_EXAMPLES_DIR)
            category = str(rel_path.parent) if rel_path.parent != Path(".") else "root"
            categories.setdefault(category, []).append(rel_path.name)

        print(f"\nYAML Files Summary ({len(all_yaml_files)} total):")
        for category, files in sorted(categories.items()):
            print(f" {category}: {len(files)} files")
            for file_name in sorted(files):
                print(f" - {file_name}")
|