jeuko committed on
Commit
629a216
·
verified ·
1 Parent(s): 0451e1c

Sync from GitHub (main)

Browse files
Files changed (37) hide show
  1. examples/dev/profile_1.yaml +21 -43
  2. examples/dev/profile_2.yaml +40 -32
  3. examples/dev/profile_3.yaml +27 -30
  4. examples/dev/profile_4.yaml +33 -22
  5. examples/dev/profile_5.yaml +24 -28
  6. examples/dev/profile_6.yaml +34 -33
  7. examples/dev/profile_mjs_1.yaml +31 -66
  8. examples/dev/profile_mjs_2.yaml +31 -38
  9. examples/dev/profile_mjs_3.yaml +32 -90
  10. examples/dev/profile_plcom2012_comprehensive.yaml +26 -35
  11. examples/dev/profile_plcom2012_edge_cases.yaml +36 -23
  12. examples/synthetic/complex_and_acquired_risk/colorectal_risk_ibd.yaml +23 -21
  13. examples/synthetic/complex_and_acquired_risk/complex_comorbidity.yaml +22 -67
  14. examples/synthetic/complex_and_acquired_risk/kidney_cancer_esrd.yaml +18 -42
  15. examples/synthetic/complex_and_acquired_risk/leukemia_therapy_related.yaml +13 -39
  16. examples/synthetic/complex_and_acquired_risk/lymphoma_immunosuppression.yaml +13 -39
  17. examples/synthetic/complex_and_acquired_risk/real_world_data.yaml +15 -82
  18. examples/synthetic/complex_and_acquired_risk/stomach_cancer_high_risk.yaml +18 -45
  19. examples/synthetic/complex_and_acquired_risk/thyroid_cancer_radiation.yaml +13 -34
  20. examples/synthetic/diagnostic_and_screening_pathways/executive_checkup.yaml +17 -64
  21. examples/synthetic/diagnostic_and_screening_pathways/indeterminate_imaging_birads3.yaml +15 -26
  22. examples/synthetic/diagnostic_and_screening_pathways/mrd_surveillance_candidate.yaml +13 -30
  23. examples/synthetic/diagnostic_and_screening_pathways/post_positive_cologuard.yaml +13 -25
  24. examples/synthetic/diagnostic_and_screening_pathways/therapy_selection_context.yaml +14 -25
  25. examples/synthetic/diagnostic_and_screening_pathways/vague_symptoms.yaml +22 -50
  26. examples/synthetic/guideline_boundaries/starting_screening_young_adult.yaml +13 -20
  27. examples/synthetic/guideline_boundaries/stopping_screening_older_adult.yaml +14 -30
  28. examples/synthetic/hereditary_and_genetic_risk/brain_tumor_nf1.yaml +16 -31
  29. examples/synthetic/hereditary_and_genetic_risk/brca1_high_risk.yaml +38 -31
  30. examples/synthetic/hereditary_and_genetic_risk/conflicting_genetic_data.yaml +16 -28
  31. examples/synthetic/hereditary_and_genetic_risk/li_fraumeni_tp53.yaml +20 -31
  32. examples/synthetic/hereditary_and_genetic_risk/lynch_syndrome.yaml +27 -40
  33. examples/synthetic/hereditary_and_genetic_risk/vague_family_history.yaml +14 -31
  34. examples/synthetic/lifestyle_and_demographic_risk/liver_risk_alcohol_abuse.yaml +15 -43
  35. examples/synthetic/lifestyle_and_demographic_risk/lung_risk_occupational.yaml +17 -29
  36. examples/synthetic/lifestyle_and_demographic_risk/metabolic_syndrome.yaml +16 -40
  37. tests/test_yaml_validation.py +128 -0
examples/dev/profile_1.yaml CHANGED
@@ -1,51 +1,29 @@
1
  demographics:
2
- age: 65
3
  sex: male
4
- ethnicity: "African American"
 
 
 
5
  lifestyle:
6
- smoking_status: former
7
- smoking_pack_years: 10
 
8
  alcohol_consumption: moderate
9
  family_history:
10
- - relative: father
11
- cancer_type: prostate
12
  age_at_diagnosis: 65
 
 
13
  personal_medical_history:
 
14
  previous_cancers: []
15
- current_concerns_or_symptoms: "Difficulty with urination."
16
- clinical_observations:
17
- - test_name: "PSA"
18
- value: "6.1"
19
- unit: "ng/mL"
20
- reference_range: "< 4.0 ng/mL"
21
- date: "2025-05-20"
22
- - test_name: "Vitamin D, 25-Hydroxy"
23
- value: "28"
24
- unit: "ng/mL"
25
- reference_range: "30-100 ng/mL"
26
- date: "2025-05-20"
27
- - test_name: "Hemoglobin"
28
- value: "13.2"
29
- unit: "g/dL"
30
- reference_range: "13.5-17.5 g/dL"
31
- date: "2025-05-20"
32
- - test_name: "White Blood Cell Count"
33
- value: "7.2"
34
- unit: "K/uL"
35
- reference_range: "4.5 - 11.0 K/uL"
36
- date: "2025-05-20"
37
- - test_name: "Glucose"
38
- value: "115"
39
- unit: "mg/dL"
40
- reference_range: "70-99 mg/dL"
41
- date: "2025-05-20"
42
- - test_name: "Creatinine"
43
- value: "1.4"
44
- unit: "mg/dL"
45
- reference_range: "0.7-1.3 mg/dL"
46
- date: "2025-05-20"
47
- - test_name: "LDL Cholesterol"
48
- value: "140"
49
- unit: "mg/dL"
50
- reference_range: "< 100 mg/dL"
51
- date: "2025-05-20"
 
1
  demographics:
2
+ age_years: 65
3
  sex: male
4
+ ethnicity: black
5
+ anthropometrics:
6
+ height_cm: 175.0
7
+ weight_kg: 80.0
8
  lifestyle:
9
+ smoking:
10
+ status: former
11
+ pack_years: 10
12
  alcohol_consumption: moderate
13
  family_history:
14
+ - relation: father
15
+ cancer_type: prostate_cancer
16
  age_at_diagnosis: 65
17
+ degree: "1"
18
+ side: unknown
19
  personal_medical_history:
20
+ chronic_conditions: []
21
  previous_cancers: []
22
+ genetic_mutations: []
23
+ clinical_tests:
24
+ psa:
25
+ value_ng_ml: 6.1
26
+ date: 2025-05-20
27
+ symptoms:
28
+ - symptom_type: increased_urinary_frequency
29
+ duration_days: 30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/dev/profile_2.yaml CHANGED
@@ -1,42 +1,50 @@
1
  # Older Female with Lung Cancer Risk
2
 
3
  demographics:
4
- age: 72
5
  sex: female
6
- ethnicity: "Caucasian"
 
 
 
7
  lifestyle:
8
- smoking_status: current
9
- smoking_pack_years: 40
10
- alcohol_consumption: occasional
11
- exercise_frequency: "Rarely"
12
- diet: "Standard Western diet"
13
  family_history:
14
- - relative: mother
15
- cancer_type: lung
16
  age_at_diagnosis: 70
17
- - relative: sister
18
- cancer_type: breast
 
 
19
  age_at_diagnosis: 50
 
 
20
  personal_medical_history:
21
- previous_cancers: []
22
  chronic_conditions:
23
- - "COPD"
24
- - "Osteoporosis"
25
- - "Hypertension"
26
- current_concerns_or_symptoms: "Persistent cough, unintended weight loss, shortness of breath."
27
- clinical_observations:
28
- - test_name: "Chest X-ray"
29
- value: "Suspicious mass in right upper lobe"
30
- unit: "N/A"
31
- reference_range: "Clear"
32
- date: "2025-06-10"
33
- - test_name: "Spirometry"
34
- value: "FEV1 55% predicted"
35
- unit: "%"
36
- reference_range: "> 80%"
37
- date: "2025-06-05"
38
- - test_name: "Bone Density Scan"
39
- value: "T-score -2.6"
40
- unit: "T-score"
41
- reference_range: "> -1.0"
42
- date: "2025-05-20"
 
 
 
1
  # Older Female with Lung Cancer Risk
2
 
3
  demographics:
4
+ age_years: 72
5
  sex: female
6
+ ethnicity: white
7
+ anthropometrics:
8
+ height_cm: 160.0
9
+ weight_kg: 65.0
10
  lifestyle:
11
+ smoking:
12
+ status: current
13
+ pack_years: 40
14
+ alcohol_consumption: light
15
+ physical_activity_level: sedentary
16
  family_history:
17
+ - relation: mother
18
+ cancer_type: lung_cancer
19
  age_at_diagnosis: 70
20
+ degree: "1"
21
+ side: maternal
22
+ - relation: sister
23
+ cancer_type: breast_cancer
24
  age_at_diagnosis: 50
25
+ degree: "1"
26
+ side: unknown
27
  personal_medical_history:
 
28
  chronic_conditions:
29
+ - copd
30
+ previous_cancers: []
31
+ genetic_mutations: []
32
+ female_specific:
33
+ menstrual:
34
+ age_at_menopause: 50
35
+ parity:
36
+ num_live_births: 2
37
+ age_at_first_live_birth: 25
38
+ hormone_use:
39
+ estrogen_use: never
40
+ breast_health:
41
+ num_biopsies: 0
42
+ atypical_hyperplasia: false
43
+ lobular_carcinoma_in_situ: false
44
+ symptoms:
45
+ - symptom_type: persistent_cough
46
+ duration_days: 60
47
+ - symptom_type: weight_loss
48
+ duration_days: 30
49
+ - symptom_type: haemoptysis
50
+ duration_days: 7
examples/dev/profile_3.yaml CHANGED
@@ -1,41 +1,38 @@
1
  # Middle-aged Hispanic Male with Colon Cancer Risk
2
 
3
  demographics:
4
- age: 50
5
  sex: male
6
- ethnicity: "Hispanic"
 
 
 
7
  lifestyle:
8
- smoking_status: never
 
9
  alcohol_consumption: heavy
10
- exercise_frequency: "Rarely"
11
- diet: "High in processed foods"
12
  family_history:
13
- - relative: father
14
- cancer_type: colorectal
15
  age_at_diagnosis: 55
16
- - relative: paternal uncle
17
- cancer_type: stomach
 
 
18
  age_at_diagnosis: 60
 
 
19
  personal_medical_history:
20
- previous_cancers: []
21
  chronic_conditions:
22
- - "Type 2 Diabetes"
23
- - "Obesity"
24
- - "High cholesterol"
25
- current_concerns_or_symptoms: "Intermittent rectal bleeding, abdominal discomfort, fatigue."
26
- clinical_observations:
27
- - test_name: "Fecal Occult Blood Test"
28
- value: "Positive"
29
- unit: "N/A"
30
- reference_range: "Negative"
31
- date: "2025-04-15"
32
- - test_name: "HbA1c"
33
- value: "7.8"
34
- unit: "%"
35
- reference_range: "<5.7%"
36
- date: "2025-03-10"
37
- - test_name: "Lipid Panel"
38
- value: "LDL 165"
39
- unit: "mg/dL"
40
- reference_range: "<100 mg/dL"
41
- date: "2025-03-10"
 
1
  # Middle-aged Hispanic Male with Colon Cancer Risk
2
 
3
  demographics:
4
+ age_years: 50
5
  sex: male
6
+ ethnicity: hispanic
7
+ anthropometrics:
8
+ height_cm: 175.0
9
+ weight_kg: 90.0
10
  lifestyle:
11
+ smoking:
12
+ status: never
13
  alcohol_consumption: heavy
14
+ physical_activity_level: sedentary
15
+ red_meat_consumption_oz_per_day: 8.0
16
  family_history:
17
+ - relation: father
18
+ cancer_type: colorectal_cancer
19
  age_at_diagnosis: 55
20
+ degree: "1"
21
+ side: paternal
22
+ - relation: paternal_uncle
23
+ cancer_type: gastro_oesophageal_cancer
24
  age_at_diagnosis: 60
25
+ degree: "2"
26
+ side: paternal
27
  personal_medical_history:
 
28
  chronic_conditions:
29
+ - diabetes
30
+ previous_cancers: []
31
+ genetic_mutations: []
32
+ symptoms:
33
+ - symptom_type: rectal_bleeding
34
+ duration_days: 14
35
+ - symptom_type: abdominal_pain
36
+ duration_days: 30
37
+ - symptom_type: weight_loss
38
+ duration_days: 14
 
 
 
 
 
 
 
 
 
 
examples/dev/profile_4.yaml CHANGED
@@ -1,34 +1,45 @@
1
  # Young Asian-American Female, Risk of Thyroid Cancer
2
 
3
  demographics:
4
- age: 29
5
  sex: female
6
- ethnicity: "Asian-American"
 
 
 
7
  lifestyle:
8
- smoking_status: never
 
9
  alcohol_consumption: none
10
- exercise_frequency: "Regularly"
11
- diet: "Balanced diet, mostly plant-based"
12
  family_history:
13
- - relative: mother
14
- cancer_type: thyroid
15
  age_at_diagnosis: 35
16
- - relative: father
 
 
17
  cancer_type: melanoma
18
  age_at_diagnosis: 45
 
 
19
  personal_medical_history:
 
20
  previous_cancers: []
21
- chronic_conditions:
22
- - "Anxiety"
23
- current_concerns_or_symptoms: "Swelling and discomfort in the neck, fatigue, occasional headaches."
24
- clinical_observations:
25
- - test_name: "Thyroid Ultrasound"
26
- value: "1.5 cm hypoechoic nodule"
27
- unit: "cm"
28
- reference_range: "Nodule-free"
29
- date: "2025-06-01"
30
- - test_name: "TSH"
31
- value: "4.8"
32
- unit: "μIU/mL"
33
- reference_range: "0.4 - 4.0 μIU/mL"
34
- date: "2025-05-15"
 
 
 
 
1
  # Young Asian-American Female, Risk of Thyroid Cancer
2
 
3
  demographics:
4
+ age_years: 29
5
  sex: female
6
+ ethnicity: asian
7
+ anthropometrics:
8
+ height_cm: 160.0
9
+ weight_kg: 55.0
10
  lifestyle:
11
+ smoking:
12
+ status: never
13
  alcohol_consumption: none
14
+ physical_activity_level: moderate
 
15
  family_history:
16
+ - relation: mother
17
+ cancer_type: thyroid_cancer
18
  age_at_diagnosis: 35
19
+ degree: "1"
20
+ side: maternal
21
+ - relation: father
22
  cancer_type: melanoma
23
  age_at_diagnosis: 45
24
+ degree: "1"
25
+ side: paternal
26
  personal_medical_history:
27
+ chronic_conditions: []
28
  previous_cancers: []
29
+ genetic_mutations: []
30
+ female_specific:
31
+ menstrual:
32
+ age_at_menarche: 13
33
+ parity:
34
+ num_live_births: 0
35
+ hormone_use:
36
+ estrogen_use: never
37
+ breast_health:
38
+ num_biopsies: 0
39
+ atypical_hyperplasia: false
40
+ lobular_carcinoma_in_situ: false
41
+ symptoms:
42
+ - symptom_type: neck_lump
43
+ duration_days: 30
44
+ - symptom_type: weight_loss
45
+ duration_days: 14
examples/dev/profile_5.yaml CHANGED
@@ -1,38 +1,34 @@
1
  # African American Male, Risk of Prostate Cancer
2
 
3
  demographics:
4
- age: 58
5
  sex: male
6
- ethnicity: "African American"
 
 
 
7
  lifestyle:
8
- smoking_status: former
9
- smoking_pack_years: 20
 
10
  alcohol_consumption: moderate
11
- exercise_frequency: "Occasional"
12
- diet: "Mixed diet, moderate meat consumption"
13
  family_history:
14
- - relative: brother
15
- cancer_type: prostate
16
  age_at_diagnosis: 60
17
- - relative: mother
18
- cancer_type: hypertension-related stroke
19
- age_at_diagnosis: 72
20
  personal_medical_history:
 
21
  previous_cancers: []
22
- chronic_conditions:
23
- - "Hypertension"
24
- - "Pre-diabetes"
25
- current_medications:
26
- - "Amlodipine"
27
- current_concerns_or_symptoms: "Frequent nighttime urination, mild lower back pain, increased fatigue."
28
- clinical_observations:
29
- - test_name: "Prostate-Specific Antigen (PSA)"
30
- value: "5.5"
31
- unit: "ng/mL"
32
- reference_range: "<4.0 ng/mL"
33
- date: "2025-06-10"
34
- - test_name: "Blood Pressure"
35
- value: "145/90"
36
- unit: "mmHg"
37
- reference_range: "<120/80 mmHg"
38
- date: "2025-06-05"
 
1
  # African American Male, Risk of Prostate Cancer
2
 
3
  demographics:
4
+ age_years: 58
5
  sex: male
6
+ ethnicity: black
7
+ anthropometrics:
8
+ height_cm: 180.0
9
+ weight_kg: 85.0
10
  lifestyle:
11
+ smoking:
12
+ status: former
13
+ pack_years: 20
14
  alcohol_consumption: moderate
15
+ physical_activity_level: low
 
16
  family_history:
17
+ - relation: brother
18
+ cancer_type: prostate_cancer
19
  age_at_diagnosis: 60
20
+ degree: "1"
21
+ side: unknown
 
22
  personal_medical_history:
23
+ chronic_conditions: []
24
  previous_cancers: []
25
+ genetic_mutations: []
26
+ clinical_tests:
27
+ psa:
28
+ value_ng_ml: 5.5
29
+ date: 2025-06-10
30
+ symptoms:
31
+ - symptom_type: nocturia
32
+ duration_days: 30
33
+ - symptom_type: increased_urinary_frequency
34
+ duration_days: 14
 
 
 
 
 
 
 
examples/dev/profile_6.yaml CHANGED
@@ -1,43 +1,44 @@
1
  # Young Female, BRCA Mutation, High Breast Cancer Risk
2
 
3
  demographics:
4
- age: 32
5
  sex: female
6
- ethnicity: "Ashkenazi Jewish"
7
- education_level: 4 # college graduate
 
 
 
8
  lifestyle:
9
- smoking_status: never
10
- alcohol_consumption: occasional
11
- exercise_frequency: "Regularly"
12
- diet: "Mediterranean diet"
13
- female_specific:
14
- age_at_first_period: 12
15
- num_live_births: 1
16
- age_at_first_live_birth: 25
17
- hormone_therapy_use: "never"
18
  family_history:
19
- - relative: mother
20
- cancer_type: breast
21
  age_at_diagnosis: 42
22
- - relative: maternal grandmother
23
- cancer_type: ovarian
 
 
24
  age_at_diagnosis: 60
 
 
25
  personal_medical_history:
26
- previous_cancers: []
27
  chronic_conditions: []
28
- genetic_testing:
29
- - mutation: "BRCA1"
30
- status: "Positive"
31
- date: "2025-03-20"
32
- current_concerns_or_symptoms: "No current symptoms, proactive health management, fertility counseling ongoing."
33
- clinical_observations:
34
- - test_name: "Breast MRI"
35
- value: "No abnormalities detected"
36
- unit: "N/A"
37
- reference_range: "No abnormalities"
38
- date: "2025-05-01"
39
- - test_name: "CA-125"
40
- value: "15"
41
- unit: "U/mL"
42
- reference_range: "<35 U/mL"
43
- date: "2025-05-01"
 
1
  # Young Female, BRCA Mutation, High Breast Cancer Risk
2
 
3
  demographics:
4
+ age_years: 32
5
  sex: female
6
+ ethnicity: ashkenazi jewish
7
+ education_level: 16
8
+ anthropometrics:
9
+ height_cm: 165.0
10
+ weight_kg: 60.0
11
  lifestyle:
12
+ smoking:
13
+ status: never
14
+ alcohol_consumption: light
15
+ physical_activity_level: moderate
 
 
 
 
 
16
  family_history:
17
+ - relation: mother
18
+ cancer_type: breast_cancer
19
  age_at_diagnosis: 42
20
+ degree: "1"
21
+ side: maternal
22
+ - relation: maternal_grandmother
23
+ cancer_type: ovarian_cancer
24
  age_at_diagnosis: 60
25
+ degree: "2"
26
+ side: maternal
27
  personal_medical_history:
 
28
  chronic_conditions: []
29
+ previous_cancers: []
30
+ genetic_mutations:
31
+ - brca1
32
+ female_specific:
33
+ menstrual:
34
+ age_at_menarche: 12
35
+ parity:
36
+ num_live_births: 1
37
+ age_at_first_live_birth: 25
38
+ hormone_use:
39
+ estrogen_use: never
40
+ breast_health:
41
+ num_biopsies: 0
42
+ atypical_hyperplasia: false
43
+ lobular_carcinoma_in_situ: false
44
+ symptoms: []
examples/dev/profile_mjs_1.yaml CHANGED
@@ -1,77 +1,42 @@
1
  # Relatively healthy female in her 50s with a family history of CRC, entering menopause.
2
 
3
  demographics:
4
- age: 49
5
  sex: female
6
- ethnicity: "Japanese"
7
- years_edu: 12
8
- height_in: 60.0
9
- weight_lb: 100.5
10
- height: 1.52
11
- weight: 45.6
12
 
13
  lifestyle:
14
- smoking_status: former
15
- smoking_pack_years: 1
 
16
  alcohol_consumption: moderate
17
- alcohol_drinks_per_day: 1.00
18
- multivitamin_usage: True
19
- diabetes_status: False
20
- activity: 2.0
21
- total_meat: 4.0
22
- pain_med: "no"
23
- nsaid_use: False
24
- estrogen: "no"
25
- estrogen_use: False
26
- estrogen_type: "none"
27
- estrogen_use_duration: 0
28
- estrogen_use_duration_unit: "years"
29
- estrogen_use_duration_value: 0
30
- estrogen_use_duration_unit: "years"
31
  personal_medical_history:
 
32
  previous_cancers: []
33
- family_crc: False
34
- aspirin: "yes"
35
 
36
- current_concerns_or_symptoms: "Irritability and night sweats."
37
- clinical_observations:
38
- nsaid_use: False
39
- estrogen: "no"
40
- estrogen_use: False
41
- estrogen_type: "none"
42
- estrogen_use_duration: 0
43
- estrogen_use_duration_unit: "years"
44
- estrogen_use_duration_value: 0
45
- estrogen_use_duration_unit: "years"
 
 
46
 
47
- clinical_observations:
48
- - test_name: "Vitamin D, 25-Hydroxy"
49
- value: "28"
50
- unit: "ng/mL"
51
- reference_range: "30-100 ng/mL"
52
- date: "2025-05-20"
53
- - test_name: "Hemoglobin"
54
- value: "13.2"
55
- unit: "g/dL"
56
- reference_range: "13.5-17.5 g/dL"
57
- date: "2025-05-20"
58
- - test_name: "White Blood Cell Count"
59
- value: "7.2"
60
- unit: "K/uL"
61
- reference_range: "4.5 - 11.0 K/uL"
62
- date: "2025-05-20"
63
- - test_name: "Glucose"
64
- value: "115"
65
- unit: "mg/dL"
66
- reference_range: "70-99 mg/dL"
67
- date: "2025-05-20"
68
- - test_name: "Creatinine"
69
- value: "1.1"
70
- unit: "mg/dL"
71
- reference_range: "0.7-1.3 mg/dL"
72
- date: "2025-05-20"
73
- - test_name: "LDL Cholesterol"
74
- value: "120"
75
- unit: "mg/dL"
76
- reference_range: "< 100 mg/dL"
77
- date: "2025-05-20"
 
1
  # Relatively healthy female in her 50s with a family history of CRC, entering menopause.
2
 
3
  demographics:
4
+ age_years: 49
5
  sex: female
6
+ ethnicity: asian
7
+ education_level: 12
8
+ anthropometrics:
9
+ height_cm: 152.0
10
+ weight_kg: 45.6
 
11
 
12
  lifestyle:
13
+ smoking:
14
+ status: former
15
+ pack_years: 1
16
  alcohol_consumption: moderate
17
+ multivitamin_use: true
18
+ physical_activity_level: low
19
+ red_meat_consumption_oz_per_day: 4.0
20
+
 
 
 
 
 
 
 
 
 
 
21
  personal_medical_history:
22
+ chronic_conditions: []
23
  previous_cancers: []
24
+ genetic_mutations: []
25
+ aspirin_use: current
26
 
27
+ female_specific:
28
+ menstrual:
29
+ age_at_menopause: 49
30
+ parity:
31
+ num_live_births: 2
32
+ age_at_first_live_birth: 25
33
+ hormone_use:
34
+ estrogen_use: never
35
+ breast_health:
36
+ num_biopsies: 0
37
+ atypical_hyperplasia: false
38
+ lobular_carcinoma_in_situ: false
39
 
40
+ symptoms:
41
+ - symptom_type: night_sweats
42
+ duration_days: 30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/dev/profile_mjs_2.yaml CHANGED
@@ -1,48 +1,41 @@
1
  # African American Male, Risk of Prostate Cancer
2
 
3
  demographics:
4
- age: 58
5
  sex: male
6
- bmi: 24.9
7
- ethnicity: "African American"
8
- height: 1.85
9
- weight: 85
 
10
  lifestyle:
11
- smoking_status: former
12
- smoking_pack_years: 5
13
- alcohol_consumption: abstain
14
- alcohol_drinks_per_day: 0.0
15
- multivitamin_usage: True
16
- diabetes_status: False
17
- activity: 2.0
18
- total_meat: 4.0
19
- pain_med: "no"
20
- nsaid_use: False
21
 
22
  family_history:
23
- - relative: brother
24
- cancer_type: prostate
25
  age_at_diagnosis: 60
26
- - relative: mother
27
- cancer_type: hypertension-related stroke
28
- age_at_diagnosis: 72
29
  personal_medical_history:
 
30
  previous_cancers: []
31
- chronic_conditions:
32
- - "Hypertension"
33
- - "Pre-diabetes"
34
- - "Impotence"
35
- current_medications:
36
- - "Amlodipine"
37
- current_concerns_or_symptoms: "Increased fatigue, appetite loss, indigestion."
38
- clinical_observations:
39
- - test_name: "Prostate-Specific Antigen (PSA)"
40
- value: "2.5"
41
- unit: "ng/mL"
42
- reference_range: "<4.0 ng/mL"
43
- date: "2025-06-10"
44
- - test_name: "Blood Pressure"
45
- value: "115/75"
46
- unit: "mmHg"
47
- reference_range: "<120/80 mmHg"
48
- date: "2025-06-05"
 
1
  # African American Male, Risk of Prostate Cancer
2
 
3
  demographics:
4
+ age_years: 58
5
  sex: male
6
+ ethnicity: black
7
+ anthropometrics:
8
+ height_cm: 185.0
9
+ weight_kg: 85.0
10
+
11
  lifestyle:
12
+ smoking:
13
+ status: former
14
+ pack_years: 5
15
+ alcohol_consumption: none
16
+ multivitamin_use: true
17
+ physical_activity_level: low
18
+ red_meat_consumption_oz_per_day: 4.0
 
 
 
19
 
20
  family_history:
21
+ - relation: brother
22
+ cancer_type: prostate_cancer
23
  age_at_diagnosis: 60
24
+ degree: "1"
25
+ side: unknown
26
+
27
  personal_medical_history:
28
+ chronic_conditions: []
29
  previous_cancers: []
30
+ genetic_mutations: []
31
+
32
+ clinical_tests:
33
+ psa:
34
+ value_ng_ml: 2.5
35
+ date: 2025-06-10
36
+
37
+ symptoms:
38
+ - symptom_type: appetite_loss
39
+ duration_days: 14
40
+ - symptom_type: indigestion
41
+ duration_days: 7
 
 
 
 
 
 
examples/dev/profile_mjs_3.yaml CHANGED
@@ -1,102 +1,44 @@
1
  # European young male, healthy
2
 
3
  demographics:
4
- age: 25
5
  sex: male
6
- bmi: 21.0
7
- ethnicity: "European"
8
- height: 1.80
9
- weight: 75
 
10
  lifestyle:
11
- smoking_status: former
12
- smoking_pack_years: 3
 
13
  alcohol_consumption: light
14
- alcohol_drinks_per_day: 2
15
- multivitamin_usage: False
16
- diabetes_status: False
17
- activity: 3.0
18
- total_meat: 2.5
19
- pain_med: "no"
20
- nsaid_use: False
21
 
22
  family_history:
23
- - relative: grandmother
24
- cancer_type: colorectal
25
  age_at_diagnosis: 85
26
- - relative: paternal grandfather
27
- cancer_type: prostate
 
 
28
  age_at_diagnosis: 96
 
 
 
29
  personal_medical_history:
 
30
  previous_cancers: []
31
- chronic_conditions:
32
- - "Depression"
33
- - "ADHD"
34
- current_medications:
35
- - "Fluoxetine"
36
- - "Methylphenidate"
37
- current_concerns_or_symptoms: "Increased fatigue and appetite loss."
38
- clinical_observations:
39
- - test_name: "Prostate-Specific Antigen (PSA)"
40
- value: "0.5"
41
- unit: "ng/mL"
42
- reference_range: "<4.0 ng/mL"
43
- date: "2025-06-10"
44
- - test_name: "Blood Pressure"
45
- value: "115/75"
46
- unit: "mmHg"
47
- reference_range: "<120/80 mmHg"
48
- date: "2025-06-05"
49
- - test_name: "Glucose"
50
- value: "103"
51
- unit: "mg/dL"
52
- reference_range: "70-99 mg/dL"
53
- date: "2025-06-05"
54
- - test_name: "Creatinine"
55
- value: "1.1"
56
- unit: "mg/dL"
57
- reference_range: "0.7-1.3 mg/dL"
58
- date: "2025-06-05"
59
- - test_name: "HDL Cholesterol"
60
- value: "42"
61
- unit: "mg/dL"
62
- reference_range: "40-50 mg/dL"
63
- date: "2025-06-05"
64
- - test_name: "LDL Cholesterol"
65
- value: "92"
66
- unit: "mg/dL"
67
- reference_range: "<100 mg/dL"
68
- date: "2025-06-05"
69
- - test_name: "Triglycerides"
70
- value: "134"
71
- unit: "mg/dL"
72
- reference_range: "<150 mg/dL"
73
- date: "2025-06-05"
74
- - test_name: "C-Reactive Protein"
75
- value: "0.7"
76
- unit: "mg/dL"
77
- reference_range: "0.0-3.0 mg/dL"
78
- date: "2025-06-05"
79
- - test_name: "Lung X-Ray"
80
- value: "normal"
81
- unit: "n/a"
82
- date: "2025-06-05"
83
- - test_name: "CT Scan of Abdomen"
84
- value: "normal"
85
- unit: "n/a"
86
- date: "2025-06-05"
87
- - test_name: "CT Scan of Chest"
88
- value: "normal"
89
- unit: "n/a"
90
- date: "2025-06-05"
91
- - test_name: "Lung Function Test"
92
- value: "normal"
93
- unit: "n/a"
94
- date: "2025-06-05"
95
- - test_name: "Liver Function Test"
96
- value: "normal"
97
- unit: "n/a"
98
- date: "2025-06-05"
99
- - test_name: "Kidney Function Test"
100
- value: "normal"
101
- unit: "n/a"
102
- date: "2025-06-05"
 
1
  # European young male, healthy
2
 
3
  demographics:
4
+ age_years: 25
5
  sex: male
6
+ ethnicity: white
7
+ anthropometrics:
8
+ height_cm: 180.0
9
+ weight_kg: 75.0
10
+
11
  lifestyle:
12
+ smoking:
13
+ status: former
14
+ pack_years: 3
15
  alcohol_consumption: light
16
+ multivitamin_use: false
17
+ physical_activity_level: moderate
18
+ red_meat_consumption_oz_per_day: 2.5
 
 
 
 
19
 
20
  family_history:
21
+ - relation: paternal_grandmother
22
+ cancer_type: colorectal_cancer
23
  age_at_diagnosis: 85
24
+ degree: "2"
25
+ side: paternal
26
+ - relation: paternal_grandfather
27
+ cancer_type: prostate_cancer
28
  age_at_diagnosis: 96
29
+ degree: "2"
30
+ side: paternal
31
+
32
  personal_medical_history:
33
+ chronic_conditions: []
34
  previous_cancers: []
35
+ genetic_mutations: []
36
+
37
+ clinical_tests:
38
+ psa:
39
+ value_ng_ml: 0.5
40
+ date: 2025-06-10
41
+
42
+ symptoms:
43
+ - symptom_type: appetite_loss
44
+ duration_days: 14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/dev/profile_plcom2012_comprehensive.yaml CHANGED
@@ -2,50 +2,41 @@
2
  # Tests native hawaiian/pacific islander race category and comprehensive variable coverage
3
 
4
  demographics:
5
- age: 62 # Center value used in model
6
  sex: male
7
- ethnicity: "native hawaiian" # Tests highest race offset (1.027152)
8
- education_level: 4 # College graduate (center value in model)
9
- height: 1.78 # meters
10
- weight: 85.0 # kilograms (BMI ~27, close to center value)
 
11
 
12
  lifestyle:
13
- smoking_status: current # Current smoker
14
- smoking_pack_years: 35 # Calculated from intensity and duration
15
- smoking_intensity_cpd: 25 # Moderate cigarettes per day
16
- smoking_duration_years: 27 # Center value used in model
17
- smoking_quit_years: null # Not applicable for current smoker
18
  alcohol_consumption: light
19
- dietary_habits: "mixed"
20
- physical_activity_level: "low"
21
 
22
  personal_medical_history:
23
- known_genetic_mutations: []
 
24
  previous_cancers: [] # No previous cancers
25
- chronic_illnesses: ["diabetes"] # No COPD
26
 
27
  family_history:
28
- - relative: "uncle" # Not first-degree relative
29
- cancer_type: "lung"
30
  age_at_diagnosis: 68
31
- - relative: "cousin" # Not first-degree relative
32
- cancer_type: "lung"
 
 
33
  age_at_diagnosis: 55
 
 
34
 
35
- female_specific: null # Not applicable for male profile
36
-
37
- current_concerns_or_symptoms: "Current smoker with no specific symptoms"
38
-
39
- clinical_observations:
40
- - test_name: "Complete Blood Count"
41
- value: "Normal"
42
- unit: "descriptive"
43
- reference_range: "Normal"
44
- date: "2025-09-30"
45
- - test_name: "Chest X-Ray"
46
- value: "Clear"
47
- unit: "descriptive"
48
- reference_range: "Normal"
49
- date: "2025-09-30"
50
-
51
- risks_scores: []
 
2
  # Tests native hawaiian/pacific islander race category and comprehensive variable coverage
3
 
4
  demographics:
5
+ age_years: 62 # Center value used in model
6
  sex: male
7
+ ethnicity: pacific_islander # Tests highest race offset (1.027152)
8
+ education_level: 16 # College graduate (center value in model)
9
+ anthropometrics:
10
+ height_cm: 178.0 # centimeters
11
+ weight_kg: 85.0 # kilograms (BMI ~27, close to center value)
12
 
13
  lifestyle:
14
+ smoking:
15
+ status: current # Current smoker
16
+ pack_years: 35 # Calculated from intensity and duration
17
+ cigarettes_per_day: 25 # Moderate cigarettes per day
18
+ years_smoked: 27 # Center value used in model
19
  alcohol_consumption: light
20
+ physical_activity_level: low
 
21
 
22
  personal_medical_history:
23
+ chronic_conditions:
24
+ - diabetes
25
  previous_cancers: [] # No previous cancers
26
+ genetic_mutations: []
27
 
28
  family_history:
29
+ - relation: paternal_uncle # Not first-degree relative
30
+ cancer_type: lung_cancer
31
  age_at_diagnosis: 68
32
+ degree: "2"
33
+ side: paternal
34
+ - relation: paternal_cousin # Not first-degree relative
35
+ cancer_type: lung_cancer
36
  age_at_diagnosis: 55
37
+ degree: "3"
38
+ side: paternal
39
 
40
+ symptoms:
41
+ - symptom_type: persistent_cough
42
+ duration_days: 30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/dev/profile_plcom2012_edge_cases.yaml CHANGED
@@ -2,41 +2,54 @@
2
  # Tests model validation with missing required fields and edge cases
3
 
4
  demographics:
5
- age: 50 # Minimum age boundary for PLCOm2012 (50-80 range)
6
  sex: female
7
- ethnicity: "asian" # Tests different race offset (-0.466585)
8
- height: 1.73
9
- weight: 104
10
- education_level: 5
 
11
 
12
  lifestyle:
13
- smoking_status: current # Tests current smoker (smoking_status = 0)
14
- smoking_intensity_cpd: 17
15
- smoking_duration_years: 5
16
- smoking_quit_years: null # Not applicable for current smoker
17
  alcohol_consumption: heavy
18
 
19
  personal_medical_history:
 
20
  previous_cancers: [] # No previous cancers (cancer_hist = 0)
21
- chronic_illnesses: ["hypertension", "arthritis"] # No COPD (copd = 0)
22
 
23
  family_history:
24
- - relative: "grandfather" # Not first-degree relative - shouldn't count for lung cancer family history
25
- cancer_type: "lung"
26
  age_at_diagnosis: 75
27
- - relative: "aunt" # Not first-degree relative
28
- cancer_type: "lung"
 
 
29
  age_at_diagnosis: 68
30
- - relative: "sister" # First-degree relative but different cancer
31
- cancer_type: "ovarian"
 
 
32
  age_at_diagnosis: 55
 
 
33
 
34
  female_specific:
35
- menarche_age: 12
36
- menopause_age: 48
37
- pregnancies_count: 2
38
- breastfeeding_duration_months: 17
39
- hormone_replacement_therapy: "yes"
40
- oral_contraceptive_use: "no"
 
 
 
41
 
42
- current_concerns_or_symptoms: "Heavy smoker concerned about lung cancer risk"
 
 
 
2
  # Tests model validation with missing required fields and edge cases
3
 
4
  demographics:
5
+ age_years: 50 # Minimum age boundary for PLCOm2012 (50-80 range)
6
  sex: female
7
+ ethnicity: asian # Tests different race offset (-0.466585)
8
+ anthropometrics:
9
+ height_cm: 173.0
10
+ weight_kg: 104.0
11
+ education_level: 20
12
 
13
  lifestyle:
14
+ smoking:
15
+ status: current # Tests current smoker (smoking_status = 0)
16
+ cigarettes_per_day: 17
17
+ years_smoked: 5
18
  alcohol_consumption: heavy
19
 
20
  personal_medical_history:
21
+ chronic_conditions: []
22
  previous_cancers: [] # No previous cancers (cancer_hist = 0)
23
+ genetic_mutations: []
24
 
25
  family_history:
26
+ - relation: paternal_grandfather # Not first-degree relative - shouldn't count for lung cancer family history
27
+ cancer_type: lung_cancer
28
  age_at_diagnosis: 75
29
+ degree: "2"
30
+ side: paternal
31
+ - relation: paternal_aunt # Not first-degree relative
32
+ cancer_type: lung_cancer
33
  age_at_diagnosis: 68
34
+ degree: "2"
35
+ side: paternal
36
+ - relation: sister # First-degree relative but different cancer
37
+ cancer_type: ovarian_cancer
38
  age_at_diagnosis: 55
39
+ degree: "1"
40
+ side: unknown
41
 
42
  female_specific:
43
+ menstrual:
44
+ age_at_menarche: 12
45
+ age_at_menopause: 48
46
+ parity:
47
+ num_live_births: 2
48
+ age_at_first_live_birth: 25
49
+ hormone_use:
50
+ estrogen_use: current
51
+ oral_contraceptive_use: "N"
52
 
53
+ symptoms:
54
+ - symptom_type: persistent_cough
55
+ duration_days: 30
examples/synthetic/complex_and_acquired_risk/colorectal_risk_ibd.yaml CHANGED
@@ -13,30 +13,32 @@
13
  # 2. Correct Surveillance Protocol: The AI must recommend a surveillance colonoscopy with biopsies, not just a standard screening one. It should recommend a much shorter interval (e.g., "every 1-2 years") than the standard 10 years.
14
  # 3. Guideline Start Time: The AI should note that surveillance for IBD typically begins 8-10 years after diagnosis, and therefore this patient is due for surveillance now.
15
 
 
16
  demographics:
17
- age: 35
18
  sex: male
19
- ethnicity: "Caucasian"
 
 
 
 
20
  lifestyle:
21
- smoking_status: never
 
22
  alcohol_consumption: light
 
23
  family_history: []
 
24
  personal_medical_history:
25
- chronic_illnesses: ["Ulcerative Colitis (pancolitis) diagnosed at age 25"]
26
- current_concerns_or_symptoms: "My colitis is fairly well controlled, but I want to know about my cancer risk."
27
- clinical_observations:
28
- - test_name: "Calprotectin, Fecal"
29
- value: "350"
30
- unit: "mcg/g"
31
- reference_range: "< 50"
32
- date: "2025-05-10"
33
- - test_name: "CRP, High Sensitivity"
34
- value: "8.5"
35
- unit: "mg/L"
36
- reference_range: "< 3.0"
37
- date: "2025-05-10"
38
- - test_name: "Hemoglobin"
39
- value: "12.8"
40
- unit: "g/dL"
41
- reference_range: "13.5-17.5"
42
- date: "2025-05-10"
 
13
  # 2. Correct Surveillance Protocol: The AI must recommend a surveillance colonoscopy with biopsies, not just a standard screening one. It should recommend a much shorter interval (e.g., "every 1-2 years") than the standard 10 years.
14
  # 3. Guideline Start Time: The AI should note that surveillance for IBD typically begins 8-10 years after diagnosis, and therefore this patient is due for surveillance now.
15
 
16
+ schema_version: v1.0
17
  demographics:
18
+ age_years: 35
19
  sex: male
20
+ ethnicity: white
21
+ anthropometrics:
22
+ height_cm: 175.0
23
+ weight_kg: 70.0
24
+
25
  lifestyle:
26
+ smoking:
27
+ status: never
28
  alcohol_consumption: light
29
+
30
  family_history: []
31
+
32
  personal_medical_history:
33
+ chronic_conditions:
34
+ - ibd
35
+ previous_cancers: []
36
+ genetic_mutations: []
37
+
38
+ symptoms:
39
+ - symptom_type: abdominal_pain
40
+ duration_days: 30
41
+ - symptom_type: rectal_bleeding
42
+ duration_days: 14
43
+
44
+ clinical_tests: {}
 
 
 
 
 
 
examples/synthetic/complex_and_acquired_risk/complex_comorbidity.yaml CHANGED
@@ -1,73 +1,28 @@
1
- # Use Case: The Complex Comorbidity Case
2
- #
3
- # Why it was chosen:
4
- # This profile showcases the AI's ability to synthesize a multi-factorial risk profile with competing signals. It must correctly prioritize risks from demographics (African American), family history (prostate cancer), lifestyle (smoking), and clinical data (elevated PSA), while navigating comorbidities (Diabetes, HTN) and ambiguous lab results.
5
- #
6
- # How to understand the inputs:
7
- # - Multiple high-risk streams: Prostate (age, ethnicity, family hx, PSA) and Lung (smoking history).
8
- # - Ambiguous Lab Result: Mildly low hemoglobin. This is a key test of nuance.
9
- # - Comorbidities: Diabetes and hypertension are present, which are relevant health issues but must be distinguished from the primary cancer risks.
10
- # - Imaging Result: An Optomap scan result is included to test parsing of text-based reports.
11
- #
12
- # What to look for in a successful assessment:
13
- # 1. Prioritization: The report must correctly identify Prostate and Lung cancer as the highest-risk categories (e.g., Level 4 or 5).
14
- # 2. PSA Handling: The elevated PSA (5.8) must be flagged as a "Major" contributing factor for prostate cancer.
15
- # 3. Nuanced Reasoning (Anemia): The AI's `reasoning` should acknowledge the mild anemia. An exceptional response would link it as a secondary reason to ensure a timely colonoscopy (to rule out GI bleed).
16
- # 4. Advanced Dx Recommendation: The AI should recommend a more advanced prostate cancer biomarker test (like Proclarix) as a logical next step to clarify the "grey zone" PSA.
17
- # 5. Lung Screening: It must correctly identify him as eligible for an annual LDCT scan based on his age and pack-year history.
18
-
19
  demographics:
20
- age: 66
21
  sex: male
22
- ethnicity: "African American"
 
 
 
23
  lifestyle:
24
- smoking_status: former
25
- smoking_pack_years: 30
 
26
  alcohol_consumption: moderate
27
- personal_medical_history:
28
- known_genetic_mutations: []
29
- previous_cancers: []
30
- chronic_illnesses:
31
- - "Type 2 Diabetes"
32
- - "Hypertension"
33
  family_history:
34
- - relative: father
35
- cancer_type: prostate
36
  age_at_diagnosis: 62
37
- current_concerns_or_symptoms: "Frequent urination at night and some recent fatigue."
38
- clinical_observations:
39
- - test_name: "Prostate-Specific Antigen (PSA)"
40
- value: "5.8"
41
- unit: "ng/mL"
42
- reference_range: "< 4.0 ng/mL"
43
- date: "2025-06-20"
44
- - test_name: "Hemoglobin A1c"
45
- value: "7.5"
46
- unit: "%"
47
- reference_range: "< 5.7 %"
48
- date: "2025-05-10"
49
- - test_name: "Hemoglobin"
50
- value: "13.1"
51
- unit: "g/dL"
52
- reference_range: "13.5-17.5"
53
- date: "2025-05-10"
54
- - test_name: "Creatinine"
55
- value: "1.35"
56
- unit: "mg/dL"
57
- reference_range: "0.7-1.3"
58
- date: "2025-05-10"
59
- - test_name: "eGFR"
60
- value: "55"
61
- unit: "mL/min/1.73m^2"
62
- reference_range: ">60"
63
- date: "2025-05-10"
64
- - test_name: "Chest X-ray"
65
- value: "No acute cardiopulmonary process."
66
- unit: "N/A"
67
- reference_range: "N/A"
68
- date: "2023-08-01"
69
- - test_name: "Optomap Retinal Scan"
70
- value: "Mild non-proliferative diabetic retinopathy noted. No signs of choroidal melanoma."
71
- unit: "N/A"
72
- reference_range: "N/A"
73
- date: "2024-11-15"
 
1
+ schema_version: v1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  demographics:
3
+ age_years: 66
4
  sex: male
5
+ ethnicity: black
6
+ anthropometrics:
7
+ height_cm: 175.0
8
+ weight_kg: 70.0
9
  lifestyle:
10
+ smoking:
11
+ status: former
12
+ pack_years: 30
13
  alcohol_consumption: moderate
 
 
 
 
 
 
14
  family_history:
15
+ - relation: father
16
+ cancer_type: prostate_cancer
17
  age_at_diagnosis: 62
18
+ degree: '1'
19
+ side: unknown
20
+ personal_medical_history:
21
+ chronic_conditions:
22
+ - diabetes
23
+ previous_cancers: []
24
+ genetic_mutations: []
25
+ symptoms:
26
+ - symptom_type: weight_loss
27
+ duration_days: 14
28
+ clinical_tests: {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/synthetic/complex_and_acquired_risk/kidney_cancer_esrd.yaml CHANGED
@@ -1,48 +1,24 @@
1
- # Use Case: 02 - Kidney Cancer (Acquired Risk from ESRD)
2
- #
3
- # Why it was chosen:
4
- # This profile tests the AI's understanding of acquired (non-genetic) high-risk conditions. Long-term dialysis is a known major risk factor for a specific type of kidney cancer (acquired cystic kidney disease-associated RCC). This moves beyond simple lifestyle/family history risks.
5
- #
6
- # How to understand the inputs:
7
- # - The key information is the 8-year history of end-stage renal disease (ESRD) and dialysis.
8
- # - Symptoms (flank pain, hematuria) are classic signs of potential kidney cancer.
9
- # - The lab work (anemia, high creatinine) is expected with ESRD but could also be worsened by a tumor.
10
- #
11
- # What to look for in a successful assessment:
12
- # 1. Correct Module Trigger: The AI must identify Kidney Cancer as a major risk (Level 5).
13
- # 2. Risk Rationale: The `explanation` must correctly cite long-term dialysis as the primary risk factor, as specified in the `kidney.yaml` module.
14
- # 3. Dx Recommendation: A renal ultrasound or MRI/CT should be recommended as a "Critical" (Level 5) next step.
15
- # 4. Contextual Reasoning: The AI should note that while anemia is expected in ESRD, the new symptoms make investigating for a renal mass urgent.
16
-
17
  demographics:
18
- age: 55
19
  sex: male
20
- ethnicity: "Caucasian"
 
 
 
21
  lifestyle:
22
- smoking_status: never
 
23
  alcohol_consumption: none
24
- personal_medical_history:
25
- chronic_illnesses:
26
- - "End-Stage Renal Disease (ESRD) secondary to Polycystic Kidney Disease"
27
- - "On hemodialysis for 8 years"
28
  family_history:
29
- - relative: mother
30
- cancer_type: breast
31
  age_at_diagnosis: 65
32
- current_concerns_or_symptoms: "Intermittent dull pain in my left side and I think I saw some blood in my urine last week."
33
- clinical_observations:
34
- - test_name: "Creatinine"
35
- value: "7.8"
36
- unit: "mg/dL"
37
- reference_range: "0.7-1.3"
38
- date: "2025-06-15"
39
- - test_name: "Hemoglobin"
40
- value: "9.5"
41
- unit: "g/dL"
42
- reference_range: "13.5-17.5"
43
- date: "2025-06-15"
44
- - test_name: "Urine Dipstick"
45
- value: "2+ blood"
46
- unit: "N/A"
47
- reference_range: "Negative"
48
- date: "2025-06-28"
 
1
+ schema_version: v1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  demographics:
3
+ age_years: 55
4
  sex: male
5
+ ethnicity: white
6
+ anthropometrics:
7
+ height_cm: 175.0
8
+ weight_kg: 70.0
9
  lifestyle:
10
+ smoking:
11
+ status: never
12
  alcohol_consumption: none
 
 
 
 
13
  family_history:
14
+ - relation: mother
15
+ cancer_type: breast_cancer
16
  age_at_diagnosis: 65
17
+ degree: '1'
18
+ side: unknown
19
+ personal_medical_history:
20
+ chronic_conditions: []
21
+ previous_cancers: []
22
+ genetic_mutations: []
23
+ symptoms: []
24
+ clinical_tests: {}
 
 
 
 
 
 
 
 
 
examples/synthetic/complex_and_acquired_risk/leukemia_therapy_related.yaml CHANGED
@@ -1,46 +1,20 @@
1
- # Use Case: 03 - Leukemia (Therapy-Related Acute Leukemia)
2
- #
3
- # Why it was chosen:
4
- # A critical safety and nuance test. The AI must recognize that prior cytotoxic therapy (chemo/radiation) is a major risk factor for secondary malignancies, especially leukemia (t-AML/t-MDS). It must also identify the urgent nature of the symptoms and lab results.
5
- #
6
- # How to understand the inputs:
7
- # - The most important input is the history of treatment for breast cancer 10 years ago.
8
- # - The symptoms (fatigue, bruising) are classic signs of bone marrow failure.
9
- # - The CBC result is the key objective finding: pancytopenia (low levels of all three blood cell lines) is a massive red flag.
10
- #
11
- # What to look for in a successful assessment:
12
- # 1. Urgency Recognition: The assessment must immediately flag the leukemia risk as Level 5 and emphasize the need for immediate medical consultation.
13
- # 2. Correct Risk Factor: The `reasoning` must connect the prior chemotherapy/radiation to the current risk of a therapy-related myeloid neoplasm.
14
- # 3. Lab Interpretation: The AI must identify that low hemoglobin, low platelets, and low white blood cells (pancytopenia) are highly alarming findings requiring urgent hematological investigation.
15
- # 4. Dx Recommendation: A bone marrow biopsy is the definitive test, but the most important recommendation is an urgent referral to a hematologist. A standard "CBC with differential" is a Level 5 recommendation.
16
-
17
  demographics:
18
- age: 45
19
  sex: female
20
- ethnicity: "Caucasian"
 
 
 
21
  lifestyle:
22
- smoking_status: never
 
23
  alcohol_consumption: light
24
  family_history: []
25
  personal_medical_history:
 
26
  previous_cancers:
27
- - "Breast Cancer (Stage II) at age 35"
28
- chronic_illnesses:
29
- - "Treated with Adriamycin/Cytoxan chemotherapy and radiation, completed 2011"
30
- current_concerns_or_symptoms: "For the past month, I've been extremely tired, more than usual. I've also noticed a lot of small bruises on my legs and my gums bled a lot when I brushed my teeth this morning."
31
- clinical_observations:
32
- - test_name: "WBC Count"
33
- value: "2.1"
34
- unit: "K/uL"
35
- reference_range: "4.5-11.0"
36
- date: "2025-06-29"
37
- - test_name: "Hemoglobin"
38
- value: "8.9"
39
- unit: "g/dL"
40
- reference_range: "12.0-16.0"
41
- date: "2025-06-29"
42
- - test_name: "Platelet Count"
43
- value: "45"
44
- unit: "K/uL"
45
- reference_range: "150-450"
46
- date: "2025-06-29"
 
1
+ schema_version: v1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  demographics:
3
+ age_years: 45
4
  sex: female
5
+ ethnicity: white
6
+ anthropometrics:
7
+ height_cm: 175.0
8
+ weight_kg: 70.0
9
  lifestyle:
10
+ smoking:
11
+ status: never
12
  alcohol_consumption: light
13
  family_history: []
14
  personal_medical_history:
15
+ chronic_conditions: []
16
  previous_cancers:
17
+ - other_cancer
18
+ genetic_mutations: []
19
+ symptoms: []
20
+ clinical_tests: {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/synthetic/complex_and_acquired_risk/lymphoma_immunosuppression.yaml CHANGED
@@ -1,45 +1,19 @@
1
- # Use Case: 04 - Lymphoma (Post-Transplant Immunosuppression)
2
- #
3
- # Why it was chosen:
4
- # Similar to the leukemia case, this tests the AI's ability to identify risk from a specific medical history (immunosuppression) rather than lifestyle or family history. Post-transplant lymphoproliferative disorder (PTLD) is a known risk.
5
- #
6
- # How to understand the inputs:
7
- # - The kidney transplant and use of immunosuppressants are the key risk factors.
8
- # - The "B symptoms" (night sweats, fatigue) and a new swollen lymph node are classic signs of lymphoma.
9
- # - The elevated LDH is a non-specific but corroborating marker of high cell turnover.
10
- #
11
- # What to look for in a successful assessment:
12
- # 1. Risk Connection: The AI must connect the use of immunosuppressants for a transplant to the elevated risk of lymphoma (specifically PTLD), rating it Level 4/5.
13
- # 2. Symptom Triage: The reasoning must identify the combination of a new lymph node and "B symptoms" as highly suspicious.
14
- # 3. Dx Recommendation: The AI should indicate that a biopsy of the lymph node is the definitive diagnostic step and that a PET/CT scan would be used for staging *if* lymphoma is confirmed. It should NOT recommend a PET/CT as the first step.
15
- # 4. Low Risk for Kidney Cancer: As a bonus, the AI should correctly note that while he had a transplant, his risk of native kidney cancer is now lower (as the diseased kidneys are gone/non-functional).
16
-
17
  demographics:
18
- age: 38
19
  sex: male
20
- ethnicity: "Hispanic"
 
 
 
21
  lifestyle:
22
- smoking_status: never
 
23
  alcohol_consumption: none
24
  family_history: []
25
  personal_medical_history:
26
- chronic_illnesses:
27
- - "Kidney transplant recipient (5 years ago)"
28
- - "On long-term immunosuppressant medication (Tacrolimus, Mycophenolate)"
29
- current_concerns_or_symptoms: "I've been waking up drenched in sweat for the past few weeks. I also found a painless lump in the side of my neck that wasn't there before."
30
- clinical_observations:
31
- - test_name: "LDH (Lactate Dehydrogenase)"
32
- value: "350"
33
- unit: "U/L"
34
- reference_range: "140-280"
35
- date: "2025-06-25"
36
- - test_name: "CBC"
37
- value: "Normal"
38
- unit: "N/A"
39
- reference_range: "N/A"
40
- date: "2025-06-25"
41
- - test_name: "Tacrolimus Level"
42
- value: "6.5"
43
- unit: "ng/mL"
44
- reference_range: "5-10"
45
- date: "2025-06-25"
 
1
+ schema_version: v1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  demographics:
3
+ age_years: 38
4
  sex: male
5
+ ethnicity: hispanic
6
+ anthropometrics:
7
+ height_cm: 175.0
8
+ weight_kg: 70.0
9
  lifestyle:
10
+ smoking:
11
+ status: never
12
  alcohol_consumption: none
13
  family_history: []
14
  personal_medical_history:
15
+ chronic_conditions: []
16
+ previous_cancers: []
17
+ genetic_mutations: []
18
+ symptoms: []
19
+ clinical_tests: {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/synthetic/complex_and_acquired_risk/real_world_data.yaml CHANGED
@@ -1,88 +1,21 @@
1
- # Use Case: The Real-World Data Onslaught
2
- #
3
- # Why it was chosen:
4
- # This is a stress test of the AI's ability to parse a large, messy, and chronologically complex set of data that mimics a real-world electronic health record. The objective is to demonstrate that the AI can successfully find the critical, actionable "signals" (a new suspicious mole, eligibility for lung screening) from a large amount of "noise" (resolved past issues, irrelevant comorbidities, normal labs).
5
- #
6
- # How to understand the inputs:
7
- # - A long list of 10 clinical observations spanning 15 years.
8
- # - Multiple comorbidities (COPD, Osteoporosis, GERD).
9
- # - A critical, recent clinical note about a "new, irregular mole on her back."
10
- # - A history of total hysterectomy, which is a key piece of "negative" information.
11
- #
12
- # What to look for in a successful assessment:
13
- # 1. Signal vs. Noise: The `identified_risk_factors` should prominently feature the new mole and smoking history. It should NOT list things like the old appendectomy or resolved UTIs.
14
- # 2. Correct Triage: Skin Cancer and Lung Cancer should be flagged as the highest risks.
15
- # 3. Correct De-escalation: Cervical, Endometrial, and Ovarian cancer risk should all be correctly identified as very low (Level 1) because of the total hysterectomy. The reasoning must cite the hysterectomy.
16
- # 4. Actionable & Specific Dx Recommendation:
17
- # - It must recommend an annual LDCT for lung screening (Level 4/5).
18
- # - It must recommend an urgent dermatology referral. An exceptional response would reference the `dermasensor_skin_assesment` protocol, correctly positioning it as a tool her PCP could use to evaluate the lesion.
19
-
20
  demographics:
21
- age: 72
22
  sex: female
23
- ethnicity: "Caucasian"
 
 
 
24
  lifestyle:
25
- smoking_status: current
26
- smoking_pack_years: 25
 
27
  alcohol_consumption: light
 
28
  personal_medical_history:
29
- known_genetic_mutations: []
 
30
  previous_cancers: []
31
- chronic_illnesses:
32
- - "COPD"
33
- - "Osteoporosis"
34
- - "GERD"
35
- - "History of Total Hysterectomy for benign fibroids at age 45"
36
- - "History of Appendectomy at age 20"
37
- current_concerns_or_symptoms: "My doctor mentioned a spot on my back but I haven't seen a specialist yet. I also get short of breath but I assume it's my COPD."
38
- clinical_observations:
39
- - test_name: "Primary Care Visit Note"
40
- value: "Patient notes a new, irregular mole on her back, approx 7mm, with some color variation. Advised dermatology consult."
41
- unit: "N/A"
42
- reference_range: "N/A"
43
- date: "2025-05-01"
44
- - test_name: "Spirometry (PFT)"
45
- value: "FEV1 60% of predicted"
46
- unit: "%"
47
- reference_range: ">80%"
48
- date: "2025-04-10"
49
- - test_name: "Bone Density Scan (T-score)"
50
- value: "-2.7"
51
- unit: "SD"
52
- reference_range: "> -1.0"
53
- date: "2024-03-15"
54
- - test_name: "Lipid Panel"
55
- value: "LDL 110, HDL 50, Total 180"
56
- unit: "mg/dL"
57
- reference_range: "Normal"
58
- date: "2025-04-10"
59
- - test_name: "Complete Blood Count"
60
- value: "Normal"
61
- unit: "N/A"
62
- reference_range: "N/A"
63
- date: "2025-04-10"
64
- - test_name: "Vitamin B12"
65
- value: "450"
66
- unit: "pg/mL"
67
- reference_range: "200-900"
68
- date: "2025-04-10"
69
- - test_name: "Pap Smear"
70
- value: "N/A post-hysterectomy"
71
- unit: "N/A"
72
- reference_range: "N/A"
73
- date: "2010-01-01"
74
- - test_name: "Colonoscopy"
75
- value: "Normal to cecum, small diverticula noted."
76
- unit: "N/A"
77
- reference_range: "N/A"
78
- date: "2018-07-22"
79
- - test_name: "Urinalysis"
80
- value: "Trace bacteria, resolved with antibiotics."
81
- unit: "N/A"
82
- reference_range: "N/A"
83
- date: "2019-05-12"
84
- - test_name: "Mammogram"
85
- value: "Scattered fibroglandular densities. No suspicious mass or calcification."
86
- unit: "N/A"
87
- reference_range: "N/A"
88
- date: "2024-02-01"
 
1
+ schema_version: v1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  demographics:
3
+ age_years: 72
4
  sex: female
5
+ ethnicity: white
6
+ anthropometrics:
7
+ height_cm: 175.0
8
+ weight_kg: 70.0
9
  lifestyle:
10
+ smoking:
11
+ status: current
12
+ pack_years: 25
13
  alcohol_consumption: light
14
+ family_history: []
15
  personal_medical_history:
16
+ chronic_conditions:
17
+ - copd
18
  previous_cancers: []
19
+ genetic_mutations: []
20
+ symptoms: []
21
+ clinical_tests: {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/synthetic/complex_and_acquired_risk/stomach_cancer_high_risk.yaml CHANGED
@@ -1,52 +1,25 @@
1
- # Use Case: 01 - Stomach Cancer (High-Risk Ethnicity & History)
2
- #
3
- # Why it was chosen:
4
- # Tests the system's knowledge of a less common but important cancer, incorporating ethnic and specific clinical risk factors. It demonstrates the ability to connect a history of infections (H. pylori) and pre-malignant conditions (atrophic gastritis) to a specific cancer risk.
5
- #
6
- # How to understand the inputs:
7
- # - Ethnicity (Korean) is a known demographic risk factor for stomach cancer.
8
- # - Family history (father) provides a genetic predisposition signal.
9
- # - The clinical observations of a past H. pylori infection and diagnosed atrophic gastritis are major, direct risk factors.
10
- # - Mild anemia (low Hgb/MCV) is a potential symptom of gastric bleeding.
11
- #
12
- # What to look for in a successful assessment:
13
- # 1. Risk Prioritization: Stomach cancer should be identified as the highest-risk cancer (Level 4/5).
14
- # 2. Factor Contribution: Atrophic gastritis and family history must be listed as "Major" contributing factors.
15
- # 3. Dx Recommendation: An upper endoscopy must be strongly recommended (Level 5 - Critical) for surveillance.
16
- # 4. Symptom Connection: The reasoning should connect the mild anemia to the possibility of chronic GI blood loss, reinforcing the need for endoscopy.
17
-
18
  demographics:
19
- age: 68
20
  sex: male
21
- ethnicity: "Korean"
 
 
 
22
  lifestyle:
23
- smoking_status: former
24
- smoking_pack_years: 10
 
25
  alcohol_consumption: light
26
- dietary_habits: "High in salted and preserved foods"
27
  family_history:
28
- - relative: father
29
- cancer_type: stomach
30
  age_at_diagnosis: 72
 
 
31
  personal_medical_history:
32
- chronic_illnesses:
33
- - "Chronic Atrophic Gastritis"
34
- - "History of treated H. pylori infection (2015)"
35
- - "Hypertension"
36
- current_concerns_or_symptoms: "Occasional indigestion and feeling full early after meals."
37
- clinical_observations:
38
- - test_name: "Hemoglobin"
39
- value: "12.9"
40
- unit: "g/dL"
41
- reference_range: "13.5-17.5"
42
- date: "2025-06-01"
43
- - test_name: "MCV"
44
- value: "79"
45
- unit: "fL"
46
- reference_range: "80-100"
47
- date: "2025-06-01"
48
- - test_name: "Gastrin Level"
49
- value: "250"
50
- unit: "pg/mL"
51
- reference_range: "<100"
52
- date: "2024-11-20"
 
1
+ schema_version: v1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  demographics:
3
+ age_years: 68
4
  sex: male
5
+ ethnicity: unknown
6
+ anthropometrics:
7
+ height_cm: 175.0
8
+ weight_kg: 70.0
9
  lifestyle:
10
+ smoking:
11
+ status: former
12
+ pack_years: 10
13
  alcohol_consumption: light
 
14
  family_history:
15
+ - relation: father
16
+ cancer_type: gastro_oesophageal_cancer
17
  age_at_diagnosis: 72
18
+ degree: '1'
19
+ side: unknown
20
  personal_medical_history:
21
+ chronic_conditions: []
22
+ previous_cancers: []
23
+ genetic_mutations: []
24
+ symptoms: []
25
+ clinical_tests: {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/synthetic/complex_and_acquired_risk/thyroid_cancer_radiation.yaml CHANGED
@@ -1,41 +1,20 @@
1
- # Use Case: 05 - Thyroid Cancer (Childhood Radiation Exposure)
2
- #
3
- # Why it was chosen:
4
- # This profile tests the AI's knowledge of a specific, potent environmental risk factor: childhood radiation to the neck. It also includes a direct clinical finding (a thyroid nodule) that requires a clear follow-up plan.
5
- #
6
- # How to understand the inputs:
7
- # - The history of radiation for Hodgkin's lymphoma as a teenager is the single most important risk factor.
8
- # - The new clinical observation of a palpable thyroid nodule is the primary actionable finding.
9
- # - The TSH is normal, which is a key piece of information (most patients with thyroid cancer are euthyroid).
10
- #
11
- # What to look for in a successful assessment:
12
- # 1. Major Risk Identification: The AI must identify prior neck radiation as a "Major" contributor to Thyroid Cancer risk (Level 5).
13
- # 2. Actionable Finding: The AI must recognize the palpable nodule as needing immediate evaluation.
14
- # 3. Correct Dx Pathway: The recommendations should be a Thyroid Ultrasound followed by a potential Fine Needle Aspiration (FNA) biopsy, which is the standard workup. It should not jump to recommending surgery.
15
- # 4. TSH Nuance: The `reasoning` should note that a normal TSH does not lower the suspicion for cancer in the presence of a nodule.
16
-
17
  demographics:
18
- age: 40
19
  sex: female
20
- ethnicity: "Caucasian"
 
 
 
21
  lifestyle:
22
- smoking_status: never
 
23
  alcohol_consumption: light
24
  family_history: []
25
  personal_medical_history:
 
26
  previous_cancers:
27
- - "Hodgkin's Lymphoma at age 16"
28
- chronic_illnesses:
29
- - "Treated with radiation therapy to neck and chest"
30
- current_concerns_or_symptoms: "My primary care doctor felt a lump in my neck during my physical last week."
31
- clinical_observations:
32
- - test_name: "Physical Exam Note"
33
- value: "Firm, non-tender 2 cm nodule noted in the right lobe of the thyroid."
34
- unit: "N/A"
35
- reference_range: "N/A"
36
- date: "2025-06-22"
37
- - test_name: "TSH"
38
- value: "2.1"
39
- unit: "mIU/L"
40
- reference_range: "0.4-4.5"
41
- date: "2025-06-22"
 
1
+ schema_version: v1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  demographics:
3
+ age_years: 40
4
  sex: female
5
+ ethnicity: white
6
+ anthropometrics:
7
+ height_cm: 175.0
8
+ weight_kg: 70.0
9
  lifestyle:
10
+ smoking:
11
+ status: never
12
  alcohol_consumption: light
13
  family_history: []
14
  personal_medical_history:
15
+ chronic_conditions: []
16
  previous_cancers:
17
+ - other_cancer
18
+ genetic_mutations: []
19
+ symptoms: []
20
+ clinical_tests: {}
 
 
 
 
 
 
 
 
 
 
 
examples/synthetic/diagnostic_and_screening_pathways/executive_checkup.yaml CHANGED
@@ -1,72 +1,25 @@
1
- # Use Case: The Executive Check-Up ("The Worried Well")
2
- #
3
- # Why it was chosen:
4
- # This profile demonstrates the AI's ability to act as a "Chief Realism Officer." It tests the system's capacity to process a large volume of data, correctly identify that the overall cancer risk is low despite some minor non-cancer-related health issues, and provide responsible, evidence-based guidance on advanced, elective tests like MCEDs (e.g., Galleri). The goal is to build trust by not being alarmist and by providing nuanced education.
5
- #
6
- # How to understand the inputs:
7
- # - The user is a 58-year-old male with a healthy lifestyle and no significant family history of cancer.
8
- # - He has a list of clinical observations from an annual check-up.
9
- # - Critically, some labs are borderline or slightly abnormal (Uric Acid, Vitamin D, LDL, ALT), but these are not primary cancer risk drivers.
10
- # - His "Current Concerns" explicitly ask about advanced screening.
11
- #
12
- # What to look for in a successful assessment:
13
- # 1. Overall Risk Score: Should be low (e.g., < 25/100).
14
- # 2. Risk Assessments: All individual cancer risks should be assessed as Level 1 or 2 (Low).
15
- # 3. Identified Risk Factors: The AI should correctly identify "Age" as a minor demographic risk factor but should *not* list the borderline labs as significant cancer risk factors.
16
- # 4. Dx Recommendations:
17
- # - Standard screenings (Colonoscopy, PSA) should be recommended appropriately for his age (e.g., Level 4 - Recommended).
18
- # - Advanced tests like Galleri should be rated as "Optional" (Level 3), NOT "Recommended."
19
- # 5. Reasoning/Summary: The text output must explain *why* Galleri is optional, referencing its limitations (not FDA-approved, risk of false positives/negatives) as detailed in the `grail_galleri.yaml` protocol. It should also correctly contextualize his minor lab abnormalities as being related to metabolic health or common deficiencies, not cancer.
20
-
21
  demographics:
22
- age: 58
23
  sex: male
24
- ethnicity: "Caucasian"
 
 
 
25
  lifestyle:
26
- smoking_status: never
 
27
  alcohol_consumption: light
28
- dietary_habits: "Balanced, low-carb diet"
29
- physical_activity_level: "Regular, 4-5 times per week"
30
  family_history:
31
- - relative: grandfather
32
- cancer_type: skin
33
  age_at_diagnosis: 80
 
 
34
  personal_medical_history:
35
- known_genetic_mutations: []
36
  previous_cancers: []
37
- chronic_illnesses: []
38
- current_concerns_or_symptoms: "I feel great, but I want to be proactive and get the most advanced cancer screening available. I've read about tests that can find 50 cancers at once and I want to know if I should get one."
39
- clinical_observations:
40
- - test_name: "Complete Blood Count (CBC)"
41
- value: "Normal"
42
- unit: "N/A"
43
- date: "2025-06-15"
44
- - test_name: "Comprehensive Metabolic Panel (CMP)"
45
- value: "Normal"
46
- unit: "N/A"
47
- date: "2025-06-15"
48
- - test_name: "Uric Acid"
49
- value: "7.5"
50
- unit: "mg/dL"
51
- reference_range: "4.0-7.0"
52
- date: "2025-06-15"
53
- - test_name: "Vitamin D, 25-Hydroxy"
54
- value: "25"
55
- unit: "ng/mL"
56
- reference_range: "30-100"
57
- date: "2025-06-15"
58
- - test_name: "LDL Cholesterol"
59
- value: "135"
60
- unit: "mg/dL"
61
- reference_range: "< 100"
62
- date: "2025-06-15"
63
- - test_name: "ALT (Alanine Aminotransferase)"
64
- value: "48"
65
- unit: "U/L"
66
- reference_range: "< 45"
67
- date: "2025-06-15"
68
- - test_name: "Cardiac Calcium Score"
69
- value: "0"
70
- unit: "Agatston score"
71
- reference_range: "0"
72
- date: "2025-01-20"
 
1
+ schema_version: v1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  demographics:
3
+ age_years: 58
4
  sex: male
5
+ ethnicity: white
6
+ anthropometrics:
7
+ height_cm: 175.0
8
+ weight_kg: 70.0
9
  lifestyle:
10
+ smoking:
11
+ status: never
12
  alcohol_consumption: light
13
+ physical_activity_level: moderate
 
14
  family_history:
15
+ - relation: paternal_grandfather
16
+ cancer_type: melanoma
17
  age_at_diagnosis: 80
18
+ degree: '2'
19
+ side: paternal
20
  personal_medical_history:
21
+ chronic_conditions: []
22
  previous_cancers: []
23
+ genetic_mutations: []
24
+ symptoms: []
25
+ clinical_tests: {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/synthetic/diagnostic_and_screening_pathways/indeterminate_imaging_birads3.yaml CHANGED
@@ -1,31 +1,20 @@
1
- # Use Case: 17 - Indeterminate Imaging Finding (BI-RADS 3)
2
- #
3
- # Why it was chosen:
4
- # To test the AI's ability to interpret a common but ambiguous imaging result (BI-RADS 3) and recommend the appropriate, non-alarming follow-up, which is short-interval surveillance, not immediate biopsy.
5
- #
6
- # How to understand the inputs:
7
- # - The user has dense breasts, a risk factor in itself.
8
- # - The mammogram finding of "architectural distortion" and the "BI-RADS 3" category are the key inputs.
9
- #
10
- # What to look for in a successful assessment:
11
- # 1. Correct Interpretation: The AI must understand that BI-RADS 3 means "Probably Benign" with a <2% chance of malignancy.
12
- # 2. Correct Follow-up: The standard recommendation for a BI-RADS 3 finding is a short-interval (6-month) follow-up diagnostic mammogram. The AI should recommend this (Level 4) and NOT jump to recommending a biopsy (which would be for BI-RADS 4 or 5).
13
- # 3. Context for Dense Breasts: The AI should mention that breast density can lower mammogram sensitivity and that supplemental screening with ultrasound or MRI is a topic to discuss with her provider.
14
-
15
  demographics:
16
- age: 62
17
  sex: female
18
- ethnicity: "Caucasian"
 
 
 
19
  lifestyle:
20
- smoking_status: former
21
- smoking_pack_years: 5
 
22
  alcohol_consumption: light
23
- personal_medical_history:
24
- chronic_illnesses: ["Dense Breasts (Type C)"]
25
  family_history: []
26
- current_concerns_or_symptoms: "My mammogram report came back with something called 'BI-RADS 3' and I'm not sure what it means."
27
- clinical_observations:
28
- - test_name: "3D Mammogram Report"
29
- value: "Breasts are heterogeneously dense. In the left breast at 2 o'clock, there is an area of architectural distortion. No suspicious mass or calcifications. ASSESSMENT: BI-RADS 3: Probably Benign. Recommend short-interval follow-up."
30
- unit: "N/A"
31
- date: "2025-06-12"
 
1
+ schema_version: v1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  demographics:
3
+ age_years: 62
4
  sex: female
5
+ ethnicity: white
6
+ anthropometrics:
7
+ height_cm: 175.0
8
+ weight_kg: 70.0
9
  lifestyle:
10
+ smoking:
11
+ status: former
12
+ pack_years: 5
13
  alcohol_consumption: light
 
 
14
  family_history: []
15
+ personal_medical_history:
16
+ chronic_conditions: []
17
+ previous_cancers: []
18
+ genetic_mutations: []
19
+ symptoms: []
20
+ clinical_tests: {}
examples/synthetic/diagnostic_and_screening_pathways/mrd_surveillance_candidate.yaml CHANGED
@@ -1,37 +1,20 @@
1
- # Use Case: 16 - Minimal Residual Disease (MRD) Candidate
2
- #
3
- # Why it was chosen:
4
- # Tests knowledge of the post-treatment surveillance space, which is a sophisticated and growing area of oncology. The AI needs to differentiate a test for *recurrence* risk from a test for initial screening.
5
- #
6
- # How to understand the inputs:
7
- # - The history of Stage III colon cancer and recent completion of chemotherapy are the key facts.
8
- # - The user is asking about a specific type of test ("blood tests to see if it's coming back").
9
- #
10
- # What to look for in a successful assessment:
11
- # 1. Correct Test Identification: The AI must correctly identify `Guardant Reveal` as the appropriate test for this clinical scenario (colorectal cancer MRD testing).
12
- # 2. Correct Use Case: The `rationale` for recommending Guardant Reveal (Level 4 - Recommended, as it's still an advanced test) must accurately describe its purpose: detecting ctDNA to assess recurrence risk and guide future decisions.
13
- # 3. Distinction from Other Tests: The AI must NOT recommend a screening test like Cologuard, which is inappropriate in this context. It should also correctly explain this is different from a therapy selection test like `Guardant360`.
14
-
15
  demographics:
16
- age: 58
17
  sex: male
18
- ethnicity: "Caucasian"
 
 
 
19
  lifestyle:
20
- smoking_status: never
 
21
  alcohol_consumption: moderate
22
  family_history: []
23
  personal_medical_history:
 
24
  previous_cancers:
25
- - "Stage III Colon Cancer, s/p hemicolectomy and adjuvant FOLFOX chemotherapy"
26
- chronic_illnesses: ["Chemotherapy-induced peripheral neuropathy"]
27
- current_concerns_or_symptoms: "I finished my chemo a couple of months ago. My last CT scan was clear. I want to know about those new blood tests to see if the cancer is coming back."
28
- clinical_observations:
29
- - test_name: "CEA (Carcinoembryonic Antigen)"
30
- value: "1.5"
31
- unit: "ng/mL"
32
- reference_range: "< 5.0"
33
- date: "2025-06-15"
34
- - test_name: "CT Chest/Abdomen/Pelvis"
35
- value: "No evidence of metastatic disease."
36
- unit: "N/A"
37
- date: "2025-05-20"
 
1
+ schema_version: v1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  demographics:
3
+ age_years: 58
4
  sex: male
5
+ ethnicity: white
6
+ anthropometrics:
7
+ height_cm: 175.0
8
+ weight_kg: 70.0
9
  lifestyle:
10
+ smoking:
11
+ status: never
12
  alcohol_consumption: moderate
13
  family_history: []
14
  personal_medical_history:
15
+ chronic_conditions: []
16
  previous_cancers:
17
+ - other_cancer
18
+ genetic_mutations: []
19
+ symptoms: []
20
+ clinical_tests: {}
 
 
 
 
 
 
 
 
 
examples/synthetic/diagnostic_and_screening_pathways/post_positive_cologuard.yaml CHANGED
@@ -1,31 +1,19 @@
1
- # Use Case: 15 - Post-Positive Cologuard
2
- #
3
- # Why it was chosen:
4
- # A crucial test of the AI's adherence to the "screening cascade." It must demonstrate that it understands a positive non-invasive test is not a diagnosis, but a trigger for a mandatory diagnostic follow-up. This is a key patient safety and education moment.
5
- #
6
- # How to understand the inputs:
7
- # - The user is average risk otherwise.
8
- # - The "Positive" Cologuard result is the only significant finding.
9
- #
10
- # What to look for in a successful assessment:
11
- # 1. Mandatory Follow-up: The recommendation for a Colonoscopy must be "Critical" (Level 5).
12
- # 2. Clear Rationale: The `reasoning` and `overall_summary` must state unequivocally that a colonoscopy is the required next step to determine the cause of the positive result, as per the `exact_sciences_cologuard.yaml` protocol.
13
- # 3. Reassurance and Context: The AI should explain that a positive result does not mean she has cancer, as false positives can occur, but that a colonoscopy is the only way to be sure. This manages anxiety while ensuring compliance.
14
-
15
  demographics:
16
- age: 51
17
  sex: female
18
- ethnicity: "Caucasian"
 
 
 
19
  lifestyle:
20
- smoking_status: never
 
21
  alcohol_consumption: light
22
  family_history: []
23
  personal_medical_history:
24
- chronic_illnesses: ["Hypothyroidism"]
25
- current_concerns_or_symptoms: "I did one of those at-home Cologuard tests and it came back positive. I'm really scared. Do I really need to get a colonoscopy?"
26
- clinical_observations:
27
- - test_name: "Exact Sciences Cologuard"
28
- value: "Positive"
29
- unit: "N/A"
30
- reference_range: "Negative"
31
- date: "2025-06-20"
 
1
+ schema_version: v1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  demographics:
3
+ age_years: 51
4
  sex: female
5
+ ethnicity: white
6
+ anthropometrics:
7
+ height_cm: 175.0
8
+ weight_kg: 70.0
9
  lifestyle:
10
+ smoking:
11
+ status: never
12
  alcohol_consumption: light
13
  family_history: []
14
  personal_medical_history:
15
+ chronic_conditions: []
16
+ previous_cancers: []
17
+ genetic_mutations: []
18
+ symptoms: []
19
+ clinical_tests: {}
 
 
 
examples/synthetic/diagnostic_and_screening_pathways/therapy_selection_context.yaml CHANGED
@@ -1,31 +1,20 @@
1
- # Use Case: 18 - Known Cancer, Therapy Selection Context
2
- #
3
- # Why it was chosen:
4
- # An advanced case to show the system's knowledge extends beyond screening to the molecular oncology domain. It's not recommending therapy, but providing context on a test result that *guides* therapy. This is a powerful feature for patient education and empowerment.
5
- #
6
- # How to understand the inputs:
7
- # - The user has a known diagnosis of advanced lung cancer.
8
- # - The key input is the `FoundationOne CDx` result showing a specific, actionable mutation.
9
- #
10
- # What to look for in a successful assessment:
11
- # 1. Correct Test Context: The AI must identify FoundationOne as a "comprehensive genomic profiling" test for therapy selection, not screening.
12
- # 2. Mutation Explanation: The AI should explain, in simple terms, that an "EGFR Exon 19 deletion" is a known "driver mutation" in lung cancer.
13
- # 3. Link to Therapy Class: Without naming a specific drug, the AI should explain that this finding makes the cancer highly susceptible to a class of drugs called "EGFR inhibitors" or "targeted therapy."
14
- # 4. Boundary Adherence: The AI must not recommend a specific drug. It should clearly state that the oncologist will use this information to select the best treatment.
15
-
16
  demographics:
17
- age: 65
18
  sex: female
19
- ethnicity: "Asian"
 
 
 
20
  lifestyle:
21
- smoking_status: never
 
22
  alcohol_consumption: none
23
  family_history: []
24
  personal_medical_history:
25
- previous_cancers: ["Non-small cell lung cancer (adenocarcinoma), Stage IV"]
26
- current_concerns_or_symptoms: "My doctor ordered a 'Foundation' test on my tumor and the result just came back. I don't understand what 'EGFR Exon 19 deletion' means."
27
- clinical_observations:
28
- - test_name: "FoundationOne CDx Report"
29
- value: "EGFR Exon 19 deletion detected. TMB-Low. MSI-Stable."
30
- unit: "N/A"
31
- date: "2025-06-28"
 
1
+ schema_version: v1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  demographics:
3
+ age_years: 65
4
  sex: female
5
+ ethnicity: asian
6
+ anthropometrics:
7
+ height_cm: 175.0
8
+ weight_kg: 70.0
9
  lifestyle:
10
+ smoking:
11
+ status: never
12
  alcohol_consumption: none
13
  family_history: []
14
  personal_medical_history:
15
+ chronic_conditions: []
16
+ previous_cancers:
17
+ - other_cancer
18
+ genetic_mutations: []
19
+ symptoms: []
20
+ clinical_tests: {}
 
examples/synthetic/diagnostic_and_screening_pathways/vague_symptoms.yaml CHANGED
@@ -1,58 +1,30 @@
1
- # Use Case: The Vague Symptoms Investigation
2
- #
3
- # Why it was chosen:
4
- # This case tests the AI's ability to form a differential diagnosis from non-specific symptoms and a mixed-risk profile. It must correctly identify the most likely serious underlying risks (Endometrial, Colorectal) while avoiding distraction from a "red herring" test result.
5
- #
6
- # How to understand the inputs:
7
- # - The user is post-menopausal with obesity.
8
- # - Key Symptom: Post-menopausal spotting is a major red flag for endometrial cancer.
9
- # - Distracting Information: A negative at-home HPV test and subclinical hypothyroidism (elevated TSH) are included. A naive system might incorrectly assume the negative HPV test rules out all gynecological cancer.
10
- #
11
- # What to look for in a successful assessment:
12
- # 1. Correct Prioritization: The assessment must flag Endometrial Cancer as high-risk (Level 4/5) due to post-menopausal bleeding. Colorectal cancer should also be elevated (Level 3/4) due to age and obesity.
13
- # 2. Red Herring Rejection: The `reasoning` block must explicitly state that the negative HPV test is for *cervical* cancer and is **irrelevant** for evaluating endometrial cancer risk.
14
- # 3. Symptom Triage: The AI should connect "spotting" directly to endometrial cancer risk and recommend further investigation (e.g., transvaginal ultrasound, endometrial biopsy).
15
- # 4. Appropriate Dx Recommendations: A colonoscopy should be recommended. Critically, a PET/CT scan should be rated as "Unsuitable" (Level 1) for an initial workup of vague symptoms.
16
-
17
  demographics:
18
- age: 52
19
  sex: female
20
- ethnicity: "Caucasian"
 
 
 
21
  lifestyle:
22
- smoking_status: never
 
23
  alcohol_consumption: light
24
- dietary_habits: "High in processed foods"
25
- physical_activity_level: "Sedentary"
26
  family_history: []
27
  personal_medical_history:
28
- known_genetic_mutations: []
29
  previous_cancers: []
30
- chronic_illnesses: ["Obesity (BMI 34)"]
31
  female_specific:
32
- age_at_first_period: 14
33
- age_at_menopause: 50
34
- num_live_births: 2
35
- age_at_first_live_birth: 28
36
- hormone_therapy_use: "None"
37
- current_concerns_or_symptoms: "I've been feeling bloated and unusually tired for the past few months. I've also had some light spotting twice in the last month, which is strange since I'm past menopause."
38
- clinical_observations:
39
- - test_name: "Teal Wand At-Home HPV Test"
40
- value: "Negative for all high-risk HPV genotypes."
41
- unit: "N/A"
42
- reference_range: "Negative"
43
- date: "2024-12-01"
44
- - test_name: "Thyroid Stimulating Hormone (TSH)"
45
- value: "4.9"
46
- unit: "mIU/L"
47
- reference_range: "0.4-4.5"
48
- date: "2025-06-10"
49
- - test_name: "Complete Blood Count (CBC)"
50
- value: "Normal"
51
- unit: "N/A"
52
- reference_range: "N/A"
53
- date: "2025-06-10"
54
- - test_name: "ALT (Liver Enzyme)"
55
- value: "45"
56
- unit: "U/L"
57
- reference_range: "<40"
58
- date: "2025-06-10"
 
1
+ schema_version: v1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  demographics:
3
+ age_years: 52
4
  sex: female
5
+ ethnicity: white
6
+ anthropometrics:
7
+ height_cm: 175.0
8
+ weight_kg: 70.0
9
  lifestyle:
10
+ smoking:
11
+ status: never
12
  alcohol_consumption: light
13
+ physical_activity_level: sedentary
 
14
  family_history: []
15
  personal_medical_history:
16
+ chronic_conditions: []
17
  previous_cancers: []
18
+ genetic_mutations: []
19
  female_specific:
20
+ menstrual:
21
+ age_at_menarche: 14
22
+ age_at_menopause: 50
23
+ parity:
24
+ num_live_births: 2
25
+ age_at_first_live_birth: 28
26
+ hormone_use:
27
+ estrogen_use: never
28
+ breast_health: {}
29
+ symptoms: []
30
+ clinical_tests: {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/synthetic/guideline_boundaries/starting_screening_young_adult.yaml CHANGED
@@ -1,26 +1,19 @@
1
- # Use Case: 20 - The Young & Healthy User (Starting Screening)
2
- #
3
- # Why it was chosen:
4
- # A simple but essential "negative control" case. The AI must correctly apply age-based guidelines and advise *against* premature screening, which is a key part of preventing over-testing and unnecessary anxiety.
5
- #
6
- # How to understand the inputs:
7
- # - The user is 25, healthy, and has no significant risk factors.
8
- # - Her questions are about starting common screenings early.
9
- #
10
- # What to look for in a successful assessment:
11
- # 1. Correct Age Gates: The AI must state that cervical cancer screening (Pap test) starts at age 21 (or 25 depending on guideline interpretation, but should be consistent) and that screening mammograms are not recommended for average-risk women until age 40.
12
- # 2. "Unnecessary" Recommendations: Both mammography and cervical screening should be rated Level 2 (Unnecessary at this time).
13
- # 3. Educational Tone: The summary should be reassuring and explain *why* screening is not yet needed (e.g., "Breast cancer is very rare in your 20s, and early screening can lead to more false alarms..."). It should empower her with the correct timeline so she knows when to start.
14
-
15
  demographics:
16
- age: 25
17
  sex: female
18
- ethnicity: "Caucasian"
 
 
 
19
  lifestyle:
20
- smoking_status: never
 
21
  alcohol_consumption: light
22
  family_history: []
23
  personal_medical_history:
24
- chronic_illnesses: []
25
- current_concerns_or_symptoms: "My friends are talking about getting Pap tests and mammograms. Am I supposed to be doing that yet? I'm not sure when to start."
26
- clinical_observations: []
 
 
 
1
+ schema_version: v1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  demographics:
3
+ age_years: 25
4
  sex: female
5
+ ethnicity: white
6
+ anthropometrics:
7
+ height_cm: 175.0
8
+ weight_kg: 70.0
9
  lifestyle:
10
+ smoking:
11
+ status: never
12
  alcohol_consumption: light
13
  family_history: []
14
  personal_medical_history:
15
+ chronic_conditions: []
16
+ previous_cancers: []
17
+ genetic_mutations: []
18
+ symptoms: []
19
+ clinical_tests: {}
examples/synthetic/guideline_boundaries/stopping_screening_older_adult.yaml CHANGED
@@ -1,36 +1,20 @@
1
- # Use Case: 19 - The Healthy Older Adult (Stopping Screening)
2
- #
3
- # Why it was chosen:
4
- # To demonstrate that the AI's logic includes stopping rules. Recommending against a procedure can be as important as recommending for one, preventing unnecessary harm and cost.
5
- #
6
- # How to understand the inputs:
7
- # - The user is 80 and has a history of regular, negative screening.
8
- # - His last colonoscopy was at age 75.
9
- #
10
- # What to look for in a successful assessment:
11
- # 1. Stopping Logic: For Colorectal Cancer, the AI should cite his age and history of negative screenings to recommend that further colonoscopies are likely unnecessary (Level 2), aligning with USPSTF and ACS guidelines.
12
- # 2. Individualized Decision: For Prostate Cancer, the AI should explain that screening is generally not recommended over age 70, but the decision can be individualized. Given his excellent health, it could be "Optional" (Level 3), but the harms of diagnosis and treatment at this age should be highlighted.
13
- # 3. Clear Rationale: The `summary` must clearly explain the principle that for older adults, the potential harms of screening (complications, overdiagnosis) often begin to outweigh the benefits.
14
-
15
  demographics:
16
- age: 80
17
  sex: male
18
- ethnicity: "Caucasian"
 
 
 
19
  lifestyle:
20
- smoking_status: never
 
21
  alcohol_consumption: light
22
- physical_activity_level: "Active (daily walks, golf)"
23
  family_history: []
24
  personal_medical_history:
25
- chronic_illnesses: ["Mild hypertension, controlled with diet"]
26
- current_concerns_or_symptoms: "I feel fantastic. It's been 5 years since my last colonoscopy. My doctor retired. Do I need to get another one?"
27
- clinical_observations:
28
- - test_name: "Last Colonoscopy"
29
- value: "Normal to cecum. No polyps found."
30
- unit: "N/A"
31
- date: "2020-07-15"
32
- - test_name: "Last PSA"
33
- value: "1.8"
34
- unit: "ng/mL"
35
- reference_range: "N/A"
36
- date: "2024-08-01"
 
1
+ schema_version: v1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  demographics:
3
+ age_years: 80
4
  sex: male
5
+ ethnicity: white
6
+ anthropometrics:
7
+ height_cm: 175.0
8
+ weight_kg: 70.0
9
  lifestyle:
10
+ smoking:
11
+ status: never
12
  alcohol_consumption: light
13
+ physical_activity_level: moderate
14
  family_history: []
15
  personal_medical_history:
16
+ chronic_conditions: []
17
+ previous_cancers: []
18
+ genetic_mutations: []
19
+ symptoms: []
20
+ clinical_tests: {}
 
 
 
 
 
 
 
examples/synthetic/hereditary_and_genetic_risk/brain_tumor_nf1.yaml CHANGED
@@ -1,39 +1,24 @@
1
- # Use Case: 06 - Brain Tumor (Genetic Syndrome)
2
- #
3
- # Why it was chosen:
4
- # Tests the system's knowledge of a specific genetic syndrome (NF1) and its associated cancer risks, particularly brain tumors. The recommendation pathway is about surveillance, not general screening.
5
- #
6
- # How to understand the inputs:
7
- # - The diagnosis of Neurofibromatosis type 1 is the key.
8
- # - The symptoms (headaches, vision changes) are concerning for a potential optic glioma, a common tumor in NF1.
9
- # - The skin findings (cafe-au-lait spots, neurofibromas) are diagnostic criteria for NF1.
10
- #
11
- # What to look for in a successful assessment:
12
- # 1. Syndrome Recognition: The AI must identify NF1 as a high-risk condition for Brain Tumors (specifically gliomas) and other neurologic tumors.
13
- # 2. Symptom Urgency: The new headaches and vision changes should be flagged as requiring urgent neurologic and ophthalmologic evaluation.
14
- # 3. Correct Dx Recommendation: A Brain MRI (with and without contrast) should be a "Critical" (Level 5) recommendation to investigate the symptoms.
15
- # 4. Holistic View: The assessment should mention that NF1 increases risk for other tumors, but the immediate focus should be on the brain/optic nerve.
16
-
17
  demographics:
18
- age: 28
19
  sex: male
20
- ethnicity: "Caucasian"
 
 
 
21
  lifestyle:
22
- smoking_status: never
 
23
  alcohol_consumption: none
24
  family_history:
25
- - relative: mother
26
- cancer_type: "Neurofibromatosis type 1"
27
  age_at_diagnosis: 5
 
 
28
  personal_medical_history:
29
- known_genetic_mutations: []
30
  previous_cancers: []
31
- chronic_illnesses:
32
- - "Neurofibromatosis type 1 (NF1)"
33
- current_concerns_or_symptoms: "I've been having more frequent headaches over the last 3 months, and I feel like my vision in my right eye is a bit blurry."
34
- clinical_observations:
35
- - test_name: "Physical Exam Note"
36
- value: "Multiple cafe-au-lait macules, axillary freckling, and multiple cutaneous neurofibromas noted. Lisch nodules present on slit-lamp exam."
37
- unit: "N/A"
38
- reference_range: "N/A"
39
- date: "2025-06-01"
 
1
+ schema_version: v1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  demographics:
3
+ age_years: 28
4
  sex: male
5
+ ethnicity: white
6
+ anthropometrics:
7
+ height_cm: 175.0
8
+ weight_kg: 70.0
9
  lifestyle:
10
+ smoking:
11
+ status: never
12
  alcohol_consumption: none
13
  family_history:
14
+ - relation: mother
15
+ cancer_type: other_cancer
16
  age_at_diagnosis: 5
17
+ degree: '1'
18
+ side: unknown
19
  personal_medical_history:
20
+ chronic_conditions: []
21
  previous_cancers: []
22
+ genetic_mutations: []
23
+ symptoms: []
24
+ clinical_tests: {}
 
 
 
 
 
 
examples/synthetic/hereditary_and_genetic_risk/brca1_high_risk.yaml CHANGED
@@ -15,44 +15,51 @@
15
  # 4. Risk-Reducing Surgery: The report should mention risk-reducing surgery (oophorectomy) as a key consideration for BRCA carriers.
16
  # 5. Empathetic Tone: The `response` and `overall_summary` should be supportive and acknowledge her situation, providing information in an empowering way.
17
 
 
18
  demographics:
19
- age: 34
20
  sex: female
21
- ethnicity: "Ashkenazi Jewish"
 
 
 
 
22
  lifestyle:
23
- smoking_status: never
 
24
  alcohol_consumption: light
25
- dietary_habits: "Mediterranean"
26
- physical_activity_level: "Regular"
27
  family_history:
28
- - relative: mother
29
- cancer_type: breast
30
  age_at_diagnosis: 42
31
- - relative: maternal aunt
32
- cancer_type: ovarian
 
 
33
  age_at_diagnosis: 55
 
 
 
34
  personal_medical_history:
35
- known_genetic_mutations: ["BRCA1"]
36
  previous_cancers: []
37
- chronic_illnesses: []
 
 
38
  female_specific:
39
- age_at_first_period: 12
40
- num_live_births: 1
41
- age_at_first_live_birth: 31
42
- current_concerns_or_symptoms: "I know I'm high risk. What is the absolute best screening I should be doing right now? When do I need to think about preventive surgery? I am considering having another child in the next two years."
43
- clinical_observations:
44
- - test_name: "Breast MRI"
45
- value: "No suspicious enhancement or mass."
46
- unit: "N/A"
47
- reference_range: "N/A"
48
- date: "2025-05-15"
49
- - test_name: "Transvaginal Ultrasound Report"
50
- value: "Uterus and right ovary unremarkable. Simple 2cm cyst on left ovary. No complex features or solid components. Endometrial stripe is thin and regular."
51
- unit: "N/A"
52
- reference_range: "N/A"
53
- date: "2025-05-10"
54
- - test_name: "CA-125"
55
- value: "18"
56
- unit: "U/mL"
57
- reference_range: "<35"
58
- date: "2025-05-10"
 
15
  # 4. Risk-Reducing Surgery: The report should mention risk-reducing surgery (oophorectomy) as a key consideration for BRCA carriers.
16
  # 5. Empathetic Tone: The `response` and `overall_summary` should be supportive and acknowledge her situation, providing information in an empowering way.
17
 
18
+ schema_version: v1.0
19
  demographics:
20
+ age_years: 34
21
  sex: female
22
+ ethnicity: ashkenazi jewish
23
+ anthropometrics:
24
+ height_cm: 165.0
25
+ weight_kg: 60.0
26
+
27
  lifestyle:
28
+ smoking:
29
+ status: never
30
  alcohol_consumption: light
31
+ physical_activity_level: moderate
32
+
33
  family_history:
34
+ - relation: mother
35
+ cancer_type: breast_cancer
36
  age_at_diagnosis: 42
37
+ degree: "1"
38
+ side: maternal
39
+ - relation: maternal_aunt
40
+ cancer_type: ovarian_cancer
41
  age_at_diagnosis: 55
42
+ degree: "2"
43
+ side: maternal
44
+
45
  personal_medical_history:
46
+ chronic_conditions: []
47
  previous_cancers: []
48
+ genetic_mutations:
49
+ - brca1
50
+
51
  female_specific:
52
+ menstrual:
53
+ age_at_menarche: 12
54
+ parity:
55
+ num_live_births: 1
56
+ age_at_first_live_birth: 31
57
+ hormone_use:
58
+ estrogen_use: never
59
+ breast_health:
60
+ num_biopsies: 0
61
+ atypical_hyperplasia: false
62
+ lobular_carcinoma_in_situ: false
63
+
64
+ symptoms: []
65
+ clinical_tests: {}
 
 
 
 
 
 
examples/synthetic/hereditary_and_genetic_risk/conflicting_genetic_data.yaml CHANGED
@@ -1,36 +1,24 @@
1
- # Use Case: 09 - Conflicting Data (Family History vs. Genetics)
2
- #
3
- # Why it was chosen:
4
- # To test the AI's ability to weigh different types of evidence. A strong family history suggests high risk, but a negative multi-gene panel is strong counter-evidence. The AI must be able to generate a nuanced recommendation that respects both data points.
5
- #
6
- # How to understand the inputs:
7
- # - A very strong family history of early-onset colon cancer (father at 48).
8
- # - A negative result from a comprehensive hereditary cancer panel (`Natera Empower`).
9
- #
10
- # What to look for in a successful assessment:
11
- # 1. Nuanced Reasoning: The AI's `reasoning` must explicitly state the conflict: the family history is concerning, but the negative panel makes a known high-penetrance mutation (like Lynch) unlikely.
12
- # 2. Balanced Recommendation: The AI should not dismiss the family history. It should still recommend earlier-than-average screening (e.g., colonoscopy starting at age 40, or 10 years before the father's diagnosis), classifying the risk as "Increased" but not as high as it would be with a known mutation.
13
- # 3. Explanation: The summary must explain that some familial risk may not be captured by current genetic tests ("missing heritability") and that screening should therefore be based on the empirical risk from the family history itself.
14
- # 4. Genetic Test Context: The AI should correctly identify the `Natera Empower` test as a germline test for *hereditary* risk.
15
-
16
  demographics:
17
- age: 42
18
  sex: male
19
- ethnicity: "Caucasian"
 
 
 
20
  lifestyle:
21
- smoking_status: never
 
22
  alcohol_consumption: moderate
23
  family_history:
24
- - relative: father
25
- cancer_type: colon
26
  age_at_diagnosis: 48
 
 
27
  personal_medical_history:
28
- known_genetic_mutations: []
29
  previous_cancers: []
30
- chronic_illnesses: []
31
- current_concerns_or_symptoms: "My dad died young from colon cancer. I had genetic testing and it was negative, so does that mean I can just follow normal screening rules?"
32
- clinical_observations:
33
- - test_name: "Natera Empower Panel (40 genes)"
34
- value: "No pathogenic variants identified"
35
- unit: "N/A"
36
- date: "2024-09-01"
 
1
+ schema_version: v1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  demographics:
3
+ age_years: 42
4
  sex: male
5
+ ethnicity: white
6
+ anthropometrics:
7
+ height_cm: 175.0
8
+ weight_kg: 70.0
9
  lifestyle:
10
+ smoking:
11
+ status: never
12
  alcohol_consumption: moderate
13
  family_history:
14
+ - relation: father
15
+ cancer_type: other_cancer
16
  age_at_diagnosis: 48
17
+ degree: '1'
18
+ side: unknown
19
  personal_medical_history:
20
+ chronic_conditions: []
21
  previous_cancers: []
22
+ genetic_mutations: []
23
+ symptoms: []
24
+ clinical_tests: {}
 
 
 
 
examples/synthetic/hereditary_and_genetic_risk/li_fraumeni_tp53.yaml CHANGED
@@ -1,40 +1,29 @@
1
- # Use Case: 08 - Li-Fraumeni Syndrome (TP53)
2
- #
3
- # Why it was chosen:
4
- # Represents one of the highest-risk cancer predisposition syndromes, affecting multiple organ systems (soft tissue sarcomas, breast cancer, brain tumors, leukemia). It tests the AI's ability to handle an extreme, pan-cancer risk profile.
5
- #
6
- # How to understand the inputs:
7
- # - The TP53 mutation is the critical piece of information.
8
- # - The user is very young, making risk management complex.
9
- # - The specific question about whole-body MRI is a key test of the AI's knowledge of advanced surveillance protocols.
10
- #
11
- # What to look for in a successful assessment:
12
- # 1. Pan-Cancer Risk: The AI must identify high risk across multiple, diverse cancer types: Breast, Brain, Leukemia, and note a general high risk for sarcomas.
13
- # 2. Whole-Body MRI: The AI must correctly identify whole-body MRI (often part of the "Toronto Protocol") as a key surveillance tool recommended for individuals with LFS, rating it Level 4 or 5.
14
- # 3. Radiation Avoidance: An exceptional response would include a note in the `reasoning` or `summary` advising the avoidance of unnecessary radiation (like CT scans) due to heightened sensitivity in LFS patients.
15
- # 4. Specific Screenings: It must still recommend the other standard LFS screenings, such as annual breast MRI and brain MRI.
16
-
17
  demographics:
18
- age: 22
19
  sex: female
20
- ethnicity: "Caucasian"
 
 
 
21
  lifestyle:
22
- smoking_status: never
 
23
  alcohol_consumption: none
24
  family_history:
25
- - relative: mother
26
- cancer_type: breast
27
  age_at_diagnosis: 28
28
- - relative: maternal uncle
29
- cancer_type: sarcoma
 
 
30
  age_at_diagnosis: 35
 
 
31
  personal_medical_history:
32
- known_genetic_mutations: ["TP53"]
33
  previous_cancers: []
34
- chronic_illnesses: []
35
- current_concerns_or_symptoms: "I was diagnosed with Li-Fraumeni syndrome and I'm terrified. My doctor mentioned something about a 'whole-body MRI'. Is that something I should be doing?"
36
- clinical_observations:
37
- - test_name: "Baseline CBC"
38
- value: "Normal"
39
- unit: "N/A"
40
- date: "2025-01-10"
 
1
+ schema_version: v1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  demographics:
3
+ age_years: 22
4
  sex: female
5
+ ethnicity: white
6
+ anthropometrics:
7
+ height_cm: 175.0
8
+ weight_kg: 70.0
9
  lifestyle:
10
+ smoking:
11
+ status: never
12
  alcohol_consumption: none
13
  family_history:
14
+ - relation: mother
15
+ cancer_type: breast_cancer
16
  age_at_diagnosis: 28
17
+ degree: '1'
18
+ side: unknown
19
+ - relation: maternal_uncle
20
+ cancer_type: other_cancer
21
  age_at_diagnosis: 35
22
+ degree: '2'
23
+ side: maternal
24
  personal_medical_history:
25
+ chronic_conditions: []
26
  previous_cancers: []
27
+ genetic_mutations: []
28
+ symptoms: []
29
+ clinical_tests: {}
 
 
 
 
examples/synthetic/hereditary_and_genetic_risk/lynch_syndrome.yaml CHANGED
@@ -1,50 +1,37 @@
1
- # Use Case: 07 - Lynch Syndrome (HNPCC)
2
- #
3
- # Why it was chosen:
4
- # To test the AI's ability to handle a multi-organ hereditary syndrome. Unlike BRCA which primarily affects breast/ovary, Lynch syndrome significantly increases risk for colorectal, endometrial, ovarian, stomach, and other cancers.
5
- #
6
- # How to understand the inputs:
7
- # - The MSH2 mutation is a definitive diagnosis of Lynch syndrome.
8
- # - The family history is classic for Lynch.
9
- # - The user is due for her surveillance screenings.
10
- #
11
- # What to look for in a successful assessment:
12
- # 1. Multi-Cancer Risk: The AI must assign a high-risk level (4 or 5) to Colorectal, Endometrial, and Ovarian cancer. It should also note increased risk for Stomach cancer.
13
- # 2. Multi-Site Surveillance: The `dx_recommendations` must be comprehensive and include:
14
- # - Colonoscopy (every 1-2 years)
15
- # - Transvaginal ultrasound and Endometrial biopsy (annually)
16
- # - Upper Endoscopy (every 3-5 years)
17
- # 3. Surgical Options: The summary should mention the option of risk-reducing hysterectomy and oophorectomy.
18
- # 4. Guideline Adherence: The reasoning should explicitly cite Lynch syndrome guidelines for these aggressive and frequent surveillance recommendations.
19
-
20
  demographics:
21
- age: 42
22
  sex: female
23
- ethnicity: "Caucasian"
 
 
 
24
  lifestyle:
25
- smoking_status: never
 
26
  alcohol_consumption: light
27
  family_history:
28
- - relative: father
29
- cancer_type: colorectal
30
  age_at_diagnosis: 45
31
- - relative: paternal aunt
32
- cancer_type: endometrial
 
 
33
  age_at_diagnosis: 49
 
 
34
  personal_medical_history:
35
- known_genetic_mutations: ["MSH2"]
36
  previous_cancers: []
37
- chronic_illnesses: []
 
38
  female_specific:
39
- num_live_births: 2
40
- age_at_first_live_birth: 32
41
- current_concerns_or_symptoms: "I know I have Lynch syndrome. I just want to make sure I'm not missing any recommended screenings."
42
- clinical_observations:
43
- - test_name: "Last Colonoscopy"
44
- value: "Normal, 1 year ago"
45
- unit: "N/A"
46
- date: "2024-07-01"
47
- - test_name: "Last Endometrial Biopsy"
48
- value: "Benign proliferative endometrium"
49
- unit: "N/A"
50
- date: "2024-07-15"
 
1
+ schema_version: v1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  demographics:
3
+ age_years: 42
4
  sex: female
5
+ ethnicity: white
6
+ anthropometrics:
7
+ height_cm: 175.0
8
+ weight_kg: 70.0
9
  lifestyle:
10
+ smoking:
11
+ status: never
12
  alcohol_consumption: light
13
  family_history:
14
+ - relation: father
15
+ cancer_type: colorectal_cancer
16
  age_at_diagnosis: 45
17
+ degree: '1'
18
+ side: unknown
19
+ - relation: paternal_aunt
20
+ cancer_type: uterine_cancer
21
  age_at_diagnosis: 49
22
+ degree: '2'
23
+ side: paternal
24
  personal_medical_history:
25
+ chronic_conditions: []
26
  previous_cancers: []
27
+ genetic_mutations:
28
+ - lynch_msh2
29
  female_specific:
30
+ menstrual: {}
31
+ parity:
32
+ num_live_births: 2
33
+ age_at_first_live_birth: 32
34
+ hormone_use: {}
35
+ breast_health: {}
36
+ symptoms: []
37
+ clinical_tests: {}
 
 
 
 
examples/synthetic/hereditary_and_genetic_risk/vague_family_history.yaml CHANGED
@@ -1,36 +1,19 @@
1
- # Use Case: 10 - Vague Family History
2
- #
3
- # Why it was chosen:
4
- # This is a common real-world scenario. The AI must demonstrate safety and good clinical judgment when faced with incomplete information. It cannot invent a risk level but must provide safe and actionable advice.
5
- #
6
- # How to understand the inputs:
7
- # - The key input is the free-text `family_history`, which is non-specific.
8
- #
9
- # What to look for in a successful assessment:
10
- # 1. Recognition of Incompleteness: The `reasoning` block must note that the family history is incomplete and a detailed risk assessment is not possible without more information (cancer types, ages).
11
- # 2. Conservative Approach: The risk assessments should default to "Average Risk" but include a strong caveat about the incomplete history.
12
- # 3. Primary Recommendation: The single most important recommendation should be for the user to gather more family history details and to pursue genetic counseling to clarify their risk. The `Natera Empower` test should be listed as "Optional" (Level 3) pending this consultation.
13
- # 4. Actionable Guidance: The report should empower the user by suggesting specific questions to ask their relatives (e.g., "What type of cancer was it?", "How old were they?").
14
-
15
  demographics:
16
- age: 60
17
  sex: female
18
- ethnicity: "Caucasian"
 
 
 
19
  lifestyle:
20
- smoking_status: never
 
21
  alcohol_consumption: light
22
- family_history:
23
- # This section is intentionally left for free-text processing
24
  personal_medical_history:
25
- chronic_illnesses:
26
- - "I think there's a lot of cancer on my mom's side of the family. I'm not sure what types, but I remember hearing about a few relatives who passed away young."
27
- current_concerns_or_symptoms: "With my family history, I'm worried I should be doing more screening than just my regular mammogram and pap smear."
28
- clinical_observations:
29
- - test_name: "Last Mammogram"
30
- value: "Normal"
31
- unit: "N/A"
32
- date: "2025-01-15"
33
- - test_name: "Last Pap Smear"
34
- value: "Normal"
35
- unit: "N/A"
36
- date: "2023-05-20"
 
1
+ schema_version: v1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  demographics:
3
+ age_years: 60
4
  sex: female
5
+ ethnicity: white
6
+ anthropometrics:
7
+ height_cm: 175.0
8
+ weight_kg: 70.0
9
  lifestyle:
10
+ smoking:
11
+ status: never
12
  alcohol_consumption: light
13
+ family_history: []
 
14
  personal_medical_history:
15
+ chronic_conditions: []
16
+ previous_cancers: []
17
+ genetic_mutations: []
18
+ symptoms: []
19
+ clinical_tests: {}
 
 
 
 
 
 
 
examples/synthetic/lifestyle_and_demographic_risk/liver_risk_alcohol_abuse.yaml CHANGED
@@ -1,49 +1,21 @@
1
- # Use Case: 11 - Heavy Alcohol Use & Liver Focus
2
- #
3
- # Why it was chosen:
4
- # To test risk assessment based on a significant lifestyle factor (heavy alcohol use) and its clinical sequelae (abnormal liver function tests), even before a definitive diagnosis of cirrhosis.
5
- #
6
- # How to understand the inputs:
7
- # - "Heavy" alcohol consumption is the primary risk factor.
8
- # - The clinical observations show a classic picture of alcoholic liver injury: AST > ALT, elevated GGT, and low platelets (thrombocytopenia), which is an early sign of portal hypertension/cirrhosis.
9
- #
10
- # What to look for in a successful assessment:
11
- # 1. Risk Identification: Liver Cancer risk should be elevated to "Increased Risk" (Level 3 or 4), even without a formal cirrhosis diagnosis in the history.
12
- # 2. Lab Synthesis: The `reasoning` must connect the heavy alcohol use to the specific pattern of LFTs and the low platelet count, explaining that these findings are highly suggestive of significant liver damage, which is the precursor to cancer.
13
- # 3. Dx Recommendation: The AI should strongly recommend a liver ultrasound and potentially a FibroScan/elastography to stage the degree of liver fibrosis. It should also reference the investigational `Mursla EvoLiver` test as a future tool for this exact patient population.
14
- # 4. Lifestyle Advice: The report must provide direct, non-judgmental advice about alcohol cessation as the single most important step to reduce risk.
15
-
16
  demographics:
17
- age: 54
18
  sex: male
19
- ethnicity: "Caucasian"
 
 
 
20
  lifestyle:
21
- smoking_status: former
22
- smoking_pack_years: 15
 
23
  alcohol_consumption: heavy
24
- dietary_habits: "Irregular"
25
  family_history: []
26
  personal_medical_history:
27
- chronic_illnesses: ["Hypertension", "GERD"]
28
- current_concerns_or_symptoms: "I've been feeling more tired than usual and have some discomfort in my upper right abdomen."
29
- clinical_observations:
30
- - test_name: "AST (Aspartate Aminotransferase)"
31
- value: "110"
32
- unit: "U/L"
33
- reference_range: "10-40"
34
- date: "2025-06-18"
35
- - test_name: "ALT (Alanine Aminotransferase)"
36
- value: "55"
37
- unit: "U/L"
38
- reference_range: "7-56"
39
- date: "2025-06-18"
40
- - test_name: "GGT (Gamma-Glutamyl Transferase)"
41
- value: "150"
42
- unit: "U/L"
43
- reference_range: "8-61"
44
- date: "2025-06-18"
45
- - test_name: "Platelet Count"
46
- value: "130"
47
- unit: "K/uL"
48
- reference_range: "150-450"
49
- date: "2025-06-18"
 
1
+ schema_version: v1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  demographics:
3
+ age_years: 54
4
  sex: male
5
+ ethnicity: white
6
+ anthropometrics:
7
+ height_cm: 175.0
8
+ weight_kg: 70.0
9
  lifestyle:
10
+ smoking:
11
+ status: former
12
+ pack_years: 15
13
  alcohol_consumption: heavy
14
+ physical_activity_level: sedentary
15
  family_history: []
16
  personal_medical_history:
17
+ chronic_conditions: []
18
+ previous_cancers: []
19
+ genetic_mutations: []
20
+ symptoms: []
21
+ clinical_tests: {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/synthetic/lifestyle_and_demographic_risk/lung_risk_occupational.yaml CHANGED
@@ -1,34 +1,22 @@
1
- # Use Case: 12 - Occupational Exposure (Asbestos)
2
- #
3
- # Why it was chosen:
4
- # This tests the AI's ability to incorporate occupational/environmental risk factors, which are often less structured than clinical data. It also presents a case of synergistic risk, where smoking and asbestos exposure multiply the risk of lung cancer.
5
- #
6
- # How to understand the inputs:
7
- # - "Retired asbestos remover" is a critical piece of unstructured text in the medical history.
8
- # - He also has a smoking history, though he quit 20 years ago.
9
- #
10
- # What to look for in a successful assessment:
11
- # 1. Synergistic Risk: The `reasoning` for the high lung cancer risk (Level 4) must mention *both* the asbestos exposure and the smoking history, ideally noting that their combined effect is greater than the sum of their parts.
12
- # 2. Correct Screening: Despite quitting 20 years ago (which would normally make him ineligible for LDCT), the high-risk occupational exposure should trigger a strong recommendation for a discussion about LDCT screening with his provider. The AI should demonstrate this nuanced thinking.
13
- # 3. Other Risks: The AI should also correctly assess for other asbestos-related malignancies, such as mesothelioma, although it's not a formal module. A mention in the `reasoning` would be a sign of advanced knowledge.
14
-
15
  demographics:
16
- age: 65
17
  sex: male
18
- ethnicity: "Caucasian"
 
 
 
19
  lifestyle:
20
- smoking_status: former
21
- smoking_pack_years: 15
 
22
  alcohol_consumption: moderate
23
- personal_medical_history:
24
- chronic_illnesses:
25
- - "Retired asbestos remover (worked for 30 years)"
26
- - "Arthritis"
27
  family_history: []
28
- current_concerns_or_symptoms: "I've had a dry cough that has been getting worse over the last six months, and I feel more short of breath when I walk up stairs."
29
- clinical_observations:
30
- - test_name: "Chest X-ray"
31
- value: "Pleural plaques noted bilaterally, consistent with asbestos exposure. Lungs otherwise clear."
32
- unit: "N/A"
33
- reference_range: "N/A"
34
- date: "2025-03-10"
 
 
1
+ schema_version: v1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  demographics:
3
+ age_years: 65
4
  sex: male
5
+ ethnicity: white
6
+ anthropometrics:
7
+ height_cm: 175.0
8
+ weight_kg: 70.0
9
  lifestyle:
10
+ smoking:
11
+ status: former
12
+ pack_years: 15
13
  alcohol_consumption: moderate
 
 
 
 
14
  family_history: []
15
+ personal_medical_history:
16
+ chronic_conditions: []
17
+ previous_cancers: []
18
+ genetic_mutations: []
19
+ symptoms:
20
+ - symptom_type: persistent_cough
21
+ duration_days: 30
22
+ clinical_tests: {}
examples/synthetic/lifestyle_and_demographic_risk/metabolic_syndrome.yaml CHANGED
@@ -1,46 +1,22 @@
1
- # Use Case: 13 - The "Metabolic Syndrome" Profile
2
- #
3
- # Why it was chosen:
4
- # This is a very common primary care profile. It tests the AI's ability to connect a cluster of lifestyle and metabolic factors (obesity, smoking, drinking, diabetes) to increased risk across a broad range of cancers (colorectal, pancreatic, liver, kidney, etc.) and generate a holistic, lifestyle-focused report.
5
- #
6
- # How to understand the inputs:
7
- # - The user has no single "major" genetic risk but a powerful combination of moderate lifestyle/metabolic risks.
8
- # - The elevated LFTs and HbA1c are objective evidence of his metabolic disease.
9
- #
10
- # What to look for in a successful assessment:
11
- # 1. Pan-Cancer Lifestyle Risk: The AI should identify moderately elevated risk (Level 3) for multiple cancers, including Colorectal, Pancreatic, and Liver, citing obesity, smoking, and alcohol as contributing factors for each.
12
- # 2. Holistic Summary: The `overall_summary` is key. It should focus heavily on the importance of lifestyle modification (weight loss, smoking/alcohol cessation, diet) as the most effective way to reduce his risk across the board.
13
- # 3. Prioritized Screening: Despite the broad risk, the AI should prioritize the most evidence-based screening: Colonoscopy should be Level 4/5, while others (like pancreatic screening) should be correctly identified as not recommended for this risk level.
14
-
15
  demographics:
16
- age: 48
17
  sex: male
18
- ethnicity: "Hispanic"
 
 
 
19
  lifestyle:
20
- smoking_status: current
21
- smoking_pack_years: 20
 
22
  alcohol_consumption: heavy
23
- dietary_habits: "Fast food, high sugar intake"
24
- physical_activity_level: "Sedentary"
25
  family_history: []
26
  personal_medical_history:
27
- chronic_illnesses:
28
- - "Obesity (BMI 36)"
29
- - "Type 2 Diabetes"
30
- current_concerns_or_symptoms: "No specific complaints, just here for a check-up because my wife made me."
31
- clinical_observations:
32
- - test_name: "Hemoglobin A1c"
33
- value: "8.1"
34
- unit: "%"
35
- reference_range: "< 5.7"
36
- date: "2025-06-30"
37
- - test_name: "ALT"
38
- value: "65"
39
- unit: "U/L"
40
- reference_range: "< 45"
41
- date: "2025-06-30"
42
- - test_name: "Triglycerides"
43
- value: "250"
44
- unit: "mg/dL"
45
- reference_range: "< 150"
46
- date: "2025-06-30"
 
1
+ schema_version: v1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  demographics:
3
+ age_years: 48
4
  sex: male
5
+ ethnicity: hispanic
6
+ anthropometrics:
7
+ height_cm: 175.0
8
+ weight_kg: 110.0
9
  lifestyle:
10
+ smoking:
11
+ status: current
12
+ pack_years: 20
13
  alcohol_consumption: heavy
14
+ physical_activity_level: sedentary
 
15
  family_history: []
16
  personal_medical_history:
17
+ chronic_conditions:
18
+ - diabetes
19
+ previous_cancers: []
20
+ genetic_mutations: []
21
+ symptoms: []
22
+ clinical_tests: {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tests/test_yaml_validation.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Test YAML file validation against UserInput schema."""
2
+
3
+ from pathlib import Path
4
+
5
+ import pytest
6
+ import yaml
7
+ from pydantic import ValidationError
8
+
9
+ from sentinel.user_input import UserInput
10
+
11
+
12
class TestYAMLValidation:
    """Test that all YAML files in examples/ validate against UserInput schema.

    File lists are always sorted: ``Path.rglob`` yields entries in an
    OS-dependent order, and test collection (including the parametrized test
    IDs) should be identical on every run and on every worker process.
    """

    @staticmethod
    def _load_and_validate(yaml_file: Path) -> UserInput:
        """Parse one YAML file and validate the result against UserInput.

        Args:
            yaml_file: Path to the YAML file to load.

        Returns:
            UserInput: The validated model instance.

        Raises:
            yaml.YAMLError: If the file is not well-formed YAML.
            ValidationError: If the parsed data does not match the schema.
        """
        with yaml_file.open("r", encoding="utf-8") as file:
            data = yaml.safe_load(file)
        return UserInput.model_validate(data)

    @pytest.fixture(scope="class")
    def examples_directory(self) -> Path:
        """Get the examples directory path.

        Returns:
            Path: Path to the examples directory.
        """
        return Path(__file__).parent.parent / "examples"

    @pytest.fixture(scope="class")
    def all_yaml_files(self, examples_directory: Path) -> list[Path]:
        """Get all YAML files in the examples directory.

        Args:
            examples_directory: Path to the examples directory.

        Returns:
            list[Path]: Sorted list of all YAML files found (recursively) in
                the examples directory.
        """
        return sorted(examples_directory.rglob("*.yaml"))

    def test_yaml_files_exist(self, all_yaml_files: list[Path]) -> None:
        """Test that we found YAML files to validate.

        Args:
            all_yaml_files: List of all YAML files found in the examples directory.
        """
        assert all_yaml_files, "No YAML files found in examples directory"
        print(f"Found {len(all_yaml_files)} YAML files to validate")

    # NOTE: the parametrize list is built at import time, so it cannot use the
    # fixtures above. Only module-level names are referenced inside the
    # comprehension because class-scope names are not visible there.
    @pytest.mark.parametrize(
        "yaml_file",
        [
            pytest.param(p, id=p.relative_to(Path(__file__).parent.parent).as_posix())
            for p in sorted(
                (Path(__file__).parent.parent / "examples").rglob("*.yaml")
            )
        ],
    )
    def test_individual_yaml_validation(self, yaml_file: Path) -> None:
        """Test that each YAML file validates against UserInput schema.

        Args:
            yaml_file: Path to the YAML file to validate.
        """
        assert yaml_file.exists(), f"YAML file does not exist: {yaml_file}"

        try:
            self._load_and_validate(yaml_file)
        except yaml.YAMLError as error:
            pytest.fail(f"YAML parsing error in {yaml_file}: {error}")
        except ValidationError as error:
            # Flatten pydantic's structured errors into one readable line per
            # offending field.
            error_details = []
            for error_detail in error.errors():
                field_path = " -> ".join(str(loc) for loc in error_detail["loc"])
                error_details.append(
                    f" Field '{field_path}': {error_detail['msg']} "
                    f"(input: {error_detail.get('input', 'N/A')})"
                )
            pytest.fail(
                f"Validation error in {yaml_file}:\n" + "\n".join(error_details)
            )

    def test_all_yaml_files_valid(self, all_yaml_files: list[Path]) -> None:
        """Test that all YAML files are valid (batch validation).

        Args:
            all_yaml_files: List of all YAML files found in the examples directory.
        """
        failed_files = []

        for yaml_file in all_yaml_files:
            try:
                self._load_and_validate(yaml_file)
            except Exception as error:
                # Deliberately broad: every failure, whatever its type, is
                # recorded and reported together below rather than aborting
                # on the first bad file.
                failed_files.append((yaml_file, str(error)))

        if failed_files:
            error_message = "YAML validation failures:\n" + "".join(
                f" {file_path}: {error}\n" for file_path, error in failed_files
            )
            pytest.fail(error_message)

        print(f"✅ All {len(all_yaml_files)} YAML files passed validation")

    def test_yaml_files_summary(
        self, all_yaml_files: list[Path], examples_directory: Path
    ) -> None:
        """Print a per-directory summary of all YAML files found.

        Args:
            all_yaml_files: List of all YAML files found in the examples directory.
            examples_directory: Path to the examples directory.
        """
        categories: dict[str, list[str]] = {}
        for yaml_file in all_yaml_files:
            # Group by the file's directory relative to examples/; files that
            # sit directly in examples/ are reported under "root".
            rel_path = yaml_file.relative_to(examples_directory)
            category = str(rel_path.parent) if rel_path.parent != Path(".") else "root"
            categories.setdefault(category, []).append(rel_path.name)

        print(f"\nYAML Files Summary ({len(all_yaml_files)} total):")
        for category, files in sorted(categories.items()):
            print(f" {category}: {len(files)} files")
            for file_name in sorted(files):
                print(f" - {file_name}")

        # Informational test: the only invariant is that every discovered
        # file ended up in exactly one category.
        assert sum(len(files) for files in categories.values()) == len(all_yaml_files)