Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 120 additions & 0 deletions dataset/config/complex_traits.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
trait
AD
AFib
AG
Age_at_Menarche
Age_at_Menopause
AID
Alb
ALP
ALT
Alzheimer
AMD
ApoA
ApoB
AST
Asthma
Balding_Type4
Baso
Benign_Neoplasms
BFP
Blood_Clot_Lung
BMI
BrC
BW
Ca
CAD
Carpal_Tunnel_Syndrome
Cataract
CD
Cholelithiasis
Cirrhosis
College
COPD
Coxarthrosis
CRC
CRP
DBP
Depression
Diverticulosis
DVT
eBMD
EduYears
eGFR
eGFRcys
Eosino
FedUp_Feelings
FEV1FVC
Fibroblastic_Disorders
GGT
Glaucoma
Glucose
Guilty_Feelings
Hb
HbA1c
HDLC
Height
Ht
Hypothyroidism
IBD
IGF1
Inguinal_Hernia
Insomnia
Irritability
IS
LDLC
LipoA
Loneliness
LOY
LuC
Lym
Malignant_Neoplasms
MAP
MCH
MCHC
MCP
MCV
MI
Migraine
Miserableness
Mono
Mood_Swings
Morning_Person
NAP
Nervous_Feelings
Neuroticism
Neutro
P
Plt
PP
PrC
RA
RBC
Risk_Taking
SBP
sCr
Sensitivity
SHBG
SkC
Smoking_CPD
Smoking_Ever_Never
Suffer_from_Nerves
T1D
T2D
TBil
TC
Tense
Testosterone_F
Testosterone_M
TG
TP
UA
UF
Urea
Urolithiasis
Varicose_Veins
VitD
WBC
WHRadjBMI
Worrier
Worry_Too_Long
1 change: 1 addition & 0 deletions dataset/config/config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
annotation_url: "http://ftp.ensembl.org/pub/release-107/gtf/homo_sapiens/Homo_sapiens.GRCh38.107.chr.gtf.gz"
genome_url: "http://ftp.ensembl.org/pub/release-107/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.primary_assembly.fa.gz"

clinvar:
release: 20251019
Expand Down
10 changes: 10 additions & 0 deletions dataset/workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,13 @@ include: "rules/gnomad.smk"
include: "rules/hgmd.smk"
include: "rules/mendelian_traits.smk"
include: "rules/smedley_et_al.smk"
include: "rules/complex_traits.smk"
include: "rules/ldscore.smk"


rule all:
input:
"results/dataset/mendelian_traits_matched_9/test.parquet",
"results/feature_performance/mendelian_traits_matched_9.parquet",
"results/dataset/complex_traits_matched_9/test.parquet",
"results/feature_performance/complex_traits_matched_9.parquet",
11 changes: 10 additions & 1 deletion dataset/workflow/rules/common.smk
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from gpn.data import Genome
import pandas as pd
import polars as pl
from cyvcf2 import VCF
from sklearn.metrics import average_precision_score

from traitgym.intervals import add_exon, add_tss
from traitgym.intervals import add_exon, add_tss, build_dataset
from traitgym.matching import match_features
from traitgym.variants import (
COORDINATES,
Expand All @@ -12,4 +13,12 @@ from traitgym.variants import (
filter_snp,
filter_chroms,
lift_hg19_to_hg38,
check_ref_alt,
)


rule download_genome:
output:
"results/genome.fa.gz",
shell:
"wget {config[genome_url]} -O {output}"
Loading