vignettes/lcc_variants.Rmd
      lcc_variants.Rmd

Using the file available from /data/CCRBioinfo/projects/TargetOsteoManuscriptData/mutect_summary/TargetOsteoDNA.mutect.GATK_filter.tab:
I did the following:
Summary tables are below.
Load variants and genes.
# Attach the project analysis package and dplyr (needed for `%>%` below).
library(TargetOsteoAnalysis)
library(dplyr)

# mutect calls as a tibble
variants <- variant_calls()
Next, find the unique variants in each of the LCC genes, regardless of the sequencing technique used.
# Build one row per (sample, chromosome, start, end, variant type).
# Rows are sorted by those keys and then by ascending t_vaf, so when the same
# variant appears more than once for a sample, distinct() keeps the row that
# sorts first (the smallest t_vaf). The result is then trimmed to the
# reporting columns and restricted to genes returned by lcc_genes().
unique_vars <- variants %>%
  dplyr::arrange(
    SampleName,
    Chromosome,
    Start_Position,
    End_Position,
    Variant_Type,
    t_vaf
  ) %>%
  dplyr::distinct(
    SampleName,
    Chromosome,
    Start_Position,
    End_Position,
    Variant_Type,
    .keep_all = TRUE
  ) %>%
  # Friendlier column names for the report tables.
  dplyr::rename(
    VariantAlleleFreq = t_vaf,
    HGVS = HGVSp_Short
  ) %>%
  dplyr::select(
    SampleName,
    Chromosome,
    Start_Position,
    End_Position,
    Hugo_Symbol,
    Variant_Type,
    VariantAlleleFreq,
    HGVS,
    Transcript_ID,
    Consequence
  ) %>%
  dplyr::filter(Hugo_Symbol %in% lcc_genes())
All unique variants for the LCC genes are:
| SampleName | Chromosome | Start_Position | End_Position | Hugo_Symbol | Variant_Type | VariantAlleleFreq | HGVS | Transcript_ID | Consequence | 
|---|---|---|---|---|---|---|---|---|---|
| 0A4HLD_Tumor | chr17 | 7577106 | 7577106 | TP53 | SNP | 0.8040000 | p.P278S | ENST00000269305 | missense_variant | 
| 0A4HX8_Tumor | chr17 | 7578461 | 7578461 | TP53 | SNP | 0.8970000 | p.V157F | ENST00000269305 | missense_variant | 
| 0A4I4E_Tumor | chr13 | 49039206 | 49039206 | RB1 | SNP | 0.8150000 | p.Q762* | ENST00000267163 | stop_gained | 
| 0A4I4E_Tumor | chr17 | 7578290 | 7578290 | TP53 | SNP | 0.6720000 | p.X187_splice | ENST00000269305 | splice_acceptor_variant | 
| 0A4I5B_Tumor | chr17 | 7578257 | 7578257 | TP53 | SNP | 0.5930000 | p.E198* | ENST00000269305 | stop_gained | 
| 0A4I6O_Tumor | chr17 | 7578532 | 7578532 | TP53 | SNP | 0.5430000 | p.M133K | ENST00000269305 | missense_variant | 
| 0A4I9K_Tumor | chr17 | 7577097 | 7577097 | TP53 | SNP | 0.6730000 | p.D281N | ENST00000269305 | missense_variant | 
| NAAECZ_Tumor | chr17 | 7577142 | 7577142 | TP53 | SNP | 0.5000000 | p.G266R | ENST00000269305 | missense_variant | 
| NAAEDB_Tumor | chr17 | 7578192 | 7578192 | TP53 | DEL | 0.7740000 | p.Y220Mfs*27 | ENST00000269305 | frameshift_variant | 
| NAAEDG_Tumor | chr17 | 7577498 | 7577498 | TP53 | SNP | 0.5830000 | p.X261_splice | ENST00000269305 | splice_donor_variant | 
| NAAGJP_Tumor | chr17 | 7577094 | 7577094 | TP53 | SNP | 0.1279621 | p.R282W | ENST00000269305 | missense_variant | 
| NAAGJP_Tumor | chrX | 76938407 | 76938407 | ATRX | SNP | 0.1140000 | p.R781* | ENST00000373344 | stop_gained | 
| NAAGJZ_Tumor | chrX | 76937963 | 76937963 | ATRX | SNP | 0.3676471 | p.Q929E | ENST00000373344 | missense_variant | 
| NAAGKM_Tumor | chr17 | 7579373 | 7579373 | TP53 | SNP | 0.6360000 | p.G105V | ENST00000269305 | missense_variant | 
| NAAHBI_Tumor | chr17 | 7577018 | 7577018 | TP53 | SNP | 0.8250000 | p.X307_splice | ENST00000269305 | splice_donor_variant | 
| NAAHBK_Tumor | chr17 | 7577114 | 7577114 | TP53 | SNP | 0.1890000 | p.C275Y | ENST00000269305 | missense_variant | 
| NAAHBU_RecurrentTumor | chr17 | 7579358 | 7579358 | TP53 | SNP | 0.8700000 | p.R110L | ENST00000269305 | missense_variant | 
| NAAHBU_RecurrentTumor | chrX | 76939918 | 76939918 | ATRX | SNP | 0.1910000 | p.V277D | ENST00000373344 | missense_variant | 
| NAAHBU_Tumor | chr17 | 7579358 | 7579358 | TP53 | SNP | 0.4740000 | p.R110L | ENST00000269305 | missense_variant | 
| NAAHBY_RecurrentTumor | chr17 | 7578525 | 7578525 | TP53 | SNP | 0.4120000 | p.C135W | ENST00000269305 | missense_variant | 
| NAAHBY_Tumor | chr17 | 7578525 | 7578525 | TP53 | SNP | 0.5170000 | p.C135W | ENST00000269305 | missense_variant | 
| NAAHBZ_Tumor | chr17 | 7577097 | 7577097 | TP53 | SNP | 0.5530000 | p.D281Y | ENST00000269305 | missense_variant | 
| NAAHCF_Tumor | chr17 | 7578382 | 7578382 | TP53 | SNP | 0.5820000 | p.S183* | ENST00000269305 | stop_gained | 
| NAAHCH_Tumor | chr17 | 7577539 | 7577539 | TP53 | SNP | 0.6630000 | p.R248W | ENST00000269305 | missense_variant | 
| NAAHCJ_RecurrentTumor | chr17 | 7579416 | 7579446 | TP53 | DEL | 0.7140000 | p.T81Gfs*32 | ENST00000269305 | frameshift_variant | 
| NAAHCJ_Tumor | chr17 | 7579416 | 7579446 | TP53 | DEL | 0.7860000 | p.T81Gfs*32 | ENST00000269305 | frameshift_variant | 
| NAAHCR_Tumor | chr17 | 7578545 | 7578545 | TP53 | DEL | 0.5660000 | p.A129Pfs*41 | ENST00000269305 | frameshift_variant | 
| NAAHDF_Tumor | chr10 | 89692911 | 89692911 | PTEN | SNP | 0.1150000 | p.G132D | ENST00000371953 | missense_variant | 
| NAAHDL_Tumor | chr17 | 7577108 | 7577108 | TP53 | SNP | 0.3650000 | p.C277F | ENST00000269305 | missense_variant | 
| NAAIBU_Tumor | chr13 | 48881488 | 48881489 | RB1 | INS | 0.2777778 | p.A74Efs*4 | ENST00000267163 | frameshift_variant | 
| NAAIBW_Tumor | chr10 | 89685296 | 89685296 | PTEN | SNP | 0.5142857 | p.H64P | ENST00000371953 | missense_variant | 
| NAASJI_Tumor | chr17 | 7579310 | 7579310 | TP53 | SNP | 0.4838710 | p.X125_splice | ENST00000269305 | splice_donor_variant | 
| NAAWEX_Tumor | chr17 | 7577121 | 7577121 | TP53 | SNP | 0.7610000 | p.R273C | ENST00000269305 | missense_variant | 
| NAAWGR_Tumor | chrX | 76940042 | 76940042 | ATRX | DEL | 0.6770000 | p.H236Ifs*21 | ENST00000373344 | frameshift_variant | 
| PAIXFY_Tumor | chr13 | 48921987 | 48921987 | RB1 | DEL | 0.7220000 | p.Q176Hfs*10 | ENST00000267163 | frameshift_variant | 
| PAKUZU_Tumor | chr13 | 49039169 | 49039169 | RB1 | SNP | 0.5590000 | p.Y749* | ENST00000267163 | stop_gained | 
| PAKXLD_Tumor | chr17 | 7577120 | 7577120 | TP53 | SNP | 0.5910000 | p.R273H | ENST00000269305 | missense_variant | 
| PAKZZK_Tumor | chr17 | 7577094 | 7577094 | TP53 | SNP | 0.6600000 | p.R282W | ENST00000269305 | missense_variant | 
| PAMHLF_Tumor | chr17 | 7578190 | 7578190 | TP53 | SNP | 0.8330000 | p.Y220C | ENST00000269305 | missense_variant | 
| PAMJXS_Tumor | chr17 | 7577114 | 7577114 | TP53 | SNP | 0.7250000 | p.C275Y | ENST00000269305 | missense_variant | 
| PAMRHD_Tumor | chr13 | 49037915 | 49037917 | RB1 | DEL | 0.5860000 | p.L719del | ENST00000267163 | inframe_deletion | 
| PAMYYJ_Tumor | chr13 | 49027153 | 49027154 | RB1 | INS | 0.6110000 | p.K574Ifs*11 | ENST00000267163 | frameshift_variant | 
| PAMYYJ_Tumor | chrX | 76937383 | 76937383 | ATRX | DEL | 0.6000000 | p.C1122Lfs*8 | ENST00000373344 | frameshift_variant | 
| PANGCY_Tumor | chr22 | 30050681 | 30050682 | NF2 | INS | 0.0940000 | p.L163Ffs*40 | ENST00000338641 | frameshift_variant | 
| PANMIG_Tumor | chr13 | 49027128 | 49027128 | RB1 | SNP | 0.7710000 | p.X566_splice | ENST00000267163 | splice_acceptor_variant | 
| PANMIG_Tumor | chrX | 76855051 | 76855051 | ATRX | SNP | 0.9160000 | p.X1929_splice | ENST00000373344 | splice_acceptor_variant | 
| PAPVYW_Tumor | chr17 | 7577120 | 7577120 | TP53 | SNP | 0.3750000 | p.R273H | ENST00000269305 | missense_variant | 
| PARDAX_Tumor | chr13 | 48955550 | 48955550 | RB1 | SNP | 0.7500000 | p.R556* | ENST00000267163 | stop_gained | 
| PARDAX_Tumor | chr17 | 7578203 | 7578203 | TP53 | SNP | 0.7880000 | p.V216M | ENST00000269305 | missense_variant | 
| PASEFS_Tumor | chr17 | 7576855 | 7576855 | TP53 | SNP | 0.9920000 | p.Q331* | ENST00000269305 | stop_gained,splice_region_variant | 
| PASEFS_Tumor | chrX | 76938593 | 76938594 | ATRX | INS | 0.9680000 | p.S719Ifs*2 | ENST00000373344 | frameshift_variant | 
| PASKZZ_Tumor | chr17 | 7578475 | 7578475 | TP53 | DEL | 0.6500000 | p.P152Rfs*18 | ENST00000269305 | frameshift_variant | 
| PASNZV_Tumor | chr13 | 48955538 | 48955538 | RB1 | SNP | 0.6260000 | p.R552* | ENST00000267163 | stop_gained | 
| PASNZV_Tumor | chr17 | 7579324 | 7579325 | TP53 | INS | 0.6680000 | p.V122Sfs*3 | ENST00000269305 | frameshift_variant | 
| PASSLM_Tumor | chr13 | 48953743 | 48953743 | RB1 | SNP | 0.8150000 | p.G449V | ENST00000267163 | missense_variant | 
| PASSLM_Tumor | chr17 | 7577498 | 7577498 | TP53 | SNP | 0.9440000 | p.X261_splice | ENST00000269305 | splice_donor_variant | 
| PASSLM_Tumor | chrX | 76937879 | 76937880 | ATRX | INS | 0.9430000 | p.V957Sfs*7 | ENST00000373344 | frameshift_variant | 
| PATMXR_Tumor | chrX | 76891541 | 76891541 | ATRX | SNP | 0.4184100 | p.E1522K | ENST00000373344 | missense_variant | 
| PATPBS_Tumor | chr17 | 7577094 | 7577094 | TP53 | SNP | 0.5640000 | p.R282W | ENST00000269305 | missense_variant | 
| PATUXZ_Tumor | chrX | 76874323 | 76874323 | ATRX | SNP | 0.7260000 | p.M1800K | ENST00000373344 | missense_variant | 
| PAVDSN_Tumor | chr17 | 7577545 | 7577545 | TP53 | SNP | 0.8560000 | p.M246V | ENST00000269305 | missense_variant | 
| PAVLIB_Tumor | chr17 | 7577142 | 7577142 | TP53 | SNP | 0.1160000 | p.G266R | ENST00000269305 | missense_variant | 
| PAVXDP_Tumor | chr17 | 7578532 | 7578532 | TP53 | SNP | 0.7000000 | p.M133K | ENST00000269305 | missense_variant | 
Removing recurrent tumors:
| SampleName | Chromosome | Start_Position | End_Position | Hugo_Symbol | Variant_Type | VariantAlleleFreq | HGVS | Transcript_ID | Consequence | 
|---|---|---|---|---|---|---|---|---|---|
| 0A4HLD_Tumor | chr17 | 7577106 | 7577106 | TP53 | SNP | 0.8040000 | p.P278S | ENST00000269305 | missense_variant | 
| 0A4HX8_Tumor | chr17 | 7578461 | 7578461 | TP53 | SNP | 0.8970000 | p.V157F | ENST00000269305 | missense_variant | 
| 0A4I4E_Tumor | chr13 | 49039206 | 49039206 | RB1 | SNP | 0.8150000 | p.Q762* | ENST00000267163 | stop_gained | 
| 0A4I4E_Tumor | chr17 | 7578290 | 7578290 | TP53 | SNP | 0.6720000 | p.X187_splice | ENST00000269305 | splice_acceptor_variant | 
| 0A4I5B_Tumor | chr17 | 7578257 | 7578257 | TP53 | SNP | 0.5930000 | p.E198* | ENST00000269305 | stop_gained | 
| 0A4I6O_Tumor | chr17 | 7578532 | 7578532 | TP53 | SNP | 0.5430000 | p.M133K | ENST00000269305 | missense_variant | 
| 0A4I9K_Tumor | chr17 | 7577097 | 7577097 | TP53 | SNP | 0.6730000 | p.D281N | ENST00000269305 | missense_variant | 
| NAAECZ_Tumor | chr17 | 7577142 | 7577142 | TP53 | SNP | 0.5000000 | p.G266R | ENST00000269305 | missense_variant | 
| NAAEDB_Tumor | chr17 | 7578192 | 7578192 | TP53 | DEL | 0.7740000 | p.Y220Mfs*27 | ENST00000269305 | frameshift_variant | 
| NAAEDG_Tumor | chr17 | 7577498 | 7577498 | TP53 | SNP | 0.5830000 | p.X261_splice | ENST00000269305 | splice_donor_variant | 
| NAAGJP_Tumor | chr17 | 7577094 | 7577094 | TP53 | SNP | 0.1279621 | p.R282W | ENST00000269305 | missense_variant | 
| NAAGJP_Tumor | chrX | 76938407 | 76938407 | ATRX | SNP | 0.1140000 | p.R781* | ENST00000373344 | stop_gained | 
| NAAGJZ_Tumor | chrX | 76937963 | 76937963 | ATRX | SNP | 0.3676471 | p.Q929E | ENST00000373344 | missense_variant | 
| NAAGKM_Tumor | chr17 | 7579373 | 7579373 | TP53 | SNP | 0.6360000 | p.G105V | ENST00000269305 | missense_variant | 
| NAAHBI_Tumor | chr17 | 7577018 | 7577018 | TP53 | SNP | 0.8250000 | p.X307_splice | ENST00000269305 | splice_donor_variant | 
| NAAHBK_Tumor | chr17 | 7577114 | 7577114 | TP53 | SNP | 0.1890000 | p.C275Y | ENST00000269305 | missense_variant | 
| NAAHBU_Tumor | chr17 | 7579358 | 7579358 | TP53 | SNP | 0.4740000 | p.R110L | ENST00000269305 | missense_variant | 
| NAAHBY_Tumor | chr17 | 7578525 | 7578525 | TP53 | SNP | 0.5170000 | p.C135W | ENST00000269305 | missense_variant | 
| NAAHBZ_Tumor | chr17 | 7577097 | 7577097 | TP53 | SNP | 0.5530000 | p.D281Y | ENST00000269305 | missense_variant | 
| NAAHCF_Tumor | chr17 | 7578382 | 7578382 | TP53 | SNP | 0.5820000 | p.S183* | ENST00000269305 | stop_gained | 
| NAAHCH_Tumor | chr17 | 7577539 | 7577539 | TP53 | SNP | 0.6630000 | p.R248W | ENST00000269305 | missense_variant | 
| NAAHCJ_Tumor | chr17 | 7579416 | 7579446 | TP53 | DEL | 0.7860000 | p.T81Gfs*32 | ENST00000269305 | frameshift_variant | 
| NAAHCR_Tumor | chr17 | 7578545 | 7578545 | TP53 | DEL | 0.5660000 | p.A129Pfs*41 | ENST00000269305 | frameshift_variant | 
| NAAHDF_Tumor | chr10 | 89692911 | 89692911 | PTEN | SNP | 0.1150000 | p.G132D | ENST00000371953 | missense_variant | 
| NAAHDL_Tumor | chr17 | 7577108 | 7577108 | TP53 | SNP | 0.3650000 | p.C277F | ENST00000269305 | missense_variant | 
| NAAIBU_Tumor | chr13 | 48881488 | 48881489 | RB1 | INS | 0.2777778 | p.A74Efs*4 | ENST00000267163 | frameshift_variant | 
| NAAIBW_Tumor | chr10 | 89685296 | 89685296 | PTEN | SNP | 0.5142857 | p.H64P | ENST00000371953 | missense_variant | 
| NAASJI_Tumor | chr17 | 7579310 | 7579310 | TP53 | SNP | 0.4838710 | p.X125_splice | ENST00000269305 | splice_donor_variant | 
| NAAWEX_Tumor | chr17 | 7577121 | 7577121 | TP53 | SNP | 0.7610000 | p.R273C | ENST00000269305 | missense_variant | 
| NAAWGR_Tumor | chrX | 76940042 | 76940042 | ATRX | DEL | 0.6770000 | p.H236Ifs*21 | ENST00000373344 | frameshift_variant | 
| PAIXFY_Tumor | chr13 | 48921987 | 48921987 | RB1 | DEL | 0.7220000 | p.Q176Hfs*10 | ENST00000267163 | frameshift_variant | 
| PAKUZU_Tumor | chr13 | 49039169 | 49039169 | RB1 | SNP | 0.5590000 | p.Y749* | ENST00000267163 | stop_gained | 
| PAKXLD_Tumor | chr17 | 7577120 | 7577120 | TP53 | SNP | 0.5910000 | p.R273H | ENST00000269305 | missense_variant | 
| PAKZZK_Tumor | chr17 | 7577094 | 7577094 | TP53 | SNP | 0.6600000 | p.R282W | ENST00000269305 | missense_variant | 
| PAMHLF_Tumor | chr17 | 7578190 | 7578190 | TP53 | SNP | 0.8330000 | p.Y220C | ENST00000269305 | missense_variant | 
| PAMJXS_Tumor | chr17 | 7577114 | 7577114 | TP53 | SNP | 0.7250000 | p.C275Y | ENST00000269305 | missense_variant | 
| PAMRHD_Tumor | chr13 | 49037915 | 49037917 | RB1 | DEL | 0.5860000 | p.L719del | ENST00000267163 | inframe_deletion | 
| PAMYYJ_Tumor | chr13 | 49027153 | 49027154 | RB1 | INS | 0.6110000 | p.K574Ifs*11 | ENST00000267163 | frameshift_variant | 
| PAMYYJ_Tumor | chrX | 76937383 | 76937383 | ATRX | DEL | 0.6000000 | p.C1122Lfs*8 | ENST00000373344 | frameshift_variant | 
| PANGCY_Tumor | chr22 | 30050681 | 30050682 | NF2 | INS | 0.0940000 | p.L163Ffs*40 | ENST00000338641 | frameshift_variant | 
| PANMIG_Tumor | chr13 | 49027128 | 49027128 | RB1 | SNP | 0.7710000 | p.X566_splice | ENST00000267163 | splice_acceptor_variant | 
| PANMIG_Tumor | chrX | 76855051 | 76855051 | ATRX | SNP | 0.9160000 | p.X1929_splice | ENST00000373344 | splice_acceptor_variant | 
| PAPVYW_Tumor | chr17 | 7577120 | 7577120 | TP53 | SNP | 0.3750000 | p.R273H | ENST00000269305 | missense_variant | 
| PARDAX_Tumor | chr13 | 48955550 | 48955550 | RB1 | SNP | 0.7500000 | p.R556* | ENST00000267163 | stop_gained | 
| PARDAX_Tumor | chr17 | 7578203 | 7578203 | TP53 | SNP | 0.7880000 | p.V216M | ENST00000269305 | missense_variant | 
| PASEFS_Tumor | chr17 | 7576855 | 7576855 | TP53 | SNP | 0.9920000 | p.Q331* | ENST00000269305 | stop_gained,splice_region_variant | 
| PASEFS_Tumor | chrX | 76938593 | 76938594 | ATRX | INS | 0.9680000 | p.S719Ifs*2 | ENST00000373344 | frameshift_variant | 
| PASKZZ_Tumor | chr17 | 7578475 | 7578475 | TP53 | DEL | 0.6500000 | p.P152Rfs*18 | ENST00000269305 | frameshift_variant | 
| PASNZV_Tumor | chr13 | 48955538 | 48955538 | RB1 | SNP | 0.6260000 | p.R552* | ENST00000267163 | stop_gained | 
| PASNZV_Tumor | chr17 | 7579324 | 7579325 | TP53 | INS | 0.6680000 | p.V122Sfs*3 | ENST00000269305 | frameshift_variant | 
| PASSLM_Tumor | chr13 | 48953743 | 48953743 | RB1 | SNP | 0.8150000 | p.G449V | ENST00000267163 | missense_variant | 
| PASSLM_Tumor | chr17 | 7577498 | 7577498 | TP53 | SNP | 0.9440000 | p.X261_splice | ENST00000269305 | splice_donor_variant | 
| PASSLM_Tumor | chrX | 76937879 | 76937880 | ATRX | INS | 0.9430000 | p.V957Sfs*7 | ENST00000373344 | frameshift_variant | 
| PATMXR_Tumor | chrX | 76891541 | 76891541 | ATRX | SNP | 0.4184100 | p.E1522K | ENST00000373344 | missense_variant | 
| PATPBS_Tumor | chr17 | 7577094 | 7577094 | TP53 | SNP | 0.5640000 | p.R282W | ENST00000269305 | missense_variant | 
| PATUXZ_Tumor | chrX | 76874323 | 76874323 | ATRX | SNP | 0.7260000 | p.M1800K | ENST00000373344 | missense_variant | 
| PAVDSN_Tumor | chr17 | 7577545 | 7577545 | TP53 | SNP | 0.8560000 | p.M246V | ENST00000269305 | missense_variant | 
| PAVLIB_Tumor | chr17 | 7577142 | 7577142 | TP53 | SNP | 0.1160000 | p.G266R | ENST00000269305 | missense_variant | 
| PAVXDP_Tumor | chr17 | 7578532 | 7578532 | TP53 | SNP | 0.7000000 | p.M133K | ENST00000269305 | missense_variant | 
Gene counts (counting each gene only once per sample) are:
# Drop samples whose name contains "recurrent" (case-insensitive), then reduce
# to one row per sample/gene pair so each gene is counted once per sample.
unique_gene_samples <- unique_vars %>%
  dplyr::filter(!grepl('recurrent', SampleName, ignore.case = TRUE)) %>%
  dplyr::select(SampleName, Hugo_Symbol) %>%
  unique()

# Number of samples per gene, in ascending order of count.
sort(table(unique_gene_samples$Hugo_Symbol))
## 
##  NF2 PTEN ATRX  RB1 TP53 
##    1    2    9   10   37
# Export the variant table: first write a single '#'-prefixed header line
# listing the values returned by lcc_genes(), then append the table itself
# (including its column names) to the same file.
readr::write_lines(
  paste(
    '# Protein-altering somatic variants in any of the genes ',
    paste(lcc_genes(), collapse=", ")
  ),
  '/tmp/unique_vars_LCC_mutect.csv'
)
readr::write_csv(
  unique_vars,
  '/tmp/unique_vars_LCC_mutect.csv',
  append = TRUE,
  col_names = TRUE
)