Read in the splicing results returned by MAJIQ and make a volcano plot, only highlight genes of interest with a label.
ipsc_splicing = fread(file.path(here::here(),"data","ipsc_splicing_results.csv"))
ipsc_de = fread(file.path(here::here(),"data","ipsc_differential_expression.csv"))
splicing_dots_tables <- ipsc_splicing %>%
mutate(junction_name = case_when(gene_name %in% c("UNC13A","AGRN",
"UNC13B","PFKP","SETD5",
"ATG4B","STMN2") &
p_d_psi_0_10_per_lsv_junction > 0.9 &
deltaPSI > 0 ~ gene_name,
T ~ "")) %>%
mutate(`Novel Junction` = de_novo_junctions == 0) %>%
mutate(log10_test_stat = -log10(1 - p_d_psi_0_10_per_lsv_junction)) %>%
mutate(log10_test_stat = ifelse(is.infinite(log10_test_stat), 16, log10_test_stat)) %>%
mutate(graph_alpha = ifelse(p_d_psi_0_10_per_lsv_junction > 0.9, 1, 0.2)) %>%
mutate(label_junction = case_when(gene_name %in% c("UNC13A","AGRN",
"UNC13B","PFKP","SETD5",
"ATG4B","STMN2") &
p_d_psi_0_10_per_lsv_junction > 0.9 &
deltaPSI > 0 ~ junction_name,
T ~ ""))
fig1a = ggplot() +
geom_point(data = splicing_dots_tables %>% filter(de_novo_junctions != 0),
aes(x = deltaPSI, y =log10_test_stat,
alpha = graph_alpha,,fill = "Annotated Junction"), pch = 21, size = 10) +
geom_point(data = splicing_dots_tables %>% filter(de_novo_junctions == 0),
aes(x = deltaPSI, y =log10_test_stat,
alpha = graph_alpha,fill = "Novel Junction"), pch = 21, size = 10) +
geom_text_repel(data = splicing_dots_tables[junction_name != ""],
aes(x = deltaPSI, y =log10_test_stat,
label = label_junction,
color = as.character(de_novo_junctions)), point.padding = 0.3,
nudge_y = 0.2,
min.segment.length = 0,
box.padding = 2,
size=6,show.legend = F) +
geom_hline(yintercept = -log10(1 - .9)) +
geom_vline(xintercept = -0,linetype="dotted") +
scale_fill_manual(name = "",
breaks = c("Annotated Junction", "Novel Junction"),
values = c("Annotated Junction" = "#648FFF", "Novel Junction" = "#fe6101") ) +
scale_color_manual(name = "",
breaks = c("0", "1"),
values = c("1" = "#648FFF", "0" = "#fe6101") ) +
guides(alpha = FALSE, size = FALSE) +
theme(legend.position = 'top') +
ggpubr::theme_pubr() +
xlab("delta PSI") +
ylab(expression(paste("-Lo", g[10], " Test Statistic"))) +
theme(text = element_text(size = 24)) +
theme(legend.text=element_text(size=22)) +
xlim(-1,1) +
scale_x_continuous(labels = scales::percent)
Scale for 'x' is already present. Adding another scale for 'x', which will
replace the existing scale.
de_table = ipsc_de %>%
mutate(contains_cryptic = gene_name %in% splicing_dots_tables[cryptic_junction == T,unique(gene_name)]) %>%
mutate(contains_cryptic = as.character(as.numeric(contains_cryptic))) %>%
mutate(label_junction = case_when(gene_name %in% c("UNC13A","AGRN",
"UNC13B","PFKP","SETD5",
"ATG4B","STMN2","TARDBP") ~ gene_name,
T ~ "")) %>%
mutate(graph_alpha = ifelse(padj < 0.1, 1, 0.2)) %>%
mutate(y_data = -log10(padj))
fig1b = ggplot(data = de_table) +
geom_point(data = de_table %>% filter(contains_cryptic == "0"),
aes(x = log2FoldChange, y = -log10(padj),
alpha = graph_alpha,fill = "No Cryptic"), pch = 21, size = 10) +
geom_point(data = de_table %>% filter(contains_cryptic == "1"),
aes(x = log2FoldChange, y = -log10(padj),
alpha = graph_alpha,fill = "Contains Cryptic"), pch = 21, size = 10)+
geom_text_repel(data = de_table[label_junction != ""],max.overlaps = 500,
aes(x = log2FoldChange,
y = -log10(padj),
label = label_junction,
color = as.character(contains_cryptic)),
nudge_y = 5,
min.segment.length = 0,
box.padding = 4,
size=6,show.legend = F) +
scale_fill_manual(name = "",
breaks = c("No Cryptic", "Contains Cryptic"),
values = c("No Cryptic" = "#648FFF", "Contains Cryptic" = "#fe6101") ) +
scale_color_manual(name = "",
breaks = c("1", "0"),
values = c("1" = "#fe6101", "0" = "#648FFF") ) +
xlim(-7.5,7.5) +
ylab(expression(paste("-Lo", g[10], " P-value"))) +
guides(alpha = FALSE, size = FALSE) +
theme(legend.position = 'top') +
ggpubr::theme_pubr() +
xlab(expression(paste("Lo", g[2], " Fold Change"))) +
theme(text = element_text(size = 24)) +
theme(legend.text=element_text(size=22)) +
geom_hline(yintercept = -log10(0.1)) +
geom_vline(xintercept=c(0), linetype="dotted")
print(fig1a)
print(fig1b)
The ‘C/G’ tells which genotypes were supported by RNA-seq on rs12973192. The SK-N-DZ cell lines are het, as is the WTC11 cell line. SH-SY5Y cells are homozygote for the major allele. There was variability on the Klim hMN set on allelic expression.
rel_rna_cryptic_amount = data.table::fread(file.path(here::here(),"data","kd_experiments_relative_rna_and_unc13a_cryptic_junction_counts.csv"))
rel_rna_cryptic_amount[,cryptic_psi_full := ( UNC13A_3prime +
UNC13A_5prime + UNC13A_5prime_2 +
UNC13A_5prime_3) / (UNC13A_annotated + UNC13A_3prime +
UNC13A_5prime + UNC13A_5prime_2 +
UNC13A_5prime_3)]
rel_rna_cryptic_amount %>%
ggbarplot(,
x = "source",
add = c("mean_se","jitter"),
y = "cryptic_psi_full",
fill = 'condition',
color = 'condition',
position = position_dodge(0.8)) +
ggpubr::theme_pubr() +
scale_fill_manual(name = "",
values = c("#40B0A6","#E1BE6A")
) +
scale_color_manual(name = "",
values = c("#1C2617","#262114")
) +
ylab("UNC13A CE PSI") +
xlab("") +
guides(color = FALSE) +
scale_y_continuous(labels = scales::percent) +
theme(text = element_text(size = 20,family = 'sans'),
legend.text = element_text(size = 36,family = 'sans'),
axis.title.y = element_text(size = 28),
axis.text.y = element_text(size = 28))
rel_rna_cryptic_amount %>%
ggbarplot(,
x = "condition",
add = c("mean_se","jitter"),
y = "unc13b_nmd_exon_psi",
fill = 'condition',
color = 'condition',
position = position_dodge(0.8),facet.by = 'source') +
ggpubr::theme_pubr() +
scale_fill_manual(name = "",
values = c("#40B0A6","#E1BE6A")
) +
scale_color_manual(name = "",
values = c("#1C2617","#262114")
) +
ylab("UNC13B \nNMD Exon PSI") +
xlab("") +
guides(color = FALSE) +
theme(text = element_text(size = 20,family = 'sans'),
legend.text = element_text(size = 36,family = 'sans'),
axis.title.y = element_text(size = 28),
axis.text.y = element_text(size = 28)) +
facet_wrap(~source) +
stat_compare_means() +
scale_y_continuous(labels = scales::percent)
rel_rna_cryptic_amount %>%
filter(condition != "control") %>%
ggplot() +
geom_point(aes(x = TARDBP, y = cryptic_psi_full, fill = source),pch = 21,size = 4) +
stat_cor(aes(x = TARDBP, y = cryptic_psi_full),size = 12,method = 's',cor.coef.name = 'rho') +
geom_smooth(aes(x = TARDBP, y = cryptic_psi_full),color = "black",method = 'lm') +
ggpubr::theme_pubr() +
theme(text = element_text(size = 20)) +
scale_x_continuous(labels = scales::percent) +
ylab("UNC13A CE PSI") +
theme(legend.title=element_blank()) +
theme(legend.position = 'bottom') +
scale_y_continuous(labels = scales::percent)
`geom_smooth()` using formula 'y ~ x'
rel_rna_cryptic_amount %>%
filter(condition != "control") %>%
ggplot() +
geom_point(aes(x = UNC13A, y = cryptic_psi_full, fill = source),pch = 21,size = 6) +
stat_cor(aes(x = UNC13A, y = cryptic_psi_full),size = 12,
method = 'spearman',
cor.coef.name = 'rho') +
geom_smooth(aes(x = UNC13A, y = cryptic_psi_full),color = "black",method = 'lm') +
ggpubr::theme_pubr() +
theme(text = element_text(size = 20)) +
scale_x_continuous(labels = scales::percent) +
scale_y_continuous(labels = scales::percent) +
ylab("UNC13A CE PSI") +
expand_limits(y = 1) +
theme(legend.title=element_blank()) +
theme(legend.position = 'bottom') +
theme(text = element_text(size = 18,family = 'sans'),
legend.text = element_text(size = 20,family = 'sans'),
axis.title = element_text(size = 32),
axis.text = element_text(size = 32))
`geom_smooth()` using formula 'y ~ x'
## UNC13A RNA and UNC13A IR
rel_rna_cryptic_amount %>%
filter(condition != "control") %>%
ggplot() +
geom_point(aes(x = UNC13A, y = normalized_unc13a_ir, fill = source),pch = 21,size = 4) +
stat_cor(aes(x = UNC13A, y = normalized_unc13a_ir),size = 12,cor.coef.name = 'rho',method = 's') +
geom_smooth(aes(x = UNC13A, y = normalized_unc13a_ir),color = "black",method = 'lm') +
ggpubr::theme_pubr() +
theme(text = element_text(size = 20)) +
scale_x_continuous(labels = scales::percent) +
ylab("UNC13A Normalized IR") +
theme(legend.title=element_blank()) +
theme(legend.position = 'bottom')
`geom_smooth()` using formula 'y ~ x'
## TARDBP RNA and UNC13A IR
rel_rna_cryptic_amount %>%
filter(condition != "control") %>%
ggplot() +
geom_point(aes(x = TARDBP, y = normalized_unc13a_ir, fill = source),pch = 21,size = 4) +
stat_cor(aes(x = TARDBP, y = normalized_unc13a_ir),size = 12) +
geom_smooth(aes(x = TARDBP, y = normalized_unc13a_ir),color = "black",method = 'lm') +
ggpubr::theme_pubr() +
theme(text = element_text(size = 20)) +
scale_x_continuous(labels = scales::percent) +
ylab("UNC13A Normalized IR") +
theme(legend.title=element_blank()) +
theme(legend.position = 'bottom')
`geom_smooth()` using formula 'y ~ x'
rel_rna_cryptic_amount %>%
ggbarplot(,
x = "source",
add = c("mean_se","jitter"),
y = "cryptic_psi_full",
fill = 'condition',
color = 'condition',
position = position_dodge(0.8)) +
ggpubr::theme_pubr() +
scale_fill_manual(
values = c("#40B0A6","#E1BE6A")
) +
scale_color_manual(
values = c("#1C2617","#262114")
) +
ylab("UNC13A CE PSI") +
xlab("") +
guides(color = FALSE)
stmn2 = rel_rna_cryptic_amount %>%
ggbarplot(,
x = "source",
add = c("mean_se","jitter"),
y = "stmn_2_cryptic_psi",
fill = 'condition',
color = 'condition',
position = position_dodge(0.8)) +
ggpubr::theme_pubr() +
scale_fill_manual(
values = c("#40B0A6","#E1BE6A")
) +
scale_color_manual(
values = c("#1C2617","#262114")
) +
ylab("STMN2 Cryptic PSI") +
xlab("") +
guides(color = FALSE)
print(stmn2)
unc13b_ir = rel_rna_cryptic_amount %>%
ggbarplot(,
x = "source",
add = c("mean_se","jitter"),
y = "normalized_unc13b_ir",
fill = 'condition',
color = 'condition',
position = position_dodge(0.8)) +
ggpubr::theme_pubr() +
scale_fill_manual(
values = c("#40B0A6","#E1BE6A")
) +
scale_color_manual(
values = c("#1C2617","#262114")
) +
ylab("Normalized UNC13B \nIntron Retention Ratio") +
xlab("") +
guides(color = FALSE) +
theme(legend.position = "none") +
theme(text = element_text(size = 18)) +
guides(color = FALSE) +
theme(text = element_text(size = 20,family = 'sans'),
legend.text = element_text(size = 36,family = 'sans'),
axis.title.y = element_text(size = 28),
axis.text.y = element_text(size = 28))
print(unc13b_ir)
unc13a_ir = rel_rna_cryptic_amount %>%
ggbarplot(,
x = "source",
add = c("mean_se","jitter"),
y = "normalized_unc13a_ir",
fill = 'condition',
color = 'condition',
position = position_dodge(0.8)) +
ggpubr::theme_pubr() +
scale_fill_manual(
values = c("#40B0A6","#E1BE6A")
) +
scale_color_manual(
values = c("#1C2617","#262114")
) +
ylab("Normalized UNC13A \nIntron Retention Ratio") +
xlab("") +
theme(legend.position = "none") +
guides(color = FALSE) +
theme(text = element_text(size = 20,family = 'sans'),
legend.text = element_text(size = 36,family = 'sans'),
axis.title.y = element_text(size = 28),
axis.text.y = element_text(size = 28))
text_table = fread(file.path(here::here(),"data","deseq2_cellline_fold_change.csv"))
rel_rna_cryptic_amount %>%
ggbarplot(,
x = "source",
add = c("mean_se","jitter"),
y = "UNC13A",
fill = 'condition',
color = 'condition',
position = position_dodge(0.8)) +
ggpubr::theme_pubr() +
scale_fill_manual(
values = c("#40B0A6","#E1BE6A")
) +
scale_color_manual(
values = c("#1C2617","#262114")
) +
ylab("UNC13A") +
xlab("") +
theme(legend.position = "none") +
guides(color = FALSE) +
theme(text = element_text(size = 20,family = 'sans'),
legend.text = element_text(size = 36,family = 'sans'),
axis.title.y = element_text(size = 28),
axis.text.y = element_text(size = 28)) +
scale_y_continuous(labels = scales::percent) +
geom_text(data = text_table[gene_name == "UNC13A"],aes(x = source, y= 1.35,label = padj_plot),size = 8) +
geom_text(data = text_table[gene_name == "UNC13A"],aes(x = source, y= 1.2,label = log2FoldChange_plot),size = 8)
text_table = fread(file.path(here::here(),"data","deseq2_cellline_fold_change.csv"))
rel_rna_cryptic_amount %>%
left_join(text_table[,.(plot_name,source)]) %>%
unique() %>%
mutate(plot_name = fct_relevel(plot_name,"iPSC MN","SH-SY5Y","I3 Neurons","SK-N-DZ_a")) %>%
ggbarplot(,
x = "plot_name",
add = c("mean_se","jitter"),
y = "UNC13B",
fill = 'condition',
color = 'condition',
position = position_dodge(0.8)) +
ggpubr::theme_pubr() +
scale_fill_manual(
values = c("#40B0A6","#E1BE6A")
) +
scale_color_manual(
values = c("#1C2617","#262114")
) +
ylab("UNC13B") +
xlab("") +
theme(legend.position = "none") +
guides(color = FALSE) +
theme(text = element_text(size = 20,family = 'sans'),
legend.text = element_text(size = 36,family = 'sans'),
axis.title.y = element_text(size = 28),
axis.text.y = element_text(size = 28)) +
scale_y_continuous(labels = scales::percent,expand = expansion(mult = c(0, .1))) +
geom_text(data = text_table[gene_name == "UNC13B"],aes(x = plot_name, y= 1.35,label = padj_plot),size = 8) +
geom_text(data = text_table[gene_name == "UNC13B"],aes(x = plot_name, y= 1.2,label = log2FoldChange_plot),size = 8)
Joining, by = c("source", "plot_name")
text_table = fread(file.path(here::here(),"data","deseq2_cellline_fold_change.csv"))
rel_rna_cryptic_amount %>%
left_join(text_table[,.(plot_name,source)]) %>%
unique() %>%
mutate(plot_name = fct_relevel(plot_name,"iPSC MN","SH-SY5Y","I3 Neurons","SK-N-DZ_a")) %>%
ggbarplot(,
x = "plot_name",
add = c("mean_se","jitter"),
y = "TARDBP",
fill = 'condition',
color = 'condition',
position = position_dodge(0.8)) +
ggpubr::theme_pubr() +
scale_fill_manual(
values = c("#40B0A6","#E1BE6A")
) +
scale_color_manual(
values = c("#1C2617","#262114")
) +
ylab("TARDBP") +
xlab("") +
theme(legend.position = "none") +
guides(color = FALSE) +
theme(text = element_text(size = 20,family = 'sans'),
legend.text = element_text(size = 36,family = 'sans'),
axis.title.y = element_text(size = 28),
axis.text.y = element_text(size = 28)) +
scale_y_continuous(labels = scales::percent,expand = expansion(mult = c(0, .1))) +
geom_text(data = text_table[gene_name == "TARDBP"],aes(x = plot_name, y= 1.35,label = padj_plot),size = 8) +
geom_text(data = text_table[gene_name == "TARDBP"],aes(x = plot_name, y= 1.2,label = log2FoldChange_plot),size = 8)
Joining, by = c("source", "plot_name")
## Scatterplot stmn2 normalized TARDBP
rel_rna_cryptic_amount %>%
filter(condition != "control") %>%
ggplot() +
geom_point(aes(x = TARDBP, y = stmn_2_cryptic_psi, fill = source),pch = 21,size = 4) +
stat_cor(aes(x = TARDBP, y = stmn_2_cryptic_psi)) +
theme(legend.position = 'bottom')
rel_rna_cryptic_amount %>%
filter(condition != "control") %>%
ggplot() +
geom_point(aes(x = TARDBP, y = normalized_unc13b_ir, fill = source),pch = 21,size = 4) +
stat_cor(aes(x = TARDBP, y = normalized_unc13b_ir)) +
theme(legend.position = 'bottom')
rel_rna_cryptic_amount %>%
filter(condition != "control") %>%
ggplot() +
geom_point(aes(x = cryptic_psi_full,
y = normalized_unc13a_ir, fill = source),pch = 21,size = 4) +
stat_cor(aes(x = cryptic_psi_full,
y = normalized_unc13a_ir)) +
theme(legend.position = 'bottom')
rel_rna_cryptic_amount %>%
filter(condition != "control") %>%
ggplot() +
geom_point(aes(x = cryptic_psi_full, y = normalized_unc13b_ir, fill = source),pch = 21,size = 4) +
stat_cor(aes(x = cryptic_psi_full, y = normalized_unc13b_ir)) +
theme(legend.position = 'bottom')+
facet_wrap(~source)
rel_rna_cryptic_amount %>%
filter(condition != "control") %>%
ggplot() +
geom_point(aes(x = unc13b_nmd_exon_psi, y = normalized_unc13b_ir, fill = source),pch = 21,size = 4) +
stat_cor(aes(x = unc13b_nmd_exon_psi, y = normalized_unc13b_ir)) +
theme(legend.position = 'bottom')+
facet_wrap(~source)
rel_rna_cryptic_amount %>%
filter(condition != "control") %>%
ggplot() +
geom_point(aes(x = cryptic_psi, y = normalized_unc13b_ir, fill = source),pch = 21,size = 4) +
stat_cor(aes(x = unc13b_nmd_exon_psi, y = normalized_unc13b_ir)) +
theme(legend.position = 'bottom')+
facet_wrap(~source)
# NMD experiment on SH-SY5Y cells - CHX
for_bar_plot_input <- fread(file.path(here::here(),"data","SY5Y_Tidy_4.csv"))
for_bar_plot <- data.frame(for_bar_plot_input)
for_bar_plot$Treatment <- factor(for_bar_plot$Treatment, levels = c("DMSO", "CHX"))
shapiro.test(filter(for_bar_plot, Treatment == "CHX" & Gene == "HNRNPL")$Value)
Shapiro-Wilk normality test
data: filter(for_bar_plot, Treatment == "CHX" & Gene == "HNRNPL")$Value
W = 0.87027, p-value = 0.1866
shapiro.test(filter(for_bar_plot, Treatment == "CHX" & Gene == "STMN2")$Value)
Shapiro-Wilk normality test
data: filter(for_bar_plot, Treatment == "CHX" & Gene == "STMN2")$Value
W = 0.89488, p-value = 0.3011
shapiro.test(filter(for_bar_plot, Treatment == "CHX" & Gene == "UNC13A")$Value)
Shapiro-Wilk normality test
data: filter(for_bar_plot, Treatment == "CHX" & Gene == "UNC13A")$Value
W = 0.95746, p-value = 0.7967
shapiro.test(filter(for_bar_plot, Treatment == "CHX" & Gene == "UNC13B")$Value)
Shapiro-Wilk normality test
data: filter(for_bar_plot, Treatment == "CHX" & Gene == "UNC13B")$Value
W = 0.89514, p-value = 0.261
stattest <- for_bar_plot %>%
group_by(Gene) %>%
pairwise_t_test(Value ~ Treatment, ref.group = "DMSO", alternative = "greater") %>%
add_significance() %>%
add_xy_position(x="Treatment")
stattest
# A tibble: 4 x 14
Gene .y. group1 group2 n1 n2 p p.signif p.adj p.adj.signif
<chr> <chr> <chr> <chr> <int> <int> <dbl> <chr> <dbl> <chr>
1 HNRNPL Value DMSO CHX 7 7 6.77e-7 **** 6.77e-7 ****
2 STMN2 Value DMSO CHX 7 7 8.05e-2 ns 8.05e-2 ns
3 UNC13A Value DMSO CHX 7 7 3.64e-4 *** 3.64e-4 ***
4 UNC13B Value DMSO CHX 8 8 3.72e-4 *** 3.72e-4 ***
# … with 4 more variables: y.position <dbl>, groups <named list>, xmin <dbl>,
# xmax <dbl>
nmd_plot_chx = ggbarplot(for_bar_plot,
x = 'Gene',
add = c("mean_se","jitter"),
y = 'Value',
color = 'Treatment',
position = position_dodge(0.8),
dot.size = 10) +
scale_y_continuous() +
ggpubr::theme_pubr() + stat_pvalue_manual(stattest, label="p.signif" , x = "Gene")
plot(nmd_plot_chx)
for_bar_plot_sy_input <- fread(file.path(here::here(),"data","results_upf1.csv"))
for_bar_plot_sy_df <- data.frame(for_bar_plot_sy_input)
for_bar_plot_sy_df$Condition <- as.factor(for_bar_plot_sy_df$Condition)
#assess TDP-43 and UPF1 KD
for_bar_plot_sz <- for_bar_plot_sy_df %>%
filter(Gene == "TDP43" & (Condition == "siTDP43+siCTRL" | Condition == "siTDP43+siUPF1" | Condition == "Non-treated"))
shapiro.test(filter(for_bar_plot_sz, Condition == "siTDP43+siCTRL" & Gene == "TDP43")$Value)
Shapiro-Wilk normality test
data: filter(for_bar_plot_sz, Condition == "siTDP43+siCTRL" & Gene == "TDP43")$Value
W = 0.99437, p-value = 0.9787
shapiro.test(filter(for_bar_plot_sz, Condition == "siTDP43+siUPF1" & Gene == "TDP43")$Value)
Shapiro-Wilk normality test
data: filter(for_bar_plot_sz, Condition == "siTDP43+siUPF1" & Gene == "TDP43")$Value
W = 0.98592, p-value = 0.9357
stattest_sz <- for_bar_plot_sz %>%
pairwise_t_test(Value ~ Condition, ref.group = "Non-treated", alternative = "less") %>%
add_significance() %>%
add_xy_position(x="Condition")
stattest_sz
# A tibble: 2 x 13
.y. group1 group2 n1 n2 p p.signif p.adj p.adj.signif
<chr> <chr> <chr> <int> <int> <dbl> <chr> <dbl> <chr>
1 Value Non-trea… siTDP43+s… 4 4 1.36e-13 **** 2.72e-13 ****
2 Value Non-trea… siTDP43+s… 4 4 2.34e-13 **** 2.72e-13 ****
# … with 4 more variables: y.position <dbl>, groups <named list>, xmin <dbl>,
# xmax <dbl>
for_bar_plot_sx <- for_bar_plot_sy_df %>%
filter(Gene == "UPF1" & (Condition == "siTDP43+siCTRL" | Condition == "siTDP43+siUPF1"))
shapiro.test(filter(for_bar_plot_sx, Condition == "siTDP43+siUPF1" & Gene == "UPF1")$Value)
Shapiro-Wilk normality test
data: filter(for_bar_plot_sx, Condition == "siTDP43+siUPF1" & Gene == "UPF1")$Value
W = 0.91957, p-value = 0.5345
stattest_sx <- for_bar_plot_sx %>%
pairwise_t_test(Value ~ Condition, ref.group = "siTDP43+siCTRL", alternative = "less") %>%
add_significance() %>%
add_xy_position(x="Condition")
stattest_sx
# A tibble: 1 x 13
.y. group1 group2 n1 n2 p p.signif p.adj p.adj.signif y.position
<chr> <chr> <chr> <int> <int> <dbl> <chr> <dbl> <chr> <dbl>
1 Value siTDP4… siTDP4… 4 4 1e-9 **** 1e-9 **** 1.37
# … with 3 more variables: groups <named list>, xmin <dbl>, xmax <dbl>
for_bar_plot_szx <- for_bar_plot_sy_df %>%
filter(Gene == "TDP43" & (Condition == "siTDP43+siCTRL" | Condition == "siTDP43+siUPF1" | Condition == "Non-treated")
| Gene == "UPF1" & (Condition == "siTDP43+siCTRL" | Condition == "siTDP43+siUPF1"))
#fix position of stats for plot
stattest_sx$xmin[1] <- 1.8
stattest_sx$xmax[1] <- 2.2
stattest_sx$y.position[1] <- 1.3
stattest_sz$xmin[1] <- 0.75
stattest_sz$xmin[2] <- 0.75
stattest_sz$xmax[1] <- 1
stattest_sz$xmax[2] <- 1.25
stattest_sz$y.position[1] <- 1.2
stattest_sz$y.position[2] <- 1.3
nmd_plotz = ggbarplot(for_bar_plot_szx,
x = 'Gene',
add = c("mean_se","jitter"),
y = 'Value',
color = 'Condition',
position = position_dodge(0.8),
dot.size = 10) +
scale_y_continuous() + ylab("Percent of transcript after UPF1 knockdown") + xlab("Gene") +
ggpubr::theme_pubr() + stat_pvalue_manual(stattest_sz) + stat_pvalue_manual(stattest_sx)
plot(nmd_plotz)
#assess rescue of NMD-sensitive transcripts after UPF1 inhibition
for_bar_plot_sy <- for_bar_plot_sy_df %>%
filter(Gene == "hnRNPL" | Gene == "STMN2" | Gene == "UNC13A" | Gene == "UNC13B")
shapiro.test(filter(for_bar_plot_sy, Condition == "siTDP43+siUPF1" & Gene == "hnRNPL")$Value)
Shapiro-Wilk normality test
data: filter(for_bar_plot_sy, Condition == "siTDP43+siUPF1" & Gene == "hnRNPL")$Value
W = 0.97268, p-value = 0.858
shapiro.test(filter(for_bar_plot_sy, Condition == "siTDP43+siUPF1" & Gene == "STMN2")$Value)
Shapiro-Wilk normality test
data: filter(for_bar_plot_sy, Condition == "siTDP43+siUPF1" & Gene == "STMN2")$Value
W = 0.98571, p-value = 0.9346
shapiro.test(filter(for_bar_plot_sy, Condition == "siTDP43+siUPF1" & Gene == "UNC13A")$Value)
Shapiro-Wilk normality test
data: filter(for_bar_plot_sy, Condition == "siTDP43+siUPF1" & Gene == "UNC13A")$Value
W = 0.87364, p-value = 0.3122
shapiro.test(filter(for_bar_plot_sy, Condition == "siTDP43+siUPF1" & Gene == "UNC13B")$Value)
Shapiro-Wilk normality test
data: filter(for_bar_plot_sy, Condition == "siTDP43+siUPF1" & Gene == "UNC13B")$Value
W = 0.96734, p-value = 0.8249
stattest_sy <- for_bar_plot_sy %>%
group_by(Gene) %>%
pairwise_t_test(Value ~ Condition, ref.group = "siTDP43+siCTRL", alternative = "greater") %>%
add_significance() %>%
add_xy_position(x="Condition")
stattest_sy
# A tibble: 4 x 14
Gene .y. group1 group2 n1 n2 p p.signif p.adj p.adj.signif
<chr> <chr> <chr> <chr> <int> <int> <dbl> <chr> <dbl> <chr>
1 hnRNPL Value siTDP4… siTDP4… 4 4 6.38e-4 *** 6.38e-4 ***
2 STMN2 Value siTDP4… siTDP4… 4 4 9.91e-1 ns 9.91e-1 ns
3 UNC13A Value siTDP4… siTDP4… 4 4 7.64e-3 ** 7.64e-3 **
4 UNC13B Value siTDP4… siTDP4… 4 4 8.26e-4 *** 8.26e-4 ***
# … with 4 more variables: y.position <dbl>, groups <named list>, xmin <dbl>,
# xmax <dbl>
nmd_plot = ggbarplot(for_bar_plot_sy,
x = 'Gene',
add = c("mean_se","jitter"),
y = 'Value',
color = 'Condition',
fill = "Condition",
position = position_dodge(0.8),
dot.size = 20,
size = 1.5) +
scale_y_continuous() +
ggpubr::theme_pubr() + ylab("Percent of transcript after UPF1 knockdown") + xlab("Gene") +
stat_pvalue_manual(stattest_sy, label="p.signif", x = "Gene") +
theme(axis.ticks.length=unit(0.1,"inch"),
axis.line = element_line(colour = 'black', size = 1.5),
axis.ticks = element_line(colour = "black", size = 2))+
scale_fill_manual(values = c("#fca361ff","#aac043ff")) +
scale_color_manual(values = c("#000000ff","#000000ff"))
plot(nmd_plot)
peptides = fread(file.path(here::here(),"data","peptide_abdundance.csv"))
peptides %>%
mutate(gene = fct_relevel(gene, "UNC13A","UNC13B")) %>%
ggbarplot(,
x = "condition",
add = c("mean_se"),
y = "protein",
fill = 'condition',
color = 'condition',
position = position_dodge(0.8),
facet.by = 'gene') +
ggpubr::theme_pubr() +
scale_fill_manual(
values = c("#40B0A6","#E1BE6A")
) +
scale_color_manual(
values = c("#1C2617","#262114")
) +
ylab("Protein abundance") +
xlab("") +
geom_jitter(pch = 21,height = 0,aes(fill = condition),size = 2) +
guides(color = FALSE) +
facet_wrap(~gene, scales = 'free') +
theme(legend.position = 'none') +
scale_y_continuous(labels = scientific) +
stat_compare_means(comparisons = list(c("Control","TDP-43 KD")),
label = 'p.signif',tip.length = 0,
size = 10)
force_colors = c("#F8766D", "#A3A500", "#00BF7D", "#00B0F6", "#E76BF3")
names(force_colors) = c("Control","ALS \n non-TDP","ALS-TDP","FTLD \n non-TDP","FTLD-TDP")
clean_data_table = fread(file.path(here::here(),"data","nygc_junction_information.csv"))
clean_data_table = clean_data_table %>%
mutate(rs12973192 = fct_relevel(rs12973192,
"C/C", "C/G", "G/G")) %>%
mutate(number_g_alleles = as.numeric(rs12973192) - 1) %>%
mutate(unc13a_cryptic_leaf_psi = ifelse(is.na(unc13a_cryptic_leaf_psi),0,unc13a_cryptic_leaf_psi)) %>%
mutate(junction_reads_stmn2 = STMN2_annotated_leaf + STMN2_cryptic_leaf) %>%
mutate(junction_reads_unc13a = UNC13A_3prime_leaf + UNC13A_5prime_1_leaf + UNC13A_annotated_leaf) %>%
unique()
print(glue::glue("Number of unique patients: {clean_data_table[,length(unique(participant_id))]}"))
Number of unique patients: 377
print(glue::glue("Number of unique tissue samples: {clean_data_table[,length(unique(sample))]}"))
Number of unique tissue samples: 1349
print("Patients Per Disease Category")
[1] "Patients Per Disease Category"
clean_data_table[,length(unique(participant_id)),by = disease]
disease V1
1: ALS-FTD 23
2: ALS 193
3: Control 77
4: Other 11
5: FTD 61
6: ALS-AD 12
print("Tissues Per Disease Category")
[1] "Tissues Per Disease Category"
clean_data_table[,length(unique(sample)),by = disease]
disease V1
1: ALS-FTD 110
2: ALS 764
3: Control 199
4: Other 70
5: FTD 138
6: ALS-AD 68
print("Number of patients per UNC13A SNP genotype")
[1] "Number of patients per UNC13A SNP genotype"
unique(clean_data_table[,.(participant_id,rs12608932,rs12973192)]) %>% select(-participant_id) %>% table()
rs12973192
rs12608932 C/C C/G G/G
A/A 166 0 0
A/C 0 152 1
C/C 0 1 57
print("Number of tissues per disease")
[1] "Number of tissues per disease"
clean_data_table[,.N,by = c("disease","tissue_clean")]
disease tissue_clean N
1: ALS-FTD Frontal_Cortex 22
2: ALS Frontal_Cortex 132
3: Control Frontal_Cortex 40
4: Other Frontal_Cortex 11
5: ALS-FTD Lumbar_Spinal_Cord 15
6: ALS Lumbar_Spinal_Cord 105
7: Control Lumbar_Spinal_Cord 33
8: Other Lumbar_Spinal_Cord 9
9: ALS-FTD Cervical_Spinal_Cord 14
10: ALS Cervical_Spinal_Cord 103
11: Control Cervical_Spinal_Cord 32
12: Other Cervical_Spinal_Cord 10
13: ALS-FTD Motor_Cortex 28
14: ALS Motor_Cortex 175
15: Control Motor_Cortex 23
16: Other Motor_Cortex 16
17: ALS-FTD Cerebellum 13
18: ALS Cerebellum 129
19: Control Cerebellum 28
20: Other Cerebellum 8
21: FTD Cerebellum 58
22: FTD Frontal_Cortex 45
23: ALS-AD Cerebellum 11
24: ALS-AD Motor_Cortex 13
25: ALS-AD Cervical_Spinal_Cord 10
26: ALS-AD Lumbar_Spinal_Cord 11
27: ALS-AD Frontal_Cortex 12
28: ALS-AD Occipital_Cortex 7
29: ALS-AD Thoracic_Spinal_Cord 4
30: ALS Occipital_Cortex 37
31: ALS Thoracic_Spinal_Cord 33
32: ALS-FTD Occipital_Cortex 6
33: Control Temporal_Cortex 23
34: ALS Temporal_Cortex 23
35: FTD Temporal_Cortex 35
36: Other Occipital_Cortex 7
37: Other Thoracic_Spinal_Cord 6
38: Control Thoracic_Spinal_Cord 5
39: Control Occipital_Cortex 5
40: ALS-FTD Thoracic_Spinal_Cord 5
41: ALS Hippocampus 27
42: ALS-FTD Hippocampus 7
43: Other Hippocampus 3
44: Control Hippocampus 10
disease tissue_clean N
print("Number of partcipants by mutation and disease")
[1] "Number of partcipants by mutation and disease"
clean_data_table[,length(unique(participant_id)),by = c("mutations","disease")]
mutations disease V1
1: None ALS-FTD 13
2: None ALS 145
3: C9orf72 ALS-FTD 10
4: None Control 77
5: None Other 11
6: SOD1 ALS 8
7: OPTN ALS 4
8: C9orf72 ALS 32
9: MATR3 ALS 1
10: ANG ALS 1
11: C9orf72 FTD 12
12: None ALS-AD 11
13: None FTD 42
14: C9orf72 ALS-AD 1
15: TBK1 FTD 2
16: MAPT FTD 5
17: FUS ALS 2
print(glue::glue("Number of patients per pathology:"))
Number of patients per pathology:
clean_data_table[,length(unique(participant_id)),by = .(pathology)]
pathology V1
1: ALS-FTD 23
2: ALS 193
3: control 77
4: Other 11
5: 13
6: ALS-AD 12
7: FTD-TDP-A 24
8: FTD-TDP-B 3
9: FTD-TDP-C 9
10: FTD-TAU 7
11: FTD-FUS 5
FTLD-non-TDP are those with TAU and FUS aggregates
Non-tdp ALS are those with SOD1 or FUS mutations. The n’s are quite low on this unfortunately, only 8 ALS with SOD1 and 2 with FUS mutations.
First we look at detection rate in tissues affected by TDP-43 proteinopathy, For FTLD this is frontal and temporal Cortices, and for ALS this is lumbar, cervical, and thoracic spinal cord samples as well as motor cortex. For controls we also take all 6 tissues, frontal,temporal,motor cortices and the lumbar, cervical, and thoracic spinal cords.
(As a side note, once we do this the number of ALS-non-TDP drops down to 6 (2 FUS) because the ALS sample tissues are not balanced and not every participant has samples in every tissue)
####Inclusion reads by if TDP-potential####
boxplot_table = clean_data_table %>%
mutate(across(UNC13A_3prime_leaf:UNC13A_annotated_leaf, ~ .x / library_size,.names = "{.col}_library_norm")) %>%
filter(!tissue_clean %in% c("Choroid","Liver")) %>%
dplyr::select(sample,participant_id,mutations,disease_group2,pathology,tissue_clean,contains("_library_norm")) %>%
melt() %>%
filter(grepl("_3prime|_5prime_1",variable)) %>%
group_by(sample) %>%
mutate(inclusion_reads = sum(value)) %>%
ungroup() %>%
unique() %>%
mutate(junction_name = case_when(variable == "UNC13A_3prime_leaf_library_norm" ~ " Novel Donor",
variable == "UNC13A_5prime_1_leaf_library_norm" ~ " Short Novel Acceptor",
variable == "UNC13A_5prime_2_leaf_library_norm"~ "Long Novel Acceptor")) %>%
mutate(disease_tissue = case_when((grepl("FTLD",disease_group2) & grepl("Cortex",tissue_clean)) ~ T,
(grepl("ALS",disease_group2) & grepl("Cord|Motor",tissue_clean)) ~ T,
(grepl("Occipital",tissue_clean)) ~ F,
(grepl("Control",disease_group2) & grepl("Cord|Cortex",tissue_clean)) ~ T,
TRUE ~ F)) %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean))
melt_count = clean_data_table[,.(sample,UNC13A_3prime_leaf,UNC13A_5prime_1_leaf,UNC13A_5prime_2_leaf)] %>% data.table::melt() %>% setnames(.,"value","orig_count")
Looking at disease tissue only, so just taking the cord and motor cortex in ALS and the frontal and temporal cortex of FTLD and then the cord and cortices in Controls.
clean_data_table %>%
filter(disease_tissue == T) %>%
mutate(inclusion_reads = UNC13A_3prime_leaf + UNC13A_5prime_1_leaf) %>%
mutate(detected = inclusion_reads > 0) %>%
dplyr::select(participant_id,disease_group2,detected) %>%
unique() %>%
group_by(disease_group2) %>%
mutate(n_sample = n_distinct(participant_id)) %>%
mutate(n_sample_detected = sum(detected)) %>%
dplyr::select(disease_group2,n_sample,n_sample_detected) %>%
unique() %>%
mutate(detection_rate = n_sample_detected / n_sample) %>%
mutate(disease_group2 = gsub("Control"," Control",disease_group2)) %>%
mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )")) %>%
ggplot() +
geom_col(aes(x = detection_name, y = detection_rate)) +
ggpubr::theme_pubr() +
scale_y_continuous(lim = c(0,1),labels = scales::percent) +
ylab("Percent of Patients \n UNC13A Cryptic Detected") +
theme(text = element_text(size = 24)) +
xlab("N individuals")
# library size
#llumina HiSeq 2500 (125 bp paired end) or an Illumina NovaSeq (100 bp paired end).
unc13a_sequencing_platform <- clean_data_table %>%
filter(tissue_clean %in% c("Frontal_Cortex",
"Lumbar_Spinal_Cord",
"Cervical_Spinal_Cord",
"Motor_Cortex",
"Temporal_Cortex")) %>%
mutate(inclusion_reads = UNC13A_3prime_leaf + UNC13A_5prime_1_leaf) %>%
mutate(unc13a_detected = ifelse(inclusion_reads > 0, yes = "+", no = "-")) %>%
mutate(tissue = as.character(tissue)) %>%
filter(sequencing_platform != "") %>%
filter(disease_group2 %in% c("ALS-TDP","FTLD-TDP"))
quick_fisher = function(tbl){
if(length(unique(tbl$sequencing_platform)) < 2){
return(NA)
}
comparison_table = table(unc13a = tbl$unc13a_detected,
sequencing_platform = tbl$sequencing_platform)
p_value = fisher.test(comparison_table)$p.value
return(p_value)
}
unc13a_data_fisher = unc13a_sequencing_platform %>%
filter(tissue_clean != "Thoracic_Spinal_Cord") %>%
group_by(disease_group2,tissue_clean) %>%
nest() %>%
mutate(fisher_pvalue = map_dbl(data, quick_fisher))
unc13a_data_fisher$data = NULL
unc13a_sequencing_platform_pct = unc13a_sequencing_platform %>%
group_by(disease_group2,tissue_clean, sequencing_platform, unc13a_detected) %>% tally() %>%
spread(key = unc13a_detected, value = n,fill = 0) %>%
mutate(detection = `+` / ( `-` + `+`) ) %>%
mutate(total = ( `-` + `+`) ) %>%
mutate(plot_name = glue::glue("{sequencing_platform} \n ({total})")) %>%
left_join(unc13a_data_fisher)
Joining, by = c("disease_group2", "tissue_clean")
unc13a_sequencing_platform_pct %>%
ggplot(aes(x = plot_name, y = detection )) +
geom_col(fill = "firebrick") +
labs(title = "UNC13A CE detection and sequencing platform",
y = "UNC13A CE detected",x = element_blank(), subtitle = "Fisher exact test") +
facet_wrap(~disease_group2 + tissue_clean,scales = 'free_x') +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
geom_text(aes(x = 1.5, y = 1.05, label = paste0("p = ", signif(fisher_pvalue,digits = 2) ) )) +
theme(legend.text = element_text(size = 15)) +
ggpubr::theme_pubr() +
scale_fill_manual(values = ("#b3251f"))
# scale_y_continuous(expand = c(0,0) )
Were there systemic differences in sequence platform between ALS and FTD?
tmp = clean_data_table %>%
filter(tissue_clean %in% c("Frontal_Cortex",
"Lumbar_Spinal_Cord",
"Cervical_Spinal_Cord",
"Motor_Cortex",
"Temporal_Cortex")) %>%
mutate(inclusion_reads = UNC13A_3prime_leaf + UNC13A_5prime_1_leaf) %>%
mutate(unc13a_detected = ifelse(inclusion_reads > 0, yes = "+", no = "-")) %>%
mutate(tissue = as.character(tissue)) %>%
filter(sequencing_platform != "") %>%
filter(disease_group2 %in% c("ALS-TDP","FTLD-TDP")) %>%
dplyr::select(tissue_clean,disease_group2,sequencing_platform) %>%
group_by(tissue_clean) %>%
nest()
tmp %>%
mutate(chisq_res = map(data, ~ .x %>% table() %>% chisq.test() %>% pluck('residuals')))
# A tibble: 5 x 3
# Groups: tissue_clean [5]
tissue_clean data chisq_res
<chr> <list> <list>
1 Frontal_Cortex <tibble [199 × 2]> <table [2 × 2]>
2 Lumbar_Spinal_Cord <tibble [125 × 2]> <dbl [2]>
3 Cervical_Spinal_Cord <tibble [130 × 2]> <dbl [2]>
4 Motor_Cortex <tibble [201 × 2]> <dbl [2]>
5 Temporal_Cortex <tibble [52 × 2]> <table [2 × 2]>
# library size
plot_list = list()
lib_comps = c("Cervical_Spinal_Cord", "Frontal_Cortex",
"Lumbar_Spinal_Cord", "Temporal_Cortex",
"Motor_Cortex", "Cerebellum")
plot_list = list()
for(t in 1:length(lib_comps)){
plt = clean_data_table %>%
filter(tissue_clean == lib_comps[t]) %>%
mutate(inclusion_reads = UNC13A_3prime_leaf + UNC13A_5prime_1_leaf) %>%
mutate(unc13a_detected = ifelse(inclusion_reads > 0, yes = "+", no = "-")) %>%
mutate(disease_group2 = fct_relevel(disease_group2, "Control")) %>%
ggplot(aes(x = disease_group2,y = log10(library_size),fill = disease_group2)) +
geom_boxplot(colour = "black", outlier.colour = NA) +
geom_point(pch = 21, size = 2, alpha = 0.9, position = position_jitter(width = 0.2, height = 0) ) +
facet_wrap(~ tissue_clean,scales = 'free_x') +
# ggpubr::stat_compare_means(label = "p.format", label.x.npc = "center", method = "wilcox" ) +
ggpubr::stat_compare_means(label = "p.signif",
label.x.npc = "center",
method = "wilcox",
comparisons = list(
c("Control","ALS-TDP"),
c("ALS \n non-TDP","ALS-TDP"))) +
ggpubr::stat_compare_means(label = "p.signif",
label.x.npc = "center",
method = "wilcox",
comparisons = list(
c("Control","FTLD-TDP"),
c("FTLD-TDP","FTLD \n non-TDP"))) +
labs(y = "log10 library size", fill= "UNC13A CE detected", x = element_blank()) +
theme(axis.text.x = element_text(size = 15)) +
ggpubr::theme_pubr() +
scale_fill_manual(values = force_colors) +
theme(axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
legend.position = "none") +
ylab(element_blank())
plot_list[[t]] <- plt
}
cowplot::plot_grid(plotlist = plot_list,nrow = 3)
unc13a_library_depth <- clean_data_table %>%
filter(tissue_clean %in% c("Frontal_Cortex",
"Lumbar_Spinal_Cord",
"Cervical_Spinal_Cord",
"Motor_Cortex",
"Temporal_Cortex")) %>%
mutate(inclusion_reads = UNC13A_3prime_leaf + UNC13A_5prime_1_leaf) %>%
mutate(unc13a_detected = ifelse(inclusion_reads > 0, yes = "+", no = "-")) %>%
filter(disease_group2 %in% c("ALS-TDP","FTLD-TDP"))
unc13a_library_depth %>%
group_by(disease_group2,tissue_clean) %>%
add_count() %>%
mutate(plot_name = glue::glue("{tissue_clean} \n {n}")) %>%
ggplot(aes(x = unc13a_detected, y = log10(library_size))) +
geom_boxplot(aes(fill = unc13a_detected), colour = "black", outlier.colour = NA) +
facet_wrap(~disease_group2+ plot_name) +
geom_jitter(height = 0,size = 2,alpha = 0.3) +
ggpubr::stat_compare_means(label = "p.format", label.x.npc = "center", method = "wilcox", label.y.npc = 0.95) +
ggpubr::theme_pubr() +
xlab("UNC13A CE Detected") +
scale_fill_manual(values = c("#EBC3B9","#b3251f")) +
theme(legend.position = 'none')
force_colors = c("#F8766D", "#A3A500", "#00BF7D", "#00B0F6", "#E76BF3")
names(force_colors) = c("Control","ALS \n non-TDP","ALS-TDP","FTLD \n non-TDP","FTLD-TDP")
plot_list = list()
lib_comps = c("Cervical_Spinal_Cord", "Frontal_Cortex",
"Lumbar_Spinal_Cord", "Temporal_Cortex",
"Motor_Cortex", "Cerebellum")
plot_list = list()
for(t in 1:length(lib_comps)){
groups = c("ALS-TDP", "Control", "ALS \n non-TDP", "FTLD-TDP", "FTLD \n non-TDP")
plt = clean_data_table %>%
filter(tissue_clean == lib_comps[t]) %>%
mutate(disease_group2 = fct_relevel(disease_group2, "Control")) %>%
filter(disease_group2 %in% groups) %>%
ggplot(aes(x = disease_group2,y = rin,fill =disease_group2 )) +
geom_boxplot(colour = "black", outlier.colour = NA) +
geom_point(pch = 21, size = 2, alpha = 0.9, position = position_jitter(width = 0.2, height = 0) ) +
facet_wrap(~ tissue_clean,scales = 'free_x') +
# ggpubr::stat_compare_means(label = "p.format", label.x.npc = "center", method = "wilcox" ) +
ggpubr::stat_compare_means(label = "p.signif",
label.x.npc = "center",
method = "wilcox",
comparisons = list(
c("Control","ALS-TDP"),
c("ALS \n non-TDP","ALS-TDP"))) +
ggpubr::stat_compare_means( label = "p.signif",
label.x.npc = "center",
method = "wilcox",
comparisons = list(
c("Control","FTLD-TDP"),
c("FTLD-TDP","FTLD \n non-TDP"))) +
labs(y = "RIN", x = element_blank()) +
theme(axis.text.x = element_text(size = 15)) +
ggpubr::theme_pubr() +
theme(axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
legend.position = "none") +
scale_color_manual(values = force_colors) +
scale_fill_manual(values = force_colors)
plot_list[[t]] <- plt
}
cowplot::plot_grid(plotlist = plot_list,ncol = 2)
unc13a_rin <- clean_data_table %>%
filter(tissue_clean %in% c("Frontal_Cortex",
"Lumbar_Spinal_Cord",
"Cervical_Spinal_Cord",
"Motor_Cortex",
"Temporal_Cortex")) %>%
mutate(inclusion_reads = UNC13A_3prime_leaf + UNC13A_5prime_1_leaf) %>%
mutate(unc13a_detected = ifelse(inclusion_reads > 0, yes = "+", no = "-")) %>%
filter(disease_group2 %in% c("ALS-TDP","FTLD-TDP")) %>%
filter(rin != "")
unc13a_rin %>%
group_by(disease_group2,tissue_clean) %>%
add_count() %>%
mutate(plot_name = glue::glue("{tissue_clean} \n {n}")) %>%
ggplot(aes(x = unc13a_detected, y = rin)) +
geom_boxplot(aes(fill = unc13a_detected), colour = "black", outlier.colour = NA) +
facet_wrap(~disease_group2+ plot_name) +
ylab("RIN") +
geom_jitter(height = 0,size = 2,alpha = 0.3) +
ggpubr::stat_compare_means(label = "p.format", label.x.npc = "center", method = "wilcox", label.y.npc = 0.95) +
ggpubr::theme_pubr() +
xlab("UNC13A CE Detected") +
scale_fill_manual(values = c("#EBC3B9","#b3251f")) +
theme(legend.position = 'none')
## effect of RIN?
clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP", "ALS-TDP")) %>%
add_count(tissue_clean,disease_group2) %>%
mutate(plot_name = glue::glue("{disease_group2} \n ({n})")) %>%
mutate(inclusion_reads = UNC13A_3prime_leaf + UNC13A_5prime_1_leaf) %>%
mutate(unc13a_detected = ifelse(inclusion_reads > 0, yes = "+", no = "-") ) %>%
ggplot(aes(x = plot_name, fill = unc13a_detected,y = UNC13B_TPM )) +
geom_point(size = 2,show.legend = F,pch = 21, position = position_jitterdodge(dodge.width=1,jitter.width = 0.3))+
geom_boxplot(outlier.colour = NA) +
labs(title = "UNC13A CE detection and UNC13B TPM",subtitle = "Wilcox test") +
facet_wrap(~ tissue_clean,scales = 'free') +
ggpubr::stat_compare_means(label = "p.signif") +
labs(y = "UNC13B TPM", x = "UNC13A CE detection") +
theme(axis.text.x = element_text(size = 15)) +
ggpubr::theme_pubr() +
ggpubr::stat_compare_means(label = "p.format", label.x.npc = "center",label.y.npc = 0.9)
force_colors = c("#F8766D", "#A3A500", "#00BF7D", "#00B0F6", "#E76BF3")
names(force_colors) = c("Control","ALS \n non-TDP","ALS-TDP","FTLD \n non-TDP","FTLD-TDP")
plot_list = list()
lib_comps = c("Cervical_Spinal_Cord", "Frontal_Cortex",
"Lumbar_Spinal_Cord", "Temporal_Cortex",
"Motor_Cortex", "Cerebellum")
cell_types = fread(paste0(here::here(),"/data/All_1917_samples_Darmanis_dtangle_deconv.tsv"))
for(t in 1:length(lib_comps)){
groups = c("ALS-TDP", "Control", "ALS \n non-TDP", "FTLD-TDP", "FTLD \n non-TDP")
plt = cell_types %>%
left_join(clean_data_table[,.(sample,
UNC13A_3prime_leaf,
UNC13A_5prime_1_leaf,
disease_group2,
disease_tissue)], by = 'sample') %>%
filter(!is.na(disease_group2)) %>%
unique() %>%
filter(tissue_clean == lib_comps[t]) %>%
filter(disease_group2 %in% groups) %>%
mutate(disease_group2 = fct_relevel(disease_group2, "Control")) %>%
ggplot(aes(x = disease_group2,y = deconv,fill =disease_group2 )) +
geom_boxplot(colour = "black", outlier.colour = NA) +
facet_wrap(~cell,scales = 'free_x',nrow = 1) +
ggpubr::stat_compare_means(label = "p.signif",
label.x.npc = "center",
method = "wilcox",
comparisons = list(
c("Control","FTLD-TDP"),
c("FTLD-TDP","FTLD \n non-TDP"))) +
ggpubr::stat_compare_means(label = "p.signif",
label.x.npc = "center",
method = "wilcox",
comparisons = list(
c("Control","ALS-TDP"),
c("ALS \n non-TDP","ALS-TDP"))) +
ggtitle(lib_comps[t]) +
labs(fill = element_blank(),x = element_blank()) +
geom_point(pch = 21, size = 2, alpha = 0.9, position = position_jitter(width = 0.2, height = 0) ) +
ggpubr::theme_pubr() +
theme(axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
legend.position = "none") +
scale_fill_manual(values = force_colors)
plot_list[[t]] <- plt
}
cowplot::plot_grid(plotlist = plot_list,ncol = 2)
cell_types = fread("/Users/annaleigh/Documents/GitHub/unc13a_cryptic_splicing/data/All_1917_samples_Darmanis_dtangle_deconv.tsv")
cell_types %>%
left_join(clean_data_table[,.(sample,
UNC13A_3prime_leaf,
UNC13A_5prime_1_leaf,
disease_group2,
disease_tissue)], by = 'sample') %>%
filter(tissue_clean %in% c("Frontal_Cortex",
"Lumbar_Spinal_Cord",
"Cervical_Spinal_Cord",
"Motor_Cortex",
"Temporal_Cortex")) %>%
mutate(inclusion_reads = UNC13A_3prime_leaf + UNC13A_5prime_1_leaf) %>%
mutate(unc13a_detected = ifelse(inclusion_reads > 0, yes = "+", no = "-")) %>%
filter(disease_group2 %in% c("ALS-TDP","FTLD-TDP")) %>%
group_by(disease_group2,tissue_clean) %>%
add_count() %>%
mutate(n = n / 5) %>%
mutate(plot_name = glue::glue("{tissue_clean} \n {n}")) %>%
filter(!is.na(inclusion_reads)) %>%
unique() %>%
ggplot(aes(x = cell, y = deconv, fill = unc13a_detected)) +
# geom_point(size = 2,show.legend = F,pch = 21, position = position_jitterdodge(dodge.width=1,jitter.width = 0.3))+
geom_boxplot(outlier.color = NA) +
facet_wrap(~disease_group2+plot_name,scales = 'free') +
ggpubr::stat_compare_means(label = "p.format",vjust = 1.3) +
ggpubr::theme_pubr() +
scale_fill_manual(values = c("#EBC3B9","#b3251f")) +
theme(legend.position = 'none') +
xlab(element_blank())
boxplot_table %>%
filter(!(tissue_clean %in% c("Cerebellum","Hippocampus","Occipital Cortex"))) %>%
mutate(detected = inclusion_reads > 0) %>%
dplyr::select(sample,disease_group2,detected,tissue_clean) %>%
unique() %>%
group_by(disease_group2,tissue_clean) %>%
mutate(n_sample = n_distinct(sample)) %>%
mutate(n_sample_detected = sum(detected)) %>%
dplyr::select(tissue_clean,disease_group2,n_sample,n_sample_detected) %>%
unique() %>%
mutate(detection_rate = n_sample_detected / n_sample) %>%
mutate(disease_group2 = gsub("Control"," Control",disease_group2)) %>%
ungroup() %>%
filter(grepl("ALS-TDP|FTLD-T",disease_group2)) %>%
mutate(tissue_clean = fct_relevel(tissue_clean, "Cervical Spinal Cord",
"Frontal Cortex",
"Lumbar Spinal Cord",
"Motor Cortex",
"Thoracic Spinal Cord")) %>%
mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )")) %>%
mutate(cortex = ifelse(grepl(" Cord",tissue_clean),"cot","cor")) %>%
ggplot() +
geom_col(aes(x = detection_name,
y = detection_rate,
fill = detection_name),color = 'black',position = position_dodge2(width = 0.8, preserve = "single")) +
ggpubr::theme_pubr() +
ylab("Percent of Tissues \n UNC13A Cryptic Detected") +
theme(text = element_text(size = 18)) +
xlab("N tissues") +
facet_wrap(~tissue_clean,scales = 'free_x',nrow = 3) +
scale_y_continuous(lim = c(0,1),labels = scales::percent,expand = c(0,0) ) +
theme(axis.text.x = element_text(size = 14)) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(text = element_text(size = 18)) +
scale_fill_manual(values = c("#00bd7dff","#00bd7dff","#00bd7dff","#00bd7dff","#00bd7dff","red","#e668f3ff","#e668f3ff")) +
theme(legend.position = "none")
boxplot_table %>%
left_join(clean_data_table %>% dplyr::select(sample, rs12973192)) %>%
unique() %>%
filter(!(tissue_clean %in% c("Cerebellum","Hippocampus","Occipital Cortex"))) %>%
mutate(detected = inclusion_reads > 0) %>%
dplyr::select(sample,disease_group2,detected,tissue_clean,rs12973192) %>%
unique() %>%
group_by(disease_group2,tissue_clean,rs12973192) %>%
mutate(n_sample = n_distinct(sample)) %>%
mutate(n_sample_detected = sum(detected)) %>%
dplyr::select(tissue_clean,disease_group2,n_sample,n_sample_detected) %>%
unique() %>%
mutate(detection_rate = n_sample_detected / n_sample) %>%
mutate(disease_group2 = gsub("Control"," Control",disease_group2)) %>%
ungroup() %>%
filter(grepl("ALS-TDP|FTLD-T",disease_group2)) %>%
mutate(tissue_clean = fct_relevel(tissue_clean, "Cervical Spinal Cord",
"Frontal Cortex",
"Lumbar Spinal Cord",
"Motor Cortex",
"Thoracic Spinal Cord")) %>%
mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )")) %>%
mutate(cortex = ifelse(grepl(" Cord",tissue_clean),"cot","cor")) %>%
ggplot() +
geom_col(aes(x = detection_name, y = detection_rate,fill = rs12973192),position = position_dodge2(width = 0.8, preserve = "single")) +
ggpubr::theme_pubr() +
ylab("Percent of Tissues \n UNC13A Cryptic Detected") +
theme(text = element_text(size = 18)) +
xlab("N tissues") +
facet_wrap(~tissue_clean,scales = 'free_x',nrow = 3) +
scale_y_continuous(lim = c(0,1),labels = scales::percent,expand = c(0,0) ) +
theme(axis.text.x = element_text(size = 14)) +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(text = element_text(size = 18)) +
theme(legend.position = "bottom")
Joining, by = "sample"
Adding missing grouping variables: `rs12973192`
This only plots the short novel acceptor and novel donor
star_maker = function (p.value)
{
unclass(symnum(p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.0001,
0.001, 0.01, 0.05, 0.1, 1), symbols = c("****","***", "**",
"*", ".", " ")))
}
only_short_plotter = boxplot_table %>%
filter(tissue_clean %in%
c("Frontal Cortex",
"Lumbar Spinal Cord",
"Cervical Spinal Cord",
"Motor Cortex",
"Temporal Cortex",
"Cerebellum") )%>%
group_by(disease_group2,tissue_clean) %>%
mutate(n_sample = n_distinct(sample)) %>%
ungroup() %>%
mutate(disease_group2 = gsub("Control", " Control",disease_group2)) %>%
mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )")) %>%
mutate(tissue_clean = fct_relevel(tissue_clean, "Cervical Spinal Cord",
"Frontal Cortex",
"Lumbar Spinal Cord",
"Temporal Cortex",
"Motor Cortex"))
only_short_plotter %>%
dplyr::select(sample,tissue_clean,inclusion_reads,disease_group2) %>%
mutate(disease_group2 = gsub("\n", "",disease_group2)) %>%
unique() %>%
group_by(tissue_clean) %>%
nest() %>%
mutate(wilcox_res = map(data, ~pairwise.wilcox.test(.x$inclusion_reads,.x$disease_group2) %>% broom::tidy())) %>%
dplyr::select(-data) %>%
unnest() %>%
mutate(star_plot = star_maker(p.value)) %>% fwrite("~/Desktop/stars_for_weaverly_fig3b.csv")
only_short_plotter %>%
ggplot(aes(x = detection_name,
y = inclusion_reads * 10^6,
fill ="red")) +
geom_boxplot(show.legend = F,position = position_dodge(preserve = "single",width = 1),outlier.colour = NA) +
geom_point(size = 2,show.legend = F,pch = 21, position = position_jitterdodge(dodge.width=1,jitter.width = 0.3))+
ggplot2::facet_wrap(vars(tissue_clean),scales = "free_x",nrow = 3) +
ylab("UNC13A cryptic \n reads per million") +
theme(text = element_text(size = 12)) +
xlab("") +
scale_fill_manual(values = colorblind_pal()(4)[2:4]) +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(text = element_text(size = 18)) +
theme(legend.position = "bottom")
clean_data_table %>%
mutate(across(UNC13A_3prime_leaf:UNC13A_annotated_leaf, ~ .x / library_size,.names = "{.col}_library_norm")) %>%
filter(!tissue_clean %in% c("Choroid","Liver")) %>%
dplyr::select(sample,participant_id,mutations,disease_group2,pathology,tissue_clean,contains("_library_norm")) %>%
melt() %>%
filter(grepl("_3prime|_5prime_1|_5prime_2",variable)) %>%
# filter(grepl("_5prime_2",variable)) %>%
unique() %>%
mutate(junction_name = case_when(variable == "UNC13A_3prime_leaf_library_norm" ~ " Novel Donor",
variable == "UNC13A_5prime_1_leaf_library_norm" ~ " Short Novel Acceptor",
variable == "UNC13A_5prime_2_leaf_library_norm"~ "Long Novel Acceptor")) %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>%
dplyr::select(sample,disease_group2,
tissue_clean,
junction_name,
value) %>%
unique() %>%
filter(tissue_clean %in%
c("Frontal Cortex",
"Lumbar Spinal Cord",
"Cervical Spinal Cord",
"Motor Cortex",
"Temporal Cortex",
"Cerebellum"))%>%
group_by(disease_group2,tissue_clean) %>%
mutate(n_sample = n_distinct(sample)) %>%
ungroup() %>%
mutate(disease_group2 = gsub("Control", " Control",disease_group2)) %>%
mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )")) %>%
mutate(tissue_clean = fct_relevel(tissue_clean, "Cervical Spinal Cord",
"Frontal Cortex",
"Lumbar Spinal Cord",
"Temporal Cortex",
"Motor Cortex")) %>%
ggplot(aes(x = detection_name,
y = value * 10^6,
fill = junction_name)) +
geom_boxplot(show.legend = F,position = position_dodge(preserve = "single")) +
geom_point(size = 2,pch = 21, alpha = 0.7, position = position_jitterdodge(jitter.width = 0.2))+
scale_y_log10() +
ggplot2::facet_wrap(vars(tissue_clean),scales = "free_x",nrow = 3) +
ylab("UNC13A cryptic \n reads per million") +
theme(text = element_text(size = 12)) +
xlab("") +
scale_fill_manual(values = colorblind_pal()(4)[2:4]) +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(text = element_text(size = 18)) +
theme(legend.position = "bottom")
disease_comparisons = list( c("Control","ALS-TDP"),
c("Control","ALS \n non-TDP"),
c("Control","FTLD-TDP"),
c("Control","FTLD \n non-TDP" ))
table_to_test = boxplot_table %>%
filter(disease_tissue == T) %>%
group_by(disease_group2) %>%
mutate(n_sample = n_distinct(sample)) %>%
mutate(disease_group2 = gsub("Control"," Control",disease_group2)) %>%
mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )")) %>%
dplyr::select(detection_name,
participant_id,
inclusion_reads,
disease_group2,
tissue_clean,
disease_tissue) %>%
unique()
test_pair = pairwise.wilcox.test(table_to_test$inclusion_reads, table_to_test$detection_name,
p.adjust.method = "BH") %>% broom::tidy()
test_pair = test_pair %>%
mutate(p_value_draw = case_when(p.value < 0.0001~ "***",
p.value < 0.01 ~ "**",
p.value < 0.05 ~ "*",
TRUE ~ paste0("Adj. p-value \n",as.character(round(p.value,2))))) %>%
mutate(y.position = seq(0.25,by = 0.1,length.out = 7))
table_to_test %>%
ggplot(aes(x = detection_name, y = inclusion_reads * 10^6)) +
geom_boxplot() +
geom_jitter(height = 0) +
scale_y_log10() +
ggpubr::theme_pubr() +
ylab("UNC13A cryptic inclusion \n reads per million") +
theme(text = element_text(size = 24)) +
xlab("N samples") +
stat_pvalue_manual(test_pair %>% filter(p.value < 0.05),
label = "p_value_draw",size = 8) +
stat_compare_means(size = 8)
comps = clean_data_table %>%
group_by(disease_group2,tissue_clean) %>%
mutate(n_sample = n_distinct(sample)) %>%
ungroup() %>%
mutate(disease_group2 = gsub("Control", " Control",disease_group2)) %>%
mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )")) %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>% pull(detection_name) %>%
unique() %>%
combn(.,2,simplify = F)
clean_data_table %>%
group_by(disease_group2,tissue_clean) %>%
mutate(n_sample = n_distinct(sample)) %>%
ungroup() %>%
mutate(disease_group2 = gsub("Control", " Control",disease_group2)) %>%
mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )")) %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>%
filter(tissue_clean %in%
c("Frontal Cortex",
"Lumbar Spinal Cord",
"Cervical Spinal Cord",
"Motor Cortex",
"Temporal Cortex",
"Cerebellum") )%>%
mutate(tissue_clean = fct_relevel(tissue_clean, "Cervical Spinal Cord",
"Frontal Cortex",
"Lumbar Spinal Cord",
"Temporal Cortex",
"Motor Cortex")) %>%
ggplot(aes(x = detection_name, y = UNC13A_TPM)) +
ggpubr::theme_pubr() +
geom_boxplot(aes(fill = disease_group2)) +
geom_jitter(pch = 21, color = 'black',width = 0.2,height = 0,aes(fill = disease_group2)) +
ylab("UNC13A TPM") +
xlab("") +
guides(color = FALSE) +
theme(text = element_text(size = 20)) +
facet_wrap(~tissue_clean, scales = 'free',nrow = 3) +
theme(legend.position = 'none') +
stat_compare_means(comparisons = comps)
plot_by_tissue = function(tissue_name,dt){
only_tissue = dt %>%
group_by(disease_group2,tissue_clean) %>%
mutate(n_sample = n_distinct(sample)) %>%
ungroup() %>%
filter(tissue_clean == tissue_name) %>%
mutate(disease_group2 = gsub("Control", " Control",disease_group2)) %>%
mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )")) %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean))
unique_x_values = only_tissue %>%
pull(detection_name) %>%
unique()
comparisons = combn(unique_x_values,2,simplify = F)
the_plot = only_tissue %>%
ggplot(aes(x = detection_name, y = UNC13A_TPM)) +
ggpubr::theme_pubr() +
geom_boxplot(aes(fill = disease_group2)) +
geom_jitter(pch = 21, color = 'black',width = 0.2,height = 0,aes(fill = disease_group2)) +
ylab("UNC13A TPM") +
xlab("") +
guides(color = FALSE) +
theme(text = element_text(size = 20)) +
facet_wrap(~tissue_clean, scales = 'free',nrow = 3) +
theme(legend.position = 'none') +
stat_compare_means() +
stat_compare_means(label = "p.signif",comparisons = comparisons,)
return(the_plot)
}
dis_tis = clean_data_table[disease_tissue == T,unique(tissue_clean)]
dis_tis = c("Cerebellum",dis_tis)
my_plots = purrr::map(dis_tis,plot_by_tissue, clean_data_table)
ggarrange(plotlist=my_plots)
for_stats = clean_data_table %>%
filter(tissue_clean %in%
c("Frontal_Cortex",
"Lumbar_Spinal_Cord",
"Cervical_Spinal_Cord",
"Motor_Cortex",
"Temporal_Cortex",
"Cerebellum") )%>%
group_by(disease_group2,tissue_clean) %>%
mutate(n_sample = n_distinct(sample)) %>%
ungroup() %>%
mutate(disease_group2 = gsub("Control", " Control",disease_group2)) %>%
mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )")) %>%
mutate(tissue_clean = fct_relevel(tissue_clean, "Motor_Cortex",
"Frontal_Cortex",
"Temporal_Cortex",
"Cervical_Spinal_Cord",
"Lumbar_Spinal_Cord")) %>%
mutate(all_the_fse_reads = (((UNC13B_nmd_junction_5prime + UNC13B_nmd_junction_3prime)/ library_size) * 10^6)) %>%
dplyr::select(tissue_clean,all_the_fse_reads,disease_group2) %>%
group_by(tissue_clean) %>%
nest()
stars_table = for_stats %>%
mutate(wilcox_test = map(data, ~{pairwise.wilcox.test(.x$all_the_fse_reads,.x$disease_group2) %>% broom::tidy()})) %>%
dplyr::select(-data) %>%
unnest() %>%
ungroup() %>%
mutate(stars = gtools::stars.pval(p.value = p.value)) %>%
filter(p.value < 0.05) %>%
mutate(y.position = seq(from = 1.5,to = 2.6,length.out = 11))
clean_data_table %>%
filter(tissue_clean %in%
c("Frontal_Cortex",
"Lumbar_Spinal_Cord",
"Cervical_Spinal_Cord",
"Motor_Cortex",
"Temporal_Cortex",
"Cerebellum") )%>%
group_by(disease_group2,tissue_clean) %>%
mutate(n_sample = n_distinct(sample)) %>%
ungroup() %>%
mutate(disease_group2 = gsub("Control", " Control",disease_group2)) %>%
mutate(detection_name = glue::glue("{disease_group2} \n ( {n_sample} )")) %>%
mutate(tissue_clean = fct_relevel(tissue_clean, "Motor_Cortex",
"Frontal_Cortex",
"Temporal_Cortex",
"Cervical_Spinal_Cord",
"Lumbar_Spinal_Cord")) %>%
mutate(all_the_fse_reads = (((UNC13B_nmd_junction_5prime + UNC13B_nmd_junction_3prime)/ library_size) * 10^6)) %>%
ggplot(aes(x = disease_group2,
y =all_the_fse_reads,
fill ="red")) +
geom_boxplot(show.legend = F,position = position_dodge(preserve = "single",width = 1),outlier.colour = NA) +
geom_point(size = 2,show.legend = F,pch = 21, position = position_jitterdodge(dodge.width=1,jitter.width = 0.3))+
ggplot2::facet_wrap(~tissue_clean,scales = "free",nrow = 2) +
ylab("UNC13B fsE \n reads per million") +
theme(text = element_text(size = 12)) +
xlab("") +
scale_fill_manual(values = colorblind_pal()(4)[2:4]) +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(text = element_text(size = 18)) +
theme(legend.position = "bottom") +
stat_pvalue_manual(stars_table, label = "{stars}")
stat_test_a = clean_data_table %>%
filter(tissue_clean %in% c("Frontal_Cortex",
"Lumbar_Spinal_Cord",
"Cervical_Spinal_Cord",
"Motor_Cortex",
"Temporal_Cortex")) %>%
unique() %>%
group_by(tissue_clean) %>%
wilcox_test(IRratioUNC13A ~ disease_group2) %>%
add_significance("p")
stat_test_b = clean_data_table %>%
filter(tissue_clean %in% c("Frontal_Cortex",
"Lumbar_Spinal_Cord",
"Cervical_Spinal_Cord",
"Motor_Cortex",
"Temporal_Cortex")) %>%
unique() %>%
group_by(tissue_clean) %>%
wilcox_test(IRratioUNC13B ~ disease_group2) %>%
add_significance("p")
clean_data_table %>%
filter(tissue_clean %in% c("Frontal_Cortex",
"Lumbar_Spinal_Cord",
"Cervical_Spinal_Cord",
"Motor_Cortex",
"Temporal_Cortex")) %>%
unique() %>%
mutate(disease_group2 = fct_relevel(disease_group2, "Control")) %>%
ggplot(aes(x = disease_group2,
y = IRratioUNC13A)) +
geom_boxplot(aes(fill = disease_group2),outlier.shape = NA) +
geom_jitter(aes(fill = disease_group2),
pch = 21,height = 0,
width = 0.2,size = 3,alpha = 0.8) +
facet_wrap(~tissue_clean,scales = 'free_x') +
stat_pvalue_manual(stat_test_a %>% filter(group1 == "Control" & group2 == "FTLD-TDP"),
label = "p.signif",y.position = 0.98) +
stat_pvalue_manual(
stat_test_a %>% filter(group1 == "ALS-TDP" & group2 == "Control"),
label = "p.signif",y.position = 0.88) +
ylab("UNC13A IRratio") +
xlab(element_blank())+
scale_fill_manual(values = force_colors) +
ggpubr::theme_pubr() +
theme(legend.position = 'none')
clean_data_table %>%
filter(tissue_clean %in% c("Frontal_Cortex",
"Lumbar_Spinal_Cord",
"Cervical_Spinal_Cord",
"Motor_Cortex",
"Temporal_Cortex")) %>%
unique() %>%
mutate(disease_group2 = fct_relevel(disease_group2, "Control")) %>%
ggplot(aes(x = disease_group2,
y = IRratioUNC13B)) +
geom_boxplot(aes(fill = disease_group2),outlier.shape = NA) +
geom_jitter(aes(fill = disease_group2),
pch = 21,height = 0,
width = 0.2,size = 3,alpha = 0.8) + facet_wrap(~tissue_clean,scales = 'free_x') +
stat_pvalue_manual(stat_test_b %>% filter(group1 == "Control" & group2 == "FTLD-TDP"),
label = "p.signif",y.position = 0.98) +
stat_pvalue_manual(
stat_test_b %>% filter(group1 == "ALS-TDP" & group2 == "Control"),
label = "p.signif",y.position = 0.88) +
ylab("UNC13B IRratio") +
xlab(element_blank()) +
scale_fill_manual(values = force_colors) +
ggpubr::theme_pubr() +
theme(legend.position = 'none')
only in ALS/FTLD-TDP
clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
ggplot(aes(x = stmn_2_cryptic_psi_leaf, y = UNC13A_TPM)) +
geom_point() +
stat_cor(size = 8) +
geom_smooth(method = lm, se = FALSE,color = "black") +
xlab("STMN2 Cryptic PSI") +
ylab("UNC13A TPM") +
ggpubr::theme_pubr() +
theme(text = element_text(size = 18)) +
ylim(0,50)
`geom_smooth()` using formula 'y ~ x'
clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
ggplot(aes(x = stmn_2_cryptic_psi_leaf, y = UNC13A_TPM)) +
geom_point() +
stat_cor(size = 8) +
geom_smooth(method = lm, se = FALSE,color = "black") +
xlab("STMN2 Cryptic PSI") +
ylab("UNC13A TPM") +
ggpubr::theme_pubr() +
theme(text = element_text(size = 18)) +
facet_wrap(~tissue_clean,scales = "free_y")
`geom_smooth()` using formula 'y ~ x'
clean_data_table %>%
filter(unc13a_cryptic_leaf_psi > 0 ) %>%
filter(UNC13A_annotated_leaf > 15) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
filter(grepl("Cortex",tissue_clean)) %>%
filter(unc13a_cryptic_leaf_psi > 0) %>%
ggplot(aes(x = unc13a_cryptic_leaf_psi, y = UNC13B_TPM)) +
geom_point() +
stat_cor(label.y = 20,size = 8,method = 's',cor.coef.name = 'rho') +
geom_smooth(method = lm, se = FALSE,color = "black") +
ylab("UNC13A TPM") +
xlab("UNC13A CE PSI") +
ggpubr::theme_pubr() +
theme(text = element_text(size = 22)) +
scale_x_continuous(labels = scales::percent)
`geom_smooth()` using formula 'y ~ x'
clean_data_table %>%
filter(unc13a_cryptic_leaf_psi > 0 ) %>%
filter(UNC13A_annotated_leaf > 15) %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
ggplot(aes(x = unc13a_cryptic_leaf_psi, y = UNC13A_TPM, color = disease_group2)) +
geom_point() +
stat_cor(size = 8) +
geom_smooth(method = lm, se = FALSE,color = "black") +
ylab("UNC13A TPM") +
xlab("UNC13A CE PSI") +
ggpubr::theme_pubr() +
theme(text = element_text(size = 18)) +
facet_wrap(~tissue_clean,scales = 'free') +
scale_x_continuous(labels = scales::percent)
`geom_smooth()` using formula 'y ~ x'
clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
filter(junction_reads_stmn2 >= 30) %>%
filter(junction_reads_unc13a >= 30) %>%
# filter(UNC13A_3prime_leaf + UNC13A_5prime_1_leaf >= 2) %>%
# filter(STMN2_cryptic_leaf >= 2) %>%
filter(grepl("Cortex",tissue_clean)) %>%
filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
filter(unc13a_cryptic_leaf_psi > 0) %>%
ggplot(aes(x = stmn_2_cryptic_psi_leaf, y = unc13a_cryptic_leaf_psi)) +
geom_point(pch = 21, size = 3) +
stat_cor(size = 8,method = "spearman",cor.coef.name = "rho") +
geom_smooth(method = lm, se = FALSE,color = "black") +
xlab("STMN2 Cryptic PSI") +
ylab("UNC13A CE PSI") +
ggpubr::theme_pubr() +
theme(text = element_text(size = 18)) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1L)) +
scale_x_continuous(labels = scales::percent_format(accuracy = 1L))
`geom_smooth()` using formula 'y ~ x'
## Scatter plot showing the correlation in non-log space RAP1GAP1 & STMN2 CE
clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
filter(junction_reads_stmn2 >= 30) %>%
filter(junction_reads_unc13a >= 30) %>%
# filter(UNC13A_3prime_leaf + UNC13A_5prime_1_leaf >= 2) %>%
# filter(STMN2_cryptic_leaf >= 2) %>%
filter(grepl("Cortex",tissue_clean)) %>%
filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
filter(unc13a_cryptic_leaf_psi > 0) %>%
ggplot(aes(x = stmn_2_cryptic_psi_leaf, y = unc13a_cryptic_leaf_psi)) +
geom_point() +
stat_cor(size = 8,method = "spearman",cor.coef.name = "rho") +
geom_smooth(method = lm, se = FALSE,color = "black") +
xlab("STMN2 Cryptic PSI") +
ylab("UNC13A CE PSI") +
ggpubr::theme_pubr() +
theme(text = element_text(size = 18)) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1L)) +
scale_x_continuous(labels = scales::percent_format(accuracy = 1L))
`geom_smooth()` using formula 'y ~ x'
clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
filter(junction_reads_stmn2 >= 30) %>%
filter(junction_reads_unc13a >= 30) %>%
mutate(junction_reads_unc13b = UNC13B_nmd_junction_5prime + UNC13B_annotated + UNC13B_nmd_junction_3prime) %>%
filter(junction_reads_unc13b >= 30) %>%
# filter(UNC13A_3prime_leaf + UNC13A_5prime_1_leaf >= 2) %>%
# filter(STMN2_cryptic_leaf >= 2) %>%
filter(grepl("Cortex",tissue_clean)) %>%
filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
filter(unc13a_cryptic_leaf_psi > 0) %>%
ggplot(aes(x = unc13a_cryptic_leaf_psi, y = unc13b_nmd_psi)) +
geom_point() +
stat_cor(size = 8,method = "spearman",cor.coef.name = "rho") +
geom_smooth(method = lm, se = FALSE,color = "black") +
xlab("UNC13A CE PSI") +
ylab("UNC13B fsE PSI") +
ggpubr::theme_pubr() +
theme(text = element_text(size = 18)) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1L)) +
scale_x_continuous(labels = scales::percent_format(accuracy = 1L))
`geom_smooth()` using formula 'y ~ x'
clean_data_table %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>%
filter(tissue_clean %in%
c("Frontal Cortex",
"Lumbar Spinal Cord",
"Cervical Spinal Cord",
"Motor Cortex",
"Temporal Cortex",
"Cerebellum") )%>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
filter(disease_tissue == T) %>%
filter(UNC13A_3prime_leaf + UNC13A_5prime_1_leaf >= 1) %>%
filter(STMN2_cryptic_leaf >= 1) %>%
dplyr::select(sample,disease_group2,tissue_clean,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
melt() %>%
mutate(tissue_clean = fct_relevel(tissue_clean,"Motor Cortex", "Cervical Spinal Cord",
"Frontal Cortex",
"Lumbar Spinal Cord",
"Temporal Cortex")) %>%
mutate(variable = ifelse(grepl("stmn_2",variable), "STMN2", "UNC13A")) %>%
ggplot(aes(x = variable, y = value)) +
geom_boxplot(aes(color = variable),outlier.shape = NA) +
geom_jitter(aes(fill = variable),pch = 21,height = 0,width = 0.2,size = 3) +
scale_fill_manual(values = c("#008574","#cd5582")) +
scale_color_manual(values = c("#008574","#cd5582")) +
facet_wrap(~tissue_clean,nrow = 3, scales = 'free') +
scale_y_continuous(labels = scales::percent_format(accuracy = 1L)) +
ggpubr::theme_pubr() +
theme(text = element_text(size = 24)) +
xlab("") +
ylab("CE PSI") +
theme(legend.position = 'none') +
stat_compare_means(label = 'p.signif')
clean_data_table %>%
filter(UNC13A_annotated_leaf > 10) %>%
filter(STMN2_annotated_leaf > 10) %>%
# filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
# filter(unc13a_cryptic_leaf_psi > 0) %>%
# filter(disease_tissue == T) %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>%
filter(tissue_clean %in% c("Motor Cortex", "Cervical Spinal Cord", "Frontal Cortex", "Lumbar Spinal Cord", "Temporal Cortex")) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,disease_group2,tissue_clean,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
mutate(log2_rat = log2((unc13a_cryptic_leaf_psi + 0.1) / (stmn_2_cryptic_psi_leaf + 0.1))) %>%
group_by(tissue_clean, disease_group2) %>%
mutate(average_log2Fold = mean(log2_rat)) %>%
ungroup() %>%
dplyr::select(-log2_rat) %>%
pivot_longer(cols = c("stmn_2_cryptic_psi_leaf","unc13a_cryptic_leaf_psi")) %>%
mutate(tissue_clean = fct_reorder(tissue_clean,average_log2Fold)) %>%
mutate(variable = ifelse(grepl("stmn_2",name), "STMN2", "UNC13A")) %>%
ggplot(aes(x = tissue_clean, y = value,fill = name)) +
geom_boxplot() +
geom_point(pch = 21,height = 0,width = 0.2,position = position_jitterdodge()) +
scale_fill_manual(values = c("#004D40","#882255")) +
facet_wrap(~tissue_clean+disease_group2,nrow = 1,scales = 'free') +
theme(text = element_text(size = 24)) +
xlab("") +
ylab("PSI") +
theme(legend.position = 'none')
clean_data_table %>%
# filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
# filter(unc13a_cryptic_leaf_psi > 0) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,disease_group2,tissue_clean,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
melt() %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>%
mutate(tissue_clean = fct_relevel(tissue_clean,"Motor Cortex", "Cervical Spinal Cord",
"Frontal Cortex",
"Lumbar Spinal Cord",
"Temporal Cortex")) %>%
mutate(variable = ifelse(grepl("stmn_2",variable), "STMN2", "UNC13A")) %>%
ggplot(aes(x = variable, y = value)) +
geom_boxplot(aes(color = variable)) +
geom_jitter(aes(fill = variable),pch = 21,height = 0,width = 0.2,size = 3) +
scale_fill_manual(values = c("#004D40","#882255")) +
scale_color_manual(values = c("#004D40","#882255")) +
facet_wrap(~tissue_clean+disease_group2,nrow = 3,scales = 'free') +
scale_y_continuous(labels = scales::percent) +
ggpubr::theme_pubr() +
theme(text = element_text(size = 24)) +
xlab("") +
ylab("PSI") +
theme(legend.position = 'none')
clean_data_table %>%
filter(UNC13A_annotated_leaf > 10) %>%
filter(STMN2_annotated_leaf > 10) %>%
filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
filter(unc13a_cryptic_leaf_psi > 0) %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,disease_group2,tissue_clean,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
mutate(log2_unc = log2((unc13a_cryptic_leaf_psi)/(stmn_2_cryptic_psi_leaf)))%>%
mutate(tissue_clean = gsub("_","\n",tissue_clean)) %>%
# mutate(variable = ifelse(grepl("stmn_2",variable), "STMN2", "UNC13A")) %>%
ggplot(aes(x = tissue_clean,y = log2_unc, fill = tissue_clean)) +
geom_boxplot(position = 'dodge') +
# geom_jitter(height = 0,width = 0.2,pch = 21) +
facet_wrap(~disease_group2,scales = "free_x") +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(text = element_text(size = 24)) +
xlab("") +
ylab("PSI") +
theme(legend.position = 'top') +
theme(legend.title=element_blank()) +
stat_compare_means(comparisons = list(c("STMN2","UNC13A")))
rs12973192_comparisons = clean_data_table %>%
filter(junction_reads_unc13a >=30) %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,tissue_clean,disease_group2,rs12973192,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>%
group_by(rs12973192) %>%
mutate(n_sample = length(unique(sample))) %>%
ungroup() %>%
mutate(detection_name = glue::glue("{rs12973192} \n ( {n_sample} )"))
comp_for_gg = rs12973192_comparisons %>% pull(detection_name) %>%
unique() %>%
combn(.,2,simplify = F)
ce_psi = rs12973192_comparisons %>%
ggplot(aes(y = unc13a_cryptic_leaf_psi, x = detection_name, fill = rs12973192)) +
geom_boxplot() +
geom_jitter( height = 0) +
ylab("UNC13A PSI") +
xlab(element_blank()) +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
ggpubr::theme_pubr() +
theme(text = element_text(size = 24)) +
stat_compare_means() +
theme(legend.position = "none") +
stat_compare_means(comparisons = comp_for_gg,label = "p.signif") +
scale_y_continuous(trans='log10',labels = scales::percent_format(accuracy = 1L)) +
ggtitle("CE SNP (rs12973192)")
rs12608932_comparisons = clean_data_table %>%
filter(junction_reads_unc13a >=30) %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,tissue_clean,disease_group2,rs12608932,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>%
group_by(rs12608932) %>%
mutate(n_sample = length(unique(sample))) %>%
ungroup() %>%
mutate(detection_name = glue::glue("{rs12608932} \n ( {n_sample} )"))
comp_for_gg = rs12608932_comparisons %>% pull(detection_name) %>%
unique() %>%
combn(.,2,simplify = F)
in_psi = rs12608932_comparisons %>%
ggplot(aes(y = unc13a_cryptic_leaf_psi, x = detection_name, fill = rs12608932)) +
geom_boxplot() +
geom_jitter( height = 0) +
ylab(element_blank()) +
xlab(element_blank()) +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
ggpubr::theme_pubr() +
theme(legend.position = "none") +
theme(text = element_text(size = 24)) +
stat_compare_means() +
stat_compare_means(comparisons = comp_for_gg,label = "p.signif") +
scale_y_continuous(trans='log10',labels = scales::percent_format(accuracy = 1L)) +
ggtitle("intronic SNP (rs12608932)")
ggpubr::ggarrange(ce_psi,in_psi)
genotype_comparisons = list(c("C/C", "C/G"), c("C/C", "G/G"), c("C/G", "G/G"))
clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,tissue_clean,disease_group2,rs12973192,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>%
group_by(rs12973192) %>%
mutate(n_sample = length(unique(sample))) %>%
ungroup() %>%
mutate(detection_name = glue::glue("{rs12973192} \n ( {n_sample} )")) %>%
ggplot(aes(y = unc13a_cryptic_leaf_psi, x = rs12973192, fill = rs12973192)) +
geom_boxplot() +
geom_jitter( height = 0) +
facet_wrap(~disease_group2,scales = 'free') +
ylab("UNC13A PSI") +
xlab("UNC13A rs12973192 Genotype") +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
ggpubr::theme_pubr() +
theme(text = element_text(size = 24)) +
stat_compare_means(comparisons = genotype_comparisons,label = "p.signif")
genotype_comparisons = list(c("C/C", "C/G"), c("C/C", "G/G"), c("C/G", "G/G"))
clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,tissue_clean,disease_group2,rs12973192,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>%
group_by(rs12973192) %>%
mutate(n_sample = length(unique(sample))) %>%
ungroup() %>%
mutate(detection_name = glue::glue("{rs12973192} \n ( {n_sample} )")) %>%
ggplot(aes(y = unc13a_cryptic_leaf_psi, x = rs12973192, fill = rs12973192)) +
geom_boxplot() +
geom_jitter( height = 0) +
facet_wrap(~disease_group2 + tissue_clean,scales = 'free') +
ylab("UNC13A PSI") +
xlab("UNC13A rs12973192 Genotype") +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
ggpubr::theme_pubr() +
theme(text = element_text(size = 24)) +
stat_compare_means(comparisons = genotype_comparisons,label = "p.signif")
clean_data_table %>%
filter(unc13a_cryptic_leaf_psi > 0 ) %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,tissue_clean,disease_group2,rs12973192,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>%
group_by(rs12973192) %>%
mutate(n_sample = length(unique(sample))) %>%
ungroup() %>%
mutate(detection_name = glue::glue("{rs12973192} \n ( {n_sample} )")) %>%
ggplot(aes(y = unc13a_cryptic_leaf_psi, x = rs12973192, fill = rs12973192)) +
geom_boxplot() +
geom_jitter( height = 0) +
facet_wrap(~disease_group2 + tissue_clean,scales = 'free') +
ylab("UNC13A PSI") +
xlab("UNC13A rs12973192 Genotype") +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
ggpubr::theme_pubr() +
theme(text = element_text(size = 24)) +
stat_compare_means(comparisons = genotype_comparisons,label = "p.signif")
clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,tissue_clean,disease_group2,rs12973192,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>%
ggplot(aes(y = stmn_2_cryptic_psi_leaf, x = rs12973192, fill = rs12973192)) +
geom_boxplot() +
geom_jitter( height = 0) +
facet_wrap(~disease_group2,scales = 'free') +
ylab("STMN2 PSI") +
xlab("UNC13A rs12973192 Genotype") +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
ggpubr::theme_pubr() +
theme(text = element_text(size = 24)) +
stat_compare_means(comparisons = genotype_comparisons,label = "p.signif")
clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,tissue_clean,disease_group2,rs12973192,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>%
ggplot(aes(y = stmn_2_cryptic_psi_leaf, x = rs12973192, fill = rs12973192)) +
geom_boxplot() +
geom_jitter( height = 0) +
facet_wrap(~disease_group2 + tissue_clean,scales = 'free') +
ylab("STMN2 PSI") +
xlab("UNC13A rs12973192 Genotype") +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
ggpubr::theme_pubr() +
theme(text = element_text(size = 24)) +
stat_compare_means(comparisons = genotype_comparisons,label = "p.signif")
clean_data_table %>%
filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,tissue_clean,disease_group2,rs12973192,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
mutate(tissue_clean = gsub("_"," ",tissue_clean)) %>%
ggplot(aes(y = stmn_2_cryptic_psi_leaf, x = rs12973192, fill = rs12973192)) +
geom_boxplot() +
geom_jitter( height = 0) +
facet_wrap(~disease_group2 + tissue_clean,scales = 'free') +
ylab("STMN2 PSI") +
xlab("UNC13A rs12973192 Genotype") +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
ggpubr::theme_pubr() +
theme(text = element_text(size = 24)) +
stat_compare_means(comparisons = genotype_comparisons,label = "p.signif")
clean_data_table %>%
filter(!(rs12973192 == "C/G" & rs12608932 == "C/C")) %>%
mutate(snp_status = case_when(rs12973192 == "C/C" ~ "healthy/healthy",
rs12973192 == "C/G" ~ "healthy/risk",
rs12973192 == "G/G" ~ "risk/risk")) %>%
filter(disease_tissue == T) %>%
filter(junction_reads_stmn2 >= 30) %>%
filter(junction_reads_unc13a >= 30) %>%
filter(grepl("Cortex",tissue_clean)) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
mutate(rs12973192 = fct_relevel(rs12973192,
"C/C", "C/G", "G/G")) %>%
filter(!grepl("Cord",tissue_clean)) %>%
mutate(log10_stmn2 = log10(stmn_2_cryptic_psi_leaf)) %>%
mutate(log10_unc13a = log10(unc13a_cryptic_leaf_psi)) %>%
ggpubr::ggscatter(.,
x = "log10_stmn2",
y = "log10_unc13a",
fill = 'snp_status',
color = 'black',
size = 3,shape = 21,
) +
ylab("UNC13A CE PSI ") +
xlab("STMN2 Cryptic PSI ") +
geom_smooth(method = 'lm',aes(color = snp_status),se = F) +
stat_cor(aes( x = stmn_2_cryptic_psi_leaf,
y = unc13a_cryptic_leaf_psi,
color = snp_status),
method = 'spearman',
cor.coef.name = 'rho',
show.legend = FALSE,
size = 14) +
scale_color_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
theme(legend.title = element_blank()) +
theme(text = element_text(size = 24),legend.text = element_text(size = 32)) +
ylab(expression(paste("Lo", g[10], "UNC13A CE PSI"))) +
xlab(expression(paste("Lo", g[10], "STMN2 CE PSI")))
`geom_smooth()` using formula 'y ~ x'
detected_table_for_graph = clean_data_table %>%
filter(!(rs12973192 == "C/G" & rs12608932 == "C/C")) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
filter(unc13a_cryptic_leaf_psi > 0 ) %>%
filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
filter(junction_reads_stmn2 >= 30) %>%
filter(junction_reads_unc13a >= 30) %>%
filter(grepl("Cortex",tissue_clean)) %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,tissue_clean,disease_group2,rs12973192,rs12608932,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
mutate(log2_unc = log2((unc13a_cryptic_leaf_psi)/(stmn_2_cryptic_psi_leaf))) %>%
mutate(tissue_clean = gsub("_","\n",tissue_clean)) %>%
group_by(rs12973192) %>%
mutate(n_sample = length(unique(sample))) %>%
ungroup() %>%
mutate(detection_name = glue::glue("{rs12973192} \n ( {n_sample} )")) %>%
mutate(no_cong = ifelse(rs12608932 == "C/C" & rs12973192 == "C/G",8,3)) %>% as.data.table()
#get out that person who is discongruous
comparisons = combn(unique(detected_table_for_graph$detection_name),2,simplify = F)
detected_table_for_graph %>%
ggplot(aes(y = log2_unc, x = detection_name,fill = detection_name)) +
geom_boxplot(show.legend = F) +
geom_jitter(show.legend = F, alpha = 0.8, height = 0,pch = 21) +
stat_compare_means() +
stat_compare_means(aes(group = detection_name),comparisons = comparisons,label = "p.signif") +
ylab("Ratio UNC13A CE PSI / STMN2 PSI") +
ylab(expression(paste("Lo", g[2], " fold UNC13A CE PSI / STMN2 CE PSI "))) +
xlab("UNC13A rs12973192 Genotype") +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
ggpubr::theme_pubr() +
theme(text = element_text(size = 24)) +
geom_hline(yintercept = 0,size = 1.5)
detected_table_for_graph = clean_data_table %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
filter(unc13a_cryptic_leaf_psi > 0 ) %>%
filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
filter(junction_reads_stmn2 >= 30) %>%
filter(junction_reads_unc13a >= 30) %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,tissue_clean,disease_group2,rs12973192,rs12608932,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
mutate(log2_unc = log2((unc13a_cryptic_leaf_psi)/(stmn_2_cryptic_psi_leaf))) %>%
mutate(tissue_clean = gsub("_","\n",tissue_clean)) %>%
group_by(rs12973192) %>%
mutate(n_sample = length(unique(sample))) %>%
ungroup() %>%
mutate(detection_name = glue::glue("{rs12973192} \n ( {n_sample} )")) %>%
mutate(no_cong = ifelse(rs12608932 == "C/C" & rs12973192 == "C/G",8,3))
comparisons = combn(unique(detected_table_for_graph$detection_name),2,simplify = F)
detected_table_for_graph %>%
ggplot(aes(y = log2_unc, x = detection_name,fill = detection_name)) +
geom_boxplot(show.legend = F) +
geom_jitter(show.legend = F, alpha = 0.8, height = 0,pch = 21) +
stat_compare_means( label = "p.format") +
stat_compare_means(aes(group = detection_name),comparisons = comparisons,label = "p.signif") +
ylab("Ratio UNC13A CE PSI / STMN2 PSI") +
ylab(expression(paste("Lo", g[2], " fold UNC13A CE PSI / STMN2 CE PSI "))) +
xlab("UNC13A rs12973192 Genotype") +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
ggpubr::theme_pubr() +
theme(text = element_text(size = 24)) +
geom_hline(yintercept = 0,size = 1.5)
pretty_effect_plot = function(model, p_value_cutoff = 0.05, title = "Model 1"){
p_value = Tmisc::lmp(model)
r_squared = summary(model)$adj.r.squared
pretty_plot = broom::tidy(model) %>%
filter(p.value < p_value_cutoff) %>%
mutate(term = fct_reorder(term, -abs(estimate))) %>%
ggplot(aes(x = term, y = estimate,fill = estimate > 0 )) +
geom_col() +
coord_flip() +
ggtitle(title,subtitle = glue::glue("Model p-value: {p_value} \n Adj. R-squared: {r_squared}"))
}
detect2 = clean_data_table %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
filter(unc13a_cryptic_leaf_psi > 0 ) %>%
filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
filter(junction_reads_stmn2 >= 30) %>%
filter(junction_reads_unc13a >= 30) %>%
filter(disease_tissue == T) %>%
mutate(call = fct_relevel(rs12973192,
"C/C", "C/G", "G/G")) %>%
mutate(number_g_alleles = as.numeric(call)) %>%
mutate(age_at_death = as.numeric(age_at_death))
m1 = lm(unc13a_cryptic_leaf_psi ~ stmn_2_cryptic_psi_leaf *
number_g_alleles +
tissue_clean +
age_at_death +
sex,
detect2)
k = pretty_effect_plot(m1,p_value_cutoff = 0.05)
print(k)
detected_table_for_graph_rs12608932 = clean_data_table %>%
filter(grepl("Cortex",tissue_clean)) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
filter(unc13a_cryptic_leaf_psi > 0 ) %>%
filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
filter(junction_reads_stmn2 >= 30) %>%
filter(junction_reads_unc13a >= 30) %>%
filter(grepl("Cortex",tissue_clean)) %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,tissue_clean,disease_group2,rs12608932,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
mutate(log2_unc = log2((unc13a_cryptic_leaf_psi)/(stmn_2_cryptic_psi_leaf))) %>%
mutate(tissue_clean = gsub("_","\n",tissue_clean)) %>%
group_by(rs12608932) %>%
mutate(n_sample = length(unique(sample))) %>%
ungroup() %>%
mutate(detection_name = glue::glue("{rs12608932} \n ( {n_sample} )"))
comparisons = combn(unique(detected_table_for_graph_rs12608932$detection_name),2,simplify = F)
detected_table_for_graph_rs12608932 %>%
ggplot(aes(y = log2_unc, x = detection_name,fill = detection_name)) +
geom_boxplot(show.legend = F) +
geom_jitter(show.legend = F, alpha = 0.8, height = 0,pch = 21) +
stat_compare_means( label = "p.format") +
stat_compare_means(aes(group = detection_name),comparisons = comparisons,label = "p.signif") +
ylab("Ratio UNC13A CE PSI / STMN2 PSI") +
ylab(expression(paste("Lo", g[2], " fold UNC13A CE PSI / STMN2 CE PSI "))) +
xlab("UNC13A rs12608932 Genotype") +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
ggpubr::theme_pubr() +
theme(text = element_text(size = 24)) +
geom_hline(yintercept = 0,size = 1.5)
str_genotype = janitor::clean_names(fread(file.path(here::here(),"data/rs56041637.genotypes.377samples.txt"))) %>%
dplyr::rename(participant_id = number_sample_id)
detected_table_for_graph = clean_data_table %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
filter(unc13a_cryptic_leaf_psi > 0 ) %>%
filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
filter(junction_reads_stmn2 >= 30) %>%
filter(junction_reads_unc13a >= 30) %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,participant_id,tissue_clean,disease_group2,rs12973192,rs12608932,stmn_2_cryptic_psi_leaf,unc13a_cryptic_leaf_psi) %>%
mutate(log2_unc = log2((unc13a_cryptic_leaf_psi)/(stmn_2_cryptic_psi_leaf))) %>%
mutate(tissue_clean = gsub("_","\n",tissue_clean)) %>%
left_join(str_genotype) %>%
filter(!is.na(genotype_ci)) %>% unique()
Joining, by = "participant_id"
comparisons = combn(unique(detected_table_for_graph$genotype),2,simplify = F)
detected_table_for_graph %>%
ggplot(aes(y = log2_unc, x = genotype,fill = genotype)) +
geom_boxplot(show.legend = F) +
geom_jitter(show.legend = F, alpha = 0.8, height = 0,pch = 21) +
stat_compare_means( label = "p.format") +
stat_compare_means(aes(group = detection_name),comparisons = comparisons,label = "p.signif") +
ylab("Ratio UNC13A CE PSI / STMN2 PSI") +
ylab(expression(paste("Lo", g[2], " fold UNC13A CE PSI / STMN2 CE PSI "))) +
xlab("UNC13A STR Genotype") +
ggpubr::theme_pubr() +
theme(text = element_text(size = 24)) +
geom_hline(yintercept = 0,size = 1.5)
clean_data_table %>%
mutate(presumed_unique_id = gsub("-b38","",participant_id)) %>%
select(participant_id,presumed_unique_id,rs12973192,rs12608932) %>%
unique() %>%
left_join(str_genotype) %>%
unique() %>%
mutate(genotype = fct_relevel(genotype,"6/6", "6/7", "6/8", "6/9",
"6/10","6/12","7/8","8/8","8/10","9/10")) %>%
ggplot(aes(x = genotype,fill = rs12973192)) +
geom_bar() +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
ylab("N individuals") +
xlab("STR genotype")
Joining, by = "participant_id"
overall_fisher = clean_data_table %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
mutate(unc13a_detected = unc13a_cryptic_leaf_psi > 0) %>%
dplyr::select(participant_id,unc13a_detected,rs12973192) %>%
unique() %>%
group_by(rs12973192) %>%
mutate(n_sample = n_distinct(participant_id)) %>%
mutate(n_sample_detected = sum(unc13a_detected)) %>%
dplyr::select(rs12973192,n_sample,n_sample_detected) %>%
unique() %>%
mutate(n_non_detected = n_sample - n_sample_detected) %>%
dplyr::select(-n_sample)
over_p = overall_fisher %>%
column_to_rownames('rs12973192') %>%
chisq.test() %>%
broom::tidy() %>%
.$p.value
overall_fisher %>%
mutate(n_sample = n_non_detected + n_sample_detected) %>%
mutate(detection_rate = n_sample_detected / n_sample) %>%
mutate(detection_name = glue::glue("{rs12973192} \n ( {n_sample} )")) %>%
ggplot(aes(x = detection_name, y = detection_rate, fill = detection_name)) +
geom_col(show.legend = F) +
ggpubr::theme_pubr() +
scale_y_continuous(labels = scales::percent) +
ylab("% of TDP-43 Proteionopathy Patients \n UNC13A Cryptic Detected") +
theme(text = element_text(size = 18)) +
xlab("N individuals") +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
ggtitle(label = "ALS/FTD-TDP patients",subtitle = glue::glue("Chi-squared Test: p-value {round(over_p,2)}"))
Although this difference is not significant, with the Fisher’s exact giving a p-value of 0.27.
Across our KD’s, we observed a clear allelic bias in the SK-DZ-N cells
kd_vafs = fread(file.path(here::here(),"data","all_kds_unc13.snp.out.tsv"))
kd_vafs %>%
left_join(rel_rna_cryptic_amount) %>%
filter(!is.na(condition)) %>%
mutate(VAF = alt_reads / total) %>%
filter(condition != "control" ) %>%
filter(alt_reads > 5) %>%
ggplot(aes(x = stmn_2_cryptic_psi,y = cryptic_psi, fill = -(VAF - 0.5),size = total)) +
geom_point(pch = 21) +
coord_fixed() +
ylim(0,0.8) +
xlim(0,0.8) +
geom_abline() +
scale_fill_gradient2()
Joining, by = "sample"
Also look at the coverage in the Liu nuclear facs sorted neurons
liu_vafs = fread(file.path(here::here(),"data","liu_facs_neurons_unc13.snp.out.tsv"))
liu_stmn2 = fread(file.path(here::here(),"data","liu_stmn2_and_unc13aaggregated.clean.annotated.bed"))
liu_stmn2[,sample := gsub(".SJ.out","",V4)]
liu_stmn2 = liu_stmn2 %>%
unique() %>%
pivot_wider(id_cols = 'sample',
names_from = "V7",
values_from = 'V5',
values_fill = 0)
liu_stmn2 <- liu_stmn2 %>%
mutate(stmn2_psi = STMN2_cryptic / (STMN2_cryptic + STMN2_annotated)) %>%
mutate(unc13a_psi = (UNC13A_3prime + UNC13A_5prime + UNC13A_5prime_2)/ (UNC13A_annotated + UNC13A_3prime + UNC13A_5prime + UNC13A_5prime_2))
liu_vafs %>%
mutate(VAF = alt_reads / total) %>%
data.table::melt(id.vars = c("sample",'VAF','total')) %>%
mutate(allele = ifelse(variable == "ref_reads", "C","G")) %>%
mutate(sample_name = sub("_TDP_.*", "", sample)) %>%
mutate(sample_name = gsub("_unsorted", "", sample_name)) %>%
mutate(condition = sub(".*_", "", sample)) %>%
ggplot(aes(x = condition,y = value, fill = allele)) +
geom_col(pch = 21, size = 4) +
facet_wrap(~sample_name,scales = "free")
liu_stmn2 %>%
dplyr::select(stmn2_psi,sample,unc13a_psi) %>%
data.table::melt() %>%
mutate(sample_name = sub("_TDP_.*", "", sample)) %>%
filter(!grepl("unsorted",sample_name)) %>%
mutate(condition = sub(".*_", "", sample)) %>%
mutate(condition = ifelse(grepl("negative",condition), "TDP-43 \nNegative", "TDP-43 \nPositive")) %>%
mutate(variable = ifelse(grepl("stmn2",variable), "STMN2", "UNC13A")) %>%
ggplot(aes(x = condition,y = value, fill = variable)) +
geom_col(size = 4,position = 'dodge') +
facet_wrap(~sample_name,scales = "free_x") +
scale_fill_manual(values = c("#004D40","#882255")) +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(text = element_text(size = 24)) +
xlab("") +
ylab("PSI") +
theme(legend.position = 'top') +
theme(legend.title=element_blank()) +
stat_compare_means(comparisons = list(c("STMN2","UNC13A")))
Using sample as id variables
liu_stmn2 %>%
dplyr::select(stmn2_psi,sample,unc13a_psi) %>%
data.table::melt() %>%
mutate(sample_name = sub("_TDP_.*", "", sample)) %>%
filter(!grepl("unsorted",sample_name)) %>%
mutate(condition = sub(".*_", "", sample)) %>%
mutate(condition = ifelse(grepl("negative",condition), "TDP-43 \nNegative", "TDP-43 \nPositive")) %>%
mutate(variable = ifelse(grepl("stmn2",variable), "STMN2", "UNC13A")) %>%
ggbarplot(,
x = "condition",
add = c("mean_se",'jitter'),
y = "value",
fill = 'variable',
color = 'variable',
position = position_dodge(0.8),dot.size = 6) +
ggpubr::theme_pubr() +
scale_color_manual(values = c("#000000","#000000")) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(text = element_text(size = 24)) +
scale_y_continuous(labels = scales::percent) +
xlab("") +
ylab("PSI") +
theme(legend.position = 'top') +
theme(legend.title=element_blank()) +
stat_compare_means(comparisons = list(c("STMN2","UNC13A")))
Using sample as id variables
liu_stmn2 %>%
mutate(stmn2_psi = STMN2_cryptic / (STMN2_cryptic + STMN2_annotated)) %>%
mutate(unc13a_psi = (UNC13A_3prime + UNC13A_5prime)/ (UNC13A_annotated + UNC13A_3prime + UNC13A_5prime)) %>%
left_join(liu_vafs) %>%
mutate(alt_vaf = (alt_reads)/total) %>%
mutate(condition = ifelse(grepl("negative",sample),"negative", "positive")) %>%
filter(!grepl("unsorted",sample)) %>%
ggplot(aes(y = unc13a_psi,x = stmn2_psi, fill = alt_vaf)) +
geom_point(pch = 21,size = 5) +
ggtitle("STMN2 PSI and the VAF of the G allele \n in the Liu Nuclei ") +
facet_grid(~condition) +
scale_fill_viridis_c(option = "plasma") +
coord_fixed() +
geom_abline()
Joining, by = "sample"
nycg_vafs = fread(file.path(here::here(),"data","NYGC_all_samples_UNC13A_snp_coverage.tsv"),fill = T)
nycg_vafs %>%
left_join(clean_data_table) %>%
filter(rs12973192 == "C/G") %>%
mutate(fraction_risk = alt_reads / total) %>%
filter(!is.na(fraction_risk)) %>%
filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
filter(unc13a_cryptic_leaf_psi > 0 ) %>%
ggplot(aes(x = stmn_2_cryptic_psi_leaf, y = unc13a_cryptic_leaf_psi)) +
geom_point(size = 4, pch = 21, aes(fill = fraction_risk - 0.5)) +
xlab("STMN2 Cryptic PSI") +
ylab("UNC13A CE PSI") +
geom_abline() +
scale_fill_gradient2()
Joining, by = "sample"
nycg_vafs %>%
left_join(clean_data_table) %>%
filter(rs12973192 == "C/G") %>%
mutate(fraction_risk = alt_reads / total) %>%
filter(!is.na(fraction_risk)) %>%
filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
filter(unc13a_cryptic_leaf_psi > 0 ) %>%
mutate(stmn2_unc_fract = ((unc13a_cryptic_leaf_psi * 100) / (stmn_2_cryptic_psi_leaf * 100))) %>%
ggplot(aes(x = stmn2_unc_fract, y = fraction_risk)) +
geom_point(size = 4, pch = 21, aes(fill = fraction_risk - 0.5)) +
xlab("STMN2 Cryptic PSI") +
ylab("UNC13A CE PSI") +
scale_fill_gradient2()
Joining, by = "sample"
TODO: convert all libraries to p_load.
Perform quality control to check that reads are periodic.
read_length_filter <- 29 # length of reads of interest
gencode_v29_info <- fread(paste0(here(), "/data/longest_proteincoding_transcript_hs_details.txt"))
ribo_df <- fread(paste0(here(), "/data/riboseq/sh_tdpb.Aligned.toTranscriptome.out.bam.bed.gz"))
names(ribo_df) = c("transcript_id", "start0", "end", "strand")
ribo_df = ribo_df %>%
inner_join(gencode_v29_info, by = "transcript_id") %>%
dplyr::filter(end-start0 == read_length_filter, strand == "+") %>%
mutate(distance_from_start = start0-cds_start) %>%
group_by(distance_from_start) %>%
mutate(n_this_dist = n()) %>%
dplyr::select(distance_from_start, n_this_dist) %>%
unique()
ggplot(ribo_df, aes(x=distance_from_start, y=n_this_dist)) +
geom_line() +
geom_point() +
xlim(-50, 50) +
ylab("Counts") +
xlab("Distance of 5' end from annotated start codon") +
ggtitle(paste0("Periodicity of ", read_length_filter, "nt reads from TDP-43 KD replicate B"))
Use DESeq to detect differential ribosome footprints and create volcano plots
counts_df <- read_csv(paste0(here(), "/data/riboseq/cds_feature_counts.csv"))
counts <- as.matrix(counts_df %>% column_to_rownames("gene_id"))
ipsc_splicing <- read_csv(paste0(here(), "/data/ipsc_splicing_results.csv"))
ipsc_de = read_csv(paste0(here(), "/data/ipsc_differential_expression.csv"))
col_data <- data.frame(sample = colnames(counts)) %>%
mutate(condition = ifelse(str_detect(sample, "tdp"), "tdp_ko", "control"))
dds <- DESeqDataSetFromMatrix(countData = counts,
colData = col_data,
design= ~condition)
dds <- DESeq(dds)
results_name <- resultsNames(dds)[2]
res <- results(dds, name=results_name)
res_df <- data.frame(res) %>%
rownames_to_column("gene_id") %>%
left_join(gencode_v29_info, by = "gene_id")
cryptic_df <- ipsc_splicing %>%
dplyr::select(gene_name, cryptic_junction) %>%
unique() %>%
filter(cryptic_junction)
res_df2 <- res_df %>%
mutate(cryptic_junction = gene_name %in% cryptic_df$gene_name) %>%
mutate(label_junction = case_when(gene_name %in% c("UNC13A","AGRN",
"UNC13B","PFKP","SETD5",
"ATG4B","STMN2","TARDBP") ~ gene_name)) %>%
mutate(colour = ifelse(cryptic_junction, "#fe6101", "#648FFF")) %>%
arrange(colour)
p1 <- ggplot(res_df2, aes(x = log2FoldChange, y = -log10(pvalue), label = label_junction)) +
geom_point(data = res_df2 %>% filter(!cryptic_junction),
aes(x = log2FoldChange, y= -log10(pvalue)), color = "#648FFF") +
geom_point(data = res_df2 %>% filter(cryptic_junction),
aes(x = log2FoldChange, y= -log10(pvalue)), color = "#fe6101") +
ggpubr::theme_pubr() +
ylab("-Log10(p-value)") +
xlab("Log2 fold change") +
geom_text_repel(box.padding = 1.2, max.overlaps = Inf)
p2 <- p1 + ylim(0,10)
p1
p2
Read in data from processed bam files
TODO: add scripts which deduplicate UMIs for each.
specific_rt <- read_csv(paste0(here(), "/data/targeted_RNA_seq/specific_RT_primer/for_plot.csv")) %>%
select(risk, sample_name, n)
random_hexamer <- read_csv(paste0(here(), "/data/targeted_RNA_seq/random_hexamer/values_for_plot.csv")) %>%
select(-sample_name) %>%
select(sample_name = actual_sample_name, risk, n)
tg_rnaseq_df <- bind_rows(specific_rt, random_hexamer) %>%
group_by(sample_name) %>%
mutate(n_healthy = ifelse(risk == "Non-Risk", n, 0)) %>%
mutate(combined_healthy = sum(n_healthy)) %>%
select(-n_healthy) %>%
mutate(combined = sum(n)) %>%
group_by(sample_name, risk) %>%
mutate(combined_n = sum(n)) %>%
ungroup() %>%
select(-n) %>%
unique() %>%
group_by(sample_name) %>%
mutate(p_value = pbinom(combined_healthy, sum(combined_n), 0.5)) %>% # single-tailed bionomial test
ungroup() %>%
mutate(sample_name_ordered = fct_reorder(sample_name, dplyr::desc(combined_n)))
ggplot(tg_rnaseq_df %>%
filter(combined_n > 0), aes(x = sample_name_ordered, y = combined_n, fill = risk)) +
geom_bar(stat="identity", position="dodge") +
geom_text(aes(x=sample_name, y=-2.5, label=signif(p_value,2)), size = 3.5) +
xlab("") +
ylab("Unique cDNAs") +
ggeasy::easy_add_legend_title("Allele") +
theme_minimal() +
ggpubr::theme_pubr() +
ggeasy::easy_rotate_x_labels()
Read in the processed data (following alignment with Bowtie2, and filtering for high-confidence alignments), and generate a plot of the overall distribution of crosslinks, and the rolling average difference between the 2x Risk and 2x Healthy.
Next, filter for peaks in the iCLIP signal, and see how they change in 2x Risk versus 2x Healthy. Highlight those which are close to the SNPs.
TODO: add script which does this filtering TODO: add fasta files for the minigenes
exon_snp <- 556
intron_snp <- 1106
intron_start <- 141
intron_end <- 1429
acceptor1 <- 394
acceptor2 <- 444
donor <- 572
mg_l <- 1624
cpoor_start <- 490
cpoor_end <- 1325
risk_colour <- "#00BFC4"
healthy_colour <- "#F8766D"
confidence_interval <- 0.75 # fracion, i.e. 0.75 = 75%.
peak_threshold <- 5 # how many times above the mean to be a "peak"
region_length <- 50
filtered_df <- read_csv(paste0(here(), "/data/iCLIP/bowtie2_aligned_2_filtered_df.csv"))
rolling_window <- 20
peak_t <- peak_threshold*1000/mg_l # by definition...
df2 <- filtered_df %>%
ungroup() %>%
group_by(condition, pos) %>%
mutate(mean_counts = sum(normalised_counts)/2) %>%
select(pos, condition, mean_counts) %>%
unique() %>%
pivot_wider(names_from = "condition", values_from ="mean_counts")
df2[is.na(df2)] <- 0
df2 <- df2 %>%
mutate(diff = R-H) %>%
mutate(mean_4 = (H+R)/2) %>%
arrange(pos)
top <- ggplot(df2, aes(x=pos, y=diff)) +
geom_area(aes(x=pos, y = mean_4), colour="grey50", fill = "grey50", alpha=0.2) +
geom_vline(xintercept = exon_snp, linetype="dashed") +
geom_vline(xintercept = intron_snp, linetype="dashed") +
xlim(1,1600) +
theme_minimal() +
ylab("XLs per 1000") +
xlab("") +
geom_vline(xintercept = cpoor_start) +
geom_vline(xintercept = cpoor_end)
bottom <- ggplot(df2, aes(x=pos, y=zoo::rollmean(diff, k = rolling_window, na.pad=T))) +
geom_area(colour = "grey50", fill="grey50", alpha = 1) +
geom_vline(xintercept = exon_snp, linetype="dashed") +
geom_vline(xintercept = intron_snp, linetype="dashed") +
xlim(1,1600) +
ylim(-0.5,0.75) +
theme_minimal() +
ylab("Change in XL density") +
xlab("Distance from start of minigene") +
geom_vline(xintercept = intron_start) +
geom_vline(xintercept = intron_end) +
geom_vline(xintercept = acceptor1) +
geom_vline(xintercept = acceptor2) +
geom_vline(xintercept = donor)
combined <- top/bottom
combined
find_ratio_bound <- function(e_x, sem_x, e_y, sem_y, quantile_value, randoms = 10000){
# monte carlo confidence interval estimate for a ratio. Assumes the two variables
# x and y are normally distributed. Finds confidence interval for (x-y)/y
# randomly generate possible global means for x and y
xs = rnorm(randoms, mean = e_x, sd=sem_x) # risk
ys = rnorm(randoms, mean = e_y, sd=sem_y) # healthy
ratios = (xs-ys)/ys
return(quantile(ratios, quantile_value))
}
peak_df <- filtered_df %>%
ungroup() %>%
group_by(pos) %>%
mutate(max_at_pos = max(normalised_counts)) %>% # at least 1 of the 4 samples above the threshold
ungroup() %>%
filter(max_at_pos >= peak_t) %>% # at least 1 of the 4 samples above the threshold
group_by(condition, pos) %>%
mutate(peak_mean = mean(normalised_counts),
peak_sd = sd(normalised_counts)) %>%
ungroup() %>%
select(condition, pos, peak_mean, peak_sd) %>%
unique() %>%
pivot_wider(names_from = "condition", values_from = c("peak_mean", "peak_sd")) %>%
mutate(frac_increase = (peak_mean_R-peak_mean_H)/peak_mean_H) %>%
mutate(peak_rank = rank(frac_increase)) %>%
mutate(colour = ifelse(abs(pos-exon_snp)<region_length, "#f7e7abff",
ifelse(abs(pos-intron_snp)<region_length, "#c8e1c4ff", "grey50")))
peak_df$upper_bound <- mapply(find_ratio_bound,
peak_df$peak_mean_R,
peak_df$peak_sd_R/sqrt(2), #convert to SEM
peak_df$peak_mean_H,
peak_df$peak_sd_H/sqrt(2), #convert to SEM
0.5*(1+confidence_interval))
peak_df$lower_bound <- mapply(find_ratio_bound,
peak_df$peak_mean_R,
peak_df$peak_sd_R/sqrt(2), #convert to SEM
peak_df$peak_mean_H,
peak_df$peak_sd_H/sqrt(2), #convert to SEM
0.5*(1-confidence_interval))
ggplot(peak_df, aes(x = peak_rank, y = 100*frac_increase, fill=colour)) +
geom_bar(stat="identity",,color = 'black') +
scale_fill_identity() +
theme_minimal() +
ggpubr::theme_pubr()+
ylab("% change in 2x Risk") +
xlab("Rank") +
geom_errorbar(aes(ymin = 100*lower_bound, ymax = 100*upper_bound, x = peak_rank),
width = 0.3,size = 1.1)
Supplementary iCLIP Figures
bars <- filtered_df %>%
ungroup() %>%
group_by(condition, pos) %>%
mutate(mean_normalised = mean(normalised_counts),
sd_normalised = sd(normalised_counts)) %>%
mutate(colour = ifelse(condition == "H", healthy_colour, risk_colour))
bar_top <- ggplot(bars %>% filter(abs(pos-exon_snp) < 35), aes(x = pos, y = mean_normalised,
fill = colour)) +
geom_bar(stat="identity", position="dodge") +
scale_fill_identity() +
geom_errorbar(aes(ymin = mean_normalised-sd_normalised, ymax=mean_normalised+sd_normalised),
position="dodge") +
geom_vline(xintercept = exon_snp, linetype="dashed") +
ylab("XLs per 1000") +
ggpubr::theme_pubr() +
ggtitle("iCLIP signal around CE SNP") +
xlab("")
bar_bottom <- ggplot(bars %>% filter(abs(pos-intron_snp) < 35), aes(x = pos, y = mean_normalised,
fill = colour)) +
geom_bar(stat="identity", position="dodge") +
scale_fill_identity() +
geom_errorbar(aes(ymin = mean_normalised-sd_normalised, ymax=mean_normalised+sd_normalised),
position="dodge") +
geom_vline(xintercept = intron_snp, linetype="dashed") +
ggpubr::theme_pubr() +
ylab("XLs per 1000") +
ggtitle("iCLIP signal around intronic SNP") +
xlab("")
bar_combined <- bar_top / bar_bottom
bar_combined
Analyse E-values (higher E value = stronger binding enrichment) for the motifs present at the exon SNP.
# Read in supplementary 4 from Ray et al 2013
data <- read_tsv(paste0(here(), "/data/heptamers/41586_2013_BFnature12311_MOESM334_ESM.txt"))
for(type in c("exon", "intron")){
if(type == "exon"){
snp <- "TAAAAGCATGGAT"
healthy <- "TAAAAGGATGGAT"
} else {
snp <- "GGATGGGTGGATA"
healthy <- "GGATGGTTGGATA"
}
all_healthy <- c()
all_risk <- c()
# Make dataframe containing E scores for each relevant 7mer
for(i in 1:(str_length(snp)-6)){
this_snp <- str_sub(snp, i, i+6)
this_healthy <- str_sub(healthy, i, i+6)
all_healthy <- c(all_healthy, str_replace_all(this_healthy, "T", "U"))
all_risk <- c(all_risk, str_replace_all(this_snp, "T", "U"))
row1 <- data %>% filter(`7mer` == str_replace_all(this_snp, "T", "U")) %>%
mutate(snp = T, pos = i)
row2 <- data %>% filter(`7mer` == str_replace_all(this_healthy, "T", "U")) %>%
mutate(snp = F, pos = i)
if(i == 1){
full <- bind_rows(row1, row2)
} else {
full <- bind_rows(full, row1, row2)
}
}
# Find average change in E score for the two groups of heptamers
full2 <- full %>%
group_by(pos) %>%
filter(n() == 2) %>%
ungroup() %>%
pivot_longer(cols = contains("set")) %>%
select(-`7mer`) %>%
pivot_wider(names_from = "snp", values_from = "value") %>%
mutate(abs_change = `TRUE`-`FALSE`) %>%
filter(!is.na(`TRUE`) & !is.na(`FALSE`)) %>%
group_by(name) %>%
mutate(mean_abs_change = mean(abs_change)) %>%
select(name, mean_abs_change) %>%
unique() %>%
arrange(mean_abs_change) %>%
mutate(is_tdp43 = ifelse(str_detect(name, "RNCMPT00076"), "red", "grey80")) %>%
mutate(short_name = str_sub(name, 1, 11)) %>%
ungroup() %>% group_by(short_name) %>%
mutate(average_delta = mean(mean_abs_change)) %>%
select(short_name, is_tdp43, average_delta) %>%
unique()
# Plot ranks
plot <- ggplot(full2, aes(x=rank(average_delta), y = average_delta,
fill = is_tdp43)) +
geom_bar(stat="identity") +
scale_fill_identity() +
theme_minimal() +
ggtitle(paste0("Average change in binding E value for each dataset with SNP for ", type, " SNP")) +
xlab("") +
ylab("Average change in E score") +
ggpubr::theme_pubr() +
xlab("Rank")
print(plot)
full_just_tdp <- full %>%
group_by(pos) %>%
filter(n() == 2) %>%
ungroup() %>%
select(`7mer`, contains("RNCMPT00076"), snp, pos) %>%
mutate(mean_E = (RNCMPT00076_e_setA + RNCMPT00076_e_setB)/2) %>%
pivot_longer(cols = contains("RNCMP")) %>%
mutate(ypos = ifelse(snp, -0.35, -0.27))
plot2 <- ggplot(full_just_tdp, aes(x = pos, y = value, colour = snp,
label = `7mer`)) +
geom_point(size = 3) +
xlab("7mer") +
ylab("E value (similar to rank?)") +
ggeasy::easy_add_legend_title("Contains Risk SNP") +
ggtitle(paste0("Binding E value for TDP43 for all 7x 7mers with or without risk SNP for ", type, " SNP")) +
geom_label(aes(y = ypos), size = 2.7) +
xlim(0.5, 7.5) +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
ggeasy::easy_remove_x_axis()
print(plot2)
}
Read in values, plot and perform a t-test
df <- read_csv(paste0(here(),
"/data/itc/itc final values.csv"))
df2 <- df %>%
group_by(RNA, experiment) %>%
mutate(sd2 =ifelse(is.na(sd), sd(kd), sd)) %>%
mutate(mean_kd = mean(kd)) %>%
ungroup() %>%
select(experiment, RNA, mean_kd, sd2) %>%
unique()
ggplot(df2, aes(x = experiment, fill = RNA, y = mean_kd)) +
geom_bar(stat="identity", position="dodge") +
geom_errorbar(aes(x=experiment, ymin = mean_kd-sd2, ymax = mean_kd+sd2,
fill = RNA), position="dodge") +
ylim(0, NA) +
geom_jitter(data = df,
aes(group=RNA,x = experiment, y = kd,fill = RNA),
position=position_jitterdodge(jitter.width = 0.1),pch = 21,size = 3,) +
ggpubr::theme_pubr() +
ylab("Kd / nM") +
xlab("") +
scale_fill_manual(values = c("#0c9bbdff","#ffb700ff"))
exon_pval = t.test(df$kd[which(df$RNA=="H" & df$experiment=="exon")], df$kd[which(df$RNA=="R" & df$experiment=="exon")]) %>% broom::tidy() %>% pull(p.value)
intron_pval = t.test(df$kd[which(df$RNA=="H" & df$experiment=="intron")], df$kd[which(df$RNA=="R" & df$experiment=="intron")]) %>% broom::tidy() %>% pull(p.value) # this is an overestimate of significance - however still borderline significant (~0.05) when fitting error is taken into account
P-value two sample t-test:
als_ftd_genomics = c("ALS2", "ANG", "APP", "AR", "ATN1", "ATXN1", "ATXN10", "ATXN2",
"ATXN3", "C21orf2", "C9orf72", "CHMP2B", "CSF1R", "DCTN1", "DNAJC5",
"DNMT1", "EPM2A", "FIG4", "FUS", "GRN", "HNRNPA1", "HTT", "ITM2B",
"JPH3", "KIF5A", "LOC101927815", "MAPT", "NHLRC1", "NOP56", "NOTCH3",
"OPTN", "PFN1", "PMP22", "PRNP", "PSEN1", "PSEN2", "SCFD1", "SETX",
"SIGMAR1", "SLC52A2", "SLC52A3", "SOD1", "TARDBP", "TBK1", "TBP",
"TNIP1", "TYROBP", "UBQLN2", "UNC13A", "VAPB", "VCP")
ipsc_de = as.data.table(ipsc_de)
ipsc_splicing = as.data.table(ipsc_splicing)
ipsc_de[gene_name %in% als_ftd_genomics][order(log2FoldChange)][,.(log2FoldChange,padj,gene_name,gene_expresion)] %>% mutate(has_cryptic = gene_name %in% ipsc_splicing[cryptic_junction == T,gene_name]) %>% unique() %>% arrange(log2FoldChange) %>% knitr::kable()
log2FoldChange | padj | gene_name | gene_expresion | has_cryptic |
---|---|---|---|---|
-1.4755792 | 0.0000000 | TARDBP | downregulated | FALSE |
-0.4651237 | 0.3129656 | AR | not_significant | FALSE |
-0.4522688 | 0.0000000 | PRNP | downregulated | FALSE |
-0.3370146 | 0.0000000 | MAPT | downregulated | FALSE |
-0.2560809 | 0.0000197 | ATXN10 | downregulated | FALSE |
-0.2507640 | 0.0000114 | UNC13A | downregulated | TRUE |
-0.2459440 | 0.0000023 | SIGMAR1 | downregulated | FALSE |
-0.2233069 | 0.0000475 | UBQLN2 | downregulated | FALSE |
-0.2211469 | 0.0000258 | ATN1 | downregulated | FALSE |
-0.1942493 | 0.0003034 | OPTN | downregulated | FALSE |
-0.1628779 | 0.0024270 | KIF5A | downregulated | FALSE |
-0.1617614 | 0.0012821 | DNAJC5 | downregulated | FALSE |
-0.1384970 | 0.2533432 | CHMP2B | not_significant | FALSE |
-0.1220204 | 0.0515311 | DNMT1 | downregulated | FALSE |
-0.1049954 | 0.0844946 | VCP | downregulated | FALSE |
-0.0916095 | 0.0681453 | APP | downregulated | FALSE |
-0.0835958 | 0.3273028 | PSEN2 | not_significant | FALSE |
-0.0790801 | 0.4084679 | FIG4 | not_significant | FALSE |
-0.0663967 | 0.2592503 | FUS | not_significant | FALSE |
-0.0646336 | 0.3051997 | DCTN1 | not_significant | FALSE |
-0.0635069 | 0.5807439 | SCFD1 | not_significant | FALSE |
-0.0623503 | 0.5686538 | SLC52A2 | not_significant | FALSE |
-0.0476461 | 0.5766988 | ATXN2 | not_significant | FALSE |
-0.0423053 | 0.7469608 | C9orf72 | not_significant | FALSE |
-0.0294862 | 0.6926638 | SOD1 | not_significant | FALSE |
-0.0185168 | 0.8789865 | SETX | not_significant | FALSE |
-0.0183617 | 0.8304551 | VAPB | not_significant | FALSE |
0.0045770 | 0.9596904 | JPH3 | not_significant | FALSE |
0.0111296 | 0.9086136 | ITM2B | not_significant | FALSE |
0.0112341 | 0.9313250 | TBP | not_significant | FALSE |
0.0153951 | 0.8461312 | PFN1 | not_significant | FALSE |
0.0234133 | 0.8051831 | HTT | not_significant | FALSE |
0.0323912 | 0.7842848 | C21orf2 | not_significant | FALSE |
0.0536923 | 0.7306305 | EPM2A | not_significant | FALSE |
0.0578489 | 0.5192551 | ATXN1 | not_significant | FALSE |
0.0622795 | 0.4214349 | NOP56 | not_significant | FALSE |
0.0629227 | 0.4204136 | ALS2 | not_significant | FALSE |
0.0996251 | 0.8525963 | NHLRC1 | not_significant | FALSE |
0.1187887 | 0.0069879 | HNRNPA1 | upregulated | FALSE |
0.1295121 | 0.2157945 | TBK1 | not_significant | FALSE |
0.1591301 | 0.4195443 | NOTCH3 | not_significant | FALSE |
0.1801005 | 0.0032988 | PSEN1 | upregulated | FALSE |
0.2001363 | 0.0291925 | ATXN3 | upregulated | FALSE |
0.2340033 | 0.0005677 | GRN | upregulated | FALSE |
0.2560610 | 0.0152038 | TNIP1 | upregulated | FALSE |
# library size
#llumina HiSeq 2500 (125 bp paired end) or an Illumina NovaSeq (100 bp paired end).
full_cov = clean_data_table %>%
filter(tissue_clean %in% c("Frontal_Cortex",
"Lumbar_Spinal_Cord",
"Cervical_Spinal_Cord",
"Motor_Cortex",
"Temporal_Cortex")) %>%
mutate(inclusion_reads = UNC13A_3prime_leaf + UNC13A_5prime_1_leaf) %>%
mutate(unc13a_detected = ifelse(inclusion_reads > 0, yes = "+", no = "-")) %>%
filter(sequencing_platform != "") %>%
left_join(cell_types %>%
dplyr::select(cell,deconv,sample) %>%
unique() %>%
pivot_wider(id_cols = "sample",names_from = "cell",values_from = "deconv"), by = 'sample') %>%
filter(!is.na(disease_group2)) %>%
unique() %>%
filter(disease_group2 %in% c("ALS-TDP","FTLD-TDP")) %>%
dplyr::select(c("unc13a_cryptic_leaf_psi",
"sample", "tissue_clean",
"rin", "sex",
"age", "mutations",
"sequencing_platform",
"number_g_alleles",
"stmn_2_cryptic_psi_leaf", "oligodendrocytes",
"astrocytes", "neurons", "endothelial", "microglia")) %>%
column_to_rownames('sample')
fit <- lm(unc13a_cryptic_leaf_psi ~ ., data = full_cov)
fit %>% broom::tidy() %>% fwrite(paste0(here::here(),"/covariate_unc13a_ce.csv"))
full_cov %>%
group_by(tissue_clean) %>%
nest() %>%
mutate(lm_obj = map(data, ~lm(unc13a_cryptic_leaf_psi ~., data = .x))) %>%
mutate(broomed = map(lm_obj, broom::tidy)) %>%
dplyr::select(tissue_clean,broomed) %>%
unnest() %>%
filter(p.value < 0.05) %>% knitr::kable()
tissue_clean | term | estimate | std.error | statistic | p.value |
---|---|---|---|---|---|
Frontal_Cortex | stmn_2_cryptic_psi_leaf | 0.5006214 | 0.0361175 | 13.860894 | 0.0000000 |
Cervical_Spinal_Cord | (Intercept) | -0.3120120 | 0.1505643 | -2.072284 | 0.0404359 |
Cervical_Spinal_Cord | sequencing_platformNovaSeq | -0.0232872 | 0.0087809 | -2.652043 | 0.0091104 |
Cervical_Spinal_Cord | stmn_2_cryptic_psi_leaf | 0.1016095 | 0.0510255 | 1.991348 | 0.0487726 |
Motor_Cortex | sequencing_platformNovaSeq | -0.0055742 | 0.0018216 | -3.060125 | 0.0025378 |
Motor_Cortex | number_g_alleles | 0.0025967 | 0.0010752 | 2.415114 | 0.0166931 |
Motor_Cortex | stmn_2_cryptic_psi_leaf | 0.5734449 | 0.1021909 | 5.611505 | 0.0000001 |
Temporal_Cortex | sexMale | -0.0173710 | 0.0069398 | -2.503112 | 0.0166080 |
Temporal_Cortex | age | -0.0011040 | 0.0004588 | -2.406180 | 0.0209579 |
Temporal_Cortex | number_g_alleles | 0.0108918 | 0.0039751 | 2.740003 | 0.0092187 |
unc13a_data_fisher = unc13a_sequencing_platform %>%
filter(tissue_clean != "Thoracic_Spinal_Cord") %>%
group_by(disease_group2,tissue_clean) %>%
nest() %>%
mutate(fisher_pvalue = map_dbl(data, quick_fisher))
unc13a_data_fisher$data = NULL
unc13a_sequencing_platform_pct = unc13a_sequencing_platform %>%
group_by(disease_group2,tissue_clean, sequencing_platform, unc13a_detected) %>% tally() %>%
spread(key = unc13a_detected, value = n,fill = 0) %>%
mutate(detection = `+` / ( `-` + `+`) ) %>%
mutate(total = ( `-` + `+`) ) %>%
mutate(plot_name = glue::glue("{sequencing_platform} \n ({total})")) %>%
left_join(unc13a_data_fisher)
Joining, by = c("disease_group2", "tissue_clean")
unc13a_sequencing_platform_pct %>%
ggplot(aes(x = plot_name, y = detection )) +
geom_col(fill = "firebrick") +
labs(title = "UNC13A CE detection and sequencing platform",
y = "UNC13A CE detected",x = element_blank(), subtitle = "Fisher exact test") +
facet_wrap(~disease_group2 + tissue_clean,scales = 'free_x') +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
geom_text(aes(x = 1.5, y = 1.05, label = paste0("p = ", signif(fisher_pvalue,digits = 2) ) )) +
theme(legend.text = element_text(size = 15)) +
ggpubr::theme_pubr()
# scale_y_continuous(expand = c(0,0) )
patient_others = fread(file.path(here::here(),"data","nygc_pfkp_unc13b_rap1gap.bed"))
patient_others[,sample := gsub(".SJ.out","",V4)]
clean_data_table = clean_data_table %>%
left_join(patient_others %>%
pivot_wider(id_cols = "sample",names_from = "V7",values_from = "V5",values_fill = 0))
Joining, by = c("sample", "RAP1GAP_annotated", "PFKP_annotated", "PFKP_noveldonor", "PFKP_novelacceptor", "RAP1GAP_noveldonor", "RAP1GAP_novelacceptor")
clean_data_table[, rap1gap_cryptic_psi := (RAP1GAP_noveldonor + RAP1GAP_novelacceptor)/ (RAP1GAP_noveldonor + RAP1GAP_novelacceptor + RAP1GAP_annotated)]
clean_data_table[, pfkp_cryptic_psi := (PFKP_novelacceptor + PFKP_noveldonor)/ (PFKP_novelacceptor + PFKP_noveldonor + PFKP_annotated)]
pfkp_plot = clean_data_table %>%
filter(grepl("Cortex",tissue_clean)) %>%
filter(disease_tissue == T) %>%
filter((PFKP_novelacceptor + PFKP_noveldonor + PFKP_annotated) >=30) %>%
filter(junction_reads_stmn2 >= 30) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
filter(pfkp_cryptic_psi > 0) %>%
ggplot(aes(x = stmn_2_cryptic_psi_leaf, y = pfkp_cryptic_psi)) +
geom_point() +
stat_cor(size = 8,method = "spearman",cor.coef.name = "rho") +
geom_smooth(method = lm, se = FALSE,color = 'black') +
xlab("STMN2 Cryptic PSI") +
ylab("PFKP Cryptic PSI") +
ggpubr::theme_pubr() +
theme(text = element_text(size = 18)) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1L)) +
scale_x_continuous(labels = scales::percent_format(accuracy = 1L))
rap1gap_plot = clean_data_table %>%
filter(disease_tissue == T) %>%
filter(grepl("Cortex",tissue_clean)) %>%
filter((RAP1GAP_noveldonor + RAP1GAP_novelacceptor + RAP1GAP_annotated) >=30) %>%
filter(junction_reads_stmn2 >= 30) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
filter(rap1gap_cryptic_psi > 0) %>%
# ggplot(aes(x = rap1gap_cryptic_psi, y = unc13a_cryptic_leaf_psi)) +
ggplot(aes(x = stmn_2_cryptic_psi_leaf, y = rap1gap_cryptic_psi)) +
geom_point() +
stat_cor(size = 8,method = "spearman",cor.coef.name = "rho") +
geom_smooth(method = lm, se = FALSE,color = 'black') +
xlab("STMN2 Cryptic PSI") +
ylab("RAP1GAP Cryptic PSI") +
ggpubr::theme_pubr() +
theme(text = element_text(size = 18)) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1L)) +
scale_x_continuous(labels = scales::percent_format(accuracy = 1L))
cowplot::plot_grid(rap1gap_plot,pfkp_plot)
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
detected_table_for_graph_rap1 = clean_data_table %>%
filter(!(rs12973192 == "C/G" & rs12608932 == "C/C")) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
filter(rap1gap_cryptic_psi > 0) %>%
filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
filter(junction_reads_stmn2 >= 30) %>%
filter((RAP1GAP_noveldonor + RAP1GAP_novelacceptor + RAP1GAP_annotated) >=30) %>%
filter(grepl("Cortex",tissue_clean)) %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,tissue_clean,disease_group2,rs12973192,rs12608932,stmn_2_cryptic_psi_leaf,rap1gap_cryptic_psi) %>%
mutate(log2_rap1gap = log2((rap1gap_cryptic_psi)/(stmn_2_cryptic_psi_leaf))) %>%
mutate(tissue_clean = gsub("_","\n",tissue_clean)) %>%
group_by(rs12973192) %>%
mutate(n_sample = length(unique(sample))) %>%
ungroup() %>%
mutate(detection_name = glue::glue("{rs12973192} \n ( {n_sample} )")) %>%
as.data.table()
#get out that person who is discongruous
comparisons = combn(unique(detected_table_for_graph_rap1$detection_name),2,simplify = F)
rap1_fold_plot = detected_table_for_graph_rap1 %>%
ggplot(aes(y = log2_rap1gap, x = detection_name,fill = detection_name)) +
geom_boxplot(show.legend = F) +
geom_jitter(show.legend = F, alpha = 0.8, height = 0,pch = 21) +
stat_compare_means(aes(group = detection_name),comparisons = comparisons,label = "p.signif") +
ylab(expression(paste("Lo", g[2], " fold CE PSI / STMN2 CE PSI "))) +
xlab("UNC13A rs12973192 Genotype") +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
ggpubr::theme_pubr() +
theme(text = element_text(size = 24)) +
geom_hline(yintercept = 0,size = 1.5) +
ggtitle("RAP1GAP")
detected_table_for_graph_pfkp = clean_data_table %>%
filter(!(rs12973192 == "C/G" & rs12608932 == "C/C")) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
filter(pfkp_cryptic_psi > 0) %>%
filter(stmn_2_cryptic_psi_leaf > 0 ) %>%
filter(junction_reads_stmn2 >= 30) %>%
filter((PFKP_novelacceptor + PFKP_noveldonor + PFKP_annotated) >=30) %>%
filter(grepl("Cortex",tissue_clean)) %>%
filter(disease_tissue == T) %>%
filter(disease_group2 %in% c("FTLD-TDP","ALS-TDP")) %>%
dplyr::select(sample,tissue_clean,disease_group2,rs12973192,rs12608932,stmn_2_cryptic_psi_leaf,pfkp_cryptic_psi) %>%
mutate(log2_pfkp = log2((pfkp_cryptic_psi)/(stmn_2_cryptic_psi_leaf))) %>%
mutate(tissue_clean = gsub("_","\n",tissue_clean)) %>%
group_by(rs12973192) %>%
mutate(n_sample = length(unique(sample))) %>%
ungroup() %>%
mutate(detection_name = glue::glue("{rs12973192} \n ( {n_sample} )")) %>%
mutate(no_cong = ifelse(rs12608932 == "C/C" & rs12973192 == "C/G",8,3)) %>% as.data.table()
comparisons = combn(unique(detected_table_for_graph_pfkp$detection_name),2,simplify = F)
pfkp_fold_plot = detected_table_for_graph_pfkp %>%
ggplot(aes(y = log2_pfkp, x = detection_name,fill = detection_name)) +
geom_boxplot(show.legend = F) +
geom_jitter(show.legend = F, alpha = 0.8, height = 0,pch = 21) +
stat_compare_means(aes(group = detection_name),comparisons = comparisons,label = "p.signif") +
xlab("UNC13A rs12973192 Genotype") +
scale_fill_manual(values = c("#88CCEE","#44AA99","#1B7739")) +
ggpubr::theme_pubr() +
theme(text = element_text(size = 24)) +
geom_hline(yintercept = 0,size = 1.5) +
ggtitle("PFKP") +
ylab(element_blank())
cowplot::plot_grid(rap1_fold_plot,pfkp_fold_plot)
iso_colors = c("#1972A9", "#28A919", "#F49F0B")
names(iso_colors) = c("UNC13B-207","UNC13B-208","UNC13B-210")
#207 -- 1972A9 - normal
#208 -- 28A919 - NMD
#210 -- F49F0B - brain
t_type = fread(file.path(here::here(),"data","transcripts-Summary-Homo_sapiens_Gene_Summary_ENSG00000198722.csv"))
cell_lines = fread(file.path(here::here(),"data","unc13b_salmon_cell_lines.csv"))
cell_lines %>%
mutate(sample = gsub(".Aligned.sorted.out","",sample)) %>%
left_join(rel_rna_cryptic_amount[,.(condition,plot_name,sample)]) %>%
left_join(t_type, by = c("Name" = "Transcript ID")) %>%
filter(Name.y %in% c("UNC13B-207",
"UNC13B-208",
"UNC13B-210")) %>%
ggplot(aes(x =condition, y = TPM,fill = Name.y),color = 'black') +
geom_boxplot(size = 1.2,color = "black") +
geom_point(stroke = 1.2,size = 2.3,pch = 21, position = position_jitterdodge(jitter.width = 0.1,jitter.height = 0)) +
# facet_wrap(~plot_name) +
ggpubr::theme_pubr() +
labs(fill="",x="") +
scale_fill_manual(values = iso_colors)
Joining, by = "sample"
unc13b_expression = fread(file.path(here::here(),"data","unc13b_transcript_counts.csv"))
unc13b_expression %>%
left_join(t_type, by = c("Name" = "Transcript ID")) %>%
filter(sample != "") %>%
dplyr::select(sample,
EffectiveLength,
NumReads,
Name.y) %>%
unique() %>%
left_join(clean_data_table[,.(sample,library_size)]) %>%
filter(!is.na(library_size)) %>%
mutate(TPM = (NumReads )/ (library_size / 10^6)) %>%
unique() %>%
left_join(clean_data_table %>%
select(sample,disease_group2,tissue_clean)) %>%
unique() %>%
filter(tissue_clean %in% c("Frontal_Cortex",
"Lumbar_Spinal_Cord",
"Cervical_Spinal_Cord",
"Motor_Cortex",
"Temporal_Cortex")) %>%
mutate(disease_group2 = fct_relevel(disease_group2, "Control")) %>%
filter(Name.y %in% c("UNC13B-207",
"UNC13B-208",
"UNC13B-210")) %>%
ggplot(aes(x = disease_group2, y = TPM, fill = Name.y)) +
geom_boxplot() +
geom_point(pch = 21, position = position_jitterdodge(jitter.width = 0.1,jitter.height = 0)) +
facet_wrap(~tissue_clean,scales = 'free') +
theme(axis.text.x = element_text(angle = 45, vjust = 0.5)) +
ggpubr::theme_pubr() +
labs(fill="",x="") +
scale_fill_manual(values = iso_colors)
Joining, by = "sample"
Joining, by = "sample"
Read in pre-processed data (see separate script) and plot
df <- read_csv(paste0(here::here(),
"/data/nanopore/combined_nanopore_dataframe.csv"))
df[is.na(df)] <- 0
p2_patient <- ggplot(df %>% filter(classification != "Neither",
type=="Patient"), aes(x = name, y = 100*fraction, fill = class2)) +
geom_bar(stat="identity", position="dodge") +
ggpubr::theme_pubr() +
ggeasy::easy_rotate_x_labels(side = "right") +
ylab("Percentage of reads") +
ggtitle("Patient RNA") +
xlab("") +
ggeasy::easy_add_legend_title("")
p2_shsy <- ggplot(df %>% filter(classification != "Neither",
type=="SHSY5Y"), aes(x = name, y = 100*fraction, fill = class2)) +
geom_bar(stat="identity", position="dodge") +
ggpubr::theme_pubr() +
ggeasy::easy_rotate_x_labels(side = "right") +
ylab("Percentage of reads") +
ggtitle("SH-SY5Y") +
xlab("") +
ggeasy::easy_add_legend_title("")
p2_shsy | p2_patient
# without neither removed
p2_patient <- ggplot(df %>% filter(classification != "",
type=="Patient"), aes(x = name, y = 100*fraction, fill = class2,
colour = class2)) +
geom_bar(stat="identity", position="dodge") +
ggpubr::theme_pubr() +
ggeasy::easy_rotate_x_labels(side = "right") +
ylab("Percentage of reads") +
ggtitle("Patient RNA") +
xlab("") +
ggeasy::easy_add_legend_title("")
ggplot(df %>% filter(classification != "",
type=="SHSY5Y"), aes(x = name, y = fraction, fill = class2,
colour = class2)) +
geom_bar(stat="identity", position="dodge") +
ggpubr::theme_pubr() +
ggeasy::easy_rotate_x_labels(side = "right") +
ylab("Percentage of reads") +
ggtitle("SH-SY5Y") +
xlab("") +
ggeasy::easy_add_legend_title("") +
scale_y_continuous(labels = scales::percent_format(accuracy = 1L))
p2_shsy | p2_patient
# fractional
df2 <- df %>%
filter(classification != "Neither") %>%
group_by(name) %>%
mutate(f = n/sum(n))
ggplot(df2, aes(x = name, y = 100*f, fill = classification)) +
geom_bar(stat="identity") +
ggpubr::theme_pubr() +
ggeasy::easy_rotate_x_labels(side = "right") +
ylab("Percentage of reads") +
xlab("") +
ggeasy::easy_add_legend_title("")
ggplot(df2 %>%
filter(type == "Patient"), aes(x = name, y = 100*f, fill = classification)) +
geom_bar(stat="identity") +
ggpubr::theme_pubr() +
ggeasy::easy_rotate_x_labels(side = "right") +
ylab("Percentage of reads") +
xlab("") +
ggeasy::easy_add_legend_title("")
ggplot(df2 %>%
filter(type != "Patient"), aes(x = name, y = 100*f, fill = classification)) +
geom_bar(stat="identity") +
ggpubr::theme_pubr() +
ggeasy::easy_rotate_x_labels(side = "right") +
ylab("Percentage of reads") +
xlab("") +
ggeasy::easy_add_legend_title("")
# fractional
df %>%
filter(classification != "Neither") %>%
group_by(name) %>%
mutate(f = n/sum(n)) %>%
filter(grepl("FTD",name)) %>%
ggplot(aes(x = name, y = 100*f, fill = classification)) +
geom_bar(stat="identity") +
ggpubr::theme_pubr() +
ggeasy::easy_rotate_x_labels(side = "right") +
ylab("Percentage of reads") +
xlab("") +
ggeasy::easy_add_legend_title("") +
scale_fill_manual(values = c("#006613ff",
"#ffbf00ff",
"#f5671cff")) +
theme(axis.ticks.length=unit(0.1,"inch"),
axis.line = element_line(colour = 'black', size = 1.5),
axis.ticks = element_line(colour = "black", size = 2))
ggplot(df %>% filter(type=="Patient"),
aes(x = name, y = 100*fraction, fill = class2)) +
geom_bar(stat="identity", position="dodge",color = "black") +
ggpubr::theme_pubr() +
ggeasy::easy_rotate_x_labels(side = "right") +
ylab("Percentage of reads") +
ggtitle("Patient RNA") +
xlab("") +
ggeasy::easy_add_legend_title("") +
scale_fill_manual(values = c("#006613ff",
"#ffbf00ff",
"#f5671cff",
"#290340ff")) +
theme(axis.ticks.length=unit(0.1,"inch"),
axis.line = element_line(colour = 'black', size = 1.5),
axis.ticks = element_line(colour = "black", size = 2))
guide_strength = fread(file.path(here::here(),"data","different_guides_cryptic_expression.csv"))
guide_strength %>%
melt() %>%
filter(`cell line` == "i5W") %>%
mutate(condition = fct_relevel(condition,
"control","tdp43_mild")) %>%
ggbarplot(,
x = "condition",
add = c("mean_se","jitter"),
y = "value",
fill = 'variable',
color = 'variable',
position = position_dodge(0.8)) +
ggpubr::theme_pubr() +
scale_color_manual(
values = c("#1C2617","#262114","#262114")
) +
scale_fill_manual(
values = c("#1576b8","#eb7f3f","#975452")
) +
scale_y_continuous(labels = scales::percent) +
ggtitle("NCRM5") +
theme(axis.line = element_line(colour = 'black', size = 1.5),
axis.ticks = element_line(colour = "black", size = 2))
guide_strength %>%
melt() %>%
filter(`cell line` == "i11w_repeat") %>%
mutate(condition = fct_relevel(condition,
"control","tdp43_mild")) %>%
ggbarplot(,
x = "condition",
add = c("mean_se","jitter"),
y = "value",
fill = 'variable',
color = 'variable',
position = position_dodge(0.8)) +
ggpubr::theme_pubr() +
scale_color_manual(
values = c("#1C2617","#262114","#262114")
) +
scale_fill_manual(
values = c("#1576b8","#eb7f3f","#975452")
) +
scale_y_continuous(labels = scales::percent) +
ggtitle("WTC11") +
theme(axis.line = element_line(colour = 'black', size = 1.5),
axis.ticks = element_line(colour = "black", size = 2))
library(tidyverse)
library(viridis)
Loading required package: viridisLite
Attaching package: 'viridis'
The following object is masked from 'package:scales':
viridis_pal
library(dplyr)
Protein <- read.csv(file.path(here::here(),"data","Protein_RNA_Corr_3.csv"), colClasses = c("factor","factor","numeric","factor","factor"))
Protein$Treatment <- factor(Protein$Treatment, levels=c("NT","Dox-1","Dox-2","Dox-3","Dox-4","Dox-5"))
Protein$Line_Group <- factor(Protein$Line_Group, levels=c("Protein","RNA","Correct_RNA"))
agg=aggregate(Value~Treatment*Gene*Line_Group*Color_Group, data=Protein, FUN="mean")
std <- function(x) sd(x) / sqrt(length(x))
tmp = Protein %>%
group_by(Treatment,Gene,Line_Group,Color_Group) %>%
summarise(se = std(Value))
`summarise()` has grouped output by 'Treatment', 'Gene', 'Line_Group'. You can override using the `.groups` argument.
limits <- aes(ymax=Value+se, ymin=Value-se)
dodge <- position_dodge(width=0.9)
agg = agg %>%
filter(Gene != "13A-RNA") %>%
mutate(Line_Group = gsub("Correct_RNA","RNA",Line_Group)) %>%
left_join(tmp)
Joining, by = c("Treatment", "Gene", "Line_Group", "Color_Group")
ggplot(agg, aes(y=Value, x=Treatment)) +
geom_line(size = 2,mapping = aes(x=Treatment, y=Value,
linetype=Line_Group, group=Gene,color = Color_Group), size=1)+
geom_errorbar(limits,
position=dodge,
width=0.24,
size = 1.1)+
xlab("") +
ylab("") +
ggpubr::theme_pubr()+
scale_y_continuous(labels = scales::percent_format(scale = 1))+
scale_colour_viridis_d(begin=0, end=1) +
labs(linetype = "") +
theme(axis.ticks.length=unit(0.1,"inch"),
axis.line = element_line(colour = 'black', size = 1.5),
axis.ticks = element_line(colour = "black", size = 2))
# Read in filtered bed file (iMAPs output, filtered by script in pre-processing scripts folder)
sh <- read_csv(paste0(here(), "/data/iCLIP/shsy5y_tdp43_iclip_unc13a_b_only.csv"))
── Column specification ────────────────────────────────────────────────────────
cols(
chr = col_character(),
start = col_double(),
end.x = col_double(),
score1 = col_double(),
end.y = col_double(),
score2 = col_double(),
total = col_double()
)
unc13a <- sh %>%
filter(chr == "chr19" & start > 17599327 & ((end.x < 17690344 & end.y == 0) |
(end.y < 17690344 & end.x == 0))) %>%
select(start,total) %>%
full_join(data.frame(start=17599327:17690344)) %>%
arrange(start)
Joining, by = "start"
unc13a[is.na(unc13a)] <- 0
window <- 20
p_a <- ggplot(unc13a, aes(x = start, y = zoo::rollmean(total, k = window, na.pad=T))) +
geom_line(alpha = 1) +
ylim(0, NA) +
# blue dashed line shows position of CE
geom_vline(xintercept = 17642430, linetype="dashed", alpha= 0.3, size=2, colour="blue") +
# red line shows position of retained intron
geom_vline(xintercept = 17628569, linetype="dashed", alpha= 0.3, size=2, colour="red") +
ggtitle("UNC13a") +
ylab(paste("Average XL density in", window, "nt window")) +
theme_minimal()
unc13b <- sh %>%
filter(chr == "chr9" & start > 35160008 & ((end.x < 35407338 & end.y == 0) |
(end.y < 35407338 & end.x == 0))) %>%
select(start,total) %>%
full_join(data.frame(start=35160008:35407338)) %>%
arrange(start)
Joining, by = "start"
unc13b[is.na(unc13b)] <- 0
p_b <- ggplot(unc13b, aes(x = start, y = zoo::rollmean(total, k = window, na.pad=T))) +
geom_line(alpha = 1) +
ylim(0, NA) +
# blue dashed line shows rough position of UG-repeat near fsE
geom_vline(xintercept = 35364641, linetype="dashed", alpha= 0.3, size=2, colour="blue") +
theme_minimal() +
ggtitle("UNC13B") +
ylab(paste("Average XL density in", window, "nt window"))
p_a / p_b