Skip to content
Snippets Groups Projects
Commit c78b8199 authored by 3mma-mack's avatar 3mma-mack
Browse files

Add ExomeDepth to cnv_calling.nf, arguments set in command

parent e4928e64
No related branches found
No related tags found
No related merge requests found
Pipeline #16227 failed
......@@ -20,4 +20,4 @@ details:
genome_build: hg38
upload:
# relative path will output locally to the bcbio run folder
dir: ./results
dir: /exports/igmm/eddie/IGMM-VariantAnalysis/emma/trio-whole-exome/results
......@@ -64,13 +64,8 @@ process {
}
withLabel: large {
    // Merge conflict resolved: the leftover <<<<<<< / ======= / >>>>>>> markers
    // and the duplicate `cpus` assignment are removed, keeping the values the
    // diff hunk settles on (10 CPUs, 32 GB).
    // NOTE(review): get_cpus()/get_mem() are defined outside this excerpt;
    // presumably they cap the request to what the host offers - confirm.
    cpus = get_cpus(10)
    memory = get_mem(32.GB)
}
withLabel: long {
......
# try exomeDepth for masters project
# install.packages("ExomeDepth", repos = "http://cran.us.r-project.org", "/exports/eddie/scratch/s1734289/R_packages")
# install.packages("optparse", repos = "http://cran.us.r-project.org", "/exports/eddie/scratch/s1734289/R_packages")
# install.packages("bedr", repos = "http://cran.us.r-project.org", "/exports/eddie/scratch/s1734289/R_packages")
library(optparse)
library(ExomeDepth)
# library(bedr)
# get options from command line with optparse
# -i is a text file that is read line by line to obtain the bam paths, so its
# help text describes a list file rather than a single bam file.
option_list <- list(
  make_option(c("-i", "--input"),
              type = "character",
              help = "Path to a text file listing one bam file path per line"),
  make_option(c("-b", "--bed"),
              type = "character",
              help = "path to bed file"),
  make_option(c("-f", "--fasta"),
              type = "character",
              help = "Path to the fasta file of the organism being studied"),
  make_option(c("-o", "--output"),
              type = "character",
              help = "Path to output directory"),
  make_option(c("-t", "--target"),
              type = "character",
              help = "name of target bam file")
)
opt <- optparse::parse_args(OptionParser(option_list = option_list))

# Every option is needed further down; stop with a readable message instead of
# letting a NULL reach readLines() / getBamCounts().
required_opts <- c("input", "bed", "fasta", "output", "target")
opt_is_missing <- vapply(required_opts,
                         function(nm) is.null(opt[[nm]]),
                         logical(1))
if (any(opt_is_missing)) {
  stop("Missing required option(s): ",
       paste0("--", required_opts[opt_is_missing], collapse = ", "),
       call. = FALSE)
}

bam_paths <- opt$input
bed <- opt$bed
fasta <- opt$fasta
output_dir <- opt$output
target <- opt$target

input_files <- c(bam_paths, bed, fasta)
absent_inputs <- input_files[!file.exists(input_files)]
if (length(absent_inputs) > 0) {
  stop("Input file(s) not found: ",
       paste(absent_inputs, collapse = ", "),
       call. = FALSE)
}

# create a vector of paths to bam files
# Surrounding whitespace and empty lines are dropped so a trailing blank line
# in the list file is not treated as a bam path.
bam <- trimws(readLines(bam_paths))
bam <- bam[nzchar(bam)]
if (length(bam) == 0) {
  stop("No bam paths found in ", bam_paths, call. = FALSE)
}
absent_bams <- bam[!file.exists(bam)]
if (length(absent_bams) > 0) {
  stop("Bam file(s) not found: ",
       paste(absent_bams, collapse = ", "),
       call. = FALSE)
}

# get the exome counts
ExomeCount <- getBamCounts(bed.file = bed,
                           bam.files = bam,
                           include.chr = FALSE,
                           referenceFasta = fasta)
# data("exons.hg19")
# data(ExomeCount)
# data(exons.hg19.X)
ExomeCount.dafr <- as(ExomeCount, "data.frame")
# Remove the leading "chr" prefix only; the pattern is anchored so a contig
# name that happens to contain "chr" elsewhere is left intact.
ExomeCount.dafr$chromosome <- sub(pattern = "^chr",
                                  replacement = "",
                                  x = as.character(ExomeCount.dafr$chromosome))
# take the names of the header, if there is a path, split the name into just the last element
# names that are not the target sample are used to build the reference
# Reduce count-table column names to their final path element.
#
# dafr_name: character vector of column names, each possibly a "/"-separated
#            path to a bam file.
# Returns a character vector of the same length: for a name that contains a
# path the last element of that path, otherwise the name unchanged.
#
# The function no longer reads or appends to the global `sample_names`; it only
# depends on its argument.
check_dafr_name <- function(dafr_name) {
  split_paths <- strsplit(dafr_name, "/", fixed = TRUE)
  vapply(seq_along(split_paths), function(i) {
    path_parts <- split_paths[[i]]
    if (length(path_parts) > 1) {
      tail(path_parts, n = 1)
    } else {
      # no path separator (or an empty/NA name): keep the name as given
      dafr_name[[i]]
    }
  }, character(1))
}
# The target must be a column of the count table. Without this check a
# mistyped target name would not be excluded below and the test sample would
# end up inside its own reference set.
if (length(target) != 1 || !(target %in% names(ExomeCount.dafr))) {
  stop("Target sample '", target, "' is not a column of the count table. ",
       "Available columns: ", paste(names(ExomeCount.dafr), collapse = ", "),
       call. = FALSE)
}

# Every column that is neither annotation nor the target is a candidate
# reference sample.
sample_names <- names(ExomeCount.dafr)
unwanted_header <- c("GC", "chromosome", "start", "end", "name", target)
sample_names <- sample_names[!(sample_names %in% unwanted_header)]
# Read counts are numeric; a non-numeric column would turn the reference
# matrix into a character matrix, so such columns are left out.
is_count_column <- vapply(ExomeCount.dafr[sample_names], is.numeric, logical(1))
sample_names <- sample_names[is_count_column]
if (length(sample_names) == 0) {
  stop("No reference samples left after removing the target and annotation columns",
       call. = FALSE)
}
print(sample_names)
# The bed file used doesn't give gene names with it, so need to add a column otherwise it all crashes
ExomeCount.dafr$names <- "no_gene_name"
#head(ExomeCount.dafr)
# build a reference
# name of sample
# my.test <- ExomeCount$HG002.sorted.bam
# Taken from the same table as the reference columns so both always agree.
my.test <- ExomeCount.dafr[[target]]
#print(my.test)
# name of parents
my.ref.samples <- sample_names
# my.ref.samples <- c('HG003.sorted.bam', 'HG004.sorted.bam')
# drop = FALSE keeps a one-column matrix (with its column name) when there is
# only a single reference sample.
my.reference.set <- as.matrix(ExomeCount.dafr[, my.ref.samples, drop = FALSE])
my.choice <- select.reference.set(test.counts = my.test,
                                  reference.counts = my.reference.set,
                                  bin.length = (ExomeCount.dafr$end - ExomeCount.dafr$start) / 1000,
                                  n.bins.reduced = 10000)
print(my.choice[[1]])
my.matrix <- as.matrix(ExomeCount.dafr[, my.choice$reference.choice, drop = FALSE])
# Aggregate reference: per-exon sum of the counts of the selected samples.
my.reference.selected <- rowSums(my.matrix)
# cnv calling
# likely need to remove X chromosome as may be causing problems
all.exons <- new("ExomeDepth",
                 test = my.test,
                 reference = my.reference.selected,
                 formula = "cbind(test, reference) ~ 1")
all.exons <- CallCNVs(x = all.exons,
                      transition.probability = 10^-4,
                      chromosome = ExomeCount.dafr$chromosome,
                      start = ExomeCount.dafr$start,
                      end = ExomeCount.dafr$end,
                      name = ExomeCount.dafr$names)
# Explicit print() so the previews also appear when the script is source()d.
print(head(all.exons@CNV.calls))
print(head(all.exons@CNV.calls[order(all.exons@CNV.calls$BF, decreasing = TRUE), ]))
# Make sure the output directory exists before writing into it.
if (!dir.exists(output_dir)) {
  dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
}
output.file <- file.path(output_dir, paste0("exome_calls_", target, ".csv"))
write.csv(file = output.file, x = all.exons@CNV.calls, row.names = FALSE)
## create vcf file
#regions <- cbind(chromosome=all.exons@CNV.calls$chromosome,start=all.exons@CNV.calls$start, end=all.exons@CNV.calls$end)
#regions$chromosome <- gsub("^", "chr", regions$chromosome)
#bed2vcf(
# all.exons@CNV.calls$id,
# filename = 'ExomeDepth.vcf',
# zero.based = FALSE,
# fasta = fasta,
# header = NULL)
/exports/igmm/eddie/IGMM-VariantAnalysis/alison/giab_exomes/analysis/alignments/Preprocessing/HG003/Recalibrated/HG003.recal.bam
/exports/igmm/eddie/IGMM-VariantAnalysis/alison/giab_exomes/analysis/alignments/Preprocessing/HG004/Recalibrated/HG004.recal.bam
/exports/igmm/eddie/IGMM-VariantAnalysis/alison/giab_exomes/analysis/alignments/Preprocessing/HG002/Recalibrated/HG002.recal.bam
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/3028_3843/Recalibrated/3028_3843.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/female/3028_3843.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/3048_4224/Recalibrated/3048_4224.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/female/3048_4224.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/3059_3755/Recalibrated/3059_3755.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/female/3059_3755.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/3060_3816/Recalibrated/3060_3816.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/female/3060_3816.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/3461_4260/Recalibrated/3461_4260.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/female/3461_4260.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/3610_3754/Recalibrated/3610_3754.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/female/3610_3754.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/3611_4071/Recalibrated/3611_4071.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/female/3611_4071.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/3781_3782/Recalibrated/3781_3782.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/female/3781_3782.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/3894_3895/Recalibrated/3894_3895.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/female/3894_3895.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/3903_3904/Recalibrated/3903_3904.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/female/3903_3904.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/3906_3907/Recalibrated/3906_3907.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/female/3906_3907.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/4156_4157/Recalibrated/4156_4157.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/female/4156_4157.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/4198_4269/Recalibrated/4198_4269.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/female/4198_4269.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/727_728/Recalibrated/727_728.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/female/727_728.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/4005_R28H9/Recalibrated/4005_R28H9.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/female/4005_R28H9.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/4314_R35C9/Recalibrated/4314_R35C9.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/female/4314_R35C9.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/4427_R36B2/Recalibrated/4427_R36B2.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/female/4427_R36B2.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/4485_R36F5/Recalibrated/4485_R36F5.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/female/4485_R36F5.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/4454_R36C5/Recalibrated/4454_R36C5.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/female/4454_R36C5.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/491_R14A1/Recalibrated/491_R14A1.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/female/491_R14A1.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/421_R19A8/Recalibrated/421_R19A8.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/female/421_R19A8.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/3457_OTF1/Recalibrated/3457_OTF1.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/female/3457_OTF1.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/4445_R36E6/Recalibrated/4445_R36E6.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/female/4445_R36E6.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/121_R17H11/Recalibrated/121_R17H11.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/female/121_R17H11.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/4494_R36G10/Recalibrated/4494_R36G10.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/female/4494_R36G10.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/1149_R9A8/Recalibrated/1149_R9A8.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/female/1149_R9A8.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/443_R14E12/Recalibrated/443_R14E12.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/female/443_R14E12.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/4365_R35F5/Recalibrated/4365_R35F5.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/female/4365_R35F5.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/4699_R38E10/Recalibrated/4699_R38E10.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/female/4699_R38E10.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/3028_3842/Recalibrated/3028_3842.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/male/3028_3842.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/3048_4225/Recalibrated/3048_4225.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/male/3048_4225.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/3059_3756/Recalibrated/3059_3756.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/male/3059_3756.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/3060_3817/Recalibrated/3060_3817.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/male/3060_3817.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/3461_4261/Recalibrated/3461_4261.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/male/3461_4261.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/3610_3753/Recalibrated/3610_3753.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/male/3610_3753.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/3611_4072/Recalibrated/3611_4072.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/male/3611_4072.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/3781_3783/Recalibrated/3781_3783.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/male/3781_3783.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/3894_3896/Recalibrated/3894_3896.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/male/3894_3896.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/3903_3905/Recalibrated/3903_3905.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/male/3903_3905.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/3906_3908/Recalibrated/3906_3908.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/male/3906_3908.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/4156_4158/Recalibrated/4156_4158.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/male/4156_4158.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/4198_4270/Recalibrated/4198_4270.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/male/4198_4270.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/727_729/Recalibrated/727_729.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/male/727_729.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/4005_R28H10/Recalibrated/4005_R28H10.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/male/4005_R28H10.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/4314_R34H1/Recalibrated/4314_R34H1.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/male/4314_R34H1.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/4427_R36B3/Recalibrated/4427_R36B3.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/male/4427_R36B3.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/4485_R36F6/Recalibrated/4485_R36F6.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/male/4485_R36F6.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/4454_R36C6/Recalibrated/4454_R36C6.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/male/4454_R36C6.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/491_R14A2/Recalibrated/491_R14A2.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/male/491_R14A2.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/421_R19A10/Recalibrated/421_R19A10.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/male/421_R19A10.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/3457_OTF2/Recalibrated/3457_OTF2.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/male/3457_OTF2.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/4445_R36E11/Recalibrated/4445_R36E11.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/male/4445_R36E11.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/121_R17H12/Recalibrated/121_R17H12.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/male/121_R17H12.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/4494_R36G11/Recalibrated/4494_R36G11.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/male/4494_R36G11.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/1149_R9A7/Recalibrated/1149_R9A7.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/male/1149_R9A7.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/443_R14F1/Recalibrated/443_R14F1.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/male/443_R14F1.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/4365_R35F4/Recalibrated/4365_R35F4.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/male/4365_R35F4.targetcoverage.cnn
/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing/4699_R38E11/Recalibrated/4699_R38E11.recal.bam -o /gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage/male/4699_R38E11.targetcoverage.cnn
This diff is collapsed.
#!/bin/bash
# Take a ped file, pick the individuals whose phenotype column (6) is 1, and
# write, per sex (column 5: 1 -> male, 2 -> female), the bam path plus the
# "-o <coverage file>" argument for target and antitarget coverage into:
#   Recalibrated_target_paths_{male,female}.txt
#   Recalibrated_antitarget_paths_{male,female}.txt
#
# Usage: <this script> [ped_file]     (ped_file defaults to exome.ped)

ped_file="${1:-exome.ped}"
bam_root="/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/alison/cdls_exomes/alignments/Preprocessing"
coverage_root="/gpfs/igmmfs01/eddie/IGMM-VariantAnalysis/emma/tool_testing/CNVkit_test/reference_coverage"

if [[ ! -r "${ped_file}" ]]
then
    echo "Cannot read ped file: ${ped_file}" >&2
    exit 1
fi

# Start from empty output files. Truncating (instead of rm + touch) does not
# print an error when a file does not exist yet.
for sex_label in male female
do
    : > "Recalibrated_target_paths_${sex_label}.txt"
    : > "Recalibrated_antitarget_paths_${sex_label}.txt"
done

# One pass over the ped file fills both the target and the antitarget lists.
# Column 2 is the id used in the bam directory and file name; columns 1, 3 and
# 4 are not needed. The "|| [[ -n ... ]]" keeps a final line that has no
# trailing newline.
while read -r _ sample_id _ _ sex phenotype _ || [[ -n "${sample_id}" ]]
do
    # tolerate ped files saved with Windows line endings
    phenotype="${phenotype%$'\r'}"

    if [[ $phenotype != 1 ]]
    then
        continue
    fi

    case "${sex}" in
        1) sex_label=male ;;
        2) sex_label=female ;;
        *) continue ;;
    esac

    bam_path="${bam_root}/${sample_id}/Recalibrated/${sample_id}.recal.bam"
    coverage_prefix="${coverage_root}/${sex_label}/${sample_id}"

    echo " ${bam_path} -o ${coverage_prefix}.targetcoverage.cnn" >> "Recalibrated_target_paths_${sex_label}.txt"
    echo " ${bam_path} -o ${coverage_prefix}.antitargetcoverage.cnn" >> "Recalibrated_antitarget_paths_${sex_label}.txt"
done < "${ped_file}"
......@@ -92,10 +92,39 @@ process check_ped_file {
"""
}
// process to split ped files into families - send each family to extract_target
// extract_target: take a family and identify the sex. use the sample sheet to get the path to the bam file
// emit a paired object, indicating the sex and the path to the bam file
// Runs the ExomeDepth R script (ExomeDepth_basic_own_vignette.R) from the
// project's pipeline/ExomeDepth_assets directory.
//
// Inputs:
//   target_bed       - path passed to the script as -b
//   reference_genome - path passed to the script as -f
//
// The bam list (-i HG003_4_paths.txt) and the target sample (-t HG002.recal.bam)
// are fixed in the command, and -o points at the ExomeDepth_assets directory
// under the project directory. No output block is declared.
process run_exomedepth {
// resource settings come from the 'large' label
label 'large'
input:
path(target_bed)
path(reference_genome)
script:
// The lines starting with '#' inside the script string are shell comments
// describing planned behaviour; they are part of the script text.
"""
# take emitted paired object by extract_target, if sex = 1 use male reference if sex = 2 use female reference
# create an input file, adding the path to target to the reference
# take the last element in path to target, use it as the target
Rscript $workflow.projectDir/pipeline/ExomeDepth_assets/ExomeDepth_basic_own_vignette.R -i $workflow.projectDir/pipeline/ExomeDepth_assets/HG003_4_paths.txt -b ${target_bed} -f ${reference_genome} -t HG002.recal.bam -o $workflow.projectDir/pipeline/ExomeDepth_assets
"""
}
// future process to reformat output
// future process to compare proband to parents
// future process for gnomAD
// Workflow: reads the inputs, checks the sample sheet and the ped file using
// the channels emitted by read_inputs, then runs ExomeDepth with the target
// bed and reference genome taken from params.
workflow check_inputs {
read_inputs()
check_sample_sheet(read_inputs.out.ch_samplesheet)
check_ped_file(read_inputs.out.ch_ped_file)
// run_exomedepth takes its two paths from params, not from the channels above
run_exomedepth(params.target_bed, params.reference_genome)
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment