diff --git a/pipeline/cnv_calling.nf b/pipeline/cnv_calling.nf index 6ffcb0301bb600a03a507d3d90f5397338d04828..c600a97c0023a5e9c9123f53907f168b6bab0549 100644 --- a/pipeline/cnv_calling.nf +++ b/pipeline/cnv_calling.nf @@ -91,34 +91,37 @@ process check_ped_file { cat $ped_file """ } - -// process to split ped files into families - send each family to extract_target -// this is done by inputs - -// extract_target: take a family and identify the sex. use the sample sheet to get the path to the bam file -// emit a paired object, indicating the sex and the path to the bam file process run_exomedepth { label 'medium' + // Take the ped_file line and sample sheet line of a sample, collect relevant information and run exomedepth + // This assumes that the information coming from the input sample channel are in the same order + // so information recieved is for the same sample + + // Want to emit a channel of family IDs for next process to use so can find relevant files + input: path(target_bed) path(reference_genome) val(ped_file_info) val(samplesheet_info) + output: + val \${fam_id} into family_ids + script: """ ind_id=\$(echo ${ped_file_info} |cut -c2- | cut -d',' -f1) - fam_id=\$(echo ${ped_file_info} |cut -c2- | cut -d',' -f2) + fam_id=\$(echo ${ped_file_info} |cut -c2- | cut -d',' -f2 | cut -c2-) sex=\$(echo ${ped_file_info} |cut -c2- | cut -d',' -f5) # echo ${ped_file_info} # echo ${samplesheet_info} bam_path=\$(echo ${samplesheet_info} | cut -d',' -f4 | cut -c3- | rev | cut -c3- | rev) - + # bam_path # take the pedfile info of a sample, if sex = 1 use male reference if sex = 2 use female reference # create an input file, adding the path to target to the reference, using cat [bam path from sample_sheet_info] and reference file @@ -129,16 +132,52 @@ process run_exomedepth { echo \${fam_id} echo \${sex} echo \${bam_path} + + bam_file=\$(echo \${bam_path} | awk -F"/" '{print \$NF}') - # Rscript $workflow.projectDir/pipeline/ExomeDepth_assets/ExomeDepth_basic_own_vignette.R -i $workflow.projectDir/pipeline/ExomeDepth_assets/HG003_4_paths.txt -b ${target_bed} -f ${reference_genome} -t HG002.recal.bam -o $workflow.projectDir/pipeline/ExomeDepth_assets + # if directory for family doesnt exist, create it + mkdir -p "$workflow.projectDir/\${fam_id}" + + # make reference file for sample, store in fam directory, depending on sex, if sex is not known use both, maybe return a warning + + if [ \${sex} == 1 ]; then + + cp $workflow.projectDir/pipeline/ExomeDepth_assets/mini_male_reference.txt $workflow.projectDir/\${fam_id}/\${ind_id}_bam_paths.txt + echo \${bam_path} >> $workflow.projectDir/\${fam_id}/\${ind_id}_bam_paths.txt + + elif [ \${sex} == 2 ]; then + + cp $workflow.projectDir/pipeline/ExomeDepth_assets/mini_female_reference.txt $workflow.projectDir/\${fam_id}/\${ind_id}_bam_paths.txt + echo \${bam_path} >> $workflow.projectDir/\${fam_id}/\${ind_id}_bam_paths.txt + + else + + cp $workflow.projectDir/pipeline/ExomeDepth_assets/mini_male_reference.txt $workflow.projectDir/\${fam_id}/\${ind_id}_bam_paths.txt + cat $workflow.projectDir/pipeline/ExomeDepth_assets/mini_female_reference.txt >> $workflow.projectDir/\${fam_id}/\${ind_id}_bam_paths.txt + echo \${bam_path} >> $workflow.projectDir/\${fam_id}/\${ind_id}_bam_paths.txt + + fi + + # Rscript $workflow.projectDir/pipeline/ExomeDepth_assets/ExomeDepth_basic_own_vignette.R -i $workflow.projectDir/\${fam_id}/\${ind_id}_bam_paths.txt -b ${target_bed} -f ${reference_genome} -t \${bam_file} -o $workflow.projectDir/\${fam_id} """ } -// future process to reformat output - // future process to compare proband to parents +process collect_proband_cnvs { + + input: + val(family_ids) + + script: + + """ + echo $family_ids + """ + +} + // future process for gnomAD workflow check_inputs { @@ -147,4 +186,6 @@ workflow check_inputs { check_sample_sheet(read_inputs.out.ch_samplesheet) check_ped_file(read_inputs.out.ch_ped_file) run_exomedepth(params.target_bed, params.reference_genome, read_inputs.out.ch_ped_file_info, read_inputs.out.ch_samplesheet_info) + collect_proband_cnvs(run_exomedepth.out.ch_family_ids) + }