// main.nf

// Switch the script to DSL2 syntax, which the `include` statements and the
// named workflows below rely on.
nextflow.enable.dsl = 2

// Components imported from sibling module files.
include {read_inputs} from './inputs.nf'
include {validation} from './validation.nf'

// Command that the run_bcbio process interpolates at the start of its script.
// The default is null, so a value has to be supplied by the user.
params.bcbio = null

/*
 * Concatenate all R1 fastqs and all R2 fastqs of one individual into a single
 * gzipped file per read direction.
 *
 * Input:  tuple (indv_id, r1, r2). r1 and r2 are collections of fastq
 *         locations passed as plain values; they are joined with spaces and
 *         handed to `cat` as-is.
 * Output: tuple (indv_id, <indv_id>_merged_r1.fastq.gz, <indv_id>_merged_r2.fastq.gz),
 *         also copied to outputs/individuals/<indv_id>/merged_fastqs.
 *
 * NOTE(review): the concatenated stream is always piped through gzip. If the
 * input fastqs are already gzip-compressed this compresses them a second
 * time - confirm the inputs are uncompressed.
 */
process merge_fastqs {
    publishDir "outputs/individuals/$indv_id/merged_fastqs", mode: 'copy'

    input:
    tuple(val(indv_id), val(r1), val(r2))

    output:
    tuple(
        val(indv_id),
        path("${indv_id}_merged_r1.fastq.gz"),
        path("${indv_id}_merged_r2.fastq.gz")
    )

    script:
    // todo: pigz if gzip becomes a bottleneck
    //
    // R1 and R2 are merged concurrently. The background R1 job must be waited
    // for explicitly: otherwise the script can finish while R1 is still being
    // written, and a failure of the R1 merge never reaches the exit status.
    // pipefail makes a failing `cat` (e.g. missing input) fail the pipeline
    // instead of being hidden by gzip's exit status.
    """
    set -o pipefail

    cat ${r1.join(' ')} | gzip -c > ${indv_id}_merged_r1.fastq.gz &
    r1_pid=\$!
    cat ${r2.join(' ')} | gzip -c > ${indv_id}_merged_r2.fastq.gz
    wait \$r1_pid
    """
}

/*
 * Write one CSV file per family, with a header line followed by one line per
 * individual.
 *
 * Input:  tuple (family_id, individual_info). individual_info is a list of
 *         per-individual lists; it is interpolated into the Python script in
 *         its string form and split back into records there.
 * Output: <family_id>.csv, also copied to outputs/families/<family_id>.
 *
 * NOTE(review): the header declares 6 columns, while the rows built in
 * prepare_bcbio_config carry 7 values (sample_id, father, mother, sex,
 * phenotype, merged_r1, merged_r2). The intended column mapping is not
 * visible here - confirm before relying on the file.
 * NOTE(review): parsing relies on no value containing "], [", ", " or a
 * single quote.
 */
process write_bcbio_csv {
    publishDir "outputs/families/$family_id", mode: 'copy'

    input:
    tuple(val(family_id), val(individual_info))

    output:
    path("${family_id}.csv")

    script:
    """
    #!/usr/bin/env python

    # String form of a list of lists, e.g. [[a, b], [c, d]]
    individual_info = '$individual_info'
    records = individual_info.lstrip('[').rstrip(']').split('], [')

    with open('${family_id}.csv', 'w') as f:
        f.write('samplename,description,batch,sex,phenotype,variant_regions\\n')
        for record in records:
            fields = record.lstrip('[').rstrip(']').split(', ')
            # Terminate every row so that each individual gets its own line
            f.write(','.join(fields) + '\\n')
    """
}

/*
 * Run the command given by params.bcbio on one config file, with the
 * arguments `-n 16 -t local`.
 *
 * Input: bcbio_config - file staged into the task directory and passed as the
 *        first argument to the command.
 *
 * Fails with an explicit error when params.bcbio is not set, instead of
 * letting the shell try to execute a command literally named "null".
 */
process run_bcbio {
    input:
    path(bcbio_config)

    script:
    if (!params.bcbio) {
        error "Parameter 'bcbio' is not set: provide the bcbio command to run, e.g. --bcbio <command>"
    }
    """
    ${params.bcbio} $bcbio_config -n 16 -t local
    """

}

workflow prepare_bcbio_config {
    /*
    Steps:
    1. Merge all R1 and all R2 fastqs of every individual.
    2. Attach the merged fastqs to each individual's record and group the
       records by family.
    3. Write one BCBio CSV per family.
    4. Run BCBio on each of those files.
    */

    take:
        ch_samplesheet_info
        ch_individuals_by_family

    main:
        ch_merged_fastqs = merge_fastqs(ch_samplesheet_info)

        // Keep only the value of each (key, value) pair.
        ch_individuals = ch_individuals_by_family.map { key, individual -> individual }

        // join matches on the first element of each item, so the merged fastq
        // paths are appended to the end of the matching individual record.
        ch_with_merged = ch_individuals.join(ch_merged_fastqs)

        // Re-key every record by family and collect all records of a family
        // into one item; the per-lane r1s/r2s are dropped here.
        ch_merged_data = ch_with_merged
            .map { sample_id, family_id, father, mother, sex, phenotype, r1s, r2s, merged_r1, merged_r2 ->
                def individual_row = [sample_id, father, mother, sex, phenotype, merged_r1, merged_r2]
                [family_id, individual_row]
            }
            .groupTuple()

        ch_bcbio_config = write_bcbio_csv(ch_merged_data)
        run_bcbio(ch_bcbio_config)
}


workflow {
    // Entry point. The outputs of read_inputs are used by position:
    //   out[0] - samplesheet info
    //   out[1] - ped file info (not consumed here)
    //   out[2] - individuals by family
    read_inputs()

    validation(read_inputs.out[0])
    prepare_bcbio_config(read_inputs.out[0], read_inputs.out[2])
}