From cad434e3bd23c6d46710f977a8a546d2cd117f93 Mon Sep 17 00:00:00 2001
From: Paul Bartholomew <paul.bartholomew08@imperial.ac.uk>
Date: Wed, 10 Apr 2019 12:54:41 +0100
Subject: [PATCH] FORMATTING ONLY

Adding a script indent.sh to apply emacs formatting to all sources
this ensures consistent formatting of code.

All code should be formatted in this way - if in doubt rerun the
script.

This commit is only formatting changes - no functional changes
---
 .dir-locals.el                |    5 +-
 decomp2d/alloc.inc            |  532 ++---
 decomp2d/decomp_2d.f90        |  316 +--
 decomp2d/factor.inc           |  138 +-
 decomp2d/fft_common.inc       |  346 ++--
 decomp2d/fft_common_3d.inc    |  354 ++--
 decomp2d/fft_fftw3.f90        |   66 +-
 decomp2d/fft_generic.f90      |   18 +-
 decomp2d/fft_mkl.f90          |  176 +-
 decomp2d/glassman.f90         |   56 +-
 decomp2d/halo.inc             |  164 +-
 decomp2d/halo_common.inc      |  740 +++----
 decomp2d/io.f90               |  172 +-
 decomp2d/io_read_one.inc      |   98 +-
 decomp2d/io_read_var.inc      |  102 +-
 decomp2d/io_write_every.inc   |  418 ++--
 decomp2d/io_write_one.inc     |  116 +-
 decomp2d/io_write_plane.inc   |  208 +-
 decomp2d/io_write_var.inc     |  102 +-
 decomp2d/mem_merge.f90        |  158 +-
 decomp2d/mem_split.f90        |  158 +-
 decomp2d/module_param.f90     |   30 +-
 decomp2d/transpose_x_to_y.inc |  762 +++----
 decomp2d/transpose_y_to_x.inc |  760 +++----
 decomp2d/transpose_y_to_z.inc |  772 +++----
 decomp2d/transpose_z_to_y.inc |  750 +++----
 indent.sh                     |   26 +
 src/BC-Channel-flow.f90       |    6 +-
 src/BC-Jet.f90                |   74 +-
 src/BC-Lock-exchange.f90      |   32 +-
 src/BC-Mixing-layer.f90       |    8 +-
 src/BC-Periodic-hill.f90      |  264 +--
 src/BC-TGV.f90                |    2 +-
 src/BC-dbg-schemes.f90        |   22 +-
 src/case.f90                  |   30 +-
 src/derive.f90                | 3642 ++++++++++++++++-----------------
 src/filters.f90               | 2152 +++++++++----------
 src/genepsi3d.f90             |  884 ++++----
 src/incompact3d.f90           |   36 +-
 src/les_models.f90            |   34 +-
 src/mkl_dfti.f90              |   16 +-
 src/navier.f90                |  282 +--
 src/parameters.f90            |    6 +-
 src/poisson.f90               | 1810 ++++++++--------
 src/post.f90                  |    4 +-
 src/schemes.f90               |   70 +-
 src/statistics.f90            |   12 +-
 src/tools.f90                 | 1146 +++++------
 src/transeq.f90               |   40 +-
 src/variables.f90             |   42 +-
 src/visu.f90                  |   18 +-
 51 files changed, 9101 insertions(+), 9074 deletions(-)
 create mode 100644 indent.sh

diff --git a/.dir-locals.el b/.dir-locals.el
index c0d88d07..41c5840f 100644
--- a/.dir-locals.el
+++ b/.dir-locals.el
@@ -1,5 +1,5 @@
 ((nil . ((eval . (setq flycheck-fortran-gfortran-executable "mpif90"))
-				 (eval . (setf flycheck-fortran-args "-fcray-pointer -cpp"))
+				 (eval . (setq flycheck-fortran-args "-fcray-pointer -cpp"))
 				 (eval . (setq flycheck-gfortran-include-path
 											 ;; Find this file and use it as the project root directory.
 											 (list (file-name-directory
@@ -7,4 +7,5 @@
 																(if (stringp d)
 																		d
 																	(car d)))))))
-				 (eval . (setq flycheck-gfortran-language-standard "f2003")))))
+				 (eval . (setq flycheck-gfortran-language-standard "f2003"))
+				 (eval . (setq fortran-comment-indent-style 'relative)))))
diff --git a/decomp2d/alloc.inc b/decomp2d/alloc.inc
index 73602d35..c0156175 100644
--- a/decomp2d/alloc.inc
+++ b/decomp2d/alloc.inc
@@ -9,269 +9,269 @@
 !
 !=======================================================================
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-  ! Utility routine to help allocate 3D arrays
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-  ! X-pencil real arrays
-  subroutine alloc_x_real(var, opt_decomp, opt_global)
-
-    implicit none
-
-    real(mytype), allocatable, dimension(:,:,:) :: var
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
-    logical, intent(IN), optional :: opt_global
-
-    TYPE(DECOMP_INFO) :: decomp
-    logical :: global
-    integer :: alloc_stat, errorcode
-
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
-
-    if (present(opt_global)) then
-       global = opt_global
-    else
-       global = .false.
-    end if
-
-    if (global) then
-       allocate(var(decomp%xst(1):decomp%xen(1), &
-            decomp%xst(2):decomp%xen(2), decomp%xst(3):decomp%xen(3)), &
-            stat=alloc_stat)
-    else
-       allocate(var(decomp%xsz(1),decomp%xsz(2),decomp%xsz(3)), &
-            stat=alloc_stat)
-    end if
-    
-    if (alloc_stat /= 0) then
-       errorcode = 8
-       call decomp_2d_abort(errorcode, &
-            'Memory allocation failed when creating new arrays')
-    end if
-
-    return
-  end subroutine alloc_x_real
-
-
-  ! X-pencil complex arrays
-  subroutine alloc_x_complex(var, opt_decomp, opt_global)
-
-    implicit none
-
-    complex(mytype), allocatable, dimension(:,:,:) :: var
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
-    logical, intent(IN), optional :: opt_global
-
-    TYPE(DECOMP_INFO) :: decomp
-    logical :: global
-    integer :: alloc_stat, errorcode
-
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
-
-    if (present(opt_global)) then
-       global = opt_global
-    else
-       global = .false.
-    end if
-
-    if (global) then
-       allocate(var(decomp%xst(1):decomp%xen(1), &
-            decomp%xst(2):decomp%xen(2), decomp%xst(3):decomp%xen(3)), &
-            stat=alloc_stat)
-    else
-       allocate(var(decomp%xsz(1),decomp%xsz(2),decomp%xsz(3)), &
-            stat=alloc_stat)
-    end if
-    
-    if (alloc_stat /= 0) then
-       errorcode = 8
-       call decomp_2d_abort(errorcode, &
-            'Memory allocation failed when creating new arrays')
-    end if
-
-    return
-  end subroutine alloc_x_complex
-
-
-  ! Y-pencil real arrays
-  subroutine alloc_y_real(var, opt_decomp, opt_global)
-
-    implicit none
-
-    real(mytype), allocatable, dimension(:,:,:) :: var
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
-    logical, intent(IN), optional :: opt_global
-
-    TYPE(DECOMP_INFO) :: decomp
-    logical :: global
-    integer :: alloc_stat, errorcode
-
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
-
-    if (present(opt_global)) then
-       global = opt_global
-    else
-       global = .false.
-    end if
-
-    if (global) then
-       allocate(var(decomp%yst(1):decomp%yen(1), &
-            decomp%yst(2):decomp%yen(2), decomp%yst(3):decomp%yen(3)), &
-            stat=alloc_stat)
-    else
-       allocate(var(decomp%ysz(1),decomp%ysz(2),decomp%ysz(3)), &
-            stat=alloc_stat)
-    end if
-    
-    if (alloc_stat /= 0) then
-       errorcode = 8
-       call decomp_2d_abort(errorcode, &
-            'Memory allocation failed when creating new arrays')
-    end if
-
-    return
-  end subroutine alloc_y_real
-
-
-  ! Y-pencil complex arrays
-  subroutine alloc_y_complex(var, opt_decomp, opt_global)
-
-    implicit none
-
-    complex(mytype), allocatable, dimension(:,:,:) :: var
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
-    logical, intent(IN), optional :: opt_global
-
-    TYPE(DECOMP_INFO) :: decomp
-    logical :: global
-    integer :: alloc_stat, errorcode
-
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
-
-    if (present(opt_global)) then
-       global = opt_global
-    else
-       global = .false.
-    end if
-
-    if (global) then
-       allocate(var(decomp%yst(1):decomp%yen(1), &
-            decomp%yst(2):decomp%yen(2), decomp%yst(3):decomp%yen(3)), &
-            stat=alloc_stat)
-    else
-       allocate(var(decomp%ysz(1),decomp%ysz(2),decomp%ysz(3)), &
-            stat=alloc_stat)
-    end if
-    
-    if (alloc_stat /= 0) then
-       errorcode = 8
-       call decomp_2d_abort(errorcode, &
-            'Memory allocation failed when creating new arrays')
-    end if
-
-    return
-  end subroutine alloc_y_complex
-
-
-  ! Z-pencil real arrays
-  subroutine alloc_z_real(var, opt_decomp, opt_global)
-
-    implicit none
-
-    real(mytype), allocatable, dimension(:,:,:) :: var
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
-    logical, intent(IN), optional :: opt_global
-
-    TYPE(DECOMP_INFO) :: decomp
-    logical :: global
-    integer :: alloc_stat, errorcode
-
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
-
-    if (present(opt_global)) then
-       global = opt_global
-    else
-       global = .false.
-    end if
-
-    if (global) then
-       allocate(var(decomp%zst(1):decomp%zen(1), &
-            decomp%zst(2):decomp%zen(2), decomp%zst(3):decomp%zen(3)), &
-            stat=alloc_stat)
-    else
-       allocate(var(decomp%zsz(1),decomp%zsz(2),decomp%zsz(3)), &
-            stat=alloc_stat)
-    end if
-    
-    if (alloc_stat /= 0) then
-       errorcode = 8
-       call decomp_2d_abort(errorcode, &
-            'Memory allocation failed when creating new arrays')
-    end if
-
-    return
-  end subroutine alloc_z_real
-
-
-  ! Z-pencil complex arrays
-  subroutine alloc_z_complex(var, opt_decomp, opt_global)
-
-    implicit none
-
-    complex(mytype), allocatable, dimension(:,:,:) :: var
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
-    logical, intent(IN), optional :: opt_global
-
-    TYPE(DECOMP_INFO) :: decomp
-    logical :: global
-    integer :: alloc_stat, errorcode
-
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
-
-    if (present(opt_global)) then
-       global = opt_global
-    else
-       global = .false.
-    end if
-
-    if (global) then
-       allocate(var(decomp%zst(1):decomp%zen(1), &
-            decomp%zst(2):decomp%zen(2), decomp%zst(3):decomp%zen(3)), &
-            stat=alloc_stat)
-    else
-       allocate(var(decomp%zsz(1),decomp%zsz(2),decomp%zsz(3)), &
-            stat=alloc_stat)
-    end if
-    
-    if (alloc_stat /= 0) then
-       errorcode = 8
-       call decomp_2d_abort(errorcode, &
-            'Memory allocation failed when creating new arrays')
-    end if
-
-    return
-  end subroutine alloc_z_complex
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+! Utility routine to help allocate 3D arrays
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+! X-pencil real arrays
+subroutine alloc_x_real(var, opt_decomp, opt_global)
+
+implicit none
+
+real(mytype), allocatable, dimension(:,:,:) :: var
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+logical, intent(IN), optional :: opt_global
+
+TYPE(DECOMP_INFO) :: decomp
+logical :: global
+integer :: alloc_stat, errorcode
+
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
+
+if (present(opt_global)) then
+global = opt_global
+else
+global = .false.
+end if
+
+if (global) then
+allocate(var(decomp%xst(1):decomp%xen(1), &
+decomp%xst(2):decomp%xen(2), decomp%xst(3):decomp%xen(3)), &
+stat=alloc_stat)
+else
+allocate(var(decomp%xsz(1),decomp%xsz(2),decomp%xsz(3)), &
+stat=alloc_stat)
+end if
+
+if (alloc_stat /= 0) then
+errorcode = 8
+call decomp_2d_abort(errorcode, &
+'Memory allocation failed when creating new arrays')
+end if
+
+return
+end subroutine alloc_x_real
+
+
+! X-pencil complex arrays
+subroutine alloc_x_complex(var, opt_decomp, opt_global)
+
+implicit none
+
+complex(mytype), allocatable, dimension(:,:,:) :: var
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+logical, intent(IN), optional :: opt_global
+
+TYPE(DECOMP_INFO) :: decomp
+logical :: global
+integer :: alloc_stat, errorcode
+
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
+
+if (present(opt_global)) then
+global = opt_global
+else
+global = .false.
+end if
+
+if (global) then
+allocate(var(decomp%xst(1):decomp%xen(1), &
+decomp%xst(2):decomp%xen(2), decomp%xst(3):decomp%xen(3)), &
+stat=alloc_stat)
+else
+allocate(var(decomp%xsz(1),decomp%xsz(2),decomp%xsz(3)), &
+stat=alloc_stat)
+end if
+
+if (alloc_stat /= 0) then
+errorcode = 8
+call decomp_2d_abort(errorcode, &
+'Memory allocation failed when creating new arrays')
+end if
+
+return
+end subroutine alloc_x_complex
+
+
+! Y-pencil real arrays
+subroutine alloc_y_real(var, opt_decomp, opt_global)
+
+implicit none
+
+real(mytype), allocatable, dimension(:,:,:) :: var
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+logical, intent(IN), optional :: opt_global
+
+TYPE(DECOMP_INFO) :: decomp
+logical :: global
+integer :: alloc_stat, errorcode
+
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
+
+if (present(opt_global)) then
+global = opt_global
+else
+global = .false.
+end if
+
+if (global) then
+allocate(var(decomp%yst(1):decomp%yen(1), &
+decomp%yst(2):decomp%yen(2), decomp%yst(3):decomp%yen(3)), &
+stat=alloc_stat)
+else
+allocate(var(decomp%ysz(1),decomp%ysz(2),decomp%ysz(3)), &
+stat=alloc_stat)
+end if
+
+if (alloc_stat /= 0) then
+errorcode = 8
+call decomp_2d_abort(errorcode, &
+'Memory allocation failed when creating new arrays')
+end if
+
+return
+end subroutine alloc_y_real
+
+
+! Y-pencil complex arrays
+subroutine alloc_y_complex(var, opt_decomp, opt_global)
+
+implicit none
+
+complex(mytype), allocatable, dimension(:,:,:) :: var
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+logical, intent(IN), optional :: opt_global
+
+TYPE(DECOMP_INFO) :: decomp
+logical :: global
+integer :: alloc_stat, errorcode
+
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
+
+if (present(opt_global)) then
+global = opt_global
+else
+global = .false.
+end if
+
+if (global) then
+allocate(var(decomp%yst(1):decomp%yen(1), &
+decomp%yst(2):decomp%yen(2), decomp%yst(3):decomp%yen(3)), &
+stat=alloc_stat)
+else
+allocate(var(decomp%ysz(1),decomp%ysz(2),decomp%ysz(3)), &
+stat=alloc_stat)
+end if
+
+if (alloc_stat /= 0) then
+errorcode = 8
+call decomp_2d_abort(errorcode, &
+'Memory allocation failed when creating new arrays')
+end if
+
+return
+end subroutine alloc_y_complex
+
+
+! Z-pencil real arrays
+subroutine alloc_z_real(var, opt_decomp, opt_global)
+
+implicit none
+
+real(mytype), allocatable, dimension(:,:,:) :: var
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+logical, intent(IN), optional :: opt_global
+
+TYPE(DECOMP_INFO) :: decomp
+logical :: global
+integer :: alloc_stat, errorcode
+
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
+
+if (present(opt_global)) then
+global = opt_global
+else
+global = .false.
+end if
+
+if (global) then
+allocate(var(decomp%zst(1):decomp%zen(1), &
+decomp%zst(2):decomp%zen(2), decomp%zst(3):decomp%zen(3)), &
+stat=alloc_stat)
+else
+allocate(var(decomp%zsz(1),decomp%zsz(2),decomp%zsz(3)), &
+stat=alloc_stat)
+end if
+
+if (alloc_stat /= 0) then
+errorcode = 8
+call decomp_2d_abort(errorcode, &
+'Memory allocation failed when creating new arrays')
+end if
+
+return
+end subroutine alloc_z_real
+
+
+! Z-pencil complex arrays
+subroutine alloc_z_complex(var, opt_decomp, opt_global)
+
+implicit none
+
+complex(mytype), allocatable, dimension(:,:,:) :: var
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+logical, intent(IN), optional :: opt_global
+
+TYPE(DECOMP_INFO) :: decomp
+logical :: global
+integer :: alloc_stat, errorcode
+
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
+
+if (present(opt_global)) then
+global = opt_global
+else
+global = .false.
+end if
+
+if (global) then
+allocate(var(decomp%zst(1):decomp%zen(1), &
+decomp%zst(2):decomp%zen(2), decomp%zst(3):decomp%zen(3)), &
+stat=alloc_stat)
+else
+allocate(var(decomp%zsz(1),decomp%zsz(2),decomp%zsz(3)), &
+stat=alloc_stat)
+end if
+
+if (alloc_stat /= 0) then
+errorcode = 8
+call decomp_2d_abort(errorcode, &
+'Memory allocation failed when creating new arrays')
+end if
+
+return
+end subroutine alloc_z_complex
diff --git a/decomp2d/decomp_2d.f90 b/decomp2d/decomp_2d.f90
index b2caa9d3..6c2c69f1 100644
--- a/decomp2d/decomp_2d.f90
+++ b/decomp2d/decomp_2d.f90
@@ -145,7 +145,7 @@ module decomp_2d
   real(mytype),    allocatable, dimension(:) :: work1_r, work2_r
   complex(mytype), allocatable, dimension(:) :: work1_c, work2_c
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! To define smaller arrays using every several mesh points
   integer, save, dimension(3), public :: xszS,yszS,zszS,xstS,ystS,zstS,xenS,yenS,zenS
   integer, save, dimension(3), public :: xszV,yszV,zszV,xstV,ystV,zstV,xenV,yenV,zenV
@@ -176,7 +176,7 @@ module decomp_2d
        get_decomp_info
 
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! These are routines to perform global data transpositions
   ! 
   !   Four combinations are available, enough to cover all situations
@@ -192,18 +192,18 @@ module decomp_2d
   !    - an optional argument can be supplied to transpose data whose 
   !      global size is not the default nx*ny*nz 
   !       * as the case in fft r2c/c2r interface 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
   interface transpose_x_to_y
      module procedure transpose_x_to_y_real
      module procedure transpose_x_to_y_complex
   end interface transpose_x_to_y
-  
+
   interface transpose_y_to_z
      module procedure transpose_y_to_z_real
      module procedure transpose_y_to_z_complex
   end interface transpose_y_to_z
-  
+
   interface transpose_z_to_y
      module procedure transpose_z_to_y_real
      module procedure transpose_z_to_y_complex
@@ -229,7 +229,7 @@ module decomp_2d
      module procedure transpose_z_to_y_real_start
      module procedure transpose_z_to_y_complex_start
   end interface transpose_z_to_y_start
-     
+
   interface transpose_y_to_x_start
      module procedure transpose_y_to_x_real_start
      module procedure transpose_y_to_x_complex_start
@@ -249,7 +249,7 @@ module decomp_2d
      module procedure transpose_z_to_y_real_wait
      module procedure transpose_z_to_y_complex_wait
   end interface transpose_z_to_y_wait
-     
+
   interface transpose_y_to_x_wait
      module procedure transpose_y_to_x_real_wait
      module procedure transpose_y_to_x_complex_wait
@@ -279,7 +279,7 @@ module decomp_2d
 contains
 
 #ifdef SHM_DEBUG
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! For debugging, print the shared-memory structure
   subroutine print_smp_info(s)
     TYPE(SMP_INFO) :: s
@@ -295,7 +295,7 @@ contains
   end subroutine print_smp_info
 #endif
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Routine to be called by applications to initialise this library
   !   INPUT:
   !     nx, ny, nz   - global data dimension
@@ -303,16 +303,16 @@ contains
   !   OUTPUT:
   !     all internal data structures initialised properly
   !     library ready to use
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine decomp_2d_init(nx,ny,nz,p_row,p_col,periodic_bc)
 
     implicit none
 
     integer, intent(IN) :: nx,ny,nz,p_row,p_col
     logical, dimension(3), intent(IN), optional :: periodic_bc
-    
+
     integer :: errorcode, ierror, row, col
-    
+
 #ifdef SHM_DEBUG
     character(len=80) fname
 #endif
@@ -344,7 +344,7 @@ contains
           col = p_col
        end if
     end if
-    
+
     ! Create 2D Catersian topology
     ! Note that in order to support periodic B.C. in the halo-cell code,
     ! need to create multiple topology objects: DECOMP_2D_COMM_CART_?,
@@ -368,7 +368,7 @@ contains
          .false., DECOMP_2D_COMM_CART_Z, ierror)
 
     call MPI_CART_COORDS(DECOMP_2D_COMM_CART_X,nrank,2,coord,ierror)
-    
+
     ! derive communicators defining sub-groups for ALLTOALL(V)
     call MPI_CART_SUB(DECOMP_2D_COMM_CART_X,(/.true.,.false./), &
          DECOMP_2D_COMM_COL,ierror)
@@ -377,10 +377,10 @@ contains
 
     ! gather information for halo-cell support code
     call init_neighbour
-    
+
     ! actually generate all 2D decomposition information
     call decomp_info_init(nx,ny,nz,decomp_main)
-    
+
     ! make a copy of the decomposition information associated with the
     ! default global size in these global variables so applications can
     ! use them to create data structures 
@@ -460,27 +460,27 @@ contains
 
     return
   end subroutine decomp_2d_init
-  
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Routine to be called by applications to clean things up
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine decomp_2d_finalize
 
     implicit none
-    
+
     call decomp_info_finalize(decomp_main)
 
     decomp_buf_size = 0
     deallocate(work1_r, work2_r, work1_c, work2_c)
-    
+
     return
   end subroutine decomp_2d_finalize
 
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Return the default decomposition object
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine get_decomp_info(decomp)
 
     implicit none
@@ -491,9 +491,9 @@ contains
 
     return
   end subroutine get_decomp_info
-    
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Advanced Interface allowing applications to define globle domain of
   ! any size, distribute it, and then transpose data among pencils.
   !  - generate 2D decomposition details as defined in DECOMP_INFO
@@ -501,11 +501,11 @@ contains
   !  - a different global size nx/2+1,ny,nz is used in FFT r2c/c2r
   !  - multiple global sizes can co-exist in one application, each
   !    using its own DECOMP_INFO object
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine decomp_info_init(nx,ny,nz,decomp)
 
     implicit none
-    
+
     integer, intent(IN) :: nx,ny,nz
     TYPE(DECOMP_INFO), intent(INOUT) :: decomp
 
@@ -519,7 +519,7 @@ contains
             'Make sure that min(nx,ny) >= p_row and ' // &
             'min(ny,nz) >= p_col')
     end if
-    
+
     if (mod(nx,dims(1))==0 .and. mod(ny,dims(1))==0 .and. &
          mod(ny,dims(2))==0 .and. mod(nz,dims(2))==0) then
        decomp%even = .true.
@@ -531,7 +531,7 @@ contains
     allocate(decomp%x1dist(0:dims(1)-1),decomp%y1dist(0:dims(1)-1), &
          decomp%y2dist(0:dims(2)-1),decomp%z2dist(0:dims(2)-1))
     call get_dist(nx,ny,nz,decomp)
-    
+
     ! generate partition information - starting/ending index etc.
     call partition(nx, ny, nz, (/ 1,2,3 /), &
          decomp%xst, decomp%xen, decomp%xsz)
@@ -539,7 +539,7 @@ contains
          decomp%yst, decomp%yen, decomp%ysz)
     call partition(nx, ny, nz, (/ 2,3,1 /), &
          decomp%zst, decomp%zen, decomp%zsz)
-    
+
     ! prepare send/receive buffer displacement and count for ALLTOALL(V)
     allocate(decomp%x1cnts(0:dims(1)-1),decomp%y1cnts(0:dims(1)-1), &
          decomp%y2cnts(0:dims(2)-1),decomp%z2cnts(0:dims(2)-1))
@@ -554,7 +554,7 @@ contains
 
     ! allocate memory for the MPI_ALLTOALL(V) buffers
     ! define the buffers globally for performance reason
-    
+
     buf_size = max(decomp%xsz(1)*decomp%xsz(2)*decomp%xsz(3), &
          max(decomp%ysz(1)*decomp%ysz(2)*decomp%ysz(3), &
          decomp%zsz(1)*decomp%zsz(2)*decomp%zsz(3)) )
@@ -587,9 +587,9 @@ contains
   end subroutine decomp_info_init
 
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Release memory associated with a DECOMP_INFO object
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine decomp_info_finalize(decomp)
 
     implicit none
@@ -613,16 +613,16 @@ contains
   end subroutine decomp_info_finalize
 
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Coarser mesh support for statistic
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine init_coarser_mesh_statS(i_skip,j_skip,k_skip,from1)
 
     implicit none
-    
+
     integer, intent(IN) :: i_skip,j_skip,k_skip
     logical, intent(IN) :: from1  ! .true.  - save 1,n+1,2n+1...
-                                  ! .false. - save n,2n,3n...
+    ! .false. - save n,2n,3n...
 
     integer, dimension(3) :: skip
     integer :: i
@@ -678,16 +678,16 @@ contains
     return
   end subroutine init_coarser_mesh_statS
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Coarser mesh support for visualization
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine init_coarser_mesh_statV(i_skip,j_skip,k_skip,from1)
 
     implicit none
-    
+
     integer, intent(IN) :: i_skip,j_skip,k_skip
     logical, intent(IN) :: from1  ! .true.  - save 1,n+1,2n+1...
-                                  ! .false. - save n,2n,3n...
+    ! .false. - save n,2n,3n...
 
     integer, dimension(3) :: skip
     integer :: i
@@ -743,16 +743,16 @@ contains
     return
   end subroutine init_coarser_mesh_statV
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Coarser mesh support for probe
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine init_coarser_mesh_statP(i_skip,j_skip,k_skip,from1)
 
     implicit none
-    
+
     integer, intent(IN) :: i_skip,j_skip,k_skip
     logical, intent(IN) :: from1  ! .true.  - save 1,n+1,2n+1...
-                                  ! .false. - save n,2n,3n...
+    ! .false. - save n,2n,3n...
 
     integer, dimension(3) :: skip
     integer :: i
@@ -1064,7 +1064,7 @@ contains
   end subroutine fine_to_coarseP
 
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Find sub-domain information held by current processor
   !   INPUT: 
   !     nx, ny, nz - global data dimension
@@ -1076,7 +1076,7 @@ contains
   !     lstart(3)  - starting index
   !     lend(3)    - ending index
   !     lsize(3)   - size of the sub-block (redundant) 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine partition(nx, ny, nz, pdim, lstart, lend, lsize)
 
     implicit none
@@ -1089,50 +1089,50 @@ contains
     integer :: i, gsize
 
     do i = 1, 3
- 
-      if (i==1) then
-        gsize = nx
-      else if (i==2) then
-        gsize = ny
-      else if (i==3) then
-        gsize = nz
-      end if
-
-      if (pdim(i) == 1) then        ! all local
-        lstart(i) = 1
-        lend(i)   = gsize
-        lsize(i)  = gsize
-      elseif (pdim(i) == 2) then    ! distribute across dims(1)
-        allocate(st(0:dims(1)-1))
-        allocate(en(0:dims(1)-1))
-        allocate(sz(0:dims(1)-1))
-        call distribute(gsize,dims(1),st,en,sz)
-        lstart(i) = st(coord(1))
-        lend(i)   = en(coord(1))
-        lsize(i)  = sz(coord(1))
-        deallocate(st,en,sz)
-      elseif (pdim(i) == 3) then    ! distribute across dims(2)
-        allocate(st(0:dims(2)-1))
-        allocate(en(0:dims(2)-1))
-        allocate(sz(0:dims(2)-1))
-        call distribute(gsize,dims(2),st,en,sz)
-        lstart(i) = st(coord(2))
-        lend(i)   = en(coord(2))
-        lsize(i)  = sz(coord(2))
-        deallocate(st,en,sz)
-      end if    
+
+       if (i==1) then
+          gsize = nx
+       else if (i==2) then
+          gsize = ny
+       else if (i==3) then
+          gsize = nz
+       end if
+
+       if (pdim(i) == 1) then        ! all local
+          lstart(i) = 1
+          lend(i)   = gsize
+          lsize(i)  = gsize
+       elseif (pdim(i) == 2) then    ! distribute across dims(1)
+          allocate(st(0:dims(1)-1))
+          allocate(en(0:dims(1)-1))
+          allocate(sz(0:dims(1)-1))
+          call distribute(gsize,dims(1),st,en,sz)
+          lstart(i) = st(coord(1))
+          lend(i)   = en(coord(1))
+          lsize(i)  = sz(coord(1))
+          deallocate(st,en,sz)
+       elseif (pdim(i) == 3) then    ! distribute across dims(2)
+          allocate(st(0:dims(2)-1))
+          allocate(en(0:dims(2)-1))
+          allocate(sz(0:dims(2)-1))
+          call distribute(gsize,dims(2),st,en,sz)
+          lstart(i) = st(coord(2))
+          lend(i)   = en(coord(2))
+          lsize(i)  = sz(coord(2))
+          deallocate(st,en,sz)
+       end if
 
     end do
     return   
 
   end subroutine partition
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !   - distibutes grid points in one dimension
   !   - handles uneven distribution properly 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
   subroutine distribute(data1,proc,st,en,sz)
-  
+
     implicit none
     ! data1 -- data size in any dimension to be partitioned
     ! proc  -- number of processors in that dimension
@@ -1141,7 +1141,7 @@ contains
     ! sz    -- array of local size  (redundent)
     integer data1,proc,st(0:proc-1),en(0:proc-1),sz(0:proc-1)
     integer i,size1,nl,nu
-  
+
     size1=data1/proc
     nu = data1 - size1 * proc
     nl = proc - nu
@@ -1149,27 +1149,27 @@ contains
     sz(0) = size1
     en(0) = size1
     do i=1,nl-1
-      st(i) = st(i-1) + size1
-      sz(i) = size1
-      en(i) = en(i-1) + size1
+       st(i) = st(i-1) + size1
+       sz(i) = size1
+       en(i) = en(i-1) + size1
     end do
     size1 = size1 + 1
     do i=nl,proc-1
-      st(i) = en(i-1) + 1
-      sz(i) = size1
-      en(i) = en(i-1) + size1
+       st(i) = en(i-1) + 1
+       sz(i) = size1
+       en(i) = en(i-1) + size1
     end do
     en(proc-1)= data1 
     sz(proc-1)= data1-st(proc-1)+1
-  
+
     return
   end subroutine distribute
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !  Define how each dimension is distributed across processors
   !    e.g. 17 meshes across 4 processor would be distibuted as (4,4,4,5)
   !    such global information is required locally at MPI_ALLTOALLV time
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine get_dist(nx,ny,nz,decomp)
 
     integer, intent(IN) :: nx, ny, nz
@@ -1191,13 +1191,13 @@ contains
     return
   end subroutine get_dist
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Prepare the send / receive buffers for MPI_ALLTOALLV communications
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine prepare_buffer(decomp)
-    
+
     implicit none
-    
+
     TYPE(DECOMP_INFO), intent(INOUT) :: decomp
 
     integer :: i
@@ -1215,7 +1215,7 @@ contains
           decomp%y1disp(i) = decomp%y1disp(i-1) + decomp%y1cnts(i-1)
        end if
     end do
-    
+
     do i=0, dims(2)-1
        decomp%y2cnts(i) = decomp%ysz(1)*decomp%y2dist(i)*decomp%ysz(3)
        decomp%z2cnts(i) = decomp%zsz(1)*decomp%zsz(2)*decomp%z2dist(i)
@@ -1227,7 +1227,7 @@ contains
           decomp%z2disp(i) = decomp%z2disp(i-1) + decomp%z2cnts(i-1)
        end if
     end do
-    
+
     ! MPI_ALLTOALL buffer information
 
     ! For evenly distributed data, following is an easier implementation.
@@ -1247,15 +1247,15 @@ contains
     decomp%y2count = decomp%y2dist(dims(2)-1) * &
          decomp%z2dist(dims(2)-1) * decomp%zsz(1)
     decomp%z2count = decomp%y2count
-    
+
     return
-  end subroutine prepare_buffer  
+  end subroutine prepare_buffer
 
 #ifdef SHM
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !  Generate shared-memory information 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine decomp_info_init_shm(decomp)
 
     implicit none
@@ -1277,22 +1277,22 @@ contains
   end subroutine decomp_info_init_shm
 
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! For shared-memory implementation, prepare send/recv shared buffer
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine prepare_shared_buffer(C,MPI_COMM,decomp)
 
     implicit none
-    
+
     TYPE(SMP_INFO) :: C
     INTEGER :: MPI_COMM
     TYPE(DECOMP_INFO) :: decomp
-    
+
     INTEGER, ALLOCATABLE :: KTBL(:,:),NARY(:,:),KTBLALL(:,:)
     INTEGER MYSMP, MYCORE, COLOR
-    
+
     integer :: ierror
-    
+
     C%MPI_COMM = MPI_COMM
     CALL MPI_COMM_SIZE(MPI_COMM,C%NCPU,ierror)
     CALL MPI_COMM_RANK(MPI_COMM,C%NODE_ME,ierror)
@@ -1309,7 +1309,7 @@ contains
     C%RCV_P = 0
     C%SND_P_c = 0
     C%RCV_P_c = 0
-    
+
     ! get smp-node map for this communicator and set up smp communicators
     CALL GET_SMP_MAP(C%MPI_COMM, C%NSMP, MYSMP, &
          C%NCORE, MYCORE, C%MAXCORE)
@@ -1331,72 +1331,72 @@ contains
     KTBL=KTBLALL
     !  IF (SUM(KTBL) /= C%NCPU*(C%NCPU+1)/2) &
     !       CALL MPI_ABORT(...
-    
+
     ! compute offsets in shared SNDBUF and RCVBUF
     CALL MAPSET_SMPSHM(C, KTBL, NARY, decomp)
-    
+
     DEALLOCATE(KTBL,NARY)
-    
+
     return
   end subroutine prepare_shared_buffer
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Use Ian Bush's FreeIPC to generate shared-memory information
   !  - system independent solution
   !  - replacing David Tanqueray's implementation in alloc_shm.c
   !    (old C code renamed to get_smp_map2)
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine get_smp_map(comm, nnodes, my_node, ncores, my_core, maxcor)
-    
+
     use FIPC_module
-    
+
     implicit none
-    
+
     integer, intent(IN) :: comm
     integer, intent(OUT) :: nnodes, my_node, ncores, my_core, maxcor
-    
+
     integer :: intra_comm, extra_comm
     integer :: ierror
-    
+
     call FIPC_init(comm, ierror)
-    
+
     ! intra_comm: communicator for processes on this shared memory node
     ! extra_comm: communicator for all rank 0 on each shared memory node
     call FIPC_ctxt_intra_comm(FIPC_ctxt_world, intra_comm, ierror)
     call FIPC_ctxt_extra_comm(FIPC_ctxt_world, extra_comm, ierror)
-    
+
     call MPI_COMM_SIZE(intra_comm,  ncores, ierror)
     call MPI_COMM_RANK(intra_comm, my_core, ierror)
-    
+
     ! only rank 0 on each shared memory node member of extra_comm
     ! for others extra_comm = MPI_COMM_NULL
     if (extra_comm /= MPI_COMM_NULL) then
        call MPI_COMM_SIZE(extra_comm,  nnodes, ierror)
        call MPI_COMM_RANK(extra_comm, my_node, ierror)
     end if
-    
+
     ! other ranks share the same information as their leaders
     call MPI_BCAST( nnodes, 1, MPI_INTEGER, 0, intra_comm, ierror)
     call MPI_BCAST(my_node, 1, MPI_INTEGER, 0, intra_comm, ierror)
-    
+
     ! maxcor
     call MPI_ALLREDUCE(ncores, maxcor, 1, MPI_INTEGER, MPI_MAX, &
          MPI_COMM_WORLD, ierror)
-    
+
     call FIPC_finalize(ierror)
-    
+
     return
-    
+
   end subroutine get_smp_map
 
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Set up smp-node based shared memory maps
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   SUBROUTINE MAPSET_SMPSHM(C, KTBL, NARY, decomp)
-        
+
     IMPLICIT NONE
-    
+
     TYPE (SMP_INFO) C
     INTEGER KTBL(C%MAXCORE,C%NSMP)
     INTEGER NARY(C%NCPU,C%NCORE)
@@ -1404,9 +1404,9 @@ contains
 
     INTEGER i, j, k, l, N, PTR, BSIZ, ierror, status, seed
     character*16 s
- 
+
     BSIZ = C%N_SND
-    
+
     ! a - SNDBUF
     IF (C%MPI_COMM==DECOMP_2D_COMM_COL) THEN
        ALLOCATE(decomp%x1cnts_s(C%NSMP),decomp%x1disp_s(C%NSMP+1), &
@@ -1431,7 +1431,7 @@ contains
        END DO
        decomp%x1disp_s(C%NSMP+1) = PTR
        IF (PTR > BSIZ) BSIZ = PTR
-       
+
     ELSE IF (C%MPI_COMM==DECOMP_2D_COMM_ROW) THEN
        ALLOCATE(decomp%y2cnts_s(C%NSMP),decomp%y2disp_s(C%NSMP+1), &
             stat=status)
@@ -1456,9 +1456,9 @@ contains
        decomp%y2disp_s(C%NSMP+1) = PTR
        IF (PTR > BSIZ) BSIZ = PTR
     END IF
-    
+
     ! b - RCVBUF
-    
+
     IF (C%MPI_COMM==DECOMP_2D_COMM_COL) THEN
        ALLOCATE(decomp%y1cnts_s(C%NSMP),decomp%y1disp_s(C%NSMP+1), &
             stat=status)
@@ -1482,7 +1482,7 @@ contains
        END DO
        decomp%y1disp_s(C%NSMP+1) = PTR
        IF (PTR > BSIZ) BSIZ = PTR
-       
+
     ELSE IF (C%MPI_COMM==DECOMP_2D_COMM_ROW) THEN
        ALLOCATE(decomp%z2cnts_s(C%NSMP),decomp%z2disp_s(C%NSMP+1), &
             stat=status)
@@ -1506,9 +1506,9 @@ contains
        END DO
        decomp%z2disp_s(C%NSMP+1) = PTR
        IF (PTR > BSIZ) BSIZ = PTR
-       
+
     END IF
-    
+
     ! check buffer size and (re)-allocate buffer space if necessary
     IF (BSIZ > C%N_SND) then
        IF (C%SND_P /= 0) CALL DEALLOC_SHM(C%SND_P, C%CORE_COMM)
@@ -1541,7 +1541,7 @@ contains
 
 
     END IF
-    
+
     RETURN
   END SUBROUTINE MAPSET_SMPSHM
 
@@ -1563,18 +1563,18 @@ contains
 #endif
 
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Transposition routines 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 #include "transpose_x_to_y.inc"
 #include "transpose_y_to_z.inc"
 #include "transpose_z_to_y.inc"
 #include "transpose_y_to_x.inc"
 
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Auto-tuning algorithm to select the best 2D processor grid
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine best_2d_grid(iproc, best_p_row, best_p_col)
 
     implicit none
@@ -1595,7 +1595,7 @@ contains
     best_time = huge(t1)
     best_p_row = -1
     best_p_col = -1
-    
+
     i = int(sqrt(real(iproc))) + 10  ! enough space to save all factors 
     allocate(factors(i))
     call findfactor(iproc, factors, nfact)
@@ -1618,13 +1618,13 @@ contains
           call MPI_CART_CREATE(MPI_COMM_WORLD,2,dims,periodic, &
                .false.,DECOMP_2D_COMM_CART_X, ierror)
           call MPI_CART_COORDS(DECOMP_2D_COMM_CART_X,nrank,2,coord,ierror)
-          
+
           ! communicators defining sub-groups for ALLTOALL(V)
           call MPI_CART_SUB(DECOMP_2D_COMM_CART_X,(/.true.,.false./), &
                DECOMP_2D_COMM_COL,ierror)
           call MPI_CART_SUB(DECOMP_2D_COMM_CART_X,(/.false.,.true./), &
                DECOMP_2D_COMM_ROW,ierror)
-          
+
           ! generate 2D decomposition information for this row*col
           call decomp_info_init(nx_global,ny_global,nz_global,decomp)
 
@@ -1645,7 +1645,7 @@ contains
           call decomp_info_finalize(decomp)
 
           call MPI_ALLREDUCE(t2,t1,1,MPI_DOUBLE_PRECISION,MPI_SUM, &
-                   MPI_COMM_WORLD,ierror)
+               MPI_COMM_WORLD,ierror)
           t1 = t1 / dble(nproc)
 
           if (nrank==0) then
@@ -1659,7 +1659,7 @@ contains
           end if
 
        end if
-       
+
     end do ! loop through processer grid
 
     deallocate(factors)
@@ -1681,15 +1681,15 @@ contains
 
 #include "factor.inc"
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Halo cell support
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 #include "halo.inc"
 
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Error handling
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine decomp_2d_abort(errorcode, msg)
 
     implicit none
@@ -1698,7 +1698,7 @@ contains
     character(len=*), intent(IN) :: msg
 
     integer :: ierror
-    
+
     if (nrank==0) then
        write(*,*) '2DECOMP&FFT ERROR - errorcode: ', errorcode
        write(*,*) 'ERROR MESSAGE: ' // msg
@@ -1709,11 +1709,11 @@ contains
   end subroutine decomp_2d_abort
 
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Utility routines to help allocate 3D arrays
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 #include "alloc.inc"
-    
-  
+
+
 end module decomp_2d
 
diff --git a/decomp2d/factor.inc b/decomp2d/factor.inc
index 3e14c305..1ea2988c 100644
--- a/decomp2d/factor.inc
+++ b/decomp2d/factor.inc
@@ -11,72 +11,72 @@
 
 ! A few utility routines to find factors of integer numbers
 
-  subroutine findfactor(num, factors, nfact)
-    
-    implicit none
-
-    integer, intent(IN) :: num
-    integer, intent(OUT), dimension(*) :: factors
-    integer, intent(OUT) :: nfact
-    integer :: i, m
-
-    ! find the factors <= sqrt(num)
-    m = int(sqrt(real(num)))
-    nfact = 1
-    do i=1,m
-       if (num/i*i == num) then
-          factors(nfact) = i
-          nfact = nfact + 1
-       end if
-    end do
-    nfact = nfact - 1
-
-    ! derive those > sqrt(num)
-    if (factors(nfact)**2/=num) then
-       do i=nfact+1, 2*nfact
-          factors(i) = num / factors(2*nfact-i+1)
-       end do
-       nfact = nfact * 2
-    else
-       do i=nfact+1, 2*nfact-1
-          factors(i) = num / factors(2*nfact-i)
-       end do
-       nfact = nfact * 2 - 1
-    endif
-       
-    return
-
-  end subroutine findfactor
-
-
-  subroutine primefactors(num, factors, nfact)
-
-    implicit none
-  
-    integer, intent(IN) :: num
-    integer, intent(OUT), dimension(*) :: factors
-    integer, intent(INOUT) :: nfact
-
-    integer :: i, n
-    
-    i = 2  
-    nfact = 1
-    n = num 
-    do
-       if (mod(n,i) == 0) then
-          factors(nfact) = i
-          nfact = nfact + 1
-          n = n / i
-       else
-          i = i + 1
-       end if
-       if (n == 1) then
-          nfact = nfact - 1
-          exit
-       end if
-    end do
-    
-    return
-
-  end subroutine primefactors
-  
+subroutine findfactor(num, factors, nfact)
+
+implicit none
+
+integer, intent(IN) :: num
+integer, intent(OUT), dimension(*) :: factors
+integer, intent(OUT) :: nfact
+integer :: i, m
+
+! find the factors <= sqrt(num)
+m = int(sqrt(real(num)))
+nfact = 1
+do i=1,m
+if (num/i*i == num) then
+factors(nfact) = i
+nfact = nfact + 1
+end if
+end do
+nfact = nfact - 1
+
+! derive those > sqrt(num)
+if (factors(nfact)**2/=num) then
+do i=nfact+1, 2*nfact
+factors(i) = num / factors(2*nfact-i+1)
+end do
+nfact = nfact * 2
+else
+do i=nfact+1, 2*nfact-1
+factors(i) = num / factors(2*nfact-i)
+end do
+nfact = nfact * 2 - 1
+endif
+
+return
+
+end subroutine findfactor
+
+
+subroutine primefactors(num, factors, nfact)
+
+implicit none
+
+integer, intent(IN) :: num
+integer, intent(OUT), dimension(*) :: factors
+integer, intent(INOUT) :: nfact
+
+integer :: i, n
+
+i = 2  
+nfact = 1
+n = num 
+do
+if (mod(n,i) == 0) then
+factors(nfact) = i
+nfact = nfact + 1
+n = n / i
+else
+i = i + 1
+end if
+if (n == 1) then
+nfact = nfact - 1
+exit
+end if
+end do
+
+return
+
+end subroutine primefactors
+
diff --git a/decomp2d/fft_common.inc b/decomp2d/fft_common.inc
index eb21b437..2e7b45c3 100644
--- a/decomp2d/fft_common.inc
+++ b/decomp2d/fft_common.inc
@@ -11,177 +11,177 @@
 
 ! This file contains common code shared by all FFT engines
 
-  integer, parameter, public :: DECOMP_2D_FFT_FORWARD = -1
-  integer, parameter, public :: DECOMP_2D_FFT_BACKWARD = 1
-  
-  ! Physical space data can be stored in either X-pencil or Z-pencil
-  integer, parameter, public :: PHYSICAL_IN_X = 1
-  integer, parameter, public :: PHYSICAL_IN_Z = 3 
-
-  integer, save :: format                 ! input X-pencil or Z-pencil
-  
-  ! The libary can only be initialised once
-  logical, save :: initialised = .false. 
-
-  ! Global size of the FFT
-  integer, save :: nx_fft, ny_fft, nz_fft
-
-  ! 2D processor grid
-  integer, save, dimension(2) :: dims
-
-  ! Decomposition objects
-  TYPE(DECOMP_INFO), save :: ph  ! physical space
-  TYPE(DECOMP_INFO), save :: sp  ! spectral space
-
-  ! Workspace to store the intermediate Y-pencil data
-  ! *** TODO: investigate how to use only one workspace array
-  complex(mytype), allocatable, dimension(:,:,:) :: wk2_c2c, wk2_r2c
-  complex(mytype), allocatable, dimension(:,:,:) :: wk13
-
-  public :: decomp_2d_fft_init, decomp_2d_fft_3d, &
-       decomp_2d_fft_finalize, decomp_2d_fft_get_size
-  
-  ! Declare generic interfaces to handle different inputs
-  
-  interface decomp_2d_fft_init
-     module procedure fft_init_noarg
-     module procedure fft_init_arg
-     module procedure fft_init_general
-  end interface
-  
-  interface decomp_2d_fft_3d
-     module procedure fft_3d_c2c
-     module procedure fft_3d_r2c
-     module procedure fft_3d_c2r
-  end interface
-
-  
+integer, parameter, public :: DECOMP_2D_FFT_FORWARD = -1
+integer, parameter, public :: DECOMP_2D_FFT_BACKWARD = 1
+
+! Physical space data can be stored in either X-pencil or Z-pencil
+integer, parameter, public :: PHYSICAL_IN_X = 1
+integer, parameter, public :: PHYSICAL_IN_Z = 3 
+
+integer, save :: format                 ! input X-pencil or Z-pencil
+
+! The libary can only be initialised once
+logical, save :: initialised = .false. 
+
+! Global size of the FFT
+integer, save :: nx_fft, ny_fft, nz_fft
+
+! 2D processor grid
+integer, save, dimension(2) :: dims
+
+! Decomposition objects
+TYPE(DECOMP_INFO), save :: ph  ! physical space
+TYPE(DECOMP_INFO), save :: sp  ! spectral space
+
+! Workspace to store the intermediate Y-pencil data
+! *** TODO: investigate how to use only one workspace array
+complex(mytype), allocatable, dimension(:,:,:) :: wk2_c2c, wk2_r2c
+complex(mytype), allocatable, dimension(:,:,:) :: wk13
+
+public :: decomp_2d_fft_init, decomp_2d_fft_3d, &
+decomp_2d_fft_finalize, decomp_2d_fft_get_size
+
+! Declare generic interfaces to handle different inputs
+
+interface decomp_2d_fft_init
+module procedure fft_init_noarg
+module procedure fft_init_arg
+module procedure fft_init_general
+end interface
+
+interface decomp_2d_fft_3d
+module procedure fft_3d_c2c
+module procedure fft_3d_r2c
+module procedure fft_3d_c2r
+end interface
+
+
 contains
-  
-  
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-  ! Initialise the FFT module
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-  subroutine fft_init_noarg
-    
-    implicit none
-    
-    call fft_init_arg(PHYSICAL_IN_X)  ! default input is X-pencil data
-    
-    return
-  end subroutine fft_init_noarg
-
-  subroutine fft_init_arg(pencil)     ! allow to handle Z-pencil input
-
-    implicit none
-
-    integer, intent(IN) :: pencil
-
-    call fft_init_general(pencil, nx_global, ny_global, nz_global)
-
-    return
-  end subroutine fft_init_arg
-
-  ! Initialise the FFT library to perform arbitrary size transforms
-  subroutine fft_init_general(pencil, nx, ny, nz)
-
-    implicit none
-
-    integer, intent(IN) :: pencil
-    integer, intent(IN) :: nx, ny, nz
-
-    logical, dimension(2) :: dummy_periods
-    integer, dimension(2) :: dummy_coords
-    integer :: status, errorcode, ierror
-
-    if (initialised) then
-       errorcode = 4
-       call decomp_2d_abort(errorcode, &
-            'FFT library should only be initialised once')
-    end if
-    
-    format = pencil
-    nx_fft = nx
-    ny_fft = ny
-    nz_fft = nz
-
-    ! determine the processor grid in use
-    call MPI_CART_GET(DECOMP_2D_COMM_CART_X, 2, &
-         dims, dummy_periods, dummy_coords, ierror)
-
-    ! for c2r/r2c interface:
-    ! if in physical space, a real array is of size: nx*ny*nz
-    ! in spectral space, the complex array is of size:
-    !         (nx/2+1)*ny*nz, if PHYSICAL_IN_X
-    !      or nx*ny*(nz/2+1), if PHYSICAL_IN_Z
-
-    call decomp_info_init(nx, ny, nz, ph)
-    if (format==PHYSICAL_IN_X) then
-       call decomp_info_init(nx/2+1, ny, nz, sp)
-    else if (format==PHYSICAL_IN_Z) then
-       call decomp_info_init(nx, ny, nz/2+1, sp)
-    end if
-
-    allocate(wk2_c2c(ph%ysz(1),ph%ysz(2),ph%ysz(3)), STAT=status)
-    allocate(wk2_r2c(sp%ysz(1),sp%ysz(2),sp%ysz(3)), STAT=status)
-    if (format==PHYSICAL_IN_X) then
-       allocate(wk13(sp%xsz(1),sp%xsz(2),sp%xsz(3)), STAT=status)
-    else if (format==PHYSICAL_IN_Z) then
-       allocate(wk13(sp%zsz(1),sp%zsz(2),sp%zsz(3)), STAT=status)
-    end if
-    if (status /= 0) then
-       errorcode = 3
-       call decomp_2d_abort(errorcode, &
-            'Out of memory when initialising FFT')
-    end if
-
-    call init_fft_engine
-    
-    initialised = .true.
-    
-    return
-  end subroutine fft_init_general
-
-  
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-  ! Final clean up
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-  subroutine decomp_2d_fft_finalize
-    
-    implicit none
-
-    call decomp_info_finalize(ph)
-    call decomp_info_finalize(sp)
-
-    deallocate(wk2_c2c, wk2_r2c, wk13)
-
-    call finalize_fft_engine
-
-    initialised = .false.
-
-    return
-  end subroutine decomp_2d_fft_finalize
-
-
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-  ! Return the size, starting/ending index of the distributed array 
-  !  whose global size is (nx/2+1)*ny*nz, for defining data structures
-  !  in r2c and c2r interfaces
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-  subroutine decomp_2d_fft_get_size(istart, iend, isize)
-    
-    implicit none
-    integer, dimension(3), intent(OUT) :: istart, iend, isize
-    
-    if (format==PHYSICAL_IN_X) then
-       istart = sp%zst
-       iend   = sp%zen
-       isize  = sp%zsz
-    else if (format==PHYSICAL_IN_Z) then
-       istart = sp%xst
-       iend   = sp%xen
-       isize  = sp%xsz
-    end if
-    
-    return
-  end subroutine decomp_2d_fft_get_size
+
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+! Initialise the FFT module
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+subroutine fft_init_noarg
+
+implicit none
+
+call fft_init_arg(PHYSICAL_IN_X)  ! default input is X-pencil data
+
+return
+end subroutine fft_init_noarg
+
+subroutine fft_init_arg(pencil)     ! allow to handle Z-pencil input
+
+implicit none
+
+integer, intent(IN) :: pencil
+
+call fft_init_general(pencil, nx_global, ny_global, nz_global)
+
+return
+end subroutine fft_init_arg
+
+! Initialise the FFT library to perform arbitrary size transforms
+subroutine fft_init_general(pencil, nx, ny, nz)
+
+implicit none
+
+integer, intent(IN) :: pencil
+integer, intent(IN) :: nx, ny, nz
+
+logical, dimension(2) :: dummy_periods
+integer, dimension(2) :: dummy_coords
+integer :: status, errorcode, ierror
+
+if (initialised) then
+errorcode = 4
+call decomp_2d_abort(errorcode, &
+'FFT library should only be initialised once')
+end if
+
+format = pencil
+nx_fft = nx
+ny_fft = ny
+nz_fft = nz
+
+! determine the processor grid in use
+call MPI_CART_GET(DECOMP_2D_COMM_CART_X, 2, &
+dims, dummy_periods, dummy_coords, ierror)
+
+! for c2r/r2c interface:
+! if in physical space, a real array is of size: nx*ny*nz
+! in spectral space, the complex array is of size:
+!         (nx/2+1)*ny*nz, if PHYSICAL_IN_X
+!      or nx*ny*(nz/2+1), if PHYSICAL_IN_Z
+
+call decomp_info_init(nx, ny, nz, ph)
+if (format==PHYSICAL_IN_X) then
+call decomp_info_init(nx/2+1, ny, nz, sp)
+else if (format==PHYSICAL_IN_Z) then
+call decomp_info_init(nx, ny, nz/2+1, sp)
+end if
+
+allocate(wk2_c2c(ph%ysz(1),ph%ysz(2),ph%ysz(3)), STAT=status)
+allocate(wk2_r2c(sp%ysz(1),sp%ysz(2),sp%ysz(3)), STAT=status)
+if (format==PHYSICAL_IN_X) then
+allocate(wk13(sp%xsz(1),sp%xsz(2),sp%xsz(3)), STAT=status)
+else if (format==PHYSICAL_IN_Z) then
+allocate(wk13(sp%zsz(1),sp%zsz(2),sp%zsz(3)), STAT=status)
+end if
+if (status /= 0) then
+errorcode = 3
+call decomp_2d_abort(errorcode, &
+'Out of memory when initialising FFT')
+end if
+
+call init_fft_engine
+
+initialised = .true.
+
+return
+end subroutine fft_init_general
+
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+! Final clean up
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+subroutine decomp_2d_fft_finalize
+
+implicit none
+
+call decomp_info_finalize(ph)
+call decomp_info_finalize(sp)
+
+deallocate(wk2_c2c, wk2_r2c, wk13)
+
+call finalize_fft_engine
+
+initialised = .false.
+
+return
+end subroutine decomp_2d_fft_finalize
+
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+! Return the size, starting/ending index of the distributed array 
+!  whose global size is (nx/2+1)*ny*nz, for defining data structures
+!  in r2c and c2r interfaces
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+subroutine decomp_2d_fft_get_size(istart, iend, isize)
+
+implicit none
+integer, dimension(3), intent(OUT) :: istart, iend, isize
+
+if (format==PHYSICAL_IN_X) then
+istart = sp%zst
+iend   = sp%zen
+isize  = sp%zsz
+else if (format==PHYSICAL_IN_Z) then
+istart = sp%xst
+iend   = sp%xen
+isize  = sp%xsz
+end if
+
+return
+end subroutine decomp_2d_fft_get_size
diff --git a/decomp2d/fft_common_3d.inc b/decomp2d/fft_common_3d.inc
index fcda5d8f..8c9696e1 100644
--- a/decomp2d/fft_common_3d.inc
+++ b/decomp2d/fft_common_3d.inc
@@ -12,242 +12,242 @@
 ! This file contains 3D c2c/r2c/c2r transform subroutines which are
 ! identical for several FFT engines 
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-  ! 3D FFT - complex to complex
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-  subroutine fft_3d_c2c(in, out, isign)
-    
-    implicit none
-    
-    complex(mytype), dimension(:,:,:), intent(INOUT) :: in
-    complex(mytype), dimension(:,:,:), intent(OUT) :: out
-    integer, intent(IN) :: isign
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+! 3D FFT - complex to complex
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+subroutine fft_3d_c2c(in, out, isign)
+
+implicit none
+
+complex(mytype), dimension(:,:,:), intent(INOUT) :: in
+complex(mytype), dimension(:,:,:), intent(OUT) :: out
+integer, intent(IN) :: isign
 
 #ifndef OVERWRITE
-    complex(mytype), allocatable, dimension(:,:,:) :: wk1
+complex(mytype), allocatable, dimension(:,:,:) :: wk1
 #endif
 
-    if (format==PHYSICAL_IN_X .AND. isign==DECOMP_2D_FFT_FORWARD .OR.  &
-         format==PHYSICAL_IN_Z .AND. isign==DECOMP_2D_FFT_BACKWARD) then
-       
-       ! ===== 1D FFTs in X =====
+if (format==PHYSICAL_IN_X .AND. isign==DECOMP_2D_FFT_FORWARD .OR.  &
+format==PHYSICAL_IN_Z .AND. isign==DECOMP_2D_FFT_BACKWARD) then
+
+! ===== 1D FFTs in X =====
 #ifdef OVERWRITE
-       call c2c_1m_x(in,isign,ph)
+call c2c_1m_x(in,isign,ph)
 #else
-       allocate (wk1(ph%xsz(1),ph%xsz(2),ph%xsz(3)))
-       wk1 = in
-       call c2c_1m_x(wk1,isign,ph)
+allocate (wk1(ph%xsz(1),ph%xsz(2),ph%xsz(3)))
+wk1 = in
+call c2c_1m_x(wk1,isign,ph)
 #endif
 
-       ! ===== Swap X --> Y; 1D FFTs in Y =====
+! ===== Swap X --> Y; 1D FFTs in Y =====
 
-       if (dims(1)>1) then
+if (dims(1)>1) then
 #ifdef OVERWRITE
-          call transpose_x_to_y(in,wk2_c2c,ph)
+call transpose_x_to_y(in,wk2_c2c,ph)
 #else
-          call transpose_x_to_y(wk1,wk2_c2c,ph)
+call transpose_x_to_y(wk1,wk2_c2c,ph)
 #endif
-          call c2c_1m_y(wk2_c2c,isign,ph)
-       else
+call c2c_1m_y(wk2_c2c,isign,ph)
+else
 #ifdef OVERWRITE
-          call c2c_1m_y(in,isign,ph)
+call c2c_1m_y(in,isign,ph)
 #else
-          call c2c_1m_y(wk1,isign,ph)
+call c2c_1m_y(wk1,isign,ph)
 #endif
-       end if
+end if
 
-       ! ===== Swap Y --> Z; 1D FFTs in Z =====
-       if (dims(1)>1) then
-          call transpose_y_to_z(wk2_c2c,out,ph)
-       else
+! ===== Swap Y --> Z; 1D FFTs in Z =====
+if (dims(1)>1) then
+call transpose_y_to_z(wk2_c2c,out,ph)
+else
 #ifdef OVERWRITE
-          call transpose_y_to_z(in,out,ph)
+call transpose_y_to_z(in,out,ph)
 #else
-          call transpose_y_to_z(wk1,out,ph)
+call transpose_y_to_z(wk1,out,ph)
 #endif
-       end if
-       call c2c_1m_z(out,isign,ph)
+end if
+call c2c_1m_z(out,isign,ph)
 
-    else if (format==PHYSICAL_IN_X .AND. isign==DECOMP_2D_FFT_BACKWARD &
-         .OR. & 
-         format==PHYSICAL_IN_Z .AND. isign==DECOMP_2D_FFT_FORWARD) then
+else if (format==PHYSICAL_IN_X .AND. isign==DECOMP_2D_FFT_BACKWARD &
+.OR. & 
+format==PHYSICAL_IN_Z .AND. isign==DECOMP_2D_FFT_FORWARD) then
 
-       ! ===== 1D FFTs in Z =====
+! ===== 1D FFTs in Z =====
 #ifdef OVERWRITE
-       call c2c_1m_z(in,isign,ph)
+call c2c_1m_z(in,isign,ph)
 #else
-       allocate (wk1(ph%zsz(1),ph%zsz(2),ph%zsz(3)))
-       wk1 = in
-       call c2c_1m_z(wk1,isign,ph)
+allocate (wk1(ph%zsz(1),ph%zsz(2),ph%zsz(3)))
+wk1 = in
+call c2c_1m_z(wk1,isign,ph)
 #endif
 
-       ! ===== Swap Z --> Y; 1D FFTs in Y =====
-       if (dims(1)>1) then
+! ===== Swap Z --> Y; 1D FFTs in Y =====
+if (dims(1)>1) then
 #ifdef OVERWRITE
-          call transpose_z_to_y(in,wk2_c2c,ph)
+call transpose_z_to_y(in,wk2_c2c,ph)
 #else
-          call transpose_z_to_y(wk1,wk2_c2c,ph)
+call transpose_z_to_y(wk1,wk2_c2c,ph)
 #endif
-          call c2c_1m_y(wk2_c2c,isign,ph)
-       else  ! out==wk2_c2c if 1D decomposition
+call c2c_1m_y(wk2_c2c,isign,ph)
+else  ! out==wk2_c2c if 1D decomposition
 #ifdef OVERWRITE
-          call transpose_z_to_y(in,out,ph)
+call transpose_z_to_y(in,out,ph)
 #else
-          call transpose_z_to_y(wk1,out,ph)
+call transpose_z_to_y(wk1,out,ph)
 #endif
-          call c2c_1m_y(out,isign,ph)
-       end if
-
-       ! ===== Swap Y --> X; 1D FFTs in X =====
-       if (dims(1)>1) then
-          call transpose_y_to_x(wk2_c2c,out,ph)
-       end if
-       call c2c_1m_x(out,isign,ph)
-       
-    end if
-
-    return
-  end subroutine fft_3d_c2c
-
-  
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-  ! 3D forward FFT - real to complex
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-  subroutine fft_3d_r2c(in_r, out_c)
-    
-    implicit none
-    
-    real(mytype), dimension(:,:,:), intent(IN) :: in_r
-    complex(mytype), dimension(:,:,:), intent(OUT) :: out_c
-
-    if (format==PHYSICAL_IN_X) then
-
-       ! ===== 1D FFTs in X =====
-       call r2c_1m_x(in_r,wk13)
-
-       ! ===== Swap X --> Y; 1D FFTs in Y =====
-       if (dims(1)>1) then
-          call transpose_x_to_y(wk13,wk2_r2c,sp)
-          call c2c_1m_y(wk2_r2c,-1,sp)
-       else
-          call c2c_1m_y(wk13,-1,sp)
-       end if
-
-       ! ===== Swap Y --> Z; 1D FFTs in Z =====
-       if (dims(1)>1) then
-          call transpose_y_to_z(wk2_r2c,out_c,sp)
-       else
-          call transpose_y_to_z(wk13,out_c,sp)
-       end if
-       call c2c_1m_z(out_c,-1,sp)
-                
-    else if (format==PHYSICAL_IN_Z) then
-
-       ! ===== 1D FFTs in Z =====
-       call r2c_1m_z(in_r,wk13)
-
-       ! ===== Swap Z --> Y; 1D FFTs in Y =====
-       if (dims(1)>1) then
-          call transpose_z_to_y(wk13,wk2_r2c,sp)
-          call c2c_1m_y(wk2_r2c,-1,sp)
-       else  ! out_c==wk2_r2c if 1D decomposition
-          call transpose_z_to_y(wk13,out_c,sp)
-          call c2c_1m_y(out_c,-1,sp)
-       end if
-
-       ! ===== Swap Y --> X; 1D FFTs in X =====
-       if (dims(1)>1) then
-          call transpose_y_to_x(wk2_r2c,out_c,sp)
-       end if
-       call c2c_1m_x(out_c,-1,sp)
-
-    end if
-    
-    return
-  end subroutine fft_3d_r2c
-  
-  
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-  ! 3D inverse FFT - complex to real
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-  subroutine fft_3d_c2r(in_c, out_r)
-    
-    implicit none
-    
-    complex(mytype), dimension(:,:,:), intent(INOUT) :: in_c
-    real(mytype), dimension(:,:,:), intent(OUT) :: out_r
+call c2c_1m_y(out,isign,ph)
+end if
+
+! ===== Swap Y --> X; 1D FFTs in X =====
+if (dims(1)>1) then
+call transpose_y_to_x(wk2_c2c,out,ph)
+end if
+call c2c_1m_x(out,isign,ph)
+
+end if
+
+return
+end subroutine fft_3d_c2c
+
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+! 3D forward FFT - real to complex
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+subroutine fft_3d_r2c(in_r, out_c)
+
+implicit none
+
+real(mytype), dimension(:,:,:), intent(IN) :: in_r
+complex(mytype), dimension(:,:,:), intent(OUT) :: out_c
+
+if (format==PHYSICAL_IN_X) then
+
+! ===== 1D FFTs in X =====
+call r2c_1m_x(in_r,wk13)
+
+! ===== Swap X --> Y; 1D FFTs in Y =====
+if (dims(1)>1) then
+call transpose_x_to_y(wk13,wk2_r2c,sp)
+call c2c_1m_y(wk2_r2c,-1,sp)
+else
+call c2c_1m_y(wk13,-1,sp)
+end if
+
+! ===== Swap Y --> Z; 1D FFTs in Z =====
+if (dims(1)>1) then
+call transpose_y_to_z(wk2_r2c,out_c,sp)
+else
+call transpose_y_to_z(wk13,out_c,sp)
+end if
+call c2c_1m_z(out_c,-1,sp)
+
+else if (format==PHYSICAL_IN_Z) then
+
+! ===== 1D FFTs in Z =====
+call r2c_1m_z(in_r,wk13)
+
+! ===== Swap Z --> Y; 1D FFTs in Y =====
+if (dims(1)>1) then
+call transpose_z_to_y(wk13,wk2_r2c,sp)
+call c2c_1m_y(wk2_r2c,-1,sp)
+else  ! out_c==wk2_r2c if 1D decomposition
+call transpose_z_to_y(wk13,out_c,sp)
+call c2c_1m_y(out_c,-1,sp)
+end if
+
+! ===== Swap Y --> X; 1D FFTs in X =====
+if (dims(1)>1) then
+call transpose_y_to_x(wk2_r2c,out_c,sp)
+end if
+call c2c_1m_x(out_c,-1,sp)
+
+end if
+
+return
+end subroutine fft_3d_r2c
+
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+! 3D inverse FFT - complex to real
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+subroutine fft_3d_c2r(in_c, out_r)
+
+implicit none
+
+complex(mytype), dimension(:,:,:), intent(INOUT) :: in_c
+real(mytype), dimension(:,:,:), intent(OUT) :: out_r
 
 #ifndef OVERWRITE
-    complex(mytype), allocatable, dimension(:,:,:) :: wk1
+complex(mytype), allocatable, dimension(:,:,:) :: wk1
 #endif
 
-    if (format==PHYSICAL_IN_X) then
+if (format==PHYSICAL_IN_X) then
 
-       ! ===== 1D FFTs in Z =====
+! ===== 1D FFTs in Z =====
 #ifdef OVERWRITE
-       call c2c_1m_z(in_c,1,sp)       
+call c2c_1m_z(in_c,1,sp)       
 #else
-       allocate(wk1(sp%zsz(1),sp%zsz(2),sp%zsz(3)))
-       wk1 = in_c
-       call c2c_1m_z(wk1,1,sp)
+allocate(wk1(sp%zsz(1),sp%zsz(2),sp%zsz(3)))
+wk1 = in_c
+call c2c_1m_z(wk1,1,sp)
 #endif
 
-       ! ===== Swap Z --> Y; 1D FFTs in Y =====
+! ===== Swap Z --> Y; 1D FFTs in Y =====
 #ifdef OVERWRITE
-       call transpose_z_to_y(in_c,wk2_r2c,sp)
+call transpose_z_to_y(in_c,wk2_r2c,sp)
 #else
-       call transpose_z_to_y(wk1,wk2_r2c,sp)
+call transpose_z_to_y(wk1,wk2_r2c,sp)
 #endif
-       call c2c_1m_y(wk2_r2c,1,sp)
+call c2c_1m_y(wk2_r2c,1,sp)
 
-       ! ===== Swap Y --> X; 1D FFTs in X =====
-       if (dims(1)>1) then
-          call transpose_y_to_x(wk2_r2c,wk13,sp)
-          call c2r_1m_x(wk13,out_r)
-       else
-          call c2r_1m_x(wk2_r2c,out_r)
-       end if
+! ===== Swap Y --> X; 1D FFTs in X =====
+if (dims(1)>1) then
+call transpose_y_to_x(wk2_r2c,wk13,sp)
+call c2r_1m_x(wk13,out_r)
+else
+call c2r_1m_x(wk2_r2c,out_r)
+end if
 
-    else if (format==PHYSICAL_IN_Z) then
+else if (format==PHYSICAL_IN_Z) then
 
-       ! ===== 1D FFTs in X =====
+! ===== 1D FFTs in X =====
 #ifdef OVERWRITE
-       call c2c_1m_x(in_c,1,sp)
+call c2c_1m_x(in_c,1,sp)
 #else
-       allocate(wk1(sp%xsz(1),sp%xsz(2),sp%xsz(3)))
-       wk1 = in_c
-       call c2c_1m_x(wk1,1,sp)
+allocate(wk1(sp%xsz(1),sp%xsz(2),sp%xsz(3)))
+wk1 = in_c
+call c2c_1m_x(wk1,1,sp)
 #endif
 
-       ! ===== Swap X --> Y; 1D FFTs in Y =====
-       if (dims(1)>1) then
+! ===== Swap X --> Y; 1D FFTs in Y =====
+if (dims(1)>1) then
 #ifdef OVERWRITE
-          call transpose_x_to_y(in_c,wk2_r2c,sp)
+call transpose_x_to_y(in_c,wk2_r2c,sp)
 #else
-          call transpose_x_to_y(wk1,wk2_r2c,sp)
+call transpose_x_to_y(wk1,wk2_r2c,sp)
 #endif
-          call c2c_1m_y(wk2_r2c,1,sp)
-       else  ! in_c==wk2_r2c if 1D decomposition
+call c2c_1m_y(wk2_r2c,1,sp)
+else  ! in_c==wk2_r2c if 1D decomposition
 #ifdef OVERWRITE
-          call c2c_1m_y(in_c,1,sp)
+call c2c_1m_y(in_c,1,sp)
 #else
-          call c2c_1m_y(wk1,1,sp)
+call c2c_1m_y(wk1,1,sp)
 #endif
-       end if
+end if
 
-       ! ===== Swap Y --> Z; 1D FFTs in Z =====
-       if (dims(1)>1) then
-          call transpose_y_to_z(wk2_r2c,wk13,sp)
-       else
+! ===== Swap Y --> Z; 1D FFTs in Z =====
+if (dims(1)>1) then
+call transpose_y_to_z(wk2_r2c,wk13,sp)
+else
 #ifdef OVERWRITE
-          call transpose_y_to_z(in_c,wk13,sp)
+call transpose_y_to_z(in_c,wk13,sp)
 #else
-          call transpose_y_to_z(wk1,wk13,sp)
+call transpose_y_to_z(wk1,wk13,sp)
 #endif
-       end if
-       call c2r_1m_z(wk13,out_r)
+end if
+call c2r_1m_z(wk13,out_r)
 
-    end if
+end if
 
-    return
-  end subroutine fft_3d_c2r
+return
+end subroutine fft_3d_c2r
diff --git a/decomp2d/fft_fftw3.f90 b/decomp2d/fft_fftw3.f90
index 741c7423..09edb0d9 100644
--- a/decomp2d/fft_fftw3.f90
+++ b/decomp2d/fft_fftw3.f90
@@ -12,13 +12,13 @@
 ! This is the FFTW (version 3.x) implementation of the FFT library
 
 module decomp_2d_fft
-  
+
   use decomp_2d  ! 2D decomposition module
-  
+
   implicit none
 
   include "fftw3.f"
-  
+
   private        ! Make everything private unless declared public
 
   ! engine-specific global variables
@@ -256,9 +256,9 @@ module decomp_2d_fft
   end subroutine c2r_1m_z_plan
 
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !  This routine performs one-time initialisations for the FFT engine
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine init_fft_engine
 
     implicit none
@@ -278,7 +278,7 @@ module decomp_2d_fft
        call c2c_1m_z_plan(plan( 1,3), ph, FFTW_BACKWARD)
        call c2c_1m_y_plan(plan( 1,2), ph, FFTW_BACKWARD)
        call c2c_1m_x_plan(plan( 1,1), ph, FFTW_BACKWARD)
-       
+
        ! For R2C/C2R tranforms
        call r2c_1m_x_plan(plan(0,1), ph, sp)
        call c2c_1m_y_plan(plan(0,2), sp, FFTW_FORWARD )
@@ -296,7 +296,7 @@ module decomp_2d_fft
        call c2c_1m_x_plan(plan( 1,1), ph, FFTW_BACKWARD)
        call c2c_1m_y_plan(plan( 1,2), ph, FFTW_BACKWARD)
        call c2c_1m_z_plan(plan( 1,3), ph, FFTW_BACKWARD)
-       
+
        ! For R2C/C2R tranforms
        call r2c_1m_z_plan(plan(0,3), ph, sp)
        call c2c_1m_y_plan(plan(0,2), sp, FFTW_FORWARD )
@@ -304,22 +304,22 @@ module decomp_2d_fft
        call c2c_1m_x_plan(plan(2,1), sp, FFTW_BACKWARD)
        call c2c_1m_y_plan(plan(2,2), sp, FFTW_BACKWARD)
        call c2r_1m_z_plan(plan(2,3), sp, ph)
-       
+
     end if
 
     return
   end subroutine init_fft_engine
 
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !  This routine performs one-time finalisations for the FFT engine
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine finalize_fft_engine
 
     implicit none
 
     integer :: i,j
-    
+
     do j=1,3
        do i=-1,2
 #ifdef DOUBLE_PREC
@@ -390,9 +390,9 @@ module decomp_2d_fft
     integer*8, intent(IN) :: plan1
 
 #ifdef DOUBLE_PREC
-       call dfftw_execute_dft(plan1, inout, inout)
+    call dfftw_execute_dft(plan1, inout, inout)
 #else
-       call sfftw_execute_dft(plan1, inout, inout)
+    call sfftw_execute_dft(plan1, inout, inout)
 #endif
 
     return
@@ -472,13 +472,13 @@ module decomp_2d_fft
 
 
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! 3D FFT - complex to complex
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine fft_3d_c2c(in, out, isign)
-    
+
     implicit none
-    
+
     complex(mytype), dimension(:,:,:), intent(INOUT) :: in
     complex(mytype), dimension(:,:,:), intent(OUT) :: out
     integer, intent(IN) :: isign
@@ -489,7 +489,7 @@ module decomp_2d_fft
 
     if (format==PHYSICAL_IN_X .AND. isign==DECOMP_2D_FFT_FORWARD .OR.  &
          format==PHYSICAL_IN_Z .AND. isign==DECOMP_2D_FFT_BACKWARD) then
-       
+
        ! ===== 1D FFTs in X =====
 #ifdef OVERWRITE
        call c2c_1m_x(in,isign,plan(isign,1))
@@ -563,7 +563,7 @@ module decomp_2d_fft
           call transpose_y_to_x(wk2_c2c,out,ph)
        end if
        call c2c_1m_x(out,isign,plan(isign,1))
-       
+
     end if
 
 #ifndef OVERWRITE
@@ -573,14 +573,14 @@ module decomp_2d_fft
     return
   end subroutine fft_3d_c2c
 
-  
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! 3D forward FFT - real to complex
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine fft_3d_r2c(in_r, out_c)
-    
+
     implicit none
-    
+
     real(mytype), dimension(:,:,:), intent(IN) :: in_r
     complex(mytype), dimension(:,:,:), intent(OUT) :: out_c
 
@@ -604,7 +604,7 @@ module decomp_2d_fft
           call transpose_y_to_z(wk13,out_c,sp)
        end if
        call c2c_1m_z(out_c,-1,plan(0,3))
-                
+
     else if (format==PHYSICAL_IN_Z) then
 
        ! ===== 1D FFTs in Z =====
@@ -626,18 +626,18 @@ module decomp_2d_fft
        call c2c_1m_x(out_c,-1,plan(0,1))
 
     end if
-    
+
     return
   end subroutine fft_3d_r2c
-  
-  
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! 3D inverse FFT - complex to real
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine fft_3d_c2r(in_c, out_r)
-    
+
     implicit none
-    
+
     complex(mytype), dimension(:,:,:), intent(INOUT) :: in_c
     real(mytype), dimension(:,:,:), intent(OUT) :: out_r
 
@@ -720,5 +720,5 @@ module decomp_2d_fft
     return
   end subroutine fft_3d_c2r
 
-  
+
 end module decomp_2d_fft
diff --git a/decomp2d/fft_generic.f90 b/decomp2d/fft_generic.f90
index e9b05b62..4fb5c41b 100644
--- a/decomp2d/fft_generic.f90
+++ b/decomp2d/fft_generic.f90
@@ -12,12 +12,12 @@
 ! This is the 'generic' implementation of the FFT library
 
 module decomp_2d_fft
-  
+
   use decomp_2d  ! 2D decomposition module
   use glassman
-  
+
   implicit none
-  
+
   private        ! Make everything private unless declared public
 
   ! engine-specific global variables
@@ -27,9 +27,9 @@ module decomp_2d_fft
   ! generic interface definitions and several subroutines
 #include "fft_common.inc"
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !  This routine performs one-time initialisations for the FFT engine
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine init_fft_engine
 
     implicit none
@@ -51,9 +51,9 @@ module decomp_2d_fft
   end subroutine init_fft_engine
 
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !  This routine performs one-time finalisations for the FFT engine
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine finalize_fft_engine
 
     implicit none
@@ -77,7 +77,7 @@ module decomp_2d_fft
     TYPE(DECOMP_INFO), intent(IN) :: decomp
 
     integer :: i,j,k
-    
+
     do k=1,decomp%xsz(3)
        do j=1,decomp%xsz(2)
           do i=1,decomp%xsz(1)
@@ -299,5 +299,5 @@ module decomp_2d_fft
 
 #include "fft_common_3d.inc"
 
-  
+
 end module decomp_2d_fft
diff --git a/decomp2d/fft_mkl.f90 b/decomp2d/fft_mkl.f90
index 4cbaf14b..5a957741 100644
--- a/decomp2d/fft_mkl.f90
+++ b/decomp2d/fft_mkl.f90
@@ -12,16 +12,16 @@
 ! This is the Intel MKL implementation of the FFT library
 
 module decomp_2d_fft
-  
+
   use decomp_2d  ! 2D decomposition module
   use MKL_DFTI   ! MKL FFT module
-  
+
   implicit none
-  
+
   private        ! Make everything private unless declared public
 
   ! engine-specific global variables
-  
+
   ! Descriptors for MKL FFT, one for each set of 1D FFTs
   !  for c2c transforms
   type(DFTI_DESCRIPTOR), pointer :: c2c_x, c2c_y, c2c_z     
@@ -34,9 +34,9 @@ module decomp_2d_fft
   ! generic interface definitions and several subroutines
 #include "fft_common.inc"
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !  This routine performs one-time initialisations for the FFT engine
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine init_fft_engine
 
     implicit none
@@ -59,7 +59,7 @@ module decomp_2d_fft
        call c2c_1m_z_plan(c2c_z2, sp)
        call r2c_1m_x_plan(c2r_x, ph, sp,  1)
 
-    ! For R2C/C2R tranfroms with physical space in Z-pencil
+       ! For R2C/C2R tranfroms with physical space in Z-pencil
     else if (format == PHYSICAL_IN_Z) then
        call r2c_1m_z_plan(r2c_z, ph, sp, -1)
        call c2c_1m_y_plan(c2c_y2, sp)
@@ -70,7 +70,7 @@ module decomp_2d_fft
     return
   end subroutine init_fft_engine
 
-  
+
   ! Return an MKL plan for multiple 1D c2c FFTs in X direction
   subroutine c2c_1m_x_plan(desc, decomp)
 
@@ -199,7 +199,7 @@ module decomp_2d_fft
             decomp_ph%xsz(1))
     end if
     status = DftiCommitDescriptor(desc)
-    
+
     return
   end subroutine r2c_1m_x_plan
 
@@ -215,7 +215,7 @@ module decomp_2d_fft
     integer :: status, strides(2)
 
     ! c2r and r2c plans are almost the same, just swap input/output
-    
+
 #ifdef DOUBLE_PREC
     status = DftiCreateDescriptor(desc, DFTI_DOUBLE, &
          DFTI_REAL, 1, decomp_ph%zsz(3))
@@ -244,20 +244,20 @@ module decomp_2d_fft
        status = DftiSetValue(desc, DFTI_INPUT_STRIDES, strides)
     end if
     status = DftiCommitDescriptor(desc)
-    
+
     return
   end subroutine r2c_1m_z_plan
 
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !  This routine performs one-time finalisations for the FFT engine
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine finalize_fft_engine
 
     implicit none
 
     integer :: status
-    
+
     status = DftiFreeDescriptor(c2c_x)
     status = DftiFreeDescriptor(c2c_y)
     status = DftiFreeDescriptor(c2c_z)
@@ -276,13 +276,13 @@ module decomp_2d_fft
   end subroutine finalize_fft_engine
 
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! 3D FFT - complex to complex
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine fft_3d_c2c(in, out, isign)
-    
+
     implicit none
-    
+
     complex(mytype), dimension(:,:,:), intent(IN) :: in
     complex(mytype), dimension(:,:,:), intent(OUT) :: out
     integer, intent(IN) :: isign   
@@ -292,28 +292,28 @@ module decomp_2d_fft
 
     if (format==PHYSICAL_IN_X .AND. isign==DECOMP_2D_FFT_FORWARD .OR.  &
          format==PHYSICAL_IN_Z .AND. isign==DECOMP_2D_FFT_BACKWARD) then
-       
+
        ! ===== 1D FFTs in X =====
        allocate (wk1(ph%xsz(1),ph%xsz(2),ph%xsz(3)))
-!       if (isign==DECOMP_2D_FFT_FORWARD) then
-!          status = DftiComputeForward(c2c_x, in(:,1,1), wk1(:,1,1))
-!       else if (isign==DECOMP_2D_FFT_BACKWARD) then
-!          status = DftiComputeBackward(c2c_x, in(:,1,1), wk1(:,1,1))
-!       end if
+       !       if (isign==DECOMP_2D_FFT_FORWARD) then
+       !          status = DftiComputeForward(c2c_x, in(:,1,1), wk1(:,1,1))
+       !       else if (isign==DECOMP_2D_FFT_BACKWARD) then
+       !          status = DftiComputeBackward(c2c_x, in(:,1,1), wk1(:,1,1))
+       !       end if
        status = wrapper_c2c(c2c_x, in, wk1, isign)
 
        ! ===== Swap X --> Y =====
        allocate (wk2(ph%ysz(1),ph%ysz(2),ph%ysz(3)))
        call transpose_x_to_y(wk1,wk2,ph)
-       
+
        ! ===== 1D FFTs in Y =====
        allocate (wk2b(ph%ysz(1),ph%ysz(2),ph%ysz(3))) 
        do k=1,ph%xsz(3) ! one Z-plane at a time
-!          if (isign==DECOMP_2D_FFT_FORWARD) then
-!             status = DftiComputeForward(c2c_y, wk2(:,1,k), wk2b(:,1,k))
-!          else if (isign==DECOMP_2D_FFT_BACKWARD) then
-!             status = DftiComputeBackward(c2c_y, wk2(:,1,k), wk2b(:,1,k))
-!          end if
+          !          if (isign==DECOMP_2D_FFT_FORWARD) then
+          !             status = DftiComputeForward(c2c_y, wk2(:,1,k), wk2b(:,1,k))
+          !          else if (isign==DECOMP_2D_FFT_BACKWARD) then
+          !             status = DftiComputeBackward(c2c_y, wk2(:,1,k), wk2b(:,1,k))
+          !          end if
           status = wrapper_c2c(c2c_y, wk2(1,1,k), wk2b(1,1,k), isign)
        end do
 
@@ -322,11 +322,11 @@ module decomp_2d_fft
        call transpose_y_to_z(wk2b,wk3,ph)
 
        ! ===== 1D FFTs in Z =====
-!       if (isign==DECOMP_2D_FFT_FORWARD) then
-!          status = DftiComputeForward(c2c_z, wk3(:,1,1), out(:,1,1))
-!       else if (isign==DECOMP_2D_FFT_BACKWARD) then
-!          status = DftiComputeBackward(c2c_z, wk3(:,1,1), out(:,1,1))
-!       end if
+       !       if (isign==DECOMP_2D_FFT_FORWARD) then
+       !          status = DftiComputeForward(c2c_z, wk3(:,1,1), out(:,1,1))
+       !       else if (isign==DECOMP_2D_FFT_BACKWARD) then
+       !          status = DftiComputeBackward(c2c_z, wk3(:,1,1), out(:,1,1))
+       !       end if
        status = wrapper_c2c(c2c_z, wk3, out, isign)
 
     else if (format==PHYSICAL_IN_X .AND. isign==DECOMP_2D_FFT_BACKWARD &
@@ -335,56 +335,56 @@ module decomp_2d_fft
 
        ! ===== 1D FFTs in Z =====
        allocate (wk1(ph%zsz(1),ph%zsz(2),ph%zsz(3)))
-!       if (isign==DECOMP_2D_FFT_FORWARD) then
-!          status = DftiComputeForward(c2c_z, in(:,1,1), wk1(:,1,1))
-!       else if (isign==DECOMP_2D_FFT_BACKWARD) then
-!          status = DftiComputeBackward(c2c_z, in(:,1,1), wk1(:,1,1))
-!       end if
+       !       if (isign==DECOMP_2D_FFT_FORWARD) then
+       !          status = DftiComputeForward(c2c_z, in(:,1,1), wk1(:,1,1))
+       !       else if (isign==DECOMP_2D_FFT_BACKWARD) then
+       !          status = DftiComputeBackward(c2c_z, in(:,1,1), wk1(:,1,1))
+       !       end if
        status = wrapper_c2c(c2c_z, in, wk1, isign)
-       
+
        ! ===== Swap Z --> Y =====
        allocate (wk2(ph%ysz(1),ph%ysz(2),ph%ysz(3)))
        call transpose_z_to_y(wk1,wk2,ph)
-       
+
        ! ===== 1D FFTs in Y =====
        allocate (wk2b(ph%ysz(1),ph%ysz(2),ph%ysz(3)))
        do k=1,ph%xsz(3) ! one Z-plane at a time
-!          if (isign==DECOMP_2D_FFT_FORWARD) then
-!             status = DftiComputeForward(c2c_y, wk2(:,1,k), wk2b(:,1,k))
-!          else if (isign==DECOMP_2D_FFT_BACKWARD) then
-!             status = DftiComputeBackward(c2c_y, wk2(:,1,k), wk2b(:,1,k))
-!          end if
+          !          if (isign==DECOMP_2D_FFT_FORWARD) then
+          !             status = DftiComputeForward(c2c_y, wk2(:,1,k), wk2b(:,1,k))
+          !          else if (isign==DECOMP_2D_FFT_BACKWARD) then
+          !             status = DftiComputeBackward(c2c_y, wk2(:,1,k), wk2b(:,1,k))
+          !          end if
           status = wrapper_c2c(c2c_y, wk2(1,1,k), wk2b(1,1,k), isign)
        end do
-       
+
        ! ===== Swap Y --> X =====
        allocate (wk3(ph%xsz(1),ph%xsz(2),ph%xsz(3)))
        call transpose_y_to_x(wk2b,wk3,ph)
-       
+
        ! ===== 1D FFTs in X =====
-!       if (isign==DECOMP_2D_FFT_FORWARD) then
-!          status = DftiComputeForward(c2c_x, wk3(:,1,1), out(:,1,1))
-!       else if (isign==DECOMP_2D_FFT_BACKWARD) then
-!          status = DftiComputeBackward(c2c_x, wk3(:,1,1), out(:,1,1))
-!       end if
+       !       if (isign==DECOMP_2D_FFT_FORWARD) then
+       !          status = DftiComputeForward(c2c_x, wk3(:,1,1), out(:,1,1))
+       !       else if (isign==DECOMP_2D_FFT_BACKWARD) then
+       !          status = DftiComputeBackward(c2c_x, wk3(:,1,1), out(:,1,1))
+       !       end if
        status = wrapper_c2c(c2c_x, wk3, out, isign)
-       
+
     end if
 
     return
   end subroutine fft_3d_c2c
-  
-  
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! 3D forward FFT - real to complex
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine fft_3d_r2c(in_r, out_c)
-    
+
     implicit none
-    
+
     real(mytype), dimension(:,:,:), intent(IN) :: in_r
     complex(mytype), dimension(:,:,:), intent(OUT) :: out_c
-    
+
     complex(mytype), allocatable, dimension(:,:,:) :: wk1,wk2,wk2b,wk3
     integer :: k, status, isign
 
@@ -394,7 +394,7 @@ module decomp_2d_fft
 
        ! ===== 1D FFTs in X =====
        allocate(wk1(sp%xsz(1),sp%xsz(2),sp%xsz(3)))
-!       status = DftiComputeForward(r2c_x, in_r(:,1,1), wk1(:,1,1))       
+       !       status = DftiComputeForward(r2c_x, in_r(:,1,1), wk1(:,1,1))       
        status = wrapper_r2c(r2c_x, in_r, wk1)
 
        ! ===== Swap X --> Y =====
@@ -404,7 +404,7 @@ module decomp_2d_fft
        ! ===== 1D FFTs in Y =====
        allocate (wk2b(sp%ysz(1),sp%ysz(2),sp%ysz(3)))
        do k=1,sp%ysz(3)
-!          status = DftiComputeForward(c2c_y2, wk2(:,1,k), wk2b(:,1,k))
+          !          status = DftiComputeForward(c2c_y2, wk2(:,1,k), wk2b(:,1,k))
           status = wrapper_c2c(c2c_y2, wk2(1,1,k), wk2b(1,1,k), isign)
        end do
 
@@ -413,14 +413,14 @@ module decomp_2d_fft
        call transpose_y_to_z(wk2b,wk3,sp)
 
        ! ===== 1D FFTs in Z =====
-!       status = DftiComputeForward(c2c_z2, wk3(:,1,1), out_c(:,1,1))
+       !       status = DftiComputeForward(c2c_z2, wk3(:,1,1), out_c(:,1,1))
        status = wrapper_c2c(c2c_z2, wk3, out_c, isign)
-                
+
     else if (format==PHYSICAL_IN_Z) then
 
        ! ===== 1D FFTs in Z =====
        allocate(wk1(sp%zsz(1),sp%zsz(2),sp%zsz(3)))
-!       status = DftiComputeForward(r2c_z, in_r(:,1,1), wk1(:,1,1))
+       !       status = DftiComputeForward(r2c_z, in_r(:,1,1), wk1(:,1,1))
        status = wrapper_r2c(r2c_z, in_r, wk1)
 
        ! ===== Swap Z --> Y =====
@@ -430,7 +430,7 @@ module decomp_2d_fft
        ! ===== 1D FFTs in Y =====
        allocate (wk2b(sp%ysz(1),sp%ysz(2),sp%ysz(3)))
        do k=1,sp%ysz(3)
-!          status = DftiComputeForward(c2c_y2, wk2(:,1,k), wk2b(:,1,k))
+          !          status = DftiComputeForward(c2c_y2, wk2(:,1,k), wk2b(:,1,k))
           status = wrapper_c2c(c2c_y2, wk2(1,1,k), wk2b(1,1,k), isign)
        end do
 
@@ -439,25 +439,25 @@ module decomp_2d_fft
        call transpose_y_to_x(wk2b,wk3,sp)
 
        ! ===== 1D FFTs in X =====
-!       status = DftiComputeForward(c2c_x2, wk3(:,1,1), out_c(:,1,1))
+       !       status = DftiComputeForward(c2c_x2, wk3(:,1,1), out_c(:,1,1))
        status = wrapper_c2c(c2c_x2, wk3, out_c, isign)
 
     end if
-    
+
     return
   end subroutine fft_3d_r2c
-  
-  
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! 3D inverse FFT - complex to real
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine fft_3d_c2r(in_c, out_r)
-    
+
     implicit none
-    
+
     complex(mytype), dimension(:,:,:), intent(IN) :: in_c
     real(mytype), dimension(:,:,:), intent(OUT) :: out_r
-    
+
     complex(mytype), allocatable, dimension(:,:,:) :: wk1,wk2,wk2b,wk3
     integer :: k, status, isign
 
@@ -467,7 +467,7 @@ module decomp_2d_fft
 
        ! ===== 1D FFTs in Z =====
        allocate (wk1(sp%zsz(1),sp%zsz(2),sp%zsz(3)))
-!       status = DftiComputeBackward(c2c_z2, in_c(:,1,1), wk1(:,1,1))
+       !       status = DftiComputeBackward(c2c_z2, in_c(:,1,1), wk1(:,1,1))
        status = wrapper_c2c(c2c_z2, in_c, wk1, isign)
 
        ! ===== Swap Z --> Y =====
@@ -477,7 +477,7 @@ module decomp_2d_fft
        ! ===== 1D FFTs in Y =====
        allocate (wk2b(sp%ysz(1),sp%ysz(2),sp%ysz(3)))
        do k=1,sp%ysz(3)
-!          status = DftiComputeBackward(c2c_y2, wk2(:,1,k), wk2b(:,1,k))
+          !          status = DftiComputeBackward(c2c_y2, wk2(:,1,k), wk2b(:,1,k))
           status = wrapper_c2c(c2c_y2, wk2(1,1,k), wk2b(1,1,k), isign)
        end do
 
@@ -486,14 +486,14 @@ module decomp_2d_fft
        call transpose_y_to_x(wk2b,wk3,sp)
 
        ! ===== 1D FFTs in X =====
-!       status = DftiComputeBackward(c2r_x, wk3(:,1,1), out_r(:,1,1))
+       !       status = DftiComputeBackward(c2r_x, wk3(:,1,1), out_r(:,1,1))
        status =  wrapper_c2r(c2r_x, wk3, out_r)
 
     else if (format==PHYSICAL_IN_Z) then
 
        ! ===== 1D FFTs in X =====
        allocate(wk1(sp%xsz(1),sp%xsz(2),sp%xsz(3)))
-!       status = DftiComputeBackward(c2c_x2, in_c(:,1,1), wk1(:,1,1))
+       !       status = DftiComputeBackward(c2c_x2, in_c(:,1,1), wk1(:,1,1))
        status = wrapper_c2c(c2c_x2, in_c, wk1, isign)
 
        ! ===== Swap X --> Y =====
@@ -503,7 +503,7 @@ module decomp_2d_fft
        ! ===== 1D FFTs in Y =====
        allocate (wk2b(sp%ysz(1),sp%ysz(2),sp%ysz(3)))
        do k=1,sp%ysz(3)
-!          status = DftiComputeBackward(c2c_y2, wk2(:,1,k), wk2b(:,1,k))
+          !          status = DftiComputeBackward(c2c_y2, wk2(:,1,k), wk2b(:,1,k))
           status = wrapper_c2c(c2c_y2, wk2(1,1,k), wk2b(1,1,k), isign)
        end do
 
@@ -512,7 +512,7 @@ module decomp_2d_fft
        call transpose_y_to_z(wk2b,wk3,sp)
 
        ! ===== 1D FFTs in Z =====
-!       status = DftiComputeBackward(c2r_z, wk3(:,1,1), out_r(:,1,1))
+       !       status = DftiComputeBackward(c2r_z, wk3(:,1,1), out_r(:,1,1))
        status =  wrapper_c2r(c2r_z, wk3, out_r)
 
     end if
@@ -521,7 +521,7 @@ module decomp_2d_fft
   end subroutine fft_3d_c2r
 
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Wrapper functions so that one can pass 3D arrays to DftiCompute
   !  -- MKL accepts only 1D arrays as input/output for its multi-
   !     dimensional FFTs.
@@ -533,7 +533,7 @@ module decomp_2d_fft
   !     rather than referring to the same memory address, i.e. 3D array
   !     A and 1D array A(:,1,1) may refer to different memory location.
   !  -- Using the following wrappers is safe and standard conforming.
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
   integer function wrapper_c2c(desc, in, out, isign)
 
@@ -555,9 +555,9 @@ module decomp_2d_fft
   end function wrapper_c2c
 
   integer function wrapper_r2c(desc, in, out)
-    
+
     implicit none
-    
+
     type(DFTI_DESCRIPTOR), pointer :: desc
     real(mytype), dimension(*) :: in 
     complex(mytype), dimension(*) :: out
@@ -579,5 +579,5 @@ module decomp_2d_fft
 
     return
   end function wrapper_c2r
-  
+
 end module decomp_2d_fft
diff --git a/decomp2d/glassman.f90 b/decomp2d/glassman.f90
index ed205656..05545e85 100644
--- a/decomp2d/glassman.f90
+++ b/decomp2d/glassman.f90
@@ -20,7 +20,7 @@ module glassman
 
 contains
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Following is a FFT implementation based on algorithm proposed by
   ! Glassman, a general FFT algorithm supporting arbitrary input length.
   !
@@ -33,21 +33,21 @@ contains
   ! Updated  
   !  -  to handle double-precision as well
   !  -  unnecessary scaling code removed
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
   SUBROUTINE SPCFFT(U,N,ISIGN,WORK)
-    
+
     IMPLICIT NONE
-    
+
     LOGICAL :: INU
     INTEGER :: A,B,C,N,I,ISIGN
     COMPLEX(mytype) :: U(*),WORK(*)
-    
+
     A = 1
     B = N
     C = 1
     INU = .TRUE.
-    
+
     DO WHILE ( B .GT. 1 )
        A = C * A
        C = 2
@@ -62,36 +62,36 @@ contains
        END IF
        INU = ( .NOT. INU )
     END DO
-    
+
     IF ( .NOT. INU ) THEN
        DO I = 1, N
           U(I) = WORK(I)
        END DO
     END IF
-    
+
     RETURN
   END SUBROUTINE SPCFFT
-  
-  
+
+
   SUBROUTINE SPCPFT( A, B, C, UIN, UOUT, ISIGN )
-    
+
     IMPLICIT NONE
-    
+
     INTEGER :: ISIGN,A,B,C,IA,IB,IC,JCR,JC
-    
+
     DOUBLE PRECISION :: ANGLE
-    
+
     COMPLEX(mytype) :: UIN(B,C,A),UOUT(B,A,C),DELTA,OMEGA,SUM
-    
+
     ANGLE = 6.28318530717958_mytype / REAL( A * C, kind=mytype )
     OMEGA = CMPLX( 1.0, 0.0, kind=mytype )
-    
+
     IF( ISIGN .EQ. 1 ) THEN
        DELTA = CMPLX( DCOS(ANGLE), DSIN(ANGLE), kind=mytype )
     ELSE
        DELTA = CMPLX( DCOS(ANGLE), -DSIN(ANGLE), kind=mytype )
     END IF
-    
+
     DO IC = 1, C
        DO IA = 1, A
           DO IB = 1, B
@@ -105,32 +105,32 @@ contains
           OMEGA = DELTA * OMEGA
        END DO
     END DO
-    
+
     RETURN
   END SUBROUTINE SPCPFT
 
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! A 3D real-to-complex routine implemented using the 1D FFT above
   !   Input:   nx*ny*nz real numbers
   !   Output:  (nx/2+1)*ny*nz complex numbers
   ! Just like big FFT libraries (such as FFTW) do
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine glassman_3d_r2c(in_r,nx,ny,nz,out_c)
 
     implicit none
-    
+
     integer, intent(IN) :: nx,ny,nz
     real(mytype), dimension(nx,ny,nz) :: in_r
     complex(mytype), dimension(nx/2+1,ny,nz) :: out_c
-    
+
     complex(mytype), allocatable, dimension(:) :: buf, scratch
     integer :: maxsize, i,j,k
-    
+
     maxsize = max(nx, max(ny,nz))
     allocate(buf(maxsize))
     allocate(scratch(maxsize))
-    
+
     ! ===== 1D FFTs in X =====
     do k=1,nz
        do j=1,ny
@@ -147,7 +147,7 @@ contains
           end do
        end do
     end do
-    
+
     ! ===== 1D FFTs in Y =====
     do k=1,nz
        do i=1,nx/2+1
@@ -160,7 +160,7 @@ contains
           end do
        end do
     end do
-    
+
     ! ===== 1D FFTs in Z =====
     do j=1,ny
        do i=1,nx/2+1
@@ -173,9 +173,9 @@ contains
           end do
        end do
     end do
-    
+
     deallocate(buf,scratch)
-    
+
     return
   end subroutine glassman_3d_r2c
 
diff --git a/decomp2d/halo.inc b/decomp2d/halo.inc
index 68ca413c..9fd4b761 100644
--- a/decomp2d/halo.inc
+++ b/decomp2d/halo.inc
@@ -9,107 +9,107 @@
 !
 !=======================================================================
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-  ! Halo cell support for neighbouring pencils to exchange data
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-  subroutine update_halo_real(in, out, level, opt_decomp, opt_global)
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+! Halo cell support for neighbouring pencils to exchange data
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+subroutine update_halo_real(in, out, level, opt_decomp, opt_global)
 
-    implicit none
+implicit none
 
-    integer, intent(IN) :: level      ! levels of halo cells required
-    real(mytype), dimension(:,:,:), intent(IN) :: in    
-    real(mytype), allocatable, dimension(:,:,:), intent(OUT) :: out
-    TYPE(DECOMP_INFO), optional :: opt_decomp
-    logical, optional :: opt_global
+integer, intent(IN) :: level      ! levels of halo cells required
+real(mytype), dimension(:,:,:), intent(IN) :: in    
+real(mytype), allocatable, dimension(:,:,:), intent(OUT) :: out
+TYPE(DECOMP_INFO), optional :: opt_decomp
+logical, optional :: opt_global
 
-    TYPE(DECOMP_INFO) :: decomp
-    logical :: global
+TYPE(DECOMP_INFO) :: decomp
+logical :: global
 
-    ! starting/ending index of array with halo cells
-    integer :: xs, ys, zs, xe, ye, ze
+! starting/ending index of array with halo cells
+integer :: xs, ys, zs, xe, ye, ze
 
-    integer :: i, j, k, s1, s2, s3, ierror
-    integer :: data_type
+integer :: i, j, k, s1, s2, s3, ierror
+integer :: data_type
 
-    integer :: icount, ilength, ijump 
-    integer :: halo12, halo21, halo31, halo32                 
-    integer, dimension(4) :: requests
-    integer, dimension(MPI_STATUS_SIZE,4) :: status
-    integer :: tag_e, tag_w, tag_n, tag_s, tag_t, tag_b
+integer :: icount, ilength, ijump 
+integer :: halo12, halo21, halo31, halo32                 
+integer, dimension(4) :: requests
+integer, dimension(MPI_STATUS_SIZE,4) :: status
+integer :: tag_e, tag_w, tag_n, tag_s, tag_t, tag_b
 
-    data_type = real_type
+data_type = real_type
 
 #include "halo_common.inc"
 
-    return
-  end subroutine update_halo_real
+return
+end subroutine update_halo_real
 
 
-  subroutine update_halo_complex(in, out, level, opt_decomp, opt_global)
+subroutine update_halo_complex(in, out, level, opt_decomp, opt_global)
 
-    implicit none
+implicit none
 
-    integer, intent(IN) :: level      ! levels of halo cells required
-    complex(mytype), dimension(:,:,:), intent(IN) :: in    
-    complex(mytype), allocatable, dimension(:,:,:), intent(OUT) :: out
-    TYPE(DECOMP_INFO), optional :: opt_decomp
-    logical, optional :: opt_global
+integer, intent(IN) :: level      ! levels of halo cells required
+complex(mytype), dimension(:,:,:), intent(IN) :: in    
+complex(mytype), allocatable, dimension(:,:,:), intent(OUT) :: out
+TYPE(DECOMP_INFO), optional :: opt_decomp
+logical, optional :: opt_global
 
-    TYPE(DECOMP_INFO) :: decomp
-    logical :: global
+TYPE(DECOMP_INFO) :: decomp
+logical :: global
 
-    ! starting/ending index of array with halo cells
-    integer :: xs, ys, zs, xe, ye, ze
+! starting/ending index of array with halo cells
+integer :: xs, ys, zs, xe, ye, ze
 
-    integer :: i, j, k, s1, s2, s3, ierror
-    integer :: data_type
+integer :: i, j, k, s1, s2, s3, ierror
+integer :: data_type
 
-    integer :: icount, ilength, ijump 
-    integer :: halo12, halo21, halo31, halo32                 
-    integer, dimension(4) :: requests
-    integer, dimension(MPI_STATUS_SIZE,4) :: status
-    integer :: tag_e, tag_w, tag_n, tag_s, tag_t, tag_b
+integer :: icount, ilength, ijump 
+integer :: halo12, halo21, halo31, halo32                 
+integer, dimension(4) :: requests
+integer, dimension(MPI_STATUS_SIZE,4) :: status
+integer :: tag_e, tag_w, tag_n, tag_s, tag_t, tag_b
 
-    data_type = complex_type
+data_type = complex_type
 
 #include "halo_common.inc"
 
-    return
-  end subroutine update_halo_complex
-
-
-
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-  ! To support halo-cell exchange:
-  !   find the MPI ranks of neighbouring pencils
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-  subroutine init_neighbour
-
-    integer :: ierror
-
-    ! For X-pencil
-    neighbour(1,1) = MPI_PROC_NULL               ! east
-    neighbour(1,2) = MPI_PROC_NULL               ! west
-    call MPI_CART_SHIFT(DECOMP_2D_COMM_CART_X, 0, 1, &
-         neighbour(1,4), neighbour(1,3), ierror) ! north & south
-    call MPI_CART_SHIFT(DECOMP_2D_COMM_CART_X, 1, 1, &
-         neighbour(1,6), neighbour(1,5), ierror) ! top & bottom
-
-    ! For Y-pencil
-    call MPI_CART_SHIFT(DECOMP_2D_COMM_CART_Y, 0, 1, &
-         neighbour(2,2), neighbour(2,1), ierror) ! east & west
-    neighbour(2,3) = MPI_PROC_NULL               ! north
-    neighbour(2,4) = MPI_PROC_NULL               ! south
-    call MPI_CART_SHIFT(DECOMP_2D_COMM_CART_Y, 1, 1, &
-         neighbour(2,6), neighbour(2,5), ierror) ! top & bottom
-
-    ! For Z-pencil
-    call MPI_CART_SHIFT(DECOMP_2D_COMM_CART_Z, 0, 1, &
-         neighbour(3,2), neighbour(3,1), ierror) ! east & west
-    call MPI_CART_SHIFT(DECOMP_2D_COMM_CART_Z, 1, 1, &
-         neighbour(3,4), neighbour(3,3), ierror) ! north & south
-    neighbour(3,5) = MPI_PROC_NULL               ! top
-    neighbour(3,6) = MPI_PROC_NULL               ! bottom
-
-    return
-  end subroutine init_neighbour
+return
+end subroutine update_halo_complex
+
+
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+! To support halo-cell exchange:
+!   find the MPI ranks of neighbouring pencils
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+subroutine init_neighbour
+
+integer :: ierror
+
+! For X-pencil
+neighbour(1,1) = MPI_PROC_NULL               ! east
+neighbour(1,2) = MPI_PROC_NULL               ! west
+call MPI_CART_SHIFT(DECOMP_2D_COMM_CART_X, 0, 1, &
+neighbour(1,4), neighbour(1,3), ierror) ! north & south
+call MPI_CART_SHIFT(DECOMP_2D_COMM_CART_X, 1, 1, &
+neighbour(1,6), neighbour(1,5), ierror) ! top & bottom
+
+! For Y-pencil
+call MPI_CART_SHIFT(DECOMP_2D_COMM_CART_Y, 0, 1, &
+neighbour(2,2), neighbour(2,1), ierror) ! east & west
+neighbour(2,3) = MPI_PROC_NULL               ! north
+neighbour(2,4) = MPI_PROC_NULL               ! south
+call MPI_CART_SHIFT(DECOMP_2D_COMM_CART_Y, 1, 1, &
+neighbour(2,6), neighbour(2,5), ierror) ! top & bottom
+
+! For Z-pencil
+call MPI_CART_SHIFT(DECOMP_2D_COMM_CART_Z, 0, 1, &
+neighbour(3,2), neighbour(3,1), ierror) ! east & west
+call MPI_CART_SHIFT(DECOMP_2D_COMM_CART_Z, 1, 1, &
+neighbour(3,4), neighbour(3,3), ierror) ! north & south
+neighbour(3,5) = MPI_PROC_NULL               ! top
+neighbour(3,6) = MPI_PROC_NULL               ! bottom
+
+return
+end subroutine init_neighbour
diff --git a/decomp2d/halo_common.inc b/decomp2d/halo_common.inc
index a5187deb..1064f07a 100644
--- a/decomp2d/halo_common.inc
+++ b/decomp2d/halo_common.inc
@@ -12,414 +12,414 @@
 ! This file contain common code to be included by subroutines 
 ! 'update_halo_...' in halo.inc
 
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
 
-    if (present(opt_global)) then
-       global = opt_global
-    else
-       global = .false.
-    end if
+if (present(opt_global)) then
+global = opt_global
+else
+global = .false.
+end if
 
-    s1 = size(in,1)
-    s2 = size(in,2)
-    s3 = size(in,3)
+s1 = size(in,1)
+s2 = size(in,2)
+s3 = size(in,3)
 
-    ! Calculate the starting index and ending index of output
-    if (s1==decomp%xsz(1)) then  ! X-pencil input
-       if (global) then
-          xs = decomp%xst(1) 
-          xe = decomp%xen(1)
-          ys = decomp%xst(2) - level
-          ye = decomp%xen(2) + level
-          zs = decomp%xst(3) - level
-          ze = decomp%xen(3) + level
-       else
-          xs = 1 
-          xe = s1
-          ys = 1 - level
-          ye = s2 + level 
-          zs = 1 - level
-          ze = s3 + level
-       end if
-    else if (s2==decomp%ysz(2)) then  ! Y-pencil input
-       if (global) then
-          xs = decomp%yst(1) - level 
-          xe = decomp%yen(1) + level
-          ys = decomp%yst(2)
-          ye = decomp%yen(2)
-          zs = decomp%yst(3) - level
-          ze = decomp%yen(3) + level
-       else
-          xs = 1 - level
-          xe = s1 + level
-          ys = 1
-          ye = s2
-          zs = 1 - level
-          ze = s3 + level
-       end if
-    else if (s3==decomp%zsz(3)) then  ! Z-pencil input
-       if (global) then
-          xs = decomp%zst(1) - level 
-          xe = decomp%zen(1) + level
-          ys = decomp%zst(2) - level
-          ye = decomp%zen(2) + level
-          zs = decomp%zst(3)
-          ze = decomp%zen(3)
-       else
-          xs = 1 - level
-          xe = s1 + level
-          ys = 1 - level
-          ye = s2 + level
-          zs = 1
-          ze = s3
-       end if
-    else
-       ! invalid input
-       call decomp_2d_abort(10, &
-            'Invalid data passed to update_halo')
-    end if
+! Calculate the starting index and ending index of output
+if (s1==decomp%xsz(1)) then  ! X-pencil input
+if (global) then
+xs = decomp%xst(1) 
+xe = decomp%xen(1)
+ys = decomp%xst(2) - level
+ye = decomp%xen(2) + level
+zs = decomp%xst(3) - level
+ze = decomp%xen(3) + level
+else
+xs = 1 
+xe = s1
+ys = 1 - level
+ye = s2 + level 
+zs = 1 - level
+ze = s3 + level
+end if
+else if (s2==decomp%ysz(2)) then  ! Y-pencil input
+if (global) then
+xs = decomp%yst(1) - level 
+xe = decomp%yen(1) + level
+ys = decomp%yst(2)
+ye = decomp%yen(2)
+zs = decomp%yst(3) - level
+ze = decomp%yen(3) + level
+else
+xs = 1 - level
+xe = s1 + level
+ys = 1
+ye = s2
+zs = 1 - level
+ze = s3 + level
+end if
+else if (s3==decomp%zsz(3)) then  ! Z-pencil input
+if (global) then
+xs = decomp%zst(1) - level 
+xe = decomp%zen(1) + level
+ys = decomp%zst(2) - level
+ye = decomp%zen(2) + level
+zs = decomp%zst(3)
+ze = decomp%zen(3)
+else
+xs = 1 - level
+xe = s1 + level
+ys = 1 - level
+ye = s2 + level
+zs = 1
+ze = s3
+end if
+else
+! invalid input
+call decomp_2d_abort(10, &
+'Invalid data passed to update_halo')
+end if
 
 
-    allocate(out(xs:xe, ys:ye, zs:ze))
+allocate(out(xs:xe, ys:ye, zs:ze))
 !    out = -1.0_mytype ! fill the halo for debugging
 
-    ! copy input data to output
-    if (global) then
-       ! using global coordinate
-       ! note the input array passed in always has index starting from 1 
-       ! need to work out the corresponding global index
-       if (s1==decomp%xsz(1)) then
-          do k=decomp%xst(3),decomp%xen(3)
-             do j=decomp%xst(2),decomp%xen(2)
-                do i=1,s1  ! x all local
-                   out(i,j,k) = in(i,j-decomp%xst(2)+1,k-decomp%xst(3)+1)
-                end do
-             end do
-          end do
-       else if (s2==decomp%ysz(2)) then
-          do k=decomp%yst(3),decomp%yen(3)
-             do j=1,s2  ! y all local
-                do i=decomp%yst(1),decomp%yen(1)
-                   out(i,j,k) = in(i-decomp%yst(1)+1,j,k-decomp%yst(3)+1)
-                end do
-             end do
-          end do
-       else if (s3==decomp%zsz(3)) then
-          do k=1,s3  ! z all local
-             do j=decomp%zst(2),decomp%zen(2)
-                do i=decomp%zst(1),decomp%zen(1)
-                   out(i,j,k) = in(i-decomp%zst(1)+1,j-decomp%zst(2)+1,k)
-                end do
-             end do
-          end do
-       end if
-    else
-       ! not using global coordinate
-       do k=1,s3
-          do j=1,s2
-             do i=1,s1
-                out(i,j,k) = in(i,j,k)
-             end do
-          end do
-       end do
-    end if
+! copy input data to output
+if (global) then
+! using global coordinate
+! note the input array passed in always has index starting from 1 
+! need to work out the corresponding global index
+if (s1==decomp%xsz(1)) then
+do k=decomp%xst(3),decomp%xen(3)
+do j=decomp%xst(2),decomp%xen(2)
+do i=1,s1  ! x all local
+out(i,j,k) = in(i,j-decomp%xst(2)+1,k-decomp%xst(3)+1)
+end do
+end do
+end do
+else if (s2==decomp%ysz(2)) then
+do k=decomp%yst(3),decomp%yen(3)
+do j=1,s2  ! y all local
+do i=decomp%yst(1),decomp%yen(1)
+out(i,j,k) = in(i-decomp%yst(1)+1,j,k-decomp%yst(3)+1)
+end do
+end do
+end do
+else if (s3==decomp%zsz(3)) then
+do k=1,s3  ! z all local
+do j=decomp%zst(2),decomp%zen(2)
+do i=decomp%zst(1),decomp%zen(1)
+out(i,j,k) = in(i-decomp%zst(1)+1,j-decomp%zst(2)+1,k)
+end do
+end do
+end do
+end if
+else
+! not using global coordinate
+do k=1,s3
+do j=1,s2
+do i=1,s1
+out(i,j,k) = in(i,j,k)
+end do
+end do
+end do
+end if
 
-    ! If needed, define MPI derived data type to pack halo data,
-    ! then call MPI send/receive to exchange halo data
-    
-    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-    ! X-pencil
-    if (s1==decomp%xsz(1)) then
+! If needed, define MPI derived data type to pack halo data,
+! then call MPI send/receive to exchange halo data
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+! X-pencil
+if (s1==decomp%xsz(1)) then
 
 #ifdef HALO_DEBUG
-       if (nrank==4) then
-          write(*,*) 'X-pencil input'
-          write(*,*) '=============='
-          write(*,*) 'Data on a y-z plane is shown'
-          write(*,*) 'Before halo exchange'
-          do j=ye,ys,-1
-             write(*,'(10F4.0)') (out(1,j,k),k=zs,ze)
-          end do
-       end if
+if (nrank==4) then
+write(*,*) 'X-pencil input'
+write(*,*) '=============='
+write(*,*) 'Data on a y-z plane is shown'
+write(*,*) 'Before halo exchange'
+do j=ye,ys,-1
+write(*,'(10F4.0)') (out(1,j,k),k=zs,ze)
+end do
+end if
 #endif
 
-       ! *** east/west ***
-       ! all data in local memory already, no halo exchange
+! *** east/west ***
+! all data in local memory already, no halo exchange
 
-       ! *** north/south *** 
-       tag_s = coord(1)
-       if (coord(1)==dims(1)-1 .AND. periodic_y) then
-          tag_n = 0
-       else
-          tag_n = coord(1) + 1
-       end if
-       icount = s3 + 2*level
-       ilength = level * s1
-       ijump = s1*(s2+2*level)
-       call MPI_TYPE_VECTOR(icount, ilength, ijump, &
-            data_type, halo12, ierror)
-       call MPI_TYPE_COMMIT(halo12, ierror)
-       ! receive from south
-       call MPI_IRECV(out(xs,ys,zs), 1, halo12, &
-            neighbour(1,4), tag_s, DECOMP_2D_COMM_CART_X, &
-            requests(1), ierror)
-       ! receive from north
-       call MPI_IRECV(out(xs,ye-level+1,zs), 1, halo12, &
-            neighbour(1,3), tag_n, DECOMP_2D_COMM_CART_X, &
-            requests(2), ierror)
-       ! send to south
-       call MPI_ISSEND(out(xs,ys+level,zs), 1, halo12, &
-            neighbour(1,4), tag_s, DECOMP_2D_COMM_CART_X, &
-            requests(3), ierror)
-       ! send to north
-       call MPI_ISSEND(out(xs,ye-level-level+1,zs), 1, halo12, &
-            neighbour(1,3), tag_n, DECOMP_2D_COMM_CART_X, &
-            requests(4), ierror)
-       call MPI_WAITALL(4, requests, status, ierror)
-       call MPI_TYPE_FREE(halo12, ierror)
+! *** north/south *** 
+tag_s = coord(1)
+if (coord(1)==dims(1)-1 .AND. periodic_y) then
+tag_n = 0
+else
+tag_n = coord(1) + 1
+end if
+icount = s3 + 2*level
+ilength = level * s1
+ijump = s1*(s2+2*level)
+call MPI_TYPE_VECTOR(icount, ilength, ijump, &
+data_type, halo12, ierror)
+call MPI_TYPE_COMMIT(halo12, ierror)
+! receive from south
+call MPI_IRECV(out(xs,ys,zs), 1, halo12, &
+neighbour(1,4), tag_s, DECOMP_2D_COMM_CART_X, &
+requests(1), ierror)
+! receive from north
+call MPI_IRECV(out(xs,ye-level+1,zs), 1, halo12, &
+neighbour(1,3), tag_n, DECOMP_2D_COMM_CART_X, &
+requests(2), ierror)
+! send to south
+call MPI_ISSEND(out(xs,ys+level,zs), 1, halo12, &
+neighbour(1,4), tag_s, DECOMP_2D_COMM_CART_X, &
+requests(3), ierror)
+! send to north
+call MPI_ISSEND(out(xs,ye-level-level+1,zs), 1, halo12, &
+neighbour(1,3), tag_n, DECOMP_2D_COMM_CART_X, &
+requests(4), ierror)
+call MPI_WAITALL(4, requests, status, ierror)
+call MPI_TYPE_FREE(halo12, ierror)
 #ifdef HALO_DEBUG
-       if (nrank==4) then
-          write(*,*) 'After exchange in Y'
-          do j=ye,ys,-1
-             write(*,'(10F4.0)') (out(1,j,k),k=zs,ze)
-          end do
-       end if
+if (nrank==4) then
+write(*,*) 'After exchange in Y'
+do j=ye,ys,-1
+write(*,'(10F4.0)') (out(1,j,k),k=zs,ze)
+end do
+end if
 #endif
 
-       ! *** top/bottom ***
-       ! no need to define derived data type as data on xy-planes
-       ! all contiguous in memory, which can be sent/received using
-       ! MPI directly
-       tag_b = coord(2)
-       if (coord(2)==dims(2)-1 .AND. periodic_z) then
-          tag_t = 0
-       else
-          tag_t = coord(2) + 1
-       end if
-       icount = (s1 * (s2+2*level)) * level
-       ! receive from bottom
-       call MPI_IRECV(out(xs,ys,zs), icount, data_type, &
-            neighbour(1,6), tag_b, DECOMP_2D_COMM_CART_X, &
-            requests(1), ierror)
-       ! receive from top
-       call MPI_IRECV(out(xs,ys,ze-level+1), icount, data_type, &
-            neighbour(1,5), tag_t, DECOMP_2D_COMM_CART_X, &
-            requests(2), ierror)
-       ! send to bottom
-       call MPI_ISSEND(out(xs,ys,zs+level), icount, data_type, &
-            neighbour(1,6), tag_b, DECOMP_2D_COMM_CART_X, &
-            requests(3), ierror)
-       ! send to top
-       call MPI_ISSEND(out(xs,ys,ze-level-level+1), icount, data_type, &
-            neighbour(1,5), tag_t, DECOMP_2D_COMM_CART_X, &
-            requests(4), ierror)
-       call MPI_WAITALL(4, requests, status, ierror)
+! *** top/bottom ***
+! no need to define derived data type as data on xy-planes
+! all contiguous in memory, which can be sent/received using
+! MPI directly
+tag_b = coord(2)
+if (coord(2)==dims(2)-1 .AND. periodic_z) then
+tag_t = 0
+else
+tag_t = coord(2) + 1
+end if
+icount = (s1 * (s2+2*level)) * level
+! receive from bottom
+call MPI_IRECV(out(xs,ys,zs), icount, data_type, &
+neighbour(1,6), tag_b, DECOMP_2D_COMM_CART_X, &
+requests(1), ierror)
+! receive from top
+call MPI_IRECV(out(xs,ys,ze-level+1), icount, data_type, &
+neighbour(1,5), tag_t, DECOMP_2D_COMM_CART_X, &
+requests(2), ierror)
+! send to bottom
+call MPI_ISSEND(out(xs,ys,zs+level), icount, data_type, &
+neighbour(1,6), tag_b, DECOMP_2D_COMM_CART_X, &
+requests(3), ierror)
+! send to top
+call MPI_ISSEND(out(xs,ys,ze-level-level+1), icount, data_type, &
+neighbour(1,5), tag_t, DECOMP_2D_COMM_CART_X, &
+requests(4), ierror)
+call MPI_WAITALL(4, requests, status, ierror)
 #ifdef HALO_DEBUG       
-       if (nrank==4) then
-          write(*,*) 'After exchange in Z'
-          do j=ye,ys,-1
-             write(*,'(10F4.0)') (out(1,j,k),k=zs,ze)
-          end do
-       end if
+if (nrank==4) then
+write(*,*) 'After exchange in Z'
+do j=ye,ys,-1
+write(*,'(10F4.0)') (out(1,j,k),k=zs,ze)
+end do
+end if
 #endif       
 
-    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   
-    ! Y-pencil   
-    else if (s2==decomp%ysz(2)) then
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   
+! Y-pencil   
+else if (s2==decomp%ysz(2)) then
 
 #ifdef HALO_DEBUG
-       if (nrank==4) then
-          write(*,*) 'Y-pencil input'
-          write(*,*) '=============='
-          write(*,*) 'Data on a x-z plane is shown'
-          write(*,*) 'Before halo exchange'
-          do i=xe,xs,-1
-             write(*,'(10F4.0)') (out(i,1,k),k=zs,ze)
-          end do
-       end if
+if (nrank==4) then
+write(*,*) 'Y-pencil input'
+write(*,*) '=============='
+write(*,*) 'Data on a x-z plane is shown'
+write(*,*) 'Before halo exchange'
+do i=xe,xs,-1
+write(*,'(10F4.0)') (out(i,1,k),k=zs,ze)
+end do
+end if
 #endif
 
-       ! *** east/west ***
-       tag_w = coord(1)
-       if (coord(1)==dims(1)-1 .AND. periodic_x) then
-          tag_e = 0
-       else
-          tag_e = coord(1) + 1
-       end if
-       icount = s2*(s3+2*level)
-       ilength = level
-       ijump = s1+2*level
-       call MPI_TYPE_VECTOR(icount, ilength, ijump, &
-            data_type, halo21, ierror)
-       call MPI_TYPE_COMMIT(halo21, ierror)
-       ! receive from west
-       call MPI_IRECV(out(xs,ys,zs), 1, halo21, &
-            neighbour(2,2), tag_w, DECOMP_2D_COMM_CART_Y, &
-            requests(1), ierror)
-       ! receive from east
-       call MPI_IRECV(out(xe-level+1,ys,zs), 1, halo21, &
-            neighbour(2,1), tag_e, DECOMP_2D_COMM_CART_Y, &
-            requests(2), ierror)
-       ! send to west
-       call MPI_ISSEND(out(xs+level,ys,zs), 1, halo21, &
-            neighbour(2,2), tag_w, DECOMP_2D_COMM_CART_Y, &
-            requests(3), ierror)
-       ! send to east
-       call MPI_ISSEND(out(xe-level-level+1,ys,zs), 1, halo21, &
-            neighbour(2,1), tag_e, DECOMP_2D_COMM_CART_Y, &
-            requests(4), ierror)
-       call MPI_WAITALL(4, requests, status, ierror)
-       call MPI_TYPE_FREE(halo21, ierror)
+! *** east/west ***
+tag_w = coord(1)
+if (coord(1)==dims(1)-1 .AND. periodic_x) then
+tag_e = 0
+else
+tag_e = coord(1) + 1
+end if
+icount = s2*(s3+2*level)
+ilength = level
+ijump = s1+2*level
+call MPI_TYPE_VECTOR(icount, ilength, ijump, &
+data_type, halo21, ierror)
+call MPI_TYPE_COMMIT(halo21, ierror)
+! receive from west
+call MPI_IRECV(out(xs,ys,zs), 1, halo21, &
+neighbour(2,2), tag_w, DECOMP_2D_COMM_CART_Y, &
+requests(1), ierror)
+! receive from east
+call MPI_IRECV(out(xe-level+1,ys,zs), 1, halo21, &
+neighbour(2,1), tag_e, DECOMP_2D_COMM_CART_Y, &
+requests(2), ierror)
+! send to west
+call MPI_ISSEND(out(xs+level,ys,zs), 1, halo21, &
+neighbour(2,2), tag_w, DECOMP_2D_COMM_CART_Y, &
+requests(3), ierror)
+! send to east
+call MPI_ISSEND(out(xe-level-level+1,ys,zs), 1, halo21, &
+neighbour(2,1), tag_e, DECOMP_2D_COMM_CART_Y, &
+requests(4), ierror)
+call MPI_WAITALL(4, requests, status, ierror)
+call MPI_TYPE_FREE(halo21, ierror)
 #ifdef HALO_DEBUG       
-       if (nrank==4) then
-          write(*,*) 'After exchange in X'
-          do i=xe,xs,-1
-             write(*,'(10F4.0)') (out(i,1,k),k=zs,ze)
-          end do
-       end if
+if (nrank==4) then
+write(*,*) 'After exchange in X'
+do i=xe,xs,-1
+write(*,'(10F4.0)') (out(i,1,k),k=zs,ze)
+end do
+end if
 #endif
 
-       ! *** north/south ***
-       ! all data in local memory already, no halo exchange
+! *** north/south ***
+! all data in local memory already, no halo exchange
 
-       ! *** top/bottom ***
-       ! no need to define derived data type as data on xy-planes
-       ! all contiguous in memory, which can be sent/received using
-       ! MPI directly
-       tag_b = coord(2)
-       if (coord(2)==dims(2)-1 .AND. periodic_z) then
-          tag_t = 0
-       else
-          tag_t = coord(2) + 1
-       end if
-       icount = (s2 * (s1+2*level)) * level
-       ! receive from bottom
-       call MPI_IRECV(out(xs,ys,zs), icount, data_type, &
-            neighbour(2,6), tag_b, DECOMP_2D_COMM_CART_Y, &
-            requests(1), ierror)
-       ! receive from top
-       call MPI_IRECV(out(xs,ys,ze-level+1), icount, data_type, &
-            neighbour(2,5), tag_t, DECOMP_2D_COMM_CART_Y, &
-            requests(2), ierror)
-       ! send to bottom
-       call MPI_ISSEND(out(xs,ys,zs+level), icount, data_type, &
-            neighbour(2,6), tag_b, DECOMP_2D_COMM_CART_Y, &
-            requests(3), ierror)
-       ! send to top
-       call MPI_ISSEND(out(xs,ys,ze-level-level+1), icount, data_type, &
-            neighbour(2,5), tag_t, DECOMP_2D_COMM_CART_Y, &
-            requests(4), ierror)
-       call MPI_WAITALL(4, requests, status, ierror)
+! *** top/bottom ***
+! no need to define derived data type as data on xy-planes
+! all contiguous in memory, which can be sent/received using
+! MPI directly
+tag_b = coord(2)
+if (coord(2)==dims(2)-1 .AND. periodic_z) then
+tag_t = 0
+else
+tag_t = coord(2) + 1
+end if
+icount = (s2 * (s1+2*level)) * level
+! receive from bottom
+call MPI_IRECV(out(xs,ys,zs), icount, data_type, &
+neighbour(2,6), tag_b, DECOMP_2D_COMM_CART_Y, &
+requests(1), ierror)
+! receive from top
+call MPI_IRECV(out(xs,ys,ze-level+1), icount, data_type, &
+neighbour(2,5), tag_t, DECOMP_2D_COMM_CART_Y, &
+requests(2), ierror)
+! send to bottom
+call MPI_ISSEND(out(xs,ys,zs+level), icount, data_type, &
+neighbour(2,6), tag_b, DECOMP_2D_COMM_CART_Y, &
+requests(3), ierror)
+! send to top
+call MPI_ISSEND(out(xs,ys,ze-level-level+1), icount, data_type, &
+neighbour(2,5), tag_t, DECOMP_2D_COMM_CART_Y, &
+requests(4), ierror)
+call MPI_WAITALL(4, requests, status, ierror)
 #ifdef HALO_DEBUG
-       if (nrank==4) then
-          write(*,*) 'After exchange in Z'
-          do i=xe,xs,-1
-             write(*,'(10F4.0)') (out(i,1,k),k=zs,ze)
-          end do
-       end if
+if (nrank==4) then
+write(*,*) 'After exchange in Z'
+do i=xe,xs,-1
+write(*,'(10F4.0)') (out(i,1,k),k=zs,ze)
+end do
+end if
 #endif
 
-    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   
-    ! Z-pencil
-    else if (s3==decomp%zsz(3)) then   
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!   
+! Z-pencil
+else if (s3==decomp%zsz(3)) then   
 
 #ifdef HALO_DEBUG
-       if (nrank==4) then
-          write(*,*) 'Z-pencil input'
-          write(*,*) '=============='
-          write(*,*) 'Data on a x-y plane is shown'
-          write(*,*) 'Before halo exchange'
-          do i=xe,xs,-1
-             write(*,'(10F4.0)') (out(i,j,1),j=ys,ye)
-          end do
-       end if
+if (nrank==4) then
+write(*,*) 'Z-pencil input'
+write(*,*) '=============='
+write(*,*) 'Data on a x-y plane is shown'
+write(*,*) 'Before halo exchange'
+do i=xe,xs,-1
+write(*,'(10F4.0)') (out(i,j,1),j=ys,ye)
+end do
+end if
 #endif
 
-       ! *** east/west ***
-       tag_w = coord(1)
-       if (coord(1)==dims(1)-1 .AND. periodic_x) then
-          tag_e = 0
-       else
-          tag_e = coord(1) + 1
-       end if
-       icount = (s2+2*level)*s3
-       ilength = level
-       ijump = s1+2*level
-       call MPI_TYPE_VECTOR(icount, ilength, ijump, &
-            data_type, halo31, ierror)
-       call MPI_TYPE_COMMIT(halo31, ierror)
-       ! receive from west
-       call MPI_IRECV(out(xs,ys,zs), 1, halo31, &
-            neighbour(3,2), tag_w, DECOMP_2D_COMM_CART_Z, &
-            requests(1), ierror)
-       ! receive from east
-       call MPI_IRECV(out(xe-level+1,ys,zs), 1, halo31, &
-            neighbour(3,1), tag_e, DECOMP_2D_COMM_CART_Z, &
-            requests(2), ierror)
-       ! send to west
-       call MPI_ISSEND(out(xs+level,ys,zs), 1, halo31, &
-            neighbour(3,2), tag_w, DECOMP_2D_COMM_CART_Z, &
-            requests(3), ierror)
-       ! send to east
-       call MPI_ISSEND(out(xe-level-level+1,ys,zs), 1, halo31, &
-            neighbour(3,1), tag_e, DECOMP_2D_COMM_CART_Z, &
-            requests(4), ierror)
-       call MPI_WAITALL(4, requests, status, ierror)
-       call MPI_TYPE_FREE(halo31, ierror)       
+! *** east/west ***
+tag_w = coord(1)
+if (coord(1)==dims(1)-1 .AND. periodic_x) then
+tag_e = 0
+else
+tag_e = coord(1) + 1
+end if
+icount = (s2+2*level)*s3
+ilength = level
+ijump = s1+2*level
+call MPI_TYPE_VECTOR(icount, ilength, ijump, &
+data_type, halo31, ierror)
+call MPI_TYPE_COMMIT(halo31, ierror)
+! receive from west
+call MPI_IRECV(out(xs,ys,zs), 1, halo31, &
+neighbour(3,2), tag_w, DECOMP_2D_COMM_CART_Z, &
+requests(1), ierror)
+! receive from east
+call MPI_IRECV(out(xe-level+1,ys,zs), 1, halo31, &
+neighbour(3,1), tag_e, DECOMP_2D_COMM_CART_Z, &
+requests(2), ierror)
+! send to west
+call MPI_ISSEND(out(xs+level,ys,zs), 1, halo31, &
+neighbour(3,2), tag_w, DECOMP_2D_COMM_CART_Z, &
+requests(3), ierror)
+! send to east
+call MPI_ISSEND(out(xe-level-level+1,ys,zs), 1, halo31, &
+neighbour(3,1), tag_e, DECOMP_2D_COMM_CART_Z, &
+requests(4), ierror)
+call MPI_WAITALL(4, requests, status, ierror)
+call MPI_TYPE_FREE(halo31, ierror)       
 #ifdef HALO_DEBUG
-       if (nrank==4) then
-          write(*,*) 'After exchange in X'
-          do i=xe,xs,-1
-             write(*,'(10F4.0)') (out(i,j,1),j=ys,ye)
-          end do
-       end if
+if (nrank==4) then
+write(*,*) 'After exchange in X'
+do i=xe,xs,-1
+write(*,'(10F4.0)') (out(i,j,1),j=ys,ye)
+end do
+end if
 #endif
 
-       ! *** north/south *** 
-       tag_s = coord(2)
-       if (coord(2)==dims(2)-1 .AND. periodic_y) then
-          tag_n = 0
-       else
-          tag_n = coord(2) + 1
-       end if
-       icount = s3
-       ilength = level * (s1+2*level)
-       ijump = (s1+2*level) * (s2+2*level)
-       call MPI_TYPE_VECTOR(icount, ilength, ijump, &
-            data_type, halo32, ierror)
-       call MPI_TYPE_COMMIT(halo32, ierror)
-       ! receive from south
-       call MPI_IRECV(out(xs,ys,zs), 1, halo32, &
-            neighbour(3,4), tag_s, DECOMP_2D_COMM_CART_Z, &
-            requests(1), ierror)
-       ! receive from north
-       call MPI_IRECV(out(xs,ye-level+1,zs), 1, halo32, &
-            neighbour(3,3), tag_n, DECOMP_2D_COMM_CART_Z, &
-            requests(2), ierror)
-       ! send to south
-       call MPI_ISSEND(out(xs,ys+level,zs), 1, halo32, &
-            neighbour(3,4), tag_s, DECOMP_2D_COMM_CART_Z, &
-            requests(3), ierror)
-       ! send to north
-       call MPI_ISSEND(out(xs,ye-level-level+1,zs), 1, halo32, &
-            neighbour(3,3), tag_n, DECOMP_2D_COMM_CART_Z, &
-            requests(4), ierror)
-       call MPI_WAITALL(4, requests, status, ierror)
-       call MPI_TYPE_FREE(halo32, ierror)
+! *** north/south *** 
+tag_s = coord(2)
+if (coord(2)==dims(2)-1 .AND. periodic_y) then
+tag_n = 0
+else
+tag_n = coord(2) + 1
+end if
+icount = s3
+ilength = level * (s1+2*level)
+ijump = (s1+2*level) * (s2+2*level)
+call MPI_TYPE_VECTOR(icount, ilength, ijump, &
+data_type, halo32, ierror)
+call MPI_TYPE_COMMIT(halo32, ierror)
+! receive from south
+call MPI_IRECV(out(xs,ys,zs), 1, halo32, &
+neighbour(3,4), tag_s, DECOMP_2D_COMM_CART_Z, &
+requests(1), ierror)
+! receive from north
+call MPI_IRECV(out(xs,ye-level+1,zs), 1, halo32, &
+neighbour(3,3), tag_n, DECOMP_2D_COMM_CART_Z, &
+requests(2), ierror)
+! send to south
+call MPI_ISSEND(out(xs,ys+level,zs), 1, halo32, &
+neighbour(3,4), tag_s, DECOMP_2D_COMM_CART_Z, &
+requests(3), ierror)
+! send to north
+call MPI_ISSEND(out(xs,ye-level-level+1,zs), 1, halo32, &
+neighbour(3,3), tag_n, DECOMP_2D_COMM_CART_Z, &
+requests(4), ierror)
+call MPI_WAITALL(4, requests, status, ierror)
+call MPI_TYPE_FREE(halo32, ierror)
 #ifdef HALO_DEBUG
-       if (nrank==4) then
-          write(*,*) 'After exchange in Y'
-          do i=xe,xs,-1
-             write(*,'(10F4.0)') (out(i,j,1),j=ys,ye)
-          end do
-       end if
+if (nrank==4) then
+write(*,*) 'After exchange in Y'
+do i=xe,xs,-1
+write(*,'(10F4.0)') (out(i,j,1),j=ys,ye)
+end do
+end if
 #endif
 
-       ! *** top/bottom ***
-       ! all data in local memory already, no halo exchange
+! *** top/bottom ***
+! all data in local memory already, no halo exchange
 
-    end if  ! pencil
+end if  ! pencil
diff --git a/decomp2d/io.f90 b/decomp2d/io.f90
index 1dba4653..95ab06db 100644
--- a/decomp2d/io.f90
+++ b/decomp2d/io.f90
@@ -71,27 +71,27 @@ module decomp_2d_io
   interface decomp_2d_write_plane
      module procedure write_plane_3d_real
      module procedure write_plane_3d_complex
-!     module procedure write_plane_2d
+     !     module procedure write_plane_2d
   end interface decomp_2d_write_plane
 
   interface decomp_2d_write_every
      module procedure write_every_real
      module procedure write_every_complex
   end interface decomp_2d_write_every
-  
+
   interface decomp_2d_write_subdomain
      module procedure write_subdomain
   end interface decomp_2d_write_subdomain
 
 contains
-  
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Using MPI-IO library to write a single 3D array to a file
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine write_one_real(ipencil,var,filename,opt_decomp)
-    
+
     implicit none
-    
+
     integer, intent(IN) :: ipencil
     real(mytype), dimension(:,:,:), intent(IN) :: var
     character(len=*), intent(IN) :: filename
@@ -111,9 +111,9 @@ contains
 
 
   subroutine write_one_complex(ipencil,var,filename,opt_decomp)
-    
+
     implicit none
-    
+
     integer, intent(IN) :: ipencil
     complex(mytype), dimension(:,:,:), intent(IN) :: var
     character(len=*), intent(IN) :: filename
@@ -127,18 +127,18 @@ contains
     data_type = complex_type
 
 #include "io_write_one.inc"
-    
+
     return
   end subroutine write_one_complex
 
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Using MPI-IO library to read from a file a single 3D array
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine read_one_real(ipencil,var,filename,opt_decomp)
-    
+
     implicit none
-    
+
     integer, intent(IN) :: ipencil
     real(mytype), dimension(:,:,:), intent(INOUT) :: var
     character(len=*), intent(IN) :: filename
@@ -152,18 +152,18 @@ contains
 
     data_type = real_type_single
     allocate (varsingle(xstV(1):xenV(1),xstV(2):xenV(2),xstV(3):xenV(3)))
-    
+
     if (present(opt_decomp)) then
        decomp = opt_decomp
     else
        call get_decomp_info(decomp)
     end if
-    
+
     ! determine subarray parameters
     sizes(1) = decomp%xsz(1)
     sizes(2) = decomp%ysz(2)
     sizes(3) = decomp%zsz(3)
-    
+
     if (ipencil == 1) then
        subsizes(1) = decomp%xsz(1)
        subsizes(2) = decomp%xsz(2)
@@ -186,7 +186,7 @@ contains
        starts(2) = decomp%zst(2)-1
        starts(3) = decomp%zst(3)-1
     endif
-    
+
     call MPI_TYPE_CREATE_SUBARRAY(3, sizes, subsizes, starts,  &
          MPI_ORDER_FORTRAN, data_type, newtype, ierror)
     call MPI_TYPE_COMMIT(newtype,ierror)
@@ -203,15 +203,15 @@ contains
     call MPI_TYPE_FREE(newtype,ierror)
     var = real(varsingle,mytype)
     deallocate(varsingle)
-    
+
     return
   end subroutine read_one_real
 
 
   subroutine read_one_complex(ipencil,var,filename,opt_decomp)
-    
+
     implicit none
-    
+
     integer, intent(IN) :: ipencil
     complex(mytype), dimension(:,:,:), intent(INOUT) :: var
     character(len=*), intent(IN) :: filename
@@ -221,20 +221,20 @@ contains
     integer(kind=MPI_OFFSET_KIND) :: disp
     integer, dimension(3) :: sizes, subsizes, starts
     integer :: ierror, newtype, fh, data_type
-    
+
     data_type = complex_type
 
 #include "io_read_one.inc"
-    
+
     return
   end subroutine read_one_complex
 
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Write a 3D array as part of a big MPI-IO file, starting from 
   !  displacement 'disp'; 'disp' will be updated after the writing
   !  operation to prepare the writing of next chunk of data.
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine write_var_real(fh,disp,ipencil,var,opt_decomp)
 
     implicit none
@@ -279,11 +279,11 @@ contains
   end subroutine write_var_complex
 
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Read a 3D array as part of a big MPI-IO file, starting from 
   !  displacement 'disp'; 'disp' will be updated after the reading
   !  operation to prepare the reading of next chunk of data.
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine read_var_real(fh,disp,ipencil,var,opt_decomp)
 
     implicit none
@@ -327,12 +327,12 @@ contains
     return
   end subroutine read_var_complex
 
-  
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Write scalar variables as part of a big MPI-IO file, starting from 
   !  displacement 'disp'; 'disp' will be updated after the reading
   !  operation to prepare the reading of next chunk of data.
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine write_scalar_real(fh,disp,n,var)
 
     implicit none
@@ -441,11 +441,11 @@ contains
   end subroutine write_scalar_logical
 
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Read scalar variables as part of a big MPI-IO file, starting from 
   !  displacement 'disp'; 'disp' will be updated after the reading
   !  operation to prepare the reading of next chunk of data.
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine read_scalar_real(fh,disp,n,var)
 
     implicit none
@@ -467,7 +467,7 @@ contains
 
     return
   end subroutine read_scalar_real
-  
+
 
   subroutine read_scalar_complex(fh,disp,n,var)
 
@@ -534,9 +534,9 @@ contains
   end subroutine read_scalar_logical
 
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Write a 2D slice of the 3D data to a file
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine write_plane_3d_real(ipencil,var,iplane,n,filename, &
        opt_decomp)
 
@@ -591,32 +591,32 @@ contains
   end subroutine write_plane_3d_complex
 
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Write a 2D array to a file
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !************** TO DO ***************
   !* Consider handling distributed 2D data set
-!  subroutine write_plane_2d(ipencil,var,filename)
-!    integer, intent(IN) :: ipencil !(x-pencil=1; y-pencil=2; z-pencil=3)
-!    real(mytype), dimension(:,:), intent(IN) :: var ! 2D array
-!    character(len=*), intent(IN) :: filename
-!
-!    if (ipencil==1) then
-!       ! var should be defined as var(xsize(2)
-!
-!    else if (ipencil==2) then
-!
-!    else if (ipencil==3) then
-!
-!    end if
-!
-!    return
-!  end subroutine write_plane_2d
-
-
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+  !  subroutine write_plane_2d(ipencil,var,filename)
+  !    integer, intent(IN) :: ipencil !(x-pencil=1; y-pencil=2; z-pencil=3)
+  !    real(mytype), dimension(:,:), intent(IN) :: var ! 2D array
+  !    character(len=*), intent(IN) :: filename
+  !
+  !    if (ipencil==1) then
+  !       ! var should be defined as var(xsize(2)
+  !
+  !    else if (ipencil==2) then
+  !
+  !    else if (ipencil==3) then
+  !
+  !    end if
+  !
+  !    return
+  !  end subroutine write_plane_2d
+
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Write 3D array data for every specified mesh point
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine write_every_real(ipencil,var,iskip,jskip,kskip, &
        filename, from1)
 
@@ -627,7 +627,7 @@ contains
     integer, intent(IN) :: iskip,jskip,kskip 
     character(len=*), intent(IN) :: filename
     logical, intent(IN) :: from1  ! .true.  - save 1,n+1,2n+1...
-                                  ! .false. - save n,2n,3n...
+    ! .false. - save n,2n,3n...
 
     real(mytype), allocatable, dimension(:,:,:) :: wk, wk2
     integer(kind=MPI_OFFSET_KIND) :: filesize, disp
@@ -638,7 +638,7 @@ contains
     data_type = real_type
 
 #include "io_write_every.inc"
-    
+
     return
   end subroutine write_every_real
 
@@ -653,7 +653,7 @@ contains
     integer, intent(IN) :: iskip,jskip,kskip 
     character(len=*), intent(IN) :: filename
     logical, intent(IN) :: from1  ! .true.  - save 1,n+1,2n+1...
-                                  ! .false. - save n,2n,3n...
+    ! .false. - save n,2n,3n...
 
     complex(mytype), allocatable, dimension(:,:,:) :: wk, wk2
     integer(kind=MPI_OFFSET_KIND) :: filesize, disp
@@ -664,24 +664,24 @@ contains
     data_type = complex_type
 
 #include "io_write_every.inc"
-    
+
     return
   end subroutine write_every_complex
 
 
   subroutine mpiio_write_real_coarse(ipencil,var,filename,icoarse)
-    
+
     USE param
     USE variables
 
     implicit none
-    
+
     integer, intent(IN) :: ipencil !(x-pencil=1; y-pencil=2; z-pencil=3)
     integer, intent(IN) :: icoarse !(nstat=1; nvisu=2)
     real(mytype), dimension(:,:,:), intent(IN) :: var
     real(mytype_single), allocatable, dimension(:,:,:) :: varsingle
     character(len=*) :: filename
-    
+
     integer (kind=MPI_OFFSET_KIND) :: filesize, disp
     integer, dimension(3) :: sizes, subsizes, starts
     integer :: i,j,k, ierror, newtype, fh
@@ -690,7 +690,7 @@ contains
        sizes(1) = xszS(1)
        sizes(2) = yszS(2)
        sizes(3) = zszS(3)
-       
+
        if (ipencil == 1) then
           subsizes(1) = xszS(1)
           subsizes(2) = xszS(2)
@@ -719,7 +719,7 @@ contains
        sizes(1) = xszV(1)
        sizes(2) = yszV(2)
        sizes(3) = zszV(3)
-       
+
        if (ipencil == 1) then
           subsizes(1) = xszV(1)
           subsizes(2) = xszV(2)
@@ -763,27 +763,27 @@ contains
     call MPI_FILE_CLOSE(fh,ierror)
     call MPI_TYPE_FREE(newtype,ierror)
     deallocate(varsingle)
-    
+
     return
   end subroutine mpiio_write_real_coarse
 
   subroutine mpiio_write_real_probe(ipencil,var,filename)
-    
+
     USE param
     USE variables
 
     implicit none
-    
+
     integer, intent(IN) :: ipencil !(x-pencil=1; y-pencil=2; z-pencil=3)
     real(mytype), dimension(:,:,:,:), intent(IN) :: var
-  
+
     character(len=*) :: filename
-    
+
     integer (kind=MPI_OFFSET_KIND) :: filesize, disp
     integer, dimension(4) :: sizes, subsizes, starts
     integer :: i,j,k, ierror, newtype, fh
 
-    
+
     sizes(1) = xszP(1)
     sizes(2) = yszP(2)
     sizes(3) = zszP(3)
@@ -812,7 +812,7 @@ contains
        starts(2) = zstP(2)-1
        starts(3) = zstP(3)-1
     endif
-!   print *,nrank,starts(1),starts(2),starts(3),starts(4)
+    !   print *,nrank,starts(1),starts(2),starts(3),starts(4)
     call MPI_TYPE_CREATE_SUBARRAY(4, sizes, subsizes, starts,  &
          MPI_ORDER_FORTRAN, real_type, newtype, ierror)
     call MPI_TYPE_COMMIT(newtype,ierror)
@@ -829,18 +829,18 @@ contains
          real_type, MPI_STATUS_IGNORE, ierror)
     call MPI_FILE_CLOSE(fh,ierror)
     call MPI_TYPE_FREE(newtype,ierror)
-    
-    
+
+
     return
   end subroutine mpiio_write_real_probe
-  
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Write a 3D data set covering a smaller sub-domain only
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine write_subdomain(ipencil,var,is,ie,js,je,ks,ke,filename)
 
     implicit none
-    
+
     integer, intent(IN) :: ipencil !(x-pencil=1; y-pencil=2; z-pencil=3)
     real(mytype), dimension(:,:,:), intent(IN) :: var
     integer, intent(IN) :: is, ie, js, je, ks, ke
@@ -883,9 +883,9 @@ contains
        end if
     end if
     call MPI_COMM_SPLIT(MPI_COMM_WORLD,color,key,newcomm,ierror)
-    
+
     if (color==1) then ! only ranks in this group do IO collectively
-       
+
        ! generate MPI-IO subarray information
 
        ! global size of the sub-domain to write
@@ -930,7 +930,7 @@ contains
           else if (xend(3)>ke) then
              subsizes(3) = ke - xstart(3) + 1
           end if
-          
+
        else if (ipencil==2) then
 
           ! TODO 
@@ -945,7 +945,7 @@ contains
        ! copy data from orginal to a temp array
        ! pay attention to blocks only partially cover the sub-domain
        if (ipencil==1) then
-          
+
           if (xend(1)>ie .AND. xstart(1)<is) then
              i1 = is
              i2 = ie
@@ -998,13 +998,13 @@ contains
                 end do
              end do
           end do
-          
+
        else if (ipencil==2) then
 
           ! TODO
 
        else if (ipencil==3) then
-          
+
           ! TODO
 
        end if
@@ -1035,6 +1035,6 @@ contains
 
     return
   end subroutine write_subdomain
-    
+
 
 end module decomp_2d_io
diff --git a/decomp2d/io_read_one.inc b/decomp2d/io_read_one.inc
index b2a89896..2b3d9836 100644
--- a/decomp2d/io_read_one.inc
+++ b/decomp2d/io_read_one.inc
@@ -12,53 +12,53 @@
 ! This file contain common code to be included by subroutines 
 ! 'mpiio_read_one_...' in io.f90
 
-    ! Using MPI-IO to write a distributed 3D array into a file
+! Using MPI-IO to write a distributed 3D array into a file
 
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       call get_decomp_info(decomp)
-    end if
-    
-    ! determine subarray parameters
-    sizes(1) = decomp%xsz(1)
-    sizes(2) = decomp%ysz(2)
-    sizes(3) = decomp%zsz(3)
-    
-    if (ipencil == 1) then
-       subsizes(1) = decomp%xsz(1)
-       subsizes(2) = decomp%xsz(2)
-       subsizes(3) = decomp%xsz(3)
-       starts(1) = decomp%xst(1)-1  ! 0-based index
-       starts(2) = decomp%xst(2)-1
-       starts(3) = decomp%xst(3)-1
-    else if (ipencil == 2) then
-       subsizes(1) = decomp%ysz(1)
-       subsizes(2) = decomp%ysz(2)
-       subsizes(3) = decomp%ysz(3)
-       starts(1) = decomp%yst(1)-1
-       starts(2) = decomp%yst(2)-1
-       starts(3) = decomp%yst(3)-1
-    else if (ipencil == 3) then
-       subsizes(1) = decomp%zsz(1)
-       subsizes(2) = decomp%zsz(2)
-       subsizes(3) = decomp%zsz(3)
-       starts(1) = decomp%zst(1)-1
-       starts(2) = decomp%zst(2)-1
-       starts(3) = decomp%zst(3)-1
-    endif
-    
-    call MPI_TYPE_CREATE_SUBARRAY(3, sizes, subsizes, starts,  &
-         MPI_ORDER_FORTRAN, data_type, newtype, ierror)
-    call MPI_TYPE_COMMIT(newtype,ierror)
-    call MPI_FILE_OPEN(MPI_COMM_WORLD, filename, &
-         MPI_MODE_RDONLY, MPI_INFO_NULL, &
-         fh, ierror)
-    disp = 0_MPI_OFFSET_KIND
-    call MPI_FILE_SET_VIEW(fh,disp,data_type, &
-         newtype,'native',MPI_INFO_NULL,ierror)
-    call MPI_FILE_READ_ALL(fh, var, &
-         subsizes(1)*subsizes(2)*subsizes(3), &
-         data_type, MPI_STATUS_IGNORE, ierror)
-    call MPI_FILE_CLOSE(fh,ierror)
-    call MPI_TYPE_FREE(newtype,ierror)
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+call get_decomp_info(decomp)
+end if
+
+! determine subarray parameters
+sizes(1) = decomp%xsz(1)
+sizes(2) = decomp%ysz(2)
+sizes(3) = decomp%zsz(3)
+
+if (ipencil == 1) then
+subsizes(1) = decomp%xsz(1)
+subsizes(2) = decomp%xsz(2)
+subsizes(3) = decomp%xsz(3)
+starts(1) = decomp%xst(1)-1  ! 0-based index
+starts(2) = decomp%xst(2)-1
+starts(3) = decomp%xst(3)-1
+else if (ipencil == 2) then
+subsizes(1) = decomp%ysz(1)
+subsizes(2) = decomp%ysz(2)
+subsizes(3) = decomp%ysz(3)
+starts(1) = decomp%yst(1)-1
+starts(2) = decomp%yst(2)-1
+starts(3) = decomp%yst(3)-1
+else if (ipencil == 3) then
+subsizes(1) = decomp%zsz(1)
+subsizes(2) = decomp%zsz(2)
+subsizes(3) = decomp%zsz(3)
+starts(1) = decomp%zst(1)-1
+starts(2) = decomp%zst(2)-1
+starts(3) = decomp%zst(3)-1
+endif
+
+call MPI_TYPE_CREATE_SUBARRAY(3, sizes, subsizes, starts,  &
+MPI_ORDER_FORTRAN, data_type, newtype, ierror)
+call MPI_TYPE_COMMIT(newtype,ierror)
+call MPI_FILE_OPEN(MPI_COMM_WORLD, filename, &
+MPI_MODE_RDONLY, MPI_INFO_NULL, &
+fh, ierror)
+disp = 0_MPI_OFFSET_KIND
+call MPI_FILE_SET_VIEW(fh,disp,data_type, &
+newtype,'native',MPI_INFO_NULL,ierror)
+call MPI_FILE_READ_ALL(fh, var, &
+subsizes(1)*subsizes(2)*subsizes(3), &
+data_type, MPI_STATUS_IGNORE, ierror)
+call MPI_FILE_CLOSE(fh,ierror)
+call MPI_TYPE_FREE(newtype,ierror)
diff --git a/decomp2d/io_read_var.inc b/decomp2d/io_read_var.inc
index 68647ec6..ff48f52e 100644
--- a/decomp2d/io_read_var.inc
+++ b/decomp2d/io_read_var.inc
@@ -12,57 +12,57 @@
 ! This file contain common code to be included by subroutines 
 ! 'read_var_...' in io.f90
 
-  ! Using MPI-IO to read a distributed 3D variable from a file. File 
-  ! operations (open/close) need to be done in calling application. This 
-  ! allows multiple variables to be read from a single file. Together 
-  ! with the corresponding write operation, this is the perfect solution
-  ! for applications to perform restart/checkpointing.
+! Using MPI-IO to read a distributed 3D variable from a file. File 
+! operations (open/close) need to be done in calling application. This 
+! allows multiple variables to be read from a single file. Together 
+! with the corresponding write operation, this is the perfect solution
+! for applications to perform restart/checkpointing.
 
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       call get_decomp_info(decomp)
-    end if
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+call get_decomp_info(decomp)
+end if
 
-    ! Create file type and set file view
-    sizes(1) = decomp%xsz(1)
-    sizes(2) = decomp%ysz(2)
-    sizes(3) = decomp%zsz(3)
-    if (ipencil == 1) then
-       subsizes(1) = decomp%xsz(1)
-       subsizes(2) = decomp%xsz(2)
-       subsizes(3) = decomp%xsz(3)
-       starts(1) = decomp%xst(1)-1  ! 0-based index
-       starts(2) = decomp%xst(2)-1
-       starts(3) = decomp%xst(3)-1
-    else if (ipencil == 2) then
-       subsizes(1) = decomp%ysz(1)
-       subsizes(2) = decomp%ysz(2)
-       subsizes(3) = decomp%ysz(3)
-       starts(1) = decomp%yst(1)-1
-       starts(2) = decomp%yst(2)-1
-       starts(3) = decomp%yst(3)-1
-    else if (ipencil == 3) then
-       subsizes(1) = decomp%zsz(1)
-       subsizes(2) = decomp%zsz(2)
-       subsizes(3) = decomp%zsz(3)
-       starts(1) = decomp%zst(1)-1
-       starts(2) = decomp%zst(2)-1
-       starts(3) = decomp%zst(3)-1
-    endif
-    
-    call MPI_TYPE_CREATE_SUBARRAY(3, sizes, subsizes, starts,  &
-         MPI_ORDER_FORTRAN, data_type, newtype, ierror)
-    call MPI_TYPE_COMMIT(newtype,ierror)
-    call MPI_FILE_SET_VIEW(fh,disp,data_type, &
-         newtype,'native',MPI_INFO_NULL,ierror)
-    call MPI_FILE_READ_ALL(fh, var, &
-         subsizes(1)*subsizes(2)*subsizes(3), &
-         data_type, MPI_STATUS_IGNORE, ierror)
-    call MPI_TYPE_FREE(newtype,ierror)
+! Create file type and set file view
+sizes(1) = decomp%xsz(1)
+sizes(2) = decomp%ysz(2)
+sizes(3) = decomp%zsz(3)
+if (ipencil == 1) then
+subsizes(1) = decomp%xsz(1)
+subsizes(2) = decomp%xsz(2)
+subsizes(3) = decomp%xsz(3)
+starts(1) = decomp%xst(1)-1  ! 0-based index
+starts(2) = decomp%xst(2)-1
+starts(3) = decomp%xst(3)-1
+else if (ipencil == 2) then
+subsizes(1) = decomp%ysz(1)
+subsizes(2) = decomp%ysz(2)
+subsizes(3) = decomp%ysz(3)
+starts(1) = decomp%yst(1)-1
+starts(2) = decomp%yst(2)-1
+starts(3) = decomp%yst(3)-1
+else if (ipencil == 3) then
+subsizes(1) = decomp%zsz(1)
+subsizes(2) = decomp%zsz(2)
+subsizes(3) = decomp%zsz(3)
+starts(1) = decomp%zst(1)-1
+starts(2) = decomp%zst(2)-1
+starts(3) = decomp%zst(3)-1
+endif
 
-    ! update displacement for the next read operation
-    disp = disp + sizes(1)*sizes(2)*sizes(3)*mytype_bytes
-    if (data_type == complex_type) then
-       disp = disp + sizes(1)*sizes(2)*sizes(3)*mytype_bytes
-    end if
+call MPI_TYPE_CREATE_SUBARRAY(3, sizes, subsizes, starts,  &
+MPI_ORDER_FORTRAN, data_type, newtype, ierror)
+call MPI_TYPE_COMMIT(newtype,ierror)
+call MPI_FILE_SET_VIEW(fh,disp,data_type, &
+newtype,'native',MPI_INFO_NULL,ierror)
+call MPI_FILE_READ_ALL(fh, var, &
+subsizes(1)*subsizes(2)*subsizes(3), &
+data_type, MPI_STATUS_IGNORE, ierror)
+call MPI_TYPE_FREE(newtype,ierror)
+
+! update displacement for the next read operation
+disp = disp + sizes(1)*sizes(2)*sizes(3)*mytype_bytes
+if (data_type == complex_type) then
+disp = disp + sizes(1)*sizes(2)*sizes(3)*mytype_bytes
+end if
diff --git a/decomp2d/io_write_every.inc b/decomp2d/io_write_every.inc
index 3ae60c9d..4bbcadea 100644
--- a/decomp2d/io_write_every.inc
+++ b/decomp2d/io_write_every.inc
@@ -12,212 +12,212 @@
 ! This file contain common code to be included by subroutines 
 ! 'write_every_...' in io.f90
 
-  ! To write every few points of a 3D array to a file
-
-    ! work out the distribution parameters, which may be different from 
-    ! the default distribution used by the decomposition library
-    !  For exmample if nx=17 and p_row=4
-    !    distribution is: 4 4 4 5
-
-    ! If writing from the 1st element
-    !  If saving every 3 points, then 5 points to be saved (17/3)
-    !    default distribution would be 1 1 1 2
-    !    However, 1st block (1-4) contains the 3rd point
-    !             2nd block (5-8) contains the 6th point
-    !             3rd block (9-12) contains the 9th and 12th point
-    !             4th block (13-17) contains then 15th point
-    !    giving a 1 1 2 1 distribution
-    !    So cannot use the base decomposition library for such IO
-
-    ! If writing from the n-th element (n=?skip)
-    !  If saving every 3 points, then 6 points to be saved
-    !    However, 1st block (1-4) contains the 1st & 4th point
-    !             2nd block (5-8) contains the 7th point
-    !             3rd block (9-12) contains the 10th point
-    !             4th block (13-17) contains then 12th & 15th point
-    !    giving a 1 2 2 1 distribution
-
-    skip(1)=iskip
-    skip(2)=jskip
-    skip(3)=kskip
-
-    do i=1,3
-       if (from1) then
-          xst(i) = (xstart(i)+skip(i)-1)/skip(i)
-          if (mod(xstart(i)+skip(i)-1,skip(i))/=0) xst(i)=xst(i)+1
-          xen(i) = (xend(i)+skip(i)-1)/skip(i)
-       else
-          xst(i) = xstart(i)/skip(i)
-          if (mod(xstart(i),skip(i))/=0) xst(i)=xst(i)+1
-          xen(i) = xend(i)/skip(i)
-       end if
-       xsz(i) = xen(i)-xst(i)+1
-    end do
-       
-    do i=1,3
-       if (from1) then
-          yst(i) = (ystart(i)+skip(i)-1)/skip(i)
-          if (mod(ystart(i)+skip(i)-1,skip(i))/=0) yst(i)=yst(i)+1
-          yen(i) = (yend(i)+skip(i)-1)/skip(i)
-       else
-          yst(i) = ystart(i)/skip(i)
-          if (mod(ystart(i),skip(i))/=0) yst(i)=yst(i)+1
-          yen(i) = yend(i)/skip(i)
-       end if
-       ysz(i) = yen(i)-yst(i)+1
-    end do
-
-    do i=1,3
-       if (from1) then
-          zst(i) = (zstart(i)+skip(i)-1)/skip(i)
-          if (mod(zstart(i)+skip(i)-1,skip(i))/=0) zst(i)=zst(i)+1
-          zen(i) = (zend(i)+skip(i)-1)/skip(i)
-       else
-          zst(i) = zstart(i)/skip(i)
-          if (mod(zstart(i),skip(i))/=0) zst(i)=zst(i)+1
-          zen(i) = zend(i)/skip(i)
-       end if
-       zsz(i) = zen(i)-zst(i)+1
-    end do
-
-    ! if 'skip' value is large it is possible that some ranks do not 
-    ! contain any points to be written. Subarray constructor requires 
-    ! nonzero size so it is not possible to use MPI_COMM_WORLD for IO.
-    ! Create a sub communicator for this...
-    color = 1
-    key = 0  ! rank order doesn't matter
-    if (ipencil==1) then
-       if (xsz(1)==0 .or. xsz(2)==0 .or. xsz(3)==0) then
-          color = 2
-       end if
-    else if (ipencil==2) then
-       if (ysz(1)==0 .or. ysz(2)==0 .or. ysz(3)==0) then
-          color = 2
-       end if
-    else if (ipencil==3) then
-       if (zsz(1)==0 .or. zsz(2)==0 .or. zsz(3)==0) then
-          color = 2
-       end if
-    end if
-    call MPI_COMM_SPLIT(MPI_COMM_WORLD,color,key,newcomm,ierror)
-
-    if (color==1) then ! only ranks in this group do IO collectively
-       
-       ! generate subarray information
-       sizes(1) = xsz(1)
-       sizes(2) = ysz(2)
-       sizes(3) = zsz(3)
-       if (ipencil==1) then
-          subsizes(1) = xsz(1)
-          subsizes(2) = xsz(2)
-          subsizes(3) = xsz(3)
-          starts(1) = xst(1)-1
-          starts(2) = xst(2)-1
-          starts(3) = xst(3)-1
-       else if (ipencil==2) then
-          subsizes(1) = ysz(1)
-          subsizes(2) = ysz(2)
-          subsizes(3) = ysz(3)
-          starts(1) = yst(1)-1
-          starts(2) = yst(2)-1
-          starts(3) = yst(3)-1
-       else if (ipencil==3) then
-          subsizes(1) = zsz(1)
-          subsizes(2) = zsz(2)
-          subsizes(3) = zsz(3)
-          starts(1) = zst(1)-1
-          starts(2) = zst(2)-1
-          starts(3) = zst(3)-1
-       end if
-       
-       ! copy data from original array
-       ! needs a copy of original array in global coordinate 
-       if (ipencil==1) then
-          allocate(wk(xst(1):xen(1),xst(2):xen(2),xst(3):xen(3)))
-          allocate(wk2(xstart(1):xend(1),xstart(2):xend(2),xstart(3):xend(3)))
-          wk2=var
-          if (from1) then
-             do k=xst(3),xen(3)
-                do j=xst(2),xen(2)
-                   do i=xst(1),xen(1)
-                      wk(i,j,k) = wk2((i-1)*iskip+1,(j-1)*jskip+1,(k-1)*kskip+1)
-                   end do
-                end do
-             end do
-          else
-             do k=xst(3),xen(3)
-                do j=xst(2),xen(2)
-                   do i=xst(1),xen(1)
-                      wk(i,j,k) = wk2(i*iskip,j*jskip,k*kskip)
-                   end do
-                end do
-             end do
-          end if   
-       else if (ipencil==2) then
-          allocate(wk(yst(1):yen(1),yst(2):yen(2),yst(3):yen(3)))
-          allocate(wk2(ystart(1):yend(1),ystart(2):yend(2),ystart(3):yend(3)))
-          wk2=var
-          if (from1) then
-             do k=yst(3),yen(3)
-                do j=yst(2),yen(2)
-                   do i=yst(1),yen(1)
-                      wk(i,j,k) = wk2((i-1)*iskip+1,(j-1)*jskip+1,(k-1)*kskip+1)
-                   end do
-                end do
-             end do
-          else
-             do k=yst(3),yen(3)
-                do j=yst(2),yen(2)
-                   do i=yst(1),yen(1)
-                      wk(i,j,k) = wk2(i*iskip,j*jskip,k*kskip)
-                   end do
-                end do
-             end do
-          end if
-       else if (ipencil==3) then
-          allocate(wk(zst(1):zen(1),zst(2):zen(2),zst(3):zen(3)))
-          allocate(wk2(zstart(1):zend(1),zstart(2):zend(2),zstart(3):zend(3)))
-          wk2=var
-          if (from1) then
-             do k=zst(3),zen(3)
-                do j=zst(2),zen(2)
-                   do i=zst(1),zen(1)
-                      wk(i,j,k) = wk2((i-1)*iskip+1,(j-1)*jskip+1,(k-1)*kskip+1)
-                   end do
-                end do
-             end do
-          else
-             do k=zst(3),zen(3)
-                do j=zst(2),zen(2)
-                   do i=zst(1),zen(1)
-                      wk(i,j,k) = wk2(i*iskip,j*jskip,k*kskip)
-                   end do
-                end do
-             end do
-          end if
-       end if
-       deallocate(wk2)
-
-       ! MPI-IO
-       call MPI_TYPE_CREATE_SUBARRAY(3, sizes, subsizes, starts,  &
-            MPI_ORDER_FORTRAN, data_type, newtype, ierror)
-       call MPI_TYPE_COMMIT(newtype,ierror)
-       call MPI_FILE_OPEN(newcomm, filename, &
-            MPI_MODE_CREATE+MPI_MODE_WRONLY, MPI_INFO_NULL, &
-            fh, ierror)
-       filesize = 0_MPI_OFFSET_KIND
-       call MPI_FILE_SET_SIZE(fh,filesize,ierror)  ! guarantee overwriting
-       disp = 0_MPI_OFFSET_KIND
-       call MPI_FILE_SET_VIEW(fh,disp,data_type, &
-            newtype,'native',MPI_INFO_NULL,ierror)
-       call MPI_FILE_WRITE_ALL(fh, wk, &
-            subsizes(1)*subsizes(2)*subsizes(3), &
-            data_type, MPI_STATUS_IGNORE, ierror)
-       call MPI_FILE_CLOSE(fh,ierror)
-       call MPI_TYPE_FREE(newtype,ierror)
-
-       deallocate(wk)
-
-    end if ! color==1
-
-    call MPI_BARRIER(MPI_COMM_WORLD, ierror)
+! To write every few points of a 3D array to a file
+
+! work out the distribution parameters, which may be different from 
+! the default distribution used by the decomposition library
+!  For exmample if nx=17 and p_row=4
+!    distribution is: 4 4 4 5
+
+! If writing from the 1st element
+!  If saving every 3 points, then 5 points to be saved (17/3)
+!    default distribution would be 1 1 1 2
+!    However, 1st block (1-4) contains the 3rd point
+!             2nd block (5-8) contains the 6th point
+!             3rd block (9-12) contains the 9th and 12th point
+!             4th block (13-17) contains then 15th point
+!    giving a 1 1 2 1 distribution
+!    So cannot use the base decomposition library for such IO
+
+! If writing from the n-th element (n=?skip)
+!  If saving every 3 points, then 6 points to be saved
+!    However, 1st block (1-4) contains the 1st & 4th point
+!             2nd block (5-8) contains the 7th point
+!             3rd block (9-12) contains the 10th point
+!             4th block (13-17) contains then 12th & 15th point
+!    giving a 1 2 2 1 distribution
+
+skip(1)=iskip
+skip(2)=jskip
+skip(3)=kskip
+
+do i=1,3
+if (from1) then
+xst(i) = (xstart(i)+skip(i)-1)/skip(i)
+if (mod(xstart(i)+skip(i)-1,skip(i))/=0) xst(i)=xst(i)+1
+xen(i) = (xend(i)+skip(i)-1)/skip(i)
+else
+xst(i) = xstart(i)/skip(i)
+if (mod(xstart(i),skip(i))/=0) xst(i)=xst(i)+1
+xen(i) = xend(i)/skip(i)
+end if
+xsz(i) = xen(i)-xst(i)+1
+end do
+
+do i=1,3
+if (from1) then
+yst(i) = (ystart(i)+skip(i)-1)/skip(i)
+if (mod(ystart(i)+skip(i)-1,skip(i))/=0) yst(i)=yst(i)+1
+yen(i) = (yend(i)+skip(i)-1)/skip(i)
+else
+yst(i) = ystart(i)/skip(i)
+if (mod(ystart(i),skip(i))/=0) yst(i)=yst(i)+1
+yen(i) = yend(i)/skip(i)
+end if
+ysz(i) = yen(i)-yst(i)+1
+end do
+
+do i=1,3
+if (from1) then
+zst(i) = (zstart(i)+skip(i)-1)/skip(i)
+if (mod(zstart(i)+skip(i)-1,skip(i))/=0) zst(i)=zst(i)+1
+zen(i) = (zend(i)+skip(i)-1)/skip(i)
+else
+zst(i) = zstart(i)/skip(i)
+if (mod(zstart(i),skip(i))/=0) zst(i)=zst(i)+1
+zen(i) = zend(i)/skip(i)
+end if
+zsz(i) = zen(i)-zst(i)+1
+end do
+
+! if 'skip' value is large it is possible that some ranks do not 
+! contain any points to be written. Subarray constructor requires 
+! nonzero size so it is not possible to use MPI_COMM_WORLD for IO.
+! Create a sub communicator for this...
+color = 1
+key = 0  ! rank order doesn't matter
+if (ipencil==1) then
+if (xsz(1)==0 .or. xsz(2)==0 .or. xsz(3)==0) then
+color = 2
+end if
+else if (ipencil==2) then
+if (ysz(1)==0 .or. ysz(2)==0 .or. ysz(3)==0) then
+color = 2
+end if
+else if (ipencil==3) then
+if (zsz(1)==0 .or. zsz(2)==0 .or. zsz(3)==0) then
+color = 2
+end if
+end if
+call MPI_COMM_SPLIT(MPI_COMM_WORLD,color,key,newcomm,ierror)
+
+if (color==1) then ! only ranks in this group do IO collectively
+
+! generate subarray information
+sizes(1) = xsz(1)
+sizes(2) = ysz(2)
+sizes(3) = zsz(3)
+if (ipencil==1) then
+subsizes(1) = xsz(1)
+subsizes(2) = xsz(2)
+subsizes(3) = xsz(3)
+starts(1) = xst(1)-1
+starts(2) = xst(2)-1
+starts(3) = xst(3)-1
+else if (ipencil==2) then
+subsizes(1) = ysz(1)
+subsizes(2) = ysz(2)
+subsizes(3) = ysz(3)
+starts(1) = yst(1)-1
+starts(2) = yst(2)-1
+starts(3) = yst(3)-1
+else if (ipencil==3) then
+subsizes(1) = zsz(1)
+subsizes(2) = zsz(2)
+subsizes(3) = zsz(3)
+starts(1) = zst(1)-1
+starts(2) = zst(2)-1
+starts(3) = zst(3)-1
+end if
+
+! copy data from original array
+! needs a copy of original array in global coordinate 
+if (ipencil==1) then
+allocate(wk(xst(1):xen(1),xst(2):xen(2),xst(3):xen(3)))
+allocate(wk2(xstart(1):xend(1),xstart(2):xend(2),xstart(3):xend(3)))
+wk2=var
+if (from1) then
+do k=xst(3),xen(3)
+do j=xst(2),xen(2)
+do i=xst(1),xen(1)
+wk(i,j,k) = wk2((i-1)*iskip+1,(j-1)*jskip+1,(k-1)*kskip+1)
+end do
+end do
+end do
+else
+do k=xst(3),xen(3)
+do j=xst(2),xen(2)
+do i=xst(1),xen(1)
+wk(i,j,k) = wk2(i*iskip,j*jskip,k*kskip)
+end do
+end do
+end do
+end if   
+else if (ipencil==2) then
+allocate(wk(yst(1):yen(1),yst(2):yen(2),yst(3):yen(3)))
+allocate(wk2(ystart(1):yend(1),ystart(2):yend(2),ystart(3):yend(3)))
+wk2=var
+if (from1) then
+do k=yst(3),yen(3)
+do j=yst(2),yen(2)
+do i=yst(1),yen(1)
+wk(i,j,k) = wk2((i-1)*iskip+1,(j-1)*jskip+1,(k-1)*kskip+1)
+end do
+end do
+end do
+else
+do k=yst(3),yen(3)
+do j=yst(2),yen(2)
+do i=yst(1),yen(1)
+wk(i,j,k) = wk2(i*iskip,j*jskip,k*kskip)
+end do
+end do
+end do
+end if
+else if (ipencil==3) then
+allocate(wk(zst(1):zen(1),zst(2):zen(2),zst(3):zen(3)))
+allocate(wk2(zstart(1):zend(1),zstart(2):zend(2),zstart(3):zend(3)))
+wk2=var
+if (from1) then
+do k=zst(3),zen(3)
+do j=zst(2),zen(2)
+do i=zst(1),zen(1)
+wk(i,j,k) = wk2((i-1)*iskip+1,(j-1)*jskip+1,(k-1)*kskip+1)
+end do
+end do
+end do
+else
+do k=zst(3),zen(3)
+do j=zst(2),zen(2)
+do i=zst(1),zen(1)
+wk(i,j,k) = wk2(i*iskip,j*jskip,k*kskip)
+end do
+end do
+end do
+end if
+end if
+deallocate(wk2)
+
+! MPI-IO
+call MPI_TYPE_CREATE_SUBARRAY(3, sizes, subsizes, starts,  &
+MPI_ORDER_FORTRAN, data_type, newtype, ierror)
+call MPI_TYPE_COMMIT(newtype,ierror)
+call MPI_FILE_OPEN(newcomm, filename, &
+MPI_MODE_CREATE+MPI_MODE_WRONLY, MPI_INFO_NULL, &
+fh, ierror)
+filesize = 0_MPI_OFFSET_KIND
+call MPI_FILE_SET_SIZE(fh,filesize,ierror)  ! guarantee overwriting
+disp = 0_MPI_OFFSET_KIND
+call MPI_FILE_SET_VIEW(fh,disp,data_type, &
+newtype,'native',MPI_INFO_NULL,ierror)
+call MPI_FILE_WRITE_ALL(fh, wk, &
+subsizes(1)*subsizes(2)*subsizes(3), &
+data_type, MPI_STATUS_IGNORE, ierror)
+call MPI_FILE_CLOSE(fh,ierror)
+call MPI_TYPE_FREE(newtype,ierror)
+
+deallocate(wk)
+
+end if ! color==1
+
+call MPI_BARRIER(MPI_COMM_WORLD, ierror)
diff --git a/decomp2d/io_write_one.inc b/decomp2d/io_write_one.inc
index 37c291a4..612e5344 100644
--- a/decomp2d/io_write_one.inc
+++ b/decomp2d/io_write_one.inc
@@ -12,71 +12,71 @@
 ! This file contain common code to be included by subroutines 
 ! 'mpiio_write_one_...' in io.f90
 
-    ! Using MPI-IO to write a distributed 3D array into a file
+! Using MPI-IO to write a distributed 3D array into a file
 
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       call get_decomp_info(decomp)
-    end if
-    
-    ! determine subarray parameters
-    sizes(1) = decomp%xsz(1)
-    sizes(2) = decomp%ysz(2)
-    sizes(3) = decomp%zsz(3)
-    
-    if (ipencil == 1) then
-       subsizes(1) = decomp%xsz(1)
-       subsizes(2) = decomp%xsz(2)
-       subsizes(3) = decomp%xsz(3)
-       starts(1) = decomp%xst(1)-1  ! 0-based index
-       starts(2) = decomp%xst(2)-1
-       starts(3) = decomp%xst(3)-1
-    else if (ipencil == 2) then
-       subsizes(1) = decomp%ysz(1)
-       subsizes(2) = decomp%ysz(2)
-       subsizes(3) = decomp%ysz(3)
-       starts(1) = decomp%yst(1)-1
-       starts(2) = decomp%yst(2)-1
-       starts(3) = decomp%yst(3)-1
-    else if (ipencil == 3) then
-       subsizes(1) = decomp%zsz(1)
-       subsizes(2) = decomp%zsz(2)
-       subsizes(3) = decomp%zsz(3)
-       starts(1) = decomp%zst(1)-1
-       starts(2) = decomp%zst(2)-1
-       starts(3) = decomp%zst(3)-1
-    endif
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+call get_decomp_info(decomp)
+end if
+
+! determine subarray parameters
+sizes(1) = decomp%xsz(1)
+sizes(2) = decomp%ysz(2)
+sizes(3) = decomp%zsz(3)
+
+if (ipencil == 1) then
+subsizes(1) = decomp%xsz(1)
+subsizes(2) = decomp%xsz(2)
+subsizes(3) = decomp%xsz(3)
+starts(1) = decomp%xst(1)-1  ! 0-based index
+starts(2) = decomp%xst(2)-1
+starts(3) = decomp%xst(3)-1
+else if (ipencil == 2) then
+subsizes(1) = decomp%ysz(1)
+subsizes(2) = decomp%ysz(2)
+subsizes(3) = decomp%ysz(3)
+starts(1) = decomp%yst(1)-1
+starts(2) = decomp%yst(2)-1
+starts(3) = decomp%yst(3)-1
+else if (ipencil == 3) then
+subsizes(1) = decomp%zsz(1)
+subsizes(2) = decomp%zsz(2)
+subsizes(3) = decomp%zsz(3)
+starts(1) = decomp%zst(1)-1
+starts(2) = decomp%zst(2)-1
+starts(3) = decomp%zst(3)-1
+endif
 
 #ifdef T3PIO
-    call MPI_INFO_CREATE(info, ierror)
-    gs = ceiling(real(sizes(1),mytype)*real(sizes(2),mytype)* &
-         real(sizes(3),mytype)/1024./1024.)
-    call t3pio_set_info(MPI_COMM_WORLD, info, "./", ierror, &
-         GLOBAL_SIZE=gs, factor=1)
+call MPI_INFO_CREATE(info, ierror)
+gs = ceiling(real(sizes(1),mytype)*real(sizes(2),mytype)* &
+real(sizes(3),mytype)/1024./1024.)
+call t3pio_set_info(MPI_COMM_WORLD, info, "./", ierror, &
+GLOBAL_SIZE=gs, factor=1)
 #endif
 
-    call MPI_TYPE_CREATE_SUBARRAY(3, sizes, subsizes, starts,  &
-         MPI_ORDER_FORTRAN, data_type, newtype, ierror)
-    call MPI_TYPE_COMMIT(newtype,ierror)
+call MPI_TYPE_CREATE_SUBARRAY(3, sizes, subsizes, starts,  &
+MPI_ORDER_FORTRAN, data_type, newtype, ierror)
+call MPI_TYPE_COMMIT(newtype,ierror)
 #ifdef T3PIO
-    call MPI_FILE_OPEN(MPI_COMM_WORLD, filename, &
-         MPI_MODE_CREATE+MPI_MODE_WRONLY, info, fh, ierror)
+call MPI_FILE_OPEN(MPI_COMM_WORLD, filename, &
+MPI_MODE_CREATE+MPI_MODE_WRONLY, info, fh, ierror)
 #else
-    call MPI_FILE_OPEN(MPI_COMM_WORLD, filename, &
-         MPI_MODE_CREATE+MPI_MODE_WRONLY, MPI_INFO_NULL, &
-         fh, ierror)
+call MPI_FILE_OPEN(MPI_COMM_WORLD, filename, &
+MPI_MODE_CREATE+MPI_MODE_WRONLY, MPI_INFO_NULL, &
+fh, ierror)
 #endif
-    filesize = 0_MPI_OFFSET_KIND
-    call MPI_FILE_SET_SIZE(fh,filesize,ierror)  ! guarantee overwriting
-    disp = 0_MPI_OFFSET_KIND
-    call MPI_FILE_SET_VIEW(fh,disp,data_type, &
-         newtype,'native',MPI_INFO_NULL,ierror)
-    call MPI_FILE_WRITE_ALL(fh, var, &
-         subsizes(1)*subsizes(2)*subsizes(3), &
-         data_type, MPI_STATUS_IGNORE, ierror)
-    call MPI_FILE_CLOSE(fh,ierror)
-    call MPI_TYPE_FREE(newtype,ierror)
+filesize = 0_MPI_OFFSET_KIND
+call MPI_FILE_SET_SIZE(fh,filesize,ierror)  ! guarantee overwriting
+disp = 0_MPI_OFFSET_KIND
+call MPI_FILE_SET_VIEW(fh,disp,data_type, &
+newtype,'native',MPI_INFO_NULL,ierror)
+call MPI_FILE_WRITE_ALL(fh, var, &
+subsizes(1)*subsizes(2)*subsizes(3), &
+data_type, MPI_STATUS_IGNORE, ierror)
+call MPI_FILE_CLOSE(fh,ierror)
+call MPI_TYPE_FREE(newtype,ierror)
 #ifdef T3PIO
-    call MPI_INFO_FREE(info,ierror)
+call MPI_INFO_FREE(info,ierror)
 #endif
diff --git a/decomp2d/io_write_plane.inc b/decomp2d/io_write_plane.inc
index cde541a3..dd8b5799 100644
--- a/decomp2d/io_write_plane.inc
+++ b/decomp2d/io_write_plane.inc
@@ -12,112 +12,112 @@
 ! This file contain common code to be included by subroutines 
 ! 'mpiio_write_plane_3d_...' in io.f90
 
-    ! It is much easier to implement if all mpi ranks participate I/O.
-    ! Transpose the 3D data if necessary.
+! It is much easier to implement if all mpi ranks participate I/O.
+! Transpose the 3D data if necessary.
 
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       call get_decomp_info(decomp)
-    end if
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+call get_decomp_info(decomp)
+end if
 
-    if (iplane==1) then
-       allocate(wk(decomp%xsz(1),decomp%xsz(2),decomp%xsz(3)))
-       if (ipencil==1) then
-          wk = var
-       else if (ipencil==2) then
-          call transpose_y_to_x(var,wk,decomp)
-       else if (ipencil==3) then
-          allocate(wk2(decomp%ysz(1),decomp%ysz(2),decomp%ysz(3)))
-          call transpose_z_to_y(var,wk2,decomp)
-          call transpose_y_to_x(wk2,wk,decomp)
-          deallocate(wk2)
-       end if
-       allocate(wk2d(1,decomp%xsz(2),decomp%xsz(3)))
-       do k=1,decomp%xsz(3)
-          do j=1,decomp%xsz(2)
-             wk2d(1,j,k)=wk(n,j,k)
-          end do
-       end do
-       sizes(1) = 1
-       sizes(2) = decomp%ysz(2)
-       sizes(3) = decomp%zsz(3)
-       subsizes(1) = 1
-       subsizes(2) = decomp%xsz(2)
-       subsizes(3) = decomp%xsz(3)
-       starts(1) = 0
-       starts(2) = decomp%xst(2)-1
-       starts(3) = decomp%xst(3)-1
+if (iplane==1) then
+allocate(wk(decomp%xsz(1),decomp%xsz(2),decomp%xsz(3)))
+if (ipencil==1) then
+wk = var
+else if (ipencil==2) then
+call transpose_y_to_x(var,wk,decomp)
+else if (ipencil==3) then
+allocate(wk2(decomp%ysz(1),decomp%ysz(2),decomp%ysz(3)))
+call transpose_z_to_y(var,wk2,decomp)
+call transpose_y_to_x(wk2,wk,decomp)
+deallocate(wk2)
+end if
+allocate(wk2d(1,decomp%xsz(2),decomp%xsz(3)))
+do k=1,decomp%xsz(3)
+do j=1,decomp%xsz(2)
+wk2d(1,j,k)=wk(n,j,k)
+end do
+end do
+sizes(1) = 1
+sizes(2) = decomp%ysz(2)
+sizes(3) = decomp%zsz(3)
+subsizes(1) = 1
+subsizes(2) = decomp%xsz(2)
+subsizes(3) = decomp%xsz(3)
+starts(1) = 0
+starts(2) = decomp%xst(2)-1
+starts(3) = decomp%xst(3)-1
 
-    else if (iplane==2) then
-       allocate(wk(decomp%ysz(1),decomp%ysz(2),decomp%ysz(3)))
-       if (ipencil==1) then
-          call transpose_x_to_y(var,wk,decomp)
-       else if (ipencil==2) then
-          wk = var
-       else if (ipencil==3) then
-          call transpose_z_to_y(var,wk,decomp)
-       end if
-       allocate(wk2d(decomp%ysz(1),1,decomp%ysz(3)))
-       do k=1,decomp%ysz(3)
-          do i=1,decomp%ysz(1)
-             wk2d(i,1,k)=wk(i,n,k)
-          end do
-       end do
-       sizes(1) = decomp%xsz(1)
-       sizes(2) = 1
-       sizes(3) = decomp%zsz(3)
-       subsizes(1) = decomp%ysz(1)
-       subsizes(2) = 1
-       subsizes(3) = decomp%ysz(3)
-       starts(1) = decomp%yst(1)-1
-       starts(2) = 0
-       starts(3) = decomp%yst(3)-1
+else if (iplane==2) then
+allocate(wk(decomp%ysz(1),decomp%ysz(2),decomp%ysz(3)))
+if (ipencil==1) then
+call transpose_x_to_y(var,wk,decomp)
+else if (ipencil==2) then
+wk = var
+else if (ipencil==3) then
+call transpose_z_to_y(var,wk,decomp)
+end if
+allocate(wk2d(decomp%ysz(1),1,decomp%ysz(3)))
+do k=1,decomp%ysz(3)
+do i=1,decomp%ysz(1)
+wk2d(i,1,k)=wk(i,n,k)
+end do
+end do
+sizes(1) = decomp%xsz(1)
+sizes(2) = 1
+sizes(3) = decomp%zsz(3)
+subsizes(1) = decomp%ysz(1)
+subsizes(2) = 1
+subsizes(3) = decomp%ysz(3)
+starts(1) = decomp%yst(1)-1
+starts(2) = 0
+starts(3) = decomp%yst(3)-1
 
-    else if (iplane==3) then
-       allocate(wk(decomp%zsz(1),decomp%zsz(2),decomp%zsz(3)))
-       if (ipencil==1) then
-          allocate(wk2(decomp%ysz(1),decomp%ysz(2),decomp%ysz(3)))
-          call transpose_x_to_y(var,wk2,decomp)
-          call transpose_y_to_z(wk2,wk,decomp)
-          deallocate(wk2)
-       else if (ipencil==2) then
-          call transpose_y_to_z(var,wk,decomp)
-       else if (ipencil==3) then
-          wk = var
-       end if
-       allocate(wk2d(decomp%zsz(1),decomp%zsz(2),1))
-       do j=1,decomp%zsz(2)
-          do i=1,decomp%zsz(1) 
-             wk2d(i,j,1)=wk(i,j,n)
-          end do
-       end do
-       sizes(1) = decomp%xsz(1)
-       sizes(2) = decomp%ysz(2)
-       sizes(3) = 1
-       subsizes(1) = decomp%zsz(1)
-       subsizes(2) = decomp%zsz(2)
-       subsizes(3) = 1
-       starts(1) = decomp%zst(1)-1
-       starts(2) = decomp%zst(2)-1
-       starts(3) = 0
-    end if
+else if (iplane==3) then
+allocate(wk(decomp%zsz(1),decomp%zsz(2),decomp%zsz(3)))
+if (ipencil==1) then
+allocate(wk2(decomp%ysz(1),decomp%ysz(2),decomp%ysz(3)))
+call transpose_x_to_y(var,wk2,decomp)
+call transpose_y_to_z(wk2,wk,decomp)
+deallocate(wk2)
+else if (ipencil==2) then
+call transpose_y_to_z(var,wk,decomp)
+else if (ipencil==3) then
+wk = var
+end if
+allocate(wk2d(decomp%zsz(1),decomp%zsz(2),1))
+do j=1,decomp%zsz(2)
+do i=1,decomp%zsz(1) 
+wk2d(i,j,1)=wk(i,j,n)
+end do
+end do
+sizes(1) = decomp%xsz(1)
+sizes(2) = decomp%ysz(2)
+sizes(3) = 1
+subsizes(1) = decomp%zsz(1)
+subsizes(2) = decomp%zsz(2)
+subsizes(3) = 1
+starts(1) = decomp%zst(1)-1
+starts(2) = decomp%zst(2)-1
+starts(3) = 0
+end if
 
-    call MPI_TYPE_CREATE_SUBARRAY(3, sizes, subsizes, starts,  &
-         MPI_ORDER_FORTRAN, data_type, newtype, ierror)
-    call MPI_TYPE_COMMIT(newtype,ierror)
-    call MPI_FILE_OPEN(MPI_COMM_WORLD, filename, &
-         MPI_MODE_CREATE+MPI_MODE_WRONLY, MPI_INFO_NULL, &
-         fh, ierror)
-    filesize = 0_MPI_OFFSET_KIND
-    call MPI_FILE_SET_SIZE(fh,filesize,ierror)  ! guarantee overwriting
-    disp = 0_MPI_OFFSET_KIND
-    call MPI_FILE_SET_VIEW(fh,disp,data_type, &
-         newtype,'native',MPI_INFO_NULL,ierror)
-    call MPI_FILE_WRITE_ALL(fh, wk2d, &
-         subsizes(1)*subsizes(2)*subsizes(3), &
-         data_type, MPI_STATUS_IGNORE, ierror)
-    call MPI_FILE_CLOSE(fh,ierror)
-    call MPI_TYPE_FREE(newtype,ierror)
-    
-    deallocate(wk,wk2d)
+call MPI_TYPE_CREATE_SUBARRAY(3, sizes, subsizes, starts,  &
+MPI_ORDER_FORTRAN, data_type, newtype, ierror)
+call MPI_TYPE_COMMIT(newtype,ierror)
+call MPI_FILE_OPEN(MPI_COMM_WORLD, filename, &
+MPI_MODE_CREATE+MPI_MODE_WRONLY, MPI_INFO_NULL, &
+fh, ierror)
+filesize = 0_MPI_OFFSET_KIND
+call MPI_FILE_SET_SIZE(fh,filesize,ierror)  ! guarantee overwriting
+disp = 0_MPI_OFFSET_KIND
+call MPI_FILE_SET_VIEW(fh,disp,data_type, &
+newtype,'native',MPI_INFO_NULL,ierror)
+call MPI_FILE_WRITE_ALL(fh, wk2d, &
+subsizes(1)*subsizes(2)*subsizes(3), &
+data_type, MPI_STATUS_IGNORE, ierror)
+call MPI_FILE_CLOSE(fh,ierror)
+call MPI_TYPE_FREE(newtype,ierror)
+
+deallocate(wk,wk2d)
diff --git a/decomp2d/io_write_var.inc b/decomp2d/io_write_var.inc
index 38a27ea5..6a82d40b 100644
--- a/decomp2d/io_write_var.inc
+++ b/decomp2d/io_write_var.inc
@@ -12,57 +12,57 @@
 ! This file contain common code to be included by subroutines 
 ! 'write_var_...' in io.f90
 
-  ! Using MPI-IO to write a distributed 3D variable to a file. File 
-  ! operations (open/close) need to be done in calling application. This
-  ! allows multiple variables to be written to a single file. Together 
-  ! with the corresponding read operation, this is the perfect solution
-  ! for applications to perform restart/checkpointing.
+! Using MPI-IO to write a distributed 3D variable to a file. File 
+! operations (open/close) need to be done in calling application. This
+! allows multiple variables to be written to a single file. Together 
+! with the corresponding read operation, this is the perfect solution
+! for applications to perform restart/checkpointing.
 
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       call get_decomp_info(decomp)
-    end if
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+call get_decomp_info(decomp)
+end if
 
-    ! Create file type and set file view
-    sizes(1) = decomp%xsz(1)
-    sizes(2) = decomp%ysz(2)
-    sizes(3) = decomp%zsz(3)
-    if (ipencil == 1) then
-       subsizes(1) = decomp%xsz(1)
-       subsizes(2) = decomp%xsz(2)
-       subsizes(3) = decomp%xsz(3)
-       starts(1) = decomp%xst(1)-1  ! 0-based index
-       starts(2) = decomp%xst(2)-1
-       starts(3) = decomp%xst(3)-1
-    else if (ipencil == 2) then
-       subsizes(1) = decomp%ysz(1)
-       subsizes(2) = decomp%ysz(2)
-       subsizes(3) = decomp%ysz(3)
-       starts(1) = decomp%yst(1)-1
-       starts(2) = decomp%yst(2)-1
-       starts(3) = decomp%yst(3)-1
-    else if (ipencil == 3) then
-       subsizes(1) = decomp%zsz(1)
-       subsizes(2) = decomp%zsz(2)
-       subsizes(3) = decomp%zsz(3)
-       starts(1) = decomp%zst(1)-1
-       starts(2) = decomp%zst(2)-1
-       starts(3) = decomp%zst(3)-1
-    endif
-    
-    call MPI_TYPE_CREATE_SUBARRAY(3, sizes, subsizes, starts,  &
-         MPI_ORDER_FORTRAN, data_type, newtype, ierror)
-    call MPI_TYPE_COMMIT(newtype,ierror)
-    call MPI_FILE_SET_VIEW(fh,disp,data_type, &
-         newtype,'native',MPI_INFO_NULL,ierror)
-    call MPI_FILE_WRITE_ALL(fh, var, &
-         subsizes(1)*subsizes(2)*subsizes(3), &
-         data_type, MPI_STATUS_IGNORE, ierror)
-    call MPI_TYPE_FREE(newtype,ierror)
+! Create file type and set file view
+sizes(1) = decomp%xsz(1)
+sizes(2) = decomp%ysz(2)
+sizes(3) = decomp%zsz(3)
+if (ipencil == 1) then
+subsizes(1) = decomp%xsz(1)
+subsizes(2) = decomp%xsz(2)
+subsizes(3) = decomp%xsz(3)
+starts(1) = decomp%xst(1)-1  ! 0-based index
+starts(2) = decomp%xst(2)-1
+starts(3) = decomp%xst(3)-1
+else if (ipencil == 2) then
+subsizes(1) = decomp%ysz(1)
+subsizes(2) = decomp%ysz(2)
+subsizes(3) = decomp%ysz(3)
+starts(1) = decomp%yst(1)-1
+starts(2) = decomp%yst(2)-1
+starts(3) = decomp%yst(3)-1
+else if (ipencil == 3) then
+subsizes(1) = decomp%zsz(1)
+subsizes(2) = decomp%zsz(2)
+subsizes(3) = decomp%zsz(3)
+starts(1) = decomp%zst(1)-1
+starts(2) = decomp%zst(2)-1
+starts(3) = decomp%zst(3)-1
+endif
 
-    ! update displacement for the next write operation
-    disp = disp + sizes(1)*sizes(2)*sizes(3)*mytype_bytes
-    if (data_type == complex_type) then
-       disp = disp + sizes(1)*sizes(2)*sizes(3)*mytype_bytes
-    end if
+call MPI_TYPE_CREATE_SUBARRAY(3, sizes, subsizes, starts,  &
+MPI_ORDER_FORTRAN, data_type, newtype, ierror)
+call MPI_TYPE_COMMIT(newtype,ierror)
+call MPI_FILE_SET_VIEW(fh,disp,data_type, &
+newtype,'native',MPI_INFO_NULL,ierror)
+call MPI_FILE_WRITE_ALL(fh, var, &
+subsizes(1)*subsizes(2)*subsizes(3), &
+data_type, MPI_STATUS_IGNORE, ierror)
+call MPI_TYPE_FREE(newtype,ierror)
+
+! update displacement for the next write operation
+disp = disp + sizes(1)*sizes(2)*sizes(3)*mytype_bytes
+if (data_type == complex_type) then
+disp = disp + sizes(1)*sizes(2)*sizes(3)*mytype_bytes
+end if
diff --git a/decomp2d/mem_merge.f90 b/decomp2d/mem_merge.f90
index afbebd50..e1ff36fb 100644
--- a/decomp2d/mem_merge.f90
+++ b/decomp2d/mem_merge.f90
@@ -1,92 +1,92 @@
-!=======================================================================
-! This is part of the 2DECOMP&FFT library
-! 
-! 2DECOMP&FFT is a software framework for general-purpose 2D (pencil) 
-! decomposition. It also implements a highly scalable distributed
-! three-dimensional Fast Fourier Transform (FFT).
-!
-! Copyright (C) 2009-2011 Ning Li, the Numerical Algorithms Group (NAG)
-!
-!=======================================================================
+  !=======================================================================
+  ! This is part of the 2DECOMP&FFT library
+  ! 
+  ! 2DECOMP&FFT is a software framework for general-purpose 2D (pencil) 
+  ! decomposition. It also implements a highly scalable distributed
+  ! three-dimensional Fast Fourier Transform (FFT).
+  !
+  ! Copyright (C) 2009-2011 Ning Li, the Numerical Algorithms Group (NAG)
+  !
+  !=======================================================================
+  
+  ! This file contain duplicated code that scatters data from the
+  ! MPI_ALLTOALLV receive buffer to destinations. It is 'included' by two 
+  ! subroutines in decomp_2d.f90
+  
+  ! Note:
+  !   in  --> receive buffer
+  !   out --> destination array
+  !   pos --> pointer for the receive buffer
+  !    - for normal ALLTOALLV, points to the beginning of receive buffer (=1)
+  !    - for shared memory code, note the receive buffer is shared by all cores
+  !      on same node, so 'pos' points to the correct location for this core
+  
+  integer, intent(IN) :: ndir
+  integer, intent(IN) :: iproc
+  integer, dimension(0:iproc-1), intent(IN) :: dist
+  TYPE(DECOMP_INFO), intent(IN) :: decomp
 
-! This file contain duplicated code that scatters data from the
-! MPI_ALLTOALLV receive buffer to destinations. It is 'included' by two 
-! subroutines in decomp_2d.f90
+  integer :: i,j,k, m,i1,i2,pos
 
-! Note:
-!   in  --> receive buffer
-!   out --> destination array
-!   pos --> pointer for the receive buffer
-!    - for normal ALLTOALLV, points to the beginning of receive buffer (=1)
-!    - for shared memory code, note the receive buffer is shared by all cores
-!      on same node, so 'pos' points to the correct location for this core
-
-    integer, intent(IN) :: ndir
-    integer, intent(IN) :: iproc
-    integer, dimension(0:iproc-1), intent(IN) :: dist
-    TYPE(DECOMP_INFO), intent(IN) :: decomp
-    
-    integer :: i,j,k, m,i1,i2,pos
-    
 #ifndef SHM
-    pos = 1
+  pos = 1
 #endif
-    do m=0,iproc-1
-       if (m==0) then
-          i1 = 1
-          i2 = dist(0)
-       else
-          i1 = i2+1
-          i2 = i1+dist(m)-1
-       endif
+  do m=0,iproc-1
+     if (m==0) then
+        i1 = 1
+        i2 = dist(0)
+     else
+        i1 = i2+1
+        i2 = i1+dist(m)-1
+     endif
 
-       if (ndir==1) then
+     if (ndir==1) then
 #ifdef SHM
-          pos = decomp%y1disp_o(m) + 1
+        pos = decomp%y1disp_o(m) + 1
 #endif
-          do k=1,n3
-             do j=i1,i2
-                do i=1,n1
-                   out(i,j,k) = in(pos)
-                   pos = pos + 1
-                enddo
-             enddo
-          enddo
-       else if (ndir==2) then
+        do k=1,n3
+           do j=i1,i2
+              do i=1,n1
+                 out(i,j,k) = in(pos)
+                 pos = pos + 1
+              enddo
+           enddo
+        enddo
+     else if (ndir==2) then
 #ifdef SHM
-          pos = decomp%z2disp_o(m) + 1
+        pos = decomp%z2disp_o(m) + 1
 #endif
-          do k=i1,i2
-             do j=1,n2
-                do i=1,n1
-                   out(i,j,k) = in(pos)
-                   pos = pos + 1
-                enddo
-             enddo
-          enddo
-       else if (ndir==3) then
+        do k=i1,i2
+           do j=1,n2
+              do i=1,n1
+                 out(i,j,k) = in(pos)
+                 pos = pos + 1
+              enddo
+           enddo
+        enddo
+     else if (ndir==3) then
 #ifdef SHM
-          pos = decomp%y2disp_o(m) + 1
+        pos = decomp%y2disp_o(m) + 1
 #endif
-          do k=1,n3
-             do j=i1,i2
-                do i=1,n1
-                   out(i,j,k) = in(pos)
-                   pos = pos + 1
-                enddo
-             enddo
-          enddo
-       else if (ndir==4) then
+        do k=1,n3
+           do j=i1,i2
+              do i=1,n1
+                 out(i,j,k) = in(pos)
+                 pos = pos + 1
+              enddo
+           enddo
+        enddo
+     else if (ndir==4) then
 #ifdef SHM
-          pos = decomp%x1disp_o(m) + 1
+        pos = decomp%x1disp_o(m) + 1
 #endif
-          do k=1,n3
-             do j=1,n2
-                do i=i1,i2
-                   out(i,j,k) = in(pos)
-                   pos = pos + 1
-                enddo
-             enddo
-          enddo
-       endif
-    enddo
+        do k=1,n3
+           do j=1,n2
+              do i=i1,i2
+                 out(i,j,k) = in(pos)
+                 pos = pos + 1
+              enddo
+           enddo
+        enddo
+     endif
+  enddo
diff --git a/decomp2d/mem_split.f90 b/decomp2d/mem_split.f90
index f1b5597f..bb56c953 100644
--- a/decomp2d/mem_split.f90
+++ b/decomp2d/mem_split.f90
@@ -1,92 +1,92 @@
-!=======================================================================
-! This is part of the 2DECOMP&FFT library
-! 
-! 2DECOMP&FFT is a software framework for general-purpose 2D (pencil) 
-! decomposition. It also implements a highly scalable distributed
-! three-dimensional Fast Fourier Transform (FFT).
-!
-! Copyright (C) 2009-2011 Ning Li, the Numerical Algorithms Group (NAG)
-!
-!=======================================================================
+  !=======================================================================
+  ! This is part of the 2DECOMP&FFT library
+  ! 
+  ! 2DECOMP&FFT is a software framework for general-purpose 2D (pencil) 
+  ! decomposition. It also implements a highly scalable distributed
+  ! three-dimensional Fast Fourier Transform (FFT).
+  !
+  ! Copyright (C) 2009-2011 Ning Li, the Numerical Algorithms Group (NAG)
+  !
+  !=======================================================================
+  
+  ! This file contain duplicated code that gathers data from source to 
+  ! MPI_ALLTOALLV send buffer. It is 'included' by two subroutines in 
+  ! decomp_2d.f90
+  
+  ! Note:
+  !   in  --> source array
+  !   out --> send buffer
+  !   pos --> pointer for the send buffer
+  !    - for normal ALLTOALLV, points to the beginning of send buffer (=1)
+  !    - for shared memory code, note the send buffer is shared by all cores
+  !      on same node, so 'pos' points to the correct location for this core 
+  
+  integer, intent(IN) :: ndir
+  integer, intent(IN) :: iproc
+  integer, dimension(0:iproc-1), intent(IN) :: dist
+  TYPE(DECOMP_INFO), intent(IN) :: decomp
 
-! This file contain duplicated code that gathers data from source to 
-! MPI_ALLTOALLV send buffer. It is 'included' by two subroutines in 
-! decomp_2d.f90
+  integer :: i,j,k, m,i1,i2,pos
 
-! Note:
-!   in  --> source array
-!   out --> send buffer
-!   pos --> pointer for the send buffer
-!    - for normal ALLTOALLV, points to the beginning of send buffer (=1)
-!    - for shared memory code, note the send buffer is shared by all cores
-!      on same node, so 'pos' points to the correct location for this core 
-
-    integer, intent(IN) :: ndir
-    integer, intent(IN) :: iproc
-    integer, dimension(0:iproc-1), intent(IN) :: dist
-    TYPE(DECOMP_INFO), intent(IN) :: decomp
-    
-    integer :: i,j,k, m,i1,i2,pos
-    
 #ifndef SHM
-    pos = 1
+  pos = 1
 #endif
-    do m=0,iproc-1
-       if (m==0) then 
-          i1 = 1
-          i2 = dist(0)
-       else
-          i1 = i2+1
-          i2 = i1+dist(m)-1
-       endif
+  do m=0,iproc-1
+     if (m==0) then 
+        i1 = 1
+        i2 = dist(0)
+     else
+        i1 = i2+1
+        i2 = i1+dist(m)-1
+     endif
 
-       if (ndir==1) then
+     if (ndir==1) then
 #ifdef SHM
-          pos = decomp%x1disp_o(m) + 1
+        pos = decomp%x1disp_o(m) + 1
 #endif
-          do k=1,n3
-             do j=1,n2
-                do i=i1,i2
-                   out(pos) = in(i,j,k)
-                   pos = pos + 1
-                enddo
-             enddo
-          enddo
-       else if (ndir==2) then
+        do k=1,n3
+           do j=1,n2
+              do i=i1,i2
+                 out(pos) = in(i,j,k)
+                 pos = pos + 1
+              enddo
+           enddo
+        enddo
+     else if (ndir==2) then
 #ifdef SHM
-          pos = decomp%y2disp_o(m) + 1
+        pos = decomp%y2disp_o(m) + 1
 #endif
-          do k=1,n3
-             do j=i1,i2
-                do i=1,n1
-                   out(pos) = in(i,j,k)
-                   pos = pos + 1
-                enddo
-             enddo
-          enddo
-       else if (ndir==3) then
+        do k=1,n3
+           do j=i1,i2
+              do i=1,n1
+                 out(pos) = in(i,j,k)
+                 pos = pos + 1
+              enddo
+           enddo
+        enddo
+     else if (ndir==3) then
 #ifdef SHM
-          pos = decomp%z2disp_o(m) + 1
+        pos = decomp%z2disp_o(m) + 1
 #endif
-          do k=i1,i2
-             do j=1,n2
-                do i=1,n1
-                   out(pos) = in(i,j,k)
-                   pos = pos + 1
-                enddo
-             enddo
-          enddo
-       else if (ndir==4) then
+        do k=i1,i2
+           do j=1,n2
+              do i=1,n1
+                 out(pos) = in(i,j,k)
+                 pos = pos + 1
+              enddo
+           enddo
+        enddo
+     else if (ndir==4) then
 #ifdef SHM
-          pos = decomp%y1disp_o(m) + 1
+        pos = decomp%y1disp_o(m) + 1
 #endif
-          do k=1,n3
-             do j=i1,i2
-                do i=1,n1
-                   out(pos) = in(i,j,k)
-                   pos = pos + 1
-                enddo
-             enddo
-          enddo          
-       endif
-    enddo
+        do k=1,n3
+           do j=i1,i2
+              do i=1,n1
+                 out(pos) = in(i,j,k)
+                 pos = pos + 1
+              enddo
+           enddo
+        enddo
+     endif
+  enddo
diff --git a/decomp2d/module_param.f90 b/decomp2d/module_param.f90
index 02244916..388291f7 100644
--- a/decomp2d/module_param.f90
+++ b/decomp2d/module_param.f90
@@ -18,7 +18,7 @@ module variables
   !nvisu = size for visualization collection
   !nprobe =  size for probe collection (energy spectra)
 
-!Possible n points: 3 5 7 9 11 13 17 19 21 25 31 33 37 41 49 51 55 61 65 73 81 91 97 101 109 121 129 145 151 161 163 181 193 201 217 241 251 257 271 289 301 321 325 361 385 401 433 451 481 487 501 513 541 577 601 641 649 721 751 769 801 811 865 901 961 973 1001 1025 1081 1153 1201 1251 1281 1297 1351 1441 1459 1501 1537 1601 1621 1729 1801 1921 1945 2001 2049 2161 2251 2305 2401 2431 2501 2561 2593 2701 2881 2917 3001 3073 3201 3241 3457 3601 3751 3841 3889 4001 4051 4097 4321 4375 4501 4609 4801 4861 5001 5121 5185 5401 5761 5833 6001 6145 6251 6401 6481 6751 6913 7201 7291 7501 7681 7777 8001 8101 8193 8641 8749 9001 9217 9601 9721 enough
+  !Possible n points: 3 5 7 9 11 13 17 19 21 25 31 33 37 41 49 51 55 61 65 73 81 91 97 101 109 121 129 145 151 161 163 181 193 201 217 241 251 257 271 289 301 321 325 361 385 401 433 451 481 487 501 513 541 577 601 641 649 721 751 769 801 811 865 901 961 973 1001 1025 1081 1153 1201 1251 1281 1297 1351 1441 1459 1501 1537 1601 1621 1729 1801 1921 1945 2001 2049 2161 2251 2305 2401 2431 2501 2561 2593 2701 2881 2917 3001 3073 3201 3241 3457 3601 3751 3841 3889 4001 4051 4097 4321 4375 4501 4609 4801 4861 5001 5121 5185 5401 5761 5833 6001 6145 6251 6401 6481 6751 6913 7201 7291 7501 7681 7777 8001 8101 8193 8641 8749 9001 9217 9601 9721 enough
 
   integer :: nx,ny,nz,numscalar,p_row,p_col,nxm,nym,nzm
   real :: spinup_time
@@ -259,7 +259,7 @@ module param
 
   !! LES modelling flag
   integer :: ilesmod, iwall
-  
+
   !LES
   integer :: jLES
   integer :: smagwalldamp
@@ -280,7 +280,7 @@ module param
   integer :: primary_species
 
   character :: filesauve*80, filenoise*80, &
-  nchamp*80,filepath*80, fileturb*80, filevisu*80, datapath*80
+       nchamp*80,filepath*80, fileturb*80, filevisu*80, datapath*80
   real(mytype), dimension(5) :: adt,bdt,cdt,ddt,gdt
 
   !VISU
@@ -335,30 +335,30 @@ module param
   real(mytype),parameter :: twentyfive=25._mytype
   real(mytype),parameter :: twentyseven=27._mytype
   real(mytype),parameter :: twentyeight=28._mytype
-!
+  !
   real(mytype),parameter :: thirtytwo=32._mytype
   real(mytype),parameter :: thirtyfour=34._mytype
   real(mytype),parameter :: thirtysix=36._mytype
-!
+  !
   real(mytype),parameter :: fortyfour=44._mytype
   real(mytype),parameter :: fortyfive=45._mytype
   real(mytype),parameter :: fortyeight=48._mytype
-!
+  !
   real(mytype),parameter :: sixty=60._mytype
   real(mytype),parameter :: sixtytwo=62._mytype
   real(mytype),parameter :: sixtythree=63._mytype
-!
+  !
   real(mytype),parameter :: seventy=70._mytype
   real(mytype),parameter :: seventyfive=75._mytype
-!
+  !
   real(mytype),parameter :: onehundredtwentysix=126._mytype
   real(mytype),parameter :: onehundredtwentyeight=128._mytype
-!
+  !
   real(mytype),parameter :: twohundredsix=206._mytype
   real(mytype),parameter :: twohundredeight=208._mytype
   real(mytype),parameter :: twohundredfiftysix=256._mytype
   real(mytype),parameter :: twohundredseventytwo=272._mytype
-!
+  !
   real(mytype),parameter :: twothousand=2000._mytype
   real(mytype),parameter :: thirtysixthousand=3600._mytype
 
@@ -375,8 +375,8 @@ end module param
 
 module complex_geometry
 
-use decomp_2d,only : mytype
-use variables,only : nx,ny,nz,nxm,nym,nzm
+  use decomp_2d,only : mytype
+  use variables,only : nx,ny,nz,nxm,nym,nzm
 
   integer     ,allocatable,dimension(:,:)   :: nobjx,nobjy,nobjz
   integer     ,allocatable,dimension(:,:,:) :: nxipif,nxfpif,nyipif,nyfpif,nzipif,nzfpif
@@ -465,7 +465,7 @@ end module parfiX
 !
 module parfiY
 
-use decomp_2d, only : mytype
+  use decomp_2d, only : mytype
   real(mytype) :: fial1y, fia1y, fib1y, fic1y, fid1y, fie1y, fif1y ! Coefficients for filter at boundary point 1  
   real(mytype) :: fial2y, fia2y, fib2y, fic2y, fid2y, fie2y, fif2y ! Coefficients for filter at boundary point 2
   real(mytype) :: fial3y, fia3y, fib3y, fic3y, fid3y, fie3y, fif3y ! Coefficients for filter at boundary point 3
@@ -477,7 +477,7 @@ end module parfiY
 
 module parfiZ
 
-use decomp_2d, only : mytype
+  use decomp_2d, only : mytype
   real(mytype) :: fial1z, fia1z, fib1z, fic1z, fid1z, fie1z, fif1z ! Coefficients for filter at boundary point 1  
   real(mytype) :: fial2z, fia2z, fib2z, fic2z, fid2z, fie2z, fif2z ! Coefficients for filter at boundary point 2
   real(mytype) :: fial3z, fia3z, fib3z, fic3z, fid3z, fie3z, fif3z ! Coefficients for filter at boundary point 3
@@ -493,5 +493,5 @@ end module simulation_stats
 
 module ibm
   use decomp_2d, only : mytype
-   real(mytype) :: cex,cey,ra
+  real(mytype) :: cex,cey,ra
 end module ibm
diff --git a/decomp2d/transpose_x_to_y.inc b/decomp2d/transpose_x_to_y.inc
index 1e0e9847..5ab2d6f8 100644
--- a/decomp2d/transpose_x_to_y.inc
+++ b/decomp2d/transpose_x_to_y.inc
@@ -11,503 +11,503 @@
 
 ! This file contains the routines that transpose data from X to Y pencil
 
-  subroutine transpose_x_to_y_real(src, dst, opt_decomp)
+subroutine transpose_x_to_y_real(src, dst, opt_decomp)
 
-    implicit none
-    
-    real(mytype), dimension(:,:,:), intent(IN) :: src
-    real(mytype), dimension(:,:,:), intent(OUT) :: dst
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+implicit none
 
-    TYPE(DECOMP_INFO) :: decomp
+real(mytype), dimension(:,:,:), intent(IN) :: src
+real(mytype), dimension(:,:,:), intent(OUT) :: dst
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+
+TYPE(DECOMP_INFO) :: decomp
 
 #ifdef SHM
-    real(mytype) :: work1(*), work2(*)
-    POINTER  (work1_p, work1), (work2_p, work2)  ! Cray pointers
+real(mytype) :: work1(*), work2(*)
+POINTER  (work1_p, work1), (work2_p, work2)  ! Cray pointers
 #endif
-    
-    integer :: s1,s2,s3,d1,d2,d3
-    integer :: ierror
-
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
-
-    s1 = SIZE(src,1)
-    s2 = SIZE(src,2)
-    s3 = SIZE(src,3)
-    d1 = SIZE(dst,1)
-    d2 = SIZE(dst,2)
-    d3 = SIZE(dst,3)
-
-    ! rearrange source array as send buffer
+
+integer :: s1,s2,s3,d1,d2,d3
+integer :: ierror
+
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
+
+s1 = SIZE(src,1)
+s2 = SIZE(src,2)
+s3 = SIZE(src,3)
+d1 = SIZE(dst,1)
+d2 = SIZE(dst,2)
+d3 = SIZE(dst,3)
+
+! rearrange source array as send buffer
 #ifdef SHM
-    work1_p = decomp%COL_INFO%SND_P
-    call mem_split_xy_real(src, s1, s2, s3, work1, dims(1), &
-         decomp%x1dist, decomp)
+work1_p = decomp%COL_INFO%SND_P
+call mem_split_xy_real(src, s1, s2, s3, work1, dims(1), &
+decomp%x1dist, decomp)
 #else
-    call mem_split_xy_real(src, s1, s2, s3, work1_r, dims(1), &
-         decomp%x1dist, decomp)
+call mem_split_xy_real(src, s1, s2, s3, work1_r, dims(1), &
+decomp%x1dist, decomp)
 #endif
 
-    ! define receive buffer
+! define receive buffer
 #ifdef SHM
-    work2_p = decomp%COL_INFO%RCV_P
-    call MPI_BARRIER(decomp%COL_INFO%CORE_COMM, ierror)
+work2_p = decomp%COL_INFO%RCV_P
+call MPI_BARRIER(decomp%COL_INFO%CORE_COMM, ierror)
 #endif
-    
-    ! transpose using MPI_ALLTOALL(V)
+
+! transpose using MPI_ALLTOALL(V)
 #ifdef SHM
-    if (decomp%COL_INFO%CORE_ME==1) THEN
-       call MPI_ALLTOALLV(work1, decomp%x1cnts_s, decomp%x1disp_s, &
-            real_type, work2, decomp%y1cnts_s, decomp%y1disp_s, &
-            real_type, decomp%COL_INFO%SMP_COMM, ierror)
-    end if
+if (decomp%COL_INFO%CORE_ME==1) THEN
+call MPI_ALLTOALLV(work1, decomp%x1cnts_s, decomp%x1disp_s, &
+real_type, work2, decomp%y1cnts_s, decomp%y1disp_s, &
+real_type, decomp%COL_INFO%SMP_COMM, ierror)
+end if
 #else
 #ifdef EVEN
-    call MPI_ALLTOALL(work1_r, decomp%x1count, &
-         real_type, work2_r, decomp%y1count, &
-         real_type, DECOMP_2D_COMM_COL, ierror)
+call MPI_ALLTOALL(work1_r, decomp%x1count, &
+real_type, work2_r, decomp%y1count, &
+real_type, DECOMP_2D_COMM_COL, ierror)
 #else
-    call MPI_ALLTOALLV(work1_r, decomp%x1cnts, decomp%x1disp, &
-         real_type, work2_r, decomp%y1cnts, decomp%y1disp, &
-         real_type, DECOMP_2D_COMM_COL, ierror)
+call MPI_ALLTOALLV(work1_r, decomp%x1cnts, decomp%x1disp, &
+real_type, work2_r, decomp%y1cnts, decomp%y1disp, &
+real_type, DECOMP_2D_COMM_COL, ierror)
 #endif
 #endif
 
-    ! rearrange receive buffer
+! rearrange receive buffer
 #ifdef SHM
-    call MPI_BARRIER(decomp%COL_INFO%CORE_COMM, ierror)
-    call mem_merge_xy_real(work2, d1, d2, d3, dst, dims(1), &
-         decomp%y1dist, decomp)
+call MPI_BARRIER(decomp%COL_INFO%CORE_COMM, ierror)
+call mem_merge_xy_real(work2, d1, d2, d3, dst, dims(1), &
+decomp%y1dist, decomp)
 #else
-    call mem_merge_xy_real(work2_r, d1, d2, d3, dst, dims(1), &
-         decomp%y1dist, decomp)
+call mem_merge_xy_real(work2_r, d1, d2, d3, dst, dims(1), &
+decomp%y1dist, decomp)
 #endif
-    
-    return
-  end subroutine transpose_x_to_y_real
+
+return
+end subroutine transpose_x_to_y_real
 
 
 #ifdef OCC
-  subroutine transpose_x_to_y_real_start(handle, src, dst, sbuf, rbuf, &
-       opt_decomp)
-
-    implicit none
-    
-    integer :: handle
-    real(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
-
-    TYPE(DECOMP_INFO) :: decomp
-
-    integer :: s1,s2,s3
-    integer :: ierror
-
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
-
-    s1 = SIZE(src,1)
-    s2 = SIZE(src,2)
-    s3 = SIZE(src,3)
-
-    ! rearrange source array as send buffer
-    call mem_split_xy_real(src, s1, s2, s3, sbuf, dims(1), &
-         decomp%x1dist, decomp)
-    
+subroutine transpose_x_to_y_real_start(handle, src, dst, sbuf, rbuf, &
+opt_decomp)
+
+implicit none
+
+integer :: handle
+real(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+
+TYPE(DECOMP_INFO) :: decomp
+
+integer :: s1,s2,s3
+integer :: ierror
+
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
+
+s1 = SIZE(src,1)
+s2 = SIZE(src,2)
+s3 = SIZE(src,3)
+
+! rearrange source array as send buffer
+call mem_split_xy_real(src, s1, s2, s3, sbuf, dims(1), &
+decomp%x1dist, decomp)
+
 #ifdef EVEN
-    call NBC_IALLTOALL(sbuf, decomp%x1count, real_type, &
-         rbuf, decomp%y1count, real_type, &
-         DECOMP_2D_COMM_COL, handle, ierror)
+call NBC_IALLTOALL(sbuf, decomp%x1count, real_type, &
+rbuf, decomp%y1count, real_type, &
+DECOMP_2D_COMM_COL, handle, ierror)
 #else
-    call NBC_IALLTOALLV(sbuf, decomp%x1cnts, decomp%x1disp, real_type, &
-         rbuf, decomp%y1cnts, decomp%y1disp, real_type, &
-         DECOMP_2D_COMM_COL, handle, ierror)
+call NBC_IALLTOALLV(sbuf, decomp%x1cnts, decomp%x1disp, real_type, &
+rbuf, decomp%y1cnts, decomp%y1disp, real_type, &
+DECOMP_2D_COMM_COL, handle, ierror)
 #endif
 
-    return
-  end subroutine transpose_x_to_y_real_start
+return
+end subroutine transpose_x_to_y_real_start
+
 
+subroutine transpose_x_to_y_real_wait(handle, src, dst, sbuf, rbuf, &
+opt_decomp)
 
-  subroutine transpose_x_to_y_real_wait(handle, src, dst, sbuf, rbuf, &
-       opt_decomp)
+implicit none
 
-    implicit none
-    
-    integer :: handle
-    real(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+integer :: handle
+real(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
 
-    TYPE(DECOMP_INFO) :: decomp
+TYPE(DECOMP_INFO) :: decomp
 
-    integer :: d1,d2,d3
-    integer :: ierror
+integer :: d1,d2,d3
+integer :: ierror
 
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
 
-    d1 = SIZE(dst,1)
-    d2 = SIZE(dst,2)
-    d3 = SIZE(dst,3)
+d1 = SIZE(dst,1)
+d2 = SIZE(dst,2)
+d3 = SIZE(dst,3)
 
-    call NBC_WAIT(handle, ierror)
+call NBC_WAIT(handle, ierror)
 
-    ! rearrange receive buffer
-    call mem_merge_xy_real(rbuf, d1, d2, d3, dst, dims(1), &
-         decomp%y1dist, decomp)
+! rearrange receive buffer
+call mem_merge_xy_real(rbuf, d1, d2, d3, dst, dims(1), &
+decomp%y1dist, decomp)
 
-    return
-  end subroutine transpose_x_to_y_real_wait
+return
+end subroutine transpose_x_to_y_real_wait
 #endif
 
 
-  subroutine transpose_x_to_y_complex(src, dst, opt_decomp)
+subroutine transpose_x_to_y_complex(src, dst, opt_decomp)
 
-    implicit none
-    
-    complex(mytype), dimension(:,:,:), intent(IN) :: src
-    complex(mytype), dimension(:,:,:), intent(OUT) :: dst
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+implicit none
 
-    TYPE(DECOMP_INFO) :: decomp
+complex(mytype), dimension(:,:,:), intent(IN) :: src
+complex(mytype), dimension(:,:,:), intent(OUT) :: dst
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+
+TYPE(DECOMP_INFO) :: decomp
 
 #ifdef SHM
-    complex(mytype) :: work1(*), work2(*)
-    POINTER  (work1_p, work1), (work2_p, work2)  ! Cray pointers
+complex(mytype) :: work1(*), work2(*)
+POINTER  (work1_p, work1), (work2_p, work2)  ! Cray pointers
 #endif
-    
-    integer :: s1,s2,s3,d1,d2,d3
-    integer :: ierror
-
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
-
-    s1 = SIZE(src,1)
-    s2 = SIZE(src,2)
-    s3 = SIZE(src,3)
-    d1 = SIZE(dst,1)
-    d2 = SIZE(dst,2)
-    d3 = SIZE(dst,3)
-    
-    ! rearrange source array as send buffer
+
+integer :: s1,s2,s3,d1,d2,d3
+integer :: ierror
+
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
+
+s1 = SIZE(src,1)
+s2 = SIZE(src,2)
+s3 = SIZE(src,3)
+d1 = SIZE(dst,1)
+d2 = SIZE(dst,2)
+d3 = SIZE(dst,3)
+
+! rearrange source array as send buffer
 #ifdef SHM
-    work1_p = decomp%COL_INFO%SND_P_c
-    call mem_split_xy_complex(src, s1, s2, s3, work1, dims(1), &
-         decomp%x1dist, decomp)
+work1_p = decomp%COL_INFO%SND_P_c
+call mem_split_xy_complex(src, s1, s2, s3, work1, dims(1), &
+decomp%x1dist, decomp)
 #else
-    call mem_split_xy_complex(src, s1, s2, s3, work1_c, dims(1), &
-         decomp%x1dist, decomp)
+call mem_split_xy_complex(src, s1, s2, s3, work1_c, dims(1), &
+decomp%x1dist, decomp)
 #endif
-    
-    ! define receive buffer
+
+! define receive buffer
 #ifdef SHM
-    work2_p = decomp%COL_INFO%RCV_P_c
-    call MPI_BARRIER(decomp%COL_INFO%CORE_COMM, ierror)
+work2_p = decomp%COL_INFO%RCV_P_c
+call MPI_BARRIER(decomp%COL_INFO%CORE_COMM, ierror)
 #endif
-    
-    ! transpose using MPI_ALLTOALL(V)
+
+! transpose using MPI_ALLTOALL(V)
 #ifdef SHM
-    if (decomp%COL_INFO%CORE_ME==1) THEN
-       call MPI_ALLTOALLV(work1, decomp%x1cnts_s, decomp%x1disp_s, &
-            complex_type, work2, decomp%y1cnts_s, decomp%y1disp_s, &
-            complex_type, decomp%COL_INFO%SMP_COMM, ierror)
-    end if
+if (decomp%COL_INFO%CORE_ME==1) THEN
+call MPI_ALLTOALLV(work1, decomp%x1cnts_s, decomp%x1disp_s, &
+complex_type, work2, decomp%y1cnts_s, decomp%y1disp_s, &
+complex_type, decomp%COL_INFO%SMP_COMM, ierror)
+end if
 #else
 #ifdef EVEN
-    call MPI_ALLTOALL(work1_c, decomp%x1count, &
-         complex_type, work2_c, decomp%y1count, &
-         complex_type, DECOMP_2D_COMM_COL, ierror)
+call MPI_ALLTOALL(work1_c, decomp%x1count, &
+complex_type, work2_c, decomp%y1count, &
+complex_type, DECOMP_2D_COMM_COL, ierror)
 #else
-    call MPI_ALLTOALLV(work1_c, decomp%x1cnts, decomp%x1disp, &
-         complex_type, work2_c, decomp%y1cnts, decomp%y1disp, &
-         complex_type, DECOMP_2D_COMM_COL, ierror)
+call MPI_ALLTOALLV(work1_c, decomp%x1cnts, decomp%x1disp, &
+complex_type, work2_c, decomp%y1cnts, decomp%y1disp, &
+complex_type, DECOMP_2D_COMM_COL, ierror)
 #endif
 #endif
 
-    ! rearrange receive buffer
+! rearrange receive buffer
 #ifdef SHM
-    call MPI_BARRIER(decomp%COL_INFO%CORE_COMM, ierror)
-    call mem_merge_xy_complex(work2, d1, d2, d3, dst, dims(1), &
-         decomp%y1dist, decomp)
+call MPI_BARRIER(decomp%COL_INFO%CORE_COMM, ierror)
+call mem_merge_xy_complex(work2, d1, d2, d3, dst, dims(1), &
+decomp%y1dist, decomp)
 #else
-    call mem_merge_xy_complex(work2_c, d1, d2, d3, dst, dims(1), &
-         decomp%y1dist, decomp)
+call mem_merge_xy_complex(work2_c, d1, d2, d3, dst, dims(1), &
+decomp%y1dist, decomp)
 #endif
 
-    return
-  end subroutine transpose_x_to_y_complex
+return
+end subroutine transpose_x_to_y_complex
 
 
 #ifdef OCC
-  subroutine transpose_x_to_y_complex_start(handle, src, dst, sbuf, &
-       rbuf, opt_decomp)
-
-    implicit none
-    
-    integer :: handle
-    complex(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
-
-    TYPE(DECOMP_INFO) :: decomp
-
-    integer :: s1,s2,s3
-    integer :: ierror
-
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
-
-    s1 = SIZE(src,1)
-    s2 = SIZE(src,2)
-    s3 = SIZE(src,3)
-    
-    ! rearrange source array as send buffer
-    call mem_split_xy_complex(src, s1, s2, s3, sbuf, dims(1), &
-         decomp%x1dist, decomp)
+subroutine transpose_x_to_y_complex_start(handle, src, dst, sbuf, &
+rbuf, opt_decomp)
+
+implicit none
+
+integer :: handle
+complex(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+
+TYPE(DECOMP_INFO) :: decomp
+
+integer :: s1,s2,s3
+integer :: ierror
+
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
+
+s1 = SIZE(src,1)
+s2 = SIZE(src,2)
+s3 = SIZE(src,3)
+
+! rearrange source array as send buffer
+call mem_split_xy_complex(src, s1, s2, s3, sbuf, dims(1), &
+decomp%x1dist, decomp)
 
 #ifdef EVEN
-    call NBC_IALLTOALL(sbuf, decomp%x1count, &
-         complex_type, rbuf, decomp%y1count, &
-         complex_type, DECOMP_2D_COMM_COL, handle, ierror)
+call NBC_IALLTOALL(sbuf, decomp%x1count, &
+complex_type, rbuf, decomp%y1count, &
+complex_type, DECOMP_2D_COMM_COL, handle, ierror)
 #else
-    call NBC_IALLTOALLV(sbuf, decomp%x1cnts, decomp%x1disp, &
-         complex_type, rbuf, decomp%y1cnts, decomp%y1disp, &
-         complex_type, DECOMP_2D_COMM_COL, handle, ierror)
+call NBC_IALLTOALLV(sbuf, decomp%x1cnts, decomp%x1disp, &
+complex_type, rbuf, decomp%y1cnts, decomp%y1disp, &
+complex_type, DECOMP_2D_COMM_COL, handle, ierror)
 #endif
 
-    return
-  end subroutine transpose_x_to_y_complex_start
+return
+end subroutine transpose_x_to_y_complex_start
+
 
+subroutine transpose_x_to_y_complex_wait(handle, src, dst, sbuf, &
+rbuf, opt_decomp)
 
-  subroutine transpose_x_to_y_complex_wait(handle, src, dst, sbuf, &
-       rbuf, opt_decomp)
+implicit none
 
-    implicit none
-    
-    integer :: handle
-    complex(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+integer :: handle
+complex(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
 
-    TYPE(DECOMP_INFO) :: decomp
+TYPE(DECOMP_INFO) :: decomp
 
-    integer :: d1,d2,d3
-    integer :: ierror
+integer :: d1,d2,d3
+integer :: ierror
 
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
 
-    d1 = SIZE(dst,1)
-    d2 = SIZE(dst,2)
-    d3 = SIZE(dst,3)
+d1 = SIZE(dst,1)
+d2 = SIZE(dst,2)
+d3 = SIZE(dst,3)
 
-    call NBC_WAIT(handle, ierror)
+call NBC_WAIT(handle, ierror)
 
-    ! rearrange receive buffer
-    call mem_merge_xy_complex(rbuf, d1, d2, d3, dst, dims(1), &
-         decomp%y1dist, decomp)
+! rearrange receive buffer
+call mem_merge_xy_complex(rbuf, d1, d2, d3, dst, dims(1), &
+decomp%y1dist, decomp)
 
-    return
-  end subroutine transpose_x_to_y_complex_wait
+return
+end subroutine transpose_x_to_y_complex_wait
 #endif
 
 
-  ! pack/unpack ALLTOALL(V) buffers
+! pack/unpack ALLTOALL(V) buffers
 
-  subroutine mem_split_xy_real(in,n1,n2,n3,out,iproc,dist,decomp)
+subroutine mem_split_xy_real(in,n1,n2,n3,out,iproc,dist,decomp)
 
-    implicit none
+implicit none
 
-    integer, intent(IN) :: n1,n2,n3
-    real(mytype), dimension(n1,n2,n3), intent(IN) :: in
-    real(mytype), dimension(*), intent(OUT) :: out
-    integer, intent(IN) :: iproc
-    integer, dimension(0:iproc-1), intent(IN) :: dist
-    TYPE(DECOMP_INFO), intent(IN) :: decomp
-    
-    integer :: i,j,k, m,i1,i2,pos
+integer, intent(IN) :: n1,n2,n3
+real(mytype), dimension(n1,n2,n3), intent(IN) :: in
+real(mytype), dimension(*), intent(OUT) :: out
+integer, intent(IN) :: iproc
+integer, dimension(0:iproc-1), intent(IN) :: dist
+TYPE(DECOMP_INFO), intent(IN) :: decomp
 
-    do m=0,iproc-1
-       if (m==0) then 
-          i1 = 1
-          i2 = dist(0)
-       else
-          i1 = i2+1
-          i2 = i1+dist(m)-1
-       end if
+integer :: i,j,k, m,i1,i2,pos
+
+do m=0,iproc-1
+if (m==0) then 
+i1 = 1
+i2 = dist(0)
+else
+i1 = i2+1
+i2 = i1+dist(m)-1
+end if
 
 #ifdef SHM
-       pos = decomp%x1disp_o(m) + 1
+pos = decomp%x1disp_o(m) + 1
 #else
 #ifdef EVEN
-       pos = m * decomp%x1count + 1
+pos = m * decomp%x1count + 1
 #else
-       pos = decomp%x1disp(m) + 1
+pos = decomp%x1disp(m) + 1
 #endif
 #endif
 
-       do k=1,n3
-          do j=1,n2
-             do i=i1,i2
-                out(pos) = in(i,j,k)
-                pos = pos + 1
-             end do
-          end do
-       end do
-    end do
-
-    return
-  end subroutine mem_split_xy_real
-
-
-  subroutine mem_split_xy_complex(in,n1,n2,n3,out,iproc,dist,decomp)
-
-    implicit none
-
-    integer, intent(IN) :: n1,n2,n3
-    complex(mytype), dimension(n1,n2,n3), intent(IN) :: in
-    complex(mytype), dimension(*), intent(OUT) :: out
-    integer, intent(IN) :: iproc
-    integer, dimension(0:iproc-1), intent(IN) :: dist
-    TYPE(DECOMP_INFO), intent(IN) :: decomp
-    
-    integer :: i,j,k, m,i1,i2,pos
-
-    do m=0,iproc-1
-       if (m==0) then 
-          i1 = 1
-          i2 = dist(0)
-       else
-          i1 = i2+1
-          i2 = i1+dist(m)-1
-       end if
+do k=1,n3
+do j=1,n2
+do i=i1,i2
+out(pos) = in(i,j,k)
+pos = pos + 1
+end do
+end do
+end do
+end do
+
+return
+end subroutine mem_split_xy_real
+
+
+subroutine mem_split_xy_complex(in,n1,n2,n3,out,iproc,dist,decomp)
+
+implicit none
+
+integer, intent(IN) :: n1,n2,n3
+complex(mytype), dimension(n1,n2,n3), intent(IN) :: in
+complex(mytype), dimension(*), intent(OUT) :: out
+integer, intent(IN) :: iproc
+integer, dimension(0:iproc-1), intent(IN) :: dist
+TYPE(DECOMP_INFO), intent(IN) :: decomp
+
+integer :: i,j,k, m,i1,i2,pos
+
+do m=0,iproc-1
+if (m==0) then 
+i1 = 1
+i2 = dist(0)
+else
+i1 = i2+1
+i2 = i1+dist(m)-1
+end if
 
 #ifdef SHM
-       pos = decomp%x1disp_o(m) + 1
+pos = decomp%x1disp_o(m) + 1
 #else
 #ifdef EVEN
-       pos = m * decomp%x1count + 1
+pos = m * decomp%x1count + 1
 #else
-       pos = decomp%x1disp(m) + 1
+pos = decomp%x1disp(m) + 1
 #endif
 #endif
 
-       do k=1,n3
-          do j=1,n2
-             do i=i1,i2
-                out(pos) = in(i,j,k)
-                pos = pos + 1
-             end do
-          end do
-       end do
-    end do
-
-    return
-  end subroutine mem_split_xy_complex
-
-
-  subroutine mem_merge_xy_real(in,n1,n2,n3,out,iproc,dist,decomp)
-
-    implicit none
-    
-    integer, intent(IN) :: n1,n2,n3
-    real(mytype), dimension(*), intent(IN) :: in
-    real(mytype), dimension(n1,n2,n3), intent(OUT) :: out
-    integer, intent(IN) :: iproc
-    integer, dimension(0:iproc-1), intent(IN) :: dist
-    TYPE(DECOMP_INFO), intent(IN) :: decomp
-    
-    integer :: i,j,k, m,i1,i2, pos
-
-    do m=0,iproc-1
-       if (m==0) then
-          i1 = 1
-          i2 = dist(0)
-       else
-          i1 = i2+1
-          i2 = i1+dist(m)-1
-       end if
+do k=1,n3
+do j=1,n2
+do i=i1,i2
+out(pos) = in(i,j,k)
+pos = pos + 1
+end do
+end do
+end do
+end do
+
+return
+end subroutine mem_split_xy_complex
+
+
+subroutine mem_merge_xy_real(in,n1,n2,n3,out,iproc,dist,decomp)
+
+implicit none
+
+integer, intent(IN) :: n1,n2,n3
+real(mytype), dimension(*), intent(IN) :: in
+real(mytype), dimension(n1,n2,n3), intent(OUT) :: out
+integer, intent(IN) :: iproc
+integer, dimension(0:iproc-1), intent(IN) :: dist
+TYPE(DECOMP_INFO), intent(IN) :: decomp
+
+integer :: i,j,k, m,i1,i2, pos
+
+do m=0,iproc-1
+if (m==0) then
+i1 = 1
+i2 = dist(0)
+else
+i1 = i2+1
+i2 = i1+dist(m)-1
+end if
 
 #ifdef SHM
-       pos = decomp%y1disp_o(m) + 1
+pos = decomp%y1disp_o(m) + 1
 #else
 #ifdef EVEN
-       pos = m * decomp%y1count + 1
+pos = m * decomp%y1count + 1
 #else
-       pos = decomp%y1disp(m) + 1
+pos = decomp%y1disp(m) + 1
 #endif
 #endif
 
-       do k=1,n3
-          do j=i1,i2
-             do i=1,n1
-                out(i,j,k) = in(pos)
-                pos = pos + 1
-             end do
-          end do
-       end do
-    end do
-
-    return
-  end subroutine mem_merge_xy_real
-
-
-  subroutine mem_merge_xy_complex(in,n1,n2,n3,out,iproc,dist,decomp)
-
-    implicit none
-    
-    integer, intent(IN) :: n1,n2,n3
-    complex(mytype), dimension(*), intent(IN) :: in
-    complex(mytype), dimension(n1,n2,n3), intent(OUT) :: out
-    integer, intent(IN) :: iproc
-    integer, dimension(0:iproc-1), intent(IN) :: dist
-    TYPE(DECOMP_INFO), intent(IN) :: decomp
-    
-    integer :: i,j,k, m,i1,i2, pos
-
-    do m=0,iproc-1
-       if (m==0) then
-          i1 = 1
-          i2 = dist(0)
-       else
-          i1 = i2+1
-          i2 = i1+dist(m)-1
-       end if
+do k=1,n3
+do j=i1,i2
+do i=1,n1
+out(i,j,k) = in(pos)
+pos = pos + 1
+end do
+end do
+end do
+end do
+
+return
+end subroutine mem_merge_xy_real
+
+
+subroutine mem_merge_xy_complex(in,n1,n2,n3,out,iproc,dist,decomp)
+
+implicit none
+
+integer, intent(IN) :: n1,n2,n3
+complex(mytype), dimension(*), intent(IN) :: in
+complex(mytype), dimension(n1,n2,n3), intent(OUT) :: out
+integer, intent(IN) :: iproc
+integer, dimension(0:iproc-1), intent(IN) :: dist
+TYPE(DECOMP_INFO), intent(IN) :: decomp
+
+integer :: i,j,k, m,i1,i2, pos
+
+do m=0,iproc-1
+if (m==0) then
+i1 = 1
+i2 = dist(0)
+else
+i1 = i2+1
+i2 = i1+dist(m)-1
+end if
 
 #ifdef SHM
-       pos = decomp%y1disp_o(m) + 1
+pos = decomp%y1disp_o(m) + 1
 #else
 #ifdef EVEN
-       pos = m * decomp%y1count + 1
+pos = m * decomp%y1count + 1
 #else
-       pos = decomp%y1disp(m) + 1
+pos = decomp%y1disp(m) + 1
 #endif
 #endif
 
-       do k=1,n3
-          do j=i1,i2
-             do i=1,n1
-                out(i,j,k) = in(pos)
-                pos = pos + 1
-             end do
-          end do
-       end do
-    end do
-
-    return
-  end subroutine mem_merge_xy_complex
+do k=1,n3
+do j=i1,i2
+do i=1,n1
+out(i,j,k) = in(pos)
+pos = pos + 1
+end do
+end do
+end do
+end do
+
+return
+end subroutine mem_merge_xy_complex
diff --git a/decomp2d/transpose_y_to_x.inc b/decomp2d/transpose_y_to_x.inc
index e8f27aee..ec82699e 100644
--- a/decomp2d/transpose_y_to_x.inc
+++ b/decomp2d/transpose_y_to_x.inc
@@ -11,503 +11,503 @@
 
 ! This file contains the routines that transpose data from Y to X pencil
 
-  subroutine transpose_y_to_x_real(src, dst, opt_decomp)
+subroutine transpose_y_to_x_real(src, dst, opt_decomp)
 
-    implicit none
-    
-    real(mytype), dimension(:,:,:), intent(IN) :: src
-    real(mytype), dimension(:,:,:), intent(OUT) :: dst
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+implicit none
 
-    TYPE(DECOMP_INFO) :: decomp
+real(mytype), dimension(:,:,:), intent(IN) :: src
+real(mytype), dimension(:,:,:), intent(OUT) :: dst
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+
+TYPE(DECOMP_INFO) :: decomp
 
 #ifdef SHM
-    real(mytype) :: work1(*), work2(*)
-    POINTER  (work1_p, work1), (work2_p, work2)  ! Cray pointers
+real(mytype) :: work1(*), work2(*)
+POINTER  (work1_p, work1), (work2_p, work2)  ! Cray pointers
 #endif
-    
-    integer :: s1,s2,s3,d1,d2,d3
-    integer :: ierror
-
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
-
-    s1 = SIZE(src,1)
-    s2 = SIZE(src,2)
-    s3 = SIZE(src,3)
-    d1 = SIZE(dst,1)
-    d2 = SIZE(dst,2)
-    d3 = SIZE(dst,3)
-
-    ! rearrange source array as send buffer
+
+integer :: s1,s2,s3,d1,d2,d3
+integer :: ierror
+
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
+
+s1 = SIZE(src,1)
+s2 = SIZE(src,2)
+s3 = SIZE(src,3)
+d1 = SIZE(dst,1)
+d2 = SIZE(dst,2)
+d3 = SIZE(dst,3)
+
+! rearrange source array as send buffer
 #ifdef SHM
-    work1_p = decomp%COL_INFO%SND_P
-    call mem_split_yx_real(src, s1, s2, s3, work1, dims(1), &
-         decomp%y1dist, decomp)
+work1_p = decomp%COL_INFO%SND_P
+call mem_split_yx_real(src, s1, s2, s3, work1, dims(1), &
+decomp%y1dist, decomp)
 #else
-    call mem_split_yx_real(src, s1, s2, s3, work1_r, dims(1), &
-         decomp%y1dist, decomp)
+call mem_split_yx_real(src, s1, s2, s3, work1_r, dims(1), &
+decomp%y1dist, decomp)
 #endif
 
-    ! define receive buffer
+! define receive buffer
 #ifdef SHM
-    work2_p = decomp%COL_INFO%RCV_P
-    call MPI_BARRIER(decomp%COL_INFO%CORE_COMM, ierror)
+work2_p = decomp%COL_INFO%RCV_P
+call MPI_BARRIER(decomp%COL_INFO%CORE_COMM, ierror)
 #endif
-    
-    ! transpose using MPI_ALLTOALL(V)
+
+! transpose using MPI_ALLTOALL(V)
 #ifdef SHM
-    if (decomp%COL_INFO%CORE_ME==1) THEN
-       call MPI_ALLTOALLV(work1, decomp%y1cnts_s, decomp%y1disp_s, &
-            real_type, work2, decomp%x1cnts_s, decomp%x1disp_s, &
-            real_type, decomp%COL_INFO%SMP_COMM, ierror)
-    end if
+if (decomp%COL_INFO%CORE_ME==1) THEN
+call MPI_ALLTOALLV(work1, decomp%y1cnts_s, decomp%y1disp_s, &
+real_type, work2, decomp%x1cnts_s, decomp%x1disp_s, &
+real_type, decomp%COL_INFO%SMP_COMM, ierror)
+end if
 #else
 #ifdef EVEN
-    call MPI_ALLTOALL(work1_r, decomp%y1count, &
-         real_type, work2_r, decomp%x1count, &
-         real_type, DECOMP_2D_COMM_COL, ierror)
+call MPI_ALLTOALL(work1_r, decomp%y1count, &
+real_type, work2_r, decomp%x1count, &
+real_type, DECOMP_2D_COMM_COL, ierror)
 #else
-    call MPI_ALLTOALLV(work1_r, decomp%y1cnts, decomp%y1disp, &
-         real_type, work2_r, decomp%x1cnts, decomp%x1disp, &
-         real_type, DECOMP_2D_COMM_COL, ierror)
+call MPI_ALLTOALLV(work1_r, decomp%y1cnts, decomp%y1disp, &
+real_type, work2_r, decomp%x1cnts, decomp%x1disp, &
+real_type, DECOMP_2D_COMM_COL, ierror)
 #endif
 #endif
 
-    ! rearrange receive buffer
+! rearrange receive buffer
 #ifdef SHM
-    call MPI_BARRIER(decomp%COL_INFO%CORE_COMM, ierror)
-    call mem_merge_yx_real(work2, d1, d2, d3, dst, dims(1), &
-         decomp%x1dist, decomp)
+call MPI_BARRIER(decomp%COL_INFO%CORE_COMM, ierror)
+call mem_merge_yx_real(work2, d1, d2, d3, dst, dims(1), &
+decomp%x1dist, decomp)
 #else
-    call mem_merge_yx_real(work2_r, d1, d2, d3, dst, dims(1), &
-         decomp%x1dist, decomp)
+call mem_merge_yx_real(work2_r, d1, d2, d3, dst, dims(1), &
+decomp%x1dist, decomp)
 #endif
-    
-    return
-  end subroutine transpose_y_to_x_real
+
+return
+end subroutine transpose_y_to_x_real
 
 
 #ifdef OCC
-  subroutine transpose_y_to_x_real_start(handle, src, dst, sbuf, rbuf, &
-       opt_decomp)
-
-    implicit none
-    
-    integer :: handle
-    real(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
-
-    TYPE(DECOMP_INFO) :: decomp
-
-    integer :: s1,s2,s3
-    integer :: ierror
-
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
-
-    s1 = SIZE(src,1)
-    s2 = SIZE(src,2)
-    s3 = SIZE(src,3)
-    
-    ! rearrange source array as send buffer
-    call mem_split_yx_real(src, s1, s2, s3, sbuf, dims(1), &
-         decomp%y1dist, decomp)
+subroutine transpose_y_to_x_real_start(handle, src, dst, sbuf, rbuf, &
+opt_decomp)
+
+implicit none
+
+integer :: handle
+real(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+
+TYPE(DECOMP_INFO) :: decomp
+
+integer :: s1,s2,s3
+integer :: ierror
+
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
+
+s1 = SIZE(src,1)
+s2 = SIZE(src,2)
+s3 = SIZE(src,3)
+
+! rearrange source array as send buffer
+call mem_split_yx_real(src, s1, s2, s3, sbuf, dims(1), &
+decomp%y1dist, decomp)
 
 #ifdef EVEN
-    call NBC_IALLTOALL(sbuf, decomp%y1count, real_type, &
-         rbuf, decomp%x1count, real_type, &
-         DECOMP_2D_COMM_COL, handle, ierror)
+call NBC_IALLTOALL(sbuf, decomp%y1count, real_type, &
+rbuf, decomp%x1count, real_type, &
+DECOMP_2D_COMM_COL, handle, ierror)
 #else
-    call NBC_IALLTOALLV(sbuf, decomp%y1cnts, decomp%y1disp, real_type, &
-         rbuf, decomp%x1cnts, decomp%x1disp, real_type, &
-         DECOMP_2D_COMM_COL, handle, ierror)
+call NBC_IALLTOALLV(sbuf, decomp%y1cnts, decomp%y1disp, real_type, &
+rbuf, decomp%x1cnts, decomp%x1disp, real_type, &
+DECOMP_2D_COMM_COL, handle, ierror)
 #endif
 
-    return
-  end subroutine transpose_y_to_x_real_start
+return
+end subroutine transpose_y_to_x_real_start
+
 
+subroutine transpose_y_to_x_real_wait(handle, src, dst, sbuf, rbuf, &
+opt_decomp)
 
-  subroutine transpose_y_to_x_real_wait(handle, src, dst, sbuf, rbuf, &
-       opt_decomp)
+implicit none
 
-    implicit none
-    
-    integer :: handle
-    real(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+integer :: handle
+real(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
 
-    TYPE(DECOMP_INFO) :: decomp
+TYPE(DECOMP_INFO) :: decomp
 
-    integer :: d1,d2,d3
-    integer :: ierror
+integer :: d1,d2,d3
+integer :: ierror
 
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
 
-    d1 = SIZE(dst,1)
-    d2 = SIZE(dst,2)
-    d3 = SIZE(dst,3)
+d1 = SIZE(dst,1)
+d2 = SIZE(dst,2)
+d3 = SIZE(dst,3)
 
-    call NBC_WAIT(handle, ierror)
+call NBC_WAIT(handle, ierror)
 
-    ! rearrange receive buffer
-    call mem_merge_yx_real(rbuf, d1, d2, d3, dst, dims(1), &
-         decomp%x1dist, decomp)
+! rearrange receive buffer
+call mem_merge_yx_real(rbuf, d1, d2, d3, dst, dims(1), &
+decomp%x1dist, decomp)
 
-    return
-  end subroutine transpose_y_to_x_real_wait
+return
+end subroutine transpose_y_to_x_real_wait
 #endif
 
 
-  subroutine transpose_y_to_x_complex(src, dst, opt_decomp)
+subroutine transpose_y_to_x_complex(src, dst, opt_decomp)
 
-    implicit none
-    
-    complex(mytype), dimension(:,:,:), intent(IN) :: src
-    complex(mytype), dimension(:,:,:), intent(OUT) :: dst
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+implicit none
 
-    TYPE(DECOMP_INFO) :: decomp
+complex(mytype), dimension(:,:,:), intent(IN) :: src
+complex(mytype), dimension(:,:,:), intent(OUT) :: dst
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+
+TYPE(DECOMP_INFO) :: decomp
 
 #ifdef SHM
-    complex(mytype) :: work1(*), work2(*)
-    POINTER  (work1_p, work1), (work2_p, work2)  ! Cray pointers
+complex(mytype) :: work1(*), work2(*)
+POINTER  (work1_p, work1), (work2_p, work2)  ! Cray pointers
 #endif
-    
-    integer :: s1,s2,s3,d1,d2,d3
-    integer :: ierror
-
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
-
-    s1 = SIZE(src,1)
-    s2 = SIZE(src,2)
-    s3 = SIZE(src,3)
-    d1 = SIZE(dst,1)
-    d2 = SIZE(dst,2)
-    d3 = SIZE(dst,3)
-    
-    ! rearrange source array as send buffer
+
+integer :: s1,s2,s3,d1,d2,d3
+integer :: ierror
+
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
+
+s1 = SIZE(src,1)
+s2 = SIZE(src,2)
+s3 = SIZE(src,3)
+d1 = SIZE(dst,1)
+d2 = SIZE(dst,2)
+d3 = SIZE(dst,3)
+
+! rearrange source array as send buffer
 #ifdef SHM
-    work1_p = decomp%COL_INFO%SND_P_c
-    call mem_split_yx_complex(src, s1, s2, s3, work1, dims(1), &
-         decomp%y1dist, decomp)
+work1_p = decomp%COL_INFO%SND_P_c
+call mem_split_yx_complex(src, s1, s2, s3, work1, dims(1), &
+decomp%y1dist, decomp)
 #else
-    call mem_split_yx_complex(src, s1, s2, s3, work1_c, dims(1), &
-         decomp%y1dist, decomp)
+call mem_split_yx_complex(src, s1, s2, s3, work1_c, dims(1), &
+decomp%y1dist, decomp)
 #endif
-    
-    ! define receive buffer
+
+! define receive buffer
 #ifdef SHM
-    work2_p = decomp%COL_INFO%RCV_P_c
-    call MPI_BARRIER(decomp%COL_INFO%CORE_COMM, ierror)
+work2_p = decomp%COL_INFO%RCV_P_c
+call MPI_BARRIER(decomp%COL_INFO%CORE_COMM, ierror)
 #endif
-    
-    ! transpose using MPI_ALLTOALL(V)
+
+! transpose using MPI_ALLTOALL(V)
 #ifdef SHM
-    if (decomp%COL_INFO%CORE_ME==1) THEN
-       call MPI_ALLTOALLV(work1, decomp%y1cnts_s, decomp%y1disp_s, &
-            complex_type, work2, decomp%x1cnts_s, decomp%x1disp_s, &
-            complex_type, decomp%COL_INFO%SMP_COMM, ierror)
-    end if
+if (decomp%COL_INFO%CORE_ME==1) THEN
+call MPI_ALLTOALLV(work1, decomp%y1cnts_s, decomp%y1disp_s, &
+complex_type, work2, decomp%x1cnts_s, decomp%x1disp_s, &
+complex_type, decomp%COL_INFO%SMP_COMM, ierror)
+end if
 #else
 #ifdef EVEN
-    call MPI_ALLTOALL(work1_c, decomp%y1count, &
-         complex_type, work2_c, decomp%x1count, &
-         complex_type, DECOMP_2D_COMM_COL, ierror)
+call MPI_ALLTOALL(work1_c, decomp%y1count, &
+complex_type, work2_c, decomp%x1count, &
+complex_type, DECOMP_2D_COMM_COL, ierror)
 #else
-    call MPI_ALLTOALLV(work1_c, decomp%y1cnts, decomp%y1disp, &
-         complex_type, work2_c, decomp%x1cnts, decomp%x1disp, &
-         complex_type, DECOMP_2D_COMM_COL, ierror)
+call MPI_ALLTOALLV(work1_c, decomp%y1cnts, decomp%y1disp, &
+complex_type, work2_c, decomp%x1cnts, decomp%x1disp, &
+complex_type, DECOMP_2D_COMM_COL, ierror)
 #endif
 #endif
 
-    ! rearrange receive buffer
+! rearrange receive buffer
 #ifdef SHM
-    call MPI_BARRIER(decomp%COL_INFO%CORE_COMM, ierror)
-    call mem_merge_yx_complex(work2, d1, d2, d3, dst, dims(1), &
-         decomp%x1dist, decomp)
+call MPI_BARRIER(decomp%COL_INFO%CORE_COMM, ierror)
+call mem_merge_yx_complex(work2, d1, d2, d3, dst, dims(1), &
+decomp%x1dist, decomp)
 #else
-    call mem_merge_yx_complex(work2_c, d1, d2, d3, dst, dims(1), &
-         decomp%x1dist, decomp)
+call mem_merge_yx_complex(work2_c, d1, d2, d3, dst, dims(1), &
+decomp%x1dist, decomp)
 #endif
 
-    return
-  end subroutine transpose_y_to_x_complex
+return
+end subroutine transpose_y_to_x_complex
 
 
 #ifdef OCC
-  subroutine transpose_y_to_x_complex_start(handle, src, dst, sbuf, &
-       rbuf, opt_decomp)
-
-    implicit none
-    
-    integer :: handle
-    complex(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
-
-    TYPE(DECOMP_INFO) :: decomp
-
-    integer :: s1,s2,s3
-    integer :: ierror
-
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
-
-    s1 = SIZE(src,1)
-    s2 = SIZE(src,2)
-    s3 = SIZE(src,3)
-    
-    ! rearrange source array as send buffer
-    call mem_split_yx_complex(src, s1, s2, s3, sbuf, dims(1), &
-         decomp%y1dist, decomp)
+subroutine transpose_y_to_x_complex_start(handle, src, dst, sbuf, &
+rbuf, opt_decomp)
+
+implicit none
+
+integer :: handle
+complex(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+
+TYPE(DECOMP_INFO) :: decomp
+
+integer :: s1,s2,s3
+integer :: ierror
+
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
+
+s1 = SIZE(src,1)
+s2 = SIZE(src,2)
+s3 = SIZE(src,3)
+
+! rearrange source array as send buffer
+call mem_split_yx_complex(src, s1, s2, s3, sbuf, dims(1), &
+decomp%y1dist, decomp)
 
 #ifdef EVEN
-    call NBC_IALLTOALL(sbuf, decomp%y1count, &
-         complex_type, rbuf, decomp%x1count, &
-         complex_type, DECOMP_2D_COMM_COL, handle, ierror)
+call NBC_IALLTOALL(sbuf, decomp%y1count, &
+complex_type, rbuf, decomp%x1count, &
+complex_type, DECOMP_2D_COMM_COL, handle, ierror)
 #else
-    call NBC_IALLTOALLV(sbuf, decomp%y1cnts, decomp%y1disp, &
-         complex_type, rbuf, decomp%x1cnts, decomp%x1disp, &
-         complex_type, DECOMP_2D_COMM_COL, handle, ierror)
+call NBC_IALLTOALLV(sbuf, decomp%y1cnts, decomp%y1disp, &
+complex_type, rbuf, decomp%x1cnts, decomp%x1disp, &
+complex_type, DECOMP_2D_COMM_COL, handle, ierror)
 #endif
 
-    return
-  end subroutine transpose_y_to_x_complex_start
+return
+end subroutine transpose_y_to_x_complex_start
+
 
+subroutine transpose_y_to_x_complex_wait(handle, src, dst, sbuf, &
+rbuf, opt_decomp)
 
-  subroutine transpose_y_to_x_complex_wait(handle, src, dst, sbuf, &
-       rbuf, opt_decomp)
+implicit none
 
-    implicit none
-    
-    integer :: handle
-    complex(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+integer :: handle
+complex(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
 
-    TYPE(DECOMP_INFO) :: decomp
+TYPE(DECOMP_INFO) :: decomp
 
-    integer :: d1,d2,d3
-    integer :: ierror
+integer :: d1,d2,d3
+integer :: ierror
 
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
 
-    d1 = SIZE(dst,1)
-    d2 = SIZE(dst,2)
-    d3 = SIZE(dst,3)
+d1 = SIZE(dst,1)
+d2 = SIZE(dst,2)
+d3 = SIZE(dst,3)
 
-    call NBC_WAIT(handle, ierror)
+call NBC_WAIT(handle, ierror)
 
-    ! rearrange receive buffer
-    call mem_merge_yx_complex(rbuf, d1, d2, d3, dst, dims(1), &
-         decomp%x1dist, decomp)
+! rearrange receive buffer
+call mem_merge_yx_complex(rbuf, d1, d2, d3, dst, dims(1), &
+decomp%x1dist, decomp)
 
-    return
-  end subroutine transpose_y_to_x_complex_wait
+return
+end subroutine transpose_y_to_x_complex_wait
 #endif
 
 
-  ! pack/unpack ALLTOALL(V) buffers
+! pack/unpack ALLTOALL(V) buffers
 
-  subroutine mem_split_yx_real(in,n1,n2,n3,out,iproc,dist,decomp)
+subroutine mem_split_yx_real(in,n1,n2,n3,out,iproc,dist,decomp)
 
-    implicit none
+implicit none
 
-    integer, intent(IN) :: n1,n2,n3
-    real(mytype), dimension(n1,n2,n3), intent(IN) :: in
-    real(mytype), dimension(*), intent(OUT) :: out
-    integer, intent(IN) :: iproc
-    integer, dimension(0:iproc-1), intent(IN) :: dist
-    TYPE(DECOMP_INFO), intent(IN) :: decomp
-    
-    integer :: i,j,k, m,i1,i2,pos
+integer, intent(IN) :: n1,n2,n3
+real(mytype), dimension(n1,n2,n3), intent(IN) :: in
+real(mytype), dimension(*), intent(OUT) :: out
+integer, intent(IN) :: iproc
+integer, dimension(0:iproc-1), intent(IN) :: dist
+TYPE(DECOMP_INFO), intent(IN) :: decomp
 
-    do m=0,iproc-1
-       if (m==0) then 
-          i1 = 1
-          i2 = dist(0)
-       else
-          i1 = i2+1
-          i2 = i1+dist(m)-1
-       end if
+integer :: i,j,k, m,i1,i2,pos
+
+do m=0,iproc-1
+if (m==0) then 
+i1 = 1
+i2 = dist(0)
+else
+i1 = i2+1
+i2 = i1+dist(m)-1
+end if
 
 #ifdef SHM
-       pos = decomp%y1disp_o(m) + 1
+pos = decomp%y1disp_o(m) + 1
 #else
 #ifdef EVEN
-       pos = m * decomp%y1count + 1
+pos = m * decomp%y1count + 1
 #else
-       pos = decomp%y1disp(m) + 1
+pos = decomp%y1disp(m) + 1
 #endif
 #endif
 
-       do k=1,n3
-          do j=i1,i2
-             do i=1,n1
-                out(pos) = in(i,j,k)
-                pos = pos + 1
-             end do
-          end do
-       end do
-    end do
-
-    return
-  end subroutine mem_split_yx_real
-
-
-  subroutine mem_split_yx_complex(in,n1,n2,n3,out,iproc,dist,decomp)
-
-    implicit none
-
-    integer, intent(IN) :: n1,n2,n3
-    complex(mytype), dimension(n1,n2,n3), intent(IN) :: in
-    complex(mytype), dimension(*), intent(OUT) :: out
-    integer, intent(IN) :: iproc
-    integer, dimension(0:iproc-1), intent(IN) :: dist
-    TYPE(DECOMP_INFO), intent(IN) :: decomp
-    
-    integer :: i,j,k, m,i1,i2,pos
-
-    do m=0,iproc-1
-       if (m==0) then 
-          i1 = 1
-          i2 = dist(0)
-       else
-          i1 = i2+1
-          i2 = i1+dist(m)-1
-       end if
+do k=1,n3
+do j=i1,i2
+do i=1,n1
+out(pos) = in(i,j,k)
+pos = pos + 1
+end do
+end do
+end do
+end do
+
+return
+end subroutine mem_split_yx_real
+
+
+subroutine mem_split_yx_complex(in,n1,n2,n3,out,iproc,dist,decomp)
+
+implicit none
+
+integer, intent(IN) :: n1,n2,n3
+complex(mytype), dimension(n1,n2,n3), intent(IN) :: in
+complex(mytype), dimension(*), intent(OUT) :: out
+integer, intent(IN) :: iproc
+integer, dimension(0:iproc-1), intent(IN) :: dist
+TYPE(DECOMP_INFO), intent(IN) :: decomp
+
+integer :: i,j,k, m,i1,i2,pos
+
+do m=0,iproc-1
+if (m==0) then 
+i1 = 1
+i2 = dist(0)
+else
+i1 = i2+1
+i2 = i1+dist(m)-1
+end if
 
 #ifdef SHM
-       pos = decomp%y1disp_o(m) + 1
+pos = decomp%y1disp_o(m) + 1
 #else
 #ifdef EVEN
-       pos = m * decomp%y1count + 1
+pos = m * decomp%y1count + 1
 #else
-       pos = decomp%y1disp(m) + 1
+pos = decomp%y1disp(m) + 1
 #endif
 #endif
 
-       do k=1,n3
-          do j=i1,i2
-             do i=1,n1
-                out(pos) = in(i,j,k)
-                pos = pos + 1
-             end do
-          end do
-       end do
-    end do
-
-    return
-  end subroutine mem_split_yx_complex
-
-
-  subroutine mem_merge_yx_real(in,n1,n2,n3,out,iproc,dist,decomp)
-
-    implicit none
-    
-    integer, intent(IN) :: n1,n2,n3
-    real(mytype), dimension(*), intent(IN) :: in
-    real(mytype), dimension(n1,n2,n3), intent(OUT) :: out
-    integer, intent(IN) :: iproc
-    integer, dimension(0:iproc-1), intent(IN) :: dist
-    TYPE(DECOMP_INFO), intent(IN) :: decomp
-    
-    integer :: i,j,k, m,i1,i2, pos
-
-    do m=0,iproc-1
-       if (m==0) then
-          i1 = 1
-          i2 = dist(0)
-       else
-          i1 = i2+1
-          i2 = i1+dist(m)-1
-       end if
+do k=1,n3
+do j=i1,i2
+do i=1,n1
+out(pos) = in(i,j,k)
+pos = pos + 1
+end do
+end do
+end do
+end do
+
+return
+end subroutine mem_split_yx_complex
+
+
+subroutine mem_merge_yx_real(in,n1,n2,n3,out,iproc,dist,decomp)
+
+implicit none
+
+integer, intent(IN) :: n1,n2,n3
+real(mytype), dimension(*), intent(IN) :: in
+real(mytype), dimension(n1,n2,n3), intent(OUT) :: out
+integer, intent(IN) :: iproc
+integer, dimension(0:iproc-1), intent(IN) :: dist
+TYPE(DECOMP_INFO), intent(IN) :: decomp
+
+integer :: i,j,k, m,i1,i2, pos
+
+do m=0,iproc-1
+if (m==0) then
+i1 = 1
+i2 = dist(0)
+else
+i1 = i2+1
+i2 = i1+dist(m)-1
+end if
 
 #ifdef SHM
-       pos = decomp%x1disp_o(m) + 1
+pos = decomp%x1disp_o(m) + 1
 #else
 #ifdef EVEN
-       pos = m * decomp%x1count + 1
+pos = m * decomp%x1count + 1
 #else
-       pos = decomp%x1disp(m) + 1
+pos = decomp%x1disp(m) + 1
 #endif
 #endif
 
-       do k=1,n3
-          do j=1,n2
-             do i=i1,i2
-                out(i,j,k) = in(pos)
-                pos = pos + 1
-             end do
-          end do
-       end do
-    end do
-
-    return
-  end subroutine mem_merge_yx_real
-
-
-  subroutine mem_merge_yx_complex(in,n1,n2,n3,out,iproc,dist,decomp)
-
-    implicit none
-    
-    integer, intent(IN) :: n1,n2,n3
-    complex(mytype), dimension(*), intent(IN) :: in
-    complex(mytype), dimension(n1,n2,n3), intent(OUT) :: out
-    integer, intent(IN) :: iproc
-    integer, dimension(0:iproc-1), intent(IN) :: dist
-    TYPE(DECOMP_INFO), intent(IN) :: decomp
-    
-    integer :: i,j,k, m,i1,i2, pos
-
-    do m=0,iproc-1
-       if (m==0) then
-          i1 = 1
-          i2 = dist(0)
-       else
-          i1 = i2+1
-          i2 = i1+dist(m)-1
-       end if
+do k=1,n3
+do j=1,n2
+do i=i1,i2
+out(i,j,k) = in(pos)
+pos = pos + 1
+end do
+end do
+end do
+end do
+
+return
+end subroutine mem_merge_yx_real
+
+
+subroutine mem_merge_yx_complex(in,n1,n2,n3,out,iproc,dist,decomp)
+
+implicit none
+
+integer, intent(IN) :: n1,n2,n3
+complex(mytype), dimension(*), intent(IN) :: in
+complex(mytype), dimension(n1,n2,n3), intent(OUT) :: out
+integer, intent(IN) :: iproc
+integer, dimension(0:iproc-1), intent(IN) :: dist
+TYPE(DECOMP_INFO), intent(IN) :: decomp
+
+integer :: i,j,k, m,i1,i2, pos
+
+do m=0,iproc-1
+if (m==0) then
+i1 = 1
+i2 = dist(0)
+else
+i1 = i2+1
+i2 = i1+dist(m)-1
+end if
 
 #ifdef SHM
-       pos = decomp%x1disp_o(m) + 1
+pos = decomp%x1disp_o(m) + 1
 #else
 #ifdef EVEN
-       pos = m * decomp%x1count + 1
+pos = m * decomp%x1count + 1
 #else
-       pos = decomp%x1disp(m) + 1
+pos = decomp%x1disp(m) + 1
 #endif
 #endif
 
-       do k=1,n3
-          do j=1,n2
-             do i=i1,i2
-                out(i,j,k) = in(pos)
-                pos = pos + 1
-             end do
-          end do
-       end do
-    end do
-
-    return
-  end subroutine mem_merge_yx_complex
+do k=1,n3
+do j=1,n2
+do i=i1,i2
+out(i,j,k) = in(pos)
+pos = pos + 1
+end do
+end do
+end do
+end do
+
+return
+end subroutine mem_merge_yx_complex
diff --git a/decomp2d/transpose_y_to_z.inc b/decomp2d/transpose_y_to_z.inc
index 58a92458..9eb28588 100644
--- a/decomp2d/transpose_y_to_z.inc
+++ b/decomp2d/transpose_y_to_z.inc
@@ -11,513 +11,513 @@
 
 ! This file contains the routines that transpose data from Y to Z pencil
 
-  subroutine transpose_y_to_z_real(src, dst, opt_decomp)
+subroutine transpose_y_to_z_real(src, dst, opt_decomp)
 
-    implicit none
-    
-    real(mytype), dimension(:,:,:), intent(IN) :: src
-    real(mytype), dimension(:,:,:), intent(OUT) :: dst
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+implicit none
 
-    TYPE(DECOMP_INFO) :: decomp
+real(mytype), dimension(:,:,:), intent(IN) :: src
+real(mytype), dimension(:,:,:), intent(OUT) :: dst
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+
+TYPE(DECOMP_INFO) :: decomp
 
 #ifdef SHM
-    real(mytype) :: work1(*), work2(*)
-    POINTER  (work1_p, work1), (work2_p, work2)  ! Cray pointers
+real(mytype) :: work1(*), work2(*)
+POINTER  (work1_p, work1), (work2_p, work2)  ! Cray pointers
 #endif
-    
-    integer :: s1,s2,s3,d1,d2,d3
-    integer :: ierror
-
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
-
-    s1 = SIZE(src,1)
-    s2 = SIZE(src,2)
-    s3 = SIZE(src,3)
-    d1 = SIZE(dst,1)
-    d2 = SIZE(dst,2)
-    d3 = SIZE(dst,3)
-
-    ! rearrange source array as send buffer
+
+integer :: s1,s2,s3,d1,d2,d3
+integer :: ierror
+
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
+
+s1 = SIZE(src,1)
+s2 = SIZE(src,2)
+s3 = SIZE(src,3)
+d1 = SIZE(dst,1)
+d2 = SIZE(dst,2)
+d3 = SIZE(dst,3)
+
+! rearrange source array as send buffer
 #ifdef SHM
-    work1_p = decomp%ROW_INFO%SND_P
-    call mem_split_yz_real(src, s1, s2, s3, work1, dims(2), &
-         decomp%y2dist, decomp)
+work1_p = decomp%ROW_INFO%SND_P
+call mem_split_yz_real(src, s1, s2, s3, work1, dims(2), &
+decomp%y2dist, decomp)
 #else
-    call mem_split_yz_real(src, s1, s2, s3, work1_r, dims(2), &
-         decomp%y2dist, decomp)
+call mem_split_yz_real(src, s1, s2, s3, work1_r, dims(2), &
+decomp%y2dist, decomp)
 #endif
 
-    ! define receive buffer
+! define receive buffer
 #ifdef SHM
-    work2_p = decomp%ROW_INFO%RCV_P
-    call MPI_BARRIER(decomp%ROW_INFO%CORE_COMM, ierror)
+work2_p = decomp%ROW_INFO%RCV_P
+call MPI_BARRIER(decomp%ROW_INFO%CORE_COMM, ierror)
 #endif
-    
+
 #ifdef SHM
-    if (decomp%ROW_INFO%CORE_ME==1) THEN
-       call MPI_ALLTOALLV(work1, decomp%y2cnts_s, decomp%y2disp_s, &
-            real_type, work2, decomp%z2cnts_s, decomp%z2disp_s, &
-            real_type, decomp%ROW_INFO%SMP_COMM, ierror)
-    end if
+if (decomp%ROW_INFO%CORE_ME==1) THEN
+call MPI_ALLTOALLV(work1, decomp%y2cnts_s, decomp%y2disp_s, &
+real_type, work2, decomp%z2cnts_s, decomp%z2disp_s, &
+real_type, decomp%ROW_INFO%SMP_COMM, ierror)
+end if
 #else
 #ifdef EVEN
-    if (decomp%even) then
-       call MPI_ALLTOALL(work1_r, decomp%y2count, &
-            real_type, dst, decomp%z2count, &
-            real_type, DECOMP_2D_COMM_ROW, ierror)
-    else
-       call MPI_ALLTOALL(work1_r, decomp%y2count, &
-            real_type, work2_r, decomp%z2count, &
-            real_type, DECOMP_2D_COMM_ROW, ierror)
-    end if
+if (decomp%even) then
+call MPI_ALLTOALL(work1_r, decomp%y2count, &
+real_type, dst, decomp%z2count, &
+real_type, DECOMP_2D_COMM_ROW, ierror)
+else
+call MPI_ALLTOALL(work1_r, decomp%y2count, &
+real_type, work2_r, decomp%z2count, &
+real_type, DECOMP_2D_COMM_ROW, ierror)
+end if
 #else
-    call MPI_ALLTOALLV(work1_r, decomp%y2cnts, decomp%y2disp, &
-         real_type, dst, decomp%z2cnts, decomp%z2disp, &
-         real_type, DECOMP_2D_COMM_ROW, ierror)
+call MPI_ALLTOALLV(work1_r, decomp%y2cnts, decomp%y2disp, &
+real_type, dst, decomp%z2cnts, decomp%z2disp, &
+real_type, DECOMP_2D_COMM_ROW, ierror)
 #endif
 #endif
 
-    ! rearrange receive buffer
+! rearrange receive buffer
 #ifdef SHM
-    call MPI_BARRIER(decomp%ROW_INFO%CORE_COMM, ierror)
-    call mem_merge_yz_real(work2, d1, d2, d3, dst, dims(2), &
-         decomp%z2dist, decomp)
+call MPI_BARRIER(decomp%ROW_INFO%CORE_COMM, ierror)
+call mem_merge_yz_real(work2, d1, d2, d3, dst, dims(2), &
+decomp%z2dist, decomp)
 #else
 #ifdef EVEN
-    if (.not. decomp%even) then
-       call mem_merge_yz_real(work2_r, d1, d2, d3, dst, dims(2), &
-         decomp%z2dist, decomp)
-    end if
+if (.not. decomp%even) then
+call mem_merge_yz_real(work2_r, d1, d2, d3, dst, dims(2), &
+decomp%z2dist, decomp)
+end if
 #else
-    ! note the receive buffer is already in natural (i,j,k) order
-    ! so no merge operation needed
+! note the receive buffer is already in natural (i,j,k) order
+! so no merge operation needed
 #endif
 #endif
-    
-    return
-  end subroutine transpose_y_to_z_real
+
+return
+end subroutine transpose_y_to_z_real
 
 
 #ifdef OCC
-  subroutine transpose_y_to_z_real_start(handle, src, dst, sbuf, rbuf, &
-       opt_decomp)
-
-    implicit none
-    
-    integer :: handle
-    real(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
-
-    TYPE(DECOMP_INFO) :: decomp
-
-    integer :: s1,s2,s3
-    integer :: ierror
-
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
-    
-    s1 = SIZE(src,1)
-    s2 = SIZE(src,2)
-    s3 = SIZE(src,3)
-
-    ! rearrange source array as send buffer
-    call mem_split_yz_real(src, s1, s2, s3, sbuf, dims(2), &
-         decomp%y2dist, decomp)
+subroutine transpose_y_to_z_real_start(handle, src, dst, sbuf, rbuf, &
+opt_decomp)
+
+implicit none
+
+integer :: handle
+real(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+
+TYPE(DECOMP_INFO) :: decomp
+
+integer :: s1,s2,s3
+integer :: ierror
+
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
+
+s1 = SIZE(src,1)
+s2 = SIZE(src,2)
+s3 = SIZE(src,3)
+
+! rearrange source array as send buffer
+call mem_split_yz_real(src, s1, s2, s3, sbuf, dims(2), &
+decomp%y2dist, decomp)
 
 #ifdef EVEN
-    call NBC_IALLTOALL(sbuf, decomp%y2count, real_type, &
-         rbuf, decomp%z2count, real_type, &
-         DECOMP_2D_COMM_ROW, handle, ierror)
+call NBC_IALLTOALL(sbuf, decomp%y2count, real_type, &
+rbuf, decomp%z2count, real_type, &
+DECOMP_2D_COMM_ROW, handle, ierror)
 #else
-    call NBC_IALLTOALLV(sbuf, decomp%y2cnts, decomp%y2disp, real_type, &
-         rbuf, decomp%z2cnts, decomp%z2disp, real_type, &
-         DECOMP_2D_COMM_ROW, handle, ierror)
+call NBC_IALLTOALLV(sbuf, decomp%y2cnts, decomp%y2disp, real_type, &
+rbuf, decomp%z2cnts, decomp%z2disp, real_type, &
+DECOMP_2D_COMM_ROW, handle, ierror)
 #endif
 
-    return
-  end subroutine transpose_y_to_z_real_start
+return
+end subroutine transpose_y_to_z_real_start
+
 
+subroutine transpose_y_to_z_real_wait(handle, src, dst, sbuf, rbuf, &
+opt_decomp)
 
-  subroutine transpose_y_to_z_real_wait(handle, src, dst, sbuf, rbuf, &
-       opt_decomp)
+implicit none
 
-    implicit none
-    
-    integer :: handle
-    real(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+integer :: handle
+real(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
 
-    TYPE(DECOMP_INFO) :: decomp
+TYPE(DECOMP_INFO) :: decomp
 
-    integer :: ierror
+integer :: ierror
 
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
 
-    call NBC_WAIT(handle, ierror)
+call NBC_WAIT(handle, ierror)
 
-    dst = rbuf
+dst = rbuf
 
-    return
-  end subroutine transpose_y_to_z_real_wait
+return
+end subroutine transpose_y_to_z_real_wait
 #endif
 
 
-  subroutine transpose_y_to_z_complex(src, dst, opt_decomp)
+subroutine transpose_y_to_z_complex(src, dst, opt_decomp)
 
-    implicit none
-    
-    complex(mytype), dimension(:,:,:), intent(IN) :: src
-    complex(mytype), dimension(:,:,:), intent(OUT) :: dst
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+implicit none
 
-    TYPE(DECOMP_INFO) :: decomp
+complex(mytype), dimension(:,:,:), intent(IN) :: src
+complex(mytype), dimension(:,:,:), intent(OUT) :: dst
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+
+TYPE(DECOMP_INFO) :: decomp
 
 #ifdef SHM
-    complex(mytype) :: work1(*), work2(*)
-    POINTER  (work1_p, work1), (work2_p, work2)  ! Cray pointers
+complex(mytype) :: work1(*), work2(*)
+POINTER  (work1_p, work1), (work2_p, work2)  ! Cray pointers
 #endif
-    
-    integer :: s1,s2,s3,d1,d2,d3
-    integer :: ierror
-
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
-
-    s1 = SIZE(src,1)
-    s2 = SIZE(src,2)
-    s3 = SIZE(src,3)
-    d1 = SIZE(dst,1)
-    d2 = SIZE(dst,2)
-    d3 = SIZE(dst,3)
-    
-    ! rearrange source array as send buffer
+
+integer :: s1,s2,s3,d1,d2,d3
+integer :: ierror
+
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
+
+s1 = SIZE(src,1)
+s2 = SIZE(src,2)
+s3 = SIZE(src,3)
+d1 = SIZE(dst,1)
+d2 = SIZE(dst,2)
+d3 = SIZE(dst,3)
+
+! rearrange source array as send buffer
 #ifdef SHM
-    work1_p = decomp%ROW_INFO%SND_P_c
-    call mem_split_yz_complex(src, s1, s2, s3, work1, dims(2), &
-         decomp%y2dist, decomp)
+work1_p = decomp%ROW_INFO%SND_P_c
+call mem_split_yz_complex(src, s1, s2, s3, work1, dims(2), &
+decomp%y2dist, decomp)
 #else
-    call mem_split_yz_complex(src, s1, s2, s3, work1_c, dims(2), &
-         decomp%y2dist, decomp)
+call mem_split_yz_complex(src, s1, s2, s3, work1_c, dims(2), &
+decomp%y2dist, decomp)
 #endif
-    
-    ! define receive buffer
+
+! define receive buffer
 #ifdef SHM
-    work2_p = decomp%ROW_INFO%RCV_P_c
-    call MPI_BARRIER(decomp%ROW_INFO%CORE_COMM, ierror)
+work2_p = decomp%ROW_INFO%RCV_P_c
+call MPI_BARRIER(decomp%ROW_INFO%CORE_COMM, ierror)
 #endif
-    
+
 #ifdef SHM
-    if (decomp%ROW_INFO%CORE_ME==1) THEN
-       call MPI_ALLTOALLV(work1, decomp%y2cnts_s, decomp%y2disp_s, &
-            complex_type, work2, decomp%z2cnts_s, decomp%z2disp_s, &
-            complex_type, decomp%ROW_INFO%SMP_COMM, ierror)
-    end if
+if (decomp%ROW_INFO%CORE_ME==1) THEN
+call MPI_ALLTOALLV(work1, decomp%y2cnts_s, decomp%y2disp_s, &
+complex_type, work2, decomp%z2cnts_s, decomp%z2disp_s, &
+complex_type, decomp%ROW_INFO%SMP_COMM, ierror)
+end if
 #else
 #ifdef EVEN
-    if (decomp%even) then
-       call MPI_ALLTOALL(work1_c, decomp%y2count, &
-            complex_type, dst, decomp%z2count, &
-            complex_type, DECOMP_2D_COMM_ROW, ierror)
-    else
-       call MPI_ALLTOALL(work1_c, decomp%y2count, &
-            complex_type, work2_c, decomp%z2count, &
-            complex_type, DECOMP_2D_COMM_ROW, ierror)
-    end if
+if (decomp%even) then
+call MPI_ALLTOALL(work1_c, decomp%y2count, &
+complex_type, dst, decomp%z2count, &
+complex_type, DECOMP_2D_COMM_ROW, ierror)
+else
+call MPI_ALLTOALL(work1_c, decomp%y2count, &
+complex_type, work2_c, decomp%z2count, &
+complex_type, DECOMP_2D_COMM_ROW, ierror)
+end if
 #else
-    call MPI_ALLTOALLV(work1_c, decomp%y2cnts, decomp%y2disp, &
-         complex_type, dst, decomp%z2cnts, decomp%z2disp, &
-         complex_type, DECOMP_2D_COMM_ROW, ierror)
+call MPI_ALLTOALLV(work1_c, decomp%y2cnts, decomp%y2disp, &
+complex_type, dst, decomp%z2cnts, decomp%z2disp, &
+complex_type, DECOMP_2D_COMM_ROW, ierror)
 #endif
 #endif
 
-    ! rearrange receive buffer
+! rearrange receive buffer
 #ifdef SHM
-    call MPI_BARRIER(decomp%ROW_INFO%CORE_COMM, ierror)
-    call mem_merge_yz_complex(work2, d1, d2, d3, dst, dims(2), &
-         decomp%z2dist, decomp)
+call MPI_BARRIER(decomp%ROW_INFO%CORE_COMM, ierror)
+call mem_merge_yz_complex(work2, d1, d2, d3, dst, dims(2), &
+decomp%z2dist, decomp)
 #else
 #ifdef EVEN
-    if (.not. decomp%even) then
-       call mem_merge_yz_complex(work2_c, d1, d2, d3, dst, dims(2), &
-         decomp%z2dist, decomp)
-    end if
+if (.not. decomp%even) then
+call mem_merge_yz_complex(work2_c, d1, d2, d3, dst, dims(2), &
+decomp%z2dist, decomp)
+end if
 #else
-    ! note the receive buffer is already in natural (i,j,k) order
-    ! so no merge operation needed
+! note the receive buffer is already in natural (i,j,k) order
+! so no merge operation needed
 #endif
 #endif
 
-    return
-  end subroutine transpose_y_to_z_complex
+return
+end subroutine transpose_y_to_z_complex
 
 
 #ifdef OCC
-  subroutine transpose_y_to_z_complex_start(handle, src, dst, sbuf, &
-       rbuf, opt_decomp)
-
-    implicit none
-    
-    integer :: handle
-    complex(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
-
-    TYPE(DECOMP_INFO) :: decomp
-
-    integer :: s1,s2,s3
-    integer :: ierror
-
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
-    
-    s1 = SIZE(src,1)
-    s2 = SIZE(src,2)
-    s3 = SIZE(src,3)
-
-    ! rearrange source array as send buffer
-    call mem_split_yz_complex(src, s1, s2, s3, sbuf, dims(2), &
-         decomp%y2dist, decomp)
+subroutine transpose_y_to_z_complex_start(handle, src, dst, sbuf, &
+rbuf, opt_decomp)
+
+implicit none
+
+integer :: handle
+complex(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+
+TYPE(DECOMP_INFO) :: decomp
+
+integer :: s1,s2,s3
+integer :: ierror
+
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
+
+s1 = SIZE(src,1)
+s2 = SIZE(src,2)
+s3 = SIZE(src,3)
+
+! rearrange source array as send buffer
+call mem_split_yz_complex(src, s1, s2, s3, sbuf, dims(2), &
+decomp%y2dist, decomp)
 
 #ifdef EVEN
-    call NBC_IALLTOALL(sbuf, decomp%y2count, &
-         complex_type, rbuf, decomp%z2count, &
-         complex_type, DECOMP_2D_COMM_ROW, handle, ierror)
+call NBC_IALLTOALL(sbuf, decomp%y2count, &
+complex_type, rbuf, decomp%z2count, &
+complex_type, DECOMP_2D_COMM_ROW, handle, ierror)
 #else
-    call NBC_IALLTOALLV(sbuf, decomp%y2cnts, decomp%y2disp, &
-         complex_type, rbuf,decomp%z2cnts, decomp%z2disp, &
-         complex_type, DECOMP_2D_COMM_ROW, handle, ierror)
+call NBC_IALLTOALLV(sbuf, decomp%y2cnts, decomp%y2disp, &
+complex_type, rbuf,decomp%z2cnts, decomp%z2disp, &
+complex_type, DECOMP_2D_COMM_ROW, handle, ierror)
 #endif
 
-    return
-  end subroutine transpose_y_to_z_complex_start
+return
+end subroutine transpose_y_to_z_complex_start
+
 
+subroutine transpose_y_to_z_complex_wait(handle, src, dst, sbuf, &
+rbuf, opt_decomp)
 
-  subroutine transpose_y_to_z_complex_wait(handle, src, dst, sbuf, &
-       rbuf, opt_decomp)
+implicit none
 
-    implicit none
-    
-    integer :: handle
-    complex(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+integer :: handle
+complex(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
 
-    TYPE(DECOMP_INFO) :: decomp
+TYPE(DECOMP_INFO) :: decomp
 
-    integer :: ierror
+integer :: ierror
 
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
 
-    call NBC_WAIT(handle, ierror)
+call NBC_WAIT(handle, ierror)
 
-    dst = rbuf
+dst = rbuf
 
-    return
-  end subroutine transpose_y_to_z_complex_wait
+return
+end subroutine transpose_y_to_z_complex_wait
 #endif
 
 
-  ! pack/unpack ALLTOALL(V) buffers
+! pack/unpack ALLTOALL(V) buffers
 
-  subroutine mem_split_yz_real(in,n1,n2,n3,out,iproc,dist,decomp)
+subroutine mem_split_yz_real(in,n1,n2,n3,out,iproc,dist,decomp)
 
-    implicit none
+implicit none
 
-    integer, intent(IN) :: n1,n2,n3
-    real(mytype), dimension(n1,n2,n3), intent(IN) :: in
-    real(mytype), dimension(*), intent(OUT) :: out
-    integer, intent(IN) :: iproc
-    integer, dimension(0:iproc-1), intent(IN) :: dist
-    TYPE(DECOMP_INFO), intent(IN) :: decomp
-    
-    integer :: i,j,k, m,i1,i2,pos
+integer, intent(IN) :: n1,n2,n3
+real(mytype), dimension(n1,n2,n3), intent(IN) :: in
+real(mytype), dimension(*), intent(OUT) :: out
+integer, intent(IN) :: iproc
+integer, dimension(0:iproc-1), intent(IN) :: dist
+TYPE(DECOMP_INFO), intent(IN) :: decomp
 
-    do m=0,iproc-1
-       if (m==0) then 
-          i1 = 1
-          i2 = dist(0)
-       else
-          i1 = i2+1
-          i2 = i1+dist(m)-1
-       end if
+integer :: i,j,k, m,i1,i2,pos
+
+do m=0,iproc-1
+if (m==0) then 
+i1 = 1
+i2 = dist(0)
+else
+i1 = i2+1
+i2 = i1+dist(m)-1
+end if
 
 #ifdef SHM
-       pos = decomp%y2disp_o(m) + 1
+pos = decomp%y2disp_o(m) + 1
 #else
 #ifdef EVEN
-       pos = m * decomp%y2count + 1
+pos = m * decomp%y2count + 1
 #else
-       pos = decomp%y2disp(m) + 1
+pos = decomp%y2disp(m) + 1
 #endif
 #endif
 
-       do k=1,n3
-          do j=i1,i2
-             do i=1,n1
-                out(pos) = in(i,j,k)
-                pos = pos + 1
-             end do
-          end do
-       end do
-    end do
-
-    return
-  end subroutine mem_split_yz_real
-
-
-  subroutine mem_split_yz_complex(in,n1,n2,n3,out,iproc,dist,decomp)
-
-    implicit none
-
-    integer, intent(IN) :: n1,n2,n3
-    complex(mytype), dimension(n1,n2,n3), intent(IN) :: in
-    complex(mytype), dimension(*), intent(OUT) :: out
-    integer, intent(IN) :: iproc
-    integer, dimension(0:iproc-1), intent(IN) :: dist
-    TYPE(DECOMP_INFO), intent(IN) :: decomp
-    
-    integer :: i,j,k, m,i1,i2,pos
-
-    do m=0,iproc-1
-       if (m==0) then 
-          i1 = 1
-          i2 = dist(0)
-       else
-          i1 = i2+1
-          i2 = i1+dist(m)-1
-       end if
+do k=1,n3
+do j=i1,i2
+do i=1,n1
+out(pos) = in(i,j,k)
+pos = pos + 1
+end do
+end do
+end do
+end do
+
+return
+end subroutine mem_split_yz_real
+
+
+subroutine mem_split_yz_complex(in,n1,n2,n3,out,iproc,dist,decomp)
+
+implicit none
+
+integer, intent(IN) :: n1,n2,n3
+complex(mytype), dimension(n1,n2,n3), intent(IN) :: in
+complex(mytype), dimension(*), intent(OUT) :: out
+integer, intent(IN) :: iproc
+integer, dimension(0:iproc-1), intent(IN) :: dist
+TYPE(DECOMP_INFO), intent(IN) :: decomp
+
+integer :: i,j,k, m,i1,i2,pos
+
+do m=0,iproc-1
+if (m==0) then 
+i1 = 1
+i2 = dist(0)
+else
+i1 = i2+1
+i2 = i1+dist(m)-1
+end if
 
 #ifdef SHM
-       pos = decomp%y2disp_o(m) + 1
+pos = decomp%y2disp_o(m) + 1
 #else
 #ifdef EVEN
-       pos = m * decomp%y2count + 1
+pos = m * decomp%y2count + 1
 #else
-       pos = decomp%y2disp(m) + 1
+pos = decomp%y2disp(m) + 1
 #endif
 #endif
 
-       do k=1,n3
-          do j=i1,i2
-             do i=1,n1
-                out(pos) = in(i,j,k)
-                pos = pos + 1
-             end do
-          end do
-       end do
-    end do
-
-    return
-  end subroutine mem_split_yz_complex
-
-
-  subroutine mem_merge_yz_real(in,n1,n2,n3,out,iproc,dist,decomp)
-
-    implicit none
-    
-    integer, intent(IN) :: n1,n2,n3
-    real(mytype), dimension(*), intent(IN) :: in
-    real(mytype), dimension(n1,n2,n3), intent(OUT) :: out
-    integer, intent(IN) :: iproc
-    integer, dimension(0:iproc-1), intent(IN) :: dist
-    TYPE(DECOMP_INFO), intent(IN) :: decomp
-    
-    integer :: i,j,k, m,i1,i2, pos
-
-    do m=0,iproc-1
-       if (m==0) then
-          i1 = 1
-          i2 = dist(0)
-       else
-          i1 = i2+1
-          i2 = i1+dist(m)-1
-       end if
+do k=1,n3
+do j=i1,i2
+do i=1,n1
+out(pos) = in(i,j,k)
+pos = pos + 1
+end do
+end do
+end do
+end do
+
+return
+end subroutine mem_split_yz_complex
+
+
+subroutine mem_merge_yz_real(in,n1,n2,n3,out,iproc,dist,decomp)
+
+implicit none
+
+integer, intent(IN) :: n1,n2,n3
+real(mytype), dimension(*), intent(IN) :: in
+real(mytype), dimension(n1,n2,n3), intent(OUT) :: out
+integer, intent(IN) :: iproc
+integer, dimension(0:iproc-1), intent(IN) :: dist
+TYPE(DECOMP_INFO), intent(IN) :: decomp
+
+integer :: i,j,k, m,i1,i2, pos
+
+do m=0,iproc-1
+if (m==0) then
+i1 = 1
+i2 = dist(0)
+else
+i1 = i2+1
+i2 = i1+dist(m)-1
+end if
 
 #ifdef SHM
-       pos = decomp%z2disp_o(m) + 1
+pos = decomp%z2disp_o(m) + 1
 #else
 #ifdef EVEN
-       pos = m * decomp%z2count + 1
+pos = m * decomp%z2count + 1
 #else
-       pos = decomp%z2disp(m) + 1
+pos = decomp%z2disp(m) + 1
 #endif
 #endif
 
-       do k=i1,i2
-          do j=1,n2
-             do i=1,n1
-                out(i,j,k) = in(pos)
-                pos = pos + 1
-             end do
-          end do
-       end do
-    end do
-
-    return
-  end subroutine mem_merge_yz_real
-
-
-  subroutine mem_merge_yz_complex(in,n1,n2,n3,out,iproc,dist,decomp)
-    
-    implicit none
-    
-    integer, intent(IN) :: n1,n2,n3
-    complex(mytype), dimension(*), intent(IN) :: in
-    complex(mytype), dimension(n1,n2,n3), intent(OUT) :: out
-    integer, intent(IN) :: iproc
-    integer, dimension(0:iproc-1), intent(IN) :: dist
-    TYPE(DECOMP_INFO), intent(IN) :: decomp
-    
-    integer :: i,j,k, m,i1,i2, pos
-
-    do m=0,iproc-1
-       if (m==0) then
-          i1 = 1
-          i2 = dist(0)
-       else
-          i1 = i2+1
-          i2 = i1+dist(m)-1
-       end if
+do k=i1,i2
+do j=1,n2
+do i=1,n1
+out(i,j,k) = in(pos)
+pos = pos + 1
+end do
+end do
+end do
+end do
+
+return
+end subroutine mem_merge_yz_real
+
+
+subroutine mem_merge_yz_complex(in,n1,n2,n3,out,iproc,dist,decomp)
+
+implicit none
+
+integer, intent(IN) :: n1,n2,n3
+complex(mytype), dimension(*), intent(IN) :: in
+complex(mytype), dimension(n1,n2,n3), intent(OUT) :: out
+integer, intent(IN) :: iproc
+integer, dimension(0:iproc-1), intent(IN) :: dist
+TYPE(DECOMP_INFO), intent(IN) :: decomp
+
+integer :: i,j,k, m,i1,i2, pos
+
+do m=0,iproc-1
+if (m==0) then
+i1 = 1
+i2 = dist(0)
+else
+i1 = i2+1
+i2 = i1+dist(m)-1
+end if
 
 #ifdef SHM
-       pos = decomp%z2disp_o(m) + 1
+pos = decomp%z2disp_o(m) + 1
 #else
 #ifdef EVEN
-       pos = m * decomp%z2count + 1
+pos = m * decomp%z2count + 1
 #else
-       pos = decomp%z2disp(m) + 1
+pos = decomp%z2disp(m) + 1
 #endif
 #endif
 
-       do k=i1,i2
-          do j=1,n2
-             do i=1,n1
-                out(i,j,k) = in(pos)
-                pos = pos + 1
-             end do
-          end do
-       end do
-    end do
-
-    return
-  end subroutine mem_merge_yz_complex
+do k=i1,i2
+do j=1,n2
+do i=1,n1
+out(i,j,k) = in(pos)
+pos = pos + 1
+end do
+end do
+end do
+end do
+
+return
+end subroutine mem_merge_yz_complex
diff --git a/decomp2d/transpose_z_to_y.inc b/decomp2d/transpose_z_to_y.inc
index 4920ab27..53b5d521 100644
--- a/decomp2d/transpose_z_to_y.inc
+++ b/decomp2d/transpose_z_to_y.inc
@@ -11,513 +11,513 @@
 
 ! This file contains the routines that transpose data from Z to Y pencil
 
-  subroutine transpose_z_to_y_real(src, dst, opt_decomp)
+subroutine transpose_z_to_y_real(src, dst, opt_decomp)
 
-    implicit none
-    
-    real(mytype), dimension(:,:,:), intent(IN) :: src
-    real(mytype), dimension(:,:,:), intent(OUT) :: dst
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+implicit none
 
-    TYPE(DECOMP_INFO) :: decomp
+real(mytype), dimension(:,:,:), intent(IN) :: src
+real(mytype), dimension(:,:,:), intent(OUT) :: dst
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+
+TYPE(DECOMP_INFO) :: decomp
 
 #ifdef SHM
-    real(mytype) :: work1(*), work2(*)
-    POINTER  (work1_p, work1), (work2_p, work2)  ! Cray pointers
+real(mytype) :: work1(*), work2(*)
+POINTER  (work1_p, work1), (work2_p, work2)  ! Cray pointers
 #endif
-    
-    integer :: s1,s2,s3,d1,d2,d3
-    integer :: ierror
-
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
-
-    s1 = SIZE(src,1)
-    s2 = SIZE(src,2)
-    s3 = SIZE(src,3)
-    d1 = SIZE(dst,1)
-    d2 = SIZE(dst,2)
-    d3 = SIZE(dst,3)
-
-    ! rearrange source array as send buffer
+
+integer :: s1,s2,s3,d1,d2,d3
+integer :: ierror
+
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
+
+s1 = SIZE(src,1)
+s2 = SIZE(src,2)
+s3 = SIZE(src,3)
+d1 = SIZE(dst,1)
+d2 = SIZE(dst,2)
+d3 = SIZE(dst,3)
+
+! rearrange source array as send buffer
 #ifdef SHM
-    work1_p = decomp%ROW_INFO%SND_P
-    call mem_split_zy_real(src, s1, s2, s3, work1, dims(2), &
-         decomp%z2dist, decomp)
+work1_p = decomp%ROW_INFO%SND_P
+call mem_split_zy_real(src, s1, s2, s3, work1, dims(2), &
+decomp%z2dist, decomp)
 #else
 #ifdef EVEN
-    if (.not. decomp%even) then
-       call mem_split_zy_real(src, s1, s2, s3, work1_r, dims(2), &
-            decomp%z2dist, decomp)
-    end if
+if (.not. decomp%even) then
+call mem_split_zy_real(src, s1, s2, s3, work1_r, dims(2), &
+decomp%z2dist, decomp)
+end if
 #else
-    ! note the src array is suitable to be a send buffer
-    ! so no split operation needed
+! note the src array is suitable to be a send buffer
+! so no split operation needed
 #endif
 #endif
-    
-    ! define receive buffer
+
+! define receive buffer
 #ifdef SHM
-    work2_p = decomp%ROW_INFO%RCV_P
-    call MPI_BARRIER(decomp%ROW_INFO%CORE_COMM, ierror)
+work2_p = decomp%ROW_INFO%RCV_P
+call MPI_BARRIER(decomp%ROW_INFO%CORE_COMM, ierror)
 #endif
-    
+
 #ifdef SHM
-    if (decomp%ROW_INFO%CORE_ME==1) THEN
-       call MPI_ALLTOALLV(work1, decomp%z2cnts_s, decomp%z2disp_s, &
-            real_type, work2, decomp%y2cnts_s, decomp%y2disp_s, &
-            real_type, decomp%ROW_INFO%SMP_COMM, ierror)
-    end if
+if (decomp%ROW_INFO%CORE_ME==1) THEN
+call MPI_ALLTOALLV(work1, decomp%z2cnts_s, decomp%z2disp_s, &
+real_type, work2, decomp%y2cnts_s, decomp%y2disp_s, &
+real_type, decomp%ROW_INFO%SMP_COMM, ierror)
+end if
 #else
 #ifdef EVEN
-    if (decomp%even) then
-       call MPI_ALLTOALL(src, decomp%z2count, &
-            real_type, work2_r, decomp%y2count, &
-            real_type, DECOMP_2D_COMM_ROW, ierror)
-    else
-       call MPI_ALLTOALL(work1_r, decomp%z2count, &
-            real_type, work2_r, decomp%y2count, &
-            real_type, DECOMP_2D_COMM_ROW, ierror)
-    end if
+if (decomp%even) then
+call MPI_ALLTOALL(src, decomp%z2count, &
+real_type, work2_r, decomp%y2count, &
+real_type, DECOMP_2D_COMM_ROW, ierror)
+else
+call MPI_ALLTOALL(work1_r, decomp%z2count, &
+real_type, work2_r, decomp%y2count, &
+real_type, DECOMP_2D_COMM_ROW, ierror)
+end if
 #else
-    call MPI_ALLTOALLV(src, decomp%z2cnts, decomp%z2disp, &
-         real_type, work2_r, decomp%y2cnts, decomp%y2disp, &
-         real_type, DECOMP_2D_COMM_ROW, ierror)
+call MPI_ALLTOALLV(src, decomp%z2cnts, decomp%z2disp, &
+real_type, work2_r, decomp%y2cnts, decomp%y2disp, &
+real_type, DECOMP_2D_COMM_ROW, ierror)
 #endif
 #endif
 
-    ! rearrange receive buffer
+! rearrange receive buffer
 #ifdef SHM
-    call MPI_BARRIER(decomp%ROW_INFO%CORE_COMM, ierror)
-    call mem_merge_zy_real(work2, d1, d2, d3, dst, dims(2), &
-         decomp%y2dist, decomp)
+call MPI_BARRIER(decomp%ROW_INFO%CORE_COMM, ierror)
+call mem_merge_zy_real(work2, d1, d2, d3, dst, dims(2), &
+decomp%y2dist, decomp)
 #else
-    call mem_merge_zy_real(work2_r, d1, d2, d3, dst, dims(2), &
-         decomp%y2dist, decomp)
+call mem_merge_zy_real(work2_r, d1, d2, d3, dst, dims(2), &
+decomp%y2dist, decomp)
 #endif
-    
-    return
-  end subroutine transpose_z_to_y_real
+
+return
+end subroutine transpose_z_to_y_real
 
 
 #ifdef OCC
-  subroutine transpose_z_to_y_real_start(handle, src, dst, sbuf, rbuf, &
-       opt_decomp)
+subroutine transpose_z_to_y_real_start(handle, src, dst, sbuf, rbuf, &
+opt_decomp)
+
+implicit none
 
-    implicit none
-    
-    integer :: handle
-    real(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+integer :: handle
+real(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
 
-    TYPE(DECOMP_INFO) :: decomp
+TYPE(DECOMP_INFO) :: decomp
 
-    integer :: ierror
+integer :: ierror
 
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
 
-    sbuf = src
+sbuf = src
 
 #ifdef EVEN
-    call NBC_IALLTOALL(sbuf, decomp%z2count, real_type, &
-         rbuf, decomp%y2count, real_type, &
-         DECOMP_2D_COMM_ROW, handle, ierror)
+call NBC_IALLTOALL(sbuf, decomp%z2count, real_type, &
+rbuf, decomp%y2count, real_type, &
+DECOMP_2D_COMM_ROW, handle, ierror)
 #else
-    call NBC_IALLTOALLV(sbuf, decomp%z2cnts, decomp%z2disp, real_type, &
-         rbuf, decomp%y2cnts, decomp%y2disp, real_type, &
-         DECOMP_2D_COMM_ROW, handle, ierror)
+call NBC_IALLTOALLV(sbuf, decomp%z2cnts, decomp%z2disp, real_type, &
+rbuf, decomp%y2cnts, decomp%y2disp, real_type, &
+DECOMP_2D_COMM_ROW, handle, ierror)
 #endif
 
-    return
-  end subroutine transpose_z_to_y_real_start
+return
+end subroutine transpose_z_to_y_real_start
 
 
-  subroutine transpose_z_to_y_real_wait(handle, src, dst, sbuf, rbuf, &
-       opt_decomp)
+subroutine transpose_z_to_y_real_wait(handle, src, dst, sbuf, rbuf, &
+opt_decomp)
 
-    implicit none
-    
-    integer :: handle
-    real(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+implicit none
 
-    TYPE(DECOMP_INFO) :: decomp
+integer :: handle
+real(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
 
-    integer :: d1,d2,d3
-    integer :: ierror
+TYPE(DECOMP_INFO) :: decomp
 
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
+integer :: d1,d2,d3
+integer :: ierror
 
-    d1 = SIZE(dst,1)
-    d2 = SIZE(dst,2)
-    d3 = SIZE(dst,3)
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
 
-    call NBC_WAIT(handle, ierror)
+d1 = SIZE(dst,1)
+d2 = SIZE(dst,2)
+d3 = SIZE(dst,3)
 
-    ! rearrange receive buffer
-    call mem_merge_zy_real(rbuf, d1, d2, d3, dst, dims(2), &
-         decomp%y2dist, decomp)
+call NBC_WAIT(handle, ierror)
 
-    return
-  end subroutine transpose_z_to_y_real_wait
+! rearrange receive buffer
+call mem_merge_zy_real(rbuf, d1, d2, d3, dst, dims(2), &
+decomp%y2dist, decomp)
+
+return
+end subroutine transpose_z_to_y_real_wait
 #endif
 
 
-  subroutine transpose_z_to_y_complex(src, dst, opt_decomp)
+subroutine transpose_z_to_y_complex(src, dst, opt_decomp)
+
+implicit none
 
-    implicit none
-    
-    complex(mytype), dimension(:,:,:), intent(IN) :: src
-    complex(mytype), dimension(:,:,:), intent(OUT) :: dst
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+complex(mytype), dimension(:,:,:), intent(IN) :: src
+complex(mytype), dimension(:,:,:), intent(OUT) :: dst
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
 
-    TYPE(DECOMP_INFO) :: decomp
+TYPE(DECOMP_INFO) :: decomp
 
 #ifdef SHM
-    complex(mytype) :: work1(*), work2(*)
-    POINTER  (work1_p, work1), (work2_p, work2)  ! Cray pointers
+complex(mytype) :: work1(*), work2(*)
+POINTER  (work1_p, work1), (work2_p, work2)  ! Cray pointers
 #endif
-    
-    integer :: s1,s2,s3,d1,d2,d3
-    integer :: ierror
-
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
-
-    s1 = SIZE(src,1)
-    s2 = SIZE(src,2)
-    s3 = SIZE(src,3)
-    d1 = SIZE(dst,1)
-    d2 = SIZE(dst,2)
-    d3 = SIZE(dst,3)
-    
-    ! rearrange source array as send buffer
+
+integer :: s1,s2,s3,d1,d2,d3
+integer :: ierror
+
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
+
+s1 = SIZE(src,1)
+s2 = SIZE(src,2)
+s3 = SIZE(src,3)
+d1 = SIZE(dst,1)
+d2 = SIZE(dst,2)
+d3 = SIZE(dst,3)
+
+! rearrange source array as send buffer
 #ifdef SHM
-    work1_p = decomp%ROW_INFO%SND_P_c
-    call mem_split_zy_complex(src, s1, s2, s3, work1, dims(2), &
-         decomp%z2dist, decomp)
+work1_p = decomp%ROW_INFO%SND_P_c
+call mem_split_zy_complex(src, s1, s2, s3, work1, dims(2), &
+decomp%z2dist, decomp)
 #else
 #ifdef EVEN
-    if (.not. decomp%even) then
-       call mem_split_zy_complex(src, s1, s2, s3, work1_c, dims(2), &
-            decomp%z2dist, decomp)
-    end if
+if (.not. decomp%even) then
+call mem_split_zy_complex(src, s1, s2, s3, work1_c, dims(2), &
+decomp%z2dist, decomp)
+end if
 #else
-    ! note the src array is suitable to be a send buffer
-    ! so no split operation needed
+! note the src array is suitable to be a send buffer
+! so no split operation needed
 #endif
 #endif
-    
-    ! define receive buffer
+
+! define receive buffer
 #ifdef SHM
-    work2_p = decomp%ROW_INFO%RCV_P_c
-    call MPI_BARRIER(decomp%ROW_INFO%CORE_COMM, ierror)
+work2_p = decomp%ROW_INFO%RCV_P_c
+call MPI_BARRIER(decomp%ROW_INFO%CORE_COMM, ierror)
 #endif
-    
+
 #ifdef SHM
-    if (decomp%ROW_INFO%CORE_ME==1) THEN
-       call MPI_ALLTOALLV(work1, decomp%z2cnts_s, decomp%z2disp_s, &
-            complex_type, work2, decomp%y2cnts_s, decomp%y2disp_s, &
-            complex_type, decomp%ROW_INFO%SMP_COMM, ierror)
-    end if
+if (decomp%ROW_INFO%CORE_ME==1) THEN
+call MPI_ALLTOALLV(work1, decomp%z2cnts_s, decomp%z2disp_s, &
+complex_type, work2, decomp%y2cnts_s, decomp%y2disp_s, &
+complex_type, decomp%ROW_INFO%SMP_COMM, ierror)
+end if
 #else
 #ifdef EVEN
-    if (decomp%even) then
-       call MPI_ALLTOALL(src, decomp%z2count, &
-            complex_type, work2_c, decomp%y2count, &
-            complex_type, DECOMP_2D_COMM_ROW, ierror)
-    else
-       call MPI_ALLTOALL(work1_c, decomp%z2count, &
-            complex_type, work2_c, decomp%y2count, &
-            complex_type, DECOMP_2D_COMM_ROW, ierror)
-    end if
+if (decomp%even) then
+call MPI_ALLTOALL(src, decomp%z2count, &
+complex_type, work2_c, decomp%y2count, &
+complex_type, DECOMP_2D_COMM_ROW, ierror)
+else
+call MPI_ALLTOALL(work1_c, decomp%z2count, &
+complex_type, work2_c, decomp%y2count, &
+complex_type, DECOMP_2D_COMM_ROW, ierror)
+end if
 #else
-    call MPI_ALLTOALLV(src, decomp%z2cnts, decomp%z2disp, &
-         complex_type, work2_c, decomp%y2cnts, decomp%y2disp, &
-         complex_type, DECOMP_2D_COMM_ROW, ierror)
+call MPI_ALLTOALLV(src, decomp%z2cnts, decomp%z2disp, &
+complex_type, work2_c, decomp%y2cnts, decomp%y2disp, &
+complex_type, DECOMP_2D_COMM_ROW, ierror)
 #endif
 #endif
 
-    ! rearrange receive buffer
+! rearrange receive buffer
 #ifdef SHM
-    call MPI_BARRIER(decomp%ROW_INFO%CORE_COMM, ierror)
-    call mem_merge_zy_complex(work2, d1, d2, d3, dst, dims(2), &
-         decomp%y2dist, decomp)
+call MPI_BARRIER(decomp%ROW_INFO%CORE_COMM, ierror)
+call mem_merge_zy_complex(work2, d1, d2, d3, dst, dims(2), &
+decomp%y2dist, decomp)
 #else
-    call mem_merge_zy_complex(work2_c, d1, d2, d3, dst, dims(2), &
-         decomp%y2dist, decomp)
+call mem_merge_zy_complex(work2_c, d1, d2, d3, dst, dims(2), &
+decomp%y2dist, decomp)
 #endif
 
-    return
-  end subroutine transpose_z_to_y_complex
+return
+end subroutine transpose_z_to_y_complex
 
 
 #ifdef OCC
-  subroutine transpose_z_to_y_complex_start(handle, src, dst, sbuf, &
-       rbuf, opt_decomp)
+subroutine transpose_z_to_y_complex_start(handle, src, dst, sbuf, &
+rbuf, opt_decomp)
+
+implicit none
 
-    implicit none
-    
-    integer :: handle
-    complex(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+integer :: handle
+complex(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
 
-    TYPE(DECOMP_INFO) :: decomp
+TYPE(DECOMP_INFO) :: decomp
 
-    integer :: ierror
+integer :: ierror
 
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
 
-    sbuf = src
+sbuf = src
 
 #ifdef EVEN
-    call NBC_IALLTOALL(sbuf, decomp%z2count, &
-         complex_type, rbuf, decomp%y2count, &
-         complex_type, DECOMP_2D_COMM_ROW, handle, ierror)
+call NBC_IALLTOALL(sbuf, decomp%z2count, &
+complex_type, rbuf, decomp%y2count, &
+complex_type, DECOMP_2D_COMM_ROW, handle, ierror)
 #else
-    call NBC_IALLTOALLV(sbuf, decomp%z2cnts, decomp%z2disp, &
-         complex_type, rbuf, decomp%y2cnts, decomp%y2disp, &
-         complex_type, DECOMP_2D_COMM_ROW, handle, ierror)
+call NBC_IALLTOALLV(sbuf, decomp%z2cnts, decomp%z2disp, &
+complex_type, rbuf, decomp%y2cnts, decomp%y2disp, &
+complex_type, DECOMP_2D_COMM_ROW, handle, ierror)
 #endif
 
-    return
-  end subroutine transpose_z_to_y_complex_start
+return
+end subroutine transpose_z_to_y_complex_start
+
 
+subroutine transpose_z_to_y_complex_wait(handle, src, dst, sbuf, &
+rbuf, opt_decomp)
 
-  subroutine transpose_z_to_y_complex_wait(handle, src, dst, sbuf, &
-       rbuf, opt_decomp)
+implicit none
 
-    implicit none
-    
-    integer :: handle
-    complex(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
-    TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
+integer :: handle
+complex(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf
+TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp
 
-    TYPE(DECOMP_INFO) :: decomp
+TYPE(DECOMP_INFO) :: decomp
 
-    integer :: d1,d2,d3
-    integer :: ierror
+integer :: d1,d2,d3
+integer :: ierror
 
-    if (present(opt_decomp)) then
-       decomp = opt_decomp
-    else
-       decomp = decomp_main
-    end if
+if (present(opt_decomp)) then
+decomp = opt_decomp
+else
+decomp = decomp_main
+end if
 
-    d1 = SIZE(dst,1)
-    d2 = SIZE(dst,2)
-    d3 = SIZE(dst,3)
+d1 = SIZE(dst,1)
+d2 = SIZE(dst,2)
+d3 = SIZE(dst,3)
 
-    call NBC_WAIT(handle, ierror)
+call NBC_WAIT(handle, ierror)
 
-    ! rearrange receive buffer
-    call mem_merge_zy_complex(rbuf, d1, d2, d3, dst, dims(2), &
-         decomp%y2dist, decomp)
+! rearrange receive buffer
+call mem_merge_zy_complex(rbuf, d1, d2, d3, dst, dims(2), &
+decomp%y2dist, decomp)
 
-    return
-  end subroutine transpose_z_to_y_complex_wait
+return
+end subroutine transpose_z_to_y_complex_wait
 #endif
 
 
-  ! pack/unpack ALLTOALL(V) buffers
+! pack/unpack ALLTOALL(V) buffers
 
-  subroutine mem_split_zy_real(in,n1,n2,n3,out,iproc,dist,decomp)
+subroutine mem_split_zy_real(in,n1,n2,n3,out,iproc,dist,decomp)
 
-    implicit none
+implicit none
 
-    integer, intent(IN) :: n1,n2,n3
-    real(mytype), dimension(n1,n2,n3), intent(IN) :: in
-    real(mytype), dimension(*), intent(OUT) :: out
-    integer, intent(IN) :: iproc
-    integer, dimension(0:iproc-1), intent(IN) :: dist
-    TYPE(DECOMP_INFO), intent(IN) :: decomp
-    
-    integer :: i,j,k, m,i1,i2,pos
+integer, intent(IN) :: n1,n2,n3
+real(mytype), dimension(n1,n2,n3), intent(IN) :: in
+real(mytype), dimension(*), intent(OUT) :: out
+integer, intent(IN) :: iproc
+integer, dimension(0:iproc-1), intent(IN) :: dist
+TYPE(DECOMP_INFO), intent(IN) :: decomp
 
-    do m=0,iproc-1
-       if (m==0) then 
-          i1 = 1
-          i2 = dist(0)
-       else
-          i1 = i2+1
-          i2 = i1+dist(m)-1
-       end if
+integer :: i,j,k, m,i1,i2,pos
+
+do m=0,iproc-1
+if (m==0) then 
+i1 = 1
+i2 = dist(0)
+else
+i1 = i2+1
+i2 = i1+dist(m)-1
+end if
 
 #ifdef SHM
-       pos = decomp%z2disp_o(m) + 1
+pos = decomp%z2disp_o(m) + 1
 #else
 #ifdef EVEN
-       pos = m * decomp%z2count + 1
+pos = m * decomp%z2count + 1
 #else
-       pos = decomp%z2disp(m) + 1
+pos = decomp%z2disp(m) + 1
 #endif
 #endif
 
-       do k=i1,i2
-          do j=1,n2
-             do i=1,n1
-                out(pos) = in(i,j,k)
-                pos = pos + 1
-             end do
-          end do
-       end do
-    end do
-
-    return
-  end subroutine mem_split_zy_real
-
-
-  subroutine mem_split_zy_complex(in,n1,n2,n3,out,iproc,dist,decomp)
-
-    implicit none
-
-    integer, intent(IN) :: n1,n2,n3
-    complex(mytype), dimension(n1,n2,n3), intent(IN) :: in
-    complex(mytype), dimension(*), intent(OUT) :: out
-    integer, intent(IN) :: iproc
-    integer, dimension(0:iproc-1), intent(IN) :: dist
-    TYPE(DECOMP_INFO), intent(IN) :: decomp
-    
-    integer :: i,j,k, m,i1,i2,pos
-
-    do m=0,iproc-1
-       if (m==0) then 
-          i1 = 1
-          i2 = dist(0)
-       else
-          i1 = i2+1
-          i2 = i1+dist(m)-1
-       end if
+do k=i1,i2
+do j=1,n2
+do i=1,n1
+out(pos) = in(i,j,k)
+pos = pos + 1
+end do
+end do
+end do
+end do
+
+return
+end subroutine mem_split_zy_real
+
+
+subroutine mem_split_zy_complex(in,n1,n2,n3,out,iproc,dist,decomp)
+
+implicit none
+
+integer, intent(IN) :: n1,n2,n3
+complex(mytype), dimension(n1,n2,n3), intent(IN) :: in
+complex(mytype), dimension(*), intent(OUT) :: out
+integer, intent(IN) :: iproc
+integer, dimension(0:iproc-1), intent(IN) :: dist
+TYPE(DECOMP_INFO), intent(IN) :: decomp
+
+integer :: i,j,k, m,i1,i2,pos
+
+do m=0,iproc-1
+if (m==0) then 
+i1 = 1
+i2 = dist(0)
+else
+i1 = i2+1
+i2 = i1+dist(m)-1
+end if
 
 #ifdef SHM
-       pos = decomp%z2disp_o(m) + 1
+pos = decomp%z2disp_o(m) + 1
 #else
 #ifdef EVEN
-       pos = m * decomp%z2count + 1
+pos = m * decomp%z2count + 1
 #else
-       pos = decomp%z2disp(m) + 1
+pos = decomp%z2disp(m) + 1
 #endif
 #endif
 
-       do k=i1,i2
-          do j=1,n2
-             do i=1,n1
-                out(pos) = in(i,j,k)
-                pos = pos + 1
-             end do
-          end do
-       end do
-    end do
-
-    return
-  end subroutine mem_split_zy_complex
-
-
-  subroutine mem_merge_zy_real(in,n1,n2,n3,out,iproc,dist,decomp)
-
-    implicit none
-    
-    integer, intent(IN) :: n1,n2,n3
-    real(mytype), dimension(*), intent(IN) :: in
-    real(mytype), dimension(n1,n2,n3), intent(OUT) :: out
-    integer, intent(IN) :: iproc
-    integer, dimension(0:iproc-1), intent(IN) :: dist
-    TYPE(DECOMP_INFO), intent(IN) :: decomp
-    
-    integer :: i,j,k, m,i1,i2, pos
-
-    do m=0,iproc-1
-       if (m==0) then
-          i1 = 1
-          i2 = dist(0)
-       else
-          i1 = i2+1
-          i2 = i1+dist(m)-1
-       end if
+do k=i1,i2
+do j=1,n2
+do i=1,n1
+out(pos) = in(i,j,k)
+pos = pos + 1
+end do
+end do
+end do
+end do
+
+return
+end subroutine mem_split_zy_complex
+
+
+subroutine mem_merge_zy_real(in,n1,n2,n3,out,iproc,dist,decomp)
+
+implicit none
+
+integer, intent(IN) :: n1,n2,n3
+real(mytype), dimension(*), intent(IN) :: in
+real(mytype), dimension(n1,n2,n3), intent(OUT) :: out
+integer, intent(IN) :: iproc
+integer, dimension(0:iproc-1), intent(IN) :: dist
+TYPE(DECOMP_INFO), intent(IN) :: decomp
+
+integer :: i,j,k, m,i1,i2, pos
+
+do m=0,iproc-1
+if (m==0) then
+i1 = 1
+i2 = dist(0)
+else
+i1 = i2+1
+i2 = i1+dist(m)-1
+end if
 
 #ifdef SHM
-       pos = decomp%y2disp_o(m) + 1
+pos = decomp%y2disp_o(m) + 1
 #else
 #ifdef EVEN
-       pos = m * decomp%y2count + 1
+pos = m * decomp%y2count + 1
 #else
-       pos = decomp%y2disp(m) + 1
+pos = decomp%y2disp(m) + 1
 #endif
 #endif
 
-       do k=1,n3
-          do j=i1,i2
-             do i=1,n1
-                out(i,j,k) = in(pos)
-                pos = pos + 1
-             end do
-          end do
-       end do
-    end do
-
-    return
-  end subroutine mem_merge_zy_real
-
-
-  subroutine mem_merge_zy_complex(in,n1,n2,n3,out,iproc,dist,decomp)
-    
-    implicit none
-    
-    integer, intent(IN) :: n1,n2,n3
-    complex(mytype), dimension(*), intent(IN) :: in
-    complex(mytype), dimension(n1,n2,n3), intent(OUT) :: out
-    integer, intent(IN) :: iproc
-    integer, dimension(0:iproc-1), intent(IN) :: dist
-    TYPE(DECOMP_INFO), intent(IN) :: decomp
-    
-    integer :: i,j,k, m,i1,i2, pos
-
-    do m=0,iproc-1
-       if (m==0) then
-          i1 = 1
-          i2 = dist(0)
-       else
-          i1 = i2+1
-          i2 = i1+dist(m)-1
-       end if
+do k=1,n3
+do j=i1,i2
+do i=1,n1
+out(i,j,k) = in(pos)
+pos = pos + 1
+end do
+end do
+end do
+end do
+
+return
+end subroutine mem_merge_zy_real
+
+
+subroutine mem_merge_zy_complex(in,n1,n2,n3,out,iproc,dist,decomp)
+
+implicit none
+
+integer, intent(IN) :: n1,n2,n3
+complex(mytype), dimension(*), intent(IN) :: in
+complex(mytype), dimension(n1,n2,n3), intent(OUT) :: out
+integer, intent(IN) :: iproc
+integer, dimension(0:iproc-1), intent(IN) :: dist
+TYPE(DECOMP_INFO), intent(IN) :: decomp
+
+integer :: i,j,k, m,i1,i2, pos
+
+do m=0,iproc-1
+if (m==0) then
+i1 = 1
+i2 = dist(0)
+else
+i1 = i2+1
+i2 = i1+dist(m)-1
+end if
 
 #ifdef SHM
-       pos = decomp%y2disp_o(m) + 1
+pos = decomp%y2disp_o(m) + 1
 #else
 #ifdef EVEN
-       pos = m * decomp%y2count + 1
+pos = m * decomp%y2count + 1
 #else
-       pos = decomp%y2disp(m) + 1
+pos = decomp%y2disp(m) + 1
 #endif
 #endif
 
-       do k=1,n3
-          do j=i1,i2
-             do i=1,n1
-                out(i,j,k) = in(pos)
-                pos = pos + 1
-             end do
-          end do
-       end do
-    end do
-
-    return
-  end subroutine mem_merge_zy_complex
+do k=1,n3
+do j=i1,i2
+do i=1,n1
+out(i,j,k) = in(pos)
+pos = pos + 1
+end do
+end do
+end do
+end do
+
+return
+end subroutine mem_merge_zy_complex
diff --git a/indent.sh b/indent.sh
new file mode 100644
index 00000000..715ced8f
--- /dev/null
+++ b/indent.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+#
+#        FILE: indent.sh
+#      AUTHOR: Paul Bartholomew
+# DESCRIPTION: Use emacs to indent files
+#
+
+# decomp2d/
+for f in decomp2d/*.f90
+do
+	echo "Indenting ${f}"
+	emacs -batch ${f} --eval '(indent-region (point-min) (point-max) nil)' -f save-buffer 2> /dev/null
+done
+for f in decomp2d/*.inc
+do
+	echo "Indenting ${f}"
+	emacs -batch ${f} --eval '(indent-region (point-min) (point-max) nil)' -f save-buffer 2> /dev/null
+done
+
+# src/
+for f in src/*.f90
+do
+	echo "Indenting ${f}"
+	emacs -batch ${f} --eval '(indent-region (point-min) (point-max) nil)' -f save-buffer 2> /dev/null
+done
diff --git a/src/BC-Channel-flow.f90 b/src/BC-Channel-flow.f90
index fc8b859a..062eecc1 100644
--- a/src/BC-Channel-flow.f90
+++ b/src/BC-Channel-flow.f90
@@ -147,7 +147,7 @@ contains
           i = xsize(1)
           phi(i,:,:,:) = phi(i - 1,:,:,:)
        endif
-       
+
        if ((nclyS1.eq.2).and.(xstart(2).eq.1)) then
           !! Generate a hot patch on bottom boundary
           do k = 1, xsize(3)
@@ -435,13 +435,13 @@ contains
   end subroutine write_probes
   !############################################################################
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !!
   !!  SUBROUTINE: momentum_forcing
   !!      AUTHOR: Paul Bartholomew
   !! DESCRIPTION: Applies rotation for t < spinup_time.
   !!
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   SUBROUTINE momentum_forcing_channel(dux1, duy1, ux1, uy1)
 
     IMPLICIT NONE
diff --git a/src/BC-Jet.f90 b/src/BC-Jet.f90
index 51127eb1..764909a6 100644
--- a/src/BC-Jet.f90
+++ b/src/BC-Jet.f90
@@ -89,14 +89,14 @@ contains
 
     !INIT FOR G AND U=MEAN FLOW + NOISE
     if (xstart(2)==1) then
-      j = 1
-      do k = 1, xsize(3)
-        do i = 1, xsize(1)
-          ux1(i, j, k) = byx1(i, k)
-          uy1(i, j, k) = byy1(i, k)
-          uz1(i, j, k) = byz1(i, k)
-        enddo
-      enddo
+       j = 1
+       do k = 1, xsize(3)
+          do i = 1, xsize(1)
+             ux1(i, j, k) = byx1(i, k)
+             uy1(i, j, k) = byy1(i, k)
+             uz1(i, j, k) = byz1(i, k)
+          enddo
+       enddo
     endif
     call transpose_x_to_y(ux1, ta2)
     call transpose_x_to_y(uy1, tb2)
@@ -128,13 +128,13 @@ contains
     dux1(:,:,:,1)=ux1(:,:,:)
     duy1(:,:,:,1)=uy1(:,:,:)
     duz1(:,:,:,1)=uz1(:,:,:)
-    
+
     drho1(:,:,:,1) = rho1(:,:,:,1)
     do is = 2, ntime
        dux1(:,:,:,is)=dux1(:,:,:,is - 1)
        duy1(:,:,:,is)=duy1(:,:,:,is - 1)
        duz1(:,:,:,is)=duz1(:,:,:,is - 1)
-       
+
        drho1(:,:,:,is) = drho1(:,:,:,is - 1)
     enddo
 
@@ -189,11 +189,11 @@ contains
     endif
     timeswitch = one !! Nichols starts with a jet 'column'
 
-    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     !! Set inflow
     inflow = zero
     j = 1
-    
+
     if (xstart(2) == 1) then
        do k = 1, xsize(3)
           z = real(k + xstart(3) - 2, mytype) * dz - half * zlz
@@ -226,7 +226,7 @@ contains
                       phi(i, 1, k, is) = one - half * (one + tanh((12.5_mytype / four) * ((D / two) / r - two * r / D)))
                    endif
                 enddo
-                
+
                 if (primary_species.gt.0) then
                    phi(i, 1, k, primary_species) = one
 
@@ -237,7 +237,7 @@ contains
                    enddo
                 endif
              endif
-                
+
              !! Apply transient behaviour
              ! if (r.lt.D/two) then
              !   perturbation = inflow_noise * sin(r * x * z * t)
@@ -265,22 +265,22 @@ contains
     byo(:,:) = two * byo(:,:) - one
     bzo(:,:) = two * bzo(:,:) - one
     do k=1,xsize(3)
-      do j=1,xsize(2)
-        bxx1(j,k)=bxo(j,k)*inflow_noise
-        bxy1(j,k)=byo(j,k)*inflow_noise
-        bxz1(j,k)=bzo(j,k)*inflow_noise
-      enddo
-   enddo
-
-   if (initialising) then !! we can stop here
-      return
-   endif
-
-    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+       do j=1,xsize(2)
+          bxx1(j,k)=bxo(j,k)*inflow_noise
+          bxy1(j,k)=byo(j,k)*inflow_noise
+          bxz1(j,k)=bzo(j,k)*inflow_noise
+       enddo
+    enddo
+
+    if (initialising) then !! we can stop here
+       return
+    endif
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     !! Apply lateral boundary conditions
     !! XXX Assume purely radial flow
     !! XXX In X-pencils
-    
+
     xc=half*xlx
     zc=half*xlx
 
@@ -515,7 +515,7 @@ contains
              endif
           endif
        enddo
-       
+
        if (iscalar.ne.0) then
           do is = 1, numscalar
              if (is.ne.primary_species) then
@@ -525,13 +525,13 @@ contains
              endif
           enddo
        endif
-       
+
        if (ilmn) then
           if (.not.ilmn_solve_temp) then
              rho(:,:,k,1) = dens1
           else
              ta1(:,:,k) = one
-             
+
              !! Need to compute rho (on boundary)
              CALL calc_rho_eos(rho(:,:,k,1), ta1(:,:,k), phi(:,:,k,:), tb1(:,:,k), xsize(1), xsize(2), 1)
           endif
@@ -608,20 +608,20 @@ contains
              endif
           enddo
        endif
-       
+
        if (ilmn) then
           if (.not.ilmn_solve_temp) then
              rho(:,:,k,1) = dens1
           else
              ta1(:,:,k) = one
-             
+
              !! Need to compute rho (on boundary)
              CALL calc_rho_eos(rho(:,:,k,1), ta1(:,:,k), phi(:,:,k,:), tb1(:,:,k), xsize(1), xsize(2), 1)
           endif
        endif
     ENDIF
 
-    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     !! Compute outflow
     call MPI_ALLREDUCE(inflow,outflow,1,real_type,MPI_SUM,MPI_COMM_WORLD,ierr)
     outflow = outflow / nx / nz
@@ -647,7 +647,7 @@ contains
              enddo
           endif
        endif
-       
+
        if (ilmn) then
           if (.not.ilmn_solve_temp) then
              rho(:, j, :, 1) = rho(:, j, :, 1) &
@@ -657,7 +657,7 @@ contains
              CALL calc_temp_eos(ta1(:,j-1:j,:), rho(:,j-1:j,:,1), phi(:,j-1:j,:,:), tb1(:,j-1:j,:), xsize(1), 2, xsize(3))
 
              ta1(:,j,:) = ta1(:,j,:) - dt * outflow * (ta1(:,j,:) - ta1(:,j-1,:))
-             
+
              !! Need to compute rho (on boundary)
              CALL calc_rho_eos(rho(:,j,:,1), ta1(:,j,:), phi(:,j,:,:), tb1(:,j,:), xsize(1), 1, xsize(3))
           endif
@@ -928,13 +928,13 @@ contains
   end subroutine write_probes
   !############################################################################
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !!
   !!  SUBROUTINE: momentum_forcing_jet
   !!      AUTHOR: Paul Bartholomew
   !! DESCRIPTION: Applies a fringe/sponge region at the outlet.
   !!
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   SUBROUTINE momentum_forcing_jet(dux1, duy1, duz1, rho1, ux1, uy1, uz1)
 
     IMPLICIT NONE
diff --git a/src/BC-Lock-exchange.f90 b/src/BC-Lock-exchange.f90
index 44522769..a8bfda69 100644
--- a/src/BC-Lock-exchange.f90
+++ b/src/BC-Lock-exchange.f90
@@ -172,7 +172,7 @@ subroutine init (ux1,uy1,uz1,ep1,phi1,dux1,duy1,duz1,phis1,phiss1)
         enddo
         do ijk = 1,xsize(1)*xsize(2)*xsize(3)
            if (phi1(ijk,1,1,is).gt.cp(is)) phi1(ijk,1,1,is) =  cp(is)
-            if (phi1(ijk,1,1,is).lt.zero) phi1(ijk,1,1,is) =  zero
+           if (phi1(ijk,1,1,is).lt.zero) phi1(ijk,1,1,is) =  zero
         enddo
      enddo
 
@@ -600,7 +600,7 @@ contains
     real(mytype),dimension(3,3,xsize(1),xsize(2),xsize(3)) :: A
 
     real(mytype),dimension(xszV(1),xszV(2),xszV(3)) :: uvisu
-  
+
     real(8) :: ek,ek1,dek,dek1,ep,ep1,dep,dep1,xvol
     integer :: ijk,i,j,k,l,m,is,code
     character(len=30) :: filename
@@ -719,20 +719,20 @@ contains
     end if
 
     if (mod(itime,imodulo).eq.0) then
-     !if (save_diss.eq.1) then
-        uvisu=zero
-        call fine_to_coarseV(1,diss1,uvisu)
-        write(filename,"('./data/diss',I4.4)") itime/imodulo
-        call decomp_2d_write_one(1,uvisu,filename,2)
-     !endif
-
-     !if (save_dissm.eq.1) then
-        call transpose_x_to_y (diss1,temp2)
-        call transpose_y_to_z (temp2,temp3)
-        call mean_plane_z(temp3,zsize(1),zsize(2),zsize(3),temp3(:,:,1))
-        write(filename,"('./data/dissm',I4.4)") itime/imodulo
-        call decomp_2d_write_plane(3,temp3,3,1,filename)
-     !endif
+       !if (save_diss.eq.1) then
+       uvisu=zero
+       call fine_to_coarseV(1,diss1,uvisu)
+       write(filename,"('./data/diss',I4.4)") itime/imodulo
+       call decomp_2d_write_one(1,uvisu,filename,2)
+       !endif
+
+       !if (save_dissm.eq.1) then
+       call transpose_x_to_y (diss1,temp2)
+       call transpose_y_to_z (temp2,temp3)
+       call mean_plane_z(temp3,zsize(1),zsize(2),zsize(3),temp3(:,:,1))
+       write(filename,"('./data/dissm',I4.4)") itime/imodulo
+       call decomp_2d_write_plane(3,temp3,3,1,filename)
+       !endif
     endif
 
   end subroutine budget
diff --git a/src/BC-Mixing-layer.f90 b/src/BC-Mixing-layer.f90
index 1dc26bbb..4eddf95d 100644
--- a/src/BC-Mixing-layer.f90
+++ b/src/BC-Mixing-layer.f90
@@ -18,7 +18,7 @@ module mixlayer
   PUBLIC :: init_mixlayer!, boundary_conditions_mixlayer, postprocessing_mixlayer
 
 contains
-  
+
   subroutine init_mixlayer (rho1,ux1,uy1,uz1,drho1,dux1,duy1,duz1)
 
     USE decomp_2d, ONLY : mytype, xsize
@@ -78,7 +78,7 @@ contains
                      + half * (u1 - u2) * TANH(two * y)
                 uy1(i, j, k) = zero
                 uz1(i, j, k) = zero
-                
+
                 rho1(i, j, k, 1) = (one / (two * heatcap)) &
                      * (ux1(i, j, k) * (u1 + u2) - ux1(i, j, k)**2 - u1 * u2) &
                      + ux1(i, j, k) * (T1 - T2) / (u1 - u2) &
@@ -117,7 +117,7 @@ contains
        duy1(:,:,:,is)=duy1(:,:,:,is - 1)
        duz1(:,:,:,is)=duz1(:,:,:,is - 1)
     enddo
-    
+
     drho1(:,:,:,1)=rho1(:,:,:,1)
     do is = 2, ntime
        drho1(:,:,:,is)=drho1(:,:,:,is - 1)
@@ -133,5 +133,5 @@ contains
 
     return
   end subroutine init_mixlayer
-  
+
 end module mixlayer
diff --git a/src/BC-Periodic-hill.f90 b/src/BC-Periodic-hill.f90
index 45cb5dcc..d2055e42 100644
--- a/src/BC-Periodic-hill.f90
+++ b/src/BC-Periodic-hill.f90
@@ -50,58 +50,58 @@ contains
     real(mytype)               :: zeromach
     real(mytype), dimension(nxi:nxf) :: dune
     real(mytype) :: y_bump
-    
-   zeromach=one
-  do while ((one + zeromach / two) .gt. one)
-     zeromach = zeromach/two
-  end do
-  zeromach = 1.0e1*zeromach
-  !
-  y_bump=zero
-  dune=zero
-  do i=nxi,nxf
-     xm=real(i-1,mytype)*dx
-     if (xm.gt.xlx/two) then
-        xm = (xlx-xm)*twentyeight
-     else
-        xm = xm*twentyeight
-     endif
-     if ((xm.ge.zero).and.(xm.le.nine)) then
-        y_bump=min(28.,28+0.006775070969851*xm**two-2.124527775800E-03*xm**three)
-     endif
-     if ((xm.ge.9.).and.(xm.le.fourteen)) then
-        y_bump=  2.507355893131E+01      +9.754803562315E-01*xm&
-             -1.016116352781E-01*xm**two +1.889794677828E-03*xm**three
-     endif
-     if ((xm.ge.14.).and.(xm.le.twenty)) then
-        y_bump=  2.579601052357E+01      +8.206693007457E-01*xm &
-             -9.055370274339E-02*xm**two +1.626510569859E-03*xm**three
-     endif
-     if ((xm.ge.20.).and.(xm.le.30._mytype)) then
-        y_bump=  4.046435022819E+01      -1.379581654948E+00*xm &
-             +1.945884504128E-02*xm**two -2.070318932190E-04*xm**three
-     endif
-     if ((xm.ge.30.).and.(xm.le.40._mytype)) then
-        y_bump=  1.792461334664E+01      +8.743920332081E-01*xm &
-             -5.567361123058E-02*xm**two +6.277731764683E-04*xm**three
-     endif
-     if ((xm.ge.40.).and.(xm.le.54._mytype)) then
-        y_bump=max(0.,5.639011190988E+01  -2.010520359035E+00*xm &
-             +1.644919857549E-02*xm**two  +2.674976141766E-05*xm**three)
-     endif
-     dune(i)=y_bump/twentyeight
-  enddo
-
-  do k=nzi,nzf
-     do j=nyi,nyf
-        ym=yp(j)
-        do i=nxi,nxf
-           if (ym-dune(i).le.zeromach) then
-              epsi(i,j,k)=remp
-           endif
-        enddo
-     enddo
-  enddo
+
+    zeromach=one
+    do while ((one + zeromach / two) .gt. one)
+       zeromach = zeromach/two
+    end do
+    zeromach = 1.0e1*zeromach
+    !
+    y_bump=zero
+    dune=zero
+    do i=nxi,nxf
+       xm=real(i-1,mytype)*dx
+       if (xm.gt.xlx/two) then
+          xm = (xlx-xm)*twentyeight
+       else
+          xm = xm*twentyeight
+       endif
+       if ((xm.ge.zero).and.(xm.le.nine)) then
+          y_bump=min(28.,28+0.006775070969851*xm**two-2.124527775800E-03*xm**three)
+       endif
+       if ((xm.ge.9.).and.(xm.le.fourteen)) then
+          y_bump=  2.507355893131E+01      +9.754803562315E-01*xm&
+               -1.016116352781E-01*xm**two +1.889794677828E-03*xm**three
+       endif
+       if ((xm.ge.14.).and.(xm.le.twenty)) then
+          y_bump=  2.579601052357E+01      +8.206693007457E-01*xm &
+               -9.055370274339E-02*xm**two +1.626510569859E-03*xm**three
+       endif
+       if ((xm.ge.20.).and.(xm.le.30._mytype)) then
+          y_bump=  4.046435022819E+01      -1.379581654948E+00*xm &
+               +1.945884504128E-02*xm**two -2.070318932190E-04*xm**three
+       endif
+       if ((xm.ge.30.).and.(xm.le.40._mytype)) then
+          y_bump=  1.792461334664E+01      +8.743920332081E-01*xm &
+               -5.567361123058E-02*xm**two +6.277731764683E-04*xm**three
+       endif
+       if ((xm.ge.40.).and.(xm.le.54._mytype)) then
+          y_bump=max(0.,5.639011190988E+01  -2.010520359035E+00*xm &
+               +1.644919857549E-02*xm**two  +2.674976141766E-05*xm**three)
+       endif
+       dune(i)=y_bump/twentyeight
+    enddo
+
+    do k=nzi,nzf
+       do j=nyi,nyf
+          ym=yp(j)
+          do i=nxi,nxf
+             if (ym-dune(i).le.zeromach) then
+                epsi(i,j,k)=remp
+             endif
+          enddo
+       enddo
+    enddo
 
     return
   end subroutine geomcomplex_hill
@@ -118,13 +118,13 @@ contains
     real(mytype),dimension(xsize(1),xsize(2),xsize(3)) :: ux,uy,uz,ep1
     real(mytype),dimension(xsize(1),xsize(2),xsize(3),numscalar) :: phi
 
-     real(mytype),dimension(ysize(1),ysize(2),ysize(3)) :: gx
-     
-     ux = ux*(one-ep1)
-     call transpose_x_to_y(ux,gx)
-     call hill_flrt(gx,(two/three)*(two/yly))
-     call transpose_y_to_x(gx,ux)
-     
+    real(mytype),dimension(ysize(1),ysize(2),ysize(3)) :: gx
+
+    ux = ux*(one-ep1)
+    call transpose_x_to_y(ux,gx)
+    call hill_flrt(gx,(two/three)*(two/yly))
+    call transpose_y_to_x(gx,ux)
+
     return
   end subroutine boundary_conditions_hill
 
@@ -151,80 +151,80 @@ contains
 
     integer, dimension (:), allocatable :: seed
 
-  if (iscalar==1) then
-
-     phi1(:,:,:,:) = zero !change as much as you want
-
-     !do not delete this
-     dphi1(:,:,:,1,:) = phi1(:,:,:,:)
-     do is = 2, ntime
-        dphi1(:,:,:,is,:) = dphi1(:,:,:,is - 1,:)
-     enddo
-
-  endif
-  ux1=zero;uy1=zero;uz1=zero
-  if (iin.ne.0) then
-     call system_clock(count=code)
-     if (iin.eq.2) code=0
-     call random_seed(size = ii)
-     call random_seed(put = code+63946*nrank*(/ (i - 1, i = 1, ii) /))
-
-     call random_number(ux1)
-     call random_number(uy1)
-     call random_number(uz1)
-  endif
-
-  !modulation of the random noise
-  do k=1,xsize(3)
-     do j=1,xsize(2)
-        do i=1,xsize(1)
-           ux1(i,j,k)=init_noise*(two*ux1(i,j,k)-one)
-           uy1(i,j,k)=init_noise*(two*uy1(i,j,k)-one)
-           uz1(i,j,k)=init_noise*(two*uz1(i,j,k)-one)
-        enddo
-     enddo
-  enddo
-
-  !initial velocity profile
-  do k=1,xsize(3)
-     do j=1,xsize(2)
-        if (istret.eq.0) y=real(j+xstart(2)-1-1,mytype)*dy
-        if (istret.ne.0) y=yp(j+xstart(2)-1)
-        if (y.lt.yly-two) then
-           do i=1,xsize(1)
-              ux1(i,j,k) = zero
-              uy1(i,j,k) = zero
-              uz1(i,j,k) = zero
-           enddo
-        else
-           do i=1,xsize(1)
-              ux1(i,j,k)=(ux1(i,j,k)+one)*(one-(y+one-yly)**two)
-              uy1(i,j,k)=(uy1(i,j,k))*(one-(y+one-yly)**two)
-              uz1(i,j,k)=(uz1(i,j,k))*(one-(y+one-yly)**two)
-           enddo
-        endif
-     enddo
-  enddo
-
-  !INIT FOR G AND U=MEAN FLOW + NOISE
-  do k=1,xsize(3)
-     do j=1,xsize(2)
-        do i=1,xsize(1)
-           ux1(i,j,k)=ux1(i,j,k)+bxx1(j,k)
-           uy1(i,j,k)=uy1(i,j,k)+bxy1(j,k)
-           uz1(i,j,k)=uz1(i,j,k)+bxz1(j,k)
-           dux1(i,j,k,1)=ux1(i,j,k)
-           duy1(i,j,k,1)=uy1(i,j,k)
-           duz1(i,j,k,1)=uz1(i,j,k)
-           dux1(i,j,k,2)=dux1(i,j,k,1)
-           duy1(i,j,k,2)=duy1(i,j,k,1)
-           duz1(i,j,k,2)=duz1(i,j,k,1)
-        enddo
-     enddo
-  enddo
+    if (iscalar==1) then
+
+       phi1(:,:,:,:) = zero !change as much as you want
+
+       !do not delete this
+       dphi1(:,:,:,1,:) = phi1(:,:,:,:)
+       do is = 2, ntime
+          dphi1(:,:,:,is,:) = dphi1(:,:,:,is - 1,:)
+       enddo
+
+    endif
+    ux1=zero;uy1=zero;uz1=zero
+    if (iin.ne.0) then
+       call system_clock(count=code)
+       if (iin.eq.2) code=0
+       call random_seed(size = ii)
+       call random_seed(put = code+63946*nrank*(/ (i - 1, i = 1, ii) /))
+
+       call random_number(ux1)
+       call random_number(uy1)
+       call random_number(uz1)
+    endif
+
+    !modulation of the random noise
+    do k=1,xsize(3)
+       do j=1,xsize(2)
+          do i=1,xsize(1)
+             ux1(i,j,k)=init_noise*(two*ux1(i,j,k)-one)
+             uy1(i,j,k)=init_noise*(two*uy1(i,j,k)-one)
+             uz1(i,j,k)=init_noise*(two*uz1(i,j,k)-one)
+          enddo
+       enddo
+    enddo
+
+    !initial velocity profile
+    do k=1,xsize(3)
+       do j=1,xsize(2)
+          if (istret.eq.0) y=real(j+xstart(2)-1-1,mytype)*dy
+          if (istret.ne.0) y=yp(j+xstart(2)-1)
+          if (y.lt.yly-two) then
+             do i=1,xsize(1)
+                ux1(i,j,k) = zero
+                uy1(i,j,k) = zero
+                uz1(i,j,k) = zero
+             enddo
+          else
+             do i=1,xsize(1)
+                ux1(i,j,k)=(ux1(i,j,k)+one)*(one-(y+one-yly)**two)
+                uy1(i,j,k)=(uy1(i,j,k))*(one-(y+one-yly)**two)
+                uz1(i,j,k)=(uz1(i,j,k))*(one-(y+one-yly)**two)
+             enddo
+          endif
+       enddo
+    enddo
+
+    !INIT FOR G AND U=MEAN FLOW + NOISE
+    do k=1,xsize(3)
+       do j=1,xsize(2)
+          do i=1,xsize(1)
+             ux1(i,j,k)=ux1(i,j,k)+bxx1(j,k)
+             uy1(i,j,k)=uy1(i,j,k)+bxy1(j,k)
+             uz1(i,j,k)=uz1(i,j,k)+bxz1(j,k)
+             dux1(i,j,k,1)=ux1(i,j,k)
+             duy1(i,j,k,1)=uy1(i,j,k)
+             duz1(i,j,k,1)=uz1(i,j,k)
+             dux1(i,j,k,2)=dux1(i,j,k,1)
+             duy1(i,j,k,2)=duy1(i,j,k,1)
+             duz1(i,j,k,2)=duz1(i,j,k,1)
+          enddo
+       enddo
+    enddo
 
 #ifdef DEBG
-  if (nrank .eq. 0) print *,'# init end ok'
+    if (nrank .eq. 0) print *,'# init end ok'
 #endif
 
     return
@@ -505,7 +505,7 @@ contains
 
   end subroutine write_probes
   !############################################################################
-    !********************************************************************
+  !********************************************************************
   !
   subroutine hill_flrt (ux,constant)
     !
diff --git a/src/BC-TGV.f90 b/src/BC-TGV.f90
index 184e9c78..d6e63109 100644
--- a/src/BC-TGV.f90
+++ b/src/BC-TGV.f90
@@ -379,7 +379,7 @@ contains
     enddo
     call MPI_ALLREDUCE(temp1,eek,1,real_type,MPI_SUM,MPI_COMM_WORLD,code)
     eek=eek/(nxc*nyc*nzc)
-    
+
     !x-derivatives
     call derx (ta1,ux1,di1,sx,ffx,fsx,fwx,xsize(1),xsize(2),xsize(3),0)
     call derx (tb1,uy1,di1,sx,ffxp,fsxp,fwxp,xsize(1),xsize(2),xsize(3),1)
diff --git a/src/BC-dbg-schemes.f90 b/src/BC-dbg-schemes.f90
index fb90ae16..11182075 100644
--- a/src/BC-dbg-schemes.f90
+++ b/src/BC-dbg-schemes.f90
@@ -188,7 +188,7 @@ contains
        enddo
        close(69)
     endif
-    
+
     ! FILTER
     call random_number(rand1)
     call filter(0.45_mytype)
@@ -215,7 +215,7 @@ contains
        enddo
        close(70)
     endif
-    
+
     do k=1,ysize(3)
        do j=1,ysize(2)
           y = real(j-1,mytype)*dy*4*pi
@@ -278,7 +278,7 @@ contains
        enddo
        close(69)
     endif
-    
+
     ! FILTER
     call filter(0.45_mytype)
     do k=1,ysize(3)
@@ -366,11 +366,11 @@ contains
        enddo
        close(69)
     endif
-    
+
     ! FILTER
     call filter(0.45_mytype)
     do k=1,zsize(3)
-          z = real(k-1,mytype)*dz*four*pi
+       z = real(k-1,mytype)*dz*four*pi
        do j=1,zsize(2)
           do i=1,zsize(1)
              ffz3(i,j,k)  = sin_prec(z)
@@ -424,7 +424,7 @@ contains
        enddo
        close(67)
     endif
-    
+
     ! FILTER
     call random_number(rand1)
     call filter(-0.45_mytype)
@@ -522,7 +522,7 @@ contains
     ! FILTER
     call filter(0.45_mytype)
     do k=1,zsize(3)
-          z = real(k-1,mytype)*dz*four*pi
+       z = real(k-1,mytype)*dz*four*pi
        do j=1,zsize(2)
           do i=1,zsize(1)
              ffz3(i,j,k)  = sin_prec(z)
@@ -689,7 +689,7 @@ contains
     ! FILTER
     call filter(0.45_mytype)
     do k=1,zsize(3)
-          z = real(k-1,mytype)*dz*four*pi
+       z = real(k-1,mytype)*dz*four*pi
        do j=1,zsize(2)
           do i=1,zsize(1)
              ffz3(i,j,k)  = sin_prec(z)
@@ -856,7 +856,7 @@ contains
     ! FILTER
     call filter(0.45_mytype)
     do k=1,zsize(3)
-          z = real(k-1,mytype)*dz*four*pi
+       z = real(k-1,mytype)*dz*four*pi
        do j=1,zsize(2)
           do i=1,zsize(1)
              ffz3(i,j,k)  = sin_prec(z)
@@ -973,7 +973,7 @@ contains
        enddo
        close(69)
     endif
-    
+
     ! FILTER
     call random_number(rand1)
     call filter(-0.45_mytype)
@@ -1145,7 +1145,7 @@ contains
     ! FILTER
     call filter(0.45_mytype)
     do k=1,zsize(3)
-          z = real(k-1,mytype)*dz*four*pi
+       z = real(k-1,mytype)*dz*four*pi
        do j=1,zsize(2)
           do i=1,zsize(1)
              ffz3(i,j,k)  = sin_prec(z)
diff --git a/src/case.f90 b/src/case.f90
index 88af7a1a..0e170d24 100644
--- a/src/case.f90
+++ b/src/case.f90
@@ -64,9 +64,9 @@ CONTAINS
     rho1(:,:,:,:) = one
 
     IF (itype.EQ.itype_user) THEN
-       
+
        CALL init_user (ux1, uy1, uz1, ep1, phi1, dux1, duy1, duz1, dphi1)
-       
+
     ELSEIF (itype.EQ.itype_lockexch) THEN
 
        IF (nrank.EQ.0) THEN
@@ -75,23 +75,23 @@ CONTAINS
        ENDIF
 
     ELSEIF (itype.EQ.itype_tgv) THEN
-       
+
        CALL init_tgv (ux1, uy1, uz1, ep1, phi1, dux1, duy1, duz1, dphi1)
-       
+
     ELSEIF (itype.EQ.itype_channel) THEN
-       
+
        CALL init_channel (ux1, uy1, uz1, ep1, phi1, dux1, duy1, duz1, dphi1)
-       
+
     ELSEIF (itype.EQ.itype_hill) THEN
 
        CALL  init_hill (ux1,uy1,uz1,ep1,phi1,dux1,duy1,duz1,dphi1)
-          
+
     ELSEIF (itype.EQ.itype_cyl) THEN
-       
+
        CALL init_cyl (ux1, uy1, uz1, ep1, phi1, dux1, duy1, duz1, dphi1)
-       
+
     ELSEIF (itype.EQ.itype_dbg) THEN
-       
+
        CALL init_dbg (ux1, uy1, uz1, ep1, phi1, dux1, duy1, duz1, dphi1)
 
     ELSEIF (itype.EQ.itype_mixlayer) THEN
@@ -115,7 +115,7 @@ CONTAINS
     IF (itype.EQ.itype_user) THEN
 
        CALL boundary_conditions_user (ux,uy,uz,phi,ep)
-       
+
     ELSEIF (itype.EQ.itype_lockexch) THEN
 
        IF (nrank.EQ.0) THEN
@@ -158,9 +158,9 @@ CONTAINS
     REAL(mytype),DIMENSION(xsize(1),xsize(2),xsize(3)) :: ep
 
     IF (itype.EQ.itype_user) THEN
-       
+
        CALL postprocessing_user (ux, uy, uz, phi, ep)
-       
+
     ELSEIF (itype.EQ.itype_lockexch) THEN
 
        IF (nrank.EQ.0) THEN
@@ -195,14 +195,14 @@ CONTAINS
     ENDIF
   END SUBROUTINE postprocessing
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !!
   !!  SUBROUTINE: momentum_forcing
   !!      AUTHOR: Paul Bartholomew
   !! DESCRIPTION: Calls case-specific forcing functions for the
   !!              momentum equations.
   !!
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   SUBROUTINE momentum_forcing(dux1, duy1, duz1, rho1, ux1, uy1, uz1)
 
     IMPLICIT NONE
diff --git a/src/derive.f90 b/src/derive.f90
index dd9a2d0e..27e25727 100644
--- a/src/derive.f90
+++ b/src/derive.f90
@@ -33,8 +33,8 @@
 !********************************************************************
 !
 subroutine derx_00(tx,ux,rx,sx,ffx,fsx,fwx,nx,ny,nz,npaire) 
-!
-!********************************************************************
+  !
+  !********************************************************************
 
   USE param
   USE derivX
@@ -45,9 +45,9 @@ subroutine derx_00(tx,ux,rx,sx,ffx,fsx,fwx,nx,ny,nz,npaire)
   real(mytype), dimension(nx,ny,nz) :: tx,ux,rx 
   real(mytype), dimension(ny,nz):: sx
   real(mytype), dimension(nx):: ffx,fsx,fwx
-  
+
   if(iibm.eq.2) call lagpolx(ux)
-  
+
   do k=1,nz 
      do j=1,ny 
         tx(1,j,k)=afix*(ux(2,j,k)-ux(nx,j,k))& 
@@ -91,8 +91,8 @@ end subroutine derx_00
 !********************************************************************
 !
 subroutine derx_11(tx,ux,rx,sx,ffx,fsx,fwx,nx,ny,nz,npaire) 
-!
-!********************************************************************
+  !
+  !********************************************************************
 
   USE param
   USE derivX
@@ -103,9 +103,9 @@ subroutine derx_11(tx,ux,rx,sx,ffx,fsx,fwx,nx,ny,nz,npaire)
   real(mytype), dimension(nx,ny,nz) :: tx,ux,rx 
   real(mytype), dimension(ny,nz):: sx
   real(mytype), dimension(nx):: ffx,fsx,fwx
-  
+
   if(iibm.eq.2) call lagpolx(ux)
-  
+
   if (npaire==1) then 
      do k=1,nz 
         do j=1,ny 
@@ -161,8 +161,8 @@ end subroutine derx_11
 !********************************************************************
 !
 subroutine derx_12(tx,ux,rx,sx,ffx,fsx,fwx,nx,ny,nz,npaire) 
-!
-!********************************************************************
+  !
+  !********************************************************************
 
   USE param
   USE derivX
@@ -173,9 +173,9 @@ subroutine derx_12(tx,ux,rx,sx,ffx,fsx,fwx,nx,ny,nz,npaire)
   real(mytype), dimension(nx,ny,nz) :: tx,ux,rx 
   real(mytype), dimension(ny,nz):: sx
   real(mytype), dimension(nx):: ffx,fsx,fwx
-  
+
   if(iibm.eq.2) call lagpolx(ux)
-    
+
   if (npaire==1) then 
      do k=1,nz 
         do j=1,ny 
@@ -228,8 +228,8 @@ end subroutine derx_12
 !********************************************************************
 !
 subroutine derx_21(tx,ux,rx,sx,ffx,fsx,fwx,nx,ny,nz,npaire) 
-!
-!********************************************************************
+  !
+  !********************************************************************
 
   USE param
   USE derivX
@@ -295,8 +295,8 @@ end subroutine derx_21
 !********************************************************************
 !
 subroutine derx_22(tx,ux,rx,sx,ffx,fsx,fwx,nx,ny,nz,npaire) 
-!
-!********************************************************************
+  !
+  !********************************************************************
 
   USE param
   USE derivX
@@ -336,8 +336,8 @@ end subroutine derx_22
 !********************************************************************
 !
 subroutine dery_00(ty,uy,ry,sy,ffy,fsy,fwy,ppy,nx,ny,nz,npaire) 
-!
-!********************************************************************
+  !
+  !********************************************************************
 
   USE param
   USE derivY
@@ -431,8 +431,8 @@ end subroutine dery_00
 !********************************************************************
 !
 subroutine dery_11(ty,uy,ry,sy,ffy,fsy,fwy,ppy,nx,ny,nz,npaire) 
-!
-!********************************************************************
+  !
+  !********************************************************************
 
   USE param
   USE derivY
@@ -550,8 +550,8 @@ end subroutine dery_11
 !********************************************************************
 !
 subroutine dery_12(ty,uy,ry,sy,ffy,fsy,fwy,ppy,nx,ny,nz,npaire) 
-!
-!********************************************************************
+  !
+  !********************************************************************
 
   USE param
   USE derivY
@@ -666,8 +666,8 @@ end subroutine dery_12
 !********************************************************************
 !
 subroutine dery_21(ty,uy,ry,sy,ffy,fsy,fwy,ppy,nx,ny,nz,npaire) 
-!
-!********************************************************************
+  !
+  !********************************************************************
 
   USE param
   USE derivY
@@ -782,8 +782,8 @@ end subroutine dery_21
 !********************************************************************
 !
 subroutine dery_22(ty,uy,ry,sy,ffy,fsy,fwy,ppy,nx,ny,nz,npaire) 
-!
-!********************************************************************
+  !
+  !********************************************************************
 
   USE param
   USE derivY
@@ -854,8 +854,8 @@ end subroutine dery_22
 !********************************************************************
 !
 subroutine derz_00(tz,uz,rz,sz,ffz,fsz,fwz,nx,ny,nz,npaire) 
-!
-!********************************************************************
+  !
+  !********************************************************************
 
   USE param 
   USE derivZ 
@@ -939,8 +939,8 @@ end subroutine derz_00
 !********************************************************************
 !
 subroutine derz_11(tz,uz,rz,sz,ffz,fsz,fwz,nx,ny,nz,npaire) 
-!
-!********************************************************************
+  !
+  !********************************************************************
 
   USE param 
   USE derivZ 
@@ -1267,8 +1267,8 @@ end subroutine derz_21
 !********************************************************************
 !
 subroutine derz_22(tz,uz,rz,sz,ffz,fsz,fwz,nx,ny,nz,npaire) 
-!
-!********************************************************************
+  !
+  !********************************************************************
 
   USE param 
   USE derivZ 
@@ -1330,8 +1330,8 @@ end subroutine derz_22
 !********************************************************************
 !
 subroutine derxx_00(tx,ux,rx,sx,sfx,ssx,swx,nx,ny,nz,npaire) 
-!
-!********************************************************************
+  !
+  !********************************************************************
 
   USE param 
   USE derivX 
@@ -1454,8 +1454,8 @@ end subroutine derxx_00
 !********************************************************************
 !
 subroutine derxx_11(tx,ux,rx,sx,sfx,ssx,swx,nx,ny,nz,npaire) 
-!
-!********************************************************************
+  !
+  !********************************************************************
 
   USE param 
   USE derivX 
@@ -1942,8 +1942,8 @@ end subroutine derxx_21
 !********************************************************************
 !
 subroutine derxx_22(tx,ux,rx,sx,sfx,ssx,swx,nx,ny,nz,npaire) 
-!
-!********************************************************************
+  !
+  !********************************************************************
 
   USE param 
   USE derivX 
@@ -2013,8 +2013,8 @@ end subroutine derxx_22
 !********************************************************************
 !
 subroutine deryy_00(ty,uy,ry,sy,sfy,ssy,swy,nx,ny,nz,npaire) 
-!
-!********************************************************************
+  !
+  !********************************************************************
 
   USE param 
   USE derivY 
@@ -2124,7 +2124,7 @@ subroutine deryy_00(ty,uy,ry,sy,sfy,ssy,swy,nx,ny,nz,npaire)
      enddo
   enddo
 #ifdef IMPLICIT
-     return
+  return
 #endif
   do k=1,nz
      do j=2,ny
@@ -2167,8 +2167,8 @@ end subroutine deryy_00
 !********************************************************************
 !
 subroutine deryy_11(ty,uy,ry,sy,sfy,ssy,swy,nx,ny,nz,npaire) 
-!
-!********************************************************************
+  !
+  !********************************************************************
 
   USE param 
   USE derivY 
@@ -2593,8 +2593,8 @@ end subroutine deryy_12
 !********************************************************************
 !
 subroutine deryy_21(ty,uy,ry,sy,sfy,ssy,swy,nx,ny,nz,npaire) 
-!
-!********************************************************************
+  !
+  !********************************************************************
 
   USE param 
   USE derivY 
@@ -2792,8 +2792,8 @@ end subroutine deryy_21
 !********************************************************************
 !
 subroutine deryy_22(ty,uy,ry,sy,sfy,ssy,swy,nx,ny,nz,npaire) 
-!
-!********************************************************************
+  !
+  !********************************************************************
 
   USE param 
   USE derivY 
@@ -2885,8 +2885,8 @@ end subroutine deryy_22
 !********************************************************************
 !
 subroutine derzz_00(tz,uz,rz,sz,sfz,ssz,swz,nx,ny,nz,npaire) 
-!
-!********************************************************************
+  !
+  !********************************************************************
 
   USE param 
   USE derivZ 
@@ -3036,8 +3036,8 @@ end subroutine derzz_00
 !********************************************************************
 !
 subroutine derzz_11(tz,uz,rz,sz,sfz,ssz,swz,nx,ny,nz,npaire) 
-!
-!********************************************************************
+  !
+  !********************************************************************
 
   USE param 
   USE derivZ 
@@ -3644,8 +3644,8 @@ end subroutine derzz_21
 !********************************************************************
 !
 subroutine derzz_22(tz,uz,rz,sz,sfz,ssz,swz,nx,ny,nz,npaire) 
-!
-!********************************************************************
+  !
+  !********************************************************************
 
   USE param 
   USE derivZ 
@@ -3739,1803 +3739,1803 @@ end subroutine derzz_22
 !********************************************************************
 !
 subroutine derxvp(tx,ux,rx,sx,cfx6,csx6,cwx6,nx,nxm,ny,nz,npaire)  
-!
-!********************************************************************
+  !
+  !********************************************************************
+
+  USE param 
+  USE derivX 
+
+  implicit none
+
+  integer :: nx,nxm,ny,nz,npaire
+  real(mytype), dimension(nxm,ny,nz) :: tx
+  real(mytype), dimension(nx,ny,nz) :: ux,rx
+  real(mytype), dimension(ny,nz) :: sx
+  real(mytype), dimension(nxm) :: cfx6,csx6,cwx6
+  integer :: i,j,k,nyz
+
+
+
+  nyz=ny*nz
+
+  if (nclx) then
+     do k=1,nz
+        do j=1,ny
+           tx(1,j,k)=acix6*(ux(2,j,k)-ux(1  ,j,k))&
+                +bcix6*(ux(3,j,k)-ux(nx,j,k))
+           rx(1,j,k)=-1.
+           tx(2,j,k)=acix6*(ux(3,j,k)-ux(2 ,j,k))&
+                +bcix6*(ux(4,j,k)-ux(1,j,k))
+           rx(2,j,k)=0.
+           do i=3,nx-2
+              tx(i,j,k)=acix6*(ux(i+1,j,k)-ux(i,j,k))&
+                   +bcix6*(ux(i+2,j,k)-ux(i-1,j,k))
+              rx(i,j,k)=0.
+           enddo
+           tx(nx-1,j,k)=acix6*(ux(nx,j,k)-ux(nx-1,j,k))&
+                +bcix6*(ux(1 ,j,k)-ux(nx-2,j,k))
+           rx(nx-1,j,k)=0.
+           tx(nx  ,j,k)=acix6*(ux(1,j,k)-ux(nx,j,k))&
+                +bcix6*(ux(2,j,k)-ux(nx-1,j,k))
+           rx(nx  ,j,k)=alcaix6
+           do i=2,nx
+              tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*csx6(i)
+              rx(i,j,k)=rx(i,j,k)-rx(i-1,j,k)*csx6(i)
+           enddo
+           tx(nx,j,k)=tx(nx,j,k)*cwx6(nx)
+           rx(nx,j,k)=rx(nx,j,k)*cwx6(nx)
+           do i=nx-1,1,-1
+              tx(i,j,k)=(tx(i,j,k)-cfx6(i)*tx(i+1,j,k))*cwx6(i)
+              rx(i,j,k)=(rx(i,j,k)-cfx6(i)*rx(i+1,j,k))*cwx6(i)
+           enddo
+           sx(j,k)=(tx(1,j,k)-alcaix6*tx(nx,j,k))/&
+                (1.+rx(1,j,k)-alcaix6*rx(nx,j,k))
+           do i=1,nx
+              tx(i,j,k)=tx(i,j,k)-sx(j,k)*rx(i,j,k)
+           enddo
+        enddo
+     enddo
+  else
+     if (npaire==1) then
+        do k=1,nz
+           do j=1,ny
+              tx(1,j,k)=acix6*(ux(2,j,k)-ux(1,j,k))&
+                   +bcix6*(ux(3,j,k)-ux(2,j,k))             
+              tx(2,j,k)=acix6*(ux(3,j,k)-ux(2,j,k))&
+                   +bcix6*(ux(4,j,k)-ux(1,j,k)) 
+              do i=3,nxm-2
+                 tx(i,j,k)=acix6*(ux(i+1,j,k)-ux(i,j,k))&
+                      +bcix6*(ux(i+2,j,k)-ux(i-1,j,k))              
+              enddo
+              tx(nxm-1,j,k)=acix6*(ux(nxm,j,k)-ux(nxm-1,j,k))&
+                   +bcix6*(ux(nx,j,k)-ux(nxm-2,j,k))         
+              tx(nxm,j,k)=acix6*(ux(nx,j,k)-ux(nxm,j,k))&
+                   +bcix6*(ux(nxm,j,k)-ux(nxm-1,j,k))
+              do i=2,nxm
+                 tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*csx6(i)
+              enddo
+              tx(nxm,j,k)=tx(nxm,j,k)*cwx6(nxm)
+              do i=nxm-1,1,-1
+                 tx(i,j,k)=(tx(i,j,k)-cfx6(i)*tx(i+1,j,k))*cwx6(i)
+              enddo
+           enddo
+        enddo
+     endif
+     if (npaire==0) then
+        do k=1,nz
+           do j=1,ny
+              tx(1,j,k)=acix6*(ux(2,j,k)-ux(1,j,k))&
+                   +bcix6*(ux(3,j,k)-2.*ux(1,j,k)+ux(2,j,k)) 
+              tx(2,j,k)=acix6*(ux(3,j,k)-ux(2,j,k))&
+                   +bcix6*(ux(4,j,k)-ux(1,j,k))
+              do i=3,nxm-2
+                 tx(i,j,k)=acix6*(ux(i+1,j,k)-ux(i,j,k))&
+                      +bcix6*(ux(i+2,j,k)-ux(i-1,j,k))
+              enddo
+              tx(nxm-1,j,k)=acix6*(ux(nxm,j,k)-ux(nxm-1,j,k))&
+                   +bcix6*(ux(nx,j,k)-ux(nxm-2,j,k)) 
+              tx(nxm,j,k)=acix6*(ux(nx,j,k)-ux(nxm,j,k))&
+                   +bcix6*(2.*ux(nx,j,k)-ux(nxm,j,k)-ux(nxm-1,j,k)) 
+              do i=2,nxm
+                 tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*csx6(i)
+              enddo
+              tx(nxm,j,k)=tx(nxm,j,k)*cwx6(nxm)
+              do i=nxm-1,1,-1
+                 tx(i,j,k)=(tx(i,j,k)-cfx6(i)*tx(i+1,j,k))*cwx6(i)
+              enddo
+           enddo
+        enddo
+     endif
+  endif
 
-USE param 
-USE derivX 
-
-implicit none
-
-integer :: nx,nxm,ny,nz,npaire
-real(mytype), dimension(nxm,ny,nz) :: tx
-real(mytype), dimension(nx,ny,nz) :: ux,rx
-real(mytype), dimension(ny,nz) :: sx
-real(mytype), dimension(nxm) :: cfx6,csx6,cwx6
-integer :: i,j,k,nyz
-
-
-
-nyz=ny*nz
-
-if (nclx) then
-   do k=1,nz
-   do j=1,ny
-      tx(1,j,k)=acix6*(ux(2,j,k)-ux(1  ,j,k))&
-           +bcix6*(ux(3,j,k)-ux(nx,j,k))
-      rx(1,j,k)=-1.
-      tx(2,j,k)=acix6*(ux(3,j,k)-ux(2 ,j,k))&
-           +bcix6*(ux(4,j,k)-ux(1,j,k))
-      rx(2,j,k)=0.
-      do i=3,nx-2
-         tx(i,j,k)=acix6*(ux(i+1,j,k)-ux(i,j,k))&
-              +bcix6*(ux(i+2,j,k)-ux(i-1,j,k))
-         rx(i,j,k)=0.
-      enddo
-      tx(nx-1,j,k)=acix6*(ux(nx,j,k)-ux(nx-1,j,k))&
-           +bcix6*(ux(1 ,j,k)-ux(nx-2,j,k))
-      rx(nx-1,j,k)=0.
-      tx(nx  ,j,k)=acix6*(ux(1,j,k)-ux(nx,j,k))&
-           +bcix6*(ux(2,j,k)-ux(nx-1,j,k))
-      rx(nx  ,j,k)=alcaix6
-      do i=2,nx
-         tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*csx6(i)
-         rx(i,j,k)=rx(i,j,k)-rx(i-1,j,k)*csx6(i)
-      enddo
-      tx(nx,j,k)=tx(nx,j,k)*cwx6(nx)
-      rx(nx,j,k)=rx(nx,j,k)*cwx6(nx)
-      do i=nx-1,1,-1
-         tx(i,j,k)=(tx(i,j,k)-cfx6(i)*tx(i+1,j,k))*cwx6(i)
-         rx(i,j,k)=(rx(i,j,k)-cfx6(i)*rx(i+1,j,k))*cwx6(i)
-      enddo
-      sx(j,k)=(tx(1,j,k)-alcaix6*tx(nx,j,k))/&
-           (1.+rx(1,j,k)-alcaix6*rx(nx,j,k))
-      do i=1,nx
-         tx(i,j,k)=tx(i,j,k)-sx(j,k)*rx(i,j,k)
-      enddo
-   enddo
-   enddo
-else
-   if (npaire==1) then
-      do k=1,nz
-      do j=1,ny
-         tx(1,j,k)=acix6*(ux(2,j,k)-ux(1,j,k))&
-              +bcix6*(ux(3,j,k)-ux(2,j,k))             
-         tx(2,j,k)=acix6*(ux(3,j,k)-ux(2,j,k))&
-              +bcix6*(ux(4,j,k)-ux(1,j,k)) 
-         do i=3,nxm-2
-            tx(i,j,k)=acix6*(ux(i+1,j,k)-ux(i,j,k))&
-                 +bcix6*(ux(i+2,j,k)-ux(i-1,j,k))              
-         enddo
-         tx(nxm-1,j,k)=acix6*(ux(nxm,j,k)-ux(nxm-1,j,k))&
-              +bcix6*(ux(nx,j,k)-ux(nxm-2,j,k))         
-         tx(nxm,j,k)=acix6*(ux(nx,j,k)-ux(nxm,j,k))&
-              +bcix6*(ux(nxm,j,k)-ux(nxm-1,j,k))
-         do i=2,nxm
-            tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*csx6(i)
-         enddo
-         tx(nxm,j,k)=tx(nxm,j,k)*cwx6(nxm)
-         do i=nxm-1,1,-1
-            tx(i,j,k)=(tx(i,j,k)-cfx6(i)*tx(i+1,j,k))*cwx6(i)
-         enddo
-      enddo
-      enddo
-   endif
-   if (npaire==0) then
-      do k=1,nz
-      do j=1,ny
-         tx(1,j,k)=acix6*(ux(2,j,k)-ux(1,j,k))&
-              +bcix6*(ux(3,j,k)-2.*ux(1,j,k)+ux(2,j,k)) 
-         tx(2,j,k)=acix6*(ux(3,j,k)-ux(2,j,k))&
-              +bcix6*(ux(4,j,k)-ux(1,j,k))
-         do i=3,nxm-2
-            tx(i,j,k)=acix6*(ux(i+1,j,k)-ux(i,j,k))&
-                 +bcix6*(ux(i+2,j,k)-ux(i-1,j,k))
-         enddo
-         tx(nxm-1,j,k)=acix6*(ux(nxm,j,k)-ux(nxm-1,j,k))&
-              +bcix6*(ux(nx,j,k)-ux(nxm-2,j,k)) 
-         tx(nxm,j,k)=acix6*(ux(nx,j,k)-ux(nxm,j,k))&
-              +bcix6*(2.*ux(nx,j,k)-ux(nxm,j,k)-ux(nxm-1,j,k)) 
-         do i=2,nxm
-            tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*csx6(i)
-         enddo
-         tx(nxm,j,k)=tx(nxm,j,k)*cwx6(nxm)
-         do i=nxm-1,1,-1
-            tx(i,j,k)=(tx(i,j,k)-cfx6(i)*tx(i+1,j,k))*cwx6(i)
-         enddo
-      enddo
-      enddo
-   endif
-endif
-
-return     
+  return     
 end subroutine derxvp
 
 !********************************************************************
 !
 subroutine interxvp(tx,ux,rx,sx,cifx6,cisx6,ciwx6,nx,nxm,ny,nz,npaire) 
-!
-!********************************************************************
+  !
+  !********************************************************************
+
+  USE param 
+  USE derivX 
+
+  implicit none
+
+  integer :: nx,nxm,ny,nz,npaire,i,j,nyz,k
+  real(mytype), dimension(nxm,ny,nz) :: tx
+  real(mytype), dimension(nx,ny,nz) :: ux,rx
+  real(mytype), dimension(ny,nz) :: sx
+  real(mytype), dimension(nxm) :: cifx6,cisx6,ciwx6
+
+  nyz=ny*nz
+
+  if (nclx) then
+     do k=1,nz
+        do j=1,ny
+           tx(1,j,k)=aicix6*(ux(2,j,k)+ux(1  ,j,k))&
+                +bicix6*(ux(3,j,k)+ux(nx,j,k))&
+                +cicix6*(ux(4,j,k)+ux(nx-1,j,k))&
+                +dicix6*(ux(5,j,k)+ux(nx-2,j,k))
+           rx(1,j,k)=-1.
+           tx(2,j,k)=aicix6*(ux(3,j,k)+ux(2 ,j,k))&
+                +bicix6*(ux(4,j,k)+ux(1,j,k))&
+                +cicix6*(ux(5,j,k)+ux(nx,j,k))&
+                +dicix6*(ux(6,j,k)+ux(nx-1,j,k))
+           rx(2,j,k)=0.
+           tx(3,j,k)=aicix6*(ux(4,j,k)+ux(3 ,j,k))&
+                +bicix6*(ux(5,j,k)+ux(2,j,k))&
+                +cicix6*(ux(6,j,k)+ux(1,j,k))&
+                +dicix6*(ux(7,j,k)+ux(nx,j,k))
+           rx(3,j,k)=0.
+           do i=4,nx-4
+              tx(i,j,k)=aicix6*(ux(i+1,j,k)+ux(i,j,k))&
+                   +bicix6*(ux(i+2,j,k)+ux(i-1,j,k))&
+                   +cicix6*(ux(i+3,j,k)+ux(i-2,j,k))&
+                   +dicix6*(ux(i+4,j,k)+ux(i-3,j,k))
+
+              rx(i,j,k)=0.
+           enddo
+           tx(nx-3,j,k)=aicix6*(ux(nx-2,j,k)+ux(nx-3,j,k))&
+                +bicix6*(ux(nx-1,j,k)+ux(nx-4,j,k))&
+                +cicix6*(ux(nx,j,k)+ux(nx-5,j,k))&
+                +dicix6*(ux(1,j,k)+ux(nx-6,j,k))
+           rx(nx-3,j,k)=0.
+           tx(nx-2,j,k)=aicix6*(ux(nx-1,j,k)+ux(nx-2,j,k))&
+                +bicix6*(ux(nx ,j,k)+ux(nx-3,j,k))&
+                +cicix6*(ux(1,j,k)+ux(nx-4,j,k))&
+                +dicix6*(ux(2,j,k)+ux(nx-5,j,k))
+           rx(nx-2,j,k)=0.
+           tx(nx-1,j,k)=aicix6*(ux(nx,j,k)+ux(nx-1,j,k))&
+                +bicix6*(ux(1 ,j,k)+ux(nx-2,j,k))&
+                +cicix6*(ux(2,j,k)+ux(nx-3,j,k))&
+                +dicix6*(ux(3,j,k)+ux(nx-4,j,k))
+           rx(nx-1,j,k)=0.
+           tx(nx  ,j,k)=aicix6*(ux(1,j,k)+ux(nx,j,k))&
+                +bicix6*(ux(2,j,k)+ux(nx-1,j,k))&
+                +cicix6*(ux(3,j,k)+ux(nx-2,j,k))&
+                +dicix6*(ux(4,j,k)+ux(nx-3,j,k))
+           rx(nx  ,j,k)=ailcaix6
+           do i=2,nx
+              tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*cisx6(i)
+              rx(i,j,k)=rx(i,j,k)-rx(i-1,j,k)*cisx6(i)
+           enddo
+           tx(nx,j,k)=tx(nx,j,k)*ciwx6(nx)
+           rx(nx,j,k)=rx(nx,j,k)*ciwx6(nx)
+           do i=nx-1,1,-1
+              tx(i,j,k)=(tx(i,j,k)-cifx6(i)*tx(i+1,j,k))*ciwx6(i)
+              rx(i,j,k)=(rx(i,j,k)-cifx6(i)*rx(i+1,j,k))*ciwx6(i)
+           enddo
+           sx(j,k)=(tx(1,j,k)-ailcaix6*tx(nx,j,k))/&
+                (1.+rx(1,j,k)-ailcaix6*rx(nx,j,k))
+           do i=1,nx
+              tx(i,j,k)=tx(i,j,k)-sx(j,k)*rx(i,j,k)
+           enddo
+        enddo
+     enddo
+  else
+     if (npaire==1) then
+        do k=1,nz
+           do j=1,ny
+              tx(1,j,k)=aicix6*(ux(2,j,k)+ux(1,j,k))&
+                   +bicix6*(ux(3,j,k)+ux(2,j,k))&
+                   +cicix6*(ux(4,j,k)+ux(3,j,k))&
+                   +dicix6*(ux(5,j,k)+ux(4,j,k))
+              tx(2,j,k)=aicix6*(ux(3,j,k)+ux(2,j,k))&
+                   +bicix6*(ux(4,j,k)+ux(1,j,k))&
+                   +cicix6*(ux(5,j,k)+ux(2,j,k))&
+                   +dicix6*(ux(6,j,k)+ux(3,j,k))
+              tx(3,j,k)=aicix6*(ux(4,j,k)+ux(3,j,k))&
+                   +bicix6*(ux(5,j,k)+ux(2,j,k))&
+                   +cicix6*(ux(6,j,k)+ux(1,j,k))&
+                   +dicix6*(ux(7,j,k)+ux(2,j,k))
+              do i=4,nxm-3
+                 tx(i,j,k)=aicix6*(ux(i+1,j,k)+ux(i,j,k))&
+                      +bicix6*(ux(i+2,j,k)+ux(i-1,j,k))&
+                      +cicix6*(ux(i+3,j,k)+ux(i-2,j,k))&
+                      +dicix6*(ux(i+4,j,k)+ux(i-3,j,k))              
+              enddo
+              tx(nxm-2,j,k)=aicix6*(ux(nxm-1,j,k)+ux(nxm-2,j,k))&
+                   +bicix6*(ux(nxm,j,k)+ux(nxm-3,j,k))&
+                   +cicix6*(ux(nx,j,k)+ux(nxm-4,j,k))&
+                   +dicix6*(ux(nxm,j,k)+ux(nxm-5,j,k))
+              tx(nxm-1,j,k)=aicix6*(ux(nxm,j,k)+ux(nxm-1,j,k))&
+                   +bicix6*(ux(nx,j,k)+ux(nxm-2,j,k))&
+                   +cicix6*(ux(nxm,j,k)+ux(nxm-3,j,k))&
+                   +dicix6*(ux(nxm-1,j,k)+ux(nxm-4,j,k))
+              tx(nxm,j,k)=aicix6*(ux(nx,j,k)+ux(nxm,j,k))&
+                   +bicix6*(ux(nxm,j,k)+ux(nxm-1,j,k))&
+                   +cicix6*(ux(nxm-1,j,k)+ux(nxm-2,j,k))&
+                   +dicix6*(ux(nxm-2,j,k)+ux(nxm-3,j,k))
+              do i=2,nxm
+                 tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*cisx6(i)
+              enddo
+              tx(nxm,j,k)=tx(nxm,j,k)*ciwx6(nxm)
+              do i=nxm-1,1,-1
+                 tx(i,j,k)=(tx(i,j,k)-cifx6(i)*tx(i+1,j,k))*ciwx6(i)
+              enddo
+           enddo
+        enddo
+     endif
+  endif
 
-USE param 
-USE derivX 
-
-implicit none
-
-integer :: nx,nxm,ny,nz,npaire,i,j,nyz,k
-real(mytype), dimension(nxm,ny,nz) :: tx
-real(mytype), dimension(nx,ny,nz) :: ux,rx
-real(mytype), dimension(ny,nz) :: sx
-real(mytype), dimension(nxm) :: cifx6,cisx6,ciwx6
-
-nyz=ny*nz
-
-if (nclx) then
-   do k=1,nz
-   do j=1,ny
-      tx(1,j,k)=aicix6*(ux(2,j,k)+ux(1  ,j,k))&
-           +bicix6*(ux(3,j,k)+ux(nx,j,k))&
-           +cicix6*(ux(4,j,k)+ux(nx-1,j,k))&
-           +dicix6*(ux(5,j,k)+ux(nx-2,j,k))
-      rx(1,j,k)=-1.
-      tx(2,j,k)=aicix6*(ux(3,j,k)+ux(2 ,j,k))&
-           +bicix6*(ux(4,j,k)+ux(1,j,k))&
-           +cicix6*(ux(5,j,k)+ux(nx,j,k))&
-           +dicix6*(ux(6,j,k)+ux(nx-1,j,k))
-      rx(2,j,k)=0.
-      tx(3,j,k)=aicix6*(ux(4,j,k)+ux(3 ,j,k))&
-           +bicix6*(ux(5,j,k)+ux(2,j,k))&
-           +cicix6*(ux(6,j,k)+ux(1,j,k))&
-           +dicix6*(ux(7,j,k)+ux(nx,j,k))
-      rx(3,j,k)=0.
-      do i=4,nx-4
-         tx(i,j,k)=aicix6*(ux(i+1,j,k)+ux(i,j,k))&
-              +bicix6*(ux(i+2,j,k)+ux(i-1,j,k))&
-              +cicix6*(ux(i+3,j,k)+ux(i-2,j,k))&
-              +dicix6*(ux(i+4,j,k)+ux(i-3,j,k))
-
-         rx(i,j,k)=0.
-      enddo
-      tx(nx-3,j,k)=aicix6*(ux(nx-2,j,k)+ux(nx-3,j,k))&
-           +bicix6*(ux(nx-1,j,k)+ux(nx-4,j,k))&
-           +cicix6*(ux(nx,j,k)+ux(nx-5,j,k))&
-           +dicix6*(ux(1,j,k)+ux(nx-6,j,k))
-      rx(nx-3,j,k)=0.
-      tx(nx-2,j,k)=aicix6*(ux(nx-1,j,k)+ux(nx-2,j,k))&
-           +bicix6*(ux(nx ,j,k)+ux(nx-3,j,k))&
-           +cicix6*(ux(1,j,k)+ux(nx-4,j,k))&
-           +dicix6*(ux(2,j,k)+ux(nx-5,j,k))
-      rx(nx-2,j,k)=0.
-      tx(nx-1,j,k)=aicix6*(ux(nx,j,k)+ux(nx-1,j,k))&
-           +bicix6*(ux(1 ,j,k)+ux(nx-2,j,k))&
-           +cicix6*(ux(2,j,k)+ux(nx-3,j,k))&
-           +dicix6*(ux(3,j,k)+ux(nx-4,j,k))
-      rx(nx-1,j,k)=0.
-      tx(nx  ,j,k)=aicix6*(ux(1,j,k)+ux(nx,j,k))&
-           +bicix6*(ux(2,j,k)+ux(nx-1,j,k))&
-           +cicix6*(ux(3,j,k)+ux(nx-2,j,k))&
-           +dicix6*(ux(4,j,k)+ux(nx-3,j,k))
-      rx(nx  ,j,k)=ailcaix6
-      do i=2,nx
-         tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*cisx6(i)
-         rx(i,j,k)=rx(i,j,k)-rx(i-1,j,k)*cisx6(i)
-      enddo
-      tx(nx,j,k)=tx(nx,j,k)*ciwx6(nx)
-      rx(nx,j,k)=rx(nx,j,k)*ciwx6(nx)
-      do i=nx-1,1,-1
-         tx(i,j,k)=(tx(i,j,k)-cifx6(i)*tx(i+1,j,k))*ciwx6(i)
-         rx(i,j,k)=(rx(i,j,k)-cifx6(i)*rx(i+1,j,k))*ciwx6(i)
-      enddo
-      sx(j,k)=(tx(1,j,k)-ailcaix6*tx(nx,j,k))/&
-           (1.+rx(1,j,k)-ailcaix6*rx(nx,j,k))
-      do i=1,nx
-         tx(i,j,k)=tx(i,j,k)-sx(j,k)*rx(i,j,k)
-      enddo
-   enddo
-   enddo
-else
-   if (npaire==1) then
-      do k=1,nz
-      do j=1,ny
-         tx(1,j,k)=aicix6*(ux(2,j,k)+ux(1,j,k))&
-              +bicix6*(ux(3,j,k)+ux(2,j,k))&
-              +cicix6*(ux(4,j,k)+ux(3,j,k))&
-              +dicix6*(ux(5,j,k)+ux(4,j,k))
-         tx(2,j,k)=aicix6*(ux(3,j,k)+ux(2,j,k))&
-              +bicix6*(ux(4,j,k)+ux(1,j,k))&
-              +cicix6*(ux(5,j,k)+ux(2,j,k))&
-              +dicix6*(ux(6,j,k)+ux(3,j,k))
-         tx(3,j,k)=aicix6*(ux(4,j,k)+ux(3,j,k))&
-              +bicix6*(ux(5,j,k)+ux(2,j,k))&
-              +cicix6*(ux(6,j,k)+ux(1,j,k))&
-              +dicix6*(ux(7,j,k)+ux(2,j,k))
-         do i=4,nxm-3
-            tx(i,j,k)=aicix6*(ux(i+1,j,k)+ux(i,j,k))&
-                 +bicix6*(ux(i+2,j,k)+ux(i-1,j,k))&
-                 +cicix6*(ux(i+3,j,k)+ux(i-2,j,k))&
-                 +dicix6*(ux(i+4,j,k)+ux(i-3,j,k))              
-         enddo
-         tx(nxm-2,j,k)=aicix6*(ux(nxm-1,j,k)+ux(nxm-2,j,k))&
-              +bicix6*(ux(nxm,j,k)+ux(nxm-3,j,k))&
-              +cicix6*(ux(nx,j,k)+ux(nxm-4,j,k))&
-              +dicix6*(ux(nxm,j,k)+ux(nxm-5,j,k))
-         tx(nxm-1,j,k)=aicix6*(ux(nxm,j,k)+ux(nxm-1,j,k))&
-              +bicix6*(ux(nx,j,k)+ux(nxm-2,j,k))&
-              +cicix6*(ux(nxm,j,k)+ux(nxm-3,j,k))&
-              +dicix6*(ux(nxm-1,j,k)+ux(nxm-4,j,k))
-         tx(nxm,j,k)=aicix6*(ux(nx,j,k)+ux(nxm,j,k))&
-              +bicix6*(ux(nxm,j,k)+ux(nxm-1,j,k))&
-              +cicix6*(ux(nxm-1,j,k)+ux(nxm-2,j,k))&
-              +dicix6*(ux(nxm-2,j,k)+ux(nxm-3,j,k))
-         do i=2,nxm
-            tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*cisx6(i)
-         enddo
-         tx(nxm,j,k)=tx(nxm,j,k)*ciwx6(nxm)
-         do i=nxm-1,1,-1
-            tx(i,j,k)=(tx(i,j,k)-cifx6(i)*tx(i+1,j,k))*ciwx6(i)
-         enddo
-      enddo
-      enddo
-   endif
-endif
-
-return
-end subroutine interxvp 
+  return
+end subroutine interxvp
 
 !********************************************************************
 !
 subroutine derxpv(tx,ux,rx,sx,cfi6,csi6,cwi6,cfx6,csx6,cwx6,nxm,nx,ny,nz,npaire) 
-!
-!********************************************************************
+  !
+  !********************************************************************
+
+  USE param 
+  USE derivX 
+
+  implicit none
+
+  integer :: nx,nxm,ny,nz,npaire
+  real(mytype), dimension(nx,ny,nz) :: tx
+  real(mytype), dimension(nxm,ny,nz) :: ux,rx
+  real(mytype), dimension(ny,nz) :: sx
+  real(mytype), dimension(nx) :: cfi6,csi6,cwi6
+  real(mytype), dimension(nx) :: cfx6,csx6,cwx6
+  integer :: i,j,k
+
+  if (nclx) then
+     do k=1,nz
+        do j=1,ny
+           tx(1,j,k)=acix6*(ux(1,j,k)-ux(nx  ,j,k))&
+                +bcix6*(ux(2,j,k)-ux(nx-1,j,k))
+           rx(1,j,k)=-1.
+           tx(2,j,k)=acix6*(ux(2,j,k)-ux(1 ,j,k))&
+                +bcix6*(ux(3,j,k)-ux(nx,j,k))
+           rx(2,j,k)=0.
+           do i=3,nx-2
+              tx(i,j,k)=acix6*(ux(i,j,k)-ux(i-1,j,k))&
+                   +bcix6*(ux(i+1,j,k)-ux(i-2,j,k))
+              rx(i,j,k)=0.
+           enddo
+           tx(nx-1,j,k)=acix6*(ux(nx-1,j,k)-ux(nx-2,j,k))&
+                +bcix6*(ux(nx ,j,k)-ux(nx-3,j,k))
+           rx(nx-1,j,k)=0.
+           tx(nx  ,j,k)=acix6*(ux(nx,j,k)-ux(nx-1,j,k))&
+                +bcix6*(ux(1,j,k)-ux(nx-2,j,k))
+           rx(nx  ,j,k)=alcaix6
+           do i=2,nx
+              tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*csx6(i)
+              rx(i,j,k)=rx(i,j,k)-rx(i-1,j,k)*csx6(i)
+           enddo
+           tx(nx,j,k)=tx(nx,j,k)*cwx6(nx)
+           rx(nx,j,k)=rx(nx,j,k)*cwx6(nx)
+           do i=nx-1,1,-1
+              tx(i,j,k)=(tx(i,j,k)-cfx6(i)*tx(i+1,j,k))*cwx6(i)
+              rx(i,j,k)=(rx(i,j,k)-cfx6(i)*rx(i+1,j,k))*cwx6(i)
+           enddo
+           sx(j,k)=(tx(1,j,k)-alcaix6*tx(nx,j,k))/&
+                (1.+rx(1,j,k)-alcaix6*rx(nx,j,k))
+           do i=1,nx
+              tx(i,j,k)=tx(i,j,k)-sx(j,k)*rx(i,j,k)
+           enddo
+        enddo
+     enddo
+  else
+     if (npaire==1) then
+        do k=1,nz
+           do j=1,ny
+              tx(1,j,k)=0.  
+              tx(2,j,k)=acix6*(ux(2,j,k)-ux(1,j,k))&
+                   +bcix6*(ux(3,j,k)-ux(1,j,k)) 
+              do i=3,nx-2
+                 tx(i,j,k)=acix6*(ux(i,j,k)-ux(i-1,j,k))&
+                      +bcix6*(ux(i+1,j,k)-ux(i-2,j,k))
+              enddo
+              tx(nx-1,j,k)=acix6*(ux(nx-1,j,k)-ux(nx-2,j,k))&
+                   +bcix6*(ux(nx-1,j,k)-ux(nx-3,j,k))
+              tx(nx,j,k)=0.
+              do i=2,nx
+                 tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*csi6(i)
+              enddo
+              tx(nx,j,k)=tx(nx,j,k)*cwi6(nx)
+              do i=nx-1,1,-1
+                 tx(i,j,k)=(tx(i,j,k)-cfi6(i)*tx(i+1,j,k))*cwi6(i)
+              enddo
+           enddo
+        enddo
+     endif
+  endif
 
-USE param 
-USE derivX 
-
-implicit none
-
-integer :: nx,nxm,ny,nz,npaire
-real(mytype), dimension(nx,ny,nz) :: tx
-real(mytype), dimension(nxm,ny,nz) :: ux,rx
-real(mytype), dimension(ny,nz) :: sx
-real(mytype), dimension(nx) :: cfi6,csi6,cwi6
-real(mytype), dimension(nx) :: cfx6,csx6,cwx6
-integer :: i,j,k
-
-if (nclx) then
-   do k=1,nz
-   do j=1,ny
-      tx(1,j,k)=acix6*(ux(1,j,k)-ux(nx  ,j,k))&
-           +bcix6*(ux(2,j,k)-ux(nx-1,j,k))
-      rx(1,j,k)=-1.
-      tx(2,j,k)=acix6*(ux(2,j,k)-ux(1 ,j,k))&
-           +bcix6*(ux(3,j,k)-ux(nx,j,k))
-      rx(2,j,k)=0.
-      do i=3,nx-2
-         tx(i,j,k)=acix6*(ux(i,j,k)-ux(i-1,j,k))&
-              +bcix6*(ux(i+1,j,k)-ux(i-2,j,k))
-         rx(i,j,k)=0.
-      enddo
-      tx(nx-1,j,k)=acix6*(ux(nx-1,j,k)-ux(nx-2,j,k))&
-           +bcix6*(ux(nx ,j,k)-ux(nx-3,j,k))
-      rx(nx-1,j,k)=0.
-      tx(nx  ,j,k)=acix6*(ux(nx,j,k)-ux(nx-1,j,k))&
-           +bcix6*(ux(1,j,k)-ux(nx-2,j,k))
-      rx(nx  ,j,k)=alcaix6
-      do i=2,nx
-         tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*csx6(i)
-         rx(i,j,k)=rx(i,j,k)-rx(i-1,j,k)*csx6(i)
-      enddo
-      tx(nx,j,k)=tx(nx,j,k)*cwx6(nx)
-      rx(nx,j,k)=rx(nx,j,k)*cwx6(nx)
-      do i=nx-1,1,-1
-         tx(i,j,k)=(tx(i,j,k)-cfx6(i)*tx(i+1,j,k))*cwx6(i)
-         rx(i,j,k)=(rx(i,j,k)-cfx6(i)*rx(i+1,j,k))*cwx6(i)
-      enddo
-      sx(j,k)=(tx(1,j,k)-alcaix6*tx(nx,j,k))/&
-           (1.+rx(1,j,k)-alcaix6*rx(nx,j,k))
-      do i=1,nx
-         tx(i,j,k)=tx(i,j,k)-sx(j,k)*rx(i,j,k)
-      enddo
-   enddo
-   enddo
-else
-   if (npaire==1) then
-      do k=1,nz
-      do j=1,ny
-         tx(1,j,k)=0.  
-         tx(2,j,k)=acix6*(ux(2,j,k)-ux(1,j,k))&
-              +bcix6*(ux(3,j,k)-ux(1,j,k)) 
-         do i=3,nx-2
-            tx(i,j,k)=acix6*(ux(i,j,k)-ux(i-1,j,k))&
-                 +bcix6*(ux(i+1,j,k)-ux(i-2,j,k))
-         enddo
-         tx(nx-1,j,k)=acix6*(ux(nx-1,j,k)-ux(nx-2,j,k))&
-              +bcix6*(ux(nx-1,j,k)-ux(nx-3,j,k))
-         tx(nx,j,k)=0.
-         do i=2,nx
-            tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*csi6(i)
-         enddo
-         tx(nx,j,k)=tx(nx,j,k)*cwi6(nx)
-         do i=nx-1,1,-1
-            tx(i,j,k)=(tx(i,j,k)-cfi6(i)*tx(i+1,j,k))*cwi6(i)
-         enddo
-      enddo
-      enddo
-   endif
-endif
-
-return
-end subroutine derxpv 
+  return
+end subroutine derxpv
 
 !********************************************************************
 !
 subroutine interxpv(tx,ux,rx,sx,cifi6,cisi6,ciwi6,cifx6,cisx6,ciwx6,&
      nxm,nx,ny,nz,npaire) 
-!
-!********************************************************************
-  
-USE param 
-USE derivX 
-
-implicit none
-
-integer :: nx,nxm,ny,nz,npaire
-real(mytype), dimension(nx,ny,nz) :: tx,rx
-real(mytype), dimension(nxm,ny,nz) :: ux
-real(mytype), dimension(ny,nz) :: sx
-real(mytype), dimension(nx) :: cifi6,cisi6,ciwi6
-real(mytype), dimension(nx) :: cifx6,cisx6,ciwx6
-integer :: i,j,k
-
-if (nclx) then
-   do k=1,nz
-   do j=1,ny
-      tx(1,j,k)=aicix6*(ux(1,j,k)+ux(nx  ,j,k))&
-           +bicix6*(ux(2,j,k)+ux(nx-1,j,k))&
-           +cicix6*(ux(3,j,k)+ux(nx-2,j,k))&
-           +dicix6*(ux(4,j,k)+ux(nx-3,j,k))
-      rx(1,j,k)=-1.
-      tx(2,j,k)=aicix6*(ux(2,j,k)+ux(1 ,j,k))&
-           +bicix6*(ux(3,j,k)+ux(nx,j,k))&
-           +cicix6*(ux(4,j,k)+ux(nx-1,j,k))&
-           +dicix6*(ux(5,j,k)+ux(nx-2,j,k))
-      rx(2,j,k)=0.
-      tx(3,j,k)=aicix6*(ux(3,j,k)+ux(2 ,j,k))&
-           +bicix6*(ux(4,j,k)+ux(1,j,k))&
-           +cicix6*(ux(5,j,k)+ux(nx,j,k))&
-           +dicix6*(ux(6,j,k)+ux(nx-1,j,k))
-      rx(3,j,k)=0.
-      tx(4,j,k)=aicix6*(ux(4,j,k)+ux(3 ,j,k))&
-           +bicix6*(ux(5,j,k)+ux(2,j,k))&
-           +cicix6*(ux(6,j,k)+ux(1,j,k))&
-           +dicix6*(ux(7,j,k)+ux(nx,j,k))
-      rx(4,j,k)=0.
-      do i=5,nx-3
-         tx(i,j,k)=aicix6*(ux(i,j,k)+ux(i-1,j,k))&
-              +bicix6*(ux(i+1,j,k)+ux(i-2,j,k))&
-              +cicix6*(ux(i+2,j,k)+ux(i-3,j,k))&
-              +dicix6*(ux(i+3,j,k)+ux(i-4,j,k))
-         rx(i,j,k)=0.
-      enddo
-      tx(nx-2,j,k)=aicix6*(ux(nx-2,j,k)+ux(nx-3,j,k))&
-           +bicix6*(ux(nx-1,j,k)+ux(nx-4,j,k))&
-           +cicix6*(ux(nx,j,k)+ux(nx-5,j,k))&
-           +dicix6*(ux(1,j,k)+ux(nx-6,j,k))
-      rx(nx-2,j,k)=0.
-      tx(nx-1,j,k)=aicix6*(ux(nx-1,j,k)+ux(nx-2,j,k))&
-           +bicix6*(ux(nx ,j,k)+ux(nx-3,j,k))&
-           +cicix6*(ux(1,j,k)+ux(nx-4,j,k))&
-           +dicix6*(ux(2,j,k)+ux(nx-5,j,k))
-      rx(nx-1,j,k)=0.
-      tx(nx  ,j,k)=aicix6*(ux(nx,j,k)+ux(nx-1,j,k))&
-           +bicix6*(ux(1,j,k)+ux(nx-2,j,k))&
-           +cicix6*(ux(2,j,k)+ux(nx-3,j,k))&
-           +dicix6*(ux(3,j,k)+ux(nx-4,j,k))
-      rx(nx  ,j,k)=ailcaix6
-      do i=2,nx
-         tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*cisx6(i)
-         rx(i,j,k)=rx(i,j,k)-rx(i-1,j,k)*cisx6(i)
-      enddo
-      tx(nx,j,k)=tx(nx,j,k)*ciwx6(nx)
-      rx(nx,j,k)=rx(nx,j,k)*ciwx6(nx)
-      do i=nx-1,1,-1
-         tx(i,j,k)=(tx(i,j,k)-cifx6(i)*tx(i+1,j,k))*ciwx6(i)
-         rx(i,j,k)=(rx(i,j,k)-cifx6(i)*rx(i+1,j,k))*ciwx6(i)
-      enddo
-      sx(j,k)=(tx(1,j,k)-ailcaix6*tx(nx,j,k))/&
-           (1.+rx(1,j,k)-ailcaix6*rx(nx,j,k))
-      do i=1,nx
-         tx(i,j,k)=tx(i,j,k)-sx(j,k)*rx(i,j,k)
-      enddo
-   enddo
-   enddo
-else
-   if (npaire==1) then
-      do k=1,nz
-      do j=1,ny
-         tx(1,j,k)=aicix6*(ux(1,j,k)+ux(1,j,k))&
-              +bicix6*(ux(2,j,k)+ux(2,j,k))&
-              +cicix6*(ux(3,j,k)+ux(3,j,k))&
-              +dicix6*(ux(4,j,k)+ux(4,j,k))
-         tx(2,j,k)=aicix6*(ux(2,j,k)+ux(1,j,k))&
-              +bicix6*(ux(3,j,k)+ux(1,j,k))&
-              +cicix6*(ux(4,j,k)+ux(2,j,k))&
-              +dicix6*(ux(5,j,k)+ux(3,j,k))
-         tx(3,j,k)=aicix6*(ux(3,j,k)+ux(2,j,k))&
-              +bicix6*(ux(4,j,k)+ux(1,j,k))&
-              +cicix6*(ux(5,j,k)+ux(1,j,k))&
-              +dicix6*(ux(6,j,k)+ux(2,j,k))
-         tx(4,j,k)=aicix6*(ux(4,j,k)+ux(3,j,k))&
-              +bicix6*(ux(5,j,k)+ux(2,j,k))&
-              +cicix6*(ux(6,j,k)+ux(1,j,k))&
-              +dicix6*(ux(7,j,k)+ux(1,j,k))
-         do i=5,nx-4
-            tx(i,j,k)=aicix6*(ux(i,j,k)+ux(i-1,j,k))&
-                 +bicix6*(ux(i+1,j,k)+ux(i-2,j,k))&
-                 +cicix6*(ux(i+2,j,k)+ux(i-3,j,k))&
-                 +dicix6*(ux(i+3,j,k)+ux(i-4,j,k))
-         enddo
-         tx(nx-3,j,k)=aicix6*(ux(nx-3,j,k)+ux(nx-4,j,k))&
-              +bicix6*(ux(nx-2,j,k)+ux(nx-5,j,k))&
-              +cicix6*(ux(nx-1,j,k)+ux(nx-6,j,k))&
-              +dicix6*(ux(nx-1,j,k)+ux(nx-7,j,k))        
-         tx(nx-2,j,k)=aicix6*(ux(nx-2,j,k)+ux(nx-3,j,k))&
-              +bicix6*(ux(nx-1,j,k)+ux(nx-4,j,k))&
-              +cicix6*(ux(nx-1,j,k)+ux(nx-5,j,k))&
-              +dicix6*(ux(nx-2,j,k)+ux(nx-6,j,k))        
-         tx(nx-1,j,k)=aicix6*(ux(nx-1,j,k)+ux(nx-2,j,k))&
-              +bicix6*(ux(nx-1,j,k)+ux(nx-3,j,k))&
-              +cicix6*(ux(nx-2,j,k)+ux(nx-4,j,k))&
-              +dicix6*(ux(nx-3,j,k)+ux(nx-5,j,k))
-         tx(nx,j,k)=aicix6*(ux(nx-1,j,k)+ux(nx-1,j,k))&
-              +bicix6*(ux(nx-2,j,k)+ux(nx-2,j,k))&
-              +cicix6*(ux(nx-3,j,k)+ux(nx-3,j,k))&
-              +dicix6*(ux(nx-4,j,k)+ux(nx-4,j,k))
-         do i=2,nx
-            tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*cisi6(i)
-         enddo
-         tx(nx,j,k)=tx(nx,j,k)*ciwi6(nx)
-         do i=nx-1,1,-1
-            tx(i,j,k)=(tx(i,j,k)-cifi6(i)*tx(i+1,j,k))*ciwi6(i)
-         enddo
-      enddo
-      enddo
-   endif
-endif
-
-return
-end subroutine interxpv
+  !
+  !********************************************************************
 
-!********************************************************************
-!
-subroutine interyvp(ty,uy,ry,sy,cify6,cisy6,ciwy6,nx,ny,nym,nz,npaire) 
-!
-!********************************************************************
-  
-USE param 
-USE derivY 
-
-implicit none
-
-integer :: nx,ny,nym,nz,npaire
-real(mytype), dimension(nx,nym,nz) :: ty
-real(mytype), dimension(nx,ny,nz) :: uy,ry
-real(mytype), dimension(nx,nz) :: sy
-real(mytype), dimension(nym) :: cify6,cisy6,ciwy6
-integer :: i,j,k
-
-if (ncly) then
-   do k=1,nz 
-   do i=1,nx 
-      ty(i,1,k)=aiciy6*(uy(i,2,k)+uy(i,1,k))&
-           +biciy6*(uy(i,3,k)+uy(i,ny,k))&
-           +ciciy6*(uy(i,4,k)+uy(i,ny-1,k))&
-           +diciy6*(uy(i,5,k)+uy(i,ny-2,k)) 
-      ry(i,1,k)=-1.
-      ty(i,2,k)=aiciy6*(uy(i,3,k)+uy(i,2,k))&
-           +biciy6*(uy(i,4,k)+uy(i,1,k))&
-           +ciciy6*(uy(i,5,k)+uy(i,ny,k))& 
-           +diciy6*(uy(i,6,k)+uy(i,ny-1,k)) 
-      ry(i,2,k)=0. 
-      ty(i,3,k)=aiciy6*(uy(i,4,k)+uy(i,3,k))&
-           +biciy6*(uy(i,5,k)+uy(i,2,k))&
-           +ciciy6*(uy(i,6,k)+uy(i,1,k))& 
-           +diciy6*(uy(i,7,k)+uy(i,ny,k)) 
-      ry(i,3,k)=0. 
-   enddo
-   enddo
-   do k=1,nz
-   do j=4,ny-4
-   do i=1,nx 
-      ty(i,j,k)=aiciy6*(uy(i,j+1,k)+uy(i,j,k))&
-           +biciy6*(uy(i,j+2,k)+uy(i,j-1,k))&
-           +ciciy6*(uy(i,j+3,k)+uy(i,j-2,k))& 
-           +diciy6*(uy(i,j+4,k)+uy(i,j-3,k)) 
-      ry(i,j,k)=0. 
-   enddo
-   enddo
-   enddo
-   do k=1,nz 
-   do i=1,nx 
-      ty(i,ny-3,k)=aiciy6*(uy(i,ny-2,k)+uy(i,ny-3,k))&
-           +biciy6*(uy(i,ny-1,k)+uy(i,ny-4,k))&
-           +ciciy6*(uy(i,ny,k)+uy(i,ny-5,k))&
-           +diciy6*(uy(i,1,k)+uy(i,ny-6,k))
-      ry(i,ny-3,k)=0.
-      ty(i,ny-2,k)=aiciy6*(uy(i,ny-1,k)+uy(i,ny-2,k))&
-           +biciy6*(uy(i,ny,k)+uy(i,ny-3,k))&
-           +ciciy6*(uy(i,1,k)+uy(i,ny-4,k))&
-           +diciy6*(uy(i,2,k)+uy(i,ny-5,k))
-      ry(i,ny-2,k)=0.
-      ty(i,ny-1,k)=aiciy6*(uy(i,ny,k)+uy(i,ny-1,k))&
-           +biciy6*(uy(i,1,k)+uy(i,ny-2,k))&
-           +ciciy6*(uy(i,2,k)+uy(i,ny-3,k))&
-           +diciy6*(uy(i,3,k)+uy(i,ny-4,k)) 
-      ry(i,ny-1,k)=0. 
-      ty(i,ny,k)=aiciy6*(uy(i,1,k)+uy(i,ny,k))&
-           +biciy6*(uy(i,2,k)+uy(i,ny-1,k))&
-           +ciciy6*(uy(i,3,k)+uy(i,ny-2,k))& 
-           +diciy6*(uy(i,4,k)+uy(i,ny-3,k)) 
-      ry(i,ny,k)=ailcaiy6
-   enddo
-   enddo
-   do k=1,nz
-   do j=2,ny  
-   do i=1,nx 
-      ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*cisy6(j) 
-      ry(i,j,k)=ry(i,j,k)-ry(i,j-1,k)*cisy6(j) 
-   enddo
-   enddo
-   enddo
-   do k=1,nz 
-   do i=1,nx 
-      ty(i,ny,k)=ty(i,ny,k)*ciwy6(ny) 
-      ry(i,ny,k)=ry(i,ny,k)*ciwy6(ny) 
-   enddo
-   enddo
-   do k=1,nz 
-   do j=ny-1,1,-1 
-   do i=1,nx 
-      ty(i,j,k)=(ty(i,j,k)-cify6(j)*ty(i,j+1,k))*ciwy6(j) 
-      ry(i,j,k)=(ry(i,j,k)-cify6(j)*ry(i,j+1,k))*ciwy6(j) 
-   enddo
-   enddo
-   enddo
-   do k=1,nz 
-   do i=1,nx 
-      sy(i,k)=(ty(i,1,k)-ailcaiy6*ty(i,ny,k))&
-           /(1.+ry(i,1,k)-ailcaiy6*ry(i,ny,k)) 
-   enddo 
-   enddo
-   do k=1,nz
-   do j=1,ny  
-   do i=1,nx 
-      ty(i,j,k)=ty(i,j,k)-sy(i,k)*ry(i,j,k) 
-   enddo
-   enddo
-   enddo
-else
-   if (npaire==1) then
-      do k=1,nz
-      do i=1,nx
-         ty(i,1,k)=aiciy6*(uy(i,2,k)+uy(i,1,k))&
-              +biciy6*(uy(i,3,k)+uy(i,2,k))&
-              +ciciy6*(uy(i,4,k)+uy(i,3,k))&             
-              +diciy6*(uy(i,5,k)+uy(i,4,k))             
-         ty(i,2,k)=aiciy6*(uy(i,3,k)+uy(i,2,k))&
-              +biciy6*(uy(i,4,k)+uy(i,1,k))&
-              +ciciy6*(uy(i,5,k)+uy(i,2,k))&             
-              +diciy6*(uy(i,6,k)+uy(i,3,k))             
-         ty(i,3,k)=aiciy6*(uy(i,4,k)+uy(i,3,k))&
-              +biciy6*(uy(i,5,k)+uy(i,2,k))&
-              +ciciy6*(uy(i,6,k)+uy(i,1,k))&             
-              +diciy6*(uy(i,7,k)+uy(i,2,k))             
-      enddo
-      enddo
-      do k=1,nz
-      do j=4,nym-3
-      do i=1,nx
-         ty(i,j,k)=aiciy6*(uy(i,j+1,k)+uy(i,j,k))&
-              +biciy6*(uy(i,j+2,k)+uy(i,j-1,k))&
-              +ciciy6*(uy(i,j+3,k)+uy(i,j-2,k))&              
-              +diciy6*(uy(i,j+4,k)+uy(i,j-3,k))              
-      enddo
-      enddo
-      enddo
-      do k=1,nz
-      do i=1,nx
-         ty(i,nym-2,k)=aiciy6*(uy(i,nym-1,k)+uy(i,nym-2,k))&
-              +biciy6*(uy(i,nym,k)+uy(i,nym-3,k))&
-              +ciciy6*(uy(i,ny,k)+uy(i,nym-4,k))&         
-              +diciy6*(uy(i,nym,k)+uy(i,nym-5,k))         
-         ty(i,nym-1,k)=aiciy6*(uy(i,nym,k)+uy(i,nym-1,k))&
-              +biciy6*(uy(i,ny,k)+uy(i,nym-2,k))&
-              +ciciy6*(uy(i,nym,k)+uy(i,nym-3,k))&         
-              +diciy6*(uy(i,nym-1,k)+uy(i,nym-4,k))         
-         ty(i,nym,k)=aiciy6*(uy(i,ny,k)+uy(i,nym,k))&
-              +biciy6*(uy(i,nym,k)+uy(i,nym-1,k))&
-              +ciciy6*(uy(i,nym-1,k)+uy(i,nym-2,k))&            
-              +diciy6*(uy(i,nym-2,k)+uy(i,nym-3,k))            
-      enddo
-      enddo
-      do k=1,nz
-      do j=2,nym
-      do i=1,nx
-         ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*cisy6(j)
-      enddo
-      enddo
-      enddo
-      do k=1,nz
-      do i=1,nx
-         ty(i,nym,k)=ty(i,nym,k)*ciwy6(nym)
-      enddo
-      enddo
-      do k=1,nz
-      do j=nym-1,1,-1
-      do i=1,nx
-         ty(i,j,k)=(ty(i,j,k)-cify6(j)*ty(i,j+1,k))*ciwy6(j)
-      enddo
-      enddo
-      enddo
-   endif
-endif
-
-return
-end subroutine interyvp
+  USE param 
+  USE derivX 
 
-!********************************************************************
-!
-subroutine deryvp(ty,uy,ry,sy,cfy6,csy6,cwy6,ppyi,nx,ny,nym,nz,npaire) 
-!
-!********************************************************************
-  
-USE param
-USE derivY 
-
-implicit none
-
-integer :: nx,ny,nym,nz,npaire
-real(mytype), dimension(nx,nym,nz) :: ty
-real(mytype), dimension(nx,ny,nz) :: uy
-real(mytype), dimension(nx,ny,nz) :: ry
-real(mytype), dimension(nx,nz) :: sy
-real(mytype), dimension(nym) :: cfy6,csy6,cwy6,ppyi
-integer :: i,j,k
-
-if (ncly) then
-   do k=1,nz 
-   do i=1,nx 
-      ty(i,1,k)=aciy6*(uy(i,2,k)-uy(i,1,k))&
-           +bciy6*(uy(i,3,k)-uy(i,ny,k)) 
-      ry(i,1,k)=-1.
-      ty(i,2,k)=aciy6*(uy(i,3,k)-uy(i,2,k))&
-           +bciy6*(uy(i,4,k)-uy(i,1,k)) 
-      ry(i,2,k)=0. 
-   enddo
-   enddo 
-   do k=1,nz
-   do j=3,ny-2  
-   do i=1,nx 
-      ty(i,j,k)=aciy6*(uy(i,j+1,k)-uy(i,j,k))&
-           +bciy6*(uy(i,j+2,k)-uy(i,j-1,k)) 
-      ry(i,j,k)=0.
-   enddo
-   enddo
-   enddo
-   do k=1,nz 
-   do i=1,nx 
-      ty(i,ny-1,k)=aciy6*(uy(i,ny,k)-uy(i,ny-1,k))&
-           +bciy6*(uy(i,1,k)-uy(i,ny-2,k)) 
-      ry(i,ny-1,k)=0. 
-      ty(i,ny,k)=aciy6*(uy(i,1,k)-uy(i,ny,k))&
-           +bciy6*(uy(i,2,k)-uy(i,ny-1,k)) 
-      ry(i,ny,k)=alcaiy6
-   enddo
-   enddo
-   do k=1,nz
-   do j=2,ny  
-   do i=1,nx 
-      ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*csy6(j) 
-      ry(i,j,k)=ry(i,j,k)-ry(i,j-1,k)*csy6(j) 
-   enddo
-   enddo
-   enddo
-   do k=1,nz 
-   do i=1,nx 
-      ty(i,ny,k)=ty(i,ny,k)*cwy6(ny) 
-      ry(i,ny,k)=ry(i,ny,k)*cwy6(ny) 
-   enddo
-   enddo
-   do k=1,nz
-   do j=ny-1,1,-1  
-   do i=1,nx 
-      ty(i,j,k)=(ty(i,j,k)-cfy6(j)*ty(i,j+1,k))*cwy6(j) 
-      ry(i,j,k)=(ry(i,j,k)-cfy6(j)*ry(i,j+1,k))*cwy6(j) 
-   enddo
-   enddo
-   enddo
-   do k=1,nz 
-   do i=1,nx 
-      sy(i,k)=(ty(i,1,k)-alcaiy6*ty(i,ny,k))&
-           /(1.+ry(i,1,k)-alcaiy6*ry(i,ny,k)) 
-   enddo
-   enddo
-   do k=1,nz
-   do j=1,ny  
-   do i=1,nx 
-      ty(i,j,k)=ty(i,j,k)-sy(i,k)*ry(i,j,k) 
-   enddo
-   enddo
-   enddo
-else
-   if (npaire==0) then
-      do k=1,nz
-      do i=1,nx
-         ty(i,1,k)=aciy6*(uy(i,2,k)-uy(i,1,k))&
-              +bciy6*(uy(i,3,k)-2.*uy(i,1,k)+uy(i,2,k)) 
-         ty(i,2,k)=aciy6*(uy(i,3,k)-uy(i,2,k))&
-              +bciy6*(uy(i,4,k)-uy(i,1,k))
-      enddo
-      enddo
-      do k=1,nz
-      do j=3,nym-2
-      do i=1,nx
-         ty(i,j,k)=aciy6*(uy(i,j+1,k)-uy(i,j,k))&
-              +bciy6*(uy(i,j+2,k)-uy(i,j-1,k))
-      enddo
-      enddo
-      enddo
-      do k=1,nz
-      do i=1,nx
-         ty(i,nym-1,k)=aciy6*(uy(i,nym,k)-uy(i,nym-1,k))&
-              +bciy6*(uy(i,ny,k)-uy(i,nym-2,k)) 
-         ty(i,nym,k)=aciy6*(uy(i,ny,k)-uy(i,nym,k))&
-              +bciy6*(2.*uy(i,ny,k)-uy(i,nym,k)-uy(i,nym-1,k)) 
-      enddo
-      enddo
-      do k=1,nz
-      do j=2,nym
-      do i=1,nx
-         ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*csy6(j)
-      enddo
-      enddo
-      enddo
-      do k=1,nz
-      do i=1,nx
-         ty(i,nym,k)=ty(i,nym,k)*cwy6(nym)
-      enddo
-      enddo
-      do k=1,nz
-      do j=nym-1,1,-1
-      do i=1,nx
-         ty(i,j,k)=(ty(i,j,k)-cfy6(j)*ty(i,j+1,k))*cwy6(j)
-      enddo
-      enddo
-      enddo
-   endif
-endif
-
-if (istret.ne.0) then
-   do k=1,nz
-   do j=1,nym
-   do i=1,nx
-      ty(i,j,k)=ty(i,j,k)*ppyi(j)
-   enddo
-   enddo
-   enddo
-endif
-
-return
-end subroutine deryvp
+  implicit none
 
-!********************************************************************
-!
-subroutine interypv(ty,uy,ry,sy,cifi6y,cisi6y,ciwi6y,cify6,cisy6,ciwy6,&
-     nx,nym,ny,nz,npaire) 
-!
-!********************************************************************
-  
-USE param
-USE derivY
-
-implicit none
-
-integer :: nx,ny,nym,nz,npaire
-real(mytype), dimension(nx,ny,nz) :: ty
-real(mytype), dimension(nx,nym,nz) :: uy
-real(mytype), dimension(nx,ny,nz) :: ry
-real(mytype), dimension(nx,nz) :: sy
-real(mytype), dimension(ny) :: cifi6y,cisi6y,ciwi6y
-real(mytype), dimension(ny) :: cify6,cisy6,ciwy6
-integer :: i,j,k
-    
-if (ncly) then
-   do k=1,nz
-   do i=1,nx
-      ty(i,1,k)=aiciy6*(uy(i,1,k)+uy(i,ny,k))&
-           +biciy6*(uy(i,2,k)+uy(i,ny-1,k))&
-           +ciciy6*(uy(i,3,k)+uy(i,ny-2,k))&
-           +diciy6*(uy(i,4,k)+uy(i,ny-3,k))
-      ry(i,1,k)=-1.
-      ty(i,2,k)=aiciy6*(uy(i,2,k)+uy(i,1,k))&
-           +biciy6*(uy(i,3,k)+uy(i,ny,k))&
-           +ciciy6*(uy(i,4,k)+uy(i,ny-1,k))&
-           +diciy6*(uy(i,5,k)+uy(i,ny-2,k))
-      ry(i,2,k)=0.
-      ty(i,3,k)=aiciy6*(uy(i,3,k)+uy(i,2,k))&
-           +biciy6*(uy(i,4,k)+uy(i,1,k))&
-           +ciciy6*(uy(i,5,k)+uy(i,ny,k))&
-           +diciy6*(uy(i,6,k)+uy(i,ny-1,k))
-      ry(i,3,k)=0.
-      ty(i,4,k)=aiciy6*(uy(i,4,k)+uy(i,3,k))&
-           +biciy6*(uy(i,5,k)+uy(i,2,k))&
-           +ciciy6*(uy(i,6,k)+uy(i,1,k))&
-           +diciy6*(uy(i,7,k)+uy(i,ny,k))
-      ry(i,4,k)=0.
-   enddo
-   enddo
-   do k=1,nz
-   do j=5,ny-3
-   do i=1,nx
-      ty(i,j,k)=aiciy6*(uy(i,j,k)+uy(i,j-1,k))&
-           +biciy6*(uy(i,j+1,k)+uy(i,j-2,k))&
-           +ciciy6*(uy(i,j+2,k)+uy(i,j-3,k))&
-           +diciy6*(uy(i,j+3,k)+uy(i,j-4,k))
-      ry(i,j,k)=0.
-   enddo
-   enddo
-   enddo
-   do k=1,nz
-   do i=1,nx
-      ty(i,ny-2,k)=aiciy6*(uy(i,ny-2,k)+uy(i,ny-3,k))&
-           +biciy6*(uy(i,ny-1,k)+uy(i,ny-4,k))&
-           +ciciy6*(uy(i,ny,k)+uy(i,ny-5,k))&
-           +diciy6*(uy(i,1,k)+uy(i,ny-6,k))
-      ry(i,ny-2,k)=0.
-      ty(i,ny-1,k)=aiciy6*(uy(i,ny-1,k)+uy(i,ny-2,k))&
-           +biciy6*(uy(i,ny,k)+uy(i,ny-3,k))&
-           +ciciy6*(uy(i,1,k)+uy(i,ny-4,k))&
-           +diciy6*(uy(i,2,k)+uy(i,ny-5,k))
-      ry(i,ny-1,k)=0.
-      ty(i,ny,k)=aiciy6*(uy(i,ny,k)+uy(i,ny-1,k))&
-           +biciy6*(uy(i,1,k)+uy(i,ny-2,k))&
-           +ciciy6*(uy(i,2,k)+uy(i,ny-3,k))&
-           +diciy6*(uy(i,3,k)+uy(i,ny-4,k))
-      ry(i,ny,k)=ailcaiy6
-   enddo
-   enddo
-   do k=1,nz
-   do j=2,ny
-   do i=1,nx
-      ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*cisy6(j)
-      ry(i,j,k)=ry(i,j,k)-ry(i,j-1,k)*cisy6(j)
-   enddo
-   enddo
-   enddo
-   do k=1,nz
-   do i=1,nx
-      ty(i,ny,k)=ty(i,ny,k)*ciwy6(ny)
-      ry(i,ny,k)=ry(i,ny,k)*ciwy6(ny)
-   enddo
-   enddo
-   do j=ny-1,1,-1
-   do k=1,nz
-   do i=1,nx
-      ty(i,j,k)=(ty(i,j,k)-cify6(j)*ty(i,j+1,k))*ciwy6(j)
-      ry(i,j,k)=(ry(i,j,k)-cify6(j)*ry(i,j+1,k))*ciwy6(j)
-   enddo
-   enddo
-   enddo
-   do k=1,nz
-   do i=1,nx
-      sy(i,k)=(ty(i,1,k)-ailcaiy6*ty(i,ny,k))/&
-           (1.+ry(i,1,k)-ailcaiy6*ry(i,ny,k))
-   enddo
-   enddo
-   do j=1,ny
-   do k=1,nz
-   do i=1,nx
-      ty(i,j,k)=ty(i,j,k)-sy(i,k)*ry(i,j,k)
-   enddo
-   enddo
-   enddo   
-else
-   if (npaire==1) then
-      do k=1,nz
-      do i=1,nx
-         ty(i,1,k)=aiciy6*(uy(i,1,k)+uy(i,1,k))&
-              +biciy6*(uy(i,2,k)+uy(i,2,k))&
-              +ciciy6*(uy(i,3,k)+uy(i,3,k))&
-              +diciy6*(uy(i,4,k)+uy(i,4,k))             
-         ty(i,2,k)=aiciy6*(uy(i,2,k)+uy(i,1,k))&
-              +biciy6*(uy(i,3,k)+uy(i,1,k))&
-              +ciciy6*(uy(i,4,k)+uy(i,2,k))&
-              +diciy6*(uy(i,5,k)+uy(i,3,k))
-         ty(i,3,k)=aiciy6*(uy(i,3,k)+uy(i,2,k))&
-              +biciy6*(uy(i,4,k)+uy(i,1,k))&
-              +ciciy6*(uy(i,5,k)+uy(i,1,k))&
-              +diciy6*(uy(i,6,k)+uy(i,2,k))             
-         ty(i,4,k)=aiciy6*(uy(i,4,k)+uy(i,3,k))&
-              +biciy6*(uy(i,5,k)+uy(i,2,k))&
-              +ciciy6*(uy(i,6,k)+uy(i,1,k))&
-              +diciy6*(uy(i,7,k)+uy(i,1,k))             
-      enddo
-      enddo
-      do j=5,ny-4
-      do k=1,nz
-      do i=1,nx
-         ty(i,j,k)=aiciy6*(uy(i,j,k)+uy(i,j-1,k))&
-              +biciy6*(uy(i,j+1,k)+uy(i,j-2,k))&
-              +ciciy6*(uy(i,j+2,k)+uy(i,j-3,k))&
-              +diciy6*(uy(i,j+3,k)+uy(i,j-4,k))              
-      enddo
-      enddo
-      enddo
-      do k=1,nz
-      do i=1,nx
-        ty(i,ny-3,k)=aiciy6*(uy(i,ny-3,k)+uy(i,ny-4,k))&
-              +biciy6*(uy(i,ny-2,k)+uy(i,ny-5,k))&
-              +ciciy6*(uy(i,ny-1,k)+uy(i,ny-6,k))&
-              +diciy6*(uy(i,ny-1,k)+uy(i,ny-7,k))   
-        ty(i,ny-2,k)=aiciy6*(uy(i,ny-2,k)+uy(i,ny-3,k))&
-              +biciy6*(uy(i,ny-1,k)+uy(i,ny-4,k))&
-              +ciciy6*(uy(i,ny-1,k)+uy(i,ny-5,k))&
-              +diciy6*(uy(i,ny-2,k)+uy(i,ny-6,k))   
-         ty(i,ny-1,k)=aiciy6*(uy(i,ny-1,k)+uy(i,ny-2,k))&
-              +biciy6*(uy(i,ny-1,k)+uy(i,ny-3,k))&
-              +ciciy6*(uy(i,ny-2,k)+uy(i,ny-4,k))&
-              +diciy6*(uy(i,ny-3,k)+uy(i,ny-5,k))         
-         ty(i,ny,k)=aiciy6*(uy(i,ny-1,k)+uy(i,ny-1,k))&
-              +biciy6*(uy(i,ny-2,k)+uy(i,ny-2,k))&
-              +ciciy6*(uy(i,ny-3,k)+uy(i,ny-3,k))&
-              +diciy6*(uy(i,ny-4,k)+uy(i,ny-4,k))            
-      enddo
-      enddo
-      do j=2,ny
-      do k=1,nz
-      do i=1,nx
-         ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*cisi6y(j)
-      enddo
-      enddo
-      enddo
-      do k=1,nz
-      do i=1,nx
-         ty(i,ny,k)=ty(i,ny,k)*ciwi6y(ny)
-      enddo
-      enddo
-      do j=ny-1,1,-1
-      do k=1,nz
-      do i=1,nx
-         ty(i,j,k)=(ty(i,j,k)-cifi6y(j)*ty(i,j+1,k))*ciwi6y(j)
-      enddo
-      enddo
-      enddo
-   endif
-endif
-
-return  
-end subroutine interypv 
+  integer :: nx,nxm,ny,nz,npaire
+  real(mytype), dimension(nx,ny,nz) :: tx,rx
+  real(mytype), dimension(nxm,ny,nz) :: ux
+  real(mytype), dimension(ny,nz) :: sx
+  real(mytype), dimension(nx) :: cifi6,cisi6,ciwi6
+  real(mytype), dimension(nx) :: cifx6,cisx6,ciwx6
+  integer :: i,j,k
 
-!********************************************************************
-!
-subroutine derypv(ty,uy,ry,sy,cfi6y,csi6y,cwi6y,cfy6,csy6,cwy6,&
-     ppy,nx,nym,ny,nz,npaire) 
-!
-!********************************************************************
-  
-USE param
-USE derivY 
-
-implicit none
-
-integer :: nx,ny,nym,nz,npaire
-real(mytype), dimension(nx,ny,nz) :: ty
-real(mytype), dimension(nx,nym,nz) :: uy
-real(mytype), dimension(nx,ny,nz) :: ry
-real(mytype), dimension(nx,nz) :: sy
-real(mytype), dimension(ny) :: cfi6y,csi6y,cwi6y,ppy
-real(mytype), dimension(nym) :: cfy6,csy6,cwy6
-integer :: i,j,k
-
-if (ncly) then
-   do k=1,nz 
-   do i=1,nx 
-      ty(i,1,k)=aciy6*(uy(i,1,k)-uy(i,ny,k))&
-           +bciy6*(uy(i,2,k)-uy(i,ny-1,k)) 
-      ry(i,1,k)=-1.
-      ty(i,2,k)=aciy6*(uy(i,2,k)-uy(i,1,k))&
-           +bciy6*(uy(i,3,k)-uy(i,ny,k)) 
-      ry(i,2,k)=0.
-   enddo
-   enddo 
-   do j=3,ny-2 
-   do k=1,nz 
-   do i=1,nx 
-      ty(i,j,k)=aciy6*(uy(i,j,k)-uy(i,j-1,k))&
-           +bciy6*(uy(i,j+1,k)-uy(i,j-2,k)) 
-      ry(i,j,k)=0. 
-   enddo
-   enddo
-   enddo
-   do k=1,nz 
-   do i=1,nx 
-      ty(i,ny-1,k)=aciy6*(uy(i,ny-1,k)-uy(i,ny-2,k))&
-           +bciy6*(uy(i,ny,k)-uy(i,ny-3,k)) 
-      ry(i,ny-1,k)=0. 
-      ty(i,ny,k)=aciy6*(uy(i,ny,k)-uy(i,ny-1,k))&
-           +bciy6*(uy(i,1,k)-uy(i,ny-2,k)) 
-      ry(i,ny,k)=alcaiy6
-   enddo
-   enddo
-   do j=2,ny 
-   do k=1,nz 
-   do i=1,nx 
-      ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*csy6(j) 
-      ry(i,j,k)=ry(i,j,k)-ry(i,j-1,k)*csy6(j) 
-   enddo
-   enddo
-   enddo
-   do k=1,nz 
-   do i=1,nx 
-      ty(i,ny,k)=ty(i,ny,k)*cwy6(ny) 
-      ry(i,ny,k)=ry(i,ny,k)*cwy6(ny) 
-   enddo
-   enddo
-   do j=ny-1,1,-1 
-   do k=1,nz 
-   do i=1,nx 
-      ty(i,j,k)=(ty(i,j,k)-cfy6(j)*ty(i,j+1,k))*cwy6(j) 
-      ry(i,j,k)=(ry(i,j,k)-cfy6(j)*ry(i,j+1,k))*cwy6(j) 
-   enddo
-   enddo
-   enddo
-   do k=1,nz 
-   do i=1,nx 
-      sy(i,k)=(ty(i,1,k)-alcaiy6*ty(i,ny,k))&
-           /(1.+ry(i,1,k)-alcaiy6*ry(i,ny,k)) 
-   enddo
-   enddo
-   do j=1,ny 
-   do k=1,nz 
-   do i=1,nx
-      ty(i,j,k)=ty(i,j,k)-sy(i,k)*ry(i,j,k)
-   enddo
-   enddo
-   enddo
-else
-   if (npaire==1) then
-      do k=1,nz
-      do i=1,nx
-         ty(i,1,k)=0.            
-         ty(i,2,k)=aciy6*(uy(i,2,k)-uy(i,1,k))&
-              +bciy6*(uy(i,3,k)-uy(i,1,k))             
-      enddo
-      enddo
-      do j=3,ny-2
-      do k=1,nz
-      do i=1,nx
-         ty(i,j,k)=aciy6*(uy(i,j,k)-uy(i,j-1,k))&
-              +bciy6*(uy(i,j+1,k)-uy(i,j-2,k))              
-      enddo
-      enddo
-      enddo
-      do k=1,nz
-      do i=1,nx
-         ty(i,ny-1,k)=aciy6*(uy(i,ny-1,k)-uy(i,ny-2,k))&
-              +bciy6*(uy(i,ny-1,k)-uy(i,ny-3,k))         
-         ty(i,ny,k)=0.          
-      enddo
-      enddo
-      do j=2,ny
-      do k=1,nz
-      do i=1,nx
-         ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*csi6y(j)
-      enddo
-      enddo
-      enddo
-      do k=1,nz
-      do i=1,nx
-         ty(i,ny,k)=ty(i,ny,k)*cwi6y(ny)
-      enddo
-      enddo
-      do j=ny-1,1,-1
-      do k=1,nz
-      do i=1,nx
-         ty(i,j,k)=(ty(i,j,k)-cfi6y(j)*ty(i,j+1,k))*cwi6y(j)
-      enddo
-      enddo
-      enddo
-   endif
-endif
-
-if (istret.ne.0) then   
-   do k=1,nz 
-   do j=1,ny
-   do i=1,nx 
-      ty(i,j,k)=ty(i,j,k)*ppy(j) 
-   enddo
-   enddo
-   enddo
-endif
-
-return  
-end subroutine derypv
+  if (nclx) then
+     do k=1,nz
+        do j=1,ny
+           tx(1,j,k)=aicix6*(ux(1,j,k)+ux(nx  ,j,k))&
+                +bicix6*(ux(2,j,k)+ux(nx-1,j,k))&
+                +cicix6*(ux(3,j,k)+ux(nx-2,j,k))&
+                +dicix6*(ux(4,j,k)+ux(nx-3,j,k))
+           rx(1,j,k)=-1.
+           tx(2,j,k)=aicix6*(ux(2,j,k)+ux(1 ,j,k))&
+                +bicix6*(ux(3,j,k)+ux(nx,j,k))&
+                +cicix6*(ux(4,j,k)+ux(nx-1,j,k))&
+                +dicix6*(ux(5,j,k)+ux(nx-2,j,k))
+           rx(2,j,k)=0.
+           tx(3,j,k)=aicix6*(ux(3,j,k)+ux(2 ,j,k))&
+                +bicix6*(ux(4,j,k)+ux(1,j,k))&
+                +cicix6*(ux(5,j,k)+ux(nx,j,k))&
+                +dicix6*(ux(6,j,k)+ux(nx-1,j,k))
+           rx(3,j,k)=0.
+           tx(4,j,k)=aicix6*(ux(4,j,k)+ux(3 ,j,k))&
+                +bicix6*(ux(5,j,k)+ux(2,j,k))&
+                +cicix6*(ux(6,j,k)+ux(1,j,k))&
+                +dicix6*(ux(7,j,k)+ux(nx,j,k))
+           rx(4,j,k)=0.
+           do i=5,nx-3
+              tx(i,j,k)=aicix6*(ux(i,j,k)+ux(i-1,j,k))&
+                   +bicix6*(ux(i+1,j,k)+ux(i-2,j,k))&
+                   +cicix6*(ux(i+2,j,k)+ux(i-3,j,k))&
+                   +dicix6*(ux(i+3,j,k)+ux(i-4,j,k))
+              rx(i,j,k)=0.
+           enddo
+           tx(nx-2,j,k)=aicix6*(ux(nx-2,j,k)+ux(nx-3,j,k))&
+                +bicix6*(ux(nx-1,j,k)+ux(nx-4,j,k))&
+                +cicix6*(ux(nx,j,k)+ux(nx-5,j,k))&
+                +dicix6*(ux(1,j,k)+ux(nx-6,j,k))
+           rx(nx-2,j,k)=0.
+           tx(nx-1,j,k)=aicix6*(ux(nx-1,j,k)+ux(nx-2,j,k))&
+                +bicix6*(ux(nx ,j,k)+ux(nx-3,j,k))&
+                +cicix6*(ux(1,j,k)+ux(nx-4,j,k))&
+                +dicix6*(ux(2,j,k)+ux(nx-5,j,k))
+           rx(nx-1,j,k)=0.
+           tx(nx  ,j,k)=aicix6*(ux(nx,j,k)+ux(nx-1,j,k))&
+                +bicix6*(ux(1,j,k)+ux(nx-2,j,k))&
+                +cicix6*(ux(2,j,k)+ux(nx-3,j,k))&
+                +dicix6*(ux(3,j,k)+ux(nx-4,j,k))
+           rx(nx  ,j,k)=ailcaix6
+           do i=2,nx
+              tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*cisx6(i)
+              rx(i,j,k)=rx(i,j,k)-rx(i-1,j,k)*cisx6(i)
+           enddo
+           tx(nx,j,k)=tx(nx,j,k)*ciwx6(nx)
+           rx(nx,j,k)=rx(nx,j,k)*ciwx6(nx)
+           do i=nx-1,1,-1
+              tx(i,j,k)=(tx(i,j,k)-cifx6(i)*tx(i+1,j,k))*ciwx6(i)
+              rx(i,j,k)=(rx(i,j,k)-cifx6(i)*rx(i+1,j,k))*ciwx6(i)
+           enddo
+           sx(j,k)=(tx(1,j,k)-ailcaix6*tx(nx,j,k))/&
+                (1.+rx(1,j,k)-ailcaix6*rx(nx,j,k))
+           do i=1,nx
+              tx(i,j,k)=tx(i,j,k)-sx(j,k)*rx(i,j,k)
+           enddo
+        enddo
+     enddo
+  else
+     if (npaire==1) then
+        do k=1,nz
+           do j=1,ny
+              tx(1,j,k)=aicix6*(ux(1,j,k)+ux(1,j,k))&
+                   +bicix6*(ux(2,j,k)+ux(2,j,k))&
+                   +cicix6*(ux(3,j,k)+ux(3,j,k))&
+                   +dicix6*(ux(4,j,k)+ux(4,j,k))
+              tx(2,j,k)=aicix6*(ux(2,j,k)+ux(1,j,k))&
+                   +bicix6*(ux(3,j,k)+ux(1,j,k))&
+                   +cicix6*(ux(4,j,k)+ux(2,j,k))&
+                   +dicix6*(ux(5,j,k)+ux(3,j,k))
+              tx(3,j,k)=aicix6*(ux(3,j,k)+ux(2,j,k))&
+                   +bicix6*(ux(4,j,k)+ux(1,j,k))&
+                   +cicix6*(ux(5,j,k)+ux(1,j,k))&
+                   +dicix6*(ux(6,j,k)+ux(2,j,k))
+              tx(4,j,k)=aicix6*(ux(4,j,k)+ux(3,j,k))&
+                   +bicix6*(ux(5,j,k)+ux(2,j,k))&
+                   +cicix6*(ux(6,j,k)+ux(1,j,k))&
+                   +dicix6*(ux(7,j,k)+ux(1,j,k))
+              do i=5,nx-4
+                 tx(i,j,k)=aicix6*(ux(i,j,k)+ux(i-1,j,k))&
+                      +bicix6*(ux(i+1,j,k)+ux(i-2,j,k))&
+                      +cicix6*(ux(i+2,j,k)+ux(i-3,j,k))&
+                      +dicix6*(ux(i+3,j,k)+ux(i-4,j,k))
+              enddo
+              tx(nx-3,j,k)=aicix6*(ux(nx-3,j,k)+ux(nx-4,j,k))&
+                   +bicix6*(ux(nx-2,j,k)+ux(nx-5,j,k))&
+                   +cicix6*(ux(nx-1,j,k)+ux(nx-6,j,k))&
+                   +dicix6*(ux(nx-1,j,k)+ux(nx-7,j,k))        
+              tx(nx-2,j,k)=aicix6*(ux(nx-2,j,k)+ux(nx-3,j,k))&
+                   +bicix6*(ux(nx-1,j,k)+ux(nx-4,j,k))&
+                   +cicix6*(ux(nx-1,j,k)+ux(nx-5,j,k))&
+                   +dicix6*(ux(nx-2,j,k)+ux(nx-6,j,k))        
+              tx(nx-1,j,k)=aicix6*(ux(nx-1,j,k)+ux(nx-2,j,k))&
+                   +bicix6*(ux(nx-1,j,k)+ux(nx-3,j,k))&
+                   +cicix6*(ux(nx-2,j,k)+ux(nx-4,j,k))&
+                   +dicix6*(ux(nx-3,j,k)+ux(nx-5,j,k))
+              tx(nx,j,k)=aicix6*(ux(nx-1,j,k)+ux(nx-1,j,k))&
+                   +bicix6*(ux(nx-2,j,k)+ux(nx-2,j,k))&
+                   +cicix6*(ux(nx-3,j,k)+ux(nx-3,j,k))&
+                   +dicix6*(ux(nx-4,j,k)+ux(nx-4,j,k))
+              do i=2,nx
+                 tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*cisi6(i)
+              enddo
+              tx(nx,j,k)=tx(nx,j,k)*ciwi6(nx)
+              do i=nx-1,1,-1
+                 tx(i,j,k)=(tx(i,j,k)-cifi6(i)*tx(i+1,j,k))*ciwi6(i)
+              enddo
+           enddo
+        enddo
+     endif
+  endif
 
-!********************************************************************
-!
-subroutine derzvp(tz,uz,rz,sz,cfz6,csz6,cwz6,nx,ny,nz,nzm,npaire) 
-!
-!********************************************************************
-  
-USE param
-USE derivZ 
-
-implicit none
-
-integer :: nx,ny,nz,nzm,npaire
-real(mytype), dimension(nx,ny,nzm) :: tz
-real(mytype), dimension(nx,ny,nz) :: uz
-real(mytype), dimension(nx,ny,nz) :: rz
-real(mytype), dimension(nx,ny) :: sz
-real(mytype), dimension(nzm) :: cfz6,csz6,cwz6
-integer :: i,j,k
-
-if (nclz) then
-   do j=1,ny
-   do i=1,nx
-      tz(i,j,1)=aciz6*(uz(i,j,2)-uz(i,j,1))&
-           +bciz6*(uz(i,j,3)-uz(i,j,nz))
-      rz(i,j,1)=-1.
-      tz(i,j,2)=aciz6*(uz(i,j,3)-uz(i,j,2))&
-           +bciz6*(uz(i,j,4)-uz(i,j,1))
-      rz(i,j,2)=0.
-   enddo
-   enddo
-   do k=3,nz-2
-   do j=1,ny
-   do i=1,nx
-      tz(i,j,k)=aciz6*(uz(i,j,k+1)-uz(i,j,k))&
-           +bciz6*(uz(i,j,k+2)-uz(i,j,k-1))
-      rz(i,j,k)=0.
-   enddo
-   enddo
-   enddo
-   do j=1,ny
-   do i=1,nx
-      tz(i,j,nz-1)=aciz6*(uz(i,j,nz)-uz(i,j,nz-1))&
-           +bciz6*(uz(i,j,1)-uz(i,j,nz-2))
-      rz(i,j,nz-1)=0.
-      tz(i,j,nz)=aciz6*(uz(i,j,1)-uz(i,j,nz))&
-           +bciz6*(uz(i,j,2)-uz(i,j,nz-1))
-      rz(i  ,j,nz)=alcaiz6
-   enddo
-   enddo
-   do k=2,nz
-   do j=1,ny
-   do i=1,nx
-      tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*csz6(k)
-      rz(i,j,k)=rz(i,j,k)-rz(i,j,k-1)*csz6(k)
-   enddo
-   enddo
-   enddo
-   do i=1,nx
-   do j=1,ny
-      tz(i,j,nz)=tz(i,j,nz)*cwz6(nz)
-      rz(i,j,nz)=rz(i,j,nz)*cwz6(nz)
-   enddo
-   enddo
-   do k=nz-1,1,-1
-   do j=1,ny
-   do i=1,nx
-      tz(i,j,k)=(tz(i,j,k)-cfz6(k)*tz(i,j,k+1))*cwz6(k)
-      rz(i,j,k)=(rz(i,j,k)-cfz6(k)*rz(i,j,k+1))*cwz6(k)
-   enddo
-   enddo
-   enddo
-   do j=1,ny
-   do i=1,nx
-      sz(i,j)=(tz(i,j,1)-alcaiz6*tz(i,j,nz))/&
-           (1.+rz(i,j,1)-alcaiz6*rz(i,j,nz))
-   enddo
-   enddo
-   do k=1,nz
-   do j=1,ny
-   do i=1,nx
-      tz(i,j,k)=tz(i,j,k)-sz(i,j)*rz(i,j,k)
-   enddo
-   enddo
-   enddo
-else
-   if (npaire==1) then
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,1)=aciz6*(uz(i,j,2)-uz(i,j,1))&
-              +bciz6*(uz(i,j,3)-uz(i,j,2))             
-         tz(i,j,2)=aciz6*(uz(i,j,3)-uz(i,j,2))&
-              +bciz6*(uz(i,j,4)-uz(i,j,1))             
-      enddo
-      enddo
-      do k=3,nzm-2
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,k)=aciz6*(uz(i,j,k+1)-uz(i,j,k))&
-              +bciz6*(uz(i,j,k+2)-uz(i,j,k-1))              
-      enddo
-      enddo
-      enddo
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,nzm-1)=aciz6*(uz(i,j,nzm)-uz(i,j,nzm-1))&
-              +bciz6*(uz(nz,j,k)-uz(nzm-2,j,k))         
-         tz(i,j,nzm)=aciz6*(uz(i,j,nz)-uz(i,j,nzm))&
-              +bciz6*(uz(i,j,nzm)-uz(i,j,nzm-1))            
-      enddo
-      enddo
-      do k=2,nzm
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*csz6(k)
-      enddo
-      enddo
-      enddo
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,nzm)=tz(i,j,nzm)*cwz6(nzm)
-      enddo
-      enddo
-      do k=nzm-1,1,-1
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,k)=(tz(i,j,k)-cfz6(k)*tz(i,j,k+1))*cwz6(k)
-      enddo
-      enddo
-      enddo
-   endif
-   if (npaire==0) then
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,1)=aciz6*(uz(i,j,2)-uz(i,j,1))&
-              +bciz6*(uz(i,j,3)-2.*uz(i,j,1)+uz(i,j,2)) 
-         tz(i,j,2)=aciz6*(uz(i,j,3)-uz(i,j,2))&
-              +bciz6*(uz(i,j,4)-uz(i,j,1))
-      enddo
-      enddo
-      do k=3,nzm-2
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,k)=aciz6*(uz(i,j,k+1)-uz(i,j,k))&
-              +bciz6*(uz(i,j,k+2)-uz(i,j,k-1))
-      enddo
-      enddo
-      enddo
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,nzm-1)=aciz6*(uz(i,j,nz-1)-uz(i,j,nz-2))&
-              +bciz6*(uz(i,j,nz)-uz(i,j,nz-3)) 
-         tz(i,j,nzm)=aciz6*(uz(i,j,nz)-uz(i,j,nz-1))&
-              +bciz6*(2.*uz(i,j,nz)-uz(i,j,nz-1)-uz(i,j,nz-2)) 
-      enddo
-      enddo
-      do k=2,nzm
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*csz6(k)
-      enddo
-      enddo
-      enddo
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,nzm)=tz(i,j,nzm)*cwz6(nzm)
-      enddo
-      enddo
-      do k=nzm-1,1,-1
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,k)=(tz(i,j,k)-cfz6(k)*tz(i,j,k+1))*cwz6(k)
-      enddo
-      enddo
-      enddo
-   endif
-endif
-
-return     
-end subroutine derzvp
+  return
+end subroutine interxpv
 
 !********************************************************************
 !
-subroutine interzvp(tz,uz,rz,sz,cifz6,cisz6,ciwz6,nx,ny,nz,nzm,npaire) 
-!
-!********************************************************************
-  
-USE param 
-USE derivZ 
-
-implicit none
-
-integer :: nx,ny,nz,nzm,npaire
-real(mytype), dimension(nx,ny,nzm) :: tz
-real(mytype), dimension(nx,ny,nz) :: uz,rz
-real(mytype), dimension(nx,ny) :: sz
-real(mytype), dimension(nzm) :: cifz6,cisz6,ciwz6
-integer :: i,j,k
-
-if (nclz) then
-   do j=1,ny
-   do i=1,nx
-      tz(i,j,1)=aiciz6*(uz(i,j,2)+uz(i,j,1))&
-           +biciz6*(uz(i,j,3)+uz(i,j,nz))&
-           +ciciz6*(uz(i,j,4)+uz(i,j,nz-1))&
-           +diciz6*(uz(i,j,5)+uz(i,j,nz-2))
-      rz(i,j,1)=-1.
-      tz(i,j,2)=aiciz6*(uz(i,j,3)+uz(i,j,2))&
-           +biciz6*(uz(i,j,4)+uz(i,j,1))&
-           +ciciz6*(uz(i,j,5)+uz(i,j,nz))&
-           +diciz6*(uz(i,j,6)+uz(i,j,nz-1))
-      rz(i,j,2)=0.
-      tz(i,j,3)=aiciz6*(uz(i,j,4)+uz(i,j,3))&
-           +biciz6*(uz(i,j,5)+uz(i,j,2))&
-           +ciciz6*(uz(i,j,6)+uz(i,j,1))&
-           +diciz6*(uz(i,j,7)+uz(i,j,nz))
-      rz(i,j,3)=0.
-   enddo
-   enddo
-   do k=4,nz-4
-   do j=1,ny
-   do i=1,nx
-      tz(i,j,k)=aiciz6*(uz(i,j,k+1)+uz(i,j,k))&
-           +biciz6*(uz(i,j,k+2)+uz(i,j,k-1))&
-           +ciciz6*(uz(i,j,k+3)+uz(i,j,k-2))&
-           +diciz6*(uz(i,j,k+4)+uz(i,j,k-3))
-      rz(i,j,k)=0.
-   enddo
-   enddo
-   enddo
-   do j=1,ny
-   do i=1,nx
-      tz(i,j,nz-3)=aiciz6*(uz(i,j,nz-2)+uz(i,j,nz-3))&
-           +biciz6*(uz(i,j,nz-1)+uz(i,j,nz-4))&
-           +ciciz6*(uz(i,j,nz)+uz(i,j,nz-5))&
-           +diciz6*(uz(i,j,1)+uz(i,j,nz-6))
-      rz(i,j,nz-3)=0.
-      tz(i,j,nz-2)=aiciz6*(uz(i,j,nz-1)+uz(i,j,nz-2))&
-           +biciz6*(uz(i,j,nz)+uz(i,j,nz-3))&
-           +ciciz6*(uz(i,j,1)+uz(i,j,nz-4))&
-           +diciz6*(uz(i,j,2)+uz(i,j,nz-5))
-      rz(i,j,nz-2)=0.
-      tz(i,j,nz-1)=aiciz6*(uz(i,j,nz)+uz(i,j,nz-1))&
-           +biciz6*(uz(i,j,1)+uz(i,j,nz-2))&
-           +ciciz6*(uz(i,j,2)+uz(i,j,nz-3))&
-           +diciz6*(uz(i,j,3)+uz(i,j,nz-4))
-      rz(i,j,nz-1)=0.
-      tz(i,j,nz)=aiciz6*(uz(i,j,1)+uz(i,j,nz))&
-           +biciz6*(uz(i,j,2)+uz(i,j,nz-1))&
-           +ciciz6*(uz(i,j,3)+uz(i,j,nz-2))&
-           +diciz6*(uz(i,j,4)+uz(i,j,nz-3))
-      rz(i  ,j,nz)=ailcaiz6
-   enddo
-   enddo
-   do k=2,nz
-   do j=1,ny
-   do i=1,nx
-      tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*cisz6(k)
-      rz(i,j,k)=rz(i,j,k)-rz(i,j,k-1)*cisz6(k)
-   enddo
-   enddo
-   enddo
-   do i=1,nx
-   do j=1,ny
-      tz(i,j,nz)=tz(i,j,nz)*ciwz6(nz)
-      rz(i,j,nz)=rz(i,j,nz)*ciwz6(nz)
-   enddo
-   enddo
-   do k=nz-1,1,-1
-   do j=1,ny
-   do i=1,nx
-      tz(i,j,k)=(tz(i,j,k)-cifz6(k)*tz(i,j,k+1))*ciwz6(k)
-      rz(i,j,k)=(rz(i,j,k)-cifz6(k)*rz(i,j,k+1))*ciwz6(k)
-   enddo
-   enddo
-   enddo
-   do j=1,ny
-   do i=1,nx
-      sz(i,j)=(tz(i,j,1)-ailcaiz6*tz(i,j,nz))/&
-           (1.+rz(i,j,1)-ailcaiz6*rz(i,j,nz))
-   enddo
-   enddo
-   do k=1,nz
-   do j=1,ny
-   do i=1,nx
-      tz(i,j,k)=tz(i,j,k)-sz(i,j)*rz(i,j,k)
-   enddo
-   enddo
-   enddo
-else
-   if (npaire==1) then
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,1)=aiciz6*(uz(i,j,2)+uz(i,j,1))&
-              +biciz6*(uz(i,j,3)+uz(i,j,2))&
-              +ciciz6*(uz(i,j,4)+uz(i,j,3))&             
-              +diciz6*(uz(i,j,5)+uz(i,j,4))             
-         tz(i,j,2)=aiciz6*(uz(i,j,3)+uz(i,j,2))&
-              +biciz6*(uz(i,j,4)+uz(i,j,1))&
-              +ciciz6*(uz(i,j,5)+uz(i,j,2))&             
-              +diciz6*(uz(i,j,6)+uz(i,j,3))             
-         tz(i,j,3)=aiciz6*(uz(i,j,4)+uz(i,j,3))&
-              +biciz6*(uz(i,j,5)+uz(i,j,2))&
-              +ciciz6*(uz(i,j,6)+uz(i,j,1))&             
-              +diciz6*(uz(i,j,7)+uz(i,j,2))             
-      enddo
-      enddo
-      do k=4,nzm-3
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,k)=aiciz6*(uz(i,j,k+1)+uz(i,j,k))&
-              +biciz6*(uz(i,j,k+2)+uz(i,j,k-1))&
-              +ciciz6*(uz(i,j,k+3)+uz(i,j,k-2))&              
-              +diciz6*(uz(i,j,k+4)+uz(i,j,k-3))              
-      enddo
-      enddo
-      enddo
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,nzm-2)=aiciz6*(uz(i,j,nzm-1)+uz(i,j,nzm-2))&
-              +biciz6*(uz(i,j,nzm)+uz(i,j,nzm-3))&
-              +ciciz6*(uz(i,j,nz)+uz(i,j,nzm-4))&         
-              +diciz6*(uz(i,j,nzm)+uz(i,j,nzm-5))         
-         tz(i,j,nzm-1)=aiciz6*(uz(i,j,nzm)+uz(i,j,nzm-1))&
-              +biciz6*(uz(i,j,nz)+uz(i,j,nzm-2))&
-              +ciciz6*(uz(i,j,nzm)+uz(i,j,nzm-3))&         
-              +diciz6*(uz(i,j,nzm-1)+uz(i,j,nzm-4))         
-         tz(i,j,nzm)=aiciz6*(uz(i,j,nz)+uz(i,j,nzm))&
-              +biciz6*(uz(i,j,nzm)+uz(i,j,nzm-1))&
-              +ciciz6*(uz(i,j,nzm-1)+uz(i,j,nzm-2))&            
-              +diciz6*(uz(i,j,nzm-2)+uz(i,j,nzm-3))            
-      enddo
-      enddo
-      do k=2,nzm
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*cisz6(k)
-      enddo
-      enddo
-      enddo
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,nzm)=tz(i,j,nzm)*ciwz6(nzm)
-      enddo
-      enddo
-      do k=nzm-1,1,-1
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,k)=(tz(i,j,k)-cifz6(k)*tz(i,j,k+1))*ciwz6(k)
-      enddo
-      enddo
-      enddo
-   endif
-endif
-
-return     
-end subroutine interzvp 
+subroutine interyvp(ty,uy,ry,sy,cify6,cisy6,ciwy6,nx,ny,nym,nz,npaire) 
+  !
+  !********************************************************************
 
-!********************************************************************
-!
-subroutine derzpv(tz,uz,rz,sz,cfiz6,csiz6,cwiz6,cfz6,csz6,cwz6,&
-     nx,ny,nzm,nz,npaire) 
-!
-!********************************************************************
-  
-USE param 
-USE derivZ 
-
-implicit none
-
-integer :: nx,nzm,ny,nz,npaire
-real(mytype), dimension(nx,ny,nz) :: tz
-real(mytype), dimension(nx,ny,nzm) :: uz,rz
-real(mytype), dimension(nx,ny) :: sz
-real(mytype), dimension(nz) :: cfiz6,csiz6,cwiz6
-real(mytype), dimension(nz) :: cfz6,csz6,cwz6
-integer :: i,j,k
-
-if (nclz) then
-   do j=1,ny
-   do i=1,nx
-      tz(i,j,1)=aciz6*(uz(i,j,1)-uz(i,j,nz))&
-           +bciz6*(uz(i,j,2)-uz(i,j,nz-1))
-      rz(i,j,1)=-1.
-      tz(i,j,2)=aciz6*(uz(i,j,2)-uz(i,j,1))&
-           +bciz6*(uz(i,j,3)-uz(i,j,nz))
-      rz(i,j,2)=0.
-   enddo
-   enddo
-   do k=3,nz-2
-   do j=1,ny
-   do i=1,nx
-      tz(i,j,k)=aciz6*(uz(i,j,k)-uz(i,j,k-1))&
-           +bciz6*(uz(i,j,k+1)-uz(i,j,k-2))
-      rz(i,j,k)=0.
-   enddo
-   enddo
-   enddo
-   do j=1,ny
-   do i=1,nx
-      tz(i,j,nz-1)=aciz6*(uz(i,j,nz-1)-uz(i,j,nz-2))&
-           +bciz6*(uz(i,j,nz)-uz(i,j,nz-3))
-      rz(i,j,nz-1)=0.
-      tz(i,j,nz)=aciz6*(uz(i,j,nz)-uz(i,j,nz-1))&
-           +bciz6*(uz(i,j,1)-uz(i,j,nz-2))
-      rz(i,j,nz)=alcaiz6
-   enddo
-   enddo
-   do k=2,nz
-   do j=1,ny
-   do i=1,nx
-      tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*csz6(k)
-      rz(i,j,k)=rz(i,j,k)-rz(i,j,k-1)*csz6(k)
-   enddo
-   enddo
-   enddo
-   do j=1,ny
-   do i=1,nx
-      tz(i,j,nz)=tz(i,j,nz)*cwz6(nz)
-      rz(i,j,nz)=rz(i,j,nz)*cwz6(nz)
-   enddo
-   enddo
-   do k=nz-1,1,-1
-   do j=1,ny
-   do i=1,nx
-      tz(i,j,k)=(tz(i,j,k)-cfz6(k)*tz(i,j,k+1))*cwz6(k)
-      rz(i,j,k)=(rz(i,j,k)-cfz6(k)*rz(i,j,k+1))*cwz6(k)
-   enddo
-   enddo
-   enddo
-   do j=1,ny
-   do i=1,nx
-      sz(i,j)=(tz(i,j,1)-alcaiz6*tz(i,j,nz))/&
-           (1.+rz(i,j,1)-alcaiz6*rz(i,j,nz))
-   enddo
-   enddo
-   do k=1,nz
-   do j=1,ny
-   do i=1,nx
-      tz(i,j,k)=tz(i,j,k)-sz(i,j)*rz(i,j,k)
-   enddo
-   enddo
-   enddo
-else
-   if (npaire==1) then
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,1)=0.
-         tz(i,j,2)=aciz6*(uz(i,j,2)-uz(i,j,1))&
-              +bciz6*(uz(i,j,3)-uz(i,j,1))  
-      enddo
-      enddo
-      do k=3,nz-2
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,k)=aciz6*(uz(i,j,k)-uz(i,j,k-1))&
-              +bciz6*(uz(i,j,k+1)-uz(i,j,k-2))
-      enddo
-      enddo
-      enddo
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,nz-1)=aciz6*(uz(i,j,nz-1)-uz(i,j,nz-2))&
-              +bciz6*(uz(i,j,nz-1)-uz(i,j,nz-3))
-         tz(i,j,nz)=0.
-      enddo
-      enddo
-      do k=2,nz
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*csiz6(k)
-      enddo
-      enddo
-      enddo
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,nz)=tz(i,j,nz)*cwiz6(nz)
-      enddo
-      enddo
-      do k=nz-1,1,-1
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,k)=(tz(i,j,k)-cfiz6(k)*tz(i,j,k+1))*cwiz6(k)
-      enddo
-      enddo
-      enddo
-   endif
-endif
-
-return
-end subroutine derzpv 
+  USE param 
+  USE derivY 
+
+  implicit none
+
+  integer :: nx,ny,nym,nz,npaire
+  real(mytype), dimension(nx,nym,nz) :: ty
+  real(mytype), dimension(nx,ny,nz) :: uy,ry
+  real(mytype), dimension(nx,nz) :: sy
+  real(mytype), dimension(nym) :: cify6,cisy6,ciwy6
+  integer :: i,j,k
+
+  if (ncly) then
+     do k=1,nz 
+        do i=1,nx 
+           ty(i,1,k)=aiciy6*(uy(i,2,k)+uy(i,1,k))&
+                +biciy6*(uy(i,3,k)+uy(i,ny,k))&
+                +ciciy6*(uy(i,4,k)+uy(i,ny-1,k))&
+                +diciy6*(uy(i,5,k)+uy(i,ny-2,k)) 
+           ry(i,1,k)=-1.
+           ty(i,2,k)=aiciy6*(uy(i,3,k)+uy(i,2,k))&
+                +biciy6*(uy(i,4,k)+uy(i,1,k))&
+                +ciciy6*(uy(i,5,k)+uy(i,ny,k))& 
+                +diciy6*(uy(i,6,k)+uy(i,ny-1,k)) 
+           ry(i,2,k)=0. 
+           ty(i,3,k)=aiciy6*(uy(i,4,k)+uy(i,3,k))&
+                +biciy6*(uy(i,5,k)+uy(i,2,k))&
+                +ciciy6*(uy(i,6,k)+uy(i,1,k))& 
+                +diciy6*(uy(i,7,k)+uy(i,ny,k)) 
+           ry(i,3,k)=0. 
+        enddo
+     enddo
+     do k=1,nz
+        do j=4,ny-4
+           do i=1,nx 
+              ty(i,j,k)=aiciy6*(uy(i,j+1,k)+uy(i,j,k))&
+                   +biciy6*(uy(i,j+2,k)+uy(i,j-1,k))&
+                   +ciciy6*(uy(i,j+3,k)+uy(i,j-2,k))& 
+                   +diciy6*(uy(i,j+4,k)+uy(i,j-3,k)) 
+              ry(i,j,k)=0. 
+           enddo
+        enddo
+     enddo
+     do k=1,nz 
+        do i=1,nx 
+           ty(i,ny-3,k)=aiciy6*(uy(i,ny-2,k)+uy(i,ny-3,k))&
+                +biciy6*(uy(i,ny-1,k)+uy(i,ny-4,k))&
+                +ciciy6*(uy(i,ny,k)+uy(i,ny-5,k))&
+                +diciy6*(uy(i,1,k)+uy(i,ny-6,k))
+           ry(i,ny-3,k)=0.
+           ty(i,ny-2,k)=aiciy6*(uy(i,ny-1,k)+uy(i,ny-2,k))&
+                +biciy6*(uy(i,ny,k)+uy(i,ny-3,k))&
+                +ciciy6*(uy(i,1,k)+uy(i,ny-4,k))&
+                +diciy6*(uy(i,2,k)+uy(i,ny-5,k))
+           ry(i,ny-2,k)=0.
+           ty(i,ny-1,k)=aiciy6*(uy(i,ny,k)+uy(i,ny-1,k))&
+                +biciy6*(uy(i,1,k)+uy(i,ny-2,k))&
+                +ciciy6*(uy(i,2,k)+uy(i,ny-3,k))&
+                +diciy6*(uy(i,3,k)+uy(i,ny-4,k)) 
+           ry(i,ny-1,k)=0. 
+           ty(i,ny,k)=aiciy6*(uy(i,1,k)+uy(i,ny,k))&
+                +biciy6*(uy(i,2,k)+uy(i,ny-1,k))&
+                +ciciy6*(uy(i,3,k)+uy(i,ny-2,k))& 
+                +diciy6*(uy(i,4,k)+uy(i,ny-3,k)) 
+           ry(i,ny,k)=ailcaiy6
+        enddo
+     enddo
+     do k=1,nz
+        do j=2,ny  
+           do i=1,nx 
+              ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*cisy6(j) 
+              ry(i,j,k)=ry(i,j,k)-ry(i,j-1,k)*cisy6(j) 
+           enddo
+        enddo
+     enddo
+     do k=1,nz 
+        do i=1,nx 
+           ty(i,ny,k)=ty(i,ny,k)*ciwy6(ny) 
+           ry(i,ny,k)=ry(i,ny,k)*ciwy6(ny) 
+        enddo
+     enddo
+     do k=1,nz 
+        do j=ny-1,1,-1 
+           do i=1,nx 
+              ty(i,j,k)=(ty(i,j,k)-cify6(j)*ty(i,j+1,k))*ciwy6(j) 
+              ry(i,j,k)=(ry(i,j,k)-cify6(j)*ry(i,j+1,k))*ciwy6(j) 
+           enddo
+        enddo
+     enddo
+     do k=1,nz 
+        do i=1,nx 
+           sy(i,k)=(ty(i,1,k)-ailcaiy6*ty(i,ny,k))&
+                /(1.+ry(i,1,k)-ailcaiy6*ry(i,ny,k)) 
+        enddo
+     enddo
+     do k=1,nz
+        do j=1,ny  
+           do i=1,nx 
+              ty(i,j,k)=ty(i,j,k)-sy(i,k)*ry(i,j,k) 
+           enddo
+        enddo
+     enddo
+  else
+     if (npaire==1) then
+        do k=1,nz
+           do i=1,nx
+              ty(i,1,k)=aiciy6*(uy(i,2,k)+uy(i,1,k))&
+                   +biciy6*(uy(i,3,k)+uy(i,2,k))&
+                   +ciciy6*(uy(i,4,k)+uy(i,3,k))&             
+                   +diciy6*(uy(i,5,k)+uy(i,4,k))             
+              ty(i,2,k)=aiciy6*(uy(i,3,k)+uy(i,2,k))&
+                   +biciy6*(uy(i,4,k)+uy(i,1,k))&
+                   +ciciy6*(uy(i,5,k)+uy(i,2,k))&             
+                   +diciy6*(uy(i,6,k)+uy(i,3,k))             
+              ty(i,3,k)=aiciy6*(uy(i,4,k)+uy(i,3,k))&
+                   +biciy6*(uy(i,5,k)+uy(i,2,k))&
+                   +ciciy6*(uy(i,6,k)+uy(i,1,k))&             
+                   +diciy6*(uy(i,7,k)+uy(i,2,k))             
+           enddo
+        enddo
+        do k=1,nz
+           do j=4,nym-3
+              do i=1,nx
+                 ty(i,j,k)=aiciy6*(uy(i,j+1,k)+uy(i,j,k))&
+                      +biciy6*(uy(i,j+2,k)+uy(i,j-1,k))&
+                      +ciciy6*(uy(i,j+3,k)+uy(i,j-2,k))&              
+                      +diciy6*(uy(i,j+4,k)+uy(i,j-3,k))              
+              enddo
+           enddo
+        enddo
+        do k=1,nz
+           do i=1,nx
+              ty(i,nym-2,k)=aiciy6*(uy(i,nym-1,k)+uy(i,nym-2,k))&
+                   +biciy6*(uy(i,nym,k)+uy(i,nym-3,k))&
+                   +ciciy6*(uy(i,ny,k)+uy(i,nym-4,k))&         
+                   +diciy6*(uy(i,nym,k)+uy(i,nym-5,k))         
+              ty(i,nym-1,k)=aiciy6*(uy(i,nym,k)+uy(i,nym-1,k))&
+                   +biciy6*(uy(i,ny,k)+uy(i,nym-2,k))&
+                   +ciciy6*(uy(i,nym,k)+uy(i,nym-3,k))&         
+                   +diciy6*(uy(i,nym-1,k)+uy(i,nym-4,k))         
+              ty(i,nym,k)=aiciy6*(uy(i,ny,k)+uy(i,nym,k))&
+                   +biciy6*(uy(i,nym,k)+uy(i,nym-1,k))&
+                   +ciciy6*(uy(i,nym-1,k)+uy(i,nym-2,k))&            
+                   +diciy6*(uy(i,nym-2,k)+uy(i,nym-3,k))            
+           enddo
+        enddo
+        do k=1,nz
+           do j=2,nym
+              do i=1,nx
+                 ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*cisy6(j)
+              enddo
+           enddo
+        enddo
+        do k=1,nz
+           do i=1,nx
+              ty(i,nym,k)=ty(i,nym,k)*ciwy6(nym)
+           enddo
+        enddo
+        do k=1,nz
+           do j=nym-1,1,-1
+              do i=1,nx
+                 ty(i,j,k)=(ty(i,j,k)-cify6(j)*ty(i,j+1,k))*ciwy6(j)
+              enddo
+           enddo
+        enddo
+     endif
+  endif
+
+  return
+end subroutine interyvp
+
+!********************************************************************
+!
+subroutine deryvp(ty,uy,ry,sy,cfy6,csy6,cwy6,ppyi,nx,ny,nym,nz,npaire) 
+  !
+  !********************************************************************
+
+  USE param
+  USE derivY 
+
+  implicit none
+
+  integer :: nx,ny,nym,nz,npaire
+  real(mytype), dimension(nx,nym,nz) :: ty
+  real(mytype), dimension(nx,ny,nz) :: uy
+  real(mytype), dimension(nx,ny,nz) :: ry
+  real(mytype), dimension(nx,nz) :: sy
+  real(mytype), dimension(nym) :: cfy6,csy6,cwy6,ppyi
+  integer :: i,j,k
+
+  if (ncly) then
+     do k=1,nz 
+        do i=1,nx 
+           ty(i,1,k)=aciy6*(uy(i,2,k)-uy(i,1,k))&
+                +bciy6*(uy(i,3,k)-uy(i,ny,k)) 
+           ry(i,1,k)=-1.
+           ty(i,2,k)=aciy6*(uy(i,3,k)-uy(i,2,k))&
+                +bciy6*(uy(i,4,k)-uy(i,1,k)) 
+           ry(i,2,k)=0. 
+        enddo
+     enddo
+     do k=1,nz
+        do j=3,ny-2  
+           do i=1,nx 
+              ty(i,j,k)=aciy6*(uy(i,j+1,k)-uy(i,j,k))&
+                   +bciy6*(uy(i,j+2,k)-uy(i,j-1,k)) 
+              ry(i,j,k)=0.
+           enddo
+        enddo
+     enddo
+     do k=1,nz 
+        do i=1,nx 
+           ty(i,ny-1,k)=aciy6*(uy(i,ny,k)-uy(i,ny-1,k))&
+                +bciy6*(uy(i,1,k)-uy(i,ny-2,k)) 
+           ry(i,ny-1,k)=0. 
+           ty(i,ny,k)=aciy6*(uy(i,1,k)-uy(i,ny,k))&
+                +bciy6*(uy(i,2,k)-uy(i,ny-1,k)) 
+           ry(i,ny,k)=alcaiy6
+        enddo
+     enddo
+     do k=1,nz
+        do j=2,ny  
+           do i=1,nx 
+              ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*csy6(j) 
+              ry(i,j,k)=ry(i,j,k)-ry(i,j-1,k)*csy6(j) 
+           enddo
+        enddo
+     enddo
+     do k=1,nz 
+        do i=1,nx 
+           ty(i,ny,k)=ty(i,ny,k)*cwy6(ny) 
+           ry(i,ny,k)=ry(i,ny,k)*cwy6(ny) 
+        enddo
+     enddo
+     do k=1,nz
+        do j=ny-1,1,-1  
+           do i=1,nx 
+              ty(i,j,k)=(ty(i,j,k)-cfy6(j)*ty(i,j+1,k))*cwy6(j) 
+              ry(i,j,k)=(ry(i,j,k)-cfy6(j)*ry(i,j+1,k))*cwy6(j) 
+           enddo
+        enddo
+     enddo
+     do k=1,nz 
+        do i=1,nx 
+           sy(i,k)=(ty(i,1,k)-alcaiy6*ty(i,ny,k))&
+                /(1.+ry(i,1,k)-alcaiy6*ry(i,ny,k)) 
+        enddo
+     enddo
+     do k=1,nz
+        do j=1,ny  
+           do i=1,nx 
+              ty(i,j,k)=ty(i,j,k)-sy(i,k)*ry(i,j,k) 
+           enddo
+        enddo
+     enddo
+  else
+     if (npaire==0) then
+        do k=1,nz
+           do i=1,nx
+              ty(i,1,k)=aciy6*(uy(i,2,k)-uy(i,1,k))&
+                   +bciy6*(uy(i,3,k)-2.*uy(i,1,k)+uy(i,2,k)) 
+              ty(i,2,k)=aciy6*(uy(i,3,k)-uy(i,2,k))&
+                   +bciy6*(uy(i,4,k)-uy(i,1,k))
+           enddo
+        enddo
+        do k=1,nz
+           do j=3,nym-2
+              do i=1,nx
+                 ty(i,j,k)=aciy6*(uy(i,j+1,k)-uy(i,j,k))&
+                      +bciy6*(uy(i,j+2,k)-uy(i,j-1,k))
+              enddo
+           enddo
+        enddo
+        do k=1,nz
+           do i=1,nx
+              ty(i,nym-1,k)=aciy6*(uy(i,nym,k)-uy(i,nym-1,k))&
+                   +bciy6*(uy(i,ny,k)-uy(i,nym-2,k)) 
+              ty(i,nym,k)=aciy6*(uy(i,ny,k)-uy(i,nym,k))&
+                   +bciy6*(2.*uy(i,ny,k)-uy(i,nym,k)-uy(i,nym-1,k)) 
+           enddo
+        enddo
+        do k=1,nz
+           do j=2,nym
+              do i=1,nx
+                 ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*csy6(j)
+              enddo
+           enddo
+        enddo
+        do k=1,nz
+           do i=1,nx
+              ty(i,nym,k)=ty(i,nym,k)*cwy6(nym)
+           enddo
+        enddo
+        do k=1,nz
+           do j=nym-1,1,-1
+              do i=1,nx
+                 ty(i,j,k)=(ty(i,j,k)-cfy6(j)*ty(i,j+1,k))*cwy6(j)
+              enddo
+           enddo
+        enddo
+     endif
+  endif
+
+  if (istret.ne.0) then
+     do k=1,nz
+        do j=1,nym
+           do i=1,nx
+              ty(i,j,k)=ty(i,j,k)*ppyi(j)
+           enddo
+        enddo
+     enddo
+  endif
+
+  return
+end subroutine deryvp
+
+!********************************************************************
+!
+subroutine interypv(ty,uy,ry,sy,cifi6y,cisi6y,ciwi6y,cify6,cisy6,ciwy6,&
+     nx,nym,ny,nz,npaire) 
+  !
+  !********************************************************************
+
+  USE param
+  USE derivY
+
+  implicit none
+
+  integer :: nx,ny,nym,nz,npaire
+  real(mytype), dimension(nx,ny,nz) :: ty
+  real(mytype), dimension(nx,nym,nz) :: uy
+  real(mytype), dimension(nx,ny,nz) :: ry
+  real(mytype), dimension(nx,nz) :: sy
+  real(mytype), dimension(ny) :: cifi6y,cisi6y,ciwi6y
+  real(mytype), dimension(ny) :: cify6,cisy6,ciwy6
+  integer :: i,j,k
+
+  if (ncly) then
+     do k=1,nz
+        do i=1,nx
+           ty(i,1,k)=aiciy6*(uy(i,1,k)+uy(i,ny,k))&
+                +biciy6*(uy(i,2,k)+uy(i,ny-1,k))&
+                +ciciy6*(uy(i,3,k)+uy(i,ny-2,k))&
+                +diciy6*(uy(i,4,k)+uy(i,ny-3,k))
+           ry(i,1,k)=-1.
+           ty(i,2,k)=aiciy6*(uy(i,2,k)+uy(i,1,k))&
+                +biciy6*(uy(i,3,k)+uy(i,ny,k))&
+                +ciciy6*(uy(i,4,k)+uy(i,ny-1,k))&
+                +diciy6*(uy(i,5,k)+uy(i,ny-2,k))
+           ry(i,2,k)=0.
+           ty(i,3,k)=aiciy6*(uy(i,3,k)+uy(i,2,k))&
+                +biciy6*(uy(i,4,k)+uy(i,1,k))&
+                +ciciy6*(uy(i,5,k)+uy(i,ny,k))&
+                +diciy6*(uy(i,6,k)+uy(i,ny-1,k))
+           ry(i,3,k)=0.
+           ty(i,4,k)=aiciy6*(uy(i,4,k)+uy(i,3,k))&
+                +biciy6*(uy(i,5,k)+uy(i,2,k))&
+                +ciciy6*(uy(i,6,k)+uy(i,1,k))&
+                +diciy6*(uy(i,7,k)+uy(i,ny,k))
+           ry(i,4,k)=0.
+        enddo
+     enddo
+     do k=1,nz
+        do j=5,ny-3
+           do i=1,nx
+              ty(i,j,k)=aiciy6*(uy(i,j,k)+uy(i,j-1,k))&
+                   +biciy6*(uy(i,j+1,k)+uy(i,j-2,k))&
+                   +ciciy6*(uy(i,j+2,k)+uy(i,j-3,k))&
+                   +diciy6*(uy(i,j+3,k)+uy(i,j-4,k))
+              ry(i,j,k)=0.
+           enddo
+        enddo
+     enddo
+     do k=1,nz
+        do i=1,nx
+           ty(i,ny-2,k)=aiciy6*(uy(i,ny-2,k)+uy(i,ny-3,k))&
+                +biciy6*(uy(i,ny-1,k)+uy(i,ny-4,k))&
+                +ciciy6*(uy(i,ny,k)+uy(i,ny-5,k))&
+                +diciy6*(uy(i,1,k)+uy(i,ny-6,k))
+           ry(i,ny-2,k)=0.
+           ty(i,ny-1,k)=aiciy6*(uy(i,ny-1,k)+uy(i,ny-2,k))&
+                +biciy6*(uy(i,ny,k)+uy(i,ny-3,k))&
+                +ciciy6*(uy(i,1,k)+uy(i,ny-4,k))&
+                +diciy6*(uy(i,2,k)+uy(i,ny-5,k))
+           ry(i,ny-1,k)=0.
+           ty(i,ny,k)=aiciy6*(uy(i,ny,k)+uy(i,ny-1,k))&
+                +biciy6*(uy(i,1,k)+uy(i,ny-2,k))&
+                +ciciy6*(uy(i,2,k)+uy(i,ny-3,k))&
+                +diciy6*(uy(i,3,k)+uy(i,ny-4,k))
+           ry(i,ny,k)=ailcaiy6
+        enddo
+     enddo
+     do k=1,nz
+        do j=2,ny
+           do i=1,nx
+              ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*cisy6(j)
+              ry(i,j,k)=ry(i,j,k)-ry(i,j-1,k)*cisy6(j)
+           enddo
+        enddo
+     enddo
+     do k=1,nz
+        do i=1,nx
+           ty(i,ny,k)=ty(i,ny,k)*ciwy6(ny)
+           ry(i,ny,k)=ry(i,ny,k)*ciwy6(ny)
+        enddo
+     enddo
+     do j=ny-1,1,-1
+        do k=1,nz
+           do i=1,nx
+              ty(i,j,k)=(ty(i,j,k)-cify6(j)*ty(i,j+1,k))*ciwy6(j)
+              ry(i,j,k)=(ry(i,j,k)-cify6(j)*ry(i,j+1,k))*ciwy6(j)
+           enddo
+        enddo
+     enddo
+     do k=1,nz
+        do i=1,nx
+           sy(i,k)=(ty(i,1,k)-ailcaiy6*ty(i,ny,k))/&
+                (1.+ry(i,1,k)-ailcaiy6*ry(i,ny,k))
+        enddo
+     enddo
+     do j=1,ny
+        do k=1,nz
+           do i=1,nx
+              ty(i,j,k)=ty(i,j,k)-sy(i,k)*ry(i,j,k)
+           enddo
+        enddo
+     enddo
+  else
+     if (npaire==1) then
+        do k=1,nz
+           do i=1,nx
+              ty(i,1,k)=aiciy6*(uy(i,1,k)+uy(i,1,k))&
+                   +biciy6*(uy(i,2,k)+uy(i,2,k))&
+                   +ciciy6*(uy(i,3,k)+uy(i,3,k))&
+                   +diciy6*(uy(i,4,k)+uy(i,4,k))             
+              ty(i,2,k)=aiciy6*(uy(i,2,k)+uy(i,1,k))&
+                   +biciy6*(uy(i,3,k)+uy(i,1,k))&
+                   +ciciy6*(uy(i,4,k)+uy(i,2,k))&
+                   +diciy6*(uy(i,5,k)+uy(i,3,k))
+              ty(i,3,k)=aiciy6*(uy(i,3,k)+uy(i,2,k))&
+                   +biciy6*(uy(i,4,k)+uy(i,1,k))&
+                   +ciciy6*(uy(i,5,k)+uy(i,1,k))&
+                   +diciy6*(uy(i,6,k)+uy(i,2,k))             
+              ty(i,4,k)=aiciy6*(uy(i,4,k)+uy(i,3,k))&
+                   +biciy6*(uy(i,5,k)+uy(i,2,k))&
+                   +ciciy6*(uy(i,6,k)+uy(i,1,k))&
+                   +diciy6*(uy(i,7,k)+uy(i,1,k))             
+           enddo
+        enddo
+        do j=5,ny-4
+           do k=1,nz
+              do i=1,nx
+                 ty(i,j,k)=aiciy6*(uy(i,j,k)+uy(i,j-1,k))&
+                      +biciy6*(uy(i,j+1,k)+uy(i,j-2,k))&
+                      +ciciy6*(uy(i,j+2,k)+uy(i,j-3,k))&
+                      +diciy6*(uy(i,j+3,k)+uy(i,j-4,k))              
+              enddo
+           enddo
+        enddo
+        do k=1,nz
+           do i=1,nx
+              ty(i,ny-3,k)=aiciy6*(uy(i,ny-3,k)+uy(i,ny-4,k))&
+                   +biciy6*(uy(i,ny-2,k)+uy(i,ny-5,k))&
+                   +ciciy6*(uy(i,ny-1,k)+uy(i,ny-6,k))&
+                   +diciy6*(uy(i,ny-1,k)+uy(i,ny-7,k))   
+              ty(i,ny-2,k)=aiciy6*(uy(i,ny-2,k)+uy(i,ny-3,k))&
+                   +biciy6*(uy(i,ny-1,k)+uy(i,ny-4,k))&
+                   +ciciy6*(uy(i,ny-1,k)+uy(i,ny-5,k))&
+                   +diciy6*(uy(i,ny-2,k)+uy(i,ny-6,k))   
+              ty(i,ny-1,k)=aiciy6*(uy(i,ny-1,k)+uy(i,ny-2,k))&
+                   +biciy6*(uy(i,ny-1,k)+uy(i,ny-3,k))&
+                   +ciciy6*(uy(i,ny-2,k)+uy(i,ny-4,k))&
+                   +diciy6*(uy(i,ny-3,k)+uy(i,ny-5,k))         
+              ty(i,ny,k)=aiciy6*(uy(i,ny-1,k)+uy(i,ny-1,k))&
+                   +biciy6*(uy(i,ny-2,k)+uy(i,ny-2,k))&
+                   +ciciy6*(uy(i,ny-3,k)+uy(i,ny-3,k))&
+                   +diciy6*(uy(i,ny-4,k)+uy(i,ny-4,k))            
+           enddo
+        enddo
+        do j=2,ny
+           do k=1,nz
+              do i=1,nx
+                 ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*cisi6y(j)
+              enddo
+           enddo
+        enddo
+        do k=1,nz
+           do i=1,nx
+              ty(i,ny,k)=ty(i,ny,k)*ciwi6y(ny)
+           enddo
+        enddo
+        do j=ny-1,1,-1
+           do k=1,nz
+              do i=1,nx
+                 ty(i,j,k)=(ty(i,j,k)-cifi6y(j)*ty(i,j+1,k))*ciwi6y(j)
+              enddo
+           enddo
+        enddo
+     endif
+  endif
+
+  return  
+end subroutine interypv
+
+!********************************************************************
+!
+subroutine derypv(ty,uy,ry,sy,cfi6y,csi6y,cwi6y,cfy6,csy6,cwy6,&
+     ppy,nx,nym,ny,nz,npaire) 
+  !
+  !********************************************************************
+
+  USE param
+  USE derivY 
+
+  implicit none
+
+  integer :: nx,ny,nym,nz,npaire
+  real(mytype), dimension(nx,ny,nz) :: ty
+  real(mytype), dimension(nx,nym,nz) :: uy
+  real(mytype), dimension(nx,ny,nz) :: ry
+  real(mytype), dimension(nx,nz) :: sy
+  real(mytype), dimension(ny) :: cfi6y,csi6y,cwi6y,ppy
+  real(mytype), dimension(nym) :: cfy6,csy6,cwy6
+  integer :: i,j,k
+
+  if (ncly) then
+     do k=1,nz 
+        do i=1,nx 
+           ty(i,1,k)=aciy6*(uy(i,1,k)-uy(i,ny,k))&
+                +bciy6*(uy(i,2,k)-uy(i,ny-1,k)) 
+           ry(i,1,k)=-1.
+           ty(i,2,k)=aciy6*(uy(i,2,k)-uy(i,1,k))&
+                +bciy6*(uy(i,3,k)-uy(i,ny,k)) 
+           ry(i,2,k)=0.
+        enddo
+     enddo
+     do j=3,ny-2 
+        do k=1,nz 
+           do i=1,nx 
+              ty(i,j,k)=aciy6*(uy(i,j,k)-uy(i,j-1,k))&
+                   +bciy6*(uy(i,j+1,k)-uy(i,j-2,k)) 
+              ry(i,j,k)=0. 
+           enddo
+        enddo
+     enddo
+     do k=1,nz 
+        do i=1,nx 
+           ty(i,ny-1,k)=aciy6*(uy(i,ny-1,k)-uy(i,ny-2,k))&
+                +bciy6*(uy(i,ny,k)-uy(i,ny-3,k)) 
+           ry(i,ny-1,k)=0. 
+           ty(i,ny,k)=aciy6*(uy(i,ny,k)-uy(i,ny-1,k))&
+                +bciy6*(uy(i,1,k)-uy(i,ny-2,k)) 
+           ry(i,ny,k)=alcaiy6
+        enddo
+     enddo
+     do j=2,ny 
+        do k=1,nz 
+           do i=1,nx 
+              ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*csy6(j) 
+              ry(i,j,k)=ry(i,j,k)-ry(i,j-1,k)*csy6(j) 
+           enddo
+        enddo
+     enddo
+     do k=1,nz 
+        do i=1,nx 
+           ty(i,ny,k)=ty(i,ny,k)*cwy6(ny) 
+           ry(i,ny,k)=ry(i,ny,k)*cwy6(ny) 
+        enddo
+     enddo
+     do j=ny-1,1,-1 
+        do k=1,nz 
+           do i=1,nx 
+              ty(i,j,k)=(ty(i,j,k)-cfy6(j)*ty(i,j+1,k))*cwy6(j) 
+              ry(i,j,k)=(ry(i,j,k)-cfy6(j)*ry(i,j+1,k))*cwy6(j) 
+           enddo
+        enddo
+     enddo
+     do k=1,nz 
+        do i=1,nx 
+           sy(i,k)=(ty(i,1,k)-alcaiy6*ty(i,ny,k))&
+                /(1.+ry(i,1,k)-alcaiy6*ry(i,ny,k)) 
+        enddo
+     enddo
+     do j=1,ny 
+        do k=1,nz 
+           do i=1,nx
+              ty(i,j,k)=ty(i,j,k)-sy(i,k)*ry(i,j,k)
+           enddo
+        enddo
+     enddo
+  else
+     if (npaire==1) then
+        do k=1,nz
+           do i=1,nx
+              ty(i,1,k)=0.            
+              ty(i,2,k)=aciy6*(uy(i,2,k)-uy(i,1,k))&
+                   +bciy6*(uy(i,3,k)-uy(i,1,k))             
+           enddo
+        enddo
+        do j=3,ny-2
+           do k=1,nz
+              do i=1,nx
+                 ty(i,j,k)=aciy6*(uy(i,j,k)-uy(i,j-1,k))&
+                      +bciy6*(uy(i,j+1,k)-uy(i,j-2,k))              
+              enddo
+           enddo
+        enddo
+        do k=1,nz
+           do i=1,nx
+              ty(i,ny-1,k)=aciy6*(uy(i,ny-1,k)-uy(i,ny-2,k))&
+                   +bciy6*(uy(i,ny-1,k)-uy(i,ny-3,k))         
+              ty(i,ny,k)=0.          
+           enddo
+        enddo
+        do j=2,ny
+           do k=1,nz
+              do i=1,nx
+                 ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*csi6y(j)
+              enddo
+           enddo
+        enddo
+        do k=1,nz
+           do i=1,nx
+              ty(i,ny,k)=ty(i,ny,k)*cwi6y(ny)
+           enddo
+        enddo
+        do j=ny-1,1,-1
+           do k=1,nz
+              do i=1,nx
+                 ty(i,j,k)=(ty(i,j,k)-cfi6y(j)*ty(i,j+1,k))*cwi6y(j)
+              enddo
+           enddo
+        enddo
+     endif
+  endif
+
+  if (istret.ne.0) then   
+     do k=1,nz 
+        do j=1,ny
+           do i=1,nx 
+              ty(i,j,k)=ty(i,j,k)*ppy(j) 
+           enddo
+        enddo
+     enddo
+  endif
+
+  return  
+end subroutine derypv
+
+!********************************************************************
+!
+subroutine derzvp(tz,uz,rz,sz,cfz6,csz6,cwz6,nx,ny,nz,nzm,npaire) 
+  !
+  !********************************************************************
+
+  USE param
+  USE derivZ 
+
+  implicit none
+
+  integer :: nx,ny,nz,nzm,npaire
+  real(mytype), dimension(nx,ny,nzm) :: tz
+  real(mytype), dimension(nx,ny,nz) :: uz
+  real(mytype), dimension(nx,ny,nz) :: rz
+  real(mytype), dimension(nx,ny) :: sz
+  real(mytype), dimension(nzm) :: cfz6,csz6,cwz6
+  integer :: i,j,k
+
+  if (nclz) then
+     do j=1,ny
+        do i=1,nx
+           tz(i,j,1)=aciz6*(uz(i,j,2)-uz(i,j,1))&
+                +bciz6*(uz(i,j,3)-uz(i,j,nz))
+           rz(i,j,1)=-1.
+           tz(i,j,2)=aciz6*(uz(i,j,3)-uz(i,j,2))&
+                +bciz6*(uz(i,j,4)-uz(i,j,1))
+           rz(i,j,2)=0.
+        enddo
+     enddo
+     do k=3,nz-2
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,k)=aciz6*(uz(i,j,k+1)-uz(i,j,k))&
+                   +bciz6*(uz(i,j,k+2)-uz(i,j,k-1))
+              rz(i,j,k)=0.
+           enddo
+        enddo
+     enddo
+     do j=1,ny
+        do i=1,nx
+           tz(i,j,nz-1)=aciz6*(uz(i,j,nz)-uz(i,j,nz-1))&
+                +bciz6*(uz(i,j,1)-uz(i,j,nz-2))
+           rz(i,j,nz-1)=0.
+           tz(i,j,nz)=aciz6*(uz(i,j,1)-uz(i,j,nz))&
+                +bciz6*(uz(i,j,2)-uz(i,j,nz-1))
+           rz(i  ,j,nz)=alcaiz6
+        enddo
+     enddo
+     do k=2,nz
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*csz6(k)
+              rz(i,j,k)=rz(i,j,k)-rz(i,j,k-1)*csz6(k)
+           enddo
+        enddo
+     enddo
+     do i=1,nx
+        do j=1,ny
+           tz(i,j,nz)=tz(i,j,nz)*cwz6(nz)
+           rz(i,j,nz)=rz(i,j,nz)*cwz6(nz)
+        enddo
+     enddo
+     do k=nz-1,1,-1
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,k)=(tz(i,j,k)-cfz6(k)*tz(i,j,k+1))*cwz6(k)
+              rz(i,j,k)=(rz(i,j,k)-cfz6(k)*rz(i,j,k+1))*cwz6(k)
+           enddo
+        enddo
+     enddo
+     do j=1,ny
+        do i=1,nx
+           sz(i,j)=(tz(i,j,1)-alcaiz6*tz(i,j,nz))/&
+                (1.+rz(i,j,1)-alcaiz6*rz(i,j,nz))
+        enddo
+     enddo
+     do k=1,nz
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,k)=tz(i,j,k)-sz(i,j)*rz(i,j,k)
+           enddo
+        enddo
+     enddo
+  else
+     if (npaire==1) then
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,1)=aciz6*(uz(i,j,2)-uz(i,j,1))&
+                   +bciz6*(uz(i,j,3)-uz(i,j,2))             
+              tz(i,j,2)=aciz6*(uz(i,j,3)-uz(i,j,2))&
+                   +bciz6*(uz(i,j,4)-uz(i,j,1))             
+           enddo
+        enddo
+        do k=3,nzm-2
+           do j=1,ny
+              do i=1,nx
+                 tz(i,j,k)=aciz6*(uz(i,j,k+1)-uz(i,j,k))&
+                      +bciz6*(uz(i,j,k+2)-uz(i,j,k-1))              
+              enddo
+           enddo
+        enddo
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,nzm-1)=aciz6*(uz(i,j,nzm)-uz(i,j,nzm-1))&
+                   +bciz6*(uz(nz,j,k)-uz(nzm-2,j,k))         
+              tz(i,j,nzm)=aciz6*(uz(i,j,nz)-uz(i,j,nzm))&
+                   +bciz6*(uz(i,j,nzm)-uz(i,j,nzm-1))            
+           enddo
+        enddo
+        do k=2,nzm
+           do j=1,ny
+              do i=1,nx
+                 tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*csz6(k)
+              enddo
+           enddo
+        enddo
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,nzm)=tz(i,j,nzm)*cwz6(nzm)
+           enddo
+        enddo
+        do k=nzm-1,1,-1
+           do j=1,ny
+              do i=1,nx
+                 tz(i,j,k)=(tz(i,j,k)-cfz6(k)*tz(i,j,k+1))*cwz6(k)
+              enddo
+           enddo
+        enddo
+     endif
+     if (npaire==0) then
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,1)=aciz6*(uz(i,j,2)-uz(i,j,1))&
+                   +bciz6*(uz(i,j,3)-2.*uz(i,j,1)+uz(i,j,2)) 
+              tz(i,j,2)=aciz6*(uz(i,j,3)-uz(i,j,2))&
+                   +bciz6*(uz(i,j,4)-uz(i,j,1))
+           enddo
+        enddo
+        do k=3,nzm-2
+           do j=1,ny
+              do i=1,nx
+                 tz(i,j,k)=aciz6*(uz(i,j,k+1)-uz(i,j,k))&
+                      +bciz6*(uz(i,j,k+2)-uz(i,j,k-1))
+              enddo
+           enddo
+        enddo
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,nzm-1)=aciz6*(uz(i,j,nz-1)-uz(i,j,nz-2))&
+                   +bciz6*(uz(i,j,nz)-uz(i,j,nz-3)) 
+              tz(i,j,nzm)=aciz6*(uz(i,j,nz)-uz(i,j,nz-1))&
+                   +bciz6*(2.*uz(i,j,nz)-uz(i,j,nz-1)-uz(i,j,nz-2)) 
+           enddo
+        enddo
+        do k=2,nzm
+           do j=1,ny
+              do i=1,nx
+                 tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*csz6(k)
+              enddo
+           enddo
+        enddo
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,nzm)=tz(i,j,nzm)*cwz6(nzm)
+           enddo
+        enddo
+        do k=nzm-1,1,-1
+           do j=1,ny
+              do i=1,nx
+                 tz(i,j,k)=(tz(i,j,k)-cfz6(k)*tz(i,j,k+1))*cwz6(k)
+              enddo
+           enddo
+        enddo
+     endif
+  endif
+
+  return     
+end subroutine derzvp
 
 !********************************************************************
 !
-subroutine interzpv(tz,uz,rz,sz,cifiz6,cisiz6,ciwiz6,cifz6,cisz6,ciwz6,&
-     nx,ny,nzm,nz,npaire) 
+subroutine interzvp(tz,uz,rz,sz,cifz6,cisz6,ciwz6,nx,ny,nz,nzm,npaire) 
+  !
+  !********************************************************************
+
+  USE param 
+  USE derivZ 
+
+  implicit none
+
+  integer :: nx,ny,nz,nzm,npaire
+  real(mytype), dimension(nx,ny,nzm) :: tz
+  real(mytype), dimension(nx,ny,nz) :: uz,rz
+  real(mytype), dimension(nx,ny) :: sz
+  real(mytype), dimension(nzm) :: cifz6,cisz6,ciwz6
+  integer :: i,j,k
+
+  if (nclz) then
+     do j=1,ny
+        do i=1,nx
+           tz(i,j,1)=aiciz6*(uz(i,j,2)+uz(i,j,1))&
+                +biciz6*(uz(i,j,3)+uz(i,j,nz))&
+                +ciciz6*(uz(i,j,4)+uz(i,j,nz-1))&
+                +diciz6*(uz(i,j,5)+uz(i,j,nz-2))
+           rz(i,j,1)=-1.
+           tz(i,j,2)=aiciz6*(uz(i,j,3)+uz(i,j,2))&
+                +biciz6*(uz(i,j,4)+uz(i,j,1))&
+                +ciciz6*(uz(i,j,5)+uz(i,j,nz))&
+                +diciz6*(uz(i,j,6)+uz(i,j,nz-1))
+           rz(i,j,2)=0.
+           tz(i,j,3)=aiciz6*(uz(i,j,4)+uz(i,j,3))&
+                +biciz6*(uz(i,j,5)+uz(i,j,2))&
+                +ciciz6*(uz(i,j,6)+uz(i,j,1))&
+                +diciz6*(uz(i,j,7)+uz(i,j,nz))
+           rz(i,j,3)=0.
+        enddo
+     enddo
+     do k=4,nz-4
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,k)=aiciz6*(uz(i,j,k+1)+uz(i,j,k))&
+                   +biciz6*(uz(i,j,k+2)+uz(i,j,k-1))&
+                   +ciciz6*(uz(i,j,k+3)+uz(i,j,k-2))&
+                   +diciz6*(uz(i,j,k+4)+uz(i,j,k-3))
+              rz(i,j,k)=0.
+           enddo
+        enddo
+     enddo
+     do j=1,ny
+        do i=1,nx
+           tz(i,j,nz-3)=aiciz6*(uz(i,j,nz-2)+uz(i,j,nz-3))&
+                +biciz6*(uz(i,j,nz-1)+uz(i,j,nz-4))&
+                +ciciz6*(uz(i,j,nz)+uz(i,j,nz-5))&
+                +diciz6*(uz(i,j,1)+uz(i,j,nz-6))
+           rz(i,j,nz-3)=0.
+           tz(i,j,nz-2)=aiciz6*(uz(i,j,nz-1)+uz(i,j,nz-2))&
+                +biciz6*(uz(i,j,nz)+uz(i,j,nz-3))&
+                +ciciz6*(uz(i,j,1)+uz(i,j,nz-4))&
+                +diciz6*(uz(i,j,2)+uz(i,j,nz-5))
+           rz(i,j,nz-2)=0.
+           tz(i,j,nz-1)=aiciz6*(uz(i,j,nz)+uz(i,j,nz-1))&
+                +biciz6*(uz(i,j,1)+uz(i,j,nz-2))&
+                +ciciz6*(uz(i,j,2)+uz(i,j,nz-3))&
+                +diciz6*(uz(i,j,3)+uz(i,j,nz-4))
+           rz(i,j,nz-1)=0.
+           tz(i,j,nz)=aiciz6*(uz(i,j,1)+uz(i,j,nz))&
+                +biciz6*(uz(i,j,2)+uz(i,j,nz-1))&
+                +ciciz6*(uz(i,j,3)+uz(i,j,nz-2))&
+                +diciz6*(uz(i,j,4)+uz(i,j,nz-3))
+           rz(i  ,j,nz)=ailcaiz6
+        enddo
+     enddo
+     do k=2,nz
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*cisz6(k)
+              rz(i,j,k)=rz(i,j,k)-rz(i,j,k-1)*cisz6(k)
+           enddo
+        enddo
+     enddo
+     do i=1,nx
+        do j=1,ny
+           tz(i,j,nz)=tz(i,j,nz)*ciwz6(nz)
+           rz(i,j,nz)=rz(i,j,nz)*ciwz6(nz)
+        enddo
+     enddo
+     do k=nz-1,1,-1
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,k)=(tz(i,j,k)-cifz6(k)*tz(i,j,k+1))*ciwz6(k)
+              rz(i,j,k)=(rz(i,j,k)-cifz6(k)*rz(i,j,k+1))*ciwz6(k)
+           enddo
+        enddo
+     enddo
+     do j=1,ny
+        do i=1,nx
+           sz(i,j)=(tz(i,j,1)-ailcaiz6*tz(i,j,nz))/&
+                (1.+rz(i,j,1)-ailcaiz6*rz(i,j,nz))
+        enddo
+     enddo
+     do k=1,nz
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,k)=tz(i,j,k)-sz(i,j)*rz(i,j,k)
+           enddo
+        enddo
+     enddo
+  else
+     if (npaire==1) then
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,1)=aiciz6*(uz(i,j,2)+uz(i,j,1))&
+                   +biciz6*(uz(i,j,3)+uz(i,j,2))&
+                   +ciciz6*(uz(i,j,4)+uz(i,j,3))&             
+                   +diciz6*(uz(i,j,5)+uz(i,j,4))             
+              tz(i,j,2)=aiciz6*(uz(i,j,3)+uz(i,j,2))&
+                   +biciz6*(uz(i,j,4)+uz(i,j,1))&
+                   +ciciz6*(uz(i,j,5)+uz(i,j,2))&             
+                   +diciz6*(uz(i,j,6)+uz(i,j,3))             
+              tz(i,j,3)=aiciz6*(uz(i,j,4)+uz(i,j,3))&
+                   +biciz6*(uz(i,j,5)+uz(i,j,2))&
+                   +ciciz6*(uz(i,j,6)+uz(i,j,1))&             
+                   +diciz6*(uz(i,j,7)+uz(i,j,2))             
+           enddo
+        enddo
+        do k=4,nzm-3
+           do j=1,ny
+              do i=1,nx
+                 tz(i,j,k)=aiciz6*(uz(i,j,k+1)+uz(i,j,k))&
+                      +biciz6*(uz(i,j,k+2)+uz(i,j,k-1))&
+                      +ciciz6*(uz(i,j,k+3)+uz(i,j,k-2))&              
+                      +diciz6*(uz(i,j,k+4)+uz(i,j,k-3))              
+              enddo
+           enddo
+        enddo
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,nzm-2)=aiciz6*(uz(i,j,nzm-1)+uz(i,j,nzm-2))&
+                   +biciz6*(uz(i,j,nzm)+uz(i,j,nzm-3))&
+                   +ciciz6*(uz(i,j,nz)+uz(i,j,nzm-4))&         
+                   +diciz6*(uz(i,j,nzm)+uz(i,j,nzm-5))         
+              tz(i,j,nzm-1)=aiciz6*(uz(i,j,nzm)+uz(i,j,nzm-1))&
+                   +biciz6*(uz(i,j,nz)+uz(i,j,nzm-2))&
+                   +ciciz6*(uz(i,j,nzm)+uz(i,j,nzm-3))&         
+                   +diciz6*(uz(i,j,nzm-1)+uz(i,j,nzm-4))         
+              tz(i,j,nzm)=aiciz6*(uz(i,j,nz)+uz(i,j,nzm))&
+                   +biciz6*(uz(i,j,nzm)+uz(i,j,nzm-1))&
+                   +ciciz6*(uz(i,j,nzm-1)+uz(i,j,nzm-2))&            
+                   +diciz6*(uz(i,j,nzm-2)+uz(i,j,nzm-3))            
+           enddo
+        enddo
+        do k=2,nzm
+           do j=1,ny
+              do i=1,nx
+                 tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*cisz6(k)
+              enddo
+           enddo
+        enddo
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,nzm)=tz(i,j,nzm)*ciwz6(nzm)
+           enddo
+        enddo
+        do k=nzm-1,1,-1
+           do j=1,ny
+              do i=1,nx
+                 tz(i,j,k)=(tz(i,j,k)-cifz6(k)*tz(i,j,k+1))*ciwz6(k)
+              enddo
+           enddo
+        enddo
+     endif
+  endif
+
+  return     
+end subroutine interzvp
+
+!********************************************************************
 !
+subroutine derzpv(tz,uz,rz,sz,cfiz6,csiz6,cwiz6,cfz6,csz6,cwz6,&
+     nx,ny,nzm,nz,npaire) 
+  !
+  !********************************************************************
+
+  USE param 
+  USE derivZ 
+
+  implicit none
+
+  integer :: nx,nzm,ny,nz,npaire
+  real(mytype), dimension(nx,ny,nz) :: tz
+  real(mytype), dimension(nx,ny,nzm) :: uz,rz
+  real(mytype), dimension(nx,ny) :: sz
+  real(mytype), dimension(nz) :: cfiz6,csiz6,cwiz6
+  real(mytype), dimension(nz) :: cfz6,csz6,cwz6
+  integer :: i,j,k
+
+  if (nclz) then
+     do j=1,ny
+        do i=1,nx
+           tz(i,j,1)=aciz6*(uz(i,j,1)-uz(i,j,nz))&
+                +bciz6*(uz(i,j,2)-uz(i,j,nz-1))
+           rz(i,j,1)=-1.
+           tz(i,j,2)=aciz6*(uz(i,j,2)-uz(i,j,1))&
+                +bciz6*(uz(i,j,3)-uz(i,j,nz))
+           rz(i,j,2)=0.
+        enddo
+     enddo
+     do k=3,nz-2
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,k)=aciz6*(uz(i,j,k)-uz(i,j,k-1))&
+                   +bciz6*(uz(i,j,k+1)-uz(i,j,k-2))
+              rz(i,j,k)=0.
+           enddo
+        enddo
+     enddo
+     do j=1,ny
+        do i=1,nx
+           tz(i,j,nz-1)=aciz6*(uz(i,j,nz-1)-uz(i,j,nz-2))&
+                +bciz6*(uz(i,j,nz)-uz(i,j,nz-3))
+           rz(i,j,nz-1)=0.
+           tz(i,j,nz)=aciz6*(uz(i,j,nz)-uz(i,j,nz-1))&
+                +bciz6*(uz(i,j,1)-uz(i,j,nz-2))
+           rz(i,j,nz)=alcaiz6
+        enddo
+     enddo
+     do k=2,nz
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*csz6(k)
+              rz(i,j,k)=rz(i,j,k)-rz(i,j,k-1)*csz6(k)
+           enddo
+        enddo
+     enddo
+     do j=1,ny
+        do i=1,nx
+           tz(i,j,nz)=tz(i,j,nz)*cwz6(nz)
+           rz(i,j,nz)=rz(i,j,nz)*cwz6(nz)
+        enddo
+     enddo
+     do k=nz-1,1,-1
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,k)=(tz(i,j,k)-cfz6(k)*tz(i,j,k+1))*cwz6(k)
+              rz(i,j,k)=(rz(i,j,k)-cfz6(k)*rz(i,j,k+1))*cwz6(k)
+           enddo
+        enddo
+     enddo
+     do j=1,ny
+        do i=1,nx
+           sz(i,j)=(tz(i,j,1)-alcaiz6*tz(i,j,nz))/&
+                (1.+rz(i,j,1)-alcaiz6*rz(i,j,nz))
+        enddo
+     enddo
+     do k=1,nz
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,k)=tz(i,j,k)-sz(i,j)*rz(i,j,k)
+           enddo
+        enddo
+     enddo
+  else
+     if (npaire==1) then
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,1)=0.
+              tz(i,j,2)=aciz6*(uz(i,j,2)-uz(i,j,1))&
+                   +bciz6*(uz(i,j,3)-uz(i,j,1))  
+           enddo
+        enddo
+        do k=3,nz-2
+           do j=1,ny
+              do i=1,nx
+                 tz(i,j,k)=aciz6*(uz(i,j,k)-uz(i,j,k-1))&
+                      +bciz6*(uz(i,j,k+1)-uz(i,j,k-2))
+              enddo
+           enddo
+        enddo
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,nz-1)=aciz6*(uz(i,j,nz-1)-uz(i,j,nz-2))&
+                   +bciz6*(uz(i,j,nz-1)-uz(i,j,nz-3))
+              tz(i,j,nz)=0.
+           enddo
+        enddo
+        do k=2,nz
+           do j=1,ny
+              do i=1,nx
+                 tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*csiz6(k)
+              enddo
+           enddo
+        enddo
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,nz)=tz(i,j,nz)*cwiz6(nz)
+           enddo
+        enddo
+        do k=nz-1,1,-1
+           do j=1,ny
+              do i=1,nx
+                 tz(i,j,k)=(tz(i,j,k)-cfiz6(k)*tz(i,j,k+1))*cwiz6(k)
+              enddo
+           enddo
+        enddo
+     endif
+  endif
+
+  return
+end subroutine derzpv
+
 !********************************************************************
-  
-USE param
-USE derivZ 
-
-implicit none
-
-integer :: nx,ny,nz,nzm,npaire
-real(mytype), dimension(nx,ny,nz) :: tz
-real(mytype), dimension(nx,ny,nzm) :: uz
-real(mytype), dimension(nx,ny,nz) :: rz
-real(mytype), dimension(nx,ny) :: sz
-real(mytype), dimension(nz) :: cifiz6,cisiz6,ciwiz6
-real(mytype), dimension(nz) :: cifz6,cisz6,ciwz6
-integer :: i,j,k
-
-if (nclz) then
-   do j=1,ny
-   do i=1,nx
-      tz(i,j,1)=aiciz6*(uz(i,j,1)+uz(i,j,nz))&
-           +biciz6*(uz(i,j,2)+uz(i,j,nz-1))&
-           +ciciz6*(uz(i,j,3)+uz(i,j,nz-2))&
-           +diciz6*(uz(i,j,4)+uz(i,j,nz-3))
-      rz(i,j,1)=-1.
-      tz(i,j,2)=aiciz6*(uz(i,j,2)+uz(i,j,1))&
-           +biciz6*(uz(i,j,3)+uz(i,j,nz))&
-           +ciciz6*(uz(i,j,4)+uz(i,j,nz-1))&
-           +diciz6*(uz(i,j,5)+uz(i,j,nz-2))
-      rz(i,j,2)=0.
-      tz(i,j,3)=aiciz6*(uz(i,j,3)+uz(i,j,2))&
-           +biciz6*(uz(i,j,4)+uz(i,j,1))&
-           +ciciz6*(uz(i,j,5)+uz(i,j,nz))&
-           +diciz6*(uz(i,j,6)+uz(i,j,nz-1))
-      rz(i,j,3)=0.
-      tz(i,j,4)=aiciz6*(uz(i,j,4)+uz(i,j,3))&
-           +biciz6*(uz(i,j,5)+uz(i,j,2))&
-           +ciciz6*(uz(i,j,6)+uz(i,j,1))&
-           +diciz6*(uz(i,j,7)+uz(i,j,nz))
-      rz(i,j,4)=0.
-   enddo
-   enddo
-   do k=5,nz-3
-   do j=1,ny
-   do i=1,nx
-      tz(i,j,k)=aiciz6*(uz(i,j,k)+uz(i,j,k-1))&
-           +biciz6*(uz(i,j,k+1)+uz(i,j,k-2))&
-           +ciciz6*(uz(i,j,k+2)+uz(i,j,k-3))&
-           +diciz6*(uz(i,j,k+3)+uz(i,j,k-4))
-      rz(i,j,k)=0.
-   enddo
-   enddo
-   enddo
-   do j=1,ny
-   do i=1,nx
-      tz(i,j,nz-2)=aiciz6*(uz(i,j,nz-2)+uz(i,j,nz-3))&
-           +biciz6*(uz(i,j,nz-1)+uz(i,j,nz-4))&
-           +ciciz6*(uz(i,j,nz)+uz(i,j,nz-5))&
-           +diciz6*(uz(i,j,1)+uz(i,j,nz-6))
-      rz(i,j,nz-2)=0.
-      tz(i,j,nz-1)=aiciz6*(uz(i,j,nz-1)+uz(i,j,nz-2))&
-           +biciz6*(uz(i,j,nz)+uz(i,j,nz-3))&
-           +ciciz6*(uz(i,j,1)+uz(i,j,nz-4))&
-           +diciz6*(uz(i,j,2)+uz(i,j,nz-5))
-      rz(i,j,nz-1)=0.
-      tz(i,j,nz)=aiciz6*(uz(i,j,nz)+uz(i,j,nz-1))&
-           +biciz6*(uz(i,j,1)+uz(i,j,nz-2))&
-           +ciciz6*(uz(i,j,2)+uz(i,j,nz-3))&
-           +diciz6*(uz(i,j,3)+uz(i,j,nz-4))
-      rz(i,j,nz)=ailcaiz6
-   enddo
-   enddo
-   do k=2,nz
-   do j=1,ny
-   do i=1,nx
-      tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*cisz6(k)
-      rz(i,j,k)=rz(i,j,k)-rz(i,j,k-1)*cisz6(k)
-   enddo
-   enddo
-   enddo
-   do j=1,ny
-   do i=1,nx
-      tz(i,j,nz)=tz(i,j,nz)*ciwz6(nz)
-      rz(i,j,nz)=rz(i,j,nz)*ciwz6(nz)
-   enddo
-   enddo
-   do k=nz-1,1,-1
-   do j=1,ny
-   do i=1,nx
-      tz(i,j,k)=(tz(i,j,k)-cifz6(k)*tz(i,j,k+1))*ciwz6(k)
-      rz(i,j,k)=(rz(i,j,k)-cifz6(k)*rz(i,j,k+1))*ciwz6(k)
-   enddo
-   enddo
-   enddo
-   do j=1,ny
-   do i=1,nx
-      sz(i,j)=(tz(i,j,1)-ailcaiz6*tz(i,j,nz))/&
-           (1.+rz(i,j,1)-ailcaiz6*rz(i,j,nz))
-   enddo
-   enddo
-   do k=1,nz
-   do j=1,ny
-   do i=1,nx
-      tz(i,j,k)=tz(i,j,k)-sz(i,j)*rz(i,j,k)
-   enddo
-   enddo
-   enddo
-else
-   if (npaire==1) then
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,1)=aiciz6*(uz(i,j,1)+uz(i,j,1))&
-              +biciz6*(uz(i,j,2)+uz(i,j,2))&
-              +ciciz6*(uz(i,j,3)+uz(i,j,3))&
-              +diciz6*(uz(i,j,4)+uz(i,j,4))     
-         tz(i,j,2)=aiciz6*(uz(i,j,2)+uz(i,j,1))&
-              +biciz6*(uz(i,j,3)+uz(i,j,1))&
-              +ciciz6*(uz(i,j,4)+uz(i,j,2))&
-              +diciz6*(uz(i,j,5)+uz(i,j,3))
-         tz(i,j,3)=aiciz6*(uz(i,j,3)+uz(i,j,2))&
-              +biciz6*(uz(i,j,4)+uz(i,j,1))&
-              +ciciz6*(uz(i,j,5)+uz(i,j,1))&
-              +diciz6*(uz(i,j,6)+uz(i,j,2))  
-         tz(i,j,4)=aiciz6*(uz(i,j,4)+uz(i,j,3))&
-              +biciz6*(uz(i,j,5)+uz(i,j,2))&
-              +ciciz6*(uz(i,j,6)+uz(i,j,1))&
-              +diciz6*(uz(i,j,7)+uz(i,j,1))  
-      enddo
-      enddo         
-      do k=5,nz-4
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,k)=aiciz6*(uz(i,j,k)+uz(i,j,k-1))&
-              +biciz6*(uz(i,j,k+1)+uz(i,j,k-2))&
-              +ciciz6*(uz(i,j,k+2)+uz(i,j,k-3))&
-              +diciz6*(uz(i,j,k+3)+uz(i,j,k-4))
-      enddo
-      enddo
-      enddo     
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,nz-3)=aiciz6*(uz(i,j,nz-3)+uz(i,j,nz-4))&
-              +biciz6*(uz(i,j,nz-2)+uz(i,j,nz-5))&
-              +ciciz6*(uz(i,j,nz-1)+uz(i,j,nz-6))&
-              +diciz6*(uz(i,j,nz-1)+uz(i,j,nz-7))
-         tz(i,j,nz-2)=aiciz6*(uz(i,j,nz-2)+uz(i,j,nz-3))&
-              +biciz6*(uz(i,j,nz-1)+uz(i,j,nz-4))&
-              +ciciz6*(uz(i,j,nz-1)+uz(i,j,nz-5))&
-              +diciz6*(uz(i,j,nz-2)+uz(i,j,nz-6))
-         tz(i,j,nz-1)=aiciz6*(uz(i,j,nz-1)+uz(i,j,nz-2))&
-              +biciz6*(uz(i,j,nz-1)+uz(i,j,nz-3))&
-              +ciciz6*(uz(i,j,nz-2)+uz(i,j,nz-4))&
-              +diciz6*(uz(i,j,nz-3)+uz(i,j,nz-5))
-         tz(i,j,nz)=aiciz6*(uz(i,j,nz-1)+uz(i,j,nz-1))&
-              +biciz6*(uz(i,j,nz-2)+uz(i,j,nz-2))&
-              +ciciz6*(uz(i,j,nz-3)+uz(i,j,nz-3))& 
-              +diciz6*(uz(i,j,nz-4)+uz(i,j,nz-4)) 
-      enddo
-      enddo
-      do k=2,nz
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*cisiz6(k)
-      enddo
-      enddo
-      enddo
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,nz)=tz(i,j,nz)*ciwiz6(nz)
-      enddo
-      enddo
-      do k=nz-1,1,-1
-      do j=1,ny
-      do i=1,nx
-         tz(i,j,k)=(tz(i,j,k)-cifiz6(k)*tz(i,j,k+1))*ciwiz6(k)
-      enddo
-      enddo
-      enddo 
-   endif
-endif
-
-return
+!
+subroutine interzpv(tz,uz,rz,sz,cifiz6,cisiz6,ciwiz6,cifz6,cisz6,ciwz6,&
+     nx,ny,nzm,nz,npaire) 
+  !
+  !********************************************************************
+
+  USE param
+  USE derivZ 
+
+  implicit none
+
+  integer :: nx,ny,nz,nzm,npaire
+  real(mytype), dimension(nx,ny,nz) :: tz
+  real(mytype), dimension(nx,ny,nzm) :: uz
+  real(mytype), dimension(nx,ny,nz) :: rz
+  real(mytype), dimension(nx,ny) :: sz
+  real(mytype), dimension(nz) :: cifiz6,cisiz6,ciwiz6
+  real(mytype), dimension(nz) :: cifz6,cisz6,ciwz6
+  integer :: i,j,k
+
+  if (nclz) then
+     do j=1,ny
+        do i=1,nx
+           tz(i,j,1)=aiciz6*(uz(i,j,1)+uz(i,j,nz))&
+                +biciz6*(uz(i,j,2)+uz(i,j,nz-1))&
+                +ciciz6*(uz(i,j,3)+uz(i,j,nz-2))&
+                +diciz6*(uz(i,j,4)+uz(i,j,nz-3))
+           rz(i,j,1)=-1.
+           tz(i,j,2)=aiciz6*(uz(i,j,2)+uz(i,j,1))&
+                +biciz6*(uz(i,j,3)+uz(i,j,nz))&
+                +ciciz6*(uz(i,j,4)+uz(i,j,nz-1))&
+                +diciz6*(uz(i,j,5)+uz(i,j,nz-2))
+           rz(i,j,2)=0.
+           tz(i,j,3)=aiciz6*(uz(i,j,3)+uz(i,j,2))&
+                +biciz6*(uz(i,j,4)+uz(i,j,1))&
+                +ciciz6*(uz(i,j,5)+uz(i,j,nz))&
+                +diciz6*(uz(i,j,6)+uz(i,j,nz-1))
+           rz(i,j,3)=0.
+           tz(i,j,4)=aiciz6*(uz(i,j,4)+uz(i,j,3))&
+                +biciz6*(uz(i,j,5)+uz(i,j,2))&
+                +ciciz6*(uz(i,j,6)+uz(i,j,1))&
+                +diciz6*(uz(i,j,7)+uz(i,j,nz))
+           rz(i,j,4)=0.
+        enddo
+     enddo
+     do k=5,nz-3
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,k)=aiciz6*(uz(i,j,k)+uz(i,j,k-1))&
+                   +biciz6*(uz(i,j,k+1)+uz(i,j,k-2))&
+                   +ciciz6*(uz(i,j,k+2)+uz(i,j,k-3))&
+                   +diciz6*(uz(i,j,k+3)+uz(i,j,k-4))
+              rz(i,j,k)=0.
+           enddo
+        enddo
+     enddo
+     do j=1,ny
+        do i=1,nx
+           tz(i,j,nz-2)=aiciz6*(uz(i,j,nz-2)+uz(i,j,nz-3))&
+                +biciz6*(uz(i,j,nz-1)+uz(i,j,nz-4))&
+                +ciciz6*(uz(i,j,nz)+uz(i,j,nz-5))&
+                +diciz6*(uz(i,j,1)+uz(i,j,nz-6))
+           rz(i,j,nz-2)=0.
+           tz(i,j,nz-1)=aiciz6*(uz(i,j,nz-1)+uz(i,j,nz-2))&
+                +biciz6*(uz(i,j,nz)+uz(i,j,nz-3))&
+                +ciciz6*(uz(i,j,1)+uz(i,j,nz-4))&
+                +diciz6*(uz(i,j,2)+uz(i,j,nz-5))
+           rz(i,j,nz-1)=0.
+           tz(i,j,nz)=aiciz6*(uz(i,j,nz)+uz(i,j,nz-1))&
+                +biciz6*(uz(i,j,1)+uz(i,j,nz-2))&
+                +ciciz6*(uz(i,j,2)+uz(i,j,nz-3))&
+                +diciz6*(uz(i,j,3)+uz(i,j,nz-4))
+           rz(i,j,nz)=ailcaiz6
+        enddo
+     enddo
+     do k=2,nz
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*cisz6(k)
+              rz(i,j,k)=rz(i,j,k)-rz(i,j,k-1)*cisz6(k)
+           enddo
+        enddo
+     enddo
+     do j=1,ny
+        do i=1,nx
+           tz(i,j,nz)=tz(i,j,nz)*ciwz6(nz)
+           rz(i,j,nz)=rz(i,j,nz)*ciwz6(nz)
+        enddo
+     enddo
+     do k=nz-1,1,-1
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,k)=(tz(i,j,k)-cifz6(k)*tz(i,j,k+1))*ciwz6(k)
+              rz(i,j,k)=(rz(i,j,k)-cifz6(k)*rz(i,j,k+1))*ciwz6(k)
+           enddo
+        enddo
+     enddo
+     do j=1,ny
+        do i=1,nx
+           sz(i,j)=(tz(i,j,1)-ailcaiz6*tz(i,j,nz))/&
+                (1.+rz(i,j,1)-ailcaiz6*rz(i,j,nz))
+        enddo
+     enddo
+     do k=1,nz
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,k)=tz(i,j,k)-sz(i,j)*rz(i,j,k)
+           enddo
+        enddo
+     enddo
+  else
+     if (npaire==1) then
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,1)=aiciz6*(uz(i,j,1)+uz(i,j,1))&
+                   +biciz6*(uz(i,j,2)+uz(i,j,2))&
+                   +ciciz6*(uz(i,j,3)+uz(i,j,3))&
+                   +diciz6*(uz(i,j,4)+uz(i,j,4))     
+              tz(i,j,2)=aiciz6*(uz(i,j,2)+uz(i,j,1))&
+                   +biciz6*(uz(i,j,3)+uz(i,j,1))&
+                   +ciciz6*(uz(i,j,4)+uz(i,j,2))&
+                   +diciz6*(uz(i,j,5)+uz(i,j,3))
+              tz(i,j,3)=aiciz6*(uz(i,j,3)+uz(i,j,2))&
+                   +biciz6*(uz(i,j,4)+uz(i,j,1))&
+                   +ciciz6*(uz(i,j,5)+uz(i,j,1))&
+                   +diciz6*(uz(i,j,6)+uz(i,j,2))  
+              tz(i,j,4)=aiciz6*(uz(i,j,4)+uz(i,j,3))&
+                   +biciz6*(uz(i,j,5)+uz(i,j,2))&
+                   +ciciz6*(uz(i,j,6)+uz(i,j,1))&
+                   +diciz6*(uz(i,j,7)+uz(i,j,1))  
+           enddo
+        enddo
+        do k=5,nz-4
+           do j=1,ny
+              do i=1,nx
+                 tz(i,j,k)=aiciz6*(uz(i,j,k)+uz(i,j,k-1))&
+                      +biciz6*(uz(i,j,k+1)+uz(i,j,k-2))&
+                      +ciciz6*(uz(i,j,k+2)+uz(i,j,k-3))&
+                      +diciz6*(uz(i,j,k+3)+uz(i,j,k-4))
+              enddo
+           enddo
+        enddo
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,nz-3)=aiciz6*(uz(i,j,nz-3)+uz(i,j,nz-4))&
+                   +biciz6*(uz(i,j,nz-2)+uz(i,j,nz-5))&
+                   +ciciz6*(uz(i,j,nz-1)+uz(i,j,nz-6))&
+                   +diciz6*(uz(i,j,nz-1)+uz(i,j,nz-7))
+              tz(i,j,nz-2)=aiciz6*(uz(i,j,nz-2)+uz(i,j,nz-3))&
+                   +biciz6*(uz(i,j,nz-1)+uz(i,j,nz-4))&
+                   +ciciz6*(uz(i,j,nz-1)+uz(i,j,nz-5))&
+                   +diciz6*(uz(i,j,nz-2)+uz(i,j,nz-6))
+              tz(i,j,nz-1)=aiciz6*(uz(i,j,nz-1)+uz(i,j,nz-2))&
+                   +biciz6*(uz(i,j,nz-1)+uz(i,j,nz-3))&
+                   +ciciz6*(uz(i,j,nz-2)+uz(i,j,nz-4))&
+                   +diciz6*(uz(i,j,nz-3)+uz(i,j,nz-5))
+              tz(i,j,nz)=aiciz6*(uz(i,j,nz-1)+uz(i,j,nz-1))&
+                   +biciz6*(uz(i,j,nz-2)+uz(i,j,nz-2))&
+                   +ciciz6*(uz(i,j,nz-3)+uz(i,j,nz-3))& 
+                   +diciz6*(uz(i,j,nz-4)+uz(i,j,nz-4)) 
+           enddo
+        enddo
+        do k=2,nz
+           do j=1,ny
+              do i=1,nx
+                 tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*cisiz6(k)
+              enddo
+           enddo
+        enddo
+        do j=1,ny
+           do i=1,nx
+              tz(i,j,nz)=tz(i,j,nz)*ciwiz6(nz)
+           enddo
+        enddo
+        do k=nz-1,1,-1
+           do j=1,ny
+              do i=1,nx
+                 tz(i,j,k)=(tz(i,j,k)-cifiz6(k)*tz(i,j,k+1))*ciwiz6(k)
+              enddo
+           enddo
+        enddo
+     endif
+  endif
+
+  return
 end subroutine interzpv
diff --git a/src/filters.f90 b/src/filters.f90
index 5e311fee..839b71af 100644
--- a/src/filters.f90
+++ b/src/filters.f90
@@ -6,9 +6,9 @@ subroutine filter(af)
   USE parfiZ
   USE variables
   USE var
-!=================================================
-! Discrete low-pass filter according to 
-!=================================================	
+  !=================================================
+  ! Discrete low-pass filter according to 
+  !=================================================	
   implicit none
   real(mytype),intent(in) :: af 
 
@@ -37,28 +37,28 @@ subroutine filter(af)
 
   ! Set coefficients for x-direction filter
   call set_filter_coefficients(af,fial1x,fia1x,fib1x,fic1x,fid1x,fial2x,fia2x,fib2x,fic2x,fid2x,fial3x,fia3x,fib3x,fic3x,fid3x,fie3x,fif3x,&
-                                  fialnx,fianx,fibnx,ficnx,fidnx,fialmx,fiamx,fibmx,ficmx,fidmx,fialpx,fiapx,fibpx,ficpx,fidpx,fiepx,fifpx,&
-                                  fialix,fiaix,fibix,ficix,fidix,fiffx,fifsx,fifwx,fiffxp,fifsxp,fifwxp,nx,nclx1,nclxn)
+       fialnx,fianx,fibnx,ficnx,fidnx,fialmx,fiamx,fibmx,ficmx,fidmx,fialpx,fiapx,fibpx,ficpx,fidpx,fiepx,fifpx,&
+       fialix,fiaix,fibix,ficix,fidix,fiffx,fifsx,fifwx,fiffxp,fifsxp,fifwxp,nx,nclx1,nclxn)
   ! Set coefficients for y-direction filter
   call set_filter_coefficients(af,fial1y,fia1y,fib1y,fic1y,fid1y,fial2y,fia2y,fib2y,fic2y,fid2y,fial3y,fia3y,fib3y,fic3y,fid3y,fie3y,fif3y,&
-                                  fialny,fiany,fibny,ficny,fidny,fialmy,fiamy,fibmy,ficmy,fidmy,fialpy,fiapy,fibpy,ficpy,fidpy,fiepy,fifpy,&
-                                  fialjy,fiajy,fibjy,ficjy,fidjy,fiffy,fifsy,fifwy,fiffyp,fifsyp,fifwyp,ny,ncly1,nclyn)
+       fialny,fiany,fibny,ficny,fidny,fialmy,fiamy,fibmy,ficmy,fidmy,fialpy,fiapy,fibpy,ficpy,fidpy,fiepy,fifpy,&
+       fialjy,fiajy,fibjy,ficjy,fidjy,fiffy,fifsy,fifwy,fiffyp,fifsyp,fifwyp,ny,ncly1,nclyn)
   ! Set coefficients for z-direction filter
   call set_filter_coefficients(af,fial1z,fia1z,fib1z,fic1z,fid1z,fial2z,fia2z,fib2z,fic2z,fid2z,fial3z,fia3z,fib3z,fic3z,fid3z,fie3z,fif3z,&
-                                  fialnz,fianz,fibnz,ficnz,fidnz,fialmz,fiamz,fibmz,ficmz,fidmz,fialpz,fiapz,fibpz,ficpz,fidpz,fiepz,fifpz,&
-                                  fialkz,fiakz,fibkz,fickz,fidkz,fiffz,fifsz,fifwz,fiffzp,fifszp,fifwzp,nz,nclz1,nclzn)
+       fialnz,fianz,fibnz,ficnz,fidnz,fialmz,fiamz,fibmz,ficmz,fidmz,fialpz,fiapz,fibpz,ficpz,fidpz,fiepz,fifpz,&
+       fialkz,fiakz,fibkz,fickz,fidkz,fiffz,fifsz,fifwz,fiffzp,fifszp,fifwzp,nz,nclz1,nclzn)
 #ifdef DEBG 
   if (nrank .eq. 0) print *,'# filter calculation end'
 #endif
 
-return 
+  return 
 
 end subroutine filter
 
 
 subroutine set_filter_coefficients(af,alfa1,a1,b1,c1,d1,alfa2,a2,b2,c2,d2,alfa3,a3,b3,c3,d3,e3,f3,&
-                                      alfan,an,bn,cn,dn,alfam,am,bm,cm,dm,alfap,ap,bp,cp,dp,ep,fp,&
-                                      alfai,ai,bi,ci,di,ff,fs,fw,ffp,fsp,fwp,n,ncl1,ncln)
+     alfan,an,bn,cn,dn,alfam,am,bm,cm,dm,alfap,ap,bp,cp,dp,ep,fp,&
+     alfai,ai,bi,ci,di,ff,fs,fw,ffp,fsp,fwp,n,ncl1,ncln)
 
   use decomp_2d, only : mytype, nrank
   use param
@@ -69,73 +69,73 @@ subroutine set_filter_coefficients(af,alfa1,a1,b1,c1,d1,alfa2,a2,b2,c2,d2,alfa3,
   integer,intent(in) :: n,ncl1,ncln
   real(mytype),dimension(n),intent(out) :: ff,fs,fw,ffp,fsp,fwp
   real(mytype),intent(out) :: alfa1,a1,b1,c1,d1,alfa2,a2,b2,c2,d2,alfa3,a3,b3,c3,d3,e3,f3,&
-                              alfan,an,bn,cn,dn,alfam,am,bm,cm,dm,alfap,ap,bp,cp,dp,ep,fp,&
-                              alfai,ai,bi,ci,di
+       alfan,an,bn,cn,dn,alfam,am,bm,cm,dm,alfap,ap,bp,cp,dp,ep,fp,&
+       alfai,ai,bi,ci,di
   integer :: i
   real(mytype),dimension(n) :: fb,fc
 
-    ! Set the coefficient for the discrete filter following 
-    ! the tridiagonal filtering of Motheau and Abraham, JCP 2016 
-    ! Filter should be -0.5<filax<0.5
-    
-    ! General Case (entire points)
-    ! alpha*fhat(i-1)+fhat(i)+alpha*fhat(i+1)=af(i)+b/2*[f(i+1)+f(i-1)] + ...
-    
-    ! Coefficients are calculated according to the report of Gaitonde & Visbal, 1998,
-    ! "High-order schemes for Navier-Stokes equations: Algorithm and implementation into FDL3DI"
-
-  
-    alfai=af                                       ! alpha_f
-    !Interior points
-    ai=(eleven + ten*af)/sixteen                   ! a
-    bi=half*(fifteen +thirtyfour*af)/thirtytwo     ! b/2 
-    ci=half*(-three + six*af)/sixteen              ! c/2
-    di=half*(one - two*af)/thirtytwo               ! d/2
-    ! Explicit third/fifth-order filters near the boundaries!
-    !Boundary point 1 (no-filtering)
-    alfa1=zero
-    a1=one                           ! a1=7./8.+af/8.! a1/2
-    b1=zero                          ! b1=3./8.+5.*af/8.  
-    c1=zero                          ! c1=-3./8.+3./8.*af 
-    d1=zero                          ! d1=1./8.-1./8.*af 
-    !Boundary point 2 (Third order)
-    alfa2=af
-    a2=one/eight+three/four*af            ! a2
-    b2=five/eight+three/four*af           ! b2
-    c2=three/eight+af/four                ! c2
-    d2=-one/eight+af/four                 ! d2
-    !Boundary point 3 (Fifth order)
-    alfa3=af
-    a3= -one/thirtytwo+af/sixteen         ! a3
-    b3= five/thirtytwo+eleven/sixteen*af  ! b3
-    c3= eleven/sixteen+five*af/eight      ! c3
-    d3= five/sixteen+three*af/eight       ! d3
-    e3=-five/thirtytwo+five*af/sixteen    ! e3
-    f3= one/thirtytwo-af/sixteen          ! f3
-    !Boundary point n (no-filtering)
-    alfan=zero
-    an=one                                !an = 7./8.+af/8.! a1/2
-    bn=zero                               !bn = 3./8.+5.*af/8.
-    cn=zero                               !cn =-3./8.+3./8.*af    
-    dn=zero                               !dn = 1./8.-1./8.*af    
-    !Boundary point 2 (Third order)
-    alfam=af
-    am=one/eight+three/four*af            ! am
-    bm=five/eight+three/four*af           ! bm
-    cm=three/eight+af/four                ! cm
-    dm=-one/eight+af/four                 ! dm
-    !Boundary point 3 (Fifth order)
-    alfap=af
-    ap=-one/thirtytwo+af/sixteen          ! ap
-    bp= five/thirtytwo+eleven/sixteen*af  ! bp
-    cp= eleven/sixteen+five*af/eight      ! cp
-    dp= five/sixteen+three*af/eight       ! dp
-    ep=-five/thirtytwo+five*af/sixteen    ! ep
-    fp= one/thirtytwo-af/sixteen          ! fp
-
-    ff=zero;fs=zero;fw=zero;ffp=zero;fsp=zero;fwp=zero
-    fb=zero;fc=zero
- 
+  ! Set the coefficient for the discrete filter following 
+  ! the tridiagonal filtering of Motheau and Abraham, JCP 2016 
+  ! Filter should be -0.5<filax<0.5
+
+  ! General Case (entire points)
+  ! alpha*fhat(i-1)+fhat(i)+alpha*fhat(i+1)=af(i)+b/2*[f(i+1)+f(i-1)] + ...
+
+  ! Coefficients are calculated according to the report of Gaitonde & Visbal, 1998,
+  ! "High-order schemes for Navier-Stokes equations: Algorithm and implementation into FDL3DI"
+
+
+  alfai=af                                       ! alpha_f
+  !Interior points
+  ai=(eleven + ten*af)/sixteen                   ! a
+  bi=half*(fifteen +thirtyfour*af)/thirtytwo     ! b/2 
+  ci=half*(-three + six*af)/sixteen              ! c/2
+  di=half*(one - two*af)/thirtytwo               ! d/2
+  ! Explicit third/fifth-order filters near the boundaries!
+  !Boundary point 1 (no-filtering)
+  alfa1=zero
+  a1=one                           ! a1=7./8.+af/8.! a1/2
+  b1=zero                          ! b1=3./8.+5.*af/8.  
+  c1=zero                          ! c1=-3./8.+3./8.*af 
+  d1=zero                          ! d1=1./8.-1./8.*af 
+  !Boundary point 2 (Third order)
+  alfa2=af
+  a2=one/eight+three/four*af            ! a2
+  b2=five/eight+three/four*af           ! b2
+  c2=three/eight+af/four                ! c2
+  d2=-one/eight+af/four                 ! d2
+  !Boundary point 3 (Fifth order)
+  alfa3=af
+  a3= -one/thirtytwo+af/sixteen         ! a3
+  b3= five/thirtytwo+eleven/sixteen*af  ! b3
+  c3= eleven/sixteen+five*af/eight      ! c3
+  d3= five/sixteen+three*af/eight       ! d3
+  e3=-five/thirtytwo+five*af/sixteen    ! e3
+  f3= one/thirtytwo-af/sixteen          ! f3
+  !Boundary point n (no-filtering)
+  alfan=zero
+  an=one                                !an = 7./8.+af/8.! a1/2
+  bn=zero                               !bn = 3./8.+5.*af/8.
+  cn=zero                               !cn =-3./8.+3./8.*af    
+  dn=zero                               !dn = 1./8.-1./8.*af    
+  !Boundary point 2 (Third order)
+  alfam=af
+  am=one/eight+three/four*af            ! am
+  bm=five/eight+three/four*af           ! bm
+  cm=three/eight+af/four                ! cm
+  dm=-one/eight+af/four                 ! dm
+  !Boundary point 3 (Fifth order)
+  alfap=af
+  ap=-one/thirtytwo+af/sixteen          ! ap
+  bp= five/thirtytwo+eleven/sixteen*af  ! bp
+  cp= eleven/sixteen+five*af/eight      ! cp
+  dp= five/sixteen+three*af/eight       ! dp
+  ep=-five/thirtytwo+five*af/sixteen    ! ep
+  fp= one/thirtytwo-af/sixteen          ! fp
+
+  ff=zero;fs=zero;fw=zero;ffp=zero;fsp=zero;fwp=zero
+  fb=zero;fc=zero
+
   if     (ncl1.eq.0) then !Periodic
      ff(1)   =alfai
      ff(2)   =alfai
@@ -194,7 +194,7 @@ subroutine set_filter_coefficients(af,alfa1,a1,b1,c1,d1,alfa2,a2,b2,c2,d2,alfa3,
      fc(i)=one
      fb(i)=alfai
   enddo
- 
+
   do i=1,n
      ffp(i)=ff(i)
   enddo
@@ -216,1017 +216,1017 @@ end subroutine set_filter_coefficients
 
 subroutine filx_00(tx,ux,rx,fisx,fiffx,fifsx,fifwx,nx,ny,nz,npaire) 
 
-USE param  
-USE parfiX 
-
-implicit none
-
-integer :: nx,ny,nz,npaire,i,j,k
-real(mytype), dimension(nx,ny,nz) :: tx,ux,rx 
-real(mytype), dimension(ny,nz) :: fisx
-real(mytype), dimension(nx) :: fiffx,fifsx,fifwx
-  
-if(iibm.eq.2) call lagpolx(ux)
-
-   do k=1,nz 
-   do j=1,ny 
-      tx(1,j,k)=fiaix*ux(1,j,k)+fibix*(ux(2,j,k)+ux(nx,j,k))& 
-                               +ficix*(ux(3,j,k)+ux(nx-1,j,k))&
-                               +fidix*(ux(4,j,k)+ux(nx-2,j,k)) 
-      rx(1,j,k)=-1.
-      tx(2,j,k)=fiaix*ux(2,j,k)+fibix*(ux(3,j,k)+ux(1,j,k))&
-                               +ficix*(ux(4,j,k)+ux(nx,j,k))& 
-                               +fidix*(ux(5,j,k)+ux(nx-1,j,k)) 
-      rx(2,j,k)=0. 
-      tx(3,j,k)=fiaix*ux(3,j,k)+fibix*(ux(4,j,k)+ux(2,j,k))& 
-                               +ficix*(ux(5,j,k)+ux(1,j,k))& 
-                               +fidix*(ux(6,j,k)+ux(nx,j,k)) 
-      rx(3,j,k)=0. 
-      do i=4,nx-3
-         tx(i,j,k)=fiaix*ux(i,j,k)+fibix*(ux(i+1,j,k)+ux(i-1,j,k))& 
-                                  +ficix*(ux(i+2,j,k)+ux(i-2,j,k))&
-                                  +fidix*(ux(i+3,j,k)+ux(i-3,j,k)) 
-         rx(i,j,k)=0. 
-      enddo
-      tx(nx-2,j,k)=fiaix*ux(nx-2,j,k)+fibix*(ux(nx-3,j,k)+ux(nx-1,j,k))&
-                                     +ficix*(ux(nx-4,j,k)+ux(nx,j,k))& 
-                                     +fidix*(ux(nx-5,j,k)+ux(1,j,k)) 
-      rx(nx-2,j,k)=0. 
-      tx(nx-1,j,k)=fiaix*ux(nx-1,j,k)+fibix*(ux(nx-2,j,k)+ux(nx,j,k))&
-                                     +ficix*(ux(nx-3,j,k)+ux(1,j,k))& 
-                                     +fidix*(ux(nx-4,j,k)+ux(2,j,k)) 
-      rx(nx-1,j,k)=0. 
-      tx(nx,j,k)=fiaix*ux(nx,j,k)+fibix*(ux(nx-1,j,k)+ux(1,j,k))&
-                                 +ficix*(ux(nx-2,j,k)+ux(2,j,k))& 
-                                 +fidix*(ux(nx-3,j,k)+ux(3,j,k)) 
-      rx(nx,j,k)=fialix           
-      do i=2, nx
-         tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*fifsx(i) 
-         rx(i,j,k)=rx(i,j,k)-rx(i-1,j,k)*fifsx(i) 
-      enddo
-      tx(nx,j,k)=tx(nx,j,k)*fifwx(nx) 
-      rx(nx,j,k)=rx(nx,j,k)*fifwx(nx) 
-      do i=nx-1,1,-1
-         tx(i,j,k)=(tx(i,j,k)-fiffx(i)*tx(i+1,j,k))*fifwx(i) 
-         rx(i,j,k)=(rx(i,j,k)-fiffx(i)*rx(i+1,j,k))*fifwx(i) 
-      enddo
+  USE param  
+  USE parfiX 
+
+  implicit none
+
+  integer :: nx,ny,nz,npaire,i,j,k
+  real(mytype), dimension(nx,ny,nz) :: tx,ux,rx 
+  real(mytype), dimension(ny,nz) :: fisx
+  real(mytype), dimension(nx) :: fiffx,fifsx,fifwx
+
+  if(iibm.eq.2) call lagpolx(ux)
+
+  do k=1,nz 
+     do j=1,ny 
+        tx(1,j,k)=fiaix*ux(1,j,k)+fibix*(ux(2,j,k)+ux(nx,j,k))& 
+             +ficix*(ux(3,j,k)+ux(nx-1,j,k))&
+             +fidix*(ux(4,j,k)+ux(nx-2,j,k)) 
+        rx(1,j,k)=-1.
+        tx(2,j,k)=fiaix*ux(2,j,k)+fibix*(ux(3,j,k)+ux(1,j,k))&
+             +ficix*(ux(4,j,k)+ux(nx,j,k))& 
+             +fidix*(ux(5,j,k)+ux(nx-1,j,k)) 
+        rx(2,j,k)=0. 
+        tx(3,j,k)=fiaix*ux(3,j,k)+fibix*(ux(4,j,k)+ux(2,j,k))& 
+             +ficix*(ux(5,j,k)+ux(1,j,k))& 
+             +fidix*(ux(6,j,k)+ux(nx,j,k)) 
+        rx(3,j,k)=0. 
+        do i=4,nx-3
+           tx(i,j,k)=fiaix*ux(i,j,k)+fibix*(ux(i+1,j,k)+ux(i-1,j,k))& 
+                +ficix*(ux(i+2,j,k)+ux(i-2,j,k))&
+                +fidix*(ux(i+3,j,k)+ux(i-3,j,k)) 
+           rx(i,j,k)=0. 
+        enddo
+        tx(nx-2,j,k)=fiaix*ux(nx-2,j,k)+fibix*(ux(nx-3,j,k)+ux(nx-1,j,k))&
+             +ficix*(ux(nx-4,j,k)+ux(nx,j,k))& 
+             +fidix*(ux(nx-5,j,k)+ux(1,j,k)) 
+        rx(nx-2,j,k)=0. 
+        tx(nx-1,j,k)=fiaix*ux(nx-1,j,k)+fibix*(ux(nx-2,j,k)+ux(nx,j,k))&
+             +ficix*(ux(nx-3,j,k)+ux(1,j,k))& 
+             +fidix*(ux(nx-4,j,k)+ux(2,j,k)) 
+        rx(nx-1,j,k)=0. 
+        tx(nx,j,k)=fiaix*ux(nx,j,k)+fibix*(ux(nx-1,j,k)+ux(1,j,k))&
+             +ficix*(ux(nx-2,j,k)+ux(2,j,k))& 
+             +fidix*(ux(nx-3,j,k)+ux(3,j,k)) 
+        rx(nx,j,k)=fialix           
+        do i=2, nx
+           tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*fifsx(i) 
+           rx(i,j,k)=rx(i,j,k)-rx(i-1,j,k)*fifsx(i) 
+        enddo
+        tx(nx,j,k)=tx(nx,j,k)*fifwx(nx) 
+        rx(nx,j,k)=rx(nx,j,k)*fifwx(nx) 
+        do i=nx-1,1,-1
+           tx(i,j,k)=(tx(i,j,k)-fiffx(i)*tx(i+1,j,k))*fifwx(i) 
+           rx(i,j,k)=(rx(i,j,k)-fiffx(i)*rx(i+1,j,k))*fifwx(i) 
+        enddo
         fisx(j,k)=(tx(1,j,k)-fialix*tx(nx,j,k))&
-           /(1.+rx(1,j,k)-fialix*rx(nx,j,k)) 
-      do i=1,nx 
-         tx(i,j,k)=tx(i,j,k)-fisx(j,k)*rx(i,j,k) 
-      enddo
-   enddo
-   enddo
+             /(1.+rx(1,j,k)-fialix*rx(nx,j,k)) 
+        do i=1,nx 
+           tx(i,j,k)=tx(i,j,k)-fisx(j,k)*rx(i,j,k) 
+        enddo
+     enddo
+  enddo
 
-return  
+  return  
 
 end subroutine filx_00
 
 subroutine filx_11(tx,ux,rx,fisx,fiffx,fifsx,fifwx,nx,ny,nz,npaire) 
 
-USE param  
-USE parfiX 
-
-implicit none
-
-integer :: nx,ny,nz,npaire,i,j,k
-real(mytype), dimension(nx,ny,nz) :: tx,ux,rx 
-real(mytype), dimension(ny,nz) :: fisx
-real(mytype), dimension(nx) :: fiffx,fifsx,fifwx
-
-    if(iibm.eq.2) call lagpolx(ux)
-
-    if (npaire==1) then 
-    do k=1,nz 
-    do j=1,ny 
-         tx(1,j,k)=fiaix*ux(1,j,k)+fibix*(ux(2,j,k)+ux(2,j,k))&
-                                  +ficix*(ux(3,j,k)+ux(3,j,k))&
-                                  +fidix*(ux(4,j,k)+ux(4,j,k))
-         tx(2,j,k)=fiaix*ux(2,j,k)+fibix*(ux(3,j,k)+ux(1,j,k))& 
-                                  +ficix*(ux(4,j,k)+ux(2,j,k))&
-                                  +fidix*(ux(5,j,k)+ux(3,j,k)) 
-         tx(3,j,k)=fiaix*ux(3,j,k)+fibix*(ux(4,j,k)+ux(2,j,k))& 
-                                  +ficix*(ux(5,j,k)+ux(1,j,k))&
-                                  +fidix*(ux(6,j,k)+ux(2,j,k)) 
-        do i=4,nx-3 
-            tx(i,j,k)=fiaix*ux(i,j,k)+fibix*(ux(i+1,j,k)+ux(i-1,j,k))& 
-                                +ficix*(ux(i+2,j,k)+ux(i-2,j,k))&
-                                +fidix*(ux(i+3,j,k)+ux(i-3,j,k)) 
-        enddo 
-        tx(nx,j,k)  =fiaix*ux(nx,j,k)  +fibix*(ux(nx-1,j,k)+ux(nx-1,j,k))&
-                                      +ficix*(ux(nx-2,j,k)+ux(nx-2,j,k))&
-                                      +fidix*(ux(nx-3,j,k)+ux(nx-3,j,k))
-       tx(nx-1,j,k)=fiaix*ux(nx-1,j,k)+fibix*(ux(  nx,j,k)+ux(nx-2,j,k))& 
-                                      +ficix*(ux(nx-1,j,k)+ux(nx-3,j,k))&
-                                      +fidix*(ux(nx-2,j,k)+ux(nx-4,j,k)) 
-       tx(nx-2,j,k)=fiaix*ux(nx-2,j,k)+fibix*(ux(nx-1,j,k)+ux(nx-3,j,k))& 
-                                      +ficix*(ux(  nx,j,k)+ux(nx-4,j,k))&
-                                      +fidix*(ux(nx-1,j,k)+ux(nx-5,j,k)) 
-    do i=2,nx 
-       tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*fifsx(i) 
-    enddo
-       tx(nx,j,k)=tx(nx,j,k)*fifwx(nx) 
-    do i=nx-1,1,-1  
-    tx(i,j,k)=(tx(i,j,k)-fiffx(i)*tx(i+1,j,k))*fifwx(i) 
-    enddo 
-    enddo
-    enddo
-    endif
-   
-    if (npaire==0) then 
-    do k=1,nz 
-    do j=1,ny 
-         tx(1,j,k)=zero
-         tx(2,j,k)=fiaix*ux(2,j,k)+fibix*(ux(3,j,k)+ux(1,j,k))& 
-                                  +ficix*(ux(4,j,k)-ux(2,j,k))&
-                                  +fidix*(ux(5,j,k)-ux(3,j,k)) 
-         tx(3,j,k)=fiaix*ux(3,j,k)+fibix*(ux(4,j,k)+ux(2,j,k))& 
-                                  +ficix*(ux(5,j,k)+ux(1,j,k))&
-                                  +fidix*(ux(6,j,k)-ux(2,j,k)) 
-    do i=4,nx-3 
-         tx(i,j,k)=fiaix*ux(i,j,k)+fibix*(ux(i+1,j,k)+ux(i-1,j,k))& 
-                                  +ficix*(ux(i+2,j,k)+ux(i-2,j,k))&
-                                  +fidix*(ux(i+3,j,k)+ux(i-3,j,k)) 
-    enddo 
-         tx(nx  ,j,k)=zero
-         tx(nx-1,j,k)=fiaix*ux(nx-1,j,k)+fibix*( ux(nx  ,j,k)+ux(nx-2,j,k))& 
-                                        +ficix*(-ux(nx-1,j,k)+ux(nx-3,j,k))&
-                                        +fidix*(-ux(nx-2,j,k)+ux(nx-4,j,k)) 
-         tx(nx-2,j,k)=fiaix*ux(nx-2,j,k)+fibix*( ux(nx-1,j,k)+ux(nx-3,j,k))& 
-                                        +ficix*( ux(nx  ,j,k)+ux(nx-4,j,k))&
-                                        +fidix*(-ux(nx-1,j,k)+ux(nx-5,j,k)) 
-      do i=2,nx 
-         tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*fifsx(i) 
-      enddo
-         tx(nx,j,k)=tx(nx,j,k)*fifwx(nx) 
-      do i=nx-1,1,-1  
-        tx(i,j,k)=(tx(i,j,k)-fiffx(i)*tx(i+1,j,k))*fifwx(i) 
-      enddo 
-      
-    enddo 
-    enddo 
-   endif
-
-   return
+  USE param  
+  USE parfiX 
+
+  implicit none
+
+  integer :: nx,ny,nz,npaire,i,j,k
+  real(mytype), dimension(nx,ny,nz) :: tx,ux,rx 
+  real(mytype), dimension(ny,nz) :: fisx
+  real(mytype), dimension(nx) :: fiffx,fifsx,fifwx
+
+  if(iibm.eq.2) call lagpolx(ux)
+
+  if (npaire==1) then 
+     do k=1,nz 
+        do j=1,ny 
+           tx(1,j,k)=fiaix*ux(1,j,k)+fibix*(ux(2,j,k)+ux(2,j,k))&
+                +ficix*(ux(3,j,k)+ux(3,j,k))&
+                +fidix*(ux(4,j,k)+ux(4,j,k))
+           tx(2,j,k)=fiaix*ux(2,j,k)+fibix*(ux(3,j,k)+ux(1,j,k))& 
+                +ficix*(ux(4,j,k)+ux(2,j,k))&
+                +fidix*(ux(5,j,k)+ux(3,j,k)) 
+           tx(3,j,k)=fiaix*ux(3,j,k)+fibix*(ux(4,j,k)+ux(2,j,k))& 
+                +ficix*(ux(5,j,k)+ux(1,j,k))&
+                +fidix*(ux(6,j,k)+ux(2,j,k)) 
+           do i=4,nx-3 
+              tx(i,j,k)=fiaix*ux(i,j,k)+fibix*(ux(i+1,j,k)+ux(i-1,j,k))& 
+                   +ficix*(ux(i+2,j,k)+ux(i-2,j,k))&
+                   +fidix*(ux(i+3,j,k)+ux(i-3,j,k)) 
+           enddo
+           tx(nx,j,k)  =fiaix*ux(nx,j,k)  +fibix*(ux(nx-1,j,k)+ux(nx-1,j,k))&
+                +ficix*(ux(nx-2,j,k)+ux(nx-2,j,k))&
+                +fidix*(ux(nx-3,j,k)+ux(nx-3,j,k))
+           tx(nx-1,j,k)=fiaix*ux(nx-1,j,k)+fibix*(ux(  nx,j,k)+ux(nx-2,j,k))& 
+                +ficix*(ux(nx-1,j,k)+ux(nx-3,j,k))&
+                +fidix*(ux(nx-2,j,k)+ux(nx-4,j,k)) 
+           tx(nx-2,j,k)=fiaix*ux(nx-2,j,k)+fibix*(ux(nx-1,j,k)+ux(nx-3,j,k))& 
+                +ficix*(ux(  nx,j,k)+ux(nx-4,j,k))&
+                +fidix*(ux(nx-1,j,k)+ux(nx-5,j,k)) 
+           do i=2,nx 
+              tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*fifsx(i) 
+           enddo
+           tx(nx,j,k)=tx(nx,j,k)*fifwx(nx) 
+           do i=nx-1,1,-1  
+              tx(i,j,k)=(tx(i,j,k)-fiffx(i)*tx(i+1,j,k))*fifwx(i) 
+           enddo
+        enddo
+     enddo
+  endif
+
+  if (npaire==0) then 
+     do k=1,nz 
+        do j=1,ny 
+           tx(1,j,k)=zero
+           tx(2,j,k)=fiaix*ux(2,j,k)+fibix*(ux(3,j,k)+ux(1,j,k))& 
+                +ficix*(ux(4,j,k)-ux(2,j,k))&
+                +fidix*(ux(5,j,k)-ux(3,j,k)) 
+           tx(3,j,k)=fiaix*ux(3,j,k)+fibix*(ux(4,j,k)+ux(2,j,k))& 
+                +ficix*(ux(5,j,k)+ux(1,j,k))&
+                +fidix*(ux(6,j,k)-ux(2,j,k)) 
+           do i=4,nx-3 
+              tx(i,j,k)=fiaix*ux(i,j,k)+fibix*(ux(i+1,j,k)+ux(i-1,j,k))& 
+                   +ficix*(ux(i+2,j,k)+ux(i-2,j,k))&
+                   +fidix*(ux(i+3,j,k)+ux(i-3,j,k)) 
+           enddo
+           tx(nx  ,j,k)=zero
+           tx(nx-1,j,k)=fiaix*ux(nx-1,j,k)+fibix*( ux(nx  ,j,k)+ux(nx-2,j,k))& 
+                +ficix*(-ux(nx-1,j,k)+ux(nx-3,j,k))&
+                +fidix*(-ux(nx-2,j,k)+ux(nx-4,j,k)) 
+           tx(nx-2,j,k)=fiaix*ux(nx-2,j,k)+fibix*( ux(nx-1,j,k)+ux(nx-3,j,k))& 
+                +ficix*( ux(nx  ,j,k)+ux(nx-4,j,k))&
+                +fidix*(-ux(nx-1,j,k)+ux(nx-5,j,k)) 
+           do i=2,nx 
+              tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*fifsx(i) 
+           enddo
+           tx(nx,j,k)=tx(nx,j,k)*fifwx(nx) 
+           do i=nx-1,1,-1  
+              tx(i,j,k)=(tx(i,j,k)-fiffx(i)*tx(i+1,j,k))*fifwx(i) 
+           enddo
+
+        enddo
+     enddo
+  endif
+
+  return
 
 end subroutine filx_11
 
 subroutine filx_12(tx,ux,rx,fisx,fiffx,fifsx,fifwx,nx,ny,nz,npaire) 
 
-USE param  
-USE parfiX 
+  USE param  
+  USE parfiX 
 
-implicit none
+  implicit none
 
-integer :: nx,ny,nz,npaire,i,j,k
-real(mytype), dimension(nx,ny,nz) :: tx,ux,rx 
-real(mytype), dimension(ny,nz) :: fisx
-real(mytype), dimension(nx) :: fiffx,fifsx,fifwx
+  integer :: nx,ny,nz,npaire,i,j,k
+  real(mytype), dimension(nx,ny,nz) :: tx,ux,rx 
+  real(mytype), dimension(ny,nz) :: fisx
+  real(mytype), dimension(nx) :: fiffx,fifsx,fifwx
 
-   if(iibm.eq.2) call lagpolx(ux)
+  if(iibm.eq.2) call lagpolx(ux)
 
-   if (npaire==1) then 
-   do k=1,nz 
-   do j=1,ny 
-        tx(1,j,k)=fiaix*ux(1,j,k)+fibix*(ux(2,j,k)+ux(2,j,k))&
-                                 +ficix*(ux(3,j,k)+ux(3,j,k))&
-                                 +fidix*(ux(4,j,k)+ux(4,j,k))
-        tx(2,j,k)=fiaix*ux(2,j,k)+fibix*(ux(3,j,k)+ux(1,j,k))& 
-                                 +ficix*(ux(4,j,k)+ux(2,j,k))&
-                                 +fidix*(ux(5,j,k)+ux(3,j,k)) 
-        tx(3,j,k)=fiaix*ux(3,j,k)+fibix*(ux(4,j,k)+ux(2,j,k))& 
-                                 +ficix*(ux(5,j,k)+ux(1,j,k))&
-                                 +fidix*(ux(6,j,k)+ux(2,j,k)) 
-   do i=4,nx-3 
-      tx(i,j,k)=fiaix*ux(i,j,k)+fibix*(ux(i+1,j,k)+ux(i-1,j,k))& 
-                               +ficix*(ux(i+2,j,k)+ux(i-2,j,k))&
-                               +fidix*(ux(i+3,j,k)+ux(i-3,j,k)) 
-   enddo
-      tx(nx,j,k)=ux(nx,j,k)
-      tx(nx-1,j,k)=fiamx*ux(nx,j,k)+fibmx*ux(nx-1,j,k)+ficmx*ux(nx-2,j,k)+&
-                            fidmx*ux(nx-3,j,k)
-      tx(nx-2,j,k)=fiapx*ux(nx,j,k)+fibpx*ux(nx-1,j,k)+ficpx*ux(nx-2,j,k)+&
+  if (npaire==1) then 
+     do k=1,nz 
+        do j=1,ny 
+           tx(1,j,k)=fiaix*ux(1,j,k)+fibix*(ux(2,j,k)+ux(2,j,k))&
+                +ficix*(ux(3,j,k)+ux(3,j,k))&
+                +fidix*(ux(4,j,k)+ux(4,j,k))
+           tx(2,j,k)=fiaix*ux(2,j,k)+fibix*(ux(3,j,k)+ux(1,j,k))& 
+                +ficix*(ux(4,j,k)+ux(2,j,k))&
+                +fidix*(ux(5,j,k)+ux(3,j,k)) 
+           tx(3,j,k)=fiaix*ux(3,j,k)+fibix*(ux(4,j,k)+ux(2,j,k))& 
+                +ficix*(ux(5,j,k)+ux(1,j,k))&
+                +fidix*(ux(6,j,k)+ux(2,j,k)) 
+           do i=4,nx-3 
+              tx(i,j,k)=fiaix*ux(i,j,k)+fibix*(ux(i+1,j,k)+ux(i-1,j,k))& 
+                   +ficix*(ux(i+2,j,k)+ux(i-2,j,k))&
+                   +fidix*(ux(i+3,j,k)+ux(i-3,j,k)) 
+           enddo
+           tx(nx,j,k)=ux(nx,j,k)
+           tx(nx-1,j,k)=fiamx*ux(nx,j,k)+fibmx*ux(nx-1,j,k)+ficmx*ux(nx-2,j,k)+&
+                fidmx*ux(nx-3,j,k)
+           tx(nx-2,j,k)=fiapx*ux(nx,j,k)+fibpx*ux(nx-1,j,k)+ficpx*ux(nx-2,j,k)+&
                 fidpx*ux(nx-3,j,k)+fiepx*ux(nx-4,j,k)+fifpx*ux(nx-5,j,k)
-   do i=2,nx 
-      tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*fifsx(i) 
-   enddo
-      tx(nx,j,k)=tx(nx,j,k)*fifwx(nx) 
-   do i=nx-1,1,-1  
-   tx(i,j,k)=(tx(i,j,k)-fiffx(i)*tx(i+1,j,k))*fifwx(i) 
-   enddo 
-   enddo
-   enddo
-    endif
-   
-    if (npaire==0) then 
-      do k=1,nz 
-      do j=1,ny 
-         tx(1,j,k)=zero
-         tx(2,j,k)=fiaix*ux(2,j,k)+fibix*(ux(3,j,k)+ux(1,j,k))& 
-                                  +ficix*(ux(4,j,k)-ux(2,j,k))&
-                                  +fidix*(ux(5,j,k)-ux(3,j,k)) 
-         tx(3,j,k)=fiaix*ux(3,j,k)+fibix*(ux(4,j,k)+ux(2,j,k))& 
-                                  +ficix*(ux(5,j,k)+ux(1,j,k))&
-                                  +fidix*(ux(6,j,k)-ux(2,j,k)) 
-      do i=4,nx-3 
-         tx(i,j,k)=fiaix*ux(i,j,k)+fibix*(ux(i+1,j,k)+ux(i-1,j,k))& 
-                                  +ficix*(ux(i+2,j,k)+ux(i-2,j,k))&
-                                  +fidix*(ux(i+3,j,k)+ux(i-3,j,k)) 
-      enddo 
-      tx(nx,j,k)=ux(nx,j,k)
-      tx(nx-1,j,k)=fiamx*ux(nx,j,k)+fibmx*ux(nx-1,j,k)+ficmx*ux(nx-2,j,k)+&
-                            fidmx*ux(nx-3,j,k)
-      tx(nx-2,j,k)=fiapx*ux(nx,j,k)+fibpx*ux(nx-1,j,k)+ficpx*ux(nx-2,j,k)+&
+           do i=2,nx 
+              tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*fifsx(i) 
+           enddo
+           tx(nx,j,k)=tx(nx,j,k)*fifwx(nx) 
+           do i=nx-1,1,-1  
+              tx(i,j,k)=(tx(i,j,k)-fiffx(i)*tx(i+1,j,k))*fifwx(i) 
+           enddo
+        enddo
+     enddo
+  endif
+
+  if (npaire==0) then 
+     do k=1,nz 
+        do j=1,ny 
+           tx(1,j,k)=zero
+           tx(2,j,k)=fiaix*ux(2,j,k)+fibix*(ux(3,j,k)+ux(1,j,k))& 
+                +ficix*(ux(4,j,k)-ux(2,j,k))&
+                +fidix*(ux(5,j,k)-ux(3,j,k)) 
+           tx(3,j,k)=fiaix*ux(3,j,k)+fibix*(ux(4,j,k)+ux(2,j,k))& 
+                +ficix*(ux(5,j,k)+ux(1,j,k))&
+                +fidix*(ux(6,j,k)-ux(2,j,k)) 
+           do i=4,nx-3 
+              tx(i,j,k)=fiaix*ux(i,j,k)+fibix*(ux(i+1,j,k)+ux(i-1,j,k))& 
+                   +ficix*(ux(i+2,j,k)+ux(i-2,j,k))&
+                   +fidix*(ux(i+3,j,k)+ux(i-3,j,k)) 
+           enddo
+           tx(nx,j,k)=ux(nx,j,k)
+           tx(nx-1,j,k)=fiamx*ux(nx,j,k)+fibmx*ux(nx-1,j,k)+ficmx*ux(nx-2,j,k)+&
+                fidmx*ux(nx-3,j,k)
+           tx(nx-2,j,k)=fiapx*ux(nx,j,k)+fibpx*ux(nx-1,j,k)+ficpx*ux(nx-2,j,k)+&
                 fidpx*ux(nx-3,j,k)+fiepx*ux(nx-4,j,k)+fifpx*ux(nx-5,j,k)
-      do i=2,nx 
-         tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*fifsx(i) 
-      enddo
-         tx(nx,j,k)=tx(nx,j,k)*fifwx(nx) 
-      do i=nx-1,1,-1  
-        tx(i,j,k)=(tx(i,j,k)-fiffx(i)*tx(i+1,j,k))*fifwx(i) 
-      enddo 
-      
-      enddo 
-      enddo 
-   endif
-
-   return
+           do i=2,nx 
+              tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*fifsx(i) 
+           enddo
+           tx(nx,j,k)=tx(nx,j,k)*fifwx(nx) 
+           do i=nx-1,1,-1  
+              tx(i,j,k)=(tx(i,j,k)-fiffx(i)*tx(i+1,j,k))*fifwx(i) 
+           enddo
+
+        enddo
+     enddo
+  endif
+
+  return
 
 end subroutine filx_12
 
 subroutine filx_21(tx,ux,rx,fisx,fiffx,fifsx,fifwx,nx,ny,nz,npaire) 
 
-USE param  
-USE parfiX 
+  USE param  
+  USE parfiX 
 
-implicit none
+  implicit none
 
-integer :: nx,ny,nz,npaire,i,j,k
-real(mytype), dimension(nx,ny,nz) :: tx,ux,rx 
-real(mytype), dimension(ny,nz) :: fisx
-real(mytype), dimension(nx) :: fiffx,fifsx,fifwx
+  integer :: nx,ny,nz,npaire,i,j,k
+  real(mytype), dimension(nx,ny,nz) :: tx,ux,rx 
+  real(mytype), dimension(ny,nz) :: fisx
+  real(mytype), dimension(nx) :: fiffx,fifsx,fifwx
 
-   if(iibm.eq.2) call lagpolx(ux)
-    
-   if (npaire==1) then 
-    do k=1,nz 
-    do j=1,ny 
-      tx(1,j,k)=ux(1,j,k)
-      tx(2,j,k)=fia2x*ux(1,j,k)+fib2x*ux(2,j,k)+fic2x*ux(3,j,k)+&
+  if(iibm.eq.2) call lagpolx(ux)
+
+  if (npaire==1) then 
+     do k=1,nz 
+        do j=1,ny 
+           tx(1,j,k)=ux(1,j,k)
+           tx(2,j,k)=fia2x*ux(1,j,k)+fib2x*ux(2,j,k)+fic2x*ux(3,j,k)+&
                 fid2x*ux(4,j,k)
-      tx(3,j,k)=fia3x*ux(1,j,k)+fib3x*ux(2,j,k)+fic3x*ux(3,j,k)+&
+           tx(3,j,k)=fia3x*ux(1,j,k)+fib3x*ux(2,j,k)+fic3x*ux(3,j,k)+&
                 fid3x*ux(4,j,k)+fie3x*ux(5,j,k)+fif3x*ux(6,j,k)
-      do i=4,nx-3 
-          tx(i,j,k)=fiaix*ux(i,j,k)+fibix*(ux(i+1,j,k)+ux(i-1,j,k))& 
-                              +ficix*(ux(i+2,j,k)+ux(i-2,j,k))&
-                              +fidix*(ux(i+3,j,k)+ux(i-3,j,k)) 
-      enddo 
-      tx(nx,j,k)  =fiaix*ux(nx,j,k)  +fibix*(ux(nx-1,j,k)+ux(nx-1,j,k))&
-                                    +ficix*(ux(nx-2,j,k)+ux(nx-2,j,k))&
-                                    +fidix*(ux(nx-3,j,k)+ux(nx-3,j,k))
-      tx(nx-1,j,k)=fiaix*ux(nx-1,j,k)+fibix*(ux(  nx,j,k)+ux(nx-2,j,k))& 
-                                     +ficix*(ux(nx-1,j,k)+ux(nx-3,j,k))&
-                                     +fidix*(ux(nx-2,j,k)+ux(nx-4,j,k)) 
-      tx(nx-2,j,k)=fiaix*ux(nx-2,j,k)+fibix*(ux(nx-1,j,k)+ux(nx-3,j,k))& 
-                                      +ficix*(ux(  nx,j,k)+ux(nx-4,j,k))&
-                                      +fidix*(ux(nx-1,j,k)+ux(nx-5,j,k)) 
-    do i=2,nx 
-       tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*fifsx(i) 
-    enddo
-       tx(nx,j,k)=tx(nx,j,k)*fifwx(nx) 
-    do i=nx-1,1,-1  
-    tx(i,j,k)=(tx(i,j,k)-fiffx(i)*tx(i+1,j,k))*fifwx(i) 
-    enddo 
-    enddo
-    enddo
-    endif
-   
-    if (npaire==0) then 
-    do k=1,nz 
-    do j=1,ny 
-      tx(1,j,k)=ux(1,j,k)
-      tx(2,j,k)=fia2x*ux(1,j,k)+fib2x*ux(2,j,k)+fic2x*ux(3,j,k)+&
+           do i=4,nx-3 
+              tx(i,j,k)=fiaix*ux(i,j,k)+fibix*(ux(i+1,j,k)+ux(i-1,j,k))& 
+                   +ficix*(ux(i+2,j,k)+ux(i-2,j,k))&
+                   +fidix*(ux(i+3,j,k)+ux(i-3,j,k)) 
+           enddo
+           tx(nx,j,k)  =fiaix*ux(nx,j,k)  +fibix*(ux(nx-1,j,k)+ux(nx-1,j,k))&
+                +ficix*(ux(nx-2,j,k)+ux(nx-2,j,k))&
+                +fidix*(ux(nx-3,j,k)+ux(nx-3,j,k))
+           tx(nx-1,j,k)=fiaix*ux(nx-1,j,k)+fibix*(ux(  nx,j,k)+ux(nx-2,j,k))& 
+                +ficix*(ux(nx-1,j,k)+ux(nx-3,j,k))&
+                +fidix*(ux(nx-2,j,k)+ux(nx-4,j,k)) 
+           tx(nx-2,j,k)=fiaix*ux(nx-2,j,k)+fibix*(ux(nx-1,j,k)+ux(nx-3,j,k))& 
+                +ficix*(ux(  nx,j,k)+ux(nx-4,j,k))&
+                +fidix*(ux(nx-1,j,k)+ux(nx-5,j,k)) 
+           do i=2,nx 
+              tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*fifsx(i) 
+           enddo
+           tx(nx,j,k)=tx(nx,j,k)*fifwx(nx) 
+           do i=nx-1,1,-1  
+              tx(i,j,k)=(tx(i,j,k)-fiffx(i)*tx(i+1,j,k))*fifwx(i) 
+           enddo
+        enddo
+     enddo
+  endif
+
+  if (npaire==0) then 
+     do k=1,nz 
+        do j=1,ny 
+           tx(1,j,k)=ux(1,j,k)
+           tx(2,j,k)=fia2x*ux(1,j,k)+fib2x*ux(2,j,k)+fic2x*ux(3,j,k)+&
                 fid2x*ux(4,j,k)
-      tx(3,j,k)=fia3x*ux(1,j,k)+fib3x*ux(2,j,k)+fic3x*ux(3,j,k)+&
+           tx(3,j,k)=fia3x*ux(1,j,k)+fib3x*ux(2,j,k)+fic3x*ux(3,j,k)+&
                 fid3x*ux(4,j,k)+fie3x*ux(5,j,k)+fif3x*ux(6,j,k)
-    
-        do i=4,nx-3 
-            tx(i,j,k)=fiaix*ux(i,j,k)+fibix*(ux(i+1,j,k)+ux(i-1,j,k))& 
-                                  +ficix*(ux(i+2,j,k)+ux(i-2,j,k))&
-                                  +fidix*(ux(i+3,j,k)+ux(i-3,j,k)) 
-        enddo 
-         tx(nx  ,j,k)=zero
-         tx(nx-1,j,k)=fiaix*ux(nx-1,j,k)+fibix*( ux(nx  ,j,k)+ux(nx-2,j,k))& 
-                                        +ficix*(-ux(nx-1,j,k)+ux(nx-3,j,k))&
-                                        +fidix*(-ux(nx-2,j,k)+ux(nx-4,j,k)) 
-         tx(nx-2,j,k)=fiaix*ux(nx-2,j,k)+fibix*( ux(nx-1,j,k)+ux(nx-3,j,k))& 
-                                        +ficix*( ux(nx  ,j,k)+ux(nx-4,j,k))&
-                                        +fidix*(-ux(nx-1,j,k)+ux(nx-5,j,k)) 
-      do i=2,nx 
-         tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*fifsx(i) 
-      enddo
-         tx(nx,j,k)=tx(nx,j,k)*fifwx(nx) 
-      do i=nx-1,1,-1  
-        tx(i,j,k)=(tx(i,j,k)-fiffx(i)*tx(i+1,j,k))*fifwx(i) 
-      enddo 
-      
-    enddo 
-    enddo 
-   endif
-
-   return
+
+           do i=4,nx-3 
+              tx(i,j,k)=fiaix*ux(i,j,k)+fibix*(ux(i+1,j,k)+ux(i-1,j,k))& 
+                   +ficix*(ux(i+2,j,k)+ux(i-2,j,k))&
+                   +fidix*(ux(i+3,j,k)+ux(i-3,j,k)) 
+           enddo
+           tx(nx  ,j,k)=zero
+           tx(nx-1,j,k)=fiaix*ux(nx-1,j,k)+fibix*( ux(nx  ,j,k)+ux(nx-2,j,k))& 
+                +ficix*(-ux(nx-1,j,k)+ux(nx-3,j,k))&
+                +fidix*(-ux(nx-2,j,k)+ux(nx-4,j,k)) 
+           tx(nx-2,j,k)=fiaix*ux(nx-2,j,k)+fibix*( ux(nx-1,j,k)+ux(nx-3,j,k))& 
+                +ficix*( ux(nx  ,j,k)+ux(nx-4,j,k))&
+                +fidix*(-ux(nx-1,j,k)+ux(nx-5,j,k)) 
+           do i=2,nx 
+              tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*fifsx(i) 
+           enddo
+           tx(nx,j,k)=tx(nx,j,k)*fifwx(nx) 
+           do i=nx-1,1,-1  
+              tx(i,j,k)=(tx(i,j,k)-fiffx(i)*tx(i+1,j,k))*fifwx(i) 
+           enddo
+
+        enddo
+     enddo
+  endif
+
+  return
 
 end subroutine filx_21
 
 
 subroutine filx_22(tx,ux,rx,fisx,fiffx,fifsx,fifwx,nx,ny,nz,npaire) 
-  
-USE param  
-USE parfiX 
 
-implicit none
+  USE param  
+  USE parfiX 
 
-integer :: nx,ny,nz,npaire,i,j,k
-real(mytype), dimension(nx,ny,nz) :: tx,ux,rx 
-real(mytype), dimension(ny,nz) :: fisx
-real(mytype), dimension(nx) :: fiffx,fifsx,fifwx
+  implicit none
 
-   if(iibm.eq.2) call lagpolx(ux)
+  integer :: nx,ny,nz,npaire,i,j,k
+  real(mytype), dimension(nx,ny,nz) :: tx,ux,rx 
+  real(mytype), dimension(ny,nz) :: fisx
+  real(mytype), dimension(nx) :: fiffx,fifsx,fifwx
+
+  if(iibm.eq.2) call lagpolx(ux)
+
+  do k=1,nz
+     do j=1,ny 
+        tx(1,j,k)=ux(1,j,k)
+        tx(2,j,k)=fia2x*ux(1,j,k)+fib2x*ux(2,j,k)+fic2x*ux(3,j,k)+&
+             fid2x*ux(4,j,k)
+        tx(3,j,k)=fia3x*ux(1,j,k)+fib3x*ux(2,j,k)+fic3x*ux(3,j,k)+&
+             fid3x*ux(4,j,k)+fie3x*ux(5,j,k)+fif3x*ux(6,j,k)
+        do i=4,nx-3
+           tx(i,j,k)=fiaix*ux(i,j,k)+fibix*(ux(i+1,j,k)+ux(i-1,j,k))& 
+                +ficix*(ux(i+2,j,k)+ux(i-2,j,k))&
+                +fidix*(ux(i+3,j,k)+ux(i-3,j,k)) 
+        enddo
+        tx(nx,j,k)=ux(nx,j,k)
+        tx(nx-1,j,k)=fiamx*ux(nx,j,k)+fibmx*ux(nx-1,j,k)+ficmx*ux(nx-2,j,k)+&
+             fidmx*ux(nx-3,j,k)
+        tx(nx-2,j,k)=fiapx*ux(nx,j,k)+fibpx*ux(nx-1,j,k)+ficpx*ux(nx-2,j,k)+&
+             fidpx*ux(nx-3,j,k)+fiepx*ux(nx-4,j,k)+fifpx*ux(nx-5,j,k)
+        do i=2,nx 
+           tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*fifsx(i) 
+        enddo
+        tx(nx,j,k)=tx(nx,j,k)*fifwx(nx) 
+        do i=nx-1,1,-1
+           tx(i,j,k)=(tx(i,j,k)-fiffx(i)*tx(i+1,j,k))*fifwx(i) 
+        enddo
+     enddo
+  enddo
 
-   do k=1,nz
-   do j=1,ny 
-      tx(1,j,k)=ux(1,j,k)
-      tx(2,j,k)=fia2x*ux(1,j,k)+fib2x*ux(2,j,k)+fic2x*ux(3,j,k)+&
-                fid2x*ux(4,j,k)
-      tx(3,j,k)=fia3x*ux(1,j,k)+fib3x*ux(2,j,k)+fic3x*ux(3,j,k)+&
-                fid3x*ux(4,j,k)+fie3x*ux(5,j,k)+fif3x*ux(6,j,k)
-      do i=4,nx-3
-         tx(i,j,k)=fiaix*ux(i,j,k)+fibix*(ux(i+1,j,k)+ux(i-1,j,k))& 
-                                  +ficix*(ux(i+2,j,k)+ux(i-2,j,k))&
-                                  +fidix*(ux(i+3,j,k)+ux(i-3,j,k)) 
-      enddo
-      tx(nx,j,k)=ux(nx,j,k)
-      tx(nx-1,j,k)=fiamx*ux(nx,j,k)+fibmx*ux(nx-1,j,k)+ficmx*ux(nx-2,j,k)+&
-                            fidmx*ux(nx-3,j,k)
-      tx(nx-2,j,k)=fiapx*ux(nx,j,k)+fibpx*ux(nx-1,j,k)+ficpx*ux(nx-2,j,k)+&
-                fidpx*ux(nx-3,j,k)+fiepx*ux(nx-4,j,k)+fifpx*ux(nx-5,j,k)
-      do i=2,nx 
-         tx(i,j,k)=tx(i,j,k)-tx(i-1,j,k)*fifsx(i) 
-      enddo
-      tx(nx,j,k)=tx(nx,j,k)*fifwx(nx) 
-      do i=nx-1,1,-1
-         tx(i,j,k)=(tx(i,j,k)-fiffx(i)*tx(i+1,j,k))*fifwx(i) 
-      enddo
-   enddo 
-   enddo 
-
-return  
+  return  
 end subroutine filx_22
 
 subroutine fily_00(ty,uy,ry,fisy,fiffy,fifsy,fifwy,ppy,nx,ny,nz,npaire) 
-  
-USE param  
-USE parfiY 
-
-implicit none
-
-integer :: nx,ny,nz,i,j,k,npaire
-real(mytype), dimension(nx,ny,nz) :: ty,uy 
-real(mytype), dimension(nx,ny,nz) :: ry
-real(mytype), dimension(nx,nz)  :: fisy
-real(mytype), dimension(ny) :: fiffy,fifsy,fifwy,ppy
-    
-if(iibm.eq.2) call lagpoly(uy)
-
-    do k=1,nz 
-    do i=1,nx 
-       ty(i,1,k)=fiajy*uy(i,1,k)+fibjy*(uy(i,2,k)+uy(i,ny,k))& 
-                                +ficjy*(uy(i,3,k)+uy(i,ny-1,k))&
-                                +fidjy*(uy(i,4,k)+uy(i,ny-2,k)) 
-       ry(i,1,k)=-1.
-       ty(i,2,k)=fiajy*uy(i,2,k)+fibjy*(uy(i,3,k)+uy(i,1,k))&
-                                +ficjy*(uy(i,4,k)+uy(i,ny,k))& 
-                                +fidjy*(uy(i,5,k)+uy(i,ny-1,k)) 
-       ry(i,2,k)=0. 
-       ty(i,3,k)=fiajy*uy(i,3,k)+fibjy*(uy(i,4,k)+uy(i,2,k))&
-                                +ficjy*(uy(i,5,k)+uy(i,1,k))& 
-                                +fidjy*(uy(i,6,k)+uy(i,ny,k)) 
-       ry(i,3,k)=0. 
-       do j=4,ny-3
-          ty(i,j,k)=fiajy*uy(i,j,k)+fibjy*(uy(i,j+1,k)+uy(i,j-1,k))& 
-                                   +ficjy*(uy(i,j+2,k)+uy(i,j-2,k))&
-                                   +fidjy*(uy(i,j+3,k)+uy(i,j-3,k)) 
-          ry(i,j,k)=0. 
-       enddo
-       ty(i,ny-2,k)=fiajy*uy(i,ny-2,k)+fibjy*(uy(i,ny-3,k)+uy(i,ny-1,k))&
-                                      +ficjy*(uy(i,ny-4,k)+uy(i,ny,k))& 
-                                      +fidjy*(uy(i,ny-5,k)+uy(i,1,k)) 
-       ry(i,ny-2,k)=0. 
-       ty(i,ny-1,k)=fiajy*uy(i,ny-1,k)+fibjy*(uy(i,ny-2,k)+uy(i,ny,k))&
-                                      +ficjy*(uy(i,ny-3,k)+uy(i,1,k))& 
-                                      +fidjy*(uy(i,ny-4,k)+uy(i,2,k)) 
-       ry(i,ny-1,k)=0. 
-       ty(i,ny,k)=fiajy*uy(i,ny,k)+fibjy*(uy(i,ny-1,k)+uy(i,1,k))&
-                                  +ficjy*(uy(i,ny-2,k)+uy(i,2,k))& 
-                                  +fidjy*(uy(i,ny-3,k)+uy(i,3,k)) 
-       ry(i,ny,k)=fialjy           
-       do j=2, ny
-          ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*fifsy(j) 
-          ry(i,j,k)=ry(i,j,k)-ry(i,j-1,k)*fifsy(j) 
-       enddo
-       ty(i,ny,k)=ty(i,ny,k)*fifwy(ny) 
-       ry(i,ny,k)=ry(i,ny,k)*fifwy(ny) 
-       do j=ny-1,1,-1
-          ty(i,j,k)=(ty(i,j,k)-fiffy(j)*ty(i,j+1,k))*fifwy(j) 
-          ry(i,j,k)=(ry(i,j,k)-fiffy(j)*ry(i,j+1,k))*fifwy(j) 
-       enddo
-         fisy(i,k)=(ty(i,1,k)-fialjy*ty(i,ny,k))&
-            /(1.+ry(i,1,k)-fialjy*ry(i,ny,k)) 
-       do j=1,ny 
-          ty(i,j,k)=ty(i,j,k)-fisy(i,k)*ry(i,j,k) 
-       enddo
-    enddo
-    enddo
-
-    if (istret.ne.0) then   
-        do k=1,nz 
-           do j=1,ny 
-              do i=1,nx 
-                 ty(i,j,k)=ty(i,j,k)*ppy(j) 
-              enddo
+
+  USE param  
+  USE parfiY 
+
+  implicit none
+
+  integer :: nx,ny,nz,i,j,k,npaire
+  real(mytype), dimension(nx,ny,nz) :: ty,uy 
+  real(mytype), dimension(nx,ny,nz) :: ry
+  real(mytype), dimension(nx,nz)  :: fisy
+  real(mytype), dimension(ny) :: fiffy,fifsy,fifwy,ppy
+
+  if(iibm.eq.2) call lagpoly(uy)
+
+  do k=1,nz 
+     do i=1,nx 
+        ty(i,1,k)=fiajy*uy(i,1,k)+fibjy*(uy(i,2,k)+uy(i,ny,k))& 
+             +ficjy*(uy(i,3,k)+uy(i,ny-1,k))&
+             +fidjy*(uy(i,4,k)+uy(i,ny-2,k)) 
+        ry(i,1,k)=-1.
+        ty(i,2,k)=fiajy*uy(i,2,k)+fibjy*(uy(i,3,k)+uy(i,1,k))&
+             +ficjy*(uy(i,4,k)+uy(i,ny,k))& 
+             +fidjy*(uy(i,5,k)+uy(i,ny-1,k)) 
+        ry(i,2,k)=0. 
+        ty(i,3,k)=fiajy*uy(i,3,k)+fibjy*(uy(i,4,k)+uy(i,2,k))&
+             +ficjy*(uy(i,5,k)+uy(i,1,k))& 
+             +fidjy*(uy(i,6,k)+uy(i,ny,k)) 
+        ry(i,3,k)=0. 
+        do j=4,ny-3
+           ty(i,j,k)=fiajy*uy(i,j,k)+fibjy*(uy(i,j+1,k)+uy(i,j-1,k))& 
+                +ficjy*(uy(i,j+2,k)+uy(i,j-2,k))&
+                +fidjy*(uy(i,j+3,k)+uy(i,j-3,k)) 
+           ry(i,j,k)=0. 
+        enddo
+        ty(i,ny-2,k)=fiajy*uy(i,ny-2,k)+fibjy*(uy(i,ny-3,k)+uy(i,ny-1,k))&
+             +ficjy*(uy(i,ny-4,k)+uy(i,ny,k))& 
+             +fidjy*(uy(i,ny-5,k)+uy(i,1,k)) 
+        ry(i,ny-2,k)=0. 
+        ty(i,ny-1,k)=fiajy*uy(i,ny-1,k)+fibjy*(uy(i,ny-2,k)+uy(i,ny,k))&
+             +ficjy*(uy(i,ny-3,k)+uy(i,1,k))& 
+             +fidjy*(uy(i,ny-4,k)+uy(i,2,k)) 
+        ry(i,ny-1,k)=0. 
+        ty(i,ny,k)=fiajy*uy(i,ny,k)+fibjy*(uy(i,ny-1,k)+uy(i,1,k))&
+             +ficjy*(uy(i,ny-2,k)+uy(i,2,k))& 
+             +fidjy*(uy(i,ny-3,k)+uy(i,3,k)) 
+        ry(i,ny,k)=fialjy           
+        do j=2, ny
+           ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*fifsy(j) 
+           ry(i,j,k)=ry(i,j,k)-ry(i,j-1,k)*fifsy(j) 
+        enddo
+        ty(i,ny,k)=ty(i,ny,k)*fifwy(ny) 
+        ry(i,ny,k)=ry(i,ny,k)*fifwy(ny) 
+        do j=ny-1,1,-1
+           ty(i,j,k)=(ty(i,j,k)-fiffy(j)*ty(i,j+1,k))*fifwy(j) 
+           ry(i,j,k)=(ry(i,j,k)-fiffy(j)*ry(i,j+1,k))*fifwy(j) 
+        enddo
+        fisy(i,k)=(ty(i,1,k)-fialjy*ty(i,ny,k))&
+             /(1.+ry(i,1,k)-fialjy*ry(i,ny,k)) 
+        do j=1,ny 
+           ty(i,j,k)=ty(i,j,k)-fisy(i,k)*ry(i,j,k) 
+        enddo
+     enddo
+  enddo
+
+  if (istret.ne.0) then   
+     do k=1,nz 
+        do j=1,ny 
+           do i=1,nx 
+              ty(i,j,k)=ty(i,j,k)*ppy(j) 
            enddo
         enddo
-    endif
-    
-return
+     enddo
+  endif
+
+  return
 
 end subroutine fily_00
 
 !********************************************************************
 !
 subroutine fily_11(ty,uy,ry,fisy,fiffy,fifsy,fifwy,ppy,nx,ny,nz,npaire) 
-!
-!********************************************************************
-  
-USE param  
-USE parfiY 
+  !
+  !********************************************************************
 
-implicit none
+  USE param  
+  USE parfiY 
 
-integer :: nx,ny,nz,i,j,k,npaire
-real(mytype), dimension(nx,ny,nz) :: ty,uy 
-real(mytype), dimension(nx,ny,nz) :: ry
-real(mytype), dimension(nx,nz)  :: fisy
-real(mytype), dimension(ny) :: fiffy,fifsy,fifwy,ppy
-    
-if(iibm.eq.2) call lagpoly(uy)
+  implicit none
 
-    if (npaire==1) then 
-        do k=1,nz 
+  integer :: nx,ny,nz,i,j,k,npaire
+  real(mytype), dimension(nx,ny,nz) :: ty,uy 
+  real(mytype), dimension(nx,ny,nz) :: ry
+  real(mytype), dimension(nx,nz)  :: fisy
+  real(mytype), dimension(ny) :: fiffy,fifsy,fifwy,ppy
+
+  if(iibm.eq.2) call lagpoly(uy)
+
+  if (npaire==1) then 
+     do k=1,nz 
         do i=1,nx 
-            ty(i,1,k)=fiajy*uy(i,1,k)+fibjy*(uy(i,2,k)+uy(i,2,k))&
-                                  +ficjy*(uy(i,3,k)+uy(i,3,k))&
-                                  +fidjy*(uy(i,4,k)+uy(i,4,k))
-            ty(i,2,k)=fiajy*uy(i,2,k)+fibjy*(uy(i,3,k)+uy(i,1,k))& 
-                                  +ficjy*(uy(i,4,k)+uy(i,2,k))&
-                                  +fidjy*(uy(i,5,k)+uy(i,3,k)) 
-            ty(i,3,k)=fiajy*uy(i,3,k)+fibjy*(uy(i,4,k)+uy(i,2,k))& 
-                                  +ficjy*(uy(i,5,k)+uy(i,1,k))&
-                                  +fidjy*(uy(i,6,k)+uy(i,2,k)) 
-            do j=4,ny-3 
-                ty(i,j,k)=fiajy*uy(i,j,k)+fibjy*(uy(i,j+1,k)+uy(i,j-1,k))& 
-                                +ficjy*(uy(i,j+2,k)+uy(i,j-2,k))&
-                                +fidjy*(uy(i,j+3,k)+uy(i,j-3,k)) 
-            enddo 
-            ty(i,ny,k)=fiajy*uy(i,ny,k)    +fibjy*(uy(i,ny-1,k)+uy(i,ny-1,k))&
-                                      +ficjy*(uy(i,ny-2,k)+uy(i,ny-2,k))&
-                                      +fidjy*(uy(i,ny-3,k)+uy(i,ny-3,k))
-            ty(i,ny-1,k)=fiajy*uy(i,ny-1,k)+fibjy*(uy(i,ny,k)  +uy(i,ny-2,k))& 
-                                      +ficjy*(uy(i,ny-1,k)+uy(i,ny-3,k))&
-                                      +fidjy*(uy(i,ny-2,k)+uy(i,ny-4,k)) 
-            ty(i,ny-2,k)=fiajy*uy(i,ny-2,k)+fibjy*(uy(i,ny-1,k)+uy(i,ny-3,k))& 
-                                      +ficjy*(uy(i,ny,k)+uy(i,ny-4,k))&
-                                      +fidjy*(uy(i,ny-1,k)+uy(i,ny-5,k)) 
-            do j=2,ny  
-                ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*fifsy(j) 
-            enddo
-            ty(i,ny,k)=ty(i,ny,k)*fifwy(ny) 
-            do j=ny-1,1,-1  
-                ty(i,j,k)=(ty(i,j,k)-fiffy(j)*ty(i,j+1,k))*fifwy(j) 
-            enddo 
-        enddo 
-        enddo 
-    endif
-    if (npaire==0) then 
-        do k=1,nz 
+           ty(i,1,k)=fiajy*uy(i,1,k)+fibjy*(uy(i,2,k)+uy(i,2,k))&
+                +ficjy*(uy(i,3,k)+uy(i,3,k))&
+                +fidjy*(uy(i,4,k)+uy(i,4,k))
+           ty(i,2,k)=fiajy*uy(i,2,k)+fibjy*(uy(i,3,k)+uy(i,1,k))& 
+                +ficjy*(uy(i,4,k)+uy(i,2,k))&
+                +fidjy*(uy(i,5,k)+uy(i,3,k)) 
+           ty(i,3,k)=fiajy*uy(i,3,k)+fibjy*(uy(i,4,k)+uy(i,2,k))& 
+                +ficjy*(uy(i,5,k)+uy(i,1,k))&
+                +fidjy*(uy(i,6,k)+uy(i,2,k)) 
+           do j=4,ny-3 
+              ty(i,j,k)=fiajy*uy(i,j,k)+fibjy*(uy(i,j+1,k)+uy(i,j-1,k))& 
+                   +ficjy*(uy(i,j+2,k)+uy(i,j-2,k))&
+                   +fidjy*(uy(i,j+3,k)+uy(i,j-3,k)) 
+           enddo
+           ty(i,ny,k)=fiajy*uy(i,ny,k)    +fibjy*(uy(i,ny-1,k)+uy(i,ny-1,k))&
+                +ficjy*(uy(i,ny-2,k)+uy(i,ny-2,k))&
+                +fidjy*(uy(i,ny-3,k)+uy(i,ny-3,k))
+           ty(i,ny-1,k)=fiajy*uy(i,ny-1,k)+fibjy*(uy(i,ny,k)  +uy(i,ny-2,k))& 
+                +ficjy*(uy(i,ny-1,k)+uy(i,ny-3,k))&
+                +fidjy*(uy(i,ny-2,k)+uy(i,ny-4,k)) 
+           ty(i,ny-2,k)=fiajy*uy(i,ny-2,k)+fibjy*(uy(i,ny-1,k)+uy(i,ny-3,k))& 
+                +ficjy*(uy(i,ny,k)+uy(i,ny-4,k))&
+                +fidjy*(uy(i,ny-1,k)+uy(i,ny-5,k)) 
+           do j=2,ny  
+              ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*fifsy(j) 
+           enddo
+           ty(i,ny,k)=ty(i,ny,k)*fifwy(ny) 
+           do j=ny-1,1,-1  
+              ty(i,j,k)=(ty(i,j,k)-fiffy(j)*ty(i,j+1,k))*fifwy(j) 
+           enddo
+        enddo
+     enddo
+  endif
+  if (npaire==0) then 
+     do k=1,nz 
         do i=1,nx 
-            ty(i,1,k)=zero !fiajy*uy(i,1,k)
-            ty(i,2,k)=fiajy*uy(i,2,k)+fibjy*(uy(i,3,k)+uy(i,1,k))& 
-                                     +ficjy*(uy(i,4,k)-uy(i,2,k))&
-                                     +fidjy*(uy(i,5,k)-uy(i,3,k)) 
-            ty(i,3,k)=fiajy*uy(i,3,k)+fibjy*(uy(i,4,k)+uy(i,2,k))& 
-                                     +ficjy*(uy(i,5,k)+uy(i,1,k))&
-                                     +fidjy*(uy(i,6,k)-uy(i,2,k)) 
-            do j=4,ny-3 
-                ty(i,j,k)=fiajy*uy(i,j,k)+fibjy*(uy(i,j+1,k)+uy(i,j-1,k))& 
-                                         +ficjy*(uy(i,j+2,k)+uy(i,j-2,k))&
-                                         +fidjy*(uy(i,j+3,k)+uy(i,j-3,k)) 
-            enddo 
-            ty(i,ny,k)=zero !fiajy*uy(i,ny,k)
-            ty(i,ny-1,k)=fiajy*uy(i,ny-1,k) +fibjy*(uy(i,ny,k)+uy(i,ny-2,k))& 
-                                            +ficjy*(-uy(i,ny-1,k)+uy(i,ny-3,k))&
-                                            +fidjy*(-uy(i,ny-2,k)+uy(i,ny-4,k)) 
-            ty(i,ny-2,k)=fiajy*uy(i,ny-2,k) +fibjy*(uy(i,ny-1,k)+uy(i,ny-3,k))& 
-                                            +ficjy*(uy(i,ny,k)+uy(i,ny-4,k))&
-                                            +fidjy*(-uy(i,ny-1,k)+uy(i,ny-5,k)) 
-            do j=2,ny  
-                ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*fifsy(j) 
-            enddo
-            ty(i,ny,k)=ty(i,ny,k)*fifwy(ny) 
-      
-            do j=ny-1,1,-1  
-                ty(i,j,k)=(ty(i,j,k)-fiffy(j)*ty(i,j+1,k))*fifwy(j) 
-            enddo 
-        enddo 
-        enddo 
-    endif
-
-    if (istret.ne.0) then   
-        do k=1,nz 
-           do j=1,ny 
-              do i=1,nx 
-                 ty(i,j,k)=ty(i,j,k)*ppy(j) 
-              enddo
+           ty(i,1,k)=zero !fiajy*uy(i,1,k)
+           ty(i,2,k)=fiajy*uy(i,2,k)+fibjy*(uy(i,3,k)+uy(i,1,k))& 
+                +ficjy*(uy(i,4,k)-uy(i,2,k))&
+                +fidjy*(uy(i,5,k)-uy(i,3,k)) 
+           ty(i,3,k)=fiajy*uy(i,3,k)+fibjy*(uy(i,4,k)+uy(i,2,k))& 
+                +ficjy*(uy(i,5,k)+uy(i,1,k))&
+                +fidjy*(uy(i,6,k)-uy(i,2,k)) 
+           do j=4,ny-3 
+              ty(i,j,k)=fiajy*uy(i,j,k)+fibjy*(uy(i,j+1,k)+uy(i,j-1,k))& 
+                   +ficjy*(uy(i,j+2,k)+uy(i,j-2,k))&
+                   +fidjy*(uy(i,j+3,k)+uy(i,j-3,k)) 
+           enddo
+           ty(i,ny,k)=zero !fiajy*uy(i,ny,k)
+           ty(i,ny-1,k)=fiajy*uy(i,ny-1,k) +fibjy*(uy(i,ny,k)+uy(i,ny-2,k))& 
+                +ficjy*(-uy(i,ny-1,k)+uy(i,ny-3,k))&
+                +fidjy*(-uy(i,ny-2,k)+uy(i,ny-4,k)) 
+           ty(i,ny-2,k)=fiajy*uy(i,ny-2,k) +fibjy*(uy(i,ny-1,k)+uy(i,ny-3,k))& 
+                +ficjy*(uy(i,ny,k)+uy(i,ny-4,k))&
+                +fidjy*(-uy(i,ny-1,k)+uy(i,ny-5,k)) 
+           do j=2,ny  
+              ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*fifsy(j) 
+           enddo
+           ty(i,ny,k)=ty(i,ny,k)*fifwy(ny) 
+
+           do j=ny-1,1,-1  
+              ty(i,j,k)=(ty(i,j,k)-fiffy(j)*ty(i,j+1,k))*fifwy(j) 
+           enddo
+        enddo
+     enddo
+  endif
+
+  if (istret.ne.0) then   
+     do k=1,nz 
+        do j=1,ny 
+           do i=1,nx 
+              ty(i,j,k)=ty(i,j,k)*ppy(j) 
            enddo
         enddo
-    endif
-    
-return
+     enddo
+  endif
+
+  return
 
 end subroutine fily_11
 
 
 subroutine fily_12(ty,uy,ry,fisy,fiffy,fifsy,fifwy,ppy,nx,ny,nz,npaire) 
-  
-USE param  
-USE parfiY 
-
-implicit none
-
-integer :: nx,ny,nz,i,j,k,npaire
-real(mytype), dimension(nx,ny,nz) :: ty,uy 
-real(mytype), dimension(nx,ny,nz) :: ry
-real(mytype), dimension(nx,nz)  :: fisy
-real(mytype), dimension(ny) :: fiffy,fifsy,fifwy,ppy
-    
-    if(iibm.eq.2) call lagpoly(uy)
-    if (npaire==1) then 
-        do k=1,nz 
+
+  USE param  
+  USE parfiY 
+
+  implicit none
+
+  integer :: nx,ny,nz,i,j,k,npaire
+  real(mytype), dimension(nx,ny,nz) :: ty,uy 
+  real(mytype), dimension(nx,ny,nz) :: ry
+  real(mytype), dimension(nx,nz)  :: fisy
+  real(mytype), dimension(ny) :: fiffy,fifsy,fifwy,ppy
+
+  if(iibm.eq.2) call lagpoly(uy)
+  if (npaire==1) then 
+     do k=1,nz 
         do i=1,nx 
-            ty(i,1,k)=fiajy*uy(i,1,k)+fibjy*(uy(i,2,k)+uy(i,2,k))&
-                                  +ficjy*(uy(i,3,k)+uy(i,3,k))&
-                                  +fidjy*(uy(i,4,k)+uy(i,4,k))
-            ty(i,2,k)=fiajy*uy(i,2,k)+fibjy*(uy(i,3,k)+uy(i,1,k))& 
-                                  +ficjy*(uy(i,4,k)+uy(i,2,k))&
-                                  +fidjy*(uy(i,5,k)+uy(i,3,k)) 
-            ty(i,3,k)=fiajy*uy(i,3,k)+fibjy*(uy(i,4,k)+uy(i,2,k))& 
-                                  +ficjy*(uy(i,5,k)+uy(i,1,k))&
-                                  +fidjy*(uy(i,6,k)+uy(i,2,k)) 
-            do j=4,ny-3 
-                ty(i,j,k)=fiajy*uy(i,j,k)+fibjy*(uy(i,j+1,k)+uy(i,j-1,k))& 
-                                +ficjy*(uy(i,j+2,k)+uy(i,j-2,k))&
-                                +fidjy*(uy(i,j+3,k)+uy(i,j-3,k)) 
-            enddo 
-            ty(i,ny,k)  =      uy(i,ny,k)
-            ty(i,ny-1,k)=fiamy*uy(i,ny  ,k)+fibmy*uy(i,ny-1,k)+ficmy*uy(i,ny-2,k)+&
-                         fidmy*uy(i,ny-3,k)
-            ty(i,ny-2,k)=fiapy*uy(i,ny  ,k)+fibpy*uy(i,ny-1,k)+ficpy*uy(i,ny-2,k)+&
-                         fidpy*uy(i,ny-3,k)+fiepy*uy(i,ny-4,k)+fifpy*uy(i,ny-5,k)
-            do j=2,ny  
-                ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*fifsy(j) 
-            enddo
-            ty(i,ny,k)=ty(i,ny,k)*fifwy(ny) 
-            do j=ny-1,1,-1  
-                ty(i,j,k)=(ty(i,j,k)-fiffy(j)*ty(i,j+1,k))*fifwy(j) 
-            enddo 
-        enddo 
-        enddo 
-    endif
-    if (npaire==0) then 
-        do k=1,nz 
+           ty(i,1,k)=fiajy*uy(i,1,k)+fibjy*(uy(i,2,k)+uy(i,2,k))&
+                +ficjy*(uy(i,3,k)+uy(i,3,k))&
+                +fidjy*(uy(i,4,k)+uy(i,4,k))
+           ty(i,2,k)=fiajy*uy(i,2,k)+fibjy*(uy(i,3,k)+uy(i,1,k))& 
+                +ficjy*(uy(i,4,k)+uy(i,2,k))&
+                +fidjy*(uy(i,5,k)+uy(i,3,k)) 
+           ty(i,3,k)=fiajy*uy(i,3,k)+fibjy*(uy(i,4,k)+uy(i,2,k))& 
+                +ficjy*(uy(i,5,k)+uy(i,1,k))&
+                +fidjy*(uy(i,6,k)+uy(i,2,k)) 
+           do j=4,ny-3 
+              ty(i,j,k)=fiajy*uy(i,j,k)+fibjy*(uy(i,j+1,k)+uy(i,j-1,k))& 
+                   +ficjy*(uy(i,j+2,k)+uy(i,j-2,k))&
+                   +fidjy*(uy(i,j+3,k)+uy(i,j-3,k)) 
+           enddo
+           ty(i,ny,k)  =      uy(i,ny,k)
+           ty(i,ny-1,k)=fiamy*uy(i,ny  ,k)+fibmy*uy(i,ny-1,k)+ficmy*uy(i,ny-2,k)+&
+                fidmy*uy(i,ny-3,k)
+           ty(i,ny-2,k)=fiapy*uy(i,ny  ,k)+fibpy*uy(i,ny-1,k)+ficpy*uy(i,ny-2,k)+&
+                fidpy*uy(i,ny-3,k)+fiepy*uy(i,ny-4,k)+fifpy*uy(i,ny-5,k)
+           do j=2,ny  
+              ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*fifsy(j) 
+           enddo
+           ty(i,ny,k)=ty(i,ny,k)*fifwy(ny) 
+           do j=ny-1,1,-1  
+              ty(i,j,k)=(ty(i,j,k)-fiffy(j)*ty(i,j+1,k))*fifwy(j) 
+           enddo
+        enddo
+     enddo
+  endif
+  if (npaire==0) then 
+     do k=1,nz 
         do i=1,nx 
-            ty(i,1,k)=zero !fiajy*uy(i,1,k)
-            ty(i,2,k)=fiajy*uy(i,2,k)+fibjy*(uy(i,3,k)+uy(i,1,k))& 
-                                     +ficjy*(uy(i,4,k)-uy(i,2,k))&
-                                     +fidjy*(uy(i,5,k)-uy(i,3,k)) 
-            ty(i,3,k)=fiajy*uy(i,3,k)+fibjy*(uy(i,4,k)+uy(i,2,k))& 
-                                     +ficjy*(uy(i,5,k)+uy(i,1,k))&
-                                     +fidjy*(uy(i,6,k)-uy(i,2,k)) 
-            do j=4,ny-3 
-                ty(i,j,k)=fiajy*uy(i,j,k)+fibjy*(uy(i,j+1,k)+uy(i,j-1,k))& 
-                                         +ficjy*(uy(i,j+2,k)+uy(i,j-2,k))&
-                                         +fidjy*(uy(i,j+3,k)+uy(i,j-3,k)) 
-            enddo 
-            ty(i,ny,k)  =      uy(i,ny,k)
-            ty(i,ny-1,k)=fiamy*uy(i,ny  ,k)+fibmy*uy(i,ny-1,k)+ficmy*uy(i,ny-2,k)+&
-                         fidmy*uy(i,ny-3,k)
-            ty(i,ny-2,k)=fiapy*uy(i,ny  ,k)+fibpy*uy(i,ny-1,k)+ficpy*uy(i,ny-2,k)+&
-                         fidpy*uy(i,ny-3,k)+fiepy*uy(i,ny-4,k)+fifpy*uy(i,ny-5,k)
-            do j=2,ny  
-                ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*fifsy(j) 
-            enddo
-            ty(i,ny,k)=ty(i,ny,k)*fifwy(ny) 
-      
-            do j=ny-1,1,-1  
-                ty(i,j,k)=(ty(i,j,k)-fiffy(j)*ty(i,j+1,k))*fifwy(j) 
-            enddo 
-        enddo 
-        enddo 
-    endif
-
-    if (istret.ne.0) then   
-        do k=1,nz 
-           do j=1,ny 
-              do i=1,nx 
-                 ty(i,j,k)=ty(i,j,k)*ppy(j) 
-              enddo
+           ty(i,1,k)=zero !fiajy*uy(i,1,k)
+           ty(i,2,k)=fiajy*uy(i,2,k)+fibjy*(uy(i,3,k)+uy(i,1,k))& 
+                +ficjy*(uy(i,4,k)-uy(i,2,k))&
+                +fidjy*(uy(i,5,k)-uy(i,3,k)) 
+           ty(i,3,k)=fiajy*uy(i,3,k)+fibjy*(uy(i,4,k)+uy(i,2,k))& 
+                +ficjy*(uy(i,5,k)+uy(i,1,k))&
+                +fidjy*(uy(i,6,k)-uy(i,2,k)) 
+           do j=4,ny-3 
+              ty(i,j,k)=fiajy*uy(i,j,k)+fibjy*(uy(i,j+1,k)+uy(i,j-1,k))& 
+                   +ficjy*(uy(i,j+2,k)+uy(i,j-2,k))&
+                   +fidjy*(uy(i,j+3,k)+uy(i,j-3,k)) 
+           enddo
+           ty(i,ny,k)  =      uy(i,ny,k)
+           ty(i,ny-1,k)=fiamy*uy(i,ny  ,k)+fibmy*uy(i,ny-1,k)+ficmy*uy(i,ny-2,k)+&
+                fidmy*uy(i,ny-3,k)
+           ty(i,ny-2,k)=fiapy*uy(i,ny  ,k)+fibpy*uy(i,ny-1,k)+ficpy*uy(i,ny-2,k)+&
+                fidpy*uy(i,ny-3,k)+fiepy*uy(i,ny-4,k)+fifpy*uy(i,ny-5,k)
+           do j=2,ny  
+              ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*fifsy(j) 
+           enddo
+           ty(i,ny,k)=ty(i,ny,k)*fifwy(ny) 
+
+           do j=ny-1,1,-1  
+              ty(i,j,k)=(ty(i,j,k)-fiffy(j)*ty(i,j+1,k))*fifwy(j) 
            enddo
         enddo
-    endif
+     enddo
+  endif
+
+  if (istret.ne.0) then   
+     do k=1,nz 
+        do j=1,ny 
+           do i=1,nx 
+              ty(i,j,k)=ty(i,j,k)*ppy(j) 
+           enddo
+        enddo
+     enddo
+  endif
 
 end subroutine fily_12
 
 
 subroutine fily_21(ty,uy,ry,fisy,fiffy,fifsy,fifwy,ppy,nx,ny,nz,npaire) 
-  
-USE param  
-USE parfiY 
-
-implicit none
-
-integer :: nx,ny,nz,i,j,k,npaire
-real(mytype), dimension(nx,ny,nz) :: ty,uy 
-real(mytype), dimension(nx,ny,nz) :: ry
-real(mytype), dimension(nx,nz)  :: fisy
-real(mytype), dimension(ny) :: fiffy,fifsy,fifwy,ppy
-    
-   if(iibm.eq.2) call lagpoly(uy)
-    
-   if (npaire==1) then 
-        do k=1,nz 
+
+  USE param  
+  USE parfiY 
+
+  implicit none
+
+  integer :: nx,ny,nz,i,j,k,npaire
+  real(mytype), dimension(nx,ny,nz) :: ty,uy 
+  real(mytype), dimension(nx,ny,nz) :: ry
+  real(mytype), dimension(nx,nz)  :: fisy
+  real(mytype), dimension(ny) :: fiffy,fifsy,fifwy,ppy
+
+  if(iibm.eq.2) call lagpoly(uy)
+
+  if (npaire==1) then 
+     do k=1,nz 
         do i=1,nx 
-            ty(i,1,k)=      uy(i,1,k)
-            ty(i,2,k)=fia2y*uy(i,1,k)+fib2y*uy(i,2,k)+fic2y*uy(i,3,k)+&
-                      fid2y*uy(i,4,k)
-            ty(i,3,k)=fia3y*uy(i,1,k)+fib3y*uy(i,2,k)+fic3y*uy(i,3,k)+&
-                      fid3y*uy(i,4,k)+fie3y*uy(i,5,k)+fif3y*uy(i,6,k)
-            do j=4,ny-3 
-                ty(i,j,k)=fiajy*uy(i,j,k)+fibjy*(uy(i,j+1,k)+uy(i,j-1,k))& 
-                                +ficjy*(uy(i,j+2,k)+uy(i,j-2,k))&
-                                +fidjy*(uy(i,j+3,k)+uy(i,j-3,k)) 
-            enddo 
-            ty(i,ny,k)=fiajy*uy(i,ny,k)    +fibjy*(uy(i,ny-1,k)+uy(i,ny-1,k))&
-                                      +ficjy*(uy(i,ny-2,k)+uy(i,ny-2,k))&
-                                      +fidjy*(uy(i,ny-3,k)+uy(i,ny-3,k))
-            ty(i,ny-1,k)=fiajy*uy(i,ny-1,k)+fibjy*(uy(i,ny,k)  +uy(i,ny-2,k))& 
-                                      +ficjy*(uy(i,ny-1,k)+uy(i,ny-3,k))&
-                                      +fidjy*(uy(i,ny-2,k)+uy(i,ny-4,k)) 
-            ty(i,ny-2,k)=fiajy*uy(i,ny-2,k)+fibjy*(uy(i,ny-1,k)+uy(i,ny-3,k))& 
-                                      +ficjy*(uy(i,ny,k)+uy(i,ny-4,k))&
-                                      +fidjy*(uy(i,ny-1,k)+uy(i,ny-5,k)) 
-            do j=2,ny  
-                ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*fifsy(j) 
-            enddo
-            ty(i,ny,k)=ty(i,ny,k)*fifwy(ny) 
-            do j=ny-1,1,-1  
-                ty(i,j,k)=(ty(i,j,k)-fiffy(j)*ty(i,j+1,k))*fifwy(j) 
-            enddo 
-        enddo 
-        enddo 
-    endif
-    if (npaire==0) then 
-        do k=1,nz 
+           ty(i,1,k)=      uy(i,1,k)
+           ty(i,2,k)=fia2y*uy(i,1,k)+fib2y*uy(i,2,k)+fic2y*uy(i,3,k)+&
+                fid2y*uy(i,4,k)
+           ty(i,3,k)=fia3y*uy(i,1,k)+fib3y*uy(i,2,k)+fic3y*uy(i,3,k)+&
+                fid3y*uy(i,4,k)+fie3y*uy(i,5,k)+fif3y*uy(i,6,k)
+           do j=4,ny-3 
+              ty(i,j,k)=fiajy*uy(i,j,k)+fibjy*(uy(i,j+1,k)+uy(i,j-1,k))& 
+                   +ficjy*(uy(i,j+2,k)+uy(i,j-2,k))&
+                   +fidjy*(uy(i,j+3,k)+uy(i,j-3,k)) 
+           enddo
+           ty(i,ny,k)=fiajy*uy(i,ny,k)    +fibjy*(uy(i,ny-1,k)+uy(i,ny-1,k))&
+                +ficjy*(uy(i,ny-2,k)+uy(i,ny-2,k))&
+                +fidjy*(uy(i,ny-3,k)+uy(i,ny-3,k))
+           ty(i,ny-1,k)=fiajy*uy(i,ny-1,k)+fibjy*(uy(i,ny,k)  +uy(i,ny-2,k))& 
+                +ficjy*(uy(i,ny-1,k)+uy(i,ny-3,k))&
+                +fidjy*(uy(i,ny-2,k)+uy(i,ny-4,k)) 
+           ty(i,ny-2,k)=fiajy*uy(i,ny-2,k)+fibjy*(uy(i,ny-1,k)+uy(i,ny-3,k))& 
+                +ficjy*(uy(i,ny,k)+uy(i,ny-4,k))&
+                +fidjy*(uy(i,ny-1,k)+uy(i,ny-5,k)) 
+           do j=2,ny  
+              ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*fifsy(j) 
+           enddo
+           ty(i,ny,k)=ty(i,ny,k)*fifwy(ny) 
+           do j=ny-1,1,-1  
+              ty(i,j,k)=(ty(i,j,k)-fiffy(j)*ty(i,j+1,k))*fifwy(j) 
+           enddo
+        enddo
+     enddo
+  endif
+  if (npaire==0) then 
+     do k=1,nz 
         do i=1,nx 
-            ty(i,1,k)=      uy(i,1,k)
-            ty(i,2,k)=fia2y*uy(i,1,k)+fib2y*uy(i,2,k)+fic2y*uy(i,3,k)+&
-                      fid2y*uy(i,4,k)
-            ty(i,3,k)=fia3y*uy(i,1,k)+fib3y*uy(i,2,k)+fic3y*uy(i,3,k)+&
-                      fid3y*uy(i,4,k)+fie3y*uy(i,5,k)+fif3y*uy(i,6,k)
-            do j=4,ny-3 
-                ty(i,j,k)=fiajy*uy(i,j,k)+fibjy*(uy(i,j+1,k)+uy(i,j-1,k))& 
-                                         +ficjy*(uy(i,j+2,k)+uy(i,j-2,k))&
-                                         +fidjy*(uy(i,j+3,k)+uy(i,j-3,k)) 
-            enddo 
-            ty(i,ny,k)=zero !fiajy*uy(i,ny,k)
-            ty(i,ny-1,k)=fiajy*uy(i,ny-1,k) +fibjy*(uy(i,ny,k)+uy(i,ny-2,k))& 
-                                            +ficjy*(-uy(i,ny-1,k)+uy(i,ny-3,k))&
-                                            +fidjy*(-uy(i,ny-2,k)+uy(i,ny-4,k)) 
-            ty(i,ny-2,k)=fiajy*uy(i,ny-2,k) +fibjy*(uy(i,ny-1,k)+uy(i,ny-3,k))& 
-                                            +ficjy*(uy(i,ny,k)+uy(i,ny-4,k))&
-                                            +fidjy*(-uy(i,ny-1,k)+uy(i,ny-5,k)) 
-            do j=2,ny  
-                ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*fifsy(j) 
-            enddo
-            ty(i,ny,k)=ty(i,ny,k)*fifwy(ny) 
-      
-            do j=ny-1,1,-1  
-                ty(i,j,k)=(ty(i,j,k)-fiffy(j)*ty(i,j+1,k))*fifwy(j) 
-            enddo 
-        enddo 
-        enddo 
-    endif
-
-    if (istret.ne.0) then   
-        do k=1,nz 
-           do j=1,ny 
-              do i=1,nx 
-                 ty(i,j,k)=ty(i,j,k)*ppy(j) 
-              enddo
+           ty(i,1,k)=      uy(i,1,k)
+           ty(i,2,k)=fia2y*uy(i,1,k)+fib2y*uy(i,2,k)+fic2y*uy(i,3,k)+&
+                fid2y*uy(i,4,k)
+           ty(i,3,k)=fia3y*uy(i,1,k)+fib3y*uy(i,2,k)+fic3y*uy(i,3,k)+&
+                fid3y*uy(i,4,k)+fie3y*uy(i,5,k)+fif3y*uy(i,6,k)
+           do j=4,ny-3 
+              ty(i,j,k)=fiajy*uy(i,j,k)+fibjy*(uy(i,j+1,k)+uy(i,j-1,k))& 
+                   +ficjy*(uy(i,j+2,k)+uy(i,j-2,k))&
+                   +fidjy*(uy(i,j+3,k)+uy(i,j-3,k)) 
+           enddo
+           ty(i,ny,k)=zero !fiajy*uy(i,ny,k)
+           ty(i,ny-1,k)=fiajy*uy(i,ny-1,k) +fibjy*(uy(i,ny,k)+uy(i,ny-2,k))& 
+                +ficjy*(-uy(i,ny-1,k)+uy(i,ny-3,k))&
+                +fidjy*(-uy(i,ny-2,k)+uy(i,ny-4,k)) 
+           ty(i,ny-2,k)=fiajy*uy(i,ny-2,k) +fibjy*(uy(i,ny-1,k)+uy(i,ny-3,k))& 
+                +ficjy*(uy(i,ny,k)+uy(i,ny-4,k))&
+                +fidjy*(-uy(i,ny-1,k)+uy(i,ny-5,k)) 
+           do j=2,ny  
+              ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*fifsy(j) 
+           enddo
+           ty(i,ny,k)=ty(i,ny,k)*fifwy(ny) 
+
+           do j=ny-1,1,-1  
+              ty(i,j,k)=(ty(i,j,k)-fiffy(j)*ty(i,j+1,k))*fifwy(j) 
            enddo
         enddo
-    endif
-    
-return
+     enddo
+  endif
+
+  if (istret.ne.0) then   
+     do k=1,nz 
+        do j=1,ny 
+           do i=1,nx 
+              ty(i,j,k)=ty(i,j,k)*ppy(j) 
+           enddo
+        enddo
+     enddo
+  endif
+
+  return
 
 end subroutine fily_21
 
 subroutine fily_22(ty,uy,ry,fisy,fiffy,fifsy,fifwy,ppy,nx,ny,nz,npaire) 
-  
-USE param  
-USE parfiY 
-
-implicit none
-
-integer :: nx,ny,nz,i,j,k,npaire
-real(mytype), dimension(nx,ny,nz) :: ty,uy 
-real(mytype), dimension(nx,ny,nz) :: ry
-real(mytype), dimension(nx,nz)  :: fisy
-real(mytype), dimension(ny) :: fiffy,fifsy,fifwy,ppy
-    
-   if(iibm.eq.2) call lagpoly(uy)
-
-   do k=1,nz
-   do i=1,nx 
-      ty(i,1,k)=      uy(i,1,k)
-      ty(i,2,k)=fia2y*uy(i,1,k)+fib2y*uy(i,2,k)+fic2y*uy(i,3,k)+&
-                fid2y*uy(i,4,k)
-      ty(i,3,k)=fia3y*uy(i,1,k)+fib3y*uy(i,2,k)+fic3y*uy(i,3,k)+&
-                fid3y*uy(i,4,k)+fie3y*uy(i,5,k)+fif3y*uy(i,6,k)
-      do j=4,ny-3
-        ty(i,j,k)=fiajy*uy(i,j,k) +fibjy*(uy(i,j+1,k)+uy(i,j-1,k))& 
-                                  +ficjy*(uy(i,j+2,k)+uy(i,j-2,k))&
-                                  +fidjy*(uy(i,j+3,k)+uy(i,j-3,k)) 
-      enddo
-      ty(i,ny,k)  =      uy(i,ny  ,k)
-      ty(i,ny-1,k)=fiamy*uy(i,ny  ,k)+fibmy*uy(i,ny-1,k)+ficmy*uy(i,ny-2,k)+&
-                   fidmy*uy(i,ny-3,k)
-      ty(i,ny-2,k)=fiapy*uy(i,ny  ,k)+fibpy*uy(i,ny-1,k)+ficpy*uy(i,ny-2,k)+&
-                   fidpy*uy(i,ny-3,k)+fiepy*uy(i,ny-4,k)+fifpy*uy(i,ny-5,k)
-      do j=2,ny 
-         ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*fifsy(j) 
-      enddo
-      ty(i,ny,k)=ty(i,ny,k)*fifwy(ny) 
-      do j=ny-1,1,-1
-         ty(i,j,k)=(ty(i,j,k)-fiffy(j)*ty(i,j+1,k))*fifwy(j) 
-      enddo
-   enddo 
-   enddo 
-
-   return
+
+  USE param  
+  USE parfiY 
+
+  implicit none
+
+  integer :: nx,ny,nz,i,j,k,npaire
+  real(mytype), dimension(nx,ny,nz) :: ty,uy 
+  real(mytype), dimension(nx,ny,nz) :: ry
+  real(mytype), dimension(nx,nz)  :: fisy
+  real(mytype), dimension(ny) :: fiffy,fifsy,fifwy,ppy
+
+  if(iibm.eq.2) call lagpoly(uy)
+
+  do k=1,nz
+     do i=1,nx 
+        ty(i,1,k)=      uy(i,1,k)
+        ty(i,2,k)=fia2y*uy(i,1,k)+fib2y*uy(i,2,k)+fic2y*uy(i,3,k)+&
+             fid2y*uy(i,4,k)
+        ty(i,3,k)=fia3y*uy(i,1,k)+fib3y*uy(i,2,k)+fic3y*uy(i,3,k)+&
+             fid3y*uy(i,4,k)+fie3y*uy(i,5,k)+fif3y*uy(i,6,k)
+        do j=4,ny-3
+           ty(i,j,k)=fiajy*uy(i,j,k) +fibjy*(uy(i,j+1,k)+uy(i,j-1,k))& 
+                +ficjy*(uy(i,j+2,k)+uy(i,j-2,k))&
+                +fidjy*(uy(i,j+3,k)+uy(i,j-3,k)) 
+        enddo
+        ty(i,ny,k)  =      uy(i,ny  ,k)
+        ty(i,ny-1,k)=fiamy*uy(i,ny  ,k)+fibmy*uy(i,ny-1,k)+ficmy*uy(i,ny-2,k)+&
+             fidmy*uy(i,ny-3,k)
+        ty(i,ny-2,k)=fiapy*uy(i,ny  ,k)+fibpy*uy(i,ny-1,k)+ficpy*uy(i,ny-2,k)+&
+             fidpy*uy(i,ny-3,k)+fiepy*uy(i,ny-4,k)+fifpy*uy(i,ny-5,k)
+        do j=2,ny 
+           ty(i,j,k)=ty(i,j,k)-ty(i,j-1,k)*fifsy(j) 
+        enddo
+        ty(i,ny,k)=ty(i,ny,k)*fifwy(ny) 
+        do j=ny-1,1,-1
+           ty(i,j,k)=(ty(i,j,k)-fiffy(j)*ty(i,j+1,k))*fifwy(j) 
+        enddo
+     enddo
+  enddo
+
+  return
 
 end subroutine fily_22
 
 subroutine filz_00(tz,uz,rz,fisz,fiffz,fifsz,fifwz,nx,ny,nz,npaire) 
-  
-USE param  
-USE parfiZ 
 
-implicit none
+  USE param  
+  USE parfiZ 
+
+  implicit none
 
-integer :: nx,ny,nz,npaire,i,j,k
-real(mytype), dimension(nx,ny,nz) :: tz,uz,rz
-real(mytype), dimension(nx,ny) :: fisz
-real(mytype), dimension(nz) :: fiffz,fifsz,fifwz
+  integer :: nx,ny,nz,npaire,i,j,k
+  real(mytype), dimension(nx,ny,nz) :: tz,uz,rz
+  real(mytype), dimension(nx,ny) :: fisz
+  real(mytype), dimension(nz) :: fiffz,fifsz,fifwz
 
   if(iibm.eq.2) call lagpolz(uz)
 
-   do j=1,ny 
-   do i=1,nx 
-      tz(i,j,1)=fiakz*uz(i,j,1)+fibkz*(uz(i,j,2)+uz(i,j,nz))& 
-                               +fickz*(uz(i,j,3)+uz(i,j,nz-1))&
-                               +fidkz*(uz(i,j,4)+uz(i,j,nz-2)) 
-      rz(i,j,1)=-1.
-      tz(i,j,2)=fiakz*uz(i,j,2)+fibkz*(uz(i,j,3)+uz(i,j,1))&
-                               +fickz*(uz(i,j,4)+uz(i,j,nz))& 
-                               +fidkz*(uz(i,j,5)+uz(i,j,nz-1)) 
-      rz(i,j,2)=0. 
-      tz(i,j,3)=fiakz*uz(i,j,3)+fibkz*(uz(i,j,4)+uz(i,j,2))&
-                               +fickz*(uz(i,j,5)+uz(i,j,1))& 
-                               +fidkz*(uz(i,j,6)+uz(i,j,nz)) 
-      rz(i,j,3)=0.
-   do k=4,nz-3
-         tz(i,j,k)=fiakz*uz(i,j,k)+fibkz*(uz(i,j,k+1)+uz(i,j,k-1))& 
-                                  +fickz*(uz(i,j,k+2)+uz(i,j,k-2))&
-                                  +fidkz*(uz(i,j,k+3)+uz(i,j,k-3)) 
-         rz(i,j,k)=0. 
-   enddo
-      tz(i,j,nz-2)=fiakz*uz(i,j,nz-2)+fibkz*(uz(i,j,nz-3)+uz(i,j,nz-1))&
-                                     +fickz*(uz(i,j,nz-4)+uz(i,j,nz))& 
-                                     +fidkz*(uz(i,j,nz-5)+uz(i,j,1)) 
-      rz(i,j,nz-2)=0. 
-      tz(i,j,nz-1)=fiakz*uz(i,j,nz-1)+fibkz*(uz(i,j,nz-2)+uz(i,j,nz))&
-                                     +fickz*(uz(i,j,nz-3)+uz(i,j,1))& 
-                                     +fidkz*(uz(i,j,nz-4)+uz(i,j,2)) 
-      rz(i,j,nz-1)=0. 
-      tz(i,j,nz)=fiakz*uz(i,j,nz)+fibkz*(uz(i,j,nz-1)+uz(i,j,1))&
-                                 +fickz*(uz(i,j,nz-2)+uz(i,j,2))& 
-                                 +fidkz*(uz(i,j,nz-3)+uz(i,j,3)) 
-      rz(i,j,nz)=fialkz           
-   do k=2,nz
-         tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*fifsz(k) 
-         rz(i,j,k)=rz(i,j,k)-rz(i,j,k-1)*fifsz(k) 
-   enddo
-      tz(i,j,nz)=tz(i,j,nz)*fifwz(nz) 
-      rz(i,j,nz)=rz(i,j,nz)*fifwz(nz) 
-   do k=nz-1,1,-1
-         tz(i,j,k)=(tz(i,j,k)-fiffz(k)*tz(i,j,k+1))*fifwz(k) 
-         rz(i,j,k)=(rz(i,j,k)-fiffz(k)*rz(i,j,k+1))*fifwz(k) 
-   enddo   
-    fisz(i,j)=(tz(i,j,1)-fialkz*tz(i,j,nz))&
-           /(1.+rz(i,j,1)-fialkz*rz(i,j,nz)) 
-   do k=1,nz 
-         tz(i,j,k)=tz(i,j,k)-fisz(i,j)*rz(i,j,k) 
-   enddo
-   
-   enddo
-   enddo
-
-return  
+  do j=1,ny 
+     do i=1,nx 
+        tz(i,j,1)=fiakz*uz(i,j,1)+fibkz*(uz(i,j,2)+uz(i,j,nz))& 
+             +fickz*(uz(i,j,3)+uz(i,j,nz-1))&
+             +fidkz*(uz(i,j,4)+uz(i,j,nz-2)) 
+        rz(i,j,1)=-1.
+        tz(i,j,2)=fiakz*uz(i,j,2)+fibkz*(uz(i,j,3)+uz(i,j,1))&
+             +fickz*(uz(i,j,4)+uz(i,j,nz))& 
+             +fidkz*(uz(i,j,5)+uz(i,j,nz-1)) 
+        rz(i,j,2)=0. 
+        tz(i,j,3)=fiakz*uz(i,j,3)+fibkz*(uz(i,j,4)+uz(i,j,2))&
+             +fickz*(uz(i,j,5)+uz(i,j,1))& 
+             +fidkz*(uz(i,j,6)+uz(i,j,nz)) 
+        rz(i,j,3)=0.
+        do k=4,nz-3
+           tz(i,j,k)=fiakz*uz(i,j,k)+fibkz*(uz(i,j,k+1)+uz(i,j,k-1))& 
+                +fickz*(uz(i,j,k+2)+uz(i,j,k-2))&
+                +fidkz*(uz(i,j,k+3)+uz(i,j,k-3)) 
+           rz(i,j,k)=0. 
+        enddo
+        tz(i,j,nz-2)=fiakz*uz(i,j,nz-2)+fibkz*(uz(i,j,nz-3)+uz(i,j,nz-1))&
+             +fickz*(uz(i,j,nz-4)+uz(i,j,nz))& 
+             +fidkz*(uz(i,j,nz-5)+uz(i,j,1)) 
+        rz(i,j,nz-2)=0. 
+        tz(i,j,nz-1)=fiakz*uz(i,j,nz-1)+fibkz*(uz(i,j,nz-2)+uz(i,j,nz))&
+             +fickz*(uz(i,j,nz-3)+uz(i,j,1))& 
+             +fidkz*(uz(i,j,nz-4)+uz(i,j,2)) 
+        rz(i,j,nz-1)=0. 
+        tz(i,j,nz)=fiakz*uz(i,j,nz)+fibkz*(uz(i,j,nz-1)+uz(i,j,1))&
+             +fickz*(uz(i,j,nz-2)+uz(i,j,2))& 
+             +fidkz*(uz(i,j,nz-3)+uz(i,j,3)) 
+        rz(i,j,nz)=fialkz           
+        do k=2,nz
+           tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*fifsz(k) 
+           rz(i,j,k)=rz(i,j,k)-rz(i,j,k-1)*fifsz(k) 
+        enddo
+        tz(i,j,nz)=tz(i,j,nz)*fifwz(nz) 
+        rz(i,j,nz)=rz(i,j,nz)*fifwz(nz) 
+        do k=nz-1,1,-1
+           tz(i,j,k)=(tz(i,j,k)-fiffz(k)*tz(i,j,k+1))*fifwz(k) 
+           rz(i,j,k)=(rz(i,j,k)-fiffz(k)*rz(i,j,k+1))*fifwz(k) 
+        enddo
+        fisz(i,j)=(tz(i,j,1)-fialkz*tz(i,j,nz))&
+             /(1.+rz(i,j,1)-fialkz*rz(i,j,nz)) 
+        do k=1,nz 
+           tz(i,j,k)=tz(i,j,k)-fisz(i,j)*rz(i,j,k) 
+        enddo
+
+     enddo
+  enddo
+
+  return  
 end subroutine filz_00
 
 subroutine filz_11(tz,uz,rz,fisz,fiffz,fifsz,fifwz,nx,ny,nz,npaire) 
-  
-USE param  
-USE parfiZ 
 
-implicit none
+  USE param  
+  USE parfiZ 
+
+  implicit none
 
-integer :: nx,ny,nz,npaire,i,j,k
-real(mytype), dimension(nx,ny,nz) :: tz,uz,rz
-real(mytype), dimension(nx,ny) :: fisz
-real(mytype), dimension(nz) :: fiffz,fifsz,fifwz
+  integer :: nx,ny,nz,npaire,i,j,k
+  real(mytype), dimension(nx,ny,nz) :: tz,uz,rz
+  real(mytype), dimension(nx,ny) :: fisz
+  real(mytype), dimension(nz) :: fiffz,fifsz,fifwz
 
   if(iibm.eq.2) call lagpolz(uz)
-   
-    if (npaire==1) then 
-    do j=1,ny 
-    do i=1,nx 
-        tz(i,j,1)=fiakz*uz(i,j,1)+fibkz*(uz(i,j,2)+uz(i,j,2))&
-                                 +fickz*(uz(i,j,3)+uz(i,j,3))&
-                                 +fidkz*(uz(i,j,4)+uz(i,j,4))
-        tz(i,j,2)=fiakz*uz(i,j,2)+fibkz*(uz(i,j,3)+uz(i,j,1))& 
-                                 +fickz*(uz(i,j,4)+uz(i,j,2))&
-                                 +fidkz*(uz(i,j,5)+uz(i,j,3)) 
-        tz(i,j,3)=fiakz*uz(i,j,3)+fibkz*(uz(i,j,4)+uz(i,j,2))& 
-                                 +fickz*(uz(i,j,5)+uz(i,j,1))&
-                                 +fidkz*(uz(i,j,6)+uz(i,j,2)) 
-        do k=4,nz-3 
-            tz(i,j,k)=fiakz*uz(i,j,k)+fibkz*(uz(i,j,k+1)+uz(i,j,k-1))& 
-                                     +fickz*(uz(i,j,k+2)+uz(i,j,k-2))&
-                                     +fidkz*(uz(i,j,k+3)+uz(i,j,k-3)) 
-        enddo 
-        tz(i,j,nz)=fiakz*uz(i,j,nz)    +fibkz*(uz(i,j,nz-1)+uz(i,j,nz-1))&
-                                       +fickz*(uz(i,j,nz-2)+uz(i,j,nz-2))&
-                                       +fidkz*(uz(i,j,nz-3)+uz(i,j,nz-3))
-        tz(i,j,nz-1)=fiakz*uz(i,j,nz-1)+fibkz*(uz(i,j,nz  )+uz(i,j,nz-2))& 
-                                       +fickz*(uz(i,j,nz-1)+uz(i,j,nz-3))&
-                                       +fidkz*(uz(i,j,nz-2)+uz(i,j,nz-4)) 
-        tz(i,j,nz-2)=fiakz*uz(i,j,nz-2)+fibkz*(uz(i,j,nz-1)+uz(i,j,nz-3))& 
-                                       +fickz*(uz(i,j,nz  )+uz(i,j,nz-4))&
-                                       +fidkz*(uz(i,j,nz-1)+uz(i,j,nz-5)) 
-        do k=2,nz  
-            tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*fifsz(k) 
+
+  if (npaire==1) then 
+     do j=1,ny 
+        do i=1,nx 
+           tz(i,j,1)=fiakz*uz(i,j,1)+fibkz*(uz(i,j,2)+uz(i,j,2))&
+                +fickz*(uz(i,j,3)+uz(i,j,3))&
+                +fidkz*(uz(i,j,4)+uz(i,j,4))
+           tz(i,j,2)=fiakz*uz(i,j,2)+fibkz*(uz(i,j,3)+uz(i,j,1))& 
+                +fickz*(uz(i,j,4)+uz(i,j,2))&
+                +fidkz*(uz(i,j,5)+uz(i,j,3)) 
+           tz(i,j,3)=fiakz*uz(i,j,3)+fibkz*(uz(i,j,4)+uz(i,j,2))& 
+                +fickz*(uz(i,j,5)+uz(i,j,1))&
+                +fidkz*(uz(i,j,6)+uz(i,j,2)) 
+           do k=4,nz-3 
+              tz(i,j,k)=fiakz*uz(i,j,k)+fibkz*(uz(i,j,k+1)+uz(i,j,k-1))& 
+                   +fickz*(uz(i,j,k+2)+uz(i,j,k-2))&
+                   +fidkz*(uz(i,j,k+3)+uz(i,j,k-3)) 
+           enddo
+           tz(i,j,nz)=fiakz*uz(i,j,nz)    +fibkz*(uz(i,j,nz-1)+uz(i,j,nz-1))&
+                +fickz*(uz(i,j,nz-2)+uz(i,j,nz-2))&
+                +fidkz*(uz(i,j,nz-3)+uz(i,j,nz-3))
+           tz(i,j,nz-1)=fiakz*uz(i,j,nz-1)+fibkz*(uz(i,j,nz  )+uz(i,j,nz-2))& 
+                +fickz*(uz(i,j,nz-1)+uz(i,j,nz-3))&
+                +fidkz*(uz(i,j,nz-2)+uz(i,j,nz-4)) 
+           tz(i,j,nz-2)=fiakz*uz(i,j,nz-2)+fibkz*(uz(i,j,nz-1)+uz(i,j,nz-3))& 
+                +fickz*(uz(i,j,nz  )+uz(i,j,nz-4))&
+                +fidkz*(uz(i,j,nz-1)+uz(i,j,nz-5)) 
+           do k=2,nz  
+              tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*fifsz(k) 
+           enddo
+           tz(i,j,nz)=tz(i,j,nz)*fifwz(nz) 
+           do k=nz-1,1,-1  
+              tz(i,j,k)=(tz(i,j,k)-fiffz(k)*tz(i,j,k+1))*fifwz(k) 
+           enddo
         enddo
-        tz(i,j,nz)=tz(i,j,nz)*fifwz(nz) 
-        do k=nz-1,1,-1  
-            tz(i,j,k)=(tz(i,j,k)-fiffz(k)*tz(i,j,k+1))*fifwz(k) 
-        enddo 
-    enddo 
-    enddo 
-    endif
-    if (npaire==0) then 
-        do j=1,ny 
+     enddo
+  endif
+  if (npaire==0) then 
+     do j=1,ny 
         do i=1,nx 
-            tz(i,j,1)=zero 
-            tz(i,j,2)=fiakz*uz(i,j,2)+fibkz*(uz(i,j,3)+uz(i,j,1))& 
-                                     +fickz*(uz(i,j,4)-uz(i,j,2))&
-                                     +fidkz*(uz(i,j,5)-uz(i,j,3)) 
-            tz(i,j,3)=fiakz*uz(i,j,3)+fibkz*(uz(i,j,4)+uz(i,j,2))& 
-                                     +fickz*(uz(i,j,5)+uz(i,j,1))&
-                                     +fidkz*(uz(i,j,6)-uz(i,j,2)) 
-            do k=4,nz-3 
-                tz(i,j,k)=fiakz*uz(i,j,k)+fibkz*(uz(i,j,k+1)+uz(i,j,k-1))& 
-                                         +fickz*(uz(i,j,k+2)+uz(i,j,k-2))&
-                                         +fidkz*(uz(i,j,k+3)+uz(i,j,k-3)) 
-            enddo 
-            tz(i,j,nz)=zero 
-            tz(i,j,nz-1)=fiakz*uz(i,j,nz-1) +fibkz*( uz(i,j,nz  )+uz(i,j,nz-2))& 
-                                            +fickz*(-uz(i,j,nz-1)+uz(i,j,nz-3))&
-                                            +fidkz*(-uz(i,j,nz-2)+uz(i,j,nz-4)) 
-            tz(i,j,nz-2)=fiakz*uz(i,j,nz-2) +fibkz*( uz(i,j,nz-1)+uz(i,j,nz-3))& 
-                                            +fickz*( uz(i,j,nz  )+uz(i,j,nz-4))&
-                                            +fidkz*(-uz(i,j,nz-1)+uz(i,j,nz-5)) 
-            do k=2,nz  
-                tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*fifsz(k) 
-            enddo
-                tz(i,j,nz)=tz(i,j,nz)*fifwz(nz) 
-      
-            do k=nz-1,1,-1  
-                tz(i,j,k)=(tz(i,j,k)-fiffz(k)*tz(i,j,k+1))*fifwz(k) 
-            enddo 
-        enddo 
-        enddo 
-    endif
-
-    return
+           tz(i,j,1)=zero 
+           tz(i,j,2)=fiakz*uz(i,j,2)+fibkz*(uz(i,j,3)+uz(i,j,1))& 
+                +fickz*(uz(i,j,4)-uz(i,j,2))&
+                +fidkz*(uz(i,j,5)-uz(i,j,3)) 
+           tz(i,j,3)=fiakz*uz(i,j,3)+fibkz*(uz(i,j,4)+uz(i,j,2))& 
+                +fickz*(uz(i,j,5)+uz(i,j,1))&
+                +fidkz*(uz(i,j,6)-uz(i,j,2)) 
+           do k=4,nz-3 
+              tz(i,j,k)=fiakz*uz(i,j,k)+fibkz*(uz(i,j,k+1)+uz(i,j,k-1))& 
+                   +fickz*(uz(i,j,k+2)+uz(i,j,k-2))&
+                   +fidkz*(uz(i,j,k+3)+uz(i,j,k-3)) 
+           enddo
+           tz(i,j,nz)=zero 
+           tz(i,j,nz-1)=fiakz*uz(i,j,nz-1) +fibkz*( uz(i,j,nz  )+uz(i,j,nz-2))& 
+                +fickz*(-uz(i,j,nz-1)+uz(i,j,nz-3))&
+                +fidkz*(-uz(i,j,nz-2)+uz(i,j,nz-4)) 
+           tz(i,j,nz-2)=fiakz*uz(i,j,nz-2) +fibkz*( uz(i,j,nz-1)+uz(i,j,nz-3))& 
+                +fickz*( uz(i,j,nz  )+uz(i,j,nz-4))&
+                +fidkz*(-uz(i,j,nz-1)+uz(i,j,nz-5)) 
+           do k=2,nz  
+              tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*fifsz(k) 
+           enddo
+           tz(i,j,nz)=tz(i,j,nz)*fifwz(nz) 
+
+           do k=nz-1,1,-1  
+              tz(i,j,k)=(tz(i,j,k)-fiffz(k)*tz(i,j,k+1))*fifwz(k) 
+           enddo
+        enddo
+     enddo
+  endif
+
+  return
 end subroutine filz_11
 
 subroutine filz_12(tz,uz,rz,fisz,fiffz,fifsz,fifwz,nx,ny,nz,npaire) 
-  
-USE param  
-USE parfiZ 
 
-implicit none
+  USE param  
+  USE parfiZ 
 
-integer :: nx,ny,nz,npaire,i,j,k
-real(mytype), dimension(nx,ny,nz) :: tz,uz,rz
-real(mytype), dimension(nx,ny) :: fisz
-real(mytype), dimension(nz) :: fiffz,fifsz,fifwz
+  implicit none
+
+  integer :: nx,ny,nz,npaire,i,j,k
+  real(mytype), dimension(nx,ny,nz) :: tz,uz,rz
+  real(mytype), dimension(nx,ny) :: fisz
+  real(mytype), dimension(nz) :: fiffz,fifsz,fifwz
 
   if(iibm.eq.2) call lagpolz(uz)
-   
+
   if (npaire==1) then 
-  do j=1,ny 
-  do i=1,nx 
-      tz(i,j,1)=fiakz*uz(i,j,1)+fibkz*(uz(i,j,2)+uz(i,j,2))&
-                               +fickz*(uz(i,j,3)+uz(i,j,3))&
-                               +fidkz*(uz(i,j,4)+uz(i,j,4))
-      tz(i,j,2)=fiakz*uz(i,j,2)+fibkz*(uz(i,j,3)+uz(i,j,1))& 
-                               +fickz*(uz(i,j,4)+uz(i,j,2))&
-                               +fidkz*(uz(i,j,5)+uz(i,j,3)) 
-      tz(i,j,3)=fiakz*uz(i,j,3)+fibkz*(uz(i,j,4)+uz(i,j,2))& 
-                               +fickz*(uz(i,j,5)+uz(i,j,1))&
-                               +fidkz*(uz(i,j,6)+uz(i,j,2)) 
-      do k=4,nz-3 
-          tz(i,j,k)=fiakz*uz(i,j,k)+fibkz*(uz(i,j,k+1)+uz(i,j,k-1))& 
-                                   +fickz*(uz(i,j,k+2)+uz(i,j,k-2))&
-                                   +fidkz*(uz(i,j,k+3)+uz(i,j,k-3)) 
-      enddo 
-     tz(i,j,nz)   =      uz(i,j,nz  )
-     tz(i,j,nz-1 )=fiamz*uz(i,j,nz  )+fibmz*uz(i,j,nz-1)+ficmz*uz(i,j,nz-2)+&
-                   fidmz*uz(i,j,nz-3)
-     tz(i,j,nz-2 )=fiapz*uz(i,j,nz  )+fibpz*uz(i,j,nz-1)+ficpz*uz(i,j,nz-2)+&
-                   fidpz*uz(i,j,nz-3)+fiepz*uz(i,j,nz-4)+fifpz*uz(i,j,nz-5)
-      do k=2,nz  
-          tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*fifsz(k) 
-      enddo
-      tz(i,j,nz)=tz(i,j,nz)*fifwz(nz) 
-      do k=nz-1,1,-1  
-          tz(i,j,k)=(tz(i,j,k)-fiffz(k)*tz(i,j,k+1))*fifwz(k) 
-      enddo 
-  enddo 
-  enddo 
+     do j=1,ny 
+        do i=1,nx 
+           tz(i,j,1)=fiakz*uz(i,j,1)+fibkz*(uz(i,j,2)+uz(i,j,2))&
+                +fickz*(uz(i,j,3)+uz(i,j,3))&
+                +fidkz*(uz(i,j,4)+uz(i,j,4))
+           tz(i,j,2)=fiakz*uz(i,j,2)+fibkz*(uz(i,j,3)+uz(i,j,1))& 
+                +fickz*(uz(i,j,4)+uz(i,j,2))&
+                +fidkz*(uz(i,j,5)+uz(i,j,3)) 
+           tz(i,j,3)=fiakz*uz(i,j,3)+fibkz*(uz(i,j,4)+uz(i,j,2))& 
+                +fickz*(uz(i,j,5)+uz(i,j,1))&
+                +fidkz*(uz(i,j,6)+uz(i,j,2)) 
+           do k=4,nz-3 
+              tz(i,j,k)=fiakz*uz(i,j,k)+fibkz*(uz(i,j,k+1)+uz(i,j,k-1))& 
+                   +fickz*(uz(i,j,k+2)+uz(i,j,k-2))&
+                   +fidkz*(uz(i,j,k+3)+uz(i,j,k-3)) 
+           enddo
+           tz(i,j,nz)   =      uz(i,j,nz  )
+           tz(i,j,nz-1 )=fiamz*uz(i,j,nz  )+fibmz*uz(i,j,nz-1)+ficmz*uz(i,j,nz-2)+&
+                fidmz*uz(i,j,nz-3)
+           tz(i,j,nz-2 )=fiapz*uz(i,j,nz  )+fibpz*uz(i,j,nz-1)+ficpz*uz(i,j,nz-2)+&
+                fidpz*uz(i,j,nz-3)+fiepz*uz(i,j,nz-4)+fifpz*uz(i,j,nz-5)
+           do k=2,nz  
+              tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*fifsz(k) 
+           enddo
+           tz(i,j,nz)=tz(i,j,nz)*fifwz(nz) 
+           do k=nz-1,1,-1  
+              tz(i,j,k)=(tz(i,j,k)-fiffz(k)*tz(i,j,k+1))*fifwz(k) 
+           enddo
+        enddo
+     enddo
   endif
   if (npaire==0) then 
-      do j=1,ny 
-      do i=1,nx 
-          tz(i,j,1)=zero 
-          tz(i,j,2)=fiakz*uz(i,j,2)+fibkz*(uz(i,j,3)+uz(i,j,1))& 
-                                   +fickz*(uz(i,j,4)-uz(i,j,2))&
-                                   +fidkz*(uz(i,j,5)-uz(i,j,3)) 
-          tz(i,j,3)=fiakz*uz(i,j,3)+fibkz*(uz(i,j,4)+uz(i,j,2))& 
-                                   +fickz*(uz(i,j,5)+uz(i,j,1))&
-                                   +fidkz*(uz(i,j,6)-uz(i,j,2)) 
-          do k=4,nz-3 
+     do j=1,ny 
+        do i=1,nx 
+           tz(i,j,1)=zero 
+           tz(i,j,2)=fiakz*uz(i,j,2)+fibkz*(uz(i,j,3)+uz(i,j,1))& 
+                +fickz*(uz(i,j,4)-uz(i,j,2))&
+                +fidkz*(uz(i,j,5)-uz(i,j,3)) 
+           tz(i,j,3)=fiakz*uz(i,j,3)+fibkz*(uz(i,j,4)+uz(i,j,2))& 
+                +fickz*(uz(i,j,5)+uz(i,j,1))&
+                +fidkz*(uz(i,j,6)-uz(i,j,2)) 
+           do k=4,nz-3 
               tz(i,j,k)=fiakz*uz(i,j,k)+fibkz*(uz(i,j,k+1)+uz(i,j,k-1))& 
-                                       +fickz*(uz(i,j,k+2)+uz(i,j,k-2))&
-                                       +fidkz*(uz(i,j,k+3)+uz(i,j,k-3)) 
-          enddo 
-          tz(i,j,nz)   =      uz(i,j,nz  )
-          tz(i,j,nz-1 )=fiamz*uz(i,j,nz  )+fibmz*uz(i,j,nz-1)+ficmz*uz(i,j,nz-2)+&
-                        fidmz*uz(i,j,nz-3)
-          tz(i,j,nz-2 )=fiapz*uz(i,j,nz  )+fibpz*uz(i,j,nz-1)+ficpz*uz(i,j,nz-2)+&
-                        fidpz*uz(i,j,nz-3)+fiepz*uz(i,j,nz-4)+fifpz*uz(i,j,nz-5)
-          do k=2,nz  
+                   +fickz*(uz(i,j,k+2)+uz(i,j,k-2))&
+                   +fidkz*(uz(i,j,k+3)+uz(i,j,k-3)) 
+           enddo
+           tz(i,j,nz)   =      uz(i,j,nz  )
+           tz(i,j,nz-1 )=fiamz*uz(i,j,nz  )+fibmz*uz(i,j,nz-1)+ficmz*uz(i,j,nz-2)+&
+                fidmz*uz(i,j,nz-3)
+           tz(i,j,nz-2 )=fiapz*uz(i,j,nz  )+fibpz*uz(i,j,nz-1)+ficpz*uz(i,j,nz-2)+&
+                fidpz*uz(i,j,nz-3)+fiepz*uz(i,j,nz-4)+fifpz*uz(i,j,nz-5)
+           do k=2,nz  
               tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*fifsz(k) 
-          enddo
-              tz(i,j,nz)=tz(i,j,nz)*fifwz(nz) 
-    
-          do k=nz-1,1,-1  
+           enddo
+           tz(i,j,nz)=tz(i,j,nz)*fifwz(nz) 
+
+           do k=nz-1,1,-1  
               tz(i,j,k)=(tz(i,j,k)-fiffz(k)*tz(i,j,k+1))*fifwz(k) 
-          enddo 
-      enddo 
-      enddo 
+           enddo
+        enddo
+     enddo
   endif
 
   return
@@ -1234,81 +1234,81 @@ real(mytype), dimension(nz) :: fiffz,fifsz,fifwz
 end subroutine filz_12
 
 subroutine filz_21(tz,uz,rz,fisz,fiffz,fifsz,fifwz,nx,ny,nz,npaire) 
-  
-USE param  
-USE parfiZ 
 
-implicit none
+  USE param  
+  USE parfiZ 
+
+  implicit none
 
-integer :: nx,ny,nz,npaire,i,j,k
-real(mytype), dimension(nx,ny,nz) :: tz,uz,rz
-real(mytype), dimension(nx,ny) :: fisz
-real(mytype), dimension(nz) :: fiffz,fifsz,fifwz
+  integer :: nx,ny,nz,npaire,i,j,k
+  real(mytype), dimension(nx,ny,nz) :: tz,uz,rz
+  real(mytype), dimension(nx,ny) :: fisz
+  real(mytype), dimension(nz) :: fiffz,fifsz,fifwz
 
   if(iibm.eq.2) call lagpolz(uz)
-   
+
   if (npaire==1) then 
-  do j=1,ny 
-  do i=1,nx 
-     tz(i,j,1)=      uz(i,j,1)
-     tz(i,j,2)=fia2z*uz(i,j,1)+fib2z*uz(i,j,2)+fic2z*uz(i,j,3)+&
-               fid2z*uz(i,j,4)
-     tz(i,j,3)=fia3z*uz(i,j,1)+fib3z*uz(i,j,2)+fic3z*uz(i,j,3)+&
-                 fid3z*uz(i,j,4)+fie3z*uz(i,j,5)+fif3z*uz(i,j,6)
-      do k=4,nz-3 
-          tz(i,j,k)=fiakz*uz(i,j,k)+fibkz*(uz(i,j,k+1)+uz(i,j,k-1))& 
-                                   +fickz*(uz(i,j,k+2)+uz(i,j,k-2))&
-                                   +fidkz*(uz(i,j,k+3)+uz(i,j,k-3)) 
-      enddo 
-      tz(i,j,nz)=fiakz*uz(i,j,nz)    +fibkz*(uz(i,j,nz-1)+uz(i,j,nz-1))&
-                                     +fickz*(uz(i,j,nz-2)+uz(i,j,nz-2))&
-                                     +fidkz*(uz(i,j,nz-3)+uz(i,j,nz-3))
-      tz(i,j,nz-1)=fiakz*uz(i,j,nz-1)+fibkz*(uz(i,j,nz  )+uz(i,j,nz-2))& 
-                                     +fickz*(uz(i,j,nz-1)+uz(i,j,nz-3))&
-                                     +fidkz*(uz(i,j,nz-2)+uz(i,j,nz-4)) 
-      tz(i,j,nz-2)=fiakz*uz(i,j,nz-2)+fibkz*(uz(i,j,nz-1)+uz(i,j,nz-3))& 
-                                     +fickz*(uz(i,j,nz  )+uz(i,j,nz-4))&
-                                     +fidkz*(uz(i,j,nz-1)+uz(i,j,nz-5)) 
-      do k=2,nz  
-          tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*fifsz(k) 
-      enddo
-      tz(i,j,nz)=tz(i,j,nz)*fifwz(nz) 
-      do k=nz-1,1,-1  
-          tz(i,j,k)=(tz(i,j,k)-fiffz(k)*tz(i,j,k+1))*fifwz(k) 
-      enddo 
-  enddo 
-  enddo 
+     do j=1,ny 
+        do i=1,nx 
+           tz(i,j,1)=      uz(i,j,1)
+           tz(i,j,2)=fia2z*uz(i,j,1)+fib2z*uz(i,j,2)+fic2z*uz(i,j,3)+&
+                fid2z*uz(i,j,4)
+           tz(i,j,3)=fia3z*uz(i,j,1)+fib3z*uz(i,j,2)+fic3z*uz(i,j,3)+&
+                fid3z*uz(i,j,4)+fie3z*uz(i,j,5)+fif3z*uz(i,j,6)
+           do k=4,nz-3 
+              tz(i,j,k)=fiakz*uz(i,j,k)+fibkz*(uz(i,j,k+1)+uz(i,j,k-1))& 
+                   +fickz*(uz(i,j,k+2)+uz(i,j,k-2))&
+                   +fidkz*(uz(i,j,k+3)+uz(i,j,k-3)) 
+           enddo
+           tz(i,j,nz)=fiakz*uz(i,j,nz)    +fibkz*(uz(i,j,nz-1)+uz(i,j,nz-1))&
+                +fickz*(uz(i,j,nz-2)+uz(i,j,nz-2))&
+                +fidkz*(uz(i,j,nz-3)+uz(i,j,nz-3))
+           tz(i,j,nz-1)=fiakz*uz(i,j,nz-1)+fibkz*(uz(i,j,nz  )+uz(i,j,nz-2))& 
+                +fickz*(uz(i,j,nz-1)+uz(i,j,nz-3))&
+                +fidkz*(uz(i,j,nz-2)+uz(i,j,nz-4)) 
+           tz(i,j,nz-2)=fiakz*uz(i,j,nz-2)+fibkz*(uz(i,j,nz-1)+uz(i,j,nz-3))& 
+                +fickz*(uz(i,j,nz  )+uz(i,j,nz-4))&
+                +fidkz*(uz(i,j,nz-1)+uz(i,j,nz-5)) 
+           do k=2,nz  
+              tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*fifsz(k) 
+           enddo
+           tz(i,j,nz)=tz(i,j,nz)*fifwz(nz) 
+           do k=nz-1,1,-1  
+              tz(i,j,k)=(tz(i,j,k)-fiffz(k)*tz(i,j,k+1))*fifwz(k) 
+           enddo
+        enddo
+     enddo
   endif
   if (npaire==0) then 
-      do j=1,ny 
-      do i=1,nx 
-             tz(i,j,1)=      uz(i,j,1)
-             tz(i,j,2)=fia2z*uz(i,j,1)+fib2z*uz(i,j,2)+fic2z*uz(i,j,3)+&
-                       fid2z*uz(i,j,4)
-             tz(i,j,3)=fia3z*uz(i,j,1)+fib3z*uz(i,j,2)+fic3z*uz(i,j,3)+&
-                         fid3z*uz(i,j,4)+fie3z*uz(i,j,5)+fif3z*uz(i,j,6)
-          do k=4,nz-3 
+     do j=1,ny 
+        do i=1,nx 
+           tz(i,j,1)=      uz(i,j,1)
+           tz(i,j,2)=fia2z*uz(i,j,1)+fib2z*uz(i,j,2)+fic2z*uz(i,j,3)+&
+                fid2z*uz(i,j,4)
+           tz(i,j,3)=fia3z*uz(i,j,1)+fib3z*uz(i,j,2)+fic3z*uz(i,j,3)+&
+                fid3z*uz(i,j,4)+fie3z*uz(i,j,5)+fif3z*uz(i,j,6)
+           do k=4,nz-3 
               tz(i,j,k)=fiakz*uz(i,j,k)+fibkz*(uz(i,j,k+1)+uz(i,j,k-1))& 
-                                       +fickz*(uz(i,j,k+2)+uz(i,j,k-2))&
-                                       +fidkz*(uz(i,j,k+3)+uz(i,j,k-3)) 
-          enddo 
-          tz(i,j,nz)=zero 
-          tz(i,j,nz-1)=fiakz*uz(i,j,nz-1) +fibkz*( uz(i,j,nz  )+uz(i,j,nz-2))& 
-                                          +fickz*(-uz(i,j,nz-1)+uz(i,j,nz-3))&
-                                          +fidkz*(-uz(i,j,nz-2)+uz(i,j,nz-4)) 
-          tz(i,j,nz-2)=fiakz*uz(i,j,nz-2) +fibkz*( uz(i,j,nz-1)+uz(i,j,nz-3))& 
-                                          +fickz*( uz(i,j,nz  )+uz(i,j,nz-4))&
-                                          +fidkz*(-uz(i,j,nz-1)+uz(i,j,nz-5)) 
-          do k=2,nz  
+                   +fickz*(uz(i,j,k+2)+uz(i,j,k-2))&
+                   +fidkz*(uz(i,j,k+3)+uz(i,j,k-3)) 
+           enddo
+           tz(i,j,nz)=zero 
+           tz(i,j,nz-1)=fiakz*uz(i,j,nz-1) +fibkz*( uz(i,j,nz  )+uz(i,j,nz-2))& 
+                +fickz*(-uz(i,j,nz-1)+uz(i,j,nz-3))&
+                +fidkz*(-uz(i,j,nz-2)+uz(i,j,nz-4)) 
+           tz(i,j,nz-2)=fiakz*uz(i,j,nz-2) +fibkz*( uz(i,j,nz-1)+uz(i,j,nz-3))& 
+                +fickz*( uz(i,j,nz  )+uz(i,j,nz-4))&
+                +fidkz*(-uz(i,j,nz-1)+uz(i,j,nz-5)) 
+           do k=2,nz  
               tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*fifsz(k) 
-          enddo
-              tz(i,j,nz)=tz(i,j,nz)*fifwz(nz) 
-    
-          do k=nz-1,1,-1  
+           enddo
+           tz(i,j,nz)=tz(i,j,nz)*fifwz(nz) 
+
+           do k=nz-1,1,-1  
               tz(i,j,k)=(tz(i,j,k)-fiffz(k)*tz(i,j,k+1))*fifwz(k) 
-          enddo 
-      enddo 
-      enddo 
+           enddo
+        enddo
+     enddo
   endif
 
   return
@@ -1317,45 +1317,45 @@ end subroutine filz_21
 
 
 subroutine filz_22(tz,uz,rz,fisz,fiffz,fifsz,fifwz,nx,ny,nz,npaire) 
-  
-USE param  
-USE parfiZ 
 
-implicit none
+  USE param  
+  USE parfiZ 
 
-integer :: nx,ny,nz,npaire,i,j,k
-real(mytype), dimension(nx,ny,nz) :: tz,uz,rz
-real(mytype), dimension(nx,ny) :: fisz
-real(mytype), dimension(nz) :: fiffz,fifsz,fifwz
+  implicit none
+
+  integer :: nx,ny,nz,npaire,i,j,k
+  real(mytype), dimension(nx,ny,nz) :: tz,uz,rz
+  real(mytype), dimension(nx,ny) :: fisz
+  real(mytype), dimension(nz) :: fiffz,fifsz,fifwz
 
   if(iibm.eq.2) call lagpolz(uz)
-   
+
   do j=1,ny
-  do i=1,nx 
-     tz(i,j,1)=      uz(i,j,1)
-     tz(i,j,2)=fia2z*uz(i,j,1)+fib2z*uz(i,j,2)+fic2z*uz(i,j,3)+&
-               fid2z*uz(i,j,4)
-     tz(i,j,3)=fia3z*uz(i,j,1)+fib3z*uz(i,j,2)+fic3z*uz(i,j,3)+&
-                 fid3z*uz(i,j,4)+fie3z*uz(i,j,5)+fif3z*uz(i,j,6)
-     do k=4,nz-3
-       tz(i,j,k)=fiakz*uz(i,j,k) +fibkz*(uz(i,j,k+1)+uz(i,j,k-1))& 
-                                 +fickz*(uz(i,j,k+2)+uz(i,j,k-2))&
-                                 +fidkz*(uz(i,j,k+3)+uz(i,j,k-3)) 
-     enddo
-     tz(i,j,nz)   =      uz(i,j,nz  )
-     tz(i,j,nz-1 )=fiamz*uz(i,j,nz  )+fibmz*uz(i,j,nz-1)+ficmz*uz(i,j,nz-2)+&
-                   fidmz*uz(i,j,nz-3)
-     tz(i,j,nz-2 )=fiapz*uz(i,j,nz  )+fibpz*uz(i,j,nz-1)+ficpz*uz(i,j,nz-2)+&
-                   fidpz*uz(i,j,nz-3)+fiepz*uz(i,j,nz-4)+fifpz*uz(i,j,nz-5)
-     do k=2,nz 
-        tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*fifsz(k) 
-     enddo
-     tz(i,j,nz)=tz(i,j,nz)*fifwz(nz) 
-     do k=nz-1,1,-1
-        tz(i,j,k)=(tz(i,j,k)-fiffz(k)*tz(i,j,k+1))*fifwz(k) 
+     do i=1,nx 
+        tz(i,j,1)=      uz(i,j,1)
+        tz(i,j,2)=fia2z*uz(i,j,1)+fib2z*uz(i,j,2)+fic2z*uz(i,j,3)+&
+             fid2z*uz(i,j,4)
+        tz(i,j,3)=fia3z*uz(i,j,1)+fib3z*uz(i,j,2)+fic3z*uz(i,j,3)+&
+             fid3z*uz(i,j,4)+fie3z*uz(i,j,5)+fif3z*uz(i,j,6)
+        do k=4,nz-3
+           tz(i,j,k)=fiakz*uz(i,j,k) +fibkz*(uz(i,j,k+1)+uz(i,j,k-1))& 
+                +fickz*(uz(i,j,k+2)+uz(i,j,k-2))&
+                +fidkz*(uz(i,j,k+3)+uz(i,j,k-3)) 
+        enddo
+        tz(i,j,nz)   =      uz(i,j,nz  )
+        tz(i,j,nz-1 )=fiamz*uz(i,j,nz  )+fibmz*uz(i,j,nz-1)+ficmz*uz(i,j,nz-2)+&
+             fidmz*uz(i,j,nz-3)
+        tz(i,j,nz-2 )=fiapz*uz(i,j,nz  )+fibpz*uz(i,j,nz-1)+ficpz*uz(i,j,nz-2)+&
+             fidpz*uz(i,j,nz-3)+fiepz*uz(i,j,nz-4)+fifpz*uz(i,j,nz-5)
+        do k=2,nz 
+           tz(i,j,k)=tz(i,j,k)-tz(i,j,k-1)*fifsz(k) 
+        enddo
+        tz(i,j,nz)=tz(i,j,nz)*fifwz(nz) 
+        do k=nz-1,1,-1
+           tz(i,j,k)=(tz(i,j,k)-fiffz(k)*tz(i,j,k+1))*fifwz(k) 
+        enddo
      enddo
-  enddo 
-  enddo 
+  enddo
 
   return
 
diff --git a/src/genepsi3d.f90 b/src/genepsi3d.f90
index 2f0b4bd3..b7a17b56 100644
--- a/src/genepsi3d.f90
+++ b/src/genepsi3d.f90
@@ -7,32 +7,32 @@ subroutine corgp_IBM (ux,uy,uz,px,py,pz,nlock)
   real(mytype),dimension(xsize(1),xsize(2),xsize(3)) :: ux,uy,uz,px,py,pz
   nxyz=xsize(1)*xsize(2)*xsize(3)
   if (nlock.eq.1) then
-    if (nz.gt.1) then
-      do ijk=1,nxyz
-        uy(ijk,1,1)=-py(ijk,1,1)+uy(ijk,1,1)
-        uz(ijk,1,1)=-pz(ijk,1,1)+uz(ijk,1,1)
-        ux(ijk,1,1)=-px(ijk,1,1)+ux(ijk,1,1)
-      enddo
-    else
-      do ijk=1,nxyz
-        uy(ijk,1,1)=-py(ijk,1,1)+uy(ijk,1,1)
-        ux(ijk,1,1)=-px(ijk,1,1)+ux(ijk,1,1)
-      enddo
-    endif
+     if (nz.gt.1) then
+        do ijk=1,nxyz
+           uy(ijk,1,1)=-py(ijk,1,1)+uy(ijk,1,1)
+           uz(ijk,1,1)=-pz(ijk,1,1)+uz(ijk,1,1)
+           ux(ijk,1,1)=-px(ijk,1,1)+ux(ijk,1,1)
+        enddo
+     else
+        do ijk=1,nxyz
+           uy(ijk,1,1)=-py(ijk,1,1)+uy(ijk,1,1)
+           ux(ijk,1,1)=-px(ijk,1,1)+ux(ijk,1,1)
+        enddo
+     endif
   endif
   if (nlock.eq.2) then
-    if (nz.gt.1) then
-      do ijk=1,nxyz
-        uy(ijk,1,1)=py(ijk,1,1)+uy(ijk,1,1)
-        uz(ijk,1,1)=pz(ijk,1,1)+uz(ijk,1,1)
-        ux(ijk,1,1)=px(ijk,1,1)+ux(ijk,1,1)
-      enddo
-    else
-      do ijk=1,nxyz
-        uy(ijk,1,1)=py(ijk,1,1)+uy(ijk,1,1)
-        ux(ijk,1,1)=px(ijk,1,1)+ux(ijk,1,1)
-      enddo
-    endif
+     if (nz.gt.1) then
+        do ijk=1,nxyz
+           uy(ijk,1,1)=py(ijk,1,1)+uy(ijk,1,1)
+           uz(ijk,1,1)=pz(ijk,1,1)+uz(ijk,1,1)
+           ux(ijk,1,1)=px(ijk,1,1)+ux(ijk,1,1)
+        enddo
+     else
+        do ijk=1,nxyz
+           uy(ijk,1,1)=py(ijk,1,1)+uy(ijk,1,1)
+           ux(ijk,1,1)=px(ijk,1,1)+ux(ijk,1,1)
+        enddo
+     endif
   endif
 
   return
@@ -89,15 +89,15 @@ SUBROUTINE geomcomplex(epsi, nxi, nxf, ny, nyi, nyf, nzi, nzf, dx, yp, dz, remp)
   REAL(mytype)               :: remp
 
   IF (itype.EQ.itype_cyl) THEN
-     
+
      CALL geomcomplex_cyl(epsi, nxi, nxf, ny, nyi, nyf, nzi, nzf, dx, yp, dz, remp)
-     
+
   ELSEIF (itype.EQ.itype_hill) THEN
-     
+
      CALL geomcomplex_hill(epsi,nxi,nxf,ny,nyi,nyf,nzi,nzf,dx,yp,dz,remp)
-     
+
   ENDIF
-  
+
 END SUBROUTINE geomcomplex
 !*******************************************************************
 subroutine genepsi3d(ep1)
@@ -972,126 +972,126 @@ subroutine write_geomcomplex(nx,ny,nz,ep1,nobjx,nobjy,nobjz,xi,xf,yi,yf,zi,zf,&
   return
 end subroutine write_geomcomplex
 subroutine read_geomcomplex()
-!
-USE complex_geometry
-USE decomp_2d
-USE MPI
-!
-implicit none
-!
-integer :: i,j,k
-integer :: code
-!
-if(nrank.eq.0)then
-   open(11,file='nobjx.dat'  ,form='formatted', status='old')
-   do k=1,nz
-   do j=1,ny
-      read(11,*)nobjx(j,k)
-   enddo
-   enddo
-   close(11)
-endif
-call MPI_BCAST(nobjx,ny*nz,MPI_INTEGER,0,MPI_COMM_WORLD,code)
-if(nrank.eq.0)then
-   open(12,file='nobjy.dat'  ,form='formatted', status='old')
-   do k=1,nz
-   do i=1,nx
-      read(12,*)nobjy(i,k)
-   enddo
-   enddo
-   close(12)
-endif
-call MPI_BCAST(nobjy,nx*nz,MPI_INTEGER,0,MPI_COMM_WORLD,code)
-if(nrank.eq.0)then
-   open(13,file='nobjz.dat'  ,form='formatted', status='old')
-   do j=1,ny
-   do i=1,nx
-      read(13,*)nobjz(i,j)
-   enddo
-   enddo
-   close(13)
-endif
-call MPI_BCAST(nobjz,nx*ny,MPI_INTEGER,0,MPI_COMM_WORLD,code)
-if(nrank.eq.0)then
-   open(21,file='nxifpif.dat',form='formatted', status='old')
-   do k=1,nz
-   do j=1,ny
-   do i=0,nobjmax
-      read(21,*)nxipif(i,j,k),nxfpif(i,j,k)
-   enddo
-   enddo
-   enddo
-   close(21)
-endif
-call MPI_BCAST(nxipif,ny*nz*(nobjmax+1),MPI_INTEGER,0,MPI_COMM_WORLD,code)
-call MPI_BCAST(nxfpif,ny*nz*(nobjmax+1),MPI_INTEGER,0,MPI_COMM_WORLD,code)
-if(nrank.eq.0)then
-   open(22,file='nyifpif.dat',form='formatted', status='old')
-   do k=1,nz
-   do i=1,nx
-   do j=0,nobjmax
-      read(22,*)nyipif(j,i,k),nyfpif(j,i,k)
-   enddo
-   enddo
-   enddo
-   close(22)
-endif
-call MPI_BCAST(nyipif,nx*nz*(nobjmax+1),MPI_INTEGER,0,MPI_COMM_WORLD,code)
-call MPI_BCAST(nyfpif,nx*nz*(nobjmax+1),MPI_INTEGER,0,MPI_COMM_WORLD,code)
-if(nrank.eq.0)then
-   open(23,file='nzifpif.dat',form='formatted', status='old')
-   do j=1,ny
-   do i=1,nx
-   do k=0,nobjmax
-      read(23,*)nzipif(k,i,j),nzfpif(k,i,j)
-   enddo
-   enddo
-   enddo
-   close(23)
-endif
-call MPI_BCAST(nzipif,nx*ny*(nobjmax+1),MPI_INTEGER,0,MPI_COMM_WORLD,code)
-call MPI_BCAST(nzfpif,nx*ny*(nobjmax+1),MPI_INTEGER,0,MPI_COMM_WORLD,code)
-if(nrank.eq.0)then
-   open(31,file='xixf.dat'   ,form='formatted', status='old')
-   do k=1,nz
-   do j=1,ny
-   do i=1,nobjmax
-      read(31,*)xi(i,j,k),xf(i,j,k)
-   enddo
-   enddo
-   enddo
-   close(31)
-endif
-call MPI_BCAST(xi,ny*nz*nobjmax,MPI_REAL,0,MPI_COMM_WORLD,code)
-call MPI_BCAST(xf,ny*nz*nobjmax,MPI_REAL,0,MPI_COMM_WORLD,code)
-if(nrank.eq.0)then
-   open(32,file='yiyf.dat'   ,form='formatted', status='old')
-   do k=1,nz
-   do i=1,nx
-   do j=1,nobjmax
-      read(32,*)yi(j,i,k),yf(j,i,k)
-   enddo
-   enddo
-   enddo
-   close(32)
-endif
-call MPI_BCAST(yi,nx*nz*nobjmax,MPI_REAL,0,MPI_COMM_WORLD,code)
-call MPI_BCAST(yf,nx*nz*nobjmax,MPI_REAL,0,MPI_COMM_WORLD,code)
-if(nrank.eq.0)then
-   open(33,file='zizf.dat'   ,form='formatted', status='old')
-   do j=1,ny
-   do i=1,nx
-   do k=1,nobjmax
-      read(33,*)zi(k,i,j),zf(k,i,j)
-   enddo
-   enddo
-   enddo
-   close(33)
-endif
-call MPI_BCAST(zi,nx*ny*nobjmax,MPI_REAL,0,MPI_COMM_WORLD,code)
-call MPI_BCAST(zf,nx*ny*nobjmax,MPI_REAL,0,MPI_COMM_WORLD,code)
-!
-return
+  !
+  USE complex_geometry
+  USE decomp_2d
+  USE MPI
+  !
+  implicit none
+  !
+  integer :: i,j,k
+  integer :: code
+  !
+  if(nrank.eq.0)then
+     open(11,file='nobjx.dat'  ,form='formatted', status='old')
+     do k=1,nz
+        do j=1,ny
+           read(11,*)nobjx(j,k)
+        enddo
+     enddo
+     close(11)
+  endif
+  call MPI_BCAST(nobjx,ny*nz,MPI_INTEGER,0,MPI_COMM_WORLD,code)
+  if(nrank.eq.0)then
+     open(12,file='nobjy.dat'  ,form='formatted', status='old')
+     do k=1,nz
+        do i=1,nx
+           read(12,*)nobjy(i,k)
+        enddo
+     enddo
+     close(12)
+  endif
+  call MPI_BCAST(nobjy,nx*nz,MPI_INTEGER,0,MPI_COMM_WORLD,code)
+  if(nrank.eq.0)then
+     open(13,file='nobjz.dat'  ,form='formatted', status='old')
+     do j=1,ny
+        do i=1,nx
+           read(13,*)nobjz(i,j)
+        enddo
+     enddo
+     close(13)
+  endif
+  call MPI_BCAST(nobjz,nx*ny,MPI_INTEGER,0,MPI_COMM_WORLD,code)
+  if(nrank.eq.0)then
+     open(21,file='nxifpif.dat',form='formatted', status='old')
+     do k=1,nz
+        do j=1,ny
+           do i=0,nobjmax
+              read(21,*)nxipif(i,j,k),nxfpif(i,j,k)
+           enddo
+        enddo
+     enddo
+     close(21)
+  endif
+  call MPI_BCAST(nxipif,ny*nz*(nobjmax+1),MPI_INTEGER,0,MPI_COMM_WORLD,code)
+  call MPI_BCAST(nxfpif,ny*nz*(nobjmax+1),MPI_INTEGER,0,MPI_COMM_WORLD,code)
+  if(nrank.eq.0)then
+     open(22,file='nyifpif.dat',form='formatted', status='old')
+     do k=1,nz
+        do i=1,nx
+           do j=0,nobjmax
+              read(22,*)nyipif(j,i,k),nyfpif(j,i,k)
+           enddo
+        enddo
+     enddo
+     close(22)
+  endif
+  call MPI_BCAST(nyipif,nx*nz*(nobjmax+1),MPI_INTEGER,0,MPI_COMM_WORLD,code)
+  call MPI_BCAST(nyfpif,nx*nz*(nobjmax+1),MPI_INTEGER,0,MPI_COMM_WORLD,code)
+  if(nrank.eq.0)then
+     open(23,file='nzifpif.dat',form='formatted', status='old')
+     do j=1,ny
+        do i=1,nx
+           do k=0,nobjmax
+              read(23,*)nzipif(k,i,j),nzfpif(k,i,j)
+           enddo
+        enddo
+     enddo
+     close(23)
+  endif
+  call MPI_BCAST(nzipif,nx*ny*(nobjmax+1),MPI_INTEGER,0,MPI_COMM_WORLD,code)
+  call MPI_BCAST(nzfpif,nx*ny*(nobjmax+1),MPI_INTEGER,0,MPI_COMM_WORLD,code)
+  if(nrank.eq.0)then
+     open(31,file='xixf.dat'   ,form='formatted', status='old')
+     do k=1,nz
+        do j=1,ny
+           do i=1,nobjmax
+              read(31,*)xi(i,j,k),xf(i,j,k)
+           enddo
+        enddo
+     enddo
+     close(31)
+  endif
+  call MPI_BCAST(xi,ny*nz*nobjmax,MPI_REAL,0,MPI_COMM_WORLD,code)
+  call MPI_BCAST(xf,ny*nz*nobjmax,MPI_REAL,0,MPI_COMM_WORLD,code)
+  if(nrank.eq.0)then
+     open(32,file='yiyf.dat'   ,form='formatted', status='old')
+     do k=1,nz
+        do i=1,nx
+           do j=1,nobjmax
+              read(32,*)yi(j,i,k),yf(j,i,k)
+           enddo
+        enddo
+     enddo
+     close(32)
+  endif
+  call MPI_BCAST(yi,nx*nz*nobjmax,MPI_REAL,0,MPI_COMM_WORLD,code)
+  call MPI_BCAST(yf,nx*nz*nobjmax,MPI_REAL,0,MPI_COMM_WORLD,code)
+  if(nrank.eq.0)then
+     open(33,file='zizf.dat'   ,form='formatted', status='old')
+     do j=1,ny
+        do i=1,nx
+           do k=1,nobjmax
+              read(33,*)zi(k,i,j),zf(k,i,j)
+           enddo
+        enddo
+     enddo
+     close(33)
+  endif
+  call MPI_BCAST(zi,nx*ny*nobjmax,MPI_REAL,0,MPI_COMM_WORLD,code)
+  call MPI_BCAST(zf,nx*ny*nobjmax,MPI_REAL,0,MPI_COMM_WORLD,code)
+  !
+  return
 end subroutine read_geomcomplex
 !
 !***************************************************************************
@@ -1099,87 +1099,87 @@ end subroutine read_geomcomplex
 !***************************************************************************
 !
 subroutine lagpolx(u)
-!
-USE param
-USE complex_geometry
-USE decomp_2d
-USE variables
-!
-implicit none
-!
-real(mytype),dimension(xsize(1),xsize(2),xsize(3)) :: u
-integer                                            :: i,j,k
-real(mytype)                                       :: x,y,z
-integer                                            :: ix              != position du point "zappé"
-integer                                            :: ipif,ipol,nxpif
-integer                                            :: ipoli,ipolf     != positions Initiales et Finales du POLynôme considéré
-real(mytype)                                       :: xpol,ypol,dypol !|variables concernant les polynômes
-real(mytype),dimension(10)                         :: xa,ya           !|de Lagrange. A mettre impérativement en 
-integer                                            :: ia,na           !|double précision
-!
-do k=1,xsize(3)
-do j=1,xsize(2)
-   if(nobjx(j,k).ne.0)then
-      ia=0
-      do i=1,nobjx(j,k)          !boucle sur le nombre d'objets par (j,k)
-         !1ère frontière
-         nxpif=npif
-         ia=ia+1
-         xa(ia)=xi(i,j,k)
-         ya(ia)=0.
-         if(xi(i,j,k).gt.0.)then!objet immergé
-            ix=xi(i,j,k)/dx+1
-            ipoli=ix+1
-            if(nxipif(i,j,k).lt.npif)nxpif=nxipif(i,j,k)
-            do ipif=1,nxpif
-               ia=ia+1
-               if(izap.eq.1)then!zapping
-                  xa(ia)=(ix-1)*dx-ipif*dx
-                  ya(ia)=u(ix-ipif,j,k)
-               else             !no zapping
-                  xa(ia)=(ix-1)*dx-(ipif-1)*dx
-                  ya(ia)=u(ix-ipif+1,j,k)
-               endif
-            enddo
-         else                   !objet semi-immergé
-            ipoli=1
-         endif
-         !2ème frontière
-         nxpif=npif
-         ia=ia+1
-         xa(ia)=xf(i,j,k)
-         ya(ia)=0.
-         if(xf(i,j,k).lt.xlx)then!objet immergé
-            ix=(xf(i,j,k)+dx)/dx+1
-            ipolf=ix-1
-            if(nxfpif(i,j,k).lt.npif)nxpif=nxfpif(i,j,k)
-            do ipif=1,nxpif
-               ia=ia+1
-               if(izap.eq.1)then!zapping
-                  xa(ia)=(ix-1)*dx+ipif*dx
-                  ya(ia)=u(ix+ipif,j,k)
-               else             !no zapping
-                  xa(ia)=(ix-1)*dx+(ipif-1)*dx
-                  ya(ia)=u(ix+ipif-1,j,k)
-               endif
-            enddo
-         else                   !objet semi-immergé
-            ipolf=nx
-         endif
-         !calcul du polynôme
-         na=ia
-         do ipol=ipoli,ipolf
-            xpol=dx*(ipol-1)
-            call polint(xa,ya,na,xpol,ypol,dypol)
-            u(ipol,j,k)=ypol
-         enddo
-         ia=0
-      enddo
-   endif
-enddo
-enddo
-!
-return
+  !
+  USE param
+  USE complex_geometry
+  USE decomp_2d
+  USE variables
+  !
+  implicit none
+  !
+  real(mytype),dimension(xsize(1),xsize(2),xsize(3)) :: u
+  integer                                            :: i,j,k
+  real(mytype)                                       :: x,y,z
+  integer                                            :: ix              != position du point "zappé"
+  integer                                            :: ipif,ipol,nxpif
+  integer                                            :: ipoli,ipolf     != positions Initiales et Finales du POLynôme considéré
+  real(mytype)                                       :: xpol,ypol,dypol !|variables concernant les polynômes
+  real(mytype),dimension(10)                         :: xa,ya           !|de Lagrange. A mettre impérativement en 
+  integer                                            :: ia,na           !|double précision
+  !
+  do k=1,xsize(3)
+     do j=1,xsize(2)
+        if(nobjx(j,k).ne.0)then
+           ia=0
+           do i=1,nobjx(j,k)          !boucle sur le nombre d'objets par (j,k)
+              !1ère frontière
+              nxpif=npif
+              ia=ia+1
+              xa(ia)=xi(i,j,k)
+              ya(ia)=0.
+              if(xi(i,j,k).gt.0.)then!objet immergé
+                 ix=xi(i,j,k)/dx+1
+                 ipoli=ix+1
+                 if(nxipif(i,j,k).lt.npif)nxpif=nxipif(i,j,k)
+                 do ipif=1,nxpif
+                    ia=ia+1
+                    if(izap.eq.1)then!zapping
+                       xa(ia)=(ix-1)*dx-ipif*dx
+                       ya(ia)=u(ix-ipif,j,k)
+                    else             !no zapping
+                       xa(ia)=(ix-1)*dx-(ipif-1)*dx
+                       ya(ia)=u(ix-ipif+1,j,k)
+                    endif
+                 enddo
+              else                   !objet semi-immergé
+                 ipoli=1
+              endif
+              !2ème frontière
+              nxpif=npif
+              ia=ia+1
+              xa(ia)=xf(i,j,k)
+              ya(ia)=0.
+              if(xf(i,j,k).lt.xlx)then!objet immergé
+                 ix=(xf(i,j,k)+dx)/dx+1
+                 ipolf=ix-1
+                 if(nxfpif(i,j,k).lt.npif)nxpif=nxfpif(i,j,k)
+                 do ipif=1,nxpif
+                    ia=ia+1
+                    if(izap.eq.1)then!zapping
+                       xa(ia)=(ix-1)*dx+ipif*dx
+                       ya(ia)=u(ix+ipif,j,k)
+                    else             !no zapping
+                       xa(ia)=(ix-1)*dx+(ipif-1)*dx
+                       ya(ia)=u(ix+ipif-1,j,k)
+                    endif
+                 enddo
+              else                   !objet semi-immergé
+                 ipolf=nx
+              endif
+              !calcul du polynôme
+              na=ia
+              do ipol=ipoli,ipolf
+                 xpol=dx*(ipol-1)
+                 call polint(xa,ya,na,xpol,ypol,dypol)
+                 u(ipol,j,k)=ypol
+              enddo
+              ia=0
+           enddo
+        endif
+     enddo
+  enddo
+  !
+  return
 end subroutine lagpolx
 !
 !***************************************************************************
@@ -1187,94 +1187,94 @@ end subroutine lagpolx
 !***************************************************************************
 !
 subroutine lagpoly(u)
-!
-USE param
-USE complex_geometry
-USE decomp_2d
-USE variables
-!
-implicit none
-!
-real(mytype),dimension(ysize(1),ysize(2),ysize(3)) :: u
-integer                                            :: i,j,k
-real(mytype)                                       :: x,y,z
-integer                                            :: jy              != position du point "zappé"
-integer                                            :: jpif,jpol,nypif
-integer                                            :: jpoli,jpolf     != positions Initiales et Finales du POLynôme considéré
-real(mytype)                                       :: xpol,ypol,dypol !|variables concernant les polynômes
-real(mytype),dimension(10)                         :: xa,ya           !|de Lagrange. A mettre impérativement en 
-integer                                            :: ia,na           !|double précision
-!
-do k=1,ysize(3)
-do i=1,ysize(1)
-   if(nobjy(i,k).ne.0)then
-      ia=0
-      do j=1,nobjy(i,k)          !boucle sur le nombre d'objets par (j,k)
-         !1ère frontière
-         nypif=npif
-         ia=ia+1
-         xa(ia)=yi(j,i,k)
-         ya(ia)=0.
-         if(yi(j,i,k).gt.0.)then!objet immergé
-            jy=1!jy=yi(j,i,k)/dy+1
-            do while(yp(jy).lt.yi(j,i,k))
-               jy=jy+1
-            enddo
-            jy=jy-1
-            jpoli=jy+1
-            if(nyipif(j,i,k).lt.npif)nypif=nyipif(j,i,k)
-            do jpif=1,nypif
-               ia=ia+1
-               if(izap.eq.1)then!zapping
-                  xa(ia)=yp(jy-jpif)!(jy-1)*dy-jpif*dy
-                  ya(ia)=u(i,jy-jpif,k)
-               else             !no zapping
-                  xa(ia)=yp(jy-jpif+1)!(jy-1)*dy-(jpif-1)*dy
-                  ya(ia)=u(i,jy-jpif+1,k)
-               endif
-            enddo
-         else                   !objet semi-immergé
-            jpoli=1
-         endif
-         !2ème frontière
-         nypif=npif
-         ia=ia+1
-         xa(ia)=yf(j,i,k)
-         ya(ia)=0.
-         if(yf(j,i,k).lt.yly)then!objet immergé
-            jy=1!jy=(yf(j,i,k)+dy)/dy+1
-            do while(yp(jy).lt.yf(j,i,k))  !there was a bug here yi<-->yf
-               jy=jy+1
-            enddo
-            jpolf=jy-1
-            if(nyfpif(j,i,k).lt.npif)nypif=nyfpif(j,i,k)
-            do jpif=1,nypif
-               ia=ia+1
-               if(izap.eq.1)then!zapping
-                  xa(ia)=yp(jy+jpif)!(jy-1)*dy+jpif*dy
-                  ya(ia)=u(i,jy+jpif,k)
-               else             !no zapping
-                  xa(ia)=yp(jy+jpif-1)!(jy-1)*dy+(jpif-1)*dy
-                  ya(ia)=u(i,jy+jpif-1,k)
-               endif
-            enddo
-         else                   !objet semi-immergé
-            jpolf=ny
-         endif
-         !calcul du polynôme
-         na=ia
-         do jpol=jpoli,jpolf
-            xpol=yp(jpol)!dy*(jpol-1)
-            call polint(xa,ya,na,xpol,ypol,dypol)
-            u(i,jpol,k)=ypol
-         enddo
-         ia=0
-      enddo
-   endif
-enddo
-enddo
-!
-return
+  !
+  USE param
+  USE complex_geometry
+  USE decomp_2d
+  USE variables
+  !
+  implicit none
+  !
+  real(mytype),dimension(ysize(1),ysize(2),ysize(3)) :: u
+  integer                                            :: i,j,k
+  real(mytype)                                       :: x,y,z
+  integer                                            :: jy              != position du point "zappé"
+  integer                                            :: jpif,jpol,nypif
+  integer                                            :: jpoli,jpolf     != positions Initiales et Finales du POLynôme considéré
+  real(mytype)                                       :: xpol,ypol,dypol !|variables concernant les polynômes
+  real(mytype),dimension(10)                         :: xa,ya           !|de Lagrange. A mettre impérativement en 
+  integer                                            :: ia,na           !|double précision
+  !
+  do k=1,ysize(3)
+     do i=1,ysize(1)
+        if(nobjy(i,k).ne.0)then
+           ia=0
+           do j=1,nobjy(i,k)          !boucle sur le nombre d'objets par (j,k)
+              !1ère frontière
+              nypif=npif
+              ia=ia+1
+              xa(ia)=yi(j,i,k)
+              ya(ia)=0.
+              if(yi(j,i,k).gt.0.)then!objet immergé
+                 jy=1!jy=yi(j,i,k)/dy+1
+                 do while(yp(jy).lt.yi(j,i,k))
+                    jy=jy+1
+                 enddo
+                 jy=jy-1
+                 jpoli=jy+1
+                 if(nyipif(j,i,k).lt.npif)nypif=nyipif(j,i,k)
+                 do jpif=1,nypif
+                    ia=ia+1
+                    if(izap.eq.1)then!zapping
+                       xa(ia)=yp(jy-jpif)!(jy-1)*dy-jpif*dy
+                       ya(ia)=u(i,jy-jpif,k)
+                    else             !no zapping
+                       xa(ia)=yp(jy-jpif+1)!(jy-1)*dy-(jpif-1)*dy
+                       ya(ia)=u(i,jy-jpif+1,k)
+                    endif
+                 enddo
+              else                   !objet semi-immergé
+                 jpoli=1
+              endif
+              !2ème frontière
+              nypif=npif
+              ia=ia+1
+              xa(ia)=yf(j,i,k)
+              ya(ia)=0.
+              if(yf(j,i,k).lt.yly)then!objet immergé
+                 jy=1!jy=(yf(j,i,k)+dy)/dy+1
+                 do while(yp(jy).lt.yf(j,i,k))  !there was a bug here yi<-->yf
+                    jy=jy+1
+                 enddo
+                 jpolf=jy-1
+                 if(nyfpif(j,i,k).lt.npif)nypif=nyfpif(j,i,k)
+                 do jpif=1,nypif
+                    ia=ia+1
+                    if(izap.eq.1)then!zapping
+                       xa(ia)=yp(jy+jpif)!(jy-1)*dy+jpif*dy
+                       ya(ia)=u(i,jy+jpif,k)
+                    else             !no zapping
+                       xa(ia)=yp(jy+jpif-1)!(jy-1)*dy+(jpif-1)*dy
+                       ya(ia)=u(i,jy+jpif-1,k)
+                    endif
+                 enddo
+              else                   !objet semi-immergé
+                 jpolf=ny
+              endif
+              !calcul du polynôme
+              na=ia
+              do jpol=jpoli,jpolf
+                 xpol=yp(jpol)!dy*(jpol-1)
+                 call polint(xa,ya,na,xpol,ypol,dypol)
+                 u(i,jpol,k)=ypol
+              enddo
+              ia=0
+           enddo
+        endif
+     enddo
+  enddo
+  !
+  return
 end subroutine lagpoly
 !
 !***************************************************************************
@@ -1282,87 +1282,87 @@ end subroutine lagpoly
 !***************************************************************************
 !
 subroutine lagpolz(u)
-!
-USE param
-USE complex_geometry
-USE decomp_2d
-USE variables
-!
-implicit none
-!
-real(mytype),dimension(zsize(1),zsize(2),zsize(3)) :: u
-integer                                            :: i,j,k
-real(mytype)                                       :: x,y,z
-integer                                            :: kz              != position du point "zappé"
-integer                                            :: kpif,kpol,nzpif
-integer                                            :: kpoli,kpolf     != positions Initiales et Finales du POLynôme considéré
-real(mytype)                                       :: xpol,ypol,dypol !|variables concernant les polynômes
-real(mytype),dimension(10)                         :: xa,ya           !|de Lagrange. A mettre imérativement en 
-integer                                            :: ia,na           !|double précision
-!
-do j=1,zsize(2)
-do i=1,zsize(1)
-   if(nobjz(i,j).ne.0)then
-      ia=0
-      do k=1,nobjz(i,j)          !boucle sur le nombre d'objets par couple (i,j)
-         !1ère frontière
-         nzpif=npif
-         ia=ia+1
-         xa(ia)=zi(k,i,j)
-         ya(ia)=0.
-         if(zi(k,i,j).gt.0.)then!objet immergé
-            kz=zi(k,i,j)/dz+1
-            kpoli=kz+1
-            if(nzipif(k,i,j).lt.npif)nzpif=nzipif(k,i,j)
-            do kpif=1,nzpif
-               ia=ia+1
-               if(izap.eq.1)then!zapping
-                  xa(ia)=(kz-1)*dz-kpif*dz
-                  ya(ia)=u(i,j,kz-kpif)
-               else             !no zapping
-                  xa(ia)=(kz-1)*dz-(kpif-1)*dz
-                  ya(ia)=u(i,j,kz-kpif+1)
-               endif
-            enddo
-         else                   !objet semi-immergé
-            kpoli=1
-         endif
-         !2ème frontière
-         nzpif=npif
-         ia=ia+1
-         xa(ia)=zf(k,i,j)
-         ya(ia)=0.
-         if(zf(k,i,j).lt.zlz)then!objet immergé
-            kz=(zf(k,i,j)+dz)/dz+1
-            kpolf=kz-1
-            if(nzfpif(k,i,j).lt.npif)nzpif=nzfpif(k,i,j)
-            do kpif=1,nzpif
-               ia=ia+1
-               if(izap.eq.1)then!zapping
-                  xa(ia)=(kz-1)*dz+kpif*dz
-                  ya(ia)=u(i,j,kz+kpif)
-               else             !no zapping
-                  xa(ia)=(kz-1)*dz+(kpif-1)*dz
-                  ya(ia)=u(i,j,kz+kpif-1)
-               endif
-            enddo
-         else                   !objet semi-immergé
-            kpolf=nz
-         endif
-         !calcul du polynôme
-         na=ia
-         do kpol=kpoli,kpolf
-            xpol=dz*(kpol-1)
-            call polint(xa,ya,na,xpol,ypol,dypol)
-            u(i,j,kpol)=ypol
-         enddo
-         ia=0
-      enddo
-   endif
-enddo
-enddo
-!
-return
+  !
+  USE param
+  USE complex_geometry
+  USE decomp_2d
+  USE variables
+  !
+  implicit none
+  !
+  real(mytype),dimension(zsize(1),zsize(2),zsize(3)) :: u
+  integer                                            :: i,j,k
+  real(mytype)                                       :: x,y,z
+  integer                                            :: kz              != position du point "zappé"
+  integer                                            :: kpif,kpol,nzpif
+  integer                                            :: kpoli,kpolf     != positions Initiales et Finales du POLynôme considéré
+  real(mytype)                                       :: xpol,ypol,dypol !|variables concernant les polynômes
+  real(mytype),dimension(10)                         :: xa,ya           !|de Lagrange. A mettre imérativement en 
+  integer                                            :: ia,na           !|double précision
+  !
+  do j=1,zsize(2)
+     do i=1,zsize(1)
+        if(nobjz(i,j).ne.0)then
+           ia=0
+           do k=1,nobjz(i,j)          !boucle sur le nombre d'objets par couple (i,j)
+              !1ère frontière
+              nzpif=npif
+              ia=ia+1
+              xa(ia)=zi(k,i,j)
+              ya(ia)=0.
+              if(zi(k,i,j).gt.0.)then!objet immergé
+                 kz=zi(k,i,j)/dz+1
+                 kpoli=kz+1
+                 if(nzipif(k,i,j).lt.npif)nzpif=nzipif(k,i,j)
+                 do kpif=1,nzpif
+                    ia=ia+1
+                    if(izap.eq.1)then!zapping
+                       xa(ia)=(kz-1)*dz-kpif*dz
+                       ya(ia)=u(i,j,kz-kpif)
+                    else             !no zapping
+                       xa(ia)=(kz-1)*dz-(kpif-1)*dz
+                       ya(ia)=u(i,j,kz-kpif+1)
+                    endif
+                 enddo
+              else                   !objet semi-immergé
+                 kpoli=1
+              endif
+              !2ème frontière
+              nzpif=npif
+              ia=ia+1
+              xa(ia)=zf(k,i,j)
+              ya(ia)=0.
+              if(zf(k,i,j).lt.zlz)then!objet immergé
+                 kz=(zf(k,i,j)+dz)/dz+1
+                 kpolf=kz-1
+                 if(nzfpif(k,i,j).lt.npif)nzpif=nzfpif(k,i,j)
+                 do kpif=1,nzpif
+                    ia=ia+1
+                    if(izap.eq.1)then!zapping
+                       xa(ia)=(kz-1)*dz+kpif*dz
+                       ya(ia)=u(i,j,kz+kpif)
+                    else             !no zapping
+                       xa(ia)=(kz-1)*dz+(kpif-1)*dz
+                       ya(ia)=u(i,j,kz+kpif-1)
+                    endif
+                 enddo
+              else                   !objet semi-immergé
+                 kpolf=nz
+              endif
+              !calcul du polynôme
+              na=ia
+              do kpol=kpoli,kpolf
+                 xpol=dz*(kpol-1)
+                 call polint(xa,ya,na,xpol,ypol,dypol)
+                 u(i,j,kpol)=ypol
+              enddo
+              ia=0
+           enddo
+        endif
+     enddo
+  enddo
+  !
+  return
 end subroutine lagpolz
 !
 !
@@ -1371,48 +1371,48 @@ end subroutine lagpolz
 !***************************************************************************
 !
 subroutine polint(xa,ya,n,x,y,dy)
-!
-USE decomp_2d
-!
-implicit none
-!
-   integer,parameter            :: nmax=30
-   integer                      :: n,i,m,ns
-   real(mytype)                 :: dy,x,y,den,dif,dift,ho,hp,w
-   real(mytype),dimension(nmax) :: c,d
-   real(mytype),dimension(n)    :: xa,ya
-   ns=1
-   dif=abs(x-xa(1))
-   do i=1,n
-      dift=abs(x-xa(i))
-      if(dift.lt.dif)then
-         ns=i
-         dif=dift
-      endif
-      c(i)=ya(i)
-      d(i)=ya(i)
-   enddo
-   y=ya(ns)
-   ns=ns-1
-   do m=1,n-1
-      do i=1,n-m
-         ho=xa(i)-x
-         hp=xa(i+m)-x
-         w=c(i+1)-d(i)
-         den=ho-hp
-!         if(den.eq.0)read(*,*)
-         den=w/den
-         d(i)=hp*den
-         c(i)=ho*den
-      enddo
-      if (2*ns.lt.n-m)then
-         dy=c(ns+1)
-      else
-         dy=d(ns)
-         ns=ns-1
-      endif
-      y=y+dy
-   enddo
-   return
+  !
+  USE decomp_2d
+  !
+  implicit none
+  !
+  integer,parameter            :: nmax=30
+  integer                      :: n,i,m,ns
+  real(mytype)                 :: dy,x,y,den,dif,dift,ho,hp,w
+  real(mytype),dimension(nmax) :: c,d
+  real(mytype),dimension(n)    :: xa,ya
+  ns=1
+  dif=abs(x-xa(1))
+  do i=1,n
+     dift=abs(x-xa(i))
+     if(dift.lt.dif)then
+        ns=i
+        dif=dift
+     endif
+     c(i)=ya(i)
+     d(i)=ya(i)
+  enddo
+  y=ya(ns)
+  ns=ns-1
+  do m=1,n-1
+     do i=1,n-m
+        ho=xa(i)-x
+        hp=xa(i+m)-x
+        w=c(i+1)-d(i)
+        den=ho-hp
+        !         if(den.eq.0)read(*,*)
+        den=w/den
+        d(i)=hp*den
+        c(i)=ho*den
+     enddo
+     if (2*ns.lt.n-m)then
+        dy=c(ns+1)
+     else
+        dy=d(ns)
+        ns=ns-1
+     endif
+     y=y+dy
+  enddo
+  return
 end subroutine polint
 
diff --git a/src/incompact3d.f90 b/src/incompact3d.f90
index 6ce67285..936e1a74 100644
--- a/src/incompact3d.f90
+++ b/src/incompact3d.f90
@@ -26,7 +26,7 @@ PROGRAM incompact3d
   character(len=80) :: InputFN, FNBase
   character(len=20) :: filename
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !! Initialisation
   !!-------------------------------------------------------------------------------
 
@@ -107,7 +107,7 @@ PROGRAM incompact3d
   call calc_divu_constraint(divu3, rho1, phi1)
   !!-------------------------------------------------------------------------------
   !! End initialisation
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   if(nrank.eq.0)then
      open(42,file='time_evol.dat',form='formatted')
   endif
@@ -120,17 +120,17 @@ PROGRAM incompact3d
 
      do itr=1,iadvance_time
 
-        !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
         !! Initialise timestep
         !!-------------------------------------------------------------------------
         call boundary_conditions(rho1,ux1,uy1,uz1,phi1,ep1)
         !!-------------------------------------------------------------------------
         !! End initialise timestep
-        !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
         CALL calculate_transeq_rhs(drho1,dux1,duy1,duz1,dphi1,rho1,ux1,uy1,uz1,ep1,phi1,divu3)
 
-        !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
         !! Time integrate transport equations
         !!-------------------------------------------------------------------------
         if (ilmn) then
@@ -144,7 +144,7 @@ PROGRAM incompact3d
         call pre_correc(ux1,uy1,uz1,ep1)
         !!-------------------------------------------------------------------------
         !! End time integrate transport equations
-        !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
         if (iibm==1) then !solid body old school
            call corgp_IBM(ux1,uy1,uz1,px1,py1,pz1,1)
@@ -152,7 +152,7 @@ PROGRAM incompact3d
            call corgp_IBM(ux1,uy1,uz1,px1,py1,pz1,2)
         endif
 
-        !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
         !! Poisson solver and velocity correction
         !!-------------------------------------------------------------------------
         call calc_divu_constraint(divu3, rho1, phi1)
@@ -167,7 +167,7 @@ PROGRAM incompact3d
         endif
         !!-------------------------------------------------------------------------
         !! End Poisson solver and velocity correction
-        !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
         if (mod(itime,10)==0) then
            call divergence(dv3,rho1,ux1,uy1,uz1,ep1,drho1,divu3,2)
@@ -184,7 +184,7 @@ PROGRAM incompact3d
      endif
 #endif
 
-     !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
      !! Post-processing / IO
      !!----------------------------------------------------------------------------
 
@@ -197,11 +197,11 @@ PROGRAM incompact3d
      CALL visu(rho1, ux1, uy1, uz1, pp3(:,:,:,1), phi1, itime)
      !!----------------------------------------------------------------------------
      !! End post-processing / IO
-     !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
   enddo !! End time loop
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !! End simulation
   !!-------------------------------------------------------------------------------
   if(nrank.eq.0)then
@@ -305,7 +305,7 @@ SUBROUTINE solve_poisson(pp3, px1, py1, pz1, rho1, ux1, uy1, uz1, ep1, drho1, di
      CALL conserved_to_primary(rho1, uy1)
      CALL conserved_to_primary(rho1, uz1)
   ENDIF
-  
+
   CALL divergence(pp3(:,:,:,1),rho1,ux1,uy1,uz1,ep1,drho1,divu3,nlock)
   IF (ilmn.AND.ivarcoeff) THEN
      dv3(:,:,:) = pp3(:,:,:,1)
@@ -313,7 +313,7 @@ SUBROUTINE solve_poisson(pp3, px1, py1, pz1, rho1, ux1, uy1, uz1, ep1, drho1, di
 
   DO WHILE(.NOT.converged)
      IF (ivarcoeff) THEN
-        
+
         !! Test convergence
         CALL test_varcoeff(converged, pp3, dv3, atol, rtol, poissiter)
 
@@ -329,7 +329,7 @@ SUBROUTINE solve_poisson(pp3, px1, py1, pz1, rho1, ux1, uy1, uz1, ep1, drho1, di
 
         !! Need to update pressure gradient here for varcoeff
         CALL gradp(px1,py1,pz1,pp3(:,:,:,1))
-        
+
         IF ((.NOT.ilmn).OR.(.NOT.ivarcoeff)) THEN
            !! Once-through solver
            !! - Incompressible flow
@@ -481,7 +481,7 @@ SUBROUTINE intt(rho1, ux1, uy1, uz1, phi1, drho1, dux1, duy1, duz1, dphi1)
      DO is = 1, numscalar
         IF (is.NE.primary_species) THEN
            CALL int_time(phi1(:,:,:,is), dphi1(:,:,:,:,is))
-           
+
            IF (massfrac(is)) THEN
               DO k = 1, xsize(3)
                  DO j = 1, xsize(2)
@@ -494,7 +494,7 @@ SUBROUTINE intt(rho1, ux1, uy1, uz1, phi1, drho1, dux1, duy1, duz1, dphi1)
            ENDIF
         ENDIF
      ENDDO
-     
+
      IF (primary_species.GE.1) THEN
         phi1(:,:,:,primary_species) = one
         DO is = 1, numscalar
@@ -502,7 +502,7 @@ SUBROUTINE intt(rho1, ux1, uy1, uz1, phi1, drho1, dux1, duy1, duz1, dphi1)
               phi1(:,:,:,primary_species) = phi1(:,:,:,primary_species) - phi1(:,:,:,is)
            ENDIF
         ENDDO
-        
+
         DO k = 1, xsize(3)
            DO j = 1, xsize(2)
               DO i = 1, xsize(1)
@@ -512,7 +512,7 @@ SUBROUTINE intt(rho1, ux1, uy1, uz1, phi1, drho1, dux1, duy1, duz1, dphi1)
            ENDDO
         ENDDO
      ENDIF
-     
+
      IF (ilmn.and.ilmn_solve_temp) THEN
         !! Compute rho
         call calc_temp_eos(rho1(:,:,:,1), ta1, phi1, tb1, xsize(1), xsize(2), xsize(3))
diff --git a/src/les_models.f90 b/src/les_models.f90
index c3c20281..22456547 100755
--- a/src/les_models.f90
+++ b/src/les_models.f90
@@ -38,7 +38,7 @@ subroutine init_explicit_les
 end subroutine init_explicit_les
 !************************************************************
 subroutine Compute_SGS(sgsx1,sgsy1,sgsz1,ux1,uy1,uz1,ep1,iconservative)
-    
+
   USE param
   USE variables
   USE decomp_2d
@@ -50,28 +50,28 @@ subroutine Compute_SGS(sgsx1,sgsy1,sgsz1,ux1,uy1,uz1,ep1,iconservative)
   real(mytype), dimension(xsize(1), xsize(2), xsize(3)) :: sgsx1, sgsy1, sgsz1
   integer :: iconservative
 
-    ! Calculate eddy-viscosity
-    if(jLES.eq.1) then ! Smagorinsky
+  ! Calculate eddy-viscosity
+  if(jLES.eq.1) then ! Smagorinsky
+
+     call smag(nut1,ux1,uy1,uz1)
 
-        call smag(nut1,ux1,uy1,uz1)
-    
-    elseif(jLES.eq.2) then !WALE
-        
-        call smag(nut1,ux1,uy1,uz1)
-  
-    elseif(jLES.eq.3) then ! Lilly-style Dynamic Smagorinsky
+  elseif(jLES.eq.2) then !WALE
 
-    endif
+     call smag(nut1,ux1,uy1,uz1)
 
-    if(iconservative.eq.0) then ! Non-conservative form for calculating the divergence of the SGS stresses
-   
-        call les_nonconservative(sgsx1,sgsy1,sgsz1,ux1,uy1,uz1,nut1,ep1)
+  elseif(jLES.eq.3) then ! Lilly-style Dynamic Smagorinsky
+
+  endif
 
-    elseif (iconservative.eq.1) then ! Conservative form for calculating the divergence of the SGS stresses (used with wall functions)
+  if(iconservative.eq.0) then ! Non-conservative form for calculating the divergence of the SGS stresses
 
-        ! Call les_conservative
+     call les_nonconservative(sgsx1,sgsy1,sgsz1,ux1,uy1,uz1,nut1,ep1)
 
-    endif
+  elseif (iconservative.eq.1) then ! Conservative form for calculating the divergence of the SGS stresses (used with wall functions)
+
+     ! Call les_conservative
+
+  endif
 
   return
 
diff --git a/src/mkl_dfti.f90 b/src/mkl_dfti.f90
index 483206ce..60665d2c 100644
--- a/src/mkl_dfti.f90
+++ b/src/mkl_dfti.f90
@@ -314,7 +314,7 @@ MODULE MKL_DFTI
        INTEGER, INTENT(IN), DIMENSION(*) :: dims
      END FUNCTION dfti_create_descriptor_d_md
 
-  END INTERFACE
+  END INTERFACE DftiCreateDescriptor
 
   INTERFACE DftiCopyDescriptor
 
@@ -327,7 +327,7 @@ MODULE MKL_DFTI
        TYPE(DFTI_DESCRIPTOR), POINTER :: new_desc
      END FUNCTION dfti_copy_descriptor_external
 
-  END INTERFACE
+  END INTERFACE DftiCopyDescriptor
 
   INTERFACE DftiCommitDescriptor
 
@@ -339,7 +339,7 @@ MODULE MKL_DFTI
        TYPE(DFTI_DESCRIPTOR), POINTER :: desc
      END FUNCTION dfti_commit_descriptor_external
 
-  END INTERFACE
+  END INTERFACE DftiCommitDescriptor
 
   INTERFACE DftiSetValue
 
@@ -398,7 +398,7 @@ MODULE MKL_DFTI
        TYPE(DFTI_DESCRIPTOR), POINTER :: desc
      END FUNCTION dfti_set_value_chars
 
-  END INTERFACE
+  END INTERFACE DftiSetValue
 
   INTERFACE DftiGetValue
 
@@ -457,7 +457,7 @@ MODULE MKL_DFTI
        TYPE(DFTI_DESCRIPTOR), POINTER :: desc
      END FUNCTION dfti_get_value_chars
 
-  END INTERFACE
+  END INTERFACE DftiGetValue
 
   INTERFACE DftiComputeForward
 
@@ -746,7 +746,7 @@ MODULE MKL_DFTI
        TYPE(DFTI_DESCRIPTOR), POINTER :: desc
      END FUNCTION dfti_free_descriptor_external
 
-  END INTERFACE
+  END INTERFACE DftiFreeDescriptor
 
   INTERFACE DftiErrorClass
 
@@ -759,7 +759,7 @@ MODULE MKL_DFTI
        INTEGER, INTENT(IN) :: ErrorClass
      END FUNCTION dfti_error_class_external
 
-  END INTERFACE
+  END INTERFACE DftiErrorClass
 
   INTERFACE DftiErrorMessage
 
@@ -771,6 +771,6 @@ MODULE MKL_DFTI
        INTEGER, INTENT(IN) :: Status
      END FUNCTION dfti_error_message_external
 
-  END INTERFACE
+  END INTERFACE DftiErrorMessage
 
 END MODULE MKL_DFTI
diff --git a/src/navier.f90 b/src/navier.f90
index 6de6c0e4..2d0c985b 100644
--- a/src/navier.f90
+++ b/src/navier.f90
@@ -293,9 +293,9 @@ subroutine int_time_temperature(rho1, drho1, dphi1, phi1)
      endif
   endif
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !! XXX We are integrating the temperature equation - get temperature
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   call calc_temp_eos(tc1, rho1(:,:,:,1), phi1, tb1, xsize(1), xsize(2), xsize(3))
 
   !! Now we can update current temperature
@@ -310,9 +310,9 @@ subroutine int_time_temperature(rho1, drho1, dphi1, phi1)
      enddo
   enddo
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !! XXX We are integrating the temperature equation - get back to rho
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   call calc_rho_eos(rho1(:,:,:,1), tc1, phi1, tb1, xsize(1), xsize(2), xsize(3))
 
 endsubroutine int_time_temperature
@@ -337,7 +337,7 @@ SUBROUTINE lmn_t_to_rho_trans(drho1, dtemp1, rho1, dphi1, phi1)
   USE param, ONLY : ntime
   USE var, ONLY : numscalar
   USE var, ONLY : ta1, tb1
-  
+
   IMPLICIT NONE
 
   !! INPUTS
@@ -350,16 +350,16 @@ SUBROUTINE lmn_t_to_rho_trans(drho1, dtemp1, rho1, dphi1, phi1)
 
   !! LOCALS
   INTEGER :: is
-  
+
   drho1(:,:,:) = zero
-  
+
   IF (imultispecies) THEN
      DO is = 1, numscalar
         IF (massfrac(is)) THEN
            drho1(:,:,:) = drho1(:,:,:) - dphi1(:,:,:,1,is) / mol_weight(is)
         ENDIF
      ENDDO
-     
+
      ta1(:,:,:) = zero !! Mean molecular weight
      DO is = 1, numscalar
         IF (massfrac(is)) THEN
@@ -371,9 +371,9 @@ SUBROUTINE lmn_t_to_rho_trans(drho1, dtemp1, rho1, dphi1, phi1)
 
   CALL calc_temp_eos(ta1, rho1, phi1, tb1, xsize(1), xsize(2), xsize(3))
   drho1(:,:,:) = drho1(:,:,:) - dtemp1(:,:,:) / ta1(:,:,:)
-  
+
   drho1(:,:,:) = rho1(:,:,:) * drho1(:,:,:)
-  
+
 ENDSUBROUTINE lmn_t_to_rho_trans
 
 !********************************************************************
@@ -421,7 +421,7 @@ subroutine divergence (pp3,rho1,ux1,uy1,uz1,ep1,drho1,divu3,nlock)
 
   implicit none
 
-!  TYPE(DECOMP_INFO) :: ph1,ph3,ph4
+  !  TYPE(DECOMP_INFO) :: ph1,ph3,ph4
 
   !X PENCILS NX NY NZ  -->NXM NY NZ
   real(mytype),dimension(xsize(1),xsize(2),xsize(3)),intent(in) :: ux1,uy1,uz1,ep1
@@ -494,18 +494,18 @@ subroutine divergence (pp3,rho1,ux1,uy1,uz1,ep1,drho1,divu3,nlock)
   pp3(:,:,:) = pp3(:,:,:) + po3(:,:,:)
 
   if (nlock==2) then
-    pp3(:,:,:)=pp3(:,:,:)-pp3(ph1%zst(1),ph1%zst(2),nzmsize)
+     pp3(:,:,:)=pp3(:,:,:)-pp3(ph1%zst(1),ph1%zst(2),nzmsize)
   endif
 
   tmax=-1609._mytype
   tmoy=0._mytype
   do k=1,nzmsize
-    do j=ph1%zst(2),ph1%zen(2)
-      do i=ph1%zst(1),ph1%zen(1)
-        if (pp3(i,j,k).gt.tmax) tmax=pp3(i,j,k)
-        tmoy=tmoy+abs(pp3(i,j,k))
-      enddo
-    enddo
+     do j=ph1%zst(2),ph1%zen(2)
+        do i=ph1%zst(1),ph1%zen(1)
+           if (pp3(i,j,k).gt.tmax) tmax=pp3(i,j,k)
+           tmoy=tmoy+abs(pp3(i,j,k))
+        enddo
+     enddo
   enddo
   tmoy=tmoy/nvect3
 
@@ -513,11 +513,11 @@ subroutine divergence (pp3,rho1,ux1,uy1,uz1,ep1,drho1,divu3,nlock)
   call MPI_REDUCE(tmoy,tmoy1,1,real_type,MPI_SUM,0,MPI_COMM_WORLD,code)
 
   if ((nrank==0).and.(nlock.gt.0)) then
-    if (nlock==2) then
-      print *,'DIV U  max mean=',real(tmax1,4),real(tmoy1/real(nproc),4)
-    else
-      print *,'DIV U* max mean=',real(tmax1,4),real(tmoy1/real(nproc),4)
-    endif
+     if (nlock==2) then
+        print *,'DIV U  max mean=',real(tmax1,4),real(tmoy1/real(nproc),4)
+     else
+        print *,'DIV U* max mean=',real(tmax1,4),real(tmoy1/real(nproc),4)
+     endif
   endif
 
   return
@@ -544,7 +544,7 @@ subroutine gradp(px1,py1,pz1,pp3)
   USE variables
   USE MPI
   USE var, only: pp1,pgy1,pgz1,di1,pp2,ppi2,pgy2,pgz2,pgzi2,dip2,&
-          pgz3,ppi3,dip3,nxmsize,nymsize,nzmsize
+       pgz3,ppi3,dip3,nxmsize,nymsize,nzmsize
 #ifdef FORCES
   USE forces, only : ppi1
 #endif
@@ -590,67 +590,67 @@ subroutine gradp(px1,py1,pz1,pp3)
 
 #ifdef FORCES
   call interxpv(ppi1,pp1,di1,sx,cifip6,cisip6,ciwip6,cifx6,cisx6,ciwx6,&
-  nxmsize,xsize(1),xsize(2),xsize(3),1)
+       nxmsize,xsize(1),xsize(2),xsize(3),1)
 #endif
 
   !we are in X pencils:
   if (nclx1.eq.2) then
-    do k=1,xsize(3)
-      do j=1,xsize(2)
-        dpdyx1(j,k)=py1(1,j,k)/gdt(itr)
-        dpdzx1(j,k)=pz1(1,j,k)/gdt(itr)
-      enddo
-    enddo
+     do k=1,xsize(3)
+        do j=1,xsize(2)
+           dpdyx1(j,k)=py1(1,j,k)/gdt(itr)
+           dpdzx1(j,k)=pz1(1,j,k)/gdt(itr)
+        enddo
+     enddo
   endif
   if (nclxn.eq.2) then
-    do k=1,xsize(3)
-      do j=1,xsize(2)
-        dpdyxn(j,k)=py1(nx,j,k)/gdt(itr)
-        dpdzxn(j,k)=pz1(nx,j,k)/gdt(itr)
-      enddo
-    enddo
+     do k=1,xsize(3)
+        do j=1,xsize(2)
+           dpdyxn(j,k)=py1(nx,j,k)/gdt(itr)
+           dpdzxn(j,k)=pz1(nx,j,k)/gdt(itr)
+        enddo
+     enddo
   endif
 
   if (ncly1.eq.2) then
-    if (xsize(2)==1) then
-      do k=1,xsize(3)
-        do i=1,xsize(1)
-          dpdxy1(i,k)=px1(i,1,k)/gdt(itr)
-          dpdzy1(i,k)=pz1(i,1,k)/gdt(itr)
+     if (xsize(2)==1) then
+        do k=1,xsize(3)
+           do i=1,xsize(1)
+              dpdxy1(i,k)=px1(i,1,k)/gdt(itr)
+              dpdzy1(i,k)=pz1(i,1,k)/gdt(itr)
+           enddo
         enddo
-      enddo
-    endif
+     endif
   endif
   if (nclyn.eq.2) then
-    if (xsize(2)==ny) then
-      do k=1,xsize(3)
-        do i=1,xsize(1)
-          dpdxyn(i,k)=px1(i,ny,k)/gdt(itr)
-          dpdzyn(i,k)=pz1(i,ny,k)/gdt(itr)
+     if (xsize(2)==ny) then
+        do k=1,xsize(3)
+           do i=1,xsize(1)
+              dpdxyn(i,k)=px1(i,ny,k)/gdt(itr)
+              dpdzyn(i,k)=pz1(i,ny,k)/gdt(itr)
+           enddo
         enddo
-      enddo
-    endif
+     endif
   endif
 
   if (nclz1.eq.2) then
-    if (xstart(3)==1) then
-      do j=1,xsize(2)
-        do i=1,xsize(1)
-          dpdxz1(i,j)=py1(i,j,1)/gdt(itr)
-          dpdyz1(i,j)=pz1(i,j,1)/gdt(itr)
+     if (xstart(3)==1) then
+        do j=1,xsize(2)
+           do i=1,xsize(1)
+              dpdxz1(i,j)=py1(i,j,1)/gdt(itr)
+              dpdyz1(i,j)=pz1(i,j,1)/gdt(itr)
+           enddo
         enddo
-      enddo
-    endif
+     endif
   endif
   if (nclzn.eq.2) then
-    if (xend(3)==nz) then
-      do j=1,xsize(2)
-        do i=1,xsize(1)
-          dpdxzn(i,j)=py1(i,j,xsize(3))/gdt(itr)
-          dpdyzn(i,j)=pz1(i,j,xsize(3))/gdt(itr)
+     if (xend(3)==nz) then
+        do j=1,xsize(2)
+           do i=1,xsize(1)
+              dpdxzn(i,j)=py1(i,j,xsize(3))/gdt(itr)
+              dpdyzn(i,j)=pz1(i,j,xsize(3))/gdt(itr)
+           enddo
         enddo
-      enddo
-    endif
+     endif
   endif
 
   return
@@ -781,8 +781,8 @@ subroutine pre_correc(ux,uy,uz,ep)
               dpdzyn(i,k)=dpdzyn(i,k)*gdt(itr)
            enddo
         enddo
-      endif
-      if (dims(1)==1) then
+     endif
+     if (dims(1)==1) then
         do k=1,xsize(3)
            do i=1,xsize(1)
               ux(i,xsize(2),k)=byxn(i,k)+dpdxyn(i,k)
@@ -791,13 +791,13 @@ subroutine pre_correc(ux,uy,uz,ep)
            enddo
         enddo
      elseif (ny - (nym / dims(1)) == xstart(2)) then
-       do k=1,xsize(3)
-          do i=1,xsize(1)
-             ux(i,xsize(2),k)=byxn(i,k)+dpdxyn(i,k)
-             uy(i,xsize(2),k)=byyn(i,k)
-             uz(i,xsize(2),k)=byzn(i,k)+dpdzyn(i,k)
-          enddo
-       enddo
+        do k=1,xsize(3)
+           do i=1,xsize(1)
+              ux(i,xsize(2),k)=byxn(i,k)+dpdxyn(i,k)
+              uy(i,xsize(2),k)=byyn(i,k)
+              uz(i,xsize(2),k)=byzn(i,k)+dpdzyn(i,k)
+           enddo
+        enddo
      endif
   endif
 
@@ -823,62 +823,62 @@ subroutine pre_correc(ux,uy,uz,ep)
   endif
 
 
-    !********NCLZ==2*************************************
-    if (nclz1==2) then
-       if (xstart(3)==1) then
-          do j=1,xsize(2)
-             do i=1,xsize(1)
-                dpdxz1(i,j)=dpdxz1(i,j)*gdt(itr)
-                dpdyz1(i,j)=dpdyz1(i,j)*gdt(itr)
-             enddo
-          enddo
-          do j=1,xsize(2)
-             do i=1,xsize(1)
-                ux(i,j,1)=bzx1(i,j)+dpdxz1(i,j)
-                uy(i,j,1)=bzy1(i,j)+dpdyz1(i,j)
-                uz(i,j,1)=bzz1(i,j)
-             enddo
-          enddo
-       endif
-    endif
-
-    if (nclzn==2) then
-       if (xend(3)==nz) then
-          do j=1,xsize(2)
-             do i=1,xsize(1)
-                dpdxzn(i,j)=dpdxzn(i,j)*gdt(itr)
-                dpdyzn(i,j)=dpdyzn(i,j)*gdt(itr)
-             enddo
-          enddo
-          do j=1,xsize(2)
-             do i=1,xsize(1)
-                ux(i,j,xsize(3))=bzxn(i,j)+dpdxzn(i,j)
-                uy(i,j,xsize(3))=bzyn(i,j)+dpdyzn(i,j)
-                uz(i,j,xsize(3))=bzzn(i,j)
-             enddo
-          enddo
-       endif
-    endif
-    !********NCLZ==1************************************* !just to reforce free-slip condition
-    if (nclz1==1) then
-       if (xstart(3)==1) then
-          do j=1,xsize(2)
-             do i=1,xsize(1)
-                uz(i,j,1)=zero
-             enddo
-          enddo
-       endif
-    endif
-
-    if (nclzn==1) then
-       if (xend(3)==nz) then
-          do j=1,xsize(2)
-             do i=1,xsize(1)
-                uz(i,j,xsize(3))=zero
-             enddo
-          enddo
-       endif
-    endif
+  !********NCLZ==2*************************************
+  if (nclz1==2) then
+     if (xstart(3)==1) then
+        do j=1,xsize(2)
+           do i=1,xsize(1)
+              dpdxz1(i,j)=dpdxz1(i,j)*gdt(itr)
+              dpdyz1(i,j)=dpdyz1(i,j)*gdt(itr)
+           enddo
+        enddo
+        do j=1,xsize(2)
+           do i=1,xsize(1)
+              ux(i,j,1)=bzx1(i,j)+dpdxz1(i,j)
+              uy(i,j,1)=bzy1(i,j)+dpdyz1(i,j)
+              uz(i,j,1)=bzz1(i,j)
+           enddo
+        enddo
+     endif
+  endif
+
+  if (nclzn==2) then
+     if (xend(3)==nz) then
+        do j=1,xsize(2)
+           do i=1,xsize(1)
+              dpdxzn(i,j)=dpdxzn(i,j)*gdt(itr)
+              dpdyzn(i,j)=dpdyzn(i,j)*gdt(itr)
+           enddo
+        enddo
+        do j=1,xsize(2)
+           do i=1,xsize(1)
+              ux(i,j,xsize(3))=bzxn(i,j)+dpdxzn(i,j)
+              uy(i,j,xsize(3))=bzyn(i,j)+dpdyzn(i,j)
+              uz(i,j,xsize(3))=bzzn(i,j)
+           enddo
+        enddo
+     endif
+  endif
+  !********NCLZ==1************************************* !just to reforce free-slip condition
+  if (nclz1==1) then
+     if (xstart(3)==1) then
+        do j=1,xsize(2)
+           do i=1,xsize(1)
+              uz(i,j,1)=zero
+           enddo
+        enddo
+     endif
+  endif
+
+  if (nclzn==1) then
+     if (xend(3)==nz) then
+        do j=1,xsize(2)
+           do i=1,xsize(1)
+              uz(i,j,xsize(3))=zero
+           enddo
+        enddo
+     endif
+  endif
 
   return
 end subroutine pre_correc
@@ -953,7 +953,7 @@ SUBROUTINE calc_divu_constraint(divu3, rho1, phi1)
            ENDIF
         ENDDO
         td1(:,:,:) = one / td1(:,:,:)
-        
+
         DO is = 1, numscalar
            IF (massfrac(is)) THEN
               CALL derxx (tc1, phi1(:,:,:,is), di1, sx, sfxp, ssxp, swxp, xsize(1), xsize(2), xsize(3), 1)
@@ -1068,10 +1068,10 @@ SUBROUTINE extrapol_drhodt(drhodt1_next, rho1, drho1)
         drhodt1_next(:,:,:) = three * rho1(:,:,:,1) - four * rho1(:,:,:,2) + rho1(:,:,:,3)
         drhodt1_next(:,:,:) = half * drhodt1_next(:,:,:) / dt
      ENDIF
-  ! ELSEIF (itimescheme.EQ.3) THEN
-  !    !! AB3
-  ! ELSEIF (itimescheme.EQ.4) THEN
-  !    !! AB4
+     ! ELSEIF (itimescheme.EQ.3) THEN
+     !    !! AB3
+     ! ELSEIF (itimescheme.EQ.4) THEN
+     !    !! AB4
   ELSEIF (itimescheme.EQ.5) THEN
      !! RK3
      IF (itime.GT.1) THEN
@@ -1100,7 +1100,7 @@ ENDSUBROUTINE extrapol_drhodt
 !!
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 SUBROUTINE test_varcoeff(converged, pp3, dv3, atol, rtol, poissiter)
-  
+
   USE MPI
   USE decomp_2d, ONLY: mytype, ph1, real_type, nrank
   USE var, ONLY : nzmsize
@@ -1121,12 +1121,12 @@ SUBROUTINE test_varcoeff(converged, pp3, dv3, atol, rtol, poissiter)
   !! LOCALS
   INTEGER :: ierr
   REAL(mytype) :: errloc, errglob, divup3norm
-     
+
   IF (poissiter.EQ.0) THEN
      errloc = SUM(dv3**2)
      CALL MPI_ALLREDUCE(errloc,divup3norm,1,real_type,MPI_SUM,MPI_COMM_WORLD,ierr)
      divup3norm = SQRT(divup3norm / nxm / nym / nzm)
-  
+
      IF (nrank.EQ.0) THEN
         PRINT *, "Solving variable-coefficient Poisson equation:"
         PRINT *, "+ RMS div(u*) - div(u): ", divup3norm
@@ -1160,7 +1160,7 @@ SUBROUTINE test_varcoeff(converged, pp3, dv3, atol, rtol, poissiter)
         pp3(:,:,:,2) = pp3(:,:,:,1)
      ENDIF
   ENDIF
-  
+
 ENDSUBROUTINE test_varcoeff
 
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
@@ -1173,7 +1173,7 @@ ENDSUBROUTINE test_varcoeff
 SUBROUTINE calc_varcoeff_rhs(pp3, rho1, px1, py1, pz1, dv3, drho1, ep1, divu3, poissiter)
 
   USE MPI
-  
+
   USE decomp_2d
 
   USE param, ONLY : nrhotime, ntime
@@ -1181,7 +1181,7 @@ SUBROUTINE calc_varcoeff_rhs(pp3, rho1, px1, py1, pz1, dv3, drho1, ep1, divu3, p
 
   USE var, ONLY : ta1, tb1, tc1
   USE var, ONLY : nzmsize
-  
+
   IMPLICIT NONE
 
   !! INPUTS
diff --git a/src/parameters.f90 b/src/parameters.f90
index 1090814a..250d9fc0 100644
--- a/src/parameters.f90
+++ b/src/parameters.f90
@@ -342,7 +342,7 @@ subroutine parameter_defaults()
   USE variables
   USE decomp_2d
   USE complex_geometry
-  
+
   IMPLICIT NONE
 
   ro = 99999999._mytype
@@ -366,7 +366,7 @@ subroutine parameter_defaults()
   !! IBM stuff
   nraf = 0
   nobjmax = 0
-   
+
   itrip = 0
   wrotation = zero
   irotation = 0
@@ -394,7 +394,7 @@ subroutine parameter_defaults()
   !! IO
   ivisu = 1
   ipost = 0
-  
+
   save_ux = 0
   save_uy = 0
   save_uz = 0
diff --git a/src/poisson.f90 b/src/poisson.f90
index f877a63a..e063ffb0 100644
--- a/src/poisson.f90
+++ b/src/poisson.f90
@@ -10,7 +10,7 @@ module decomp_2d_poisson
 
   private        ! Make everything private unless declared public
 
-!  real(mytype), private, parameter :: PI = 3.14159265358979323846_mytype
+  !  real(mytype), private, parameter :: PI = 3.14159265358979323846_mytype
 
 #ifdef DOUBLE_PREC
   real(mytype), parameter :: epsilon = 1.e-16_mytype
@@ -23,7 +23,7 @@ module decomp_2d_poisson
 
   ! decomposition object for physical space
   TYPE(DECOMP_INFO), save :: ph
-  
+
   ! decomposition object for spectral space
   TYPE(DECOMP_INFO), save :: sp
 
@@ -58,9 +58,9 @@ contains
 
 
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Initialise Poisson solver for given boundary conditions
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine decomp_2d_poisson_init()
 
     implicit none
@@ -83,9 +83,9 @@ contains
        bcz=1
     endif
 
-    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     ! Top level wrapper
-    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     if (bcx==0 .and. bcy==0 .and. bcz==0) then
        poisson => poisson_000
     else if (bcx==1 .and. bcy==0 .and. bcz==0) then
@@ -108,7 +108,7 @@ contains
     if (bcz==1) nz=nz-1
 
 #ifdef DEBG 
-  if (nrank .eq. 0) print *,'# decomp_2d_poisson_init start'
+    if (nrank .eq. 0) print *,'# decomp_2d_poisson_init start'
 #endif
 
     allocate(ax(nx),bx(nx))
@@ -117,14 +117,14 @@ contains
     call abxyz(ax,ay,az,bx,by,bz,nx,ny,nz,bcx,bcy,bcz)
 
 #ifdef DEBG 
-  if (nrank .eq. 0) print *,'# decomp_2d_poisson_init decomp_info_init'
+    if (nrank .eq. 0) print *,'# decomp_2d_poisson_init decomp_info_init'
 #endif
 
     call decomp_info_init(nx, ny, nz, ph)
     call decomp_info_init(nx, ny, nz/2+1, sp)
 
 #ifdef DEBG 
-  if (nrank .eq. 0) print *,'# decomp_2d_poisson_init decomp_info_init ok'
+    if (nrank .eq. 0) print *,'# decomp_2d_poisson_init decomp_info_init ok'
 #endif
 
     ! allocate work space
@@ -138,9 +138,9 @@ contains
        allocate(a3(sp%yst(1):sp%yen(1),ny,sp%yst(3):sp%yen(3),5))
     else if (bcx==1 .and. bcy==0 .and. bcz==0) then
        allocate(cw1(sp%xst(1):sp%xen(1),sp%xst(2):sp%xen(2), &
-                 sp%xst(3):sp%xen(3)))
+            sp%xst(3):sp%xen(3)))
        allocate(cw1b(sp%xst(1):sp%xen(1),sp%xst(2):sp%xen(2), &
-                 sp%xst(3):sp%xen(3)))
+            sp%xst(3):sp%xen(3)))
        allocate(rw1(ph%xst(1):ph%xen(1),ph%xst(2):ph%xen(2), &
             ph%xst(3):ph%xen(3)))
        allocate(rw1b(ph%xst(1):ph%xen(1),ph%xst(2):ph%xen(2), &
@@ -158,15 +158,15 @@ contains
        allocate(rw2b(ph%yst(1):ph%yen(1),ph%yst(2):ph%yen(2), &
             ph%yst(3):ph%yen(3)))
        allocate(cw1(sp%xst(1):sp%xen(1),sp%xst(2):sp%xen(2), &
-                 sp%xst(3):sp%xen(3)))
+            sp%xst(3):sp%xen(3)))
        allocate(cw2(sp%yst(1):sp%yen(1),sp%yst(2):sp%yen(2), &
-                 sp%yst(3):sp%yen(3)))
+            sp%yst(3):sp%yen(3)))
        allocate(cw22(sp%yst(1):sp%yen(1),sp%yst(2):sp%yen(2), &
             sp%yst(3):sp%yen(3)))
        allocate(cw2b(sp%yst(1):sp%yen(1),sp%yst(2):sp%yen(2), &
-                 sp%yst(3):sp%yen(3)))
+            sp%yst(3):sp%yen(3)))
        allocate(cw2c(sp%yst(1):sp%yen(1),sp%yst(2):sp%yen(2), &
-                 sp%yst(3):sp%yen(3)))
+            sp%yst(3):sp%yen(3)))
        allocate(kxyz(sp%yst(1):sp%yen(1),sp%yst(2):sp%yen(2), &
             sp%yst(3):sp%yen(3)))
        allocate(a(sp%yst(1):sp%yen(1),ny/2,sp%yst(3):sp%yen(3),5))
@@ -174,17 +174,17 @@ contains
        allocate(a3(sp%yst(1):sp%yen(1),ny,sp%yst(3):sp%yen(3),5))
     else if (bcx==1 .and. bcy==1) then
        allocate(cw1(sp%xst(1):sp%xen(1),sp%xst(2):sp%xen(2), &
-                 sp%xst(3):sp%xen(3)))
+            sp%xst(3):sp%xen(3)))
        allocate(cw1b(sp%xst(1):sp%xen(1),sp%xst(2):sp%xen(2), &
-                 sp%xst(3):sp%xen(3)))
+            sp%xst(3):sp%xen(3)))
        allocate(cw2(sp%yst(1):sp%yen(1),sp%yst(2):sp%yen(2), &
-                 sp%yst(3):sp%yen(3)))
+            sp%yst(3):sp%yen(3)))
        allocate(cw22(sp%yst(1):sp%yen(1),sp%yst(2):sp%yen(2), &
             sp%yst(3):sp%yen(3)))
        allocate(cw2b(sp%yst(1):sp%yen(1),sp%yst(2):sp%yen(2), &
-                 sp%yst(3):sp%yen(3)))
+            sp%yst(3):sp%yen(3)))
        allocate(cw2c(sp%yst(1):sp%yen(1),sp%yst(2):sp%yen(2), &
-                 sp%yst(3):sp%yen(3)))
+            sp%yst(3):sp%yen(3)))
        allocate(rw1(ph%xst(1):ph%xen(1),ph%xst(2):ph%xen(2), &
             ph%xst(3):ph%xen(3)))
        allocate(rw1b(ph%xst(1):ph%xen(1),ph%xst(2):ph%xen(2), &
@@ -201,25 +201,25 @@ contains
        allocate(a(sp%yst(1):sp%yen(1),ny/2,sp%yst(3):sp%yen(3),5))
        allocate(a2(sp%yst(1):sp%yen(1),ny/2,sp%yst(3):sp%yen(3),5))
        allocate(a3(sp%yst(1):sp%yen(1),nym,sp%yst(3):sp%yen(3),5))      
-    end if 
+    end if
 
 #ifdef DEBG 
-  if (nrank .eq. 0) print *,'# decomp_2d_poisson_init before waves'
+    if (nrank .eq. 0) print *,'# decomp_2d_poisson_init before waves'
 #endif
 
     call waves()
 
 #ifdef DEBG 
-  if (nrank .eq. 0) print *,'# decomp_2d_poisson_init end'
+    if (nrank .eq. 0) print *,'# decomp_2d_poisson_init end'
 #endif
 
     return
   end subroutine decomp_2d_poisson_init
 
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Release memory used by Poisson solver
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine decomp_2d_poisson_finalize
 
     implicit none
@@ -254,9 +254,9 @@ contains
     return
   end subroutine decomp_2d_poisson_finalize
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Solving 3D Poisson equation with periodic B.C in all 3 dimensions
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine poisson_000(rhs)
 
     use derivX
@@ -276,7 +276,7 @@ contains
 
 
     real(mytype) :: tmp1, tmp2,x ,y, z
-    
+
     integer :: nx,ny,nz, i,j,k
 
     nx = nx_global
@@ -327,19 +327,19 @@ contains
              ! CANNOT DO A DIVISION BY ZERO
              if ((tmp1.lt.epsilon).or.(tmp2.lt.epsilon)) then
                 cw1(i,j,k)=0._mytype
-!                print *,'DIV 0',i,j,k,epsilon
+                !                print *,'DIV 0',i,j,k,epsilon
              else
                 cw1(i,j,k)=cmplx( real(cw1(i,j,k), kind=mytype) / (-tmp1), &
                      aimag(cw1(i,j,k))/(-tmp2), kind=mytype)
              end if
 
-           !Print result in spectal space after Poisson
-      !     if (abs(out(i,j,k)) > 1.0e-4) then
-      !        write(*,*) 'AFTER',i,j,k,out(i,j,k),xyzk
-      !     end if
+             !Print result in spectal space after Poisson
+             !     if (abs(out(i,j,k)) > 1.0e-4) then
+             !        write(*,*) 'AFTER',i,j,k,out(i,j,k),xyzk
+             !     end if
 
              ! post-processing backward
-             
+
              ! POST PROCESSING IN Z
              tmp1 = real(cw1(i,j,k), kind=mytype)
              tmp2 = aimag(cw1(i,j,k))
@@ -363,11 +363,11 @@ contains
           end do
        end do
     end do
-             
+
     ! compute c2r transform
     call decomp_2d_fft_3d(cw1,rhs)
-    
- !   call decomp_2d_fft_finalize
+
+    !   call decomp_2d_fft_finalize
 
     return
   end subroutine poisson_000
@@ -382,7 +382,7 @@ contains
     complex(mytype) :: xyzk
     real(mytype) :: tmp1, tmp2, tmp3, tmp4
     real(mytype) :: xx1,xx2,xx3,xx4,xx5,xx6,xx7,xx8
-    
+
     integer :: nx,ny,nz, i,j,k, itmp
 
 100 format(1x,a8,3I4,2F12.6)
@@ -533,7 +533,7 @@ contains
           end do
        end do
     end do
-    
+
     ! post-processing backward
 
     ! POST PROCESSING IN X
@@ -621,16 +621,16 @@ contains
     end do
     call transpose_x_to_y(rw1b,rw2,ph)
     call transpose_y_to_z(rw2,rhs,ph)
-    
-  !  call decomp_2d_fft_finalize
+
+    !  call decomp_2d_fft_finalize
 
     return
   end subroutine poisson_100
 
-  
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Solving 3D Poisson equation: Neumann in Y; periodic in X & Z
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine poisson_010(rhs)
 
     implicit none
@@ -719,7 +719,7 @@ contains
           end do
        end do
     end do
-    
+
     ! POST PROCESSING IN Y
     ! NEED TO BE IN Y PENCILS!!!!!!!!!!!!!!!
     call transpose_x_to_y(cw1,cw2,sp)
@@ -760,100 +760,100 @@ contains
 
     if (istret==0) then 
 
-    ! Solve Poisson
-    ! doing wave number division in Y-pencil
-    do k = sp%yst(3), sp%yen(3)
-       do j = sp%yst(2), sp%yen(2)
-          do i = sp%yst(1), sp%yen(1)
-             !tmp1=real(zk2(k)+yk2(j)+xk2(i), kind=mytype)
-             !tmp2=aimag(zk2(k)+yk2(j)+xk2(i))
-             tmp1=real(kxyz(i,j,k), kind=mytype)
-             tmp2=aimag(kxyz(i,j,k))
-             !xyzk=cmplx(tmp1,tmp2, kind=mytype)
-             !CANNOT DO A DIVISION BY ZERO
-             if ((abs(tmp1).lt.epsilon).and.(abs(tmp2).lt.epsilon)) then    
-                cw2b(i,j,k)=cmplx(0._mytype,0._mytype, kind=mytype)
-             end if
-             if ((abs(tmp1).lt.epsilon).and.(abs(tmp2).ge.epsilon)) then
-                cw2b(i,j,k)=cmplx(0._mytype, &
-                     aimag(cw2b(i,j,k))/(-tmp2), kind=mytype)
-             end if
-             if ((abs(tmp1).ge.epsilon).and.(abs(tmp2).lt.epsilon)) then    
-                cw2b(i,j,k)=cmplx( real(cw2b(i,j,k), kind=mytype) &
-                     /(-tmp1), 0._mytype, kind=mytype)
-             end if
-             if ((abs(tmp1).ge.epsilon).and.(abs(tmp2).ge.epsilon)) then
-                cw2b(i,j,k)=cmplx( real(cw2b(i,j,k), kind=mytype) &
-                     /(-tmp1), &
-                     aimag(cw2b(i,j,k))/(-tmp2), kind=mytype)
-             end if
+       ! Solve Poisson
+       ! doing wave number division in Y-pencil
+       do k = sp%yst(3), sp%yen(3)
+          do j = sp%yst(2), sp%yen(2)
+             do i = sp%yst(1), sp%yen(1)
+                !tmp1=real(zk2(k)+yk2(j)+xk2(i), kind=mytype)
+                !tmp2=aimag(zk2(k)+yk2(j)+xk2(i))
+                tmp1=real(kxyz(i,j,k), kind=mytype)
+                tmp2=aimag(kxyz(i,j,k))
+                !xyzk=cmplx(tmp1,tmp2, kind=mytype)
+                !CANNOT DO A DIVISION BY ZERO
+                if ((abs(tmp1).lt.epsilon).and.(abs(tmp2).lt.epsilon)) then    
+                   cw2b(i,j,k)=cmplx(0._mytype,0._mytype, kind=mytype)
+                end if
+                if ((abs(tmp1).lt.epsilon).and.(abs(tmp2).ge.epsilon)) then
+                   cw2b(i,j,k)=cmplx(0._mytype, &
+                        aimag(cw2b(i,j,k))/(-tmp2), kind=mytype)
+                end if
+                if ((abs(tmp1).ge.epsilon).and.(abs(tmp2).lt.epsilon)) then    
+                   cw2b(i,j,k)=cmplx( real(cw2b(i,j,k), kind=mytype) &
+                        /(-tmp1), 0._mytype, kind=mytype)
+                end if
+                if ((abs(tmp1).ge.epsilon).and.(abs(tmp2).ge.epsilon)) then
+                   cw2b(i,j,k)=cmplx( real(cw2b(i,j,k), kind=mytype) &
+                        /(-tmp1), &
+                        aimag(cw2b(i,j,k))/(-tmp2), kind=mytype)
+                end if
+             end do
           end do
        end do
-    end do
 
     else
-       
+
        call matrice_refinement()
-!       do k = sp%yst(3), sp%yen(3)
-!          do j = 1,ny/2
-!             do i = sp%yst(1), sp%yen(1)
-!                print *,i,j,k,a(i,j,k,3)
-!!                if (nrank.le.1) print *,i,j,k,a(i,j,k,3)
-!!                if (nrank.gt.1) print *,i+4,j,k,a(i,j,k,3)
-!             enddo
-!          enddo
-!       enddo
-     
+       !       do k = sp%yst(3), sp%yen(3)
+       !          do j = 1,ny/2
+       !             do i = sp%yst(1), sp%yen(1)
+       !                print *,i,j,k,a(i,j,k,3)
+       !!                if (nrank.le.1) print *,i,j,k,a(i,j,k,3)
+       !!                if (nrank.gt.1) print *,i+4,j,k,a(i,j,k,3)
+       !             enddo
+       !          enddo
+       !       enddo
+
 
        if (istret.ne.3) then
           cw2(:,:,:)=0.;cw2c(:,:,:)=0.
           do k = sp%yst(3), sp%yen(3)
-          do j = 1,ny/2
-          do i = sp%yst(1), sp%yen(1)
-             cw2(i,j,k)=cw2b(i,2*j-1,k) 
-             cw2c(i,j,k)=cw2b(i,2*j,k)
-          enddo
-          enddo
+             do j = 1,ny/2
+                do i = sp%yst(1), sp%yen(1)
+                   cw2(i,j,k)=cw2b(i,2*j-1,k) 
+                   cw2c(i,j,k)=cw2b(i,2*j,k)
+                enddo
+             enddo
           enddo
 
- !   do k = sp%yst(3), sp%yen(3)
- !      do j = 1,ny/2
- !         do i = sp%yst(1), sp%yen(1)
- !            if (abs(cw2(i,j,k)) > 1.0e-4) then
- !               write(*,*) 'before IN',i,j,k,cw2(i,j,k)!*2.
- !!            end if
-  !        end do
-  !     end do
-  !  end do
-          
-         call inversion5_v1(a,cw2,sp)
-         call inversion5_v1(a2,cw2c,sp)
-
-!         cw2(1,1,1)=cw2(1,1,1)*0.5
-         
- 
-!   do k = sp%yst(3), sp%yen(3)
-!       do j = 1,ny/2
-!          do i = sp%yst(1), sp%yen(1)
-!             if (abs(cw2c(i,j,k)) > 1.0e-4) then
-!                write(*,*) 'after IN',i,j,k,cw2c(i,j,k)!*2.
-!             end if
-!          end do
-!       end do
-!    end do
+          !   do k = sp%yst(3), sp%yen(3)
+          !      do j = 1,ny/2
+          !         do i = sp%yst(1), sp%yen(1)
+          !            if (abs(cw2(i,j,k)) > 1.0e-4) then
+          !               write(*,*) 'before IN',i,j,k,cw2(i,j,k)!*2.
+          !!            end if
+          !        end do
+          !     end do
+          !  end do
+
+          call inversion5_v1(a,cw2,sp)
+          call inversion5_v1(a2,cw2c,sp)
+
+          !         cw2(1,1,1)=cw2(1,1,1)*0.5
+
+
+          !   do k = sp%yst(3), sp%yen(3)
+          !       do j = 1,ny/2
+          !          do i = sp%yst(1), sp%yen(1)
+          !             if (abs(cw2c(i,j,k)) > 1.0e-4) then
+          !                write(*,*) 'after IN',i,j,k,cw2c(i,j,k)!*2.
+          !             end if
+          !          end do
+          !       end do
+          !    end do
 
           cw2b(:,:,:)=0.
           do k=sp%yst(3), sp%yen(3)
-          do j=1,ny-1,2
-          do i=sp%yst(1), sp%yen(1)
-             cw2b(i,j,k)=cw2(i,(j+1)/2,k)
-          enddo
-          enddo
-          do j=2,ny,2
-          do i=sp%yst(1), sp%yen(1)
-             cw2b(i,j,k)=cw2c(i,j/2,k)
-          enddo
-          enddo
+             do j=1,ny-1,2
+                do i=sp%yst(1), sp%yen(1)
+                   cw2b(i,j,k)=cw2(i,(j+1)/2,k)
+                enddo
+             enddo
+             do j=2,ny,2
+                do i=sp%yst(1), sp%yen(1)
+                   cw2b(i,j,k)=cw2c(i,j/2,k)
+                enddo
+             enddo
           enddo
           !do k=sp%yst(3), sp%yen(3)
           !do i=sp%yst(1), sp%yen(1)
@@ -865,33 +865,33 @@ contains
           !enddo
        else
           do k = sp%yst(3), sp%yen(3)
-          do j = 1,ny
-          do i = sp%yst(1), sp%yen(1)
-             cw2(i,j,k)=cw2b(i,j,k) 
-          enddo
-          enddo
+             do j = 1,ny
+                do i = sp%yst(1), sp%yen(1)
+                   cw2(i,j,k)=cw2b(i,j,k) 
+                enddo
+             enddo
           enddo
           call inversion5_v2(a3,cw2,sp)
           do k = sp%yst(3), sp%yen(3)
-          do j = 1,ny
-          do i = sp%yst(1), sp%yen(1)
-             cw2b(i,j,k)=cw2(i,j,k) 
-          enddo
-          enddo
+             do j = 1,ny
+                do i = sp%yst(1), sp%yen(1)
+                   cw2b(i,j,k)=cw2(i,j,k) 
+                enddo
+             enddo
           enddo
        endif
 
     endif
 
-!    print *,nrank, sp%yst(3),sp%yen(3),sp%yst(1),sp%yen(1)
+    !    print *,nrank, sp%yst(3),sp%yen(3),sp%yst(1),sp%yen(1)
 
-!we are in Y pencil
+    !we are in Y pencil
     do k = sp%yst(3), sp%yen(3)  
-    do i = sp%yst(1), sp%yen(1)
-       if ((i==nx/2+1).and.(k==nz/2+1)) then
-          cw2b(i,:,k)=0.
-       endif
-    enddo
+       do i = sp%yst(1), sp%yen(1)
+          if ((i==nx/2+1).and.(k==nz/2+1)) then
+             cw2b(i,:,k)=0.
+          endif
+       enddo
     enddo
 #ifdef DEBUG
     do k = sp%yst(3), sp%yen(3)
@@ -930,7 +930,7 @@ contains
           end do
        end do
     end do
-           
+
     ! Back to X-pencil
     call transpose_y_to_x(cw2,cw1,sp)
 #ifdef DEBUG
@@ -944,7 +944,7 @@ contains
        end do
     end do
 #endif
-    
+
     ! POST PROCESSING IN X
     do k = sp%xst(3),sp%xen(3)
        do j = sp%xst(2),sp%xen(2)
@@ -995,15 +995,15 @@ contains
     end do
     call transpose_y_to_z(rw2b,rhs,ph)
 
-  !  call decomp_2d_fft_finalize
+    !  call decomp_2d_fft_finalize
 
     return
   end subroutine poisson_010
 
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   ! Solving 3D Poisson equation: Neumann in X, Y; Neumann/periodic in Z
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine poisson_11x(rhs)
 
     implicit none
@@ -1026,7 +1026,7 @@ contains
     else if (bcz==0) then
        nz = nz_global
     end if
-   
+
     if (bcz==1) then  
        do j=1,ph%zsz(2)
           do i=1,ph%zsz(1)
@@ -1042,7 +1042,7 @@ contains
     else if (bcz==0) then     
        call transpose_z_to_y(rhs,rw2,ph)
     end if
-    
+
 
     do k=ph%yst(3),ph%yen(3)
        do i=ph%yst(1),ph%yen(1)
@@ -1139,7 +1139,7 @@ contains
           end do
        end do
     end do
-    
+
     ! back to X-pencil
     call transpose_y_to_x(cw2b,cw1,sp)
 #ifdef DEBUG
@@ -1153,7 +1153,7 @@ contains
        end do
     end do
 #endif
-    
+
     ! POST PROCESSING IN X
     do k = sp%xst(3),sp%xen(3)
        do j = sp%xst(2),sp%xen(2)
@@ -1176,7 +1176,7 @@ contains
           end do
        end do
     end do
-    
+
 #ifdef DEBUG
     do k = sp%xst(3),sp%xen(3)
        do j = sp%xst(2),sp%xen(2)
@@ -1191,89 +1191,89 @@ contains
 
     if (istret==0) then
 
-    ! Solve Poisson
-    do k = sp%xst(3),sp%xen(3)
-       do j = sp%xst(2),sp%xen(2)
-          do i = sp%xst(1),sp%xen(1)
-             !tmp1=real(zk2(k)+yk2(j)+xk2(i), kind=mytype)
-             !tmp2=aimag(zk2(k)+yk2(j)+xk2(i))
-             tmp1=real(kxyz(i,j,k), kind=mytype)
-             tmp2=aimag(kxyz(i,j,k))
-             !xyzk=cmplx(tmp1,tmp2, kind=mytype)
-             !CANNOT DO A DIVISION BY ZERO
-             if ((abs(tmp1).lt.epsilon).and.(abs(tmp2).lt.epsilon)) then    
-                cw1b(i,j,k)=cmplx(0._mytype,0._mytype, kind=mytype)
-             end if
-             if ((abs(tmp1).lt.epsilon).and.(abs(tmp2).ge.epsilon)) then
-                cw1b(i,j,k)=cmplx(0._mytype, &
-                     aimag(cw1b(i,j,k))/(-tmp2), kind=mytype)
-             end if
-             if ((abs(tmp1).ge.epsilon).and.(abs(tmp2).lt.epsilon)) then    
-                cw1b(i,j,k)=cmplx( real(cw1b(i,j,k), kind=mytype) &
-                     /(-tmp1), 0._mytype, kind=mytype)
-             end if
-             if ((abs(tmp1).ge.epsilon).and.(abs(tmp2).ge.epsilon)) then
-                cw1b(i,j,k)=cmplx( real(cw1b(i,j,k), kind=mytype) &
-                     /(-tmp1), &
-                     aimag(cw1b(i,j,k))/(-tmp2), kind=mytype)
-             end if
+       ! Solve Poisson
+       do k = sp%xst(3),sp%xen(3)
+          do j = sp%xst(2),sp%xen(2)
+             do i = sp%xst(1),sp%xen(1)
+                !tmp1=real(zk2(k)+yk2(j)+xk2(i), kind=mytype)
+                !tmp2=aimag(zk2(k)+yk2(j)+xk2(i))
+                tmp1=real(kxyz(i,j,k), kind=mytype)
+                tmp2=aimag(kxyz(i,j,k))
+                !xyzk=cmplx(tmp1,tmp2, kind=mytype)
+                !CANNOT DO A DIVISION BY ZERO
+                if ((abs(tmp1).lt.epsilon).and.(abs(tmp2).lt.epsilon)) then    
+                   cw1b(i,j,k)=cmplx(0._mytype,0._mytype, kind=mytype)
+                end if
+                if ((abs(tmp1).lt.epsilon).and.(abs(tmp2).ge.epsilon)) then
+                   cw1b(i,j,k)=cmplx(0._mytype, &
+                        aimag(cw1b(i,j,k))/(-tmp2), kind=mytype)
+                end if
+                if ((abs(tmp1).ge.epsilon).and.(abs(tmp2).lt.epsilon)) then    
+                   cw1b(i,j,k)=cmplx( real(cw1b(i,j,k), kind=mytype) &
+                        /(-tmp1), 0._mytype, kind=mytype)
+                end if
+                if ((abs(tmp1).ge.epsilon).and.(abs(tmp2).ge.epsilon)) then
+                   cw1b(i,j,k)=cmplx( real(cw1b(i,j,k), kind=mytype) &
+                        /(-tmp1), &
+                        aimag(cw1b(i,j,k))/(-tmp2), kind=mytype)
+                end if
+             end do
           end do
        end do
-    end do
 
     else
        call matrice_refinement()
-! the stretching is only working in Y pencils
+       ! the stretching is only working in Y pencils
        call transpose_x_to_y(cw1b,cw2b,sp)
        !we are now in Y pencil
-       
+
        if (istret.ne.3) then
           cw2(:,:,:)=0.;cw2c(:,:,:)=0.
           do k = sp%yst(3), sp%yen(3)
-          do j = 1,ny/2
-          do i = sp%yst(1), sp%yen(1)
-             cw2(i,j,k)=cw2b(i,2*j-1,k) 
-             cw2c(i,j,k)=cw2b(i,2*j,k)
-          enddo
-          enddo
+             do j = 1,ny/2
+                do i = sp%yst(1), sp%yen(1)
+                   cw2(i,j,k)=cw2b(i,2*j-1,k) 
+                   cw2c(i,j,k)=cw2b(i,2*j,k)
+                enddo
+             enddo
           enddo
           call inversion5_v1(a,cw2,sp)
           call inversion5_v1(a2,cw2c,sp)
 
           cw2b(:,:,:)=0.
           do k=sp%yst(3), sp%yen(3)
-          do j=1,ny-1,2
-          do i=sp%yst(1), sp%yen(1)
-             cw2b(i,j,k)=cw2(i,(j+1)/2,k)
-          enddo
-          enddo
-          do j=2,ny,2
-          do i=sp%yst(1), sp%yen(1)
-             cw2b(i,j,k)=cw2c(i,j/2,k)
-          enddo
-          enddo
+             do j=1,ny-1,2
+                do i=sp%yst(1), sp%yen(1)
+                   cw2b(i,j,k)=cw2(i,(j+1)/2,k)
+                enddo
+             enddo
+             do j=2,ny,2
+                do i=sp%yst(1), sp%yen(1)
+                   cw2b(i,j,k)=cw2c(i,j/2,k)
+                enddo
+             enddo
           enddo
        else
           cw2(:,:,:)=0.
           do k = sp%yst(3), sp%yen(3)
-          do j = sp%yst(2), sp%yen(2)
-          do i = sp%yst(1), sp%yen(1)
-             cw2(i,j,k)=cw2b(i,j,k) 
-          enddo
-          enddo
+             do j = sp%yst(2), sp%yen(2)
+                do i = sp%yst(1), sp%yen(1)
+                   cw2(i,j,k)=cw2b(i,j,k) 
+                enddo
+             enddo
           enddo
 
           call inversion5_v2(a3,cw2,sp)
 
           do k = sp%yst(3), sp%yen(3)
-          do j = sp%yst(2), sp%yen(2)
-          do i = sp%yst(1), sp%yen(1)
-             cw2b(i,j,k)=cw2(i,j,k) 
-          enddo
-          enddo
+             do j = sp%yst(2), sp%yen(2)
+                do i = sp%yst(1), sp%yen(1)
+                   cw2b(i,j,k)=cw2(i,j,k) 
+                enddo
+             enddo
           enddo
        endif
-!we have to go back in X pencils
+       !we have to go back in X pencils
        call transpose_y_to_x(cw2b,cw1b,sp)
     endif
 
@@ -1288,9 +1288,9 @@ contains
        end do
     end do
 #endif
-!stop
+    !stop
     ! post-processing backward
-    
+
     do k = sp%xst(3),sp%xen(3)
        do j = sp%xst(2),sp%xen(2)
           cw1(1,j,k)=cw1b(1,j,k)
@@ -1361,7 +1361,7 @@ contains
 #endif
     ! back to X-pencil
     call transpose_y_to_x(cw2b,cw1,sp)
-    
+
     ! POST PROCESSING IN Z
     do k = sp%xst(3),sp%xen(3)
        do j = sp%xst(2),sp%xen(2)
@@ -1396,7 +1396,7 @@ contains
     else if (bcz==0) then 
        call transpose_z_to_y(rhs,rw2,ph)   
     end if
-    
+
     do k=ph%yst(3),ph%yen(3)
        do i=ph%yst(1),ph%yen(1)
           do j=1,ny/2
@@ -1421,17 +1421,17 @@ contains
     call transpose_x_to_y(rw1b,rw2,ph)
     call transpose_y_to_z(rw2,rhs,ph)
 
-  !  call decomp_2d_fft_finalize
+    !  call decomp_2d_fft_finalize
+
 
-   
 
     return
   end subroutine poisson_11x
 
 
-  
+
   subroutine abxyz(ax,ay,az,bx,by,bz,nx,ny,nz,bcx,bcy,bcz)
-	
+
     use param
 
     implicit none
@@ -1489,695 +1489,695 @@ contains
     return
   end subroutine abxyz
 
-! ***********************************************************
-!
-subroutine waves ()
-!
-!***********************************************************
-
-USE derivX 
-USE derivY 
-USE derivZ 
-USE param
-USE decomp_2d
-USE variables
-use decomp_2d_fft
-
-implicit none
-
-integer :: i,j,k
-real(mytype) :: w,wp,w1,w1p 
-complex(mytype) :: xyzk
-complex(mytype) :: ytt,xtt,ztt,yt1,xt1,yt2,xt2
-complex(mytype) :: xtt1,ytt1,ztt1,zt1,zt2,tmp1,tmp2,tmp3
-complex(mytype) :: tmp4,tmp5,tmp6
-
-xkx(:)=0. ; xk2(:)=0. ; yky(:)=0. ; yk2(:)=0.
-zkz(:)=0. ; zk2(:)=0.
-
-!WAVE NUMBER IN X
-if (bcx==0) then
-   do i=1,nx/2+1
-      w=2.*pi*(i-1)/nx
-      wp=acix6*2.*dx*sin(w/2.)+(bcix6*2.*dx)*sin(3./2.*w)
-      wp=wp/(1.+2.*alcaix6*cos(w))
-      xkx(i)=cmplx(nx*wp/xlx,nx*wp/xlx, kind=mytype)
-      exs(i)=cmplx(nx*w/xlx,nx*w/xlx, kind=mytype)
-      xk2(i)=cmplx((nx*wp/xlx)**2,(nx*wp/xlx)**2, kind=mytype)
-   enddo
-   do i=nx/2+2,nx
-      xkx(i)=xkx(nx-i+2)
-      exs(i)=exs(nx-i+2)
-      xk2(i)=xk2(nx-i+2)
-   enddo
-else
-   do i=1,nx
-      w=2.*pi*0.5*(i-1)/nxm
-      wp=acix6*2.*dx*sin(w/2.)+(bcix6*2.*dx)*sin(3./2.*w)
-      wp=wp/(1.+2.*alcaix6*cos(w))
-      xkx(i)=cmplx(nxm*wp/xlx,nxm*wp/xlx, kind=mytype)
-      exs(i)=cmplx(nxm*w/xlx,nxm*w/xlx, kind=mytype)
-      xk2(i)=cmplx((nxm*wp/xlx)**2,(nxm*wp/xlx)**2, kind=mytype)
-   enddo
-   xkx(1)=0.
-   exs(1)=0.
-   xk2(1)=0.
-endif
-
-!WAVE NUMBER IN Y
-if (bcy==0) then
-   do j=1,ny/2+1
-      w=2.*pi*(j-1)/ny
-      wp=aciy6*2.*dy*sin(w/2.)+(bciy6*2.*dy)*sin(3./2.*w)
-      wp=wp/(1.+2.*alcaiy6*cos(w))
-      if (istret==0) yky(j)=cmplx(ny*wp/yly,ny*wp/yly, kind=mytype)
-      if (istret.ne.0) yky(j)=cmplx(ny*wp,ny*wp, kind=mytype)
-      eys(j)=cmplx(ny*w/yly,ny*w/yly, kind=mytype)
-      yk2(j)=cmplx((ny*wp/yly)**2,(ny*wp/yly)**2, kind=mytype)
-   enddo
-   do j=ny/2+2,ny
-      yky(j)=yky(ny-j+2)
-      eys(j)=eys(ny-j+2)
-      yk2(j)=yk2(ny-j+2)
-   enddo
-else
-   do j=1,ny
-      w=2.*pi*0.5*(j-1)/nym
-      wp=aciy6*2.*dy*sin(w/2.)+(bciy6*2.*dy)*sin(3./2.*w)
-      wp=wp/(1.+2.*alcaiy6*cos(w))
-      if (istret==0) yky(j)=cmplx(nym*wp/yly,nym*wp/yly, kind=mytype)
-      if (istret.ne.0) yky(j)=cmplx(nym*wp,nym*wp, kind=mytype)
-      eys(j)=cmplx(nym*w/yly,nym*w/yly, kind=mytype)
-      yk2(j)=cmplx((nym*wp/yly)**2,(nym*wp/yly)**2, kind=mytype)
-   enddo
-   yky(1)=0.
-   eys(1)=0.
-   yk2(1)=0.
-endif
-
-!WAVE NUMBER IN Z
-if (bcz==0) then
-   do k=1,nz/2+1
-      w=2.*pi*(k-1)/nz
-      wp=aciz6*2.*dz*sin(w/2.)+(bciz6*2.*dz)*sin(3./2.*w)
-      wp=wp/(1.+2.*alcaiz6*cos(w))
-      zkz(k)=cmplx(nz*wp/zlz,nz*wp/zlz, kind=mytype)
-      ezs(k)=cmplx(nz*w/zlz,nz*w/zlz, kind=mytype)
-      zk2(k)=cmplx((nz*wp/zlz)**2,(nz*wp/zlz)**2, kind=mytype)
-   enddo
-else
-   do k=1,nz/2+1
-      w=2.*pi*0.5*(k-1)/nzm
-      w1=2.*pi*0.5*(nzm-k+1)/nzm
-      wp=aciz6*2.*dz*sin(w/2.)+(bciz6*2.*dz)*sin(3./2.*w)
-      wp=wp/(1.+2.*alcaiz6*cos(w))
-      w1p=aciz6*2.*dz*sin(w1/2.)+(bciz6*2.*dz)*sin(3./2.*w1)
-      w1p=w1p/(1.+2.*alcaiz6*cos(w1))     
-      zkz(k)=cmplx(nzm*wp/zlz,-nzm*w1p/zlz, kind=mytype)
-      ezs(k)=cmplx(nzm*w/zlz,nzm*w1/zlz, kind=mytype)
-      zk2(k)=cmplx((nzm*wp/zlz)**2,(nzm*w1p/zlz)**2, kind=mytype)
-   enddo
-endif
-!
-!if (nrank==0) then
-!   do i=1,nx
-!      print *,i,ezs(i)
-!   enddo
-!endif
-!stop
-
-if ((bcx==0).and.(bcz==0).and.bcy.ne.0) then
-do k = sp%yst(3), sp%yen(3)
-do j = sp%yst(2), sp%yen(2)
-do i = sp%yst(1), sp%yen(1)
-   xtt=cmplx((bicix6*2.*cos(real(exs(i), kind=mytype)*3.*dx/2.)+&
-        cicix6*2.*cos(real(exs(i), kind=mytype)*5.*dx/2.)+&
-        dicix6*2.*cos(real(exs(i), kind=mytype)*7.*dx/2.)),&
-        (bicix6*2.*cos(real(exs(i), kind=mytype)*3.*dx/2.)+&
-        cicix6*2.*cos(real(exs(i), kind=mytype)*5.*dx/2.)+&
-        dicix6*2.*cos(real(exs(i), kind=mytype)*7.*dx/2.)), kind=mytype)
-   ytt=cmplx((biciy6*2.*cos(real(eys(j), kind=mytype)*3.*dy/2.)+&
-        ciciy6*2.*cos(real(eys(j), kind=mytype)*5.*dy/2.)+&
-        diciy6*2.*cos(real(eys(j), kind=mytype)*7.*dy/2.)),&
-        (biciy6*2.*cos(real(eys(j), kind=mytype)*3.*dy/2.)+&
-        ciciy6*2.*cos(real(eys(j), kind=mytype)*5.*dy/2.)+&
-        diciy6*2.*cos(real(eys(j), kind=mytype)*7.*dy/2.)), kind=mytype)
-   ztt=cmplx((biciz6*2.*cos(real(ezs(k), kind=mytype)*3.*dz/2.)+&
-        ciciz6*2.*cos(real(ezs(k), kind=mytype)*5.*dz/2.)+&
-        diciz6*2.*cos(real(ezs(k), kind=mytype)*7.*dz/2.)),&
-        (biciz6*2.*cos(real(ezs(k), kind=mytype)*3.*dz/2.)+&
-        ciciz6*2.*cos(real(ezs(k), kind=mytype)*5.*dz/2.)+&
-        diciz6*2.*cos(real(ezs(k), kind=mytype)*7.*dz/2.)), kind=mytype)
-   xtt1=cmplx((aicix6*2.*cos(real(exs(i), kind=mytype)*dx/2.)),&
-        (aicix6*2.*cos(real(exs(i), kind=mytype)*dx/2.)), kind=mytype)
-   ytt1=cmplx((aiciy6*2.*cos(real(eys(j), kind=mytype)*dy/2.)),&
-        (aiciy6*2.*cos(real(eys(j), kind=mytype)*dy/2.)), kind=mytype)
-   ztt1=cmplx((aiciz6*2.*cos(real(ezs(k), kind=mytype)*dz/2.)),&
-        (aiciz6*2.*cos(real(ezs(k), kind=mytype)*dz/2.)), kind=mytype)
-   xt1=cmplx((1.+2.*ailcaix6*cos(real(exs(i), kind=mytype)*dx)),&
-        (1.+2.*ailcaix6*cos(real(exs(i), kind=mytype)*dx)), kind=mytype)
-   yt1=cmplx((1.+2.*ailcaiy6*cos(real(eys(j), kind=mytype)*dy)),&
-        (1.+2.*ailcaiy6*cos(real(eys(j), kind=mytype)*dy)), kind=mytype)
-   zt1=cmplx((1.+2.*ailcaiz6*cos(real(ezs(k), kind=mytype)*dz)),&
-        (1.+2.*ailcaiz6*cos(real(ezs(k), kind=mytype)*dz)), kind=mytype)
-   xt2=xk2(i)*((((ytt1+ytt)/yt1)*((ztt1+ztt)/zt1))**2)
-   yt2=yk2(j)*((((xtt1+xtt)/xt1)*((ztt1+ztt)/zt1))**2)
-   zt2=zk2(k)*((((xtt1+xtt)/xt1)*((ytt1+ytt)/yt1))**2)
-   xyzk=xt2+yt2+zt2
-   kxyz(i,j,k)=xyzk
-!   print *,i,j,k, kxyz(i,j,k)
-enddo
-enddo
-enddo
-else
-if (bcz==0) then
-      do k = sp%xst(3),sp%xen(3)
-      do j = sp%xst(2),sp%xen(2)
-      do i = sp%xst(1),sp%xen(1)
-         xtt=cmplx((bicix6*2.*cos(real(exs(i), kind=mytype)*3.*dx/2.)+&
-              cicix6*2.*cos(real(exs(i), kind=mytype)*5.*dx/2.)+&
-              dicix6*2.*cos(real(exs(i), kind=mytype)*7.*dx/2.)),&
-              (bicix6*2.*cos(real(exs(i), kind=mytype)*3.*dx/2.)+&
-              cicix6*2.*cos(real(exs(i), kind=mytype)*5.*dx/2.)+&
-              dicix6*2.*cos(real(exs(i), kind=mytype)*7.*dx/2.)), kind=mytype)
-         ytt=cmplx((biciy6*2.*cos(real(eys(j), kind=mytype)*3.*dy/2.)+&
-              ciciy6*2.*cos(real(eys(j), kind=mytype)*5.*dy/2.)+&
-              diciy6*2.*cos(real(eys(j), kind=mytype)*7.*dy/2.)),&
-              (biciy6*2.*cos(real(eys(j), kind=mytype)*3.*dy/2.)+&
-              ciciy6*2.*cos(real(eys(j), kind=mytype)*5.*dy/2.)+&
-              diciy6*2.*cos(real(eys(j), kind=mytype)*7.*dy/2.)), kind=mytype)
-         ztt=cmplx((biciz6*2.*cos(real(ezs(k), kind=mytype)*3.*dz/2.)+&
-              ciciz6*2.*cos(real(ezs(k), kind=mytype)*5.*dz/2.)+&
-              diciz6*2.*cos(real(ezs(k), kind=mytype)*7.*dz/2.)),&
-              (biciz6*2.*cos(real(ezs(k), kind=mytype)*3.*dz/2.)+&
-              ciciz6*2.*cos(real(ezs(k), kind=mytype)*5.*dz/2.)+&
-              diciz6*2.*cos(real(ezs(k), kind=mytype)*7.*dz/2.)), kind=mytype)
-         xtt1=cmplx((aicix6*2.*cos(real(exs(i), kind=mytype)*dx/2.)),&
-              (aicix6*2.*cos(real(exs(i), kind=mytype)*dx/2.)), kind=mytype)
-         ytt1=cmplx((aiciy6*2.*cos(real(eys(j), kind=mytype)*dy/2.)),&
-              (aiciy6*2.*cos(real(eys(j), kind=mytype)*dy/2.)), kind=mytype)
-         ztt1=cmplx((aiciz6*2.*cos(real(ezs(k), kind=mytype)*dz/2.)),&
-              (aiciz6*2.*cos(real(ezs(k), kind=mytype)*dz/2.)), kind=mytype)
-         xt1=cmplx((1.+2.*ailcaix6*cos(real(exs(i), kind=mytype)*dx)),&
-              (1.+2.*ailcaix6*cos(real(exs(i))*dx)), kind=mytype)
-         yt1=cmplx((1.+2.*ailcaiy6*cos(real(eys(j), kind=mytype)*dy)),&
-              (1.+2.*ailcaiy6*cos(real(eys(j), kind=mytype)*dy)), kind=mytype)
-         zt1=cmplx((1.+2.*ailcaiz6*cos(real(ezs(k), kind=mytype)*dz)),&
-              (1.+2.*ailcaiz6*cos(real(ezs(k), kind=mytype)*dz)), kind=mytype)
-         xt2=xk2(i)*((((ytt1+ytt)/yt1)*((ztt1+ztt)/zt1))**2)
-         yt2=yk2(j)*((((xtt1+xtt)/xt1)*((ztt1+ztt)/zt1))**2)
-         zt2=zk2(k)*((((xtt1+xtt)/xt1)*((ytt1+ytt)/yt1))**2)
-         xyzk=xt2+yt2+zt2
-         kxyz(i,j,k)=xyzk
-!   print *,i,j,k, kxyz(i,j,k)
-      enddo
-      enddo
-      enddo
-   else
-      do k = sp%xst(3),sp%xen(3)
-      do j = sp%xst(2),sp%xen(2)
-      do i = sp%xst(1),sp%xen(1)  
-         xtt=cmplx((bicix6*2.*cos(real(exs(i), kind=mytype)*3.*dx/2.)+&
-              cicix6*2.*cos(real(exs(i), kind=mytype)*5.*dx/2.)+&
-              dicix6*2.*cos(real(exs(i), kind=mytype)*7.*dx/2.)),&
-              (bicix6*2.*cos(real(exs(i), kind=mytype)*3.*dx/2.)+&
-              cicix6*2.*cos(real(exs(i), kind=mytype)*5.*dx/2.)+&
-              dicix6*2.*cos(real(exs(i), kind=mytype)*7.*dx/2.)), kind=mytype)
-         ytt=cmplx((biciy6*2.*cos(real(eys(j), kind=mytype)*3.*dy/2.)+&
-              ciciy6*2.*cos(real(eys(j), kind=mytype)*5.*dy/2.)+&
-              diciy6*2.*cos(real(eys(j), kind=mytype)*7.*dy/2.)),&
-              (biciy6*2.*cos(real(eys(j), kind=mytype)*3.*dy/2.)+&
-              ciciy6*2.*cos(real(eys(j), kind=mytype)*5.*dy/2.)+&
-              diciy6*2.*cos(real(eys(j), kind=mytype)*7.*dy/2.)), kind=mytype)
-         !
-         ztt=cmplx((biciz6*2.*cos(real(ezs(k), kind=mytype)*3.*dz/2.)+&
-              ciciz6*2.*cos(real(ezs(k), kind=mytype)*5.*dz/2.)+&
-              diciz6*2.*cos(real(ezs(k), kind=mytype)*7.*dz/2.)),&
-              (biciz6*2.*cos(aimag(ezs(k))*3.*dz/2.)+&
-              ciciz6*2.*cos(aimag(ezs(k))*5.*dz/2.)+&
-              diciz6*2.*cos(aimag(ezs(k))*7.*dz/2.)), kind=mytype)
-         !
-         xtt1=cmplx((aicix6*2.*cos(real(exs(i), kind=mytype)*dx/2.)),&
-              (aicix6*2.*cos(real(exs(i), kind=mytype)*dx/2.)), kind=mytype)
-         ytt1=cmplx((aiciy6*2.*cos(real(eys(j), kind=mytype)*dy/2.)),&
-              (aiciy6*2.*cos(real(eys(j), kind=mytype)*dy/2.)), kind=mytype)
-         !
-         ztt1=cmplx((aiciz6*2.*cos(real(ezs(k), kind=mytype)*dz/2.)),&
-              (aiciz6*2.*cos(aimag(ezs(k))*dz/2.)), kind=mytype)
-         !
-         xt1=cmplx((1.+2.*ailcaix6*cos(real(exs(i), kind=mytype)*dx)),&
-              (1.+2.*ailcaix6*cos(real(exs(i), kind=mytype)*dx)), kind=mytype)
-         yt1=cmplx((1.+2.*ailcaiy6*cos(real(eys(j), kind=mytype)*dy)),&
-              (1.+2.*ailcaiy6*cos(real(eys(j), kind=mytype)*dy)), kind=mytype)
-         zt1=cmplx((1.+2.*ailcaiz6*cos(real(ezs(k), kind=mytype)*dz)),&
-              (1.+2.*ailcaiz6*cos(aimag(ezs(k))*dz)), kind=mytype)
-         
-         tmp1=cmplx(real(ztt1+ztt, kind=mytype)/real(zt1, kind=mytype),&
-              aimag(ztt1+ztt)/aimag(zt1), kind=mytype)
-         tmp2=cmplx(real(ytt1+ytt, kind=mytype)/real(yt1, kind=mytype),&
-              real(ytt1+ytt, kind=mytype)/real(yt1, kind=mytype), kind=mytype)
-         tmp3=cmplx(real(xtt1+xtt, kind=mytype)/real(xt1, kind=mytype),&
-              real(xtt1+xtt, kind=mytype)/real(xt1, kind=mytype), kind=mytype)
-         
-         tmp4=cmplx((real(tmp1, kind=mytype)*real(tmp2, kind=mytype))**2,(aimag(tmp1)*aimag(tmp2))**2, kind=mytype)
-         tmp5=cmplx((real(tmp1, kind=mytype)*real(tmp3, kind=mytype))**2,(aimag(tmp1)*aimag(tmp3))**2, kind=mytype)
-         tmp6=cmplx((real(tmp3, kind=mytype)*real(tmp2, kind=mytype))**2,(aimag(tmp3)*aimag(tmp2))**2, kind=mytype)
-         
-         tmp1=cmplx(real(tmp4, kind=mytype)*real(xk2(i), kind=mytype),aimag(tmp4)*aimag(xk2(i)), kind=mytype)
-         tmp2=cmplx(real(tmp5, kind=mytype)*real(yk2(j), kind=mytype),aimag(tmp5)*aimag(yk2(j)), kind=mytype)
-         tmp3=cmplx(real(tmp6, kind=mytype)*real(zk2(k), kind=mytype),aimag(tmp6)*aimag(zk2(k)), kind=mytype)
-
-         xyzk=tmp1+tmp2+tmp3
-         kxyz(i,j,k)=xyzk
-!         print *,i,j,k,zt1,yt1
-      enddo
-      enddo
-      enddo
-   endif
-endif
-
-
-!          do k=1,1!nz
-!          do j=1,ny
-!          do i=1,1!!nx
-!             print *,j,a(i,j,k,3),kxyz(i,j,k)
-!          enddo
-!          enddo
-!          enddo
-
-end subroutine waves
-
-!**************************************************************************
-!
-subroutine matrice_refinement()
-!
-!**************************************************************************
-
-USE decomp_2d
-USE variables
-USE param
-USE var
-USE MPI
-USE derivX 
-USE derivY 
-USE derivZ 
-
-implicit none
-
-integer :: i,j,k
-
-complex(mytype),dimension(sp%yst(1):sp%yen(1)) :: transx
-complex(mytype),dimension(sp%yst(2):sp%yen(2)) :: transy
-complex(mytype),dimension(sp%yst(3):sp%yen(3)) :: transz
-real(mytype) :: xa0,xa1 
-complex(mytype) :: ytt,xtt,ztt,yt1,xt1,yt2,xt2
-complex(mytype) :: xtt1,ytt1,ztt1,zt1,zt2,tmp1,tmp2,tmp3
-
-do i = sp%yst(1),sp%yen(1)
-   xtt=cmplx((bicix6*2.*cos(real(exs(i), kind=mytype)*3.*dx/2.)+&
-        cicix6*2.*cos(real(exs(i), kind=mytype)*5.*dx/2.)+&
-        dicix6*2.*cos(real(exs(i), kind=mytype)*7.*dx/2.)),&
-        (bicix6*2.*cos(real(exs(i), kind=mytype)*3.*dx/2.)+&
-        cicix6*2.*cos(real(exs(i), kind=mytype)*5.*dx/2.)+&
-        dicix6*2.*cos(real(exs(i), kind=mytype)*7.*dx/2.)), kind=mytype)
-   xtt1=cmplx((aicix6*2.*cos(real(exs(i), kind=mytype)*dx/2.)),&
-        (aicix6*2.*cos(real(exs(i), kind=mytype)*dx/2.)), kind=mytype)
-   xt1=cmplx((1.+2.*ailcaix6*cos(real(exs(i), kind=mytype)*dx)),&
-        (1.+2.*ailcaix6*cos(real(exs(i), kind=mytype)*dx)), kind=mytype)
-   transx(i)=cmplx(real(xtt1+xtt, kind=mytype)/real(xt1, kind=mytype),&
-        real(xtt1+xtt, kind=mytype)/real(xt1, kind=mytype), kind=mytype)!(xtt+xtt)/xt1
-enddo
-do j = sp%yst(2),sp%yen(2)
-   ytt=cmplx((biciy6*2.*cos(real(eys(j), kind=mytype)*3.*dy/2.)+&
-        ciciy6*2.*cos(real(eys(j), kind=mytype)*5.*dy/2.)+&
-        diciy6*2.*cos(real(eys(j), kind=mytype)*7.*dy/2.)),&
-        (biciy6*2.*cos(real(eys(j), kind=mytype)*3.*dy/2.)+&
-        ciciy6*2.*cos(real(eys(j), kind=mytype)*5.*dy/2.)+&
-        diciy6*2.*cos(real(eys(j), kind=mytype)*7.*dy/2.)), kind=mytype)
-   ytt1=cmplx((aiciy6*2.*cos(real(eys(j), kind=mytype)*dy/2.)),&
-        (aiciy6*2.*cos(real(eys(j), kind=mytype)*dy/2.)), kind=mytype)
-   yt1=cmplx((1.+2.*ailcaiy6*cos(real(eys(j), kind=mytype)*dy)),&
-        (1.+2.*ailcaiy6*cos(real(eys(j), kind=mytype)*dy)), kind=mytype)
-   transy(j)=cmplx(real(ytt1+ytt, kind=mytype)/real(yt1, kind=mytype),&
-        real(ytt1+ytt, kind=mytype)/real(yt1, kind=mytype), kind=mytype)!(ytt+ytt)/yt1
-enddo
-if (bcz==0) then
-   do k = sp%yst(3),sp%yen(3)
-      ztt=cmplx((biciz6*2.*cos(real(ezs(k), kind=mytype)*3.*dz/2.)+&
-           ciciz6*2.*cos(real(ezs(k), kind=mytype)*5.*dz/2.)+&
-           diciz6*2.*cos(real(ezs(k), kind=mytype)*7.*dz/2.)),&
-           (biciz6*2.*cos(real(ezs(k), kind=mytype)*3.*dz/2.)+&
-           ciciz6*2.*cos(real(ezs(k), kind=mytype)*5.*dz/2.)+&
-           diciz6*2.*cos(real(ezs(k), kind=mytype)*7.*dz/2.)), kind=mytype)
-      ztt1=cmplx((aiciz6*2.*cos(real(ezs(k), kind=mytype)*dz/2.)),&
-           (aiciz6*2.*cos(real(ezs(k), kind=mytype)*dz/2.)), kind=mytype)
-      zt1=cmplx((1.+2.*ailcaiz6*cos(real(ezs(k), kind=mytype)*dz)),&
-           (1.+2.*ailcaiz6*cos(real(ezs(k), kind=mytype)*dz)), kind=mytype)
-      transz(k)=cmplx(real(ztt1+ztt, kind=mytype)/real(zt1, kind=mytype),&
-           aimag(ztt1+ztt)/aimag(zt1), kind=mytype)!(ztt+ztt)/zt1
-   enddo
-else
-   do k = sp%yst(3),sp%yen(3)
-      ztt=cmplx((biciz6*2.*cos(real(ezs(k), kind=mytype)*3.*dz/2.)+&
-           ciciz6*2.*cos(real(ezs(k), kind=mytype)*5.*dz/2.)),&
-           (biciz6*2.*cos(aimag(ezs(k))*3.*dz/2.)+&
-           ciciz6*2.*cos(aimag(ezs(k))*5.*dz/2.)), kind=mytype)
-      ztt1=cmplx((aiciz6*2.*cos(real(ezs(k), kind=mytype)*dz/2.)),&
-           (aiciz6*2.*cos(aimag(ezs(k))*dz/2.)), kind=mytype)
-      zt1=cmplx((1.+2.*ailcaiz6*cos(real(ezs(k), kind=mytype)*dz)),&
-           (1.+2.*ailcaiz6*cos(aimag(ezs(k))*dz)), kind=mytype)
-      transz(k)=cmplx(real(ztt1+ztt, kind=mytype)/real(zt1, kind=mytype),&
-           aimag(ztt1+ztt)/aimag(zt1), kind=mytype)!(ztt+ztt)/zt1
-   enddo
-endif
-
-if ((istret==1).or.(istret==2)) then
-
-  
-   xa0=alpha/pi+1./2./beta/pi
-   if (istret==1) xa1=1./4./beta/pi
-   if (istret==2) xa1=-1./4./beta/pi
-!
-!construction of the pentadiagonal matrice
-!
-   do k=sp%yst(3),sp%yen(3)
-   do j=1,ny/2
-   do i=sp%yst(1),sp%yen(1)
-      cw22(i,j,k)=cmplx(real(yky(2*j-1), kind=mytype)*real(transx(i), kind=mytype)*real(transz(k), kind=mytype),&
-           aimag(yky(2*j-1))*aimag(transx(i))*aimag(transz(k)), kind=mytype)
-      cw2(i,j,k)=cmplx(real(yky(2*j), kind=mytype)*real(transx(i), kind=mytype)*real(transz(k), kind=mytype),&
-           aimag(yky(2*j))*aimag(transx(i))*aimag(transz(k)), kind=mytype)
-   enddo
-   enddo
-   enddo
-
-
-   
-
-!main diagonal 
-   do k=sp%yst(3),sp%yen(3)
-   do j=2,ny/2-1
-   do i=sp%yst(1),sp%yen(1)
-      a(i,j,k,3)=cmplx(-(real(xk2(i), kind=mytype)*real(transy(2*j-1), kind=mytype)*real(transy(2*j-1), kind=mytype)&
-           *real(transz(k), kind=mytype)*real(transz(k), kind=mytype))&
-           -(real(zk2(k), kind=mytype)*real(transy(2*j-1), kind=mytype)*real(transy(2*j-1), kind=mytype)*&
-           real(transx(i), kind=mytype)*real(transx(i), kind=mytype))&
-           -real(cw22(i,j,k), kind=mytype)*real(cw22(i,j,k), kind=mytype)*xa0*xa0-&
-           xa1*xa1*(real(cw22(i,j,k), kind=mytype)*real(cw22(i,j-1,k), kind=mytype)+real(cw22(i,j,k), kind=mytype)*&
-           real(cw22(i,j+1,k), kind=mytype)),&
-           -(aimag(xk2(i))*aimag(transy(2*j-1))*aimag(transy(2*j-1))*aimag(transz(k))*aimag(transz(k)))&
-           -(aimag(zk2(k))*aimag(transy(2*j-1))*aimag(transy(2*j-1))*aimag(transx(i))*aimag(transx(i)))&
-           -aimag(cw22(i,j,k))*aimag(cw22(i,j,k))*xa0*xa0-&
-           xa1*xa1*(aimag(cw22(i,j,k))*aimag(cw22(i,j-1,k))+aimag(cw22(i,j,k))*aimag(cw22(i,j+1,k))), kind=mytype)
-      a2(i,j,k,3)=cmplx(-(real(xk2(i), kind=mytype)*real(transy(2*j), kind=mytype)*real(transy(2*j), kind=mytype)*&
-           real(transz(k), kind=mytype)*real(transz(k), kind=mytype))&
-           -(real(zk2(k), kind=mytype)*real(transy(2*j), kind=mytype)*real(transy(2*j), kind=mytype)*&
-           real(transx(i), kind=mytype)*real(transx(i), kind=mytype))&
-           -real(cw2(i,j,k), kind=mytype)*real(cw2(i,j,k), kind=mytype)*xa0*xa0-&
-           xa1*xa1*(real(cw2(i,j,k), kind=mytype)*real(cw2(i,j-1,k), kind=mytype)+real(cw2(i,j,k), kind=mytype)*&
-           real(cw2(i,j+1,k), kind=mytype)),&
-           -(aimag(xk2(i))*aimag(transy(2*j))*aimag(transy(2*j))*aimag(transz(k))*aimag(transz(k)))&
-           -(aimag(zk2(k))*aimag(transy(2*j))*aimag(transy(2*j))*aimag(transx(i))*aimag(transx(i)))&
-           -aimag(cw2(i,j,k))*aimag(cw2(i,j,k))*xa0*xa0-&
-           xa1*xa1*(aimag(cw2(i,j,k))*aimag(cw2(i,j-1,k))+aimag(cw2(i,j,k))*aimag(cw2(i,j+1,k))), kind=mytype)
-   enddo
-   enddo
-   do i=sp%yst(1),sp%yen(1)
-      a(i,1,k,3)=cmplx(-(real(xk2(i), kind=mytype)*real(transy(1), kind=mytype)*real(transy(1), kind=mytype)*&
-           real(transz(k), kind=mytype)*real(transz(k), kind=mytype))&
-           -(real(zk2(k), kind=mytype)*real(transy(1), kind=mytype)*real(transy(1), kind=mytype)*&
-           real(transx(i), kind=mytype)*real(transx(i), kind=mytype))&
-           -real(cw22(i,1,k), kind=mytype)*real(cw22(i,1,k), kind=mytype)*xa0*xa0-xa1*xa1*(real(cw22(i,1,k), kind=mytype)*&
-           real(cw22(i,2,k), kind=mytype)),&
-           -(aimag(xk2(i))*aimag(transy(1))*aimag(transy(1))*aimag(transz(k))*aimag(transz(k)))&
-           -(aimag(zk2(k))*aimag(transy(1))*aimag(transy(1))*aimag(transx(i))*aimag(transx(i)))&
-           -aimag(cw22(i,1,k))*aimag(cw22(i,1,k))*xa0*xa0-xa1*xa1*(aimag(cw22(i,1,k))*aimag(cw22(i,2,k))), kind=mytype)
-      a(i,ny/2,k,3)=cmplx(-(real(xk2(i), kind=mytype)*real(transy(ny-2), kind=mytype)*real(transy(ny-2), kind=mytype)&
-           *real(transz(k), kind=mytype)*real(transz(k), kind=mytype))&
-           -(real(zk2(k), kind=mytype)*real(transy(ny-2), kind=mytype)*real(transy(ny-2), kind=mytype)*&
-           real(transx(i), kind=mytype)*real(transx(i), kind=mytype))&
-           -real(cw22(i,ny/2,k), kind=mytype)*real(cw22(i,ny/2,k), kind=mytype)*xa0*xa0-&
-           xa1*xa1*(real(cw22(i,ny/2,k), kind=mytype)*real(cw22(i,ny/2-1,k), kind=mytype)),&
-           -(aimag(xk2(i))*aimag(transy(ny-2))*aimag(transy(ny-2))*aimag(transz(k))*aimag(transz(k)))&
-           -(aimag(zk2(k))*aimag(transy(ny-2))*aimag(transy(ny-2))*aimag(transx(i))*aimag(transx(i)))&
-           -aimag(cw22(i,ny/2,k))*aimag(cw22(i,ny/2,k))*xa0*xa0-&
-           xa1*xa1*(aimag(cw22(i,ny/2,k))*aimag(cw22(i,ny/2-1,k))), kind=mytype)
-      a2(i,1,k,3)=cmplx(-(real(xk2(i), kind=mytype)*real(transy(2), kind=mytype)*real(transy(2), kind=mytype)*&
-           real(transz(k), kind=mytype)*real(transz(k), kind=mytype))&
-           -(real(zk2(k), kind=mytype)*real(transy(2), kind=mytype)*real(transy(2), kind=mytype)*&
-           real(transx(i), kind=mytype)*real(transx(i), kind=mytype))&
-           -real(cw2(i,1,k), kind=mytype)*real(cw2(i,1,k), kind=mytype)*(xa0-xa1)*(xa0+xa1)-xa1*xa1*(real(cw2(i,1,k), kind=mytype)*&
-           real(cw2(i,2,k), kind=mytype)),&
-           -(aimag(xk2(i))*aimag(transy(2))*aimag(transy(2))*aimag(transz(k))*aimag(transz(k)))&
-           -(aimag(zk2(k))*aimag(transy(2))*aimag(transy(2))*aimag(transx(i))*aimag(transx(i)))&
-           -aimag(cw2(i,1,k))*aimag(cw2(i,1,k))*(xa0-xa1)*(xa0+xa1)-xa1*xa1*(aimag(cw2(i,1,k))*aimag(cw2(i,2,k))), kind=mytype)
-      a2(i,ny/2,k,3)=cmplx(-(real(xk2(i), kind=mytype)*real(transy(ny-1), kind=mytype)*real(transy(ny-1), kind=mytype)*&
-           real(transz(k), kind=mytype)*real(transz(k), kind=mytype))&
-           -(real(zk2(k), kind=mytype)*real(transy(ny-1), kind=mytype)*real(transy(ny-1), kind=mytype)*&
-           real(transx(i), kind=mytype)*real(transx(i), kind=mytype))&
-           -real(cw2(i,ny/2,k), kind=mytype)*real(cw2(i,ny/2,k), kind=mytype)*(xa0+xa1)*(xa0+xa1)-&
-           xa1*xa1*(real(cw2(i,ny/2,k), kind=mytype)*real(cw2(i,ny/2-1,k), kind=mytype)),&
-           -(aimag(xk2(i))*aimag(transy(ny-1))*aimag(transy(ny-1))*aimag(transz(k))*aimag(transz(k)))&
-           -(aimag(zk2(k))*aimag(transy(ny-1))*aimag(transy(ny-1))*aimag(transx(i))*aimag(transx(i)))&
-           -aimag(cw2(i,ny/2,k))*aimag(cw2(i,ny/2,k))*(xa0+xa1)*(xa0+xa1)-&
-           xa1*xa1*(aimag(cw2(i,ny/2,k))*aimag(cw2(i,ny/2-1,k))), kind=mytype)
-   enddo
-   enddo
-   
- 
-
- 
-
-!sup diag +1
-   do k=sp%yst(3),sp%yen(3)
-   do j=2,ny/2-1
-   do i=sp%yst(1),sp%yen(1)   
-      a(i,j,k,4)=cmplx(xa0*xa1*(real(cw22(i,j,k), kind=mytype)*real(cw22(i,j+1,k), kind=mytype)+real(cw22(i,j+1,k), kind=mytype)*&
-           real(cw22(i,j+1,k), kind=mytype)),&
-           xa0*xa1*(aimag(cw22(i,j,k))*aimag(cw22(i,j+1,k))+aimag(cw22(i,j+1,k))*aimag(cw22(i,j+1,k))), kind=mytype)
-      a2(i,j,k,4)=cmplx(xa0*xa1*(real(cw2(i,j,k), kind=mytype)*real(cw2(i,j+1,k), kind=mytype)+real(cw2(i,j+1,k), kind=mytype)*&
-           real(cw2(i,j+1,k), kind=mytype)),&
-           xa0*xa1*(aimag(cw2(i,j,k))*aimag(cw2(i,j+1,k))+aimag(cw2(i,j+1,k))*aimag(cw2(i,j+1,k))), kind=mytype)
-   enddo
-   enddo
-   do i=sp%yst(1),sp%yen(1)   
-      a(i,1,k,4)=2.*cmplx((xa0*xa1*(real(cw22(i,1,k), kind=mytype)*real(cw22(i,2,k), kind=mytype)+real(cw22(i,2,k), kind=mytype)*&
-           real(cw22(i,2,k), kind=mytype))),&
-           (xa0*xa1*(aimag(cw22(i,1,k))*aimag(cw22(i,2,k))+aimag(cw22(i,2,k))*aimag(cw22(i,2,k)))), kind=mytype)
-      a2(i,1,k,4)=cmplx((xa0-xa1)*xa1*(real(cw2(i,1,k), kind=mytype)*real(cw2(i,2,k), kind=mytype))&
-           +xa0*xa1*(real(cw2(i,2,k), kind=mytype)*real(cw2(i,2,k), kind=mytype)),&
-           (xa0-xa1)*xa1*(aimag(cw2(i,1,k))*aimag(cw2(i,2,k)))&
-           +xa0*xa1*(aimag(cw2(i,2,k))*aimag(cw2(i,2,k))), kind=mytype)
-      a2(i,ny/2-1,k,4)=cmplx(xa0*xa1*(real(cw2(i,ny/2-1,k), kind=mytype)*real(cw2(i,ny/2,k), kind=mytype))&
-           +(xa0+xa1)*xa1*(real(cw2(i,ny/2,k), kind=mytype)*real(cw2(i,ny/2,k), kind=mytype)),&
-           xa0*xa1*(aimag(cw2(i,ny/2-1,k))*aimag(cw2(i,ny/2,k)))&
-           +(xa0+xa1)*xa1*(aimag(cw2(i,ny/2,k))*aimag(cw2(i,ny/2,k))), kind=mytype)
-      a2(i,ny/2,k,4)=0.
-   enddo
-   enddo
-
-
-
-!sup diag +2
-   do k=sp%yst(3),sp%yen(3)
-   do i=sp%yst(1),sp%yen(1)   
-   do j=1,ny/2-2
-      a(i,j,k,5)=cmplx(-real(cw22(i,j+1,k), kind=mytype)*real(cw22(i,j+2,k), kind=mytype)*xa1*xa1,&
-           -aimag(cw22(i,j+1,k))*aimag(cw22(i,j+2,k))*xa1*xa1, kind=mytype)
-      a2(i,j,k,5)=cmplx(-real(cw2(i,j+1,k), kind=mytype)*real(cw2(i,j+2,k), kind=mytype)*xa1*xa1,&
-           -aimag(cw2(i,j+1,k))*aimag(cw2(i,j+2,k))*xa1*xa1, kind=mytype)
-   enddo
-   a(i,1,k,5)=cmplx(real(a(i,1,k,5), kind=mytype)*2.,aimag(a(i,1,k,5))*2., kind=mytype)
-   a(i,ny/2-1,k,5)=0.
-   a(i,ny/2,k,5)=0.
-   a2(i,ny/2-1,k,5)=0.
-   a2(i,ny/2,k,5)=0. 
-   enddo
-   enddo
-
-
-
-!inf diag -1
-   do k=sp%yst(3),sp%yen(3)
-   do i=sp%yst(1),sp%yen(1)   
-   do j=2,ny/2
-      a(i,j,k,2)=cmplx(xa0*xa1*(real(cw22(i,j,k), kind=mytype)*real(cw22(i,j-1,k), kind=mytype)+real(cw22(i,j-1,k), kind=mytype)*&
-           real(cw22(i,j-1,k), kind=mytype)),&
-           xa0*xa1*(aimag(cw22(i,j,k))*aimag(cw22(i,j-1,k))+aimag(cw22(i,j-1,k))*aimag(cw22(i,j-1,k))), kind=mytype)
-      a2(i,j,k,2)=cmplx(xa0*xa1*(real(cw2(i,j,k), kind=mytype)*real(cw2(i,j-1,k), kind=mytype)+real(cw2(i,j-1,k), kind=mytype)*&
-           real(cw2(i,j-1,k), kind=mytype)),&
-           xa0*xa1*(aimag(cw2(i,j,k))*aimag(cw2(i,j-1,k))+aimag(cw2(i,j-1,k))*aimag(cw2(i,j-1,k))), kind=mytype)
-   enddo
-   a(i,1,k,2)=0.
-   a2(i,1,k,2)=0.
-   a2(i,2,k,2)=cmplx(xa0*xa1*(real(cw2(i,2,k), kind=mytype)*real(cw2(i,1,k), kind=mytype))&
-        +(xa0+xa1)*xa1*(real(cw2(i,1,k), kind=mytype)*real(cw2(i,1,k), kind=mytype)),&
-        xa0*xa1*(aimag(cw2(i,2,k))*aimag(cw2(i,1,k)))&
-        +(xa0+xa1)*xa1*(aimag(cw2(i,1,k))*aimag(cw2(i,1,k))), kind=mytype)
-   a2(i,ny/2,k,2)=cmplx((xa0+xa1)*xa1*(real(cw2(i,ny/2,k), kind=mytype)*real(cw2(i,ny/2-1,k), kind=mytype))&
-        +xa0*xa1*(real(cw2(i,ny/2-1,k), kind=mytype)*real(cw2(i,ny/2-1,k), kind=mytype)),&
-        (xa0+xa1)*xa1*(aimag(cw2(i,ny/2,k))*aimag(cw2(i,ny/2-1,k)))&
-        +xa0*xa1*(aimag(cw2(i,ny/2-1,k))*aimag(cw2(i,ny/2-1,k))), kind=mytype)
-   enddo
-   enddo
-!inf diag -2
-   do k=sp%yst(3),sp%yen(3)
-   do i=sp%yst(1),sp%yen(1)  
-   do j=3,ny/2
-      a(i,j,k,1)=cmplx(-real(cw22(i,j-1,k), kind=mytype)*real(cw22(i,j-2,k), kind=mytype)*xa1*xa1,&
-           -aimag(cw22(i,j-1,k))*aimag(cw22(i,j-2,k))*xa1*xa1, kind=mytype)
-      a2(i,j,k,1)=cmplx(-real(cw2(i,j-1,k), kind=mytype)*real(cw2(i,j-2,k), kind=mytype)*xa1*xa1,&
-           -aimag(cw2(i,j-1,k))*aimag(cw2(i,j-2,k))*xa1*xa1, kind=mytype)
-   enddo
-   a(i,1,k,1)=0.
-   a(i,2,k,1)=0.
-   a2(i,1,k,1)=0.
-   a2(i,2,k,1)=0.
-   enddo
-   enddo
-!not to have a singular matrice
-   do k=sp%yst(3),sp%yen(3)
-   do i=sp%yst(1),sp%yen(1)
-      if ((real(xk2(i), kind=mytype)==0.).and.(real(zk2(k), kind=mytype)==0)) then
-         a(i,1,k,3)=cmplx(1.,1., kind=mytype)
-         a(i,1,k,4)=0.
-         a(i,1,k,5)=0.
-      endif
-   enddo
-   enddo
-
-else
-   xa0=alpha/pi+1./2./beta/pi
-   xa1=-1./4./beta/pi 
-!
-!construction of the pentadiagonal matrice
-!   
-   do k=sp%yst(3),sp%yen(3)
-   do j=1,nym
-   do i=sp%yst(1),sp%yen(1)
-      cw22(i,j,k)=cmplx(real(yky(j), kind=mytype)*real(transx(i), kind=mytype)*real(transz(k), kind=mytype),&
-           aimag(yky(j))*aimag(transx(i))*aimag(transz(k)), kind=mytype)
-   enddo
-   enddo
-   enddo
-
-!main diagonal 
-   do k=sp%yst(3),sp%yen(3)
-   do j=2,nym-1
-   do i=sp%yst(1),sp%yen(1)
-      a3(i,j,k,3)=cmplx(-(real(xk2(i), kind=mytype)*real(transy(j), kind=mytype)*real(transy(j), kind=mytype)*&
-           real(transz(k), kind=mytype)*real(transz(k), kind=mytype))&
-           -(real(zk2(k), kind=mytype)*real(transy(j), kind=mytype)*real(transy(j), kind=mytype)*&
-           real(transx(i), kind=mytype)*real(transx(i), kind=mytype))&
-           -real(cw22(i,j,k), kind=mytype)*real(cw22(i,j,k), kind=mytype)*xa0*xa0-&
-           xa1*xa1*(real(cw22(i,j,k), kind=mytype)*real(cw22(i,j-1,k), kind=mytype)+real(cw22(i,j,k), kind=mytype)*&
-           real(cw22(i,j+1,k), kind=mytype)),&
-           -(aimag(xk2(i))*aimag(transy(j))*aimag(transy(j))*aimag(transz(k))*aimag(transz(k)))&
-           -(aimag(zk2(k))*aimag(transy(j))*aimag(transy(j))*aimag(transx(i))*aimag(transx(i)))&
-           -aimag(cw22(i,j,k))*aimag(cw22(i,j,k))*xa0*xa0-&
-           xa1*xa1*(aimag(cw22(i,j,k))*aimag(cw22(i,j-1,k))+aimag(cw22(i,j,k))*aimag(cw22(i,j+1,k))), kind=mytype)
-   enddo
-   enddo
-   enddo
-
-   do k=sp%yst(3),sp%yen(3)
-   do i=sp%yst(1),sp%yen(1)
-      a3(i,1,k,3)=cmplx(-(real(xk2(i), kind=mytype)*real(transy(1), kind=mytype)*real(transy(1), kind=mytype)*&
-           real(transz(k), kind=mytype)*real(transz(k), kind=mytype))&
-           -(real(zk2(k), kind=mytype)*real(transy(1), kind=mytype)*real(transy(1), kind=mytype)*real(transx(i), kind=mytype)*&
-           real(transx(i), kind=mytype))&
-           -real(cw22(i,1,k), kind=mytype)*real(cw22(i,1,k), kind=mytype)*xa0*xa0-xa1*xa1*(real(cw22(i,1,k), kind=mytype)*&
-           real(cw22(i,2,k), kind=mytype)),&
-           -(aimag(xk2(i))*aimag(transy(1))*aimag(transy(1))*aimag(transz(k))*aimag(transz(k)))&
-           -(aimag(zk2(k))*aimag(transy(1))*aimag(transy(1))*aimag(transx(i))*aimag(transx(i)))&
-           -aimag(cw22(i,1,k))*aimag(cw22(i,1,k))*xa0*xa0-xa1*xa1*(aimag(cw22(i,1,k))*aimag(cw22(i,2,k))), kind=mytype)
-      a3(i,nym,k,3)=cmplx(-(real(xk2(i), kind=mytype)*real(transy(nym), kind=mytype)*real(transy(nym), kind=mytype)*&
-           real(transz(k), kind=mytype)*real(transz(k), kind=mytype))&
-           -(real(zk2(k), kind=mytype)*real(transy(nym), kind=mytype)*real(transy(nym), kind=mytype)*real(transx(i), kind=mytype)*&
-           real(transx(i), kind=mytype))&
-           -real(cw22(i,nym,k), kind=mytype)*real(cw22(i,nym,k), kind=mytype)*xa0*xa0-&
-           xa1*xa1*(real(cw22(i,nym,k), kind=mytype)*real(cw22(i,nym-1,k), kind=mytype)),&
-           -(aimag(xk2(i))*aimag(transy(nym))*aimag(transy(nym))*aimag(transz(k))*aimag(transz(k)))&
-           -(aimag(zk2(k))*aimag(transy(nym))*aimag(transy(nym))*aimag(transx(i))*aimag(transx(i)))&
-           -aimag(cw22(i,nym,k))*aimag(cw22(i,nym,k))*xa0*xa0-&
-           xa1*xa1*(aimag(cw22(i,nym,k))*aimag(cw22(i,nym-1,k))), kind=mytype)
-   enddo
-   enddo
-
-   
-
-
-!sup diag +1
-   do k=sp%yst(3),sp%yen(3)
-   do i=sp%yst(1),sp%yen(1)
-   do j=2,nym-1
-      a3(i,j,k,4)=cmplx(xa0*xa1*(real(cw22(i,j,k), kind=mytype)*real(cw22(i,j+1,k), kind=mytype)+real(cw22(i,j+1,k), kind=mytype)*&
-           real(cw22(i,j+1,k), kind=mytype)),&
-           xa0*xa1*(aimag(cw22(i,j,k))*aimag(cw22(i,j+1,k))+aimag(cw22(i,j+1,k))*aimag(cw22(i,j+1,k))), kind=mytype)
-   enddo
-   a3(i,1,k,4)=cmplx((xa0*xa1*(real(cw22(i,1,k), kind=mytype)*real(cw22(i,2,k), kind=mytype)+real(cw22(i,2,k), kind=mytype)*&
-        real(cw22(i,2,k), kind=mytype))),&
-        (xa0*xa1*(aimag(cw22(i,1,k))*aimag(cw22(i,2,k))+aimag(cw22(i,2,k))*aimag(cw22(i,2,k)))), kind=mytype)
-   enddo
-   enddo
-!sup diag +2
-   do k=sp%yst(3),sp%yen(3)
-   do i=sp%yst(1),sp%yen(1)
-   do j=1,nym-2
-      a3(i,j,k,5)=cmplx(-real(cw22(i,j+1,k), kind=mytype)*real(cw22(i,j+2,k), kind=mytype)*xa1*xa1,&
-           -aimag(cw22(i,j+1,k))*aimag(cw22(i,j+2,k))*xa1*xa1, kind=mytype)
-   enddo
-   !a3(i,1,k,5)=a3(i,1,k,5)*2.
-   !a3(i,1,k,5)=0.
-   a3(i,nym-1,k,5)=0.
-   a3(i,nym,k,5)=0.
-   enddo
-   enddo
-
-
-!inf diag -1
-   do k=sp%yst(3),sp%yen(3)
-   do i=sp%yst(1),sp%yen(1)
-   do j=2,nym
-      a3(i,j,k,2)=cmplx(xa0*xa1*(real(cw22(i,j,k), kind=mytype)*real(cw22(i,j-1,k), kind=mytype)+real(cw22(i,j-1,k), kind=mytype)*&
-           real(cw22(i,j-1,k), kind=mytype)),&
-           xa0*xa1*(aimag(cw22(i,j,k))*aimag(cw22(i,j-1,k))+aimag(cw22(i,j-1,k))*aimag(cw22(i,j-1,k))), kind=mytype)
-   enddo
-   a3(i,1,k,2)=0.
-   enddo
-   enddo
-!inf diag -2
-   do k=sp%yst(3),sp%yen(3)
-   do i=sp%yst(1),sp%yen(1)
-   do j=3,nym
-      a3(i,j,k,1)=cmplx(-real(cw22(i,j-1,k), kind=mytype)*real(cw22(i,j-2,k), kind=mytype)*xa1*xa1,&
-           -aimag(cw22(i,j-1,k))*aimag(cw22(i,j-2,k))*xa1*xa1, kind=mytype)
-   enddo
-   a3(i,1,k,1)=0.
-   a3(i,2,k,1)=0.
-   enddo
-   enddo 
-
-!not to have a singular matrice
-!   do k=sp%yst(3),sp%yen(3)
-!   do i=sp%yst(1),sp%yen(1)
-!      if ((xkx(i)==0.).and.(zkz(k)==0)) then
-if (nrank==0) then
-   a3(1,1,1,3)=cmplx(1.,1., kind=mytype)
-   a3(1,1,1,4)=0.
-   a3(1,1,1,5)=0.
-endif
-!      endif
-!   enddo
-!   enddo
-endif
-
-
-   
-
-
-return
-end subroutine matrice_refinement
+  ! ***********************************************************
+  !
+  subroutine waves ()
+    !
+    !***********************************************************
+
+    USE derivX 
+    USE derivY 
+    USE derivZ 
+    USE param
+    USE decomp_2d
+    USE variables
+    use decomp_2d_fft
+
+    implicit none
+
+    integer :: i,j,k
+    real(mytype) :: w,wp,w1,w1p 
+    complex(mytype) :: xyzk
+    complex(mytype) :: ytt,xtt,ztt,yt1,xt1,yt2,xt2
+    complex(mytype) :: xtt1,ytt1,ztt1,zt1,zt2,tmp1,tmp2,tmp3
+    complex(mytype) :: tmp4,tmp5,tmp6
+
+    xkx(:)=0. ; xk2(:)=0. ; yky(:)=0. ; yk2(:)=0.
+    zkz(:)=0. ; zk2(:)=0.
+
+    !WAVE NUMBER IN X
+    if (bcx==0) then
+       do i=1,nx/2+1
+          w=2.*pi*(i-1)/nx
+          wp=acix6*2.*dx*sin(w/2.)+(bcix6*2.*dx)*sin(3./2.*w)
+          wp=wp/(1.+2.*alcaix6*cos(w))
+          xkx(i)=cmplx(nx*wp/xlx,nx*wp/xlx, kind=mytype)
+          exs(i)=cmplx(nx*w/xlx,nx*w/xlx, kind=mytype)
+          xk2(i)=cmplx((nx*wp/xlx)**2,(nx*wp/xlx)**2, kind=mytype)
+       enddo
+       do i=nx/2+2,nx
+          xkx(i)=xkx(nx-i+2)
+          exs(i)=exs(nx-i+2)
+          xk2(i)=xk2(nx-i+2)
+       enddo
+    else
+       do i=1,nx
+          w=2.*pi*0.5*(i-1)/nxm
+          wp=acix6*2.*dx*sin(w/2.)+(bcix6*2.*dx)*sin(3./2.*w)
+          wp=wp/(1.+2.*alcaix6*cos(w))
+          xkx(i)=cmplx(nxm*wp/xlx,nxm*wp/xlx, kind=mytype)
+          exs(i)=cmplx(nxm*w/xlx,nxm*w/xlx, kind=mytype)
+          xk2(i)=cmplx((nxm*wp/xlx)**2,(nxm*wp/xlx)**2, kind=mytype)
+       enddo
+       xkx(1)=0.
+       exs(1)=0.
+       xk2(1)=0.
+    endif
+
+    !WAVE NUMBER IN Y
+    if (bcy==0) then
+       do j=1,ny/2+1
+          w=2.*pi*(j-1)/ny
+          wp=aciy6*2.*dy*sin(w/2.)+(bciy6*2.*dy)*sin(3./2.*w)
+          wp=wp/(1.+2.*alcaiy6*cos(w))
+          if (istret==0) yky(j)=cmplx(ny*wp/yly,ny*wp/yly, kind=mytype)
+          if (istret.ne.0) yky(j)=cmplx(ny*wp,ny*wp, kind=mytype)
+          eys(j)=cmplx(ny*w/yly,ny*w/yly, kind=mytype)
+          yk2(j)=cmplx((ny*wp/yly)**2,(ny*wp/yly)**2, kind=mytype)
+       enddo
+       do j=ny/2+2,ny
+          yky(j)=yky(ny-j+2)
+          eys(j)=eys(ny-j+2)
+          yk2(j)=yk2(ny-j+2)
+       enddo
+    else
+       do j=1,ny
+          w=2.*pi*0.5*(j-1)/nym
+          wp=aciy6*2.*dy*sin(w/2.)+(bciy6*2.*dy)*sin(3./2.*w)
+          wp=wp/(1.+2.*alcaiy6*cos(w))
+          if (istret==0) yky(j)=cmplx(nym*wp/yly,nym*wp/yly, kind=mytype)
+          if (istret.ne.0) yky(j)=cmplx(nym*wp,nym*wp, kind=mytype)
+          eys(j)=cmplx(nym*w/yly,nym*w/yly, kind=mytype)
+          yk2(j)=cmplx((nym*wp/yly)**2,(nym*wp/yly)**2, kind=mytype)
+       enddo
+       yky(1)=0.
+       eys(1)=0.
+       yk2(1)=0.
+    endif
+
+    !WAVE NUMBER IN Z
+    if (bcz==0) then
+       do k=1,nz/2+1
+          w=2.*pi*(k-1)/nz
+          wp=aciz6*2.*dz*sin(w/2.)+(bciz6*2.*dz)*sin(3./2.*w)
+          wp=wp/(1.+2.*alcaiz6*cos(w))
+          zkz(k)=cmplx(nz*wp/zlz,nz*wp/zlz, kind=mytype)
+          ezs(k)=cmplx(nz*w/zlz,nz*w/zlz, kind=mytype)
+          zk2(k)=cmplx((nz*wp/zlz)**2,(nz*wp/zlz)**2, kind=mytype)
+       enddo
+    else
+       do k=1,nz/2+1
+          w=2.*pi*0.5*(k-1)/nzm
+          w1=2.*pi*0.5*(nzm-k+1)/nzm
+          wp=aciz6*2.*dz*sin(w/2.)+(bciz6*2.*dz)*sin(3./2.*w)
+          wp=wp/(1.+2.*alcaiz6*cos(w))
+          w1p=aciz6*2.*dz*sin(w1/2.)+(bciz6*2.*dz)*sin(3./2.*w1)
+          w1p=w1p/(1.+2.*alcaiz6*cos(w1))     
+          zkz(k)=cmplx(nzm*wp/zlz,-nzm*w1p/zlz, kind=mytype)
+          ezs(k)=cmplx(nzm*w/zlz,nzm*w1/zlz, kind=mytype)
+          zk2(k)=cmplx((nzm*wp/zlz)**2,(nzm*w1p/zlz)**2, kind=mytype)
+       enddo
+    endif
+    !
+    !if (nrank==0) then
+    !   do i=1,nx
+    !      print *,i,ezs(i)
+    !   enddo
+    !endif
+    !stop
+
+    if ((bcx==0).and.(bcz==0).and.bcy.ne.0) then
+       do k = sp%yst(3), sp%yen(3)
+          do j = sp%yst(2), sp%yen(2)
+             do i = sp%yst(1), sp%yen(1)
+                xtt=cmplx((bicix6*2.*cos(real(exs(i), kind=mytype)*3.*dx/2.)+&
+                     cicix6*2.*cos(real(exs(i), kind=mytype)*5.*dx/2.)+&
+                     dicix6*2.*cos(real(exs(i), kind=mytype)*7.*dx/2.)),&
+                     (bicix6*2.*cos(real(exs(i), kind=mytype)*3.*dx/2.)+&
+                     cicix6*2.*cos(real(exs(i), kind=mytype)*5.*dx/2.)+&
+                     dicix6*2.*cos(real(exs(i), kind=mytype)*7.*dx/2.)), kind=mytype)
+                ytt=cmplx((biciy6*2.*cos(real(eys(j), kind=mytype)*3.*dy/2.)+&
+                     ciciy6*2.*cos(real(eys(j), kind=mytype)*5.*dy/2.)+&
+                     diciy6*2.*cos(real(eys(j), kind=mytype)*7.*dy/2.)),&
+                     (biciy6*2.*cos(real(eys(j), kind=mytype)*3.*dy/2.)+&
+                     ciciy6*2.*cos(real(eys(j), kind=mytype)*5.*dy/2.)+&
+                     diciy6*2.*cos(real(eys(j), kind=mytype)*7.*dy/2.)), kind=mytype)
+                ztt=cmplx((biciz6*2.*cos(real(ezs(k), kind=mytype)*3.*dz/2.)+&
+                     ciciz6*2.*cos(real(ezs(k), kind=mytype)*5.*dz/2.)+&
+                     diciz6*2.*cos(real(ezs(k), kind=mytype)*7.*dz/2.)),&
+                     (biciz6*2.*cos(real(ezs(k), kind=mytype)*3.*dz/2.)+&
+                     ciciz6*2.*cos(real(ezs(k), kind=mytype)*5.*dz/2.)+&
+                     diciz6*2.*cos(real(ezs(k), kind=mytype)*7.*dz/2.)), kind=mytype)
+                xtt1=cmplx((aicix6*2.*cos(real(exs(i), kind=mytype)*dx/2.)),&
+                     (aicix6*2.*cos(real(exs(i), kind=mytype)*dx/2.)), kind=mytype)
+                ytt1=cmplx((aiciy6*2.*cos(real(eys(j), kind=mytype)*dy/2.)),&
+                     (aiciy6*2.*cos(real(eys(j), kind=mytype)*dy/2.)), kind=mytype)
+                ztt1=cmplx((aiciz6*2.*cos(real(ezs(k), kind=mytype)*dz/2.)),&
+                     (aiciz6*2.*cos(real(ezs(k), kind=mytype)*dz/2.)), kind=mytype)
+                xt1=cmplx((1.+2.*ailcaix6*cos(real(exs(i), kind=mytype)*dx)),&
+                     (1.+2.*ailcaix6*cos(real(exs(i), kind=mytype)*dx)), kind=mytype)
+                yt1=cmplx((1.+2.*ailcaiy6*cos(real(eys(j), kind=mytype)*dy)),&
+                     (1.+2.*ailcaiy6*cos(real(eys(j), kind=mytype)*dy)), kind=mytype)
+                zt1=cmplx((1.+2.*ailcaiz6*cos(real(ezs(k), kind=mytype)*dz)),&
+                     (1.+2.*ailcaiz6*cos(real(ezs(k), kind=mytype)*dz)), kind=mytype)
+                xt2=xk2(i)*((((ytt1+ytt)/yt1)*((ztt1+ztt)/zt1))**2)
+                yt2=yk2(j)*((((xtt1+xtt)/xt1)*((ztt1+ztt)/zt1))**2)
+                zt2=zk2(k)*((((xtt1+xtt)/xt1)*((ytt1+ytt)/yt1))**2)
+                xyzk=xt2+yt2+zt2
+                kxyz(i,j,k)=xyzk
+                !   print *,i,j,k, kxyz(i,j,k)
+             enddo
+          enddo
+       enddo
+    else
+       if (bcz==0) then
+          do k = sp%xst(3),sp%xen(3)
+             do j = sp%xst(2),sp%xen(2)
+                do i = sp%xst(1),sp%xen(1)
+                   xtt=cmplx((bicix6*2.*cos(real(exs(i), kind=mytype)*3.*dx/2.)+&
+                        cicix6*2.*cos(real(exs(i), kind=mytype)*5.*dx/2.)+&
+                        dicix6*2.*cos(real(exs(i), kind=mytype)*7.*dx/2.)),&
+                        (bicix6*2.*cos(real(exs(i), kind=mytype)*3.*dx/2.)+&
+                        cicix6*2.*cos(real(exs(i), kind=mytype)*5.*dx/2.)+&
+                        dicix6*2.*cos(real(exs(i), kind=mytype)*7.*dx/2.)), kind=mytype)
+                   ytt=cmplx((biciy6*2.*cos(real(eys(j), kind=mytype)*3.*dy/2.)+&
+                        ciciy6*2.*cos(real(eys(j), kind=mytype)*5.*dy/2.)+&
+                        diciy6*2.*cos(real(eys(j), kind=mytype)*7.*dy/2.)),&
+                        (biciy6*2.*cos(real(eys(j), kind=mytype)*3.*dy/2.)+&
+                        ciciy6*2.*cos(real(eys(j), kind=mytype)*5.*dy/2.)+&
+                        diciy6*2.*cos(real(eys(j), kind=mytype)*7.*dy/2.)), kind=mytype)
+                   ztt=cmplx((biciz6*2.*cos(real(ezs(k), kind=mytype)*3.*dz/2.)+&
+                        ciciz6*2.*cos(real(ezs(k), kind=mytype)*5.*dz/2.)+&
+                        diciz6*2.*cos(real(ezs(k), kind=mytype)*7.*dz/2.)),&
+                        (biciz6*2.*cos(real(ezs(k), kind=mytype)*3.*dz/2.)+&
+                        ciciz6*2.*cos(real(ezs(k), kind=mytype)*5.*dz/2.)+&
+                        diciz6*2.*cos(real(ezs(k), kind=mytype)*7.*dz/2.)), kind=mytype)
+                   xtt1=cmplx((aicix6*2.*cos(real(exs(i), kind=mytype)*dx/2.)),&
+                        (aicix6*2.*cos(real(exs(i), kind=mytype)*dx/2.)), kind=mytype)
+                   ytt1=cmplx((aiciy6*2.*cos(real(eys(j), kind=mytype)*dy/2.)),&
+                        (aiciy6*2.*cos(real(eys(j), kind=mytype)*dy/2.)), kind=mytype)
+                   ztt1=cmplx((aiciz6*2.*cos(real(ezs(k), kind=mytype)*dz/2.)),&
+                        (aiciz6*2.*cos(real(ezs(k), kind=mytype)*dz/2.)), kind=mytype)
+                   xt1=cmplx((1.+2.*ailcaix6*cos(real(exs(i), kind=mytype)*dx)),&
+                        (1.+2.*ailcaix6*cos(real(exs(i))*dx)), kind=mytype)
+                   yt1=cmplx((1.+2.*ailcaiy6*cos(real(eys(j), kind=mytype)*dy)),&
+                        (1.+2.*ailcaiy6*cos(real(eys(j), kind=mytype)*dy)), kind=mytype)
+                   zt1=cmplx((1.+2.*ailcaiz6*cos(real(ezs(k), kind=mytype)*dz)),&
+                        (1.+2.*ailcaiz6*cos(real(ezs(k), kind=mytype)*dz)), kind=mytype)
+                   xt2=xk2(i)*((((ytt1+ytt)/yt1)*((ztt1+ztt)/zt1))**2)
+                   yt2=yk2(j)*((((xtt1+xtt)/xt1)*((ztt1+ztt)/zt1))**2)
+                   zt2=zk2(k)*((((xtt1+xtt)/xt1)*((ytt1+ytt)/yt1))**2)
+                   xyzk=xt2+yt2+zt2
+                   kxyz(i,j,k)=xyzk
+                   !   print *,i,j,k, kxyz(i,j,k)
+                enddo
+             enddo
+          enddo
+       else
+          do k = sp%xst(3),sp%xen(3)
+             do j = sp%xst(2),sp%xen(2)
+                do i = sp%xst(1),sp%xen(1)  
+                   xtt=cmplx((bicix6*2.*cos(real(exs(i), kind=mytype)*3.*dx/2.)+&
+                        cicix6*2.*cos(real(exs(i), kind=mytype)*5.*dx/2.)+&
+                        dicix6*2.*cos(real(exs(i), kind=mytype)*7.*dx/2.)),&
+                        (bicix6*2.*cos(real(exs(i), kind=mytype)*3.*dx/2.)+&
+                        cicix6*2.*cos(real(exs(i), kind=mytype)*5.*dx/2.)+&
+                        dicix6*2.*cos(real(exs(i), kind=mytype)*7.*dx/2.)), kind=mytype)
+                   ytt=cmplx((biciy6*2.*cos(real(eys(j), kind=mytype)*3.*dy/2.)+&
+                        ciciy6*2.*cos(real(eys(j), kind=mytype)*5.*dy/2.)+&
+                        diciy6*2.*cos(real(eys(j), kind=mytype)*7.*dy/2.)),&
+                        (biciy6*2.*cos(real(eys(j), kind=mytype)*3.*dy/2.)+&
+                        ciciy6*2.*cos(real(eys(j), kind=mytype)*5.*dy/2.)+&
+                        diciy6*2.*cos(real(eys(j), kind=mytype)*7.*dy/2.)), kind=mytype)
+                   !
+                   ztt=cmplx((biciz6*2.*cos(real(ezs(k), kind=mytype)*3.*dz/2.)+&
+                        ciciz6*2.*cos(real(ezs(k), kind=mytype)*5.*dz/2.)+&
+                        diciz6*2.*cos(real(ezs(k), kind=mytype)*7.*dz/2.)),&
+                        (biciz6*2.*cos(aimag(ezs(k))*3.*dz/2.)+&
+                        ciciz6*2.*cos(aimag(ezs(k))*5.*dz/2.)+&
+                        diciz6*2.*cos(aimag(ezs(k))*7.*dz/2.)), kind=mytype)
+                   !
+                   xtt1=cmplx((aicix6*2.*cos(real(exs(i), kind=mytype)*dx/2.)),&
+                        (aicix6*2.*cos(real(exs(i), kind=mytype)*dx/2.)), kind=mytype)
+                   ytt1=cmplx((aiciy6*2.*cos(real(eys(j), kind=mytype)*dy/2.)),&
+                        (aiciy6*2.*cos(real(eys(j), kind=mytype)*dy/2.)), kind=mytype)
+                   !
+                   ztt1=cmplx((aiciz6*2.*cos(real(ezs(k), kind=mytype)*dz/2.)),&
+                        (aiciz6*2.*cos(aimag(ezs(k))*dz/2.)), kind=mytype)
+                   !
+                   xt1=cmplx((1.+2.*ailcaix6*cos(real(exs(i), kind=mytype)*dx)),&
+                        (1.+2.*ailcaix6*cos(real(exs(i), kind=mytype)*dx)), kind=mytype)
+                   yt1=cmplx((1.+2.*ailcaiy6*cos(real(eys(j), kind=mytype)*dy)),&
+                        (1.+2.*ailcaiy6*cos(real(eys(j), kind=mytype)*dy)), kind=mytype)
+                   zt1=cmplx((1.+2.*ailcaiz6*cos(real(ezs(k), kind=mytype)*dz)),&
+                        (1.+2.*ailcaiz6*cos(aimag(ezs(k))*dz)), kind=mytype)
+
+                   tmp1=cmplx(real(ztt1+ztt, kind=mytype)/real(zt1, kind=mytype),&
+                        aimag(ztt1+ztt)/aimag(zt1), kind=mytype)
+                   tmp2=cmplx(real(ytt1+ytt, kind=mytype)/real(yt1, kind=mytype),&
+                        real(ytt1+ytt, kind=mytype)/real(yt1, kind=mytype), kind=mytype)
+                   tmp3=cmplx(real(xtt1+xtt, kind=mytype)/real(xt1, kind=mytype),&
+                        real(xtt1+xtt, kind=mytype)/real(xt1, kind=mytype), kind=mytype)
+
+                   tmp4=cmplx((real(tmp1, kind=mytype)*real(tmp2, kind=mytype))**2,(aimag(tmp1)*aimag(tmp2))**2, kind=mytype)
+                   tmp5=cmplx((real(tmp1, kind=mytype)*real(tmp3, kind=mytype))**2,(aimag(tmp1)*aimag(tmp3))**2, kind=mytype)
+                   tmp6=cmplx((real(tmp3, kind=mytype)*real(tmp2, kind=mytype))**2,(aimag(tmp3)*aimag(tmp2))**2, kind=mytype)
+
+                   tmp1=cmplx(real(tmp4, kind=mytype)*real(xk2(i), kind=mytype),aimag(tmp4)*aimag(xk2(i)), kind=mytype)
+                   tmp2=cmplx(real(tmp5, kind=mytype)*real(yk2(j), kind=mytype),aimag(tmp5)*aimag(yk2(j)), kind=mytype)
+                   tmp3=cmplx(real(tmp6, kind=mytype)*real(zk2(k), kind=mytype),aimag(tmp6)*aimag(zk2(k)), kind=mytype)
+
+                   xyzk=tmp1+tmp2+tmp3
+                   kxyz(i,j,k)=xyzk
+                   !         print *,i,j,k,zt1,yt1
+                enddo
+             enddo
+          enddo
+       endif
+    endif
+
+
+    !          do k=1,1!nz
+    !          do j=1,ny
+    !          do i=1,1!!nx
+    !             print *,j,a(i,j,k,3),kxyz(i,j,k)
+    !          enddo
+    !          enddo
+    !          enddo
+
+  end subroutine waves
+
+  !**************************************************************************
+  !
+  subroutine matrice_refinement()
+    !
+    !**************************************************************************
+
+    USE decomp_2d
+    USE variables
+    USE param
+    USE var
+    USE MPI
+    USE derivX 
+    USE derivY 
+    USE derivZ 
+
+    implicit none
+
+    integer :: i,j,k
+
+    complex(mytype),dimension(sp%yst(1):sp%yen(1)) :: transx
+    complex(mytype),dimension(sp%yst(2):sp%yen(2)) :: transy
+    complex(mytype),dimension(sp%yst(3):sp%yen(3)) :: transz
+    real(mytype) :: xa0,xa1 
+    complex(mytype) :: ytt,xtt,ztt,yt1,xt1,yt2,xt2
+    complex(mytype) :: xtt1,ytt1,ztt1,zt1,zt2,tmp1,tmp2,tmp3
+
+    do i = sp%yst(1),sp%yen(1)
+       xtt=cmplx((bicix6*2.*cos(real(exs(i), kind=mytype)*3.*dx/2.)+&
+            cicix6*2.*cos(real(exs(i), kind=mytype)*5.*dx/2.)+&
+            dicix6*2.*cos(real(exs(i), kind=mytype)*7.*dx/2.)),&
+            (bicix6*2.*cos(real(exs(i), kind=mytype)*3.*dx/2.)+&
+            cicix6*2.*cos(real(exs(i), kind=mytype)*5.*dx/2.)+&
+            dicix6*2.*cos(real(exs(i), kind=mytype)*7.*dx/2.)), kind=mytype)
+       xtt1=cmplx((aicix6*2.*cos(real(exs(i), kind=mytype)*dx/2.)),&
+            (aicix6*2.*cos(real(exs(i), kind=mytype)*dx/2.)), kind=mytype)
+       xt1=cmplx((1.+2.*ailcaix6*cos(real(exs(i), kind=mytype)*dx)),&
+            (1.+2.*ailcaix6*cos(real(exs(i), kind=mytype)*dx)), kind=mytype)
+       transx(i)=cmplx(real(xtt1+xtt, kind=mytype)/real(xt1, kind=mytype),&
+            real(xtt1+xtt, kind=mytype)/real(xt1, kind=mytype), kind=mytype)!(xtt+xtt)/xt1
+    enddo
+    do j = sp%yst(2),sp%yen(2)
+       ytt=cmplx((biciy6*2.*cos(real(eys(j), kind=mytype)*3.*dy/2.)+&
+            ciciy6*2.*cos(real(eys(j), kind=mytype)*5.*dy/2.)+&
+            diciy6*2.*cos(real(eys(j), kind=mytype)*7.*dy/2.)),&
+            (biciy6*2.*cos(real(eys(j), kind=mytype)*3.*dy/2.)+&
+            ciciy6*2.*cos(real(eys(j), kind=mytype)*5.*dy/2.)+&
+            diciy6*2.*cos(real(eys(j), kind=mytype)*7.*dy/2.)), kind=mytype)
+       ytt1=cmplx((aiciy6*2.*cos(real(eys(j), kind=mytype)*dy/2.)),&
+            (aiciy6*2.*cos(real(eys(j), kind=mytype)*dy/2.)), kind=mytype)
+       yt1=cmplx((1.+2.*ailcaiy6*cos(real(eys(j), kind=mytype)*dy)),&
+            (1.+2.*ailcaiy6*cos(real(eys(j), kind=mytype)*dy)), kind=mytype)
+       transy(j)=cmplx(real(ytt1+ytt, kind=mytype)/real(yt1, kind=mytype),&
+            real(ytt1+ytt, kind=mytype)/real(yt1, kind=mytype), kind=mytype)!(ytt+ytt)/yt1
+    enddo
+    if (bcz==0) then
+       do k = sp%yst(3),sp%yen(3)
+          ztt=cmplx((biciz6*2.*cos(real(ezs(k), kind=mytype)*3.*dz/2.)+&
+               ciciz6*2.*cos(real(ezs(k), kind=mytype)*5.*dz/2.)+&
+               diciz6*2.*cos(real(ezs(k), kind=mytype)*7.*dz/2.)),&
+               (biciz6*2.*cos(real(ezs(k), kind=mytype)*3.*dz/2.)+&
+               ciciz6*2.*cos(real(ezs(k), kind=mytype)*5.*dz/2.)+&
+               diciz6*2.*cos(real(ezs(k), kind=mytype)*7.*dz/2.)), kind=mytype)
+          ztt1=cmplx((aiciz6*2.*cos(real(ezs(k), kind=mytype)*dz/2.)),&
+               (aiciz6*2.*cos(real(ezs(k), kind=mytype)*dz/2.)), kind=mytype)
+          zt1=cmplx((1.+2.*ailcaiz6*cos(real(ezs(k), kind=mytype)*dz)),&
+               (1.+2.*ailcaiz6*cos(real(ezs(k), kind=mytype)*dz)), kind=mytype)
+          transz(k)=cmplx(real(ztt1+ztt, kind=mytype)/real(zt1, kind=mytype),&
+               aimag(ztt1+ztt)/aimag(zt1), kind=mytype)!(ztt+ztt)/zt1
+       enddo
+    else
+       do k = sp%yst(3),sp%yen(3)
+          ztt=cmplx((biciz6*2.*cos(real(ezs(k), kind=mytype)*3.*dz/2.)+&
+               ciciz6*2.*cos(real(ezs(k), kind=mytype)*5.*dz/2.)),&
+               (biciz6*2.*cos(aimag(ezs(k))*3.*dz/2.)+&
+               ciciz6*2.*cos(aimag(ezs(k))*5.*dz/2.)), kind=mytype)
+          ztt1=cmplx((aiciz6*2.*cos(real(ezs(k), kind=mytype)*dz/2.)),&
+               (aiciz6*2.*cos(aimag(ezs(k))*dz/2.)), kind=mytype)
+          zt1=cmplx((1.+2.*ailcaiz6*cos(real(ezs(k), kind=mytype)*dz)),&
+               (1.+2.*ailcaiz6*cos(aimag(ezs(k))*dz)), kind=mytype)
+          transz(k)=cmplx(real(ztt1+ztt, kind=mytype)/real(zt1, kind=mytype),&
+               aimag(ztt1+ztt)/aimag(zt1), kind=mytype)!(ztt+ztt)/zt1
+       enddo
+    endif
+
+    if ((istret==1).or.(istret==2)) then
+
+
+       xa0=alpha/pi+1./2./beta/pi
+       if (istret==1) xa1=1./4./beta/pi
+       if (istret==2) xa1=-1./4./beta/pi
+       !
+       !construction of the pentadiagonal matrice
+       !
+       do k=sp%yst(3),sp%yen(3)
+          do j=1,ny/2
+             do i=sp%yst(1),sp%yen(1)
+                cw22(i,j,k)=cmplx(real(yky(2*j-1), kind=mytype)*real(transx(i), kind=mytype)*real(transz(k), kind=mytype),&
+                     aimag(yky(2*j-1))*aimag(transx(i))*aimag(transz(k)), kind=mytype)
+                cw2(i,j,k)=cmplx(real(yky(2*j), kind=mytype)*real(transx(i), kind=mytype)*real(transz(k), kind=mytype),&
+                     aimag(yky(2*j))*aimag(transx(i))*aimag(transz(k)), kind=mytype)
+             enddo
+          enddo
+       enddo
+
+
+
+
+       !main diagonal 
+       do k=sp%yst(3),sp%yen(3)
+          do j=2,ny/2-1
+             do i=sp%yst(1),sp%yen(1)
+                a(i,j,k,3)=cmplx(-(real(xk2(i), kind=mytype)*real(transy(2*j-1), kind=mytype)*real(transy(2*j-1), kind=mytype)&
+                     *real(transz(k), kind=mytype)*real(transz(k), kind=mytype))&
+                     -(real(zk2(k), kind=mytype)*real(transy(2*j-1), kind=mytype)*real(transy(2*j-1), kind=mytype)*&
+                     real(transx(i), kind=mytype)*real(transx(i), kind=mytype))&
+                     -real(cw22(i,j,k), kind=mytype)*real(cw22(i,j,k), kind=mytype)*xa0*xa0-&
+                     xa1*xa1*(real(cw22(i,j,k), kind=mytype)*real(cw22(i,j-1,k), kind=mytype)+real(cw22(i,j,k), kind=mytype)*&
+                     real(cw22(i,j+1,k), kind=mytype)),&
+                     -(aimag(xk2(i))*aimag(transy(2*j-1))*aimag(transy(2*j-1))*aimag(transz(k))*aimag(transz(k)))&
+                     -(aimag(zk2(k))*aimag(transy(2*j-1))*aimag(transy(2*j-1))*aimag(transx(i))*aimag(transx(i)))&
+                     -aimag(cw22(i,j,k))*aimag(cw22(i,j,k))*xa0*xa0-&
+                     xa1*xa1*(aimag(cw22(i,j,k))*aimag(cw22(i,j-1,k))+aimag(cw22(i,j,k))*aimag(cw22(i,j+1,k))), kind=mytype)
+                a2(i,j,k,3)=cmplx(-(real(xk2(i), kind=mytype)*real(transy(2*j), kind=mytype)*real(transy(2*j), kind=mytype)*&
+                     real(transz(k), kind=mytype)*real(transz(k), kind=mytype))&
+                     -(real(zk2(k), kind=mytype)*real(transy(2*j), kind=mytype)*real(transy(2*j), kind=mytype)*&
+                     real(transx(i), kind=mytype)*real(transx(i), kind=mytype))&
+                     -real(cw2(i,j,k), kind=mytype)*real(cw2(i,j,k), kind=mytype)*xa0*xa0-&
+                     xa1*xa1*(real(cw2(i,j,k), kind=mytype)*real(cw2(i,j-1,k), kind=mytype)+real(cw2(i,j,k), kind=mytype)*&
+                     real(cw2(i,j+1,k), kind=mytype)),&
+                     -(aimag(xk2(i))*aimag(transy(2*j))*aimag(transy(2*j))*aimag(transz(k))*aimag(transz(k)))&
+                     -(aimag(zk2(k))*aimag(transy(2*j))*aimag(transy(2*j))*aimag(transx(i))*aimag(transx(i)))&
+                     -aimag(cw2(i,j,k))*aimag(cw2(i,j,k))*xa0*xa0-&
+                     xa1*xa1*(aimag(cw2(i,j,k))*aimag(cw2(i,j-1,k))+aimag(cw2(i,j,k))*aimag(cw2(i,j+1,k))), kind=mytype)
+             enddo
+          enddo
+          do i=sp%yst(1),sp%yen(1)
+             a(i,1,k,3)=cmplx(-(real(xk2(i), kind=mytype)*real(transy(1), kind=mytype)*real(transy(1), kind=mytype)*&
+                  real(transz(k), kind=mytype)*real(transz(k), kind=mytype))&
+                  -(real(zk2(k), kind=mytype)*real(transy(1), kind=mytype)*real(transy(1), kind=mytype)*&
+                  real(transx(i), kind=mytype)*real(transx(i), kind=mytype))&
+                  -real(cw22(i,1,k), kind=mytype)*real(cw22(i,1,k), kind=mytype)*xa0*xa0-xa1*xa1*(real(cw22(i,1,k), kind=mytype)*&
+                  real(cw22(i,2,k), kind=mytype)),&
+                  -(aimag(xk2(i))*aimag(transy(1))*aimag(transy(1))*aimag(transz(k))*aimag(transz(k)))&
+                  -(aimag(zk2(k))*aimag(transy(1))*aimag(transy(1))*aimag(transx(i))*aimag(transx(i)))&
+                  -aimag(cw22(i,1,k))*aimag(cw22(i,1,k))*xa0*xa0-xa1*xa1*(aimag(cw22(i,1,k))*aimag(cw22(i,2,k))), kind=mytype)
+             a(i,ny/2,k,3)=cmplx(-(real(xk2(i), kind=mytype)*real(transy(ny-2), kind=mytype)*real(transy(ny-2), kind=mytype)&
+                  *real(transz(k), kind=mytype)*real(transz(k), kind=mytype))&
+                  -(real(zk2(k), kind=mytype)*real(transy(ny-2), kind=mytype)*real(transy(ny-2), kind=mytype)*&
+                  real(transx(i), kind=mytype)*real(transx(i), kind=mytype))&
+                  -real(cw22(i,ny/2,k), kind=mytype)*real(cw22(i,ny/2,k), kind=mytype)*xa0*xa0-&
+                  xa1*xa1*(real(cw22(i,ny/2,k), kind=mytype)*real(cw22(i,ny/2-1,k), kind=mytype)),&
+                  -(aimag(xk2(i))*aimag(transy(ny-2))*aimag(transy(ny-2))*aimag(transz(k))*aimag(transz(k)))&
+                  -(aimag(zk2(k))*aimag(transy(ny-2))*aimag(transy(ny-2))*aimag(transx(i))*aimag(transx(i)))&
+                  -aimag(cw22(i,ny/2,k))*aimag(cw22(i,ny/2,k))*xa0*xa0-&
+                  xa1*xa1*(aimag(cw22(i,ny/2,k))*aimag(cw22(i,ny/2-1,k))), kind=mytype)
+             a2(i,1,k,3)=cmplx(-(real(xk2(i), kind=mytype)*real(transy(2), kind=mytype)*real(transy(2), kind=mytype)*&
+                  real(transz(k), kind=mytype)*real(transz(k), kind=mytype))&
+                  -(real(zk2(k), kind=mytype)*real(transy(2), kind=mytype)*real(transy(2), kind=mytype)*&
+                  real(transx(i), kind=mytype)*real(transx(i), kind=mytype))&
+                  -real(cw2(i,1,k), kind=mytype)*real(cw2(i,1,k), kind=mytype)*(xa0-xa1)*(xa0+xa1)-xa1*xa1*(real(cw2(i,1,k), kind=mytype)*&
+                  real(cw2(i,2,k), kind=mytype)),&
+                  -(aimag(xk2(i))*aimag(transy(2))*aimag(transy(2))*aimag(transz(k))*aimag(transz(k)))&
+                  -(aimag(zk2(k))*aimag(transy(2))*aimag(transy(2))*aimag(transx(i))*aimag(transx(i)))&
+                  -aimag(cw2(i,1,k))*aimag(cw2(i,1,k))*(xa0-xa1)*(xa0+xa1)-xa1*xa1*(aimag(cw2(i,1,k))*aimag(cw2(i,2,k))), kind=mytype)
+             a2(i,ny/2,k,3)=cmplx(-(real(xk2(i), kind=mytype)*real(transy(ny-1), kind=mytype)*real(transy(ny-1), kind=mytype)*&
+                  real(transz(k), kind=mytype)*real(transz(k), kind=mytype))&
+                  -(real(zk2(k), kind=mytype)*real(transy(ny-1), kind=mytype)*real(transy(ny-1), kind=mytype)*&
+                  real(transx(i), kind=mytype)*real(transx(i), kind=mytype))&
+                  -real(cw2(i,ny/2,k), kind=mytype)*real(cw2(i,ny/2,k), kind=mytype)*(xa0+xa1)*(xa0+xa1)-&
+                  xa1*xa1*(real(cw2(i,ny/2,k), kind=mytype)*real(cw2(i,ny/2-1,k), kind=mytype)),&
+                  -(aimag(xk2(i))*aimag(transy(ny-1))*aimag(transy(ny-1))*aimag(transz(k))*aimag(transz(k)))&
+                  -(aimag(zk2(k))*aimag(transy(ny-1))*aimag(transy(ny-1))*aimag(transx(i))*aimag(transx(i)))&
+                  -aimag(cw2(i,ny/2,k))*aimag(cw2(i,ny/2,k))*(xa0+xa1)*(xa0+xa1)-&
+                  xa1*xa1*(aimag(cw2(i,ny/2,k))*aimag(cw2(i,ny/2-1,k))), kind=mytype)
+          enddo
+       enddo
+
+
+
+
+
+       !sup diag +1
+       do k=sp%yst(3),sp%yen(3)
+          do j=2,ny/2-1
+             do i=sp%yst(1),sp%yen(1)   
+                a(i,j,k,4)=cmplx(xa0*xa1*(real(cw22(i,j,k), kind=mytype)*real(cw22(i,j+1,k), kind=mytype)+real(cw22(i,j+1,k), kind=mytype)*&
+                     real(cw22(i,j+1,k), kind=mytype)),&
+                     xa0*xa1*(aimag(cw22(i,j,k))*aimag(cw22(i,j+1,k))+aimag(cw22(i,j+1,k))*aimag(cw22(i,j+1,k))), kind=mytype)
+                a2(i,j,k,4)=cmplx(xa0*xa1*(real(cw2(i,j,k), kind=mytype)*real(cw2(i,j+1,k), kind=mytype)+real(cw2(i,j+1,k), kind=mytype)*&
+                     real(cw2(i,j+1,k), kind=mytype)),&
+                     xa0*xa1*(aimag(cw2(i,j,k))*aimag(cw2(i,j+1,k))+aimag(cw2(i,j+1,k))*aimag(cw2(i,j+1,k))), kind=mytype)
+             enddo
+          enddo
+          do i=sp%yst(1),sp%yen(1)   
+             a(i,1,k,4)=2.*cmplx((xa0*xa1*(real(cw22(i,1,k), kind=mytype)*real(cw22(i,2,k), kind=mytype)+real(cw22(i,2,k), kind=mytype)*&
+                  real(cw22(i,2,k), kind=mytype))),&
+                  (xa0*xa1*(aimag(cw22(i,1,k))*aimag(cw22(i,2,k))+aimag(cw22(i,2,k))*aimag(cw22(i,2,k)))), kind=mytype)
+             a2(i,1,k,4)=cmplx((xa0-xa1)*xa1*(real(cw2(i,1,k), kind=mytype)*real(cw2(i,2,k), kind=mytype))&
+                  +xa0*xa1*(real(cw2(i,2,k), kind=mytype)*real(cw2(i,2,k), kind=mytype)),&
+                  (xa0-xa1)*xa1*(aimag(cw2(i,1,k))*aimag(cw2(i,2,k)))&
+                  +xa0*xa1*(aimag(cw2(i,2,k))*aimag(cw2(i,2,k))), kind=mytype)
+             a2(i,ny/2-1,k,4)=cmplx(xa0*xa1*(real(cw2(i,ny/2-1,k), kind=mytype)*real(cw2(i,ny/2,k), kind=mytype))&
+                  +(xa0+xa1)*xa1*(real(cw2(i,ny/2,k), kind=mytype)*real(cw2(i,ny/2,k), kind=mytype)),&
+                  xa0*xa1*(aimag(cw2(i,ny/2-1,k))*aimag(cw2(i,ny/2,k)))&
+                  +(xa0+xa1)*xa1*(aimag(cw2(i,ny/2,k))*aimag(cw2(i,ny/2,k))), kind=mytype)
+             a2(i,ny/2,k,4)=0.
+          enddo
+       enddo
+
+
+
+       !sup diag +2
+       do k=sp%yst(3),sp%yen(3)
+          do i=sp%yst(1),sp%yen(1)   
+             do j=1,ny/2-2
+                a(i,j,k,5)=cmplx(-real(cw22(i,j+1,k), kind=mytype)*real(cw22(i,j+2,k), kind=mytype)*xa1*xa1,&
+                     -aimag(cw22(i,j+1,k))*aimag(cw22(i,j+2,k))*xa1*xa1, kind=mytype)
+                a2(i,j,k,5)=cmplx(-real(cw2(i,j+1,k), kind=mytype)*real(cw2(i,j+2,k), kind=mytype)*xa1*xa1,&
+                     -aimag(cw2(i,j+1,k))*aimag(cw2(i,j+2,k))*xa1*xa1, kind=mytype)
+             enddo
+             a(i,1,k,5)=cmplx(real(a(i,1,k,5), kind=mytype)*2.,aimag(a(i,1,k,5))*2., kind=mytype)
+             a(i,ny/2-1,k,5)=0.
+             a(i,ny/2,k,5)=0.
+             a2(i,ny/2-1,k,5)=0.
+             a2(i,ny/2,k,5)=0. 
+          enddo
+       enddo
+
+
+
+       !inf diag -1
+       do k=sp%yst(3),sp%yen(3)
+          do i=sp%yst(1),sp%yen(1)   
+             do j=2,ny/2
+                a(i,j,k,2)=cmplx(xa0*xa1*(real(cw22(i,j,k), kind=mytype)*real(cw22(i,j-1,k), kind=mytype)+real(cw22(i,j-1,k), kind=mytype)*&
+                     real(cw22(i,j-1,k), kind=mytype)),&
+                     xa0*xa1*(aimag(cw22(i,j,k))*aimag(cw22(i,j-1,k))+aimag(cw22(i,j-1,k))*aimag(cw22(i,j-1,k))), kind=mytype)
+                a2(i,j,k,2)=cmplx(xa0*xa1*(real(cw2(i,j,k), kind=mytype)*real(cw2(i,j-1,k), kind=mytype)+real(cw2(i,j-1,k), kind=mytype)*&
+                     real(cw2(i,j-1,k), kind=mytype)),&
+                     xa0*xa1*(aimag(cw2(i,j,k))*aimag(cw2(i,j-1,k))+aimag(cw2(i,j-1,k))*aimag(cw2(i,j-1,k))), kind=mytype)
+             enddo
+             a(i,1,k,2)=0.
+             a2(i,1,k,2)=0.
+             a2(i,2,k,2)=cmplx(xa0*xa1*(real(cw2(i,2,k), kind=mytype)*real(cw2(i,1,k), kind=mytype))&
+                  +(xa0+xa1)*xa1*(real(cw2(i,1,k), kind=mytype)*real(cw2(i,1,k), kind=mytype)),&
+                  xa0*xa1*(aimag(cw2(i,2,k))*aimag(cw2(i,1,k)))&
+                  +(xa0+xa1)*xa1*(aimag(cw2(i,1,k))*aimag(cw2(i,1,k))), kind=mytype)
+             a2(i,ny/2,k,2)=cmplx((xa0+xa1)*xa1*(real(cw2(i,ny/2,k), kind=mytype)*real(cw2(i,ny/2-1,k), kind=mytype))&
+                  +xa0*xa1*(real(cw2(i,ny/2-1,k), kind=mytype)*real(cw2(i,ny/2-1,k), kind=mytype)),&
+                  (xa0+xa1)*xa1*(aimag(cw2(i,ny/2,k))*aimag(cw2(i,ny/2-1,k)))&
+                  +xa0*xa1*(aimag(cw2(i,ny/2-1,k))*aimag(cw2(i,ny/2-1,k))), kind=mytype)
+          enddo
+       enddo
+       !inf diag -2
+       do k=sp%yst(3),sp%yen(3)
+          do i=sp%yst(1),sp%yen(1)  
+             do j=3,ny/2
+                a(i,j,k,1)=cmplx(-real(cw22(i,j-1,k), kind=mytype)*real(cw22(i,j-2,k), kind=mytype)*xa1*xa1,&
+                     -aimag(cw22(i,j-1,k))*aimag(cw22(i,j-2,k))*xa1*xa1, kind=mytype)
+                a2(i,j,k,1)=cmplx(-real(cw2(i,j-1,k), kind=mytype)*real(cw2(i,j-2,k), kind=mytype)*xa1*xa1,&
+                     -aimag(cw2(i,j-1,k))*aimag(cw2(i,j-2,k))*xa1*xa1, kind=mytype)
+             enddo
+             a(i,1,k,1)=0.
+             a(i,2,k,1)=0.
+             a2(i,1,k,1)=0.
+             a2(i,2,k,1)=0.
+          enddo
+       enddo
+       !not to have a singular matrice
+       do k=sp%yst(3),sp%yen(3)
+          do i=sp%yst(1),sp%yen(1)
+             if ((real(xk2(i), kind=mytype)==0.).and.(real(zk2(k), kind=mytype)==0)) then
+                a(i,1,k,3)=cmplx(1.,1., kind=mytype)
+                a(i,1,k,4)=0.
+                a(i,1,k,5)=0.
+             endif
+          enddo
+       enddo
+
+    else
+       xa0=alpha/pi+1./2./beta/pi
+       xa1=-1./4./beta/pi 
+       !
+       !construction of the pentadiagonal matrice
+       !   
+       do k=sp%yst(3),sp%yen(3)
+          do j=1,nym
+             do i=sp%yst(1),sp%yen(1)
+                cw22(i,j,k)=cmplx(real(yky(j), kind=mytype)*real(transx(i), kind=mytype)*real(transz(k), kind=mytype),&
+                     aimag(yky(j))*aimag(transx(i))*aimag(transz(k)), kind=mytype)
+             enddo
+          enddo
+       enddo
+
+       !main diagonal 
+       do k=sp%yst(3),sp%yen(3)
+          do j=2,nym-1
+             do i=sp%yst(1),sp%yen(1)
+                a3(i,j,k,3)=cmplx(-(real(xk2(i), kind=mytype)*real(transy(j), kind=mytype)*real(transy(j), kind=mytype)*&
+                     real(transz(k), kind=mytype)*real(transz(k), kind=mytype))&
+                     -(real(zk2(k), kind=mytype)*real(transy(j), kind=mytype)*real(transy(j), kind=mytype)*&
+                     real(transx(i), kind=mytype)*real(transx(i), kind=mytype))&
+                     -real(cw22(i,j,k), kind=mytype)*real(cw22(i,j,k), kind=mytype)*xa0*xa0-&
+                     xa1*xa1*(real(cw22(i,j,k), kind=mytype)*real(cw22(i,j-1,k), kind=mytype)+real(cw22(i,j,k), kind=mytype)*&
+                     real(cw22(i,j+1,k), kind=mytype)),&
+                     -(aimag(xk2(i))*aimag(transy(j))*aimag(transy(j))*aimag(transz(k))*aimag(transz(k)))&
+                     -(aimag(zk2(k))*aimag(transy(j))*aimag(transy(j))*aimag(transx(i))*aimag(transx(i)))&
+                     -aimag(cw22(i,j,k))*aimag(cw22(i,j,k))*xa0*xa0-&
+                     xa1*xa1*(aimag(cw22(i,j,k))*aimag(cw22(i,j-1,k))+aimag(cw22(i,j,k))*aimag(cw22(i,j+1,k))), kind=mytype)
+             enddo
+          enddo
+       enddo
+
+       do k=sp%yst(3),sp%yen(3)
+          do i=sp%yst(1),sp%yen(1)
+             a3(i,1,k,3)=cmplx(-(real(xk2(i), kind=mytype)*real(transy(1), kind=mytype)*real(transy(1), kind=mytype)*&
+                  real(transz(k), kind=mytype)*real(transz(k), kind=mytype))&
+                  -(real(zk2(k), kind=mytype)*real(transy(1), kind=mytype)*real(transy(1), kind=mytype)*real(transx(i), kind=mytype)*&
+                  real(transx(i), kind=mytype))&
+                  -real(cw22(i,1,k), kind=mytype)*real(cw22(i,1,k), kind=mytype)*xa0*xa0-xa1*xa1*(real(cw22(i,1,k), kind=mytype)*&
+                  real(cw22(i,2,k), kind=mytype)),&
+                  -(aimag(xk2(i))*aimag(transy(1))*aimag(transy(1))*aimag(transz(k))*aimag(transz(k)))&
+                  -(aimag(zk2(k))*aimag(transy(1))*aimag(transy(1))*aimag(transx(i))*aimag(transx(i)))&
+                  -aimag(cw22(i,1,k))*aimag(cw22(i,1,k))*xa0*xa0-xa1*xa1*(aimag(cw22(i,1,k))*aimag(cw22(i,2,k))), kind=mytype)
+             a3(i,nym,k,3)=cmplx(-(real(xk2(i), kind=mytype)*real(transy(nym), kind=mytype)*real(transy(nym), kind=mytype)*&
+                  real(transz(k), kind=mytype)*real(transz(k), kind=mytype))&
+                  -(real(zk2(k), kind=mytype)*real(transy(nym), kind=mytype)*real(transy(nym), kind=mytype)*real(transx(i), kind=mytype)*&
+                  real(transx(i), kind=mytype))&
+                  -real(cw22(i,nym,k), kind=mytype)*real(cw22(i,nym,k), kind=mytype)*xa0*xa0-&
+                  xa1*xa1*(real(cw22(i,nym,k), kind=mytype)*real(cw22(i,nym-1,k), kind=mytype)),&
+                  -(aimag(xk2(i))*aimag(transy(nym))*aimag(transy(nym))*aimag(transz(k))*aimag(transz(k)))&
+                  -(aimag(zk2(k))*aimag(transy(nym))*aimag(transy(nym))*aimag(transx(i))*aimag(transx(i)))&
+                  -aimag(cw22(i,nym,k))*aimag(cw22(i,nym,k))*xa0*xa0-&
+                  xa1*xa1*(aimag(cw22(i,nym,k))*aimag(cw22(i,nym-1,k))), kind=mytype)
+          enddo
+       enddo
+
+
+
+
+       !sup diag +1
+       do k=sp%yst(3),sp%yen(3)
+          do i=sp%yst(1),sp%yen(1)
+             do j=2,nym-1
+                a3(i,j,k,4)=cmplx(xa0*xa1*(real(cw22(i,j,k), kind=mytype)*real(cw22(i,j+1,k), kind=mytype)+real(cw22(i,j+1,k), kind=mytype)*&
+                     real(cw22(i,j+1,k), kind=mytype)),&
+                     xa0*xa1*(aimag(cw22(i,j,k))*aimag(cw22(i,j+1,k))+aimag(cw22(i,j+1,k))*aimag(cw22(i,j+1,k))), kind=mytype)
+             enddo
+             a3(i,1,k,4)=cmplx((xa0*xa1*(real(cw22(i,1,k), kind=mytype)*real(cw22(i,2,k), kind=mytype)+real(cw22(i,2,k), kind=mytype)*&
+                  real(cw22(i,2,k), kind=mytype))),&
+                  (xa0*xa1*(aimag(cw22(i,1,k))*aimag(cw22(i,2,k))+aimag(cw22(i,2,k))*aimag(cw22(i,2,k)))), kind=mytype)
+          enddo
+       enddo
+       !sup diag +2
+       do k=sp%yst(3),sp%yen(3)
+          do i=sp%yst(1),sp%yen(1)
+             do j=1,nym-2
+                a3(i,j,k,5)=cmplx(-real(cw22(i,j+1,k), kind=mytype)*real(cw22(i,j+2,k), kind=mytype)*xa1*xa1,&
+                     -aimag(cw22(i,j+1,k))*aimag(cw22(i,j+2,k))*xa1*xa1, kind=mytype)
+             enddo
+             !a3(i,1,k,5)=a3(i,1,k,5)*2.
+             !a3(i,1,k,5)=0.
+             a3(i,nym-1,k,5)=0.
+             a3(i,nym,k,5)=0.
+          enddo
+       enddo
+
+
+       !inf diag -1
+       do k=sp%yst(3),sp%yen(3)
+          do i=sp%yst(1),sp%yen(1)
+             do j=2,nym
+                a3(i,j,k,2)=cmplx(xa0*xa1*(real(cw22(i,j,k), kind=mytype)*real(cw22(i,j-1,k), kind=mytype)+real(cw22(i,j-1,k), kind=mytype)*&
+                     real(cw22(i,j-1,k), kind=mytype)),&
+                     xa0*xa1*(aimag(cw22(i,j,k))*aimag(cw22(i,j-1,k))+aimag(cw22(i,j-1,k))*aimag(cw22(i,j-1,k))), kind=mytype)
+             enddo
+             a3(i,1,k,2)=0.
+          enddo
+       enddo
+       !inf diag -2
+       do k=sp%yst(3),sp%yen(3)
+          do i=sp%yst(1),sp%yen(1)
+             do j=3,nym
+                a3(i,j,k,1)=cmplx(-real(cw22(i,j-1,k), kind=mytype)*real(cw22(i,j-2,k), kind=mytype)*xa1*xa1,&
+                     -aimag(cw22(i,j-1,k))*aimag(cw22(i,j-2,k))*xa1*xa1, kind=mytype)
+             enddo
+             a3(i,1,k,1)=0.
+             a3(i,2,k,1)=0.
+          enddo
+       enddo
+
+       !not to have a singular matrice
+       !   do k=sp%yst(3),sp%yen(3)
+       !   do i=sp%yst(1),sp%yen(1)
+       !      if ((xkx(i)==0.).and.(zkz(k)==0)) then
+       if (nrank==0) then
+          a3(1,1,1,3)=cmplx(1.,1., kind=mytype)
+          a3(1,1,1,4)=0.
+          a3(1,1,1,5)=0.
+       endif
+       !      endif
+       !   enddo
+       !   enddo
+    endif
+
+
+
+
+
+    return
+  end subroutine matrice_refinement
 
 end module decomp_2d_poisson
 
diff --git a/src/post.f90 b/src/post.f90
index 256df90b..3574cf64 100644
--- a/src/post.f90
+++ b/src/post.f90
@@ -38,7 +38,7 @@ PROGRAM post
   phi1=zero; phim1=zero
   diss1=zero; dissm1=zero
   pre1=zero; prem1=zero
-  
+
   read_phi=0; read_u=0; read_ibm=0
   open(10,file='post.prm',status='unknown',form='formatted')
   read (10,'(A1)') a
@@ -76,7 +76,7 @@ PROGRAM post
   ttsize=(read_phi*numscalar+read_u*3)*nx*ny*nz
   tstart=0.;t1=0.;trank=0.;tranksum=0.;ttotal=0.
   call cpu_time(tstart)
-  
+
   do ii=1, nt
      call cpu_time(t1)
      ifile = (ii-1)*icrfile+file1
diff --git a/src/schemes.f90 b/src/schemes.f90
index b2a834fd..09d24311 100644
--- a/src/schemes.f90
+++ b/src/schemes.f90
@@ -87,7 +87,7 @@ subroutine schemes()
   if (nclz1.eq.1.and.nclzn.eq.2) derzz => derzz_12
   if (nclz1.eq.2.and.nclzn.eq.1) derzz => derzz_21
   if (nclz1.eq.2.and.nclzn.eq.2) derzz => derzz_22
-  
+
   call first_derivative(alfa1x,af1x,bf1x,cf1x,df1x,alfa2x,af2x,alfanx,afnx,bfnx,&
        cfnx,dfnx,alfamx,afmx,alfaix,afix,bfix,&
        ffx,fsx,fwx,ffxp,fsxp,fwxp,dx,nx,nclx1,nclxn)
@@ -226,38 +226,38 @@ end subroutine schemes
 !*******************************************************************
 !
 subroutine prepare (b,c,f,s,w,n)
-! 
-!*******************************************************************
+  ! 
+  !*******************************************************************
 
-use decomp_2d, only : mytype
-use param, only : one
+  use decomp_2d, only : mytype
+  use param, only : one
 
-implicit none
+  implicit none
 
-integer :: i,n
-real(mytype), dimension(n) :: b,c,f,s,w
+  integer :: i,n
+  real(mytype), dimension(n) :: b,c,f,s,w
 
-do i=1,n
-   w(i)=c(i)
-enddo
-do i=2,n
-   s(i)=b(i-1)/w(i-1)
-   w(i)=w(i)-f(i-1)*s(i)
-enddo
-do i=1,n
-   w(i)=one/w(i)
-enddo
+  do i=1,n
+     w(i)=c(i)
+  enddo
+  do i=2,n
+     s(i)=b(i-1)/w(i-1)
+     w(i)=w(i)-f(i-1)*s(i)
+  enddo
+  do i=1,n
+     w(i)=one/w(i)
+  enddo
 
-return
+  return
 end subroutine prepare
 
 !*******************************************************************
 !
 subroutine first_derivative(alfa1,af1,bf1,cf1,df1,alfa2,af2,alfan,afn,bfn,&
-       cfn,dfn,alfam,afm,alfai,afi,bfi,&
-       ff,fs,fw,ffp,fsp,fwp,d,n,ncl1,ncln)
-! 
-!*******************************************************************
+     cfn,dfn,alfam,afm,alfai,afi,bfi,&
+     ff,fs,fw,ffp,fsp,fwp,d,n,ncl1,ncln)
+  ! 
+  !*******************************************************************
 
   use decomp_2d, only : mytype, nrank
   use param
@@ -274,7 +274,7 @@ subroutine first_derivative(alfa1,af1,bf1,cf1,df1,alfa2,af2,alfan,afn,bfn,&
 
   ff=zero;fs=zero;fw=zero;ffp=zero;fsp=zero;fwp=zero
   fb=zero;fc=zero
-  
+
   if (ifirstder==1) then    ! Second-order central
      alfai= zero
      afi  = one/(two*d)
@@ -392,13 +392,13 @@ end subroutine first_derivative
 
 !*******************************************************************
 subroutine second_derivative(alsa1,as1,bs1,&
-       cs1,ds1,alsa2,as2,alsan,asn,bsn,csn,dsn,alsam,&
-       asm,alsa3,as3,bs3,alsat,ast,bst,&
-       alsa4,as4,bs4,cs4,&
-       alsatt,astt,bstt,cstt,&
-       alsai,asi,bsi,csi,dsi,&
-       sf,ss,sw,sfp,ssp,swp,d2,n,ncl1,ncln)
-!*******************************************************************
+     cs1,ds1,alsa2,as2,alsan,asn,bsn,csn,dsn,alsam,&
+     asm,alsa3,as3,bs3,alsat,ast,bst,&
+     alsa4,as4,bs4,cs4,&
+     alsatt,astt,bstt,cstt,&
+     alsai,asi,bsi,csi,dsi,&
+     sf,ss,sw,sfp,ssp,swp,d2,n,ncl1,ncln)
+  !*******************************************************************
 
   use decomp_2d, only : mytype, nrank
   use param
@@ -683,8 +683,8 @@ subroutine interpolation(dx,nxm,nx,nclx1,nclxn,&
      cfi6,cci6,cbi6,cfip6,csip6,cwip6,csi6,&
      cwi6,cifi6,cici6,cibi6,cifip6,&
      cisip6,ciwip6,cisi6,ciwi6)
-! 
-!*******************************************************************
+  ! 
+  !*******************************************************************
 
   use decomp_2d, only : mytype
   use param, only : ipinter, zero, one, two, three, nine, ten
@@ -696,10 +696,10 @@ subroutine interpolation(dx,nxm,nx,nclx1,nclxn,&
   real(mytype) :: alcaix6,acix6,bcix6
   real(mytype) :: ailcaix6,aicix6,bicix6,cicix6,dicix6
   real(mytype),dimension(nxm) :: cfx6,ccx6,cbx6,cfxp6,ciwxp6,csxp6,&
-  cwxp6,csx6,cwx6,cifx6,cicx6,cisx6
+       cwxp6,csx6,cwx6,cifx6,cicx6,cisx6
   real(mytype),dimension(nxm) :: cibx6,cifxp6,cisxp6,ciwx6
   real(mytype),dimension(nx) :: cfi6,cci6,cbi6,cfip6,csip6,cwip6,csi6,&
-  cwi6,cifi6,cici6,cibi6,cifip6
+       cwi6,cifi6,cici6,cibi6,cifip6
   real(mytype),dimension(nx) :: cisip6,ciwip6,cisi6,ciwi6
 
   integer :: i
diff --git a/src/statistics.f90 b/src/statistics.f90
index 2892a8ba..bf51251f 100644
--- a/src/statistics.f90
+++ b/src/statistics.f90
@@ -260,14 +260,14 @@ subroutine CONVERGENCE_STATISTIC2(ux1,ep1,tik1,tik2,tak1,tak2)
 
         if (nrank .eq. 0) then
 
-          print *,'RMS=',rms1
+           print *,'RMS=',rms1
 
-          write(filename,"('stats/rms',I8.8)") itime
-          open(67,file=trim(filename),status='unknown',form='formatted')
-          write(67,"(2E14.6,I14)") t,rms1,itime
-          close(67)
+           write(filename,"('stats/rms',I8.8)") itime
+           open(67,file=trim(filename),status='unknown',form='formatted')
+           write(67,"(2E14.6,I14)") t,rms1,itime
+           close(67)
 
-	  rms1=zero
+           rms1=zero
 
         end if
      endif
diff --git a/src/tools.f90 b/src/tools.f90
index 89923bcf..a44ba762 100644
--- a/src/tools.f90
+++ b/src/tools.f90
@@ -125,146 +125,146 @@ end subroutine stabiltemp
 !===================================================
 subroutine cfl_compute(uxmax,uymax,uzmax)
 
-use param
-use variables
-use var
-    
-implicit none
-
-real(mytype),intent(in) :: uxmax,uymax,uzmax
-real(mytype) :: cfl_x_adv,cfl_x_diff,cfl_y_adv,cfl_y_diff,cfl_z_adv,cfl_z_diff
-real(mytype) :: cfl_conv_lim, cfl_diff_lim
-real(mytype) :: sigma_conv(3), sigma_diff(3)
-real(mytype) :: visc
-
-! Set the constants (this is true for periodic boundaries)
-sigma_conv=[0.0, sqrt(3.0), 2.85]
-sigma_diff=[2.0, 2.5, 2.9]
-
-if(jles==0) then
-visc=xnu
-elseif (jles==1) then
-visc=20*fpi2*xnu
-endif
-
-! This is considering 1D peridic boundaries
-! Do x-direction
-cfl_x_adv=abs(uxmax)*dt/dx
-cfl_x_diff=visc*dt/dx**2.0
-! Do y-direction
-cfl_y_adv=abs(uymax)*dt/dy
-cfl_y_diff=visc*dt/dy**2.0
-! Do z-direction
-cfl_z_adv=abs(uzmax)*dt/dz
-cfl_z_diff=visc*dt/dz**2.0
-
-! So far we will focus on uniform grids
-if(nrank==0) then
-    write(*,*) ' '
-    write(*,1002) cfl_x_adv, cfl_x_diff
-1002  format('CFL x-direction (Adv and Diff) =',F9.4,',',F9.4)
-    write(*,1003) cfl_y_adv, cfl_y_diff
-1003  format('CFL y-direction (Adv and Diff) =',F9.4,',',F9.4)
-    write(*,1004) cfl_z_adv, cfl_z_diff
-1004  format('CFL z-direction (Adv and Diff) =',F9.4,',',F9.4)
-    cfl_conv_lim=sigma_conv(itimescheme)/sqrt(3.0)
-    cfl_diff_lim=sigma_diff(itimescheme)/6.0
-    write(*,1005) cfl_conv_lim, cfl_diff_lim
-    write(*,*) ' '
-1005  format('CFL limits (Adv and Diff) : ',F9.4,',',F9.4)
-endif
-
-end subroutine
+  use param
+  use variables
+  use var
+
+  implicit none
+
+  real(mytype),intent(in) :: uxmax,uymax,uzmax
+  real(mytype) :: cfl_x_adv,cfl_x_diff,cfl_y_adv,cfl_y_diff,cfl_z_adv,cfl_z_diff
+  real(mytype) :: cfl_conv_lim, cfl_diff_lim
+  real(mytype) :: sigma_conv(3), sigma_diff(3)
+  real(mytype) :: visc
+
+  ! Set the constants (this is true for periodic boundaries)
+  sigma_conv=[0.0, sqrt(3.0), 2.85]
+  sigma_diff=[2.0, 2.5, 2.9]
+
+  if(jles==0) then
+     visc=xnu
+  elseif (jles==1) then
+     visc=20*fpi2*xnu
+  endif
+
+  ! This is considering 1D peridic boundaries
+  ! Do x-direction
+  cfl_x_adv=abs(uxmax)*dt/dx
+  cfl_x_diff=visc*dt/dx**2.0
+  ! Do y-direction
+  cfl_y_adv=abs(uymax)*dt/dy
+  cfl_y_diff=visc*dt/dy**2.0
+  ! Do z-direction
+  cfl_z_adv=abs(uzmax)*dt/dz
+  cfl_z_diff=visc*dt/dz**2.0
+
+  ! So far we will focus on uniform grids
+  if(nrank==0) then
+     write(*,*) ' '
+     write(*,1002) cfl_x_adv, cfl_x_diff
+1002 format('CFL x-direction (Adv and Diff) =',F9.4,',',F9.4)
+     write(*,1003) cfl_y_adv, cfl_y_diff
+1003 format('CFL y-direction (Adv and Diff) =',F9.4,',',F9.4)
+     write(*,1004) cfl_z_adv, cfl_z_diff
+1004 format('CFL z-direction (Adv and Diff) =',F9.4,',',F9.4)
+     cfl_conv_lim=sigma_conv(itimescheme)/sqrt(3.0)
+     cfl_diff_lim=sigma_diff(itimescheme)/6.0
+     write(*,1005) cfl_conv_lim, cfl_diff_lim
+     write(*,*) ' '
+1005 format('CFL limits (Adv and Diff) : ',F9.4,',',F9.4)
+  endif
+
+end subroutine cfl_compute
 !*******************************************************************
 subroutine test_scalar_min_max(phi)
 
-USE decomp_2d
-USE decomp_2d_poisson
-USE variables
-USE param
-USE var
-USE MPI
+  USE decomp_2d
+  USE decomp_2d_poisson
+  USE variables
+  USE param
+  USE var
+  USE MPI
 
-implicit none
+  implicit none
 
-integer :: code,ierror,ijk,is
-real(mytype) :: phimax,phimin,phimax1,phimin1
-real(mytype),dimension(xsize(1),xsize(2),xsize(3),numscalar) :: phi
+  integer :: code,ierror,ijk,is
+  real(mytype) :: phimax,phimin,phimax1,phimin1
+  real(mytype),dimension(xsize(1),xsize(2),xsize(3),numscalar) :: phi
 
-do is=1, numscalar
- phimax=-1609.;phimin=1609.
- do ijk=1,xsize(1)*xsize(2)*xsize(3)
-   if (phi(ijk,1,1,is).gt.phimax) phimax=phi(ijk,1,1,is)
-   if (phi(ijk,1,1,is).lt.phimin) phimin=phi(ijk,1,1,is)
- enddo
+  do is=1, numscalar
+     phimax=-1609.;phimin=1609.
+     do ijk=1,xsize(1)*xsize(2)*xsize(3)
+        if (phi(ijk,1,1,is).gt.phimax) phimax=phi(ijk,1,1,is)
+        if (phi(ijk,1,1,is).lt.phimin) phimin=phi(ijk,1,1,is)
+     enddo
 
- call MPI_REDUCE(phimax,phimax1,1,real_type,MPI_MAX,0,MPI_COMM_WORLD,code)
- call MPI_REDUCE(phimin,phimin1,1,real_type,MPI_MIN,0,MPI_COMM_WORLD,code)
+     call MPI_REDUCE(phimax,phimax1,1,real_type,MPI_MAX,0,MPI_COMM_WORLD,code)
+     call MPI_REDUCE(phimin,phimin1,1,real_type,MPI_MIN,0,MPI_COMM_WORLD,code)
 
- if (nrank.eq.0) then
+     if (nrank.eq.0) then
 
-  print *,'Phi'//char(48+is)//' min max=', real(phimin1,4), real(phimax1,4)
+        print *,'Phi'//char(48+is)//' min max=', real(phimin1,4), real(phimax1,4)
 
-  if (abs(phimax1).ge.10.) then !if phi control turned off
-   stop 'Scalar diverged! FATALITY!'
-  endif
- endif
+        if (abs(phimax1).ge.10.) then !if phi control turned off
+           stop 'Scalar diverged! FATALITY!'
+        endif
+     endif
 
-enddo
+  enddo
 
-return
+  return
 end subroutine test_scalar_min_max
 !*******************************************************************
 subroutine test_speed_min_max(ux,uy,uz)
 
-USE decomp_2d
-USE decomp_2d_poisson
-USE variables
-USE param
-USE var
-USE MPI
-
-implicit none
-
-integer :: code,ierror,ijk
-real(mytype) :: uxmax,uymax,uzmax,uxmin,uymin,uzmin
-real(mytype) :: uxmax1,uymax1,uzmax1,uxmin1,uymin1,uzmin1
-real(mytype),dimension(xsize(1),xsize(2),xsize(3)) :: ux,uy,uz
-
-uxmax=-1609.;uymax=-1609.;uzmax=-1609.;uxmin=1609.;uymin=1609.;uzmin=1609.
-do ijk=1,xsize(1)*xsize(2)*xsize(3)
-   if ((iibm.eq.0).or.(ep1(ijk,1,1).eq.zero)) then
-      if (ux(ijk,1,1).gt.uxmax) uxmax=ux(ijk,1,1)
-      if (uy(ijk,1,1).gt.uymax) uymax=uy(ijk,1,1)
-      if (uz(ijk,1,1).gt.uzmax) uzmax=uz(ijk,1,1)
-      if (ux(ijk,1,1).lt.uxmin) uxmin=ux(ijk,1,1)
-      if (uy(ijk,1,1).lt.uymin) uymin=uy(ijk,1,1)
-      if (uz(ijk,1,1).lt.uzmin) uzmin=uz(ijk,1,1)
-   endif
-enddo
-
-call MPI_REDUCE(uxmax,uxmax1,1,real_type,MPI_MAX,0,MPI_COMM_WORLD,code)
-call MPI_REDUCE(uymax,uymax1,1,real_type,MPI_MAX,0,MPI_COMM_WORLD,code)
-call MPI_REDUCE(uzmax,uzmax1,1,real_type,MPI_MAX,0,MPI_COMM_WORLD,code)
-call MPI_REDUCE(uxmin,uxmin1,1,real_type,MPI_MIN,0,MPI_COMM_WORLD,code)
-call MPI_REDUCE(uymin,uymin1,1,real_type,MPI_MIN,0,MPI_COMM_WORLD,code)
-call MPI_REDUCE(uzmin,uzmin1,1,real_type,MPI_MIN,0,MPI_COMM_WORLD,code)
-
-if (nrank.eq.0) then
-
-  print *,'U,V,W min=',real(uxmin1,4),real(uymin1,4),real(uzmin1,4)
-  print *,'U,V,W max=',real(uxmax1,4),real(uymax1,4),real(uzmax1,4)
- !print *,'CFL=',real(abs(max(uxmax1,uymax1,uzmax1)*dt)/min(dx,dy,dz),4)
-
-  if((abs(uxmax1).ge.10.).OR.(abs(uymax1).ge.10.).OR.(abs(uzmax1).ge.10.)) then
-   stop 'Velocity diverged! FATALITY!'
-  endif
+  USE decomp_2d
+  USE decomp_2d_poisson
+  USE variables
+  USE param
+  USE var
+  USE MPI
+
+  implicit none
+
+  integer :: code,ierror,ijk
+  real(mytype) :: uxmax,uymax,uzmax,uxmin,uymin,uzmin
+  real(mytype) :: uxmax1,uymax1,uzmax1,uxmin1,uymin1,uzmin1
+  real(mytype),dimension(xsize(1),xsize(2),xsize(3)) :: ux,uy,uz
+
+  uxmax=-1609.;uymax=-1609.;uzmax=-1609.;uxmin=1609.;uymin=1609.;uzmin=1609.
+  do ijk=1,xsize(1)*xsize(2)*xsize(3)
+     if ((iibm.eq.0).or.(ep1(ijk,1,1).eq.zero)) then
+        if (ux(ijk,1,1).gt.uxmax) uxmax=ux(ijk,1,1)
+        if (uy(ijk,1,1).gt.uymax) uymax=uy(ijk,1,1)
+        if (uz(ijk,1,1).gt.uzmax) uzmax=uz(ijk,1,1)
+        if (ux(ijk,1,1).lt.uxmin) uxmin=ux(ijk,1,1)
+        if (uy(ijk,1,1).lt.uymin) uymin=uy(ijk,1,1)
+        if (uz(ijk,1,1).lt.uzmin) uzmin=uz(ijk,1,1)
+     endif
+  enddo
+
+  call MPI_REDUCE(uxmax,uxmax1,1,real_type,MPI_MAX,0,MPI_COMM_WORLD,code)
+  call MPI_REDUCE(uymax,uymax1,1,real_type,MPI_MAX,0,MPI_COMM_WORLD,code)
+  call MPI_REDUCE(uzmax,uzmax1,1,real_type,MPI_MAX,0,MPI_COMM_WORLD,code)
+  call MPI_REDUCE(uxmin,uxmin1,1,real_type,MPI_MIN,0,MPI_COMM_WORLD,code)
+  call MPI_REDUCE(uymin,uymin1,1,real_type,MPI_MIN,0,MPI_COMM_WORLD,code)
+  call MPI_REDUCE(uzmin,uzmin1,1,real_type,MPI_MIN,0,MPI_COMM_WORLD,code)
 
-endif
+  if (nrank.eq.0) then
 
- !if (mod(itime,imodulo).eq.0) call cfl_compute(uxmax,uymax,uzmax)
+     print *,'U,V,W min=',real(uxmin1,4),real(uymin1,4),real(uzmin1,4)
+     print *,'U,V,W max=',real(uxmax1,4),real(uymax1,4),real(uzmax1,4)
+     !print *,'CFL=',real(abs(max(uxmax1,uymax1,uzmax1)*dt)/min(dx,dy,dz),4)
 
-return
+     if((abs(uxmax1).ge.10.).OR.(abs(uymax1).ge.10.).OR.(abs(uzmax1).ge.10.)) then
+        stop 'Velocity diverged! FATALITY!'
+     endif
+
+  endif
+
+  !if (mod(itime,imodulo).eq.0) call cfl_compute(uxmax,uymax,uzmax)
+
+  return
 end subroutine test_speed_min_max
 !*******************************************************************
 subroutine restart(ux1,uy1,uz1,dux1,duy1,duz1,ep1,pp3,phi1,dphi1,px1,py1,pz1,iresflg)
@@ -496,7 +496,7 @@ subroutine stretching()
         if (istret==1) yeta(j)=real(j-1,mytype)*(one/nym)
         if (istret==2) yeta(j)=real(j-1,mytype)*(one/nym)-half
         if (istret==3) yeta(j)=real(j-1,mytype)*(half/nym)-half
-      den1=sqrt(alpha*beta+one)
+        den1=sqrt(alpha*beta+one)
         xnum=den1/sqrt(alpha/pi)/sqrt(beta)/sqrt(pi)
         den=two*sqrt(alpha/pi)*sqrt(beta)*pi*sqrt(pi)
         den3=((sin(pi*yeta(j)))*(sin(pi*yeta(j)))/beta/pi)+alpha/pi
@@ -564,106 +564,106 @@ subroutine stretching()
      enddo
   endif
 
-!Mapping!!, metric terms
-if (istret .ne. 3) then
-  do j=1,ny
-     ppy(j)=yly*(alpha/pi+(one/pi/beta)*sin(pi*yeta(j))*sin(pi*yeta(j)))
-     pp2y(j)=ppy(j)*ppy(j)
-     pp4y(j)=(-two/beta*cos(pi*yeta(j))*sin(pi*yeta(j)))
-  enddo
-  do j=1,ny
-     ppyi(j)=yly*(alpha/pi+(one/pi/beta)*sin(pi*yetai(j))*sin(pi*yetai(j)))
-     pp2yi(j)=ppyi(j)*ppyi(j)
-     pp4yi(j)=(-two/beta*cos(pi*yetai(j))*sin(pi*yetai(j)))
-  enddo
-endif
-
-if (istret .eq. 3) then
-do j=1,ny
-   ppy(j)=yly*(alpha/pi+(one/pi/beta)*sin(pi*yeta(j))*sin(pi*yeta(j)))
-   pp2y(j)=ppy(j)*ppy(j)
-   pp4y(j)=(-two/beta*cos(pi*yeta(j))*sin(pi*yeta(j)))/two
-enddo
-do j=1,ny
-   ppyi(j)=yly*(alpha/pi+(one/pi/beta)*sin(pi*yetai(j))*sin(pi*yetai(j)))
-   pp2yi(j)=ppyi(j)*ppyi(j)
-   pp4yi(j)=(-two/beta*cos(pi*yetai(j))*sin(pi*yetai(j)))/two
-enddo
-endif
-
-!   yp(1) = 0.0
-!   yp(2) = 0.01
-!   coeff0= 1.1
-!   blender1 = 0.0
-!   blender2 = 0.0
-!   do j=3,ny
-!!      yeta(j)=(j-1.)*(1./ny)
-!!      yp(j)=-beta*cos(pi*yeta(j))/sin(yeta(j)*pi)
-!
-!     if (yp(j-1).LE.3.5*1.0) then
-!       dy_plus_target = 8.0
-!       !Calculate re_tau guess somewhere
-!      dy_plus_current= (yp(j-1)-yp(j-2))*85.0
-!       !dy_plus_coeff is from 1 to 0
-!       dy_plus_coeff = (dy_plus_target-dy_plus_current)/dy_plus_target
-!       coeff = coeff0**dy_plus_coeff
-!
-!       dy_plus_coeff_old1 = dy_plus_coeff   !will be required for blenders
-!     else if (yp(j-1).GE.39.0*1.0) then
-!       dy_plus_target = 10.0
-!       !Calculate re_tau guess somewhere
-!       dy_plus_current= (yp(j-1)-yp(j-2))*85.0
-!       !dy_plus_coeff is from 1 to 0
-!       dy_plus_coeff = (dy_plus_target-dy_plus_current)/dy_plus_target
-!
-!       if (blender2.LT.1.0) blender2 = blender2 + 0.1   !carry the coeff smoothly
-!       coeff = coeff0**((1.0-blender2)*dy_plus_coeff_old2+blender2*dy_plus_coeff)
-!     else
-!       dy_plus_target = 80.0
-!       !Calculate re_tau guess somewhere
-!       dy_plus_current= (yp(j-1)-yp(j-2))*85.0
-!       !dy_plus_coeff is from 1 to 0
-!       dy_plus_coeff = (dy_plus_target-dy_plus_current)/dy_plus_target
-!
-!       if (blender1.LT.1.0) blender1 = blender1 + 0.1   !carry the coeff smoothly
-!       coeff = coeff0**((1.0-blender1)*dy_plus_coeff_old1+blender1*dy_plus_coeff)
-!
-!       dy_plus_coeff_old2 = dy_plus_coeff   !will be required for blenders
-!     endif
-!     yp(j) = yp(j-1)+(yp(j-1)-yp(j-2))*coeff
-!   enddo
-!
-!   !Normalize to yly
-!   ypmax = yp(ny)
-!   yp = yp/ypmax*yly
-
-if (nrank==0) then
-open(10,file='yp.dat', form='formatted')
-do j=1,ny
-write(10,*)yp(j)
-enddo
-close(10)
-endif
+  !Mapping!!, metric terms
+  if (istret .ne. 3) then
+     do j=1,ny
+        ppy(j)=yly*(alpha/pi+(one/pi/beta)*sin(pi*yeta(j))*sin(pi*yeta(j)))
+        pp2y(j)=ppy(j)*ppy(j)
+        pp4y(j)=(-two/beta*cos(pi*yeta(j))*sin(pi*yeta(j)))
+     enddo
+     do j=1,ny
+        ppyi(j)=yly*(alpha/pi+(one/pi/beta)*sin(pi*yetai(j))*sin(pi*yetai(j)))
+        pp2yi(j)=ppyi(j)*ppyi(j)
+        pp4yi(j)=(-two/beta*cos(pi*yetai(j))*sin(pi*yetai(j)))
+     enddo
+  endif
+
+  if (istret .eq. 3) then
+     do j=1,ny
+        ppy(j)=yly*(alpha/pi+(one/pi/beta)*sin(pi*yeta(j))*sin(pi*yeta(j)))
+        pp2y(j)=ppy(j)*ppy(j)
+        pp4y(j)=(-two/beta*cos(pi*yeta(j))*sin(pi*yeta(j)))/two
+     enddo
+     do j=1,ny
+        ppyi(j)=yly*(alpha/pi+(one/pi/beta)*sin(pi*yetai(j))*sin(pi*yetai(j)))
+        pp2yi(j)=ppyi(j)*ppyi(j)
+        pp4yi(j)=(-two/beta*cos(pi*yetai(j))*sin(pi*yetai(j)))/two
+     enddo
+  endif
+
+  !   yp(1) = 0.0
+  !   yp(2) = 0.01
+  !   coeff0= 1.1
+  !   blender1 = 0.0
+  !   blender2 = 0.0
+  !   do j=3,ny
+  !!      yeta(j)=(j-1.)*(1./ny)
+  !!      yp(j)=-beta*cos(pi*yeta(j))/sin(yeta(j)*pi)
+  !
+  !     if (yp(j-1).LE.3.5*1.0) then
+  !       dy_plus_target = 8.0
+  !       !Calculate re_tau guess somewhere
+  !      dy_plus_current= (yp(j-1)-yp(j-2))*85.0
+  !       !dy_plus_coeff is from 1 to 0
+  !       dy_plus_coeff = (dy_plus_target-dy_plus_current)/dy_plus_target
+  !       coeff = coeff0**dy_plus_coeff
+  !
+  !       dy_plus_coeff_old1 = dy_plus_coeff   !will be required for blenders
+  !     else if (yp(j-1).GE.39.0*1.0) then
+  !       dy_plus_target = 10.0
+  !       !Calculate re_tau guess somewhere
+  !       dy_plus_current= (yp(j-1)-yp(j-2))*85.0
+  !       !dy_plus_coeff is from 1 to 0
+  !       dy_plus_coeff = (dy_plus_target-dy_plus_current)/dy_plus_target
+  !
+  !       if (blender2.LT.1.0) blender2 = blender2 + 0.1   !carry the coeff smoothly
+  !       coeff = coeff0**((1.0-blender2)*dy_plus_coeff_old2+blender2*dy_plus_coeff)
+  !     else
+  !       dy_plus_target = 80.0
+  !       !Calculate re_tau guess somewhere
+  !       dy_plus_current= (yp(j-1)-yp(j-2))*85.0
+  !       !dy_plus_coeff is from 1 to 0
+  !       dy_plus_coeff = (dy_plus_target-dy_plus_current)/dy_plus_target
+  !
+  !       if (blender1.LT.1.0) blender1 = blender1 + 0.1   !carry the coeff smoothly
+  !       coeff = coeff0**((1.0-blender1)*dy_plus_coeff_old1+blender1*dy_plus_coeff)
+  !
+  !       dy_plus_coeff_old2 = dy_plus_coeff   !will be required for blenders
+  !     endif
+  !     yp(j) = yp(j-1)+(yp(j-1)-yp(j-2))*coeff
+  !   enddo
+  !
+  !   !Normalize to yly
+  !   ypmax = yp(ny)
+  !   yp = yp/ypmax*yly
+
+  if (nrank==0) then
+     open(10,file='yp.dat', form='formatted')
+     do j=1,ny
+        write(10,*)yp(j)
+     enddo
+     close(10)
+  endif
 
 end subroutine stretching
 
 !*****************************************************************
 !
 subroutine inversion5_v1(aaa,eee,spI)
-!
-!*****************************************************************
+  !
+  !*****************************************************************
 
-USE decomp_2d
-!USE decomp_2d_poisson
-USE variables
-USE param
-USE var
-USE MPI
+  USE decomp_2d
+  !USE decomp_2d_poisson
+  USE variables
+  USE param
+  USE var
+  USE MPI
 
-implicit none
+  implicit none
 
-! decomposition object for spectral space
-TYPE(DECOMP_INFO) :: spI
+  ! decomposition object for spectral space
+  TYPE(DECOMP_INFO) :: spI
 
 #ifdef DOUBLE_PREC
   real(mytype), parameter :: epsilon = 1.e-16
@@ -671,142 +671,142 @@ TYPE(DECOMP_INFO) :: spI
   real(mytype), parameter :: epsilon = 1.e-8
 #endif
 
-complex(mytype),dimension(spI%yst(1):spI%yen(1),ny/2,spI%yst(3):spI%yen(3),5) :: aaa
-complex(mytype),dimension(spI%yst(1):spI%yen(1),spI%yst(2):spI%yen(2),spI%yst(3):spI%yen(3)) :: eee
-integer :: i,j,k,m,mi,jc
-integer,dimension(2) :: ja,jb
-complex(mytype),dimension(spI%yst(1):spI%yen(1),spI%yst(3):spI%yen(3)) :: sr
-complex(mytype),dimension(spI%yst(1):spI%yen(1),spI%yst(3):spI%yen(3)) :: a1,b1
-
-real(mytype) :: tmp1,tmp2,tmp3,tmp4
-
-do i=1,2
-   ja(i)=4-i
-   jb(i)=5-i
-enddo
-do m=1,ny/2-2
-   do i=1,2
-      mi=m+i
-      do k=spI%yst(3),spI%yen(3)
-      do j=spI%yst(1),spI%yen(1)
-         if (real(aaa(j,m,k,3), kind=mytype).ne.zero) tmp1=real(aaa(j,mi,k,3-i), kind=mytype)/real(aaa(j,m,k,3), kind=mytype)
-         if (aimag(aaa(j,m,k,3)).ne.zero)tmp2=aimag(aaa(j,mi,k,3-i))/aimag(aaa(j,m,k,3))
-         sr(j,k)=cmplx(tmp1,tmp2, kind=mytype)
-         eee(j,mi,k)=cmplx(real(eee(j,mi,k), kind=mytype)-tmp1*real(eee(j,m,k), kind=mytype),&
-              aimag(eee(j,mi,k))-tmp2*aimag(eee(j,m,k)), kind=mytype)
-      enddo
-      enddo
-      do jc=ja(i),jb(i)
-      do k=spI%yst(3),spI%yen(3)
-      do j=spI%yst(1),spI%yen(1)
-         aaa(j,mi,k,jc)=cmplx(real(aaa(j,mi,k,jc), kind=mytype)-real(sr(j,k), kind=mytype)*real(aaa(j,m,k,jc+i), kind=mytype),&
-              aimag(aaa(j,mi,k,jc))-aimag(sr(j,k))*aimag(aaa(j,m,k,jc+i)), kind=mytype)
-      enddo
-      enddo
-      enddo
-   enddo
-enddo
-
-
-do k=spI%yst(3),spI%yen(3)
-do j=spI%yst(1),spI%yen(1)
-   if (abs(real(aaa(j,ny/2-1,k,3), kind=mytype)).gt.epsilon) then
-      tmp1=real(aaa(j,ny/2,k,2), kind=mytype)/real(aaa(j,ny/2-1,k,3), kind=mytype)
-   else
-      tmp1=zero
-   endif
-   if (abs(aimag(aaa(j,ny/2-1,k,3))).gt.epsilon) then
-      tmp2=aimag(aaa(j,ny/2,k,2))/aimag(aaa(j,ny/2-1,k,3))
-   else
-      tmp2=zero
-   endif
-   sr(j,k)=cmplx(tmp1,tmp2, kind=mytype)
-   b1(j,k)=cmplx(real(aaa(j,ny/2,k,3), kind=mytype)-tmp1*real(aaa(j,ny/2-1,k,4), kind=mytype),&
-        aimag(aaa(j,ny/2,k,3))-tmp2*aimag(aaa(j,ny/2-1,k,4)), kind=mytype)
-
-   if (abs(real(b1(j,k), kind=mytype)).gt.epsilon) then
-      tmp1=real(sr(j,k), kind=mytype)/real(b1(j,k), kind=mytype)
-      tmp3=real(eee(j,ny/2,k), kind=mytype)/real(b1(j,k), kind=mytype)-tmp1*real(eee(j,ny/2-1,k), kind=mytype)
-   else
-      tmp1=zero
-      tmp3=zero
-   endif
-   if (abs(aimag(b1(j,k))).gt.epsilon) then
-      tmp2=aimag(sr(j,k))/aimag(b1(j,k))
-      tmp4=aimag(eee(j,ny/2,k))/aimag(b1(j,k))-tmp2*aimag(eee(j,ny/2-1,k))
-   else
-      tmp2=zero
-      tmp4=zero
-   endif
-   a1(j,k)=cmplx(tmp1,tmp2, kind=mytype)
-   eee(j,ny/2,k)=cmplx(tmp3,tmp4, kind=mytype)
-
-   if (abs(real(aaa(j,ny/2-1,k,3), kind=mytype)).gt.epsilon) then
-      tmp1=one/real(aaa(j,ny/2-1,k,3), kind=mytype)
-   else
-      tmp1=zero
-   endif
-   if (abs(aimag(aaa(j,ny/2-1,k,3))).gt.epsilon) then
-      tmp2=one/aimag(aaa(j,ny/2-1,k,3))
-   else
-      tmp2=zero
-   endif
-   b1(j,k)=cmplx(tmp1,tmp2, kind=mytype)
-   a1(j,k)=cmplx(real(aaa(j,ny/2-1,k,4), kind=mytype)*real(b1(j,k), kind=mytype),&
-        aimag(aaa(j,ny/2-1,k,4))*aimag(b1(j,k)), kind=mytype)
-   eee(j,ny/2-1,k)=cmplx(real(eee(j,ny/2-1,k))*real(b1(j,k))-real(a1(j,k))*real(eee(j,ny/2,k)),&
-        aimag(eee(j,ny/2-1,k))*aimag(b1(j,k))-aimag(a1(j,k))*aimag(eee(j,ny/2,k)), kind=mytype)
-enddo
-enddo
-
-do i=ny/2-2,1,-1
-do k=spI%yst(3),spI%yen(3)
-do j=spI%yst(1),spI%yen(1)
-   if (abs(real(aaa(j,i,k,3), kind=mytype)).gt.epsilon) then
-      tmp1=one/real(aaa(j,i,k,3), kind=mytype)
-   else
-      tmp1=zero
-   endif
-   if (abs(aimag(aaa(j,i,k,3))).gt.epsilon) then
-      tmp2=one/aimag(aaa(j,i,k,3))
-   else
-      tmp2=zero
-   endif
-   sr(j,k)=cmplx(tmp1,tmp2, kind=mytype)
-   a1(j,k)=cmplx(real(aaa(j,i,k,4), kind=mytype)*real(sr(j,k), kind=mytype),&
-        aimag(aaa(j,i,k,4))*aimag(sr(j,k)), kind=mytype)
-   b1(j,k)=cmplx(real(aaa(j,i,k,5), kind=mytype)*real(sr(j,k), kind=mytype),&
-        aimag(aaa(j,i,k,5))*aimag(sr(j,k)), kind=mytype)
-   eee(j,i,k)=cmplx(real(eee(j,i,k), kind=mytype)*real(sr(j,k), kind=mytype)-&
-        real(a1(j,k), kind=mytype)*real(eee(j,i+1,k), kind=mytype)-&
-        real(b1(j,k), kind=mytype)*real(eee(j,i+2,k), kind=mytype),&
-        aimag(eee(j,i,k))*aimag(sr(j,k))-&
-        aimag(a1(j,k))*aimag(eee(j,i+1,k))-aimag(b1(j,k))*aimag(eee(j,i+2,k)), kind=mytype)
-enddo
-enddo
-enddo
-
-return
+  complex(mytype),dimension(spI%yst(1):spI%yen(1),ny/2,spI%yst(3):spI%yen(3),5) :: aaa
+  complex(mytype),dimension(spI%yst(1):spI%yen(1),spI%yst(2):spI%yen(2),spI%yst(3):spI%yen(3)) :: eee
+  integer :: i,j,k,m,mi,jc
+  integer,dimension(2) :: ja,jb
+  complex(mytype),dimension(spI%yst(1):spI%yen(1),spI%yst(3):spI%yen(3)) :: sr
+  complex(mytype),dimension(spI%yst(1):spI%yen(1),spI%yst(3):spI%yen(3)) :: a1,b1
+
+  real(mytype) :: tmp1,tmp2,tmp3,tmp4
+
+  do i=1,2
+     ja(i)=4-i
+     jb(i)=5-i
+  enddo
+  do m=1,ny/2-2
+     do i=1,2
+        mi=m+i
+        do k=spI%yst(3),spI%yen(3)
+           do j=spI%yst(1),spI%yen(1)
+              if (real(aaa(j,m,k,3), kind=mytype).ne.zero) tmp1=real(aaa(j,mi,k,3-i), kind=mytype)/real(aaa(j,m,k,3), kind=mytype)
+              if (aimag(aaa(j,m,k,3)).ne.zero)tmp2=aimag(aaa(j,mi,k,3-i))/aimag(aaa(j,m,k,3))
+              sr(j,k)=cmplx(tmp1,tmp2, kind=mytype)
+              eee(j,mi,k)=cmplx(real(eee(j,mi,k), kind=mytype)-tmp1*real(eee(j,m,k), kind=mytype),&
+                   aimag(eee(j,mi,k))-tmp2*aimag(eee(j,m,k)), kind=mytype)
+           enddo
+        enddo
+        do jc=ja(i),jb(i)
+           do k=spI%yst(3),spI%yen(3)
+              do j=spI%yst(1),spI%yen(1)
+                 aaa(j,mi,k,jc)=cmplx(real(aaa(j,mi,k,jc), kind=mytype)-real(sr(j,k), kind=mytype)*real(aaa(j,m,k,jc+i), kind=mytype),&
+                      aimag(aaa(j,mi,k,jc))-aimag(sr(j,k))*aimag(aaa(j,m,k,jc+i)), kind=mytype)
+              enddo
+           enddo
+        enddo
+     enddo
+  enddo
+
+
+  do k=spI%yst(3),spI%yen(3)
+     do j=spI%yst(1),spI%yen(1)
+        if (abs(real(aaa(j,ny/2-1,k,3), kind=mytype)).gt.epsilon) then
+           tmp1=real(aaa(j,ny/2,k,2), kind=mytype)/real(aaa(j,ny/2-1,k,3), kind=mytype)
+        else
+           tmp1=zero
+        endif
+        if (abs(aimag(aaa(j,ny/2-1,k,3))).gt.epsilon) then
+           tmp2=aimag(aaa(j,ny/2,k,2))/aimag(aaa(j,ny/2-1,k,3))
+        else
+           tmp2=zero
+        endif
+        sr(j,k)=cmplx(tmp1,tmp2, kind=mytype)
+        b1(j,k)=cmplx(real(aaa(j,ny/2,k,3), kind=mytype)-tmp1*real(aaa(j,ny/2-1,k,4), kind=mytype),&
+             aimag(aaa(j,ny/2,k,3))-tmp2*aimag(aaa(j,ny/2-1,k,4)), kind=mytype)
+
+        if (abs(real(b1(j,k), kind=mytype)).gt.epsilon) then
+           tmp1=real(sr(j,k), kind=mytype)/real(b1(j,k), kind=mytype)
+           tmp3=real(eee(j,ny/2,k), kind=mytype)/real(b1(j,k), kind=mytype)-tmp1*real(eee(j,ny/2-1,k), kind=mytype)
+        else
+           tmp1=zero
+           tmp3=zero
+        endif
+        if (abs(aimag(b1(j,k))).gt.epsilon) then
+           tmp2=aimag(sr(j,k))/aimag(b1(j,k))
+           tmp4=aimag(eee(j,ny/2,k))/aimag(b1(j,k))-tmp2*aimag(eee(j,ny/2-1,k))
+        else
+           tmp2=zero
+           tmp4=zero
+        endif
+        a1(j,k)=cmplx(tmp1,tmp2, kind=mytype)
+        eee(j,ny/2,k)=cmplx(tmp3,tmp4, kind=mytype)
+
+        if (abs(real(aaa(j,ny/2-1,k,3), kind=mytype)).gt.epsilon) then
+           tmp1=one/real(aaa(j,ny/2-1,k,3), kind=mytype)
+        else
+           tmp1=zero
+        endif
+        if (abs(aimag(aaa(j,ny/2-1,k,3))).gt.epsilon) then
+           tmp2=one/aimag(aaa(j,ny/2-1,k,3))
+        else
+           tmp2=zero
+        endif
+        b1(j,k)=cmplx(tmp1,tmp2, kind=mytype)
+        a1(j,k)=cmplx(real(aaa(j,ny/2-1,k,4), kind=mytype)*real(b1(j,k), kind=mytype),&
+             aimag(aaa(j,ny/2-1,k,4))*aimag(b1(j,k)), kind=mytype)
+        eee(j,ny/2-1,k)=cmplx(real(eee(j,ny/2-1,k))*real(b1(j,k))-real(a1(j,k))*real(eee(j,ny/2,k)),&
+             aimag(eee(j,ny/2-1,k))*aimag(b1(j,k))-aimag(a1(j,k))*aimag(eee(j,ny/2,k)), kind=mytype)
+     enddo
+  enddo
+
+  do i=ny/2-2,1,-1
+     do k=spI%yst(3),spI%yen(3)
+        do j=spI%yst(1),spI%yen(1)
+           if (abs(real(aaa(j,i,k,3), kind=mytype)).gt.epsilon) then
+              tmp1=one/real(aaa(j,i,k,3), kind=mytype)
+           else
+              tmp1=zero
+           endif
+           if (abs(aimag(aaa(j,i,k,3))).gt.epsilon) then
+              tmp2=one/aimag(aaa(j,i,k,3))
+           else
+              tmp2=zero
+           endif
+           sr(j,k)=cmplx(tmp1,tmp2, kind=mytype)
+           a1(j,k)=cmplx(real(aaa(j,i,k,4), kind=mytype)*real(sr(j,k), kind=mytype),&
+                aimag(aaa(j,i,k,4))*aimag(sr(j,k)), kind=mytype)
+           b1(j,k)=cmplx(real(aaa(j,i,k,5), kind=mytype)*real(sr(j,k), kind=mytype),&
+                aimag(aaa(j,i,k,5))*aimag(sr(j,k)), kind=mytype)
+           eee(j,i,k)=cmplx(real(eee(j,i,k), kind=mytype)*real(sr(j,k), kind=mytype)-&
+                real(a1(j,k), kind=mytype)*real(eee(j,i+1,k), kind=mytype)-&
+                real(b1(j,k), kind=mytype)*real(eee(j,i+2,k), kind=mytype),&
+                aimag(eee(j,i,k))*aimag(sr(j,k))-&
+                aimag(a1(j,k))*aimag(eee(j,i+1,k))-aimag(b1(j,k))*aimag(eee(j,i+2,k)), kind=mytype)
+        enddo
+     enddo
+  enddo
+
+  return
 
 end subroutine inversion5_v1
 
 !*****************************************************************
 !
 subroutine inversion5_v2(aaa,eee,spI)
-!
-!*****************************************************************
+  !
+  !*****************************************************************
 
-USE decomp_2d
-!USE decomp_2d_poisson
-USE variables
-USE param
-USE var
-USE MPI
+  USE decomp_2d
+  !USE decomp_2d_poisson
+  USE variables
+  USE param
+  USE var
+  USE MPI
 
-implicit none
+  implicit none
 
-! decomposition object for spectral space
-TYPE(DECOMP_INFO) :: spI
+  ! decomposition object for spectral space
+  TYPE(DECOMP_INFO) :: spI
 
 #ifdef DOUBLE_PREC
   real(mytype), parameter :: epsilon = 1.e-16
@@ -814,120 +814,120 @@ TYPE(DECOMP_INFO) :: spI
   real(mytype), parameter :: epsilon = 1.e-8
 #endif
 
-complex(mytype),dimension(spI%yst(1):spI%yen(1),nym,spI%yst(3):spI%yen(3),5) :: aaa
-complex(mytype),dimension(spI%yst(1):spI%yen(1),nym,spI%yst(3):spI%yen(3)) :: eee
-integer :: i,j,k,m,mi,jc
-integer,dimension(2) :: ja,jb
-complex(mytype),dimension(spI%yst(1):spI%yen(1),spI%yst(3):spI%yen(3)) :: sr
-complex(mytype),dimension(spI%yst(1):spI%yen(1),spI%yst(3):spI%yen(3)) :: a1,b1
-
-real(mytype) :: tmp1,tmp2,tmp3,tmp4
-
-do i=1,2
-   ja(i)=4-i
-   jb(i)=5-i
-enddo
-do m=1,nym-2
-   do i=1,2
-   mi=m+i
-   do k=spI%yst(3),spI%yen(3)
-   do j=spI%yst(1),spI%yen(1)
-      if (real(aaa(j,m,k,3), kind=mytype).ne.zero) tmp1=real(aaa(j,mi,k,3-i), kind=mytype)/real(aaa(j,m,k,3), kind=mytype)
-      if (aimag(aaa(j,m,k,3)).ne.zero)tmp2=aimag(aaa(j,mi,k,3-i))/aimag(aaa(j,m,k,3))
-      sr(j,k)=cmplx(tmp1,tmp2, kind=mytype)
-      eee(j,mi,k)=cmplx(real(eee(j,mi,k), kind=mytype)-tmp1*real(eee(j,m,k), kind=mytype),&
-           aimag(eee(j,mi,k))-tmp2*aimag(eee(j,m,k)), kind=mytype)
-   enddo
-   enddo
-   do jc=ja(i),jb(i)
-   do k=spI%yst(3),spI%yen(3)
-   do j=spI%yst(1),spI%yen(1)
-      aaa(j,mi,k,jc)=cmplx(real(aaa(j,mi,k,jc), kind=mytype)-real(sr(j,k), kind=mytype)*real(aaa(j,m,k,jc+i), kind=mytype),&
-           aimag(aaa(j,mi,k,jc))-aimag(sr(j,k))*aimag(aaa(j,m,k,jc+i)), kind=mytype)
-   enddo
-   enddo
-   enddo
-enddo
-enddo
-do k=spI%yst(3),spI%yen(3)
-do j=spI%yst(1),spI%yen(1)
-   if (abs(real(aaa(j,nym-1,k,3), kind=mytype)).gt.epsilon) then
-      tmp1=real(aaa(j,nym,k,2), kind=mytype)/real(aaa(j,nym-1,k,3), kind=mytype)
-   else
-      tmp1=zero
-   endif
-   if (abs(aimag(aaa(j,nym-1,k,3))).gt.epsilon) then
-      tmp2=aimag(aaa(j,nym,k,2))/aimag(aaa(j,nym-1,k,3))
-   else
-      tmp2=zero
-   endif
-   sr(j,k)=cmplx(tmp1,tmp2, kind=mytype)
-   b1(j,k)=cmplx(real(aaa(j,nym,k,3), kind=mytype)-tmp1*real(aaa(j,nym-1,k,4), kind=mytype),&
-        aimag(aaa(j,nym,k,3))-tmp2*aimag(aaa(j,nym-1,k,4)), kind=mytype)
-   if (abs(real(b1(j,k), kind=mytype)).gt.epsilon) then
-      tmp1=real(sr(j,k), kind=mytype)/real(b1(j,k), kind=mytype)
-      tmp3=real(eee(j,nym,k), kind=mytype)/real(b1(j,k), kind=mytype)-tmp1*real(eee(j,nym-1,k), kind=mytype)
-   else
-      tmp1=zero
-      tmp3=zero
-   endif
-   if (abs(aimag(b1(j,k))).gt.epsilon) then
-      tmp2=aimag(sr(j,k))/aimag(b1(j,k))
-      tmp4=aimag(eee(j,nym,k))/aimag(b1(j,k))-tmp2*aimag(eee(j,nym-1,k))
-   else
-      tmp2=zero
-      tmp4=zero
-   endif
-   a1(j,k)=cmplx(tmp1,tmp2, kind=mytype)
-   eee(j,nym,k)=cmplx(tmp3,tmp4, kind=mytype)
-
-   if (abs(real(aaa(j,nym-1,k,3), kind=mytype)).gt.epsilon) then
-      tmp1=one/real(aaa(j,nym-1,k,3), kind=mytype)
-   else
-      tmp1=zero
-   endif
-   if (abs(aimag(aaa(j,nym-1,k,3))).gt.epsilon) then
-      tmp2=one/aimag(aaa(j,nym-1,k,3))
-   else
-      tmp2=zero
-   endif
-   b1(j,k)=cmplx(tmp1,tmp2, kind=mytype)
-   a1(j,k)=cmplx(real(aaa(j,nym-1,k,4), kind=mytype)*real(b1(j,k), kind=mytype),&
-        aimag(aaa(j,nym-1,k,4))*aimag(b1(j,k)), kind=mytype)
-   eee(j,nym-1,k)=cmplx(real(eee(j,nym-1,k), kind=mytype)*real(b1(j,k), kind=mytype)-&
-        real(a1(j,k), kind=mytype)*real(eee(j,nym,k), kind=mytype),&
-        aimag(eee(j,nym-1,k))*aimag(b1(j,k))-aimag(a1(j,k))*aimag(eee(j,nym,k)), kind=mytype)
-enddo
-enddo
-
-do i=nym-2,1,-1
-do k=spI%yst(3),spI%yen(3)
-do j=spI%yst(1),spI%yen(1)
-   if (abs(real(aaa(j,i,k,3), kind=mytype)).gt.epsilon) then
-      tmp1=one/real(aaa(j,i,k,3), kind=mytype)
-   else
-      tmp1=zero
-   endif
-   if (abs(aimag(aaa(j,i,k,3))).gt.epsilon) then
-      tmp2=one/aimag(aaa(j,i,k,3))
-   else
-      tmp2=zero
-   endif
-   sr(j,k)=cmplx(tmp1,tmp2, kind=mytype)
-   a1(j,k)=cmplx(real(aaa(j,i,k,4), kind=mytype)*real(sr(j,k), kind=mytype),&
-        aimag(aaa(j,i,k,4))*aimag(sr(j,k)), kind=mytype)
-   b1(j,k)=cmplx(real(aaa(j,i,k,5), kind=mytype)*real(sr(j,k), kind=mytype),&
-        aimag(aaa(j,i,k,5))*aimag(sr(j,k)), kind=mytype)
-   eee(j,i,k)=cmplx(real(eee(j,i,k), kind=mytype)*real(sr(j,k), kind=mytype)-&
-        real(a1(j,k), kind=mytype)*real(eee(j,i+1,k), kind=mytype)-&
-        real(b1(j,k), kind=mytype)*real(eee(j,i+2,k), kind=mytype),&
-        aimag(eee(j,i,k))*aimag(sr(j,k))-&
-        aimag(a1(j,k))*aimag(eee(j,i+1,k))-aimag(b1(j,k))*aimag(eee(j,i+2,k)), kind=mytype)
-enddo
-enddo
-enddo
-
-return
+  complex(mytype),dimension(spI%yst(1):spI%yen(1),nym,spI%yst(3):spI%yen(3),5) :: aaa
+  complex(mytype),dimension(spI%yst(1):spI%yen(1),nym,spI%yst(3):spI%yen(3)) :: eee
+  integer :: i,j,k,m,mi,jc
+  integer,dimension(2) :: ja,jb
+  complex(mytype),dimension(spI%yst(1):spI%yen(1),spI%yst(3):spI%yen(3)) :: sr
+  complex(mytype),dimension(spI%yst(1):spI%yen(1),spI%yst(3):spI%yen(3)) :: a1,b1
+
+  real(mytype) :: tmp1,tmp2,tmp3,tmp4
+
+  do i=1,2
+     ja(i)=4-i
+     jb(i)=5-i
+  enddo
+  do m=1,nym-2
+     do i=1,2
+        mi=m+i
+        do k=spI%yst(3),spI%yen(3)
+           do j=spI%yst(1),spI%yen(1)
+              if (real(aaa(j,m,k,3), kind=mytype).ne.zero) tmp1=real(aaa(j,mi,k,3-i), kind=mytype)/real(aaa(j,m,k,3), kind=mytype)
+              if (aimag(aaa(j,m,k,3)).ne.zero)tmp2=aimag(aaa(j,mi,k,3-i))/aimag(aaa(j,m,k,3))
+              sr(j,k)=cmplx(tmp1,tmp2, kind=mytype)
+              eee(j,mi,k)=cmplx(real(eee(j,mi,k), kind=mytype)-tmp1*real(eee(j,m,k), kind=mytype),&
+                   aimag(eee(j,mi,k))-tmp2*aimag(eee(j,m,k)), kind=mytype)
+           enddo
+        enddo
+        do jc=ja(i),jb(i)
+           do k=spI%yst(3),spI%yen(3)
+              do j=spI%yst(1),spI%yen(1)
+                 aaa(j,mi,k,jc)=cmplx(real(aaa(j,mi,k,jc), kind=mytype)-real(sr(j,k), kind=mytype)*real(aaa(j,m,k,jc+i), kind=mytype),&
+                      aimag(aaa(j,mi,k,jc))-aimag(sr(j,k))*aimag(aaa(j,m,k,jc+i)), kind=mytype)
+              enddo
+           enddo
+        enddo
+     enddo
+  enddo
+  do k=spI%yst(3),spI%yen(3)
+     do j=spI%yst(1),spI%yen(1)
+        if (abs(real(aaa(j,nym-1,k,3), kind=mytype)).gt.epsilon) then
+           tmp1=real(aaa(j,nym,k,2), kind=mytype)/real(aaa(j,nym-1,k,3), kind=mytype)
+        else
+           tmp1=zero
+        endif
+        if (abs(aimag(aaa(j,nym-1,k,3))).gt.epsilon) then
+           tmp2=aimag(aaa(j,nym,k,2))/aimag(aaa(j,nym-1,k,3))
+        else
+           tmp2=zero
+        endif
+        sr(j,k)=cmplx(tmp1,tmp2, kind=mytype)
+        b1(j,k)=cmplx(real(aaa(j,nym,k,3), kind=mytype)-tmp1*real(aaa(j,nym-1,k,4), kind=mytype),&
+             aimag(aaa(j,nym,k,3))-tmp2*aimag(aaa(j,nym-1,k,4)), kind=mytype)
+        if (abs(real(b1(j,k), kind=mytype)).gt.epsilon) then
+           tmp1=real(sr(j,k), kind=mytype)/real(b1(j,k), kind=mytype)
+           tmp3=real(eee(j,nym,k), kind=mytype)/real(b1(j,k), kind=mytype)-tmp1*real(eee(j,nym-1,k), kind=mytype)
+        else
+           tmp1=zero
+           tmp3=zero
+        endif
+        if (abs(aimag(b1(j,k))).gt.epsilon) then
+           tmp2=aimag(sr(j,k))/aimag(b1(j,k))
+           tmp4=aimag(eee(j,nym,k))/aimag(b1(j,k))-tmp2*aimag(eee(j,nym-1,k))
+        else
+           tmp2=zero
+           tmp4=zero
+        endif
+        a1(j,k)=cmplx(tmp1,tmp2, kind=mytype)
+        eee(j,nym,k)=cmplx(tmp3,tmp4, kind=mytype)
+
+        if (abs(real(aaa(j,nym-1,k,3), kind=mytype)).gt.epsilon) then
+           tmp1=one/real(aaa(j,nym-1,k,3), kind=mytype)
+        else
+           tmp1=zero
+        endif
+        if (abs(aimag(aaa(j,nym-1,k,3))).gt.epsilon) then
+           tmp2=one/aimag(aaa(j,nym-1,k,3))
+        else
+           tmp2=zero
+        endif
+        b1(j,k)=cmplx(tmp1,tmp2, kind=mytype)
+        a1(j,k)=cmplx(real(aaa(j,nym-1,k,4), kind=mytype)*real(b1(j,k), kind=mytype),&
+             aimag(aaa(j,nym-1,k,4))*aimag(b1(j,k)), kind=mytype)
+        eee(j,nym-1,k)=cmplx(real(eee(j,nym-1,k), kind=mytype)*real(b1(j,k), kind=mytype)-&
+             real(a1(j,k), kind=mytype)*real(eee(j,nym,k), kind=mytype),&
+             aimag(eee(j,nym-1,k))*aimag(b1(j,k))-aimag(a1(j,k))*aimag(eee(j,nym,k)), kind=mytype)
+     enddo
+  enddo
+
+  do i=nym-2,1,-1
+     do k=spI%yst(3),spI%yen(3)
+        do j=spI%yst(1),spI%yen(1)
+           if (abs(real(aaa(j,i,k,3), kind=mytype)).gt.epsilon) then
+              tmp1=one/real(aaa(j,i,k,3), kind=mytype)
+           else
+              tmp1=zero
+           endif
+           if (abs(aimag(aaa(j,i,k,3))).gt.epsilon) then
+              tmp2=one/aimag(aaa(j,i,k,3))
+           else
+              tmp2=zero
+           endif
+           sr(j,k)=cmplx(tmp1,tmp2, kind=mytype)
+           a1(j,k)=cmplx(real(aaa(j,i,k,4), kind=mytype)*real(sr(j,k), kind=mytype),&
+                aimag(aaa(j,i,k,4))*aimag(sr(j,k)), kind=mytype)
+           b1(j,k)=cmplx(real(aaa(j,i,k,5), kind=mytype)*real(sr(j,k), kind=mytype),&
+                aimag(aaa(j,i,k,5))*aimag(sr(j,k)), kind=mytype)
+           eee(j,i,k)=cmplx(real(eee(j,i,k), kind=mytype)*real(sr(j,k), kind=mytype)-&
+                real(a1(j,k), kind=mytype)*real(eee(j,i+1,k), kind=mytype)-&
+                real(b1(j,k), kind=mytype)*real(eee(j,i+2,k), kind=mytype),&
+                aimag(eee(j,i,k))*aimag(sr(j,k))-&
+                aimag(a1(j,k))*aimag(eee(j,i+1,k))-aimag(b1(j,k))*aimag(eee(j,i+2,k)), kind=mytype)
+        enddo
+     enddo
+  enddo
+
+  return
 
 end subroutine inversion5_v2
 
@@ -936,157 +936,157 @@ end subroutine inversion5_v2
 !********************************************************************
 subroutine tripping(tb,ta) !TRIPPING SUBROUTINE FOR TURBULENT BOUNDARY LAYERS
 
-USE param
-USE variables
-USE decomp_2d
-USE MPI
+  USE param
+  USE variables
+  USE decomp_2d
+  USE MPI
+
+  implicit none
 
-implicit none
+  integer :: i,j,k
+  real(mytype),dimension(xsize(1),xsize(2),xsize(3)) :: tb, ta
+  integer :: seed0, ii, code
+  real(mytype) :: z_pos, randx, p_tr, b_tr, x_pos, y_pos, A_tr
 
-integer :: i,j,k
-real(mytype),dimension(xsize(1),xsize(2),xsize(3)) :: tb, ta
-integer :: seed0, ii, code
-real(mytype) :: z_pos, randx, p_tr, b_tr, x_pos, y_pos, A_tr
+  !Done in X-Pencils
+  seed0=randomseed !Seed for random number
+  !A_tr=A_trip*min(1.0,0.8+real(itime)/200.0)
+  !xs_tr=4.0/2.853
+  !ys_tr=2.0/2.853
+  !ts_tr=4.0/2.853
+  !x0_tr=40.0/2.853
+  A_tr = 0.1*dt
 
-!Done in X-Pencils
-seed0=randomseed !Seed for random number
-!A_tr=A_trip*min(1.0,0.8+real(itime)/200.0)
-!xs_tr=4.0/2.853
-!ys_tr=2.0/2.853
-!ts_tr=4.0/2.853
-!x0_tr=40.0/2.853
-A_tr = 0.1*dt
+  if ((itime.eq.ifirst).and.(nrank.eq.0)) then
+     call random_seed(SIZE=ii)
+     call random_seed(PUT=seed0*(/ (1, i = 1, ii) /))
 
-if ((itime.eq.ifirst).and.(nrank.eq.0)) then
-call random_seed(SIZE=ii)
-call random_seed(PUT=seed0*(/ (1, i = 1, ii) /))
+     !DEBUG:
+     !call random_number(randx)
+     !call MPI_BCAST(randx,1,real_type,0,MPI_COMM_WORLD,code)
+     !write(*,*) 'RANDOM:', nrank, randx, ii
+     !First random generation of h_nxt
 
-!DEBUG:
-!call random_number(randx)
-!call MPI_BCAST(randx,1,real_type,0,MPI_COMM_WORLD,code)
-!write(*,*) 'RANDOM:', nrank, randx, ii
-!First random generation of h_nxt
 
+     do j=1,z_modes
 
-  do j=1,z_modes
+        call random_number(randx)
+        h_coeff(j)=1.0*(randx-0.5)
+     enddo
+     h_coeff=h_coeff/sqrt(DBLE(z_modes)) 
+  endif
 
-    call random_number(randx)
-    h_coeff(j)=1.0*(randx-0.5)
-  enddo
-    h_coeff=h_coeff/sqrt(DBLE(z_modes)) 
-endif
-
-!Initialization h_nxt  (always bounded by xsize(3)^2 operations)
-if (itime.eq.ifirst) then
-  call MPI_BCAST(h_coeff,z_modes,real_type,0,MPI_COMM_WORLD,code)
-  nxt_itr=0
-  do k=1,xsize(3)
-     h_nxt(k)=0.0
-     z_pos=-zlz/2.0+(xstart(3)+(k-1)-1)*dz
-   do j=1,z_modes
-      h_nxt(k)= h_nxt(k)+h_coeff(j)*sin(2.0*pi*j*z_pos/zlz)
-   enddo
-  enddo
-end if
-
-
-
-!Time-loop
- i=int(t/ts_tr)
-    if (i.ge.nxt_itr) then  !Nxt_itr is a global variable
-        nxt_itr=i+1
-        
-        !First random generation of h
-        h_i(:)=h_nxt(:)
-        if (nrank .eq. 0) then
-         do j=1,z_modes
-            call random_number(randx)
-            h_coeff(j)=1.0*(randx-0.5)
-         enddo
+  !Initialization h_nxt  (always bounded by xsize(3)^2 operations)
+  if (itime.eq.ifirst) then
+     call MPI_BCAST(h_coeff,z_modes,real_type,0,MPI_COMM_WORLD,code)
+     nxt_itr=0
+     do k=1,xsize(3)
+        h_nxt(k)=0.0
+        z_pos=-zlz/2.0+(xstart(3)+(k-1)-1)*dz
+        do j=1,z_modes
+           h_nxt(k)= h_nxt(k)+h_coeff(j)*sin(2.0*pi*j*z_pos/zlz)
+        enddo
+     enddo
+  end if
+
+
+
+  !Time-loop
+  i=int(t/ts_tr)
+  if (i.ge.nxt_itr) then  !Nxt_itr is a global variable
+     nxt_itr=i+1
+
+     !First random generation of h
+     h_i(:)=h_nxt(:)
+     if (nrank .eq. 0) then
+        do j=1,z_modes
+           call random_number(randx)
+           h_coeff(j)=1.0*(randx-0.5)
+        enddo
         h_coeff=h_coeff/sqrt(DBLE(z_modes)) !Non-dimensionalization
-        end if
-        
-        call MPI_BCAST(h_coeff,z_modes,real_type,0,MPI_COMM_WORLD,code)
-          
-        
-        !Initialization h_nxt  (always bounded by z_steps^2 operations)
-       do k=1,xsize(3)
-          h_nxt(k)=0.0
-          z_pos=-zlz/2.0+(xstart(3)+(k-1)-1)*dz
-          do j=1,z_modes
-            h_nxt(k)= h_nxt(k)+h_coeff(j)*sin(2.0*pi*j*z_pos/zlz)
-          enddo
+     end if
+
+     call MPI_BCAST(h_coeff,z_modes,real_type,0,MPI_COMM_WORLD,code)
+
+
+     !Initialization h_nxt  (always bounded by z_steps^2 operations)
+     do k=1,xsize(3)
+        h_nxt(k)=0.0
+        z_pos=-zlz/2.0+(xstart(3)+(k-1)-1)*dz
+        do j=1,z_modes
+           h_nxt(k)= h_nxt(k)+h_coeff(j)*sin(2.0*pi*j*z_pos/zlz)
         enddo
-     endif
+     enddo
+  endif
 
 
- !Time coefficient
+  !Time coefficient
   p_tr=t/ts_tr-i
   b_tr=3.0*p_tr**2-2.0*p_tr**3
-  
+
   !Creation of tripping velocity
   do i=1,xsize(1)
-    x_pos=(xstart(1)+(i-1)-1)*dx
-    do j=1,xsize(2) 
-      !y_pos=(xstart(2)+(j-1)-1)*dy   
-      y_pos=yp(xstart(2)+(j-1))
-      do k=1,xsize(3)
-       !g(z)*EXP_F(X,Y)
-       ta(i,j,k)=((1.0-b_tr)*h_i(k)+b_tr*h_nxt(k))
-       !ta(i,j,k)=A_tr*exp(-((x_pos-x0_tr)/xs_tr)**2-(y_pos/ys_tr)**2)*ta(i,j,k)
-       ta(i,j,k)=A_tr*exp(-((x_pos-x0_tr)/xs_tr)**2-((y_pos-0.5)/ys_tr)**2)*ta(i,j,k)  
-       tb(i,j,k)=tb(i,j,k)+ta(i,j,k)
-       
-       z_pos=-zlz/2.0+(xstart(3)+(k-1)-1)*dz
-      ! if ((((x_pos-x0_tr)**2).le.9.0e-3).and.(y_pos.le.0.0001).and.((z_pos).le.0.03))then
-      !       open(442,file='tripping.dat',form='formatted',position='APPEND')
-      !  write(442,*) t,ta(i,j,k)
-      !  close(442)   
-      ! end if
-
-      enddo
-    enddo
+     x_pos=(xstart(1)+(i-1)-1)*dx
+     do j=1,xsize(2) 
+        !y_pos=(xstart(2)+(j-1)-1)*dy   
+        y_pos=yp(xstart(2)+(j-1))
+        do k=1,xsize(3)
+           !g(z)*EXP_F(X,Y)
+           ta(i,j,k)=((1.0-b_tr)*h_i(k)+b_tr*h_nxt(k))
+           !ta(i,j,k)=A_tr*exp(-((x_pos-x0_tr)/xs_tr)**2-(y_pos/ys_tr)**2)*ta(i,j,k)
+           ta(i,j,k)=A_tr*exp(-((x_pos-x0_tr)/xs_tr)**2-((y_pos-0.5)/ys_tr)**2)*ta(i,j,k)  
+           tb(i,j,k)=tb(i,j,k)+ta(i,j,k)
+
+           z_pos=-zlz/2.0+(xstart(3)+(k-1)-1)*dz
+           ! if ((((x_pos-x0_tr)**2).le.9.0e-3).and.(y_pos.le.0.0001).and.((z_pos).le.0.03))then
+           !       open(442,file='tripping.dat',form='formatted',position='APPEND')
+           !  write(442,*) t,ta(i,j,k)
+           !  close(442)   
+           ! end if
+
+        enddo
+     enddo
   enddo
-    
-return   
+
+  return   
 end subroutine tripping
 !********************************************************************
 function rl(complexnumber)
 
-USE param
+  USE param
 
-implicit none
+  implicit none
 
-real(mytype) :: rl
-complex(mytype) :: complexnumber
+  real(mytype) :: rl
+  complex(mytype) :: complexnumber
 
-rl = real(complexnumber, kind=mytype)
+  rl = real(complexnumber, kind=mytype)
 
 end function rl
 !********************************************************************
 function iy(complexnumber)
 
-USE param
+  USE param
 
-implicit none
+  implicit none
 
-real(mytype) :: iy
-complex(mytype) :: complexnumber
+  real(mytype) :: iy
+  complex(mytype) :: complexnumber
 
-iy = aimag(complexnumber)
+  iy = aimag(complexnumber)
 
 end function iy
 !********************************************************************
 function cx(realpart,imaginarypart)
 
-USE param
+  USE param
 
-implicit none
+  implicit none
 
-complex(mytype) :: cx
-real(mytype) :: realpart, imaginarypart
+  complex(mytype) :: cx
+  real(mytype) :: realpart, imaginarypart
 
-cx = cmplx(realpart, imaginarypart, kind=mytype)
+  cx = cmplx(realpart, imaginarypart, kind=mytype)
 
 end function cx
 !********************************************************************
@@ -1097,7 +1097,7 @@ subroutine simu_stats(iwhen)
   USE simulation_stats
   USE var
   USE MPI
-  
+
   implicit none
 
   integer :: iwhen
@@ -1143,7 +1143,7 @@ subroutine simu_stats(iwhen)
         print *,''
      endif
   endif
-  
+
 end subroutine simu_stats
 
 SUBROUTINE calc_temp_eos(temp, rho, phi, mweight, xlen, ylen, zlen)
@@ -1170,7 +1170,7 @@ SUBROUTINE calc_temp_eos(temp, rho, phi, mweight, xlen, ylen, zlen)
      CALL calc_mweight(mweight, phi, xlen, ylen, zlen)
      temp(:,:,:) = temp(:,:,:) * mweight(:,:,:)
   ENDIF
-  
+
 ENDSUBROUTINE calc_temp_eos
 
 SUBROUTINE calc_rho_eos(rho, temp, phi, mweight, xlen, ylen, zlen)
@@ -1197,7 +1197,7 @@ SUBROUTINE calc_rho_eos(rho, temp, phi, mweight, xlen, ylen, zlen)
      CALL calc_mweight(mweight, phi, xlen, ylen, zlen)
      rho(:,:,:) = rho(:,:,:) * mweight(:,:,:)
   ENDIF
-  
+
 ENDSUBROUTINE calc_rho_eos
 
 SUBROUTINE calc_mweight(mweight, phi, xlen, ylen, zlen)
@@ -1208,14 +1208,14 @@ SUBROUTINE calc_mweight(mweight, phi, xlen, ylen, zlen)
   USE var, ONLY : numscalar
 
   IMPLICIT NONE
-  
+
   INTEGER, INTENT(IN) :: xlen, ylen, zlen
   REAL(mytype), INTENT(IN), DIMENSION(xlen, ylen, zlen, numscalar) :: phi
-  
+
   !! LOCALS
   REAL(mytype), DIMENSION(xlen, ylen, zlen) :: mweight
   INTEGER :: is
-  
+
   mweight(:,:,:) = zero
   DO is = 1, numscalar
      IF (massfrac(is)) THEN
@@ -1223,5 +1223,5 @@ SUBROUTINE calc_mweight(mweight, phi, xlen, ylen, zlen)
      ENDIF
   ENDDO
   mweight(:,:,:) = one / mweight(:,:,:)
-  
+
 ENDSUBROUTINE calc_mweight
diff --git a/src/transeq.f90 b/src/transeq.f90
index 279eb8cc..66e46327 100644
--- a/src/transeq.f90
+++ b/src/transeq.f90
@@ -242,16 +242,16 @@ CONTAINS
 
     ! If LES modelling is enabled, add the SGS stresses
     if (ilesmod.ne.0.and.jles.le.3.) then
-        ! Wall model for LES
-        if (iwall.eq.1) then 
-        call compute_SGS(sgsx1,sgsy1,sgsz1,ux1,uy1,uz1,ep1,1)
-        else
-        call compute_SGS(sgsx1,sgsy1,sgsz1,ux1,uy1,uz1,ep1,0)
-        endif
-        ! Calculate SGS stresses (conservative/non-conservative formulation)
-        dux1(:,:,:,1) = dux1(:,:,:,1) + sgsx1(:,:,:)
-        duy1(:,:,:,1) = duy1(:,:,:,1) + sgsy1(:,:,:)
-        duz1(:,:,:,1) = duz1(:,:,:,1) + sgsz1(:,:,:)
+       ! Wall model for LES
+       if (iwall.eq.1) then 
+          call compute_SGS(sgsx1,sgsy1,sgsz1,ux1,uy1,uz1,ep1,1)
+       else
+          call compute_SGS(sgsx1,sgsy1,sgsz1,ux1,uy1,uz1,ep1,0)
+       endif
+       ! Calculate SGS stresses (conservative/non-conservative formulation)
+       dux1(:,:,:,1) = dux1(:,:,:,1) + sgsx1(:,:,:)
+       duy1(:,:,:,1) = duy1(:,:,:,1) + sgsy1(:,:,:)
+       duz1(:,:,:,1) = duz1(:,:,:,1) + sgsz1(:,:,:)
     endif
 
     !! Gravity
@@ -271,7 +271,7 @@ CONTAINS
   end subroutine momentum_rhs_eq
   !************************************************************
 
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !!
   !!  SUBROUTINE: momentum_full_viscstress_tensor
   !!      AUTHOR: Paul Bartholomew
@@ -288,7 +288,7 @@ CONTAINS
   !!              contributions not accounted for in the
   !!              incompressible solver.
   !!
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   subroutine momentum_full_viscstress_tensor(ta1, tb1, tc1, divu3)
 
     USE param
@@ -330,16 +330,16 @@ CONTAINS
     use decomp_2d
     use param
     use variables
-    
+
     implicit none
 
     !! Inputs
     real(mytype), dimension(xsize(1), xsize(2), xsize(3)), intent(in) :: peculiar_density1
     real(mytype), intent(in) :: richardson
-    
+
     !! InOut
     real(mytype), dimension(xsize(1), xsize(2), xsize(3), ntime) :: dux1, duy1, duz1
-    
+
     !! Locals
     integer :: istart, jstart, kstart
     integer :: iend, jend, kend
@@ -373,7 +373,7 @@ CONTAINS
     else
        kend = xsize(3)
     endif
-    
+
     do k = kstart, kend
        do j = jstart, jend
           do i = istart, iend
@@ -381,7 +381,7 @@ CONTAINS
           enddo
        enddo
     enddo
-    
+
     !! Y-gravity
     if (nclx1.eq.2) then
        istart = 2
@@ -420,7 +420,7 @@ CONTAINS
           enddo
        enddo
     enddo
-    
+
     !! Z-gravity
     if (nclx1.eq.2) then
        istart = 2
@@ -459,8 +459,8 @@ CONTAINS
           enddo
        enddo
     enddo
-    
-    
+
+
   end subroutine momentum_gravity
 
   subroutine scalar_transport_eq(dphi1, rho1, ux1, uy1, uz1, phi1, schmidt)
diff --git a/src/variables.f90 b/src/variables.f90
index 79c91dd5..f18e020b 100644
--- a/src/variables.f90
+++ b/src/variables.f90
@@ -50,7 +50,7 @@ module var
   real(mytype), save, allocatable, dimension(:,:,:) :: ep1, diss1, pre1, depo, depof, kine
   real(mytype), save, allocatable, dimension(:,:,:,:) :: dux1,duy1,duz1  ! Output of convdiff
   real(mytype), save, allocatable, dimension(:,:,:,:,:) :: dphi1
-  
+
   !arrays for post processing
   real(mytype), save, allocatable, dimension(:,:,:) :: f1,fm1
   real(mytype), save, allocatable, dimension(:,:,:) :: uxm1, uym1, phim1, prem1, dissm1
@@ -64,7 +64,7 @@ module var
   real(mytype), save, allocatable, dimension(:,:,:) :: u3sum,v3sum,w3sum,u4sum,v4sum,w4sum
   real(mytype), save, allocatable, dimension(:,:,:) :: uvsum,uwsum,vwsum,disssum,presum,tsum
   real(mytype), save, allocatable, dimension(:,:,:,:) :: psum,ppsum,upsum,vpsum,wpsum
-  
+
   !arrays for extra statistics collection
   real(mytype), save, allocatable, dimension(:,:,:) :: dudxsum,utmapsum
 
@@ -73,13 +73,13 @@ module var
 
   ! define all work arrays here
   real(mytype), save, allocatable, dimension(:,:,:) :: ta1,tb1,tc1,td1,&
-  te1,tf1,tg1,th1,ti1,di1
+       te1,tf1,tg1,th1,ti1,di1
   real(mytype), save, allocatable, dimension(:,:,:) :: pp1,pgy1,pgz1
   real(mytype), save, allocatable, dimension(:,:,:) :: ta2,tb2,tc2,td2,&
-  te2,tf2,tg2,th2,ti2,tj2,di2
+       te2,tf2,tg2,th2,ti2,tj2,di2
   real(mytype), save, allocatable, dimension(:,:,:) :: pp2,ppi2,pgy2,pgz2,pgzi2,dip2,dipp2,duxdxp2,uyp2,uzp2,upi2,duydypi2
   real(mytype), save, allocatable, dimension(:,:,:) :: ta3,tb3,tc3,td3,&
-  te3,tf3,tg3,th3,ti3,di3
+       te3,tf3,tg3,th3,ti3,di3
   real(mytype), save, allocatable, dimension(:,:,:) :: pgz3,ppi3,dip3,dipp3,duxydxyp3,uzp3
 
   integer, save :: nxmsize, nymsize, nzmsize
@@ -103,17 +103,17 @@ contains
     if (nrank==0) print *,'Initializing variables...'
 
     if (nclx) then
-      nxmsize = xsize(1)
+       nxmsize = xsize(1)
     else
        nxmsize = xsize(1) -1
     endif
     if (ncly) then
-      nymsize = ysize(2)
+       nymsize = ysize(2)
     else
        nymsize = ysize(2) -1
     endif
     if (nclz) then
-      nzmsize = zsize(3)
+       nzmsize = zsize(3)
     else
        nzmsize = zsize(3) -1
     endif
@@ -240,7 +240,7 @@ contains
     allocate(duxydxyp3(ph1%zst(1):ph1%zen(1),ph1%zst(2):ph1%zen(2),zsize(3)))
     allocate(uzp3(ph1%zst(1):ph1%zen(1),ph1%zst(2):ph1%zen(2),zsize(3)))
     allocate(dipp3(ph1%zst(1):ph1%zen(1),ph1%zst(2):ph1%zen(2),zsize(3)))
-    
+
     ! if all periodic
     !   allocate (pp3(ph%zst(1):ph%zen(1),ph%zst(2):ph%zen(2),ph%zst(3):ph%zen(3)))
     !   allocate (dv3(ph%zst(1):ph%zen(1),ph%zst(2):ph%zen(2),ph%zst(3):ph%zen(3)))
@@ -250,18 +250,18 @@ contains
     call alloc_z(po3,ph,.true.)
 
     if(ilesmod.ne.0) then
-    call alloc_x(sgsx1);call alloc_x(sgsy1); call alloc_x(sgsz1)
-    call alloc_x(sxx1);call alloc_x(syy1); call alloc_x(szz1)
-    call alloc_x(sxy1);call alloc_x(sxz1); call alloc_x(syz1)
-    call alloc_x(nut1);call alloc_x(srt_smag); call alloc_x(srt_wale)
-    call alloc_y(sgsx2);call alloc_y(sgsy2); call alloc_y(sgsz2)
-    call alloc_y(sxx2) ;call alloc_y(syy2);  call alloc_y(szz2)
-    call alloc_y(sxy2) ;call alloc_y(sxz2);  call alloc_y(syz2)
-    call alloc_y(nut2) ;call alloc_y(srt_smag2); call alloc_y(srt_wale2)
-    call alloc_z(sgsx3);call alloc_z(sgsy3); call alloc_z(sgsz3)
-    call alloc_z(sxx3) ;call alloc_z(syy3);  call alloc_z(szz3)
-    call alloc_z(sxy3) ;call alloc_z(sxz3);  call alloc_z(syz3)
-    call alloc_z(nut3)
+       call alloc_x(sgsx1);call alloc_x(sgsy1); call alloc_x(sgsz1)
+       call alloc_x(sxx1);call alloc_x(syy1); call alloc_x(szz1)
+       call alloc_x(sxy1);call alloc_x(sxz1); call alloc_x(syz1)
+       call alloc_x(nut1);call alloc_x(srt_smag); call alloc_x(srt_wale)
+       call alloc_y(sgsx2);call alloc_y(sgsy2); call alloc_y(sgsz2)
+       call alloc_y(sxx2) ;call alloc_y(syy2);  call alloc_y(szz2)
+       call alloc_y(sxy2) ;call alloc_y(sxz2);  call alloc_y(syz2)
+       call alloc_y(nut2) ;call alloc_y(srt_smag2); call alloc_y(srt_wale2)
+       call alloc_z(sgsx3);call alloc_z(sgsy3); call alloc_z(sgsz3)
+       call alloc_z(sxx3) ;call alloc_z(syy3);  call alloc_z(szz3)
+       call alloc_z(sxy3) ;call alloc_z(sxz3);  call alloc_z(syz3)
+       call alloc_z(nut3)
     endif
 
 
diff --git a/src/visu.f90 b/src/visu.f90
index 548f5149..4f748011 100644
--- a/src/visu.f90
+++ b/src/visu.f90
@@ -78,7 +78,7 @@ subroutine VISU_INSTA (ux1,uy1,uz1,phi1,ep1,protection)
   if (nrank .eq. 0) call paraview()
 
   if (protection) then !Files that must be protected from rewriting
-                       !when visu runs from post-processing
+     !when visu runs from post-processing
      save_ux=0; save_uy=0; save_uz=0; save_ibm=0; save_phi=0; save_pre=0
   endif
 
@@ -451,17 +451,17 @@ subroutine VISU_PRE (pp3,ta1,tb1,di1,ta2,tb2,di2,ta3,di3,nxmsize,nymsize,nzmsize
   pre1=tb1
 
   if (save_pre.eq.1) then
-    uvisu=0._mytype
-    call fine_to_coarseV(1,pre1,uvisu)
-    write(filename,"('./data/pre',I4.4)") itime/ioutput
-    call decomp_2d_write_one(1,uvisu,filename,2)
+     uvisu=0._mytype
+     call fine_to_coarseV(1,pre1,uvisu)
+     write(filename,"('./data/pre',I4.4)") itime/ioutput
+     call decomp_2d_write_one(1,uvisu,filename,2)
   endif
 
   if (save_prem.eq.1) then
-    tb1=0._mytype
-    call mean_plane_z(pre1,xsize(1),xsize(2),xsize(3),tb1(:,:,1))
-    write(filename,"('./data/prem',I4.4)") itime/ioutput
-    call decomp_2d_write_plane(1,tb1,3,1,filename)
+     tb1=0._mytype
+     call mean_plane_z(pre1,xsize(1),xsize(2),xsize(3),tb1(:,:,1))
+     write(filename,"('./data/prem',I4.4)") itime/ioutput
+     call decomp_2d_write_plane(1,tb1,3,1,filename)
   endif
 
   return
-- 
GitLab