diff --git a/Makefile b/Makefile
index 5753b5964b55075df6952dfafce2596a6a096c6e..b64810030f1137ed71de6a619f4188d99c864dfe 100644
--- a/Makefile
+++ b/Makefile
@@ -27,7 +27,7 @@ else ifeq ($(CMP),gcc)
 FC = mpif90
 #FFLAGS = -O3 -funroll-loops -floop-optimize -g -Warray-bounds -fcray-pointer -x f95-cpp-input
 ifeq ($(BUILD),debug)
-FFLAGS = -cpp -g3 -Og
+FFLAGS = -cpp -g3 -Og -fexceptions -ftrapv
 FFLAGS += -ffpe-trap=invalid,zero -fcheck=bounds -fimplicit-none
 else
 FFLAGS = -cpp -O3 -funroll-loops -floop-optimize -g
diff --git a/decomp2d/io.f90 b/decomp2d/io.f90
index 84a0b2216640535b9e10c3bdc15191dbdce3612c..31c47f696c638ca12d70dc7f6b56163ce20f06ac 100644
--- a/decomp2d/io.f90
+++ b/decomp2d/io.f90
@@ -1176,8 +1176,13 @@ contains
             subsizes(1)*subsizes(2)*subsizes(3), &
             real_type, MPI_STATUS_IGNORE, ierror)
     end if
+
+!    write(*,*) 'Sizes(1) = ', sizes(1)
+!    write(*,*) 'Sizes(2) = ', sizes(2)
+!    write(*,*) 'Sizes(3) = ', sizes(3)
+!    write(*,*) 'MPI_OFFSET_KIND = ', MPI_OFFSET_KIND
     
-    fh_disp(idx) = fh_disp(idx) + sizes(1) * sizes(2) * sizes(3) * disp_bytes
+    fh_disp(idx) = fh_disp(idx) + int(sizes(1),MPI_OFFSET_KIND) * int(sizes(2),MPI_OFFSET_KIND) * int(sizes(3),MPI_OFFSET_KIND) * int(disp_bytes,MPI_OFFSET_KIND)
     
     if (opened_new) then
        call decomp_2d_close_io(io_name, full_io_name)
diff --git a/docs/pages/user_guide/parameters.rst b/docs/pages/user_guide/parameters.rst
index 47cd158bf5b5056f4fce707d453da54560868dc4..aa3036cf1bf154e8769cac5b367b9d14a075ea33 100644
--- a/docs/pages/user_guide/parameters.rst
+++ b/docs/pages/user_guide/parameters.rst
@@ -245,7 +245,7 @@ cex,cey,ra,nobjmax,nraf,nvol,iforces
 ForceCVs
 --------
 
-xld, xrd, yld, yud
+xld, xrd, yld, yud, zld, zrd
 
 LMN
 ---
diff --git a/src/BC-Cylinder.f90 b/src/BC-Cylinder.f90
index a3c998874d3274af85906f54166b3dc4e6b0a558..dfc6751398f050462844182b01d20a2810a4f80e 100644
--- a/src/BC-Cylinder.f90
+++ b/src/BC-Cylinder.f90
@@ -26,7 +26,7 @@ contains
     use param, only : one, two, ten
     use ibm_param
     use dbg_schemes, only: sqrt_prec
-    use ellipsoid_utils, only: EllipsoidalRadius, NormalizeQuaternion
+   !  use ellipsoid_utils, only: EllipsoidalRadius, NormalizeQuaternion
 
     implicit none
 
@@ -39,7 +39,6 @@ contains
     real(mytype)               :: xm,ym,zm,r,rads2,kcon
     real(mytype)               :: zeromach
     real(mytype)               :: cexx,ceyy,cezz,dist_axi
-    real(mytype)               :: point(3)
 
     zeromach=one
     do while ((one + zeromach / two) .gt. one)
@@ -47,10 +46,6 @@ contains
     end do
     zeromach = ten*zeromach
 
-   !  orientation=[oriw, orii, orij, orik]
-    call NormalizeQuaternion(orientation)
-   !  shape=[shx, shy, shz]
-   !  write(*,*) shape, 'SHAPE'
 
 
     ! Intitialise epsi
@@ -67,7 +62,6 @@ contains
        ceyy=cey
        cezz=cez
     endif
-    position=[cexx,ceyy,cezz]
    !  write(*,*) position
    !  ce=[cexx, ceyy, cezz]
     !
@@ -81,18 +75,11 @@ contains
           ym=yp(j)
           do i=nxi,nxf
              xm=real(i-1,mytype)*dx
-             point=[xm, ym, zm]
-            !  call EllipsoidalRadius(point, position, orientation, shape, r)
-            !  r=sqrt_prec((xm-cexx)**two+(ym-ceyy)**two+(zm-cezz)**two)
              r=sqrt_prec((xm-cexx)**two+(ym-ceyy)**two)
-
-             if (r-ra.gt.zeromach) then
-               !  write(*,*) i, j, k
+             if (r-ra(1).gt.zeromach) then
                 cycle
              endif
-            !  write(*,*) i, j, k, zm
              epsi(i,j,k)=remp
-            !  write(*,*) remp
           enddo
        enddo
     enddo
@@ -252,12 +239,12 @@ contains
    !  call NormalizeQuaternion(orientation)
    !  position=[cex,cey,cez]
    !  linearVelocity=[lvx,lvy,lvz]
-   !  angularVelocity=[zero,zero,zero,avz]
+   !  angularVelocity=[avx,avy,avz]
 
-   !  write(*,*) 'set shape            = ', shape
-   !  write(*,*) 'set orientation      = ', orientation
-   !  write(*,*) 'set position         = ', position
-   !  write(*,*) 'set linear velocity  = ', linearVelocity
+   !  write(*,*) 'set shape = ', shape
+   !  write(*,*) 'set orientation = ', orientation
+   !  write(*,*) 'set position = ', position
+   !  write(*,*) 'set linear velocity = ', linearVelocity
    !  write(*,*) 'set angular velocity = ', angularVelocity
 
     if (iscalar==1) then
diff --git a/src/BC-Ellipsoid.f90 b/src/BC-Ellipsoid.f90
index eeab8ed72aa6c5b034646f47ede758573b95e487..106f12879ba57ac7143365b3dcf7c2cd40b39757 100644
--- a/src/BC-Ellipsoid.f90
+++ b/src/BC-Ellipsoid.f90
@@ -16,25 +16,26 @@ PUBLIC :: init_ellip, boundary_conditions_ellip, postprocess_ellip, &
 
 contains
 
-subroutine geomcomplex_ellip(epsi,nxi,nxf,ny,nyi,nyf,nzi,nzf,dx,yp,remp)
+subroutine geomcomplex_ellip(epsi,nxi,nxf,ny,nyi,nyf,nzi,nzf,dx,yp,dz,remp)
 
     use decomp_2d, only : mytype
     use param, only : one, two, ten
     use ibm_param
     use dbg_schemes, only: sqrt_prec
-    use ellipsoid_utils, only: NormalizeQuaternion, is_inside_ellipsoid, ellipInertiaCalculate
+    use ellipsoid_utils, only: NormalizeQuaternion, EllipsoidalRadius, EllipsoidalRadius_debug
+    use complex_geometry, only: nraf,nyraf
 
     implicit none
 
     integer                    :: nxi,nxf,ny,nyi,nyf,nzi,nzf
     real(mytype),dimension(nxi:nxf,nyi:nyf,nzi:nzf) :: epsi
     real(mytype),dimension(ny) :: yp
-    real(mytype)               :: dx
+    real(mytype)               :: dx,dz
     real(mytype)               :: remp
-    integer                    :: i,j,k
+    integer                    :: i,j,k, i_body
     real(mytype)               :: xm,ym,zm,r,rads2,kcon
     real(mytype)               :: zeromach
-    real(mytype)               :: cexx,ceyy,cezz,dist_axi,eqr
+    real(mytype)               :: cexx,ceyy,cezz,dist_axi
     real(mytype)               :: point(3)
     logical                    :: is_inside
 
@@ -43,22 +44,11 @@ subroutine geomcomplex_ellip(epsi,nxi,nxf,ny,nyi,nyf,nzi,nzf,dx,yp,remp)
         zeromach = zeromach/two
     end do
     zeromach = ten*zeromach
-
-    if (t.eq.0) then 
-        eqr=(shx*shy*shz)**(1.0/3.0)
-        shape=[shx/eqr,shy/eqr,shz/eqr]
-
-        orientation=[oriw,orii,orij,orik]
-        call NormalizeQuaternion(orientation)
-        position=[cex,cey,cez]
-        linearVelocity=[lvx,lvy,lvz]
-        angularVelocity=[zero,avx,avy,avz]
-        call ellipInertiaCalculate(shape,rho_s,inertia)
-    endif
-
-
+    is_inside=.false.
     !  orientation=[oriw, orii, orij, orik]
-    ! call NormalizeQuaternion(orientation)
+    do i = 1,nbody 
+        call NormalizeQuaternion(orientation(i,:))
+    enddo
     !  shape=[shx, shy, shz]
     !  write(*,*) shape, 'SHAPE'
 
@@ -66,6 +56,7 @@ subroutine geomcomplex_ellip(epsi,nxi,nxf,ny,nyi,nyf,nzi,nzf,dx,yp,remp)
     ! Intitialise epsi
     epsi(:,:,:)=zero
 
+    
 
     ! Update center of moving ellipsoid
     ! if (t.ne.0.) then
@@ -88,21 +79,56 @@ subroutine geomcomplex_ellip(epsi,nxi,nxf,ny,nyi,nyf,nzi,nzf,dx,yp,remp)
     zm=(real(k-1,mytype))*dz
     ! write(*,*) k, zm
         do j=nyi,nyf
+        ! ym=(real(j-1,mytype))*dy
         ym=yp(j)
+        if (ym /= ym) then
+            write(*,*) "ym = ", ym, " should be ", ((real(j-1,mytype))*dy), ", as j = ", j,  " (or maybe it should be ", ((real(j-1,mytype))*dy)/real(nraf,mytype), ")"
+            if (j.lt.(nyraf)) then 
+                write(*,*) "yp(j-1) = ", yp(j-1), " yp(j+1) = ", yp(j+1)
+            endif
+        endif
+
         do i=nxi,nxf
             xm=real(i-1,mytype)*dx
             point=[xm, ym, zm]
             ! call EllipsoidalRadius(point, position, orientation, shape, r)
-            call is_inside_ellipsoid(point, position, orientation, shape, ra, zeromach, is_inside)
-            !  r=sqrt_prec((xm-cexx)**two+(ym-ceyy)**two+(zm-cezz)**two)
-            !  r=sqrt_prec((xm-cexx)**two+(ym-ceyy)**two)
+            do i_body = 1,nbody
+                if (cube_flag.eq.0) then 
+                    ! if ((nrank.eq.0).and.(torq_debug.eq.1)) then 
+                    !     write(*,*) "Point, position, orientation, shape for body", i_body, " = "
+                    !     write(*,*) point,position(i_body,:),orientation(i_body,:),shape(i_body,:)
+                    ! endif
+                    call EllipsoidalRadius(point,position(i_body,:),orientation(i_body,:),shape(i_body,:),r)
+                    ! if (r /= r) then 
+                    !     write(*,*) "Point, position, orientation, shape for body", i_body, " = "
+                    !     write(*,*) point,position(i_body,:),orientation(i_body,:),shape(i_body,:)
+                    !     write(*,*) "R calculated = ", r
+                    !     write(*,*) "Timestep = ", itime
+                    ! endif
+
+                    is_inside = (r-ra(i_body)).lt.zeromach
+                    ! if (is_inside) then 
+                    !     call EllipsoidalRadius_debug(point,position(i_body,:),orientation(i_body,:),shape(i_body,:),r)
+                    ! endif
+                    if (ra(i_body) /= ra(i_body)) then
+                        write(*,*) "Nrank = ", nrank
+                        write(*,*) "Point = ", point
+                    endif
+                else if (cube_flag.eq.1) then
+                    is_inside = (abs(xm-position(i_body,1)).lt.ra(i_body)).and.(abs(ym-position(i_body,2)).lt.ra(i_body)).and.(abs(zm-position(i_body,3)).lt.ra(i_body))
+                endif
+                !  r=sqrt_prec((xm-cexx)**two+(ym-ceyy)**two+(zm-cezz)**two)
+                !  r=sqrt_prec((xm-cexx)**two+(ym-ceyy)**two)
+                if (is_inside) then
+                    !  write(*,*) i, j, k
+                    epsi(i,j,k)=remp
+                    cycle
+                endif
+            enddo 
+            ! write(*,*) is_inside
 
-            if (.not.is_inside) then
-                !  write(*,*) i, j, k
-                cycle
-            endif
             !  write(*,*) i, j, k, zm
-            epsi(i,j,k)=remp
+            ! epsi(i,j,k)=remp
             !  write(*,*) remp
         enddo
         enddo
@@ -138,9 +164,15 @@ subroutine inflow (phi)
 
     implicit none
 
-    integer  :: j,k,is
+    integer  :: i,j,k,is
     real(mytype),dimension(xsize(1),xsize(2),xsize(3),numscalar) :: phi
 
+    if ((shear_flow_ybc.eq.1).or.(shear_flow_zbc.eq.1)) then 
+        u1 = 0.0_mytype
+        u2 = 0.0_mytype
+    endif
+
+
     !call random_number(bxo)
     !call random_number(byo)
     !call random_number(bzo)
@@ -152,6 +184,33 @@ subroutine inflow (phi)
         enddo
     enddo
 
+    if (shear_flow_ybc.eq.1) then 
+        do k=1,xsize(3)
+            do i=1,xsize(1)
+                byxn(i,k)=+shear_velocity
+            enddo
+        enddo
+        do k=1,xsize(3)
+            do i=1,xsize(1)
+                byx1(i,k)=-shear_velocity
+            enddo 
+        enddo 
+    endif   
+
+    if (shear_flow_zbc.eq.1) then
+        do j=1,xsize(2)
+            do i=1,xsize(1)
+                bzxn(i,j)=+shear_velocity
+            enddo
+        enddo
+        do j=1,xsize(2)
+            do i=1,xsize(1)
+                bzx1(i,j)=-shear_velocity
+            enddo 
+        enddo 
+    endif   
+
+
     if (iscalar.eq.1) then
         do is=1, numscalar
         do k=1,xsize(3)
@@ -245,7 +304,7 @@ subroutine init_ellip (ux1,uy1,uz1,phi1)
     USE MPI
     USE ibm_param
     use dbg_schemes, only: exp_prec
-    use ellipsoid_utils, only: NormalizeQuaternion,ellipInertiaCalculate
+    use ellipsoid_utils, only: NormalizeQuaternion,ellipInertiaCalculate,ellipMassCalculate
 
 
     implicit none
@@ -253,56 +312,76 @@ subroutine init_ellip (ux1,uy1,uz1,phi1)
     real(mytype),dimension(xsize(1),xsize(2),xsize(3)) :: ux1,uy1,uz1
     real(mytype),dimension(xsize(1),xsize(2),xsize(3),numscalar) :: phi1
 
-    real(mytype) :: y,um,eqr
-    integer :: k,j,i,ii,is,code
+    real(mytype) :: y,um,eqr,ym
+    integer :: k,j,i,ii,is,code,jj
+
+    ! write(*,*) 'INSIDE INIT ELLIP'
 
-    eqr=(shx*shy*shz)**(1.0/3.0)
-    shape=[shx/eqr,shy/eqr,shz/eqr]
+    ! eqr=(sh(1)*sh(2)*sh(3))**(1.0/3.0)
+    ! shape=sh(:)/eqr
 
-    orientation=[oriw,orii,orij,orik]
-    call NormalizeQuaternion(orientation)
-    position=[cex,cey,cez]
-    linearVelocity=[lvx,lvy,lvz]
-    angularVelocity=[zero,avx,avy,avz]
-    call ellipInertiaCalculate(shape,rho_s,inertia)
+    ! orientation=ori
+    ! call NormalizeQuaternion(orientation)
+    ! position=ce
+    ! linearVelocity=lv
+    ! angularVelocity=[zero, av(1), av(2), av(3)]
+    ! call ellipInertiaCalculate(shape,rho_s,inertia)
+    ! call ellipMassCalculate(shape,rho_s,ellip_m)
     
-    if (nrank==0) then 
-        write(*,*) 'set shape              = ', shape
-        write(*,*) 'set orientation        = ', orientation
-        write(*,*) 'set position           = ', position
-        write(*,*) 'set linear velocity    = ', linearVelocity
-        write(*,*) 'set angular velocity   = ', angularVelocity
-        write(*,*) 'set moment of inertia  = ', inertia
-        write(*,*) 'density of solid       = ', rho_s
-    end if
+    ! if (nrank==0) then 
+    !     write(*,*) 'set shape             = ', shape
+    !     write(*,*) 'set orientation       = ', orientation
+    !     write(*,*) 'set position          = ', position
+    !     write(*,*) 'set linear velocity   = ', linearVelocity
+    !     write(*,*) 'set angular velocity  = ', angularVelocity
+    !     write(*,*) 'set moment of inertia = ', inertia
+    !     write(*,*) 'density of solid      = ', rho_s
+    ! end if
 
     if (iscalar==1) then
 
         phi1(:,:,:,:) = zero !change as much as you want
 
     endif
+    ! if (shear_flow_ybc.eq.1) then 
+    !     do i=1,xsize(1)
+    !         do j=1,xsize(2)
+    !             jj=j+xstart(2)-1
+    !             ym=real(jj)*dy
+    !             do k=1,xsize(3)
+    !                 ux1(i,j,k)=real((jj-(ny/2)))/(yly/2.0)*shear_velocity
+    !             enddo
+    !         enddo
+    !     enddo
+    ! else 
+        ux1=zero;
+    ! endif
 
-    ux1=zero; uy1=zero; uz1=zero
+    
+    uy1=zero; uz1=zero
 
     if (iin.ne.0) then
         call system_clock(count=code)
         if (iin.eq.2) code=0
-        call random_seed(size = ii)
-        call random_seed(put = code+63946*(nrank+1)*(/ (i - 1, i = 1, ii) /))
-
-        call random_number(ux1)
-        call random_number(uy1)
-        call random_number(uz1)
-
-        do k=1,xsize(3)
-        do j=1,xsize(2)
-            do i=1,xsize(1)
-                ux1(i,j,k)=init_noise*(ux1(i,j,k)-0.5)
-                uy1(i,j,k)=init_noise*(uy1(i,j,k)-0.5)
-                uz1(i,j,k)=init_noise*(uz1(i,j,k)-0.5)
-            enddo
-        enddo
-        enddo
+        
+	if (init_noise.gt.0.001) then 
+	   call random_seed(size = ii)
+           call random_seed(put = code+63946*(nrank+1)*(/ (i - 1, i = 1, ii) /))
+
+           call random_number(ux1)
+           call random_number(uy1)
+           call random_number(uz1)
+
+           do k=1,xsize(3)
+           do j=1,xsize(2)
+               do i=1,xsize(1)
+                   ux1(i,j,k)=init_noise*(ux1(i,j,k)-0.5)
+                   uy1(i,j,k)=init_noise*(uy1(i,j,k)-0.5)
+                   uz1(i,j,k)=init_noise*(uz1(i,j,k)-0.5)
+               enddo
+           enddo
+           enddo
+        endif
 
         !modulation of the random noise
         do k=1,xsize(3)
@@ -325,6 +404,12 @@ subroutine init_ellip (ux1,uy1,uz1,phi1)
         do i=1,xsize(1)
             ux1(i,j,k)=ux1(i,j,k)+u1
             uy1(i,j,k)=uy1(i,j,k)
+            if (shear_flow_ybc.eq.1) then 
+                ux1(i,j,k)=ux1(i,j,k)+((j+xstart(2)-1-1)*dy-yly/2.)/(yly/2.0)*shear_velocity
+            endif
+            if (shear_flow_zbc.eq.1) then
+                ux1(i,j,k)=ux1(i,j,k)+((k+xstart(3)-1-1)*dz-zlz/2.)/(zlz/2.0)*shear_velocity
+            endif
             uz1(i,j,k)=uz1(i,j,k)
         enddo
         enddo
@@ -450,4 +535,4 @@ subroutine visu_ellip(ux1, uy1, uz1, pp3, phi1, ep1, num)
 end subroutine visu_ellip
 
 end module ellip
-  
\ No newline at end of file
+  
diff --git a/src/case.f90 b/src/case.f90
index a089f0d8b3e34b7112b705b4783b1f0986fdbb12..e613c258845c6a914ab76ef2dd2a74e66ef8a5df 100644
--- a/src/case.f90
+++ b/src/case.f90
@@ -307,6 +307,7 @@ contains
     real(mytype),dimension(xsize(1),xsize(2),xsize(3),nrhotime) :: rho
     real(mytype),dimension(xsize(1),xsize(2),xsize(3)) :: ep
     real(mytype), dimension(ph1%zst(1):ph1%zen(1), ph1%zst(2):ph1%zen(2), nzmsize, npress), intent(in) :: pp
+    real(mytype)      :: dummy1,dummy2,dummy3
 
     if (itype.eq.itype_user) then
 
@@ -367,7 +368,8 @@ contains
     endif
 
     if (iforces.eq.1) then
-       call force(ux,uy,ep)
+      !  write(*,*) "Calling force from case"
+      !  call force(ux,uy,uz,ep,dummy1,dummy2,dummy3,0)
        call restart_forces(1)
     endif
 
diff --git a/src/ellip_utils.f90 b/src/ellip_utils.f90
index 1e5ab49b00ece7ae3121f7e2e1223ab5ec001271..deaa590cdcd1bdf09baba2a91a0ba8048579dde9 100644
--- a/src/ellip_utils.f90
+++ b/src/ellip_utils.f90
@@ -7,6 +7,7 @@ module ellipsoid_utils
     use decomp_2d, only: mytype
     use param, only: zero, one, two
     use dbg_schemes, only: sqrt_prec, cos_prec, exp_prec, sin_prec
+    use ibm_param
     
     implicit none
     ! public QuatRot, cross, IsoKernel, AnIsoKernel, int2str
@@ -133,15 +134,18 @@ contains
 
 
     subroutine NormalizeQuaternion(quaternion) 
-      real(mytype), intent(in) :: quaternion(4)
+      real(mytype), intent(inout) :: quaternion(4)
       real(mytype) :: normalizedQuaternion(4)
     
       ! Compute the magnitude of the quaternion
       real(mytype) :: magnitude
       magnitude = sqrt(quaternion(1)**2 + quaternion(2)**2 + quaternion(3)**2 + quaternion(4)**2)
-    
+      if (magnitude < 0.0001) then
+        magnitude = one
+        write(*,*) "Tried to normalize a zero quaternion"
+      endif
       ! Normalize the quaternion
-      normalizedQuaternion = quaternion / magnitude
+      quaternion = quaternion / magnitude
     
     end subroutine NormalizeQuaternion
 
@@ -213,10 +217,65 @@ contains
          scaled_point(i)=rotated_point(i)/shape(i)
       end do 
 
-      radius=sqrt_prec(scaled_point(1)**two+scaled_point(2)**two+scaled_point(3)**two)
+      radius=sqrt_prec(scaled_point(1)**2+scaled_point(2)**2+scaled_point(3)**2)
+
+      ! if (radius /= radius) then
+      !     write(*,*) "Got an error in grid check!"
+      !     write(*,*) "Radius = ", radius
+      !     write(*,*) "point = ", point
+      !     write(*,*) "Body centre = ", centre
+      !     write(*,*) "Translated point = ", trans_point
+      !     write(*,*) "Orientation = ", orientation
+      !     write(*,*) "Rotated point = ", rotated_point
+      !     write(*,*) "Scaled Point = ", scaled_point
+      ! endif
 
    end subroutine    
 
+   subroutine EllipsoidalRadius_debug(point, centre, orientation, shape, radius)
+    real(mytype), intent(in) :: point(3), centre(3), orientation(4), shape(3)
+    real(mytype), intent(out) :: radius
+    real(mytype) :: trans_point(3),rotated_point(3),scaled_point(3), orientation_c(4)
+    integer :: i
+
+    !translate point to body frame
+    trans_point = point-centre
+
+    write(*,*) "Translated point = ", trans_point
+
+    write(*,*) "Orientation = ", orientation
+
+    call QuaternionConjugate(orientation, orientation_c)
+
+    write(*,*) "Orientation inverse = ", orientation_c
+
+    !rotate point into body frame (using inverse(conjugate) of orientation)
+    call RotatePoint(trans_point, orientation, rotated_point)
+
+    write(*,*) "Rotated point = ", rotated_point
+    do i = 1,3
+       scaled_point(i)=rotated_point(i)/shape(i)
+    end do 
+
+    write(*,*) "Scaled point  = ", scaled_point
+
+    radius=sqrt_prec(scaled_point(1)**2+scaled_point(2)**2+scaled_point(3)**2)
+
+    write(*,*) "Radius = ", radius
+
+    ! if (radius /= radius) then
+    !     write(*,*) "Got an error in grid check!"
+    !     write(*,*) "Radius = ", radius
+    !     write(*,*) "point = ", point
+    !     write(*,*) "Body centre = ", centre
+    !     write(*,*) "Translated point = ", trans_point
+    !     write(*,*) "Orientation = ", orientation
+    !     write(*,*) "Rotated point = ", rotated_point
+    !     write(*,*) "Scaled Point = ", scaled_point
+    ! endif
+
+ end subroutine    
+
   subroutine CrossProduct(a, b, result)
     real(mytype), intent(in) :: a(3), b(3)
     real(mytype), intent(inout) :: result(3)
@@ -226,8 +285,8 @@ contains
     result(3) = a(1) * b(2) - a(2) * b(1)
   end subroutine CrossProduct
   
-  subroutine CalculatePointVelocity(point, center, linearVelocity, angularVelocity, pointVelocity)
-    real(mytype), intent(in) :: point(3), center(3), linearVelocity(3), angularVelocity(3)
+  subroutine CalculatePointVelocity(point, center, angularVelocity, linearVelocity, pointVelocity)
+    real(mytype), intent(in) :: point(3), center(3), linearVelocity(3), angularVelocity(4)
     real(mytype), intent(out) :: pointVelocity(3)
     real(mytype) :: crossed(3)
     ! Compute the distance vector from the center to the point
@@ -236,28 +295,51 @@ contains
   
     ! Compute the cross product of angular velocity and distance vector
     
-    call CrossProduct(angularVelocity, distance, crossed)
+    call CrossProduct(distance, angularVelocity(2:4), crossed)
+
   
     ! Calculate the velocity at the point
     pointVelocity = crossed + linearVelocity
   end subroutine CalculatePointVelocity
 
+  subroutine CalculatePointVelocity_Multi(point, pointVelocity)
+    real(mytype), intent(in) :: point(3)
+    real(mytype), intent(out):: pointVelocity(3)
+    real(mytype)             :: radii(10),r
+    integer                  :: i,i_closest
+ 
+    radii(:) = 10000000.
+    do i = 1,nbody
+      call EllipsoidalRadius(point, position(i,:), orientation(i,:), shape(i,:), r)
+      radii(i) = r
+    enddo
+    i_closest=1
+    if (nbody.gt.1) then 
+      do i = 2,nbody
+        if (radii(i) < radii(i_closest)) then 
+          i_closest=i
+        endif
+      enddo
+    endif
+
+    call CalculatePointVelocity(point, position(i_closest,:), angularVelocity(i_closest,:), linearVelocity(i_closest, :), pointVelocity)
+    end subroutine
+
   subroutine is_inside_ellipsoid(point, centre, orientation, shape, ra, zeromach, is_inside)
     real(mytype), intent(in) :: point(3), centre(3), orientation(4), shape(3), ra, zeromach
     logical,intent(out)      :: is_inside
     real(mytype)             :: r
 
     call EllipsoidalRadius(point,centre,orientation,shape,r)
-
+    
     is_inside = ((r-ra).lt.zeromach)
 
   end subroutine is_inside_ellipsoid
 
 
-  subroutine navierFieldGen(center, linearVelocity, angularVelocity, ep1, ep1_x, ep1_y, ep1_z)
+  subroutine navierFieldGen(ep1, ep1_x, ep1_y, ep1_z)
     use param
     use decomp_2d
-    real(mytype), intent(in) :: center(3), linearVelocity(3), angularVelocity(3)
     real(mytype), dimension(xsize(1),xsize(2),xsize(3)), intent(in) :: ep1
     real(mytype),dimension(xsize(1),xsize(2),xsize(3)),intent(out) :: ep1_x, ep1_y, ep1_z
     real(mytype) :: xm, ym, zm, point(3), x_pv, y_pv, z_pv, pointVelocity(3)
@@ -271,7 +353,7 @@ contains
           xm=real(i+xstart(1)-2, mytype)*dx
           point=[xm,ym,zm]
           if (ep1(i,j,k).eq.1) then 
-            call CalculatePointVelocity(point, center, linearVelocity, angularVelocity, pointVelocity)
+            call CalculatePointVelocity_Multi(point, pointVelocity)
             x_pv=pointVelocity(1)
             y_pv=pointVelocity(2)
             z_pv=pointVelocity(3)
@@ -314,11 +396,11 @@ contains
 
     end subroutine lab_to_body
 
-    subroutine omega_stepper(omega_n, ang_accel, dt, omega_n1)
-      real(mytype),intent(in) :: omega_n(4), ang_accel(4), dt
+    subroutine omega_stepper(omega_n, ang_accel, time_step, omega_n1)
+      real(mytype),intent(in) :: omega_n(4), ang_accel(4), time_step
       real(mytype),intent(out):: omega_n1(4)
       
-      omega_n1 = omega_n + ang_accel * dt
+      omega_n1 = omega_n + ang_accel * time_step
 
     end subroutine omega_stepper
 
@@ -329,9 +411,9 @@ contains
       real(mytype)            :: mag, re_part, im_sc, im_part(3), omega_n1(4)
 
       call QuaternionNorm(omega_q, mag)
-      re_part=cos_prec(mag*dt*half)
+      re_part=cos_prec(mag*time_step*half)
       if (mag.gt.zero) then
-        im_sc = sin_prec(mag*dt*half)/mag
+        im_sc = sin_prec(mag*time_step*half)/mag
       else 
         im_sc = zero
       endif
@@ -342,6 +424,85 @@ contains
 
     end subroutine orientation_stepper
 
+    SUBROUTINE ConvertToMovingRotatingFrame(vI, positionI, originO, vO, Omega, vR)
+      IMPLICIT NONE
+    
+      ! Arguments:
+      ! vI       : Velocity in the inertial frame (3-element array)
+      ! positionI: Position in the inertial frame (3-element array)
+      ! originO  : Position of the origin of the rotating frame in the inertial frame (3-element array)
+      ! vO       : Linear velocity of the origin of the rotating frame
+      ! Omega    : Angular velocity of the rotating frame (3-element array)
+      ! vR       : Velocity in the moving and rotating frame (Output, 3-element array)
+    
+      real(mytype), INTENT(IN)  :: vI(3), positionI(3), originO(3), vO(3), Omega(3)
+      real(mytype), INTENT(OUT) :: vR(3)
+      real(mytype) :: r(3), crossProduct_v(3)
+    
+      ! Compute r = positionI - originO
+      r(1) = positionI(1) - originO(1)
+      r(2) = positionI(2) - originO(2)
+      r(3) = positionI(3) - originO(3)
+    
+      ! Compute Omega x r (cross product)
+      crossProduct_v(1) = Omega(2)*r(3) - Omega(3)*r(2)
+      crossProduct_v(2) = Omega(3)*r(1) - Omega(1)*r(3)
+      crossProduct_v(3) = Omega(1)*r(2) - Omega(2)*r(1)
+    
+      ! Compute vR = vI - vO - Omega x r
+      vR(1) = vI(1) - vO(1) - crossProduct_v(1)
+      vR(2) = vI(2) - vO(2) - crossProduct_v(2)
+      vR(3) = vI(3) - vO(3) - crossProduct_v(3)
+    
+    END SUBROUTINE ConvertToMovingRotatingFrame
+
+    subroutine coriolis_force(omega, vr, fcoriolis)
+      implicit none
+        
+      ! Arguments:
+      ! omega      : Angular velocity of the rotating frame (3-element array)
+      ! vr         : Velocity in the rotating frame (3-element array)
+      ! fcoriolis  : Coriolis force (Output, 3-element array)
+    
+      real(mytype), intent(in)  :: omega(3), vr(3)
+      real(mytype), intent(out) :: fcoriolis(3)
+    
+      ! Compute 2 * omega x vr (cross product)
+      fcoriolis(1) = 2.0_mytype * (omega(2)*vr(3) - omega(3)*vr(2))
+      fcoriolis(2) = 2.0_mytype * (omega(3)*vr(1) - omega(1)*vr(3))
+      fcoriolis(3) = 2.0_mytype * (omega(1)*vr(2) - omega(2)*vr(1))
+    
+    end subroutine coriolis_force
+
+    subroutine centrifugal_force(omega, r, fcentrifugal)
+      implicit none
+    
+      ! Parameters:
+      integer, parameter :: mytype = selected_real_kind(p=15) ! Assuming double precision
+    
+      ! Arguments:
+      ! omega        : Angular velocity of the rotating frame (3-element array)
+      ! r            : Position vector in the rotating frame (3-element array)
+      ! fcentrifugal : Centrifugal force (Output, 3-element array)
+    
+      real(mytype), intent(in)  :: omega(3), r(3)
+      real(mytype), intent(out) :: fcentrifugal(3)
+      real(mytype) :: cross_product_omega_r(3)
+    
+      ! Compute omega x r (cross product)
+      cross_product_omega_r(1) = omega(2)*r(3) - omega(3)*r(2)
+      cross_product_omega_r(2) = omega(3)*r(1) - omega(1)*r(3)
+      cross_product_omega_r(3) = omega(1)*r(2) - omega(2)*r(1)
+    
+      ! Compute fcentrifugal = -omega x (omega x r)
+      fcentrifugal(1) = -(omega(2)*cross_product_omega_r(3) - omega(3)*cross_product_omega_r(2))
+      fcentrifugal(2) = -(omega(3)*cross_product_omega_r(1) - omega(1)*cross_product_omega_r(3))
+      fcentrifugal(3) = -(omega(1)*cross_product_omega_r(2) - omega(2)*cross_product_omega_r(1))
+    
+    end subroutine centrifugal_force
+    
+    
+    
     subroutine invert_3x3_matrix(matrix, inverse)
       real(mytype), intent(in) :: matrix(3, 3)
       real(mytype), intent(out) :: inverse(3, 3)
@@ -389,7 +550,7 @@ contains
     subroutine accel_get(omega, inertia, torque_b, ang_accel)
       real(mytype),intent(in)  :: omega(4),inertia(3,3),torque_b(4) 
       real(mytype),intent(out) :: ang_accel(4)
-      real(mytype)             :: inertia_inv(3,3),omega_v(3),torque_v(3),test(3),crossed(3)
+      real(mytype)             :: inertia_inv(3,3),omega_v(3),torque_v(3),test(3),crossed(3),ang_accel_v(3)
 
       ! write(*,*) 'inverting ', inertia
       call invert_3x3_matrix(inertia,inertia_inv)
@@ -397,7 +558,9 @@ contains
       torque_v=torque_b(2:4)
       call matrix_vector_multiply(inertia,omega_v,test)
       call CrossProduct(omega_v,test,crossed)
-      call matrix_vector_multiply(inertia_inv,(torque_v-crossed),ang_accel)
+      call matrix_vector_multiply(inertia_inv,(torque_v-crossed),ang_accel_v)
+      ang_accel(:)=0_mytype
+      ang_accel(2:4)=ang_accel_v(1:3)
 
     end subroutine accel_get
 
@@ -459,10 +622,10 @@ contains
 
     end subroutine ang_full_step
 
-    subroutine ang_step(q,omega_q,torque_vec,time_step,q1,omega1)
+    subroutine ang_step(q,omega_q,torque_vec,inertia,time_step,q1,omega1)
       use param
-      use ibm_param
-      real(mytype),intent(in) :: q(4),omega_q(4),torque_vec(3),time_step
+      ! use ibm_param, only: inertia
+      real(mytype),intent(in) :: q(4),omega_q(4),torque_vec(3),inertia(3,3),time_step
       real(mytype),intent(out):: q1(4),omega1(4)
       real(mytype)            :: torque_q(4),omega_b(4),torque_b(4),omega_half_b(4),omega1_b(4)
       real(mytype)            :: ang_accel_b(4), omega_half(4)
@@ -494,10 +657,13 @@ contains
     end subroutine ang_step
 
 
-    subroutine lin_step(position,linearVelocity,linearAcceleration,time_step,position_1,linearVelocity_1)
-      real(mytype),intent(in)   :: position(3),linearVelocity(3),linearAcceleration(3),time_step
+    subroutine lin_step(position,linearVelocity,linearForce,ellip_m,time_step,position_1,linearVelocity_1)
+      ! use ibm_param, only: ellip_m
+      real(mytype),intent(in)   :: position(3),linearVelocity(3),linearForce(3),ellip_m,time_step
       real(mytype),intent(out)  :: position_1(3),linearVelocity_1(3)
+      real(mytype)              :: linearAcceleration(3)
 
+      linearAcceleration(:) = linearForce(:) / ellip_m
       position_1(:) = position(:) + time_step*linearVelocity(:)
       linearVelocity_1 = linearVelocity(:) + time_step*linearAcceleration(:)
 
@@ -538,5 +704,38 @@ contains
       inertia(3,3)=i3
 
     end subroutine ellipInertiaCalculate
+
+    subroutine ibm_bcimp_calc(pointVelocity, lind, bcimp)
+      real(mytype), intent(in)  :: pointVelocity(3)
+      integer, intent(in)       :: lind
+      real(mytype), intent(out) :: bcimp
+      real(mytype)              :: x_pv, y_pv, z_pv
+
+      x_pv=pointVelocity(1)
+      y_pv=pointVelocity(2)
+      z_pv=pointVelocity(3)
+      if (lind.eq.0) then
+        bcimp=zero
+      elseif (lind.eq.1) then
+        bcimp=x_pv
+        ! write(*,*) bcimp
+      elseif (lind.eq.2) then
+        bcimp=y_pv
+      elseif (lind.eq.3) then
+        bcimp=z_pv
+      elseif (lind.eq.4) then 
+        bcimp=x_pv*x_pv
+      elseif (lind.eq.5) then
+        bcimp=y_pv*y_pv
+      elseif (lind.eq.6) then 
+        bcimp=z_pv*z_pv
+      elseif (lind.eq.7) then
+        bcimp=x_pv*y_pv 
+      elseif (lind.eq.8) then 
+        bcimp=x_pv*z_pv
+      elseif (lind.eq.9) then
+        bcimp=y_pv*z_pv
+      endif
+    end subroutine
       
 end module ellipsoid_utils
diff --git a/src/forces.f90 b/src/forces.f90
index 2d0952dfadc2d122a42dde29dc3d6c882584a253..316f367de15034955401013e6fdb952f0e6f17c1 100644
--- a/src/forces.f90
+++ b/src/forces.f90
@@ -17,11 +17,13 @@ module forces
   implicit none
 
   integer :: nvol,iforces
-  real(mytype),save,allocatable,dimension(:,:,:) :: ux01, uy01, ux11, uy11, ppi1
-  real(mytype),allocatable,dimension(:) :: xld,xrd,yld,yud
-  integer,allocatable,dimension(:) :: icvlf,icvrt,jcvlw,jcvup
-  integer,allocatable,dimension(:) :: icvlf_lx,icvrt_lx,icvlf_ly,icvrt_ly
-  integer,allocatable,dimension(:) :: jcvlw_lx,jcvup_lx,jcvlw_ly,jcvup_ly
+  real(mytype),save,allocatable,dimension(:,:,:) :: ux01, uy01, ux11, uy11, ppi1, uz01, uz11
+  real(mytype),allocatable,dimension(:) :: xld, xrd, yld, yud, zld, zrd, xld2, xrd2, yld2, yud2, zld2, zrd2
+  integer,allocatable,dimension(:) :: icvlf,icvrt,jcvlw,jcvup,zcvlf,zcvrt
+  integer,allocatable,dimension(:) :: icvlf_lx, icvrt_lx, icvlf_ly, icvrt_ly, icvlf_lz, icvrt_lz
+  integer,allocatable,dimension(:) :: jcvlw_lx, jcvup_lx, jcvlw_ly, jcvup_ly, jcvlw_lz, jcvup_lz
+  integer,allocatable,dimension(:) :: zcvlf_lx, zcvrt_lx, zcvlf_ly, zcvrt_ly, zcvlf_lz, zcvrt_lz
+  
 
   character(len=*), parameter :: io_restart_forces = "restart-forces-io", &
        resfile = "restart-forces"
@@ -38,25 +40,52 @@ contains
 
     integer :: iv,stp1,stp2,h
 
+   !  write(*,*) 'Inside INIT_FORCES'
+
     call alloc_x(ux01)
     call alloc_x(uy01)
     call alloc_x(ux11)
     call alloc_x(uy11)
     call alloc_x(ppi1)
+    call alloc_x(uz01)
+    call alloc_x(uz11)
 
     ux01 = zero
     uy01 = zero
     ux11 = zero
     uy11 = zero
-
-    allocate(icvlf(nvol),icvrt(nvol),jcvlw(nvol),jcvup(nvol))
-    allocate(icvlf_lx(nvol),icvrt_lx(nvol),icvlf_ly(nvol),icvrt_ly(nvol))
-    allocate(jcvlw_lx(nvol),jcvup_lx(nvol),jcvlw_ly(nvol),jcvup_ly(nvol))
-
+    uz01 = zero
+    uz11 = zero    
+
+   !  write(*,*) 'Alloc_x called'
+
+    allocate(icvlf(nvol), icvrt(nvol), jcvlw(nvol), jcvup(nvol), zcvlf(nvol), zcvrt(nvol))
+    allocate(icvlf_lx(nvol), icvrt_lx(nvol), icvlf_ly(nvol), icvrt_ly(nvol), icvlf_lz(nvol), icvrt_lz(nvol))
+    allocate(jcvlw_lx(nvol), jcvup_lx(nvol), jcvlw_ly(nvol), jcvup_ly(nvol), jcvlw_lz(nvol), jcvup_lz(nvol))
+    allocate(zcvlf_lx(nvol), zcvrt_lx(nvol), zcvlf_ly(nvol), zcvrt_ly(nvol), zcvlf_lz(nvol), zcvrt_lz(nvol))
+    allocate(xld2(nvol), xrd2(nvol), yld2(nvol), yud2(nvol), zld2(nvol), zrd2(nvol))
+
+   !  write(*,*) 'allocate called'
+
+   !  if ((iibm.ne.0).and.(t.ne.0.)) then
+   !    xld2(:) = xld(:) + (t-ifirst*dt)*ubcx
+   !    xrd2(:) = xrd(:) + (t-ifirst*dt)*ubcx
+   !    yld2(:) = yld(:) + (t-ifirst*dt)*ubcy
+   !    yud2(:) = yud(:) + (t-ifirst*dt)*ubcy
+   ! else
+      xld2(:) = xld(:)
+      xrd2(:) = xrd(:)
+      yld2(:) = yld(:)
+      yud2(:) = yud(:)
+      zld2(:) = zld(:)
+      zrd2(:) = zrd(:)
+   ! endif
+  
     !     Definition of the Control Volume
     !*****************************************************************
     !! xld,xrd,yld,yud: limits of control volume (!!don't use cex and cey anymore!!)
 
+
     do iv=1,nvol
        ! ok for istret=0 (!!to do for istret=1!!)
        icvlf(iv) = nint(xld(iv)/dx)+1
@@ -82,10 +111,24 @@ contains
        icvrt_lx(iv) = icvrt(iv)
        jcvlw_lx(iv) = max(jcvlw(iv)+1-xstart(2),1)
        jcvup_lx(iv) = min(jcvup(iv)+1-xstart(2),xsize(2))
+       jcvlw_lz(iv) = max(jcvlw(iv)+1-zstart(2),1)
+       jcvup_lz(iv) = min(jcvup(iv)+1-zstart(2),zsize(2))       
+  
        icvlf_ly(iv) = max(icvlf(iv)+1-ystart(1),1)
        icvrt_ly(iv) = min(icvrt(iv)+1-ystart(1),ysize(1))
+       icvlf_lz(iv) = max(icvlf(iv)+1-zstart(1),1)
+       icvrt_lz(iv) = min(icvrt(iv)+1-zstart(1),zsize(1))   
        jcvlw_ly(iv) = jcvlw(iv)
        jcvup_ly(iv) = jcvup(iv)
+
+       zcvlf(iv) = nint(zld(iv)/dz)+1
+       zcvrt(iv) = nint(zrd(iv)/dz)+1
+       zcvlf_lx(iv) = max(zcvlf(iv)+1-xstart(3),1)
+       zcvrt_lx(iv) = min(zcvrt(iv)+1-xstart(3),xsize(3)) 
+       zcvlf_ly(iv) = max(zcvlf(iv)+1-ystart(3),1)
+       zcvrt_ly(iv) = min(zcvrt(iv)+1-ystart(3),ysize(3))       
+       zcvlf_lz(iv) = zcvlf(iv)
+       zcvrt_lz(iv) = zcvrt(iv) 
     enddo
 
     if (nrank==0) then
@@ -104,6 +147,8 @@ contains
           write(*,"('     xrd, icvrt     : (',F6.2,',',I6,')')") xrd(iv), icvrt(iv)
           write(*,"('     yld, jcvlw     : (',F6.2,',',I6,')')") yld(iv), jcvlw(iv)
           write(*,"('     yud, jcvup     : (',F6.2,',',I6,')')") yud(iv), jcvup(iv)
+          write(*,"('     zld, zcvlf     : (',F6.2,',',I6,')')") zld(iv), zcvlf(iv)
+          write(*,"('     zrd, zcvrt     : (',F6.2,',',I6,')')") zrd(iv), zcvrt(iv)      
        enddo
        write(*,*) '==========================================================='
     endif
@@ -113,9 +158,153 @@ contains
     call decomp_2d_register_variable(io_restart_forces, "uy01", 1, 0, 0, mytype)
     call decomp_2d_register_variable(io_restart_forces, "ux11", 1, 0, 0, mytype)
     call decomp_2d_register_variable(io_restart_forces, "uy11", 1, 0, 0, mytype)
-    
+    call decomp_2d_register_variable(io_restart_forces, "uz01", 1, 0, 0, mytype)
+    call decomp_2d_register_variable(io_restart_forces, "uz11", 1, 0, 0, mytype)
+
   end subroutine init_forces
 
+  subroutine update_forces
+
+   USE decomp_2d
+   USE decomp_2d_io, only : decomp_2d_register_variable, decomp_2d_init_io
+   USE param
+   USE variables
+   implicit none
+
+   integer :: iv,stp1,stp2,h
+
+!   !  write(*,*) 'Inside INIT_FORCES'
+
+!    call alloc_x(ux01)
+!    call alloc_x(uy01)
+!    call alloc_x(ux11)
+!    call alloc_x(uy11)
+!    call alloc_x(ppi1)
+!    call alloc_x(uz01)
+!    call alloc_x(uz11)
+
+!    ux01 = zero
+!    uy01 = zero
+!    ux11 = zero
+!    uy11 = zero
+!    uz01 = zero
+!    uz11 = zero    
+
+!   !  write(*,*) 'Alloc_x called'
+
+!    allocate(icvlf(nvol), icvrt(nvol), jcvlw(nvol), jcvup(nvol), zcvlf(nvol), zcvrt(nvol))
+!    allocate(icvlf_lx(nvol), icvrt_lx(nvol), icvlf_ly(nvol), icvrt_ly(nvol), icvlf_lz(nvol), icvrt_lz(nvol))
+!    allocate(jcvlw_lx(nvol), jcvup_lx(nvol), jcvlw_ly(nvol), jcvup_ly(nvol), jcvlw_lz(nvol), jcvup_lz(nvol))
+!    allocate(zcvlf_lx(nvol), zcvrt_lx(nvol), zcvlf_ly(nvol), zcvrt_ly(nvol))
+!    allocate(xld2(nvol), xrd2(nvol), yld2(nvol), yud2(nvol), zld2(nvol), zrd2(nvol))
+
+  !  write(*,*) 'allocate called'
+
+  !  if ((iibm.ne.0).and.(t.ne.0.)) then
+  !    xld2(:) = xld(:) + (t-ifirst*dt)*ubcx
+  !    xrd2(:) = xrd(:) + (t-ifirst*dt)*ubcx
+  !    yld2(:) = yld(:) + (t-ifirst*dt)*ubcy
+  !    yud2(:) = yud(:) + (t-ifirst*dt)*ubcy
+  ! else
+     xld2(:) = xld(:)
+     xrd2(:) = xrd(:)
+     yld2(:) = yld(:)
+     yud2(:) = yud(:)
+     zld2(:) = zld(:)
+     zrd2(:) = zrd(:)
+  ! endif
+ 
+   !     Definition of the Control Volume
+   !*****************************************************************
+   !! xld,xrd,yld,yud: limits of control volume (!!don't use cex and cey anymore!!)
+
+
+   do iv=1,nvol
+      ! ok for istret=0 (!!to do for istret=1!!)
+      icvlf(iv) = nint(xld(iv)/dx)+1
+      icvrt(iv) = nint(xrd(iv)/dx)+1
+      if (istret.eq.0) then 
+        jcvlw(iv) = nint(yld(iv)/dy)+1
+        jcvup(iv) = nint(yud(iv)/dy)+1
+      else
+        stp1=0
+        stp2=0
+        do h = 1, ny-1  
+          if ((-yp(h+1)-yp(h)+two*yld(iv)).lt.(yld(iv)-yp(h)).and.(stp1.eq.0)) then 
+            jcvlw(iv) = h+1
+            stp1=1
+          endif
+          if ((-yp(h+1)-yp(h)+two*yud(iv)).lt.(yud(iv)-yp(h)).and.(stp2.eq.0)) then
+            jcvup(iv) = h
+            stp2=1 
+          endif
+        enddo
+      endif
+      icvlf_lx(iv) = icvlf(iv)
+      icvrt_lx(iv) = icvrt(iv)
+      jcvlw_lx(iv) = max(jcvlw(iv)+1-xstart(2),1)
+      jcvup_lx(iv) = min(jcvup(iv)+1-xstart(2),xsize(2))
+      jcvlw_lz(iv) = max(jcvlw(iv)+1-zstart(2),1)
+      jcvup_lz(iv) = min(jcvup(iv)+1-zstart(2),zsize(2))       
+ 
+      icvlf_ly(iv) = max(icvlf(iv)+1-ystart(1),1)
+      icvrt_ly(iv) = min(icvrt(iv)+1-ystart(1),ysize(1))
+      icvlf_lz(iv) = max(icvlf(iv)+1-zstart(1),1)
+      icvrt_lz(iv) = min(icvrt(iv)+1-zstart(1),zsize(1))   
+      jcvlw_ly(iv) = jcvlw(iv)
+      jcvup_ly(iv) = jcvup(iv)
+
+      zcvlf(iv) = nint(zld(iv)/dz)+1
+      zcvrt(iv) = nint(zrd(iv)/dz)+1
+      zcvlf_lx(iv) = max(zcvlf(iv)+1-xstart(3),1)
+      zcvrt_lx(iv) = min(zcvrt(iv)+1-xstart(3),xsize(3)) 
+      zcvlf_ly(iv) = max(zcvlf(iv)+1-ystart(3),1)
+      zcvrt_ly(iv) = min(zcvrt(iv)+1-ystart(3),ysize(3))        
+   enddo
+
+   ! if (nrank==0) then
+   !    write(*,*) '========================Forces============================='
+   !    write(*,*) '                       (icvlf)      (icvrt) '
+   !    write(*,*) '                (jcvup) B____________C '
+   !    write(*,*) '                        \            \ '
+   !    write(*,*) '                        \     __     \ '
+   !    write(*,*) '                        \    \__\    \ '
+   !    write(*,*) '                        \            \ '
+   !    write(*,*) '                        \       CV   \ '
+   !    write(*,*) '                (jcvlw) A____________D '
+   !    do iv=1,nvol
+   !       write(*,"(' Control Volume     : #',I1)") iv
+   !       write(*,"('     xld, icvlf     : (',F6.2,',',I6,')')") xld(iv), icvlf(iv)
+   !       write(*,"('     xrd, icvrt     : (',F6.2,',',I6,')')") xrd(iv), icvrt(iv)
+   !       write(*,"('     yld, jcvlw     : (',F6.2,',',I6,')')") yld(iv), jcvlw(iv)
+   !       write(*,"('     yud, jcvup     : (',F6.2,',',I6,')')") yud(iv), jcvup(iv)
+   !       write(*,"('     zld, zcvlf     : (',F6.2,',',I6,')')") zld(iv), zcvlf(iv)
+   !       write(*,"('     zrd, zcvrt     : (',F6.2,',',I6,')')") zrd(iv), zcvrt(iv)      
+   !    enddo
+   !    write(*,*) '==========================================================='
+   ! endif
+
+   ! call decomp_2d_init_io(io_restart_forces)
+   ! call decomp_2d_register_variable(io_restart_forces, "ux01", 1, 0, 0, mytype)
+   ! call decomp_2d_register_variable(io_restart_forces, "uy01", 1, 0, 0, mytype)
+   ! call decomp_2d_register_variable(io_restart_forces, "ux11", 1, 0, 0, mytype)
+   ! call decomp_2d_register_variable(io_restart_forces, "uy11", 1, 0, 0, mytype)
+   ! call decomp_2d_register_variable(io_restart_forces, "uz01", 1, 0, 0, mytype)
+   ! call decomp_2d_register_variable(io_restart_forces, "uz11", 1, 0, 0, mytype)
+
+
+  end subroutine update_forces
+!   if ((iibm.ne.0).and.(t.ne.0.)) then
+   !    xld2(:) = xld(:) + (t-ifirst*dt)*ubcx
+   !    xrd2(:) = xrd(:) + (t-ifirst*dt)*ubcx
+   !    yld2(:) = yld(:) + (t-ifirst*dt)*ubcy
+   !    yud2(:) = yud(:) + (t-ifirst*dt)*ubcy
+   ! else
+   !    xld2(:) = xld(:)
+   !    xrd2(:) = xrd(:)
+   !    yld2(:) = yld(:)
+   !    yud2(:) = yud(:)
+   ! endif
   subroutine restart_forces(itest1)
 
     USE decomp_2d
@@ -127,7 +316,7 @@ contains
     implicit none
 
     integer :: ierror,code,itest1
-    integer :: ierror_o=0 !error to open sauve file during restart
+    integer :: ierror_o=0 !error to open save file during restart
     character(len=30) :: filename, filestart
 
 
@@ -154,12 +343,18 @@ contains
        call decomp_2d_write_one(1,uy01,resfile,"uy01",0,io_restart_forces)
        call decomp_2d_write_one(1,ux11,resfile,"ux11",0,io_restart_forces)
        call decomp_2d_write_one(1,uy11,resfile,"uy11",0,io_restart_forces)
+       call decomp_2d_write_one(1,uz01,resfile,"uz01",0,io_restart_forces)
+       call decomp_2d_write_one(1,uz11,resfile,"uz11",0,io_restart_forces)
+
     else
        !read
        call decomp_2d_read_one(1,ux01,resfile,"ux01",io_restart_forces)
        call decomp_2d_read_one(1,uy01,resfile,"uy01",io_restart_forces)
        call decomp_2d_read_one(1,ux11,resfile,"ux11",io_restart_forces)
        call decomp_2d_read_one(1,uy11,resfile,"uy11",io_restart_forces)
+       call decomp_2d_read_one(1,uz01,resfile,"uz01",io_restart_forces)
+       call decomp_2d_read_one(1,uz11,resfile,"uz11",io_restart_forces)
+
     endif
 
     call decomp_2d_end_io(io_restart_forces, resfile)
@@ -176,48 +371,80 @@ contains
 
   end subroutine restart_forces
 
-  subroutine force(ux1,uy1,ep1)
+  subroutine force(ux1,uy1,uz1,ep1,dra1,dra2,dra3,record_var)
 
     USE param
     USE variables
     USE decomp_2d
     USE MPI
     USE ibm_param
+    USE ellipsoid_utils, only : CrossProduct,centrifugal_force,coriolis_force
 
-    use var, only : ta1, tb1, tc1, td1, di1
-    use var, only : ux2, uy2, ta2, tb2, tc2, td2, di2
-
+    use var, only : ta1, tb1, tc1, td1, te1, tf1, tg1, th1, ti1, di1
+    use var, only : ux2, uy2, uz2, ta2, tb2, tc2, td2, te2, tf2, tg2, th2, ti2, di2
+    use var, only : ux3, uy3, uz3, ta3, tb3, tc3, td3, te3, tf3, tg3, th3, ti3, di3
+      
+  
     implicit none
     character(len=30) :: filename, filename2
     integer :: nzmsize
-    integer                                             :: i, iv, j, k, kk, code, jj
+    integer                                             :: i, iv, j, k, kk, code, jj, ii
     integer                                             :: nvect1,nvect2,nvect3
 
-    real(mytype), dimension(xsize(1),xsize(2),xsize(3)),intent(in) :: ux1, uy1
+    real(mytype), dimension(xsize(1),xsize(2),xsize(3)),intent(in) :: ux1, uy1, uz1
     real(mytype), dimension(xsize(1),xsize(2),xsize(3)),intent(in) :: ep1
+    integer, intent(in) ::record_var
+    real(mytype), intent(out)                                       :: dra1(10),dra2(10),dra3(10)
 
     real(mytype), dimension(ysize(1),ysize(2),ysize(3)) :: ppi2
+    real(mytype), dimension(zsize(1),zsize(2),zsize(3)) :: ppi3
 
-    real(mytype), dimension(nz) :: yLift,xDrag
-    real(mytype) :: yLift_mean,xDrag_mean
+    real(mytype), dimension(nz) :: yLift,xDrag, zLat
+    real(mytype) :: yLift_mean,xDrag_mean,zLat_mean
+    real(mytype) :: xm,ym,zm,rotationalComponent(3)
 
     real(mytype), dimension(ny-1) :: del_y
 
-    real(mytype), dimension(nz) :: tunstxl, tunstyl
-    real(mytype), dimension(nz) :: tconvxl,tconvyl
+    real(mytype), dimension(nz) :: tunstxl, tunstyl, tunstzl
+    real(mytype), dimension(nz) :: tconvxl,tconvyl,tconvzl
     real(mytype), dimension(nz) :: tpresxl,tpresyl
-    real(mytype), dimension(nz) :: tdiffxl,tdiffyl
+    real(mytype), dimension(nz) :: tdiffxl,tdiffyl,tdiffzl
 
-    real(mytype), dimension(nz) :: tunstx, tunsty
-    real(mytype), dimension(nz) :: tconvx,tconvy
+    real(mytype), dimension(nz) :: tunstx, tunsty, tunstz
+    real(mytype), dimension(nz) :: tconvx,tconvy,tconvz
     real(mytype), dimension(nz) :: tpresx,tpresy
-    real(mytype), dimension(nz) :: tdiffx,tdiffy
+    real(mytype), dimension(nz) :: tdiffx,tdiffy,tdiffz
+
+        
+    
+    real(mytype), dimension(ny) :: tconvxl2, tconvyl2, tconvzl2
+    real(mytype), dimension(ny) :: tdiffxl2, tdiffyl2, tdiffzl2
+    real(mytype), dimension(ny) :: tconvx2, tconvy2, tconvz2
+    real(mytype), dimension(ny) :: tdiffx2, tdiffy2, tdiffz2
+    real(mytype), dimension(ny) :: tpreszl, tpresz
+    
+    
+    real(mytype) :: uxmid,uymid,uzmid,prmid
+    real(mytype) :: dudxmid,dudymid,dudzmid,dvdxmid,dvdymid,dvdzmid
+    real(mytype) :: dwdxmid,dwdymid,dwdzmid
+    real(mytype) :: fac,fac1,fac2,fac3,tsumx,tsumy,tsumz,centrifugal(3),coriolis(3)
+    real(mytype) :: fcvx,fcvy,fcvz,fprx,fpry,fprz,fdix,fdiy,fdiz
+    real(mytype) :: xmom,ymom,zmom
+    real(mytype), dimension(ny) :: ztpresx, ztpresy
+    real(mytype), dimension(nz) :: zyLift, zxDrag, zzLat
+    real(mytype) :: zyLift_mean, zxDrag_mean, zzLat_mean
+
 
-    real(mytype) :: uxmid,uymid,prmid
-    real(mytype) :: dudxmid,dudymid,dvdxmid,dvdymid
-    real(mytype) :: fac,tsumx,tsumy
-    real(mytype) :: fcvx,fcvy,fprx,fpry,fdix,fdiy
-    real(mytype) :: xmom,ymom
+    real(mytype), dimension(nz) :: drag1, drag2, drag11, drag22
+    real(mytype), dimension(nz) :: drag3, drag4, drag33, drag44
+    real(mytype) :: mom1, mom2, mom3, tp1, tp2, tp3
+
+   !  write(*,*) 'Inside FORCE'
+
+  
+    dra1(:) = zero
+    dra2(:) = zero
+    dra3(:) = zero
 
     nvect1=xsize(1)*xsize(2)*xsize(3)
     nvect2=ysize(1)*ysize(2)*ysize(3)
@@ -232,21 +459,41 @@ contains
     enddo
 
     if (itime.eq.1) then
+      do iv=1,nvol
+         ! if (nrank.eq.0) then 
+         !    open(12,file="Body1.dat",status='unknown',form='formatted')
+         ! endif
+         if ((nrank .eq. 0).and.(record_var.eq.1)) then
+            write(filename,"('forces.dat',I1.1)") iv
+            open(38+(iv-1),file=filename,status='unknown',form='formatted')
+            write(38+(iv-1),*) t, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero
+            call flush(38+(iv-1))
+            ! write(*,*) 'Opened file: ', filename, 'number = ', 38+(iv-1)
+         endif
+      enddo
        do k = 1, xsize(3)
           do j = 1, xsize(2)
              do i = 1, xsize(1)
                 ux11(i,j,k)=ux1(i,j,k)
                 uy11(i,j,k)=uy1(i,j,k)
+                uz11(i,j,k)=uz1(i,j,k)
              enddo
           enddo
        enddo
        return
     elseif (itime.eq.2) then
+      if ((nrank .eq. 0).and.(record_var.eq.1)) then
+         do i = 1,nvol
+            write(38+(iv-1),*) t, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero
+            call flush(38+(iv-1))
+         enddo
+      endif
        do k = 1, xsize(3)
           do j = 1, xsize(2)
              do i = 1, xsize(1)
                 ux01(i,j,k)=ux1(i,j,k)
                 uy01(i,j,k)=uy1(i,j,k)
+                uz01(i,j,k)=uz1(i,j,k)
              enddo
           enddo
        enddo
@@ -255,18 +502,40 @@ contains
 
     call derx (ta1,ux1,di1,sx,ffx,fsx,fwx,xsize(1),xsize(2),xsize(3),0,1)    ! dudx !x is 1
     call derx (tb1,uy1,di1,sx,ffxp,fsxp,fwxp,xsize(1),xsize(2),xsize(3),1,2) ! dvdx !y is 2
+    call derx (te1,uz1,di1,sx,ffxp,fsxp,fwxp,xsize(1),xsize(2),xsize(3),1,3) ! dw/dx!z is 3
+
     call transpose_x_to_y(ta1,ta2) ! dudx
     call transpose_x_to_y(tb1,tb2) ! dvdx
+    call transpose_x_to_y(te1,te2) ! dw/dx
 
     call transpose_x_to_y(ux1,ux2)
     call transpose_x_to_y(uy1,uy2)
+    call transpose_x_to_y(uz1,uz2)
     call transpose_x_to_y(ppi1,ppi2)
 
     call dery (tc2,ux2,di2,sy,ffyp,fsyp,fwyp,ppy,ysize(1),ysize(2),ysize(3),1,1) ! dudy !x is 1
     call dery (td2,uy2,di2,sy,ffy,fsy,fwy,ppy,ysize(1),ysize(2),ysize(3),0,2)    ! dvdy !y is 2
+    call dery (tf2,uz2,di2,sy,ffyp,fsyp,fwyp,ppy,ysize(1),ysize(2),ysize(3),1,3) ! dw/dy!z is 3
+    
+
+    call transpose_y_to_z(ux2,ux3)
+    call transpose_y_to_z(uy2,uy3)
+    call transpose_y_to_z(uz2,uz3)
+
+    call derz (tg3,ux3,di3,sz,ffzp,fszp,fwzp,zsize(1),zsize(2),zsize(3),1,1)  ! du/dz
+    call derz (th3,uy3,di3,sz,ffzp,fszp,fwzp,zsize(1),zsize(2),zsize(3),1,2)  ! dv/dz
+    call derz (ti3,uz3,di3,sz,ffz,fsz,fwz,zsize(1),zsize(2),zsize(3),0,3)     ! dw/dz
+  
+    call transpose_z_to_y(tg3,tg2) ! du/dz
+    call transpose_z_to_y(th3,th2) ! dv/dz
+    call transpose_z_to_y(ti3,ti2) ! 
+
     call transpose_y_to_x(tc2,tc1) ! dudy
     call transpose_y_to_x(td2,td1) ! dvdy
-
+    call transpose_y_to_x(th2,th1) ! dv/dz
+    call transpose_y_to_x(tf2,tf1) ! dw/dy
+    call transpose_y_to_x(tg2,tg1) ! 
+    call transpose_y_to_x(ti2,ti1) ! 
     !*****************************************************************
     !      Drag and Lift coefficients
     !*****************************************************************
@@ -285,31 +554,48 @@ contains
 
        tunstxl=zero
        tunstyl=zero
-       do k=1,xsize(3)
+       tunstzl=zero
+       do k=zcvlf_lx(iv),zcvrt_lx(iv)
           tsumx=zero
           tsumy=zero
+          tsumz=zero
+          zm=real(xstart(3)+k-1,mytype)*dz
           do j=jcvlw_lx(iv),jcvup_lx(iv)
+            ym=real(xstart(2)+j-1,mytype)*dy
              do i=icvlf_lx(iv),icvrt_lx(iv)
+               xm=real(xstart(1)+i-1,mytype)*dx
+
+               fac1   = (onepfive*ux1(i,j,k)-two*ux01(i,j,k)+half*ux11(i,j,k))*(one-ep1(i,j,k))
+               fac2   = (onepfive*uy1(i,j,k)-two*uy01(i,j,k)+half*uy11(i,j,k))*(one-ep1(i,j,k))
+               fac3   = (onepfive*uz1(i,j,k)-two*uz01(i,j,k)+half*uz11(i,j,k))*(one-ep1(i,j,k))
+
+                call coriolis_force(angularVelocity(iv,2:4),[fac1,fac2,fac3],coriolis)
+                call centrifugal_force(angularVelocity(iv,2:4), [xm,ym,zm]-position(iv,:),centrifugal)
                 !     The velocity time rate has to be relative to the cell center, 
                 !     and not to the nodes, because, here, we have an integral 
                 !     relative to the volume, and, therefore, this has a sense 
                 !     of a "source".
                 !         fac   = (1.5*ux1(i,j,k)-2.0*ux01(i,j,k)+0.5*ux11(i,j,k))*epcv1(i,j,k)
-                fac   = (onepfive*ux1(i,j,k)-two*ux01(i,j,k)+half*ux11(i,j,k))*(one-ep1(i,j,k))
-                tsumx = tsumx+fac*dx*del_y(j+(xstart(2)-1))/dt    !tsumx+fac*dx*dy/dt
+                !  tsumx = tsumx+(fac1-coriolis(1)-centrifugal(1))*dx*del_y(j+(xstart(2)-1))*dz/dt    !tsumx+fac*dx*dy/dt
+                tsumx = tsumx+fac1*dx*del_y(min(ny-1,j+xstart(2)-1))*dz/dt
                 !sumx(k) = sumx(k)+dudt1*dx*dy
 
                 !         fac   = (1.5*uy1(i,j,k)-2.0*uy01(i,j,k)+0.5*uy11(i,j,k))*epcv1(i,j,k)
-                fac   = (onepfive*uy1(i,j,k)-two*uy01(i,j,k)+half*uy11(i,j,k))*(one-ep1(i,j,k))
-                tsumy = tsumy+fac*dx*del_y(j+(xstart(2)-1))/dt !tsumy+fac*dx*dy/dt
+               !  tsumy = tsumy+(fac2-coriolis(2)-centrifugal(2))*dx*del_y(j+(xstart(2)-1))*dz/dt !tsumy+fac*dx*dy/dt
+                tsumy = tsumy+fac2*dx*del_y(min(ny-1,j+xstart(2)-1))*dz/dt
                 !sumy(k) = sumy(k)+dudt1*dx*dy
+
+               !  tsumz = tsumz+(fac3-coriolis(3)-centrifugal(3))*dx*del_y(j+(xstart(2)-1))*dz/dt     
+                tsumz = tsumz+fac3*dx*del_y(min(ny-1,j+xstart(2)-1))*dz/dt
              enddo
           enddo
           tunstxl(xstart(3)-1+k)=tsumx
           tunstyl(xstart(3)-1+k)=tsumy
+          tunstzl(xstart(3)-1+k)=tsumz
        enddo
        call MPI_ALLREDUCE(tunstxl,tunstx,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
        call MPI_ALLREDUCE(tunstyl,tunsty,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+       call MPI_ALLREDUCE(tunstzl,tunstz,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
 
 !!$!*********************************************************************************
 !!$!     Secondly, the surface momentum fluxes
@@ -322,175 +608,438 @@ contains
 !!$!        \    \__\    \
 !!$!        \            \
 !!$!        \       CV   \
-!!$!(jcvlw) A____________D      
+!!$!(jcvlw) A____________D     
+       
+  drag1(:)=0.
+  drag2(:)=0.
+  drag3(:)=0.
+  drag4(:)=0.
+  
+  drag11(:)=0.
+  drag22(:)=0.
+  drag33(:)=0.
+  drag44(:)=0.
 
        tconvxl=zero
        tconvyl=zero
+       tconvzl=zero
        tdiffxl=zero
        tdiffyl=zero
+       tdiffzl=zero
        tpresxl=zero
        tpresyl=zero
+       tpreszl=zero
+
+       tconvxl2=zero
+       tconvyl2=zero
+       tconvzl2=zero
+       tdiffxl2=zero
+       tdiffyl2=zero
+       tdiffzl2=zero  
        !BC and AD : x-pencils
        !AD
        if ((jcvlw(iv).ge.xstart(2)).and.(jcvlw(iv).le.xend(2))) then
           j=jcvlw(iv)-xstart(2)+1
-          do k=1,xsize(3)
+          jj=jcvlw(iv)
+          ym=real(jj,mytype)*dy
+          do k=zcvlf_lx(iv),zcvrt_lx(iv)
              kk=xstart(3)-1+k
+             zm=real(kk,mytype)*dz
              fcvx=zero
              fcvy=zero
+             fcvz=zero
              fpry=zero
              fdix=zero
              fdiy=zero
+             fdiz=zero
              do i=icvlf_lx(iv),icvrt_lx(iv)-1
+                
+                ii=xstart(1)+i-1
+                xm=real(ii,mytype)*dx
+               !  write(*,*) 'Calculating force at upper y boundary', [xm,ym,zm]
+
                 !momentum flux
-                uxmid = half*(ux1(i,j,k)+ux1(i+1,j,k))
-                uymid = half*(uy1(i,j,k)+uy1(i+1,j,k))
-                fcvx  = fcvx -uxmid*uymid*dx
-                fcvy  = fcvy -uymid*uymid*dx
+                call crossProduct(angularVelocity(iv,2:4),[xm,ym,zm]-position(iv,:),rotationalComponent)
+                uxmid = half*(ux1(i,j,k)+ux1(i+1,j,k)) - linearVelocity(iv,1) + rotationalComponent(1)
+                uymid = half*(uy1(i,j,k)+uy1(i+1,j,k)) - linearVelocity(iv,2) + rotationalComponent(2)
+                uzmid = half*(uz1(i,j,k)+uz1(i+1,j,k)) - linearVelocity(iv,3) + rotationalComponent(3)
+
+                fcvx  = fcvx -uxmid*uymid*dx*dz
+                fcvy  = fcvy -uymid*uymid*dx*dz
+                fcvz  = fcvz -uymid*uzmid*dx*dz
+
 
                 !pressure
                 prmid = half*(ppi1(i,j,k)+ppi1(i+1,j,k))
-                fpry  = fpry +prmid*dx
+                fpry  = fpry +prmid*dx*dz
 
                 !viscous term
                 dudymid = half*(tc1(i,j,k)+tc1(i+1,j,k))
                 dvdxmid = half*(tb1(i,j,k)+tb1(i+1,j,k))
                 dvdymid = half*(td1(i,j,k)+td1(i+1,j,k))
-                fdix    = fdix -(xnu*(dudymid+dvdxmid)*dx)
-                fdiy    = fdiy -two*xnu*dvdymid*dx
+                dwdymid = half*(tf1(i,j,k)+tf1(i+1,j,k))
+                dvdzmid = half*(th1(i,j,k)+th1(i+1,j,k))
+
+                fdix    = fdix -(xnu*(dudymid+dvdxmid)*dx*dz)
+                fdiy    = fdiy -two*xnu*dvdymid*dx*dz
+                fdiz    = fdiz -(xnu*(dwdymid+dvdzmid)*dx*dz)
+
 
              enddo
 
              tconvxl(kk)=tconvxl(kk)+fcvx
              tconvyl(kk)=tconvyl(kk)+fcvy
+             tconvzl(kk)=tconvzl(kk)+fcvz
              tpresyl(kk)=tpresyl(kk)+fpry
              tdiffxl(kk)=tdiffxl(kk)+fdix
              tdiffyl(kk)=tdiffyl(kk)+fdiy
+             tdiffzl(kk)=tdiffzl(kk)+fdiz
           enddo
        endif
        !BC
        if ((jcvup(iv).ge.xstart(2)).and.(jcvup(iv).le.xend(2))) then
           j=jcvup(iv)-xstart(2)+1
-          do k=1,xsize(3)
-             kk=xstart(3)-1+k
+          jj=jcvup(iv)
+          ym=real(jj,mytype)*dy
+          do k=zcvlf_lx(iv),zcvrt_lx(iv)
+             kk=xstart(3)-1+k   
+             zm=real(kk,mytype)*dz
              fcvx=zero
              fcvy=zero
+             fcvz=zero
              fpry=zero
              fdix=zero
              fdiy=zero
+             fdiz=zero
              do i=icvlf_lx(iv),icvrt_lx(iv)-1
-                !momentum flux
-                uxmid = half*(ux1(i,j,k)+ux1(i+1,j,k))
-                uymid = half*(uy1(i,j,k)+uy1(i+1,j,k))
-                fcvx= fcvx +uxmid*uymid*dx
-                fcvy= fcvy +uymid*uymid*dx
+               ii=xstart(1)+i-1
+               xm=real(ii,mytype)*dx
+               ! write(*,*) 'xm = ', xm
+               ! write(*,*) 'Calculating force at lower y boundary', [xm,ym,zm]
+
+               !momentum flux
+               call crossProduct(angularVelocity(iv,2:4),[xm,ym,zm]-position(iv,:),rotationalComponent)
+               uxmid = half*(ux1(i,j,k)+ux1(i+1,j,k)) - linearVelocity(iv,1) + rotationalComponent(1)
+               uymid = half*(uy1(i,j,k)+uy1(i+1,j,k)) - linearVelocity(iv,2) + rotationalComponent(2)
+               uzmid = half*(uz1(i,j,k)+uz1(i+1,j,k)) - linearVelocity(iv,3) + rotationalComponent(3)
+
+                fcvx = fcvx +uxmid*uymid*dx*dz
+                fcvy = fcvy +uymid*uymid*dx*dz
+                fcvz = fcvz +uymid*uzmid*dx*dz
+
 
                 !pressure
                 prmid = half*(ppi1(i,j,k)+ppi1(i+1,j,k))
-                fpry = fpry -prmid*dx
+                fpry = fpry -prmid*dx*dz
 
                 !viscous term
                 dudymid = half*(tc1(i,j,k)+tc1(i+1,j,k))
                 dvdxmid = half*(tb1(i,j,k)+tb1(i+1,j,k))
                 dvdymid = half*(td1(i,j,k)+td1(i+1,j,k))
-                fdix = fdix +(xnu*(dudymid+dvdxmid)*dx)
-                fdiy = fdiy +two*xnu*dvdymid*dx
+                dwdymid = half*(tf1(i,j,k)+tf1(i+1,j,k))
+                dvdzmid = half*(th1(i,j,k)+th1(i+1,j,k))
+  
+                fdix = fdix + (xnu*(dudymid+dvdxmid)*dx*dz)
+                fdiy = fdiy + two*xnu*dvdymid*dx*dz
+                fdiz = fdiz + (xnu*(dwdymid+dvdzmid)*dx*dz)
 
              enddo
              tconvxl(kk)=tconvxl(kk)+fcvx
              tconvyl(kk)=tconvyl(kk)+fcvy
+             tconvzl(kk)=tconvzl(kk)+fcvz
+
              tpresyl(kk)=tpresyl(kk)+fpry
              tdiffxl(kk)=tdiffxl(kk)+fdix
              tdiffyl(kk)=tdiffyl(kk)+fdiy
+             tdiffzl(kk)=tdiffzl(kk)+fdiz
+
           enddo
        endif
        !AB and DC : y-pencils
        !AB
        if ((icvlf(iv).ge.ystart(1)).and.(icvlf(iv).le.yend(1))) then
           i=icvlf(iv)-ystart(1)+1
-          do k=1,ysize(3)
-             kk=ystart(3)-1+k
+          ii=icvlf(iv)-1
+          xm=real(ii,mytype)*dx
+          do k=zcvlf_ly(iv),zcvrt_ly(iv)
+             kk=ystart(3)+k-1
+             zm=real(kk,mytype)*dz
              fcvx=zero
              fcvy=zero
+             fcvz=zero
              fprx=zero
              fdix=zero
              fdiy=zero
+             fdiz=zero
              do j=jcvlw_ly(iv),jcvup_ly(iv)-1
+
+                jj=ystart(2)+j-1
+                ym=real(jj,mytype)*dz
+               !  write(*,*) 'Calculating force at left x boundary', [xm,ym,zm]
                 !momentum flux
-                uxmid = half*(ux2(i,j,k)+ux2(i,j+1,k))
-                uymid = half*(uy2(i,j,k)+uy2(i,j+1,k))
-                fcvx= fcvx -uxmid*uxmid*del_y(j)
-                fcvy= fcvy -uxmid*uymid*del_y(j)
+                call crossProduct(angularVelocity(iv,2:4),[xm,ym,zm]-position(iv,:),rotationalComponent)
+                uxmid = half*(ux2(i,j,k)+ux2(i,j+1,k)) - linearVelocity(iv,1) + rotationalComponent(1)
+                uymid = half*(uy2(i,j,k)+uy2(i,j+1,k)) - linearVelocity(iv,2) + rotationalComponent(2)
+                uzmid = half*(uz2(i,j,k)+uz2(i,j+1,k)) - linearVelocity(iv,3) + rotationalComponent(3)
+
+
+                fcvx = fcvx -uxmid*uxmid*del_y(j)*dz
+                fcvy = fcvy -uxmid*uymid*del_y(j)*dz
+                fcvz = fcvz -uxmid*uzmid*del_y(j)*dz
+
 
                 !pressure
-                prmid=half*(ppi2(i,j,k)+ppi2(i,j+1,k))
-                fprx = fprx +prmid*del_y(j)
+                prmid = half*(ppi2(i,j,k)+ppi2(i,j+1,k))
+                fprx = fprx +prmid*del_y(j)*dz
 
                 !viscous term
                 dudxmid = half*(ta2(i,j,k)+ta2(i,j+1,k))
                 dudymid = half*(tc2(i,j,k)+tc2(i,j+1,k))
                 dvdxmid = half*(tb2(i,j,k)+tb2(i,j+1,k))
-                fdix = fdix -two*xnu*dudxmid*del_y(j)
-                fdiy = fdiy -xnu*(dvdxmid+dudymid)*del_y(j)
+                dwdxmid = half*(te2(i,j,k)+te2(i,j+1,k))
+                dudzmid = half*(tg2(i,j,k)+tg2(i,j+1,k))
+
+                fdix = fdix -two*xnu*dudxmid*del_y(j)*dz
+                fdiy = fdiy -xnu*(dvdxmid+dudymid)*del_y(j)*dz
+                fdiz = fdiz -xnu*(dwdxmid+dudzmid)*del_y(j)*dz
              enddo
              tconvxl(kk)=tconvxl(kk)+fcvx
              tconvyl(kk)=tconvyl(kk)+fcvy
+             tconvzl(kk)=tconvzl(kk)+fcvz
+
              tpresxl(kk)=tpresxl(kk)+fprx
              tdiffxl(kk)=tdiffxl(kk)+fdix
              tdiffyl(kk)=tdiffyl(kk)+fdiy
+             tdiffzl(kk)=tdiffzl(kk)+fdiz
+
           enddo
        endif
        !DC
        if ((icvrt(iv).ge.ystart(1)).and.(icvrt(iv).le.yend(1))) then
           i=icvrt(iv)-ystart(1)+1
-          do k=1,ysize(3)
+          ii=icvrt(iv)
+          xm=real(ii,mytype)*dx
+          do k=zcvlf_ly(iv),zcvrt_ly(iv)
              kk=ystart(3)-1+k
+             zm=real(kk,mytype)*dz
              fcvx=zero
              fcvy=zero
+             fcvz=zero
              fprx=zero
              fdix=zero
              fdiy=zero
-             do j=jcvlw_ly(iv),jcvup_ly(iv)-1
+             fdiz=zero
+             do j=jcvlw_ly(iv),jcvup_ly(iv)-1 !!!What's going on here?
+                jj=ystart(2)+j-1
+                ym=real(jj,mytype)*dy
+               !  write(*,*) 'Calculating force at right x boundary', [xm,ym,zm]
+
                 !momentum flux
-                uxmid = half*(ux2(i,j,k)+ux2(i,j+1,k))
-                uymid = half*(uy2(i,j,k)+uy2(i,j+1,k))
-                fcvx= fcvx +uxmid*uxmid*del_y(j)
-                fcvy= fcvy +uxmid*uymid*del_y(j)
+                call crossProduct(angularVelocity(iv,2:4),[xm,ym,zm]-position(iv,:),rotationalComponent)
+                uxmid = half*(ux2(i,j,k)+ux2(i,j+1,k)) - linearVelocity(iv,1) + rotationalComponent(1)
+                uymid = half*(uy2(i,j,k)+uy2(i,j+1,k)) - linearVelocity(iv,2) + rotationalComponent(2)
+                uzmid = half*(uz2(i,j,k)+uz2(i,j+1,k)) - linearVelocity(iv,3) + rotationalComponent(3)
+
+
+                fcvx = fcvx + uxmid*uxmid*del_y(j)*dz
+                fcvy = fcvy + uxmid*uymid*del_y(j)*dz
+                fcvz = fcvz + uxmid*uzmid*del_y(j)*dz
+
 
                 !pressure
-                prmid=half*(ppi2(i,j,k)+ppi2(i,j+1,k))
-                fprx = fprx -prmid*del_y(j)
+                prmid = half*(ppi2(i,j,k)+ppi2(i,j+1,k))
+                fprx = fprx -prmid*del_y(j)*dz
 
                 !viscous term
                 dudxmid = half*(ta2(i,j,k)+ta2(i,j+1,k))
                 dudymid = half*(tc2(i,j,k)+tc2(i,j+1,k))
                 dvdxmid = half*(tb2(i,j,k)+tb2(i,j+1,k))
-                fdix = fdix +two*xnu*dudxmid*del_y(j)
-                fdiy = fdiy +xnu*(dvdxmid+dudymid)*del_y(j)
+                dwdxmid = half*(te2(i,j,k)+te2(i,j+1,k))
+                dudzmid = half*(tg2(i,j,k)+tg2(i,j+1,k))
+
+                fdix = fdix + two*xnu*dudxmid*del_y(j)*dz
+                fdiy = fdiy + xnu*(dvdxmid+dudymid)*del_y(j)*dz
+                fdiz = fdiz + xnu*(dwdxmid+dudzmid)*del_y(j)*dz
+
              enddo
              tconvxl(kk)=tconvxl(kk)+fcvx
              tconvyl(kk)=tconvyl(kk)+fcvy
+             tconvzl(kk)=tconvzl(kk)+fcvz
+
              tpresxl(kk)=tpresxl(kk)+fprx
              tdiffxl(kk)=tdiffxl(kk)+fdix
              tdiffyl(kk)=tdiffyl(kk)+fdiy
+             tdiffzl(kk)=tdiffzl(kk)+fdiz
+
           enddo
        endif
+
+       !Left & Right : 
+       !Left
+       if ((zcvlf(iv).ge.xstart(3)).and.(zcvlf(iv).le.xend(3))) then
+         k=zcvlf(iv)-xstart(3)+1
+         kk=zcvlf(iv)
+         zm=real(kk,mytype)*dz
+ 
+         fcvx=zero
+         fcvy=zero
+         fcvz=zero
+         fprz=zero
+         fdix=zero
+         fdiy=zero
+         fdiz=zero
+         do j=jcvlw_lx(iv),jcvup_lx(iv)
+          kk = xstart(2)-1+j
+          jj = xstart(2)-1+j
+
+          ym=real(jj,mytype)*dy
+            do i=icvlf_lx(iv),icvrt_lx(iv)-1
+               ii=xstart(1)+i-1
+               xm=real(ii,mytype)*dx
+               ! write(*,*) 'Calculating force at left z boundary', [xm,ym,zm]
+
+               !momentum flux
+               call crossProduct(angularVelocity(iv,2:4),[xm,ym,zm]-position(iv,:),rotationalComponent)
+               uxmid = half*(ux1(i,j,k)+ux1(i+1,j,k)) - linearVelocity(iv,1) + rotationalComponent(1)
+               uymid = half*(uy1(i,j,k)+uy1(i+1,j,k)) - linearVelocity(iv,2) + rotationalComponent(2)
+               uzmid = half*(uz1(i,j,k)+uz1(i+1,j,k)) - linearVelocity(iv,3) + rotationalComponent(3)
+ 
+               fcvx= fcvx +uxmid*uzmid*dx*dy
+               fcvy= fcvy +uymid*uzmid*dx*dy
+               fcvz= fcvz +uzmid*uzmid*dx*dy
+ 
+               !pressure
+               prmid = half*(ppi1(i,j,k)+ppi1(i+1,j,k))
+               fprz = fprz -prmid*dx*dy
+ 
+               !viscous term
+               dudzmid = half*(tg1(i,j,k)+tg1(i+1,j,k))
+               dwdxmid = half*(te1(i,j,k)+te1(i+1,j,k))
+               dvdzmid = half*(th1(i,j,k)+th1(i+1,j,k))
+               dwdymid = half*(tf1(i,j,k)+tf1(i+1,j,k))
+               dwdzmid = half*(ti1(i,j,k)+ti1(i+1,j,k))
+                                                                   
+               fdix = fdix +(xnu*(dudzmid+dwdxmid)*dx*dy)
+               fdiy = fdiy +(xnu*(dvdzmid+dwdymid)*dx*dy)
+               fdiz = fdiz +two*xnu*dwdzmid*dx*dy
+            enddo
+         enddo
+ !print*, kk
+ !        drag3(kk)=drag3(kk)+fcvx   ! Should be size ny
+ !        print*, drag3(kk)
+         tconvxl2(kk)=tconvxl2(kk)+fcvx
+         tconvyl2(kk)=tconvyl2(kk)+fcvy
+         tconvzl2(kk)=tconvzl2(kk)+fcvz
+         tpreszl(kk) =tpreszl(kk) +fprz
+         tdiffxl2(kk)=tdiffxl2(kk)+fdix
+         tdiffyl2(kk)=tdiffyl2(kk)+fdiy
+         tdiffzl2(kk)=tdiffzl2(kk)+fdiz        
+      endif 
+      !Right
+      if ((zcvrt(iv).ge.xstart(3)).and.(zcvrt(iv).le.xend(3))) then
+         k=zcvrt(iv)-xstart(3)+1
+         kk=zcvrt(iv)
+         zm=real(kk,mytype)*dz
+ !        kk=nrank+1
+ 
+         fcvx=zero
+         fcvy=zero
+         fcvz=zero
+         fprz=zero
+         fdix=zero
+         fdiy=zero
+         fdiz=zero
+ !        do k=1,xsize(3)
+         do j=jcvlw_lx(iv),jcvup_lx(iv)
+         !  kk = xstart(2)-1+j 
+          jj = xstart(2)-1+j
+          ym=real(jj,mytype)*dy
+           do i=icvlf_lx(iv),icvrt_lx(iv)-1
+             ii=xstart(1)+i-1
+             xm=real(ii,mytype)*dx
+               !momentum flux
+             call crossProduct(angularVelocity(iv,2:4),[xm,ym,zm]-position(iv,:),rotationalComponent)
+            !  write(*,*) 'Calculating force at right z boundary', [xm,ym,zm]
+
+             uxmid = half*(ux1(i,j,k)+ux1(i+1,j,k)) - linearVelocity(iv,1) + rotationalComponent(1)
+             uymid = half*(uy1(i,j,k)+uy1(i+1,j,k)) - linearVelocity(iv,2) + rotationalComponent(2)
+             uzmid = half*(uz1(i,j,k)+uz1(i+1,j,k)) - linearVelocity(iv,3) + rotationalComponent(3)
+ 
+               fcvx= fcvx -uxmid*uzmid*dx*dy
+               fcvy= fcvy -uymid*uzmid*dx*dy
+               fcvz= fcvz -uzmid*uzmid*dx*dy
+ 
+               !pressure
+               prmid = half*(ppi1(i,j,k)+ppi1(i+1,j,k))
+               fprz = fprz +prmid*dx*dy
+ 
+               !viscous term
+               dudzmid = half*(tg1(i,j,k)+tg1(i+1,j,k))
+               dwdxmid = half*(te1(i,j,k)+te1(i+1,j,k))
+               dvdzmid = half*(th1(i,j,k)+th1(i+1,j,k))
+               dwdymid = half*(tf1(i,j,k)+tf1(i+1,j,k))
+               dwdzmid = half*(ti1(i,j,k)+ti1(i+1,j,k))
+                                                   
+               fdix = fdix -(xnu*(dudzmid+dwdxmid)*dx*dy)
+               fdiy = fdiy -(xnu*(dvdzmid+dwdymid)*dx*dy)
+               fdiz = fdiz -two*xnu*dwdzmid*dx*dy
+ 
+            enddo
+         enddo
+ !        drag4(kk)=drag4(kk)+fcvx    ! Should be size ny
+         tconvxl2(kk)=tconvxl2(kk)+fcvx
+         tconvyl2(kk)=tconvyl2(kk)+fcvy
+         tconvzl2(kk)=tconvzl2(kk)+fcvz
+         tpreszl(kk) =tpreszl(kk) +fprz
+         tdiffxl2(kk)=tdiffxl2(kk)+fdix
+         tdiffyl2(kk)=tdiffyl2(kk)+fdiy
+         tdiffzl2(kk)=tdiffzl2(kk)+fdiz
+      endif     
+     
        call MPI_ALLREDUCE(tconvxl,tconvx,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
        call MPI_ALLREDUCE(tconvyl,tconvy,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+       call MPI_ALLREDUCE(tconvzl,tconvz,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+
        call MPI_ALLREDUCE(tpresxl,tpresx,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
        call MPI_ALLREDUCE(tpresyl,tpresy,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
        call MPI_ALLREDUCE(tdiffxl,tdiffx,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
        call MPI_ALLREDUCE(tdiffyl,tdiffy,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+       call MPI_ALLREDUCE(tdiffzl,tdiffz,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+
+       call MPI_ALLREDUCE(tconvxl2,tconvx2,ny,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+       call MPI_ALLREDUCE(tconvyl2,tconvy2,ny,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+       call MPI_ALLREDUCE(tconvzl2,tconvz2,ny,real_type,MPI_SUM,MPI_COMM_WORLD,code)     
+       call MPI_ALLREDUCE(tpreszl, tpresz ,ny,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+       call MPI_ALLREDUCE(tdiffxl2,tdiffx2,ny,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+       call MPI_ALLREDUCE(tdiffyl2,tdiffy2,ny,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+       call MPI_ALLREDUCE(tdiffzl2,tdiffz2,ny,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+  
 
+       tp1 = sum(tpresx(:))/dt
+       tp2 = sum(tpresy(:))/dt
+       tp3 = sum(tpresz(:))/dt
+    
+       mom1 = sum(tunstx(:)) + sum(tconvx(:)) + (-2.0*tconv2_sign+1.0)*sum(tconvx2(:)) !if tconv2sign == 1.0, multiply by -1
+       mom2 = sum(tunsty(:)) + sum(tconvy(:)) + (-2.0*tconv2_sign+1.0)*sum(tconvy2(:))
+       mom3 = sum(tunstz(:)) + sum(tconvz(:)) + (-2.0*tconv2_sign+1.0)*sum(tconvz2(:))
+  
+       dra1(iv) = (sum(tdiffx) + sum(tdiffx2) + tp1 - mom1)
+       dra2(iv) = (sum(tdiffy) + sum(tdiffy2) + tp2 - mom2)
+       dra3(iv) = (sum(tdiffz) + sum(tdiffz2) + tp3 - mom3)
+       
        do k=1,zsize(3)
 
           tpresx(k)=tpresx(k)/dt
           tpresy(k)=tpresy(k)/dt
+          tpresz(k)=tpresz(k)/dt
 
-          xmom    = tunstx(k)+tconvx(k)
-          ymom    = tunsty(k)+tconvy(k)
-          xDrag(k) = two*(tdiffx(k)+tpresx(k)-xmom)
-          yLift(k) = two*(tdiffy(k)+tpresy(k)-ymom)
+
+          xmom    = tunstx(k)+tconvx(k)+tconvx2(k)
+          ymom    = tunsty(k)+tconvy(k)+tconvy2(k)
+          zmom    = tunstz(k)+tconvz(k)+tconvz2(k)
+          xDrag(k) = (tdiffx(k)+tdiffx2(k)+tpresx(k)-xmom)
+          yLift(k) = (tdiffy(k)+tdiffy2(k)+tpresy(k)-ymom)
+          zLat(k)  = (tdiffz(k)+tdiffz2(k)+tpresz(k)-zmom)
 
        enddo
 
@@ -498,36 +1047,877 @@ contains
        xDrag_mean = sum(xDrag(:))/real(nz,mytype)
        yLift_mean = sum(yLift(:))/real(nz,mytype)
 
-       !     if ((itime==ifirst).or.(itime==0)) then
-       !        if (nrank .eq. 0) then
-       !        write(filename,"('aerof',I1.1)") iv
-       !        open(38+(iv-1),file=filename,status='unknown',form='formatted')
-       !        endif
-       !     endif
-       if (nrank .eq. 0) then
-          write(38,*) t,xDrag_mean,yLift_mean
-          call flush(38)
-       endif
-       if (mod(itime, icheckpoint).eq.0) then
-          if (nrank .eq. 0) then
-             write(filename,"('forces.dat',I7.7)") itime
-             call system("cp forces.dat " //filename)
-          endif
+      !  xDrag_tot = sum(xDrag(:))
+      !  yLift_tot = sum(yLift(:))
+      !  zLat_tot  = sum(zLat(:))
+      
+      if ((itime==ifirst).or.(itime==0)) then
+         
+      endif
+       if ((nrank .eq. 0).and.(record_var.eq.1).and.(mod(itime,ilist)==0)) then
+         ! write(*,*) 'TIME STEP = ', itime
+         write(38+(iv-1),*) t,dra1,dra2,dra3, sum(tdiffx), sum(tdiffx2), tp1, -mom1, -sum(tunstx(:)), -sum(tconvx(:)), -sum(tconvx2(:))
+         !  write(*,*) 'written to file number', 38+(iv-1), t, dra1,dra2,dra3
+          call flush(38+(iv-1))
+
+         !  write(12 ,*) t, position(1), position(2), position(3), orientation(1), orientation(2), orientation(3), orientation(4), linearVelocity(1), linearVelocity(2), linearVelocity(3), angularVelocity(1), angularVelocity(2), angularVelocity(3), linearAcceleration(1), linearAcceleration(2), linearAcceleration(3)
        endif
+      !  if (mod(itime, ioutput).eq.0) then
+      !     if (nrank .eq. 0) then
+      !        write(filename,"('forces.dat',I7.7)") itime
+      !        call system("cp forces.dat " //filename)
+      !     endif
+      !  endif
     enddo
 
-    do k = 1, xsize(3)
-       do j = 1, xsize(2)
-          do i = 1, xsize(1)
-             ux11(i,j,k)=ux01(i,j,k)
-             uy11(i,j,k)=uy01(i,j,k)
-             ux01(i,j,k)=ux1(i,j,k)
-             uy01(i,j,k)=uy1(i,j,k)
-          enddo
-       enddo
-    enddo
+    if (torques_flag.eq.0) then !only update velocity fields if torque calculation not called.
+      do k = 1, xsize(3)
+         do j = 1, xsize(2)
+            do i = 1, xsize(1)
+               ux11(i,j,k)=ux01(i,j,k)
+               uy11(i,j,k)=uy01(i,j,k)
+               uz11(i,j,k)=uz01(i,j,k)
+               ux01(i,j,k)=ux1(i,j,k)
+               uy01(i,j,k)=uy1(i,j,k)
+               uz01(i,j,k)=uz1(i,j,k)
+            enddo
+         enddo
+      enddo
+   endif
 
     return
 
   end subroutine force
+
+
+  subroutine torque_calc(ux1,uy1,uz1,ep1,dra1,dra2,dra3,record_var)
+
+   USE param
+   USE variables
+   USE decomp_2d
+   USE MPI
+   USE ibm_param
+   USE ellipsoid_utils, only : CrossProduct,centrifugal_force,coriolis_force
+
+   use var, only : ta1, tb1, tc1, td1, te1, tf1, tg1, th1, ti1, di1
+   use var, only : ux2, uy2, uz2, ta2, tb2, tc2, td2, te2, tf2, tg2, th2, ti2, di2
+   use var, only : ux3, uy3, uz3, ta3, tb3, tc3, td3, te3, tf3, tg3, th3, ti3, di3
+     
+ 
+   implicit none
+   character(len=30) :: filename, filename2
+   integer :: nzmsize
+   integer                                             :: i, iv, j, k, kk, code, jj, ii
+   integer                                             :: nvect1,nvect2,nvect3
+
+   real(mytype), dimension(xsize(1),xsize(2),xsize(3)),intent(in) :: ux1, uy1, uz1
+   real(mytype), dimension(xsize(1),xsize(2),xsize(3)),intent(in) :: ep1
+   integer, intent(in) ::record_var
+   real(mytype), intent(out)                                       :: dra1(10),dra2(10),dra3(10)
+
+   real(mytype), dimension(ysize(1),ysize(2),ysize(3)) :: ppi2
+   real(mytype), dimension(zsize(1),zsize(2),zsize(3)) :: ppi3
+
+   real(mytype), dimension(nz) :: yLift,xDrag, zLat
+   real(mytype) :: yLift_mean,xDrag_mean,zLat_mean
+   real(mytype) :: xm,ym,zm,rotationalComponent(3)
+
+   real(mytype), dimension(ny-1) :: del_y
+
+   real(mytype), dimension(nz) :: tunstxl, tunstyl, tunstzl
+   real(mytype), dimension(nz) :: tconvxl,tconvyl,tconvzl
+   real(mytype), dimension(nz) :: tpresxl,tpresyl
+   real(mytype), dimension(nz) :: tdiffxl,tdiffyl,tdiffzl
+
+   real(mytype), dimension(nz) :: tunstx, tunsty, tunstz
+   real(mytype), dimension(nz) :: tconvx,tconvy,tconvz
+   real(mytype), dimension(nz) :: tpresx,tpresy
+   real(mytype), dimension(nz) :: tdiffx,tdiffy,tdiffz
+
+       
+   
+   real(mytype), dimension(ny) :: tconvxl2, tconvyl2, tconvzl2
+   real(mytype), dimension(ny) :: tdiffxl2, tdiffyl2, tdiffzl2
+   real(mytype), dimension(ny) :: tconvx2, tconvy2, tconvz2
+   real(mytype), dimension(ny) :: tdiffx2, tdiffy2, tdiffz2
+   real(mytype), dimension(ny) :: tpreszl, tpresz
+   
+   
+   real(mytype) :: uxmid,uymid,uzmid,prmid
+   real(mytype) :: dudxmid,dudymid,dudzmid,dvdxmid,dvdymid,dvdzmid
+   real(mytype) :: dwdxmid,dwdymid,dwdzmid
+   real(mytype) :: fac,fac1,fac2,fac3,tsumx,tsumy,tsumz,centrifugal(3),coriolis(3)
+   real(mytype) :: fcvx,fcvy,fcvz,fprx,fpry,fprz,fdix,fdiy,fdiz
+   real(mytype) :: xmom,ymom,zmom
+   real(mytype), dimension(ny) :: ztpresx, ztpresy
+   real(mytype), dimension(nz) :: zyLift, zxDrag, zzLat
+   real(mytype) :: zyLift_mean, zxDrag_mean, zzLat_mean
+
+
+   real(mytype), dimension(nz) :: drag1, drag2, drag11, drag22
+   real(mytype), dimension(nz) :: drag3, drag4, drag33, drag44
+   real(mytype) :: mom1, mom2, mom3, tp1, tp2, tp3
+   real(mytype) :: radial(3),angular_velocity_result(3)
+
+  !  write(*,*) 'Inside FORCE'
+
+   dra1(:) = zero
+   dra2(:) = zero
+   dra3(:) = zero
+ 
+
+   nvect1=xsize(1)*xsize(2)*xsize(3)
+   nvect2=ysize(1)*ysize(2)*ysize(3)
+   nvect3=zsize(1)*zsize(2)*zsize(3)
+
+   do jj = 1, ny-1
+      if (istret.eq.0) then
+         del_y(jj)=dy
+      else
+         del_y(jj)=yp(jj+1)-yp(jj) 
+      endif
+   enddo
+
+   if (itime.eq.1) then
+     do iv=1,nvol
+        if ((nrank .eq. 0).and.(record_var.eq.1)) then
+           write(filename,"('torques.dat',I1.1)") iv
+           open(45+(iv-1),file=filename,status='unknown',form='formatted')
+           write(45+(iv-1),*) t, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero
+           call flush(45+(iv-1))
+           ! write(*,*) 'Opened file: ', filename, 'number = ', 38+(iv-1)
+        endif
+        if ((nrank.eq.0).and.(torq_debug.eq.1).and.(iv.eq.1)) then
+            open(100,file="backside.dat",status='unknown',form='formatted')
+            open(101,file="frontside.dat",status='unknown',form='formatted')
+            open(102,file="lowerside.dat",status='unknown',form='formatted')
+            open(103,file="upperside.dat",status='unknown',form='formatted')
+            open(104,file="leftside.dat",status='unknown',form='formatted')
+            open(105,file="rightside.dat",status='unknown',form='formatted')
+            open(106,file="ytorque_decomposition.dat",status='unknown',form='formatted')
+            open(107,file="ztorque_decomposition.dat",status='unknown',form='formatted')
+        endif
+
+     enddo
+      do k = 1, xsize(3)
+         do j = 1, xsize(2)
+            do i = 1, xsize(1)
+               ux11(i,j,k)=ux1(i,j,k)
+               uy11(i,j,k)=uy1(i,j,k)
+               uz11(i,j,k)=uz1(i,j,k)
+            enddo
+         enddo
+      enddo
+      return
+   elseif (itime.eq.2) then
+      ! if ((nrank .eq. 0).and.(record_var.eq.1)) then
+      !    do i = 1,nvol 
+      !       ! write(*,*) "TRYING TO WRITE AT ITIME=2, t = ", t
+      !       write(45+(iv-1),*) t, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero, zero
+      !       call flush(45+(iv-1))
+      !    enddo
+      ! endif
+      do k = 1, xsize(3)
+         do j = 1, xsize(2)
+            do i = 1, xsize(1)
+               ux01(i,j,k)=ux1(i,j,k)
+               uy01(i,j,k)=uy1(i,j,k)
+               uz01(i,j,k)=uz1(i,j,k)
+            enddo
+         enddo
+      enddo
+      return
+   endif
+
+   call derx (ta1,ux1,di1,sx,ffx,fsx,fwx,xsize(1),xsize(2),xsize(3),0,1)    ! dudx !x is 1
+   call derx (tb1,uy1,di1,sx,ffxp,fsxp,fwxp,xsize(1),xsize(2),xsize(3),1,2) ! dvdx !y is 2
+   call derx (te1,uz1,di1,sx,ffxp,fsxp,fwxp,xsize(1),xsize(2),xsize(3),1,3) ! dw/dx!z is 3
+
+   call transpose_x_to_y(ta1,ta2) ! dudx
+   call transpose_x_to_y(tb1,tb2) ! dvdx
+   call transpose_x_to_y(te1,te2) ! dw/dx
+
+   call transpose_x_to_y(ux1,ux2)
+   call transpose_x_to_y(uy1,uy2)
+   call transpose_x_to_y(uz1,uz2)
+   call transpose_x_to_y(ppi1,ppi2)
+
+   call dery (tc2,ux2,di2,sy,ffyp,fsyp,fwyp,ppy,ysize(1),ysize(2),ysize(3),1,1) ! dudy !x is 1
+   call dery (td2,uy2,di2,sy,ffy,fsy,fwy,ppy,ysize(1),ysize(2),ysize(3),0,2)    ! dvdy !y is 2
+   call dery (tf2,uz2,di2,sy,ffyp,fsyp,fwyp,ppy,ysize(1),ysize(2),ysize(3),1,3) ! dw/dy!z is 3
+   
+
+   call transpose_y_to_z(ux2,ux3)
+   call transpose_y_to_z(uy2,uy3)
+   call transpose_y_to_z(uz2,uz3)
+
+   call derz (tg3,ux3,di3,sz,ffzp,fszp,fwzp,zsize(1),zsize(2),zsize(3),1,1)  ! du/dz
+   call derz (th3,uy3,di3,sz,ffzp,fszp,fwzp,zsize(1),zsize(2),zsize(3),1,2)  ! dv/dz
+   call derz (ti3,uz3,di3,sz,ffz,fsz,fwz,zsize(1),zsize(2),zsize(3),0,3)     ! dw/dz
+ 
+   call transpose_z_to_y(tg3,tg2) ! du/dz
+   call transpose_z_to_y(th3,th2) ! dv/dz
+   call transpose_z_to_y(ti3,ti2) ! 
+
+   call transpose_y_to_x(tc2,tc1) ! dudy
+   call transpose_y_to_x(td2,td1) ! dvdy
+   call transpose_y_to_x(th2,th1) ! dv/dz
+   call transpose_y_to_x(tf2,tf1) ! dw/dy
+   call transpose_y_to_x(tg2,tg1) ! 
+   call transpose_y_to_x(ti2,ti1) ! 
+   !*****************************************************************
+   !      Drag and Lift coefficients
+   !*****************************************************************
+   do iv=1,nvol
+
+      !*****************************************************************
+      !        Calculation of the momentum terms
+      !*****************************************************************
+      !
+      !     Calculation of the momentum terms. First we integrate the 
+      !     time rate of momentum along the CV.
+      !
+      !         Excluding the body internal cells. If the centroid 
+      !         of the cell falls inside the body the cell is 
+      !         excluded.
+
+      tunstxl=zero
+      tunstyl=zero
+      tunstzl=zero
+      do k=zcvlf_lx(iv),zcvrt_lx(iv)
+         tsumx=zero
+         tsumy=zero
+         tsumz=zero
+         zm=real(xstart(3)+k-1,mytype)*dz
+         do j=jcvlw_lx(iv),jcvup_lx(iv)
+           ym=real(xstart(2)+j-1,mytype)*dy
+            do i=icvlf_lx(iv),icvrt_lx(iv)
+              xm=real(xstart(1)+i-1,mytype)*dx
+
+              fac1   = (onepfive*ux1(i,j,k)-two*ux01(i,j,k)+half*ux11(i,j,k))*(one-ep1(i,j,k))
+              fac2   = (onepfive*uy1(i,j,k)-two*uy01(i,j,k)+half*uy11(i,j,k))*(one-ep1(i,j,k))
+              fac3   = (onepfive*uz1(i,j,k)-two*uz01(i,j,k)+half*uz11(i,j,k))*(one-ep1(i,j,k))
+
+               ! call coriolis_force(angularVelocity,[fac1,fac2,fac3],coriolis)
+               ! call centrifugal_force(angularVelocity, [xm,ym,zm]-position,centrifugal)
+               !     The velocity time rate has to be relative to the cell center, 
+               !     and not to the nodes, because, here, we have an integral 
+               !     relative to the volume, and, therefore, this has a sense 
+               !     of a "source".
+               !         fac   = (1.5*ux1(i,j,k)-2.0*ux01(i,j,k)+0.5*ux11(i,j,k))*epcv1(i,j,k)
+               call crossProduct(([fac1,fac2,fac3]),[xm,ym,zm]-position(iv,:), angular_velocity_result)
+               tsumx = tsumx+angular_velocity_result(1)*dx*del_y(j+(xstart(2)-1))*dz/dt    !tsumx+fac*dx*dy/dt
+               ! tsumx = tsumx+fac1*dx*del_y(j+xstart(2)-1)*dz/dt
+               !sumx(k) = sumx(k)+dudt1*dx*dy
+
+               !         fac   = (1.5*uy1(i,j,k)-2.0*uy01(i,j,k)+0.5*uy11(i,j,k))*epcv1(i,j,k)
+               tsumy = tsumy+angular_velocity_result(2)*dx*del_y(j+(xstart(2)-1))*dz/dt !tsumy+fac*dx*dy/dt
+               ! tsumy = tsumy+fac2*dx*del_y(j+xstart(2)-1)*dz/dt
+               !sumy(k) = sumy(k)+dudt1*dx*dy
+
+               tsumz = tsumz+angular_velocity_result(3)*dx*del_y(j+(xstart(2)-1))*dz/dt     
+               ! tsumz = tsumz+fac3*dx*del_y(j+xstart(2)-1)*dz/dt
+            enddo
+         enddo
+         tunstxl(xstart(3)-1+k)=tsumx
+         tunstyl(xstart(3)-1+k)=tsumy
+         tunstzl(xstart(3)-1+k)=tsumz
+      enddo
+      call MPI_ALLREDUCE(tunstxl,tunstx,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+      call MPI_ALLREDUCE(tunstyl,tunsty,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+      call MPI_ALLREDUCE(tunstzl,tunstz,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+
+!!$!*********************************************************************************
+!!$!     Secondly, the surface momentum fluxes
+!!$!*********************************************************************************
+!!$
+!!$!       (icvlf)      (icvrt)
+!!$!(jcvup) B____________C  
+!!$!        \            \
+!!$!        \     __     \
+!!$!        \    \__\    \
+!!$!        \            \
+!!$!        \       CV   \
+!!$!(jcvlw) A____________D     
+      
+ drag1(:)=0.
+ drag2(:)=0.
+ drag3(:)=0.
+ drag4(:)=0.
+ 
+ drag11(:)=0.
+ drag22(:)=0.
+ drag33(:)=0.
+ drag44(:)=0.
+
+      tconvxl=zero
+      tconvyl=zero
+      tconvzl=zero
+      tdiffxl=zero
+      tdiffyl=zero
+      tdiffzl=zero
+      tpresxl=zero
+      tpresyl=zero
+      tpreszl=zero
+
+      tconvxl2=zero
+      tconvyl2=zero
+      tconvzl2=zero
+      tdiffxl2=zero
+      tdiffyl2=zero
+      tdiffzl2=zero  
+      !BC and AD : x-pencils
+      !AD
+      if ((jcvlw(iv).ge.xstart(2)).and.(jcvlw(iv).le.xend(2))) then
+         j=jcvlw(iv)-xstart(2)+1
+         jj=jcvlw(iv)
+         ym=real(jj,mytype)*dy
+         do k=zcvlf_lx(iv),zcvrt_lx(iv)
+            kk=xstart(3)-1+k
+            zm=real(kk,mytype)*dz
+            fcvx=zero
+            fcvy=zero
+            fcvz=zero
+            fprx=zero
+            fpry=zero
+            fprz=zero
+            fdix=zero
+            fdiy=zero
+            fdiz=zero
+            do i=icvlf_lx(iv),icvrt_lx(iv)-1
+               
+               ii=xstart(1)+i-1
+               xm=real(ii,mytype)*dx
+              !  write(*,*) 'Calculating force at upper y boundary', [xm,ym,zm]
+               radial = [xm,ym,zm]-position(iv,:)
+
+               !momentum flux
+               call crossProduct(angularVelocity(iv,2:4),[xm,ym,zm]-position(iv,:),rotationalComponent)
+               uxmid = half*(ux1(i,j,k)+ux1(i+1,j,k)) - linearVelocity(iv,1) + rotationalComponent(1)
+               uymid = half*(uy1(i,j,k)+uy1(i+1,j,k)) - linearVelocity(iv,2) + rotationalComponent(2)
+               uzmid = half*(uz1(i,j,k)+uz1(i+1,j,k)) - linearVelocity(iv,3) + rotationalComponent(3)
+
+               fcvx = fcvx -(uymid*radial(3)-uzmid*radial(2))*uymid*dx*dz
+               fcvy = fcvy -(uzmid*radial(1)-uxmid*radial(3))*uymid*dx*dz
+               fcvz = fcvz -(uxmid*radial(2)-uymid*radial(1))*uymid*dx*dz
+
+
+               !pressure
+               prmid = half*(ppi1(i,j,k)+ppi1(i+1,j,k))
+               ! fpry  = fpry +prmid*dx*dz*(radial(1)-radial(3))
+               fprz = fprz +prmid*dx*dz*(radial(1))
+               fprx = fprx +prmid*dx*dz*(-radial(3))
+
+               !viscous term
+               dudymid = half*(tc1(i,j,k)+tc1(i+1,j,k))
+               dvdxmid = half*(tb1(i,j,k)+tb1(i+1,j,k))
+               dvdymid = half*(td1(i,j,k)+td1(i+1,j,k))
+               dwdymid = half*(tf1(i,j,k)+tf1(i+1,j,k))
+               dvdzmid = half*(th1(i,j,k)+th1(i+1,j,k))
+
+               fdix = fdix - (xnu*(two*dvdymid)*radial(3)-xnu*(dwdymid+dvdzmid)*radial(2))*dx*dz
+               fdiy = fdiy - (xnu*(dwdymid+dvdzmid)*radial(1)-xnu*(dudymid+dvdxmid)*radial(3))*dx*dz
+               fdiz = fdiz - (xnu*(dudymid+dvdxmid)*radial(2)-xnu*(two*dvdymid)*radial(1))*dx*dz
+
+               ! fdix    = fdix -(xnu*(dudymid+dvdxmid)*dx*dz)
+               ! fdiy    = fdiy -two*xnu*dvdymid*dx*dz
+               ! fdiz    = fdiz -(xnu*(dwdymid+dvdzmid)*dx*dz)
+
+
+            enddo
+
+            tconvxl(1)=tconvxl(1)+fcvx
+            tconvyl(1)=tconvyl(1)+fcvy
+            tconvzl(1)=tconvzl(1)+fcvz
+
+            tpresxl(1)=tpresxl(1)+fprx
+            ! tpresyl(1)=tpresyl(1)+fpry
+            tpreszl(1)=tpreszl(1)+fprz
+
+            tdiffxl(1)=tdiffxl(1)+fdix
+            tdiffyl(1)=tdiffyl(1)+fdiy
+            tdiffzl(1)=tdiffzl(1)+fdiz
+         enddo
+
+      endif
+      !BC
+      if ((jcvup(iv).ge.xstart(2)).and.(jcvup(iv).le.xend(2))) then
+         j=jcvup(iv)-xstart(2)+1
+         jj=jcvup(iv)
+         ym=real(jj,mytype)*dy
+         do k=zcvlf_lx(iv),zcvrt_lx(iv)
+            kk=xstart(3)-1+k   
+            zm=real(kk,mytype)*dz
+            fcvx=zero
+            fcvy=zero
+            fcvz=zero
+            fprx=zero
+            fpry=zero
+            fprz=zero
+            fdix=zero
+            fdiy=zero
+            fdiz=zero
+            do i=icvlf_lx(iv),icvrt_lx(iv)-1
+              ii=xstart(1)+i-1
+              xm=real(ii,mytype)*dx
+              ! write(*,*) 'xm = ', xm
+              ! write(*,*) 'Calculating force at lower y boundary', [xm,ym,zm]
+
+              !momentum flux
+              call crossProduct(angularVelocity(iv,2:4),[xm,ym,zm]-position(iv,:),rotationalComponent)
+              radial = [xm,ym,zm]-position(iv,:)
+              uxmid = half*(ux1(i,j,k)+ux1(i+1,j,k)) - linearVelocity(iv,1) + rotationalComponent(1)
+              uymid = half*(uy1(i,j,k)+uy1(i+1,j,k)) - linearVelocity(iv,2) + rotationalComponent(2)
+              uzmid = half*(uz1(i,j,k)+uz1(i+1,j,k)) - linearVelocity(iv,3) + rotationalComponent(3)
+
+               fcvx = fcvx +(uymid*radial(3)-uzmid*radial(2))*uymid*dx*dz
+               fcvy = fcvy +(uzmid*radial(1)-uxmid*radial(3))*uymid*dx*dz
+               fcvz = fcvz +(uxmid*radial(2)-uymid*radial(1))*uymid*dx*dz
+
+
+               !pressure
+               prmid = half*(ppi1(i,j,k)+ppi1(i+1,j,k))
+               ! fpry = fpry -prmid*dx*dz*(radial(1)-radial(3))
+               fprz = fprz -prmid*dx*dz*(radial(1))
+               fprx = fprx -prmid*dx*dz*(-radial(3))
+
+               !viscous term
+               dudymid = half*(tc1(i,j,k)+tc1(i+1,j,k))
+               dvdxmid = half*(tb1(i,j,k)+tb1(i+1,j,k))
+               dvdymid = half*(td1(i,j,k)+td1(i+1,j,k))
+               dwdymid = half*(tf1(i,j,k)+tf1(i+1,j,k))
+               dvdzmid = half*(th1(i,j,k)+th1(i+1,j,k))
+ 
+               fdix = fdix + (xnu*(two*dvdymid)*radial(3)-xnu*(dwdymid+dvdzmid)*radial(2))*dx*dz
+               fdiy = fdiy + (xnu*(dwdymid+dvdzmid)*radial(1)-xnu*(dudymid+dvdxmid)*radial(3))*dx*dz
+               fdiz = fdiz + (xnu*(dudymid+dvdxmid)*radial(2)-xnu*(two*dvdymid)*radial(1))*dx*dz
+
+               ! fdix = fdix + (xnu*(dudymid+dvdxmid)*dx*dz)
+               ! fdiy = fdiy + (xnu*(two*dvdymid)*dx*dz)
+               ! fdiz = fdiz + (xnu*(dwdymid+dvdzmid)*dx*dz)
+
+            enddo
+            tconvxl(2)=tconvxl(2)+fcvx
+            tconvyl(2)=tconvyl(2)+fcvy
+            tconvzl(2)=tconvzl(2)+fcvz
+
+            tpresxl(2)=tpresxl(2)+fprx
+            ! tpresyl(2)=tpresyl(2)+fpry
+            tpreszl(2)=tpreszl(2)+fprz
+
+            tdiffxl(2)=tdiffxl(2)+fdix
+            tdiffyl(2)=tdiffyl(2)+fdiy
+            tdiffzl(2)=tdiffzl(2)+fdiz
+
+         enddo
+      endif
+      !AB and DC : y-pencils
+      !AB
+      if ((icvlf(iv).ge.ystart(1)).and.(icvlf(iv).le.yend(1))) then
+         i=icvlf(iv)-ystart(1)+1
+         ii=icvlf(iv)-1
+         xm=real(ii,mytype)*dx
+         do k=zcvlf_ly(iv),zcvrt_ly(iv)
+            kk=ystart(3)+k-1
+            zm=real(kk,mytype)*dz
+            fcvx=zero
+            fcvy=zero
+            fcvz=zero
+            fprx=zero
+            fpry=zero
+            fprz=zero
+            fdix=zero
+            fdiy=zero
+            fdiz=zero
+            do j=jcvlw_ly(iv),jcvup_ly(iv)-1
+
+               jj=ystart(2)+j-1
+               ym=real(jj,mytype)*dz
+              !  write(*,*) 'Calculating force at left x boundary', [xm,ym,zm]
+               !momentum flux
+               radial = [xm,ym,zm]-position(iv,:)
+
+               call crossProduct(angularVelocity(iv,2:4),[xm,ym,zm]-position(iv,:),rotationalComponent)
+               uxmid = half*(ux2(i,j,k)+ux2(i,j+1,k)) - linearVelocity(iv,1) + rotationalComponent(1)
+               uymid = half*(uy2(i,j,k)+uy2(i,j+1,k)) - linearVelocity(iv,2) + rotationalComponent(2)
+               uzmid = half*(uz2(i,j,k)+uz2(i,j+1,k)) - linearVelocity(iv,3) + rotationalComponent(3)
+
+               fcvx = fcvx -(uymid*radial(3)-uzmid*radial(2))*uxmid*del_y(j)*dz
+               fcvy = fcvy -(uzmid*radial(1)-uxmid*radial(3))*uxmid*del_y(j)*dz
+               fcvz = fcvz -(uxmid*radial(2)-uymid*radial(1))*uxmid*del_y(j)*dz
+
+
+               !pressure
+               prmid = half*(ppi2(i,j,k)+ppi2(i,j+1,k))
+               ! fprx = fprx +prmid*del_y(j)*dz*(radial(3)-radial(2))
+
+               fpry = fpry + prmid*del_y(j)*dz*(radial(3))
+               fprz = fprz + prmid*del_y(j)*dz*(-radial(2))
+
+               !viscous term
+               dudxmid = half*(ta2(i,j,k)+ta2(i,j+1,k))
+               dudymid = half*(tc2(i,j,k)+tc2(i,j+1,k))
+               dvdxmid = half*(tb2(i,j,k)+tb2(i,j+1,k))
+               dwdxmid = half*(te2(i,j,k)+te2(i,j+1,k))
+               dudzmid = half*(tg2(i,j,k)+tg2(i,j+1,k))
+
+               fdix = fdix - (xnu*(dvdxmid+dudymid)*radial(3)-xnu*(dwdxmid+dudzmid)*radial(2))*del_y(j)*dz
+               fdiy = fdiy - (xnu*(dwdxmid+dudzmid)*radial(1)-xnu*(two*dudxmid)*radial(3))*del_y(j)*dz
+               fdiz = fdiz - (xnu*(two*dudxmid)*radial(2)-xnu*(dvdxmid+dudymid)*radial(1))*del_y(j)*dz
+
+
+               ! fdix = fdix -two*xnu*dudxmid*del_y(j)*dz
+               ! fdiy = fdiy -xnu*(dvdxmid+dudymid)*del_y(j)*dz
+               ! fdiz = fdiz -xnu*(dwdxmid+dudzmid)*del_y(j)*dz
+            enddo
+            tconvxl(3)=tconvxl(3)+fcvx
+            tconvyl(3)=tconvyl(3)+fcvy
+            tconvzl(3)=tconvzl(3)+fcvz
+
+            ! tpresxl(3)=tpresxl(3)+fprx
+            tpresyl(3)=tpresyl(3)+fpry
+            tpreszl(3)=tpreszl(3)+fprz
+
+            tdiffxl(3)=tdiffxl(3)+fdix
+            tdiffyl(3)=tdiffyl(3)+fdiy
+            tdiffzl(3)=tdiffzl(3)+fdiz
+
+         enddo
+      endif
+      !DC
+      if ((icvrt(iv).ge.ystart(1)).and.(icvrt(iv).le.yend(1))) then
+         i=icvrt(iv)-ystart(1)+1
+         ii=icvrt(iv)
+         xm=real(ii,mytype)*dx
+         do k=zcvlf_ly(iv),zcvrt_ly(iv)
+            kk=ystart(3)-1+k
+            zm=real(kk,mytype)*dz
+            fcvx=zero
+            fcvy=zero
+            fcvz=zero
+            fprx=zero
+            fpry=zero
+            fprz=zero
+            fdix=zero
+            fdiy=zero
+            fdiz=zero
+            do j=jcvlw_ly(iv),jcvup_ly(iv)-1 
+               jj=ystart(2)+j-1
+               ym=real(jj,mytype)*dy
+              !  write(*,*) 'Calculating force at right x boundary', [xm,ym,zm]
+
+               radial = [xm,ym,zm]-position(iv,:)
+
+               !momentum flux
+               call crossProduct(angularVelocity(iv,2:4),[xm,ym,zm]-position(iv,:),rotationalComponent)
+               uxmid = half*(ux2(i,j,k)+ux2(i,j+1,k)) - linearVelocity(iv,1) + rotationalComponent(1)
+               uymid = half*(uy2(i,j,k)+uy2(i,j+1,k)) - linearVelocity(iv,2) + rotationalComponent(2)
+               uzmid = half*(uz2(i,j,k)+uz2(i,j+1,k)) - linearVelocity(iv,3) + rotationalComponent(3)
+
+
+               fcvx = fcvx +(uymid*radial(3)-uzmid*radial(2))*uxmid*del_y(j)*dz
+               fcvy = fcvy +(uzmid*radial(1)-uxmid*radial(3))*uxmid*del_y(j)*dz
+               fcvz = fcvz +(uxmid*radial(2)-uymid*radial(1))*uxmid*del_y(j)*dz
+
+
+
+               !pressure
+               prmid = half*(ppi2(i,j,k)+ppi2(i,j+1,k))
+               ! fprx = fprx -prmid*del_y(j)*dz*(radial(3)-radial(2))
+
+               fpry = fpry - prmid*del_y(j)*dz*(radial(3))
+               fprz = fprz - prmid*del_y(j)*dz*(-radial(2))  
+
+               !viscous term
+               dudxmid = half*(ta2(i,j,k)+ta2(i,j+1,k))
+               dudymid = half*(tc2(i,j,k)+tc2(i,j+1,k))
+               dvdxmid = half*(tb2(i,j,k)+tb2(i,j+1,k))
+               dwdxmid = half*(te2(i,j,k)+te2(i,j+1,k))
+               dudzmid = half*(tg2(i,j,k)+tg2(i,j+1,k))
+
+               fdix = fdix + (xnu*(dvdxmid+dudymid)*radial(3)-xnu*(dwdxmid+dudzmid)*radial(2))*del_y(j)*dz
+               fdiy = fdiy + (xnu*(dwdxmid+dudzmid)*radial(1)-xnu*(two*dudxmid)*radial(3))*del_y(j)*dz
+               fdiz = fdiz + (xnu*(two*dudxmid)*radial(2)-xnu*(dvdxmid+dudymid)*radial(1))*del_y(j)*dz
+
+
+               ! fdix = fdix + two*xnu*dudxmid*del_y(j)*dz
+               ! fdiy = fdiy + xnu*(dvdxmid+dudymid)*del_y(j)*dz
+               ! fdiz = fdiz + xnu*(dwdxmid+dudzmid)*del_y(j)*dz
+
+            enddo
+            tconvxl(4)=tconvxl(4)+fcvx
+            tconvyl(4)=tconvyl(4)+fcvy
+            tconvzl(4)=tconvzl(4)+fcvz
+
+            ! tpresxl(4)=tpresxl(4)+fprx
+            tpresyl(4)=tpresyl(4)+fpry
+            tpreszl(4)=tpreszl(4)+fprz
+
+            tdiffxl(4)=tdiffxl(4)+fdix
+            tdiffyl(4)=tdiffyl(4)+fdiy
+            tdiffzl(4)=tdiffzl(4)+fdiz
+
+         enddo
+      endif
+
+      !Left & Right : 
+      !Left
+      if ((zcvlf(iv).ge.xstart(3)).and.(zcvlf(iv).le.xend(3))) then
+        k=zcvlf(iv)-xstart(3)+1
+        kk=zcvlf(iv)
+        zm=real(kk,mytype)*dz
+
+        fcvx=zero
+        fcvy=zero
+        fcvz=zero
+        fprx=zero
+        fpry=zero
+        fprz=zero
+        fdiy=zero
+        fdiz=zero
+        do j=jcvlw_lx(iv),jcvup_lx(iv)
+         kk = xstart(2)-1+j
+         jj = xstart(2)-1+j
+
+         ym=real(jj,mytype)*dy
+           do i=icvlf_lx(iv),icvrt_lx(iv)-1
+              ii=xstart(1)+i-1
+              xm=real(ii,mytype)*dx
+              ! write(*,*) 'Calculating force at left z boundary', [xm,ym,zm]
+
+              !momentum flux
+              radial = [xm,ym,zm]-position(iv,:)
+
+              call crossProduct(angularVelocity(iv,2:4),[xm,ym,zm]-position(iv,:),rotationalComponent)
+              uxmid = half*(ux1(i,j,k)+ux1(i+1,j,k)) - linearVelocity(iv,1) + rotationalComponent(1)
+              uymid = half*(uy1(i,j,k)+uy1(i+1,j,k)) - linearVelocity(iv,2) + rotationalComponent(2)
+              uzmid = half*(uz1(i,j,k)+uz1(i+1,j,k)) - linearVelocity(iv,3) + rotationalComponent(3)
+
+              fcvx = fcvx -(uymid*radial(3)-uzmid*radial(2))*uzmid*dx*del_y(j) !!!CHANGE
+              fcvy = fcvy -(uzmid*radial(1)-uxmid*radial(3))*uzmid*dx*del_y(j)
+              fcvz = fcvz -(uxmid*radial(2)-uymid*radial(1))*uzmid*dx*del_y(j)
+
+              !pressure
+              prmid = half*(ppi1(i,j,k)+ppi1(i+1,j,k))
+            !   fprz = fprz -prmid*dx*dy*(radial(2)-radial(1))
+              fprx = fprx +prmid*dx*dy*(radial(2))
+              fpry = fpry +prmid*dx*dy*(-radial(1))
+
+              !viscous term
+              dudzmid = half*(tg1(i,j,k)+tg1(i+1,j,k))
+              dwdxmid = half*(te1(i,j,k)+te1(i+1,j,k))
+              dvdzmid = half*(th1(i,j,k)+th1(i+1,j,k))
+              dwdymid = half*(tf1(i,j,k)+tf1(i+1,j,k))
+              dwdzmid = half*(ti1(i,j,k)+ti1(i+1,j,k))
+              
+              fdix = fdix - (xnu*(dvdzmid+dwdymid)*radial(3)-xnu*(two*dwdzmid)*radial(2))*dx*dy
+              fdiy = fdiy - (xnu*(two*dwdzmid)*radial(1)-xnu*(dudzmid+dwdxmid)*radial(3))*dx*dy
+              fdiz = fdiz - (xnu*(dudzmid+dwdxmid)*radial(2)-xnu*(dvdzmid+dwdymid)*radial(1))*dx*dy
+
+
+            !   fdix = fdix +(xnu*(dudzmid+dwdxmid)*dx*dy)
+            !   fdiy = fdiy +(xnu*(dvdzmid+dwdymid)*dx*dy)
+            !   fdiz = fdiz +(xnu*(two*dwdzmid)*dx*dy)
+           enddo
+        enddo
+!print*, kk
+!        drag3(kk)=drag3(kk)+fcvx   ! Should be size ny
+!        print*, drag3(kk)
+        tconvxl2(5)=tconvxl2(5)+fcvx
+        tconvyl2(5)=tconvyl2(5)+fcvy
+        tconvzl2(5)=tconvzl2(5)+fcvz
+        tpresxl(5) =tpresxl(5) +fprx
+        tpresyl(5) =tpresyl(5) +fpry
+      !   tpreszl(5) =tpreszl(5) +fprz
+        tdiffxl2(5)=tdiffxl2(5)+fdix
+        tdiffyl2(5)=tdiffyl2(5)+fdiy
+        tdiffzl2(5)=tdiffzl2(5)+fdiz        
+     endif 
+     !Right
+     if ((zcvrt(iv).ge.xstart(3)).and.(zcvrt(iv).le.xend(3))) then
+        k=zcvrt(iv)-xstart(3)+1
+        kk=zcvrt(iv)
+        zm=real(kk,mytype)*dz
+!        kk=nrank+1
+
+        fcvx=zero
+        fcvy=zero
+        fcvz=zero
+        fprx=zero
+        fpry=zero
+        fprz=zero
+        fdix=zero
+        fdiy=zero
+        fdiz=zero
+      !  do k=1,xsize(3)
+        do j=jcvlw_lx(iv),jcvup_lx(iv)
+        !  kk = xstart(2)-1+j 
+         jj = xstart(2)-1+j
+         ym=real(jj,mytype)*dy
+          do i=icvlf_lx(iv),icvrt_lx(iv)-1
+            ii=xstart(1)+i-1
+            xm=real(ii,mytype)*dx
+              !momentum flux
+            radial = [xm,ym,zm]-position(iv,:)
+
+            call crossProduct(angularVelocity(iv,2:4),[xm,ym,zm]-position(iv,:),rotationalComponent)
+           !  write(*,*) 'Calculating force at right z boundary', [xm,ym,zm]
+
+            uxmid = half*(ux1(i,j,k)+ux1(i+1,j,k)) - linearVelocity(iv,1) + rotationalComponent(1)
+            uymid = half*(uy1(i,j,k)+uy1(i+1,j,k)) - linearVelocity(iv,2) + rotationalComponent(2)
+            uzmid = half*(uz1(i,j,k)+uz1(i+1,j,k)) - linearVelocity(iv,3) + rotationalComponent(3)
+
+            fcvx = fcvx +(uymid*radial(3)-uzmid*radial(2))*uzmid*dx*del_y(j)
+            fcvy = fcvy +(uzmid*radial(1)-uxmid*radial(3))*uzmid*dx*del_y(j)
+            fcvz = fcvz +(uxmid*radial(2)-uymid*radial(1))*uzmid*dx*del_y(j)
+
+
+              !pressure
+              prmid = half*(ppi1(i,j,k)+ppi1(i+1,j,k))
+            !   fprz = fprz +prmid*dx*dy*(radial(2)-radial(1))
+
+              fprx = fprx -prmid*dx*dy*(radial(2))
+              fpry = fpry -prmid*dx*dy*(-radial(1))
+
+              !viscous term
+              dudzmid = half*(tg1(i,j,k)+tg1(i+1,j,k))
+              dwdxmid = half*(te1(i,j,k)+te1(i+1,j,k))
+              dvdzmid = half*(th1(i,j,k)+th1(i+1,j,k))
+              dwdymid = half*(tf1(i,j,k)+tf1(i+1,j,k))
+              dwdzmid = half*(ti1(i,j,k)+ti1(i+1,j,k))
+                            
+              fdix = fdix + (xnu*(dvdzmid+dwdymid)*radial(3)-xnu*(two*dwdzmid)*radial(2))*dx*dy
+              fdiy = fdiy + (xnu*(two*dwdzmid)*radial(1)-xnu*(dudzmid+dwdxmid)*radial(3))*dx*dy
+              fdiz = fdiz + (xnu*(dudzmid+dwdxmid)*radial(2)-xnu*(dvdzmid+dwdymid)*radial(1))*dx*dy
+
+            !   fdix = fdix -(xnu*(dudzmid+dwdxmid)*dx*dy)
+            !   fdiy = fdiy -(xnu*(dvdzmid+dwdymid)*dx*dy)
+            !   fdiz = fdiz -two*xnu*dwdzmid*dx*dy
+
+           enddo
+        enddo
+!        drag4(kk)=drag4(kk)+fcvx    ! Should be size ny
+        tconvxl2(6)=tconvxl2(6)+fcvx
+        tconvyl2(6)=tconvyl2(6)+fcvy
+        tconvzl2(6)=tconvzl2(6)+fcvz
+        
+        tpresxl(6) =tpresxl(6) +fprx !!!!!!!!
+        tpresyl(6) =tpresyl(6) +fpry
+      !   tpreszl(6) =tpreszl(6) +fprz
+
+        tdiffxl2(6)=tdiffxl2(6)+fdix
+        tdiffyl2(6)=tdiffyl2(6)+fdiy
+        tdiffzl2(6)=tdiffzl2(6)+fdiz
+     endif     
+    
+      call MPI_ALLREDUCE(tconvxl,tconvx,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+      call MPI_ALLREDUCE(tconvyl,tconvy,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+      call MPI_ALLREDUCE(tconvzl,tconvz,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+
+      call MPI_ALLREDUCE(tpresxl,tpresx,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+      call MPI_ALLREDUCE(tpresyl,tpresy,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+      call MPI_ALLREDUCE(tdiffxl,tdiffx,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+      call MPI_ALLREDUCE(tdiffyl,tdiffy,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+      call MPI_ALLREDUCE(tdiffzl,tdiffz,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+
+      call MPI_ALLREDUCE(tconvxl2,tconvx2,ny,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+      call MPI_ALLREDUCE(tconvyl2,tconvy2,ny,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+      call MPI_ALLREDUCE(tconvzl2,tconvz2,ny,real_type,MPI_SUM,MPI_COMM_WORLD,code)     
+      call MPI_ALLREDUCE(tpreszl, tpresz ,ny,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+      call MPI_ALLREDUCE(tdiffxl2,tdiffx2,ny,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+      call MPI_ALLREDUCE(tdiffyl2,tdiffy2,ny,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+      call MPI_ALLREDUCE(tdiffzl2,tdiffz2,ny,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+ 
+
+      tp1 = sum(tpresx(:))/dt
+      tp2 = sum(tpresy(:))/dt
+      tp3 = sum(tpresz(:))/dt
+   
+      mom1 = sum(tunstx(:)) + sum(tconvx(:)) + (-2.0*tconv2_sign+1.0)*sum(tconvx2(:)) !if tconv2sign == 1.0, multiply by -1
+      mom2 = sum(tunsty(:)) + sum(tconvy(:)) + (-2.0*tconv2_sign+1.0)*sum(tconvy2(:))
+      mom3 = sum(tunstz(:)) + sum(tconvz(:)) + (-2.0*tconv2_sign+1.0)*sum(tconvz2(:))
+ 
+      dra1(iv) = -(sum(tdiffx) + sum(tdiffx2) + tp1 - mom1)*(1.0-2.0*torq_flip)
+      dra2(iv) = -(sum(tdiffy) + sum(tdiffy2) + tp2 - mom2)*(1.0-2.0*torq_flip)
+      dra3(iv) = -(sum(tdiffz) + sum(tdiffz2) + tp3 - mom3)*(1.0-2.0*torq_flip)
+      
+      ! do k=1,zsize(3)
+
+      !    tpresx(k)=tpresx(k)/dt
+      !    tpresy(k)=tpresy(k)/dt
+      !    tpresz(k)=tpresz(k)/dt
+
+
+      !    xmom    = tunstx(k)+tconvx(k)+tconvx2(k)
+      !    ymom    = tunsty(k)+tconvy(k)+tconvy2(k)
+      !    zmom    = tunstz(k)+tconvz(k)+tconvz2(k)
+      !    xDrag(k) = (tdiffx(k)+tdiffx2(k)+tpresx(k)-xmom)
+      !    yLift(k) = (tdiffy(k)+tdiffy2(k)+tpresy(k)-ymom)
+      !    zLat(k)  = (tdiffz(k)+tdiffz2(k)+tpresz(k)-zmom)
+
+      ! enddo
+
+      !Edited by F. Schuch
+      xDrag_mean = sum(xDrag(:))/real(nz,mytype)
+      yLift_mean = sum(yLift(:))/real(nz,mytype)
+
+     !  xDrag_tot = sum(xDrag(:))
+     !  yLift_tot = sum(yLift(:))
+     !  zLat_tot  = sum(zLat(:))
+     
+     if ((itime==ifirst).or.(itime==0)) then
+        
+     endif
+      if ((nrank .eq. 0).and.(record_var.eq.1).and.(mod(itime,ilist)==0)) then
+        ! write(*,*) 'TIME STEP = ', itime
+        write(45+(iv-1),*) t,dra1,dra2,dra3, sum(tdiffx), sum(tdiffx2), tp1, -mom1, -sum(tunstx(:)), -sum(tconvx(:)), -sum(tconvx2(:))
+        !  write(*,*) 'written to file number', 38+(iv-1), t, dra1,dra2,dra3
+         call flush(45+(iv-1))
+
+         if (torq_debug.eq.1) then 
+            write(100,*) t,dra1,dra2,dra3, tdiffx(1), tdiffx2(1), tpresx(1)/dt,  -tunstx(1), -tconvx(1), -tconvx2(1)
+            write(101,*) t,dra1,dra2,dra3, tdiffx(2), tdiffx2(2), tpresx(2)/dt,  -tunstx(2), -tconvx(2), -tconvx2(2)
+            write(102,*) t,dra1,dra2,dra3, tdiffx(3), tdiffx2(3), tpresx(3)/dt,  -tunstx(3), -tconvx(3), -tconvx2(3)
+            write(103,*) t,dra1,dra2,dra3, tdiffx(4), tdiffx2(4), tpresx(4)/dt,  -tunstx(4), -tconvx(4), -tconvx2(4)
+            write(104,*) t,dra1,dra2,dra3, tdiffx(5), tdiffx2(5), tpresx(5)/dt,  -tunstx(5), -tconvx(5), -tconvx2(5)
+            write(105,*) t,dra1,dra2,dra3, tdiffx(6), tdiffx2(6), tpresx(6)/dt,  -tunstx(6), -tconvx(6), -tconvx2(6)
+   
+            write(106,*) t,dra1,dra2,dra3, sum(tdiffy), sum(tdiffy2), tp2, -mom2, -sum(tunsty(:)), -sum(tconvy(:)), -sum(tconvy2(:))
+            write(107,*) t,dra1,dra2,dra3, sum(tdiffz), sum(tdiffz2), tp3, -mom3, -sum(tunstz(:)), -sum(tconvz(:)), -sum(tconvz2(:))
+            
+            call flush(100)
+            call flush(101)
+            call flush(102)
+            call flush(103)
+            call flush(104)
+            call flush(105)
+
+            call flush(106)
+            call flush(107)
+            endif
+      endif
+     !  if (mod(itime, ioutput).eq.0) then
+     !     if (nrank .eq. 0) then
+     !        write(filename,"('forces.dat',I7.7)") itime
+     !        call system("cp forces.dat " //filename)
+     !     endif
+     !  endif
+   enddo
+
+   do k = 1, xsize(3)
+      do j = 1, xsize(2)
+         do i = 1, xsize(1)
+            ux11(i,j,k)=ux01(i,j,k)
+            uy11(i,j,k)=uy01(i,j,k)
+            uz11(i,j,k)=uz01(i,j,k)
+            ux01(i,j,k)=ux1(i,j,k)
+            uy01(i,j,k)=uy1(i,j,k)
+            uz01(i,j,k)=uz1(i,j,k)
+         enddo
+      enddo
+   enddo
+
+   return
+
+ end subroutine torque_calc
 end module forces
diff --git a/src/forces_draft.f90 b/src/forces_draft.f90
new file mode 100644
index 0000000000000000000000000000000000000000..46ed168268a84c7a3abc9079c64d580bf7ce2234
--- /dev/null
+++ b/src/forces_draft.f90
@@ -0,0 +1,918 @@
+!################################################################################
+!This file is part of Xcompact3d.
+!
+!Xcompact3d
+!Copyright (c) 2012 Eric Lamballais and Sylvain Laizet
+!eric.lamballais@univ-poitiers.fr / sylvain.laizet@gmail.com
+!
+!    Xcompact3d is free software: you can redistribute it and/or modify
+!    it under the terms of the GNU General Public License as published by
+!    the Free Software Foundation.
+!
+!    Xcompact3d is distributed in the hope that it will be useful,
+!    but WITHOUT ANY WARRANTY; without even the implied warranty of
+!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+!    GNU General Public License for more details.
+!
+!    You should have received a copy of the GNU General Public License
+!    along with the code.  If not, see <http://www.gnu.org/licenses/>.
+!-------------------------------------------------------------------------------
+!-------------------------------------------------------------------------------
+!    We kindly request that you cite Xcompact3d/Incompact3d in your
+!    publications and presentations. The following citations are suggested:
+!
+!    1-Laizet S. & Lamballais E., 2009, High-order compact schemes for
+!    incompressible flows: a simple and efficient method with the quasi-spectral
+!    accuracy, J. Comp. Phys.,  vol 228 (15), pp 5989-6015
+!
+!    2-Laizet S. & Li N., 2011, Incompact3d: a powerful tool to tackle turbulence
+!    problems with up to 0(10^5) computational cores, Int. J. of Numerical
+!    Methods in Fluids, vol 67 (11), pp 1735-1757
+!################################################################################
+
+!=======================================================================
+! This program computes the drag and lift coefficients alongo a 
+! cylinder by the control ! volume (CV) technique for 2D (pencil) 
+! decomposition. 
+!
+! Adpated from Leandro Pinto PhD Thesis (2012) by Gabriel Narvaez Campo 
+! 08-2018 Nucleo de Estudos em Transicao e Turbulencia (NETT/IPH/UFRGS)
+!
+!=======================================================================
+
+module forces
+    USE decomp_2d
+    USE ibm
+    implicit none
+  
+    integer :: nvol,iforces
+    real(mytype),save,allocatable,dimension(:,:,:) :: ux01, uy01, ux11, uy11, ppi1, uz01, uz11 
+    real(mytype),allocatable,dimension(:) :: xld, xrd, yld, yud, xld2, xrd2, yld2, yud2, zld, zrd
+    integer,allocatable,dimension(:) :: icvlf, icvrt, jcvlw, jcvup, zcvlf, zcvrt
+    integer,allocatable,dimension(:) :: icvlf_lx, icvrt_lx, icvlf_ly, icvrt_ly, icvlf_lz, icvrt_lz
+    integer,allocatable,dimension(:) :: jcvlw_lx, jcvup_lx, jcvlw_ly, jcvup_ly, jcvlw_lz, jcvup_lz
+    integer,allocatable,dimension(:) :: zcvlf_lx, zcvrt_lx, zcvlf_ly, zcvrt_ly
+    
+  
+  contains
+  
+    subroutine init_forces
+  
+      USE decomp_2d
+      USE param
+      USE variables
+      implicit none
+  
+      integer :: iv
+  
+      call alloc_x(ux01)
+      call alloc_x(uy01)
+      call alloc_x(ux11)
+      call alloc_x(uy11)
+      call alloc_x(ppi1)
+      call alloc_x(uz01)
+      call alloc_x(uz11)
+  
+      ux01 = zero
+      uy01 = zero
+      ux11 = zero
+      uy11 = zero
+      uz01 = zero
+      uz11 = zero    
+  
+      allocate(icvlf(nvol), icvrt(nvol), jcvlw(nvol), jcvup(nvol), zcvlf(nvol), zcvrt(nvol))
+      allocate(icvlf_lx(nvol), icvrt_lx(nvol), icvlf_ly(nvol), icvrt_ly(nvol), icvlf_lz(nvol), icvrt_lz(nvol))
+      allocate(jcvlw_lx(nvol), jcvup_lx(nvol), jcvlw_ly(nvol), jcvup_ly(nvol), jcvlw_lz(nvol), jcvup_lz(nvol))
+      allocate(zcvlf_lx(nvol), zcvrt_lx(nvol), zcvlf_ly(nvol), zcvrt_ly(nvol))
+      allocate(xld2(nvol), xrd2(nvol), yld2(nvol), yud2(nvol))
+  
+      ! Update Control Volume based on moving cylinder case
+      ! if ((iibm.ne.0).and.(t.ne.0.)) then
+      !    xld2(:) = xld(:) + (t-ifirst*dt)*ubcx
+      !    xrd2(:) = xrd(:) + (t-ifirst*dt)*ubcx
+      !    yld2(:) = yld(:) + (t-ifirst*dt)*ubcy
+      !    yud2(:) = yud(:) + (t-ifirst*dt)*ubcy
+      ! else
+      !    xld2(:) = xld(:)
+      !    xrd2(:) = xrd(:)
+      !    yld2(:) = yld(:)
+      !    yud2(:) = yud(:)
+      ! endif
+      
+      !     Definition of the Control Volume
+      !*****************************************************************
+      !! xld,xrd,yld,yud: limits of control volume (!!don't use cex and cey anymore!!)
+      do iv=1,nvol
+         ! ok for istret=0 (!!to do for istret=1!!)
+         icvlf(iv) = nint(xld2(iv)/dx)+1
+         icvrt(iv) = nint(xrd2(iv)/dx)+1
+         jcvlw(iv) = nint(yld2(iv)/dy)+1
+         jcvup(iv) = nint(yud2(iv)/dy)+1
+  
+         icvlf_lx(iv) = icvlf(iv)
+         icvrt_lx(iv) = icvrt(iv)
+         jcvlw_lx(iv) = max(jcvlw(iv)+1-xstart(2),1)
+         jcvup_lx(iv) = min(jcvup(iv)+1-xstart(2),xsize(2))
+         jcvlw_lz(iv) = max(jcvlw(iv)+1-zstart(2),1)
+         jcvup_lz(iv) = min(jcvup(iv)+1-zstart(2),zsize(2))       
+  
+         icvlf_ly(iv) = max(icvlf(iv)+1-ystart(1),1)
+         icvrt_ly(iv) = min(icvrt(iv)+1-ystart(1),ysize(1))
+         icvlf_lz(iv) = max(icvlf(iv)+1-zstart(1),1)
+         icvrt_lz(iv) = min(icvrt(iv)+1-zstart(1),zsize(1))       
+         jcvlw_ly(iv) = jcvlw(iv)
+         jcvup_ly(iv) = jcvup(iv)
+  
+         zcvlf(iv) = nint(zld(iv)/dz)+1
+         zcvrt(iv) = nint(zrd(iv)/dz)+1
+         zcvlf_lx(iv) = max(zcvlf(iv)+1-xstart(3),1)
+         zcvrt_lx(iv) = min(zcvrt(iv)+1-xstart(3),xsize(3)) 
+         zcvlf_ly(iv) = max(zcvlf(iv)+1-ystart(3),1)
+         zcvrt_ly(iv) = min(zcvrt(iv)+1-ystart(3),ysize(3))             
+      enddo
+  
+      if (nrank==0) then
+         print *,'========================Forces============================='
+         print *,'                       (icvlf)      (icvrt) '
+         print *,'                (jcvup) B____________C '
+         print *,'                        \            \ '
+         print *,'                        \     __     \ '
+         print *,'                        \    \__\    \ '
+         print *,'                        \            \ '
+         print *,'                        \       CV   \ '
+         print *,'                (jcvlw) A____________D '
+         do iv=1,nvol
+            write(*,"(' Control Volume     : #',I1)") iv
+            write(*,"('     xld, icvlf     : (',F6.2,',',I6,')')") xld(iv), icvlf(iv)
+            write(*,"('     xrd, icvrt     : (',F6.2,',',I6,')')") xrd(iv), icvrt(iv)
+            write(*,"('     yld, jcvlw     : (',F6.2,',',I6,')')") yld(iv), jcvlw(iv)
+            write(*,"('     yud, jcvup     : (',F6.2,',',I6,')')") yud(iv), jcvup(iv)
+         enddo
+         print *,'==========================================================='
+      endif
+    end subroutine init_forces
+  
+  !***********************************************************************
+  ! 
+!     subroutine update_forces
+!   ! 
+!   !***********************************************************************
+  
+!       USE decomp_2d
+!       USE param
+!       USE variables
+!       implicit none
+  
+!       integer :: iv
+  
+!       ! Update Control Volume based on moving cylinder case
+!       if ((iibm.ne.0).and.(t.ne.0.)) then
+!          xld2(:) = xld(:) + (t-ifirst*dt)*ubcx
+!          xrd2(:) = xrd(:) + (t-ifirst*dt)*ubcx
+!          yld2(:) = yld(:) + (t-ifirst*dt)*ubcy
+!          yud2(:) = yud(:) + (t-ifirst*dt)*ubcy
+!       else
+!          xld2(:) = xld(:)
+!          xrd2(:) = xrd(:)
+!          yld2(:) = yld(:)
+!          yud2(:) = yud(:)
+!       endif
+  
+!       !     Definition of the Control Volume
+!       !*****************************************************************
+!       !! xld,xrd,yld,yud: limits of control volume (!!don't use cex and cey anymore!!)
+!       do iv=1,nvol
+!          ! ok for istret=0 (!!to do for istret=1!!)
+!          icvlf(iv) = nint(xld2(iv)/dx)+1
+!          icvrt(iv) = nint(xrd2(iv)/dx)+1
+!          jcvlw(iv) = nint(yld2(iv)/dy)+1
+!          jcvup(iv) = nint(yud2(iv)/dy)+1
+  
+!          icvlf_lx(iv) = icvlf(iv)
+!          icvrt_lx(iv) = icvrt(iv)
+!          jcvlw_lx(iv) = max(jcvlw(iv)+1-xstart(2),1)
+!          jcvup_lx(iv) = min(jcvup(iv)+1-xstart(2),xsize(2))
+!          jcvlw_lz(iv) = max(jcvlw(iv)+1-zstart(2),1)
+!          jcvup_lz(iv) = min(jcvup(iv)+1-zstart(2),zsize(2))       
+  
+!          icvlf_ly(iv) = max(icvlf(iv)+1-ystart(1),1)
+!          icvrt_ly(iv) = min(icvrt(iv)+1-ystart(1),ysize(1))
+!          icvlf_lz(iv) = max(icvlf(iv)+1-zstart(1),1)
+!          icvrt_lz(iv) = min(icvrt(iv)+1-zstart(1),zsize(1))       
+!          jcvlw_ly(iv) = jcvlw(iv)
+!          jcvup_ly(iv) = jcvup(iv)
+  
+!          zcvlf(iv) = nint(zld(iv)/dz)+1
+!          zcvrt(iv) = nint(zrd(iv)/dz)+1
+!          zcvlf_lx(iv) = max(zcvlf(iv)+1-xstart(3),1)
+!          zcvrt_lx(iv) = min(zcvrt(iv)+1-xstart(3),xsize(3)) 
+!          zcvlf_ly(iv) = max(zcvlf(iv)+1-ystart(3),1)
+!          zcvrt_ly(iv) = min(zcvrt(iv)+1-ystart(3),ysize(3)) 
+!       enddo
+!     end subroutine update_forces
+  
+!   !***********************************************************************
+  ! 
+    subroutine restart_forces(itest1)
+  ! 
+  !***********************************************************************
+  
+      USE decomp_2d
+      USE decomp_2d_io
+      USE variables
+      USE param
+      USE MPI
+  
+      implicit none
+  
+      integer :: fh,ierror,code,itest1
+      integer :: ierror_o=0 !error to open sauve file during restart
+      character(len=30) :: filename, filestart
+      integer (kind=MPI_OFFSET_KIND) :: filesize, disp
+  
+      write(filename, "('restart-forces',I7.7)") itime
+      write(filestart,"('restart-forces',I7.7)") ifirst-1
+  
+      if (itest1==1) then !write
+         if (mod(itime, icheckpoint).ne.0) then
+            return
+         endif
+  
+         call MPI_FILE_OPEN(MPI_COMM_WORLD, filename, &
+              MPI_MODE_CREATE+MPI_MODE_WRONLY, MPI_INFO_NULL, &
+              fh, ierror)
+         filesize = 0_MPI_OFFSET_KIND
+         call MPI_FILE_SET_SIZE(fh,filesize,ierror)  ! guarantee overwriting
+         disp = 0_MPI_OFFSET_KIND
+         call decomp_2d_write_var(fh,disp,1,ux01)
+         call decomp_2d_write_var(fh,disp,1,uy01)
+         call decomp_2d_write_var(fh,disp,1,ux11)
+         call decomp_2d_write_var(fh,disp,1,uy11)
+         call decomp_2d_write_var(fh,disp,1,uz01)
+         call decomp_2d_write_var(fh,disp,1,uz11)       
+         call MPI_FILE_CLOSE(fh,ierror)
+      else !read
+         call MPI_FILE_OPEN(MPI_COMM_WORLD, filestart, &
+              MPI_MODE_RDONLY, MPI_INFO_NULL, &
+              fh, ierror_o)
+         disp = 0_MPI_OFFSET_KIND
+         call decomp_2d_read_var(fh,disp,1,ux01)
+         call decomp_2d_read_var(fh,disp,1,uy01)
+         call decomp_2d_read_var(fh,disp,1,ux11)
+         call decomp_2d_read_var(fh,disp,1,uy11)
+         call decomp_2d_read_var(fh,disp,1,uz01)
+         call decomp_2d_read_var(fh,disp,1,uz11)       
+         call MPI_FILE_CLOSE(fh,ierror_o)
+      endif
+  
+      if (nrank.eq.0) then
+         if (ierror_o .ne. 0) then !Included by Felipe Schuch
+            print *,'==========================================================='
+            print *,'Error: Impossible to read '//trim(filestart)
+            print *,'==========================================================='
+            call MPI_ABORT(MPI_COMM_WORLD,code,ierror)
+         endif
+      endif
+  
+    end subroutine restart_forces
+  end module forces
+  
+  !***********************************************************************
+  subroutine force(ux1,uy1,uz1,ep1)
+  !***********************************************************************
+  
+    USE forces
+    USE param
+    USE variables
+    USE decomp_2d
+    USE MPI
+    USE ibm
+    use var, only : ta1, tb1, tc1, td1, te1, tf1, tg1, th1, ti1, di1
+    use var, only : ux2, uy2, uz2, ta2, tb2, tc2, td2, te2, tf2, tg2, th2, ti2, di2
+    use var, only : ux3, uy3, uz3, ta3, tb3, tc3, td3, te3, tf3, tg3, th3, ti3, di3
+      
+  
+    implicit none
+    character(len=30) :: filename, filename2
+    integer :: nzmsize
+    integer                                             :: i, iv, j, k, kk, code, jj
+    integer                                             :: nvect1,nvect2,nvect3
+  
+    real(mytype), dimension(xsize(1),xsize(2),xsize(3)) :: ux1, uy1, uz1
+    real(mytype), dimension(xsize(1),xsize(2),xsize(3)),intent(in) :: ep1
+  
+    real(mytype), dimension(ysize(1),ysize(2),ysize(3)) :: ppi2
+    real(mytype), dimension(zsize(1),zsize(2),zsize(3)) :: ppi3
+  
+    real(mytype), dimension(nz) :: yLift, xDrag, zLat
+    real(mytype) :: yLift_mean, xDrag_mean, zLat_mean
+  
+    real(mytype), dimension(nz) :: tunstxl, tunstyl, tunstzl
+    real(mytype), dimension(nz) :: tconvxl, tconvyl, tconvzl
+    real(mytype), dimension(nz) :: tpresxl, tpresyl
+    real(mytype), dimension(nz) :: tdiffxl, tdiffyl, tdiffzl
+  
+    real(mytype), dimension(nz) :: tunstx, tunsty, tunstz  
+    real(mytype), dimension(nz) :: tconvx, tconvy, tconvz
+    real(mytype), dimension(nz) :: tpresx, tpresy 
+    real(mytype), dimension(nz) :: tdiffx, tdiffy, tdiffz
+    
+    
+    real(mytype), dimension(ny) :: tconvxl2, tconvyl2, tconvzl2
+    real(mytype), dimension(ny) :: tdiffxl2, tdiffyl2, tdiffzl2
+    real(mytype), dimension(ny) :: tconvx2, tconvy2, tconvz2
+    real(mytype), dimension(ny) :: tdiffx2, tdiffy2, tdiffz2
+    real(mytype), dimension(ny) :: tpreszl, tpresz
+    
+    
+    
+  
+    real(mytype) :: uxmid, uymid, uzmid, prmid
+    real(mytype) :: dudxmid, dudymid, dudzmid, dvdxmid, dvdymid, dvdzmid
+    real(mytype) :: dwdxmid, dwdymid, dwdzmid
+    real(mytype) :: fac,tsumx, tsumy, tsumz
+    real(mytype) :: fcvx, fcvy, fcvz, fprx, fpry, fprz, fdix, fdiy, fdiz
+    real(mytype) :: xmom, ymom, zmom
+    real(mytype), dimension(ny) :: ztpresx, ztpresy
+    real(mytype), dimension(nz) :: zyLift, zxDrag, zzLat
+    real(mytype) :: zyLift_mean, zxDrag_mean, zzLat_mean
+    
+    
+    
+    real(mytype), dimension(nz) :: drag1, drag2, drag11, drag22
+    real(mytype), dimension(nz) :: drag3, drag4, drag33, drag44
+    real(mytype) :: mom1, mom2, mom3, tp1, tp2, tp3, dra1, dra2, dra3
+  
+  !  if (imove.eq.1) then
+  !     ux1(:,:,:) = ux1(:,:,:) + 0.5
+  !  endif
+  
+    nvect1=xsize(1)*xsize(2)*xsize(3)
+    nvect2=ysize(1)*ysize(2)*ysize(3)
+    nvect3=zsize(1)*zsize(2)*zsize(3)
+  
+    if (itime.eq.1) then
+       do k = 1, xsize(3)
+          do j = 1, xsize(2)
+             do i = 1, xsize(1)
+                ux11(i,j,k)=ux1(i,j,k)
+                uy11(i,j,k)=uy1(i,j,k)
+                uz11(i,j,k)=uz1(i,j,k)
+             enddo
+          enddo
+       enddo
+       return
+    elseif (itime.eq.2) then
+       do k = 1, xsize(3)
+          do j = 1, xsize(2)
+             do i = 1, xsize(1)
+                ux01(i,j,k)=ux1(i,j,k)
+                uy01(i,j,k)=uy1(i,j,k)
+                uz01(i,j,k)=uz1(i,j,k)
+             enddo
+          enddo
+       enddo
+       return
+    endif
+  !print*, t
+  !if (nrank.eq.0) print*, ppi1
+  
+    call derx (ta1,ux1,di1,sx,ffx,fsx,fwx,xsize(1),xsize(2),xsize(3),0,ubcx)    ! du/dx
+    call derx (tb1,uy1,di1,sx,ffxp,fsxp,fwxp,xsize(1),xsize(2),xsize(3),1,ubcy) ! dv/dx
+    call derx (te1,uz1,di1,sx,ffxp,fsxp,fwxp,xsize(1),xsize(2),xsize(3),1,ubcz) ! dw/dx
+      
+    call transpose_x_to_y(ta1,ta2) ! du/dx
+    call transpose_x_to_y(tb1,tb2) ! dv/dx
+    call transpose_x_to_y(te1,te2) ! dw/dx
+  
+    call transpose_x_to_y(ux1,ux2)
+    call transpose_x_to_y(uy1,uy2)
+    call transpose_x_to_y(uz1,uz2)
+    call transpose_x_to_y(ppi1,ppi2)
+  
+  
+    call dery (tc2,ux2,di2,sy,ffyp,fsyp,fwyp,ppy,ysize(1),ysize(2),ysize(3),1,ubcx) ! du/dy
+    call dery (td2,uy2,di2,sy,ffy,fsy,fwy,ppy,ysize(1),ysize(2),ysize(3),0,ubcy)    ! dv/dy
+    call dery (tf2,uz2,di2,sy,ffyp,fsyp,fwyp,ppy,ysize(1),ysize(2),ysize(3),1,ubcz) ! dw/dy
+  
+    call transpose_y_to_z(ux2,ux3)
+    call transpose_y_to_z(uy2,uy3)
+    call transpose_y_to_z(uz2,uz3)
+  !!!!!!!  call transpose_y_to_z(ppi2,ppi3)
+    
+  !!!!!!!  call transpose_y_to_z(te2,te3) ! dw/dx
+  !!!!!!!  call transpose_y_to_z(tf2,tf3) ! dw/dy
+    
+    
+    call derz (tg3,ux3,di3,sz,ffzp,fszp,fwzp,zsize(1),zsize(2),zsize(3),1,ubcx)  ! du/dz
+    call derz (th3,uy3,di3,sz,ffzp,fszp,fwzp,zsize(1),zsize(2),zsize(3),1,ubcy)  ! dv/dz
+    call derz (ti3,uz3,di3,sz,ffz,fsz,fwz,zsize(1),zsize(2),zsize(3),0,ubcz)     ! dw/dz
+  
+  
+    call transpose_z_to_y(tg3,tg2) ! du/dz
+    call transpose_z_to_y(th3,th2) ! dv/dz
+    call transpose_z_to_y(ti3,ti2) ! 
+    
+    
+    call transpose_y_to_x(tc2,tc1) ! du/dy
+    call transpose_y_to_x(td2,td1) ! dv/dy
+    call transpose_y_to_x(th2,th1) ! dv/dz
+    call transpose_y_to_x(tf2,tf1) ! dw/dy
+    call transpose_y_to_x(tg2,tg1) ! 
+    call transpose_y_to_x(ti2,ti1) ! 
+  
+  
+  
+  
+  
+    !*****************************************************************
+    !      Drag and Lift coefficients
+    !*****************************************************************
+    do iv=1,nvol
+  
+       !*****************************************************************
+       !        Calculation of the momentum terms
+       !*****************************************************************
+       !
+       !     Calculation of the momentum terms. First we integrate the 
+       !     time rate of momentum along the CV.
+       !
+       !         Excluding the body internal cells. If the centroid 
+       !         of the cell falls inside the body the cell is 
+       !         excluded.
+  
+       tunstxl=zero
+       tunstyl=zero
+       tunstzl=zero
+  !     do k=1,xsize(3)
+       do k=zcvlf_lx(iv),zcvrt_lx(iv)
+          tsumx=zero
+          tsumy=zero
+          tsumz=zero
+          do j=jcvlw_lx(iv),jcvup_lx(iv)
+             do i=icvlf_lx(iv),icvrt_lx(iv)
+                !     The velocity time rate has to be relative to the cell center, 
+                !     and not to the nodes, because, here, we have an integral 
+                !     relative to the volume, and, therefore, this has a sense 
+                !     of a "source".
+                fac   = (onepfive*ux1(i,j,k)-two*ux01(i,j,k)+half*ux11(i,j,k))*(one-ep1(i,j,k))
+                tsumx = tsumx+fac*dx*dy*dz/dt
+  
+                fac   = (onepfive*uy1(i,j,k)-two*uy01(i,j,k)+half*uy11(i,j,k))*(one-ep1(i,j,k))
+                tsumy = tsumy+fac*dx*dy*dz/dt
+                
+                fac   = (onepfive*uz1(i,j,k)-two*uz01(i,j,k)+half*uz11(i,j,k))*(one-ep1(i,j,k))
+                tsumz = tsumz+fac*dx*dy*dz/dt              
+             enddo
+          enddo
+          tunstxl(xstart(3)-1+k)=tsumx
+          tunstyl(xstart(3)-1+k)=tsumy
+          tunstzl(xstart(3)-1+k)=tsumz
+       enddo
+       call MPI_ALLREDUCE(tunstxl,tunstx,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+       call MPI_ALLREDUCE(tunstyl,tunsty,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+       call MPI_ALLREDUCE(tunstzl,tunstz,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+  
+  !!$!*********************************************************************************
+  !!$!     Secondly, the surface momentum fluxes
+  !!$!*********************************************************************************
+  !!$
+  !!$!       (icvlf)      (icvrt)
+  !!$!(jcvup) B____________C  
+  !!$!        \            \
+  !!$!        \     __     \
+  !!$!        \    \__\    \
+  !!$!        \            \
+  !!$!        \       CV   \
+  !!$!(jcvlw) A____________D      
+  
+  drag1(:)=0.
+  drag2(:)=0.
+  drag3(:)=0.
+  drag4(:)=0.
+  
+  drag11(:)=0.
+  drag22(:)=0.
+  drag33(:)=0.
+  drag44(:)=0.
+  
+       tconvxl=zero
+       tconvyl=zero
+       tconvzl=zero
+       tdiffxl=zero
+       tdiffyl=zero
+       tdiffzl=zero
+       tpresxl=zero
+       tpresyl=zero
+       tpreszl=zero
+       
+       tconvxl2=zero
+       tconvyl2=zero
+       tconvzl2=zero
+       tdiffxl2=zero
+       tdiffyl2=zero
+       tdiffzl2=zero     
+       !BC and AD : x-pencils
+       !AD
+       if ((jcvlw(iv).ge.xstart(2)).and.(jcvlw(iv).le.xend(2))) then
+          j=jcvlw(iv)-xstart(2)+1
+  !        do k=1,xsize(3)
+          do k=zcvlf_lx(iv),zcvrt_lx(iv)
+             kk=xstart(3)-1+k
+             fcvx=zero
+             fcvy=zero
+             fcvz=zero
+             fpry=zero
+             fdix=zero
+             fdiy=zero
+             fdiz=zero
+             do i=icvlf_lx(iv),icvrt_lx(iv)-1
+                !momentum flux
+                uxmid = half*(ux1(i,j,k)+ux1(i+1,j,k))
+                uymid = half*(uy1(i,j,k)+uy1(i+1,j,k))
+                uzmid = half*(uz1(i,j,k)+uz1(i+1,j,k))
+  
+                fcvx= fcvx -uxmid*uymid*dx*dz
+                fcvy= fcvy -uymid*uymid*dx*dz
+                fcvz= fcvz -uymid*uzmid*dx*dz
+  
+                !pressure
+                prmid = half*(ppi1(i,j,k)+ppi1(i+1,j,k))
+                fpry = fpry +prmid*dx*dz
+  
+                !viscous term
+                dudymid = half*(tc1(i,j,k)+tc1(i+1,j,k))
+                dvdxmid = half*(tb1(i,j,k)+tb1(i+1,j,k))
+                dvdymid = half*(td1(i,j,k)+td1(i+1,j,k))
+                dwdymid = half*(tf1(i,j,k)+tf1(i+1,j,k))
+                dvdzmid = half*(th1(i,j,k)+th1(i+1,j,k))
+                
+                fdix = fdix -(xnu*(dudymid+dvdxmid)*dx*dz)
+                fdiy = fdiy -two*xnu*dvdymid*dx*dz
+                fdiz = fdiz -(xnu*(dwdymid+dvdzmid)*dx*dz)
+  
+             enddo
+  !           drag1(kk)=drag1(kk)+fcvx
+             tconvxl(kk)=tconvxl(kk)+fcvx
+             tconvyl(kk)=tconvyl(kk)+fcvy
+             tconvzl(kk)=tconvzl(kk)+fcvz
+             tpresyl(kk)=tpresyl(kk)+fpry
+             tdiffxl(kk)=tdiffxl(kk)+fdix
+             tdiffyl(kk)=tdiffyl(kk)+fdiy
+             tdiffzl(kk)=tdiffzl(kk)+fdiz
+          enddo
+       endif
+       !BC
+       if ((jcvup(iv).ge.xstart(2)).and.(jcvup(iv).le.xend(2))) then
+          j=jcvup(iv)-xstart(2)+1
+  !        do k=1,xsize(3)
+          do k=zcvlf_lx(iv),zcvrt_lx(iv)
+             kk=xstart(3)-1+k
+             fcvx=zero
+             fcvy=zero
+             fcvz=zero
+             fpry=zero
+             fdix=zero
+             fdiy=zero
+             fdiz=zero
+             do i=icvlf_lx(iv),icvrt_lx(iv)-1
+                !momentum flux
+                uxmid = half*(ux1(i,j,k)+ux1(i+1,j,k))
+                uymid = half*(uy1(i,j,k)+uy1(i+1,j,k))
+                uzmid = half*(uz1(i,j,k)+uz1(i+1,j,k))
+                fcvx= fcvx +uxmid*uymid*dx*dz
+                fcvy= fcvy +uymid*uymid*dx*dz
+                fcvz= fcvz +uymid*uzmid*dx*dz
+  
+                !pressure
+                prmid = half*(ppi1(i,j,k)+ppi1(i+1,j,k))
+                fpry = fpry -prmid*dx*dz
+  
+                !viscous term
+                dudymid = half*(tc1(i,j,k)+tc1(i+1,j,k))
+                dvdxmid = half*(tb1(i,j,k)+tb1(i+1,j,k))
+                dvdymid = half*(td1(i,j,k)+td1(i+1,j,k))
+                dwdymid = half*(tf1(i,j,k)+tf1(i+1,j,k))
+                dvdzmid = half*(th1(i,j,k)+th1(i+1,j,k))
+  
+                fdix = fdix +(xnu*(dudymid+dvdxmid)*dx*dz)
+                fdiy = fdiy +two*xnu*dvdymid*dx*dz
+                fdiz = fdiz +(xnu*(dwdymid+dvdzmid)*dx*dz)
+  
+             enddo
+  !           drag2(kk)=drag2(kk)+fcvx
+             tconvxl(kk)=tconvxl(kk)+fcvx
+             tconvyl(kk)=tconvyl(kk)+fcvy
+             tconvzl(kk)=tconvzl(kk)+fcvz
+             tpresyl(kk)=tpresyl(kk)+fpry
+             tdiffxl(kk)=tdiffxl(kk)+fdix
+             tdiffyl(kk)=tdiffyl(kk)+fdiy
+             tdiffzl(kk)=tdiffzl(kk)+fdiz
+          enddo
+       endif
+       !AB and DC : y-pencils
+       !AB
+       
+  !     print*,icvlf(iv) 
+       if ((icvlf(iv).ge.ystart(1)).and.(icvlf(iv).le.yend(1))) then
+          i=icvlf(iv)-ystart(1)+1
+  !        do k=1,ysize(3)
+          do k=zcvlf_ly(iv),zcvrt_ly(iv)
+             kk=ystart(3)-1+k
+             fcvx=zero
+             fcvy=zero
+             fcvz=zero
+             fprx=zero
+             fdix=zero
+             fdiy=zero
+             fdiz=zero
+             do j=1,ysize(2)-1
+  !           do j=jcvlw_ly(iv),jcvup_ly(iv)-1
+                !momentum flux
+                uxmid = half*(ux2(i,j,k)+ux2(i,j+1,k))
+                uymid = half*(uy2(i,j,k)+uy2(i,j+1,k))
+                uzmid = half*(uz2(i,j,k)+uz2(i,j+1,k))
+                fcvx= fcvx -uxmid*uxmid*dy*dz
+                fcvy= fcvy -uxmid*uymid*dy*dz
+                fcvz= fcvz -uxmid*uzmid*dy*dz
+  
+                !pressure
+                prmid=half*(ppi2(i,j,k)+ppi2(i,j+1,k))
+                fprx = fprx +prmid*dy*dz
+  
+                !viscous term
+                dudxmid = half*(ta2(i,j,k)+ta2(i,j+1,k))
+                dudymid = half*(tc2(i,j,k)+tc2(i,j+1,k))
+                dvdxmid = half*(tb2(i,j,k)+tb2(i,j+1,k))
+                dwdxmid = half*(te2(i,j,k)+te2(i,j+1,k))
+                dudzmid = half*(tg2(i,j,k)+tg2(i,j+1,k))
+                
+                fdix = fdix -two*xnu*dudxmid*dy*dz
+                fdiy = fdiy -xnu*(dvdxmid+dudymid)*dy*dz
+                fdiz = fdiz -xnu*(dwdxmid+dudzmid)*dy*dz
+             enddo
+             tconvxl(kk)=tconvxl(kk)+fcvx
+             tconvyl(kk)=tconvyl(kk)+fcvy
+             tconvzl(kk)=tconvzl(kk)+fcvz
+             tpresxl(kk)=tpresxl(kk)+fprx
+             tdiffxl(kk)=tdiffxl(kk)+fdix
+             tdiffyl(kk)=tdiffyl(kk)+fdiy
+             tdiffzl(kk)=tdiffzl(kk)+fdiz
+          enddo
+       endif
+       !DC
+       if ((icvrt(iv).ge.ystart(1)).and.(icvrt(iv).le.yend(1))) then
+          i=icvrt(iv)-ystart(1)+1
+  !        do k=1,ysize(3)
+          do k=zcvlf_ly(iv),zcvrt_ly(iv)
+             kk=ystart(3)-1+k
+             fcvx=zero
+             fcvy=zero
+             fcvz=zero
+             fprx=zero
+             fdix=zero
+             fdiy=zero
+             fdiz=zero
+             do j=1,ysize(2)-1
+  !           do j=jcvlw_ly(iv),jcvup_ly(iv)-1
+                !momentum flux
+                uxmid = half*(ux2(i,j,k)+ux2(i,j+1,k))
+                uymid = half*(uy2(i,j,k)+uy2(i,j+1,k))
+                uzmid = half*(uz2(i,j,k)+uz2(i,j+1,k))
+                fcvx= fcvx +uxmid*uxmid*dy*dz
+                fcvy= fcvy +uxmid*uymid*dy*dz
+                fcvz= fcvz +uxmid*uzmid*dy*dz
+  
+                !pressure
+                prmid=half*(ppi2(i,j,k)+ppi2(i,j+1,k))
+                fprx = fprx -prmid*dy*dz
+  
+                !viscous term
+                dudxmid = half*(ta2(i,j,k)+ta2(i,j+1,k))
+                dudymid = half*(tc2(i,j,k)+tc2(i,j+1,k))
+                dvdxmid = half*(tb2(i,j,k)+tb2(i,j+1,k))
+                dwdxmid = half*(te2(i,j,k)+te2(i,j+1,k))
+                dudzmid = half*(tg2(i,j,k)+tg2(i,j+1,k))
+  
+                fdix = fdix +two*xnu*dudxmid*dy*dz
+                fdiy = fdiy +xnu*(dvdxmid+dudymid)*dy*dz
+                fdiz = fdiz +xnu*(dwdxmid+dudzmid)*dy*dz
+             enddo
+             tconvxl(kk)=tconvxl(kk)+fcvx
+             tconvyl(kk)=tconvyl(kk)+fcvy
+             tconvzl(kk)=tconvzl(kk)+fcvz
+             tpresxl(kk)=tpresxl(kk)+fprx
+             tdiffxl(kk)=tdiffxl(kk)+fdix
+             tdiffyl(kk)=tdiffyl(kk)+fdiy
+             tdiffzl(kk)=tdiffzl(kk)+fdiz
+          enddo
+       endif
+       
+          
+       !Left & Right : 
+       !Left
+       if ((zcvlf(iv).ge.xstart(3)).and.(zcvlf(iv).le.xend(3))) then
+          k=zcvlf(iv)-xstart(3)+1
+          
+  
+          fcvx=zero
+          fcvy=zero
+          fcvz=zero
+          fprz=zero
+          fdix=zero
+          fdiy=zero
+          fdiz=zero
+          do j=jcvlw_lx(iv),jcvup_lx(iv)
+           kk = xstart(2)-1+j
+             do i=icvlf_lx(iv),icvrt_lx(iv)-1
+                !momentum flux
+                uxmid = half*(ux1(i,j,k)+ux1(i+1,j,k))
+                uymid = half*(uy1(i,j,k)+uy1(i+1,j,k))
+                uzmid = half*(uz1(i,j,k)+uz1(i+1,j,k))
+  
+                fcvx= fcvx +uxmid*uzmid*dx*dy
+                fcvy= fcvy +uymid*uzmid*dx*dy
+                fcvz= fcvz +uzmid*uzmid*dx*dy
+  
+                !pressure
+                prmid = half*(ppi1(i,j,k)+ppi1(i+1,j,k))
+                fprz = fprz -prmid*dx*dy
+  
+                !viscous term
+                dudzmid = half*(tg1(i,j,k)+tg1(i+1,j,k))
+                dwdxmid = half*(te1(i,j,k)+te1(i+1,j,k))
+                dvdzmid = half*(th1(i,j,k)+th1(i+1,j,k))
+                dwdymid = half*(tf1(i,j,k)+tf1(i+1,j,k))
+                dwdzmid = half*(ti1(i,j,k)+ti1(i+1,j,k))
+                                                                    
+                fdix = fdix +(xnu*(dudzmid+dwdxmid)*dx*dy)
+                fdiy = fdiy +(xnu*(dvdzmid+dwdymid)*dx*dy)
+                fdiz = fdiz +two*xnu*dwdzmid*dx*dy
+             enddo
+          enddo
+  !print*, kk
+  !        drag3(kk)=drag3(kk)+fcvx   ! Should be size ny
+  !        print*, drag3(kk)
+          tconvxl2(kk)=tconvxl2(kk)+fcvx
+          tconvyl2(kk)=tconvyl2(kk)+fcvy
+          tconvzl2(kk)=tconvzl2(kk)+fcvz
+          tpreszl(kk) =tpreszl(kk) +fprz
+          tdiffxl2(kk)=tdiffxl2(kk)+fdix
+          tdiffyl2(kk)=tdiffyl2(kk)+fdiy
+          tdiffzl2(kk)=tdiffzl2(kk)+fdiz        
+       endif 
+       !Right
+       if ((zcvrt(iv).ge.xstart(3)).and.(zcvrt(iv).le.xend(3))) then
+          k=zcvrt(iv)-xstart(3)+1
+  !        kk=nrank+1
+  
+          fcvx=zero
+          fcvy=zero
+          fcvz=zero
+          fprz=zero
+          fdix=zero
+          fdiy=zero
+          fdiz=zero
+  !        do k=1,xsize(3)
+          do j=jcvlw_lx(iv),jcvup_lx(iv)
+           kk = xstart(2)-1+j 
+            do i=icvlf_lx(iv),icvrt_lx(iv)-1
+                !momentum flux
+                uxmid = half*(ux1(i,j,k)+ux1(i+1,j,k))
+                uymid = half*(uy1(i,j,k)+uy1(i+1,j,k))
+                uzmid = half*(uz1(i,j,k)+uz1(i+1,j,k))
+  
+                fcvx= fcvx -uxmid*uzmid*dx*dy
+                fcvy= fcvy -uymid*uzmid*dx*dy
+                fcvz= fcvz -uzmid*uzmid*dx*dy
+  
+                !pressure
+                prmid = half*(ppi1(i,j,k)+ppi1(i+1,j,k))
+                fprz = fprz +prmid*dx*dy
+  
+                !viscous term
+                dudzmid = half*(tg1(i,j,k)+tg1(i+1,j,k))
+                dwdxmid = half*(te1(i,j,k)+te1(i+1,j,k))
+                dvdzmid = half*(th1(i,j,k)+th1(i+1,j,k))
+                dwdymid = half*(tf1(i,j,k)+tf1(i+1,j,k))
+                dwdzmid = half*(ti1(i,j,k)+ti1(i+1,j,k))
+                                                    
+                fdix = fdix -(xnu*(dudzmid+dwdxmid)*dx*dy)
+                fdiy = fdiy -(xnu*(dvdzmid+dwdymid)*dx*dy)
+                fdiz = fdiz -two*xnu*dwdzmid*dx*dy
+  
+             enddo
+          enddo
+  !        drag4(kk)=drag4(kk)+fcvx    ! Should be size ny
+          tconvxl2(kk)=tconvxl2(kk)+fcvx
+          tconvyl2(kk)=tconvyl2(kk)+fcvy
+          tconvzl2(kk)=tconvzl2(kk)+fcvz
+          tpreszl(kk) =tpreszl(kk) +fprz
+          tdiffxl2(kk)=tdiffxl2(kk)+fdix
+          tdiffyl2(kk)=tdiffyl2(kk)+fdiy
+          tdiffzl2(kk)=tdiffzl2(kk)+fdiz
+       endif     
+      
+       
+       call MPI_ALLREDUCE(tconvxl,tconvx,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+       call MPI_ALLREDUCE(tconvyl,tconvy,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+       call MPI_ALLREDUCE(tconvzl,tconvz,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+       call MPI_ALLREDUCE(tpresxl,tpresx,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+       call MPI_ALLREDUCE(tpresyl,tpresy,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+       call MPI_ALLREDUCE(tdiffxl,tdiffx,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+       call MPI_ALLREDUCE(tdiffyl,tdiffy,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+       call MPI_ALLREDUCE(tdiffzl,tdiffz,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+  
+       call MPI_ALLREDUCE(tconvxl2,tconvx2,ny,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+       call MPI_ALLREDUCE(tconvyl2,tconvy2,ny,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+       call MPI_ALLREDUCE(tconvzl2,tconvz2,ny,real_type,MPI_SUM,MPI_COMM_WORLD,code)     
+       call MPI_ALLREDUCE(tpreszl, tpresz ,ny,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+       call MPI_ALLREDUCE(tdiffxl2,tdiffx2,ny,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+       call MPI_ALLREDUCE(tdiffyl2,tdiffy2,ny,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+       call MPI_ALLREDUCE(tdiffzl2,tdiffz2,ny,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+  
+       
+  !     call MPI_ALLREDUCE(drag1,drag11,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+  !     call MPI_ALLREDUCE(drag2,drag22,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+  !     call MPI_ALLREDUCE(drag3,drag33,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+  !     call MPI_ALLREDUCE(drag4,drag44,nz,real_type,MPI_SUM,MPI_COMM_WORLD,code)
+                      
+  
+       tp1 = sum(tpresx(:))/dt
+       tp2 = sum(tpresy(:))/dt
+       tp3 = sum(tpresz(:))/dt
+    
+       mom1 = sum(tunstx(:) + tconvx(:) + tconvx2(:))
+       mom2 = sum(tunsty(:) + tconvy(:) + tconvy2(:))
+       mom3 = sum(tunstz(:) + tconvz(:) + tconvz2(:))
+  
+       dra1 = 2.0*(sum(tdiffx) + sum(tdiffx2) + tp1 - mom1)
+       dra2 = 2.0*(sum(tdiffy) + sum(tdiffy2) + tp2 - mom2)
+       dra3 = 2.0*(sum(tdiffz) + sum(tdiffz2) + tp3 - mom3)
+  !print*, dra1, tp1, mom1
+  !print*, dra1, mom1, tp1, sum(tpresx(:))/dt, sum(tunstx(:)), sum(tconvx(:)), sum(tdiffx)
+  
+  !     do k=zcvlf(iv),zcvrt(iv)
+  !        tpresx(k)=tpresx(k)/dt
+  !        tpresy(k)=tpresy(k)/dt
+  !        tpresz(k)=tpresz(k)/dt
+  
+  !        xmom    = tunstx(k)+tconvx(k)
+  !        ymom    = tunsty(k)+tconvy(k)
+  !        zmom    = tunstz(k)+tconvz(k)
+  !        xDrag(k) = two*(tdiffx(k)+tpresx(k)-xmom)
+  !        yLift(k) = two*(tdiffy(k)+tpresy(k)-ymom)
+  !        zLat(k) = two*(tdiffz(k)+tpresz(k)-zmom)
+  !     enddo
+  !     xDrag_mean = sum(xDrag(zcvlf(iv):zcvrt(iv)))/real(zcvrt(iv)-zcvlf(iv))
+  !     yLift_mean = sum(yLift(zcvlf(iv):zcvrt(iv)))/real(zcvrt(iv)-zcvlf(iv))
+  !     zLat_mean = sum(zLat(zcvlf(iv):zcvrt(iv)))/real(zcvrt(iv)-zcvlf(iv))
+  
+            
+       if ((itime==ifirst).or.(itime==0)) then
+          if (nrank .eq. 0) then
+          write(filename,"('aerof',I1.1)") iv
+          open(38+(iv-1),file=filename,status='unknown',form='formatted')
+          endif
+       endif
+       if (nrank .eq. 0) then
+          write(38+(iv-1),*) t, dra1, dra2, dra3!, sum(drag11), sum(drag22), sum(drag11)+sum(drag22), sum(drag33), sum(drag44), sum(drag33)+sum(drag44)  
+  !        write(38+(iv-1),*) t, dra1, dra2, dra3
+       endif
+       if (itime==ilast) then
+          if (nrank .eq. 0) then
+             close(38+(iv-1))
+             write(filename,"('aerof',I1.1)") iv
+             write(filename2,"('aerof',I1.1,'-',I7.7)") iv, itime
+             call system("mv " //filename //filename2)
+          endif
+       endif
+       
+       
+    enddo
+  
+    do k = 1, xsize(3)
+       do j = 1, xsize(2)
+          do i = 1, xsize(1)
+             ux11(i,j,k)=ux01(i,j,k)
+             uy11(i,j,k)=uy01(i,j,k)
+             uz11(i,j,k)=uz01(i,j,k)
+             ux01(i,j,k)=ux1(i,j,k)
+             uy01(i,j,k)=uy1(i,j,k)
+          enddo
+       enddo
+    enddo
+  
+  !  if (imove.eq.1) then
+  !     ux1(:,:,:) = ux1(:,:,:) - 0.5
+  !  endif
+  
+    return
+  
+  end subroutine force
+  
+  
+  
+  
+  
+  
\ No newline at end of file
diff --git a/src/genepsi3d.f90 b/src/genepsi3d.f90
index f53b8a6a9bd5d57c7e92edb37a7e0b023f88bcb6..72f48df5bcd29df566d7ec8ebee8c5b7816fe1db 100644
--- a/src/genepsi3d.f90
+++ b/src/genepsi3d.f90
@@ -55,11 +55,11 @@ contains
 
     IMPLICIT NONE
 
-    INTEGER :: nxi,nxf,ny,nyi,nyf,nzi,nzf
+    INTEGER, intent(in) :: nxi,nxf,ny,nyi,nyf,nzi,nzf
     REAL(mytype),DIMENSION(nxi:nxf,nyi:nyf,nzi:nzf) :: epsi
-    REAL(mytype)               :: dx,dz
-    REAL(mytype),DIMENSION(ny) :: yp
-    REAL(mytype)               :: remp
+    REAL(mytype), intent(in)               :: dx,dz
+    REAL(mytype),DIMENSION(ny), intent(in) :: yp
+    REAL(mytype), intent(in)               :: remp
 
     IF (itype.EQ.itype_cyl) THEN
 
@@ -79,12 +79,110 @@ contains
 
     ELSEIF (itype.EQ.itype_ellip) THEN
 
-       CALL geomcomplex_ellip(epsi, nxi, nxf, ny, nyi, nyf, nzi, nzf, dx, yp, remp)
+       CALL geomcomplex_ellip(epsi, nxi, nxf, ny, nyi, nyf, nzi, nzf, dx, yp, dz, remp)
 
     ENDIF
 
   end subroutine geomcomplex
 !############################################################################
+
+  subroutine param_assign()
+   
+   use ibm_param
+   use ellipsoid_utils, only: NormalizeQuaternion,ellipInertiaCalculate,ellipMassCalculate
+   use param
+   use var, only: nrank
+   real(mytype) :: eqr, ori_dummy(4), ellip_m_dummy, inertia_dummy(3,3)
+   integer :: i,ii,j
+
+   do i =1,nbody 
+         ii = (i-1)*3
+         ! write(*,*) sh(ii+1), sh(ii+2), sh(ii+3)
+         eqr=(sh(ii+1)*sh(ii+2)*sh(ii+3))**(1.0/3.0)
+         if (eqr.lt.0.001) then
+            eqr=1.0
+         endif
+         do j = 1,3
+            shape(i,j) = sh(ii+j)/eqr
+         enddo
+         if (nrank==0) then 
+            write(*,*) "Body ", i, ", eqr = ", eqr
+         
+            write(*,*) i, "'s shape = ", shape(i,:)
+         endif
+      enddo
+
+
+      do i = 1,nbody
+         ii = (i-1)*4
+         do j = 1,4 
+            ori_dummy(j) = ori(ii+j)
+         enddo
+         
+         ! write(*,*) "Body, ", i, "orientation = ", ori_dummy
+         call NormalizeQuaternion(ori_dummy)
+
+         if (nrank==0) then 
+            write(*,*) "Body, ", i, "orientation = ", ori_dummy
+         endif
+         orientation(i,:) = ori_dummy
+      enddo
+      ! call NormalizeQuaternion(orientation)
+      do i = 1,nbody
+         ii = (i-1)*3
+         do j = 1,3
+            position(i,j) = ce(ii+j)
+            ! write(*,*) ce(i,j), position(j,i)
+         enddo
+         if (nrank==0) then
+         write(*,*) "Nbody", i, "position = ", position(i, :)
+         endif
+      enddo
+      ! write(*,*) "CE =       ", ce
+      ! write(*,*) "Position = ", position
+      ! position=ce
+      do i = 1,nbody
+         ii = (i-1)*3
+         do j = 1,3
+            linearVelocity(i,j) = lv(ii+j)
+         enddo
+      enddo
+
+      do i = 1,nbody
+         ii = (i-1)*3
+         angularVelocity(i,1)=zero
+         do j = 1,3
+            angularVelocity(i,j+1)=av(ii+j)
+         enddo
+         if (nrank==0) then 
+            write(*,*) "Nbody", i, "angvel = ", angularVelocity(i, :)
+         endif
+      enddo
+      ! write(*,*) "Ra = ", ra
+
+      do i = 1,nbody
+         if (nrank==0) then 
+         write(*,*) "Nbody = ", i, "Radius = ", ra(i)
+         endif
+      enddo
+      do i = 1,nbody
+         call ellipInertiaCalculate(shape(i,:),rho_s(i),inertia_dummy)
+         inertia(i,:,:) = inertia_dummy
+         if (nrank==0) then 
+
+            write(*,*) "Nbody", i, "InertiaM = ", inertia(i,:,:)
+         endif
+      enddo
+      do i = 1,nbody
+         call ellipMassCalculate(shape(i,:), rho_s(i), ellip_m_dummy)
+         ellip_m(i) = ellip_m_dummy
+         if (nrank==0) then 
+
+            write(*,*) "Nbody", i, "ellip_m = ", ellip_m(i)
+         endif
+      enddo
+
+  end subroutine param_assign
 !############################################################################
   subroutine genepsi3d(ep1)
 
@@ -94,6 +192,7 @@ contains
     USE complex_geometry
     use decomp_2d
 
+
     implicit none
 
     !*****************************************************************!
@@ -109,6 +208,8 @@ contains
     !
     logical :: dir_exists
     real(mytype),dimension(xsize(1),xsize(2),xsize(3)) :: ep1
+
+   !  call param_assign()
     !
     if (nrank==0.and.mod(itime,ilist)==0) then
       write(*,*)'==========================================================='
@@ -213,12 +314,25 @@ contains
     else
        dyraf =yly/real(nyraf-1, mytype)
     endif
+   !  write(*,*) ny, size(yp), size(ypraf), nraf
     do j=1,ny-1
        do jraf=1,nraf
           ypraf(jraf+nraf*(j-1))=yp(j)+real(jraf-1, mytype)*(yp(j+1)-yp(j))/real(nraf, mytype)
+         !  if (ypraf(jraf+nraf*(j-1)) /= ypraf(jraf+nraf*(j-1))) then 
+         !    write(*,*) "At j = ", j, ", jraf = ", jraf, "ypraf = ", yp(j)+real(jraf-1, mytype)*(yp(j+1)-yp(j))/real(nraf, mytype)
+         !  endif
        enddo
     enddo
+    if (ncly) then 
+      do jraf = 1,nraf
+         ypraf(jraf+nraf*(ny-1))=yp(ny)+real(jraf-1,mytype)*(yly-yp(ny))/real(nraf,mytype)
+      enddo
+    endif
+   !  write(*,*) yp
     if(.not.ncly)ypraf(nyraf)=yp(ny)
+   !  if(.not.ncly)write(*,*) "Changed ypraf (", nyraf, "). To ", yp(ny)
+   !  write(*,*) ypraf
+
     yepsi=zero
     call geomcomplex(yepsi,ystart(1),yend(1),nyraf,1,nyraf,ystart(3),yend(3),dx,ypraf,dz,one)
     ! if (nrank==0) print*,'    step 3'
diff --git a/src/ibm.f90 b/src/ibm.f90
index 8a156a9172cecac412b5b3fd3b301f5cc28f221e..14c56931d9eb53bb7916501cabc6e7bf696d7b84 100644
--- a/src/ibm.f90
+++ b/src/ibm.f90
@@ -402,7 +402,7 @@ subroutine cubsplx(u,lind)
   USE decomp_2d
   USE variables
   USE ibm_param
-  USE ellipsoid_utils, ONLY: CalculatePointVelocity
+  USE ellipsoid_utils, ONLY: CalculatePointVelocity_Multi, ibm_bcimp_calc
     !
   implicit none
   !
@@ -420,7 +420,7 @@ subroutine cubsplx(u,lind)
   integer                                            :: inxi,inxf
   real(mytype)                                       :: ana_resi,ana_resf         ! Position of Boundary (Analytically)
   real(mytype)                                       :: point(3),pointVelocity(3)
-  real(mytype)                                       :: xm,ym,zm,x_pv,y_pv,z_pv
+  real(mytype)                                       :: xm,ym,zm
   !
   ! Initialise Arrays
   xa(:)=0.
@@ -431,14 +431,16 @@ subroutine cubsplx(u,lind)
 !   write(*,*) lind
   !
   do k=1,xsize(3)
-   zm=real(xstart(3)+k-2,mytype)*dz
+   zm=real(xstart(3)+k-1,mytype)*dz
      do j=1,xsize(2)
-      ym=real(xstart(2)+j-2,mytype)*dy
+      ym=real(xstart(2)+j-1,mytype)*dy
         if(nobjx(j,k).ne.0)then
            ia=0
            do i=1,nobjx(j,k)          
               !  1st Boundary - I DON'T UNDERSTAND THIS XM CONVERSION.
-            xm=real(xstart(1)+i-2,mytype)*dx
+            ! write(*,*) "Nobjx = ", nobjx(j,k)
+            ! xm=real(xstart(1)+i-2,mytype)*dx
+            xm = xi(i,j,k)
               nxpif=npif
               ia=ia+1
               if (ianal.eq.0) then
@@ -449,32 +451,17 @@ subroutine cubsplx(u,lind)
                  xa(ia)=ana_resi
               endif
               point=[xm,ym,zm]
-              call CalculatePointVelocity(point, position, angularVelocity, linearVelocity, pointVelocity)
-              x_pv=pointVelocity(1)
-              y_pv=pointVelocity(2)
-              z_pv=pointVelocity(3)
-              if (lind.eq.0) then
-               bcimp=zero
-              elseif (lind.eq.1) then
-               bcimp=x_pv
-               ! write(*,*) bcimp
-              elseif (lind.eq.2) then
-               bcimp=y_pv
-              elseif (lind.eq.3) then
-               bcimp=z_pv
-              elseif (lind.eq.4) then 
-               bcimp=x_pv*x_pv
-              elseif (lind.eq.5) then
-               bcimp=y_pv*y_pv
-              elseif (lind.eq.6) then 
-               bcimp=z_pv*z_pv
-              elseif (lind.eq.7) then
-               bcimp=x_pv*y_pv 
-              elseif (lind.eq.8) then 
-               bcimp=x_pv*z_pv
-              elseif (lind.eq.9) then
-               bcimp=y_pv*z_pv
-              endif
+              call CalculatePointVelocity_Multi(point, pointVelocity)
+            !   write(*,*) "Called CPV at ", point, "returned", pointVelocity
+            !   call EllipsoidalRadius(point, position, orientation, shape, dummy)
+            !   dummy = maxval(abs((point(2:3)-position(2:3))))
+            !   if ((dummy.gt.(1.01)).or.(dummy.lt.0.99)) then
+            !    write(*,*) "At ", point, "r = ", dummy
+            !   endif
+            !   write(*,*) "Radius = ", dummy
+               ! write(*,*) "radius = ", dummy, "At point", point
+              call ibm_bcimp_calc(pointVelocity, lind, bcimp)
+
               ya(ia)=bcimp
               if(xi(i,j,k).gt.0.)then ! Immersed Object
                  inxi=0
@@ -519,6 +506,19 @@ subroutine cubsplx(u,lind)
                  call analitic_x(j,xf(i,j,k),ana_resf,k) ! Calculate the position of BC analytically
                  xa(ia)=ana_resf
               endif              
+              xm = xf(i,j,k)
+              point=[xm,ym,zm]
+              call CalculatePointVelocity_Multi(point, pointVelocity)
+            !   write(*,*) "Called CPV at ", point, "returned", pointVelocity
+            !   call EllipsoidalRadius(point, position, orientation, shape, dummy)
+            !   dummy = maxval(abs(point-position))
+            !   write(*,*) "Radius = ", dummy
+            !   dummy = maxval(abs(point-position))
+            !   if ((dummy.gt.(1.01)).or.(dummy.lt.0.99)) then
+            !    write(*,*) "At ", point, "r = ", dummy
+            !   endif
+              call ibm_bcimp_calc(pointVelocity, lind, bcimp)  !take correct part of pointVelocity for equation type.
+
               ya(ia)=bcimp              
               if(xf(i,j,k).lt.xlx)then ! Immersed Object
                  inxf=0
@@ -594,7 +594,7 @@ subroutine cubsply(u,lind)
   USE decomp_2d
   USE variables
   USE ibm_param
-  USE ellipsoid_utils, ONLY: CalculatePointVelocity
+  USE ellipsoid_utils, ONLY: CalculatePointVelocity_Multi, ibm_bcimp_calc
   !
   implicit none
   !
@@ -612,7 +612,7 @@ subroutine cubsply(u,lind)
   integer                                            :: inxi,inxf  
   real(mytype)                                       :: ana_resi,ana_resf
   real(mytype)                                       :: point(3),pointVelocity(3)
-  real(mytype)                                       :: xm,ym,zm,x_pv,y_pv,z_pv
+  real(mytype)                                       :: xm,ym,zm
   !
   ! Initialise Arrays
   xa(:)=0.
@@ -624,13 +624,13 @@ subroutine cubsply(u,lind)
 
   !
   do k=1,ysize(3)
-   zm=real(ystart(3)+k-2,mytype)*dz
+   zm=real(ystart(3)+k-1,mytype)*dz
      do i=1,ysize(1)
-      xm=real(ystart(1)+i-2,mytype)*dx
+      xm=real(ystart(1)+i-1,mytype)*dx
         if(nobjy(i,k).ne.0)then
            ia=0
            do j=1,nobjy(i,k)   
-            ym=real(ystart(2)+j-2,mytype)*dy       
+            ! ym=real(ystart(2)+j-2,mytype)*dy       
               !  1st Boundary
               nypif=npif
               ia=ia+1
@@ -641,33 +641,12 @@ subroutine cubsply(u,lind)
                  call analitic_y(i,yi(j,i,k),ana_resi,k) ! Calculate the position of BC analytically
                  xa(ia)=ana_resi
               endif  
+              ym = yi(j,i,k)
               point=[xm,ym,zm]
-              call CalculatePointVelocity(point, position, angularVelocity, linearVelocity, pointVelocity)
-            !   write(*,*) 'PV = ', pointVelocity
-              x_pv=pointVelocity(1)
-              y_pv=pointVelocity(2)
-              z_pv=pointVelocity(3)
-              if (lind.eq.0) then
-               bcimp=zero
-              elseif (lind.eq.1) then
-               bcimp=x_pv
-              elseif (lind.eq.2) then
-               bcimp=y_pv
-              elseif (lind.eq.3) then
-               bcimp=z_pv
-              elseif (lind.eq.4) then 
-               bcimp=x_pv*x_pv
-              elseif (lind.eq.5) then
-               bcimp=y_pv*y_pv
-              elseif (lind.eq.6) then 
-               bcimp=z_pv*z_pv
-              elseif (lind.eq.7) then
-               bcimp=x_pv*y_pv 
-              elseif (lind.eq.8) then 
-               bcimp=x_pv*z_pv
-              elseif (lind.eq.9) then
-               bcimp=y_pv*z_pv
-              endif
+              call CalculatePointVelocity_Multi(point, pointVelocity)
+              
+              call ibm_bcimp_calc(pointVelocity, lind, bcimp)  !take correct part of pointVelocity for equation type.
+
               ya(ia)=bcimp
               if(yi(j,i,k).gt.0.)then ! Immersed Object
                  jy=1
@@ -677,8 +656,6 @@ subroutine cubsply(u,lind)
                  jy=jy-1
                  jpoli=jy+1
                  if(nyipif(j,i,k).lt.npif)nypif=nyipif(j,i,k)
-                 write(*,*) 'Range in y of 1 - ',nypif
-                 write(*,*) 'jy = ', jy
                  do jpif=1,nypif
                     ia=ia+1
                     if(izap.eq.1)then ! Skip First Points
@@ -719,6 +696,12 @@ subroutine cubsply(u,lind)
                  call analitic_y(i,yf(j,i,k),ana_resf,k) ! Calculate the position of BC analytically
                  xa(ia)=ana_resf
               endif  
+              ym = yf(j,i,k)
+              point=[xm,ym,zm]
+              call CalculatePointVelocity_Multi(point, pointVelocity)
+              
+              call ibm_bcimp_calc(pointVelocity, lind, bcimp)  !take correct part of pointVelocity for equation type.
+
               ya(ia)=bcimp
               if(yf(j,i,k).lt.yly)then ! Immersed Object
                  jy=1
@@ -794,7 +777,7 @@ subroutine cubsplz(u,lind)
   USE decomp_2d
   USE variables
   USE ibm_param
-  USE ellipsoid_utils, ONLY: CalculatePointVelocity
+  USE ellipsoid_utils, ONLY: CalculatePointVelocity_Multi, ibm_bcimp_calc
   !
   implicit none
   !
@@ -812,7 +795,7 @@ subroutine cubsplz(u,lind)
   integer                                            :: inxi,inxf  
   real(mytype)                                       :: ana_resi,ana_resf
   real(mytype)                                       :: point(3),pointVelocity(3)
-  real(mytype)                                       :: xm,ym,zm,x_pv,y_pv,z_pv
+  real(mytype)                                       :: xm,ym,zm
 
   !
   ! Initialise Arrays
@@ -825,14 +808,14 @@ subroutine cubsplz(u,lind)
 
   !
   do j=1,zsize(2)
-   ym=real(zstart(2)+j-2,mytype)*dy
+   ym=real(zstart(2)+j-1,mytype)*dy
      do i=1,zsize(1)
-      xm=real(zstart(1)+i-2,mytype)*dx
+      xm=real(zstart(1)+i-1,mytype)*dx
         if(nobjz(i,j).ne.0)then
            ia=0
            do k=1,nobjz(i,j)          
               !  1st Boundary
-            zm=real(zstart(3)+k-2,mytype)*dz
+            ! zm=real(zstart(3)+k-2,mytype)*dz
               nzpif=npif
               ia=ia+1
               if (ianal.eq.0) then
@@ -842,44 +825,21 @@ subroutine cubsplz(u,lind)
 !                 call analitic_z(i,zi(k,i,j),ana_resi,j) ! Calculate the position of BC analytically
                  xa(ia)=ana_resi
               endif  
+              zm = zi(k,i,j)
               point=[xm,ym,zm]
-              call CalculatePointVelocity(point, position, angularVelocity, linearVelocity, pointVelocity)
-              x_pv=pointVelocity(1)
-              y_pv=pointVelocity(2)
-              z_pv=pointVelocity(3)
-              if (lind.eq.0) then
-               bcimp=zero
-              elseif (lind.eq.1) then
-               bcimp=x_pv
-              elseif (lind.eq.2) then
-               bcimp=y_pv
-              elseif (lind.eq.3) then
-               bcimp=z_pv
-              elseif (lind.eq.4) then 
-               bcimp=x_pv*x_pv
-              elseif (lind.eq.5) then
-               bcimp=y_pv*y_pv
-              elseif (lind.eq.6) then 
-               bcimp=z_pv*z_pv
-              elseif (lind.eq.7) then
-               bcimp=x_pv*y_pv 
-              elseif (lind.eq.8) then 
-               bcimp=x_pv*z_pv
-              elseif (lind.eq.9) then
-               bcimp=y_pv*z_pv
-              endif
+              call CalculatePointVelocity_Multi(point, pointVelocity)
+              
+              call ibm_bcimp_calc(pointVelocity, lind, bcimp)  !take correct part of pointVelocity for equation type.
+
               ya(ia)=bcimp
               if(zi(k,i,j).gt.0.)then ! Immersed Object
                  inxi=0
                  kz=zi(k,i,j)/dz+1
                  kpoli=kz+1
                  if(nzipif(k,i,j).lt.npif)nzpif=nzipif(k,i,j)
-                 write(*,*) 'Range in z of 1 - ',nzpif
-                 write(*,*) 'kz = ', kz
                  do kpif=1,nzpif
                     ia=ia+1
                     if(izap.eq.1)then ! Skip First Points
-                        write(*,*) 'attempting to access index ', kz, ' - ', kpif
                        xa(ia)=(kz-1)*dz-kpif*dz
                        ya(ia)=u(i,j,kz-kpif)
                     else              ! Don't Skip any Points
@@ -913,6 +873,12 @@ subroutine cubsplz(u,lind)
                  !call analitic_z(i,zf(k,i,j),ana_resf,j) ! Calculate the position of BC analytically
                  xa(ia)=ana_resf
               endif
+              zm = zi(k,i,j)
+              point=[xm,ym,zm]
+              call CalculatePointVelocity_Multi(point, pointVelocity)
+              
+              call ibm_bcimp_calc(pointVelocity, lind, bcimp)  !take correct part of pointVelocity for equation type.
+
               ya(ia)=bcimp
               if(zf(k,i,j).lt.zlz)then  ! Immersed Object
                  inxf=0
@@ -1037,6 +1003,12 @@ subroutine cubic_spline(xa,ya,n,x,y)
    alpha(nc)= three*yprf - three*(aa(nc)-aa(nc-1))/hh(nc-1)
    !
    do i=2,nc-1
+      if (hh(i).lt.0.0004) then 
+       write(*,*) 'i = ', i, 'dividing by ', hh(i)
+      end if 
+      if (hh(i-1).lt.0.0004) then 
+         write(*,*) 'i = ', i, 'dividing by hh(i-1)=  ', hh(i)
+        end if 
       alpha(i)=(three/hh(i))*(aa(i+1)-aa(i))-(three/hh(i-1))*(aa(i)-aa(i-1))
    enddo
    ll(1)=two*hh(1)
@@ -1094,9 +1066,9 @@ subroutine ana_y_cyl(i,y_pos,ana_res)
      ceyy = cey
   endif
   if (y_pos.gt.ceyy) then     ! Impose analytical BC
-      ana_res=ceyy + sqrt(ra**2.0-((i+ystart(1)-1-1)*dx-cexx)**2.0)
+      ana_res=ceyy + sqrt(ra(1)**2.0-((i+ystart(1)-1-1)*dx-cexx)**2.0)
   else
-      ana_res=ceyy - sqrt(ra**2.0-((i+ystart(1)-1-1)*dx-cexx)**2.0)
+      ana_res=ceyy - sqrt(ra(1)**2.0-((i+ystart(1)-1-1)*dx-cexx)**2.0)
   endif     
   !
   return
@@ -1125,9 +1097,9 @@ subroutine ana_x_cyl(j,x_pos,ana_res)
      ceyy = cey
   endif
   if (x_pos.gt.cexx) then     ! Impose analytical BC
-      ana_res = cexx + sqrt(ra**2.0-(yp(j+xstart(2)-1)-ceyy)**2.0)
+      ana_res = cexx + sqrt(ra(1)**2.0-(yp(j+xstart(2)-1)-ceyy)**2.0)
   else
-      ana_res = cexx - sqrt(ra**2.0-(yp(j+xstart(2)-1)-ceyy)**2.0)
+      ana_res = cexx - sqrt(ra(1)**2.0-(yp(j+xstart(2)-1)-ceyy)**2.0)
   endif     
   !
   return
diff --git a/src/module_param.f90 b/src/module_param.f90
index e375dddb011a24d9d2ea816c20419fb0ee1b6fe0..33778c7054ce1e11ad08450f89f6eb303d0adbe5 100644
--- a/src/module_param.f90
+++ b/src/module_param.f90
@@ -617,11 +617,12 @@ end module simulation_stats
 !############################################################################
 module ibm_param
   use decomp_2d, only : mytype
-  real(mytype) :: cex,cey,cez,shx,shy,shz,oriw,orii,orij,orik,lvx,lvy,lvz,avx,avy,avz,ra,ubcx,ubcy,ubcz,rads,rho_s,c_air
-  real(mytype) :: position(3),orientation(4),linearVelocity(3),angularVelocity(4),linearAcceleration(3),torque(3),shape(3),inertia(3,3)
-  real(mytype) :: position_1(3),linearVelocity_1(3),orientation_1(4),angularVelocity_1(4)
-  real(mytype) :: chord,thickness,omega
+  real(mytype) :: cex,cey,cez,shx,shy,shz,oriw,orii,orij,orik,lvx,lvy,lvz,avx,avy,avz,ubcx,ubcy,ubcz,rads,c_air,cvl_scalar,grav_y,grav_x,grav_z
+  real(mytype) :: position(10,3),orientation(10,4),linearVelocity(10,3),angularVelocity(10,4),linearAcceleration(3),linearForce(10,3),torque(10,3),shape(10,3)
+  real(mytype) :: position_1(3),linearVelocity_1(3),orientation_1(4),angularVelocity_1(4),ra(10),rho_s(10),ellip_m(10),inertia(10,3,3)
+  real(mytype) :: chord,thickness,omega, tconv2_sign, shear_velocity
+  real(mytype) :: ce(30),sh(30),ori(40), lv(30), av(30)
   integer :: inana ! Analytical BC as Input
-  integer :: imove
+  integer :: imove, nozdrift, force_csv, bodies_fixed, cube_flag, torques_flag,orientations_free, shear_flow_ybc, shear_flow_zbc,torq_debug, torq_flip, ztorq_only, nbody
 end module ibm_param
 !############################################################################
diff --git a/src/navier.f90 b/src/navier.f90
index ee7b048893419161212bf69403ecb667d8ac5eaa..59b8f3bfd9128bf5031e59a47e9c67352cc39bbd 100644
--- a/src/navier.f90
+++ b/src/navier.f90
@@ -289,7 +289,7 @@ contains
        tb1(:,:,:) = uy1(:,:,:)
        tc1(:,:,:) = uz1(:,:,:)
     else if (itype.eq.itype_ellip) then
-       call navierFieldGen(position, linearVelocity, angularVelocity,ep1, ep1_ux, ep1_uy, ep1_uz)
+       call navierFieldGen(ep1, ep1_ux, ep1_uy, ep1_uz)
        ta1(:,:,:) = (one - ep1(:,:,:)) * ux1(:,:,:) + ep1(:,:,:)*ep1_ux(:,:,:)
        tb1(:,:,:) = (one - ep1(:,:,:)) * uy1(:,:,:) + ep1(:,:,:)*ep1_uy(:,:,:)
        tc1(:,:,:) = (one - ep1(:,:,:)) * uz1(:,:,:) + ep1(:,:,:)*ep1_uz(:,:,:)
diff --git a/src/parameters.f90 b/src/parameters.f90
index 3dbc63d09701b4e644c4cebd4ac7dfb8d80bec95..8c9bcc49bd98e1ec13379611a020c316d936f2c2 100644
--- a/src/parameters.f90
+++ b/src/parameters.f90
@@ -29,7 +29,7 @@ subroutine parameter(input_i3d)
 
   use probes, only : nprobes, setup_probes, flag_all_digits, flag_extra_probes, xyzprobes
   use visu, only : output2D
-  use forces, only : iforces, nvol, xld, xrd, yld, yud!, zld, zrd
+  use forces, only : iforces, nvol, xld, xrd, yld, yud, zld, zrd
 
   implicit none
 
@@ -60,8 +60,11 @@ subroutine parameter(input_i3d)
   NAMELIST /LESModel/ jles, smagcst, smagwalldamp, nSmag, walecst, maxdsmagcst, iwall
   NAMELIST /WallModel/ smagwalldamp
   NAMELIST /Tripping/ itrip,A_tr,xs_tr_tbl,ys_tr_tbl,ts_tr_tbl,x0_tr_tbl
-  NAMELIST /ibmstuff/ cex,cey,cez,shx,shy,shz,oriw,orii,orij,orik,lvx,lvy,lvz,avx,avy,avz,ra,nobjmax,nraf,nvol,iforces, npif, izap, ianal, imove, thickness, chord, omega ,ubcx,ubcy,ubcz,rads,rho_s, c_air
-  NAMELIST /ForceCVs/ xld, xrd, yld, yud!, zld, zrd
+  NAMELIST /ibmstuff/ ce,sh,ori,lv,av,ra, &
+      nobjmax,nraf,nvol,iforces, cvl_scalar, npif, izap, ianal, imove, thickness, chord, omega , &
+      ubcx,ubcy,ubcz,rads,rho_s, c_air, grav_x,grav_y,grav_z, nozdrift, force_csv, bodies_fixed, cube_flag, tconv2_sign, &
+      torques_flag, orientations_free, shear_flow_ybc, shear_flow_zbc, shear_velocity, torq_debug, torq_flip, ztorq_only, nbody
+  NAMELIST /ForceCVs/ xld, xrd, yld, yud, zld, zrd
   NAMELIST /LMN/ dens1, dens2, prandtl, ilmn_bound, ivarcoeff, ilmn_solve_temp, &
        massfrac, mol_weight, imultispecies, primary_species, &
        Fr, ibirman_eos
@@ -116,7 +119,7 @@ subroutine parameter(input_i3d)
      read(10, nml=ProbesParam); rewind(10)
   endif
   if (iforces.eq.1) then
-     allocate(xld(nvol), xrd(nvol), yld(nvol), yud(nvol))!, zld(nvol), zrd(nvol))
+     allocate(xld(nvol), xrd(nvol), yld(nvol), yud(nvol), zld(nvol), zrd(nvol))
      read(10, nml=ForceCVs); rewind(10)
   endif
   
@@ -252,7 +255,11 @@ subroutine parameter(input_i3d)
   dy2 = dy * dy
   dz2 = dz * dz
 
-  xnu=one/re
+  if (re.gt.0.001) then 
+   xnu=one/re
+  else 
+   xnu=zero
+  endif
   !! Constant pressure gradient, re = Re_tau -> use to compute Re_centerline
   if (cpg) then
     re_cent = (re/0.116_mytype)**(1.0_mytype/0.88_mytype)
@@ -571,7 +578,7 @@ subroutine parameter_defaults()
   use probes, only : nprobes, flag_all_digits, flag_extra_probes
   use visu, only : output2D
   use forces, only : iforces, nvol
-  use ibm_param, only : oriw,rho_s
+  use ibm_param, only : oriw,rho_s,cvl_scalar,grav_y,grav_x,grav_z,nozdrift,force_csv,nbody,ra
 
   implicit none
 
@@ -613,6 +620,14 @@ subroutine parameter_defaults()
   itest=1
   oriw=one
   rho_s=one
+  cvl_scalar=onepfive
+  grav_y=zero
+  grav_x=zero
+  grav_z=zero
+  nozdrift=0
+  force_csv=0
+  nbody=1
+  ra(:) = 1.0
 
   !! Gravity field
   gravx = zero
diff --git a/src/tools.f90 b/src/tools.f90
index 8088d224bd0cbc92f4ae9fe8d0a032ff991b25e6..fe4712e5036d40aaa7974d6f1db1e7a49963cafb 100644
--- a/src/tools.f90
+++ b/src/tools.f90
@@ -130,6 +130,11 @@ contains
          call MPI_ABORT(MPI_COMM_WORLD,code,ierror)
          stop
        endif
+       if (uxmin1 /= uxmin1) then
+         write(*,*) 'NaN solutions for flow and body occurred!'
+         call MPI_ABORT(MPI_COMM_WORLD,code,ierror)
+         stop
+       endif
 
     endif
 
@@ -161,8 +166,8 @@ contains
           call cpu_time(time1)
           write(*,*) '==========================================================='
           write(*,"(' Time step =',i7,'/',i7,', Time unit =',F9.4)") itime,ilast,t
-          write(*,*) 'Centroid position = ',real(position(1),4),real(position(2),4),real(position(3),4)
-          write(*,*) 'Orientation = ',real(orientation(1),4),real(orientation(2),4),real(orientation(3),4),real(orientation(4),4)
+         !  write(*,*) 'Centroid position = ',real(position(1),4),real(position(2),4),real(position(3),4)
+         !  write(*,*) 'Orientation       = ',real(orientation(1),4),real(orientation(2),4),real(orientation(3),4),real(orientation(4),4)
        endif
     else if ((iwhen == 3).and.(itime > ifirst)) then !AT THE END OF A TIME STEP
        if (nrank == 0.and.(mod(itime, ilist) == 0 .or. itime == ifirst .or. itime==ilast)) then
diff --git a/src/xcompact3d.f90 b/src/xcompact3d.f90
index 6e97086bc9a03bba0e95db6d712fd86dcb6e0ae1..96055fa11db18408af622395a2460014d4255f75 100644
--- a/src/xcompact3d.f90
+++ b/src/xcompact3d.f90
@@ -16,13 +16,46 @@ program xcompact3d
   use ibm_param
   use ibm, only : body
   use genepsi, only : genepsi3d
-  use ellipsoid_utils, only: lin_step, ang_step
-
+  use ellipsoid_utils, only: lin_step, ang_step, QuaternionNorm
+  use forces, only : force, init_forces, iforces,update_forces, xld,xrd,yld,yud,zld,zrd,torque_calc,nvol
   implicit none
+  real(mytype)  :: dummy,drag(10),lift(10),lat(10),grav_effy(10),grav_effx(10),grav_effz(10),xtorq(10),ytorq(10),ztorq(10),maxrad
+  integer :: iounit,ierr,i
+  real, dimension(100) :: x
+  character(len=30) :: filename!, filename2
+
 
 
   call init_xcompact3d()
 
+  iounit = 135
+  !Print forces out on ellip
+  if ((nrank==0).and.(force_csv.eq.1)) then 
+   open(unit=20, file='force_out.dat', status='unknown',form='formatted')
+   ! if (ierr /= 0) then
+   !    print *, 'Error opening file.'
+   !    stop
+   ! end if
+   write(*,*) 'Outputting forces' 
+  end if
+
+  if (nrank==0) then
+   do i = 1,nbody
+      write(filename,"('body.dat',I1.1)") i
+      open(unit=11+i, file=filename, status='unknown', form='formatted')
+   enddo
+  endif
+!   do i = 1,100
+!    x(i) = i
+!   enddo
+!   open(unit=3, file='testcsv.dat', status='new',action='write',iostat=ierr)
+
+!   do i = 1,100
+!    write(3,*) x(i)
+!   enddo
+
+  
+
   do itime=ifirst,ilast
      !t=itime*dt
      t=t0 + (itime0 + itime + 1 - ifirst)*dt
@@ -39,6 +72,7 @@ program xcompact3d
         call apply_spatial_filter(ux1,uy1,uz1,phi1)
      endif
 
+
      do itr=1,iadvance_time
 
         call set_fluid_properties(rho1,mu1)
@@ -47,6 +81,24 @@ program xcompact3d
         if (imove.eq.1) then ! update epsi for moving objects
           if ((iibm.eq.2).or.(iibm.eq.3)) then
              call genepsi3d(ep1)
+             do i = 1,nobjmax 
+               maxrad = max(shape(i,1),shape(i,2),shape(i,3))
+               if (iforces.eq.1) then
+                  xld(i) = position(i,1) - maxrad * ra(i) * cvl_scalar
+                  xrd(i) = position(i,1) + maxrad * ra(i) * cvl_scalar
+                  yld(i) = position(i,2) - maxrad * ra(i) * cvl_scalar
+                  yud(i) = position(i,2) + maxrad * ra(i) * cvl_scalar
+                  zld(i) = position(i,3) - maxrad * ra(i) * cvl_scalar
+                  zrd(i) = position(i,3) + maxrad * ra(i) * cvl_scalar
+                  ! write(*,*) "CV bounds = ", xld(i), xrd(i), yld(i), yud(i), zld(i), zrd(i)
+                  
+               endif
+            enddo
+            if (itime.eq.ifirst) then 
+               call init_forces()
+            else 
+               call update_forces()
+            endif
           else if (iibm.eq.1) then
              call body(ux1,uy1,uz1,ep1)
           endif
@@ -77,18 +129,85 @@ program xcompact3d
         call test_flow(rho1,ux1,uy1,uz1,phi1,ep1,drho1,divu3)
 
         !Add force calculation here
+      !   if (nrank.eq.0) then 
+      !   write(*,*) 'Going to call force from xcompact3d, itr = ', itr
+      !   endif 
+        call force(ux1,uy1,uz1,ep1,drag,lift,lat,1)
+        grav_effx = grav_x*(rho_s-1.0)
+        grav_effy = grav_y*(rho_s-1.0)
+        grav_effz = grav_z*(rho_s-1.0)
+        do i = 1,nbody
+         linearForce(i,:) = [drag(i)-grav_effx(i), lift(i)-grav_effy(i), lat(i)-grav_effz(i)]
+        enddo
+        if (nozdrift==1) then
+            linearForce(:,3)=zero
+        endif
 
-        linearAcceleration(:)=zero
-        torque(:)=zero
+        if (bodies_fixed==1) then
+            linearForce(:,:)=zero
+        endif
 
-        call lin_step(position,linearVelocity,linearAcceleration,dt,position_1,linearVelocity_1)
-        call ang_step(orientation,angularVelocity,torque,dt,orientation_1,angularVelocity_1)
+        if ((nrank==0).and.(force_csv.eq.1)) then
+         ! open(unit=20, file='force_out.dat', action='write')
+         write(20, *) linearForce(1,1), linearForce(1,2), linearForce(1,3)
+         write(*,*) 'Writing forces', linearForce(1,1), linearForce(1,2), linearForce(1,3)
+         flush(20)
+        endif 
+        
+        
+        if (torques_flag.eq.1) then 
+         call torque_calc(ux1,uy1,uz1,ep1,xtorq,ytorq,ztorq,1)
+        endif 
+        if (orientations_free.eq.1) then 
+         do i = 1,nvol 
+            torque(i,:) = [xtorq(i), ytorq(i), ztorq(i)]
+         enddo
+         if (ztorq_only.eq.1) then
+            torque(:,1) = zero
+            torque(:,2) = zero
+         endif
+        else 
+         torque(:,:) = zero
+        endif
+      !   if (nrank==0) then
 
-        position = position_1
-        linearVelocity = linearVelocity_1
+      !   if (bodies_fixed==0) then 
+        do i = 1,nvol
 
-        orientation = orientation_1
-        angularVelocity = angularVelocity_1
+         call lin_step(position(i,:),linearVelocity(i,:),linearForce(i,:),ellip_m(i),dt,position_1,linearVelocity_1)
+         call ang_step(orientation(i,:),angularVelocity(i,:),torque(i,:),inertia(i,:,:),dt,orientation_1,angularVelocity_1)
+        
+         position(i,:) = position_1
+         linearVelocity(i,:) = linearVelocity_1
+
+         orientation(i,:) = orientation_1
+         angularVelocity(i,:) = angularVelocity_1
+        enddo
+
+
+      if ((nrank==0).and.(mod(itime,ilist)==0)) then
+         do i = 1,nbody
+            write(11+i ,*) t, position(i,1), position(i,2), position(i,3), orientation(i,1), orientation(i,2), orientation(i,3), orientation(i,4), linearVelocity(i,1), linearVelocity(i,2), linearVelocity(i,3), angularVelocity(i,2), angularVelocity(i,3), angularVelocity(i,4), linearForce(i,1), linearForce(i,2), linearForce(i,3), torque(i,1), torque(i,2), torque(i,3)
+            flush(11+i)
+         enddo
+      endif
+
+         if ((nrank==0).and.(mod(itime,ilist)==0)) then 
+            do i = 1,nbody
+               write(*,*) "Body", i
+               write(*,*) "Position =         ", position(i,:)
+               write(*,*) "Orientation =      ", orientation(i,:)
+               write(*,*) "Linear velocity =  ", linearVelocity(i,:)
+               write(*,*) "Angular velocity = ", angularVelocity(i,:)
+               write(*,*) "Linear Force = ", linearForce(i,:)
+               write(*,*) "Torque = ", torque(i,:)
+            enddo
+         ! call QuaternionNorm(angularVelocity,dummy)
+
+         ! write(*,*) 'Norm of angvel = ', dummy
+         endif   
+
+      !   endif 
 
       !   if (nrank==0) then 
       !    write(*,*) 'Centroid position is ', position
@@ -105,6 +224,8 @@ program xcompact3d
 
   enddo !! End time loop
 
+  close(iounit)
+
   call finalise_xcompact3d()
 
 end program xcompact3d
@@ -140,11 +261,11 @@ subroutine init_xcompact3d()
 
   use visu, only : visu_init, visu_ready
 
-  use genepsi, only : genepsi3d, epsi_init
+  use genepsi, only : genepsi3d, epsi_init, param_assign
   use ibm, only : body
 
   use probes, only : init_probes
-
+!   use case, only : param_assign
   implicit none
 
   integer :: ierr
@@ -201,6 +322,7 @@ subroutine init_xcompact3d()
   call decomp_info_init(nxm, nym, nz, ph3)
 
   call init_variables()
+  call param_assign()
 
   call schemes()
 
@@ -212,6 +334,7 @@ subroutine init_xcompact3d()
   endif
 
   if ((iibm.eq.2).or.(iibm.eq.3)) then
+   !   call boundary_conditions()
      call genepsi3d(ep1)
   else if (iibm.eq.1) then
      call epsi_init(ep1)
@@ -219,7 +342,7 @@ subroutine init_xcompact3d()
   endif
 
   if (iforces.eq.1) then
-     call init_forces()
+   !   call init_forces()
      if (irestart==1) then
         call restart_forces(0)
      endif