/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator

   Original Version:
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   See the README file in the top-level LAMMPS directory.

   -----------------------------------------------------------------------

   USER-CUDA Package and associated modifications:
   https://sourceforge.net/projects/lammpscuda/

   Christian Trott, christian.trott@tu-ilmenau.de
   Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
   Theoretical Physics II, University of Technology Ilmenau, Germany

   See the README file in the USER-CUDA directory.

   This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing author: Paul Crozier (SNL)
------------------------------------------------------------------------- */

#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "pair_gran_hooke_cuda.h"
#include "pair_gran_hooke_cuda_cu.h"
#include "cuda_data.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "modify.h"
#include "fix_pour.h"
#include "cuda_neigh_list.h"
#include "update.h"
#include "integrate.h"
#include "respa.h"
#include "memory.h"
#include "error.h"
#include "cuda.h"

using namespace LAMMPS_NS;

/* ---------------------------------------------------------------------- */

PairGranHookeCuda::PairGranHookeCuda(LAMMPS *lmp) : PairGranHooke(lmp)
{
  cuda = lmp->cuda;
   if(cuda == NULL)
        error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");

        allocated2 = false;
        cuda->shared_data.pair.cudable_force = 1;
        cuda->setSystemParams();
}

/* ----------------------------------------------------------------------
   remember pointer to arrays in cuda shared data
------------------------------------------------------------------------- */

void PairGranHookeCuda::allocate()
{
        if(! allocated) PairGranHooke::allocate();
        if(! allocated2)
        {
                allocated2 = true;
                 int n = atom->ntypes;
                cuda->shared_data.pair.cutsq     = cutsq;
                memory->create(cuda->shared_data.pair.coeff1,n+1,n+1,
                               "pair:cuda_coeff1");
                memory->create(cuda->shared_data.pair.coeff2,
                               n+1,n+1,"pair:cuda_coeff2");
                cuda->shared_data.pair.coeff1[0][0]=kn;
                cuda->shared_data.pair.coeff1[0][1]=kt;
                cuda->shared_data.pair.coeff1[1][0]=gamman;
                cuda->shared_data.pair.coeff1[1][1]=gammat;
                cuda->shared_data.pair.coeff2[0][0]=xmu;
                cuda->shared_data.pair.coeff2[0][1]=dampflag;
        }
}

/* ---------------------------------------------------------------------- */

void PairGranHookeCuda::compute(int eflag, int vflag)
{
             cuda->shared_data.pair.use_block_per_atom = 0;
        //cuda->cu_debugdata->memset_device(0);
        if (eflag || vflag) ev_setup(eflag,vflag);
        if(eflag) cuda->cu_eng_vdwl->upload();
        if(vflag) cuda->cu_virial->upload();

        Cuda_PairGranHookeCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);

    if(not cuda->shared_data.pair.collect_forces_later)
    {
          if(eflag) cuda->cu_eng_vdwl->download();
          if(vflag) cuda->cu_virial->download();
    }
        //cuda->cu_debugdata->download();
        //printf("%lf %lf %lf %lf %lf %lf\n",1.0e-6*cuda->debugdata[0],1.0e-6*cuda->debugdata[1],1.0e-6*cuda->debugdata[2],1.0e-6*cuda->debugdata[3],1.0e-6*cuda->debugdata[4],1.0e-6*cuda->debugdata[5]);

}

/* ---------------------------------------------------------------------- */

void PairGranHookeCuda::settings(int narg, char **arg)
{
        PairGranHooke::settings(narg, arg);
 }

/* ---------------------------------------------------------------------- */

void PairGranHookeCuda::coeff(int narg, char **arg)
{
        PairGranHooke::coeff(narg, arg);
        allocate();
}

void PairGranHookeCuda::init_style()
{
        int i;
        MYDBG(printf("# CUDA PairGranHookeCuda::init_style start\n"); )
  // request regular or rRESPA neighbor lists

  int irequest;

  if (update->whichflag == 0 && strstr(update->integrate_style,"respa")) {

  }
  else
  {
          irequest = neighbor->request(this);
    neighbor->requests[irequest]->full = 1;
    neighbor->requests[irequest]->half = 0;
    neighbor->requests[irequest]->gran = 1;
    neighbor->requests[irequest]->cudable = 1;
    //neighbor->style=0; //0=NSQ neighboring
  }

  if (!atom->radius_flag || !atom->omega_flag || !atom->torque_flag)
    error->all(FLERR,"Pair granular requires atom attributes radius, omega, torque");
  if (comm->ghost_velocity == 0)
    error->all(FLERR,"Pair granular requires ghost atoms store velocity");

  // need a half neigh list and optionally a granular history neigh list

  dt = update->dt;



  // check for Fix freeze and set freeze_group_bit

  for (i = 0; i < modify->nfix; i++)
    if (strcmp(modify->fix[i]->style,"freeze") == 0) break;
  if (i < modify->nfix) freeze_group_bit = modify->fix[i]->groupbit;
  else freeze_group_bit = 0;

  cuda->shared_data.pair.freeze_group_bit=freeze_group_bit;
  // check for Fix pour and set pour_type and pour_maxdiam

  int pour_type = 0;
  double pour_maxrad = 0.0;
  for (i = 0; i < modify->nfix; i++)
    if (strcmp(modify->fix[i]->style,"pour") == 0) break;
  if (i < modify->nfix) {
    pour_type = ((FixPour *) modify->fix[i])->ntype;
    pour_maxrad = ((FixPour *) modify->fix[i])->radius_hi;
  }

  // set maxrad_dynamic and maxrad_frozen for each type
  // include future Fix pour particles as dynamic

  for (i = 1; i <= atom->ntypes; i++)
    onerad_dynamic[i] = onerad_frozen[i] = 0.0;
  if (pour_type) onerad_dynamic[pour_type] = pour_maxrad;

  double *radius = atom->radius;
  int *mask = atom->mask;
  int *type = atom->type;
  int nlocal = atom->nlocal;

  for (i = 0; i < nlocal; i++){
    if (mask[i] & freeze_group_bit)
      onerad_frozen[type[i]] = MAX(onerad_frozen[type[i]],radius[i]);
    else
      onerad_dynamic[type[i]] = MAX(onerad_dynamic[type[i]],radius[i]);
  }

  MPI_Allreduce(&onerad_dynamic[1],&maxrad_dynamic[1],atom->ntypes,
                MPI_DOUBLE,MPI_MAX,world);
  MPI_Allreduce(&onerad_frozen[1],&maxrad_frozen[1],atom->ntypes,
                MPI_DOUBLE,MPI_MAX,world);

  MYDBG(printf("# CUDA PairGranHookeCuda::init_style end\n"); )
}

void PairGranHookeCuda::init_list(int id, NeighList *ptr)
{
        MYDBG(printf("# CUDA PairGranHookeCuda::init_list\n");)
        PairGranHooke::init_list(id, ptr);
        #ifndef CUDA_USE_BINNING
        // right now we can only handle verlet (id 0), not respa
        if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
        // see Neighbor::init() for details on lammps lists' logic
        #endif
        MYDBG(printf("# CUDA PairGranHookeCuda::init_list end\n");)
}

void PairGranHookeCuda::ev_setup(int eflag, int vflag)
{
        int maxeatomold=maxeatom;
        PairGranHooke::ev_setup(eflag,vflag);

  if (eflag_atom && atom->nmax > maxeatomold)
        {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax  );}

  if (eflag_atom && atom->nmax > maxeatomold)
        {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6  );}

}