diff --git a/doc/src/Eqs/pair_ufm.jpg b/doc/src/Eqs/pair_ufm.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..40273da68063274cd6d6496d38f93ff97121df91
Binary files /dev/null and b/doc/src/Eqs/pair_ufm.jpg differ
diff --git a/doc/src/Eqs/pair_ufm.tex b/doc/src/Eqs/pair_ufm.tex
new file mode 100644
index 0000000000000000000000000000000000000000..8a6dde55202f0d19d55e9309fac41d00876f4b5c
--- /dev/null
+++ b/doc/src/Eqs/pair_ufm.tex
@@ -0,0 +1,14 @@
+\documentclass[12pt]{article}
+
+\begin{document}
+
+$$
+	E = -\varepsilon\, \ln{\left[1-\exp{\left(-r^{2}/\sigma^{2}\right)}\right]} \qquad  r < r_c
+$$
+
+$$
+  \varepsilon = p\,k_B\,T
+$$
+
+\end{document}
+
diff --git a/doc/src/Section_commands.txt b/doc/src/Section_commands.txt
index 35fe153473a371fc21c89627cfe782cd8bbad530..02ce6f2882170665c584111f15c9f4df6881bdb7 100644
--- a/doc/src/Section_commands.txt
+++ b/doc/src/Section_commands.txt
@@ -1022,6 +1022,7 @@ KOKKOS, o = USER-OMP, t = OPT.
 "tip4p/cut (o)"_pair_coul.html,
 "tip4p/long (o)"_pair_coul.html,
 "tri/lj"_pair_tri_lj.html,
+"ufm (got)"_pair_ufm.html,
 "vashishta (ko)"_pair_vashishta.html,
 "vashishta/table (o)"_pair_vashishta.html,
 "yukawa (gok)"_pair_yukawa.html,
diff --git a/doc/src/fix_ti_spring.txt b/doc/src/fix_ti_spring.txt
index afb1dcf8fffe855b41ac19f62e6504afd8b13540..191f9e7c6b8d8667b5baef112f20d16fb435a382 100644
--- a/doc/src/fix_ti_spring.txt
+++ b/doc/src/fix_ti_spring.txt
@@ -34,7 +34,7 @@ by performing a nonequilibrium thermodynamic integration between the
 solid of interest and an Einstein crystal. A detailed explanation of
 how to use this command and choose its parameters for optimal
 performance and accuracy is given in the paper by
-"Freitas"_#Freitas. The paper also presents a short summary of the
+"Freitas"_#Freitas1. The paper also presents a short summary of the
 theory of nonequilibrium thermodynamic integrations.
 
 The thermodynamic integration procedure is performed by rescaling the
@@ -67,13 +67,13 @@ of lambda is kept equal to zero and the fix has no other effect on the
 dynamics of the system.
 
 The processes described above is known as nonequilibrium thermodynamic
-integration and is has been shown ("Freitas"_#Freitas) to present a
+integration and is has been shown ("Freitas"_#Freitas1) to present a
 much superior efficiency when compared to standard equilibrium
 methods. The reason why the switching it is made in both directions
 (potential to Einstein crystal and back) is to eliminate the
 dissipated heat due to the nonequilibrium process. Further details
 about nonequilibrium thermodynamic integration and its implementation
-in LAMMPS is available in "Freitas"_#Freitas.
+in LAMMPS is available in "Freitas"_#Freitas1.
 
 The {function} keyword allows the use of two different lambda
 paths. Option {1} results in a constant rate of change of lambda with
@@ -94,7 +94,7 @@ thermodynamic integration. The use of option {2} is recommended since
 it results in better accuracy and less dissipation without any
 increase in computational resources cost.
 
-NOTE: As described in "Freitas"_#Freitas, it is important to keep the
+NOTE: As described in "Freitas"_#Freitas1, it is important to keep the
 center-of-mass fixed during the thermodynamic integration. A nonzero
 total velocity will result in divergences during the integration due
 to the fact that the atoms are 'attached' to their equilibrium
@@ -156,7 +156,7 @@ The keyword default is function = 1.
 
 :line
 
-:link(Freitas)
+:link(Freitas1)
 [(Freitas)] Freitas, Asta, and de Koning, Computational Materials
 Science, 112, 333 (2016).
 
diff --git a/doc/src/lammps.book b/doc/src/lammps.book
index 54762a885e6743df62ca8979cd92b842ca91783a..d4e73453b0aebf5d7d02f432d9d189e05f5e56df 100644
--- a/doc/src/lammps.book
+++ b/doc/src/lammps.book
@@ -512,6 +512,7 @@ pair_tersoff_mod.html
 pair_tersoff_zbl.html
 pair_thole.html
 pair_tri_lj.html
+pair_ufm.html
 pair_vashishta.html
 pair_yukawa.html
 pair_yukawa_colloid.html
diff --git a/doc/src/pair_ufm.txt b/doc/src/pair_ufm.txt
new file mode 100644
index 0000000000000000000000000000000000000000..88a22864ccaf960472dbe5ea3f985ea2d375ab23
--- /dev/null
+++ b/doc/src/pair_ufm.txt
@@ -0,0 +1,135 @@
+"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
+
+:link(lws,http://lammps.sandia.gov)
+:link(ld,Manual.html)
+:link(lc,Section_commands.html#comm)
+
+:line
+
+pair_style ufm command :h3
+pair_style ufm/gpu command :h3
+pair_style ufm/omp command :h3
+pair_style ufm/opt command :h3
+
+[Syntax:]
+
+pair_style ufm cutoff :pre
+
+cutoff = global cutoff for {ufm} interactions (distance units) :ul
+
+[Examples:]
+
+pair_style ufm 4.0
+pair_coeff 1 1 100.0 1.0 2.5
+pair_coeff * * 100.0 1.0 :pre
+
+
+pair_style ufm 4.0
+pair_coeff * * 10.0 1.0
+variable prefactor equal ramp(10,100)
+fix 1 all adapt 1 pair ufm epsilon * * v_prefactor :pre
+
+[Description:]
+
+Style {ufm} computes pairwise interactions using the Uhlenbeck-Ford model (UFM) potential "(Paula Leite2016)"_#PL2 which is given by
+
+:c,image(Eqs/pair_ufm.jpg)
+
+where rc is the cutoff, sigma is a distance-scale and epsilon is an energy-scale, i.e., a product of Boltzmann constant kB, temperature T and the Uhlenbeck-Ford p-parameter which is responsible
+to control the softness of the interactions "(Paula Leite2017)"_#PL1.
+This model is useful as a reference system for fluid-phase free-energy calculations "(Paula Leite2016)"_#PL2. 
+
+The following coefficients must be defined for each pair of atom types
+via the "pair_coeff"_pair_coeff.html command as in the examples above,
+or in the data file or restart files read by the
+"read_data"_read_data.html or "read_restart"_read_restart.html
+commands, or by mixing as described below:
+
+epsilon (energy units)
+sigma (distance units)
+cutoff (distance units) :ul
+
+The last coefficient is optional.  If not specified, the global {ufm}
+cutoff is used.
+
+
+The "fix adapt"_fix_adapt.html command can be used to vary epsilon and sigma for this pair style over the course of a simulation, in which case
+pair_coeff settings for epsilon and sigma must still be specified, but will be
+overridden.  For example these commands will vary the prefactor epsilon for
+all pairwise interactions from 10.0 at the beginning to 100.0 at the end
+of a run:
+
+variable prefactor equal ramp(10,100)
+fix 1 all adapt 1 pair ufm epsilon * * v_prefactor :pre
+
+NOTE: The thermodynamic integration procedure can be performed with this potential using "fix adapt"_fix_adapt.html. This command will rescale the force on each atom by varying a scale variable, which always starts with value 1.0. The syntax is the same described above, however, changing epsilon to scale. A detailed explanation of how to use this command and perform nonequilibrium thermodynamic integration in LAMMPS is given in the paper by "(Freitas)"_#Freitas2.
+
+:line
+
+Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are
+functionally the same as the corresponding style without the suffix.
+They have been optimized to run faster, depending on your available
+hardware, as discussed in "Section 5"_Section_accelerate.html
+of the manual.  The accelerated styles take the same arguments and
+should produce the same results, except for round-off and precision
+issues.
+
+These accelerated styles are part of the GPU, USER-INTEL, KOKKOS,
+USER-OMP and OPT packages, respectively.  They are only enabled if
+LAMMPS was built with those packages.  See the "Making
+LAMMPS"_Section_start.html#start_3 section for more info.
+
+You can specify the accelerated styles explicitly in your input script
+by including their suffix, or you can use the "-suffix command-line
+switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can
+use the "suffix"_suffix.html command in your input script.
+
+See "Section 5"_Section_accelerate.html of the manual for
+more instructions on how to use the accelerated styles effectively.
+
+:line
+
+[Mixing, shift, table, tail correction, restart, rRESPA info]:
+
+For atom type pairs I,J and I != J, the A coefficient and cutoff
+distance for this pair style can be mixed.  A is always mixed via a
+{geometric} rule.  The cutoff is mixed according to the pair_modify
+mix value.  The default mix value is {geometric}.  See the
+"pair_modify" command for details.
+
+This pair style support the "pair_modify"_pair_modify.html shift option for the energy of the pair interaction.
+
+The "pair_modify"_pair_modify.html table and tail are not relevant for this
+pair style.
+
+This pair style does not support the "pair_modify"_pair_modify.html tail option for adding long-range tail corrections to energy and pressure.
+
+This pair style writes its information to "binary restart
+files"_restart.html, so pair_style and pair_coeff commands do not need
+to be specified in an input script that reads a restart file.
+
+This pair style can only be used via the {pair} keyword of the
+"run_style respa"_run_style.html command.  It does not support the
+{inner}, {middle}, {outer} keywords.
+
+:line
+
+[Restrictions:] none
+
+[Related commands:]
+
+"pair_coeff"_pair_coeff.html, "fix adapt"_fix_adapt.html
+
+[Default:] none
+
+
+:link(PL1)
+[(Paula Leite2017)] Paula Leite, Santos-Florez, and de Koning, Phys Rev E, 96,
+32115 (2017).
+
+:link(PL2)
+[(Paula Leite2016)] Paula Leite , Freitas, Azevedo, and de Koning, J Chem Phys, 126,
+044509 (2016).
+
+:link(Freitas2)
+[(Freitas)] Freitas, Asta, and de Koning, Computational Materials Science, 112, 333 (2016).
diff --git a/doc/src/pairs.txt b/doc/src/pairs.txt
index f1c4e77041a720d917efc38bfa0858ce5f50bc92..b4defe03850db0dd3917ae1966c83fa12cf7d580 100644
--- a/doc/src/pairs.txt
+++ b/doc/src/pairs.txt
@@ -102,6 +102,7 @@ Pair Styles :h1
    pair_tersoff_zbl
    pair_thole
    pair_tri_lj
+   pair_ufm
    pair_vashishta
    pair_yukawa
    pair_yukawa_colloid
diff --git a/lib/gpu/Nvidia.makefile b/lib/gpu/Nvidia.makefile
index ee2eb72632cadfc8d2ce126f034b245efebbe2eb..5f692cf66c8bbe9834ca36f81786fef1ad2e9ae5 100644
--- a/lib/gpu/Nvidia.makefile
+++ b/lib/gpu/Nvidia.makefile
@@ -76,7 +76,8 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_ans.o \
        $(OBJ_DIR)/lal_coul.o $(OBJ_DIR)/lal_coul_ext.o \
        $(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o \
        $(OBJ_DIR)/lal_zbl.o $(OBJ_DIR)/lal_zbl_ext.o \
-       $(OBJ_DIR)/lal_lj_cubic.o $(OBJ_DIR)/lal_lj_cubic_ext.o
+       $(OBJ_DIR)/lal_lj_cubic.o $(OBJ_DIR)/lal_lj_cubic_ext.o \
+       $(OBJ_DIR)/lal_ufm.o $(OBJ_DIR)/lal_ufm_ext.o 
 
 CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
        $(OBJ_DIR)/atom.cubin $(OBJ_DIR)/atom_cubin.h \
@@ -131,7 +132,8 @@ CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
        $(OBJ_DIR)/coul.cubin $(OBJ_DIR)/coul_cubin.h \
        $(OBJ_DIR)/coul_debye.cubin $(OBJ_DIR)/coul_debye_cubin.h \
        $(OBJ_DIR)/zbl.cubin $(OBJ_DIR)/zbl_cubin.h \
-       $(OBJ_DIR)/lj_cubic.cubin $(OBJ_DIR)/lj_cubic_cubin.h
+       $(OBJ_DIR)/lj_cubic.cubin $(OBJ_DIR)/lj_cubic_cubin.h \
+       $(OBJ_DIR)/ufm.cubin $(OBJ_DIR)/ufm_cubin.h
 
 all: $(OBJ_DIR) $(GPU_LIB) $(EXECS)
 
@@ -705,6 +707,18 @@ $(OBJ_DIR)/dpd.cubin: lal_dpd.cu lal_precision.h lal_preprocessor.h
 $(OBJ_DIR)/dpd_cubin.h: $(OBJ_DIR)/dpd.cubin $(OBJ_DIR)/dpd.cubin
 	$(BIN2C) -c -n dpd $(OBJ_DIR)/dpd.cubin > $(OBJ_DIR)/dpd_cubin.h
 
+$(OBJ_DIR)/ufm.cubin: lal_ufm.cu lal_precision.h lal_preprocessor.h
+	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_ufm.cu
+
+$(OBJ_DIR)/ufm_cubin.h: $(OBJ_DIR)/ufm.cubin $(OBJ_DIR)/ufm.cubin
+	$(BIN2C) -c -n ufm $(OBJ_DIR)/ufm.cubin > $(OBJ_DIR)/ufm_cubin.h
+
+$(OBJ_DIR)/lal_ufm.o: $(ALL_H) lal_ufm.h lal_ufm.cpp $(OBJ_DIR)/ufm_cubin.h $(OBJ_DIR)/lal_base_atomic.o
+	$(CUDR) -o $@ -c lal_ufm.cpp -I$(OBJ_DIR)
+
+$(OBJ_DIR)/lal_ufm_ext.o: $(ALL_H) lal_ufm.h lal_ufm_ext.cpp lal_base_atomic.h
+	$(CUDR) -o $@ -c lal_ufm_ext.cpp -I$(OBJ_DIR)
+
 $(OBJ_DIR)/lal_dpd.o: $(ALL_H) lal_dpd.h lal_dpd.cpp $(OBJ_DIR)/dpd_cubin.h $(OBJ_DIR)/lal_base_dpd.o
 	$(CUDR) -o $@ -c lal_dpd.cpp -I$(OBJ_DIR)
 
diff --git a/lib/gpu/README b/lib/gpu/README
index b26897e885379c95eb8cda4b69850dad70879dff..15b65516acbb59c6f1a4b6b610d1de7712c77ed0 100644
--- a/lib/gpu/README
+++ b/lib/gpu/README
@@ -135,6 +135,7 @@ Current styles supporting GPU acceleration:
     38  yukawa/colloid
     39  yukawa
     40  pppm
+    41  ufm
 
 
                      MULTIPLE LAMMPS PROCESSES
diff --git a/lib/gpu/lal_ufm.cpp b/lib/gpu/lal_ufm.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c7aa2cca3907fd409f04df46854464c29e3a2d90
--- /dev/null
+++ b/lib/gpu/lal_ufm.cpp
@@ -0,0 +1,172 @@
+/***************************************************************************
+                                   ufm.cpp
+                             -------------------
+                            Rodolfo Paula Leite (Unicamp/Brazil)
+                            Maurice de Koning (Unicamp/Brazil)
+
+  Class for acceleration of the ufm pair style.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : 
+    email                : pl.rodolfo@gmail.com
+                           dekoning@ifi.unicamp.br
+ ***************************************************************************/
+
+#if defined(USE_OPENCL)
+#include "ufm_cl.h"
+#elif defined(USE_CUDART)
+const char *ufm=0;
+#else
+#include "ufm_cubin.h"
+#endif
+
+#include "lal_ufm.h"
+#include <cassert>
+using namespace LAMMPS_AL;
+#define UFMT UFM<numtyp, acctyp>
+
+extern Device<PRECISION,ACC_PRECISION> device;
+
+template <class numtyp, class acctyp>
+UFMT::UFM() : BaseAtomic<numtyp,acctyp>(), _allocated(false) {
+}
+
+template <class numtyp, class acctyp>
+UFMT::~UFM() {
+  clear();
+}
+ 
+template <class numtyp, class acctyp>
+int UFMT::bytes_per_atom(const int max_nbors) const {
+  return this->bytes_per_atom_atomic(max_nbors);
+}
+
+template <class numtyp, class acctyp>
+int UFMT::init(const int ntypes,
+                          double **host_cutsq, double **host_uf1, 
+                          double **host_uf2, double **host_uf3, 
+                          double **host_uf4, double **host_offset, 
+                          double *host_special_lj, const int nlocal,
+                          const int nall, const int max_nbors,
+                          const int maxspecial, const double cell_size,
+                          const double gpu_split, FILE *_screen) {
+  int success;
+  success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
+                            _screen,ufm,"k_ufm");
+  if (success!=0)
+    return success;
+
+  // If atom type constants fit in shared memory use fast kernel
+  int lj_types=ntypes;
+  shared_types=false;
+  int max_shared_types=this->device->max_shared_types();
+  if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
+    lj_types=max_shared_types;
+    shared_types=true;
+  }
+  _lj_types=lj_types;
+
+  // Allocate a host write buffer for data initialization
+  UCL_H_Vec<numtyp> host_write(lj_types*lj_types*32,*(this->ucl_device),
+                               UCL_WRITE_ONLY);
+
+  for (int i=0; i<lj_types*lj_types; i++)
+    host_write[i]=0.0;
+
+  uf1.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
+  this->atom->type_pack4(ntypes,lj_types,uf1,host_write,host_uf1,host_uf2,
+			 host_cutsq);
+
+  uf3.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
+  this->atom->type_pack4(ntypes,lj_types,uf3,host_write,host_uf3,host_uf4,
+		         host_offset);
+
+  UCL_H_Vec<double> dview;
+  sp_lj.alloc(4,*(this->ucl_device),UCL_READ_ONLY);
+  dview.view(host_special_lj,4,*(this->ucl_device));
+  ucl_copy(sp_lj,dview,false);
+
+  _allocated=true;
+  this->_max_bytes=uf1.row_bytes()+uf3.row_bytes()+sp_lj.row_bytes();
+  return 0;
+}
+
+template <class numtyp, class acctyp>
+void UFMT::reinit(const int ntypes, double **host_cutsq, double **host_uf1,
+                 double **host_uf2, double **host_uf3,
+                 double **host_uf4, double **host_offset) {
+  // Allocate a host write buffer for data initialization
+  UCL_H_Vec<numtyp> host_write(_lj_types*_lj_types*32,*(this->ucl_device),
+                               UCL_WRITE_ONLY);
+  
+  for (int i=0; i<_lj_types*_lj_types; i++)
+    host_write[i]=0.0;
+  
+  this->atom->type_pack4(ntypes,_lj_types,uf1,host_write,host_uf1,host_uf2,
+                         host_cutsq);
+  this->atom->type_pack4(ntypes,_lj_types,uf3,host_write,host_uf3,host_uf4,
+                         host_offset);
+}
+
+template <class numtyp, class acctyp>
+void UFMT::clear() {
+  if (!_allocated)
+    return;
+  _allocated=false;
+
+  uf1.clear();
+  uf3.clear();
+  sp_lj.clear();
+  this->clear_atomic();
+}
+
+template <class numtyp, class acctyp>
+double UFMT::host_memory_usage() const {
+  return this->host_memory_usage_atomic()+sizeof(UFM<numtyp,acctyp>);
+}
+
+// ---------------------------------------------------------------------------
+// Calculate energies, forces, and torques
+// ---------------------------------------------------------------------------
+template <class numtyp, class acctyp>
+void UFMT::loop(const bool _eflag, const bool _vflag) {
+  // Compute the block size and grid size to keep all cores busy
+  const int BX=this->block_size();
+  int eflag, vflag;
+  if (_eflag)
+    eflag=1;
+  else
+    eflag=0;
+
+  if (_vflag)
+    vflag=1;
+  else
+    vflag=0;
+  
+  int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
+                               (BX/this->_threads_per_atom)));
+
+  int ainum=this->ans->inum();
+  int nbor_pitch=this->nbor->nbor_pitch();
+  this->time_pair.start();
+  if (shared_types) {
+    this->k_pair_fast.set_size(GX,BX);
+    this->k_pair_fast.run(&this->atom->x, &uf1, &uf3, &sp_lj,
+                          &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+                          &this->ans->force, &this->ans->engv, &eflag,
+                          &vflag, &ainum, &nbor_pitch, 
+                          &this->_threads_per_atom);
+  } else {
+    this->k_pair.set_size(GX,BX);
+    this->k_pair.run(&this->atom->x, &uf1, &uf3, &_lj_types, &sp_lj,
+                     &this->nbor->dev_nbor, &this->_nbor_data->begin(), 
+                     &this->ans->force, &this->ans->engv, &eflag, &vflag,
+                     &ainum, &nbor_pitch, &this->_threads_per_atom);
+  }
+  this->time_pair.stop();
+}
+
+template class UFM<PRECISION,ACC_PRECISION>;
diff --git a/lib/gpu/lal_ufm.cu b/lib/gpu/lal_ufm.cu
new file mode 100644
index 0000000000000000000000000000000000000000..51c4df3b5b9cfac3516ece739aebb0e7b3afd123
--- /dev/null
+++ b/lib/gpu/lal_ufm.cu
@@ -0,0 +1,188 @@
+/***************************************************************************
+                                   ufm.cu
+                             -------------------
+                            Rodolfo Paula Leite (Unicamp/Brazil)
+                            Maurice de Koning (Unicamp/Brazil)
+
+  Device code for acceleration of the ufm pair style
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : 
+    email                : pl.rodolfo@gmail.com
+                           dekoning@ifi.unicamp.br
+ ***************************************************************************/
+
+#ifdef NV_KERNEL
+#include "lal_aux_fun1.h"
+#ifndef _DOUBLE_DOUBLE
+texture<float4> pos_tex;
+#else
+texture<int4,1> pos_tex;
+#endif
+#else
+#define pos_tex x_
+#endif
+
+__kernel void k_ufm(const __global numtyp4 *restrict x_,
+                   const __global numtyp4 *restrict uf1,
+                   const __global numtyp4 *restrict uf3,
+                   const int lj_types,
+                   const __global numtyp *restrict sp_lj,
+                   const __global int * dev_nbor, 
+                   const __global int * dev_packed, 
+                   __global acctyp4 *restrict ans, 
+                   __global acctyp *restrict engv, 
+                   const int eflag, const int vflag, const int inum,
+                   const int nbor_pitch, const int t_per_atom) {
+  int tid, ii, offset;
+  atom_info(t_per_atom,ii,tid,offset);
+
+  acctyp energy=(acctyp)0;
+  acctyp4 f;
+  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
+  acctyp virial[6];
+  for (int i=0; i<6; i++)
+    virial[i]=(acctyp)0;
+  
+  if (ii<inum) {
+    int i, numj, nbor, nbor_end;
+    __local int n_stride;
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
+              n_stride,nbor_end,nbor);
+  
+    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
+    int itype=ix.w;
+
+    numtyp factor_lj;
+    for ( ; nbor<nbor_end; nbor+=n_stride) {
+  
+      int j=dev_packed[nbor];
+      factor_lj = sp_lj[sbmask(j)];
+      j &= NEIGHMASK;
+
+      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      int jtype=jx.w;
+
+      // Compute r12
+      numtyp delx = ix.x-jx.x;
+      numtyp dely = ix.y-jx.y;
+      numtyp delz = ix.z-jx.z;
+      numtyp rsq = delx*delx+dely*dely+delz*delz;
+ 
+      int mtype=itype*lj_types+jtype;
+      if (rsq<uf1[mtype].z) {
+        numtyp expuf = exp(- rsq * uf1[mtype].y);
+        numtyp force = factor_lj * uf1[mtype].x * expuf / (1.0 - expuf);
+      
+        f.x += delx*force;
+        f.y += dely*force;
+        f.z += delz*force;
+
+        if (eflag>0) {
+          energy += - factor_lj * uf3[mtype].x*log(1.0 - expuf) - uf3[mtype].z;
+        }
+        if (vflag>0) {
+          virial[0] += delx*delx*force;
+          virial[1] += dely*dely*force;
+          virial[2] += delz*delz*force;
+          virial[3] += delx*dely*force;
+          virial[4] += delx*delz*force;
+          virial[5] += dely*delz*force;
+        }
+      }
+
+    } // for nbor
+    store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag,
+                  ans,engv);
+  } // if ii
+}
+
+__kernel void k_ufm_fast(const __global numtyp4 *restrict x_,
+                        const __global numtyp4 *restrict uf1_in,
+                        const __global numtyp4 *restrict uf3_in, 
+                        const __global numtyp *restrict sp_lj_in,
+                        const __global int * dev_nbor, 
+                        const __global int * dev_packed, 
+                        __global acctyp4 *restrict ans, 
+                        __global acctyp *restrict engv, 
+                        const int eflag, const int vflag, const int inum, 
+                        const int nbor_pitch, const int t_per_atom) {
+  int tid, ii, offset;
+  atom_info(t_per_atom,ii,tid,offset);
+  
+  __local numtyp4 uf1[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
+  __local numtyp4 uf3[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
+  __local numtyp sp_lj[4];
+  if (tid<4)
+    sp_lj[tid]=sp_lj_in[tid];
+  if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
+    uf1[tid]=uf1_in[tid];
+    if (eflag>0)
+      uf3[tid]=uf3_in[tid];
+  }
+  
+  acctyp energy=(acctyp)0;
+  acctyp4 f;
+  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
+  acctyp virial[6];
+  for (int i=0; i<6; i++)
+    virial[i]=(acctyp)0;
+
+  __syncthreads();
+  
+  if (ii<inum) {
+    int i, numj, nbor, nbor_end;
+    __local int n_stride;
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
+              n_stride,nbor_end,nbor);
+
+    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
+    int iw=ix.w;
+    int itype=fast_mul((int)MAX_SHARED_TYPES,iw);
+
+    numtyp factor_lj;
+    for ( ; nbor<nbor_end; nbor+=n_stride) {
+  
+      int j=dev_packed[nbor];
+      factor_lj = sp_lj[sbmask(j)];
+      j &= NEIGHMASK;
+
+      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      int mtype=itype+jx.w;
+
+      // Compute r12
+      numtyp delx = ix.x-jx.x;
+      numtyp dely = ix.y-jx.y;
+      numtyp delz = ix.z-jx.z;
+      numtyp rsq = delx*delx+dely*dely+delz*delz;
+        
+      if (rsq<uf1[mtype].z) {
+        numtyp expuf = exp(- rsq * uf1[mtype].y);
+        numtyp force = factor_lj * uf1[mtype].x * expuf / (1.0 - expuf);
+      
+        f.x += delx*force;
+        f.y += dely*force;
+        f.z += delz*force;
+
+        if (eflag>0) {
+          energy += - factor_lj * uf3[mtype].x * log(1.0 - expuf) - uf3[mtype].z;
+        }
+        if (vflag>0) {
+          virial[0] += delx*delx*force;
+          virial[1] += dely*dely*force;
+          virial[2] += delz*delz*force;
+          virial[3] += delx*dely*force;
+          virial[4] += delx*delz*force;
+          virial[5] += dely*delz*force;
+        }
+      }
+
+    } // for nbor
+    store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag,
+                  ans,engv);
+  } // if ii
+}
+
diff --git a/lib/gpu/lal_ufm.h b/lib/gpu/lal_ufm.h
new file mode 100644
index 0000000000000000000000000000000000000000..aeeaacbe99003c36e01d1a49586f60ec0bacb4ac
--- /dev/null
+++ b/lib/gpu/lal_ufm.h
@@ -0,0 +1,86 @@
+/***************************************************************************
+                                   ufm.h
+                             -------------------
+                            Rodolfo Paula Leite (Unicamp/Brazil)
+                            Maurice de Koning (Unicamp/Brazil)
+
+   Class for acceleration of the ufm pair style.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : 
+    email                :  pl.rodolfo@gmail.com
+                            dekoning@ifi.unicamp.br
+ ***************************************************************************/
+
+#ifndef LAL_UFM_H
+#define LAL_UFM_H
+
+#include "lal_base_atomic.h"
+
+namespace LAMMPS_AL {
+
+template <class numtyp, class acctyp>
+class UFM : public BaseAtomic<numtyp, acctyp> {
+ public:
+  UFM();
+  ~UFM();
+
+  /// Clear any previous data and set up for a new LAMMPS run
+  /** \param max_nbors initial number of rows in the neighbor matrix
+    * \param cell_size cutoff + skin
+    * \param gpu_split fraction of particles handled by device
+    * 
+    * Returns:
+    * -  0 if successfull
+    * - -1 if fix gpu not found
+    * - -3 if there is an out of memory error
+    * - -4 if the GPU library was not compiled for GPU
+    * - -5 Double precision is not supported on card **/
+  int init(const int ntypes, double **host_cutsq,
+           double **host_uf1, double **host_uf2, double **host_uf3,
+           double **host_uf4, double **host_offset, double *host_special_lj,
+           const int nlocal, const int nall, const int max_nbors, 
+           const int maxspecial, const double cell_size, 
+           const double gpu_split, FILE *screen);
+  
+  /// Send updated coeffs from host to device (to be compatible with fix adapt)
+  void reinit(const int ntypes, double **host_cutsq,
+              double **host_uf1, double **host_uf2, double **host_uf3,
+              double **host_uf4, double **host_offset);
+  
+  /// Clear all host and device data
+  /** \note This is called at the beginning of the init() routine **/
+  void clear();
+
+  /// Returns memory usage on device per atom
+  int bytes_per_atom(const int max_nbors) const;
+
+  /// Total host memory used by library for pair style
+  double host_memory_usage() const;
+
+  // --------------------------- TYPE DATA --------------------------
+
+  /// uf1.x = uf1, uf1.y = uf2, uf1.z = cutsq
+  UCL_D_Vec<numtyp4> uf1;
+  /// uf3.x = uf3, uf3.y = uf4, uf3.z = offset
+  UCL_D_Vec<numtyp4> uf3;
+  /// Special LJ values
+  UCL_D_Vec<numtyp> sp_lj;
+
+  /// If atom type constants fit in shared memory, use fast kernels
+  bool shared_types;
+
+  /// Number of atom types 
+  int _lj_types;
+
+ private:
+  bool _allocated;
+  void loop(const bool _eflag, const bool _vflag);
+};
+
+}
+
+#endif
diff --git a/lib/gpu/lal_ufm_ext.cpp b/lib/gpu/lal_ufm_ext.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ae4a5fb8fc9fe11e658271154178610173541683
--- /dev/null
+++ b/lib/gpu/lal_ufm_ext.cpp
@@ -0,0 +1,143 @@
+/***************************************************************************
+                                   ufm_ext.cpp
+                           ------------------------------
+                            Rodolfo Paula Leite (Unicamp/Brazil)
+                            Maurice de Koning (Unicamp/Brazil)
+
+  Functions for LAMMPS access to ufm acceleration routines.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : 
+    email                : pl.rodolfo@gmail.com
+                           dekoning@ifi.unicamp.br
+ ***************************************************************************/
+
+#include <iostream>
+#include <cassert>
+#include <math.h>
+
+#include "lal_ufm.h"
+
+using namespace std;
+using namespace LAMMPS_AL;
+
+static UFM<PRECISION,ACC_PRECISION> UFMLMF;
+
+// ---------------------------------------------------------------------------
+// Allocate memory on host and device and copy constants to device
+// ---------------------------------------------------------------------------
+int ufml_gpu_init(const int ntypes, double **cutsq, double **host_uf1,
+                 double **host_uf2, double **host_uf3, double **host_uf4,
+                 double **offset, double *special_lj, const int inum, const int nall,
+		 const int max_nbors,  const int maxspecial, const double cell_size,
+		 int &gpu_mode, FILE *screen) {
+  UFMLMF.clear();
+  gpu_mode=UFMLMF.device->gpu_mode();
+  double gpu_split=UFMLMF.device->particle_split();
+  int first_gpu=UFMLMF.device->first_device();
+  int last_gpu=UFMLMF.device->last_device();
+  int world_me=UFMLMF.device->world_me();
+  int gpu_rank=UFMLMF.device->gpu_rank();
+  int procs_per_gpu=UFMLMF.device->procs_per_gpu();
+
+  UFMLMF.device->init_message(screen,"ufm",first_gpu,last_gpu);
+
+  bool message=false;
+  if (UFMLMF.device->replica_me()==0 && screen)
+    message=true;
+
+  if (message) {
+    fprintf(screen,"Initializing Device and compiling on process 0...");
+    fflush(screen);
+  }
+
+  int init_ok=0;
+  if (world_me==0)
+    init_ok=UFMLMF.init(ntypes, cutsq, host_uf1, host_uf2, host_uf3,
+                       host_uf4, offset, special_lj, inum, nall, 300,
+                       maxspecial, cell_size, gpu_split, screen);
+
+  UFMLMF.device->world_barrier();
+  if (message)
+    fprintf(screen,"Done.\n");
+
+  for (int i=0; i<procs_per_gpu; i++) {
+    if (message) {
+      if (last_gpu-first_gpu==0)
+        fprintf(screen,"Initializing Device %d on core %d...",first_gpu,i);
+      else
+        fprintf(screen,"Initializing Devices %d-%d on core %d...",first_gpu,
+                last_gpu,i);
+      fflush(screen);
+    }
+    if (gpu_rank==i && world_me!=0)
+      init_ok=UFMLMF.init(ntypes, cutsq, host_uf1, host_uf2, host_uf3, host_uf4,
+                         offset, special_lj, inum, nall, 300, maxspecial,
+                         cell_size, gpu_split, screen);
+
+    UFMLMF.device->gpu_barrier();
+    if (message) 
+      fprintf(screen,"Done.\n");
+  }
+  if (message)
+    fprintf(screen,"\n");
+
+  if (init_ok==0)
+    UFMLMF.estimate_gpu_overhead();
+  return init_ok;
+}
+
+// ---------------------------------------------------------------------------
+// Copy updated coeffs from host to device
+// ---------------------------------------------------------------------------
+void ufml_gpu_reinit(const int ntypes, double **cutsq, double **host_uf1,
+                    double **host_uf2, double **host_uf3, double **host_uf4,
+                    double **offset) {
+  int world_me=UFMLMF.device->world_me();
+  int gpu_rank=UFMLMF.device->gpu_rank();
+  int procs_per_gpu=UFMLMF.device->procs_per_gpu();
+  
+  if (world_me==0)
+    UFMLMF.reinit(ntypes, cutsq, host_uf1, host_uf2, host_uf3, host_uf4, offset);
+  UFMLMF.device->world_barrier();
+  
+  for (int i=0; i<procs_per_gpu; i++) {
+    if (gpu_rank==i && world_me!=0)
+      UFMLMF.reinit(ntypes, cutsq, host_uf1, host_uf2, host_uf3, host_uf4, offset);
+    UFMLMF.device->gpu_barrier();
+  }
+}
+
+void ufml_gpu_clear() {
+  UFMLMF.clear();
+}
+
+int ** ufml_gpu_compute_n(const int ago, const int inum_full,
+                        const int nall, double **host_x, int *host_type,
+                        double *sublo, double *subhi, tagint *tag, int **nspecial,
+                        tagint **special, const bool eflag, const bool vflag,
+                        const bool eatom, const bool vatom, int &host_start,
+                        int **ilist, int **jnum, const double cpu_time,
+                        bool &success) {
+  return UFMLMF.compute(ago, inum_full, nall, host_x, host_type, sublo,
+                       subhi, tag, nspecial, special, eflag, vflag, eatom,
+                       vatom, host_start, ilist, jnum, cpu_time, success);
+}  
+			
+void ufml_gpu_compute(const int ago, const int inum_full, const int nall,
+                     double **host_x, int *host_type, int *ilist, int *numj,
+                     int **firstneigh, const bool eflag, const bool vflag,
+                     const bool eatom, const bool vatom, int &host_start,
+                     const double cpu_time, bool &success) {
+  UFMLMF.compute(ago,inum_full,nall,host_x,host_type,ilist,numj,
+                firstneigh,eflag,vflag,eatom,vatom,host_start,cpu_time,success);
+}
+
+double ufml_gpu_bytes() {
+  return UFMLMF.host_memory_usage();
+}
+
+
diff --git a/src/GPU/Install.sh b/src/GPU/Install.sh
index f4aeaa2706c7e66bce6e04fe91985a80d7f7f1e0..88f47a3dc4959c8936d9053fe4c23bd3a5d475f4 100644
--- a/src/GPU/Install.sh
+++ b/src/GPU/Install.sh
@@ -131,6 +131,8 @@ action pair_zbl_gpu.cpp
 action pair_zbl_gpu.h
 action pppm_gpu.cpp pppm.cpp
 action pppm_gpu.h pppm.cpp
+action pair_ufm_gpu.cpp
+action pair_ufm_gpu.h
 
 # edit 2 Makefile.package files to include/exclude package info
 
diff --git a/src/GPU/pair_ufm_gpu.cpp b/src/GPU/pair_ufm_gpu.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..96af0dc0695ef3ecc6b728c9432d0f0d56aefea9
--- /dev/null
+++ b/src/GPU/pair_ufm_gpu.cpp
@@ -0,0 +1,240 @@
+/* -*- c++ -*- ----------------------------------------------------------
+ LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+ http://lammps.sandia.gov, Sandia National Laboratories
+ Steve Plimpton, sjplimp@sandia.gov
+ 
+ Copyright (2003) Sandia Corporation.  Under the terms of Contract
+ DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+ certain rights in this software.  This software is distributed under
+ the GNU General Public License.
+ 
+ See the README file in the top-level LAMMPS directory.
+ ------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+ Contributing author:
+            Rodolfo Paula Leite (Unicamp/Brazil) - pl.rodolfo@gmail.com
+            Maurice de Koning (Unicamp/Brazil) - dekoning@ifi.unicamp.br
+ ------------------------------------------------------------------------- */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "pair_ufm_gpu.h"
+#include "atom.h"
+#include "atom_vec.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "integrate.h"
+#include "memory.h"
+#include "error.h"
+#include "neigh_request.h"
+#include "universe.h"
+#include "update.h"
+#include "domain.h"
+#include <string.h>
+#include "gpu_extra.h"
+
+using namespace LAMMPS_NS;
+
+// External functions from cuda library for atom decomposition
+
+int ufml_gpu_init(const int ntypes, double **cutsq, double **host_uf1,
+                 double **host_uf2, double **host_uf3, double **host_uf4,
+                 double **offset, double *special_lj, const int nlocal,
+                 const int nall, const int max_nbors, const int maxspecial,
+                 const double cell_size, int &gpu_mode, FILE *screen);
+
+int ufml_gpu_reinit(const int ntypes, double **cutsq, double **host_uf1,
+                   double **host_uf2, double **host_uf3, double **host_uf4,
+		   double **offset);
+
+void ufml_gpu_clear();
+int ** ufml_gpu_compute_n(const int ago, const int inum,
+                         const int nall, double **host_x, int *host_type,
+                         double *sublo, double *subhi, tagint *tag, int **nspecial,
+                         tagint **special, const bool eflag, const bool vflag,
+                         const bool eatom, const bool vatom, int &host_start,
+                         int **ilist, int **jnum,
+                         const double cpu_time, bool &success);
+void ufml_gpu_compute(const int ago, const int inum, const int nall,
+                     double **host_x, int *host_type, int *ilist, int *numj,
+                     int **firstneigh, const bool eflag, const bool vflag,
+                     const bool eatom, const bool vatom, int &host_start,
+                     const double cpu_time, bool &success);
+double ufml_gpu_bytes();
+
+/* ---------------------------------------------------------------------- */
+
+PairUFMGPU::PairUFMGPU(LAMMPS *lmp) : PairUFM(lmp), gpu_mode(GPU_FORCE)
+{
+  respa_enable = 0;
+  cpu_time = 0.0;
+  GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
+}
+
+/* ----------------------------------------------------------------------
+   free all arrays
+------------------------------------------------------------------------- */
+
+PairUFMGPU::~PairUFMGPU()
+{
+  ufml_gpu_clear();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairUFMGPU::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = vflag_fdotr = 0;
+
+  int nall = atom->nlocal + atom->nghost;
+  int inum, host_start;
+
+  bool success = true;
+  int *ilist, *numneigh, **firstneigh;
+  if (gpu_mode != GPU_FORCE) {
+    inum = atom->nlocal;
+    firstneigh = ufml_gpu_compute_n(neighbor->ago, inum, nall,
+                                   atom->x, atom->type, domain->sublo,
+                                   domain->subhi, atom->tag, atom->nspecial,
+                                   atom->special, eflag, vflag, eflag_atom,
+                                   vflag_atom, host_start,
+                                   &ilist, &numneigh, cpu_time, success);
+  } else {
+    inum = list->inum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+    ufml_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
+                    ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
+                    vflag_atom, host_start, cpu_time, success);
+  }
+  if (!success)
+    error->one(FLERR,"Insufficient memory on accelerator");
+
+  if (host_start<inum) {
+    cpu_time = MPI_Wtime();
+    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
+    cpu_time = MPI_Wtime() - cpu_time;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+void PairUFMGPU::init_style()
+{
+//  cut_respa = NULL;
+
+  if (force->newton_pair)
+    error->all(FLERR,"Cannot use newton pair with ufm/gpu pair style");
+
+  // Repeat cutsq calculation because done after call to init_style
+  double maxcut = -1.0;
+  double cut;
+  for (int i = 1; i <= atom->ntypes; i++) {
+    for (int j = i; j <= atom->ntypes; j++) {
+      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
+        cut = init_one(i,j);
+        cut *= cut;
+        if (cut > maxcut)
+          maxcut = cut;
+        cutsq[i][j] = cutsq[j][i] = cut;
+      } else
+        cutsq[i][j] = cutsq[j][i] = 0.0;
+    }
+  }
+  double cell_size = sqrt(maxcut) + neighbor->skin;
+
+  int maxspecial=0;
+  if (atom->molecular)
+    maxspecial=atom->maxspecial;
+  int success = ufml_gpu_init(atom->ntypes+1, cutsq, uf1, uf2, uf3, uf4,
+                             offset, force->special_lj, atom->nlocal,
+                             atom->nlocal+atom->nghost, 300, maxspecial,
+                             cell_size, gpu_mode, screen);
+  GPU_EXTRA::check_flag(success,error,world);
+
+  if (gpu_mode == GPU_FORCE) {
+    int irequest = neighbor->request(this,instance_me);
+    neighbor->requests[irequest]->half = 0;
+    neighbor->requests[irequest]->full = 1;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairUFMGPU::reinit()
+{
+  Pair::reinit();
+
+  ufml_gpu_reinit(atom->ntypes+1, cutsq, uf1, uf2, uf3, uf4, offset);
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairUFMGPU::memory_usage()
+{
+  double bytes = Pair::memory_usage();
+  return bytes + ufml_gpu_bytes();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairUFMGPU::cpu_compute(int start, int inum, int eflag, int vflag,
+                               int *ilist, int *numneigh, int **firstneigh) {
+  int i,j,ii,jj,jnum,itype,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
+  double rsq,expuf,factor_lj;
+  int *jlist;
+
+  double **x = atom->x;
+  double **f = atom->f;
+  int *type = atom->type;
+  double *special_lj = force->special_lj;
+
+
+  // loop over neighbors of my atoms
+
+  for (ii = start; ii < inum; ii++) {
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+	expuf = exp(- rsq * uf2[itype][jtype]);
+        fpair = factor_lj * uf1[itype][jtype] * expuf /(1.0 - expuf);
+
+	f[i][0] += delx*fpair;
+        f[i][1] += dely*fpair;
+        f[i][2] += delz*fpair;
+
+        if (eflag) {
+          evdwl = -factor_lj * uf3[itype][jtype] * log(1.0 - expuf) - offset[itype][jtype];
+        }
+
+        if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
+      }
+    }
+  }
+}
diff --git a/src/GPU/pair_ufm_gpu.h b/src/GPU/pair_ufm_gpu.h
new file mode 100644
index 0000000000000000000000000000000000000000..59b883f3aa06023ef5f3c5ffc439576c4fb17ff0
--- /dev/null
+++ b/src/GPU/pair_ufm_gpu.h
@@ -0,0 +1,65 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+ Contributing author:
+            Rodolfo Paula Leite (Unicamp/Brazil) - pl.rodolfo@gmail.com
+            Maurice de Koning (Unicamp/Brazil) - dekoning@ifi.unicamp.br
+ ------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(ufm/gpu,PairUFMGPU)
+
+#else
+
+#ifndef LMP_PAIR_UFM_GPU_H
+#define LMP_PAIR_UFM_GPU_H
+
+#include "pair_ufm.h"
+
+namespace LAMMPS_NS {
+
+class PairUFMGPU : public PairUFM {
+ public:
+  PairUFMGPU(LAMMPS *lmp);
+  ~PairUFMGPU();
+  void cpu_compute(int, int, int, int, int *, int *, int **);
+  void compute(int, int);
+  void init_style();
+  void reinit();
+  double memory_usage();
+
+ enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
+
+ private:
+  int gpu_mode;
+  double cpu_time;
+};
+
+}
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Insufficient memory on accelerator
+
+There is insufficient memory on one of the devices specified for the gpu
+package
+
+E: Cannot use newton pair with ufm/gpu pair style
+
+Self-explanatory.
+
+*/
diff --git a/src/OPT/Install.sh b/src/OPT/Install.sh
index ca1231c615c40a95164b90a6e79fbc5335461744..c6ae2b914b8c248052669cc0eea4f1e206d47a67 100644
--- a/src/OPT/Install.sh
+++ b/src/OPT/Install.sh
@@ -46,3 +46,5 @@ action pair_lj_long_coul_long_opt.cpp pair_lj_long_coul_long.cpp
 action pair_lj_long_coul_long_opt.h pair_lj_long_coul_long.cpp
 action pair_morse_opt.cpp
 action pair_morse_opt.h
+action pair_ufm_opt.cpp
+action pair_ufm_opt.h
diff --git a/src/OPT/pair_ufm_opt.cpp b/src/OPT/pair_ufm_opt.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f6f4c4ce3e7bc2a7a04a97db049c2a239e863288
--- /dev/null
+++ b/src/OPT/pair_ufm_opt.cpp
@@ -0,0 +1,198 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+ Contributing author:
+            Rodolfo Paula Leite (Unicamp/Brazil) - pl.rodolfo@gmail.com
+            Maurice de Koning (Unicamp/Brazil) - dekoning@ifi.unicamp.br
+ ------------------------------------------------------------------------- */
+
+#include <stdlib.h>
+#include <math.h>
+#include "pair_ufm_opt.h"
+#include "atom.h"
+#include "force.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairUFMOpt::PairUFMOpt(LAMMPS *lmp) : PairUFM(lmp) {}
+
+/* ---------------------------------------------------------------------- */
+
+void PairUFMOpt::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = vflag_fdotr = 0;
+
+  if (evflag) {
+    if (eflag) {
+      if (force->newton_pair) return eval<1,1,1>();
+      else return eval<1,1,0>();
+    } else {
+      if (force->newton_pair) return eval<1,0,1>();
+      else return eval<1,0,0>();
+    }
+  } else {
+    if (force->newton_pair) return eval<0,0,1>();
+    else return eval<0,0,0>();
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template < int EVFLAG, int EFLAG, int NEWTON_PAIR >
+void PairUFMOpt::eval()
+{
+  typedef struct { double x,y,z; } vec3_t;
+
+  typedef struct {
+    double cutsq,uf1,uf2,uf3,uf4,scale,offset;
+    double _pad[2];
+  } fast_alpha_t;
+
+  int i,j,ii,jj,inum,jnum,itype,jtype,sbindex;
+  double factor_lj;
+  double evdwl = 0.0;
+
+  double** _noalias x = atom->x;
+  double** _noalias f = atom->f;
+  int* _noalias type = atom->type;
+  int nlocal = atom->nlocal;
+  double* _noalias special_lj = force->special_lj;
+
+  inum = list->inum;
+  int* _noalias ilist = list->ilist;
+  int** _noalias firstneigh = list->firstneigh;
+  int* _noalias numneigh = list->numneigh;
+
+  vec3_t* _noalias xx = (vec3_t*)x[0];
+  vec3_t* _noalias ff = (vec3_t*)f[0];
+
+  int ntypes = atom->ntypes;
+  int ntypes2 = ntypes*ntypes;
+
+  fast_alpha_t* _noalias fast_alpha =
+    (fast_alpha_t*) malloc(ntypes2*sizeof(fast_alpha_t));
+  for (i = 0; i < ntypes; i++) for (j = 0; j < ntypes; j++) {
+    fast_alpha_t& a = fast_alpha[i*ntypes+j];
+    a.cutsq = cutsq[i+1][j+1];
+    a.uf1 = uf1[i+1][j+1];
+    a.uf2 = uf2[i+1][j+1];
+    a.uf3 = uf3[i+1][j+1];
+    a.uf4 = uf4[i+1][j+1];
+    a.scale = scale[i+1][j+1];
+    a.offset = offset[i+1][j+1];
+  }
+  fast_alpha_t* _noalias tabsix = fast_alpha;
+
+  // loop over neighbors of my atoms
+
+  for (ii = 0; ii < inum; ii++) {
+    i = ilist[ii];
+    double xtmp = xx[i].x;
+    double ytmp = xx[i].y;
+    double ztmp = xx[i].z;
+    itype = type[i] - 1;
+    int* _noalias jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    double tmpfx = 0.0;
+    double tmpfy = 0.0;
+    double tmpfz = 0.0;
+
+    fast_alpha_t* _noalias tabsixi = (fast_alpha_t*)&tabsix[itype*ntypes];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      sbindex = sbmask(j);
+
+      if (sbindex == 0) {
+        double delx = xtmp - xx[j].x;
+        double dely = ytmp - xx[j].y;
+        double delz = ztmp - xx[j].z;
+        double rsq = delx*delx + dely*dely + delz*delz;
+
+        jtype = type[j] - 1;
+
+        fast_alpha_t& a = tabsixi[jtype];
+
+        if (rsq < a.cutsq) {
+          double expuf = exp(- rsq * a.uf2);
+          double fpair = a.scale * a.uf1 * expuf / (1.0 - expuf);
+
+          tmpfx += delx*fpair;
+          tmpfy += dely*fpair;
+          tmpfz += delz*fpair;
+          if (NEWTON_PAIR || j < nlocal) {
+            ff[j].x -= delx*fpair;
+            ff[j].y -= dely*fpair;
+            ff[j].z -= delz*fpair;
+          }
+
+          if (EFLAG) evdwl = - a.uf3 * log(1.0 - expuf) - a.offset;
+
+          if (EVFLAG)
+            ev_tally(i,j,nlocal,NEWTON_PAIR,
+                     evdwl,0.0,fpair,delx,dely,delz);
+        }
+
+      } else {
+        factor_lj = special_lj[sbindex];
+        j &= NEIGHMASK;
+
+        double delx = xtmp - xx[j].x;
+        double dely = ytmp - xx[j].y;
+        double delz = ztmp - xx[j].z;
+        double rsq = delx*delx + dely*dely + delz*delz;
+
+        int jtype1 = type[j];
+        jtype = jtype1 - 1;
+
+        fast_alpha_t& a = tabsixi[jtype];
+        if (rsq < a.cutsq) {
+          fast_alpha_t& a = tabsixi[jtype];
+          double expuf = exp(- rsq * a.uf2);
+          double fpair = a.scale * factor_lj * a.uf1 * expuf / (1.0 - expuf);
+
+          tmpfx += delx*fpair;
+          tmpfy += dely*fpair;
+          tmpfz += delz*fpair;
+          if (NEWTON_PAIR || j < nlocal) {
+            ff[j].x -= delx*fpair;
+            ff[j].y -= dely*fpair;
+            ff[j].z -= delz*fpair;
+          }
+
+          if (EFLAG) {
+            evdwl = - a.uf3 * log(1.0 - expuf) - a.offset;
+            evdwl *= factor_lj;
+          }
+
+          if (EVFLAG) ev_tally(i,j,nlocal,NEWTON_PAIR,
+                               evdwl,0.0,fpair,delx,dely,delz);
+        }
+      }
+    }
+
+    ff[i].x += tmpfx;
+    ff[i].y += tmpfy;
+    ff[i].z += tmpfz;
+  }
+
+  free(fast_alpha); fast_alpha = 0;
+
+  if (vflag_fdotr) virial_fdotr_compute();
+}
diff --git a/src/OPT/pair_ufm_opt.h b/src/OPT/pair_ufm_opt.h
new file mode 100644
index 0000000000000000000000000000000000000000..edac708403ee3c8dfb906088d1196208cd93c1b4
--- /dev/null
+++ b/src/OPT/pair_ufm_opt.h
@@ -0,0 +1,45 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+ Contributing author:
+            Rodolfo Paula Leite (Unicamp/Brazil) - pl.rodolfo@gmail.com
+            Maurice de Koning (Unicamp/Brazil) - dekoning@ifi.unicamp.br
+ ------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(ufm/opt,PairUFMOpt)
+
+#else
+
+#ifndef LMP_PAIR_UFM_OPT_H
+#define LMP_PAIR_UFM_OPT_H
+
+#include "pair_ufm.h"
+
+namespace LAMMPS_NS {
+
+class PairUFMOpt : public PairUFM {
+ public:
+  PairUFMOpt(class LAMMPS *);
+  void compute(int, int);
+
+ private:
+  template < int EVFLAG, int EFLAG, int NEWTON_PAIR > void eval();
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_ufm_omp.cpp b/src/USER-OMP/pair_ufm_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b2e2cd29ee9026a9647bc825ef9dd63a1825dc79
--- /dev/null
+++ b/src/USER-OMP/pair_ufm_omp.cpp
@@ -0,0 +1,159 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+ Contributing author:
+            Rodolfo Paula Leite (Unicamp/Brazil) - pl.rodolfo@gmail.com
+            Maurice de Koning (Unicamp/Brazil) - dekoning@ifi.unicamp.br
+ ------------------------------------------------------------------------- */
+
+#include <math.h>
+#include "pair_ufm_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+#include "suffix.h"
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairUFMOMP::PairUFMOMP(LAMMPS *lmp) :
+  PairUFM(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  suffix_flag |= Suffix::OMP;
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairUFMOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+        if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+        else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+        if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+        else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    thr->timer(Timer::PAIR);
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairUFMOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,ii,jj,jnum,itype,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
+  double rsq,expuf,factor;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  evdwl = 0.0;
+
+  const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
+  dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
+  const int * _noalias const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * _noalias const special_lj = force->special_lj;
+  double fxtmp,fytmp,fztmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    xtmp = x[i].x;
+    ytmp = x[i].y;
+    ztmp = x[i].z;
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=0.0;
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor = special_lj[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j].x;
+      dely = ytmp - x[j].y;
+      delz = ztmp - x[j].z;
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+        expuf = exp(- rsq * uf2[itype][jtype]);
+        fpair = factor * scale[itype][jtype] * uf1[itype][jtype] * expuf /(1.0 - expuf);
+
+        fxtmp += delx*fpair;
+        fytmp += dely*fpair;
+        fztmp += delz*fpair;
+        if (NEWTON_PAIR || j < nlocal) {
+          f[j].x -= delx*fpair;
+          f[j].y -= dely*fpair;
+          f[j].z -= delz*fpair;
+        }
+
+        if (EFLAG) {
+          evdwl = -uf3[itype][jtype] * log(1.0 - expuf) - offset[itype][jtype];
+          evdwl *= factor;
+        }
+
+        if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+                                 evdwl,0.0,fpair,delx,dely,delz,thr);
+      }
+    }
+    f[i].x += fxtmp;
+    f[i].y += fytmp;
+    f[i].z += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairUFMOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairUFM::memory_usage();
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_ufm_omp.h b/src/USER-OMP/pair_ufm_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..2a01da15d0cf920ce36bbab8af9d0b766a084b1c
--- /dev/null
+++ b/src/USER-OMP/pair_ufm_omp.h
@@ -0,0 +1,50 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+ Contributing author:
+            Rodolfo Paula Leite (Unicamp/Brazil) - pl.rodolfo@gmail.com
+            Maurice de Koning (Unicamp/Brazil) - dekoning@ifi.unicamp.br
+ ------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(ufm/omp,PairUFMOMP)
+
+#else
+
+#ifndef LMP_PAIR_UFM_OMP_H
+#define LMP_PAIR_UFM_OMP_H
+
+#include "pair_ufm.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairUFMOMP : public PairUFM, public ThrOMP {
+
+ public:
+  PairUFMOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/pair_ufm.cpp b/src/pair_ufm.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6462c0e79745f20c2f8f3fd1bae6adc9fa6f8168
--- /dev/null
+++ b/src/pair_ufm.cpp
@@ -0,0 +1,376 @@
+/* -*- c++ -*- ----------------------------------------------------------
+ LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+ http://lammps.sandia.gov, Sandia National Laboratories
+ Steve Plimpton, sjplimp@sandia.gov
+ 
+ Copyright (2003) Sandia Corporation.  Under the terms of Contract
+ DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+ certain rights in this software.  This software is distributed under
+ the GNU General Public License.
+ 
+ See the README file in the top-level LAMMPS directory.
+ ------------------------------------------------------------------------- */
+
+/* -----------------------------------------------------------------------
+ Contributing author:   
+            Rodolfo Paula Leite (Unicamp/Brazil) - pl.rodolfo@gmail.com
+            Maurice de Koning (Unicamp/Brazil) - dekoning@ifi.unicamp.br
+ ------------------------------------------------------------------------- */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "pair_ufm.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "neigh_request.h"
+#include "update.h"
+#include "integrate.h"
+#include "respa.h"
+#include "math_const.h"
+#include "memory.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+/* ---------------------------------------------------------------------- */
+
+PairUFM::PairUFM(LAMMPS *lmp) : Pair(lmp)
+{
+  writedata = 1;
+}
+
+/* ---------------------------------------------------------------------- */
+
+PairUFM::~PairUFM()
+{
+  if (allocated) {
+    memory->destroy(setflag);
+    memory->destroy(cutsq);
+    memory->destroy(cut);
+    memory->destroy(epsilon);
+    memory->destroy(sigma);
+    memory->destroy(scale);
+    memory->destroy(uf1);
+    memory->destroy(uf2);
+    memory->destroy(uf3);
+    memory->destroy(uf4);
+    memory->destroy(offset);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairUFM::compute(int eflag, int vflag)
+{
+  int i,j,ii,jj,inum,jnum,itype,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
+  double rsq, expuf, factor;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  evdwl = 0.0;
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = vflag_fdotr = 0;
+
+  double **x = atom->x;
+  double **f = atom->f;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  double *special_lj = force->special_lj;
+  int newton_pair = force->newton_pair;
+
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = 0; ii < inum; ii++) {
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor = special_lj[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+        expuf = exp(- rsq * uf2[itype][jtype]);
+        fpair = factor * scale[itype][jtype] * uf1[itype][jtype] * expuf /(1.0 - expuf);
+
+        f[i][0] += delx*fpair;
+        f[i][1] += dely*fpair;
+        f[i][2] += delz*fpair;
+        if (newton_pair || j < nlocal) {
+          f[j][0] -= delx*fpair;
+          f[j][1] -= dely*fpair;
+          f[j][2] -= delz*fpair;
+        }
+
+        if (eflag) {
+          evdwl = -uf3[itype][jtype] * log(1.0 - expuf) - offset[itype][jtype];
+            evdwl *= factor;
+        }
+
+        if (evflag) ev_tally(i,j,nlocal,newton_pair,
+                             evdwl,0.0,fpair,delx,dely,delz);
+      }
+    }
+  }
+
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+/* ----------------------------------------------------------------------
+   allocate all arrays
+------------------------------------------------------------------------- */
+
+void PairUFM::allocate()
+{
+  allocated = 1;
+  int n = atom->ntypes;
+
+  memory->create(setflag,n+1,n+1,"pair:setflag");
+  for (int i = 1; i <= n; i++)
+    for (int j = i; j <= n; j++)
+      setflag[i][j] = 0;
+
+  memory->create(cutsq,n+1,n+1,"pair:cutsq");
+
+  memory->create(cut,n+1,n+1,"pair:cut");
+  memory->create(epsilon,n+1,n+1,"pair:epsilon");
+  memory->create(sigma,n+1,n+1,"pair:sigma");
+  memory->create(scale,n+1,n+1,"pair:scale");
+  memory->create(uf1,n+1,n+1,"pair:uf1");
+  memory->create(uf2,n+1,n+1,"pair:uf2");
+  memory->create(uf3,n+1,n+1,"pair:uf3");
+  memory->create(uf4,n+1,n+1,"pair:uf4");
+  memory->create(offset,n+1,n+1,"pair:offset");
+}
+
+/* ----------------------------------------------------------------------
+   global settings
+------------------------------------------------------------------------- */
+
+void PairUFM::settings(int narg, char **arg)
+{
+  if (narg != 1) error->all(FLERR,"Illegal pair_style command");
+
+  cut_global = force->numeric(FLERR,arg[0]);
+
+  // reset cutoffs that have been explicitly set
+
+  if (allocated) {
+    int i,j;
+    for (i = 1; i <= atom->ntypes; i++)
+      for (j = i+1; j <= atom->ntypes; j++)
+        if (setflag[i][j]) cut[i][j] = cut_global;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more type pairs
+------------------------------------------------------------------------- */
+
+void PairUFM::coeff(int narg, char **arg)
+{
+  if (narg < 4 || narg > 5)
+    error->all(FLERR,"Incorrect args for pair coefficients");
+  if (!allocated) allocate();
+
+  int ilo,ihi,jlo,jhi;
+  force->bounds(FLERR,arg[0],atom->ntypes,ilo,ihi);
+  force->bounds(FLERR,arg[1],atom->ntypes,jlo,jhi);
+
+  double epsilon_one = force->numeric(FLERR,arg[2]);
+  double sigma_one = force->numeric(FLERR,arg[3]);
+
+  double cut_one = cut_global;
+
+  if (narg == 5) cut_one = force->numeric(FLERR,arg[4]);
+
+  int count = 0;
+  for (int i = ilo; i <= ihi; i++) {
+    for (int j = MAX(jlo,i); j <= jhi; j++) {
+      epsilon[i][j] = epsilon_one;
+      sigma[i][j] = sigma_one;
+      scale[i][j] = 1.0;
+      cut[i][j] = cut_one;
+      setflag[i][j] = 1;
+      count++;
+    }
+  }
+
+  if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i
+------------------------------------------------------------------------- */
+
+double PairUFM::init_one(int i, int j)
+{
+  if (setflag[i][j] == 0) {
+    epsilon[i][j] = mix_energy(epsilon[i][i],epsilon[j][j],
+                               sigma[i][i],sigma[j][j]);
+    sigma[i][j] = mix_distance(sigma[i][i],sigma[j][j]);
+    cut[i][j] = mix_distance(cut[i][i],cut[j][j]);
+  }
+ 
+  uf1[i][j] = 2.0 * epsilon[i][j] / pow(sigma[i][j],2.0);
+  uf2[i][j] = 1.0 / pow(sigma[i][j],2.0);
+  uf3[i][j] = epsilon[i][j];
+  uf4[i][j] = sigma[i][j];
+
+  if (offset_flag) {
+    double ratio = pow(cut[i][j] / sigma[i][j],2.0);
+    offset[i][j] = - epsilon[i][j] * log ( 1.0 - exp( -ratio )) ;
+  } else offset[i][j] = 0.0;
+
+  uf1[j][i] = uf1[i][j];
+  uf2[j][i] = uf2[i][j];
+  uf3[j][i] = uf3[i][j];
+  uf4[j][i] = uf4[i][j];
+  scale[j][i] = scale[i][j];
+  offset[j][i] = offset[i][j];
+
+  return cut[i][j];
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to restart file
+------------------------------------------------------------------------- */
+
+void PairUFM::write_restart(FILE *fp)
+{
+  write_restart_settings(fp);
+
+  int i,j;
+  for (i = 1; i <= atom->ntypes; i++)
+    for (j = i; j <= atom->ntypes; j++) {
+      fwrite(&setflag[i][j],sizeof(int),1,fp);
+      if (setflag[i][j]) {
+        fwrite(&epsilon[i][j],sizeof(double),1,fp);
+        fwrite(&sigma[i][j],sizeof(double),1,fp);
+        fwrite(&cut[i][j],sizeof(double),1,fp);
+         }
+    }
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads from restart file, bcasts
+------------------------------------------------------------------------- */
+
+void PairUFM::read_restart(FILE *fp)
+{
+  read_restart_settings(fp);
+  allocate();
+
+  int i,j;
+  int me = comm->me;
+  for (i = 1; i <= atom->ntypes; i++)
+    for (j = i; j <= atom->ntypes; j++) {
+      if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
+      MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
+      if (setflag[i][j]) {
+        if (me == 0) {
+          fread(&epsilon[i][j],sizeof(double),1,fp);
+          fread(&sigma[i][j],sizeof(double),1,fp);
+          fread(&cut[i][j],sizeof(double),1,fp);
+        }
+        MPI_Bcast(&epsilon[i][j],1,MPI_DOUBLE,0,world);
+        MPI_Bcast(&sigma[i][j],1,MPI_DOUBLE,0,world);
+        MPI_Bcast(&cut[i][j],1,MPI_DOUBLE,0,world);
+      }
+    }
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to restart file
+------------------------------------------------------------------------- */
+
+void PairUFM::write_restart_settings(FILE *fp)
+{
+  fwrite(&cut_global,sizeof(double),1,fp);
+  fwrite(&offset_flag,sizeof(int),1,fp);
+  fwrite(&mix_flag,sizeof(int),1,fp);
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads from restart file, bcasts
+------------------------------------------------------------------------- */
+
+void PairUFM::read_restart_settings(FILE *fp)
+{
+  int me = comm->me;
+  if (me == 0) {
+    fread(&cut_global,sizeof(double),1,fp);
+    fread(&offset_flag,sizeof(int),1,fp);
+    fread(&mix_flag,sizeof(int),1,fp);
+  }
+  MPI_Bcast(&cut_global,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
+  MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to data file
+------------------------------------------------------------------------- */
+
+void PairUFM::write_data(FILE *fp)
+{
+  for (int i = 1; i <= atom->ntypes; i++)
+    fprintf(fp,"%d %g %g\n",i,epsilon[i][i],sigma[i][i]);
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes all pairs to data file
+------------------------------------------------------------------------- */
+
+void PairUFM::write_data_all(FILE *fp)
+{
+  for (int i = 1; i <= atom->ntypes; i++)
+    for (int j = i; j <= atom->ntypes; j++)
+      fprintf(fp,"%d %d %g %g %g\n",i,j,epsilon[i][j],sigma[i][j],cut[i][j]);
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairUFM::single(int i, int j, int itype, int jtype, double rsq,
+                         double factor_coul, double factor_lj,
+                         double &fforce)
+{
+  double expuf,phiuf;
+  expuf = exp(- rsq * uf2[itype][jtype]);
+  fforce = factor_lj * uf1[itype][jtype] * expuf /(1.0 - expuf);
+  phiuf = - uf3[itype][jtype] * log(1.0 - expuf) - offset[itype][jtype];
+  return factor_lj * phiuf;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void *PairUFM::extract(const char *str, int &dim)
+{
+  dim = 2;
+  if (strcmp(str,"epsilon") == 0) return (void *) epsilon;
+  if (strcmp(str,"sigma") == 0) return (void *) sigma;
+  if (strcmp(str,"scale") == 0) return (void *) scale;
+  return NULL;
+}
diff --git a/src/pair_ufm.h b/src/pair_ufm.h
new file mode 100644
index 0000000000000000000000000000000000000000..2161c2acafa069adea48db1807e5e7659e6a6378
--- /dev/null
+++ b/src/pair_ufm.h
@@ -0,0 +1,76 @@
+/* -*- c++ -*- ----------------------------------------------------------
+ LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+ http://lammps.sandia.gov, Sandia National Laboratories
+ Steve Plimpton, sjplimp@sandia.gov
+ 
+ Copyright (2003) Sandia Corporation.  Under the terms of Contract
+ DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+ certain rights in this software.  This software is distributed under
+ the GNU General Public License.
+ 
+ See the README file in the top-level LAMMPS directory.
+ ------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+ Contributing author:
+            Rodolfo Paula Leite (Unicamp/Brazil) - pl.rodolfo@gmail.com
+            Maurice de Koning (Unicamp/Brazil) - dekoning@ifi.unicamp.br
+ ------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(ufm,PairUFM)
+
+#else
+
+#ifndef LMP_PAIR_UFM_H
+#define LMP_PAIR_UFM_H
+
+#include "pair.h"
+
+namespace LAMMPS_NS {
+
+class PairUFM : public Pair {
+ public:
+  PairUFM(class LAMMPS *);
+  virtual ~PairUFM();
+  virtual void compute(int, int);
+  void settings(int, char **);
+  void coeff(int, char **);
+  double init_one(int, int);
+  void write_restart(FILE *);
+  void read_restart(FILE *);
+  void write_restart_settings(FILE *);
+  void read_restart_settings(FILE *);
+  void write_data(FILE *);
+  void write_data_all(FILE *);
+  double single(int, int, int, int, double, double, double, double &);
+  void *extract(const char *, int &);
+
+ protected:
+  double cut_global;
+  double **cut,**scale;
+  double **epsilon,**sigma;
+  double **uf1,**uf2,**uf3,**uf4,**offset;
+
+  virtual void allocate();
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Illegal ... command
+
+Self-explanatory.  Check the input script syntax and compare to the
+documentation for the command.  You can use -echo screen as a
+command-line option when running LAMMPS to see the offending line.
+
+E: Incorrect args for pair coefficients
+
+Self-explanatory.  Check the input script or data file.
+
+*/