diff --git a/examples/USER/scafacos/data.NaCl b/examples/USER/scafacos/data.NaCl
index 2d804dec9507672180ba6ce1b9b695116656cf6c..23807e23d5d7ba6b4e55dd6025783d5a008b9f59 100644
--- a/examples/USER/scafacos/data.NaCl
+++ b/examples/USER/scafacos/data.NaCl
@@ -1,25 +1,25 @@
- LAMMPS Description
- 
-          8 atoms
- 
-           2 atom types
- 
-   0.0000000000000000        2.000000000000000      xlo xhi
-   0.0000000000000000        2.000000000000000      ylo yhi
-   0.0000000000000000        2.000000000000000      zlo zhi
- 
- Masses
- 
-              1   22.98976928     
-              2   35.45
- 
- Atoms
- 
-    1    1   1.0    0.0    0.0     0.0
-    2    2   -1.0   1.0    0.0     0.0
-    3    2   -1.0   0.0    1.0     0.0
-    4    1   1.0    1.0    1.0     0.0
-    5    2   -1.0   0.0    0.0     1.0
-    6    1   1.0    1.0    0.0     1.0
-    7    1   1.0    0.0    1.0     1.0
-    8    2   -1.0   1.0    1.0     1.0
+LAMMPS Description
+
+8 atoms
+
+2 atom types
+
+0 2 xlo xhi 
+0 2 ylo yhi 
+0 2 zlo zhi 
+
+Masses
+
+1  22.98976928
+2  35.45
+
+Atoms
+
+1  2 1 0 0 0
+2  1 -1 1 0 0
+3  1 -1 0 1 0
+4  2 1 1 1 0
+5  1 -1 0 0 1
+6  2 1 1 0 1
+7  2 1 0 1 1
+8  1 -1 1 1 1
diff --git a/examples/USER/scafacos/data.cloud_wall b/examples/USER/scafacos/data.cloud_wall
new file mode 100644
index 0000000000000000000000000000000000000000..1592ec666a85ef7895b71f62fa9948535d426cd7
--- /dev/null
+++ b/examples/USER/scafacos/data.cloud_wall
@@ -0,0 +1,316 @@
+LAMMPS Description
+
+300 atoms
+
+1 atom types
+
+0 10 xlo xhi 
+0 10 ylo yhi 
+0 10 zlo zhi 
+
+Masses
+
+1  1.0
+
+Atoms
+
+1  1 1 0 0 4.5
+2  1 -1 0 0 5.5
+3  1 1 0 1 4.5
+4  1 -1 0 1 5.5
+5  1 1 0 2 4.5
+6  1 -1 0 2 5.5
+7  1 1 0 3 4.5
+8  1 -1 0 3 5.5
+9  1 1 0 4 4.5
+10  1 -1 0 4 5.5
+11  1 1 0 5 4.5
+12  1 -1 0 5 5.5
+13  1 1 0 6 4.5
+14  1 -1 0 6 5.5
+15  1 1 0 7 4.5
+16  1 -1 0 7 5.5
+17  1 1 0 8 4.5
+18  1 -1 0 8 5.5
+19  1 1 0 9 4.5
+20  1 -1 0 9 5.5
+21  1 1 1 0 4.5
+22  1 -1 1 0 5.5
+23  1 1 1 1 4.5
+24  1 -1 1 1 5.5
+25  1 1 1 2 4.5
+26  1 -1 1 2 5.5
+27  1 1 1 3 4.5
+28  1 -1 1 3 5.5
+29  1 1 1 4 4.5
+30  1 -1 1 4 5.5
+31  1 1 1 5 4.5
+32  1 -1 1 5 5.5
+33  1 1 1 6 4.5
+34  1 -1 1 6 5.5
+35  1 1 1 7 4.5
+36  1 -1 1 7 5.5
+37  1 1 1 8 4.5
+38  1 -1 1 8 5.5
+39  1 1 1 9 4.5
+40  1 -1 1 9 5.5
+41  1 1 2 0 4.5
+42  1 -1 2 0 5.5
+43  1 1 2 1 4.5
+44  1 -1 2 1 5.5
+45  1 1 2 2 4.5
+46  1 -1 2 2 5.5
+47  1 1 2 3 4.5
+48  1 -1 2 3 5.5
+49  1 1 2 4 4.5
+50  1 -1 2 4 5.5
+51  1 1 2 5 4.5
+52  1 -1 2 5 5.5
+53  1 1 2 6 4.5
+54  1 -1 2 6 5.5
+55  1 1 2 7 4.5
+56  1 -1 2 7 5.5
+57  1 1 2 8 4.5
+58  1 -1 2 8 5.5
+59  1 1 2 9 4.5
+60  1 -1 2 9 5.5
+61  1 1 3 0 4.5
+62  1 -1 3 0 5.5
+63  1 1 3 1 4.5
+64  1 -1 3 1 5.5
+65  1 1 3 2 4.5
+66  1 -1 3 2 5.5
+67  1 1 3 3 4.5
+68  1 -1 3 3 5.5
+69  1 1 3 4 4.5
+70  1 -1 3 4 5.5
+71  1 1 3 5 4.5
+72  1 -1 3 5 5.5
+73  1 1 3 6 4.5
+74  1 -1 3 6 5.5
+75  1 1 3 7 4.5
+76  1 -1 3 7 5.5
+77  1 1 3 8 4.5
+78  1 -1 3 8 5.5
+79  1 1 3 9 4.5
+80  1 -1 3 9 5.5
+81  1 1 4 0 4.5
+82  1 -1 4 0 5.5
+83  1 1 4 1 4.5
+84  1 -1 4 1 5.5
+85  1 1 4 2 4.5
+86  1 -1 4 2 5.5
+87  1 1 4 3 4.5
+88  1 -1 4 3 5.5
+89  1 1 4 4 4.5
+90  1 -1 4 4 5.5
+91  1 1 4 5 4.5
+92  1 -1 4 5 5.5
+93  1 1 4 6 4.5
+94  1 -1 4 6 5.5
+95  1 1 4 7 4.5
+96  1 -1 4 7 5.5
+97  1 1 4 8 4.5
+98  1 -1 4 8 5.5
+99  1 1 4 9 4.5
+100  1 -1 4 9 5.5
+101  1 1 5 0 4.5
+102  1 -1 5 0 5.5
+103  1 1 5 1 4.5
+104  1 -1 5 1 5.5
+105  1 1 5 2 4.5
+106  1 -1 5 2 5.5
+107  1 1 5 3 4.5
+108  1 -1 5 3 5.5
+109  1 1 5 4 4.5
+110  1 -1 5 4 5.5
+111  1 1 5 5 4.5
+112  1 -1 5 5 5.5
+113  1 1 5 6 4.5
+114  1 -1 5 6 5.5
+115  1 1 5 7 4.5
+116  1 -1 5 7 5.5
+117  1 1 5 8 4.5
+118  1 -1 5 8 5.5
+119  1 1 5 9 4.5
+120  1 -1 5 9 5.5
+121  1 1 6 0 4.5
+122  1 -1 6 0 5.5
+123  1 1 6 1 4.5
+124  1 -1 6 1 5.5
+125  1 1 6 2 4.5
+126  1 -1 6 2 5.5
+127  1 1 6 3 4.5
+128  1 -1 6 3 5.5
+129  1 1 6 4 4.5
+130  1 -1 6 4 5.5
+131  1 1 6 5 4.5
+132  1 -1 6 5 5.5
+133  1 1 6 6 4.5
+134  1 -1 6 6 5.5
+135  1 1 6 7 4.5
+136  1 -1 6 7 5.5
+137  1 1 6 8 4.5
+138  1 -1 6 8 5.5
+139  1 1 6 9 4.5
+140  1 -1 6 9 5.5
+141  1 1 7 0 4.5
+142  1 -1 7 0 5.5
+143  1 1 7 1 4.5
+144  1 -1 7 1 5.5
+145  1 1 7 2 4.5
+146  1 -1 7 2 5.5
+147  1 1 7 3 4.5
+148  1 -1 7 3 5.5
+149  1 1 7 4 4.5
+150  1 -1 7 4 5.5
+151  1 1 7 5 4.5
+152  1 -1 7 5 5.5
+153  1 1 7 6 4.5
+154  1 -1 7 6 5.5
+155  1 1 7 7 4.5
+156  1 -1 7 7 5.5
+157  1 1 7 8 4.5
+158  1 -1 7 8 5.5
+159  1 1 7 9 4.5
+160  1 -1 7 9 5.5
+161  1 1 8 0 4.5
+162  1 -1 8 0 5.5
+163  1 1 8 1 4.5
+164  1 -1 8 1 5.5
+165  1 1 8 2 4.5
+166  1 -1 8 2 5.5
+167  1 1 8 3 4.5
+168  1 -1 8 3 5.5
+169  1 1 8 4 4.5
+170  1 -1 8 4 5.5
+171  1 1 8 5 4.5
+172  1 -1 8 5 5.5
+173  1 1 8 6 4.5
+174  1 -1 8 6 5.5
+175  1 1 8 7 4.5
+176  1 -1 8 7 5.5
+177  1 1 8 8 4.5
+178  1 -1 8 8 5.5
+179  1 1 8 9 4.5
+180  1 -1 8 9 5.5
+181  1 1 9 0 4.5
+182  1 -1 9 0 5.5
+183  1 1 9 1 4.5
+184  1 -1 9 1 5.5
+185  1 1 9 2 4.5
+186  1 -1 9 2 5.5
+187  1 1 9 3 4.5
+188  1 -1 9 3 5.5
+189  1 1 9 4 4.5
+190  1 -1 9 4 5.5
+191  1 1 9 5 4.5
+192  1 -1 9 5 5.5
+193  1 1 9 6 4.5
+194  1 -1 9 6 5.5
+195  1 1 9 7 4.5
+196  1 -1 9 7 5.5
+197  1 1 9 8 4.5
+198  1 -1 9 8 5.5
+199  1 1 9 9 4.5
+200  1 -1 9 9 5.5
+201  1 -1 9.28495 2.13839 8.88019
+202  1 1 4.99281 4.17459 9.83905
+203  1 -1 4.91265 6.89408 2.39989
+204  1 1 4.43647 3.68895 8.86086
+205  1 -1 0.659075 7.07271 0.179131
+206  1 1 7.791 3.40021 0.969703
+207  1 -1 1.18008 3.63874 7.28751
+208  1 1 8.51522 5.24681 6.37702
+209  1 -1 4.24226 9.60726 3.16084
+210  1 1 8.43745 8.23344 9.2883
+211  1 -1 8.48509 8.84988 9.43407
+212  1 1 2.81127 8.9903 0.00909212
+213  1 -1 6.38283 6.20858 9.92482
+214  1 1 4.59962 5.7925 7.52571
+215  1 -1 7.03797 7.09336 8.15957
+216  1 1 6.68103 8.04734 7.95661
+217  1 -1 2.531 8.47145 1.6209
+218  1 1 6.71915 8.79876 9.59581
+219  1 -1 4.96758 0.0381298 0.827927
+220  1 1 9.22955 1.04572 0.84722
+221  1 -1 2.3224 2.57084 8.07306
+222  1 1 1.94283 3.17375 3.92051
+223  1 -1 2.34735 1.91295 1.29127
+224  1 1 3.33928 3.30688 0.892089
+225  1 -1 1.19738 4.40402 8.70835
+226  1 1 7.44541 4.94803 8.28211
+227  1 -1 5.93272 1.18886 1.56518
+228  1 1 8.50709 8.70343 1.24939
+229  1 -1 5.54016 3.38865 8.61698
+230  1 1 9.47644 0.573085 3.05941
+231  1 -1 9.39695 4.46542 1.84205
+232  1 1 3.52268 5.60212 0.333999
+233  1 -1 3.69009 9.40954 6.10446
+234  1 1 3.96836 6.15307 7.57803
+235  1 -1 2.02535 0.0418407 3.21642
+236  1 1 2.97488 8.79711 8.33242
+237  1 -1 2.4122 1.79458 3.04173
+238  1 1 9.72355 3.67773 1.52435
+239  1 -1 8.55216 6.1623 1.53201
+240  1 1 4.98973 2.41459 9.84381
+241  1 -1 8.8901 5.9006 1.97649
+242  1 1 9.09932 2.23783 1.42554
+243  1 -1 6.70722 8.21769 1.21953
+244  1 1 6.83768 0.84508 3.25165
+245  1 -1 0.222115 3.07945 0.51825
+246  1 1 0.503918 9.34932 6.25278
+247  1 -1 0.803159 8.7017 9.46211
+248  1 1 4.88636 5.00147 9.65639
+249  1 -1 1.62258 0.767285 9.63596
+250  1 1 2.70143 3.01111 7.74859
+251  1 -1 4.41574 5.31824 0.538729
+252  1 1 1.64724 5.18097 3.59205
+253  1 -1 2.33672 3.21408 6.6081
+254  1 1 7.46603 1.53668 9.09844
+255  1 -1 3.61269 8.44556 6.99789
+256  1 1 6.95465 6.83045 9.31002
+257  1 -1 5.91831 9.01549 3.4626
+258  1 1 6.56503 8.42229 3.27105
+259  1 -1 4.50822 9.59753 3.47025
+260  1 1 4.17357 5.27384 7.34774
+261  1 -1 7.70968 6.5292 3.54779
+262  1 1 4.7977 4.94239 6.24947
+263  1 -1 9.24016 9.36994 6.71263
+264  1 1 7.36888 8.75922 0.52403
+265  1 -1 9.92895 5.87551 6.21586
+266  1 1 3.86308 6.71601 9.69083
+267  1 -1 8.90048 0.298719 0.573852
+268  1 1 6.58753 6.67768 1.83984
+269  1 -1 8.672 0.367497 2.21864
+270  1 1 3.44519 3.30359 6.52249
+271  1 -1 7.24717 3.25113 3.41567
+272  1 1 9.53447 5.81336 1.79208
+273  1 -1 1.01722 6.42534 0.715
+274  1 1 3.58808 4.92392 7.00979
+275  1 -1 1.21399 3.56951 6.34505
+276  1 1 3.50336 0.942722 2.76989
+277  1 -1 9.45475 6.06299 0.659023
+278  1 1 3.44464 4.03075 6.20179
+279  1 -1 0.949331 5.40183 8.51385
+280  1 1 6.41118 2.62135 2.31132
+281  1 -1 3.58837 9.78355 7.04966
+282  1 1 9.2267 3.19593 2.10384
+283  1 -1 1.83092 2.35627 3.93061
+284  1 1 4.97203 4.92287 1.8049
+285  1 -1 7.4097 4.757 8.604
+286  1 1 0.746575 7.69038 0.89134
+287  1 -1 8.54862 6.59135 2.18888
+288  1 1 2.18747 4.82994 0.761718
+289  1 -1 5.71622 2.51116 6.85522
+290  1 1 6.95554 1.83187 8.31157
+291  1 -1 7.31818 6.60081 2.63208
+292  1 1 0.744495 2.73429 9.86022
+293  1 -1 5.1573 8.70962 2.53418
+294  1 1 2.40385 1.54057 1.9297
+295  1 -1 3.42609 2.25856 2.28437
+296  1 1 6.66173 3.70851 9.70052
+297  1 -1 7.88966 1.4343 8.91223
+298  1 1 3.91118 5.22253 6.29642
+299  1 -1 9.17618 3.98313 9.82158
+300  1 1 4.95424 5.93521 1.3652
diff --git a/examples/USER/scafacos/data.hammersley_shphere b/examples/USER/scafacos/data.hammersley_shphere
new file mode 100644
index 0000000000000000000000000000000000000000..e0d067adfb0e0846b70bdede187e1f6dd9a98b40
--- /dev/null
+++ b/examples/USER/scafacos/data.hammersley_shphere
@@ -0,0 +1,1016 @@
+LAMMPS Description
+
+1000 atoms
+
+1 atom types
+
+-50.5 51.5 xlo xhi 
+-50.5 51.5 ylo yhi 
+-50.5 51.5 zlo zhi 
+
+Masses
+
+1  1.0
+
+Atoms
+
+1  1 1 0.5 0.5 0
+2  1 1 0.468393 0.5 0.001
+3  1 1 0.5 0.544677 0.002
+4  1 -1 0.5 0.44531 0.003
+5  1 -1 0.544632 0.544632 0.004
+6  1 1 0.450125 0.450125 0.005
+7  1 1 0.445392 0.554608 0.006
+8  1 -1 0.558953 0.441047 0.007
+9  1 -1 0.582303 0.534091 0.008
+10  1 -1 0.412748 0.463859 0.009
+11  1 -1 0.461923 0.591925 0.01
+12  1 -1 0.539915 0.403637 0.011
+13  1 -1 0.541669 0.600597 0.012
+14  1 -1 0.456652 0.395348 0.013
+15  1 -1 0.391453 0.544962 0.014
+16  1 1 0.6123 0.453484 0.015
+17  1 1 0.623064 0.524479 0.016
+18  1 1 0.373213 0.47478 0.017
+19  1 1 0.474063 0.630397 0.018
+20  1 1 0.526635 0.366099 0.019
+21  1 1 0.57778 0.616406 0.02
+22  1 -1 0.42034 0.38078 0.021
+23  1 1 0.378037 0.581493 0.022
+24  1 -1 0.62464 0.416718 0.023
+25  1 1 0.627256 0.585029 0.024
+26  1 1 0.370187 0.413262 0.025
+27  1 1 0.411589 0.632316 0.026
+28  1 -1 0.590049 0.365233 0.027
+29  1 -1 0.532185 0.661803 0.028
+30  1 -1 0.467263 0.335418 0.029
+31  1 1 0.332691 0.53328 0.03
+32  1 -1 0.669987 0.466187 0.031
+33  1 -1 0.675153 0.517251 0.032
+34  1 1 0.322224 0.482491 0.033
+35  1 -1 0.482236 0.680356 0.034
+36  1 1 0.518014 0.317105 0.035
+37  1 -1 0.618181 0.644004 0.036
+38  1 1 0.380251 0.354085 0.037
+39  1 -1 0.352203 0.621294 0.038
+40  1 -1 0.649651 0.377185 0.039
+41  1 -1 0.672821 0.592375 0.04
+42  1 -1 0.325124 0.406527 0.041
+43  1 -1 0.405443 0.676904 0.042
+44  1 1 0.595626 0.321096 0.043
+45  1 -1 0.559536 0.696264 0.044
+46  1 1 0.439823 0.301622 0.045
+47  1 1 0.299535 0.56081 0.046
+48  1 -1 0.702526 0.438564 0.047
+49  1 1 0.704562 0.562053 0.048
+50  1 -1 0.293427 0.437337 0.049
+51  1 1 0.436734 0.70856 0.05
+52  1 -1 0.563862 0.289475 0.051
+53  1 -1 0.604663 0.69581 0.052
+54  1 1 0.394391 0.30242 0.053
+55  1 -1 0.30067 0.606544 0.054
+56  1 -1 0.701061 0.392531 0.055
+57  1 -1 0.677732 0.645861 0.056
+58  1 1 0.320783 0.352921 0.057
+59  1 1 0.351715 0.680686 0.058
+60  1 1 0.649479 0.31786 0.059
+61  1 -1 0.523278 0.736343 0.06
+62  1 1 0.476542 0.261822 0.061
+63  1 -1 0.260006 0.523637 0.062
+64  1 -1 0.741793 0.476185 0.063
+65  1 1 0.744458 0.512009 0.064
+66  1 1 0.253771 0.487904 0.065
+67  1 -1 0.487817 0.747983 0.066
+68  1 1 0.512268 0.250279 0.067
+69  1 -1 0.669062 0.686531 0.068
+70  1 1 0.329791 0.312203 0.069
+71  1 -1 0.310949 0.671346 0.07
+72  1 1 0.690295 0.327527 0.071
+73  1 1 0.73367 0.610518 0.072
+74  1 -1 0.264839 0.388777 0.073
+75  1 1 0.388079 0.736638 0.074
+76  1 -1 0.612614 0.261897 0.075
+77  1 -1 0.589275 0.749507 0.076
+78  1 1 0.410188 0.248992 0.077
+79  1 1 0.247505 0.590344 0.078
+80  1 -1 0.753971 0.409128 0.079
+81  1 1 0.763163 0.565919 0.08
+82  1 -1 0.235341 0.433706 0.081
+83  1 -1 0.433335 0.766142 0.082
+84  1 -1 0.567034 0.232386 0.083
+85  1 1 0.642606 0.737923 0.084
+86  1 -1 0.356626 0.260795 0.085
+87  1 -1 0.259524 0.644136 0.086
+88  1 -1 0.741738 0.355108 0.087
+89  1 -1 0.727545 0.668759 0.088
+90  1 -1 0.271292 0.330378 0.089
+91  1 1 0.329522 0.729863 0.09
+92  1 1 0.671329 0.26899 0.091
+93  1 -1 0.542409 0.785898 0.092
+94  1 -1 0.457385 0.212711 0.093
+95  1 -1 0.21133 0.54282 0.094
+96  1 -1 0.790041 0.456976 0.095
+97  1 -1 0.791403 0.543226 0.096
+98  1 1 0.207246 0.456574 0.097
+99  1 -1 0.456375 0.794097 0.098
+100  1 1 0.543823 0.204571 0.099
+101  1 -1 0.67871 0.740962 0.1
+102  1 -1 0.320499 0.25797 0.101
+103  1 -1 0.256911 0.680287 0.102
+104  1 1 0.744142 0.318932 0.103
+105  1 1 0.761831 0.656935 0.104
+106  1 -1 0.23706 0.3424 0.105
+107  1 -1 0.34174 0.764041 0.106
+108  1 1 0.658916 0.234865 0.107
+109  1 1 0.575416 0.801079 0.108
+110  1 1 0.424278 0.1977 0.109
+111  1 1 0.196487 0.576026 0.11
+112  1 -1 0.804718 0.423672 0.111
+113  1 1 0.796931 0.606244 0.112
+114  1 -1 0.201914 0.393343 0.113
+115  1 1 0.392933 0.799233 0.114
+116  1 -1 0.607475 0.199627 0.115
+117  1 -1 0.636914 0.78948 0.116
+118  1 1 0.362575 0.20944 0.117
+119  1 1 0.208366 0.637933 0.118
+120  1 1 0.792701 0.361563 0.119
+121  1 1 0.740781 0.718231 0.12
+122  1 -1 0.258356 0.280986 0.121
+123  1 1 0.280208 0.742503 0.122
+124  1 -1 0.720565 0.256644 0.123
+125  1 1 0.516172 0.829185 0.124
+126  1 1 0.483772 0.169679 0.125
+127  1 1 0.16855 0.516283 0.126
+128  1 -1 0.832572 0.483662 0.127
+129  1 1 0.833989 0.508199 0.128
+130  1 1 0.164901 0.491774 0.129
+131  1 1 0.491747 0.836202 0.13
+132  1 -1 0.50828 0.162701 0.131
+133  1 1 0.733403 0.745151 0.132
+134  1 -1 0.265849 0.254064 0.133
+135  1 1 0.253284 0.734894 0.134
+136  1 1 0.747492 0.264368 0.135
+137  1 -1 0.813381 0.638912 0.136
+138  1 -1 0.185651 0.360659 0.137
+139  1 1 0.360232 0.815311 0.138
+140  1 1 0.640192 0.183732 0.139
+141  1 1 0.624879 0.823736 0.14
+142  1 -1 0.374749 0.175298 0.141
+143  1 -1 0.174339 0.625621 0.142
+144  1 1 0.826615 0.374011 0.143
+145  1 1 0.842559 0.576924 0.144
+146  1 -1 0.156454 0.422854 0.145
+147  1 1 0.422634 0.844526 0.146
+148  1 1 0.577585 0.154498 0.147
+149  1 1 0.689978 0.800007 0.148
+150  1 1 0.309494 0.199157 0.149
+151  1 -1 0.198327 0.691032 0.15
+152  1 -1 0.802499 0.308445 0.151
+153  1 1 0.79353 0.706727 0.152
+154  1 1 0.20568 0.292716 0.153
+155  1 -1 0.292163 0.795106 0.154
+156  1 1 0.708388 0.204112 0.155
+157  1 1 0.562034 0.857513 0.156
+158  1 -1 0.437804 0.141555 0.157
+159  1 1 0.140629 0.562357 0.158
+160  1 1 0.860292 0.437483 0.159
+161  1 1 0.863849 0.544876 0.16
+162  1 1 0.135233 0.45501 0.161
+163  1 -1 0.454898 0.86568 0.162
+164  1 -1 0.545214 0.133412 0.163
+165  1 1 0.727805 0.791905 0.164
+166  1 -1 0.271638 0.207381 0.165
+167  1 -1 0.206672 0.728916 0.166
+168  1 1 0.794034 0.270533 0.167
+169  1 -1 0.825296 0.684278 0.168
+170  1 1 0.173933 0.315285 0.169
+171  1 1 0.314851 0.826833 0.17
+172  1 1 0.685581 0.172404 0.171
+173  1 1 0.600652 0.86371 0.172
+174  1 1 0.399117 0.135454 0.173
+175  1 -1 0.134623 0.601113 0.174
+176  1 1 0.866203 0.398658 0.175
+177  1 -1 0.861599 0.619456 0.176
+178  1 1 0.137595 0.380277 0.177
+179  1 -1 0.380013 0.863207 0.178
+180  1 1 0.620251 0.135996 0.179
+181  1 1 0.672735 0.843166 0.18
+182  1 1 0.326891 0.156092 0.181
+183  1 -1 0.155354 0.67348 0.182
+184  1 -1 0.84538 0.32615 0.183
+185  1 -1 0.793406 0.753094 0.184
+186  1 -1 0.205978 0.246375 0.185
+187  1 -1 0.245846 0.794635 0.186
+188  1 1 0.75468 0.204756 0.187
+189  1 1 0.528743 0.889654 0.188
+190  1 1 0.471199 0.109552 0.189
+191  1 1 0.108762 0.528859 0.19
+192  1 -1 0.892024 0.471083 0.191
+193  1 -1 0.892806 0.528975 0.192
+194  1 1 0.106416 0.470968 0.193
+195  1 1 0.47091 0.894357 0.194
+196  1 1 0.529146 0.104873 0.195
+197  1 1 0.759289 0.800588 0.196
+198  1 1 0.240212 0.198834 0.197
+199  1 -1 0.198258 0.760284 0.198
+200  1 -1 0.802314 0.239222 0.199
+201  1 -1 0.85729 0.679845 0.2
+202  1 -1 0.142042 0.319819 0.201
+203  1 1 0.319485 0.858623 0.202
+204  1 -1 0.680848 0.140716 0.203
+205  1 -1 0.626404 0.88263 0.204
+206  1 -1 0.373366 0.116674 0.205
+207  1 1 0.115982 0.626862 0.206
+208  1 -1 0.884707 0.37291 0.207
+209  1 -1 0.891174 0.608253 0.208
+210  1 -1 0.108134 0.391556 0.209
+211  1 -1 0.391366 0.892554 0.21
+212  1 -1 0.608824 0.106762 0.211
+213  1 1 0.70146 0.855626 0.212
+214  1 1 0.298194 0.143762 0.213
+215  1 -1 0.143154 0.702151 0.214
+216  1 1 0.857451 0.297506 0.215
+217  1 -1 0.824416 0.753177 0.216
+218  1 1 0.175042 0.2464 0.217
+219  1 -1 0.245979 0.825498 0.218
+220  1 -1 0.75444 0.173965 0.219
+221  1 -1 0.550708 0.911131 0.22
+222  1 1 0.449209 0.0881999 0.221
+223  1 1 0.0875343 0.550873 0.222
+224  1 -1 0.913128 0.449046 0.223
+225  1 1 0.910784 0.571278 0.224
+226  1 1 0.0885655 0.428609 0.225
+227  1 1 0.428497 0.912082 0.226
+228  1 1 0.571615 0.0872745 0.227
+229  1 -1 0.741576 0.843012 0.228
+230  1 1 0.258052 0.15646 0.229
+231  1 -1 0.155934 0.742319 0.23
+232  1 1 0.844589 0.257313 0.231
+233  1 1 0.85662 0.725827 0.232
+234  1 -1 0.142845 0.273834 0.233
+235  1 -1 0.273497 0.857687 0.234
+236  1 -1 0.726838 0.141783 0.235
+237  1 -1 0.593035 0.914305 0.236
+238  1 1 0.406829 0.0850902 0.237
+239  1 1 0.0844883 0.593306 0.238
+240  1 -1 0.91611 0.406559 0.239
+241  1 1 0.898465 0.653705 0.24
+242  1 1 0.100968 0.346076 0.241
+243  1 1 0.345859 0.899595 0.242
+244  1 1 0.654357 0.099844 0.243
+245  1 -1 0.674048 0.892646 0.244
+246  1 1 0.325711 0.10681 0.245
+247  1 -1 0.106269 0.674529 0.246
+248  1 -1 0.894268 0.325233 0.247
+249  1 -1 0.812767 0.797779 0.248
+250  1 1 0.186811 0.201819 0.249
+251  1 1 0.20142 0.813608 0.25
+252  1 1 0.798977 0.185975 0.251
+253  1 1 0.510655 0.93403 0.252
+254  1 -1 0.489331 0.0654 0.253
+255  1 -1 0.0648335 0.510683 0.254
+256  1 -1 0.93573 0.489303 0.255
+257  1 -1 0.936389 0.505356 0.256
+258  1 1 0.0630535 0.494638 0.257
+259  1 -1 0.494631 0.937501 0.258
+260  1 -1 0.505376 0.0619474 0.259
+261  1 1 0.806332 0.813944 0.26
+262  1 1 0.193287 0.185665 0.261
+263  1 -1 0.185277 0.807092 0.262
+264  1 1 0.815109 0.192531 0.263
+265  1 -1 0.905145 0.673671 0.264
+266  1 -1 0.0943645 0.326118 0.265
+267  1 -1 0.325909 0.906124 0.266
+268  1 1 0.674299 0.0933911 0.267
+269  1 -1 0.664463 0.911252 0.268
+270  1 1 0.335343 0.0882633 0.269
+271  1 -1 0.0877809 0.66485 0.27
+272  1 -1 0.912699 0.334958 0.271
+273  1 -1 0.935341 0.592162 0.272
+274  1 -1 0.0641588 0.407732 0.273
+275  1 -1 0.407626 0.936338 0.274
+276  1 -1 0.592478 0.0631673 0.275
+277  1 -1 0.743769 0.874701 0.276
+278  1 -1 0.255958 0.124881 0.277
+279  1 1 0.124464 0.744313 0.278
+280  1 -1 0.87595 0.255418 0.279
+281  1 1 0.87024 0.754013 0.28
+282  1 1 0.129357 0.245711 0.281
+283  1 -1 0.245436 0.871043 0.282
+284  1 1 0.754838 0.128558 0.283
+285  1 1 0.582539 0.943318 0.284
+286  1 -1 0.417373 0.056212 0.285
+287  1 -1 0.0557451 0.582714 0.286
+288  1 -1 0.944719 0.4172 0.287
+289  1 1 0.950072 0.549912 0.288
+290  1 1 0.049464 0.450037 0.289
+291  1 -1 0.449985 0.950997 0.29
+292  1 1 0.550066 0.0485441 0.291
+293  1 -1 0.784113 0.854987 0.292
+294  1 1 0.215602 0.144656 0.293
+295  1 1 0.144302 0.784681 0.294
+296  1 -1 0.85605 0.215037 0.295
+297  1 -1 0.899918 0.720113 0.296
+298  1 1 0.0996916 0.279673 0.297
+299  1 -1 0.279459 0.900696 0.298
+300  1 1 0.720753 0.0989177 0.299
+301  1 -1 0.627634 0.940125 0.3
+302  1 -1 0.372245 0.0594575 0.301
+303  1 -1 0.0590421 0.627875 0.302
+304  1 1 0.941371 0.372005 0.303
+305  1 -1 0.938504 0.638917 0.304
+306  1 -1 0.0610908 0.360954 0.305
+307  1 -1 0.360827 0.939312 0.306
+308  1 1 0.6393 0.0602882 0.307
+309  1 -1 0.712615 0.909794 0.308
+310  1 1 0.287194 0.0898385 0.309
+311  1 -1 0.0894727 0.712996 0.31
+312  1 -1 0.910891 0.286815 0.311
+313  1 1 0.85451 0.798293 0.312
+314  1 -1 0.145181 0.201446 0.313
+315  1 -1 0.201187 0.855127 0.314
+316  1 1 0.79907 0.144568 0.315
+317  1 -1 0.539888 0.963199 0.316
+318  1 1 0.460078 0.0364083 0.317
+319  1 -1 0.0360177 0.539956 0.318
+320  1 1 0.96437 0.460011 0.319
+321  1 -1 0.965598 0.528605 0.32
+322  1 -1 0.0340178 0.471372 0.321
+323  1 1 0.471348 0.966364 0.322
+324  1 -1 0.528675 0.0332573 0.323
+325  1 -1 0.810011 0.850596 0.324
+326  1 -1 0.189741 0.149123 0.325
+327  1 1 0.148844 0.810506 0.326
+328  1 1 0.851433 0.189249 0.327
+329  1 1 0.921914 0.705924 0.328
+330  1 -1 0.0777578 0.293916 0.329
+331  1 -1 0.293757 0.922568 0.33
+332  1 1 0.706401 0.077108 0.331
+333  1 -1 0.653199 0.945316 0.332
+334  1 -1 0.346686 0.054348 0.333
+335  1 -1 0.0540141 0.653429 0.334
+336  1 -1 0.946318 0.346457 0.335
+337  1 1 0.956741 0.620383 0.336
+338  1 1 0.0429248 0.379529 0.337
+339  1 1 0.379441 0.957407 0.338
+340  1 -1 0.620646 0.0422625 0.339
+341  1 1 0.738531 0.909272 0.34
+342  1 -1 0.2613 0.0904377 0.341
+343  1 -1 0.0901489 0.738868 0.342
+344  1 1 0.910138 0.260965 0.343
+345  1 -1 0.878055 0.787643 0.344
+346  1 -1 0.121684 0.212159 0.345
+347  1 -1 0.211963 0.878574 0.346
+348  1 1 0.788233 0.121169 0.347
+349  1 1 0.564106 0.972003 0.348
+350  1 -1 0.435852 0.0276823 0.349
+351  1 -1 0.0273696 0.564191 0.35
+352  1 -1 0.972941 0.435767 0.351
+353  1 -1 0.971529 0.57587 0.352
+354  1 -1 0.028166 0.424081 0.353
+355  1 1 0.424033 0.972137 0.354
+356  1 -1 0.576016 0.0275631 0.355
+357  1 -1 0.780489 0.888059 0.356
+358  1 1 0.219336 0.111698 0.357
+359  1 -1 0.111457 0.780839 0.358
+360  1 1 0.888782 0.218989 0.359
+361  1 1 0.90865 0.751803 0.36
+362  1 1 0.0911022 0.248045 0.361
+363  1 1 0.247893 0.909143 0.362
+364  1 -1 0.752257 0.0906132 0.363
+365  1 -1 0.611173 0.968129 0.364
+366  1 -1 0.388762 0.0315974 0.365
+367  1 -1 0.0313256 0.611303 0.366
+368  1 1 0.968944 0.388633 0.367
+369  1 1 0.952042 0.668029 0.368
+370  1 -1 0.0477021 0.331876 0.369
+371  1 1 0.331782 0.952551 0.37
+372  1 -1 0.668311 0.0471973 0.371
+373  1 -1 0.701276 0.939436 0.372
+374  1 1 0.298614 0.0603244 0.373
+375  1 1 0.0600867 0.701495 0.374
+376  1 -1 0.940149 0.298397 0.375
+377  1 -1 0.854883 0.82967 0.376
+378  1 -1 0.14493 0.170157 0.377
+379  1 -1 0.169985 0.855255 0.378
+380  1 -1 0.830185 0.144561 0.379
+381  1 -1 0.517866 0.985058 0.38
+382  1 1 0.482125 0.0146965 0.381
+383  1 1 0.0144527 0.517884 0.382
+384  1 -1 0.985789 0.482107 0.383
+385  1 1 0.986028 0.517901 0.384
+386  1 1 0.0137344 0.48209 0.385
+387  1 -1 0.482081 0.986501 0.386
+388  1 -1 0.517927 0.0132664 0.387
+389  1 1 0.831653 0.857018 0.388
+390  1 -1 0.168191 0.142814 0.389
+391  1 1 0.142648 0.831963 0.39
+392  1 -1 0.857517 0.167884 0.391
+393  1 1 0.943853 0.7033 0.392
+394  1 1 0.0559471 0.296609 0.393
+395  1 -1 0.296518 0.944251 0.394
+396  1 -1 0.703572 0.0555526 0.395
+397  1 1 0.670399 0.958419 0.396
+398  1 -1 0.329527 0.0413825 0.397
+399  1 -1 0.0411862 0.670546 0.398
+400  1 1 0.959008 0.329382 0.399
+401  1 -1 0.976641 0.613195 0.4
+402  1 -1 0.0231611 0.386758 0.401
+403  1 1 0.386712 0.977034 0.402
+404  1 1 0.613334 0.0227722 0.403
+405  1 1 0.757415 0.917758 0.404
+406  1 1 0.242483 0.0820765 0.405
+407  1 -1 0.0819126 0.757618 0.406
+408  1 -1 0.918249 0.242282 0.407
+409  1 1 0.89831 0.787898 0.408
+410  1 -1 0.101539 0.211993 0.409
+411  1 1 0.211885 0.89861 0.41
+412  1 1 0.788221 0.101243 0.411
+413  1 -1 0.578189 0.985945 0.412
+414  1 -1 0.421783 0.0138796 0.413
+415  1 -1 0.0137062 0.578245 0.414
+416  1 -1 0.986465 0.421727 0.415
+417  1 1 0.988409 0.566334 0.416
+418  1 -1 0.0114227 0.433643 0.417
+419  1 -1 0.433621 0.988743 0.418
+420  1 -1 0.566401 0.0110932 0.419
+421  1 -1 0.798855 0.892792 0.42
+422  1 -1 0.201047 0.10708 0.421
+423  1 1 0.106953 0.799049 0.422
+424  1 1 0.893172 0.200856 0.423
+425  1 1 0.926967 0.748844 0.424
+426  1 -1 0.0729011 0.251079 0.425
+427  1 -1 0.251003 0.927229 0.426
+428  1 -1 0.749071 0.0726425 0.427
+429  1 1 0.626105 0.978449 0.428
+430  1 -1 0.373858 0.0214111 0.429
+431  1 1 0.0212734 0.626178 0.43
+432  1 -1 0.978862 0.373786 0.431
+433  1 -1 0.968411 0.661144 0.432
+434  1 1 0.0314604 0.338812 0.433
+435  1 1 0.338768 0.968666 0.434
+436  1 -1 0.661275 0.0312086 0.435
+437  1 -1 0.717504 0.945641 0.436
+438  1 -1 0.28244 0.0542439 0.437
+439  1 -1 0.0541307 0.717616 0.438
+440  1 1 0.945981 0.28233 0.439
+441  1 1 0.871862 0.828815 0.44
+442  1 -1 0.128049 0.171106 0.441
+443  1 1 0.171028 0.87204 0.442
+444  1 1 0.829049 0.127874 0.443
+445  1 -1 0.530467 0.995919 0.444
+446  1 -1 0.469526 0.00396944 0.445
+447  1 -1 0.00385999 0.530481 0.446
+448  1 1 0.996247 0.469512 0.447
+449  1 -1 0.995455 0.542666 0.448
+450  1 1 0.00444188 0.457325 0.449
+451  1 1 0.457316 0.995659 0.45
+452  1 -1 0.542692 0.00424161 0.451
+453  1 1 0.820429 0.880817 0.452
+454  1 -1 0.17951 0.11911 0.453
+455  1 1 0.119039 0.820551 0.454
+456  1 1 0.881031 0.179391 0.455
+457  1 1 0.942098 0.729376 0.456
+458  1 1 0.0578245 0.270584 0.457
+459  1 -1 0.270544 0.942251 0.458
+460  1 -1 0.729494 0.0576748 0.459
+461  1 1 0.650519 0.975125 0.46
+462  1 1 0.349457 0.0247992 0.461
+463  1 -1 0.0247255 0.650566 0.462
+464  1 -1 0.975346 0.349411 0.463
+465  1 1 0.978969 0.638898 0.464
+466  1 1 0.0209627 0.361082 0.465
+467  1 1 0.361062 0.979104 0.466
+468  1 -1 0.638956 0.0208318 0.467
+469  1 -1 0.740598 0.937137 0.468
+470  1 1 0.259372 0.0628077 0.469
+471  1 -1 0.0627541 0.740658 0.47
+472  1 1 0.937298 0.259314 0.471
+473  1 -1 0.889756 0.811939 0.472
+474  1 1 0.110201 0.188026 0.473
+475  1 1 0.187993 0.88984 0.474
+476  1 1 0.812039 0.11012 0.475
+477  1 1 0.555048 0.996381 0.476
+478  1 1 0.444947 0.00357257 0.477
+479  1 1 0.0035278 0.555058 0.478
+480  1 -1 0.996515 0.444938 0.479
+481  1 1 0.991159 0.591447 0.48
+482  1 1 0.00880229 0.408546 0.481
+483  1 1 0.408539 0.991234 0.482
+484  1 -1 0.591467 0.00873146 0.483
+485  1 1 0.782721 0.912084 0.484
+486  1 1 0.217261 0.0878909 0.485
+487  1 -1 0.087867 0.782755 0.486
+488  1 1 0.912155 0.21723 0.487
+489  1 -1 0.918992 0.772584 0.488
+490  1 1 0.0809891 0.227403 0.489
+491  1 -1 0.227392 0.919029 0.49
+492  1 1 0.772618 0.0809555 0.491
+493  1 -1 0.603542 0.989096 0.492
+494  1 -1 0.396454 0.0108893 0.493
+495  1 -1 0.0108765 0.603548 0.494
+496  1 1 0.989134 0.396449 0.495
+497  1 -1 0.964238 0.685653 0.496
+498  1 1 0.0357553 0.314345 0.497
+499  1 1 0.314343 0.964249 0.498
+500  1 -1 0.685658 0.0357479 0.499
+501  1 1 0.696996 0.959557 0.5
+502  1 -1 0.303004 0.040444 0.501
+503  1 -1 0.0404468 0.696994 0.502
+504  1 -1 0.959549 0.303008 0.503
+505  1 -1 0.857854 0.849177 0.504
+506  1 1 0.142152 0.150829 0.505
+507  1 -1 0.150837 0.85784 0.506
+508  1 -1 0.849154 0.14217 0.507
+509  1 1 0.506135 0.999898 0.508
+510  1 1 0.493865 0.00011865 0.509
+511  1 1 0.000137652 0.506135 0.51
+512  1 -1 0.999841 0.493866 0.511
+513  1 -1 0.999847 0.503067 0.512
+514  1 1 0.000178438 0.496933 0.513
+515  1 1 0.496933 0.999795 0.514
+516  1 -1 0.503067 0.000234459 0.515
+517  1 -1 0.851197 0.855534 0.516
+518  1 1 0.148826 0.14449 0.517
+519  1 1 0.144514 0.85115 0.518
+520  1 1 0.855459 0.148876 0.519
+521  1 -1 0.960388 0.694017 0.52
+522  1 1 0.0396495 0.305999 0.521
+523  1 -1 0.306016 0.960311 0.522
+524  1 -1 0.693967 0.0397307 0.523
+525  1 -1 0.688286 0.962571 0.524
+526  1 1 0.311732 0.0374741 0.525
+527  1 -1 0.0375214 0.688249 0.526
+528  1 1 0.962429 0.311771 0.527
+529  1 -1 0.989016 0.600395 0.528
+530  1 -1 0.0110396 0.399617 0.529
+531  1 1 0.399629 0.988902 0.53
+532  1 -1 0.600359 0.0111574 0.531
+533  1 1 0.774665 0.916576 0.532
+534  1 1 0.225371 0.0834787 0.533
+535  1 -1 0.0835348 0.774592 0.534
+536  1 -1 0.916407 0.225446 0.535
+537  1 -1 0.912948 0.779603 0.536
+538  1 -1 0.0871126 0.220438 0.537
+539  1 1 0.22048 0.912825 0.538
+540  1 1 0.779477 0.0872389 0.539
+541  1 -1 0.594231 0.989408 0.54
+542  1 1 0.405784 0.0106715 0.541
+543  1 -1 0.0107533 0.5942 0.542
+544  1 -1 0.989163 0.405816 0.543
+545  1 1 0.995353 0.551859 0.544
+546  1 1 0.0047358 0.44815 0.545
+547  1 -1 0.44816 0.995173 0.546
+548  1 -1 0.55183 0.00491958 0.547
+549  1 -1 0.813365 0.88665 0.548
+550  1 1 0.186696 0.113426 0.549
+551  1 1 0.113503 0.813241 0.55
+552  1 -1 0.886418 0.186823 0.551
+553  1 1 0.937123 0.737107 0.552
+554  1 -1 0.06297 0.262943 0.553
+555  1 1 0.262995 0.936935 0.554
+556  1 1 0.736953 0.063161 0.555
+557  1 -1 0.641309 0.976336 0.556
+558  1 1 0.358723 0.0237733 0.557
+559  1 1 0.0238843 0.641244 0.558
+560  1 1 0.976003 0.35879 0.559
+561  1 -1 0.97412 0.647005 0.56
+562  1 1 0.0259968 0.353031 0.561
+563  1 -1 0.353067 0.973885 0.562
+564  1 -1 0.646895 0.0262355 0.563
+565  1 1 0.731072 0.938759 0.564
+566  1 -1 0.268989 0.0613556 0.565
+567  1 1 0.0614726 0.73095 0.566
+568  1 1 0.938409 0.269113 0.567
+569  1 -1 0.880979 0.816593 0.568
+570  1 1 0.119128 0.183495 0.569
+571  1 1 0.183585 0.880764 0.57
+572  1 1 0.816324 0.119345 0.571
+573  1 -1 0.545476 0.992695 0.572
+574  1 1 0.454538 0.00745132 0.573
+575  1 1 0.00759931 0.545448 0.574
+576  1 -1 0.992251 0.454565 0.575
+577  1 -1 0.993437 0.527277 0.576
+578  1 1 0.00671769 0.472732 0.577
+579  1 1 0.47274 0.993126 0.578
+580  1 1 0.527251 0.00703308 0.579
+581  1 -1 0.829203 0.86773 0.58
+582  1 1 0.170905 0.132392 0.581
+583  1 -1 0.132515 0.828984 0.582
+584  1 1 0.86736 0.171127 0.583
+585  1 1 0.944269 0.713469 0.584
+586  1 -1 0.0558856 0.286605 0.585
+587  1 -1 0.28668 0.943958 0.586
+588  1 1 0.713244 0.0562003 0.587
+589  1 1 0.662969 0.964432 0.588
+590  1 -1 0.337091 0.0357376 0.589
+591  1 1 0.0359093 0.662849 0.59
+592  1 -1 0.963917 0.337212 0.591
+593  1 1 0.975993 0.622339 0.592
+594  1 -1 0.0241894 0.377708 0.593
+595  1 -1 0.377756 0.975626 0.594
+596  1 -1 0.622197 0.0245602 0.595
+597  1 -1 0.749682 0.922425 0.596
+598  1 1 0.250418 0.0777441 0.597
+599  1 -1 0.0779153 0.74948 0.598
+600  1 1 0.921912 0.250622 0.599
+601  1 1 0.891692 0.794241 0.6
+602  1 1 0.108472 0.205882 0.601
+603  1 -1 0.206007 0.891362 0.602
+604  1 1 0.793867 0.108806 0.603
+605  1 1 0.568791 0.984202 0.604
+606  1 1 0.431239 0.0160094 0.605
+607  1 1 0.0162231 0.568731 0.606
+608  1 1 0.983561 0.4313 0.607
+609  1 1 0.982464 0.574595 0.608
+610  1 1 0.0177556 0.425439 0.609
+611  1 -1 0.425473 0.982023 0.61
+612  1 1 0.574492 0.0182014 0.611
+613  1 -1 0.787874 0.893172 0.612
+614  1 1 0.212262 0.107014 0.613
+615  1 -1 0.107202 0.7876 0.614
+616  1 1 0.892608 0.212539 0.615
+617  1 -1 0.915621 0.752593 0.616
+618  1 -1 0.0845837 0.247532 0.617
+619  1 1 0.247657 0.91521 0.618
+620  1 -1 0.752216 0.0849988 0.619
+621  1 1 0.615048 0.971555 0.62
+622  1 -1 0.385011 0.0286864 0.621
+623  1 -1 0.0289298 0.61493 0.622
+624  1 1 0.970825 0.38513 0.623
+625  1 -1 0.955055 0.665978 0.624
+626  1 1 0.0451863 0.33411 0.625
+627  1 -1 0.334199 0.95457 0.626
+628  1 1 0.665711 0.0456755 0.627
+629  1 1 0.703969 0.938193 0.628
+630  1 -1 0.296143 0.0620486 0.629
+631  1 1 0.0622917 0.703744 0.63
+632  1 1 0.937463 0.296371 0.631
+633  1 -1 0.855338 0.826053 0.632
+634  1 -1 0.144864 0.174132 0.633
+635  1 -1 0.17432 0.854932 0.634
+636  1 -1 0.825491 0.145274 0.635
+637  1 -1 0.52066 0.980705 0.636
+638  1 -1 0.479353 0.0195786 0.637
+639  1 -1 0.0198644 0.520635 0.638
+640  1 -1 0.979848 0.479377 0.639
+641  1 1 0.979774 0.514724 0.64
+642  1 1 0.0205185 0.485285 0.641
+643  1 -1 0.485294 0.979187 0.642
+644  1 -1 0.514697 0.0211107 0.643
+645  1 1 0.828028 0.8488 0.644
+646  1 -1 0.172178 0.15142 0.645
+647  1 1 0.151642 0.827613 0.646
+648  1 -1 0.848135 0.172597 0.647
+649  1 1 0.935425 0.696216 0.648
+650  1 1 0.0648583 0.303912 0.649
+651  1 -1 0.30404 0.934856 0.65
+652  1 -1 0.69583 0.0654317 0.651
+653  1 1 0.668701 0.945462 0.652
+654  1 -1 0.331413 0.0548379 0.653
+655  1 1 0.0551396 0.668473 0.654
+656  1 -1 0.944557 0.331642 0.655
+657  1 1 0.962851 0.606924 0.656
+658  1 1 0.0374699 0.39315 0.657
+659  1 1 0.393225 0.962207 0.658
+660  1 1 0.6067 0.0381189 0.659
+661  1 1 0.750973 0.901762 0.66
+662  1 -1 0.249207 0.0985254 0.661
+663  1 -1 0.0988149 0.750612 0.662
+664  1 -1 0.900894 0.24957 0.663
+665  1 1 0.884501 0.774341 0.664
+666  1 1 0.115783 0.225861 0.665
+667  1 -1 0.226065 0.883932 0.666
+668  1 1 0.77373 0.116356 0.667
+669  1 -1 0.577663 0.964483 0.668
+670  1 -1 0.422396 0.03587 0.669
+671  1 1 0.0362254 0.577544 0.67
+672  1 1 0.963417 0.422516 0.671
+673  1 -1 0.965593 0.560328 0.672
+674  1 -1 0.0347718 0.439719 0.673
+675  1 1 0.439767 0.964861 0.674
+676  1 -1 0.560185 0.0355081 0.675
+677  1 -1 0.785659 0.870706 0.676
+678  1 1 0.214571 0.129593 0.677
+679  1 1 0.129894 0.785197 0.678
+680  1 -1 0.869804 0.215036 0.679
+681  1 -1 0.904456 0.732411 0.68
+682  1 1 0.0958792 0.267782 0.681
+683  1 -1 0.267976 0.903783 0.682
+684  1 -1 0.731829 0.0965569 0.683
+685  1 -1 0.621247 0.948824 0.684
+686  1 1 0.378857 0.051559 0.685
+687  1 -1 0.0519446 0.621039 0.686
+688  1 -1 0.947667 0.379066 0.687
+689  1 -1 0.939026 0.648028 0.688
+690  1 -1 0.06136 0.352102 0.689
+691  1 -1 0.352232 0.938252 0.69
+692  1 1 0.647636 0.0621385 0.691
+693  1 1 0.705036 0.913638 0.692
+694  1 1 0.295149 0.0867361 0.693
+695  1 -1 0.0871122 0.704665 0.694
+696  1 -1 0.91251 0.295523 0.695
+697  1 1 0.846453 0.80258 0.696
+698  1 -1 0.153869 0.197702 0.697
+699  1 1 0.197984 0.845807 0.698
+700  1 1 0.801731 0.154519 0.699
+701  1 1 0.530907 0.957214 0.7
+702  1 -1 0.469123 0.0432226 0.701
+703  1 1 0.0436619 0.530847 0.702
+704  1 1 0.955896 0.469182 0.703
+705  1 -1 0.955039 0.536374 0.704
+706  1 -1 0.0454073 0.463661 0.705
+707  1 1 0.463697 0.954143 0.706
+708  1 1 0.536267 0.0463088 0.707
+709  1 -1 0.794868 0.846105 0.708
+710  1 1 0.20543 0.154244 0.709
+711  1 -1 0.154595 0.794271 0.71
+712  1 1 0.845051 0.20603 0.711
+713  1 1 0.903223 0.706076 0.712
+714  1 1 0.0971952 0.294138 0.713
+715  1 1 0.294353 0.902384 0.714
+716  1 1 0.705431 0.0980386 0.715
+717  1 -1 0.638821 0.929037 0.716
+718  1 -1 0.361327 0.07142 0.717
+719  1 -1 0.0718796 0.638524 0.718
+720  1 1 0.927658 0.361625 0.719
+721  1 1 0.931991 0.622407 0.72
+722  1 1 0.0684813 0.377727 0.721
+723  1 -1 0.377862 0.931043 0.722
+724  1 -1 0.622003 0.0694348 0.723
+725  1 -1 0.717943 0.890288 0.724
+726  1 1 0.282302 0.110151 0.725
+727  1 1 0.110592 0.717452 0.726
+728  1 -1 0.888965 0.282796 0.727
+729  1 -1 0.84912 0.775919 0.728
+730  1 -1 0.151283 0.224399 0.729
+731  1 1 0.22472 0.848311 0.73
+732  1 1 0.774958 0.152096 0.731
+733  1 -1 0.55152 0.939911 0.732
+734  1 -1 0.448542 0.0606106 0.733
+735  1 1 0.0611352 0.551397 0.734
+736  1 1 0.938337 0.448665 0.735
+737  1 1 0.933839 0.578023 0.736
+738  1 1 0.0666893 0.422072 0.737
+739  1 1 0.422167 0.932779 0.738
+740  1 -1 0.577737 0.0677547 0.739
+741  1 1 0.750364 0.860164 0.74
+742  1 -1 0.249949 0.140287 0.741
+743  1 -1 0.14074 0.749736 0.742
+744  1 1 0.858805 0.250581 0.743
+745  1 1 0.867273 0.735743 0.744
+746  1 -1 0.133199 0.26456 0.745
+747  1 -1 0.264865 0.866327 0.746
+748  1 -1 0.734829 0.13415 0.747
+749  1 -1 0.592524 0.924188 0.748
+750  1 1 0.407598 0.0763718 0.749
+751  1 1 0.0769344 0.592279 0.75
+752  1 1 0.9225 0.407844 0.751
+753  1 -1 0.901953 0.657891 0.752
+754  1 1 0.0985912 0.342323 0.753
+755  1 -1 0.342538 0.900861 0.754
+756  1 -1 0.657245 0.099689 0.755
+757  1 -1 0.671636 0.893707 0.756
+758  1 1 0.328603 0.106841 0.757
+759  1 1 0.107392 0.671157 0.758
+760  1 -1 0.892055 0.329085 0.759
+761  1 1 0.807501 0.796384 0.76
+762  1 1 0.192939 0.20404 0.761
+763  1 1 0.204466 0.80662 0.762
+764  1 -1 0.795106 0.193824 0.763
+765  1 -1 0.507816 0.92455 0.764
+766  1 -1 0.492196 0.076073 0.765
+767  1 -1 0.0766996 0.507793 0.766
+768  1 -1 0.922671 0.492219 0.767
+769  1 1 0.922037 0.50777 0.768
+770  1 -1 0.078599 0.492242 0.769
+771  1 1 0.492254 0.920761 0.77
+772  1 1 0.507734 0.0798819 0.771
+773  1 -1 0.791151 0.802072 0.772
+774  1 1 0.2093 0.198396 0.773
+775  1 1 0.198867 0.790246 0.774
+776  1 1 0.80066 0.21021 0.775
+777  1 -1 0.882184 0.666612 0.776
+778  1 1 0.118425 0.333653 0.777
+779  1 1 0.33392 0.880964 0.778
+780  1 -1 0.665812 0.119651 0.779
+781  1 -1 0.651454 0.885567 0.78
+782  1 1 0.348794 0.115064 0.781
+783  1 -1 0.115698 0.650957 0.782
+784  1 -1 0.883665 0.349293 0.783
+785  1 1 0.902061 0.587698 0.784
+786  1 -1 0.098615 0.41245 0.785
+787  1 -1 0.412598 0.900705 0.786
+788  1 -1 0.587253 0.0999777 0.787
+789  1 1 0.720782 0.843964 0.788
+790  1 -1 0.2796 0.15663 0.789
+791  1 1 0.157228 0.720016 0.79
+792  1 -1 0.842172 0.280369 0.791
+793  1 1 0.833266 0.731667 0.792
+794  1 1 0.167326 0.268745 0.793
+795  1 1 0.269159 0.832079 0.794
+796  1 1 0.730425 0.16852 0.795
+797  1 1 0.571327 0.896606 0.796
+798  1 1 0.428803 0.104119 0.797
+799  1 1 0.104847 0.571066 0.798
+800  1 1 0.89442 0.429066 0.799
+801  1 1 0.897285 0.546527 0.8
+802  1 -1 0.103462 0.45356 0.801
+803  1 -1 0.453648 0.895787 0.802
+804  1 1 0.546264 0.104968 0.803
+805  1 1 0.746143 0.811444 0.804
+806  1 -1 0.254333 0.189158 0.805
+807  1 1 0.189764 0.745189 0.806
+808  1 1 0.809628 0.255293 0.807
+809  1 1 0.843889 0.692033 0.808
+810  1 1 0.156796 0.308349 0.809
+811  1 1 0.308734 0.842516 0.81
+812  1 1 0.69088 0.158176 0.811
+813  1 -1 0.606516 0.875913 0.812
+814  1 -1 0.393702 0.124858 0.813
+815  1 1 0.125632 0.606079 0.814
+816  1 1 0.873589 0.394142 0.815
+817  1 1 0.868666 0.619287 0.816
+818  1 -1 0.132112 0.380965 0.817
+819  1 1 0.381218 0.867106 0.818
+820  1 -1 0.618528 0.13368 0.819
+821  1 1 0.674837 0.842099 0.82
+822  1 1 0.325543 0.158644 0.821
+823  1 1 0.159392 0.674075 0.822
+824  1 -1 0.839856 0.326309 0.823
+825  1 -1 0.789881 0.746967 0.824
+826  1 1 0.210768 0.253586 0.825
+827  1 -1 0.254142 0.788579 0.826
+828  1 -1 0.745299 0.212077 0.827
+829  1 1 0.530071 0.87618 0.828
+830  1 1 0.469999 0.124688 0.829
+831  1 1 0.125562 0.529931 0.83
+832  1 1 0.87356 0.470139 0.831
+833  1 1 0.873015 0.525215 0.832
+834  1 -1 0.127873 0.474845 0.833
+835  1 -1 0.474905 0.871233 0.834
+836  1 1 0.525034 0.129665 0.835
+837  1 1 0.74357 0.778887 0.836
+838  1 1 0.257029 0.221799 0.837
+839  1 1 0.222488 0.742369 0.838
+840  1 -1 0.776819 0.258236 0.839
+841  1 -1 0.828467 0.662818 0.84
+842  1 1 0.172367 0.337595 0.841
+843  1 1 0.33801 0.826796 0.842
+844  1 -1 0.661572 0.174047 0.843
+845  1 -1 0.615933 0.843836 0.844
+846  1 -1 0.384371 0.157064 0.845
+847  1 -1 0.15797 0.615324 0.846
+848  1 -1 0.841119 0.384983 0.847
+849  1 -1 0.846597 0.593631 0.848
+850  1 1 0.154342 0.406623 0.849
+851  1 1 0.406878 0.844715 0.85
+852  1 -1 0.592866 0.156234 0.851
+853  1 1 0.676921 0.807888 0.852
+854  1 -1 0.323575 0.192974 0.853
+855  1 -1 0.193841 0.675927 0.854
+856  1 1 0.805288 0.324574 0.855
+857  1 1 0.7781 0.714299 0.856
+858  1 1 0.222705 0.286322 0.857
+859  1 -1 0.286946 0.776485 0.858
+860  1 1 0.712426 0.22433 0.859
+861  1 1 0.544587 0.84411 0.86
+862  1 -1 0.455547 0.156921 0.861
+863  1 1 0.157959 0.544319 0.862
+864  1 1 0.840997 0.455816 0.863
+865  1 1 0.838095 0.55653 0.864
+866  1 1 0.162955 0.443645 0.865
+867  1 1 0.443822 0.835988 0.866
+868  1 -1 0.556 0.165075 0.867
+869  1 -1 0.6966 0.775544 0.868
+870  1 -1 0.304033 0.225344 0.869
+871  1 -1 0.226237 0.69533 0.87
+872  1 -1 0.772865 0.305311 0.871
+873  1 1 0.783348 0.677002 0.872
+874  1 1 0.217599 0.32359 0.873
+875  1 -1 0.324185 0.781448 0.874
+876  1 -1 0.675216 0.219511 0.875
+877  1 -1 0.574184 0.821124 0.876
+878  1 -1 0.426074 0.179991 0.877
+879  1 -1 0.181113 0.573667 0.878
+880  1 -1 0.817759 0.426594 0.879
+881  1 1 0.803899 0.61509 0.88
+882  1 1 0.197198 0.385326 0.881
+883  1 1 0.385744 0.801698 0.882
+884  1 -1 0.613835 0.199413 0.883
+885  1 -1 0.631562 0.791951 0.884
+886  1 1 0.368932 0.209146 0.885
+887  1 1 0.21025 0.630571 0.886
+888  1 -1 0.78864 0.36993 0.887
+889  1 -1 0.729733 0.716052 0.888
+890  1 1 0.271166 0.284793 0.889
+891  1 -1 0.285644 0.727929 0.89
+892  1 1 0.713499 0.272982 0.891
+893  1 1 0.509521 0.810234 0.892
+894  1 -1 0.490518 0.191032 0.893
+895  1 -1 0.192307 0.509443 0.894
+896  1 -1 0.806409 0.490597 0.895
+897  1 1 0.804979 0.513107 0.896
+898  1 1 0.196321 0.486949 0.897
+899  1 -1 0.487005 0.802369 0.898
+900  1 -1 0.512939 0.198949 0.899
+901  1 -1 0.702828 0.721045 0.9
+902  1 -1 0.298077 0.279941 0.901
+903  1 1 0.280934 0.701012 0.902
+904  1 -1 0.718067 0.299905 0.903
+905  1 1 0.767075 0.624318 0.904
+906  1 1 0.234173 0.376263 0.905
+907  1 -1 0.376848 0.764571 0.906
+908  1 -1 0.622563 0.236695 0.907
+909  1 1 0.599038 0.771528 0.908
+910  1 1 0.401448 0.229803 0.909
+911  1 -1 0.231144 0.598063 0.91
+912  1 -1 0.767505 0.40243 0.911
+913  1 -1 0.775222 0.567148 0.912
+914  1 -1 0.226196 0.433198 0.913
+915  1 -1 0.433547 0.772375 0.914
+916  1 -1 0.566102 0.229065 0.915
+917  1 1 0.644063 0.737044 0.916
+918  1 1 0.356719 0.264243 0.917
+919  1 -1 0.265539 0.642493 0.918
+920  1 1 0.733153 0.358301 0.919
+921  1 1 0.718892 0.660269 0.92
+922  1 1 0.282362 0.340649 0.921
+923  1 1 0.341575 0.716374 0.922
+924  1 1 0.657491 0.284901 0.923
+925  1 1 0.540491 0.761886 0.924
+926  1 1 0.459755 0.239702 0.925
+927  1 1 0.241303 0.539998 0.926
+928  1 -1 0.757082 0.460252 0.927
+929  1 1 0.755918 0.536358 0.928
+930  1 1 0.245728 0.463875 0.929
+931  1 -1 0.464111 0.75261 0.93
+932  1 -1 0.53565 0.249066 0.931
+933  1 -1 0.651203 0.70128 0.932
+934  1 -1 0.349833 0.300098 0.933
+935  1 -1 0.301489 0.649122 0.934
+936  1 1 0.697107 0.351933 0.935
+937  1 1 0.7107 0.624538 0.936
+938  1 1 0.290841 0.376373 0.937
+939  1 1 0.377293 0.707603 0.938
+940  1 -1 0.621778 0.293968 0.939
+941  1 1 0.559117 0.730011 0.94
+942  1 1 0.441347 0.271792 0.941
+943  1 1 0.273614 0.558185 0.942
+944  1 -1 0.724545 0.442288 0.943
+945  1 1 0.716953 0.576129 0.944
+946  1 1 0.284879 0.424514 0.945
+947  1 1 0.425164 0.713269 0.946
+948  1 1 0.574179 0.288603 0.947
+949  1 1 0.596159 0.700124 0.948
+950  1 1 0.40472 0.301705 0.949
+951  1 -1 0.303556 0.594391 0.95
+952  1 1 0.694572 0.406509 0.951
+953  1 1 0.659268 0.642582 0.952
+954  1 1 0.342317 0.358837 0.953
+955  1 -1 0.360273 0.656078 0.954
+956  1 -1 0.638272 0.345546 0.955
+957  1 1 0.51132 0.704782 0.956
+958  1 -1 0.488803 0.297452 0.957
+959  1 -1 0.299717 0.511072 0.958
+960  1 1 0.697988 0.489055 0.959
+961  1 1 0.69513 0.51801 0.96
+962  1 -1 0.307224 0.482207 0.961
+963  1 1 0.482427 0.690387 0.962
+964  1 -1 0.517349 0.312037 0.963
+965  1 1 0.619063 0.643276 0.964
+966  1 -1 0.382542 0.358654 0.965
+967  1 -1 0.360616 0.615828 0.966
+968  1 1 0.63739 0.385829 0.967
+969  1 1 0.655724 0.582012 0.968
+970  1 1 0.346649 0.419238 0.969
+971  1 -1 0.42051 0.650935 0.97
+972  1 1 0.578194 0.351525 0.971
+973  1 1 0.548857 0.657572 0.972
+974  1 1 0.451999 0.345188 0.973
+975  1 -1 0.348003 0.547128 0.974
+976  1 1 0.649121 0.453763 0.975
+977  1 -1 0.646729 0.543528 0.976
+978  1 -1 0.356287 0.457366 0.977
+979  1 -1 0.458282 0.640626 0.978
+980  1 1 0.54078 0.362537 0.979
+981  1 -1 0.566752 0.623062 0.98
+982  1 -1 0.434905 0.379993 0.981
+983  1 -1 0.383134 0.563391 0.982
+984  1 -1 0.613631 0.438364 0.983
+985  1 -1 0.59748 0.579004 0.984
+986  1 1 0.405567 0.423466 0.985
+987  1 -1 0.426024 0.591277 0.986
+988  1 -1 0.571322 0.411999 0.987
+989  1 1 0.511337 0.608293 0.988
+990  1 1 0.48914 0.396264 0.989
+991  1 -1 0.401042 0.51036 0.99
+992  1 1 0.593927 0.490167 0.991
+993  1 1 0.587477 0.516843 0.992
+994  1 1 0.418131 0.484237 0.993
+995  1 1 0.485399 0.575834 0.994
+996  1 -1 0.513336 0.430738 0.995
+997  1 -1 0.535388 0.552265 0.996
+998  1 -1 0.469337 0.454714 0.997
+999  1 1 0.463006 0.525048 0.998
+1000  1 -1 0.526172 0.482279 0.999
diff --git a/examples/USER/scafacos/in.scafacos.cw.ewald b/examples/USER/scafacos/in.scafacos.cw.ewald
new file mode 100644
index 0000000000000000000000000000000000000000..f8444dd01fabca613d680927fae7064547c32da9
--- /dev/null
+++ b/examples/USER/scafacos/in.scafacos.cw.ewald
@@ -0,0 +1,31 @@
+
+units		lj
+atom_style  charge	
+
+read_data data.cloud_wall
+
+velocity	all set 0.0 0.0 0.0 mom no
+
+pair_style	zero 1.0
+pair_coeff	* * 
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix     1 all nve
+kspace_style scafacos ewald  0.001
+kspace_modify scafacos tolerance field 
+
+timestep	0.005
+
+thermo_style custom step atoms cpu temp pe ke etotal ecoul
+
+run_style verlet
+
+#dump simple all custom 1000 id x y z vx vy vz 
+#dump dmp    all custom 1000 part.dump id mol x y z vx vy vz fx fy fz q mass
+#dump dmpvtk all vtk 1000 vtk/part_*.vtk id mol x y z vx vy vz fx fy fz q mass 
+#dump_modify dmpvtk pad 7
+
+thermo 10
+run	100
diff --git a/examples/USER/scafacos/in.scafacos.cw.fmm b/examples/USER/scafacos/in.scafacos.cw.fmm
new file mode 100644
index 0000000000000000000000000000000000000000..24a273958185968c4827c41c86078e214bf2c1ed
--- /dev/null
+++ b/examples/USER/scafacos/in.scafacos.cw.fmm
@@ -0,0 +1,37 @@
+units	    lj	
+atom_style  charge	
+
+read_data data.cloud_wall
+
+velocity	all set 0.0 0.0 0.0 mom no
+
+pair_style	zero 1.0
+pair_coeff	* * 
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix     1 all nve
+
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+kspace_style scafacos fmm 1.0e-3 
+kspace_modify scafacos tolerance energy_rel
+kspace_modify scafacos fmm_tuning 1
+
+timestep	0.005
+
+thermo_style custom step atoms cpu temp pe ke etotal ecoul
+
+run_style verlet
+
+#dump simple all custom 1000 id x y z vx vy vz 
+#dump dmp    all custom 1000 part.dump id mol x y z vx vy vz fx fy fz q mass
+#dump dmpvtk all vtk 1000 vtk/part_*.vtk id mol x y z vx vy vz fx fy fz q mass 
+#dump_modify dmpvtk pad 7
+
+thermo 10
+run	100
diff --git a/examples/USER/scafacos/in.scafacos.cw.p2nfft b/examples/USER/scafacos/in.scafacos.cw.p2nfft
new file mode 100644
index 0000000000000000000000000000000000000000..7a2391bc77b57c37ff393a5a0e3c03ef276bb599
--- /dev/null
+++ b/examples/USER/scafacos/in.scafacos.cw.p2nfft
@@ -0,0 +1,31 @@
+
+units		lj
+atom_style  charge	
+
+read_data data.cloud_wall
+
+velocity	all set 0.0 0.0 0.0 mom no
+
+pair_style	zero 1.0
+pair_coeff	* * 
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix     1 all nve
+kspace_style scafacos p2nfft  0.001
+kspace_modify scafacos tolerance field 
+
+timestep	0.005
+
+thermo_style custom step atoms cpu temp pe ke etotal ecoul
+
+run_style verlet
+
+#dump simple all custom 1000 id x y z vx vy vz 
+#dump dmp    all custom 1000 part.dump id mol x y z vx vy vz fx fy fz q mass
+#dump dmpvtk all vtk 1000 vtk/part_*.vtk id mol x y z vx vy vz fx fy fz q mass 
+#dump_modify dmpvtk pad 7
+
+thermo 10
+run	100
diff --git a/examples/USER/scafacos/in.scafacos.ewald b/examples/USER/scafacos/in.scafacos.ewald
new file mode 100644
index 0000000000000000000000000000000000000000..6c26c71884b061806bd4fcd4fb4a39e14139f363
--- /dev/null
+++ b/examples/USER/scafacos/in.scafacos.ewald
@@ -0,0 +1,37 @@
+# NaCl point charges (data.NaCl replicated 8x8x8) in a 3d box, ScaFaCoS ewald solver
+
+units		lj
+atom_style  charge	
+
+read_data data.NaCl
+
+replicate 8 8 8
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* * 
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos ewald 0.001
+kspace_modify    scafacos tolerance field
+
+timestep	0.005
+thermo          10
+
+run		100
diff --git a/examples/USER/scafacos/in.scafacos.fmm b/examples/USER/scafacos/in.scafacos.fmm
new file mode 100644
index 0000000000000000000000000000000000000000..18d7464fb84842c6b85f1d9548c7b7121988c72b
--- /dev/null
+++ b/examples/USER/scafacos/in.scafacos.fmm
@@ -0,0 +1,37 @@
+# NaCl point charges (data.NaCl replicated 8x8x8) in a 3d box, ScaFaCoS fmm solver
+
+units		lj
+atom_style  charge	
+
+read_data data.NaCl
+
+replicate 8 8 8 
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* * 
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos fmm 0.001
+kspace_modify    scafacos tolerance energy_rel
+
+timestep	0.005
+thermo          10
+
+run		100
diff --git a/examples/USER/scafacos/in.scafacos.hsph.direct b/examples/USER/scafacos/in.scafacos.hsph.direct
new file mode 100644
index 0000000000000000000000000000000000000000..60448880ec2b3731f9dd76723c005a4866223001
--- /dev/null
+++ b/examples/USER/scafacos/in.scafacos.hsph.direct
@@ -0,0 +1,34 @@
+# Point charges from data.hammersley_shphere, boundaries set to f f f, ScaFaCoS direct solver
+
+units		lj
+atom_style  charge	
+
+read_data data.hammersley_shphere
+change_box all boundary f f f
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* * 
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos direct 0.001
+
+timestep	0.005
+thermo          1
+run		20
diff --git a/examples/USER/scafacos/in.scafacos.hsph.fmm b/examples/USER/scafacos/in.scafacos.hsph.fmm
new file mode 100644
index 0000000000000000000000000000000000000000..515809dbde3360210a4fb389c001dce3eadcf9a7
--- /dev/null
+++ b/examples/USER/scafacos/in.scafacos.hsph.fmm
@@ -0,0 +1,37 @@
+# Point charges from data.hammersley_shphere, boundaries set to f f f, ScaFaCoS fmm solver
+
+units		lj
+atom_style  charge	
+
+read_data data.hammersley_shphere
+change_box all boundary f f f 
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* * 
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos fmm 0.001
+kspace_modify scafacos tolerance energy_rel
+kspace_modify scafacos fmm_tuning 1
+
+timestep	0.005
+thermo          1
+
+run		20
diff --git a/examples/USER/scafacos/in.scafacos.hsph.p2nfft b/examples/USER/scafacos/in.scafacos.hsph.p2nfft
new file mode 100644
index 0000000000000000000000000000000000000000..d690a29e494d77d1ecd338b5fd73005063757522
--- /dev/null
+++ b/examples/USER/scafacos/in.scafacos.hsph.p2nfft
@@ -0,0 +1,36 @@
+# Point charges from data.hammersley_shphere, boundaries set to f f f, ScaFaCoS p2nfft solver
+
+units		lj
+atom_style  charge	
+
+read_data data.hammersley_shphere
+change_box all boundary f f f
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* * 
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos p2nfft 0.001
+kspace_modify scafacos tolerance potential
+
+timestep	0.005
+thermo          1
+
+run		20
diff --git a/examples/USER/scafacos/in.scafacos.p2nfft b/examples/USER/scafacos/in.scafacos.p2nfft
new file mode 100644
index 0000000000000000000000000000000000000000..7fa2ccdd9e95b7e2b391e19bc10ace5a843b001e
--- /dev/null
+++ b/examples/USER/scafacos/in.scafacos.p2nfft
@@ -0,0 +1,37 @@
+# NaCl point charges (data.NaCl replicated 8x8x8) in a 3d box, ScaFaCoS p2nfft solver
+
+units		lj
+atom_style  charge	
+
+read_data data.NaCl
+
+replicate 8 8 8 
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* * 
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos p2nfft 0.001
+kspace_modify    scafacos tolerance field
+
+timestep	0.005
+thermo          10
+
+run		100
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.ewald.16 b/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.ewald.16
new file mode 100644
index 0000000000000000000000000000000000000000..ffdc7e3a3c6d34e288dc72aa4b7298ff4abdc461
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.ewald.16
@@ -0,0 +1,92 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+
+units		lj
+atom_style  charge
+
+read_data data.cloud_wall
+  orthogonal box = (0 0 0) to (10 10 10)
+  2 by 2 by 4 MPI processor grid
+  reading atoms ...
+  300 atoms
+
+velocity	all set 0.0 0.0 0.0 mom no
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix     1 all nve
+kspace_style scafacos ewald  0.001
+kspace_modify scafacos tolerance field
+
+timestep	0.005
+
+thermo_style custom step atoms cpu temp pe ke etotal ecoul
+
+run_style verlet
+
+#dump simple all custom 1000 id x y z vx vy vz
+#dump dmp    all custom 1000 part.dump id mol x y z vx vy vz fx fy fz q mass
+#dump dmpvtk all vtk 1000 vtk/part_*.vtk id mol x y z vx vy vz fx fy fz q mass
+#dump_modify dmpvtk pad 7
+
+thermo 10
+run	100
+Setting up ScaFaCoS with solver ewald ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 10 10 10
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.313 | 3.501 | 3.689 Mbytes
+Step Atoms CPU Temp PotEng KinEng TotEng E_coul 
+       0      300            0            0   0.49647271            0   0.49647271            0 
+      10      300  0.061254978  0.014582562   0.44286522   0.02180093   0.46466616            0 
+      20      300   0.12240911  0.058693359   0.37869251  0.087746571   0.46643909            0 
+      30      300   0.18348265   0.13468789   0.26730177    0.2013584   0.46866017            0 
+      40      300   0.24441123   0.50949535  0.083356437   0.76169555   0.84505198            0 
+      50      300   0.30493236    1.1731116 -0.055261984    1.7538018    1.6985399            0 
+      60      300   0.36615515    1.3589639  -0.33351524     2.031651    1.6981358            0 
+      70      300   0.42717433    1.6482648  -0.76570045    2.4641559    1.6984554            0 
+      80      300   0.48137259    2.8640899   -2.4038488    4.2818144    1.8779656            0 
+      90      300    0.5323379    93.168442   -2.5911448    139.28682    136.69568            0 
+     100      300    0.5756321    94.146897   -1.3480439    140.74961    139.40157            0 
+Loop time of 0.575655 on 16 procs for 100 steps with 300 atoms
+
+Performance: 75045.007 tau/day, 173.715 timesteps/s
+98.7% CPU use with 16 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 1.2159e-05 | 1.739e-05  | 2.3127e-05 |   0.0 |  0.00
+Kspace  | 0.57008    | 0.57086    | 0.57137    |   0.1 | 99.17
+Neigh   | 0.00025606 | 0.00061357 | 0.001369   |   0.0 |  0.11
+Comm    | 0.0023558  | 0.0027267  | 0.003104   |   0.4 |  0.47
+Output  | 0.00029898 | 0.00032344 | 0.00037599 |   0.0 |  0.06
+Modify  | 8.2493e-05 | 0.00014143 | 0.00021815 |   0.0 |  0.02
+Other   |            | 0.0009701  |            |       |  0.17
+
+Nlocal:    18.75 ave 39 max 6 min
+Histogram: 6 1 1 0 1 2 2 1 1 1
+Nghost:    122.812 ave 195 max 63 min
+Histogram: 8 0 0 0 0 0 0 1 3 4
+Neighs:    160.625 ave 598 max 13 min
+Histogram: 8 2 1 1 1 0 0 2 0 1
+
+Total # of neighbors = 2570
+Ave neighs/atom = 8.56667
+Neighbor list builds = 23
+Dangerous builds = 16
+Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.ewald.8 b/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.ewald.8
new file mode 100644
index 0000000000000000000000000000000000000000..52d887f4754c0f166aa202de55360291838abd5e
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.ewald.8
@@ -0,0 +1,92 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+
+units		lj
+atom_style  charge
+
+read_data data.cloud_wall
+  orthogonal box = (0 0 0) to (10 10 10)
+  2 by 2 by 2 MPI processor grid
+  reading atoms ...
+  300 atoms
+
+velocity	all set 0.0 0.0 0.0 mom no
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix     1 all nve
+kspace_style scafacos ewald  0.001
+kspace_modify scafacos tolerance field
+
+timestep	0.005
+
+thermo_style custom step atoms cpu temp pe ke etotal ecoul
+
+run_style verlet
+
+#dump simple all custom 1000 id x y z vx vy vz
+#dump dmp    all custom 1000 part.dump id mol x y z vx vy vz fx fy fz q mass
+#dump dmpvtk all vtk 1000 vtk/part_*.vtk id mol x y z vx vy vz fx fy fz q mass
+#dump_modify dmpvtk pad 7
+
+thermo 10
+run	100
+Setting up ScaFaCoS with solver ewald ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 10 10 10
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.317 | 3.317 | 3.317 Mbytes
+Step Atoms CPU Temp PotEng KinEng TotEng E_coul 
+       0      300            0            0   0.49647271            0   0.49647271            0 
+      10      300  0.067519665  0.014718629   0.45088339   0.02200435   0.47288774            0 
+      20      300   0.13502312   0.05922597   0.38470912  0.088542825   0.47325194            0 
+      30      300   0.20248008   0.13587829   0.27058048   0.20313804   0.47371852            0 
+      40      300   0.26985955   0.51353118  0.088432648   0.76772911   0.85616176            0 
+      50      300   0.33801103    1.1760001 -0.058088247    1.7581201    1.7000319            0 
+      60      300   0.40665984    1.3627885  -0.33736672    2.0373688    1.7000021            0 
+      70      300   0.47533679    1.6529365  -0.77082139    2.4711401    1.7003187            0 
+      80      300   0.54396391    2.9569837   -2.4624654    4.4206907    1.9582253            0 
+      90      300   0.61429667    81.642726   -2.5370215    122.05588    119.51885            0 
+     100      300   0.68513632    85.047974    -1.128107    127.14672    126.01861            0 
+Loop time of 0.685155 on 8 procs for 100 steps with 300 atoms
+
+Performance: 63051.442 tau/day, 145.952 timesteps/s
+99.7% CPU use with 8 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 1.0967e-05 | 1.4216e-05 | 1.8358e-05 |   0.0 |  0.00
+Kspace  | 0.67942    | 0.67981    | 0.6803     |   0.0 | 99.22
+Neigh   | 0.00073242 | 0.0010653  | 0.0015635  |   0.8 |  0.16
+Comm    | 0.0029492  | 0.0031102  | 0.0033047  |   0.2 |  0.45
+Output  | 0.00021768 | 0.00023454 | 0.00028443 |   0.0 |  0.03
+Modify  | 0.00016046 | 0.00018132 | 0.00019431 |   0.0 |  0.03
+Other   |            | 0.0007433  |            |       |  0.11
+
+Nlocal:    37.5 ave 46 max 31 min
+Histogram: 2 0 0 2 1 0 2 0 0 1
+Nghost:    203.875 ave 212 max 192 min
+Histogram: 1 0 1 0 0 2 1 0 0 3
+Neighs:    321.625 ave 599 max 112 min
+Histogram: 1 2 0 1 1 0 1 1 0 1
+
+Total # of neighbors = 2573
+Ave neighs/atom = 8.57667
+Neighbor list builds = 23
+Dangerous builds = 16
+Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.fmm.1 b/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.fmm.1
new file mode 100644
index 0000000000000000000000000000000000000000..47e0be01cecab6def841bfba070c509a59575584
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.fmm.1
@@ -0,0 +1,99 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+units	    lj
+atom_style  charge
+
+read_data data.cloud_wall
+  orthogonal box = (0 0 0) to (10 10 10)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  300 atoms
+
+velocity	all set 0.0 0.0 0.0 mom no
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix     1 all nve
+
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+kspace_style scafacos fmm 1.0e-3
+kspace_modify scafacos tolerance energy_rel
+kspace_modify scafacos fmm_tuning 1
+ScaFaCoS setting fmm inhomogen tuning ...3
+
+timestep	0.005
+
+thermo_style custom step atoms cpu temp pe ke etotal ecoul
+
+run_style verlet
+
+#dump simple all custom 1000 id x y z vx vy vz
+#dump dmp    all custom 1000 part.dump id mol x y z vx vy vz fx fy fz q mass
+#dump dmpvtk all vtk 1000 vtk/part_*.vtk id mol x y z vx vy vz fx fy fz q mass
+#dump_modify dmpvtk pad 7
+
+thermo 10
+run	100
+Setting up ScaFaCoS with solver fmm ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 10 10 10
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.34 | 3.34 | 3.34 Mbytes
+Step Atoms CPU Temp PotEng KinEng TotEng E_coul 
+       0      300            0            0   0.49646402            0   0.49646402            0 
+      10      300  0.036603451  0.015455559   0.47335833   0.02310606   0.49646439            0 
+      20      300  0.073534727   0.06229069   0.40334177  0.093124582   0.49646635            0 
+      30      300   0.10984373   0.14310163   0.28254277   0.21393694   0.49647971            0 
+      40      300   0.14610171   0.52929788  0.089669015   0.79130033   0.88096934            0 
+      50      300   0.18255496    1.1963022 -0.082792461    1.7884718    1.7056794            0 
+      60      300    0.2194376    1.3928167  -0.37659239     2.082261    1.7056686            0 
+      70      300   0.25588369    1.7069009  -0.84571914    2.5518169    1.7060978            0 
+      80      300   0.29101205    15.358343    -3.368063    22.960722    19.592659            0 
+      90      300   0.32697225    42.280432   -2.1623864    63.209247     61.04686            0 
+     100      300   0.36395645     41.48079  -0.89904529    62.013782    61.114736            0 
+Loop time of 0.363968 on 1 procs for 100 steps with 300 atoms
+
+Performance: 118691.709 tau/day, 274.749 timesteps/s
+99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 1.2875e-05 | 1.2875e-05 | 1.2875e-05 |   0.0 |  0.00
+Kspace  | 0.35715    | 0.35715    | 0.35715    |   0.0 | 98.13
+Neigh   | 0.0039768  | 0.0039768  | 0.0039768  |   0.0 |  1.09
+Comm    | 0.0012023  | 0.0012023  | 0.0012023  |   0.0 |  0.33
+Output  | 0.0001502  | 0.0001502  | 0.0001502  |   0.0 |  0.04
+Modify  | 0.0009768  | 0.0009768  | 0.0009768  |   0.0 |  0.27
+Other   |            | 0.0005031  |            |       |  0.14
+
+Nlocal:    300 ave 300 max 300 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    374 ave 374 max 374 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    2459 ave 2459 max 2459 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 2459
+Ave neighs/atom = 8.19667
+Neighbor list builds = 15
+Dangerous builds = 0
+Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.fmm.16 b/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.fmm.16
new file mode 100644
index 0000000000000000000000000000000000000000..f104355d87ac4a30d313627b86c50de266780ed2
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.fmm.16
@@ -0,0 +1,99 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+units	    lj
+atom_style  charge
+
+read_data data.cloud_wall
+  orthogonal box = (0 0 0) to (10 10 10)
+  2 by 2 by 4 MPI processor grid
+  reading atoms ...
+  300 atoms
+
+velocity	all set 0.0 0.0 0.0 mom no
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix     1 all nve
+
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+kspace_style scafacos fmm 1.0e-3
+kspace_modify scafacos tolerance energy_rel
+kspace_modify scafacos fmm_tuning 1
+ScaFaCoS setting fmm inhomogen tuning ...3
+
+timestep	0.005
+
+thermo_style custom step atoms cpu temp pe ke etotal ecoul
+
+run_style verlet
+
+#dump simple all custom 1000 id x y z vx vy vz
+#dump dmp    all custom 1000 part.dump id mol x y z vx vy vz fx fy fz q mass
+#dump dmpvtk all vtk 1000 vtk/part_*.vtk id mol x y z vx vy vz fx fy fz q mass
+#dump_modify dmpvtk pad 7
+
+thermo 10
+run	100
+Setting up ScaFaCoS with solver fmm ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 10 10 10
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.313 | 3.501 | 3.689 Mbytes
+Step Atoms CPU Temp PotEng KinEng TotEng E_coul 
+       0      300            0            0   0.49646402            0   0.49646402            0 
+      10      300  0.018873215  0.015455559   0.47335833   0.02310606   0.49646439            0 
+      20      300  0.035197735   0.06229069   0.40334177  0.093124582   0.49646635            0 
+      30      300  0.051513195   0.14310163   0.28254277   0.21393694   0.49647971            0 
+      40      300  0.067400217   0.52929788  0.089669015   0.79130033   0.88096934            0 
+      50      300  0.083569527    1.1963022 -0.082792461    1.7884718    1.7056794            0 
+      60      300  0.099931955    1.3928167  -0.37659239     2.082261    1.7056686            0 
+      70      300   0.11602688    1.7069009  -0.84571914    2.5518169    1.7060978            0 
+      80      300   0.13271379    15.358343    -3.368063    22.960722    19.592659            0 
+      90      300   0.15055513    42.280432   -2.1623864    63.209247     61.04686            0 
+     100      300   0.16817498     41.48079  -0.89904529    62.013782    61.114736            0 
+Loop time of 0.168194 on 16 procs for 100 steps with 300 atoms
+
+Performance: 256846.586 tau/day, 594.552 timesteps/s
+97.7% CPU use with 16 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 1.4544e-05 | 2.0206e-05 | 2.6941e-05 |   0.0 |  0.01
+Kspace  | 0.16313    | 0.16373    | 0.16423    |   0.1 | 97.34
+Neigh   | 0.00016737 | 0.00039591 | 0.00084519 |   0.0 |  0.24
+Comm    | 0.0021157  | 0.0026083  | 0.0032525  |   0.7 |  1.55
+Output  | 0.00026298 | 0.00030228 | 0.00035977 |   0.0 |  0.18
+Modify  | 0.00010705 | 0.00015062 | 0.00020647 |   0.0 |  0.09
+Other   |            | 0.0009913  |            |       |  0.59
+
+Nlocal:    18.75 ave 36 max 6 min
+Histogram: 4 3 1 0 0 1 2 1 2 2
+Nghost:    127 ave 196 max 71 min
+Histogram: 8 0 0 0 0 0 0 1 6 1
+Neighs:    153.688 ave 491 max 10 min
+Histogram: 8 1 1 1 1 1 0 0 0 3
+
+Total # of neighbors = 2459
+Ave neighs/atom = 8.19667
+Neighbor list builds = 15
+Dangerous builds = 0
+Total wall time: 0:00:01
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.fmm.2 b/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.fmm.2
new file mode 100644
index 0000000000000000000000000000000000000000..43ded2d4da486c20c7780aedc44713a2e6bdf9a6
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.fmm.2
@@ -0,0 +1,99 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+units	    lj
+atom_style  charge
+
+read_data data.cloud_wall
+  orthogonal box = (0 0 0) to (10 10 10)
+  1 by 1 by 2 MPI processor grid
+  reading atoms ...
+  300 atoms
+
+velocity	all set 0.0 0.0 0.0 mom no
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix     1 all nve
+
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+kspace_style scafacos fmm 1.0e-3
+kspace_modify scafacos tolerance energy_rel
+kspace_modify scafacos fmm_tuning 1
+ScaFaCoS setting fmm inhomogen tuning ...3
+
+timestep	0.005
+
+thermo_style custom step atoms cpu temp pe ke etotal ecoul
+
+run_style verlet
+
+#dump simple all custom 1000 id x y z vx vy vz
+#dump dmp    all custom 1000 part.dump id mol x y z vx vy vz fx fy fz q mass
+#dump dmpvtk all vtk 1000 vtk/part_*.vtk id mol x y z vx vy vz fx fy fz q mass
+#dump_modify dmpvtk pad 7
+
+thermo 10
+run	100
+Setting up ScaFaCoS with solver fmm ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 10 10 10
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.354 | 3.354 | 3.355 Mbytes
+Step Atoms CPU Temp PotEng KinEng TotEng E_coul 
+       0      300            0            0   0.49646402            0   0.49646402            0 
+      10      300  0.021326542  0.015455559   0.47335833   0.02310606   0.49646439            0 
+      20      300  0.043129683   0.06229069   0.40334177  0.093124582   0.49646635            0 
+      30      300  0.064425707   0.14310163   0.28254277   0.21393694   0.49647971            0 
+      40      300  0.085822344   0.52929788  0.089669015   0.79130033   0.88096934            0 
+      50      300   0.10737753    1.1963022 -0.082792461    1.7884718    1.7056794            0 
+      60      300   0.12947297    1.3928167  -0.37659239     2.082261    1.7056686            0 
+      70      300   0.15108895    1.7069009  -0.84571914    2.5518169    1.7060978            0 
+      80      300   0.17378163    15.358343    -3.368063    22.960722    19.592659            0 
+      90      300   0.19809985    42.280432   -2.1623864    63.209247     61.04686            0 
+     100      300   0.22268319     41.48079  -0.89904529    62.013782    61.114736            0 
+Loop time of 0.222696 on 2 procs for 100 steps with 300 atoms
+
+Performance: 193986.156 tau/day, 449.042 timesteps/s
+98.4% CPU use with 2 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 1.1683e-05 | 1.5378e-05 | 1.9073e-05 |   0.0 |  0.01
+Kspace  | 0.21627    | 0.2168     | 0.21734    |   0.1 | 97.35
+Neigh   | 0.0016344  | 0.0021609  | 0.0026875  |   1.1 |  0.97
+Comm    | 0.0022857  | 0.0023286  | 0.0023715  |   0.1 |  1.05
+Output  | 0.00015521 | 0.00019228 | 0.00022936 |   0.0 |  0.09
+Modify  | 0.00052834 | 0.00054049 | 0.00055265 |   0.0 |  0.24
+Other   |            | 0.0006541  |            |       |  0.29
+
+Nlocal:    150 ave 159 max 141 min
+Histogram: 1 0 0 0 0 0 0 0 0 1
+Nghost:    392 ave 395 max 389 min
+Histogram: 1 0 0 0 0 0 0 0 0 1
+Neighs:    1229.5 ave 1773 max 686 min
+Histogram: 1 0 0 0 0 0 0 0 0 1
+
+Total # of neighbors = 2459
+Ave neighs/atom = 8.19667
+Neighbor list builds = 15
+Dangerous builds = 0
+Total wall time: 0:00:01
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.fmm.4 b/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.fmm.4
new file mode 100644
index 0000000000000000000000000000000000000000..12c74993e95221862767e12310cda2976c50e666
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.fmm.4
@@ -0,0 +1,99 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+units	    lj
+atom_style  charge
+
+read_data data.cloud_wall
+  orthogonal box = (0 0 0) to (10 10 10)
+  1 by 2 by 2 MPI processor grid
+  reading atoms ...
+  300 atoms
+
+velocity	all set 0.0 0.0 0.0 mom no
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix     1 all nve
+
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+kspace_style scafacos fmm 1.0e-3
+kspace_modify scafacos tolerance energy_rel
+kspace_modify scafacos fmm_tuning 1
+ScaFaCoS setting fmm inhomogen tuning ...3
+
+timestep	0.005
+
+thermo_style custom step atoms cpu temp pe ke etotal ecoul
+
+run_style verlet
+
+#dump simple all custom 1000 id x y z vx vy vz
+#dump dmp    all custom 1000 part.dump id mol x y z vx vy vz fx fy fz q mass
+#dump dmpvtk all vtk 1000 vtk/part_*.vtk id mol x y z vx vy vz fx fy fz q mass
+#dump_modify dmpvtk pad 7
+
+thermo 10
+run	100
+Setting up ScaFaCoS with solver fmm ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 10 10 10
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.333 | 3.333 | 3.333 Mbytes
+Step Atoms CPU Temp PotEng KinEng TotEng E_coul 
+       0      300            0            0   0.49646402            0   0.49646402            0 
+      10      300  0.017563343  0.015455559   0.47335833   0.02310606   0.49646439            0 
+      20      300  0.034884214   0.06229069   0.40334177  0.093124582   0.49646635            0 
+      30      300  0.052014828   0.14310163   0.28254277   0.21393694   0.49647971            0 
+      40      300  0.069101095   0.52929788  0.089669015   0.79130033   0.88096934            0 
+      50      300  0.085633039    1.1963022 -0.082792461    1.7884718    1.7056794            0 
+      60      300   0.10165119    1.3928167  -0.37659239     2.082261    1.7056686            0 
+      70      300   0.11729789    1.7069009  -0.84571914    2.5518169    1.7060978            0 
+      80      300    0.1342802    15.358343    -3.368063    22.960722    19.592659            0 
+      90      300   0.15266848    42.280432   -2.1623864    63.209247     61.04686            0 
+     100      300   0.17004442     41.48079  -0.89904529    62.013782    61.114736            0 
+Loop time of 0.170072 on 4 procs for 100 steps with 300 atoms
+
+Performance: 254010.216 tau/day, 587.987 timesteps/s
+98.8% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 1.5497e-05 | 1.6868e-05 | 1.9789e-05 |   0.0 |  0.01
+Kspace  | 0.16417    | 0.16472    | 0.16507    |   0.1 | 96.86
+Neigh   | 0.00082564 | 0.0012031  | 0.0015855  |   0.9 |  0.71
+Comm    | 0.0026572  | 0.0029335  | 0.003197   |   0.4 |  1.72
+Output  | 0.00019169 | 0.00021791 | 0.00026536 |   0.0 |  0.13
+Modify  | 0.00031304 | 0.00032955 | 0.00035334 |   0.0 |  0.19
+Other   |            | 0.0006474  |            |       |  0.38
+
+Nlocal:    75 ave 81 max 70 min
+Histogram: 2 0 0 0 0 0 0 1 0 1
+Nghost:    282.5 ave 290 max 274 min
+Histogram: 1 0 0 1 0 0 0 0 1 1
+Neighs:    614.75 ave 981 max 285 min
+Histogram: 1 1 0 0 0 0 0 1 0 1
+
+Total # of neighbors = 2459
+Ave neighs/atom = 8.19667
+Neighbor list builds = 15
+Dangerous builds = 0
+Total wall time: 0:00:01
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.fmm.8 b/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.fmm.8
new file mode 100644
index 0000000000000000000000000000000000000000..1fea13cd13a33a44486ea9dd3c2c7c4ba898d987
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.fmm.8
@@ -0,0 +1,99 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+units	    lj
+atom_style  charge
+
+read_data data.cloud_wall
+  orthogonal box = (0 0 0) to (10 10 10)
+  2 by 2 by 2 MPI processor grid
+  reading atoms ...
+  300 atoms
+
+velocity	all set 0.0 0.0 0.0 mom no
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix     1 all nve
+
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+kspace_style scafacos fmm 1.0e-3
+kspace_modify scafacos tolerance energy_rel
+kspace_modify scafacos fmm_tuning 1
+ScaFaCoS setting fmm inhomogen tuning ...3
+
+timestep	0.005
+
+thermo_style custom step atoms cpu temp pe ke etotal ecoul
+
+run_style verlet
+
+#dump simple all custom 1000 id x y z vx vy vz
+#dump dmp    all custom 1000 part.dump id mol x y z vx vy vz fx fy fz q mass
+#dump dmpvtk all vtk 1000 vtk/part_*.vtk id mol x y z vx vy vz fx fy fz q mass
+#dump_modify dmpvtk pad 7
+
+thermo 10
+run	100
+Setting up ScaFaCoS with solver fmm ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 10 10 10
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.317 | 3.317 | 3.317 Mbytes
+Step Atoms CPU Temp PotEng KinEng TotEng E_coul 
+       0      300            0            0   0.49646402            0   0.49646402            0 
+      10      300  0.016258478  0.015455559   0.47335833   0.02310606   0.49646439            0 
+      20      300  0.031442165   0.06229069   0.40334177  0.093124582   0.49646635            0 
+      30      300  0.046462774   0.14310163   0.28254277   0.21393694   0.49647971            0 
+      40      300  0.061176538   0.52929788  0.089669015   0.79130033   0.88096934            0 
+      50      300  0.076244354    1.1963022 -0.082792461    1.7884718    1.7056794            0 
+      60      300  0.091396809    1.3928167  -0.37659239     2.082261    1.7056686            0 
+      70      300   0.10625911    1.7069009  -0.84571914    2.5518169    1.7060978            0 
+      80      300    0.1204896    15.358343    -3.368063    22.960722    19.592659            0 
+      90      300   0.13657618    42.280432   -2.1623864    63.209247     61.04686            0 
+     100      300   0.15163732     41.48079  -0.89904529    62.013782    61.114736            0 
+Loop time of 0.151654 on 8 procs for 100 steps with 300 atoms
+
+Performance: 284858.554 tau/day, 659.395 timesteps/s
+97.7% CPU use with 8 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 1.3828e-05 | 1.6987e-05 | 1.955e-05  |   0.0 |  0.01
+Kspace  | 0.14633    | 0.14689    | 0.14725    |   0.1 | 96.86
+Neigh   | 0.00047135 | 0.00067887 | 0.00094771 |   0.0 |  0.45
+Comm    | 0.0027649  | 0.0029218  | 0.0032592  |   0.3 |  1.93
+Output  | 0.00022578 | 0.00024724 | 0.00030136 |   0.0 |  0.16
+Modify  | 0.00018954 | 0.00021681 | 0.0002358  |   0.0 |  0.14
+Other   |            | 0.0006794  |            |       |  0.45
+
+Nlocal:    37.5 ave 45 max 31 min
+Histogram: 1 1 1 1 1 0 1 0 1 1
+Nghost:    200 ave 209 max 189 min
+Histogram: 1 0 0 0 1 4 0 0 0 2
+Neighs:    307.375 ave 514 max 115 min
+Histogram: 2 1 0 1 1 0 0 0 1 2
+
+Total # of neighbors = 2459
+Ave neighs/atom = 8.19667
+Neighbor list builds = 15
+Dangerous builds = 0
+Total wall time: 0:00:01
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.p2nfft.1 b/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.p2nfft.1
new file mode 100644
index 0000000000000000000000000000000000000000..bd9325dc4becfdd6488ee070b75e860bd117d5be
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.p2nfft.1
@@ -0,0 +1,92 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+
+units		lj
+atom_style  charge
+
+read_data data.cloud_wall
+  orthogonal box = (0 0 0) to (10 10 10)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  300 atoms
+
+velocity	all set 0.0 0.0 0.0 mom no
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix     1 all nve
+kspace_style scafacos p2nfft  0.001
+kspace_modify scafacos tolerance field
+
+timestep	0.005
+
+thermo_style custom step atoms cpu temp pe ke etotal ecoul
+
+run_style verlet
+
+#dump simple all custom 1000 id x y z vx vy vz
+#dump dmp    all custom 1000 part.dump id mol x y z vx vy vz fx fy fz q mass
+#dump dmpvtk all vtk 1000 vtk/part_*.vtk id mol x y z vx vy vz fx fy fz q mass
+#dump_modify dmpvtk pad 7
+
+thermo 10
+run	100
+Setting up ScaFaCoS with solver p2nfft ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 10 10 10
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.34 | 3.34 | 3.34 Mbytes
+Step Atoms CPU Temp PotEng KinEng TotEng E_coul 
+       0      300            0            0   0.49683273            0   0.49683273            0 
+      10      300  0.082981586  0.015479312   0.47369009  0.023141571   0.49683166            0 
+      20      300   0.16677213  0.062386358   0.40356181  0.093267605   0.49682941            0 
+      30      300    0.2506392   0.14331637    0.2825636   0.21425798   0.49682157            0 
+      40      300   0.33466673   0.53041843  0.089505208   0.79297556   0.88248077            0 
+      50      300   0.41812825    1.1948397 -0.083317439    1.7862853    1.7029679            0 
+      60      300   0.50167894    1.3915614  -0.37745551    2.0803842    1.7029287            0 
+      70      300   0.58574796    1.7061978  -0.84746071    2.5507657     1.703305            0 
+      80      300    0.6700325    20.692093     -3.32971     30.93468     27.60497            0 
+      90      300   0.75608635    48.999403   -2.1632167    73.254107    71.090891            0 
+     100      300   0.84190726    51.199785  -0.81127924    76.543678    75.732399            0 
+Loop time of 0.841921 on 1 procs for 100 steps with 300 atoms
+
+Performance: 51311.251 tau/day, 118.776 timesteps/s
+100.0% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 1.1206e-05 | 1.1206e-05 | 1.1206e-05 |   0.0 |  0.00
+Kspace  | 0.835      | 0.835      | 0.835      |   0.0 | 99.18
+Neigh   | 0.0040102  | 0.0040102  | 0.0040102  |   0.0 |  0.48
+Comm    | 0.0012431  | 0.0012431  | 0.0012431  |   0.0 |  0.15
+Output  | 0.0001545  | 0.0001545  | 0.0001545  |   0.0 |  0.02
+Modify  | 0.0010202  | 0.0010202  | 0.0010202  |   0.0 |  0.12
+Other   |            | 0.000484   |            |       |  0.06
+
+Nlocal:    300 ave 300 max 300 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    381 ave 381 max 381 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    2461 ave 2461 max 2461 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 2461
+Ave neighs/atom = 8.20333
+Neighbor list builds = 15
+Dangerous builds = 0
+Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.p2nfft.16 b/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.p2nfft.16
new file mode 100644
index 0000000000000000000000000000000000000000..1e385c5fdafc9f823c0de74dfcaaef9d97c24c2e
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.p2nfft.16
@@ -0,0 +1,92 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+
+units		lj
+atom_style  charge
+
+read_data data.cloud_wall
+  orthogonal box = (0 0 0) to (10 10 10)
+  2 by 2 by 4 MPI processor grid
+  reading atoms ...
+  300 atoms
+
+velocity	all set 0.0 0.0 0.0 mom no
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix     1 all nve
+kspace_style scafacos p2nfft  0.001
+kspace_modify scafacos tolerance field
+
+timestep	0.005
+
+thermo_style custom step atoms cpu temp pe ke etotal ecoul
+
+run_style verlet
+
+#dump simple all custom 1000 id x y z vx vy vz
+#dump dmp    all custom 1000 part.dump id mol x y z vx vy vz fx fy fz q mass
+#dump dmpvtk all vtk 1000 vtk/part_*.vtk id mol x y z vx vy vz fx fy fz q mass
+#dump_modify dmpvtk pad 7
+
+thermo 10
+run	100
+Setting up ScaFaCoS with solver p2nfft ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 10 10 10
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.313 | 3.501 | 3.689 Mbytes
+Step Atoms CPU Temp PotEng KinEng TotEng E_coul 
+       0      300            0            0   0.49683273            0   0.49683273            0 
+      10      300  0.029647827  0.015479312   0.47369009  0.023141571   0.49683166            0 
+      20      300  0.059298515  0.062386358   0.40356181  0.093267605   0.49682941            0 
+      30      300  0.088926077   0.14331637    0.2825636   0.21425798   0.49682157            0 
+      40      300   0.11850166   0.53041843  0.089505208   0.79297556   0.88248077            0 
+      50      300   0.14820433    1.1948397 -0.083317439    1.7862853    1.7029679            0 
+      60      300   0.17807317    1.3915614  -0.37745551    2.0803842    1.7029287            0 
+      70      300   0.20803642    1.7061978  -0.84746071    2.5507657     1.703305            0 
+      80      300   0.23797083    20.692093     -3.32971     30.93468     27.60497            0 
+      90      300   0.26835561    48.999403   -2.1632167    73.254107    71.090891            0 
+     100      300   0.29766583    51.199785  -0.81127924    76.543678    75.732399            0 
+Loop time of 0.297693 on 16 procs for 100 steps with 300 atoms
+
+Performance: 145116.000 tau/day, 335.917 timesteps/s
+98.8% CPU use with 16 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 1.1683e-05 | 1.6928e-05 | 2.4557e-05 |   0.0 |  0.01
+Kspace  | 0.29245    | 0.29325    | 0.29366    |   0.1 | 98.51
+Neigh   | 0.00017214 | 0.00042973 | 0.0010228  |   0.0 |  0.14
+Comm    | 0.0021176  | 0.0024613  | 0.0028479  |   0.5 |  0.83
+Output  | 0.00028467 | 0.00033538 | 0.00041103 |   0.0 |  0.11
+Modify  | 8.893e-05  | 0.00015014 | 0.00027537 |   0.0 |  0.05
+Other   |            | 0.001048   |            |       |  0.35
+
+Nlocal:    18.75 ave 33 max 6 min
+Histogram: 2 6 0 0 0 0 2 1 2 3
+Nghost:    128.875 ave 198 max 71 min
+Histogram: 7 1 0 0 0 0 0 1 5 2
+Neighs:    153.812 ave 490 max 14 min
+Histogram: 8 0 3 0 1 1 0 0 1 2
+
+Total # of neighbors = 2461
+Ave neighs/atom = 8.20333
+Neighbor list builds = 15
+Dangerous builds = 0
+Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.p2nfft.2 b/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.p2nfft.2
new file mode 100644
index 0000000000000000000000000000000000000000..3f54069ffdc2066d1e535a21060bab10d97812a9
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.p2nfft.2
@@ -0,0 +1,92 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+
+units		lj
+atom_style  charge
+
+read_data data.cloud_wall
+  orthogonal box = (0 0 0) to (10 10 10)
+  1 by 1 by 2 MPI processor grid
+  reading atoms ...
+  300 atoms
+
+velocity	all set 0.0 0.0 0.0 mom no
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix     1 all nve
+kspace_style scafacos p2nfft  0.001
+kspace_modify scafacos tolerance field
+
+timestep	0.005
+
+thermo_style custom step atoms cpu temp pe ke etotal ecoul
+
+run_style verlet
+
+#dump simple all custom 1000 id x y z vx vy vz
+#dump dmp    all custom 1000 part.dump id mol x y z vx vy vz fx fy fz q mass
+#dump dmpvtk all vtk 1000 vtk/part_*.vtk id mol x y z vx vy vz fx fy fz q mass
+#dump_modify dmpvtk pad 7
+
+thermo 10
+run	100
+Setting up ScaFaCoS with solver p2nfft ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 10 10 10
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.354 | 3.354 | 3.355 Mbytes
+Step Atoms CPU Temp PotEng KinEng TotEng E_coul 
+       0      300            0            0   0.49683273            0   0.49683273            0 
+      10      300  0.049199581  0.015479312   0.47369009  0.023141571   0.49683166            0 
+      20      300   0.09860301  0.062386358   0.40356181  0.093267605   0.49682941            0 
+      30      300   0.14826894   0.14331637    0.2825636   0.21425798   0.49682157            0 
+      40      300   0.19769144   0.53041843  0.089505208   0.79297556   0.88248077            0 
+      50      300   0.24735355    1.1948397 -0.083317439    1.7862853    1.7029679            0 
+      60      300   0.29692888    1.3915614  -0.37745551    2.0803842    1.7029287            0 
+      70      300   0.34639764    1.7061978  -0.84746071    2.5507657     1.703305            0 
+      80      300   0.39633083    20.692093     -3.32971     30.93468     27.60497            0 
+      90      300   0.44779778    48.999403   -2.1632167    73.254107    71.090891            0 
+     100      300   0.49988627    51.199785  -0.81127924    76.543678    75.732399            0 
+Loop time of 0.499909 on 2 procs for 100 steps with 300 atoms
+
+Performance: 86415.782 tau/day, 200.037 timesteps/s
+99.5% CPU use with 2 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 9.0599e-06 | 1.3709e-05 | 1.8358e-05 |   0.0 |  0.00
+Kspace  | 0.49314    | 0.49365    | 0.49416    |   0.1 | 98.75
+Neigh   | 0.0016146  | 0.0021083  | 0.0026021  |   1.1 |  0.42
+Comm    | 0.002754   | 0.0028276  | 0.0029013  |   0.1 |  0.57
+Output  | 0.00015783 | 0.00019348 | 0.00022912 |   0.0 |  0.04
+Modify  | 0.00049257 | 0.00049472 | 0.00049686 |   0.0 |  0.10
+Other   |            | 0.0006217  |            |       |  0.12
+
+Nlocal:    150 ave 157 max 143 min
+Histogram: 1 0 0 0 0 0 0 0 0 1
+Nghost:    399 ave 402 max 396 min
+Histogram: 1 0 0 0 0 0 0 0 0 1
+Neighs:    1230.5 ave 1756 max 705 min
+Histogram: 1 0 0 0 0 0 0 0 0 1
+
+Total # of neighbors = 2461
+Ave neighs/atom = 8.20333
+Neighbor list builds = 15
+Dangerous builds = 0
+Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.p2nfft.4 b/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.p2nfft.4
new file mode 100644
index 0000000000000000000000000000000000000000..8b94402658282684814ee95b4952f2ad97a23b3a
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.p2nfft.4
@@ -0,0 +1,92 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+
+units		lj
+atom_style  charge
+
+read_data data.cloud_wall
+  orthogonal box = (0 0 0) to (10 10 10)
+  1 by 2 by 2 MPI processor grid
+  reading atoms ...
+  300 atoms
+
+velocity	all set 0.0 0.0 0.0 mom no
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix     1 all nve
+kspace_style scafacos p2nfft  0.001
+kspace_modify scafacos tolerance field
+
+timestep	0.005
+
+thermo_style custom step atoms cpu temp pe ke etotal ecoul
+
+run_style verlet
+
+#dump simple all custom 1000 id x y z vx vy vz
+#dump dmp    all custom 1000 part.dump id mol x y z vx vy vz fx fy fz q mass
+#dump dmpvtk all vtk 1000 vtk/part_*.vtk id mol x y z vx vy vz fx fy fz q mass
+#dump_modify dmpvtk pad 7
+
+thermo 10
+run	100
+Setting up ScaFaCoS with solver p2nfft ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 10 10 10
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.333 | 3.333 | 3.333 Mbytes
+Step Atoms CPU Temp PotEng KinEng TotEng E_coul 
+       0      300            0            0   0.49683273            0   0.49683273            0 
+      10      300  0.031973124  0.015479312   0.47369009  0.023141571   0.49683166            0 
+      20      300  0.064628601  0.062386358   0.40356181  0.093267605   0.49682941            0 
+      30      300  0.096747875   0.14331637    0.2825636   0.21425798   0.49682157            0 
+      40      300   0.12864041   0.53041843  0.089505208   0.79297556   0.88248077            0 
+      50      300     0.161134    1.1948397 -0.083317439    1.7862853    1.7029679            0 
+      60      300   0.19359422    1.3915614  -0.37745551    2.0803842    1.7029287            0 
+      70      300   0.22573543    1.7061978  -0.84746071    2.5507657     1.703305            0 
+      80      300   0.25922132    20.692093     -3.32971     30.93468     27.60497            0 
+      90      300   0.29452014    48.999403   -2.1632167    73.254107    71.090891            0 
+     100      300   0.33031607    51.199785  -0.81127924    76.543678    75.732399            0 
+Loop time of 0.330333 on 4 procs for 100 steps with 300 atoms
+
+Performance: 130777.056 tau/day, 302.725 timesteps/s
+99.7% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 1.0014e-05 | 1.3471e-05 | 1.5497e-05 |   0.0 |  0.00
+Kspace  | 0.32504    | 0.3255     | 0.3258     |   0.1 | 98.54
+Neigh   | 0.00083303 | 0.0011883  | 0.0015609  |   0.8 |  0.36
+Comm    | 0.0024087  | 0.0025436  | 0.0026674  |   0.2 |  0.77
+Output  | 0.00017405 | 0.00020278 | 0.00024986 |   0.0 |  0.06
+Modify  | 0.00028658 | 0.00031012 | 0.00033331 |   0.0 |  0.09
+Other   |            | 0.0005748  |            |       |  0.17
+
+Nlocal:    75 ave 81 max 69 min
+Histogram: 1 0 0 0 1 1 0 0 0 1
+Nghost:    287 ave 296 max 278 min
+Histogram: 1 0 1 0 0 0 0 1 0 1
+Neighs:    615.25 ave 964 max 286 min
+Histogram: 1 1 0 0 0 0 0 1 0 1
+
+Total # of neighbors = 2461
+Ave neighs/atom = 8.20333
+Neighbor list builds = 15
+Dangerous builds = 0
+Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.p2nfft.8 b/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.p2nfft.8
new file mode 100644
index 0000000000000000000000000000000000000000..3de3c25017f34b05f17fbdf81b447844e4659e8d
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.cw.g++.p2nfft.8
@@ -0,0 +1,92 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+
+units		lj
+atom_style  charge
+
+read_data data.cloud_wall
+  orthogonal box = (0 0 0) to (10 10 10)
+  2 by 2 by 2 MPI processor grid
+  reading atoms ...
+  300 atoms
+
+velocity	all set 0.0 0.0 0.0 mom no
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix     1 all nve
+kspace_style scafacos p2nfft  0.001
+kspace_modify scafacos tolerance field
+
+timestep	0.005
+
+thermo_style custom step atoms cpu temp pe ke etotal ecoul
+
+run_style verlet
+
+#dump simple all custom 1000 id x y z vx vy vz
+#dump dmp    all custom 1000 part.dump id mol x y z vx vy vz fx fy fz q mass
+#dump dmpvtk all vtk 1000 vtk/part_*.vtk id mol x y z vx vy vz fx fy fz q mass
+#dump_modify dmpvtk pad 7
+
+thermo 10
+run	100
+Setting up ScaFaCoS with solver p2nfft ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 10 10 10
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.317 | 3.317 | 3.317 Mbytes
+Step Atoms CPU Temp PotEng KinEng TotEng E_coul 
+       0      300            0            0   0.49683273            0   0.49683273            0 
+      10      300  0.023941755  0.015479312   0.47369009  0.023141571   0.49683166            0 
+      20      300  0.047938108  0.062386358   0.40356181  0.093267605   0.49682941            0 
+      30      300  0.071953773   0.14331637    0.2825636   0.21425798   0.49682157            0 
+      40      300  0.095941782   0.53041843  0.089505208   0.79297556   0.88248077            0 
+      50      300   0.12011361    1.1948397 -0.083317439    1.7862853    1.7029679            0 
+      60      300   0.14433384    1.3915614  -0.37745551    2.0803842    1.7029287            0 
+      70      300   0.16875505    1.7061978  -0.84746071    2.5507657     1.703305            0 
+      80      300     0.193331    20.692093     -3.32971     30.93468     27.60497            0 
+      90      300   0.21844888    48.999403   -2.1632167    73.254107    71.090891            0 
+     100      300   0.24417853    51.199785  -0.81127924    76.543678    75.732399            0 
+Loop time of 0.244198 on 8 procs for 100 steps with 300 atoms
+
+Performance: 176905.349 tau/day, 409.503 timesteps/s
+99.7% CPU use with 8 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 9.7752e-06 | 1.4246e-05 | 1.955e-05  |   0.0 |  0.01
+Kspace  | 0.23946    | 0.23972    | 0.24009    |   0.0 | 98.17
+Neigh   | 0.00049043 | 0.00071204 | 0.00095773 |   0.0 |  0.29
+Comm    | 0.0025063  | 0.0026675  | 0.0027597  |   0.2 |  1.09
+Output  | 0.00019646 | 0.00021604 | 0.00026321 |   0.0 |  0.09
+Modify  | 0.00017905 | 0.0001913  | 0.00020766 |   0.0 |  0.08
+Other   |            | 0.0006773  |            |       |  0.28
+
+Nlocal:    37.5 ave 42 max 33 min
+Histogram: 2 1 0 1 0 0 1 0 1 2
+Nghost:    202.25 ave 212 max 194 min
+Histogram: 1 0 2 1 0 2 0 1 0 1
+Neighs:    307.625 ave 505 max 129 min
+Histogram: 3 0 0 1 1 0 0 0 1 2
+
+Total # of neighbors = 2461
+Ave neighs/atom = 8.20333
+Neighbor list builds = 15
+Dangerous builds = 0
+Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.g++.ewald.16 b/examples/USER/scafacos/log.23Jul18.scafacos.g++.ewald.16
new file mode 100644
index 0000000000000000000000000000000000000000..f2223064e0f072887dd4ec3807831159f6cfca20
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.g++.ewald.16
@@ -0,0 +1,102 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+# Point dipoles in a 2d box
+
+units		lj
+atom_style  charge
+
+read_data data.NaCl
+  orthogonal box = (0 0 0) to (2 2 2)
+  2 by 2 by 4 MPI processor grid
+  reading atoms ...
+  8 atoms
+
+replicate 8 8 8
+  orthogonal box = (0 0 0) to (16 16 16)
+  2 by 2 by 4 MPI processor grid
+  4096 atoms
+  Time spent = 0.000503302 secs
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos ewald 0.001
+kspace_modify    scafacos tolerance field
+
+timestep	0.005
+thermo          10
+
+run		100
+Setting up ScaFaCoS with solver ewald ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 16 16 16
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.401 | 3.401 | 3.401 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0          1.5  -0.87413076            0    1.3753199    1.4996338 
+      10    1.5001167  -0.84976141            0    1.3998642    1.4997504 
+      20    1.5002402  -0.84977464            0    1.4000363     1.499874 
+      30    1.5003611  -0.84978325            0    1.4002089    1.4999948 
+      40    1.5004736  -0.84977869            0    1.4003823    1.5001073 
+      50    1.5005703   -0.8497468            0    1.4005591    1.5002039 
+      60    1.5006418  -0.84967646            0    1.4007366    1.5002754 
+      70    1.5006785  -0.84955497            0    1.4009132    1.5003121 
+      80    1.5006711    -0.849369            0    1.4010881    1.5003047 
+      90     1.500611  -0.84910811            0    1.4012589    1.5002446 
+     100    1.5004911  -0.84870167            0    1.4014854    1.5001247 
+Loop time of 46.4857 on 16 procs for 100 steps with 4096 atoms
+
+Performance: 929.317 tau/day, 2.151 timesteps/s
+99.8% CPU use with 16 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.00023437 | 0.00024788 | 0.00026894 |   0.0 |  0.00
+Kspace  | 46.476     | 46.476     | 46.476     |   0.0 | 99.98
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.0062859  | 0.0067717  | 0.0072649  |   0.3 |  0.01
+Output  | 0.0004127  | 0.00044075 | 0.00050807 |   0.0 |  0.00
+Modify  | 0.00099325 | 0.0010343  | 0.0010939  |   0.1 |  0.00
+Other   |            | 0.001459   |            |       |  0.00
+
+Nlocal:    256 ave 256 max 256 min
+Histogram: 16 0 0 0 0 0 0 0 0 0
+Nghost:    1265 ave 1265 max 1265 min
+Histogram: 16 0 0 0 0 0 0 0 0 0
+Neighs:    4096 ave 4096 max 4096 min
+Histogram: 16 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 65536
+Ave neighs/atom = 16
+Neighbor list builds = 0
+Dangerous builds = 0
+Total wall time: 0:00:47
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.g++.ewald.8 b/examples/USER/scafacos/log.23Jul18.scafacos.g++.ewald.8
new file mode 100644
index 0000000000000000000000000000000000000000..8d3702a8b3460c0ceb512883e6ccd5f9d2ea8016
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.g++.ewald.8
@@ -0,0 +1,102 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+# Point dipoles in a 2d box
+
+units		lj
+atom_style  charge
+
+read_data data.NaCl
+  orthogonal box = (0 0 0) to (2 2 2)
+  2 by 2 by 2 MPI processor grid
+  reading atoms ...
+  8 atoms
+
+replicate 8 8 8
+  orthogonal box = (0 0 0) to (16 16 16)
+  2 by 2 by 2 MPI processor grid
+  4096 atoms
+  Time spent = 0.000281811 secs
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos ewald 0.001
+kspace_modify    scafacos tolerance field
+
+timestep	0.005
+thermo          10
+
+run		100
+Setting up ScaFaCoS with solver ewald ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 16 16 16
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.416 | 3.416 | 3.416 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0          1.5  -0.87413076            0    1.3753199    1.4996338 
+      10    1.5001095  -0.85040058            0    1.3992143    1.4997433 
+      20    1.5002258  -0.85039456            0    1.3993947    1.4998595 
+      30    1.5003395  -0.85038368            0    1.3995761    1.4999732 
+      40    1.5004452  -0.85035944            0    1.3997589    1.5000789 
+      50    1.5005354  -0.85030783            0    1.3999457     1.500169 
+      60    1.5006008  -0.85021779            0    1.4001339    1.5002344 
+      70     1.500632   -0.8500769            0    1.4003215    1.5002656 
+      80    1.5006197  -0.84987187            0    1.4005081    1.5002533 
+      90    1.5005554  -0.84959251            0     1.400691     1.500189 
+     100     1.500432  -0.84916846            0    1.4009301    1.5000657 
+Loop time of 86.1477 on 8 procs for 100 steps with 4096 atoms
+
+Performance: 501.464 tau/day, 1.161 timesteps/s
+99.9% CPU use with 8 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.000314   | 0.00033289 | 0.00035048 |   0.0 |  0.00
+Kspace  | 86.136     | 86.136     | 86.136     |   0.0 | 99.99
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.0069821  | 0.0073864  | 0.0077834  |   0.3 |  0.01
+Output  | 0.00038719 | 0.00041264 | 0.00047517 |   0.0 |  0.00
+Modify  | 0.0018711  | 0.0019454  | 0.0020905  |   0.1 |  0.00
+Other   |            | 0.001762   |            |       |  0.00
+
+Nlocal:    512 ave 512 max 512 min
+Histogram: 8 0 0 0 0 0 0 0 0 0
+Nghost:    1685 ave 1685 max 1685 min
+Histogram: 8 0 0 0 0 0 0 0 0 0
+Neighs:    8192 ave 8192 max 8192 min
+Histogram: 8 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 65536
+Ave neighs/atom = 16
+Neighbor list builds = 0
+Dangerous builds = 0
+Total wall time: 0:01:28
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.g++.fmm.1 b/examples/USER/scafacos/log.23Jul18.scafacos.g++.fmm.1
new file mode 100644
index 0000000000000000000000000000000000000000..0749de38328001446bc80b8ac184a2fbd6b5b2a8
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.g++.fmm.1
@@ -0,0 +1,102 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+# Point dipoles in a 2d box
+
+units		lj
+atom_style  charge
+
+read_data data.NaCl
+  orthogonal box = (0 0 0) to (2 2 2)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  8 atoms
+
+replicate 8 8 8
+  orthogonal box = (0 0 0) to (16 16 16)
+  1 by 1 by 1 MPI processor grid
+  4096 atoms
+  Time spent = 0.000584364 secs
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos fmm 0.001
+kspace_modify    scafacos tolerance energy_rel
+
+timestep	0.005
+thermo          10
+
+run		100
+Setting up ScaFaCoS with solver fmm ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 16 16 16
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.723 | 3.723 | 3.723 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0          1.5  -0.87378819            0    1.3756625    1.4996338 
+      10    1.5000004  -0.87378882            0    1.3756625    1.4996342 
+      20    1.4999998  -0.87378782            0    1.3756625    1.4996336 
+      30    1.4999934  -0.87377823            0    1.3756625    1.4996272 
+      40    1.4999745  -0.87374997            0    1.3756625    1.4996083 
+      50    1.4999347  -0.87369019            0    1.3756625    1.4995685 
+      60    1.4998637  -0.87358381            0    1.3756625    1.4994975 
+      70    1.4997506  -0.87341428            0    1.3756624    1.4993845 
+      80    1.4995842  -0.87316464            0    1.3756624     1.499218 
+      90    1.4993536  -0.87281897            0    1.3756624    1.4989876 
+     100    1.4990503  -0.87236411            0    1.3756624    1.4986843 
+Loop time of 5.26537 on 1 procs for 100 steps with 4096 atoms
+
+Performance: 8204.550 tau/day, 18.992 timesteps/s
+99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.0011349  | 0.0011349  | 0.0011349  |   0.0 |  0.02
+Kspace  | 5.2375     | 5.2375     | 5.2375     |   0.0 | 99.47
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.0090225  | 0.0090225  | 0.0090225  |   0.0 |  0.17
+Output  | 0.0004127  | 0.0004127  | 0.0004127  |   0.0 |  0.01
+Modify  | 0.012851   | 0.012851   | 0.012851   |   0.0 |  0.24
+Other   |            | 0.004441   |            |       |  0.08
+
+Nlocal:    4096 ave 4096 max 4096 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    5165 ave 5165 max 5165 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    65536 ave 65536 max 65536 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 65536
+Ave neighs/atom = 16
+Neighbor list builds = 0
+Dangerous builds = 0
+Total wall time: 0:00:06
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.g++.fmm.16 b/examples/USER/scafacos/log.23Jul18.scafacos.g++.fmm.16
new file mode 100644
index 0000000000000000000000000000000000000000..6af26f7b8176893d9b7cfda574047a77160bb78f
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.g++.fmm.16
@@ -0,0 +1,102 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+# Point dipoles in a 2d box
+
+units		lj
+atom_style  charge
+
+read_data data.NaCl
+  orthogonal box = (0 0 0) to (2 2 2)
+  2 by 2 by 4 MPI processor grid
+  reading atoms ...
+  8 atoms
+
+replicate 8 8 8
+  orthogonal box = (0 0 0) to (16 16 16)
+  2 by 2 by 4 MPI processor grid
+  4096 atoms
+  Time spent = 0.000507593 secs
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos fmm 0.001
+kspace_modify    scafacos tolerance energy_rel
+
+timestep	0.005
+thermo          10
+
+run		100
+Setting up ScaFaCoS with solver fmm ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 16 16 16
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.401 | 3.401 | 3.401 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0          1.5  -0.87378819            0    1.3756625    1.4996338 
+      10    1.5000004  -0.87378882            0    1.3756625    1.4996342 
+      20    1.4999998  -0.87378782            0    1.3756625    1.4996336 
+      30    1.4999934  -0.87377823            0    1.3756625    1.4996272 
+      40    1.4999745  -0.87374997            0    1.3756625    1.4996083 
+      50    1.4999347  -0.87369019            0    1.3756625    1.4995685 
+      60    1.4998637  -0.87358381            0    1.3756625    1.4994975 
+      70    1.4997506  -0.87341428            0    1.3756624    1.4993845 
+      80    1.4995842  -0.87316464            0    1.3756624     1.499218 
+      90    1.4993536  -0.87281897            0    1.3756624    1.4989876 
+     100    1.4990503  -0.87236411            0    1.3756624    1.4986843 
+Loop time of 0.570389 on 16 procs for 100 steps with 4096 atoms
+
+Performance: 75737.813 tau/day, 175.319 timesteps/s
+99.3% CPU use with 16 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.00024104 | 0.00025283 | 0.00029206 |   0.0 |  0.04
+Kspace  | 0.56153    | 0.56176    | 0.56189    |   0.0 | 98.49
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.0055203  | 0.0057825  | 0.0061858  |   0.2 |  1.01
+Output  | 0.00038505 | 0.00040831 | 0.00048184 |   0.0 |  0.07
+Modify  | 0.00096965 | 0.00099662 | 0.0010159  |   0.0 |  0.17
+Other   |            | 0.001192   |            |       |  0.21
+
+Nlocal:    256 ave 256 max 256 min
+Histogram: 16 0 0 0 0 0 0 0 0 0
+Nghost:    1265 ave 1265 max 1265 min
+Histogram: 16 0 0 0 0 0 0 0 0 0
+Neighs:    4096 ave 4096 max 4096 min
+Histogram: 16 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 65536
+Ave neighs/atom = 16
+Neighbor list builds = 0
+Dangerous builds = 0
+Total wall time: 0:00:03
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.g++.fmm.2 b/examples/USER/scafacos/log.23Jul18.scafacos.g++.fmm.2
new file mode 100644
index 0000000000000000000000000000000000000000..bcc61485ca55fc6149ee9aa1bae38ebbd14e1119
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.g++.fmm.2
@@ -0,0 +1,102 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+# Point dipoles in a 2d box
+
+units		lj
+atom_style  charge
+
+read_data data.NaCl
+  orthogonal box = (0 0 0) to (2 2 2)
+  1 by 1 by 2 MPI processor grid
+  reading atoms ...
+  8 atoms
+
+replicate 8 8 8
+  orthogonal box = (0 0 0) to (16 16 16)
+  1 by 1 by 2 MPI processor grid
+  4096 atoms
+  Time spent = 0.000455141 secs
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos fmm 0.001
+kspace_modify    scafacos tolerance energy_rel
+
+timestep	0.005
+thermo          10
+
+run		100
+Setting up ScaFaCoS with solver fmm ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 16 16 16
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.634 | 3.634 | 3.634 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0          1.5  -0.87378819            0    1.3756625    1.4996338 
+      10    1.5000004  -0.87378882            0    1.3756625    1.4996342 
+      20    1.4999998  -0.87378782            0    1.3756625    1.4996336 
+      30    1.4999934  -0.87377823            0    1.3756625    1.4996272 
+      40    1.4999745  -0.87374997            0    1.3756625    1.4996083 
+      50    1.4999347  -0.87369019            0    1.3756625    1.4995685 
+      60    1.4998637  -0.87358381            0    1.3756625    1.4994975 
+      70    1.4997506  -0.87341428            0    1.3756624    1.4993845 
+      80    1.4995842  -0.87316464            0    1.3756624     1.499218 
+      90    1.4993536  -0.87281897            0    1.3756624    1.4989876 
+     100    1.4990503  -0.87236411            0    1.3756624    1.4986843 
+Loop time of 2.73149 on 2 procs for 100 steps with 4096 atoms
+
+Performance: 15815.560 tau/day, 36.610 timesteps/s
+99.7% CPU use with 2 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.00070763 | 0.00071537 | 0.00072312 |   0.0 |  0.03
+Kspace  | 2.7111     | 2.7112     | 2.7112     |   0.0 | 99.26
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.0096555  | 0.0097489  | 0.0098424  |   0.1 |  0.36
+Output  | 0.00036025 | 0.0003823  | 0.00040436 |   0.0 |  0.01
+Modify  | 0.0063472  | 0.0064594  | 0.0065715  |   0.1 |  0.24
+Other   |            | 0.00299    |            |       |  0.11
+
+Nlocal:    2048 ave 2048 max 2048 min
+Histogram: 2 0 0 0 0 0 0 0 0 0
+Nghost:    3685 ave 3685 max 3685 min
+Histogram: 2 0 0 0 0 0 0 0 0 0
+Neighs:    32768 ave 32768 max 32768 min
+Histogram: 2 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 65536
+Ave neighs/atom = 16
+Neighbor list builds = 0
+Dangerous builds = 0
+Total wall time: 0:00:04
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.g++.fmm.4 b/examples/USER/scafacos/log.23Jul18.scafacos.g++.fmm.4
new file mode 100644
index 0000000000000000000000000000000000000000..54e010c8fdfb254a3738a2cf150ed2f8defe7a06
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.g++.fmm.4
@@ -0,0 +1,102 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+# Point dipoles in a 2d box
+
+units		lj
+atom_style  charge
+
+read_data data.NaCl
+  orthogonal box = (0 0 0) to (2 2 2)
+  1 by 2 by 2 MPI processor grid
+  reading atoms ...
+  8 atoms
+
+replicate 8 8 8
+  orthogonal box = (0 0 0) to (16 16 16)
+  1 by 2 by 2 MPI processor grid
+  4096 atoms
+  Time spent = 0.000329494 secs
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos fmm 0.001
+kspace_modify    scafacos tolerance energy_rel
+
+timestep	0.005
+thermo          10
+
+run		100
+Setting up ScaFaCoS with solver fmm ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 16 16 16
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.497 | 3.497 | 3.497 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0          1.5  -0.87378819            0    1.3756625    1.4996338 
+      10    1.5000004  -0.87378882            0    1.3756625    1.4996342 
+      20    1.4999998  -0.87378782            0    1.3756625    1.4996336 
+      30    1.4999934  -0.87377823            0    1.3756625    1.4996272 
+      40    1.4999745  -0.87374997            0    1.3756625    1.4996083 
+      50    1.4999347  -0.87369019            0    1.3756625    1.4995685 
+      60    1.4998637  -0.87358381            0    1.3756625    1.4994975 
+      70    1.4997506  -0.87341428            0    1.3756624    1.4993845 
+      80    1.4995842  -0.87316464            0    1.3756624     1.499218 
+      90    1.4993536  -0.87281897            0    1.3756624    1.4989876 
+     100    1.4990503  -0.87236411            0    1.3756624    1.4986843 
+Loop time of 1.53742 on 4 procs for 100 steps with 4096 atoms
+
+Performance: 28099.005 tau/day, 65.044 timesteps/s
+99.7% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.00044894 | 0.00045562 | 0.0004611  |   0.0 |  0.03
+Kspace  | 1.5223     | 1.5225     | 1.5225     |   0.0 | 99.03
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.0085156  | 0.0086777  | 0.0088782  |   0.1 |  0.56
+Output  | 0.00034738 | 0.0003686  | 0.0004015  |   0.0 |  0.02
+Modify  | 0.0032606  | 0.0033693  | 0.0034289  |   0.1 |  0.22
+Other   |            | 0.002084   |            |       |  0.14
+
+Nlocal:    1024 ave 1024 max 1024 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+Nghost:    2525 ave 2525 max 2525 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+Neighs:    16384 ave 16384 max 16384 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 65536
+Ave neighs/atom = 16
+Neighbor list builds = 0
+Dangerous builds = 0
+Total wall time: 0:00:03
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.g++.fmm.8 b/examples/USER/scafacos/log.23Jul18.scafacos.g++.fmm.8
new file mode 100644
index 0000000000000000000000000000000000000000..d5d38d680455f08d1735281f996e6a04c368fb20
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.g++.fmm.8
@@ -0,0 +1,102 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+# Point dipoles in a 2d box
+
+units		lj
+atom_style  charge
+
+read_data data.NaCl
+  orthogonal box = (0 0 0) to (2 2 2)
+  2 by 2 by 2 MPI processor grid
+  reading atoms ...
+  8 atoms
+
+replicate 8 8 8
+  orthogonal box = (0 0 0) to (16 16 16)
+  2 by 2 by 2 MPI processor grid
+  4096 atoms
+  Time spent = 0.000284672 secs
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos fmm 0.001
+kspace_modify    scafacos tolerance energy_rel
+
+timestep	0.005
+thermo          10
+
+run		100
+Setting up ScaFaCoS with solver fmm ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 16 16 16
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.416 | 3.416 | 3.416 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0          1.5  -0.87378819            0    1.3756625    1.4996338 
+      10    1.5000004  -0.87378882            0    1.3756625    1.4996342 
+      20    1.4999998  -0.87378782            0    1.3756625    1.4996336 
+      30    1.4999934  -0.87377823            0    1.3756625    1.4996272 
+      40    1.4999745  -0.87374997            0    1.3756625    1.4996083 
+      50    1.4999347  -0.87369019            0    1.3756625    1.4995685 
+      60    1.4998637  -0.87358381            0    1.3756625    1.4994975 
+      70    1.4997506  -0.87341428            0    1.3756624    1.4993845 
+      80    1.4995842  -0.87316464            0    1.3756624     1.499218 
+      90    1.4993536  -0.87281897            0    1.3756624    1.4989876 
+     100    1.4990503  -0.87236411            0    1.3756624    1.4986843 
+Loop time of 0.902102 on 8 procs for 100 steps with 4096 atoms
+
+Performance: 47888.152 tau/day, 110.852 timesteps/s
+99.7% CPU use with 8 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.00030732 | 0.00031349 | 0.00032663 |   0.0 |  0.03
+Kspace  | 0.89112    | 0.89138    | 0.8916     |   0.0 | 98.81
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.0064399  | 0.0066807  | 0.0070164  |   0.2 |  0.74
+Output  | 0.00034571 | 0.00036666 | 0.00041723 |   0.0 |  0.04
+Modify  | 0.0018673  | 0.0019273  | 0.0020192  |   0.1 |  0.21
+Other   |            | 0.001431   |            |       |  0.16
+
+Nlocal:    512 ave 512 max 512 min
+Histogram: 8 0 0 0 0 0 0 0 0 0
+Nghost:    1685 ave 1685 max 1685 min
+Histogram: 8 0 0 0 0 0 0 0 0 0
+Neighs:    8192 ave 8192 max 8192 min
+Histogram: 8 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 65536
+Ave neighs/atom = 16
+Neighbor list builds = 0
+Dangerous builds = 0
+Total wall time: 0:00:02
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.g++.p2nfft.1 b/examples/USER/scafacos/log.23Jul18.scafacos.g++.p2nfft.1
new file mode 100644
index 0000000000000000000000000000000000000000..d8b782e41dcaa8e7753ac1936d0fc8f534cdbb43
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.g++.p2nfft.1
@@ -0,0 +1,102 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+# Point dipoles in a 2d box
+
+units		lj
+atom_style  charge
+
+read_data data.NaCl
+  orthogonal box = (0 0 0) to (2 2 2)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  8 atoms
+
+replicate 8 8 8
+  orthogonal box = (0 0 0) to (16 16 16)
+  1 by 1 by 1 MPI processor grid
+  4096 atoms
+  Time spent = 0.000631332 secs
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos p2nfft 0.001
+kspace_modify    scafacos tolerance field
+
+timestep	0.005
+thermo          10
+
+run		100
+Setting up ScaFaCoS with solver p2nfft ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 16 16 16
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.723 | 3.723 | 3.723 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0          1.5  -0.87394226            0    1.3755084    1.4996338 
+      10    1.5000144  -0.87365638            0     1.375816    1.4996482 
+      20    1.5000284  -0.87366526            0    1.3758281    1.4996622 
+      30    1.5000364  -0.87366516            0    1.3758401    1.4996702 
+      40    1.5000313   -0.8736496            0     1.375848    1.4996651 
+      50    1.5000039  -0.87360069            0    1.3758558    1.4996377 
+      60    1.4999432  -0.87350118            0    1.3758644     1.499577 
+      70     1.499838  -0.87333517            0    1.3758726    1.4994719 
+      80    1.4996772  -0.87308644            0    1.3758801    1.4993111 
+      90    1.4994505  -0.87274307            0    1.3758836    1.4990844 
+     100    1.4991498   -0.8722911            0    1.3758846    1.4987838 
+Loop time of 9.59682 on 1 procs for 100 steps with 4096 atoms
+
+Performance: 4501.489 tau/day, 10.420 timesteps/s
+100.0% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.0011375  | 0.0011375  | 0.0011375  |   0.0 |  0.01
+Kspace  | 9.5688     | 9.5688     | 9.5688     |   0.0 | 99.71
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.009017   | 0.009017   | 0.009017   |   0.0 |  0.09
+Output  | 0.0004344  | 0.0004344  | 0.0004344  |   0.0 |  0.00
+Modify  | 0.012987   | 0.012987   | 0.012987   |   0.0 |  0.14
+Other   |            | 0.004481   |            |       |  0.05
+
+Nlocal:    4096 ave 4096 max 4096 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    5165 ave 5165 max 5165 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    65536 ave 65536 max 65536 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 65536
+Ave neighs/atom = 16
+Neighbor list builds = 0
+Dangerous builds = 0
+Total wall time: 0:00:10
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.g++.p2nfft.16 b/examples/USER/scafacos/log.23Jul18.scafacos.g++.p2nfft.16
new file mode 100644
index 0000000000000000000000000000000000000000..43bcfb6770e2b5b9671fe9043a884eb02f46fab4
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.g++.p2nfft.16
@@ -0,0 +1,102 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+# Point dipoles in a 2d box
+
+units		lj
+atom_style  charge
+
+read_data data.NaCl
+  orthogonal box = (0 0 0) to (2 2 2)
+  2 by 2 by 4 MPI processor grid
+  reading atoms ...
+  8 atoms
+
+replicate 8 8 8
+  orthogonal box = (0 0 0) to (16 16 16)
+  2 by 2 by 4 MPI processor grid
+  4096 atoms
+  Time spent = 0.000500917 secs
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos p2nfft 0.001
+kspace_modify    scafacos tolerance field
+
+timestep	0.005
+thermo          10
+
+run		100
+Setting up ScaFaCoS with solver p2nfft ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 16 16 16
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.401 | 3.401 | 3.401 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0          1.5  -0.87394226            0    1.3755084    1.4996338 
+      10    1.5000144  -0.87365638            0     1.375816    1.4996482 
+      20    1.5000284  -0.87366526            0    1.3758281    1.4996622 
+      30    1.5000364  -0.87366516            0    1.3758401    1.4996702 
+      40    1.5000313   -0.8736496            0     1.375848    1.4996651 
+      50    1.5000039  -0.87360069            0    1.3758558    1.4996377 
+      60    1.4999432  -0.87350118            0    1.3758644     1.499577 
+      70     1.499838  -0.87333517            0    1.3758726    1.4994719 
+      80    1.4996772  -0.87308644            0    1.3758801    1.4993111 
+      90    1.4994505  -0.87274307            0    1.3758836    1.4990844 
+     100    1.4991498   -0.8722911            0    1.3758846    1.4987838 
+Loop time of 1.20528 on 16 procs for 100 steps with 4096 atoms
+
+Performance: 35842.175 tau/day, 82.968 timesteps/s
+99.3% CPU use with 16 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.00021839 | 0.00025332 | 0.00029278 |   0.0 |  0.02
+Kspace  | 1.1945     | 1.1948     | 1.195      |   0.0 | 99.13
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.0066545  | 0.0069329  | 0.0072389  |   0.2 |  0.58
+Output  | 0.0003922  | 0.00042732 | 0.00054955 |   0.0 |  0.04
+Modify  | 0.0010166  | 0.0011965  | 0.0014412  |   0.3 |  0.10
+Other   |            | 0.001724   |            |       |  0.14
+
+Nlocal:    256 ave 256 max 256 min
+Histogram: 16 0 0 0 0 0 0 0 0 0
+Nghost:    1265 ave 1265 max 1265 min
+Histogram: 16 0 0 0 0 0 0 0 0 0
+Neighs:    4096 ave 4096 max 4096 min
+Histogram: 16 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 65536
+Ave neighs/atom = 16
+Neighbor list builds = 0
+Dangerous builds = 0
+Total wall time: 0:00:01
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.g++.p2nfft.2 b/examples/USER/scafacos/log.23Jul18.scafacos.g++.p2nfft.2
new file mode 100644
index 0000000000000000000000000000000000000000..9f4f44153b2afb42e57d7eb05fc78be9dcd50db0
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.g++.p2nfft.2
@@ -0,0 +1,102 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+# Point dipoles in a 2d box
+
+units		lj
+atom_style  charge
+
+read_data data.NaCl
+  orthogonal box = (0 0 0) to (2 2 2)
+  1 by 1 by 2 MPI processor grid
+  reading atoms ...
+  8 atoms
+
+replicate 8 8 8
+  orthogonal box = (0 0 0) to (16 16 16)
+  1 by 1 by 2 MPI processor grid
+  4096 atoms
+  Time spent = 0.00044775 secs
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos p2nfft 0.001
+kspace_modify    scafacos tolerance field
+
+timestep	0.005
+thermo          10
+
+run		100
+Setting up ScaFaCoS with solver p2nfft ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 16 16 16
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.634 | 3.634 | 3.634 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0          1.5  -0.87394226            0    1.3755084    1.4996338 
+      10    1.5000144  -0.87365638            0     1.375816    1.4996482 
+      20    1.5000284  -0.87366526            0    1.3758281    1.4996622 
+      30    1.5000364  -0.87366516            0    1.3758401    1.4996702 
+      40    1.5000313   -0.8736496            0     1.375848    1.4996651 
+      50    1.5000039  -0.87360069            0    1.3758558    1.4996377 
+      60    1.4999432  -0.87350118            0    1.3758644     1.499577 
+      70     1.499838  -0.87333517            0    1.3758726    1.4994719 
+      80    1.4996772  -0.87308644            0    1.3758801    1.4993111 
+      90    1.4994505  -0.87274307            0    1.3758836    1.4990844 
+     100    1.4991498   -0.8722911            0    1.3758846    1.4987838 
+Loop time of 5.14681 on 2 procs for 100 steps with 4096 atoms
+
+Performance: 8393.542 tau/day, 19.429 timesteps/s
+99.9% CPU use with 2 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.00070882 | 0.00072873 | 0.00074863 |   0.0 |  0.01
+Kspace  | 5.1257     | 5.1258     | 5.1258     |   0.0 | 99.59
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.010188   | 0.010291   | 0.010394   |   0.1 |  0.20
+Output  | 0.00042391 | 0.00044322 | 0.00046253 |   0.0 |  0.01
+Modify  | 0.0063772  | 0.006551   | 0.0067248  |   0.2 |  0.13
+Other   |            | 0.003026   |            |       |  0.06
+
+Nlocal:    2048 ave 2048 max 2048 min
+Histogram: 2 0 0 0 0 0 0 0 0 0
+Nghost:    3685 ave 3685 max 3685 min
+Histogram: 2 0 0 0 0 0 0 0 0 0
+Neighs:    32768 ave 32768 max 32768 min
+Histogram: 2 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 65536
+Ave neighs/atom = 16
+Neighbor list builds = 0
+Dangerous builds = 0
+Total wall time: 0:00:05
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.g++.p2nfft.4 b/examples/USER/scafacos/log.23Jul18.scafacos.g++.p2nfft.4
new file mode 100644
index 0000000000000000000000000000000000000000..d6a78bb193a584343065d243ec60df57bfae6f89
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.g++.p2nfft.4
@@ -0,0 +1,102 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+# Point dipoles in a 2d box
+
+units		lj
+atom_style  charge
+
+read_data data.NaCl
+  orthogonal box = (0 0 0) to (2 2 2)
+  1 by 2 by 2 MPI processor grid
+  reading atoms ...
+  8 atoms
+
+replicate 8 8 8
+  orthogonal box = (0 0 0) to (16 16 16)
+  1 by 2 by 2 MPI processor grid
+  4096 atoms
+  Time spent = 0.000317335 secs
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos p2nfft 0.001
+kspace_modify    scafacos tolerance field
+
+timestep	0.005
+thermo          10
+
+run		100
+Setting up ScaFaCoS with solver p2nfft ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 16 16 16
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.497 | 3.497 | 3.497 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0          1.5  -0.87394226            0    1.3755084    1.4996338 
+      10    1.5000144  -0.87365638            0     1.375816    1.4996482 
+      20    1.5000284  -0.87366526            0    1.3758281    1.4996622 
+      30    1.5000364  -0.87366516            0    1.3758401    1.4996702 
+      40    1.5000313   -0.8736496            0     1.375848    1.4996651 
+      50    1.5000039  -0.87360069            0    1.3758558    1.4996377 
+      60    1.4999432  -0.87350118            0    1.3758644     1.499577 
+      70     1.499838  -0.87333517            0    1.3758726    1.4994719 
+      80    1.4996772  -0.87308644            0    1.3758801    1.4993111 
+      90    1.4994505  -0.87274307            0    1.3758836    1.4990844 
+     100    1.4991498   -0.8722911            0    1.3758846    1.4987838 
+Loop time of 2.94274 on 4 procs for 100 steps with 4096 atoms
+
+Performance: 14680.187 tau/day, 33.982 timesteps/s
+99.7% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.00048852 | 0.00049287 | 0.00050163 |   0.0 |  0.02
+Kspace  | 2.9277     | 2.9279     | 2.928      |   0.0 | 99.49
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.0081494  | 0.0083126  | 0.0084655  |   0.1 |  0.28
+Output  | 0.00034475 | 0.00040233 | 0.00043464 |   0.0 |  0.01
+Modify  | 0.0034239  | 0.0035145  | 0.0036252  |   0.1 |  0.12
+Other   |            | 0.00216    |            |       |  0.07
+
+Nlocal:    1024 ave 1024 max 1024 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+Nghost:    2525 ave 2525 max 2525 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+Neighs:    16384 ave 16384 max 16384 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 65536
+Ave neighs/atom = 16
+Neighbor list builds = 0
+Dangerous builds = 0
+Total wall time: 0:00:03
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.g++.p2nfft.8 b/examples/USER/scafacos/log.23Jul18.scafacos.g++.p2nfft.8
new file mode 100644
index 0000000000000000000000000000000000000000..70716bbcbd9db2713af034deb38f2e9bbcca4cd8
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.g++.p2nfft.8
@@ -0,0 +1,102 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+# Point dipoles in a 2d box
+
+units		lj
+atom_style  charge
+
+read_data data.NaCl
+  orthogonal box = (0 0 0) to (2 2 2)
+  2 by 2 by 2 MPI processor grid
+  reading atoms ...
+  8 atoms
+
+replicate 8 8 8
+  orthogonal box = (0 0 0) to (16 16 16)
+  2 by 2 by 2 MPI processor grid
+  4096 atoms
+  Time spent = 0.000422239 secs
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos p2nfft 0.001
+kspace_modify    scafacos tolerance field
+
+timestep	0.005
+thermo          10
+
+run		100
+Setting up ScaFaCoS with solver p2nfft ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 16 16 16
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.416 | 3.416 | 3.416 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0          1.5  -0.87394226            0    1.3755084    1.4996338 
+      10    1.5000144  -0.87365638            0     1.375816    1.4996482 
+      20    1.5000284  -0.87366526            0    1.3758281    1.4996622 
+      30    1.5000364  -0.87366516            0    1.3758401    1.4996702 
+      40    1.5000313   -0.8736496            0     1.375848    1.4996651 
+      50    1.5000039  -0.87360069            0    1.3758558    1.4996377 
+      60    1.4999432  -0.87350118            0    1.3758644     1.499577 
+      70     1.499838  -0.87333517            0    1.3758726    1.4994719 
+      80    1.4996772  -0.87308644            0    1.3758801    1.4993111 
+      90    1.4994505  -0.87274307            0    1.3758836    1.4990844 
+     100    1.4991498   -0.8722911            0    1.3758846    1.4987838 
+Loop time of 1.75933 on 8 procs for 100 steps with 4096 atoms
+
+Performance: 24554.819 tau/day, 56.840 timesteps/s
+99.4% CPU use with 8 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.00030684 | 0.00031838 | 0.00032926 |   0.0 |  0.02
+Kspace  | 1.7474     | 1.7477     | 1.748      |   0.0 | 99.34
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.0068667  | 0.0072413  | 0.0075011  |   0.2 |  0.41
+Output  | 0.00036955 | 0.00038695 | 0.00044942 |   0.0 |  0.02
+Modify  | 0.0018206  | 0.0019438  | 0.0020213  |   0.1 |  0.11
+Other   |            | 0.001739   |            |       |  0.10
+
+Nlocal:    512 ave 512 max 512 min
+Histogram: 8 0 0 0 0 0 0 0 0 0
+Nghost:    1685 ave 1685 max 1685 min
+Histogram: 8 0 0 0 0 0 0 0 0 0
+Neighs:    8192 ave 8192 max 8192 min
+Histogram: 8 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 65536
+Ave neighs/atom = 16
+Neighbor list builds = 0
+Dangerous builds = 0
+Total wall time: 0:00:02
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.direct.1 b/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.direct.1
new file mode 100644
index 0000000000000000000000000000000000000000..37d12bb37d0cefbd253b38e28cf057b3d7941319
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.direct.1
@@ -0,0 +1,105 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+# Point dipoles in a 2d box
+
+units		lj
+atom_style  charge
+
+read_data data.hammersley_shphere
+  orthogonal box = (-50.5 -50.5 -50.5) to (51.5 51.5 51.5)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  1000 atoms
+change_box all boundary f f f
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos direct 0.001
+
+timestep	0.005
+thermo          1
+run		20
+Setting up ScaFaCoS with solver direct ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 102 102 102
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 10.3 | 10.3 | 10.3 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0          1.5  -0.62417787            0    1.6235721   0.00141207 
+       1    18.780041   -10.770002            0    17.371889  0.017679155 
+       2    65.289192   -11.084705            0    86.751149   0.06146194 
+       3    121.92987   -7.0625759            0    175.64933   0.11478234 
+       4    185.78164   -5.8777512            0    272.51604   0.17489112 
+       5    286.36222    -4.382053            0    424.73173   0.26957567 
+       6    481.42206   -4.3095567            0     717.1014    0.4532011 
+       7    488.59167   -3.8685194            0     728.2861   0.45995044 
+       8    497.85287   -3.0417966            0    742.99073   0.46866874 
+       9    499.61615    -3.419003            0     745.2558   0.47032866 
+      10    502.63684   -2.8360961            0    750.36521   0.47317227 
+      11     504.4846   -2.7628105            0    753.20736   0.47491172 
+      12    506.54485   -2.8460356            0    756.21142   0.47685119 
+      13    508.27211    -2.730935            0    758.91482    0.4784772 
+      14    510.57045   -2.6094877            0    762.48033   0.48064081 
+      15    513.14798   -2.7150827            0    766.23717   0.48306726 
+      16    515.78124   -2.3961811            0    770.50201   0.48554615 
+      17    515.70265   -2.2982683            0    770.48215   0.48547216 
+      18     515.7081   -2.1515983            0    770.63699    0.4854773 
+      19    515.74906   -2.0581436            0    770.79182   0.48551586 
+      20    515.70883   -1.8922577            0    770.89742   0.48547798 
+Loop time of 0.52055 on 1 procs for 20 steps with 1000 atoms
+
+Performance: 16597.836 tau/day, 38.421 timesteps/s
+99.7% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.00027752 | 0.00027752 | 0.00027752 |   0.0 |  0.05
+Kspace  | 0.35879    | 0.35879    | 0.35879    |   0.0 | 68.93
+Neigh   | 0.15946    | 0.15946    | 0.15946    |   0.0 | 30.63
+Comm    | 0.00024056 | 0.00024056 | 0.00024056 |   0.0 |  0.05
+Output  | 0.00034761 | 0.00034761 | 0.00034761 |   0.0 |  0.07
+Modify  | 0.00071192 | 0.00071192 | 0.00071192 |   0.0 |  0.14
+Other   |            | 0.0007269  |            |       |  0.14
+
+Nlocal:    1000 ave 1000 max 1000 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    0 ave 0 max 0 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    247817 ave 247817 max 247817 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 247817
+Ave neighs/atom = 247.817
+Neighbor list builds = 19
+Dangerous builds = 18
+Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.direct.2 b/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.direct.2
new file mode 100644
index 0000000000000000000000000000000000000000..7a22e62bce8ea2de434645634ce01c59dc4c273a
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.direct.2
@@ -0,0 +1,105 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+# Point dipoles in a 2d box
+
+units		lj
+atom_style  charge
+
+read_data data.hammersley_shphere
+  orthogonal box = (-50.5 -50.5 -50.5) to (51.5 51.5 51.5)
+  1 by 1 by 2 MPI processor grid
+  reading atoms ...
+  1000 atoms
+change_box all boundary f f f
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos direct 0.001
+
+timestep	0.005
+thermo          1
+run		20
+Setting up ScaFaCoS with solver direct ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 102 102 102
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 6.48 | 6.861 | 7.243 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0          1.5  -0.62417787            0    1.6235721   0.00141207 
+       1    18.780041   -10.770002            0    17.371889  0.017679155 
+       2    65.289192   -11.084705            0    86.751149   0.06146194 
+       3    121.92987   -7.0625759            0    175.64933   0.11478234 
+       4    185.78164   -5.8777512            0    272.51604   0.17489112 
+       5    286.36222    -4.382053            0    424.73173   0.26957567 
+       6    481.42206   -4.3095567            0     717.1014    0.4532011 
+       7    488.59167   -3.8685194            0     728.2861   0.45995044 
+       8    497.85287   -3.0417966            0    742.99073   0.46866874 
+       9    499.61615    -3.419003            0     745.2558   0.47032866 
+      10    502.63684   -2.8360961            0    750.36521   0.47317227 
+      11     504.4846   -2.7628105            0    753.20736   0.47491172 
+      12    506.54485   -2.8460356            0    756.21142   0.47685119 
+      13    508.27211    -2.730935            0    758.91482    0.4784772 
+      14    510.57045   -2.6094877            0    762.48033   0.48064081 
+      15    513.14798   -2.7150827            0    766.23717   0.48306726 
+      16    515.78124   -2.3961811            0    770.50201   0.48554615 
+      17    515.70265   -2.2982683            0    770.48215   0.48547216 
+      18     515.7081   -2.1515983            0    770.63699    0.4854773 
+      19    515.74906   -2.0581436            0    770.79182   0.48551586 
+      20    515.70883   -1.8922577            0    770.89742   0.48547798 
+Loop time of 0.330519 on 2 procs for 20 steps with 1000 atoms
+
+Performance: 26140.700 tau/day, 60.511 timesteps/s
+99.6% CPU use with 2 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.0002749  | 0.00027692 | 0.00027895 |   0.0 |  0.08
+Kspace  | 0.21565    | 0.24173    | 0.26782    |   5.3 | 73.14
+Neigh   | 0.058644   | 0.084906   | 0.11117    |   9.0 | 25.69
+Comm    | 0.002033   | 0.0022832  | 0.0025334  |   0.5 |  0.69
+Output  | 0.00035667 | 0.0004549  | 0.00055313 |   0.0 |  0.14
+Modify  | 0.0004704  | 0.00050521 | 0.00054002 |   0.0 |  0.15
+Other   |            | 0.0003613  |            |       |  0.11
+
+Nlocal:    500 ave 516 max 484 min
+Histogram: 1 0 0 0 0 0 0 0 0 1
+Nghost:    456.5 ave 475 max 438 min
+Histogram: 1 0 0 0 0 0 0 0 0 1
+Neighs:    123908 ave 172139 max 75678 min
+Histogram: 1 0 0 0 0 0 0 0 0 1
+
+Total # of neighbors = 247817
+Ave neighs/atom = 247.817
+Neighbor list builds = 19
+Dangerous builds = 18
+Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.direct.4 b/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.direct.4
new file mode 100644
index 0000000000000000000000000000000000000000..51d0f8c47e944aeac18d81e3f95f3b3393adacad
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.direct.4
@@ -0,0 +1,105 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+# Point dipoles in a 2d box
+
+units		lj
+atom_style  charge
+
+read_data data.hammersley_shphere
+  orthogonal box = (-50.5 -50.5 -50.5) to (51.5 51.5 51.5)
+  1 by 2 by 2 MPI processor grid
+  reading atoms ...
+  1000 atoms
+change_box all boundary f f f
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos direct 0.001
+
+timestep	0.005
+thermo          1
+run		20
+Setting up ScaFaCoS with solver direct ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 102 102 102
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 4.837 | 5.123 | 5.6 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0          1.5  -0.62417787            0    1.6235721   0.00141207 
+       1    18.780041   -10.770002            0    17.371889  0.017679155 
+       2    65.289192   -11.084705            0    86.751149   0.06146194 
+       3    121.92987   -7.0625759            0    175.64933   0.11478234 
+       4    185.78164   -5.8777512            0    272.51604   0.17489112 
+       5    286.36222    -4.382053            0    424.73173   0.26957567 
+       6    481.42206   -4.3095567            0     717.1014    0.4532011 
+       7    488.59167   -3.8685194            0     728.2861   0.45995044 
+       8    497.85287   -3.0417966            0    742.99073   0.46866874 
+       9    499.61615    -3.419003            0     745.2558   0.47032866 
+      10    502.63684   -2.8360961            0    750.36521   0.47317227 
+      11     504.4846   -2.7628105            0    753.20736   0.47491172 
+      12    506.54485   -2.8460356            0    756.21142   0.47685119 
+      13    508.27211    -2.730935            0    758.91482    0.4784772 
+      14    510.57045   -2.6094877            0    762.48033   0.48064081 
+      15    513.14798   -2.7150827            0    766.23717   0.48306726 
+      16    515.78124   -2.3961811            0    770.50201   0.48554615 
+      17    515.70265   -2.2982683            0    770.48215   0.48547216 
+      18     515.7081   -2.1515983            0    770.63699    0.4854773 
+      19    515.74906   -2.0581436            0    770.79182   0.48551586 
+      20    515.70883   -1.8922577            0    770.89742   0.48547798 
+Loop time of 0.197141 on 4 procs for 20 steps with 1000 atoms
+
+Performance: 43826.435 tau/day, 101.450 timesteps/s
+99.6% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.0002718  | 0.00027621 | 0.00028539 |   0.0 |  0.14
+Kspace  | 0.12861    | 0.14654    | 0.16542    |   3.7 | 74.33
+Neigh   | 0.027906   | 0.046895   | 0.064943   |   6.6 | 23.79
+Comm    | 0.0022087  | 0.0024236  | 0.0026295  |   0.4 |  1.23
+Output  | 0.0003252  | 0.00044608 | 0.0006001  |   0.0 |  0.23
+Modify  | 0.00016618 | 0.0001756  | 0.00018811 |   0.0 |  0.09
+Other   |            | 0.0003839  |            |       |  0.19
+
+Nlocal:    250 ave 259 max 238 min
+Histogram: 1 0 0 1 0 0 0 0 0 2
+Nghost:    672.25 ave 683 max 663 min
+Histogram: 2 0 0 0 0 0 0 0 1 1
+Neighs:    61954.2 ave 97157 max 25016 min
+Histogram: 1 0 0 1 0 0 1 0 0 1
+
+Total # of neighbors = 247817
+Ave neighs/atom = 247.817
+Neighbor list builds = 19
+Dangerous builds = 18
+Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.direct.8 b/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.direct.8
new file mode 100644
index 0000000000000000000000000000000000000000..90119a101bc36af8367249b449094a6e03ff4e6f
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.direct.8
@@ -0,0 +1,105 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+# Point dipoles in a 2d box
+
+units		lj
+atom_style  charge
+
+read_data data.hammersley_shphere
+  orthogonal box = (-50.5 -50.5 -50.5) to (51.5 51.5 51.5)
+  2 by 2 by 2 MPI processor grid
+  reading atoms ...
+  1000 atoms
+change_box all boundary f f f
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos direct 0.001
+
+timestep	0.005
+thermo          1
+run		20
+Setting up ScaFaCoS with solver direct ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 102 102 102
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 4.164 | 4.26 | 4.546 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0          1.5  -0.62417787            0    1.6235721   0.00141207 
+       1    18.780041   -10.770002            0    17.371889  0.017679155 
+       2    65.289192   -11.084705            0    86.751149   0.06146194 
+       3    121.92987   -7.0625759            0    175.64933   0.11478234 
+       4    185.78164   -5.8777512            0    272.51604   0.17489112 
+       5    286.36222    -4.382053            0    424.73173   0.26957567 
+       6    481.42206   -4.3095567            0     717.1014    0.4532011 
+       7    488.59167   -3.8685194            0     728.2861   0.45995044 
+       8    497.85287   -3.0417966            0    742.99073   0.46866874 
+       9    499.61615    -3.419003            0     745.2558   0.47032866 
+      10    502.63684   -2.8360961            0    750.36521   0.47317227 
+      11     504.4846   -2.7628105            0    753.20736   0.47491172 
+      12    506.54485   -2.8460356            0    756.21142   0.47685119 
+      13    508.27211    -2.730935            0    758.91482    0.4784772 
+      14    510.57045   -2.6094877            0    762.48033   0.48064081 
+      15    513.14798   -2.7150827            0    766.23717   0.48306726 
+      16    515.78124   -2.3961811            0    770.50201   0.48554615 
+      17    515.70265   -2.2982683            0    770.48215   0.48547216 
+      18     515.7081   -2.1515983            0    770.63699    0.4854773 
+      19    515.74906   -2.0581436            0    770.79182   0.48551586 
+      20    515.70883   -1.8922577            0    770.89742   0.48547798 
+Loop time of 0.107014 on 8 procs for 20 steps with 1000 atoms
+
+Performance: 80736.875 tau/day, 186.891 timesteps/s
+98.8% CPU use with 8 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.00027704 | 0.00029281 | 0.00035691 |   0.0 |  0.27
+Kspace  | 0.069458   | 0.07877    | 0.089146   |   2.5 | 73.61
+Neigh   | 0.013525   | 0.024156   | 0.033524   |   4.7 | 22.57
+Comm    | 0.002552   | 0.0029828  | 0.0033691  |   0.5 |  2.79
+Output  | 0.00037909 | 0.00051123 | 0.00070429 |   0.0 |  0.48
+Modify  | 8.9645e-05 | 9.5487e-05 | 0.000103   |   0.0 |  0.09
+Other   |            | 0.0002059  |            |       |  0.19
+
+Nlocal:    125 ave 133 max 113 min
+Histogram: 2 0 0 0 0 1 1 0 2 2
+Nghost:    773.625 ave 788 max 764 min
+Histogram: 1 1 2 1 1 0 0 0 1 1
+Neighs:    30977.1 ave 50690 max 10447 min
+Histogram: 1 1 1 0 1 1 0 0 2 1
+
+Total # of neighbors = 247817
+Ave neighs/atom = 247.817
+Neighbor list builds = 19
+Dangerous builds = 18
+Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.fmm.1 b/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.fmm.1
new file mode 100644
index 0000000000000000000000000000000000000000..432ea72b67526be5a3bec6777dbbd51ff254f00c
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.fmm.1
@@ -0,0 +1,109 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+# Point dipoles in a 2d box
+
+units		lj
+atom_style  charge
+
+read_data data.hammersley_shphere
+  orthogonal box = (-50.5 -50.5 -50.5) to (51.5 51.5 51.5)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  1000 atoms
+change_box all boundary f f f
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos fmm 0.001
+kspace_modify scafacos tolerance energy_rel
+kspace_modify scafacos fmm_tuning 1
+ScaFaCoS setting fmm inhomogen tuning ...3
+
+timestep	0.005
+thermo          1
+
+run		20
+Setting up ScaFaCoS with solver fmm ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 102 102 102
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 10.3 | 10.3 | 10.3 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0          1.5  -0.62417141            0    1.6235786   0.00141207 
+       1    18.780412   -10.770009            0    17.372438  0.017679504 
+       2    65.294131   -11.084501            0    86.758754   0.06146659 
+       3    121.92555   -7.0612033            0    175.64423   0.11477827 
+       4    185.71165   -5.8781334            0    272.41077   0.17482523 
+       5    286.28339   -4.3800108            0    424.61565   0.26950146 
+       6    481.28097   -4.3052012            0    716.89433   0.45306828 
+       7    487.26022   -3.8672741            0    726.29216   0.45869703 
+       8    493.65478   -3.0242687            0    736.71742   0.46471675 
+       9    495.66203   -3.4336343            0    739.31592   0.46660633 
+      10    498.41831   -2.8837072            0    743.99613   0.46920104 
+      11    499.20944   -2.7724783            0    745.29287   0.46994579 
+      12    500.97345   -2.8281484            0    747.88057    0.4716064 
+      13    507.46412   -2.7752775            0    757.65971   0.47771658 
+      14    525.35729   -2.5749814            0    784.67292   0.49456085 
+      15     563.9578   -2.9982381            0    842.09253    0.5308986 
+      16    645.47602   -2.5519203            0    964.69389   0.60763822 
+      17    647.09276   -2.2568468            0    967.41166   0.60916019 
+      18    647.12596   -2.2791003            0    967.43915   0.60919144 
+      19    647.24862   -2.2495226            0    967.65253   0.60930691 
+      20    647.51175   -2.0239179            0    968.27244   0.60955462 
+Loop time of 0.644143 on 1 procs for 20 steps with 1000 atoms
+
+Performance: 13413.173 tau/day, 31.049 timesteps/s
+99.7% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.00027657 | 0.00027657 | 0.00027657 |   0.0 |  0.04
+Kspace  | 0.4788     | 0.4788     | 0.4788     |   0.0 | 74.33
+Neigh   | 0.16279    | 0.16279    | 0.16279    |   0.0 | 25.27
+Comm    | 0.00021696 | 0.00021696 | 0.00021696 |   0.0 |  0.03
+Output  | 0.00054836 | 0.00054836 | 0.00054836 |   0.0 |  0.09
+Modify  | 0.00075102 | 0.00075102 | 0.00075102 |   0.0 |  0.12
+Other   |            | 0.000757   |            |       |  0.12
+
+Nlocal:    1000 ave 1000 max 1000 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    0 ave 0 max 0 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    244342 ave 244342 max 244342 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 244342
+Ave neighs/atom = 244.342
+Neighbor list builds = 19
+Dangerous builds = 18
+Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.fmm.2 b/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.fmm.2
new file mode 100644
index 0000000000000000000000000000000000000000..4d4b1100e3e6a5a0829013cebdc670cf61f0b2e2
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.fmm.2
@@ -0,0 +1,109 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+# Point dipoles in a 2d box
+
+units		lj
+atom_style  charge
+
+read_data data.hammersley_shphere
+  orthogonal box = (-50.5 -50.5 -50.5) to (51.5 51.5 51.5)
+  1 by 1 by 2 MPI processor grid
+  reading atoms ...
+  1000 atoms
+change_box all boundary f f f
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos fmm 0.001
+kspace_modify scafacos tolerance energy_rel
+kspace_modify scafacos fmm_tuning 1
+ScaFaCoS setting fmm inhomogen tuning ...3
+
+timestep	0.005
+thermo          1
+
+run		20
+Setting up ScaFaCoS with solver fmm ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 102 102 102
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 6.48 | 6.861 | 7.243 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0          1.5  -0.62417141            0    1.6235786   0.00141207 
+       1    18.780412   -10.770009            0    17.372438  0.017679504 
+       2    65.294131   -11.084501            0    86.758754   0.06146659 
+       3    121.92555   -7.0612033            0    175.64423   0.11477827 
+       4    185.71165   -5.8781334            0    272.41077   0.17482523 
+       5    286.28339   -4.3800108            0    424.61565   0.26950146 
+       6    481.28097   -4.3052012            0    716.89433   0.45306828 
+       7    487.26022   -3.8672741            0    726.29216   0.45869703 
+       8    493.65478   -3.0242687            0    736.71742   0.46471675 
+       9    495.66203   -3.4336343            0    739.31592   0.46660633 
+      10    498.41831   -2.8837072            0    743.99613   0.46920104 
+      11    499.20944   -2.7724783            0    745.29287   0.46994579 
+      12    500.97345   -2.8281484            0    747.88057    0.4716064 
+      13    507.46412   -2.7752775            0    757.65971   0.47771658 
+      14    525.35729   -2.5749814            0    784.67292   0.49456085 
+      15     563.9578   -2.9982381            0    842.09253    0.5308986 
+      16    645.47602   -2.5519203            0    964.69389   0.60763822 
+      17    647.09276   -2.2568468            0    967.41166   0.60916019 
+      18    647.12596   -2.2791003            0    967.43915   0.60919144 
+      19    647.24862   -2.2495226            0    967.65253   0.60930691 
+      20    647.51175   -2.0239179            0    968.27244   0.60955462 
+Loop time of 0.563049 on 2 procs for 20 steps with 1000 atoms
+
+Performance: 15345.024 tau/day, 35.521 timesteps/s
+99.4% CPU use with 2 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.00024986 | 0.00026596 | 0.00028205 |   0.0 |  0.05
+Kspace  | 0.45103    | 0.47692    | 0.50281    |   3.7 | 84.70
+Neigh   | 0.055771   | 0.081813   | 0.10785    |   9.1 | 14.53
+Comm    | 0.0022206  | 0.0024507  | 0.0026808  |   0.5 |  0.44
+Output  | 0.00050044 | 0.00061882 | 0.00073719 |   0.0 |  0.11
+Modify  | 0.00029945 | 0.00044227 | 0.00058508 |   0.0 |  0.08
+Other   |            | 0.0005381  |            |       |  0.10
+
+Nlocal:    500 ave 509 max 491 min
+Histogram: 1 0 0 0 0 0 0 0 0 1
+Nghost:    455.5 ave 467 max 444 min
+Histogram: 1 0 0 0 0 0 0 0 0 1
+Neighs:    122171 ave 171834 max 72508 min
+Histogram: 1 0 0 0 0 0 0 0 0 1
+
+Total # of neighbors = 244342
+Ave neighs/atom = 244.342
+Neighbor list builds = 19
+Dangerous builds = 18
+Total wall time: 0:00:01
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.fmm.4 b/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.fmm.4
new file mode 100644
index 0000000000000000000000000000000000000000..9634a0174bbaf30f6b4fbc3583460106eb9ceaed
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.fmm.4
@@ -0,0 +1,109 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+# Point dipoles in a 2d box
+
+units		lj
+atom_style  charge
+
+read_data data.hammersley_shphere
+  orthogonal box = (-50.5 -50.5 -50.5) to (51.5 51.5 51.5)
+  1 by 2 by 2 MPI processor grid
+  reading atoms ...
+  1000 atoms
+change_box all boundary f f f
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos fmm 0.001
+kspace_modify scafacos tolerance energy_rel
+kspace_modify scafacos fmm_tuning 1
+ScaFaCoS setting fmm inhomogen tuning ...3
+
+timestep	0.005
+thermo          1
+
+run		20
+Setting up ScaFaCoS with solver fmm ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 102 102 102
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 4.837 | 5.123 | 5.6 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0          1.5  -0.62417141            0    1.6235786   0.00141207 
+       1    18.780412   -10.770009            0    17.372438  0.017679504 
+       2    65.294131   -11.084501            0    86.758754   0.06146659 
+       3    121.92555   -7.0612033            0    175.64423   0.11477827 
+       4    185.71165   -5.8781334            0    272.41077   0.17482523 
+       5    286.28339   -4.3800108            0    424.61565   0.26950146 
+       6    481.28097   -4.3052012            0    716.89433   0.45306828 
+       7    487.26022   -3.8672741            0    726.29216   0.45869703 
+       8    493.65478   -3.0242687            0    736.71742   0.46471675 
+       9    495.66203   -3.4336343            0    739.31592   0.46660633 
+      10    498.41831   -2.8837072            0    743.99613   0.46920104 
+      11    499.20944   -2.7724783            0    745.29287   0.46994579 
+      12    500.97345   -2.8281484            0    747.88057    0.4716064 
+      13    507.46412   -2.7752775            0    757.65971   0.47771658 
+      14    525.35729   -2.5749814            0    784.67292   0.49456085 
+      15     563.9578   -2.9982381            0    842.09253    0.5308986 
+      16    645.47602   -2.5519203            0    964.69389   0.60763822 
+      17    647.09276   -2.2568468            0    967.41166   0.60916019 
+      18    647.12596   -2.2791003            0    967.43915   0.60919144 
+      19    647.24862   -2.2495226            0    967.65253   0.60930691 
+      20    647.51175   -2.0239179            0    968.27244   0.60955462 
+Loop time of 0.508149 on 4 procs for 20 steps with 1000 atoms
+
+Performance: 17002.884 tau/day, 39.359 timesteps/s
+99.4% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.00024509 | 0.00026453 | 0.00028753 |   0.0 |  0.05
+Kspace  | 0.43933    | 0.45836    | 0.47734    |   2.2 | 90.20
+Neigh   | 0.025956   | 0.045068   | 0.064242   |   7.2 |  8.87
+Comm    | 0.002799   | 0.0030612  | 0.0033174  |   0.4 |  0.60
+Output  | 0.00047231 | 0.00058293 | 0.00076509 |   0.0 |  0.11
+Modify  | 0.00016856 | 0.0002442  | 0.00042009 |   0.0 |  0.05
+Other   |            | 0.0005655  |            |       |  0.11
+
+Nlocal:    250 ave 259 max 240 min
+Histogram: 1 0 0 0 0 2 0 0 0 1
+Nghost:    668.75 ave 679 max 657 min
+Histogram: 1 0 0 0 0 1 1 0 0 1
+Neighs:    61085.5 ave 95363 max 24964 min
+Histogram: 1 0 0 1 0 0 0 1 0 1
+
+Total # of neighbors = 244342
+Ave neighs/atom = 244.342
+Neighbor list builds = 19
+Dangerous builds = 18
+Total wall time: 0:00:01
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.fmm.8 b/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.fmm.8
new file mode 100644
index 0000000000000000000000000000000000000000..a6a020b031f47c11df37a1f2330cc893c37a5eb5
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.fmm.8
@@ -0,0 +1,109 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+# Point dipoles in a 2d box
+
+units		lj
+atom_style  charge
+
+read_data data.hammersley_shphere
+  orthogonal box = (-50.5 -50.5 -50.5) to (51.5 51.5 51.5)
+  2 by 2 by 2 MPI processor grid
+  reading atoms ...
+  1000 atoms
+change_box all boundary f f f
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos fmm 0.001
+kspace_modify scafacos tolerance energy_rel
+kspace_modify scafacos fmm_tuning 1
+ScaFaCoS setting fmm inhomogen tuning ...3
+
+timestep	0.005
+thermo          1
+
+run		20
+Setting up ScaFaCoS with solver fmm ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 102 102 102
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 4.164 | 4.26 | 4.546 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0          1.5  -0.62417141            0    1.6235786   0.00141207 
+       1    18.780412   -10.770009            0    17.372438  0.017679504 
+       2    65.294131   -11.084501            0    86.758754   0.06146659 
+       3    121.92555   -7.0612033            0    175.64423   0.11477827 
+       4    185.71165   -5.8781334            0    272.41077   0.17482523 
+       5    286.28339   -4.3800108            0    424.61565   0.26950146 
+       6    481.28097   -4.3052012            0    716.89433   0.45306828 
+       7    487.26022   -3.8672741            0    726.29216   0.45869703 
+       8    493.65478   -3.0242687            0    736.71742   0.46471675 
+       9    495.66203   -3.4336343            0    739.31592   0.46660633 
+      10    498.41831   -2.8837072            0    743.99613   0.46920104 
+      11    499.20944   -2.7724783            0    745.29287   0.46994579 
+      12    500.97345   -2.8281484            0    747.88057    0.4716064 
+      13    507.46412   -2.7752775            0    757.65971   0.47771658 
+      14    525.35729   -2.5749814            0    784.67292   0.49456085 
+      15     563.9578   -2.9982381            0    842.09253    0.5308986 
+      16    645.47602   -2.5519203            0    964.69389   0.60763822 
+      17    647.09276   -2.2568468            0    967.41166   0.60916019 
+      18    647.12596   -2.2791003            0    967.43915   0.60919144 
+      19    647.24862   -2.2495226            0    967.65253   0.60930691 
+      20    647.51175   -2.0239179            0    968.27244   0.60955462 
+Loop time of 0.494261 on 8 procs for 20 steps with 1000 atoms
+
+Performance: 17480.630 tau/day, 40.464 timesteps/s
+99.3% CPU use with 8 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.00027752 | 0.00030088 | 0.0003736  |   0.0 |  0.06
+Kspace  | 0.45443    | 0.46295    | 0.47316    |   1.0 | 93.67
+Neigh   | 0.014408   | 0.02515    | 0.034048   |   4.6 |  5.09
+Comm    | 0.0036254  | 0.0041741  | 0.0048032  |   0.6 |  0.84
+Output  | 0.00072742 | 0.0009357  | 0.0012984  |   0.0 |  0.19
+Modify  | 0.00011778 | 0.00015756 | 0.00019836 |   0.0 |  0.03
+Other   |            | 0.000589   |            |       |  0.12
+
+Nlocal:    125 ave 137 max 111 min
+Histogram: 1 1 0 0 0 2 2 1 0 1
+Nghost:    768.875 ave 788 max 761 min
+Histogram: 4 0 2 0 0 0 1 0 0 1
+Neighs:    30542.8 ave 48077 max 10011 min
+Histogram: 1 1 1 0 1 1 0 0 0 3
+
+Total # of neighbors = 244342
+Ave neighs/atom = 244.342
+Neighbor list builds = 19
+Dangerous builds = 18
+Total wall time: 0:00:01
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.p2nfft.1 b/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.p2nfft.1
new file mode 100644
index 0000000000000000000000000000000000000000..f9192cdfaff1ff364d21ff0af49f8fc3cdd9c0d2
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.p2nfft.1
@@ -0,0 +1,107 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+# Point dipoles in a 2d box
+
+units		lj
+atom_style  charge
+
+read_data data.hammersley_shphere
+  orthogonal box = (-50.5 -50.5 -50.5) to (51.5 51.5 51.5)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  1000 atoms
+change_box all boundary f f f
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos p2nfft 0.001
+kspace_modify scafacos tolerance potential
+
+timestep	0.005
+thermo          1
+
+run		20
+Setting up ScaFaCoS with solver p2nfft ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 102 102 102
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 10.3 | 10.3 | 10.3 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0          1.5  -0.62417787            0    1.6235721   0.00141207 
+       1    18.780041   -10.770002            0    17.371889  0.017679155 
+       2    65.289192   -11.084705            0    86.751149   0.06146194 
+       3    121.92987   -7.0625759            0    175.64933   0.11478234 
+       4    185.78164   -5.8777511            0    272.51603   0.17489112 
+       5    286.36221   -4.3820531            0    424.73172   0.26957566 
+       6    481.42203   -4.3095567            0    717.10136   0.45320108 
+       7    488.59165   -3.8685193            0    728.28607   0.45995042 
+       8    497.85288   -3.0417938            0    742.99075   0.46866875 
+       9    499.61619   -3.4190063            0    745.25585   0.47032869 
+      10    502.63691   -2.8360951            0    750.36531   0.47317234 
+      11     504.4847   -2.7628089            0    753.20751   0.47491181 
+      12    506.54494   -2.8460319            0    756.21157   0.47685128 
+      13     508.2722   -2.7309328            0    758.91497   0.47847729 
+      14    510.57053   -2.6094792            0    762.48045   0.48064089 
+      15    513.14804   -2.7150819            0    766.23726   0.48306731 
+      16    515.78127   -2.3961749            0    770.50206   0.48554618 
+      17    515.70267   -2.2982581            0    770.48219   0.48547219 
+      18    515.70813   -2.1516075            0    770.63702   0.48547732 
+      19    515.74908   -2.0581483            0    770.79185   0.48551588 
+      20    515.70881    -1.892235            0    770.89742   0.48547797 
+Loop time of 1.34132 on 1 procs for 20 steps with 1000 atoms
+
+Performance: 6441.409 tau/day, 14.911 timesteps/s
+100.0% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.00028849 | 0.00028849 | 0.00028849 |   0.0 |  0.02
+Kspace  | 1.1744     | 1.1744     | 1.1744     |   0.0 | 87.56
+Neigh   | 0.16435    | 0.16435    | 0.16435    |   0.0 | 12.25
+Comm    | 0.00021219 | 0.00021219 | 0.00021219 |   0.0 |  0.02
+Output  | 0.00051928 | 0.00051928 | 0.00051928 |   0.0 |  0.04
+Modify  | 0.00079513 | 0.00079513 | 0.00079513 |   0.0 |  0.06
+Other   |            | 0.0007439  |            |       |  0.06
+
+Nlocal:    1000 ave 1000 max 1000 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    0 ave 0 max 0 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    247817 ave 247817 max 247817 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 247817
+Ave neighs/atom = 247.817
+Neighbor list builds = 19
+Dangerous builds = 18
+Total wall time: 0:00:01
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.p2nfft.2 b/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.p2nfft.2
new file mode 100644
index 0000000000000000000000000000000000000000..4db3e28dbf06d397c59ad8ad6bb5d280dc960343
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.p2nfft.2
@@ -0,0 +1,107 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+# Point dipoles in a 2d box
+
+units		lj
+atom_style  charge
+
+read_data data.hammersley_shphere
+  orthogonal box = (-50.5 -50.5 -50.5) to (51.5 51.5 51.5)
+  1 by 1 by 2 MPI processor grid
+  reading atoms ...
+  1000 atoms
+change_box all boundary f f f
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos p2nfft 0.001
+kspace_modify scafacos tolerance potential
+
+timestep	0.005
+thermo          1
+
+run		20
+Setting up ScaFaCoS with solver p2nfft ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 102 102 102
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 6.48 | 6.861 | 7.243 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0          1.5  -0.62417787            0    1.6235721   0.00141207 
+       1    18.780041   -10.770002            0    17.371889  0.017679155 
+       2    65.289192   -11.084705            0    86.751149   0.06146194 
+       3    121.92987   -7.0625759            0    175.64933   0.11478234 
+       4    185.78164   -5.8777511            0    272.51603   0.17489112 
+       5    286.36221   -4.3820531            0    424.73172   0.26957566 
+       6    481.42203   -4.3095567            0    717.10136   0.45320108 
+       7    488.59165   -3.8685193            0    728.28607   0.45995042 
+       8    497.85288   -3.0417938            0    742.99075   0.46866875 
+       9    499.61619   -3.4190063            0    745.25585   0.47032869 
+      10    502.63691   -2.8360951            0    750.36531   0.47317234 
+      11     504.4847   -2.7628089            0    753.20751   0.47491181 
+      12    506.54494   -2.8460319            0    756.21157   0.47685128 
+      13     508.2722   -2.7309328            0    758.91497   0.47847729 
+      14    510.57053   -2.6094792            0    762.48045   0.48064089 
+      15    513.14804   -2.7150819            0    766.23726   0.48306731 
+      16    515.78127   -2.3961749            0    770.50206   0.48554618 
+      17    515.70267   -2.2982581            0    770.48219   0.48547219 
+      18    515.70813   -2.1516075            0    770.63702   0.48547732 
+      19    515.74908   -2.0581483            0    770.79185   0.48551588 
+      20    515.70881    -1.892235            0    770.89742   0.48547797 
+Loop time of 0.858675 on 2 procs for 20 steps with 1000 atoms
+
+Performance: 10062.016 tau/day, 23.292 timesteps/s
+100.0% CPU use with 2 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.00026321 | 0.00026667 | 0.00027013 |   0.0 |  0.03
+Kspace  | 0.74531    | 0.77182    | 0.79833    |   3.0 | 89.89
+Neigh   | 0.055915   | 0.082578   | 0.10924    |   9.3 |  9.62
+Comm    | 0.0021703  | 0.002401   | 0.0026317  |   0.5 |  0.28
+Output  | 0.00048423 | 0.00061262 | 0.000741   |   0.0 |  0.07
+Modify  | 0.00034285 | 0.0004636  | 0.00058436 |   0.0 |  0.05
+Other   |            | 0.0005329  |            |       |  0.06
+
+Nlocal:    500 ave 516 max 484 min
+Histogram: 1 0 0 0 0 0 0 0 0 1
+Nghost:    456.5 ave 475 max 438 min
+Histogram: 1 0 0 0 0 0 0 0 0 1
+Neighs:    123908 ave 172139 max 75678 min
+Histogram: 1 0 0 0 0 0 0 0 0 1
+
+Total # of neighbors = 247817
+Ave neighs/atom = 247.817
+Neighbor list builds = 19
+Dangerous builds = 18
+Total wall time: 0:00:01
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.p2nfft.4 b/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.p2nfft.4
new file mode 100644
index 0000000000000000000000000000000000000000..3a2d6531069d93ed3ad48b30afed6d346536bb4b
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.p2nfft.4
@@ -0,0 +1,107 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+# Point dipoles in a 2d box
+
+units		lj
+atom_style  charge
+
+read_data data.hammersley_shphere
+  orthogonal box = (-50.5 -50.5 -50.5) to (51.5 51.5 51.5)
+  1 by 2 by 2 MPI processor grid
+  reading atoms ...
+  1000 atoms
+change_box all boundary f f f
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos p2nfft 0.001
+kspace_modify scafacos tolerance potential
+
+timestep	0.005
+thermo          1
+
+run		20
+Setting up ScaFaCoS with solver p2nfft ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 102 102 102
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 4.837 | 5.123 | 5.6 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0          1.5  -0.62417787            0    1.6235721   0.00141207 
+       1    18.780041   -10.770002            0    17.371889  0.017679155 
+       2    65.289192   -11.084705            0    86.751149   0.06146194 
+       3    121.92987   -7.0625759            0    175.64933   0.11478234 
+       4    185.78164   -5.8777511            0    272.51603   0.17489112 
+       5    286.36221   -4.3820531            0    424.73172   0.26957566 
+       6    481.42203   -4.3095567            0    717.10136   0.45320108 
+       7    488.59165   -3.8685193            0    728.28607   0.45995042 
+       8    497.85288   -3.0417938            0    742.99075   0.46866875 
+       9    499.61619   -3.4190063            0    745.25585   0.47032869 
+      10    502.63691   -2.8360951            0    750.36531   0.47317234 
+      11     504.4847   -2.7628089            0    753.20751   0.47491181 
+      12    506.54494   -2.8460319            0    756.21157   0.47685128 
+      13     508.2722   -2.7309328            0    758.91497   0.47847729 
+      14    510.57053   -2.6094792            0    762.48045   0.48064089 
+      15    513.14804   -2.7150819            0    766.23726   0.48306731 
+      16    515.78127   -2.3961749            0    770.50206   0.48554618 
+      17    515.70267   -2.2982581            0    770.48219   0.48547219 
+      18    515.70813   -2.1516075            0    770.63702   0.48547732 
+      19    515.74908   -2.0581483            0    770.79185   0.48551588 
+      20    515.70881    -1.892235            0    770.89742   0.48547797 
+Loop time of 0.540235 on 4 procs for 20 steps with 1000 atoms
+
+Performance: 15993.051 tau/day, 37.021 timesteps/s
+99.3% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.00025725 | 0.00028253 | 0.00031233 |   0.0 |  0.05
+Kspace  | 0.47152    | 0.48989    | 0.50709    |   1.9 | 90.68
+Neigh   | 0.028196   | 0.045694   | 0.064338   |   6.3 |  8.46
+Comm    | 0.0026579  | 0.003028   | 0.0033965  |   0.5 |  0.56
+Output  | 0.00049663 | 0.00064254 | 0.00084853 |   0.0 |  0.12
+Modify  | 0.00019264 | 0.0002293  | 0.00030923 |   0.0 |  0.04
+Other   |            | 0.0004709  |            |       |  0.09
+
+Nlocal:    250 ave 259 max 238 min
+Histogram: 1 0 0 1 0 0 0 0 0 2
+Nghost:    672.25 ave 683 max 663 min
+Histogram: 2 0 0 0 0 0 0 0 1 1
+Neighs:    61954.2 ave 97157 max 25016 min
+Histogram: 1 0 0 1 0 0 1 0 0 1
+
+Total # of neighbors = 247817
+Ave neighs/atom = 247.817
+Neighbor list builds = 19
+Dangerous builds = 18
+Total wall time: 0:00:00
diff --git a/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.p2nfft.8 b/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.p2nfft.8
new file mode 100644
index 0000000000000000000000000000000000000000..08e68531a9ebb9a8c02c1726aa5e87e2a897b130
--- /dev/null
+++ b/examples/USER/scafacos/log.23Jul18.scafacos.hsph.g++.p2nfft.8
@@ -0,0 +1,107 @@
+LAMMPS (29 Jun 2018)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:87)
+  using 1 OpenMP thread(s) per MPI task
+# Point dipoles in a 2d box
+
+units		lj
+atom_style  charge
+
+read_data data.hammersley_shphere
+  orthogonal box = (-50.5 -50.5 -50.5) to (51.5 51.5 51.5)
+  2 by 2 by 2 MPI processor grid
+  reading atoms ...
+  1000 atoms
+change_box all boundary f f f
+
+velocity	all create 1.5 49893
+
+neighbor	1.0 bin
+neigh_modify	delay 0
+
+fix             1 all nve
+
+# LAMMPS computes pairwise and long-range Coulombics
+
+#pair_style      coul/long 3.0
+#pair_coeff      * *
+#kspace_style    pppm 1.0e-3
+
+# Scafacos computes entire long-range Coulombics
+# use dummy pair style to perform atom sorting
+
+pair_style	zero 1.0
+pair_coeff	* *
+
+#fix		2 all scafacos p3m tolerance field 0.001
+
+kspace_style    scafacos p2nfft 0.001
+kspace_modify scafacos tolerance potential
+
+timestep	0.005
+thermo          1
+
+run		20
+Setting up ScaFaCoS with solver p2nfft ...
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2
+  ghost atom cutoff = 2
+  binsize = 1, bins = 102 102 102
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 4.164 | 4.26 | 4.546 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0          1.5  -0.62417787            0    1.6235721   0.00141207 
+       1    18.780041   -10.770002            0    17.371889  0.017679155 
+       2    65.289192   -11.084705            0    86.751149   0.06146194 
+       3    121.92987   -7.0625759            0    175.64933   0.11478234 
+       4    185.78164   -5.8777511            0    272.51603   0.17489112 
+       5    286.36221   -4.3820531            0    424.73172   0.26957566 
+       6    481.42203   -4.3095567            0    717.10136   0.45320108 
+       7    488.59165   -3.8685193            0    728.28607   0.45995042 
+       8    497.85288   -3.0417938            0    742.99075   0.46866875 
+       9    499.61619   -3.4190063            0    745.25585   0.47032869 
+      10    502.63691   -2.8360951            0    750.36531   0.47317234 
+      11     504.4847   -2.7628089            0    753.20751   0.47491181 
+      12    506.54494   -2.8460319            0    756.21157   0.47685128 
+      13     508.2722   -2.7309328            0    758.91497   0.47847729 
+      14    510.57053   -2.6094792            0    762.48045   0.48064089 
+      15    513.14804   -2.7150819            0    766.23726   0.48306731 
+      16    515.78127   -2.3961749            0    770.50206   0.48554618 
+      17    515.70267   -2.2982581            0    770.48219   0.48547219 
+      18    515.70813   -2.1516075            0    770.63702   0.48547732 
+      19    515.74908   -2.0581483            0    770.79185   0.48551588 
+      20    515.70881    -1.892235            0    770.89742   0.48547797 
+Loop time of 0.317433 on 8 procs for 20 steps with 1000 atoms
+
+Performance: 27218.342 tau/day, 63.005 timesteps/s
+99.6% CPU use with 8 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.00027013 | 0.00028479 | 0.00029206 |   0.0 |  0.09
+Kspace  | 0.27742    | 0.28666    | 0.29702    |   1.4 | 90.31
+Neigh   | 0.014795   | 0.025382   | 0.03481    |   4.8 |  8.00
+Comm    | 0.0034907  | 0.0038758  | 0.0042439  |   0.5 |  1.22
+Output  | 0.00055361 | 0.00073573 | 0.0010755  |   0.0 |  0.23
+Modify  | 0.00012279 | 0.00014156 | 0.00016165 |   0.0 |  0.04
+Other   |            | 0.0003535  |            |       |  0.11
+
+Nlocal:    125 ave 133 max 113 min
+Histogram: 2 0 0 0 0 1 1 0 2 2
+Nghost:    773.625 ave 788 max 764 min
+Histogram: 1 1 2 1 1 0 0 0 1 1
+Neighs:    30977.1 ave 50690 max 10447 min
+Histogram: 1 1 1 0 1 1 0 0 2 1
+
+Total # of neighbors = 247817
+Ave neighs/atom = 247.817
+Neighbor list builds = 19
+Dangerous builds = 18
+Total wall time: 0:00:00