From 23b82879333398e96331b8ed8ef17b97d0e63e08 Mon Sep 17 00:00:00 2001 From: Oliver Henrich <o.henrich@epcc.ed.ac.uk> Date: Tue, 14 Mar 2017 11:36:44 +0000 Subject: [PATCH] Updated documentation and simple setup tool --- doc/src/PDF/USER-CGDNA-overview.pdf | Bin 3429483 -> 3429423 bytes examples/USER/cgdna/util/generate.py | 630 ++++++++++++++++++ .../{generate_input.py => generate_simple.py} | 18 +- examples/USER/cgdna/util/sequence.txt | 7 +- examples/USER/cgdna/util/sequence_simple.txt | 4 + 5 files changed, 646 insertions(+), 13 deletions(-) create mode 100644 examples/USER/cgdna/util/generate.py rename examples/USER/cgdna/util/{generate_input.py => generate_simple.py} (96%) create mode 100644 examples/USER/cgdna/util/sequence_simple.txt diff --git a/doc/src/PDF/USER-CGDNA-overview.pdf b/doc/src/PDF/USER-CGDNA-overview.pdf index c86943541b51786da393397a979cacd6163c8969..b0d257adafdb81bad8335a44c0d30413515d418b 100644 GIT binary patch delta 6208 zcmajj<yX`Jvj=dxySp0%mfWSKyFnU3V9BLZ;0H(vf^>s)r<9b0NQi`Vcc--Uv(LHr z+_(3|=bZ1HnLl7=j>`Wa*1?5@BZeb|BZs4eqlTk3p?3(Qh>Cz2(t8*9*nut+W6vc; zqTso@b!H>(_}%^ueY#`t!i8b{N=CDP{SyKqGodZPTLgZhr=!;Q0ca@p`^v`6)5vI^ zSHG|ByyXJjJJw?X3%8fR#_!+mIR&wMjRyo9jMjK2Q!sORiGF61I5zN-(K9ue{mfAc z`zUXFFvUj$+|cLfp+i4+;sAQmH7}@86U?8V3S4&97DYaItN!%)`BQI6HI0(FEn95S z5ZY9CZ~9QG-uHbM@<2s)WHfUAol_6<<d<x^wXtK~e#bp$p0W$8b*c4Eo??m6W!cxU zs>5%eG`!l(R7wiPv9&T4S5{%avYETCOxND8Ory4ZEqA@qm|1Qwac%>2%@m!oQWfYS z=`M3qNg8^zKHm0oK^C(s;f}lIwX0nvtDl$M+&TG3g~Fw$>j9|fGm69X^o=}JQSrGt zQt8RvwPGil*(zW(iqe#t%GNXocU35=Wi3x5@V2E$(bH3N6ULdJZJ`-8zvp7P)N<LE znKhl`yxKF#Yo@L`FbZVWXwY2{9$^SB%So1wTgANJe~Ch?O+V<Vu3bLi)@=vLi7(OL z<b8B$Y{=bv%%xvz5;=28$g*ri?z(Z(rWp)gn$Ckw>8I!QwJiJf!aSq{UC=Ag|D;~$ z{2nv%Yi@gW_*m(Rf4x_=OScsDy?U2|5huzUjl<8bhJ?mlav8YAF!VTUJ>6}Ij1wlV z$ocAV#`p1SM{~YY52;zu`PTjBI5y(ad4y545E|qlB5^$}#mL!*oL=f&0r3Na0-C0i z`(EiOS<JBf>nfAZ?tkNEECiI{2+3d*GZisK<1>88CHez~+^5U8M{%}KL0Xaut9zF8 zYCj1`1g@C|bsGRQh1jyecFrnFSh+g?@Xm;HwnHhg;7ZGwua73W%`wW*ph%Ohn5}~c zo8crj?%3J<wa?zt4*6WR!cu2Z?e~H`?~Hgl8uJE0+zjKCnw6`W#K^r=D!<<K#|<<5 zs>CEry&isMl-Sp>wSKS+5oj(xZR{$OYLtImIiZej2_G0o`tr5>mH0T4*KIE9c=m0H zAh7TGY|>$b^2cU}5??%4U*i?kZ(mN0t2y0_p~ASsaw%u7r7K%2@faqofska_-;o&b zaf=5sEv_@3i<8r!e;_KLxWxc!r}+{sL&f%copv!45p#s-wA7wWm@e&SFUXTmU9vQV zAJdv*^d1PFr4B{4VRGKX%yjN^E!9E~5LS{0@uE{<u1uk7cS2iT@C-H}^Z!OH->1P^ z{DYjZ2?{r2!lQqT7NFlvk$8j=plJN|s%8Vhil-Z*PZz(Qy;2oZY&}s&YWEBPk8hdS zlcnay_5TWC*52T{9K&L(pWctq6c((1Hchdt3<oeX!6saSpCR0oa?ueF-9s%P`>2?* zC8_;t0RmSVKiM9PZ$>w??u#{^=G$@kghpqbgz5(Y^hNIGCwtCMm0v7hE=Ic7&?^d- zRaS{;9R-?IiB@=GlEO;P^UIQl1U)NA?ASk)dA9u02<MkM2M0VoNBN$W$7*f|bgh0n z4F<9wR*9>BK95I>@`%MTGxoMn*poN)nA5!PqpVoE!`=5DA`@>vJ*iW(Eu8De?<G~c z%1l4JC)*FeIbY3_FwB0BFn#=o)qf(TRRn=`*osYke1xmJ_PA;fX{xr`H4&57zb(A` zP2Kse^N^AsuD>-&7Mm23##|(IYU@(oDiVMm$v7x)U*y@%O_(u7olfi;*Mx1F-V9K- z!4BPEOaHd}G`eAK=NVzdTS0jB(}msWyn}s8S@oQ3VuI24O)Ci7>zsu_Q>h&Bh;=uf zd8Qah4ifvF8=eKSpUr2Ua`tBPw?1qH-x9=G6>Ul2R$M0wMS2Eic1(mvYV{O{hX5$} zpzmvKuh}#GgYEBO@k85gYcg_Q{$yAXIH>deo7Tx0jzM^cP!m*PH|aq3MIw8ROgx%9 z&tDGH@VqFjFLSmnc3{qCpig>@vjLUz4l^^e){hLI+tZ`d)A=r5gct@5(VWH(A=3_g z#+g7GC6C~7mKqPkV9^&idig$9O#s{?KfmFGVp5!m+8V0z524Y#BK~?<Q}gR#@6J3K z<az4L=+BEsT_*2vjDn3{+~zLKaJ#scO8e8n4^wlYxofZFnKt~>Y@Pk1=MgloP3IWN zH;U31tsK7yPf_pq#>7YL)6&XqPK!^pr0}K3tn63)GA-2r<z{Q9#a)HWIU9i3gu*Qu zI>WrQW(+kKxNPe)mSP~K&n_L2CFl{;`b;VSP})b1aL}XYkFX-nyX{u~tDu^7S@e+R z;jtHzu|M`ASezM^80|cC=^#nGPWJ3pI@mK1kv2$DoNse26@gvWRv=>iL&A}1IpUf& z%A)$Pa$==ilJoEyMaWXJHwnnRe#Vi}G)|q%P$nDa1xV2`+Hd(n1P~P3secis^+>gb z#-3?<Srg>m$m;5BIDHapmzumRb@1KJC}FA$u$KKBAi(I~Ck*D#AZY4394Tn2P%@f$ z#nLJ9foGT4mmfJXde3@|g_b<ON5nuv|3``{25!|fI)$?f70PRI<#)ijBN*Lyb1$4T zVb|xOl$R!IIj6HRPk#$@M@Tl+to!oghcP<Azh9X0R`2@_$}&$Xyq3ZP%sa5#W0X$1 z*ft4eQCpllM%)u<NGZp=()12tA`1P}D$oxj##^qlC(3Cu)`RS<e;N^(^@dp9L8|WY z(I}LYGYoeXkZ>mZBoY9Dj+W7&70thDy_PlGq9~N%tcZC8R?d+Dcf*e~BTr$auexH| zYz>HIwCUeYVpM@qSc`nG7gFkMI0s~&11}Ej_jg~9w1Xxftx|+CYCd*BY?@ksKaZfx z#A+i`p{w!|jq+QQP>;UJ#ElOqiA>XTbX*K<L`}0#^wZ89DAEEzG7d?}#VM|uvbQ!j z;7vV@VEf&DG?-iYTnM6@<j$Jh-8hM6772m=y}cq~<G=p%GiZcBU$oom0uHn7eG0uw zutm%ly-C7nG`^!JsWH?U2Q_@R(zk)z(9<_i%k=nnL2Mz!eDZz8A%RFWJP|aSp;6kD z#9o?>W|-f!t^@(Yv7FNMYsW5iFQLHw`>xJcu`}7Ff1}=`D|%xiesk!6h^&*d%@3qo zy!5yVD08Uwpon|bQ!0DYAh?y&85=8@doii7Uwm14>u{v$sZkc{z)H|-yCGjYRfL%l z3^nb352}Rfk4haM7lPCS)f7H`op-(*-5Y_tf5LsMXG#XNG7gp<hQ`lEqnIqwUyfh3 zjhWkW@OaHmcCMY^+C9~eMs4erBP!E5B8V*tBa&Hj{v-!NvHF>_Cd7?G&1-2=d<(>H zN#lc#jAv=Y-Y|<8v6X-jK&00d`z;Q3h`kvH_8}gT-4)UV@nR0z^wGsEpP9R2Y1X@0 zRw7jNbKe2ecZ0eg3~v#X-c!|3tKwRHl4_G|dO1>~3Y4o`Z5`T=vTCIazgPByo#5_c zBC_ob%9WHL&?<m8;`}G<GPY;c-H|^+qi>r`MKOsa?lBUMPh9=3z3Y$8aiy(G1d1<1 z&HT{pkF^7GzpxT|B~e!7NdY%Kl@fuebgCs2&(}b=^jNN4kKBg9dzsClgsG@~BBa+L zvLD#*vHCwKH&qda1!)^hjdx0+5gu0sey~5w%ztLvu6tH%oT6}^1{S!*I9O*l*0hFn zFmKl=u7a@2-8VW5cG-%(wpbA|pV3X+D}}LaVK$y#9=294|4pvej#waJ5e9yS|E4Is z*XVE<aF}rat+C;7;Beva;P6}VUK8k2APa*2XJu}UF(#Np1W;)?BCAX78}fzSQe>)4 zju>kVekZP=8oCoDVN`FXkp6kN<$=5#4)(Taut|4Oms=$<pp4bHnbP_*V6`wk<2D=b zQp=h~dw%ErcIM=i581~I_lLb?*Wtde%uk<K2R(92yQ?tUorDwpEfvtOJWCy4ns~Iq zL3Ze|n*?QqJHV0cLsrEr4$8}rdhi&xs@Sxdt?#lXvB>}8)Rrx5thTE9o-Au2Jon9v z;9*u<y|F(jEXL#L5k@rc;BJ7^+ETSa`yug%L9VcYr1^NikGiI8knzk`^>1YAl!bkm ziG<+rDErKeEr{z^)pg22AFHiYmImk|;|Pkv3@zhX0JyWT3xi*zR87p2l8?FMG1)}j z`ba81V-#%*saF~2Msb!@_-uo5Vx2qMZNETRp{~{kJT2+bdH`Xl7A_jaSvk6`ap(&4 zS5A|Fb+?Q2+_zelj?!E?<K(Q^{m{TyMI4aqAcw`hqQXZ()t0INibF#p_e2_&s9`)h zh6B`(K%VGv;{}spN?36`M7Qw50mnbF$eNjE2+esW-~UhezhcMAj_OApk3T=}vz3e5 z;#f<hd*5xFHF34fFPi+LXal;yN(;w_?G+~e{Y%5lsj+%dpLT_o+4n;}k|vd_dspY0 z^Nn**e`cQ&C6O;SqS$NoX;4Z~cCd~jCPeqI29^aQ@4(c!N7jC%tMdYjL-Pp;NrSUf z#^dkA*xg8&*PjAHE0?S?gFe$VjR%}p5wv3Y{TSd3iHR20#Obd;zgMB;jjb}K-IU0_ z!V7IpWQkCiY-7l)-uNy{us~j%{!!}b5?>zp6qxLR_omT9gvz{{IYoY-hl(bGPtWm) z5E!W2;KU(AxAB*0i*lYnY=-V{e<{<9t%mLe6+^bjw$PM-rNJu{O!2|tu;1x9$EOY3 z63Zf05Es@xv5qXy+~>5M4$C*-iDO5!mfU<zpSVNlf?@t$|27Y#uF*q`vG$Hu&1Wph zJSV^(ys~CD%;<Dz?3caljOxFvO74UN?1;qa$;ieQ_g@RPwN(t5j(X>NUXB);PDv5F z3=?p(Fld<KlA+4PMqF6ZZsHdkL$Mlu1Iztsf4MH`5a?UNV=-fpl=?&tH!X?`3vBA$ z7V;$@HojVF;U9FkuLva{d{#-?BhLTCtt^V`E*aA(k@u8oW)72@j~MIpAeUkQoP9r~ z5}^g47N960lXTlTy=oo(aWcK;p}$i6xrN@PQH#F&F$zby8gEc$l<!!Ez-sEGvz}P5 zxt(~*^|kaRtER~#tz(P3*<&D`RKrs;a|>Xuk?mkXo*a3Xk*&dF8AbMll!w;RpoX4B zdZlnTFeA<zy#9zjftb}T?_nVX$h1<=v%Js|CwwNXj3upXk28bdh#t3U6YJV=r5&?4 zT^#9r!(_JeAi%d975-xgwt87rk8w{ciiuM)$cvlr*5=aRa{hHXz6Q9!q?!(*mh4_| z{OBT|{5*d5Z=U#w;MP1a?ts->HG9j<)C<XVE((5EG4}AzRMsq<wF>G0A5*<|`!TvV zF9ftFv0iNo4S(Z+PL))d7HLD4SO+wvD;>$5nhN9%48`5fM7cbd5+k0<m(B5b%oLkl z$yq$nSMVQcPM>A5GFlAWTmP+Z@4Grqye}YQyo5lO7Z{mwDb11JUTu;Zo@BfJG?9!2 zz>6zV;6TyxSZA-mRhJ*Y%0z@!Rr*1Hhp&uB&Q8sTG{aTyLx)Ds+!^&@4#=-v{POvt z?OW@4v*dHz4$HZqJYeZ%{$|=FFejz1)I3-{TXkQ|?p!Fna7VG|ay2J@m6Pkl-%?k? zT*B3vCibe(^=u^<zdK06Zw~FAZ=-HDi5;qCvKkz>BYb9>7n=v{sCe98y!?pbDQ$+G zG&MxgG?KDS)pT;X%C3ZO80rm=+HY-@_Xvt(*qU$8QS{KK*wT;BvvUw*6Pn(9HZ~M} zu^n1=G5qc#Qh|Yng9#Lfjt7RRA+9+*pif$4-}M{`F3oouxlc__=I#K)0~;S#_<z-w zy66*+`eg4eq!9tmu`qjy%{m3vC=C&=i`V`U&PdWYrI4LKb{+xkeYzGDv#-e-c(S2i zaih`B#<KXI<JbbTJ|kr^y|HPnIU-mpG)?Da+xMBe9#2Tu_h*~P)Bm-ezpSeq1kMeW z2e&$r%9+pWsV8Mgj#iu5?+RNB*tq<4T+q|&h~ExLq(lQm-{kgX_1#zmCB3VVURf`} zeiFVFNHTM$q~tY7F?q+7>`0GktBt2Hz(EJ;VZk?nGC4$i3O^U|SUx}%P}$*b7(?#! z|GDw5te=}BRVMUB-sK`gx3mX`EeMzA@W&Tco&X$x!QR(5Ov;=*SPKL{r>q$YrhUy3 z|HN^r$ddz=Yu-N^!<KatRke0K8dN!5V&~Q3u7h}H&1faux&N_!^`kVs*Xc!W_9fo| zg%L9OH{Eb6yq?#_{M()%q`WZG6h3invi^ji-;L4N4f*uA#qY5eUt+2;H8;OqQgc;< z<?y7;_^R90(*8V6G*5Q{oSGJ#65UovA4l*eKA0Q$@oeO2Je4-nv7iC^UDx=iC1yVA z$npJtKvBZID-5@nP~DO;=BX;}COx_A)Li0e@x}+yiEgD2<I-S`zwbf`?-bUe;9C`M zDCzZYD)mcBIo97>^_|-c8)IiYHsYMEWCuNs$n@;G@LH$dE<dtmKAAGrrzb(3Keu$j z-G6fdxgmY&u*;yg-B_EJyx3&ZLXF*I%^p(8CijdN1rkq8`EO+Y`hzBZ9CnlM<mZs7 zm4D|!#OvE<6H>^N9C)0Sz&$tQ0<S`SzXk<<3wk^~hCP;0T`PcTnEu7=!j9gC&PG!9 z=o|Pj+Pl0_VcVOjvrU{j^XZd91iioBSqL$IXII2|9Z$J(t2^9AQOOo-@g|aO>4B(% z#mugekmR5DUek-8+v<DpS$6>08gG8{R?zPQ`9<{N$vWh=VxiTk7aX_yI>dIEsqAjv zJHrsyDCm^eM=RX7do(|+DFj~dI~936qj+ry;0WP};E3T!;7H-f;K<=9;3(m!;Hcqf z;Ar9K;OOBP;27bU;F#f9;8@|<;Mn0f;5gy9;JD#<;9kM;!tufJ!-3!gT6t{*o%Y!T zK|&yqAP598;l$$8aJ7-Q^0H;%kQWo+2Z{3kU$eDaT}S{$PXr_f5s`-|%8CdGfaL`g z!9ofUS$QEv2p9wr5)f37VvzWMN6fMK)NNr<FM9@20bz#!eVVdPL)7(kNCwW|)BWy5 zM58d1LA5cEe;eYD{H>nr>xc4?btK;TZ$kY>Mw1Ru>rLGZdo#^2P{VW|1vWMoF)@)a z85x_<fC(<<{qrDaFvK_ab~o5J$J6&YV9Z~WXiA#az}N_Cr;F*Qt!astYE{&HwP24@ z$gle4)g>d^yDMN}%s2s;m@y!a3w_fq=DVkUTY{1&aZX7lK4#XATcjPLbG|XUr;sSp zNav_FhC)4ROP>fwBvCoR-o?qCs+G7YZ4@OKY03==Nm&lNNtSxVrx!y%mixHrsZ##b z(qpqmqInl@9<!rB*Q|3C&2ipVc6(P$V?}Pws+_p3*#^*yETE&3ry~(hTyyQq<r|;3 zC!#9A%wj~9s7)t3QxeMQyqQ!P_<NbqCe0W-Cwp2W{*k99veU_f$h^z?;T%SHw!6@= zfP4}WR8POide3d0ES^u?o=+XHjzAM`0Y(X$z(?L3VP+4m{^3Iz&bUr~8{fzBDb{iY zrWcQ>v;k;*{L<+dM3XW&>{n)`_?X5n;+ABYWS(OCYpRO<S*B}CZabBOTgMRlsQTZ^ z*$Ki><He3@d1{RWc6!O!ragqUhsq+ye@&&mYi2fvg?HLSVKKFdd%-}q2BgyIXIHds zG2Er8Vtv<<oKzc~y!``<x;dZNwb?IOkM&m;`$wSFAzOAL^%HkgAx%u$>k1ZWTd>1p z(owMk<hSMJpoggrqC!&V!+aK*7c9bqJH@nM8OtTgk)hgzb2!z)R0p-#NTp*IDRY`i zC8&OzW7u%BykF;I5fOn)!t`8EX@VweuA^ukyrGj+#G*79`j6vWXS1kZ>4D|?r0`^t z�mqwF-@eBfw=QALZ4hA+Acj+ft<!X04WqFjMa)Zj~ad-}Q^}088$|JJMT?;e;{O z0HHB@Yde&}bvc}`4AdSBcx?&jcL^a#D)<22JCM}YGooO3*?jtIeIZ6nArKZTtAdsy G)_(x~Q?oSy delta 6247 zcmajhRaX=YptfNIX@)M5W+-VSrMr>tZg2=m$)RhcOS&0g2x%NpkRG}_UO=Q9>DaTr zZ>@d053Zx<`2lx*<<zS~q|nG$k*JYqk*_1sBQcsWI|NZh-e)yD@Uw$|ny7iLh!FpJ zFx*p~GvT!evUkBp*fs9d`X6cPiNydhRU?@VfI4zIwj5m{9aAU@)6KPdIvjLZs0kR8 z6ZJBeOAZ5$FP&Q_cMkqOm)roy@TPRAO>1W^Gd9VG?Ed)o?6aWk3CY(szhXrQUXwJa zre#@*Jda9=K=H>5{P)4iQd}?SfhJh}J;|9FmMtKh%k=osa7b2W1}8uc!R733oIede zfX!=si}dMLn+F(|F>yT{vZ(hrzq>S#FITyh-5Bd|Dr$*}{T81dJUo=YZLaOmvF1!0 z_4{8CLk7+XK-`{K0JDkvMDuFe&HS|bofoKF#+0Voyrn^RDF|HLj+ZH)fk7vl<ufy# z?$aW6qPD@kQp1+}YGKyXeDzoPdXo5`EfWh4C#<$Ax2J)cHT=|~cVpQTk%_Aoa<t9R zh~iB9#IO5;hit6_!to#I0PFgL@U=uN_YI-h%98=klU-E!{!V9{oIzW8)mvy@v6{yI zUx2#n<L@#%acc10@_!6ad3q(_bur7MG5~8W_Ze*TpJ#!0k4yZMwUXijg_G^__HXQl zy7`LFsa<~Chs$=g=O$iW*H#N|_|@C9Zm{X|+Um8I?eARknhl4efO>VPPd&UOZ+c02 zUieVM!a>+Im{V#)k6=-<wckyZkns;M&ue^5BV2S-GhgtoL|B%1dwFhExwZ9}JY!6W zc)MK5i4X<|P!`@)A7Wwg<il(wDoOG_MZSnt0@r?+G|BPsX)fktP-ZW^Q;N)F&YJ1; zlRDoU4yR*>)B5N48QAhT?DSu`cz%;CD{9jwa*5?RF(up`It{m2AIqsp_|H2ewUsj} z<5b;qujqhNFytW7D{u~9l4-3ftJIOxCQx;DV((_7eXFpp5Iqx2di|z2Y8qQ@E5!uC zE%3TacP8iOCkjiDI~AxY8onG~!0|DWh+3|}2T(K$S*}@Gi;IRPP}%*K&Attma8*<t zP-o}E{f*hK`b)zj6v&9r!VXl^`@mm{y%|M(1p_}k_v-RM1u7u}aVWjPw)7VcuGVXG z_n^*sAqp8GW0Bvu%8PMS0Rru(X3@VlyofL`g7q+WKL32LZ~CXAn6tzR59o~9#i`um z%PuxK*?n)cP|=(^Xc+4#fKOH0dme^kHzdEP5CbQX9-aPXN<3CJFxIH7Am5+Q?^CEd z3ItacEB)KH6F?<Uj<Ou*!<h-|%$9WDn<FLRMO`|N-Om#W+Y-%n^;zA~49^gI9FFt; zIX0Ytkcne3ER(0MOXS)sRA$4HCB|c_C{4)>FrbP1?-%#-d{H&i*OhtJBC*mKfJAFl zNW=TIDx)Yo-WaJ7?`Blg%*cR>wB)eSu72=sd3<(qgFV@&bF=TML}f-IFE>VlsM{z_ zlr{HuHL5ZlDOp&XjLFthLO@5c314mA@MwxofTHwtGq=Ka=kye4t;W*g<^Hg1BQU>s zS!IoQ+gTjtU0It79y7XH$+#+}zvGrVw$`9|UQ@wvFN#d)miF2G)5o^9S%tTaKgYpN zAis_=v(q=THZP9yZ!zQs;ykSK{W`sJtR@m9LXu`(OEIoY%pf!xm)C7Sa3U6f_`k_2 zUF@DTOwlZ3=+|?-I;-Dff~(OmfSB2h%U`|$MYT}<4UQV;#yjDpnL-LbWEd8wcc`8% zWX?kVQGqEUBxONtSz5*JTfLVpt|DOlhNaf4zvYtvV;R}Ey$``*lX>gy)P0}+<mWcB zt*P;R(TxGD7IW}$-c|P?g7FP92;QLRMi9{fX|S_CJ(d_9sE3|jdeWpvK(X-kb&FI# z^et=lyD?x@e0INKC{!hf)4^yfY}zoyPJ?b@pkKUk^DF$7Yc2mw9dD1!hGBrk&9GUR z%Ya{EpYH}VX`q`si2WFLmWZ)nGy;h&K5_aBO4_sXTB9~H+FM*xr&Ntu2Lc9k@o)@` zOtCXt-nCSxMseZYfO1D?iNYe0R9W)P^GPGjMFm+E#Smta;rz)>Eqxn~b!=S1P^Oai zo<TURdump=LfoNT;{HqgR}k>`a<ompJvpy>0ud+w)p~Ueoo{CJ(XW}OM+{?QPluat z1`fE+jR^yS&GsV2sU<C|7xVww;2sS9{S)cu$AHVO(~YyqX6lW-OP!1%OrbC%$(<l! z-oOrnLrk_2lDs#LdE2-D$@bfHyUFy0SY`#%E;RF7Q@`UOlOf9rvef{?UPpWwy&#T* zZM8ZWiklT&_M}H`|D-#3Csn7ULU31i44`IaOda%=EvsQY&wQ=pd+eac1Lai~EYc%` z{>Eu~o+_X7`a2o`>nOOKc8a~t^F{%*#Lw^Vl*`DE8VrzGPzgYl8Y;aKhsc~uWz|`% zr5H;V;wd}lO=#3Tce}wAVwgg?qTRaHa%UMJLMjg7f@2f8dBD;8!D<hAv9uikz}OCQ zb%g?)(PO5sBmtmJnjf>AGHr71_2$|dCYanHmZ985XY*6-ktm6#NAYCyTUi}yA+KjI z&hm?>iarNAETP#gI#M*yBjz`IIAmRL(4-4HU?wy*npd5$1F(d>a8)eq<N@`U@2^nc zI)z6OuE3<Pv8u0OADnL6gK>@~$z2>;FBNIWNS-xM_<6c(K#mRq@2=8Q*N;*)OC6Yp z%hLZ4X7(4IERv^AV6FNzLtV|^hT14#Y)cIr<{Ng3Jn3B<c6?>BpehPc5b)*Vnbq!d z;{YETpXN&}gKJyIDL>;r({@zm9!gH`F^6eiF?6`G9D-`3eU>+)n4ejCW9<0S2ZKBs z#NqfE*y-CTdCkwPY3QIt?BXu9#$>9-T91m1IiJ67Qs`aek8`piraj-!#(TwW5;b0S zHjly#j@)GRDaJ>Sn$359+Z}E?5L_Aq-$kr=%3mlKf=et36XGUJd)on-zR&XfL<CWo z;Qb-AUN+HQCvMi?bd{Pu`T&o{hA*;&lZPpw+e+Ey%|g7lCiCB`oE!hOs&Tlr$8_}& zJ(n!6RTQWkj87$BoQMvuW3#>7=JI%;*2P=|Rwq*TT<}_dP8B!JR^a@g_Bg0gF5Lu8 zMW_NU!C#<6h(S8Rim%OvgVJR_#dOZUy$^cAGOV>{Ac6!Cq8jmk$JyPg0wjECsMRP} zjjvs-(Y0ln&%DQ}#wk!WNgu=8lIQ0ko(*|}ANC}vp0z>oLFX02Yu9eSYlV{IVDN0> z*CDkLQ3B^LBwg+dr2-hNAl}i6qJ<Eu?&8K>u%p;P3608f%0nc!?x&7ivSW-P9INn| z$3ze`f=oKZ-K8yHy@O_1AG51b10j8zSr-`}^mA;!upTOY?I-=wC&gWS7ARjG!Q2zJ z+zdT>AAH}ev|r~$WgB}!;1zSj`h7I!Hmyq|Wp$%s&&txjEQUh&gpsCQ+JG^9-Ub%} zw)k9LbH0|1Ru{7zSMxzAvif{DdjJLDtsJPUe>Y3?Age@?PI95K8tpOb5^*ohjNVVZ zA6_=ml8H++rO#SAF11T_1V`6fo^~?+!Dxzcb$3zq&(d<{zqbKCQwkVTJ}`~E;~$*n zxnjJehE(xQma>r?rt)Otg<@V9jAD*hU^-<f^W{_h_^r-IB@_pD@L&_q&rdf=w?wDG zPh-l4f%l=CC7}w3_CoLQoLDDlXwIy%9iK_Se&=|{8_y4yvu+=Z{t$Sfs3)R|g6DYX zDZa?hqL(Up(0Z2~XK;vMnG=f*&mmy{eIJ>}y629(@%shzDLk%4Sd~7mtWGLcF&bRI zjL4;^E`nW)lY)tGe}sO?Xza^^Pg&v6LtkD^dc4xini?h~i5Zl}M8ZPCM#4eDMZ!Y@ zAmJksv<*rVno^?j3uSfJ5wf+Z8xt<P0=GwN;~6KQ5Ro^3OT@P@j^(PcNu;L#9-oH8 zVMPYzOj_@9M7&YEhF!)pcYtiEE-EUAjX;fsA@M@^BNcVgO}-#6u7;!7iF!1cn;-bi z_iYzS#lwAlr7pC0t)}X^@LT>;3Qmp~5R~d;y!Q2DJbc?jKtxhf)HY@t!S+=Sywu!e z<g*la-gmW>6^O4!#@ZBpOjde^`eZh;R)a9PFj#Jy9A!D`tS-=A7>ybmhI#0wo9_Ch zb!$RkHtj9ufK$q^)O80nRR{Y4EUmVl#0MkQw2t<)i$$Y~eluIyDsGZhb=(kbZHro{ zIt4fT<rFABWzbBDiBeR|9$^8VG9&!=aHBnakeX*_$e_ymBaEa%Iar>O8ES1Zb|9|K zuF=9YXl-P-GRO5W$}Sp6qUPA}Q$Ft=f)(V#+x<mJf^7unNK*H2Sq9OAt8K@CkH)g# zG4*iHteHH)pHFWT5KBjDGm>;<JB8Yf=5xJTZK<)4KlGyNzGo;ZJjz$#PV2%9iiQ$W zpwDOuamGj6AqX|u+f8X9D;UNYgr$#FUw6%ZCn*)eJ-;DOHRj66{)A1-v|FOz&_6%l zGdT2g*e^QlV7Q<$H_WuINn2%>_rXp8x+y7xsa}&TZ(nVDd=%iXxb#Il`&ThNV}FlG zR|`ZUTwaw#EH74M$aV)@z&AR$<YYwA3p*Rgl%YxlVAK19B=Y}>S9qZ+@u9BQQ&hrf zGM8?KWFBfLx^O;2ybG5_(6xvf2I9gfg)F4ld+k39!sPNC)7BZzgi_1CD}SKqOA}l* z#A191zv7>Qf}g|ol2Y7t#y4FI#uZB;K9Q+d`3h)Q7yRX|wASE9s)8P?&(fKg>48Jh zwHATVn$P|R%|ijc>q<HA-T2%*uG)gtKPG?o%HtdgLLg3P)y8{Wn{s9hJnNcZq6_=( zh#wC=QD|dz4puAJ2X+gUA(^h|IUm)>Q!hVoKuNZ8HX7`9IhXpYvSMbQTr;T~D+;38 z5Mu_(gSFlbELvc<84+|~v$Cq0R9bGXOXr*!<_F;oPPpLl`>9O423geU<~b{Elx8Eg zmpO)K=^UR5!jZ>84G?xc<LRq2Yqj=oMQk2GP$~C`pbMEQ25uH$xrvVZ;onvAueS>t z@3Wu30sSRk4Y!AT&k!z@ii~V86(Z(#{zjPZjdJ*HqV<4%2aVXRiwp@>Jp`oBKef(# zu86N{K5;3{Y1}(KObI<)osWQrLi-Lp@Ma%-@Lv-Nt&QV~@#(|F_@2nG-dk>!^=}kA zydCVH*xe*qkrK6eSeR?h{_Df%{4MFnQs0G-(-hBt&Q@<byo-!wEYsc1er|7Jj)PHX zitf~ay9#Jv?~X8yi&MT=ca+6&qvGn7p^RRS-Vj_t289Y6gk~d47j0B7uMG?4xz)TG ziE9+e_32m{rK4!|;upQ0rPwFja#_LgilgVpr-i?N<#I#XU9TrXcdZwbSO?WP*WDav zCgF5CiM`&3q(Q^|zyKm<y5UYL)FYkR<(#JwF_H|hk+GxZ*E0@o9yI3KNk+}IuMkfm zb2wZOC!@*S=IE-l#HB&;t*YKyE80y+DSsP{@nX{O+D><mz21Yb48PyxQ}@K?2~Sz5 zvniP$gg`U#xdP)Sg$ogXVOnoK4gYlVlpu;CkA?NzuRDX9xu)Bg(zWA=UcU(VkmBI( zn$Rj3U{2W@kgeGnhzT4tUjN|ayz{f9KuB!Ccr5*EYANVBG+shAgeQ7T@Qr2bgU{OT zUE5QV>&M#uL6tj~<(<Lis>nK9@{eBZCT;>BPRI85@=K-*ZkYRL`Y#6t8kBkNuh-Y^ z&Q=B^{9KD(h@V}!{5`MBc8(W}EG~cT{Ma)F7XYC=FH%86HYSJjnr{P#_)M+&*M?yi zEVCMvZh{%BUxg$Sam1S<wB{8;5?(#Z7+;@%4G!mTAZ)OI`DX8PX&F7b=CHHkkQQ&% zoqR?Sa5>7_PI{K#3Z_49$tLKOY)Tk|kO^~ftr<UOfv5`>0i{PS%Zh?ipf=|MSN7lF z*1hNfre_m9&qcdZM!IyvA(pdVs~p{NzaB=ks1s|oZf3EiKlG$*E1^zjCpFmH8%{mY zJGNwYPX*r<IDP{c+-Kh=+)dd01<zk4!x8mZr5q1`m@8^FX6B7@q*sRl-)xjbKd@?P zW^_2p!GeiJZXTmPQ$n!iP#K>x+u8%{HOF4V4mzD<70tY#0a(B%ExmY{5M)^?n>q?r zf-L-1*C|>UJ6d*Mw^Gp5FN%CvQVoWbDczX{^DXGp^nMW^iqq#AQcL=(c82R<liq%u zWl3VU#~HPnUg$27d^GMU89c)b_w{4loAKGaA!Xt{euqJ<8oWS8xK^1e?_CVOHy0M? z7@{Goy0YV@fTd5j#{Qf&+8DGMoBK`D7Vgdn$!aL4BNNvO^wXwgqGU)_`qZUE8VSfi zsmEQ8NjKL}!!oZ&Unh|M2cdt=?H!WBXSz8yg3k#ACumqKt8P>$&nJ;iUp!n#k-PhE zvu7meI<rR%h)`wE#&STHb*8}=j&;+%O}RGTEFJF(z2P}?T0#F3625E&pa<&0>l6B( zHPjw@5xNW91tFlojxT~q+}4qI)uxozcT6w76e}eidP*?b`-lCWkE*2Tw}BOBf$EJl zf|zO8iRXysDwf`nF+;|MmPHG3$T7t?-SX$>?8?2sq+gb|<3x7Vbj9F1$vVsaGaf^9 zT$h|BONfbRp$gX%{+(7q2w)OMciTz6C-ZLu;;E$xVb-xUK*6;3Cdpx(pp^^xRG|2U z1D*P`No+|{a#b?K&sX?aC*huv8lNag!K=;ZJIbeCJfrG&9fZxT9Kf{8gN4SSeMd3t zLg{ZmFD^jc+)Y#I0fQD_>jJB`%<%Nn6zXBJX~)5F>crelOISFMaIa;sxX7tQ-i@UM zt~<rh^10+6Z^q+o?fnI7GaahaeM=0Hrx}#BRxTp12|kYYwumNYc;t7CN-unCKD_J~ zOoBU3vH#yjSayUlSaw86#7HDaq)22)<VX}qlt^!osF2<wQ6teH(IU|y(IYV+F(NS` z0g;%ISddte*pS$f-XU=yaUyXcaU<~{@gngd@wZ{wy*Jup;o}$L6BZQZ=QUx+;n8rn zlehM^r)QJ@z|YGk%=^D%TeP|W|7#sV1qC@_Sy6sjeo;Y1d3j!WAwB^i8D4n>MNt8M zSy5R@dh!4BVvfV3Ztv#k?L;rkFGBzS0uanRtEQ}{O+9#i*)={&vtdh6I~$alEqXBe zO@f6k<5bDOAt57kuHq|wNz~+kJ)}rTqNs=!%p0ER%4kN89!5x{%^}QXs)sfI^qkut z^CG%i@P2pvF8}lR+Vi34u4tS+6}7&CR*bA3Dy`mhmV*7FlSHr(RWugfjN?pDY!4VK zxYbkWPq$HMLGf^uxQNV#fW9Q@q!}92nOyd(19M)9@oSiqP&et=T(^Kb7pWX}O~k+N zuT+cis{4M(u2_nK?5awW!y-KJSm?3YouZB>2b#HfU$<<<uekoCN(rRM`vRG$`GGCe zYfwp^wcvo}Q^U{V85~t{Qgn(U5W5P_Yg59oek%|!&gR3|Wr@i#VFwbx*O<iPN=9it zEbn_D!ltzd*#~|)^Y5N54E+In<!6`|xtoPbo|7-1$-NxVnLF6wf9=*<oc|rySI&g7 ztLOTR)+q*mFOqu^_rXfaTaMsLw($01D2{H;7IxK(as~;E`48)Byk%~y5<VJOX)^&r z*JTq-oj!>8!pguDa;W=OWSj@_Zdkzn<{)mvZ~dI%LNz^u4Ho62^T<6?mFl=NlA*<+ z=1<P8mft_+sTwNSF5)PbUpAcus#88%ZPqU{SLsMKXP%|XScIGksh(u49=7+bXNa;N zI{6WUMXXE7y|-$~-94-2jd8)i>w$4)Y<TMD9j$b^DIB(}TWE7y#e{@r&W7aY>5D0y z5`8Vkb1}<WXPI3Cwl>)xWW%+S0<);rb1rc_7ENyirQ<W2KS?C3)TZ@ej<lzOm=jg0 zGeq9Kno?8=%5G3nhNfz^y#45z{)Y{romyHd9h82{cCNiuDv^47y=Y*%pV8N74meJ; zLwtnYGcN^fRKHTO2yve)0y?@I2?7y6&|%TccLy&Pia@fQmiUJXJ7rd8!PPN38#X+m yGX|WcWV9d2n0HzzQG^kUDgZL{2Fjm1&#&l&s;9Hi^#mBOMMZF!nH4@N;`|S?ZRCUi diff --git a/examples/USER/cgdna/util/generate.py b/examples/USER/cgdna/util/generate.py new file mode 100644 index 0000000000..d5a74e9bf7 --- /dev/null +++ b/examples/USER/cgdna/util/generate.py @@ -0,0 +1,630 @@ +#!/usr/bin/env python +""" +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Oliver Henrich (EPCC, University of Edinburgh) +------------------------------------------------------------------------- */ +""" + + +""" +Import basic modules +""" +import sys, os, timeit + +from timeit import default_timer as timer +start_time = timer() +""" +Try to import numpy; if failed, import a local version mynumpy +which needs to be provided +""" +try: + import numpy as np +except: + print >> sys.stderr, "numpy not found. Exiting." + sys.exit(1) + +""" +Check that the required arguments (box offset and size in simulation units +and the sequence file were provided +""" +try: + box_offset = float(sys.argv[1]) + box_length = float(sys.argv[2]) + infile = sys.argv[3] +except: + print >> sys.stderr, "Usage: %s <%s> <%s> <%s>" % (sys.argv[0], \ + "box offset", "box length", "file with sequences") + sys.exit(1) +box = np.array ([box_length, box_length, box_length]) + +""" +Try to open the file and fail gracefully if file cannot be opened +""" +try: + inp = open (infile, 'r') + inp.close() +except: + print >> sys.stderr, "Could not open file '%s' for reading. \ + Aborting." % infile + sys.exit(2) + +# return parts of a string +def partition(s, d): + if d in s: + sp = s.split(d, 1) + return sp[0], d, sp[1] + else: + return s, "", "" + +""" +Define the model constants +""" +# set model constants +PI = np.pi +POS_BASE = 0.4 +POS_BACK = -0.4 +EXCL_RC1 = 0.711879214356 +EXCL_RC2 = 0.335388426126 +EXCL_RC3 = 0.52329943261 + +""" +Define auxillary variables for the construction of a helix +""" +# center of the double strand +CM_CENTER_DS = POS_BASE + 0.2 + +# ideal distance between base sites of two nucleotides +# which are to be base paired in a duplex +BASE_BASE = 0.3897628551303122 + +# cutoff distance for overlap check +RC2 = 16 + +# squares of the excluded volume distances for overlap check +RC2_BACK = EXCL_RC1**2 +RC2_BASE = EXCL_RC2**2 +RC2_BACK_BASE = EXCL_RC3**2 + +# enumeration to translate from letters to numbers and vice versa +number_to_base = {1 : 'A', 2 : 'C', 3 : 'G', 4 : 'T'} +base_to_number = {'A' : 1, 'a' : 1, 'C' : 2, 'c' : 2, + 'G' : 3, 'g' : 3, 'T' : 4, 't' : 4} + +# auxillary arrays +positions = [] +a1s = [] +a3s = [] +quaternions = [] + +newpositions = [] +newa1s = [] +newa3s = [] + +basetype = [] +strandnum = [] + +bonds = [] + +""" +Convert local body frame to quaternion DOF +""" +def exyz_to_quat (mya1, mya3): + + mya2 = np.cross(mya3, mya1) + myquat = [1,0,0,0] + + q0sq = 0.25 * (mya1[0] + mya2[1] + mya3[2] + 1.0) + q1sq = q0sq - 0.5 * (mya2[1] + mya3[2]) + q2sq = q0sq - 0.5 * (mya1[0] + mya3[2]) + q3sq = q0sq - 0.5 * (mya1[0] + mya2[1]) + + # some component must be greater than 1/4 since they sum to 1 + # compute other components from it + + if q0sq >= 0.25: + myquat[0] = np.sqrt(q0sq) + myquat[1] = (mya2[2] - mya3[1]) / (4.0*myquat[0]) + myquat[2] = (mya3[0] - mya1[2]) / (4.0*myquat[0]) + myquat[3] = (mya1[1] - mya2[0]) / (4.0*myquat[0]) + elif q1sq >= 0.25: + myquat[1] = np.sqrt(q1sq) + myquat[0] = (mya2[2] - mya3[1]) / (4.0*myquat[1]) + myquat[2] = (mya2[0] + mya1[1]) / (4.0*myquat[1]) + myquat[3] = (mya1[2] + mya3[0]) / (4.0*myquat[1]) + elif q2sq >= 0.25: + myquat[2] = np.sqrt(q2sq) + myquat[0] = (mya3[0] - mya1[2]) / (4.0*myquat[2]) + myquat[1] = (mya2[0] + mya1[1]) / (4.0*myquat[2]) + myquat[3] = (mya3[1] + mya2[2]) / (4.0*myquat[2]) + elif q3sq >= 0.25: + myquat[3] = np.sqrt(q3sq) + myquat[0] = (mya1[1] - mya2[0]) / (4.0*myquat[3]) + myquat[1] = (mya3[0] + mya1[2]) / (4.0*myquat[3]) + myquat[2] = (mya3[1] + mya2[2]) / (4.0*myquat[3]) + + norm = 1.0/np.sqrt(myquat[0]*myquat[0] + myquat[1]*myquat[1] + \ + myquat[2]*myquat[2] + myquat[3]*myquat[3]) + myquat[0] *= norm + myquat[1] *= norm + myquat[2] *= norm + myquat[3] *= norm + + return np.array([myquat[0],myquat[1],myquat[2],myquat[3]]) + +""" +Adds a strand to the system by appending it to the array of previous strands +""" +def add_strands (mynewpositions, mynewa1s, mynewa3s): + overlap = False + + # This is a simple check for each of the particles where for previously + # placed particles i we check whether it overlaps with any of the + # newly created particles j + + print >> sys.stdout, "## Checking for overlaps" + + for i in xrange(len(positions)): + + p = positions[i] + pa1 = a1s[i] + + for j in xrange (len(mynewpositions)): + + q = mynewpositions[j] + qa1 = mynewa1s[j] + + # skip particles that are anyway too far away + dr = p - q + dr -= box * np.rint (dr / box) + if np.dot(dr, dr) > RC2: + continue + + # base site and backbone site of the two particles + p_pos_back = p + pa1 * POS_BACK + p_pos_base = p + pa1 * POS_BASE + q_pos_back = q + qa1 * POS_BACK + q_pos_base = q + qa1 * POS_BASE + + # check for no overlap between the two backbone sites + dr = p_pos_back - q_pos_back + dr -= box * np.rint (dr / box) + if np.dot(dr, dr) < RC2_BACK: + overlap = True + + # check for no overlap between the two base sites + dr = p_pos_base - q_pos_base + dr -= box * np.rint (dr / box) + if np.dot(dr, dr) < RC2_BASE: + overlap = True + + # check for no overlap between backbone site of particle p + # with base site of particle q + dr = p_pos_back - q_pos_base + dr -= box * np.rint (dr / box) + if np.dot(dr, dr) < RC2_BACK_BASE: + overlap = True + + # check for no overlap between base site of particle p and + # backbone site of particle q + dr = p_pos_base - q_pos_back + dr -= box * np.rint (dr / box) + if np.dot(dr, dr) < RC2_BACK_BASE: + overlap = True + + # exit if there is an overlap + if overlap: + return False + + # append to the existing list if no overlap is found + if not overlap: + + for p in mynewpositions: + positions.append(p) + for p in mynewa1s: + a1s.append (p) + for p in mynewa3s: + a3s.append (p) + # calculate quaternion from local body frame and append + for ia in xrange(len(mynewpositions)): + mynewquaternions = exyz_to_quat(mynewa1s[ia],mynewa3s[ia]) + quaternions.append(mynewquaternions) + + return True + + +""" +Returns the rotation matrix defined by an axis and angle +""" +def get_rotation_matrix(axis, anglest): + # The argument anglest can be either an angle in radiants + # (accepted types are float, int or np.float64 or np.float64) + # or a tuple [angle, units] where angle is a number and + # units is a string. It tells the routine whether to use degrees, + # radiants (the default) or base pairs turns. + if not isinstance (anglest, (np.float64, np.float32, float, int)): + if len(anglest) > 1: + if anglest[1] in ["degrees", "deg", "o"]: + #angle = np.deg2rad (anglest[0]) + angle = (np.pi / 180.) * (anglest[0]) + elif anglest[1] in ["bp"]: + angle = int(anglest[0]) * (np.pi / 180.) * (35.9) + else: + angle = float(anglest[0]) + else: + angle = float(anglest[0]) + else: + angle = float(anglest) # in degrees (?) + + axis = np.array(axis) + axis /= np.sqrt(np.dot(axis, axis)) + + ct = np.cos(angle) + st = np.sin(angle) + olc = 1. - ct + x, y, z = axis + + return np.array([[olc*x*x+ct, olc*x*y-st*z, olc*x*z+st*y], + [olc*x*y+st*z, olc*y*y+ct, olc*y*z-st*x], + [olc*x*z-st*y, olc*y*z+st*x, olc*z*z+ct]]) + +""" +Generates the position and orientation vectors of a +(single or double) strand from a sequence string +""" +def generate_strand(bp, sequence=None, start_pos=np.array([0, 0, 0]), \ + dir=np.array([0, 0, 1]), perp=False, double=True, rot=0.): + # generate empty arrays + mynewpositions, mynewa1s, mynewa3s = [], [], [] + + # cast the provided start_pos array into a numpy array + start_pos = np.array(start_pos, dtype=float) + + # overall direction of the helix + dir = np.array(dir, dtype=float) + if sequence == None: + sequence = np.random.randint(1, 5, bp) + + # the elseif here is most likely redundant + elif len(sequence) != bp: + n = bp - len(sequence) + sequence += np.random.randint(1, 5, n) + print >> sys.stderr, "sequence is too short, adding %d random bases" % n + + # normalize direction + dir_norm = np.sqrt(np.dot(dir,dir)) + if dir_norm < 1e-10: + print >> sys.stderr, "direction must be a valid vector, \ + defaulting to (0, 0, 1)" + dir = np.array([0, 0, 1]) + else: dir /= dir_norm + + # find a vector orthogonal to dir to act as helix direction, + # if not provided switch off random orientation + if perp is None or perp is False: + v1 = np.random.random_sample(3) + v1 -= dir * (np.dot(dir, v1)) + v1 /= np.sqrt(sum(v1*v1)) + else: + v1 = perp; + + # generate rotational matrix representing the overall rotation of the helix + R0 = get_rotation_matrix(dir, rot) + + # rotation matrix corresponding to one step along the helix + R = get_rotation_matrix(dir, [1, "bp"]) + + # set the vector a1 (backbone to base) to v1 + a1 = v1 + + # apply the global rotation to a1 + a1 = np.dot(R0, a1) + + # set the position of the fist backbone site to start_pos + rb = np.array(start_pos) + + # set a3 to the direction of the helix + a3 = dir + for i in range(bp): + # work out the position of the centre of mass of the nucleotide + rcdm = rb - CM_CENTER_DS * a1 + + # append to newpositions + mynewpositions.append(rcdm) + mynewa1s.append(a1) + mynewa3s.append(a3) + + # if we are not at the end of the helix, we work out a1 and rb for the + # next nucleotide along the helix + if i != bp - 1: + a1 = np.dot(R, a1) + rb += a3 * BASE_BASE + + # if we are working on a double strand, we do a cycle similar + # to the previous one but backwards + if double == True: + a1 = -a1 + a3 = -dir + R = R.transpose() + for i in range(bp): + rcdm = rb - CM_CENTER_DS * a1 + mynewpositions.append (rcdm) + mynewa1s.append (a1) + mynewa3s.append (a3) + a1 = np.dot(R, a1) + rb += a3 * BASE_BASE + + assert (len (mynewpositions) > 0) + + return [mynewpositions, mynewa1s, mynewa3s] + + +""" +Main function for this script. +Reads a text file with the following format: +- Each line contains the sequence for a single strand (A,C,G,T) +- Lines beginning with the keyword 'DOUBLE' produce double-stranded DNA + +Ex: Two ssDNA (single stranded DNA) +ATATATA +GCGCGCG + +Ex: Two strands, one double stranded, the other single stranded. +DOUBLE AGGGCT +CCTGTA + +""" + +def read_strands(filename): + try: + infile = open (filename) + except: + print >> sys.stderr, "Could not open file '%s'. Aborting." % filename + sys.exit(2) + + # This block works out the number of nucleotides and strands by reading + # the number of non-empty lines in the input file and the number of letters, + # taking the possible DOUBLE keyword into account. + nstrands, nnucl, nbonds = 0, 0, 0 + lines = infile.readlines() + for line in lines: + line = line.upper().strip() + if len(line) == 0: + continue + if line[:6] == 'DOUBLE': + line = line.split()[1] + length = len(line) + print >> sys.stdout, "## Found duplex of %i base pairs" % length + nnucl += 2*length + nstrands += 2 + nbonds += (2*length-2) + else: + line = line.split()[0] + length = len(line) + print >> sys.stdout, \ + "## Found single strand of %i bases" % length + nnucl += length + nstrands += 1 + nbonds += length-1 + # rewind the sequence input file + infile.seek(0) + + print >> sys.stdout, "## nstrands, nnucl = ", nstrands, nnucl + + # generate the data file in LAMMPS format + try: + out = open ("data.oxdna", "w") + except: + print >> sys.stderr, "Could not open data file for writing. Aborting." + sys.exit(2) + + lines = infile.readlines() + nlines = len(lines) + i = 1 + myns = 0 + noffset = 1 + + for line in lines: + line = line.upper().strip() + + # skip empty lines + if len(line) == 0: + i += 1 + continue + + # block for duplexes: last argument of the generate function + # is set to 'True' + if line[:6] == 'DOUBLE': + line = line.split()[1] + length = len(line) + seq = [(base_to_number[x]) for x in line] + + myns += 1 + for b in xrange(length): + basetype.append(seq[b]) + strandnum.append(myns) + + for b in xrange(length-1): + bondpair = [noffset + b, noffset + b + 1] + bonds.append(bondpair) + noffset += length + + # create the sequence of the second strand as made of + # complementary bases + seq2 = [5-s for s in seq] + seq2.reverse() + + myns += 1 + for b in xrange(length): + basetype.append(seq2[b]) + strandnum.append(myns) + + for b in xrange(length-1): + bondpair = [noffset + b, noffset + b + 1] + bonds.append(bondpair) + noffset += length + + print >> sys.stdout, "## Created duplex of %i bases" % (2*length) + + # generate random position of the first nucleotide + cdm = box_offset + np.random.random_sample(3) * box + + # generate the random direction of the helix + axis = np.random.random_sample(3) + axis /= np.sqrt(np.dot(axis, axis)) + + # use the generate function defined above to create + # the position and orientation vector of the strand + newpositions, newa1s, newa3s = generate_strand(len(line), \ + sequence=seq, dir=axis, start_pos=cdm, double=True) + + # generate a new position for the strand until it does not overlap + # with anything already present + start = timer() + while not add_strands(newpositions, newa1s, newa3s): + cdm = box_offset + np.random.random_sample(3) * box + axis = np.random.random_sample(3) + axis /= np.sqrt(np.dot(axis, axis)) + newpositions, newa1s, newa3s = generate_strand(len(line), \ + sequence=seq, dir=axis, start_pos=cdm, double=True) + print >> sys.stdout, "## Trying %i" % i + end = timer() + print >> sys.stdout, "## Added duplex of %i bases (line %i/%i) in %.2fs, now at %i/%i" % \ + (2*length, i, nlines, end-start, len(positions), nnucl) + + # block for single strands: last argument of the generate function + # is set to 'False' + else: + length = len(line) + seq = [(base_to_number[x]) for x in line] + + myns += 1 + for b in xrange(length): + basetype.append(seq[b]) + strandnum.append(myns) + + for b in xrange(length-1): + bondpair = [noffset + b, noffset + b + 1] + bonds.append(bondpair) + noffset += length + + # generate random position of the first nucleotide + cdm = box_offset + np.random.random_sample(3) * box + + # generate the random direction of the helix + axis = np.random.random_sample(3) + axis /= np.sqrt(np.dot(axis, axis)) + + print >> sys.stdout, \ + "## Created single strand of %i bases" % length + + newpositions, newa1s, newa3s = generate_strand(length, \ + sequence=seq, dir=axis, start_pos=cdm, double=False) + start = timer() + while not add_strands(newpositions, newa1s, newa3s): + cdm = box_offset + np.random.random_sample(3) * box + axis = np.random.random_sample(3) + axis /= np.sqrt(np.dot(axis, axis)) + newpositions, newa1s, newa3s = generate_strand(length, \ + sequence=seq, dir=axis, start_pos=cdm, double=False) + print >> sys.stdout, "## Trying %i" % (i) + end = timer() + print >> sys.stdout, "## Added single strand of %i bases (line %i/%i) in %.2fs, now at %i/%i" % \ + (length, i, nlines, end-start,len(positions), nnucl) + + i += 1 + + # sanity check + if not len(positions) == nnucl: + print len(positions), nnucl + raise AssertionError + + out.write('# LAMMPS data file\n') + out.write('%d atoms\n' % nnucl) + out.write('%d ellipsoids\n' % nnucl) + out.write('%d bonds\n' % nbonds) + out.write('\n') + out.write('4 atom types\n') + out.write('1 bond types\n') + out.write('\n') + out.write('# System size\n') + out.write('%f %f xlo xhi\n' % (box_offset,box_offset+box_length)) + out.write('%f %f ylo yhi\n' % (box_offset,box_offset+box_length)) + out.write('%f %f zlo zhi\n' % (box_offset,box_offset+box_length)) + + out.write('\n') + out.write('Masses\n') + out.write('\n') + out.write('1 3.1575\n') + out.write('2 3.1575\n') + out.write('3 3.1575\n') + out.write('4 3.1575\n') + + # for each nucleotide print a line under the headers + # Atoms, Velocities, Ellipsoids and Bonds + out.write('\n') + out.write(\ + '# Atom-ID, type, position, molecule-ID, ellipsoid flag, density\n') + out.write('Atoms\n') + out.write('\n') + + for i in xrange(nnucl): + out.write('%d %d %22.15le %22.15le %22.15le %d 1 1\n' \ + % (i+1, basetype[i], \ + positions[i][0], positions[i][1], positions[i][2], \ + strandnum[i])) + + out.write('\n') + out.write('# Atom-ID, translational, rotational velocity\n') + out.write('Velocities\n') + out.write('\n') + + for i in xrange(nnucl): + out.write("%d %22.15le %22.15le %22.15le %22.15le %22.15le %22.15le\n" \ + % (i+1,0.0,0.0,0.0,0.0,0.0,0.0)) + + out.write('\n') + out.write('# Atom-ID, shape, quaternion\n') + out.write('Ellipsoids\n') + out.write('\n') + + for i in xrange(nnucl): + out.write(\ + "%d %22.15le %22.15le %22.15le %22.15le %22.15le %22.15le %22.15le\n" \ + % (i+1,1.1739845031423408,1.1739845031423408,1.1739845031423408, \ + quaternions[i][0],quaternions[i][1], quaternions[i][2],quaternions[i][3])) + + out.write('\n') + out.write('# Bond topology\n') + out.write('Bonds\n') + out.write('\n') + + for i in xrange(nbonds): + out.write("%d %d %d %d\n" % (i+1,1,bonds[i][0],bonds[i][1])) + + out.close() + + print >> sys.stdout, "## Wrote data to 'data.oxdna'" + print >> sys.stdout, "## DONE" + +# call the above main() function, which executes the program +read_strands (infile) + +end_time=timer() +runtime = end_time-start_time +hours = runtime/3600 +minutes = (runtime-np.rint(hours)*3600)/60 +seconds = (runtime-np.rint(hours)*3600-np.rint(minutes)*60)%60 +print >> sys.stdout, "## Total runtime %ih:%im:%.2fs" % (hours,minutes,seconds) diff --git a/examples/USER/cgdna/util/generate_input.py b/examples/USER/cgdna/util/generate_simple.py similarity index 96% rename from examples/USER/cgdna/util/generate_input.py rename to examples/USER/cgdna/util/generate_simple.py index 25cfedaae2..33cf1ee7f5 100644 --- a/examples/USER/cgdna/util/generate_input.py +++ b/examples/USER/cgdna/util/generate_simple.py @@ -29,7 +29,7 @@ def single(): strandstart=len(nucleotide)+1 - for letter in strand[2]: + for letter in strand[1]: temp=[] temp.append(nt2num[letter]) @@ -58,7 +58,7 @@ def single_helix(): strand = inp[1].split(':') com_start=strand[0].split(',') - twist=float(strand[1]) + twist=0.6 posx = float(com_start[0]) posy = float(com_start[1]) @@ -79,7 +79,7 @@ def single_helix(): qrot2=0 qrot3=math.sin(0.5*twist) - for letter in strand[2]: + for letter in strand[1]: temp=[] temp.append(nt2num[letter]) @@ -114,7 +114,7 @@ def duplex(): strand = inp[1].split(':') com_start=strand[0].split(',') - twist=float(strand[1]) + twist=0.6 compstrand=[] comptopo=[] @@ -145,7 +145,7 @@ def duplex(): qrot2=0 qrot3=math.sin(0.5*twist) - for letter in strand[2]: + for letter in strand[1]: temp1=[] temp2=[] @@ -189,7 +189,7 @@ def duplex(): if (len(nucleotide)+1 > strandstart): topology.append([1,len(nucleotide),len(nucleotide)+1]) - comptopo.append([1,len(nucleotide)+len(strand[2]),len(nucleotide)+len(strand[2])+1]) + comptopo.append([1,len(nucleotide)+len(strand[1]),len(nucleotide)+len(strand[1])+1]) nucleotide.append(temp1) compstrand.append(temp2) @@ -208,7 +208,7 @@ def duplex_array(): strand = inp[1].split(':') number=strand[0].split(',') posz1_0 = float(strand[1]) - twist=float(strand[2]) + twist=0.6 nx = int(number[0]) ny = int(number[1]) @@ -249,7 +249,7 @@ def duplex_array(): qrot2=0 qrot3=math.sin(0.5*twist) - for letter in strand[3]: + for letter in strand[2]: temp1=[] temp2=[] @@ -293,7 +293,7 @@ def duplex_array(): if (len(nucleotide)+1 > strandstart): topology.append([1,len(nucleotide),len(nucleotide)+1]) - comptopo.append([1,len(nucleotide)+len(strand[3]),len(nucleotide)+len(strand[3])+1]) + comptopo.append([1,len(nucleotide)+len(strand[2]),len(nucleotide)+len(strand[2])+1]) nucleotide.append(temp1) compstrand.append(temp2) diff --git a/examples/USER/cgdna/util/sequence.txt b/examples/USER/cgdna/util/sequence.txt index fff469c8be..8f34319b82 100644 --- a/examples/USER/cgdna/util/sequence.txt +++ b/examples/USER/cgdna/util/sequence.txt @@ -1,4 +1,3 @@ -single 0,0,0:0.6:AAAAA -single_helix 0,0,0:0.6:AAAAA -duplex 0,0,0:0.6:AAAAA -duplex_array 10,10:-112.0:0.6:AAAAA +DOUBLE ACGTA + +ACGTA diff --git a/examples/USER/cgdna/util/sequence_simple.txt b/examples/USER/cgdna/util/sequence_simple.txt new file mode 100644 index 0000000000..81baac8470 --- /dev/null +++ b/examples/USER/cgdna/util/sequence_simple.txt @@ -0,0 +1,4 @@ +single 0,0,0:AAAAA +single_helix 0,0,0:AAAAA +duplex 0,0,0:AAAAA +duplex_array 10,10:-112.0:AAAAA -- GitLab