From cfb4d07466a7ae37c7f0adeaac6be6d76eb463f9 Mon Sep 17 00:00:00 2001
From: Aretha Foo <pfoo@ed.ac.uk>
Date: Fri, 28 Feb 2025 16:51:28 +0000
Subject: [PATCH] Added the related files for this part 2 course

---
 .gitignore                   |   4 +
 Ex2-Menu_TXT.txt             |   7 +
 Ex2-Students_CSV.csv         |   5 +
 Ex2-Students_Excel.xlsx      | Bin 0 -> 10178 bytes
 RBeginnersExercise2.Rmd      | 267 +++++++++++++++++++++++++++++++++++
 RBeginnersExercise2_Sol.Rmd  | 267 +++++++++++++++++++++++++++++++++++
 part-2-data-processing.Rproj |  13 ++
 7 files changed, 563 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Ex2-Menu_TXT.txt
 create mode 100644 Ex2-Students_CSV.csv
 create mode 100644 Ex2-Students_Excel.xlsx
 create mode 100644 RBeginnersExercise2.Rmd
 create mode 100644 RBeginnersExercise2_Sol.Rmd
 create mode 100644 part-2-data-processing.Rproj

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..5b6a065
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+.Rproj.user
+.Rhistory
+.RData
+.Ruserdata
diff --git a/Ex2-Menu_TXT.txt b/Ex2-Menu_TXT.txt
new file mode 100644
index 0000000..138a606
--- /dev/null
+++ b/Ex2-Menu_TXT.txt
@@ -0,0 +1,7 @@
+Item	Price
+Burger	10.90
+Fries	4.45
+Juice	NA
+Salad	9.50
+Chicken	10.90
+Drinks	3.33
diff --git a/Ex2-Students_CSV.csv b/Ex2-Students_CSV.csv
new file mode 100644
index 0000000..cfaa345
--- /dev/null
+++ b/Ex2-Students_CSV.csv
@@ -0,0 +1,5 @@
+ID,First Name,Last Name
+D001,NA,Smith
+D002,Anne,Clare
+D003,Charlotte,NA
+D004,Thomas,Train
diff --git a/Ex2-Students_Excel.xlsx b/Ex2-Students_Excel.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..68dc8c59487aff04167805f3e0003ae68477875e
GIT binary patch
literal 10178
zcmeHtg;yNe_I2YD+%34fB|xy?!9BRUL*o|Qz3~tT1Pu<s-D%w2Ef63y5=ii0PiE$u
znaunCg7>P{x^-)H?Nilt_rB-UIafs<4jvDH2tWn^08{|elWc247yuv<9ss}vAj9fO
zI61mmIJy~WcspCT8nAmg*i(Lhhh@wKz(TM8-}Ybp2Fg=LlzKU_q`tqplh|dGTdjSG
z{OoJ+AP%#tNLOF-K&hEtj*ZP@PSh=ycpknjUk%R0sxSB1n02kALql-nK&u*BOc1zB
z(~y*pqkr(2u9J`$1k}}m<l>RO<RUgQinGiDWVkf;s_{y0iOH3dSmP6j1ny)n55C6U
z7-&W-)h!VKuXJxJy<W%WAO1LlwKIh~Yiq;gYk0B4CJP@!?Ym0gVOXd_rLE3ns@4p=
zNtf?%Wo+V^Gd6===L}D5$5<QRKy2;cBUj`lsr?xJtyFg)(T=6lPv6g{lQBN#o61eg
zb9`$fHocseK~%xhivSh7m^y+AZpR7#K2iL9g^|-9%C%d%EnKp`4N~)#oP<PkB_e7E
z*3H7f97$yXKIY&z#o(bP{x0ROW@LVjMN>$-@KQe>@vgAHv|MzFV-0mt(fUVx-2{IT
z^gW*3(+fE{OZyVQ{eTDnJU+q!RQ@5$dQDE6Z&0o&KvnkyDoZ1vg}o~W`)}?4<oI8#
z!N2|Wa*(2OFDF_!<kejS`1{;iB9^$kr`W4jDvdXRva6W&arv~QYn=>aSQ;cDa54ey
zZ|;Uy)`a2?!Bkh<yyY=Cc!D$yo)zKgH!ki-%=ExCX_xY?K5UP<%ekve898r8kM;!S
zvZj(e`JoLO>6tUh8mtL+EfN&mLb7lI;mjc20Y#m4(;t;EbK>e>E5fTA`En1FC$a(-
z(u%&Ii$)4498YKBgI!InmMZ<g_EeYGB%12h0yfpAIlvdx-bUv3T^EvBow)aYY)bFI
zYSg?qKe#7k2I+Dy19a<n&qlJn`}k0L%fP2Y!7&si>rfB%Z;`x{_)@qG707F-gfO9R
z#><|=(+Oy2>f~hio8KzXG;mtx#Pfe&_h@`A+=oGxr)sjTURL+I<m<AGIgSJ?i2qYI
zZ0XI0XExSye0pVESCs46S@ZWCGxmVv3p8Pu!Z23E3=d@Kwnz)==rO=!$9qBl*g6xb
z9&Vje7!l{V0JP44)>YYh1KDONfXV@kQXCV_!ko*nYms2>E`sE|+TLiFhJ(HV^Egg6
z(D8iq5*cOnT@)-Ia?%ZQxPFz?Xfbys&@cm0@+_7(w@O|0kbo^g6gHiwzCw;6%^L-a
zB?-l~M2u^A=fGC|RsU<^g7#wJI;~TbO?ebF!&cnT@rGfk@`5zG85rwED*rT5I`%?{
zT(xLb_h6!*))|9|ua{|IuyQS`l6E%>5uK5AK4pb23pO4k*}5H<4-=DPfqDls&JCMo
zgfwkM$%4p2AxnoEO&O6t^yBK8Cx?b+JWG<;>_w{qW@~TqJ3>@Czp>KM)sRFzFK1~i
zeJ|lwoKUV|G%W-kKk6rNFN8i{YL3k<CTj%uOtC$xqx5%c=W>AYr3qy03kgRHO4M}J
z#n98rXg$v*WQrBe)S^FN8=!<GA&jg7+4evT@&p`Z6)U<jt&IoakjZD}dGM?klPSdE
z1abrVcGaD?(woPtijPz-1S>7gDoymwpRYHl#p9W6E!sFyyD#Rx5!Ri@3yYcg$Aa=9
zRk^PBd#*2i{9hE~Mj#4LoDJ0-#rD2A7;RfGAXJj|6nWeJl7KnVbci{byx)`y1~bKl
zpvCg`xwx!G)UNxZRe&TM_fubtrx6B&6y9dVkcipLIQD_$YC%*Po^w;4`oZmxO~HF+
z7D1_78C6;!Ul&HbC4bxdlf%vYR|=@W)>C2a>9*UUetLv~6t}2nN$0GeVBXLVjs)G#
zV)SM9YGOK+Tkdw`X;5^%tmw%9@O;c3BH&ph_j1HJcph8NEIKLrSm}^C&mD7xVvsea
zoB7I;5cFd4fe!5vO7?%tAV?%7{Q^q=7U-`e0Av`b41RYPe;31_t^)=d$w9^R-@TQq
zDa!Y8Vzobch~V_h^1#Pla^s*r);z*Q9;#znp{L?`^KFBaxy3+xUY-NaDcl<}Ht6yF
z8OAmo?qxSiNi+_;53W5#5W#8eVhj%1^lJrqunYn&&hg<<+1Do+Z(VVlML=NI96bKd
zKN#tVBl3%=CBHRKhx6KwbGmcex=ubFhEAO$b-RH#q3XvK!F7#|ly8|V_a2)iR<01=
z2YXXYT@a(l3ZZ)grVeJQSblU%T_K;h-B~JnkK1+U-=OW0mC2KrJ`?r}ccY8|%TNR`
zfn4`KWVUKTw%o*ThnYH8&RIvkK`eKN2In@QBmHm6DDe%MeSp#>6*_YgL9h5@=5)2T
zuyAwb_<7*^Z3=w{8nT6keBqF=j*v;=wCuQnKBhuC-p1*#mEaw1$ru(U(n833(hO^&
zd`Zn-4e!GP@fV5>r+R8TXG2}}Nmf}V7#e$~u@AWvkeTtPbRy2`Knrv{H>8|(Wk;t0
z(xbEvL}YlB0m3?rydmO?4~S#_rlg!rYC6T<UCNEd9WGrXR}2-}e2NhYt~LibiLG`-
z4W$Kv3@oon6c#1O7|E;O7>x{2SnKMC%&f9$DavTCQJS)+qO}spMG;)vo0RXkhWR^v
z5+O7{Iz8E*5d_y=5X$MITD4b*2b(ukUDw`YlX9`g7mPeq^_csAHfGy_$x&H*Oie+L
zFF!$<zh$uZ3hn<2yp~HtxL10t#OV0MUr@YNeI>!LqqdDQSQ!~onv1FPz@KZ+?RbB|
zWU@11iu2<R9WN44lBA0BRiM#>7}ttFm3pcXB?&B*Xy*lAi27oTb1k5rnoAnRW0pSw
z(`ROWnj=0%<t!Do%HskJsEUMl!T@Lg%!iJ{Q+X5NEna`JA?rQiZ-e-ABvU|o7(J9&
zQjOEq044-K-W17|I4s(;vbW-;C+l2X{MoaT4@Bel{LRhuhT^Y6<j3EB@u@0`M|s_!
zMM~4AJ5(B>GTykqkM#l5>`=Qs>8;Mr#u82F7E6!t%JS(F$%*K3Xr^{8`6qfF4AiB3
zrn5ljtHeGy9x>yDJYB*doCgm09vw+s?9qk-tWl36^3p!Io+UG7Ax=Ro%+bCCaknVs
z&`*)MxH>Oek`M}Lwl=mpem?fkrk3fIg7T#ztK7sW%WJT#B1?@X_au*&8so6SL|~2l
z`ID{>MRKUJ*K~B(&76kPwx)8n*pXJ{1&L|zjbL{UR+bLZ_X#ZstqO|L7Jr+={>|%d
zLaYuYBmiK5^0%+qpS*Unws5fE_}TuX^s&xZ3@IO8C*h4KnycsCvz-LG_00+U<TYCJ
z>=d&4hGW&&Jlvo*Qe0R%flD>Ug2HrhC*hRomvHFq-x5%0>%LG-F^nzJRP51a*~{sA
zpvTBOv=<kD&w1l>;dVCNp5Y^t@uUZoX4Ii_F8nTbF@ts{Ex?YSk=IZ8RZ1+j1&ZVK
z6V^NVmG^Ie*bYO;gNtz2lJ<FN6tV&<%mn?&;Ym$hmi$2*^s$4ldT8Dg43a$!S<fI~
zDO9T<q5=Id3^rBq_2;IdIYv*F3-JBero6?KB#s`-xS>^ALzh>x(_1SZ8Frv{@|0@%
z+$4ufkbK*B`a2^I&Kv7%y8C^?Mh=3h*YuKmh&mFExsTk<3dVlgQ?BP<x^1T^kwbKS
z3yt7P^an4G?sYpGri)>$d4p4n8Wvw1r8*{>psU=><+W--G)D;5Cyfj~H9sv)2wTY2
z2&fLHKx^5QdQv6EB%<FNk!@LQb+nL+XQbD=Apg}+rSY4cili*CDV#m5-qy2u8Q;9;
zARc!eEkvvx#5AFrh+cNArmmn(&9|o;d%`UO2!vImmE9idM^m0eu5F586xi%rp_@U*
zaSF$wH*V5kLYLG0Q6&=eP(%~mz{y>ZYG53tO{#X~W!g8S&~V3+JI6=Kkt6K;aQfvU
zqEk5F+rf3OQEhWQv%Dh`ox1wTT!PW#{XOBjQRl<y;ThkTS%$NozRtVrk@U{Tvlo}u
z+pG8-9scL%Uy=zo{4e*F6P_-wkUh;frHZOKt=uw$(8rhrVced<h-UY3iuZwDVjp;v
zoQ!~B4fQAwd)%wP_N<U;4<NJ&b=obpP@FZ>#%ESZ*C4E##d;8VzSt~%J92vJoTIRo
ze9;`$8a0_@cfu}L4*0&$ldH9@wiH)w@fkfDC#s0Qds@A-Mk4lUS*t|k-hwdqwnBB(
zw-c<YkjNQ237|2Z8hf3DY2-+c1!d^+`PjNcRd0#ioUorcjS2KQINwBm+#49NW>-B{
z6OiP3_Ov6$e3u&-&@Dq>$w4dW6U}sSW5yaP|M^A4C5c!kD%uXlExAG8bC5jnV7fVm
zh$l(dV=+!e$aPr(1nzr7k3l7s<QXDM=$Ei0C@CyKrc%qb+KAH7b8kuAgs^M-HiaSb
z#$x<wO<QbcI|ZRzaCMBuvQWcf{a6%_;ERiosmrZI>Ta<EM81t8uG*6(JR|$fQ(Dx<
zsZWpP%2~kUnwRg4VJtwUcJoZYbYvEb?`J6v)mBa}!CnN6QP>*=*Cdi_at8c+?=*a}
z<1rLBpTX`??8eM6Eg&3lf?7LekK&MZtkQi5PW%j<S>8B(8Z<Uq7yjA-PwZz)OnKFK
zHHM{X7c68Fj*N;%xE36V|DZ3KU8ZY)l9IKU^&ArpPx^H`Zt`wi|3p+R!=2|axhIS(
zB0_@R*=@>;EJllc$)Hat-ziw_e5`X;yPQGguJhiSu7-0z9&bx6XADKEEXM6JFLVbk
zuP()GGu!-vhUp>a2RX`ZHG4c%DJLTc{jK7j{_t&6u}GXHE2=FF&9*S()IZ)n%V1qi
zp<&x`p%-P~UQU=q)0MDT>v|WhaH-+NL!WC_5aG^(|9-OKR_VekA#O&=s=2)7?Z+nw
zpldHC{WytqxuOGIVaR&L?9rP84NskPM8>5VA`=BmQdM^%RU-^>JVUEc!=+5^DL3sY
zmBaKrU1^s+R89wce6PK$O{w<12(e|ca7(h3eKY*xy3DrNO4RX8X`f`CkC`wkrW`z!
z32y33EiT@+?lz_EQUz=@v2LH@IHx&-3Ab1U$I@<f23j&BX1SG{O`A!9Jcn_Z*mhFX
z3{64w^y-s+f{xeqw#bdlG`x1#xCcbN3touLG_!hyAJm&Xh-Gsee|Vat5fj!%NT$5d
zPgWX6<Vwo$P3dDZVhcz`=ba+^;J~64h$W)C^Y$eRMV1Sen;_VWyPL))e2qvoM~Nb6
zOTE(hp|pSFX*ul@Sx-AfTtYkCi+w)q0&Q&$7Nq#7z?cx=;TTs?RyBUo-nHkp@v!wm
zTlBO*N<7Vr7!{8X8Rj`X&!Q?vr<7SJh?JM!cIO?j3uTnTU9~H(@lnhTyCT|rwkqmp
zKAd$+{t_X$i(YmvF<Z-A=y<Qh2CTGWZ^pox^OkGdn)Q!?>*i#8KgzAB)27rL{ppkH
zN*xa?GMe)2^B)fG()_|_l?<Zb>``6!m#jx}**|WESKH<+kvxNhe4GRLi`5QP#GTS<
z7;mOY!70y;V^))O;*dAm1Q^&J-!R!Dt;sF2rI46(Vrhh^<zY9Jg?SLY9=Wg9KXhlt
z+%!&7)yJ4Qx`EL*nUwOdsTy2hKiS!18w<mZiBT{bC9L1N+3wI^@^gV;Q)b`R59jJQ
zc-V$n)(bPjw=3mO9?!oBGpx{qp!=*<Pmu|ow$q?vEYCTr9Py#n0~_^_ZRsrD<m;3m
zrOFAGG?L!u-cz!e8SgRqn|A=d+qO15{>Kk;FW*L>9yG7~S_}X{|K0t%x_R4Mxc(MP
z_UW%U?mWlx=a>w@g8w!(wblT;60;b!jL`4}HtU0bs>PB?ehk%=UaQn&yTqY5cgIwe
zDfOa(JoW^d6$a)|QggoY3hz|Mpj6bkeYqZAQDwhQ3WrJq$x){$LVI3iD*q^f3CQY9
zd!xVb!RCAiGam;6`my3lW`<5#3oI8kYwLIx%k2|CQ{6VS;DXqx^I3|-w2Ae#bTj+5
zvnoj1#&of0iEkRq!Y9l*OPZU~M!U^^zMWBdflAsng|aPQyCqG!@x7LgVdw?C{+g~M
zk&RxN(c*H0b?juTuZJ2QzIP_a8Tuy4H8ARf*M`ByX&k5NtH8VGv?DwUdIz@+UNj&(
zyfaV9>0)K+5ux5fg1sy>32rKPX!Osa^WaI4xY50KQ!`X@Cv!_q?>smiH^exc+&0O%
ziy<xpF*hO6QXcJrAA2H6t{2WE=P~DywVL~-^1VDZ{QFhjNSahZ)MK>oH#>=fzV}ld
zxpO=s8)c_eaP4oCJNU4_(Xw@L1qZV&6H@6#_%4K1_*=T!9$~@7%=wig!B~e_yhWOs
zr$V<gdl-fH)#CY#UOCUh;2bPgER5R(KFi0T<2ryh2tSYQ-gI;hsCV{oI@X5RQgzHD
zQ^HlQFj5;*eD~%?22VWm0`4(Y<i}1C*M&0S(a8$t3u0upZH=45+fk4a>Hr9J@2_nd
z!#@g!SdgHUsS103{-TL}!kVRI1VkSe5__`UZ!1lRc3J)Yk|KBsu71Ru{-_^&k}?Bb
z1n|w|aH8eA+vi(@kY$)VH#DF9r1#^km>&;Wi!KA*TY<IxGn~{%rWcl*w-1wtm=O{i
z?yL)iVmS-3(o59q12Hx05LSg-a<9!G2-<xIdVI!7;@g%|Bn*De)3!XT>xL%*C6WPc
zNCtzmGn!VZ7cW&`3t-||q*ijqVWf1DGx_i7><Ou)XpX~4={$8S?}Gc=dEs0V#vrps
zCf-EZJEZ8^VeeeVvTQaHJA|v$KP|0iYqqoo->(XB5U4mf+11e>7nb@atKx8Q9j~IQ
zrG^H$bcA>A3k;}$a$*S>_4jl#659=C#6Qq!K=M>X=g)Ye4ZAW_1j^?ZQ=<}UCrd-*
zU$32XJwvLr`d${NC{r}!74-be8;I@-(gl(UZH@4T3wwCMuwhvB`gN^%xY)T?v2rbf
z;<K?jbGS`O36voMgKtUrm*%(N`otreg_pGn`Z4j^bzr{J?F0AV5M%4NZk}#jMi_88
z@t>ET-<}Dr?7ozoX!sJ$r=E2;sadeQxTy<13%8>jBCYIS@p5(EM3IWQI3pkz-IHd!
z<aCvE0r}DBKJ3?Ki{t{ih#k^(8)QT90>VEav~|)!*54+mke7?TR+n;zQ3uyZKd%#A
zef4DF$ozYg?7j>3WO~aiNxS@T;<2sVKEH>5dah>LK;6{`El#5tvO(-3j%=I$ZyBQF
z*a77vuLj<KU-V2^jNq3Y+!4MqaSGt1z#=TX3bK;>0chQ(Cr^aUnJvEi0!pe5u@efU
zEja~vkj5dsRK_Ro8g4oRAP)-xJe8`qg?GBBdtN`mbq>sgzooFi;r*ODgn7KjbsffT
zWYMJeZXI??Cu3_nzsFC(R8tl$-Gj)672er}8Hw}AgJ(}?kk?4(CI1uxg|_5N#L8Yd
zCD-mE)c&1UVg;}2pKQk465=U#5)2}IcwO3?YmNB%no*&<Sn9(_7#1X$lg?-gAqTC;
zuaF+v<P9KP-dLAS%fzFdzR;`ZUMz(Ntx$bjkkLyOIX73~NNivZ+6CttQaEVdfRn}A
zU%ogbOpN#OcN5X3K<_ZtO;v6lW)eVj@!rx{VP8RI?njcW<Yt+-(7PoO@aCL%xL~L#
zUX#Ms5U0AsPmwxk5<_VdX8?ziOcjMTs?8i)`V;Kn5%ksc)JCnkYE!nSo5G04NzDfL
z1}FJ45oA<VGcFMk;8k!8z#>%DM5eiF=a!}4ASl|ha$V(^AFoMx7sQaW8zvePcY3j=
z$#7c%wI}!FYqZIucK_@RMednmO0)Nydeh=kM<BSh;+2i%%YRET@dnjEC`M_prP&QZ
z`g>4}ROO35J0SLvzL-H%Z#?&&)L9hrm_5~56fHG|-|q4A#zriqU@tw;89wSrHrV~g
z=XQ-Y7QD1)Nqh0MBhU^<(nck1C$z()s;Mul`B5dqGJ49LE?*FxFzzrqw1!RsRt76y
z5f({g>zx^od$b$XEn6lPmv4MNp#!3mx*nyAP@;_n;(<x4QV(mNcg;-BHk;#CUx6>k
zS#;l<lm<J<p4^=51_5KCr`e#wKtGbKfj!h_Fqnv3EwHsO%Y!rWj;2_hBgjT3gt658
zo)+#xK+kCzElO9e$dNdbgV8sNklM8mIeDxSZQ87@TWawfl4>lA_&KR9#EDBXDz|FZ
zsEU|kE7f;oN=r2bt=oxoG%SgrXGveymYusjK%A(<Vg}9Bq=l|Zq<|7Q+M<L!q`2h#
zRu*+9p&AUf#tuJWi6{Spy`tN77*J!eF_`NzxcfukzDT;2BJh-b+oDPKRQ-0OW?bY^
zuCo)-y!8D|bjN;%c&z(XCb?F)Dw@#-p!r&@Cw*+w$U!}~<^j^{wz(k+@8M*v6d}A2
z)t&=Ug!twNX02}^b=YKmdTf$7h48~FPaOAN4FR1-;4y{?-L35QJ8OK+vRg}5i#HHq
z8@)IDTb<Uu)0Uo)%^Wu0J1>%6J*^`2O7q|FEXFk4bm{PQ5@L2DNz;7V!Z7aJNL1(&
zTuZN|82+O*k+w-_3MGFiwEICKbm;e#$gj0CaAVq&BWU!o1&vYg{)j@XO@J2W>TW<A
zN2{N~h+EQ{92^%`7zAO1YImK!LNwA`{WvX{dR?1>@;F}6m}kk-vcIDpTa2nKeP3|j
z*?6E_benRWO8wO$cu!m&X0cFYGcT#?cCEhxuMfkQk2X{~CvbN;=RGksL1E17s|lfX
zCaLU*1M4K|uI;u$vZsWZn9&1hl?<3&E-EY)P8x17g$L82`W{t<8JuJc)&YUf@XAeY
ztDi+zs2Uw+9gDN?Ot6c86>1KMt(YHqafbtUOOm!(YWY3#Q=x~mH=_J?Eqmko9u?Ok
zen)Aq@B3i{E8cXyxl^ud;uUlL`_CHQN%zo-{=YLM(OCH3<e)Fc0-F89h9-;6oy=5#
zPR_0zW==qhKdSxz%Mn9a;|DUb3*p4;-$1&>0C~nu=}N1NJWLkjBf8ojv(YwIjM9S4
zKiyx&EDOjFlDkd}aqR1ZsxWF%I=U8h%V3ijrC}H=r7AoSd_K-^p`(_Y$yH0UFiv8#
z>ZUGe2{<t=Am+q6b~lmOZ^{m<trJ*7^yRr4{4haFV8Il^#^qO^@1~BOM}8o!z$k5d
zMbYuoXP7l1g@2WlWGL9gt-a?R17ML!z~Q8P%8-PA^m$5T^Vr78$WvUtR&$xH<g#zy
zE#m^Duoot7t&C}Li`=@vy=(CHBQFiFKlJQ}Np0RtAvIRYI-d_*`D9$-9&fetCJg<P
z<4$9Hh1lxTiFfIxRv7rwoRM7-AI!oi_MWu|l+2eXY@1_)Xk(M3&3Tn<+PokXqOBVd
zWJu?DTLGp71GKa{Qfa(&Zb@U2qJi9lgcSLA(B$&J31~OghZY7UAUqrZfc19*nm9ZE
zkAG0&{jt3RDJ$=AV)a8_`9aIO^ENtE8UoTR#{EI~smnb}M+ziQ7UG3wO4gJoXtC+S
zgugmG2nj#l*}MBLUYCc3vWJV07Bzz@7qwfe+4?N+?SM4gw77CRW`W{lF#Q!BZ7L&p
zq_1`XJPpX&k#9?MUPU#8XLmDI!8w&?c!W18>JgU>>og^JbcVbH=Ik%vV8G{v0;k`h
z^=~#=e+f_=u@)5Z)1e?moIkXh@v=fcW4m;+Qt#7Ny>hPJOeN#MpQ-3NNzTt7PbF)#
zwTO!ld_JFghAj66W0od^y{^y&rQX(ND!4F%+buw#p&9%@6pB%dRUtbLZ(6SOoY(&=
zX+eRJnNKfCpo%O3T5kuL-C+G~o>hBbjH`(iXW;2Jnafu&oQRfu@&QHPCoY!>&QlD2
z%!q4qsaw$dGeL_07ZJz^iZXlKR*}x2uf%E6=Cw+NZF27Jq%z8T#KWXs`3|1uGQ~yl
z>%8lZgY{dBAG1Cl!edpyH^Wm3Llcm)4Jbi>r!EXE8`O#X=haew4%MH>|FCvSMgE@#
z{<*H>PvCFIbf_Nww#wsI;IFmrzo2bUnf+4v{uTVs3gKT+0H7D;C;0zUFZ|WcuVtgZ
zEL}bMe-H7mBGRu`eofN<Wn~=Wr<Gq*_P-kVHO=ss0Y!qJ2L4Jq{0jXwHuwuVL;gGT
z*GS=43;%SWf8hauQwjj!U+(l*_&=w=zr$Uq{|5hKGE|XAfQ~KzfC}9Mp&px+?zgl5
E1A^4fr2qf`

literal 0
HcmV?d00001

diff --git a/RBeginnersExercise2.Rmd b/RBeginnersExercise2.Rmd
new file mode 100644
index 0000000..d39f9b7
--- /dev/null
+++ b/RBeginnersExercise2.Rmd
@@ -0,0 +1,267 @@
+---
+title: "R Beginners Exercise 2: Data Processing"
+output:
+  word_document: default
+  html_document: 
+    toc: true
+editor_options:
+  chunk_output_type: console
+---
+
+# Introduction
+
+Welcome to R for Beginners Exercise 2! This notebook contains the exercises that we will work through during the exercise breaks in this lesson, and it also serves as a workspace for you to use during the session.
+
+To execute a line of code, click on it and press *Ctrl + Enter*.
+
+To execute a chunk of code, click the green run button at the top right corner of the code chunk or highlight the entire code chunk and press *Ctrl + Enter*.
+
+# 2.1 Import dataset
+
+R supports importing datasets from a range of file types. Some common file types supported by R are `.csv`, `.txt`, and `.xlsx`. There are two methods to import a dataset.
+
+-   Through the "Import Dataset" button in the Environment window (top right).
+
+-   Through a command *(more information regarding this method can be found in the [R documentation](https://cran.r-project.org/doc/manuals/R-data.html#XML))*
+
+To import data through a command, first set the working directory to the folder where your data files are stored.
+
+```{r}
+setwd("~/IntroductionToR_P2_ExerciseNotebook")
+```
+
+## 2.1.1 Import from CSV file
+
+```{r}
+students = read.csv("Ex2-Students_CSV.csv")
+students
+```
+
+## 2.1.2 Import from TXT file
+
+```{r}
+menu = read.delim("Ex2-Menu_TXT.txt") # with tab character as the delimiter
+menu
+```
+
+## 2.1.3 Import from Excel file
+
+```{r}
+library(readxl) # the package which enables Excel file importation
+students2 = read_xlsx("Ex2-Students_Excel.xlsx")
+students2
+```
+
+# 2.2 Dataset overview
+
+## 2.2.1 View dataset in new window
+
+Use the `view()` function to view the `students` dataset in another window.
+
+```{r}
+# The package which contains the view() function to display dataset in a separate window
+library(tibble)
+
+# Write your code below
+
+```
+
+## 2.2.2 View columns of dataset in console
+
+Use the `glimpse()` function to print all the columns of the `students` dataset to console.
+
+```{r}
+# Write your code below
+
+```
+
+## 2.2.3 View column names in console
+
+Use the `names()` function to print all the column names of the `students` dataset to console.
+
+```{r}
+# Write your code below
+
+```
+
+\* In R, you can view column names of a dataset when writing code by typing the dollar sign, `$`, after the dataset's name.
+
+## 2.2.4 Find datatype of a column
+
+The `class()` function in R can be used to find the datatype of a specific column in a table, e.g. `class(tableName$columnName)`. Use the `class()` function to find the datatype of the `Price` column in the `menu` dataset.
+
+```{r}
+# Write your code below
+
+```
+
+## 2.2.5 Summary of dataset
+
+Use the `summary()` function to get a summary of all columns in the `menu` dataset.
+
+```{r}
+# Write your code below
+
+```
+
+## 2.2.6 Find unique values in a column
+
+The `unique()` function in R can be used to find all unique values in a specific column in a table, e.g., `unique(tableName$columnName)`. Use the `unique()` function to find all values in the `Price` column in the `menu` dataset, without duplication.
+
+```{r}
+# Write your code below
+
+```
+
+## 2.2.7 Incomplete cases in dataset
+
+In R, incomplete cases are rows in a dataset that contain one or more `NA` values. These incomplete cases can be identified using the function `complete.cases()`.
+
+For example, we can view the incomplete cases in the `menu` dataset by executing the code below.
+
+```{r}
+menu[!complete.cases(menu),]
+```
+
+Breaking down the code written, a condition is being used with the `[]` operator. The condition states 'rows in `menu` that are not a complete case', utilising the NOT logical operator and the `complete.cases` function. Note that the `,` sign within the `[]` operator is mandatory since `menu` is a 2-dimensional dataframe. The second index has been left out on purpose to include all columns in the result.
+
+With reference to the syntax shown above, find the incomplete cases in `students`.
+
+```{r}
+# Write your code below
+
+```
+
+# 2.3 Data ordering
+
+Vector elements can be easily reordered using the function `sort()` in R. It can be applied to all basic datatypes to order data in numerical order (numeric, integer, complex, logical datatypes) or alphabetical order (character datatypes). It has a parameter, `decreasing`, to specify whether to sort a vector in ascending or descending order, e.g., `sort(someVector, decreasing = TRUE)`.
+
+\* Executing the code `sort(someVector)` will only print the sorting result to console. To save the sorted vector, you would need to write `someVector = sort(someVector)`.
+
+Execute the `numbers` vector defined below, then sort it in ascending order.
+
+```{r}
+numbers = c(4, 3, 5, 1, 2)
+
+# Write your code below
+
+```
+
+Execute the `words` vector defined below, then sort it in descending alphabetical order.
+
+```{r}
+words = c("table", "water", "chair", "ruler")
+
+# Write your code below
+
+```
+
+# 2.4 Dealing with incomplete cases
+
+Missing values in R can be very problematic: many functions cannot produce a meaningful result when they are present and will return `NA` or even raise an error. In order to obtain accurate results, these incomplete cases will need to be dealt with.
+
+## 2.4.1 Drop all incomplete cases
+
+The simplest way to deal with missing value(s) is to drop (remove) all the incomplete cases. Extending from the syntax to find incomplete cases in [2.2.7 Incomplete cases in dataset], we can drop all incomplete cases by omitting the NOT operator, as follows:
+
+```{r}
+menu[complete.cases(menu),]
+```
+
+Try using this method to output a table which drops all the incomplete cases in `students`.
+
+```{r}
+# Write your code below
+
+```
+
+A simpler method uses the function `na.omit()`. Using this function, try to obtain the same output table as above.
+
+```{r}
+# Write your code below
+
+```
+
+As you might notice, dealing with missing value(s) by dropping all the incomplete cases ignores a lot of rows in the dataset. Therefore, this method is not recommended because it can greatly affect the reliability of the analysis.
+
+## 2.4.2 Drop incomplete cases in specific column
+
+A better way to deal with missing value(s) is to remove only rows with a missing value in the variable we are interested in. This minimises the number of rows being ignored. The method to do so is similar to the first method in the previous section, with the addition of the name of the column that you want to drop missing values from in the condition.
+
+```{r}
+menu[complete.cases(menu$Price),]
+```
+
+Apply this method to the `students` dataset to output a table where rows with a missing value in the `First.Name` column are removed.
+
+```{r}
+# Write your code below
+
+```
+
+## 2.4.3 Replace the missing value(s)
+
+If you know your data well enough, you can replace the missing value(s) in the dataset. This can be done manually by assigning a new value to the cell. Taking the dataset `menu` as an example, the cell with the missing value has index `[3, 2]`, and a new value can be assigned to it as follows:
+
+```{r}
+menu[3, 2] = 4.50
+menu # Check to make sure the missing value is being replaced correctly. 
+```
+
+For a small dataset, the method above is practical. For a larger dataset, the better way is to replace missing value(s) in a batch using the function `replace_na` in the `tidyr` package.
+
+```{r}
+library(tidyr) # The package which includes the replace_na function. 
+replace_na(menu, list(Price = 4.50))
+```
+
+This method should be used with caution as it replaces all `NA` values in the column with the value specified. Run the code chunk below which shows an example of such an issue.
+
+```{r}
+# Creating the dataset for illustration purposes. 
+menu_example = menu
+menu_example[1:4, 2] = NA
+menu_example
+
+# Applying the function
+replace_na(menu_example, list(Price = 4.50))
+```
+
+The `students2` dataset is a complete version of the `students` dataset. Refer to it to apply the following changes to the `students` dataset.
+
+1.  Replace the missing value in the `First.Name` column using the first method.
+
+2.  Replace the missing value in the `Last.Name` column using the second method.
+
+```{r}
+# Replacing with first method
+
+
+# Replacing with second method
+
+```
+
+## 2.4.4 Ignore missing value(s) in function
+
+When applying aggregate functions to a dataset with missing value(s), the value `NA` will be returned. In order to obtain a useful output, we can ask these functions to ignore the missing value(s) by setting the `na.rm` parameter to `TRUE`.
+
+\* Aggregate functions: sum, mean, median, min, max, variance, range, etc.
+
+```{r}
+numbers = c(2, 3, 5, 8, NA, 4, 7)
+
+# Using the default value assigned to the na.rm parameter
+sum(numbers)
+
+# Setting the na.rm parameter to TRUE to ignore missing value(s)
+sum(numbers, na.rm = TRUE)
+```
+
+Calculate the mean price in the `menu_example` dataset.
+
+\* Use `datasetName$colname` to specify which column to apply the function on.
+
+```{r}
+# Write your code below
+
+```
diff --git a/RBeginnersExercise2_Sol.Rmd b/RBeginnersExercise2_Sol.Rmd
new file mode 100644
index 0000000..6014aa8
--- /dev/null
+++ b/RBeginnersExercise2_Sol.Rmd
@@ -0,0 +1,267 @@
+---
+title: "R Beginners Exercise 2: Data Processing"
+output:
+  word_document: default
+  html_document: 
+    toc: true
+editor_options:
+  chunk_output_type: console
+---
+
+# Introduction
+
+Welcome to R for Beginners Exercise 2! This notebook contains the exercises that we will work through during the exercise breaks in this lesson, and it also serves as a workspace for you to use during the session.
+
+To execute a line of code, click on it and press *Ctrl + Enter*.
+
+To execute a chunk of code, click the green run button at the top right corner of the code chunk or highlight the entire code chunk and press *Ctrl + Enter*.
+
+# 2.1 Import dataset
+
+R supports importing datasets from a range of file types. Some common file types supported by R are `.csv`, `.txt`, and `.xlsx`. There are two methods to import a dataset.
+
+-   Through the "Import Dataset" button in the Environment window (top right).
+
+-   Through a command *(more information regarding this method can be found in the [R documentation](https://cran.r-project.org/doc/manuals/R-data.html#XML))*
+
+To import data through a command, first set the working directory to the folder where your data files are stored.
+
+```{r eval = FALSE}
+setwd("~/IntroductionToR_P2_ExerciseNotebook_Sol")
+```
+
+## 2.1.1 Import from CSV file
+
+```{r}
+students = read.csv("Ex2-Students_CSV.csv")
+students
+```
+
+## 2.1.2 Import from TXT file
+
+```{r}
+menu = read.delim("Ex2-Menu_TXT.txt") # with tab character as the delimiter
+menu
+```
+
+## 2.1.3 Import from Excel file
+
+```{r}
+library(readxl) # the package which enables Excel file importation
+students2 = read_xlsx("Ex2-Students_Excel.xlsx")
+students2
+```
+
+# 2.2 Dataset overview
+
+## 2.2.1 View dataset in new window
+
+Use the `view()` function to view the `students` dataset in another window.
+
+```{r}
+# The package which contains the view() function to display dataset in a separate window
+library(tibble)
+
+# Write your code below
+view(students)
+```
+
+## 2.2.2 View columns of dataset in console
+
+Use the `glimpse()` function to print all the columns of the `students` dataset to console.
+
+```{r}
+# Write your code below
+glimpse(students)
+```
+
+## 2.2.3 View column names in console
+
+Use the `names()` function to print all the column names of the `students` dataset to console.
+
+```{r}
+# Write your code below
+names(students)
+```
+
+\* In R, you can view column names of a dataset when writing code by typing the dollar sign, `$`, after the dataset's name.
+
+## 2.2.4 Find datatype of a column
+
+The `class()` function in R can be used to find the datatype of a specific column in a table, e.g. `class(tableName$columnName)`. Use the `class()` function to find the datatype of the `Price` column in the `menu` dataset.
+
+```{r}
+# Write your code below
+class(menu$Price)
+```
+
+## 2.2.5 Summary of dataset
+
+Use the `summary()` function to get a summary of all columns in the `menu` dataset.
+
+```{r}
+# Write your code below
+summary(menu)
+```
+
+## 2.2.6 Find unique values in a column
+
+The `unique()` function in R can be used to find all unique values in a specific column in a table, e.g., `unique(tableName$columnName)`. Use the `unique()` function to find all values in the `Price` column in the `menu` dataset, without duplication.
+
+```{r}
+# Write your code below
+unique(menu$Price)
+```
+
+## 2.2.7 Incomplete cases in dataset
+
+In R, incomplete cases are rows in a dataset that contain one or more `NA` values. These incomplete cases can be identified using the function `complete.cases()`.
+
+For example, we can view the incomplete cases in the `menu` dataset by executing the code below.
+
+```{r}
+menu[!complete.cases(menu),]
+```
+
+Breaking down the code written, a condition is being used with the `[]` operator. The condition states 'rows in `menu` that are not a complete case', utilising the NOT logical operator and the `complete.cases` function. Note that the `,` sign within the `[]` operator is mandatory since `menu` is a 2-dimensional dataframe. The second index has been left out on purpose to include all columns in the result.
+
+With reference to the syntax shown above, find the incomplete cases in `students`.
+
+```{r}
+# Write your code below
+students[!complete.cases(students),]
+```
+
+# 2.3 Data ordering
+
+Vector elements can be easily reordered using the function `sort()` in R. It can be applied to all basic datatypes to order data in numerical order (numeric, integer, complex, logical datatypes) or alphabetical order (character datatypes). It has a parameter, `decreasing`, to specify whether to sort a vector in ascending or descending order, e.g., `sort(someVector, decreasing = TRUE)`.
+
+\* Executing the code `sort(someVector)` will only print the sorting result to console. To save the sorted vector, you would need to write `someVector = sort(someVector)`.
+
+Execute the `numbers` vector defined below, then sort it in ascending order.
+
+```{r}
+numbers = c(4, 3, 5, 1, 2)
+
+# Write your code below
+numbers = sort(numbers)
+```
+
+Execute the `words` vector defined below, then sort it in descending alphabetical order.
+
+```{r}
+words = c("table", "water", "chair", "ruler")
+
+# Write your code below
+words = sort(words, decreasing = TRUE)
+```
+
+# 2.4 Dealing with incomplete cases
+
+Missing values in R can be very problematic: many functions cannot produce a meaningful result when they are present and will return `NA` or even raise an error. In order to obtain accurate results, these incomplete cases will need to be dealt with.
+
+## 2.4.1 Drop all incomplete cases
+
+The simplest way to deal with missing value(s) is to drop (remove) all the incomplete cases. Extending from the syntax to find incomplete cases in [2.2.7 Incomplete cases in dataset], we can drop all incomplete cases by omitting the NOT operator, as follows:
+
+```{r}
+menu[complete.cases(menu),]
+```
+
+Try using this method to output a table which drops all the incomplete cases in `students`.
+
+```{r}
+# Write your code below
+students[complete.cases(students),]
+```
+
+A simpler method uses the function `na.omit()`. Using this function, try to obtain the same output table as above.
+
+```{r}
+# Write your code below
+na.omit(students)
+```
+
+As you might notice, dealing with missing value(s) by dropping all the incomplete cases ignores a lot of rows in the dataset. Therefore, this method is not recommended because it can greatly affect the reliability of the analysis.
+
+## 2.4.2 Drop incomplete cases in specific column
+
+A better way to deal with missing value(s) is to remove only rows with a missing value in the variable we are interested in. This minimises the number of rows being ignored. The method to do so is similar to the first method in the previous section, with the addition of the name of the column that you want to drop missing values from in the condition.
+
+```{r}
+menu[complete.cases(menu$Price),]
+```
+
+Apply this method to the `students` dataset to output a table where rows with a missing value in the `First.Name` column are removed.
+
+```{r}
+# Write your code below
+students[complete.cases(students$First.Name),]
+```
+
+## 2.4.3 Replace the missing value(s)
+
+If you know your data well enough, you can replace the missing value(s) in the dataset. This can be done manually by assigning a new value to the cell. Taking the dataset `menu` as an example, the cell with the missing value has index `[3, 2]`, and a new value can be assigned to it as follows:
+
+```{r}
+menu[3, 2] = 4.50
+menu # Check to make sure the missing value is being replaced correctly. 
+```
+
+For a small dataset, the method above is practical. For a larger dataset, the better way is to replace missing value(s) in a batch using the function `replace_na` in the `tidyr` package.
+
+```{r}
+library(tidyr) # The package which includes the replace_na function. 
+replace_na(menu, list(Price = 4.50))
+```
+
+This method should be used with caution as it replaces all `NA` values in the column with the value specified. Run the code chunk below which shows an example of such an issue.
+
+```{r}
+# Creating the dataset for illustration purposes. 
+menu_example = menu
+menu_example[1:4, 2] = NA
+menu_example
+
+# Applying the function
+replace_na(menu_example, list(Price = 4.50))
+```
+
+The `students2` dataset is a complete version of the `students` dataset. Refer to it to apply the following changes to the `students` dataset.
+
+1.  Replace the missing value in the `First.Name` column using the first method.
+
+2.  Replace the missing value in the `Last.Name` column using the second method.
+
+```{r}
+# Replacing with first method
+students[1, 2] = "John"
+
+# Replacing with second method
+replace_na(students, list(Last.Name = "Brey"))
+```
+
+## 2.4.4 Ignore missing value(s) in function
+
+When applying aggregate functions to a dataset with missing value(s), the value `NA` will be returned. In order to obtain a useful output, we can ask these functions to ignore the missing value(s) by setting the `na.rm` parameter to `TRUE`.
+
+\* Aggregate functions: sum, mean, median, min, max, variance, range, etc.
+
+```{r}
+numbers = c(2, 3, 5, 8, NA, 4, 7)
+
+# Using the default value assigned to the na.rm parameter
+sum(numbers)
+
+# Setting the na.rm parameter to TRUE to ignore missing value(s)
+sum(numbers, na.rm = TRUE)
+```
+
+Calculate the mean price in the `menu_example` dataset.
+
+\* Use `datasetName$colname` to specify which column to apply the function on.
+
+```{r}
+# Write your code below
+mean(menu_example$Price, na.rm = TRUE)
+```
diff --git a/part-2-data-processing.Rproj b/part-2-data-processing.Rproj
new file mode 100644
index 0000000..8e3c2eb
--- /dev/null
+++ b/part-2-data-processing.Rproj
@@ -0,0 +1,13 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
-- 
GitLab