code tasks

remove leading spaces
check number of columns (same)
subtract phenotypes
recode genotypes
associated array
bash for Julia scripts
read multiple files

remove leading spaces

#file (leading space before a,b,c)
# a 1 2
# b 1 0
# c 1 1

awk 'BEGIN{OFS=" "}{$1=$1;print}' file

check number of columns (same)

#check number of columns
awk '{print NF;exit}' file

#check whether number of columns same
awk -F"|" -v expectFldCnt=2 '{
  if (NF==expectFldCnt) { print ":everything OK" ; }
   else { print "ERR: " NF "!=" expectFldCnt  ":" $0 > "errorFile" }
   }' \
map.txt relation.txt

subtract phenotypes

#PHENOTYPE, PEDIGREE AND FIXED EFFECTS
phenotypeFile="phens_eblup.csv"
awk 'BEGIN{FS=","};NR>1{print $1,$2,$3,$5,$8}' $phenotypeFile > pheno.out 
awk 'BEGIN{FS=","};NR==1{print $1,$2,$3,$5,$8}' $phenotypeFile > pheno.header

recode genotypes

#/bin/bash/

#GENOTYPE
genotypeFile="1234"

#replace 0,1,2 with -1,0,1 // regular format //remove header
sed -e 's/0/-1/g' -e 's/\ 1/\ 0/g' -e 's/2/1/g' $genotypeFile|awk 'BEGIN{OFS=" "}{$1=$1;print}' |awk 'NR>1{print $0}'  > geno.out

echo "Genotypes have been coded as -1,0,1."
echo "Header of the genotype file has been removed."

associated array

#print match lines without sorting link
#http://stackoverflow.com/questions/15065818/compare-files-with-awk

awk 'NR==FNR{a[$1];next} ($1 in a) {print $1}' file1 file2

#It is same as(but confusing)
awk 'FNR==NR{a[$1]++;next}a[$1]'  file1 file2

#show frequency of elements in an array
awk '{a[$1]+=1}END{for (key in a)print key,a[key]}' data.txt

bash for Julia scripts

#!/bin/bash

RunName=BayesC0
for tr in BFT CWT EMA MAR
do
        Gen=`grep ${tr} refVC | awk '{print $2}'`
        Res=`grep ${tr} refVC | awk '{print $3}'`
        if [ ${tr} == "BFT" ]
          then
           PPI=$"0.95"
        elif [ ${tr} == "CWT" ]
          then
           PPI=$"0.98"
        elif [ ${tr} == "EMA" ]
          then
           PPI=$"0.95"
        else
           PPI=$"0.8"
        fi
        echo ${tr} $Gen $Res
        [[ ! -d ${tr} ]] && mkdir ${tr}
        cd ${tr}
        for vn in {1..5}
        do
                [[ ! -d v${vn} ]] && mkdir v${vn}
                cd v${vn}
                echo "Creating Julia Scripts for ${RunName} trait : ${tr} validation No. : ${vn}"
                echo "using JWAS" > ${RunName}_${tr}${vn}.jl
                echo "myOption=Dict()"  >> ${RunName}_${tr}${vn}.jl
                echo "myOption["run"]           = \"BayesC\""  >> ${RunName}_${tr}${vn}.jl
                echo "myOption["seed"]          = 314"  >> ${RunName}_${tr}${vn}.jl
                echo "myOption["chainLength"]   = 5000"  >> ${RunName}_${tr}${vn}.jl
                echo "myOption["probFixed"]     = 0.95"  >> ${RunName}_${tr}${vn}.jl
                echo "myOption["estimatePi"]    = \"no\""  >> ${RunName}_${tr}${vn}.jl
                echo "myOption["estimateScale"] = \"no\""  >> ${RunName}_${tr}${vn}.jl
                echo "myOption["varGenotypic"]  = ${Gen}"  >> ${RunName}_${tr}${vn}.jl
                echo "myOption["varResidual"]   = ${Res}"  >> ${RunName}_${tr}${vn}.jl
                echo "PhenoFile  =\"../../../01_Data/forValidation/${tr}/${tr}t.${vn}\"" >> ${RunName}_${tr}${vn}.jl
                echo "y = map(Float64,readdlm(phenoFile)[2:end,2])" >> ${RunName}_${tr}${vn}.jl
                echo "GenoFile   =\"../../../01_Data/forValidation/\"" >> ${RunName}_${tr}${vn}.jl
                echo "X = map(Float64,readdlm(GenoFile)[2:end,2])" >> ${RunName}_${tr}${vn}.jl

                echo "output = runJWAS(myOption,X,y)"  >> ${RunName}_${tr}${vn}.jl
                echo "Validation =\"../../../01_Data/forValidation/${tr}/${tr}v${vn}\"" >> ${RunName}_${tr}${vn}.jl
                echo "df = readtable(Validation, eltypes =[UTF8String, Float64], separator = ' ',header=false,names=[:ID,:EBV]);" >> ${RunName}_${tr}${vn}.jl
                echo "comp=join(out,df,on=:ID);" >> ${RunName}_${tr}${vn}.jl
                echo "PHEBVcorr=cor(comp[:EBV],comp[:EBV_1])" >> ${RunName}_${tr}${vn}.jl
                echo "writedlm(\"PHEBVcorr_${RunName}_${tr}${vn}\",PHEBVcorr)" >> ${RunName}_${tr}${vn}.jl
                cd ..
        done 
        cd ..
done

read multiple files (julia)

#make variable names
function readdlm_expr(infiles,outvar,prefix="",suffix="")
   for file in infiles
     global filename  = prefix*file*suffix
     global ex = Expr(:(=),Symbol(outvar,file),:(readdlm(filename))) 
     eval(ex)
   end
end

filelist=["50","100","200","400","1000","2000","4000","7000"]
prefix1  =PATH*"BayesC"
suffix  =".txt"
readdlm_expr(filelist,"a",prefix1,suffix)

a

using $0 as $1,$2...$9 require reconstruction like $1=$1
awk 'BEGIN{FS=",";OFS=" "}; NR==1{$1=$1;print $0}' phens_eblup.csv
awk 'BEGIN{FS=",";OFS=" "}; NR==1{print $1,$2,$3,$4,$5,$6,$7,$8,$9}' phens_eblup.csv

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

code tasks

remove leading spaces

check number of columns (same)

subtract phenotypes

recode genotypes

associated array

bash for Julia scripts

read multiple files (julia)

a

FilesExpand file tree

code_tasks.md

Latest commit

History

code_tasks.md

File metadata and controls

code tasks

remove leading spaces

check number of columns (same)

subtract phenotypes

recode genotypes

associated array

bash for Julia scripts

read multiple files (julia)

a