Sunday, February 11, 2018

Extracting Mongo Collection Data and GridFS Pictures Using Multiple Threads

#!/bin/bash

#Color setting variables

# ANSI escape sequences used to colour terminal output. They are stored in
# their literal backslash form and only turn into real escape characters when
# printed with `echo -e`.
MENU='\033[36m' # cyan
RED_TEXT='\033[31m'
NORMAL='\033[m' # reset all attributes
RED='\033[01;31m'
GREEN='\033[01;32m'
YELLOW='\033[01;33m'
PURPLE='\033[01;35m'
CYAN='\033[01;36m'
WHITE='\033[01;37m'
BOLD='\033[1m'
UNDERLINE='\033[4m'

############################################################################################################
############################################### USAGE ######################################################
############################################################################################################
#######################################
# Print the command-line help text.
# The synopsis uses the "-x=value" form because that is the only form the
# option parser of this script accepts.
# Globals:   BOLD, NORMAL, PURPLE, GREEN (read)
# Arguments: none
# Outputs:   help text on stdout
#######################################
usage() {
  echo -e "
${BOLD}
Usage: pic-migration -i=inputfile [-t=threads] [-h=mongohost] [-d=dbname] [-p=port]
${NORMAL}
${PURPLE}  -i=, --inputfile= ${NORMAL} : Input file with id list
${PURPLE}  -t=, --threads=   ${NORMAL} : Number of parallel threads (default 4)
${PURPLE}  -h=, --mongohost= ${NORMAL} : Mongo server hostname (default hostname)
${PURPLE}  -d=, --dbname=    ${NORMAL} : Mongo database name  (default dbname)
${PURPLE}  -p=, --port=      ${NORMAL} : Mongo port  (default 27108)

 ${GREEN} Example : ${NORMAL}
    ./pic-migration -i=inputfile -t=<threads count>  -h=<host> -d=<db> -p=<port#>
or
    ./pic-migration -i=inputfile
  "
}
##################################################################################################################
######################################### INPUT VALIDATION  ######################################################
##################################################################################################################

# Called without any argument: show the help text and stop successfully.
(( $# )) || {
  usage
  exit 0
}

# Default settings. Host, database, port and thread count can be overridden
# on the command line (-h=, -d=, -p=, -t=).
# '<hostname>' and '<dbname>' are placeholders to be replaced with real
# values; they are quoted because an unquoted '<' / '>' is parsed by the shell
# as a redirection and makes the whole script fail with a syntax error.
mongohost='<hostname>'
dbname='<dbname>'
port=27108
thread_count=4
# Directory that holds the mongo and mongofiles binaries.
mpath=/mongo/sw/mongo-3.0.1/bin
# Directory the downloaded pictures are written to.
picpath=/opt/pic
# Credentials: taken from the environment when MONGO_USER / MONGO_PWD are set,
# otherwise the historical built-in values are used.
# NOTE(review): hard-coded credentials should be removed once callers export
# the environment variables.
usr="${MONGO_USER:-testuser}"
pwd="${MONGO_PWD:-user1234}"

# Print an exit hint, then block until one line has been read from stdin.
# The read prompt is only blank padding; the text read is not used.
pause() {
  local blank_prompt="                "
  echo -e "Press ${BOLD} any key to exit ${NORMAL} "
  read -p "$blank_prompt"
}


#echo "$#"

#echo "$@"

#######################################
# Parse "-x=value" / "--long=value" command-line options into globals and
# validate them.
# Globals:   inputfile, thread_count, mongohost, dbname, port (written)
# Arguments: the command-line arguments of the script
# Outputs:   diagnostics on stderr, help text (via usage) on stdout
# Exits:     1 on an unknown option, a missing/unreadable input file or a
#            thread count that is not a positive integer
#######################################
parse_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      -i=* | --inputfile=*)
        inputfile="${arg#*=}"
        ;;
      -t=* | --threads=*)
        thread_count="${arg#*=}"
        ;;
      -h=* | --mongohost=*)
        mongohost="${arg#*=}"
        ;;
      -d=* | --dbname=*)
        dbname="${arg#*=}"
        ;;
      -p=* | --port=*)
        port="${arg#*=}"
        ;;
      *)
        echo "input error.." >&2
        usage
        exit 1
        ;;
    esac
  done

  # The input file is read line by line later on; fail early when it is absent.
  if [[ -z "${inputfile:-}" || ! -r "$inputfile" ]]; then
    echo "input error.. input file '${inputfile:-}' is missing or not readable" >&2
    usage
    exit 1
  fi

  # The thread count is used as a divisor when the work is split into chunks.
  if ! [[ "$thread_count" =~ ^[1-9][0-9]*$ ]]; then
    echo "input error.. thread count '${thread_count}' must be a positive integer" >&2
    usage
    exit 1
  fi
}

if (( $# > 0 )); then
  parse_args "$@"
fi

# The script appends to files under ./log and ./data; create both up front so
# those redirections cannot fail on a fresh working directory.
mkdir -p ./log ./data || {
  echo "cannot create ./log and ./data" >&2
  exit 1
}
# Base name (without the .out suffix) of this run's report file.
TS=./log/pic_downloaded_$(date +"%m-%d-%y-%T")

#######################################################################################
###################### SSO_ID to PIC_ID CONVERSION ####################################
###################### VALIDATE GIVEN SSOIDS ARE VALID ################################
###################### VALIDATE GIVEN SSOIDS ARE HAVING PIC/IMAGE MAPPED OR NOT########
#######################################################################################

#echo "DBQuery.shellBatchSize = 90000000000000000;" > img_cnt.py
#rm -f picid_list.py
# Start from a clean slate: remove the result files of a previous run for the
# same input file, at the locations this section actually writes them to.
rm -f -- "${inputfile}_picid.lst" \
  "./log/${inputfile}_ssoid_notfound.lst" \
  "./log/${inputfile}_imgid_notfound.lst" \
  "./data/${inputfile}_prefdata.out" \
  "./data/${inputfile}_picdata.out"

# Common mongo shell invocation; queries are fed on stdin.
# NOTE(review): the password is passed on the command line and is therefore
# visible in the process list.
mongo_cmd=("$mpath/mongo" --host "$mongohost" "$dbname" --port="$port"
  -u "$usr" -p "$pwd" --authenticationDatabase=admin --quiet)

# One ssoid per input line. For each id:
#   - no query output            -> logged in ./log/<input>_ssoid_notfound.lst
#   - output without an ObjectId -> logged in ./log/<input>_imgid_notfound.lst
#   - otherwise the picture id is appended to <input>_picid.lst and the
#     picture metadata to ./data/<input>_picdata.out
# The full employee document of every id found goes to
# ./data/<input>_prefdata.out.
# NOTE(review): the ssoid is interpolated into the query text unescaped; the
# input file must come from a trusted source.
while read -r ssoid || [[ -n "$ssoid" ]]; do
  # Blank lines carry no id.
  [[ -n "$ssoid" ]] || continue
  echo "$ssoid"

  img_query="db.employee.find({'employee.DigitalProfiles.ssoid':'${ssoid}'},{_id:0,'Profiles.DigitalProfiles.profilePictureId':1})"
  pref_query="db.employee.find({'employee.DigitalProfiles.ssoid':'${ssoid}'})"

  imgqry_output=$("${mongo_cmd[@]}" <<< "$img_query")

  if [[ -z "$imgqry_output" ]]; then
    echo -e "${RED}$ssoid <<ssoid not found>> ${NORMAL}"
    echo -e "${RED}$ssoid ${NORMAL}" >> "./log/${inputfile}_ssoid_notfound.lst"
    continue
  fi

  "${mongo_cmd[@]}" <<< "$pref_query" >> "./data/${inputfile}_prefdata.out"

  if [[ "$imgqry_output" != *"ObjectId"* ]]; then
    echo -e "${RED}$ssoid ${NORMAL}" >> "./log/${inputfile}_imgid_notfound.lst"
    echo -e "${RED}$ssoid <<pic not found>> ${NORMAL}"
    continue
  fi

  # The output is flattened to one line first; the picture id is then the
  # text after the third ':' up to the first '}'.
  picid=$(printf '%s\n' "$imgqry_output" \
    | tr '\n' ' ' \
    | awk -F ':' '{print $4}' \
    | awk -F '}' '{print $1}')

  "${mongo_cmd[@]}" <<< "db.ProfilePicture.files.find({_id : ${picid} })" \
    >> "./data/${inputfile}_picdata.out"
  printf '%s\n' "$picid" >> "${inputfile}_picid.lst"
done < "$inputfile"

#read -p "waiting for cancel "
###########################################################################################
############################# PIC DOWNLOAD ################################################
###########################################################################################

#######################################
# Load the picture ids collected for the current input file and work out how
# many ids each worker has to process.
# Globals:   inputfile (read); thread_count (read, reset to 4 when it is not
#            a positive integer); picids, total_files, lines_per_file (written)
# Arguments: none
# Outputs:   a warning on stderr when thread_count had to be reset
#######################################
load_pic_ids() {
  local picid_list="${inputfile}_picid.lst"

  # Keep only lines that contain an id; a missing or empty list yields "".
  picids=""
  if [[ -s "$picid_list" ]]; then
    picids=$(grep '[^[:space:]]' "$picid_list")
  fi

  # Counting non-blank lines makes an empty list count as 0 files, not 1.
  total_files=$(grep -c '[^[:space:]]' <<< "$picids")

  # thread_count is the divisor below; never divide by zero or by garbage.
  if ! [[ "$thread_count" =~ ^[1-9][0-9]*$ ]]; then
    echo "invalid thread count '${thread_count}', using 4" >&2
    thread_count=4
  fi

  # Ceiling of total_files / thread_count.
  lines_per_file=$(( (total_files + thread_count - 1) / thread_count ))
}

load_pic_ids

#echo "total files :  $total_files"
#echo "lines per file $lines_per_file"

#read -p "wait"

#echo "dollar picids ------------> $picids"

#TS=Pic_download_"$inputfile"_report_$(date +"%m-%d-%y-%T")
# Append the report header (target host, database and start time) to the
# report file of this run.
{
  echo -e " ${BOLD} ----------------------------------------------------------------------------------------------------------------------------------${NORMAL}"
  echo "Host         : $mongohost              "
  echo "Databse      : $dbname              "
  echo "Begin Date   : $(date)                                    "
  echo -e " ${BOLD}-----------------------------------------------------------------------------------------------------------------------------------${NORMAL}"
} >> "$TS".out

#######################################
# Download one GridFS picture with mongofiles and store it as
# $picpath/<hex id>.jpg.
# Globals:   mpath, dbname, mongohost, port, usr, pwd, picpath, TS (read);
#            pic_id (read, only when no argument is passed)
# Arguments: $1 - picture id in the form ObjectId("<hex>"); optional, the
#                 global pic_id is used when it is omitted
# Outputs:   a progress line on stdout; mongofiles output is appended to
#            $TS.out
# Returns:   1 when the id has no quoted hex part, otherwise the exit status
#            of mongofiles
#######################################
pic_download() {
  local id=${1:-$pic_id}
  local filename

  # Drop surrounding whitespace so the id can be passed as one quoted word.
  id="${id#"${id%%[![:space:]]*}"}"
  id="${id%"${id##*[![:space:]]}"}"

  # The file name is the text between the first pair of double quotes.
  filename=$(printf '%s\n' "$id" | awk -F '"' '{print $2}')
  if [[ -z "$filename" ]]; then
    echo "skipping invalid picture id: '$id'" >&2
    return 1
  fi

  echo "PIC FILENAME $filename -------------> $(date)"
  "$mpath/mongofiles" --quiet -d "$dbname" get_id "$id" \
    --host "$mongohost" --port "$port" --prefix ProfilePicture \
    --authenticationDatabase admin -u "$usr" -p "$pwd" \
    --local "$picpath/$filename.jpg" >> "$TS.out" 2>&1
}

##################################################################################################
##############################  Thread Loop ######################################################
##################################################################################################
#echo "`date`"

#######################################
# Worker body: download every picture listed in the chunk file x<N>.
# Arguments: $1 - worker identity number N selecting the chunk file x<N>;
#                 optional, the global loop counter i is used when omitted
# Globals:   i (read, only when no argument is passed)
# Returns:   status of the last command run inside the loop
#######################################
process_lines() {
  local worker=${1:-$i}
  local line pic_id

  while read -r line || [[ -n "$line" ]]; do
    # Nothing to download for a blank line.
    [[ -n "$line" ]] || continue
    # Skip a "connected to ..." banner line if one ended up in the list.
    [[ "$line" == 'connected to'* ]] && continue
    # The id is the first tab-separated field of the line.
    pic_id=${line%%$'\t'*}
    pic_download "$pic_id"
  done < "x${worker}"
}

#######################################################################################################
#######################  Multi Thread initiator #######################################################
#######################################################################################################

#######################################
# Print the slice of stdin that belongs to one worker.
# Arguments: $1 - worker number, starting at 1
#            $2 - number of lines assigned to each worker
# Outputs:   lines ($1-1)*$2+1 .. $1*$2 of stdin, blank lines dropped. The
#            slices of different workers never overlap, including the last,
#            possibly shorter, one.
#######################################
pic_chunk() {
  local worker=$1 per_worker=$2
  awk -v first="$(( per_worker * (worker - 1) + 1 ))" \
    -v last="$(( per_worker * worker ))" \
    'NR >= first && NR <= last && NF'
}

# Start one background worker per thread. Worker i gets its share of the id
# list in the file x<i>; the pid of every worker is recorded in _worker_pids
# so the script can wait for it afterwards.
_worker_pids=()
for ((i = 1; i <= thread_count; ++i)); do
  printf '%s\n' "$picids" | pic_chunk "$i" "$lines_per_file" > "x$i"
  process_lines "$i" &
  _worker_pids+=("$!")
  echo -e "${BOLD}${PURPLE}worker process id : $! ${NORMAL}"
done

#######################################################################################################
#######################   Wait for Threads to complete ################################################
#######################################################################################################

# Wait for every worker individually and count the ones that ended with a
# non-zero status, so that a failing worker is actually noticed.
failed_workers=0
for worker_pid in "${_worker_pids[@]}"; do
  wait "$worker_pid" > /dev/null 2>&1 || failed_workers=$((failed_workers + 1))
done

# Number of report lines containing "finished writing"
# (presumably one per picture written by mongofiles - TODO confirm).
cnt=$(grep -ci "finished writing" "$TS.out" 2>/dev/null)
cnt=${cnt:-0}

# Close the report with the end time and the download count.
{
  echo -e "${BOLD}-------------------------------------------------------------------------------------------------------------${NORMAL}"
  echo "End Time  :      $(date)                   Number of images downloaded : $cnt "
  echo -e "${BOLD}-------------------------------------------------------------------------------------------------------------${NORMAL}"
} >> "$TS.out"

if (( failed_workers == 0 )); then
  echo -e "${BOLD}==================================================================================================================${NORMAL}"
  echo -e "${BOLD}${GREEN}   Execution complete!! Please validate execution output in the  log and results in data & pic folders !! ${NORMAL}"
  echo -e "${BOLD}==================================================================================================================${NORMAL}"
else
  echo -e "${BOLD}${RED}   $failed_workers worker(s) ended with errors. Please check $TS.out ${NORMAL}" >&2
  exit 1
fi

No comments:

Post a Comment