#!/bin/sh
# Copyright (c) 2009, edogawaconan <me@myconan.net>
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
# Lots of bugs here. Use with care
# USE WITH CARE
#
# What it does: fetches every picture that has the specified TAGS.
# Requirements: wget, xsltproc (libxslt), md5sum (or md5)
# Extra directories put in front of PATH when looking up the external tools:
# cut, sed, wc, MD5(sum), wget, xsltproc, grep
ADDITIONAL_PATH=''
# Custom md5 command, arguments included. Expected output format:
# <32digit md5><space(s)><filename>
# Leave empty to get "md5sum" (Linux, Solaris) or "md5 -r" (*BSD)
MD5=''
# Default server address. Only Danbooru-powered sites are supported.
DEFAULT_SITE='moe.imouto.org'
# Base directory. It must be writable and owned by you.
# Files end up in ${BASE_DIR}/<SITE>/<TAGS>
# Absolute path only.
# Leave empty to use the folder the script is started from
BASE_DIR=''
# Internal settings below; not meant to be changed by users
# sed script: reduce a path or URL to its last component (the file name)
SED_GET_FILENAME='s/.*\/\([^\/]*\)/\1/g'
# sed script: delete "<32 hex digits>.<anything>"; an empty result means the
# whole name looked like a Danbooru md5 file name
SED_IS_MD5_FILE='s/\([0-9a-f]\{32\}\..*\)//g'
### TODO:
### - sanity validator(?)
### - unified repository to save bandwidth
### - bug stomping
### - sanity checking
### WILL BE FOR 0.3
# Print the program banner; this is also where the version is defined.
# Globals written: MOEFETCHVERSION
Msg_Welcome() {
  MOEFETCHVERSION="0.2.1"
  printf '%s\n' \
    "moefetch ${MOEFETCHVERSION}" \
    "Copyright (c) 2009 edogawaconan <me@myconan.net>"
}
# Fatal error handler: print a message and abort the script.
# Arguments: $1 - description of the error
# Outputs:   an empty line followed by "Fatal error: <message>" on stdout
# Exits with status 1; never returns to the caller.
Err_Fatal() {
  # The message is passed as data, never as the format string, so a '%' or
  # '\' inside it (file names, tags, paths) is printed literally.
  printf '\nFatal error: %s\n' "${1}"
  exit 1
}
# Print the usage text and abort.
# Globals read: DEFAULT_SITE (shown as the default for -s)
# Outputs:      usage text on stdout
# Exits with status 2; never returns to the caller.
Err_Help() {
# The here-document is unquoted on purpose so ${DEFAULT_SITE} is expanded.
cat <<EOF
moefetch.sh COMMAND [-s SITE_URL] [-nc] TAGS
COMMAND:
(quick)fetch: do a complete update. Add prefix quick to skip file checking
check: get list of new files, clean up local folder and print total new files
-s SITE_URL: Specify URL of the Danbooru powered site you want to leech from. Default is ${DEFAULT_SITE}
-nc, --noclean: Do not clean up the local folder (nothing is moved to the trash folder)
TAGS: Tags you want to download. Separated by spaces. Tag name follows standard Danbooru tagging scheme
EOF
  exit 2
}
# Download the post index for the requested tags and turn it into a plain
# list of file URLs, one per line.
# Globals read:    SITE, TAGS, TEMP_PREFIX
# Globals written: NUMFILES (number of URLs found)
# Files written:   ${TEMP_PREFIX}-xml  (raw index as downloaded)
#                  ${TEMP_PREFIX}-list (extracted URLs)
# Aborts through Err_Fatal when the download fails or no URL is found.
Generate_Link() {
  printf "\nFetching xml file\n"
  # "-e continue=off" hands wget the wgetrc setting that turns resuming off.
  # A failed download is reported right away instead of surfacing later as
  # an empty list.
  wget "http://${SITE}/post/index.xml?tags=${TAGS}&offset=0&limit=100000" -O "${TEMP_PREFIX}-xml" -e continue=off \
    || Err_Fatal "Failed to download post list from ${SITE}"
  printf "Processing XML file..."
  # The stylesheet (read from stdin) prints the file_url attribute of every
  # <post>. sed then drops whatever sits between the 32-digit md5 and the
  # extension, leaving http...<md5>.<ext>; grep keeps only the URL lines
  # (its stderr is silenced).
  xsltproc - "${TEMP_PREFIX}-xml" <<'EOF' | sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([^\.]*\)/\1\2.\3/g' | grep ^http > "${TEMP_PREFIX}-list" 2>/dev/null
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:output method="xml" indent="yes"/>
<xsl:template match="post">
<xsl:value-of select="@file_url" />
</xsl:template>
</xsl:stylesheet>
EOF
  # the unquoted echo strips any padding wc puts around the number
  NUMFILES=$(echo $(wc -l < "${TEMP_PREFIX}-list"))
  [ "${NUMFILES}" -gt 0 ] || Err_Fatal "Error in processing list or no files can be found with specified tag(s) or site"
  echo " ${NUMFILES} file(s) available on server"
  # output file: ${TEMP_PREFIX}-list
}
# Start a progress spinner: remember its first frame and print it.
# Globals written: _last (the frame currently shown)
Progress_Init() {
  _last="-"
  printf '%s' "${_last}"
}
# Advance the progress spinner by one frame: - \ | / and back to -.
# Globals read/written: _last (the frame currently shown)
# Outputs: a backspace followed by the new frame, so the old one is replaced
Progress_Anim() {
  case "${_last}" in
    /)   _last='-' ;;
    -)   _last='\' ;;
    '\') _last='|' ;;
    '|') _last='/' ;;
  esac
  # The frame is printed through %s: one of the frames is a backslash, which
  # must not end up inside the printf format string.
  printf '\b%s' "${_last}"
}
# Finish the progress spinner: replace the last frame with "done".
Progress_Done() {
  printf '\bdone\n'
}
# Count the entries of a directory without calling ls. Hidden entries are
# included, "." and ".." are not.
# Arguments: the directory path (all arguments are joined into one path)
# Outputs:   the number of entries on stdout; 0 for an empty or missing
#            directory
Count_Files() {
  _d="${*}"
  _i=0
  for _f in "${_d}/"* "${_d}/".*; do
    # Whether ".*" expands to "." and ".." depends on the shell, so they are
    # skipped by name instead of subtracting a fixed 2 at the end.
    case "${_f}" in
      */.|*/..) continue ;;
    esac
    # A pattern that matched nothing is left as-is by the shell; only count
    # names that really exist (-L also catches dangling symlinks).
    if [ -e "${_f}" ] || [ -L "${_f}" ]; then
      _i=$((_i + 1))
    fi
  done
  echo "${_i}"
}
# Check tools availability: pick the md5 command and make sure every
# external program the script depends on can be found.
# Globals read:    MD5 (user setting, may be empty), PATH
# Globals written: MD5 (platform default when empty), MD5_COMMAND,
#                  COMMANDS, COMMAND
# Aborts through Err_Fatal when no md5 tool is known for the platform or
# when a required program is missing.
Check_Tools() {
  # MD5: fall back to the platform's usual tool when none is configured
  if [ -z "${MD5}" ]; then
    case "$(uname)" in
      *BSD) MD5="md5 -r" ;;
      Linux|SunOS) MD5="md5sum" ;;
      *) Err_Fatal "No known md5 tool for this platform. Please specify manually" ;;
    esac
  fi
  # only the program name (first word) can be looked up, not its arguments
  MD5_COMMAND=$(echo ${MD5} | cut -d' ' -f1)
  # basic tools; chmod is listed because it is what gets run to repair
  # folder permissions
  COMMANDS="cut sed wc wget xsltproc xargs rm mkdir chmod comm grep date ${MD5_COMMAND}"
  for COMMAND in ${COMMANDS}; do
    [ "$(command -v "${COMMAND}")" ] || Err_Fatal "${COMMAND} doesn't exist in ${PATH}"
  done
}
# Verify that the required folders exist and are writeable, creating them
# when needed, and create the temporary work files.
# Globals read:    BASE_DIR, SITE_DIR, TARGET_DIR, TEMP_PREFIX
# Globals written: ISNEW (set to 1 when the download folder has no entries),
#                  FOLDER, i
# Aborts through Err_Fatal when BASE_DIR is not owned by the user or when a
# folder or work file cannot be created.
Check_Folders(){
  [ -O "${BASE_DIR}" ] || Err_Fatal "You don't own ${BASE_DIR}. Please fix ${BASE_DIR}."
  for FOLDER in temp trash deleted "${SITE_DIR}/${TARGET_DIR}"; do
    if [ ! -d "${BASE_DIR}/${FOLDER}" ]; then
      # -p: the download folder is two levels deep (<site>/<tags>) and the
      # site level does not exist yet the first time a site is used
      mkdir -p "${BASE_DIR}/${FOLDER}" || Err_Fatal "${FOLDER} folder creation failed"
    fi
    if [ ! -O "${BASE_DIR}/${FOLDER}" ]; then
      echo "You don't own the ${BASE_DIR}/${FOLDER}, applying globally writeable permission on it"
      chmod -R u=rwX,g=rwX,o=rwX "${BASE_DIR}/${FOLDER}" || Err_Fatal "Error changing ownership. This shouldn't happen"
    fi
  done
  # an empty download folder means there is nothing to verify later on
  if [ "$(Count_Files "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}")" -eq 0 ]; then
    ISNEW=1
  fi
  for i in error ok list newlist templist; do
    touch "${TEMP_PREFIX}-${i}" || Err_Fatal "Error creating ${TEMP_PREFIX}-${i}. This shouldn't happen"
  done
}
# Do some cleanup: move everything that does not belong in the download
# folder to a time-stamped folder below ${BASE_DIR}/trash. An entry is
# considered trash when it is
#   - a directory,
#   - not named like "<32 hex digits>.<something>" (see SED_IS_MD5_FILE), or
#   - a file whose name does not occur in ${TEMP_PREFIX}-list.
# Globals read:    BASE_DIR, SITE_DIR, TARGET_DIR, TEMP_PREFIX,
#                  SED_GET_FILENAME, SED_IS_MD5_FILE, _last
# Globals written: TRASH_DIR, TRASH, ISTRASH, _name
# Aborts through Err_Fatal when the trash folder cannot be created or an
# entry cannot be moved.
Cleanup_Repository() {
  # current dir: ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}
  printf "Cleaning up repository folder... "
  Progress_Init
  # site and tags are kept out of the date format so a '%' in them is not
  # taken for a date conversion
  TRASH_DIR="${SITE_DIR}-${TARGET_DIR}-$(date -u "+%Y%m%d-%H.%M")"
  mkdir -p "${BASE_DIR}/trash/${TRASH_DIR}" || Err_Fatal "Unable to create trash folder"
  for TRASH in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"*; do
    # a pattern that matched nothing is left as-is; there is nothing to move
    [ -e "${TRASH}" ] || [ -L "${TRASH}" ] || continue
    _name=${TRASH##*/}
    ISTRASH=
    if [ -d "${TRASH}" ]; then
      ISTRASH=1
    elif [ "$(printf '%s\n' "${_name}" | sed -e "${SED_IS_MD5_FILE}")" ]; then
      # something is left after removing the md5 pattern: not a valid name
      ISTRASH=1
    elif ! sed -e "${SED_GET_FILENAME}" "${TEMP_PREFIX}-list" | grep -F -q -e "${_name}"; then
      # valid name, but not (or no longer) in the server list
      ISTRASH=1
    fi
    if [ "${ISTRASH}" ]; then
      mv -f "${TRASH}" "${BASE_DIR}/trash/${TRASH_DIR}" || Err_Fatal "Error deleting files"
      # names go through %s so '%' and '\' in them are printed literally
      printf '\bMoved %s to %s\n%s' "${_name}" "${BASE_DIR}/trash/${TRASH_DIR}" "${_last}"
    fi
    Progress_Anim
  done
  # drop the trash folder again when nothing was moved into it
  rmdir "${BASE_DIR}/trash/${TRASH_DIR}" 2>/dev/null
  Progress_Done
}
# Check files correctness and work out what still has to be downloaded.
# When the repository is not new (ISNEW empty):
#   1. clean it up first, unless NOCLEAN is set;
#   2. compare the md5 of every properly named file with its file name and
#      delete the files that do not match;
#   3. write every server URL that does not contain the name of a verified
#      local file to ${TEMP_PREFIX}-newlist.
# Otherwise the complete server list becomes the download list.
# Globals read:    ISNEW, ISQUICK, NOCLEAN, MD5, BASE_DIR, SITE_DIR,
#                  TARGET_DIR, TEMP_PREFIX, SED_IS_MD5_FILE, _last
# Globals written: FILE, TOTAL_ERROR, _name, _broken
# Files written:   ${TEMP_PREFIX}-error (paths of corrupt files),
#                  ${TEMP_PREFIX}-ok (names of verified files),
#                  ${TEMP_PREFIX}-newlist (URLs to download)
Check_Files() {
  if [ ! "${ISNEW}" ]; then
    [ "${NOCLEAN}" ] || Cleanup_Repository
    printf "Checking for errors... "
    Progress_Init
    : > "${TEMP_PREFIX}-error"
    : > "${TEMP_PREFIX}-ok"
    for FILE in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"*; do
      # a pattern that matched nothing is left as-is; nothing to check then
      [ -e "${FILE}" ] || continue
      _name=${FILE##*/}
      if [ "$(printf '%s\n' "${_name}" | sed -e "${SED_IS_MD5_FILE}")" ]; then
        printf '\bNot a valid danbooru file: %s\n%s' "${_name}" "${_last}"
      else
        # the part of the name before the first dot is the expected md5
        if [ "$(${MD5} "${FILE}" | cut -d ' ' -f1 -)" != "${_name%%.*}" ]; then
          printf '%s\n' "${FILE}" >> "${TEMP_PREFIX}-error"
          printf '\bError: %s\n%s' "${_name}" "${_last}"
        else
          printf '%s\n' "${_name}" >> "${TEMP_PREFIX}-ok"
        fi
        Progress_Anim
      fi
    done
    Progress_Done
    # the unquoted echo strips any padding wc puts around the number
    TOTAL_ERROR=$(echo $(wc -l < "${TEMP_PREFIX}-error"))
    if [ "${TOTAL_ERROR}" -eq 0 ]; then
      echo "All files OK"
    else
      printf '%s file(s) broken: removing...' "${TOTAL_ERROR}"
      # one path per line, read whole: paths may contain spaces because the
      # folder name is built from the tags
      while IFS= read -r _broken; do
        rm -f -- "${_broken}"
      done < "${TEMP_PREFIX}-error"
      echo " ${TOTAL_ERROR} file(s) removed"
    fi
    echo "$(Count_Files "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}") file(s) available locally"
    printf "Generating list of new files... "
    Progress_Init
    # Drop every URL that contains the name of a verified local file. The
    # names are fixed strings (-F), not regular expressions. An empty name
    # list is handled separately so the download list is always rewritten.
    if [ -s "${TEMP_PREFIX}-ok" ]; then
      grep -v -F -f "${TEMP_PREFIX}-ok" "${TEMP_PREFIX}-list" > "${TEMP_PREFIX}-newlist"
    else
      cat "${TEMP_PREFIX}-list" > "${TEMP_PREFIX}-newlist"
    fi
    Progress_Done
    echo "$(echo $(wc -l < "${TEMP_PREFIX}-newlist")) file(s) to be downloaded"
  else
    if [ "${ISQUICK}" ]; then
      echo "Quick mode selected. Skipping check"
    else
      echo "Empty local repository"
    fi
    cat "${TEMP_PREFIX}-list" > "${TEMP_PREFIX}-newlist"
  fi
}
# Start downloading the images listed in ${TEMP_PREFIX}-newlist.
# Globals read: BASE_DIR, SITE_DIR, TARGET_DIR, TEMP_PREFIX
# Side effects: changes the working directory to the download folder and
#               launches wget with -b, logging to ${TEMP_PREFIX}.log
# Aborts through Err_Fatal when the download folder cannot be entered.
Fetch_Images() {
  if [ "$(echo $(wc -l < "${TEMP_PREFIX}-newlist"))" -eq 0 ]; then
    echo "No new file"
  else
    printf "Starting wget... "
    # wget saves into the current directory, so a failed cd must stop here
    # rather than let the files land somewhere else
    cd "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}" || Err_Fatal "Unable to enter ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}"
    wget -e continue=on -bi "${TEMP_PREFIX}-newlist" -o "${TEMP_PREFIX}.log"
  fi
}
# Initialize base variables and parse the command line.
# Arguments: COMMAND [-s|--site SITE] [-nc|--noclean] TAGS...
# Globals read:    ADDITIONAL_PATH, DEFAULT_SITE, BASE_DIR, PWD, HOME
# Globals written: PATH (exported), ISQUICK, ISNEW, JOB, SITE, TAGS,
#                  NOCLEAN (only when -nc is given), BASE_DIR, TARGET_DIR,
#                  SITE_DIR, TEMP_PREFIX
# Leaves through Err_Help on a missing or unknown command and through
# Err_Fatal when no tag is given.
Init(){
  # path initialization
  [ "${ADDITIONAL_PATH}" ] && PATH="${ADDITIONAL_PATH}:${PATH}"
  export PATH
  # misc variables
  ISQUICK=
  ISNEW=
  [ $# -lt 2 ] && Err_Help
  case "$1" in
    check|fetch|quickfetch)
      echo "Starting..."
      JOB="$1"
      ;;
    *)
      Err_Help
      ;;
  esac
  shift
  SITE=
  TAGS=
  # parsing stops at the end of the arguments or at the first empty one
  while [ "${1}" ]; do
    case "$1" in
      -s|--site)
        # The value is the next argument. When -s comes last there is
        # nothing to skip; shifting past the end would abort some shells.
        SITE="${2}"
        [ $# -ge 2 ] && shift
        ;;
      -nc|--noclean)
        NOCLEAN=1
        ;;
      *)
        # every further tag is put in front of the ones collected so far
        if [ "${TAGS}" ]; then
          TAGS="$1 ${TAGS}"
        else
          TAGS="$1"
        fi
        ;;
    esac
    shift
  done
  [ "${SITE}" ] || SITE="${DEFAULT_SITE}"
  [ "${TAGS}" ] || Err_Fatal "No tag specified"
  # Get base folder - default, current folder or fallback to ${HOME}
  [ "${BASE_DIR}" ] || BASE_DIR="${PWD}"
  [ "${BASE_DIR}" ] || BASE_DIR="${HOME}"
  # make sure the path starts with a slash
  case "${BASE_DIR}" in
    /*) ;;
    *) BASE_DIR="/${BASE_DIR}" ;;
  esac
  echo "Tags: ${TAGS}"
  # slash is not wanted for folder name
  TARGET_DIR=$(printf '%s\n' "${TAGS}" | sed -e 's/\//_/g')
  # a trailing slash of the site is dropped first, the others become "_"
  SITE_DIR=$(printf '%s\n' "${SITE}" | sed -e 's/\/$//g;s/\//_/g')
  TEMP_PREFIX="${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}"
}
# start-up: banner, command line, then tool and folder checks
Msg_Welcome
Init "$@"
Check_Tools
Check_Folders
# Run the requested job. Every job builds the server list and the download
# list; only the fetch variants go on to download.
case "${JOB}" in
  check)
    Generate_Link
    Check_Files
    ;;
  fetch|quickfetch)
    # quick mode marks the repository as new so that Check_Files takes the
    # complete server list without verifying local files
    if [ "${JOB}" = "quickfetch" ]; then
      ISNEW=1
      ISQUICK=1
    fi
    Generate_Link
    Check_Files
    Fetch_Images
    ;;
esac