#!/usr/bin/env bash
#
# Download needed files from IMDb, even if they already exist
#
# See https://www.imdb.com/interfaces/ for a description of IMDb datasets
# Run from the directory that holds this script so the relative helper paths
# below resolve no matter where the script was invoked from.
DIRNAME=$(dirname "$0")
cd "$DIRNAME" || exit
# In a non-interactive bash, sourcing a missing file only prints an error and
# the script keeps running with its variables and functions undefined. Check
# that every helper file is readable first and stop early if one is not.
# (The exit status of `source` itself is deliberately not tested: it reflects
# the last command run inside the helper, not whether the file was loaded.)
for helperFile in functions/define_colors functions/define_files \
    functions/load_functions; do
    if [[ ! -r $helperFile ]]; then
        printf "==> [Error] Cannot read %s\n" "$DIRNAME/$helperFile" >&2
        exit 1
    fi
done
unset helperFile
source functions/define_colors
source functions/define_files
source functions/load_functions
# Keep track of elapsed time
SECONDS=0
# Final bookkeeping for a run, then leave the script.
# Hands the elapsed $SECONDS to saveDurations and "$numRecordsFile" to
# saveHistory, trimming each log afterwards. The script's exit status is that
# of the last helper called (trimHistory), because `exit` is given no argument.
terminate() {
    # Record this run's duration; retain only 3 duration lines for this script
    saveDurations "$SECONDS"
    trimDurations -m 3

    # Archive the record counts; retain only 5 history files for this script
    saveHistory "$numRecordsFile"
    trimHistory -m 5

    exit
}
# Make sure we can execute curl and rg. If not, quit before downloading anything.
checkForExecutable curl
checkForExecutable rg
printf "==> Downloading new IMDb .gz files.\n"
# Let us know how long it took last time
printDuration
for file in "${gzFiles[@]}"; do
    source="https://datasets.imdbws.com/$file"
    printf "Downloading %s\n" "$source"
    # -f: treat an HTTP error as a failure instead of saving the server's
    #     error page under the .gz file name.
    # -S: still print curl's error message even though -s hides the progress meter.
    if ! curl -fsS -O "$source"; then
        printf "==> [Warning] Unable to download %s\n" "$source" >&2
    fi
done
# Caches are no longer valid
# ${cacheDirectory:?} stops the script instead of handing rm -rf an empty path
# when the variable is unset or empty.
rm -rf -- "${cacheDirectory:?}"
mkdir -p -- "$cacheDirectory"
printf "==> Recording IMDb .gz file sizes.\n"
# Count the non-empty lines of each compressed file and store the result as
# "<file><TAB><count>". --with-filename keeps the file name column even when
# gzFiles holds a single file, where rg would otherwise print only the count.
rg -cz --with-filename "^." "${gzFiles[@]}" | sort | perl -p -e 's/:/\t/;' >"$numRecordsFile"
# Save durations and exit
terminate