get-tracks/get-tracks.sh

262 lines
5.4 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#!/usr/bin/env sh
# regroup_lines: rebuild text lines from a one-byte-per-line hex stream.
# Input:  one hexadecimal byte per line.
# Output: the same bytes joined with single spaces, one output line per
#         input text line, each ending with 0a (hex for '\n').
#         Ex: 61 62 63 0a
# A trailing run of bytes that is not terminated by 0a is still printed.
# Required to easily match (and remove) multi-byte characters.
# The body is a brace group because POSIX only accepts a compound command
# as a function body; `name() awk ...` is rejected by shells such as bash.
regroup_lines() {
  awk '
    {
      # Start a new line with this byte, or append it to the pending one.
      line = (pending ? line " " $1 : $1)
      pending = 1
      if ($1 == "0a") {
        print line
        pending = 0
      }
    }
    END {
      # Flush bytes that were not followed by a newline byte.
      if (pending)
        print line
    }
  '
}
# simple_quote: in a hex-encoded line, replace the typographic apostrophe
# U+2019 (UTF-8 bytes e2 80 99) with the ASCII single quote (27).
# Brace group: POSIX requires a compound command as a function body.
simple_quote() {
  sed 's/e2 80 99/27/g'
}
# replace_slashes: in a hex-encoded line, replace '/' (2f) with '-' (2d).
# Brace group: POSIX requires a compound command as a function body.
replace_slashes() {
  sed 's/2f/2d/g'
}
# remove_backslashes: in a hex-encoded line, delete every '\' byte (5c).
# Only the two hex digits are removed, so the surrounding separators stay
# (two consecutive spaces are left where the byte was).
# Brace group: POSIX requires a compound command as a function body.
remove_backslashes() {
  sed 's/5c//g'
}
# remove_multibyte_characters: in a hex-encoded line, delete every 3-byte
# sequence that starts with "e2 80" (whatever the third byte is).
# Other non-ASCII sequences are left untouched.
# The second expression handles a sequence sitting at the very end of the
# line, where there is no trailing space for the first expression to match
# (last line of an input that does not end with a newline).
# Brace group: POSIX requires a compound command as a function body.
remove_multibyte_characters() {
  sed -e 's/e2 80 .. //g' -e 's/ *e2 80 ..$//'
}
# uppercase: map the letters a-z to A-Z on standard input.
# Brace group: POSIX requires a compound command as a function body.
uppercase() {
  tr '[a-z]' '[A-Z]'
}
# from_dec: one column of decimal byte values to plain text.
# Each input line is converted to the single character with that code;
# nothing is added between characters.
# LC_ALL=C keeps the conversion byte-wise: in a multibyte locale some awk
# implementations treat values above 127 as code points and would emit
# several bytes for one input value.
# Brace group: POSIX requires a compound command as a function body.
from_dec() {
  LC_ALL=C awk '{ printf("%c", $1 + 0) }'
}
# spaces_to_line_returns: replace every space with a newline, turning
# space-separated values into a single column.
# Brace group: POSIX requires a compound command as a function body.
spaces_to_line_returns() {
  tr ' ' '\n'
}
# to_hex_one_column: convert standard input into hexadecimal, one byte
# (two hex digits) per output line.
# od -v is required: without it od replaces consecutive identical rows of
# output with a single "*", which would drop bytes from inputs that contain
# long runs of the same character.
to_hex_one_column() {
  od -An -v -tx1 |
    awk '{ for (i = 1; i <= NF; i++) print $i }'
}
# hex_to_dec: one column of hexadecimal numbers to one column of decimal
# numbers, computed by bc.
hex_to_dec() {
  # The base settings are sent to bc first, followed by the numbers read
  # from standard input. obase is set before ibase, while "10" is still
  # read as a decimal number.
  {
    echo "obase=10;ibase=16;"
    cat
  } | bc
}
# from_hex: turn space-separated hexadecimal bytes back into the original
# text. The input is put on one value per line, uppercased, converted to
# decimal and finally converted to characters.
from_hex() {
  spaces_to_line_returns |
    uppercase |
    hex_to_dec |
    from_dec
}
# to_ascii: clean up text read from standard input so it is safe to use in
# file names. The text is processed in its hexadecimal form:
#   1. to_hex_one_column            input to hex, one byte per line
#   2. regroup_lines                back to one (hex) line per text line
#   3. simple_quote                 typographic apostrophe to "'"
#   4. replace_slashes              "/" to "-"
#   5. remove_multibyte_characters  drop "e2 80 xx" sequences
#   6. remove_backslashes           drop "\" (can mess with the script)
#   7. from_hex                     hex back to text
to_ascii() {
  to_hex_one_column |
    regroup_lines |
    simple_quote |
    replace_slashes |
    remove_multibyte_characters |
    remove_backslashes |
    from_hex
}
# comp_end_of_tracks: compute where each track ends.
# Input:  lines of the form "<start> <title words...>".
# Output: one line per track, "<start> <end> <title>", where <end> is the
#         start of the following track, or the literal END_OF_FILE for the
#         last track. Title words are joined with single spaces.
# Globals (read): NONUMBER  - when equal to 1, titles are not numbered.
#                 SEPARATOR - text put between the track number and title.
# Unless NONUMBER is 1, titles are prefixed with the track number, padded
# to at least two digits (01, 02, ... 10, 11, ...).
# Blank lines are ignored so that they neither create a track nor shift the
# numbering, and nothing is printed when the input holds no track at all.
# Brace group: POSIX requires a compound command as a function body.
comp_end_of_tracks() {
  awk -v NONUMBER="$NONUMBER" -v SEPARATOR="$SEPARATOR" '
    BEGIN {
      OFS = " "
    }
    # A line without any field carries no track.
    NF == 0 {
      next
    }
    {
      track++
      # The start of this track is the end of the previous one.
      if (track > 1)
        print timestamp, $1, title

      timestamp = $1
      if (NONUMBER == 1)
        title = $2
      else
        title = sprintf("%02d", track) SEPARATOR $2
      for (i = 3; i <= NF; i++)
        title = title " " $i
    }
    END {
      # The last track runs until the end of the audio file.
      if (track > 0)
        print timestamp, "END_OF_FILE", title
    }
  '
}
# first_column_to_seconds: convert the first field of every line from
# [[H:]M:]S to a number of seconds (ex: 10:30 becomes 630) and print the
# line with its fields separated by single spaces.
# Only the three rightmost ":"-separated components are used (seconds,
# minutes, hours); split() numbers its elements from 1, so the loop never
# goes below index 1.
# Blank lines are dropped: they carry no timestamp to convert.
# Brace group: POSIX requires a compound command as a function body.
first_column_to_seconds() {
  awk '
    NF == 0 {
      next
    }
    {
      n = split($1, part, ":")
      seconds = 0
      weight = 1
      # Walk from the rightmost component: x1, then x60, then x3600.
      for (i = n; i >= 1 && i > n - 3; i--) {
        seconds += weight * part[i]
        weight *= 60
      }
      $1 = seconds
      print
    }
  '
}
# get_timestamps: read a song list on standard input and print, for each
# song, a more usable representation of where it begins and ends.
# The list is cleaned by to_ascii, its first column is converted by
# first_column_to_seconds, then comp_end_of_tracks adds the end of each
# track.
get_timestamps() {
  to_ascii |
    first_column_to_seconds |
    comp_end_of_tracks
}
# run_ffmpeg: extract one track from an audio file with ffmpeg.
# Arguments: $1 - input audio file
#            $2 - start position, passed to ffmpeg as "-ss"
#            $3 - end position, passed as "-to"; empty means no "-to"
#            $4 - output file name
# Globals (read): VERBOSITY  - 0: silent, 1: print the track being
#                              extracted, 2: print the ffmpeg command;
#                              any other value is an error (exit 1)
#                 SIMULATION - when not empty, ffmpeg is not invoked
run_ffmpeg() {
  file=$1
  from=$2
  to=$3
  final_title=$4

  INPUT_FILE="$file"
  OUTPUT_FILE="$final_title"
  LOG_LEVEL="-loglevel error"
  FROM="-ss $from"
  # Expands to "-to <end>" only when an end position was given.
  TO="${to:+-to $to}"

  case "v$VERBOSITY" in
    v0) ;;
    v1) echo "extracting '$final_title'" ;;
    v2) echo "ffmpeg $LOG_LEVEL $FROM $TO -i $INPUT_FILE '$OUTPUT_FILE'" ;;
    *)
      echo "verbosity is not set properly" >&2
      exit 1
      ;;
  esac

  # Simulation: everything has been reported, nothing is extracted.
  [ "$SIMULATION" = "" ] || return 0

  # LOG_LEVEL, FROM and TO each hold an option and its value: they are left
  # unquoted on purpose so that they split into separate arguments.
  ffmpeg $LOG_LEVEL $FROM $TO -i "$INPUT_FILE" "$OUTPUT_FILE"
}
# extraction: split an audio file into one file per track.
# Arguments: $1 - audio file holding all the tracks
#            $2 - song list (one "<time> <title>" line per track)
# Globals (read): FORMAT - extension given to every output file
# Each line printed by get_timestamps is "<start> <end> <title...>"; it is
# split by `read` itself: the third variable receives the rest of the line.
# This keeps the title out of any unquoted expansion, so characters such as
# "*" or "?" in a title are never expanded against the files of the current
# directory, and no external command is spawned to split the line.
extraction() {
  audio_file="$1"
  time_file="$2"
  get_timestamps < "$time_file" |
    while read -r track_start track_end track_title; do
      # The last track has no end position: let it run to the end.
      if [ "$track_end" = "END_OF_FILE" ]; then
        track_end=""
      fi
      # Input is /dev/null, otherwise the commands started by run_ffmpeg
      # would take the output of "get_timestamps" as their input and
      # consume the lines meant for this loop.
      run_ffmpeg "${audio_file}" \
        "${track_start}" "${track_end}" \
        "${track_title}.${FORMAT}" < /dev/null
    done
}
# usage: print the help text on standard output.
# The here-document delimiter is not quoted, so $0 is replaced by the name
# the script was invoked with.
# VERBOSITY is documented as 0-2: these are the only levels accepted by
# run_ffmpeg, any other value makes it fail.
usage(){
cat <<END
Get tracks:
usage: $0 <single-file-playlist> <song-list>
Debug mode (displays starting and ending times for each song):
usage: $0 <song-list>
Format for <song-list>:
0:00 First track
1:30 Second track
Environment variables:
- SIMULATION [empty or not]
do not invoke ffmpeg
- NONUMBER [empty or 1]
do not write song numbers
- FORMAT [mp3,ogg,opus,…]
see the ffmpeg documentation
- SEPARATOR [separator] (default: ' - ')
separator between number and name
example with SEPARATOR='_': 01_intro.opus 02_blah.opus…
- HEADERS [empty or 1]
print environment parameters (verbosity, simulation, etc.)
- VERBOSITY [0-2] (default: 1)
0: no output except errors from ffmpeg
1: simple indications on the current track being extracted
2: print actual ffmpeg commands the script currently runs
END
}
# header: print an informational message, only when HEADERS is "1".
# Arguments: the words of the message; they are joined with spaces.
# The message is printed verbatim: "$*" is quoted so that its content is
# neither split again nor expanded as a file name pattern (a SEPARATOR of
# "*" must be reported as "*").
header(){
  if [ "$HEADERS" = "1" ]; then
    printf '%s\n' "$*"
  fi
}
# Settings: every value comes from the environment, with a default when it
# is empty. header reports the value in effect (it prints only when
# HEADERS is "1").
if [ "$FORMAT" = "" ]; then
  header "default FORMAT: opus"
  FORMAT="opus"
else
  header "FORMAT: $FORMAT"
fi

if [ "$VERBOSITY" = "" ]; then
  header "default VERBOSITY: 1"
  VERBOSITY=1
else
  header "VERBOSITY level: $VERBOSITY"
fi

if [ "$NONUMBER" = "" ]; then
  header "default NONUMBER: disabled"
  NONUMBER=0
  # Assume that there should be a separator.
  if [ "$SEPARATOR" = "" ]; then
    header "default SEPARATOR: ' - '"
    SEPARATOR=" - "
  else
    header "SEPARATOR: '$SEPARATOR'"
  fi
else
  header "NONUMBER: won't prefix tracks"
  # comp_end_of_tracks drops the track number only when NONUMBER is exactly
  # 1: normalize any other non-empty value, otherwise tracks would still be
  # numbered (with an empty separator) despite the message above.
  NONUMBER=1
  SEPARATOR=""
fi

if [ "$SIMULATION" != "" ]; then
  header "SIMULATION envvar is set: this is a simulation."
fi

# Entry point, selected by the number of arguments:
#   0 - print the help
#   1 - debug mode: print the computed timestamps for <song-list>
#   2 - extract the tracks of <single-file-playlist> using <song-list>
#   more - print the help on standard error and fail
case $# in
  0) usage; exit 0 ;;
  1) get_timestamps < "$1" ;;
  2) extraction "$1" "$2" ;;
  *) usage 1>&2; exit 1 ;;
esac