get-tracks/get-tracks.sh

#!/usr/bin/env sh

# Regroup a stream holding one hexadecimal byte per line into one output
# line per original text line: bytes are joined with single spaces and a
# line is emitted each time the byte 0a (hex for '\n') is read.
# Ex: 61 62 63 0a
# A trailing line that lacks its final 0a is still printed.
# Required to easily match (and remove) multi-byte characters.
#
# The awk call is wrapped in braces because POSIX requires a compound
# command as a function body; bash rejects a bare simple command there.
regroup_lines() {
	awk '
	{
		# "pending" is true while the current output line is not flushed yet.
		line = (pending ? line " " : "") $1
		pending = 1

		if ($1 == "0a") {
			print line
			pending = 0
		}
	}

	END {
		# Flush the last line when the input did not end with 0a.
		if (pending) {
			print line
		}
	}
	'
}

# From ’ (bytes e2 80 99) to ' (byte 27), on space-separated hex bytes.
# Braces are required: POSIX function bodies must be compound commands.
simple_quote() {
	sed "s/e2 80 99/27/g"
}

# From / (byte 2f) to - (byte 2d), on space-separated hex bytes.
# Braces are required: POSIX function bodies must be compound commands.
replace_slashes() {
	sed "s/2f/2d/g"
}

# Drop every backslash (byte 5c) from space-separated hex bytes.
# The separators around a removed byte are left in place.
# Braces are required: POSIX function bodies must be compound commands.
remove_backslashes() {
	sed "s/5c//g"
}

# Drop three-byte sequences starting with e2 80 (together with the space
# that follows them) from space-separated hex bytes.
# Braces are required: POSIX function bodies must be compound commands.
remove_multibyte_characters() {
	sed "s/e2 80 .. //g"
}

# Upper-case the letters a-z of the input.
# Braces are required: POSIX function bodies must be compound commands.
uppercase() {
	tr "[a-z]" "[A-Z]"
}

# One column decimal to plain text: each input line holds one character
# code, and the matching characters are written without any separator.
# Braces are required: POSIX function bodies must be compound commands.
from_dec() {
	awk '{ printf ("%c", $1 + 0) }'
}

# Replace spaces by line returns, outputs a single column.
# Braces are required: POSIX function bodies must be compound commands.
spaces_to_line_returns() {
	tr " " "\n"
}

# Convert input into hexadecimal and a single byte per line.
# od -v is mandatory here: without it od replaces runs of identical
# 16-byte output lines with a single "*", which silently drops bytes
# (for instance on a long line made of one repeated character).
to_hex_one_column() {
	od -An -v -tx1 | awk '{ for (i = 1; i <= NF; i++) print $i }'
}

# One column hexa to one column decimal, delegated to bc.
# The bc prologue sets obase before ibase, so the "10" of obase is still
# read in base ten; the hexadecimal values read from stdin follow it.
hex_to_dec() {
	{
		printf '%s\n' "obase=10;ibase=16;"
		cat
	} | bc
}

# Reverse hexadecimal (with space separators) to original value:
# one byte per line, upper-cased, converted to decimal, then to characters.
from_hex() {
	spaces_to_line_returns \
		| uppercase \
		| hex_to_dec \
		| from_dec
}

# Clean up the track list read on stdin by working on its hexadecimal form.
# Stages, in order:
#   1. to_hex_one_column            input to hexadecimal, one byte per line
#   2. regroup_lines                back to one line of spaced bytes per text line
#   3. simple_quote                 "’" becomes "'"
#   4. replace_slashes              "/" becomes "-"
#   5. remove_multibyte_characters  drop the remaining e2 80 xx sequences
#   6. remove_backslashes           backslashes can mess with the script
#   7. from_hex                     convert back from hexadecimal to text
to_ascii() {
	to_hex_one_column \
		| regroup_lines \
		| simple_quote \
		| replace_slashes \
		| remove_multibyte_characters \
		| remove_backslashes \
		| from_hex
}

# Compute the end of each track from the start of the next one.
# Input:   one track per line, "<start> <title words...>".
# Output:  tab-separated "<start> <end> <title>" records; the last track
#          gets the literal end marker END_OF_FILE.
# Globals: NONUMBER (read)  - when 1, titles are not prefixed with a number
#          SEPARATOR (read) - text placed between the number and the title
# Blank lines are ignored and empty input produces no output.
# Braces are required: POSIX function bodies must be compound commands.
comp_end_of_tracks() {
	awk -v NONUMBER="$NONUMBER" -v SEPARATOR="$SEPARATOR" '
	BEGIN {
		OFS = "\t"
	}

	# A blank line is not a track: skipping it keeps the numbering dense
	# and avoids records with an empty timestamp.
	NF == 0 {
		next
	}

	{
		tracks++

		# The start of this track is the end of the previous one.
		if (tracks > 1) {
			print timestamp, $1, title
		}

		timestamp = $1

		if (NONUMBER == 1) {
			title = $2
		}
		else {
			# Two-digit, zero-padded track number.
			title = sprintf("%02d", tracks) SEPARATOR $2
		}

		for (i = 3; i <= NF; i++) {
			title = title " " $i
		}
	}

	END {
		# Only flush the last track when at least one was read.
		if (tracks > 0) {
			print timestamp, "END_OF_FILE", title
		}
	}
	'
}

# Rewrite the first column of each line from [[H:]M:]S to seconds
# (from 10:30 to 630); the other columns are kept, separated by single
# spaces. Only the last three ":"-separated components are used.
# Blank lines are passed through as empty lines.
# Braces are required: POSIX function bodies must be compound commands.
first_column_to_seconds() {
	awk '
	NF == 0 {
		print ""
		next
	}

	{
		n = split($1, part, ":")

		# Components are read from the right: seconds, minutes, hours.
		# Each one is guarded by n so that no missing element is read.
		seconds = part[n]
		if (n >= 2) {
			seconds += 60 * part[n - 1]
		}
		if (n >= 3) {
			seconds += 3600 * part[n - 2]
		}

		$1 = seconds
		print
	}
	'
}

# Get a more usable time representation for the beginning and the end of
# songs: clean up the list, convert start times to seconds, then pair each
# start with the start of the following track.
get_timestamps() {
	to_ascii \
		| first_column_to_seconds \
		| comp_end_of_tracks
}

# Extract one track with ffmpeg.
# Arguments: $1 - input audio file
#            $2 - start position
#            $3 - end position, empty to read until the end of the input
#            $4 - output file name
# Globals:   VERBOSITY (read)  - 0: silent, 1: track name, 2: ffmpeg command
#            SIMULATION (read) - when not empty, ffmpeg is not invoked
# Exits with status 1 when VERBOSITY is not 0, 1 or 2.
run_ffmpeg() {
	file=$1
	from=$2
	to=$3
	final_title=$4

	INPUT_FILE="$file"
	OUTPUT_FILE="$final_title"

	# Option groups, left unquoted on purpose when ffmpeg is called so that
	# each of them expands to separate words.
	LOG_LEVEL="-loglevel error"
	FROM="-ss $from"
	if [ -n "$to" ]; then
		TO="-to $to"
	else
		TO=""
	fi

	case "$VERBOSITY" in
		0) ;;
		1) echo "extracting '$final_title'" ;;
		2) echo "ffmpeg $LOG_LEVEL $FROM $TO -i $INPUT_FILE '$OUTPUT_FILE'" ;;
		*)
			echo "verbosity is not set properly" >&2
			exit 1
			;;
	esac

	if [ -z "$SIMULATION" ]; then
		ffmpeg $LOG_LEVEL $FROM $TO -i "$INPUT_FILE" "$OUTPUT_FILE"
	fi
}

# Split an audio file into one file per track.
# Arguments: $1 - audio file to split
#            $2 - track list, read through get_timestamps
# Globals:   FORMAT (read) - extension appended to every track title
extraction() {
	audio_file="$1"
	time_file="$2"

	# get_timestamps separates start, end and title with tabs. Splitting on
	# tabs with "read -r" keeps the title verbatim: no pathname expansion,
	# no whitespace collapsing, no backslash processing.
	tab=$(printf '\t')

	get_timestamps < "$time_file" |
		while IFS="$tab" read -r track_start track_end track_title; do
			# The last track has no end: it runs until the end of the file.
			if [ "$track_end" = "END_OF_FILE" ]; then
				track_end=""
			fi

			# Input is /dev/null, otherwise the command would consume the
			# output of "get_timestamps" that the loop is still reading.
			run_ffmpeg "${audio_file}" \
				"${track_start}" "${track_end}" \
				"${track_title}.${FORMAT}" < /dev/null
		done
}

# Print the help text on stdout. $0 is expanded in the text, so the usage
# lines show the name the script was invoked with.
# The VERBOSITY range reads 0-2: those are the only levels run_ffmpeg
# accepts, any other value makes it exit with an error.
usage(){
	cat <<END
Get tracks:
usage: $0 <single-file-playlist> <song-list>

Debug mode (displays starting and ending times for each song):
usage: $0 <song-list>


Format for <song-list>:
  0:00 First track
  1:30 Second track

Environment variables:
- SIMULATION [empty or not]
    do not invoke ffmpeg
- NONUMBER [empty or 1]
    do not write song numbers
- FORMAT [mp3,ogg,opus,…]
    see the ffmpeg documentation
- SEPARATOR [separator] (default: ' - ')
    separator between number and name
    example with SEPARATOR='_': 01_intro.opus 02_blah.opus…
- HEADERS [empty or 1]
    print environment parameters (verbosity, simulation, etc.)
- VERBOSITY [0-2] (default: 1)
    0: no output except errors from ffmpeg
    1: simple indications on the current track being extracted
    2: print actual ffmpeg commands the script currently runs
END
}

# Print a message about the run configuration, only when HEADERS is "1".
# Arguments: the message; several arguments are joined with spaces.
# The message is printed verbatim: quoting "$*" prevents the shell from
# glob-expanding it (e.g. a SEPARATOR of " * ") or collapsing its spaces.
header(){
	if [ "$HEADERS" = "1" ]; then
		printf '%s\n' "$*"
	fi
}

# Settings come from the environment; apply the defaults and report each
# choice through header.

# Output format, also used as the file extension of the tracks.
if [ -n "$FORMAT" ]; then
	header "FORMAT: $FORMAT"
else
	header "default FORMAT: opus"
	FORMAT="opus"
fi

if [ -n "$VERBOSITY" ]; then
	header "VERBOSITY level: $VERBOSITY"
else
	header "default VERBOSITY: 1"
	VERBOSITY=1
fi

# Track numbering and the separator that goes with it.
if [ -n "$NONUMBER" ]; then
	header "NONUMBER: won't prefix tracks"
	SEPARATOR=""
else
	header "default NONUMBER: disabled"
	NONUMBER=0

	# Assume that there should be a separator.
	if [ -n "$SEPARATOR" ]; then
		header "SEPARATOR: '$SEPARATOR'"
	else
		header "default SEPARATOR: ' - '"
		SEPARATOR=" - "
	fi
fi

if [ -n "$SIMULATION" ]; then
	header "SIMULATION envvar is set: this is a simulation."
fi

# Dispatch on the number of arguments:
#   0: help; 1: debug mode (timestamps only); 2: extraction; more: error.
if [ "$#" -eq 0 ]; then
	usage
	exit 0
elif [ "$#" -eq 1 ]; then
	get_timestamps < "$1"
elif [ "$#" -eq 2 ]; then
	extraction "$1" "$2"
else
	usage 1>&2
	exit 1
fi