#!/usr/bin/env sh
#
# Split a single audio file into one file per track with ffmpeg, driven by a
# plain-text "time file" whose lines are "<start time> <track title>".
#
#   <script> <audio-file> <time-file>   extract every track
#   <script> <time-file>                debug: print "start end title" lines
#
# Behaviour is tuned through environment variables (SIMULATION, FORMAT,
# FFOPTS, NONUMBER, SEPARATOR, HEADERS, VERBOSITY); see usage() below.
#
# Every function body is a brace group: POSIX only guarantees compound
# commands as function bodies, and bash rejects "name() simple-command".

# From a single byte in hexadecimal per line to lines ending with 0a
# (hex for '\n'). Ex: 61 62 63 0a
# Required to easily match (and remove) multi-byte characters.
regroup_lines() {
  awk '
    BEGIN { line_start = 1 }

    {
      if (line_start == 1) {
        line = $1
      } else {
        line = line " " $1
      }

      line_start = 0
      if ($1 == "0a") {
        print line
        line_start = 1
      }
    }

    # Flush a last line that had no trailing newline.
    END {
      if (line_start == 0) {
        print line
      }
    }
  '
}

# From ’ (UTF-8 bytes e2 80 99) to ' (27), on the hexadecimal representation.
simple_quote() { sed "s/e2 80 99/27/g"; }

# From / (2f) to - (2d), on the hexadecimal representation.
replace_slashes() { sed "s/2f/2d/g"; }

# Drop backslashes (5c), on the hexadecimal representation.
remove_backslashes() { sed "s/5c//g"; }

# Drop the remaining three-byte sequences starting with e2 80 (typographic
# punctuation). Other non-ASCII bytes are left untouched.
remove_multibyte_characters() { sed "s/e2 80 .. //g"; }

# Upper-case the input (bc only understands upper-case hexadecimal digits).
uppercase() { tr "[a-z]" "[A-Z]"; }

# One column decimal to plain text.
from_dec() { awk '{ printf ("%c", $1 + 0) }'; }

# Replace spaces by line returns, outputs a single column.
spaces_to_line_returns() { tr " " "\n"; }

# Convert input into hexadecimal and a single byte per line.
to_hex_one_column() {
  od -An -tx1 | awk '{ for (i = 1; i <= NF; i++) { print $i } }'
}

# One column hexa to one column decimal.
# obase is set first, while "10" is still read as a decimal number.
hex_to_dec() {
  {
    echo "obase=10;ibase=16;"
    cat
  } | bc
}

# Reverse hexadecimal (with space separators) to original value.
from_hex() { spaces_to_line_returns | uppercase | hex_to_dec | from_dec; }

# Sanitize stdin so that titles are usable as file names: convert "’" to "'"
# and "/" to "-", then remove e2 80 xx sequences and backslashes.
# Reads stdin, writes the cleaned text to stdout.
to_ascii() {
  to_hex_one_column |            # Input to hexadecimal, 1 byte per line.
    regroup_lines |              # From 1-byte to x-byte lines, space separated.
    simple_quote |               # From "’" to "'".
    replace_slashes |            # From "/" to "-".
    remove_multibyte_characters | # Remove e2 80 xx sequences.
    remove_backslashes |         # Can mess with the script.
    from_hex                     # Convert back from hex to plain text.
}

# Turn "<start> <title...>" lines into "<start> <end> <title...>" lines, where
# <end> is the start of the following track, or END_OF_FILE for the last one.
# Titles are prefixed with a zero-padded track number and SEPARATOR unless
# NONUMBER is 1.
# Globals:  NONUMBER (read), SEPARATOR (read)
# Input:    stdin, first column already in seconds
# Output:   stdout, one line per track
# Blank lines are ignored and do not consume a track number; an input without
# any track produces no output.
comp_end_of_tracks() {
  awk -v NONUMBER="$NONUMBER" -v SEPARATOR="$SEPARATOR" '
    BEGIN { OFS = " "; track = 0 }

    NF == 0 { next }

    {
      track++

      # The previous track ends where this one starts.
      if (track > 1) {
        print timestamp, $1, title
      }

      timestamp = $1

      if (NONUMBER == 1) {
        title = $2
      } else {
        title = sprintf("%02d", track) SEPARATOR $2
      }

      for (i = 3; i <= NF; i++) {
        title = title " " $i
      }
    }

    END {
      if (track > 0) {
        print timestamp, "END_OF_FILE", title
      }
    }
  '
}

# Convert the first column from [[H:]M:]S to a number of seconds
# (ex: from 10:30 to 630). Blank lines are dropped.
first_column_to_seconds() {
  awk '
    NF == 0 { next }

    {
      n = split($1, part, ":")

      seconds = part[n] + 0
      if (n >= 2) {
        seconds += 60 * part[n - 1]
      }
      if (n >= 3) {
        seconds += 3600 * part[n - 2]
      }

      $1 = seconds
      print
    }
  '
}

# Get a more usable time representation for the beginning and the end of songs.
# Reads a time file on stdin, prints "<start> <end> <title>" lines.
get_timestamps() { to_ascii | first_column_to_seconds | comp_end_of_tracks; }

# Extract a single track with ffmpeg.
# Arguments: $1 - input audio file
#            $2 - start of the track, in seconds
#            $3 - end of the track, in seconds (empty: until the end of file)
#            $4 - output file name
# Globals:   VERBOSITY (read), SIMULATION (read), FFOPTS (read)
# Exits with status 1 when VERBOSITY is not 0, 1 or 2.
run_ffmpeg() {
  file=$1
  from=$2
  to=$3
  final_title=$4

  LOG_LEVEL="-loglevel error"
  FROM="-ss $from"
  TO=""
  if [ -n "$to" ]; then
    TO="-to $to"
  fi

  case "v$VERBOSITY" in
    v0) ;;
    v1) printf '%s\n' "extracting '$final_title'" ;;
    v2) printf '%s\n' "ffmpeg $LOG_LEVEL $FROM $TO -i $file $FFOPTS '$final_title'" ;;
    *)
      echo "verbosity is not set properly" >&2
      exit 1
      ;;
  esac

  if [ -z "$SIMULATION" ]; then
    # LOG_LEVEL, FROM, TO and FFOPTS each hold several ffmpeg arguments and
    # are left unquoted on purpose so that they are split into words.
    # shellcheck disable=SC2086
    ffmpeg $LOG_LEVEL $FROM $TO -i "$file" $FFOPTS "$final_title"
  fi
}

# Extract every track listed in a time file from an audio file.
# Arguments: $1 - input audio file
#            $2 - time file
# Globals:   FORMAT (read) - extension given to the output files
extraction() {
  audio_file=$1
  time_file=$2

  # read -r splits each line into start, end and "everything else" without
  # any glob expansion, so titles containing * or ? are kept verbatim.
  get_timestamps < "$time_file" |
    while read -r track_start track_end track_title; do
      if [ "$track_end" = "END_OF_FILE" ]; then
        track_end=""
      fi

      # Input is /dev/null, otherwise ffmpeg would consume the output of
      # "get_timestamps" that the loop is reading.
      run_ffmpeg "$audio_file" "$track_start" "$track_end" \
        "${track_title}.${FORMAT}" < /dev/null
    done
}

# Print the help text on stdout.
usage() {
  cat <<END
usage: $0 <audio-file> <time-file>

Debug mode (displays starting and ending times for each song):
usage: $0 <time-file>

Format for <time-file>:
0:00 First track
1:30 Second track

Environment variables:
- SIMULATION [empty or not]                do not invoke ffmpeg
- FORMAT     [mp3,ogg,opus,…]              see ffmpeg documentation
- FFOPTS     (default: '-c:a copy')        see ffmpeg documentation
- NONUMBER   [empty or 1]                  do not write song numbers
- SEPARATOR  [separator] (default: ' - ')  separator between number and name
             example with SEPARATOR='_':   01_intro.opus 02_blah.opus…
- HEADERS    [empty or 1]                  print env params (verbosity, quality, etc.)
- VERBOSITY  [0-2] (default: 1)
    0: no output except errors from ffmpeg
    1: simple indications on the current track being extracted
    2: print actual ffmpeg commands the script currently runs
END
}

# Print the arguments on stdout, only when HEADERS is 1.
header() {
  if [ "$HEADERS" = "1" ]; then
    printf '%s\n' "$*"
  fi
}

# Print the arguments as a warning on stderr, so that warnings never mix with
# the timestamp listing of the debug mode.
warning() {
  printf 'WARNING: %s\n' "$*" >&2
}

# Resolve the defaults of every environment variable, then dispatch on the
# number of command-line arguments.
main() {
  # Default output format is based on the extension of the input audio file.
  DEFAULT_FORMAT=""
  if [ $# -eq 2 ]; then
    DEFAULT_FORMAT=${1##*.}
  else
    header "no default FORMAT selected"
  fi

  if [ -z "$FORMAT" ]; then
    FORMAT="$DEFAULT_FORMAT"
    header "default FORMAT: ${FORMAT}"
  else
    header "FORMAT: $FORMAT"
  fi

  # For unexperienced users, print a warning when input and output formats
  # differ. Only relevant when there is an audio file to extract from.
  # In case FFOPTS is set, encoding is expected to be handled, drop the warning.
  # Example (remove the default behavior, perform re-encoding):
  #   FFOPTS=" "
  if [ $# -eq 2 ] && [ -z "$FFOPTS" ] && [ "$FORMAT" != "$DEFAULT_FORMAT" ]; then
    warning "input and output formats seem to differ"
    warning "1. re-encoding may be required (through the FFOPTS envvar)"
    warning "2. FFOPTS represents ffmpeg options, directly given to ffmpeg"
    warning '   (default: "-c:a copy" = copy without re-encoding)'
    warning '   You can put FFOPTS=" " if you want to perform re-encoding.'
  fi

  if [ -z "$VERBOSITY" ]; then
    header "default VERBOSITY: 1"
    VERBOSITY=1
  else
    header "VERBOSITY level: $VERBOSITY"
  fi

  # Normalise NONUMBER to 0 or 1 so that the message printed here and the
  # numbering done in comp_end_of_tracks always agree.
  case "$NONUMBER" in
    "" | 0)
      header "default NONUMBER: disabled"
      NONUMBER=0
      # Assume that there should be a separator.
      if [ -z "$SEPARATOR" ]; then
        header "default SEPARATOR: ' - '"
        SEPARATOR=" - "
      else
        header "SEPARATOR: '$SEPARATOR'"
      fi
      ;;
    *)
      header "NONUMBER: won't prefix tracks"
      NONUMBER=1
      SEPARATOR=""
      ;;
  esac

  if [ -n "$FFOPTS" ]; then
    header "FFOPTS envvar is set: ${FFOPTS}."
  else
    FFOPTS="-c:a copy"
    header "default FFOPTS: ${FFOPTS}"
  fi

  if [ -n "$SIMULATION" ]; then
    header "SIMULATION envvar is set: this is a simulation."
  fi

  case $# in
    0)
      usage
      exit 0
      ;;
    1) get_timestamps < "$1" ;;
    2) extraction "$1" "$2" ;;
    *)
      usage 1>&2
      exit 1
      ;;
  esac
}

main "$@"