multi-byte characters removal, from "while read" to awk.

master
Karchnu 2021-04-06 05:18:19 +02:00
parent 7d073a56fd
commit bd9aa59a26
1 changed file with 140 additions and 66 deletions

View File

@ -1,74 +1,119 @@
#!/usr/bin/env sh #!/usr/bin/env sh
get_time(){ regroup_lines(){
echo "$*" | sed "s/[ \t].*//" awk '
BEGIN {
first=1
}
{
if (first == 1)
line = $1;
else
line = line " " $1;
first = 0;
if ($1 == "0a") {
print line;
first = 1
}
}
END {
if (first == 0)
print line
}
'
} }
get_title(){ simple_quote(){
echo "$*" | cut -d ' ' -f 2- sed "s/e2 80 99/27/g"
} }
reverse_word_order(){ remove_multibyte_characters(){
local result= sed "s/e2 80 .. //g"
for word in $@; do
result="$word $result"
done
echo "$result"
} }
# bc is mandatory: arithmetic operations are very limited in ash. to_hex_one_colomun(){
get_seconds(){ xxd -p -c 1
number_position=0 }
value=0
values=$(echo "$*" | sed 's/:/\ from_hex(){
/g' | sed "s/^0//") xxd -p -r
for i in $(reverse_word_order $values); do }
case $number_position in
0) value=$(echo "$value + $i " | bc);;
1) value=$(echo "$value + (60 * $i)" | bc);;
2) value=$(echo "$value + (3600 * $i)" | bc);;
*) echo "invalid timecode $*"; exit 1;;
esac
number_position=$((number_position+1)) to_ascii(){
done to_hex_one_colomun |
regroup_lines |
simple_quote |
remove_multibyte_characters |
from_hex
}
# process_durations: awk filter reading lines of the form
#   <timestamp> <title word> [<title word> ...]
# on stdin and writing, for every track, one line
#   <start timestamp> <end timestamp> <title>
# on stdout, separated by single spaces (OFS).
#
# A track's end is the first field of the following input line, so each
# record prints the PREVIOUS track and then remembers its own timestamp and
# title. The last track has no following line: the END rule prints it with the
# literal marker END_OF_FILE in place of an end timestamp. The END rule is
# unconditional, so empty input still produces one line holding only the marker.
#
# The title is field 2 followed by fields 3..NF joined with single spaces.
# Unless the shell variable NONUMBER equals 1, the title is prefixed with the
# record number (NR, with a leading "0" below 10) and the shell variable
# SEPARATOR; both variables are handed to awk with -v.
#
# NOTE(review): the leading `echo $value` on the next line looks like residue
# of the left (removed) column of the side-by-side diff this text comes from --
# confirm against the real script.
echo $value process_durations(){
awk -v NONUMBER="$NONUMBER" -v SEPARATOR="$SEPARATOR" '
BEGIN {
OFS=" "
}
{
if (NR > 1) {
print timestamp, $1, title;
}
timestamp = $1;
if (NONUMBER == 1) {
title = $2
}
else {
if (NR < 10) {
title = "0" NR SEPARATOR $2
}
else {
title = NR SEPARATOR $2
}
}
for (i=3; i <= NF; i++) {
title = title " " $i
}
}
END {
print timestamp, "END_OF_FILE", title;
}
'
}
# first_column_to_seconds: awk filter that rewrites the first field of every
# stdin line from a colon-separated timecode into a number of seconds
# (10:30 becomes 630) and prints the resulting line on stdout.
#
# The timecode is split on ":" and read from the right: the last component
# counts as seconds, the one before it as minutes (x60) and the one before
# that as hours (x3600). Components further to the left match no branch and
# are ignored.
# The loop bound is i <= n, so its last pass reads arr[0]; split() fills
# arr[1..n] only, so that element is empty and contributes 0.
# Assigning to $1 makes awk rebuild the record, so the printed line has its
# fields joined by the default output separator (one space).
# v is reset after each line so the next record starts from a clean value.
#
# NOTE(review): the final line carries two closing braces -- apparently one
# per column of the side-by-side diff this text comes from; confirm against
# the real script.
first_column_to_seconds(){
awk '
{
# from 10:30 to 630
n = split ($1, arr, ":")
for (i = 0; i <= n; i++) {
if (i == 0) {
v = arr[n-i];
}
else if (i == 1) {
v += 60 * arr[n-i];
}
else if (i == 2) {
v += 3600 * arr[n-i];
}
}
$1 = v;
print;
v = 0;
}
'
} }
# Get a more usable time representation for the beginning and the end of songs. # Get a more usable time representation for the beginning and the end of songs.
get_values(){ process_time_file(){
time_file="$1"
audio_file="$1" to_ascii < "$time_file" | first_column_to_seconds | process_durations
time_file="$2"
track_number=1
while read X; do
if [ $track_number -ne 1 ]; then
to_unformatted=$(get_time $X)
to_s=$(get_seconds $to_unformatted)
echo "$from_s $to_s $title"
fi
from_unformatted=$(get_time $X)
from_s=$(get_seconds $from_unformatted)
title=$(get_title $X)
if [ "$NONUMBER" = "" ]; then
track_n=$track_number
if [ $track_number -lt 10 ]; then
track_n="0$track_number"
fi
title="${track_n}${SEPARATOR}${title}"
fi
track_number=$(echo $track_number + 1 | bc)
done < "${time_file}"
echo "$from_s $eof_marker $title"
} }
run_ffmpeg(){ run_ffmpeg(){
@ -114,11 +159,11 @@ rip(){
audio_file="$1" audio_file="$1"
time_file="$2" time_file="$2"
get_values "$audio_file" "$time_file" | while read LINE; do process_time_file "$time_file" | while read LINE; do
track_start_s=$(echo $LINE | cut -d ' ' -f 1) track_start_s=$(echo $LINE | cut -d ' ' -f 1)
track_end_s=$(echo $LINE | cut -d ' ' -f 2) track_end_s=$(echo $LINE | cut -d ' ' -f 2)
track_title=$(echo $LINE | cut -d ' ' -f 3-) track_title=$(echo $LINE | cut -d ' ' -f 3-)
if [ "$track_end_s" != "$eof_marker" ]; then if [ "$track_end_s" != "END_OF_FILE" ]; then
track_duration=$(echo "$track_end_s - $track_start_s" | bc) track_duration=$(echo "$track_end_s - $track_start_s" | bc)
else else
track_duration="" track_duration=""
@ -137,7 +182,7 @@ usage(){
echo "show output format: start end title" echo "show output format: start end title"
echo echo
echo "envvar: SIMULATION, if non empty, do not invoke ffmpeg" echo "envvar: SIMULATION, if non empty, do not invoke ffmpeg"
echo "envvar: NONUMBER, if non empty, do not write song number" echo "envvar: NONUMBER, if equals 1, do not write song number"
echo "envvar: FORMAT [mp3,ogg,opus,…], see the ffmpeg documentation" echo "envvar: FORMAT [mp3,ogg,opus,…], see the ffmpeg documentation"
echo "envvar: SEPARATOR [separator] (default: ' - '), write song number, with this separator" echo "envvar: SEPARATOR [separator] (default: ' - '), write song number, with this separator"
echo " example with SEPARATOR='_': song names will be 01_song.opus 02_song.opus…" echo " example with SEPARATOR='_': song names will be 01_song.opus 02_song.opus…"
@ -158,17 +203,31 @@ shift
if [ "$FORMAT" = "" ]; then if [ "$FORMAT" = "" ]; then
echo "default FORMAT: opus" echo "default FORMAT: opus"
FORMAT="opus" FORMAT="opus"
else
echo "FORMAT: $FORMAT"
fi fi
if [ "$VERBOSITY" = "" ]; then if [ "$VERBOSITY" = "" ]; then
echo "default VERBOSITY: 1" echo "default VERBOSITY: 1"
VERBOSITY=1 VERBOSITY=1
else
echo "VERBOSITY level: $VERBOSITY"
fi fi
# Assume that there should be a separator. if [ "$NONUMBER" = "" ]; then
if [ "$SEPARATOR" = "" ]; then echo "default NONUMBER: disabled"
NONUMBER=0
# Assume that there should be a separator.
if [ "$SEPARATOR" = "" ]; then
echo "default SEPARATOR: ' - '" echo "default SEPARATOR: ' - '"
SEPARATOR=" - " SEPARATOR=" - "
else
echo "SEPARATOR: '$SEPARATOR'"
fi
else
echo "NONUMBER: won't prefix tracks"
SEPARATOR=""
fi fi
if [ "$SIMULATION" != "" ]; then if [ "$SIMULATION" != "" ]; then
@ -178,7 +237,18 @@ fi
# soxi provides the total length of the music file. # soxi provides the total length of the music file.
#which soxi 2>/dev/null #which soxi 2>/dev/null
#total_length=$(soxi -D "${audio_file}" | sed "s/\..*//") # integer values only #total_length=$(soxi -D "${audio_file}" | sed "s/\..*//") # integer values only
eof_marker="END_OF_FILE"
# Make sure the installed xxd understands the two options the hex pipeline
# relies on. Each option is probed with empty input and all output discarded;
# the first option that fails aborts the script with a message on stderr.
for xxd_flag in -p -r; do
if ! xxd "$xxd_flag" </dev/null >/dev/null 2>/dev/null; then
echo "xxd: you don't have an xxd program with '$xxd_flag' option." 1>&2
exit 1
fi
done
case "x-${command}" in case "x-${command}" in
x-show) x-show)
@ -189,7 +259,7 @@ case "x-${command}" in
exit 1 exit 1
fi fi
get_values "$1" "$2" process_time_file "$2"
;; ;;
x-rip) x-rip)
@ -203,6 +273,10 @@ case "x-${command}" in
rip "$1" "$2" rip "$1" "$2"
;; ;;
x-input)
to_ascii < "$1"
;;
*) *)
usage 1>&2 usage 1>&2
exit 1 exit 1