You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
84 lines
2.1 KiB
84 lines
2.1 KiB
#! /usr/bin/env python2
|
|
|
|
import scipy.io.wavfile as sio
|
|
import numpy as np
|
|
from random import shuffle
|
|
from sys import argv
|
|
from fabric.api import *
|
|
|
|
# RMS level that find_ceiling compares a leading slice of samples against;
# a slice whose RMS is below this value is treated as silence.
THRESHOLD = 0.8
|
|
# Longest recording, in minutes, that convert() will still write out;
# anything longer is dropped without producing a shortened file.
SIZE_LIMIT = 10
|
|
|
|
def rms(ss):
    """
    Takes the root-mean-square of an array of samples

    The squares are computed in float64.  Squaring integer PCM data in its
    own dtype (e.g. int16) silently wraps around for any sample above ~181,
    which produced meaningless (even negative) means.

    ss -- array-like of numeric samples, any shape; all elements are pooled.

    Returns the RMS as a float, or 0.0 when there are no samples at all
    (instead of NaN, which compares False against every threshold).
    """
    samples = np.asarray(ss)
    if samples.size == 0:
        return 0.0
    # dtype= makes the ufunc cast its input up front, so nothing overflows
    return np.sqrt(np.mean(np.square(samples, dtype=np.float64)))
|
|
|
|
def find_ceiling(ss, factor):
    """
    Attempts to find the sample at which the silence ends

    Starts with the whole recording and keeps shrinking the leading slice
    to len(ss)/factor, len(ss)/(factor+1), ... until the RMS of that slice
    drops below THRESHOLD.

    ss     -- array of samples
    factor -- first divisor to try; must be non-zero (callers pass 2)

    Returns the length of the leading slice judged to be silent, i.e. the
    number of samples to cut from the front.  Returns 0 when no silent
    prefix exists (including empty input).
    """
    total = len(ss)
    cut = total
    # Stop once the slice is empty: the RMS of zero samples can never drop
    # below the threshold, so without this guard a recording that starts
    # loud (or is empty) would spin forever.
    while cut > 0 and not (rms(ss[0:cut]) < THRESHOLD):
        cut = int(total / factor)
        factor += 1
    return cut
|
|
|
|
def convert(infile):
    """
    Takes a path to a file and tries to remove leading silence

    Reads the intermediate WAV at /tmp/<infile>.to_silence.wav.  If the
    recording is at most SIZE_LIMIT minutes long, the leading silence is cut
    off and the result is copied to ./shortened/<infile>.wav (relative to
    the directory the shell commands run in).  Longer recordings produce no
    output.

    The intermediate WAV is removed in every case, including when reading
    or writing fails, so failed files do not pile up in /tmp.
    """
    try:
        sample_rate, samples = sio.read("/tmp/%s.to_silence.wav" % infile)

        # length in minutes
        length = (len(samples) / float(sample_rate)) / 60.0

        if length <= SIZE_LIMIT:
            # the first n samples to cut out -- only searched for when the
            # file is actually going to be written, since the search is
            # costly on long recordings
            ceiling = find_ceiling(samples, 2)

            sio.write("/tmp/%s.silenced.wav" % infile, sample_rate, samples[ceiling:])
            local("cp /tmp/\"%s.silenced.wav\" ./shortened/\"%s.wav\"" % (infile, infile))
            local("rm /tmp/\"%s.silenced.wav\"" % infile)
    finally:
        # -f so that cleanup itself cannot fail when the file never existed
        local("rm -f /tmp/\"%s.to_silence.wav\"" % infile)
|
|
|
|
def to_wav(infile):
    """
    Transcodes the given file with ffmpeg into a WAV file

    The output lands at /tmp/<infile>.to_silence.wav, which is where
    convert() looks for it.
    """
    command = 'ffmpeg -i "%s" /tmp/"%s.to_silence.wav"' % (infile, infile)
    local(command)
|
|
|
|
def silence(infile):
    """
    Runs the full pipeline on one file: transcode to WAV, then strip the
    leading silence
    """
    for stage in (to_wav, convert):
        stage(infile)
|
|
|
|
def silence_all():
    """
    Removes the silence from all files in a given directory

    The directory is taken from the first command-line argument.  Every
    file in it whose extension is ogg, opus, m4a or mp3 is processed, in
    random order, and results are placed in a "shortened" subdirectory.

    A file that fails to process is reported and skipped so that one bad
    file does not stop the batch.  Leftover WAV files in /tmp are removed
    after every file, whether or not it succeeded.
    """
    if len(argv) < 2:
        # parenthesised single-argument form prints the same text on
        # Python 2 and Python 3
        print("You must pass a directory as the first argument")
        return

    with lcd(argv[1]):
        local("mkdir -p ./shortened")
        fileset = local("ls .", capture=True).split("\n")
        shuffle(fileset)
        for f in fileset:
            if f.split(".")[-1] in ["ogg", "opus", "m4a", "mp3"]:
                try:
                    silence(f)
                except Exception as err:
                    # best effort: keep going, but say which file was lost
                    print("Skipping %s: %s" % (f, err))
                finally:
                    local("rm -f /tmp/*wav")
|
|
|
|
# Runs the batch as soon as this module is executed (there is no
# __main__ guard, so importing the module triggers it as well).
silence_all()
|
|
|