filenames_to_features.py
# -*- coding: utf-8 -*-
import argparse
from lib.audio_utils import *
from lib.io_utils import *
from lib.math_utils import *
from matplotlib import pyplot as plt
from multiprocessing import Pool
from multiprocessing.dummy import Pool as ThreadPool
import os
import numpy as np
from pprint import pprint
import re
import sys
# input
parser = argparse.ArgumentParser()
parser.add_argument('-in', dest="INPUT_FILES", default="media/downloads/double-bass/*.mp3", help="Input files")
parser.add_argument('-pattern', dest="PATTERN", default=r"([a-z\-]+)\_([A-Z]s?)([0-9])\_([0-9]+)\_([a-z\-]+)\_([a-z\-]+)\.mp3", help="File pattern")
parser.add_argument('-features', dest="PATTERN_FEATURES", default="group,note,octave,note_dur,dynamic,articulation", help="Features that the pattern maps to")
parser.add_argument('-out', dest="OUTPUT_FILE", default="media/sampler/double-bass.csv", help="CSV output file")
parser.add_argument('-append', dest="APPEND", default=1, type=int, help="Append to existing data?")
parser.add_argument('-overwrite', dest="OVERWRITE", action="store_true", help="Overwrite existing data?")
parser.add_argument('-plot', dest="PLOT", action="store_true", help="Show plot?")
parser.add_argument('-probe', dest="PROBE", action="store_true", help="Just output details?")
args = parser.parse_args()
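# Illustrative example (not an actual file from the dataset): with the default
# pattern and feature list, a filename such as
#   double-bass_Cs2_025_fortissimo_arco-normal.mp3
# would yield group="double-bass", note="Cs", octave="2", note_dur="025",
# dynamic="fortissimo", articulation="arco-normal"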
# Parse arguments
INPUT_FILES = args.INPUT_FILES
OUTPUT_FILE = args.OUTPUT_FILE
PATTERN = args.PATTERN
PATTERN_FEATURES = args.PATTERN_FEATURES.split(",")
APPEND = args.APPEND > 0
OVERWRITE = args.OVERWRITE
PLOT = args.PLOT
# Read files
files = getFilenames(INPUT_FILES)
fileCount = len(files)
filenames = [os.path.basename(fn) for fn in files]
rows = [{"index": i, "filename": os.path.basename(fn), "filepath": fn} for i, fn in enumerate(files)]
# Make sure output dirs exist
makeDirectories(OUTPUT_FILE)
# Check if file exists already
if os.path.isfile(OUTPUT_FILE) and not OVERWRITE and not APPEND:
    print("%s already exists. Skipping." % OUTPUT_FILE)
    sys.exit()
# Open existing file and merge any previously written rows into the current set
fieldNames = ["filename"]
if os.path.isfile(OUTPUT_FILE) and APPEND:
    fieldNames, oldRows = readCsv(OUTPUT_FILE)
    if set(PATTERN_FEATURES).issubset(set(fieldNames)) and not OVERWRITE:
        print("Headers already exist in %s. Skipping." % OUTPUT_FILE)
        sys.exit()
    # Update rows with the data already on file
    for row in oldRows:
        if row["filename"] in filenames:
            existingRow = [r for r in rows if r["filename"] == row["filename"]].pop(0)
            index = existingRow["index"]
            rows[index].update(row)
# Parse features out of each filename using the pattern's capture groups
pattern = re.compile(PATTERN)
progress = 0
def getFeatures(row):
    global pattern
    global progress
    global fileCount

    matches = pattern.match(row["filename"])
    if not matches:
        print("Did not match: %s" % row["filename"])
        return None

    # Map each capture group to its corresponding feature name
    for j, feature in enumerate(PATTERN_FEATURES):
        row[feature] = matches.group(j+1)

    # Report progress; guard the denominator so a single input file
    # does not cause a division by zero
    sys.stdout.write('\r')
    sys.stdout.write("%s%%" % round(1.0*progress/max(fileCount-1, 1)*100, 1))
    sys.stdout.flush()
    progress += 1
    return row

# multiprocessing.dummy provides a thread-based pool, which is enough here
# since the per-file work is just a regex match
pool = ThreadPool()
rows = pool.map(getFeatures, rows)
pool.close()
pool.join()
# remove non-matched rows
rows = [row for row in rows if row is not None]
headings = fieldNames[:]
# Add any feature columns that are not already present in the headings
for feature in PATTERN_FEATURES:
    if feature not in headings:
        headings.append(feature)

if args.PROBE:
    sys.exit()

writeCsv(OUTPUT_FILE, rows, headings)
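# Example invocation using the script's own defaults (paths are the defaults
# above, not a claim about what exists on disk):
#   python filenames_to_features.py -in "media/downloads/double-bass/*.mp3" -out "media/sampler/double-bass.csv"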