#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Files
==================
Utility classes for handling different types of files.
AudioFile
^^^^^^^^^
File class to read and write audio files. Loading supports wav, flac, m4a and webm formats; saving supports wav and flac.
Usage examples:
.. code-block:: python
:linenos:
# Example 1
data, fs = AudioFile(filename='test.wav', fs=22050).load()
# Example 2
data, fs = AudioFile().load(filename='test.wav', fs=44100, mono=False)
.. autosummary::
:toctree: generated/
AudioFile
AudioFile.load
AudioFile.save
AudioFile.exists
AudioFile.empty
ParameterFile
^^^^^^^^^^^^^
File class to read and write dict based parameter files in YAML format.
Usage examples:
.. code-block:: python
:linenos:
# Example 1
params = ParameterFile(filename='parameters.yaml')
params.load()
params.save()
# Example 2
params = ParameterFile(filename='parameters.yaml').load()
params.save()
# Example 3
params = ParameterFile({'test':'value'}).save(filename='parameters.yaml')
.. autosummary::
:toctree: generated/
ParameterFile
ParameterFile.load
ParameterFile.save
ParameterFile.exists
ParameterFile.empty
FeatureFile
^^^^^^^^^^^
File class to read and write dict based feature files in cpickle format.
Usage examples:
.. code-block:: python
:linenos:
# Example 1
feat = FeatureFile(filename='features.cpickle')
feat.load()
feat.save()
# Example 2
feat = FeatureFile(filename='features.cpickle').load()
feat.save()
# Example 3
feat = FeatureFile({'feature':[1,2,3,4]}).save(filename='features.cpickle')
.. autosummary::
:toctree: generated/
FeatureFile
FeatureFile.load
FeatureFile.save
FeatureFile.exists
FeatureFile.empty
DataFile
^^^^^^^^
Generic file class to read and write dict based data files in cpickle format.
Usage examples:
.. code-block:: python
:linenos:
# Example 1
data = DataFile(filename='data.cpickle')
data.load()
data.save()
# Example 2
data = DataFile(filename='data.cpickle').load()
data.save()
# Example 3
data = DataFile({'data':[1,2,3,4]}).save(filename='data.cpickle')
.. autosummary::
:toctree: generated/
DataFile
DataFile.load
DataFile.save
DataFile.exists
DataFile.empty
RepositoryFile
^^^^^^^^^^^^^^
File class to read and write dict based repositories in cpickle format.
.. autosummary::
:toctree: generated/
RepositoryFile
RepositoryFile.load
RepositoryFile.exists
RepositoryFile.empty
TextFile
^^^^^^^^
File class to read and write text files; rows in the text file are stored as items in a list.
.. autosummary::
:toctree: generated/
TextFile
TextFile.load
TextFile.save
TextFile.exists
TextFile.empty
DictFile
^^^^^^^^
Base class for all dict based file classes.
.. autosummary::
:toctree: generated/
DictFile
DictFile.load
DictFile.save
DictFile.exists
DictFile.empty
ListFile
^^^^^^^^
Base class for all list based file classes.
.. autosummary::
:toctree: generated/
ListFile
ListFile.load
ListFile.save
ListFile.exists
ListFile.empty
Mixins
^^^^^^
.. autosummary::
:toctree: generated/
FileMixin
"""
from __future__ import print_function, absolute_import
from six import iteritems
import os
import numpy
import logging
import soundfile
import copy
from .decorators import before_and_after_function_wrapper
from .containers import DottedDict, ContainerMixin
class FileMixin(object):
    """Generic file mixin

    Provides file name reporting, extension based format detection and
    existence / emptiness checks. The host class supplies ``filename`` and
    ``valid_formats``; a ``logger`` attribute is used when it is present.

    """

    # Lower-cased file extension -> format tag
    _extension_to_format = {
        '.yaml': 'yaml',
        '.xml': 'xml',
        '.json': 'json',
        '.cpickle': 'cpickle',
        '.pickle': 'cpickle',
        '.pkl': 'cpickle',
        '.marshal': 'marshal',
        '.msgpack': 'msgpack',
        '.txt': 'txt',
        '.hash': 'txt',
        '.csv': 'csv',
        '.ann': 'ann',
        '.wav': 'wav',
        '.flac': 'flac',
        '.mp3': 'mp3',
        '.m4a': 'm4a',
        '.webm': 'webm',
    }

    def get_file_information(self):
        """Get file information, filename

        Returns
        -------
        str
            ``'Filename: [<filename>]'``, or an empty string when no filename is set

        """

        if self.filename:
            return 'Filename: [' + self.filename + ']'
        return ''

    def detect_file_format(self, filename):
        """Detect file format from extension

        The extension is matched case-insensitively, and the resulting format
        tag must be listed in ``self.valid_formats``.

        Parameters
        ----------
        filename : str
            filename

        Returns
        -------
        str
            format tag

        Raises
        ------
        IOError:
            Unknown file format

        """

        extension = os.path.splitext(filename.lower())[1]
        file_format = self._extension_to_format.get(extension)
        if file_format in self.valid_formats:
            return file_format

        message = '{name}: Unknown format [{format}] for file [{file}]'.format(
            name=self.__class__.__name__,
            format=os.path.splitext(filename)[-1],
            file=filename
        )

        # Constructors call this method before they assign self.logger, so the
        # attribute may not exist yet; the IOError must be raised regardless.
        logger = getattr(self, 'logger', None)
        if logger:
            # No exception is being handled here, so error() rather than exception().
            logger.error(message)

        raise IOError(message)

    def exists(self):
        """Checks that file exists

        Returns
        -------
        bool
            False when no filename is set or the path is not an existing file

        """

        # os.path.isfile(None) raises TypeError, so guard the unset filename.
        return bool(self.filename) and os.path.isfile(self.filename)

    def empty(self):
        """Check if file is empty

        Returns
        -------
        bool
            True when the container holds no items

        """

        return len(self) == 0
class DictFile(dict, FileMixin, ContainerMixin):
    """File class inherited from dict, valid file formats [yaml, json, cpickle, marshal, msgpack, txt]"""

    valid_formats = ['yaml', 'json', 'cpickle', 'marshal', 'msgpack', 'txt']

    def __init__(self, *args, **kwargs):
        """Constructor

        Positional arguments are passed on to the ``dict`` constructor.

        Parameters
        ----------
        filename : str, optional
            File path
        logger : logger
            Logger class instance, If none given logger instance will be created
            Default value "None"

        Raises
        ------
        IOError:
            Given filename has unknown file format

        """

        # The logger is set up first because detect_file_format() reports
        # unknown formats through it.
        self.logger = kwargs.get('logger') or logging.getLogger(__name__)
        if not self.logger.handlers:
            logging.basicConfig()

        self.filename = kwargs.get('filename', None)
        self.format = None
        if self.filename:
            self.format = self.detect_file_format(self.filename)

        dict.__init__(self, *args)

    def _import_first(self, module_names, label):
        """Import and return the first importable module out of module_names."""
        import importlib

        for module_name in module_names:
            try:
                return importlib.import_module(module_name)
            except ImportError:
                continue

        message = '{name}: Unable to import {label} module.'.format(
            name=self.__class__.__name__,
            label=label
        )
        self.logger.error(message)
        raise ImportError(message)

    def _log_yaml_error(self, exc):
        """Log the details of a YAML parsing error."""
        self.logger.error("Error while parsing YAML file [%s]" % self.filename)
        if hasattr(exc, 'problem_mark'):
            if exc.context is not None:
                self.logger.error(str(exc.problem_mark) + '\n ' + str(exc.problem) + ' ' + str(exc.context))
            else:
                self.logger.error(str(exc.problem_mark) + '\n ' + str(exc.problem))
            self.logger.error(' Please correct data and retry.')
        else:
            self.logger.error("Something went wrong while parsing yaml file [%s]" % self.filename)

    def _unknown_format_message(self):
        """Build the error message used when self.format is not handled."""
        return '{name}: Unknown format [{format}] for file [{file}]'.format(
            name=self.__class__.__name__,
            format=self.format,
            file=self.filename
        )

    @before_and_after_function_wrapper
    def load(self, filename=None):
        """Load file

        Current dict content is cleared before the file is read.

        Parameters
        ----------
        filename : str, optional
            File path
            Default value filename given to class constructor

        Raises
        ------
        ImportError:
            Error if file format specific module cannot be imported
        IOError:
            File does not exists or has unknown file format

        Returns
        -------
        self
            None is returned instead when a YAML file cannot be parsed
            (the parsing error is logged).

        """

        if filename:
            self.filename = filename
            self.format = self.detect_file_format(self.filename)

        dict.clear(self)

        if not self.exists():
            message = '{name}: File does not exists [{file}]'.format(
                name=self.__class__.__name__,
                file=self.filename
            )
            self.logger.error(message)
            raise IOError(message)

        if self.format == 'yaml':
            yaml = self._import_first(('yaml',), label='YAML')
            try:
                with open(self.filename, 'r') as infile:
                    # NOTE(security): yaml.Loader keeps the historical behaviour
                    # of yaml.load() and can construct arbitrary Python objects;
                    # only load trusted files (or switch to yaml.safe_load).
                    # The Loader argument is mandatory in PyYAML 6.
                    content = yaml.load(infile, Loader=yaml.Loader)

                # An empty YAML document parses to None
                if content:
                    dict.update(self, content)

            except yaml.YAMLError as exc:
                self._log_yaml_error(exc)
                return

        elif self.format == 'cpickle':
            pickle = self._import_first(('cPickle', 'pickle'), label='pickle')
            # NOTE(security): unpickling can execute arbitrary code; only load trusted files.
            with open(self.filename, 'rb') as infile:
                dict.update(self, pickle.load(infile))

        elif self.format == 'marshal':
            marshal = self._import_first(('marshal',), label='marshal')
            with open(self.filename, 'rb') as infile:
                dict.update(self, marshal.load(infile))

        elif self.format == 'msgpack':
            msgpack = self._import_first(('msgpack',), label='msgpack')
            with open(self.filename, 'rb') as infile:
                dict.update(self, msgpack.load(infile))

        elif self.format == 'json':
            json = self._import_first(('ujson', 'json'), label='json')
            with open(self.filename, 'r') as infile:
                dict.update(self, json.load(infile))

        elif self.format == 'txt':
            with open(self.filename, 'r') as infile:
                lines = infile.readlines()
            # Rows are keyed by their zero-based line number
            dict.update(self, enumerate(lines))

        else:
            message = self._unknown_format_message()
            self.logger.error(message)
            raise IOError(message)

        return self

    @before_and_after_function_wrapper
    def save(self, filename=None):
        """Save file

        Parameters
        ----------
        filename : str, optional
            File path
            Default value filename given to class constructor

        Raises
        ------
        ImportError:
            Error if file format specific module cannot be imported
        IOError:
            File has unknown file format

        Returns
        -------
        self

        """

        if filename:
            self.filename = filename
            self.format = self.detect_file_format(self.filename)

        try:
            # Python 3.11+ gives every object a default __getstate__ which
            # returns the instance attributes, not the mapping content, so
            # only a __getstate__ defined by the class hierarchy is honoured.
            class_getstate = getattr(type(self), '__getstate__', None)
            default_getstate = getattr(object, '__getstate__', None)
            if class_getstate is not None and class_getstate is not default_getstate:
                data = dict(self.__getstate__())
            else:
                data = dict(self)

            if self.format == 'yaml':
                yaml = self._import_first(('yaml',), label='yaml')
                with open(self.filename, 'w') as outfile:
                    outfile.write(yaml.dump(self.get_dump_content(data=data), default_flow_style=False))

            elif self.format == 'cpickle':
                pickle = self._import_first(('cPickle', 'pickle'), label='pickle')
                with open(self.filename, 'wb') as outfile:
                    pickle.dump(data, outfile, protocol=2)

            elif self.format == 'marshal':
                marshal = self._import_first(('marshal',), label='marshal')
                with open(self.filename, 'wb') as outfile:
                    marshal.dump(data, outfile)

            elif self.format == 'msgpack':
                msgpack = self._import_first(('msgpack',), label='msgpack')
                with open(self.filename, 'wb') as outfile:
                    msgpack.dump(data, outfile, use_bin_type=True)

            elif self.format == 'json':
                json = self._import_first(('ujson', 'json'), label='json')
                # JSON is text; a binary handle fails under Python 3
                with open(self.filename, 'w') as outfile:
                    json.dump(data, outfile)

            elif self.format == 'txt':
                with open(self.filename, 'w') as text_file:
                    text_file.writelines(self[line_id] for line_id in self)

            else:
                message = self._unknown_format_message()
                self.logger.error(message)
                raise IOError(message)

        except KeyboardInterrupt:
            # Delete the file, since most likely it was not saved fully. The
            # file may not exist yet if the interrupt came before it was opened.
            if self.filename and os.path.isfile(self.filename):
                os.remove(self.filename)
            raise

        return self

    def get_dump_content(self, data):
        """Clean internal content for saving

        Numpy, DottedDict content is converted to standard types

        Parameters
        ----------
        data : dict

        Returns
        -------
        dict
            Converted copy of the data; falsy input is returned unchanged

        """

        if data:
            data = dict(data)
            # Only existing keys are reassigned, so iterating while updating is safe
            for key, value in iteritems(data):
                if isinstance(value, numpy.generic):
                    # numpy.asscalar() was removed from NumPy; item() is the equivalent
                    data[key] = value.item()

                elif isinstance(value, DottedDict):
                    data[key] = self.get_dump_content(data=dict(value))

                elif isinstance(value, dict):
                    data[key] = self.get_dump_content(data=value)

        return data
class ListFile(list, FileMixin):
    """File class inherited from list, valid file formats [txt, yaml]"""

    valid_formats = ['txt', 'yaml']

    def __init__(self, *args, **kwargs):
        """Constructor

        Positional arguments are passed on to the ``list`` constructor.

        Parameters
        ----------
        filename : str, optional
            File path
        logger : logger
            Logger class instance, If none given logger instance will be created
            Default value "None"

        Raises
        ------
        IOError:
            Given filename has unknown file format

        """

        # The logger is set up first because detect_file_format() reports
        # unknown formats through it.
        self.logger = kwargs.get('logger') or logging.getLogger(__name__)
        if not self.logger.handlers:
            logging.basicConfig()

        self.filename = kwargs.get('filename', None)
        self.format = None
        if self.filename:
            self.format = self.detect_file_format(self.filename)

        list.__init__(self, *args)

    def _unknown_format_message(self):
        """Build the error message used when self.format is not handled."""
        return '{name}: Unknown format [{format}] for file [{file}]'.format(
            name=self.__class__.__name__,
            format=self.format,
            file=self.filename
        )

    @before_and_after_function_wrapper
    def load(self, filename=None):
        """Load file

        Parameters
        ----------
        filename : str, optional
            File path
            Default value filename given to class constructor

        Raises
        ------
        ImportError:
            YAML module cannot be imported, or YAML data is not in list format
        IOError:
            File does not exists or has unknown file format

        Returns
        -------
        self
            None is returned instead when a YAML file cannot be parsed
            (the parsing error is logged).

        """

        if filename:
            self.filename = filename
            self.format = self.detect_file_format(self.filename)

        if not self.exists():
            message = '{name}: File does not exists [{file}]'.format(
                name=self.__class__.__name__,
                file=self.filename
            )
            self.logger.error(message)
            raise IOError(message)

        if self.format == 'txt':
            with open(self.filename, 'r') as infile:
                # Remove line breaks
                lines = [line.replace('\n', '') for line in infile.readlines()]

            list.__init__(self, lines)

        elif self.format == 'yaml':
            try:
                import yaml
            except ImportError:
                message = '{name}: Unable to import YAML module.'.format(name=self.__class__.__name__)
                self.logger.exception(message)
                raise ImportError(message)

            try:
                with open(self.filename, 'r') as infile:
                    # NOTE(security): yaml.Loader keeps the historical behaviour
                    # of yaml.load() and can construct arbitrary Python objects;
                    # only load trusted files (or switch to yaml.safe_load).
                    # The Loader argument is mandatory in PyYAML 6.
                    data = yaml.load(infile, Loader=yaml.Loader)

                if isinstance(data, list):
                    list.__init__(self, data)
                else:
                    # ImportError is kept as the raised type for backward compatibility
                    message = '{name}: YAML data is not in list format.'.format(name=self.__class__.__name__)
                    self.logger.error(message)
                    raise ImportError(message)

            except yaml.YAMLError as exc:
                self.logger.error("Error while parsing YAML file [%s]" % self.filename)
                if hasattr(exc, 'problem_mark'):
                    if exc.context is not None:
                        self.logger.error(str(exc.problem_mark) + '\n ' + str(exc.problem) + ' ' + str(exc.context))
                    else:
                        self.logger.error(str(exc.problem_mark) + '\n ' + str(exc.problem))
                    self.logger.error(' Please correct data and retry.')
                else:
                    self.logger.error("Something went wrong while parsing yaml file [%s]" % self.filename)

                return

        else:
            message = self._unknown_format_message()
            self.logger.error(message)
            raise IOError(message)

        return self

    @before_and_after_function_wrapper
    def save(self, filename=None):
        """Save file

        Parameters
        ----------
        filename : str, optional
            File path
            Default value filename given to class constructor

        Raises
        ------
        ImportError:
            YAML module cannot be imported
        IOError:
            File has unknown file format

        Returns
        -------
        self

        """

        if filename:
            self.filename = filename
            self.format = self.detect_file_format(self.filename)

        try:
            if self.format == 'yaml':
                try:
                    import yaml
                except ImportError:
                    message = '{name}: Unable to import yaml module.'.format(name=self.__class__.__name__)
                    self.logger.exception(message)
                    raise ImportError(message)

                with open(self.filename, 'w') as outfile:
                    # Work on a deep copy so the conversion never touches the stored items
                    data = copy.deepcopy(list(self))
                    for item_id, item in enumerate(data):
                        data[item_id] = self.get_dump_content(data=item)

                    outfile.write(yaml.dump(data, default_flow_style=False))

            elif self.format == 'txt':
                with open(self.filename, 'w') as text_file:
                    for line in self:
                        text_file.write(line + '\n')

            else:
                message = self._unknown_format_message()
                self.logger.error(message)
                raise IOError(message)

        except KeyboardInterrupt:
            # Delete the file, since most likely it was not saved fully. The
            # file may not exist yet if the interrupt came before it was opened.
            if self.filename and os.path.isfile(self.filename):
                os.remove(self.filename)
            raise

        return self

    def get_dump_content(self, data):
        """Clean internal content for saving

        Numpy, DottedDict content is converted to standard types

        Parameters
        ----------
        data : dict

        Returns
        -------
        dict
            Converted copy of the data; falsy input is returned unchanged

        """

        if data:
            data = dict(data)
            # Only existing keys are reassigned, so iterating while updating is safe
            for key, value in iteritems(data):
                if isinstance(value, numpy.generic):
                    # numpy.asscalar() was removed from NumPy; item() is the equivalent
                    data[key] = value.item()

                elif isinstance(value, DottedDict):
                    data[key] = self.get_dump_content(data=dict(value))

                elif isinstance(value, dict):
                    data[key] = self.get_dump_content(data=value)

        return data
class AudioFile(FileMixin):
    """File class for audio files, valid file formats for loading [wav, flac, m4a, webm], for saving [wav, flac]"""

    valid_formats = ['wav', 'flac', 'm4a', 'webm']

    def __init__(self, *args, **kwargs):
        """Constructor

        Parameters
        ----------
        data : optional
            Audio data
            Default value "None"
        fs : int
            Target sampling frequency, if loaded audio does have different sampling frequency, audio will be re-sampled.
            Default value "44100"
        mono : bool
            Monophonic target, multi-channel audio will be down-mixed.
            Default value "True"
        filename : str, optional
            File path
        logger : logger
            Logger class instance, If none given logger instance will be created
            Default value "None"

        Raises
        ------
        IOError:
            Given filename has unknown file format

        """

        # The logger is set up first because detect_file_format() reports
        # unknown formats through it.
        self.logger = kwargs.get('logger') or logging.getLogger(__name__)
        if not self.logger.handlers:
            logging.basicConfig()

        self.data = kwargs.get('data', None)  # Audio data itself

        self.filename = kwargs.get('filename', None)
        self.format = None
        if self.filename:
            self.format = self.detect_file_format(self.filename)

        self.fs = kwargs.get('fs', 44100)
        self.mono = kwargs.get('mono', True)

    @before_and_after_function_wrapper
    def load(self, filename=None, fs=None, mono=None, res_type='kaiser_best', start=None, stop=None):
        """Load file

        A segment is read only when both start and stop are given, otherwise
        the whole file is loaded.

        Parameters
        ----------
        filename : str, optional
            File path
            Default value filename given to class constructor
        fs : int
            Target sampling frequency, if loaded audio does have different sampling frequency, audio will be re-sampled.
            Default value one given to class constructor
        mono : bool
            Monophonic target, multi-channel audio will be down-mixed.
            Default value one given to class constructor
        res_type : str
            Resample type, defined by Librosa
            Default value "kaiser_best"
        start : float, optional
            Segment start time in seconds
            Default value "None"
        stop : float, optional
            Segment stop time in seconds
            Default value "None"

        Raises
        ------
        IOError:
            File does not exists or has unknown file format

        Returns
        -------
        data
            Loaded audio data
        fs : int
            Sampling frequency of the returned data

        """

        if filename is not None:
            self.filename = filename
            self.format = self.detect_file_format(self.filename)

        if not self.exists():
            message = '{name}: File does not exists [{file}]'.format(
                name=self.__class__.__name__,
                file=self.filename
            )
            self.logger.error(message)
            raise IOError(message)

        if fs is not None:
            self.fs = fs

        if mono is not None:
            self.mono = mono

        has_segment = start is not None and stop is not None

        if self.format == 'wav':
            # Handle segment start and stop
            if has_segment:
                info = soundfile.info(file=self.filename)
                start_sample = int(start * info.samplerate)
                # Clip the segment end to the file length
                stop_sample = min(int(stop * info.samplerate), info.frames)
            else:
                start_sample = None
                stop_sample = None

            self.data, source_fs = soundfile.read(file=self.filename, start=start_sample, stop=stop_sample)
            # Transposed so that channels come first for multi-channel audio
            self.data = self.data.T

            # Down-mix audio
            if self.mono and len(self.data.shape) > 1:
                self.data = numpy.mean(self.data, axis=0)

            if self.fs is None:
                # No target rate requested, keep the rate of the file
                self.fs = source_fs

            # Resample
            if self.fs != source_fs:
                import librosa
                # Sampling rates are keyword-only arguments in recent librosa versions
                self.data = librosa.core.resample(
                    self.data,
                    orig_sr=source_fs,
                    target_sr=self.fs,
                    res_type=res_type
                )

        elif self.format in ['flac', 'm4a', 'webm']:
            import librosa

            if has_segment:
                offset = start
                duration = stop - start
            else:
                offset = 0.0
                duration = None

            self.data, self.fs = librosa.load(
                self.filename,
                sr=self.fs,
                mono=self.mono,
                res_type=res_type,
                offset=offset,
                duration=duration
            )

        else:
            message = '{name}: Unknown format [{format}] for file [{file}]'.format(
                name=self.__class__.__name__,
                format=self.format,
                file=self.filename
            )
            self.logger.error(message)
            raise IOError(message)

        return self.data, self.fs

    def save(self, filename=None, bitdepth=16):
        """Save audio

        Only wav and flac formats can be saved.

        Parameters
        ----------
        filename : str, optional
            File path
            Default value filename given to class constructor
        bitdepth : int, optional
            Bit depth for wav audio, one of 16, 24, 32 or None (soundfile default subtype)
            Default value "16"

        Raises
        ------
        IOError:
            Unexpected bit depth, or file has unknown file format

        Returns
        -------
        self

        """

        if filename:
            self.filename = filename
            self.format = self.detect_file_format(self.filename)

        # NOTE(review): load() stores multi-channel data with channels first,
        # while data is passed to soundfile.write() as-is here; confirm the
        # expected orientation of self.data for multi-channel saving.
        if self.format == 'wav':
            wav_subtypes = {16: 'PCM_16', 24: 'PCM_24', 32: 'PCM_32'}

            if bitdepth is None:
                soundfile.write(file=self.filename,
                                data=self.data,
                                samplerate=self.fs)

            elif bitdepth in wav_subtypes:
                soundfile.write(file=self.filename,
                                data=self.data,
                                samplerate=self.fs,
                                subtype=wav_subtypes[bitdepth])

            else:
                message = '{name}: Unexpected bit depth [{bitdepth}]'.format(name=self.__class__.__name__,
                                                                             bitdepth=bitdepth)
                self.logger.error(message)
                raise IOError(message)

        elif self.format == 'flac':
            soundfile.write(file=self.filename,
                            data=self.data,
                            samplerate=self.fs)

        else:
            message = '{name}: Unknown format for saving [{format}]'.format(name=self.__class__.__name__,
                                                                            format=self.format)
            self.logger.error(message)
            raise IOError(message)

        return self
class TextFile(ListFile):
    """Text file container built on ListFile; accepted file formats: [txt]"""

    valid_formats = ['txt']
class DataFile(DictFile):
    """Generic data file container built on DictFile; accepted file formats: [cpickle]"""

    valid_formats = ['cpickle']
class ParameterFile(DictFile):
    """Parameter file container built on DictFile; accepted file formats: [yaml]"""

    valid_formats = ['yaml']
class ParameterListFile(ListFile):
    """Parameter list file container built on ListFile; accepted file formats: [yaml]"""

    valid_formats = ['yaml']
class FeatureFile(DictFile):
    """Feature file container built on DictFile; accepted file formats: [cpickle]"""

    valid_formats = ['cpickle']
class RepositoryFile(DictFile):
    """Repository file container built on DictFile; accepted file formats: [cpickle]"""

    valid_formats = ['cpickle']