Extending the framework

The framework is designed to be extensible during system development. The hierarchical class structure makes it easy to extend classes, and extended classes can be injected into application cores.

To see a working example of how to extend the framework, check examples/custom.py. Below are detailed instructions on how to extend the framework with new datasets, features, and learners.

Adding datasets

Example of how to add the DCASE 2013 Acoustic scene classification evaluation dataset:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import os

import numpy

from dcase_framework.datasets import AcousticSceneDataset
from dcase_framework.metadata import MetaDataContainer, MetaDataItem

class DCASE2013_Scene_EvaluationSet(AcousticSceneDataset):
    """DCASE 2013 Acoustic scene classification, evaluation dataset.

    Downloads the dataset package, generates the meta file from the audio
    file names, and creates a stratified cross-validation setup when one is
    not already present on disk.
    """
    def __init__(self, *args, **kwargs):
        kwargs['storage_name'] = kwargs.get('storage_name', 'DCASE2013-scene-evaluation')
        super(DCASE2013_Scene_EvaluationSet, self).__init__(*args, **kwargs)

        self.dataset_group = 'acoustic scene'
        self.dataset_meta = {
            'authors': 'Dimitrios Giannoulis, Emmanouil Benetos, Dan Stowell, and Mark Plumbley',
            'name_remote': 'IEEE AASP 2013 CASA Challenge - Private Dataset for Scene Classification Task',
            'url': 'http://www.elec.qmul.ac.uk/digitalmusic/sceneseventschallenge/',
            'audio_source': 'Field recording',
            'audio_type': 'Natural',
            'recording_device_model': None,
            'microphone_model': 'Soundman OKM II Klassik/studio A3 electret microphone',
        }

        self.crossvalidation_folds = 5

        # Remote package to download and where to extract it locally.
        self.package_list = [
            {
                'remote_package': 'https://archive.org/download/dcase2013_scene_classification_testset/scenes_stereo_testset.zip',
                'local_package': os.path.join(self.local_path, 'scenes_stereo_testset.zip'),
                'local_audio_path': os.path.join(self.local_path),
            }
        ]

    def _after_extract(self, to_return=None):
        """Generate meta data and evaluation setup after package extraction.

        Parameters
        ----------
        to_return : optional
            Passed through by the framework; not used here.
        """
        # Generate meta file from the audio file names if it does not exist.
        # The scene label is encoded in the file name (e.g. "park01.wav"),
        # so the trailing two characters (the take index) are stripped.
        if not self.meta_container.exists():
            meta_data = MetaDataContainer()
            for file in self.audio_files:
                meta_data.append(MetaDataItem({
                    'file': os.path.split(file)[1],
                    'scene_label': os.path.splitext(os.path.split(file)[1])[0][:-2]
                }))
            self.meta_container.update(meta_data)
            self.meta_container.save()

        # Check that the train and test lists exist for every fold.
        # Range must reach crossvalidation_folds inclusive; the original
        # example only checked folds 1..crossvalidation_folds-1.
        all_folds_found = True
        for fold in range(1, self.crossvalidation_folds + 1):
            if not os.path.isfile(self._get_evaluation_setup_filename(setup_part='train', fold=fold)):
                all_folds_found = False
            if not os.path.isfile(self._get_evaluation_setup_filename(setup_part='test', fold=fold)):
                all_folds_found = False

        if not all_folds_found:
            if not os.path.isdir(self.evaluation_setup_path):
                os.makedirs(self.evaluation_setup_path)

            classes = self.meta.slice_field('scene_label')
            files = numpy.array(self.meta.slice_field('file'))

            # Stratified splits keep the scene-label distribution similar
            # across the train and test lists of every fold.
            from sklearn.model_selection import StratifiedShuffleSplit
            sss = StratifiedShuffleSplit(n_splits=self.crossvalidation_folds, test_size=0.3, random_state=0)

            fold = 1
            for train_index, test_index in sss.split(X=classes, y=classes):
                # Train list: files with labels.
                MetaDataContainer(self.meta.filter(file_list=list(files[train_index])),
                                  filename=self._get_evaluation_setup_filename(setup_part='train', fold=fold)).save()

                # Test list: files only, labels removed.
                MetaDataContainer(self.meta.filter(file_list=list(files[test_index])).remove_field('scene_label'),
                                  filename=self._get_evaluation_setup_filename(setup_part='test', fold=fold)).save()

                # Evaluate list: test files with ground-truth labels.
                MetaDataContainer(self.meta.filter(file_list=list(files[test_index])),
                                  filename=self._get_evaluation_setup_filename(setup_part='evaluate', fold=fold)).save()
                fold += 1

Important things to remember:

  • Inherit class from the correct base class: AcousticSceneDataset class for scene classification datasets, SoundEventDataset class for sound event datasets, and AudioTaggingDataset for audio tagging datasets.
  • Make sure meta.txt file is generated. This file contains all needed meta data of the dataset.
  • Make sure evaluation setup is provided with the dataset or generate your own. Make sure train and test methods will return correct data.

Adding features

Example of how to extend the FeatureExtractor class with a zero crossing rate feature:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from dcase_framework.features import FeatureExtractor

class CustomFeatureExtractor(FeatureExtractor):
    """Feature extractor extended with a zero crossing rate extractor."""

    def __init__(self, *args, **kwargs):
        # Register the extractor name and its default parameters before
        # handing control to the base class.
        kwargs['valid_extractors'] = [
            'zero_crossing_rate',
        ]
        kwargs['default_parameters'] = {
            'zero_crossing_rate': {
                'mono': True,
                'center': True,
            },
        }

        super(CustomFeatureExtractor, self).__init__(*args, **kwargs)

    def _zero_crossing_rate(self, data, params):
        """Zero crossing rate

        Parameters
        ----------
        data : numpy.ndarray
            Audio data, one row per channel.
        params : dict
            Extraction parameters; 'win_length_samples', 'hop_length_samples'
            and 'center' are read.

        Returns
        -------
        list of numpy.ndarray
            One column vector of per-frame zero crossing rates per channel.

        """

        import librosa

        # One feature matrix per channel, reshaped into a column vector.
        return [
            librosa.feature.zero_crossing_rate(
                y=data[channel_id, :],
                frame_length=params.get('win_length_samples'),
                hop_length=params.get('hop_length_samples'),
                center=params.get('center')
            ).reshape((-1, 1))
            for channel_id in range(0, data.shape[0])
        ]

Important things to remember:

  • Decide extractor name
  • Add your extractor name to valid_extractors list
  • Add default parameters to default_parameters dict
  • Add extractor method to the class. Method name is extractor name started with underscore.

Adding learners

Example of how to extend the SceneClassifier class with an SVM learner:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from dcase_framework.learners import SceneClassifier
from sklearn.svm import SVC

class SceneClassifierSVM(SceneClassifier):
    """Scene classifier with SVM"""
    def __init__(self, *args, **kwargs):
        super(SceneClassifierSVM, self).__init__(*args, **kwargs)
        self.method = 'svm'

    def learn(self, data, annotations, data_filenames=None):
        """Learn based on data and annotations

        Parameters
        ----------
        data : dict of FeatureContainers
            Feature data, keyed by training file name
        annotations : dict of MetadataContainers
            Meta data, keyed by training file name
        data_filenames : dict, optional
            Mapping of item names to file names; not used by this learner

        Returns
        -------
        self

        """

        training_files = annotations.keys()  # Collect training files
        activity_matrix_dict = self._get_target_matrix_dict(data, annotations)

        # Stack per-file features and one-hot targets into training matrices.
        X_training = numpy.vstack([data[x].feat[0] for x in training_files])
        Y_training = numpy.vstack([activity_matrix_dict[x] for x in training_files])

        # SVC expects class indices, not one-hot targets.
        y = numpy.argmax(Y_training, axis=1)

        self['model'] = SVC(**self.learner_params).fit(X_training, y)

        return self

    def _frame_probabilities(self, feature_data):
        """Return per-class frame probabilities, shape (n_classes, n_frames).

        Raises
        ------
        AssertionError
            If the model was trained without probability estimates enabled.
        """
        # Prefer log-probabilities when the trained model provides them.
        if hasattr(self['model'], 'predict_log_proba'):
            return self['model'].predict_log_proba(feature_data).T
        elif hasattr(self['model'], 'predict_proba'):
            return self['model'].predict_proba(feature_data).T
        else:
            message = '{name}: Train model with probability flag [True].'.format(
                name=self.__class__.__name__
            )
            self.logger.exception(message)
            raise AssertionError(message)

Important things to remember:

  • Inherit class from the correct base class: use SceneClassifier class for scene classification tasks, EventDetector class for sound event tasks.
  • Implement learn method for training
  • Implement predict method for testing, or specialize methods from base class to get frame probabilities like in the example.

Extending ApplicationCore

Example of how to extend the AcousticSceneClassificationAppCore class with all of the above extensions:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
from dcase_framework.application_core import AcousticSceneClassificationAppCore

class CustomAppCore(AcousticSceneClassificationAppCore):
    """Application core wired with the custom dataset, learner and extractor."""

    def __init__(self, *args, **kwargs):
        # Inject the extension classes into the application core so the
        # framework can resolve them by name.
        extension_datasets = {
            'DCASE2013_Scene_EvaluationSet': DCASE2013_Scene_EvaluationSet,
        }
        extension_learners = {
            'svm': SceneClassifierSVM,
        }

        kwargs['Datasets'] = extension_datasets
        kwargs['Learners'] = extension_learners
        kwargs['FeatureExtractor'] = CustomFeatureExtractor

        super(CustomAppCore, self).__init__(*args, **kwargs)