#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Metrics
-------

Main functions:

* :func:`sed_eval.audio_tag.AudioTaggingMetrics.evaluate`: Calculate intermediate values for evaluation and accumulate them.
* :func:`sed_eval.audio_tag.AudioTaggingMetrics.results`: Calculate and return all metrics.
* :func:`sed_eval.audio_tag.AudioTaggingMetrics.results_overall_metrics`: Calculate and return overall metrics (micro-averaged).
* :func:`sed_eval.audio_tag.AudioTaggingMetrics.results_class_wise_metrics`: Calculate and return class-wise metrics.
* :func:`sed_eval.audio_tag.AudioTaggingMetrics.results_class_wise_average_metrics`: Calculate and return class-wise average metrics (macro-averaged).

Function :func:`sed_eval.audio_tag.AudioTaggingMetrics.evaluate` takes as parameters a tag list
(use :class:`dcase_util.containers.MetaDataContainer` to read it from a file) and a probability list
(use :class:`dcase_util.containers.ProbabilityContainer` to read it from a file).

Usage example:

.. code-block:: python
    :linenos:

import sed_eval
import dcase_util
reference_tag_list = dcase_util.containers.MetaDataContainer([
{
'filename': 'test1.wav',
'tags': 'cat,dog'
},
{
'filename': 'test2.wav',
'tags': 'dog'
},
{
'filename': 'test3.wav',
'tags': 'bird,cat'
},
{
'filename': 'test4.wav',
'tags': 'cat'
},
{
'filename': 'test5.wav',
'tags': 'bird,speech'
},
{
'filename': 'test6.wav',
'tags': 'dog,speech'
},
{
'filename': 'test7.wav',
'tags': 'speech'
},
])
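
    # Estimated tag probabilities: one entry per (file, label) pair.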
estimated_tag_probabilities = dcase_util.containers.ProbabilityContainer([
{
'filename': 'test1.wav',
'label': 'bird',
'probability': 0.2
},
{
'filename': 'test1.wav',
'label': 'cat',
'probability': 0.99
},
{
'filename': 'test1.wav',
'label': 'dog',
'probability': 0.88
},
{
'filename': 'test1.wav',
'label': 'speech',
'probability': 0.01
},
{
'filename': 'test2.wav',
'label': 'bird',
'probability': 0.1
},
{
'filename': 'test2.wav',
'label': 'cat',
'probability': 0.3
},
{
'filename': 'test2.wav',
'label': 'dog',
'probability': 0.8
},
{
'filename': 'test2.wav',
'label': 'speech',
'probability': 0.1
},
{
'filename': 'test3.wav',
'label': 'bird',
'probability': 0.7
},
{
'filename': 'test3.wav',
'label': 'cat',
'probability': 0.6
},
{
'filename': 'test3.wav',
'label': 'dog',
'probability': 0.4
},
{
'filename': 'test3.wav',
'label': 'speech',
'probability': 0.3
},
{
'filename': 'test4.wav',
'label': 'bird',
'probability': 0.323
},
{
'filename': 'test4.wav',
'label': 'cat',
'probability': 0.6
},
{
'filename': 'test4.wav',
'label': 'dog',
'probability': 0.56
},
{
'filename': 'test4.wav',
'label': 'speech',
'probability': 0.4
},
{
'filename': 'test5.wav',
'label': 'bird',
'probability': 0.8
},
{
'filename': 'test5.wav',
'label': 'cat',
'probability': 0.7
},
{
'filename': 'test5.wav',
'label': 'dog',
'probability': 0.45
},
{
'filename': 'test5.wav',
'label': 'speech',
'probability': 0.43
},
{
'filename': 'test6.wav',
'label': 'bird',
'probability': 0.9
},
{
'filename': 'test6.wav',
'label': 'cat',
'probability': 0.53
},
{
'filename': 'test6.wav',
'label': 'dog',
'probability': 0.83
},
{
'filename': 'test6.wav',
'label': 'speech',
'probability': 0.95
},
{
'filename': 'test7.wav',
'label': 'bird',
'probability': 0.2
},
{
'filename': 'test7.wav',
'label': 'cat',
'probability': 0.2
},
{
'filename': 'test7.wav',
'label': 'dog',
'probability': 0.89
},
{
'filename': 'test7.wav',
'label': 'speech',
'probability': 0.45
},
])
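
    # Derive a binary tag list from the per-file probabilities by applying
    # a fixed 0.5 threshold (the threshold value is chosen only for illustration).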
estimated_tag_list = dcase_util.containers.MetaDataContainer()
for file in estimated_tag_probabilities.unique_files:
k = estimated_tag_probabilities.filter(filename=file)
tags = []
for item in k:
if item.probability > 0.5:
tags.append(item.label)
estimated_tag_list.append(
{
'filename': file,
'tags': tags
}
)
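
    # Create the evaluator using the unique tag labels found in the reference metadata.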
tag_evaluator = sed_eval.audio_tag.AudioTaggingMetrics(
tags=reference_tag_list.unique_tags
)
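
    # Pass both the binarized tag list (used for the F-measure counts) and the
    # raw probabilities (used for the EER computation) to the evaluator.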
tag_evaluator.evaluate(
reference_tag_list=reference_tag_list,
estimated_tag_list=estimated_tag_list,
estimated_tag_probabilities=estimated_tag_probabilities
)
print(tag_evaluator)
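
To access the metric values programmatically instead of the formatted report, the same
information is available as nested dictionaries (a minimal sketch continuing the example
above; the key layout follows :func:`sed_eval.audio_tag.AudioTaggingMetrics.results`):

.. code-block:: python

    results = tag_evaluator.results()

    # Micro-averaged metrics pooled over all (file, tag) decisions
    overall_f1 = results['overall']['f_measure']['f_measure']

    # Macro-averaged metrics (mean over tag classes)
    macro_f1 = results['class_wise_average']['f_measure']['f_measure']

    # Per-tag counts and metrics, e.g. for the 'cat' tag
    cat_f1 = results['class_wise']['cat']['f_measure']['f_measure']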

.. autosummary::
    :toctree: generated/

    AudioTaggingMetrics
    AudioTaggingMetrics.evaluate
    AudioTaggingMetrics.results
    AudioTaggingMetrics.results_overall_metrics
    AudioTaggingMetrics.results_class_wise_metrics
    AudioTaggingMetrics.results_class_wise_average_metrics
    AudioTaggingMetrics.result_report_parameters
    AudioTaggingMetrics.result_report_class_wise
    AudioTaggingMetrics.result_report_class_wise_average
    AudioTaggingMetrics.reset
"""
from __future__ import absolute_import
import numpy
from . import metric
import dcase_util
class AudioTaggingMetrics:
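    """Audio tagging metrics

    Tags are evaluated per audio file against the list of tag labels given
    at initialization time.
    """
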
    def __init__(self, tags=None):
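        """Constructor

        Parameters
        ----------
        tags : list of str
            List of tag labels to be evaluated

        """
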
self.tag_label_list = tags
self.overall = {
'Ncorr': 0.0,
'Nref': 0.0,
'Nsys': 0.0,
'Ntp': 0.0,
'Ntn': 0.0,
'Nfp': 0.0,
'Nfn': 0.0,
}
self.tag_wise = {}
self.y_true = {}
self.y_pred = {}
self.y_pred_score = {}
for label in self.tag_label_list:
self.tag_wise[label] = {
'Nref': 0.0,
'Nsys': 0.0,
'Ntp': 0.0,
'Ntn': 0.0,
'Nfp': 0.0,
'Nfn': 0.0,
}
self.y_true[label] = []
self.y_pred[label] = []
self.y_pred_score[label] = []
self.ui = dcase_util.ui.FancyStringifier()
def __str__(self):
"""Print result reports"""
output = self.ui.section_header('Audio tagging metrics') + '\n'
output += self.result_report_parameters() + '\n'
output += self.result_report_overall() + '\n'
output += self.result_report_class_wise_average() + '\n'
output += self.result_report_class_wise() + '\n'
return output
    def evaluate(self, reference_tag_list, estimated_tag_list=None, estimated_tag_probabilities=None):
        """Evaluate estimated tags against reference tags

        Parameters
        ----------
        reference_tag_list : list of dict or dcase_util.containers.MetaDataContainer
            Reference tag list

        estimated_tag_list : list of dict or dcase_util.containers.MetaDataContainer
            Estimated tag list

        estimated_tag_probabilities : list of dict or dcase_util.containers.ProbabilityContainer
            Estimated tag probabilities

        Returns
        -------
        self

        """
if estimated_tag_list is None and estimated_tag_probabilities is None:
raise ValueError("Nothing to evaluate, give at least estimated_tag_list or estimated_tag_probabilities")
# Make sure reference_tag_list is dcase_util.containers.MetaDataContainer
if not isinstance(reference_tag_list, dcase_util.containers.MetaDataContainer):
reference_tag_list = dcase_util.containers.MetaDataContainer(reference_tag_list)
# Make sure estimated_tag_list is dcase_util.containers.MetaDataContainer
if estimated_tag_list is not None:
if not isinstance(estimated_tag_list, dcase_util.containers.MetaDataContainer):
estimated_tag_list = dcase_util.containers.MetaDataContainer(estimated_tag_list)
# Make sure estimated_tag_probabilities is dcase_util.containers.ProbabilityContainer
if estimated_tag_probabilities is not None:
if not isinstance(estimated_tag_probabilities, dcase_util.containers.ProbabilityContainer):
estimated_tag_probabilities = dcase_util.containers.ProbabilityContainer(estimated_tag_probabilities)
y_true = []
y_pred = []
        # Go through reference and estimated lists label by label, and file by file
for label in self.tag_label_list:
for filename in reference_tag_list.unique_files:
reference_item = reference_tag_list.filter(filename=filename)[0]
# Populate y_true based on reference_item
if label in reference_item.tags:
self.y_true[label].append(1)
y_true.append(1)
else:
self.y_true[label].append(0)
y_true.append(0)
if estimated_tag_list is not None:
# Evaluate based on estimated tags
                    estimated_items = estimated_tag_list.filter(filename=filename)
                    if not estimated_items:
                        # No estimate found for a file present in the reference
                        raise ValueError(
                            "Not all reference files estimated, please check [{file}]".format(
                                file=filename
                            )
                        )
                    estimated_item = estimated_items[0]
# Store nref
if label in reference_item.tags:
self.tag_wise[label]['Nref'] += 1
                    # Populate y_pred based on estimated_item
if label in estimated_item.tags:
self.y_pred[label].append(1)
y_pred.append(1)
self.tag_wise[label]['Nsys'] += 1
else:
self.y_pred[label].append(0)
y_pred.append(0)
# Accumulate intermediate values
# True positives (TP)
if label in reference_item.tags and label in estimated_item.tags:
self.tag_wise[label]['Ntp'] += 1
# True negatives (TN)
if label not in reference_item.tags and label not in estimated_item.tags:
self.tag_wise[label]['Ntn'] += 1
# False positives (FP)
if label not in reference_item.tags and label in estimated_item.tags:
self.tag_wise[label]['Nfp'] += 1
# False negatives (FN)
if label in reference_item.tags and label not in estimated_item.tags:
self.tag_wise[label]['Nfn'] += 1
if estimated_tag_probabilities is not None:
# Evaluate based on per tag probabilities
estimated_item = estimated_tag_probabilities.filter(filename=filename, label=label)[0]
self.y_pred_score[label].append(float(estimated_item['probability']))
if estimated_tag_list is not None:
# Evaluate based on estimated tags
            self.overall['Nref'] += sum(y_true)
            self.overall['Nsys'] += sum(y_pred)

            y_true = numpy.array(y_true)
            y_pred = numpy.array(y_pred)

            # True positives: tag active in both reference and system output
            self.overall['Ntp'] += sum(y_pred + y_true > 1)
            # True negatives: tag inactive in both reference and system output
            self.overall['Ntn'] += sum(y_pred + y_true == 0)
            # False positives: tag active only in system output
            self.overall['Nfp'] += sum(y_pred - y_true > 0)
            # False negatives: tag active only in reference
            self.overall['Nfn'] += sum(y_true - y_pred > 0)
return self
    def reset(self):
"""Reset internal state
"""
        self.overall = {
            'Ncorr': 0.0,
            'Nref': 0.0,
            'Nsys': 0.0,
            'Ntp': 0.0,
            'Ntn': 0.0,
            'Nfp': 0.0,
            'Nfn': 0.0,
        }
self.tag_wise = {}
for label in self.tag_label_list:
self.tag_wise[label] = {
'Nref': 0.0,
'Nsys': 0.0,
'Ntp': 0.0,
'Ntn': 0.0,
'Nfp': 0.0,
'Nfn': 0.0,
}
return self
# Results
    def results(self):
        """All metrics

        Returns
        -------
        dict
            results in a dictionary format

        """
results = {
'overall': self.results_overall_metrics(),
'class_wise': self.results_class_wise_metrics(),
'class_wise_average': self.results_class_wise_average_metrics(),
}
return results
    def results_overall_metrics(self):
        """Overall metrics

        Returns
        -------
        dict
            results in a dictionary format

        """
# F-measure
if self.overall['Nref'] > 0:
precision = metric.precision(
Ntp=self.overall['Ntp'],
Nsys=self.overall['Nsys']
)
recall = metric.recall(
Ntp=self.overall['Ntp'],
Nref=self.overall['Nref']
)
f_measure = metric.f_measure(
precision=precision,
recall=recall
)
else:
precision = None
recall = None
f_measure = None
y_true = []
y_score = []
for tag_id, tag_label in enumerate(self.tag_label_list):
y_true += self.y_true[tag_label]
y_score += self.y_pred_score[tag_label]
if y_score:
eer = metric.equal_error_rate(
y_true=y_true,
y_score=y_score
)
else:
eer = None
return {
'count': {
'Nref': self.overall['Nref'],
'Nsys': self.overall['Nsys'],
},
'f_measure': {
'f_measure': f_measure,
'precision': precision,
'recall': recall,
},
'eer': {
'eer': eer
}
}
    def results_class_wise_metrics(self):
        """Class-wise metrics

        Returns
        -------
        dict
            results in a dictionary format

        """
results = {}
for tag_id, tag_label in enumerate(self.tag_label_list):
if tag_label not in results:
results[tag_label] = {}
# Counts
results[tag_label]['count'] = {
'Nref': self.tag_wise[tag_label]['Nref'],
'Nsys': self.tag_wise[tag_label]['Nsys'],
'Ntp': self.tag_wise[tag_label]['Ntp'],
'Ntn': self.tag_wise[tag_label]['Ntn'],
'Nfp': self.tag_wise[tag_label]['Nfp'],
'Nfn': self.tag_wise[tag_label]['Nfn'],
}
# Equal error rate
if self.y_pred_score[tag_label]:
results[tag_label]['eer'] = {
'eer': metric.equal_error_rate(
y_true=self.y_true[tag_label],
y_score=self.y_pred_score[tag_label]
)
}
else:
results[tag_label]['eer'] = {
'eer': None
}
# F-measure
if self.tag_wise[tag_label]['Nref'] > 0:
precision = metric.precision(
Ntp=self.tag_wise[tag_label]['Ntp'],
Nsys=self.tag_wise[tag_label]['Nsys']
)
recall = metric.recall(
Ntp=self.tag_wise[tag_label]['Ntp'],
Nref=self.tag_wise[tag_label]['Nref']
)
f_measure = metric.f_measure(precision=precision, recall=recall)
else:
precision = None
recall = None
f_measure = None
results[tag_label]['f_measure'] = {
'f_measure': f_measure,
'precision': precision,
'recall': recall,
}
return results
    def results_class_wise_average_metrics(self):
        """Class-wise averaged metrics

        Returns
        -------
        dict
            results in a dictionary format

        """
class_wise_results = self.results_class_wise_metrics()
class_wise_eer = []
class_wise_fmeasure = []
class_wise_precision = []
class_wise_recall = []
for class_label in class_wise_results:
if class_wise_results[class_label]['eer']['eer'] is not None:
class_wise_eer.append(class_wise_results[class_label]['eer']['eer'])
if class_wise_results[class_label]['f_measure']['f_measure'] is not None:
class_wise_fmeasure.append(class_wise_results[class_label]['f_measure']['f_measure'])
class_wise_precision.append(class_wise_results[class_label]['f_measure']['precision'])
class_wise_recall.append(class_wise_results[class_label]['f_measure']['recall'])
if class_wise_eer:
eer = float(numpy.nanmean(class_wise_eer))
else:
eer = None
if class_wise_fmeasure:
f_measure = float(numpy.nanmean(class_wise_fmeasure))
else:
f_measure = None
if class_wise_precision:
precision = float(numpy.nanmean(class_wise_precision))
else:
precision = None
if class_wise_recall:
recall = float(numpy.nanmean(class_wise_recall))
else:
recall = None
return {
'eer': {
'eer': eer
},
'f_measure': {
'f_measure': f_measure,
'precision': precision,
'recall': recall,
}
}
# Reports
    def result_report_parameters(self):
        """Report metric parameters

        Returns
        -------
        str
            result report in string format

        """
output = self.ui.data(field='Tags', value=len(self.tag_label_list)) + '\n'
output += self.ui.data(field='Evaluated units', value=int(self.overall['Nref'])) + '\n'
return output
def result_report_overall(self):
"""Report overall results
Returns
-------
str
result report in string format
"""
results = self.results_overall_metrics()
output = self.ui.section_header('Overall metrics (micro-average)', indent=2) + '\n'
if 'f_measure' in results and results['f_measure']:
if results['f_measure']['f_measure'] is not None:
f_measure = results['f_measure']['f_measure'] * 100
else:
f_measure = None
if results['f_measure']['precision'] is not None:
precision = results['f_measure']['precision'] * 100
else:
precision = None
if results['f_measure']['recall'] is not None:
recall = results['f_measure']['recall'] * 100
else:
recall = None
output += self.ui.line('F-measure', indent=2) + '\n'
output += self.ui.data(field='F-measure (F1)', value=f_measure, unit='%', indent=4) + '\n'
output += self.ui.data(field='Precision', value=precision, unit='%', indent=4) + '\n'
output += self.ui.data(field='Recall', value=recall, unit='%', indent=4) + '\n'
if 'eer' in results and results['eer']:
if results['eer']['eer'] is not None:
eer = results['eer']['eer'] * 100
else:
eer = None
output += self.ui.line('Equal error rate', indent=2) + '\n'
output += self.ui.data(field='Equal error rate (EER)', value=eer, unit='%', indent=4) + '\n'
return output
    def result_report_class_wise_average(self):
        """Report class-wise averages

        Returns
        -------
        str
            result report in string format

        """
results = self.results_class_wise_average_metrics()
output = self.ui.section_header('Class-wise average metrics (macro-average)', indent=2) + '\n'
if 'f_measure' in results and results['f_measure']:
if results['f_measure']['f_measure'] is not None:
f_measure = results['f_measure']['f_measure'] * 100
else:
f_measure = None
if results['f_measure']['precision'] is not None:
precision = results['f_measure']['precision'] * 100
else:
precision = None
if results['f_measure']['recall'] is not None:
recall = results['f_measure']['recall'] * 100
else:
recall = None
output += self.ui.line('F-measure', indent=2) + '\n'
output += self.ui.data(field='F-measure (F1)', value=f_measure, unit='%', indent=4) + '\n'
output += self.ui.data(field='Precision', value=precision, unit='%', indent=4) + '\n'
output += self.ui.data(field='Recall', value=recall, unit='%', indent=4) + '\n'
if 'eer' in results and results['eer']:
if results['eer']['eer'] is not None:
eer = results['eer']['eer'] * 100
else:
eer = None
output += self.ui.line('Equal error rate', indent=2) + '\n'
output += self.ui.data(field='Equal error rate (EER)', value=eer, unit='%', indent=4) + '\n'
return output
    def result_report_class_wise(self):
        """Report class-wise results

        Returns
        -------
        str
            result report in string format

        """
results = self.results_class_wise_metrics()
output = self.ui.section_header('Class-wise metrics', indent=2) + '\n'
output += self.ui.row(
'Tag', 'Nref', 'Nsys', 'F-score', 'Pre', 'Rec', 'EER',
widths=[20, 12, 12, 12, 12, 12, 12],
separators=[True, False, True, False, False, True, False],
indent=4
) + '\n'
output += self.ui.row('-', '-', '-', '-', '-', '-', '-') + '\n'
for tag_label in self.tag_label_list:
if results[tag_label]['f_measure']['f_measure'] is not None:
f_measure = results[tag_label]['f_measure']['f_measure'] * 100
else:
f_measure = None
if results[tag_label]['f_measure']['precision'] is not None:
precision = results[tag_label]['f_measure']['precision'] * 100
else:
precision = None
if results[tag_label]['f_measure']['recall'] is not None:
recall = results[tag_label]['f_measure']['recall'] * 100
else:
recall = None
if results[tag_label]['eer']['eer'] is not None:
eer = results[tag_label]['eer']['eer'] * 100
else:
eer = None
output += self.ui.row(
tag_label,
results[tag_label]['count']['Nref'],
results[tag_label]['count']['Nsys'],
f_measure,
precision,
recall,
eer,
types=['str', 'int', 'int', 'float1_percentage', 'float1', 'float1', 'float1_percentage']
) + '\n'
return output