```python
# Import necessary libraries
from fastai.vision.all import Path, get_files
import soundfile as sf
import librosa as lb
import librosa.display as lbd
from soundfile import SoundFile
import numpy as np
import matplotlib.pyplot as plt
import random
import os
import pandas as pd
from fastbook import *
from IPython.display import Image, display, Audio, Markdown
import plotly.express as px
```
The aim of this post is to understand and analyze the data provided in the BirdCLEF 2024 competition. If you have not heard of the competition yet, you can check out the references below to learn more about it.

Note: If you want to run the notebook and experiment yourself, here is the link: BirdCLEF EDA
```python
# Set config
class Config:
    sampling_rate = 32000
    duration = 5
    fmin = 0
    fmax = None
    audios_path = Path("../data/train_audio")
    out_dir_train = Path("specs/train")
    out_dir_valid = Path("specs/valid")
```
Some utility functions
```python
# Get info of the audio file
def get_audio_info(filepath):
    """Get some properties from an audio file"""
    with SoundFile(filepath) as f:
        sr = f.samplerate
        frames = f.frames
        duration = float(frames) / sr
    return {"frames": frames, "sr": sr, "duration": duration}
```
```python
# Compute the mel-spectrogram of the audio file
def compute_melspec(y, sr, n_mels, fmin, fmax):
    """
    Computes a mel-spectrogram and puts it on a decibel scale.

    Arguments:
        y {np.ndarray} -- signal
        sr {int} -- sample rate
        n_mels {int} -- number of mel bands
        fmin, fmax {float} -- frequency range of the spectrogram

    Returns:
        np.ndarray -- mel-spectrogram in dB
    """
    melspec = lb.feature.melspectrogram(
        y=y, sr=sr, n_mels=n_mels, fmin=fmin, fmax=fmax,
    )
    melspec = lb.power_to_db(melspec).astype(np.float32)
    return melspec
```
```python
# Normalize a spectrogram and convert it to an 8-bit image
def mono_to_color(X, eps=1e-6, mean=None, std=None):
    mean = mean or X.mean()
    std = std or X.std()
    X = (X - mean) / (std + eps)  # standardize

    _min, _max = X.min(), X.max()

    if (_max - _min) > eps:
        # Rescale to the 0-255 range
        V = np.clip(X, _min, _max)
        V = 255 * (V - _min) / (_max - _min)
        V = V.astype(np.uint8)
    else:
        # Constant input: return an all-zero image
        V = np.zeros_like(X, dtype=np.uint8)

    return V
```
```python
sr, n_mels, fmin, fmax = Config.sampling_rate, 128, Config.fmin, Config.fmax

def audio_to_image(audio):
    melspec = compute_melspec(audio, sr=sr, n_mels=n_mels, fmin=fmin, fmax=fmax)
    image = mono_to_color(melspec)
    return image
```
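As a quick sanity check (not part of the original notebook), we can push a synthetic 5-second sine wave through the pipeline and confirm that it produces a 2-D `uint8` image with `n_mels` rows:

```python
# Illustrative only: a 440 Hz sine wave at the competition sample rate
t = np.linspace(0, Config.duration, Config.duration * Config.sampling_rate, endpoint=False)
test_signal = 0.5 * np.sin(2 * np.pi * 440 * t)

test_img = audio_to_image(test_signal)
print(test_img.shape, test_img.dtype)  # e.g. (128, 313) uint8
```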
= Path("../data/")
path = get_files(path / "train_audio", extensions=".ogg")
audio_files print(f"Found {len(audio_files)} audio files")
Found 24459 audio files
So we have 24,459 audio clips of varying length in the training data.
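To get a feel for how long the recordings are, here is a minimal sketch (not from the original notebook) that samples a few hundred files and summarizes their durations with the `get_audio_info` helper defined above:

```python
# Illustrative: estimate the duration distribution from a random subset of files
# (reading the headers of all ~24k files works too, but takes noticeably longer)
sample_files = random.sample(list(audio_files), 300)
durations = pd.Series([get_audio_info(f)["duration"] for f in sample_files])
print(durations.describe())
```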
Let's listen to a few clips and look at their spectrograms to get a first glimpse of the data.
```python
# take a random sample
audio_path = random.choice(audio_files)
info = get_audio_info(audio_path)
print(info)

# Convert to spectrogram
audio, sr = sf.read(audio_path)
img = audio_to_image(audio)

# show spectrogram
plt.imshow(img)
plt.show()

# play audio
y, sr = lb.load(audio_path)
Audio(y, rate=sr)
```

```
{'frames': 2066390, 'sr': 32000, 'duration': 64.5746875}
```
```python
# take a random sample
audio_path = random.choice(audio_files)
info = get_audio_info(audio_path)
print(info)

# Convert to spectrogram
audio, sr = sf.read(audio_path)
img = audio_to_image(audio)

# show spectrogram
plt.imshow(img)
plt.show()

# play audio
y, sr = lb.load(audio_path)
Audio(y, rate=sr)
```

```
{'frames': 906971, 'sr': 32000, 'duration': 28.34284375}
```
```python
# take a random sample
audio_path = random.choice(audio_files)
info = get_audio_info(audio_path)
print(info)

# Convert to spectrogram
audio, sr = sf.read(audio_path)
img = audio_to_image(audio)

# show spectrogram
plt.imshow(img)
plt.show()

# play audio
y, sr = lb.load(audio_path)
Audio(y, rate=sr)
```

```
{'frames': 301760, 'sr': 32000, 'duration': 9.43}
```
```python
# take a random sample
audio_path = random.choice(audio_files)
info = get_audio_info(audio_path)
print(info)

# Convert to spectrogram
audio, sr = sf.read(audio_path)
img = audio_to_image(audio)

# show spectrogram
plt.imshow(img)
plt.show()

# play audio
y, sr = lb.load(audio_path)
Audio(y, rate=sr)
```

```
{'frames': 1496832, 'sr': 32000, 'duration': 46.776}
```
```python
# take a random sample
audio_path = random.choice(audio_files)
info = get_audio_info(audio_path)
print(info)

# Convert to spectrogram
audio, sr = sf.read(audio_path)
img = audio_to_image(audio)

# show spectrogram
plt.imshow(img)
plt.show()

# play audio
y, sr = lb.load(audio_path)
Audio(y, rate=sr)
```

```
{'frames': 1823232, 'sr': 32000, 'duration': 56.976}
```
You will find that the bird calls show up as brighter regions on the spectrogram.

Since the sample rate is 32,000 Hz, a 56.976 s clip loaded in Python becomes an array of length 56.976 × 32000 = 1,823,232, which is the number of frames.
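We can verify this relationship for any clip directly from the file header (a small illustrative check, reusing the `get_audio_info` helper and the `audio_path` of the last sample above):

```python
# frames should equal duration * sample_rate (up to rounding)
info = get_audio_info(audio_path)
assert info["frames"] == round(info["duration"] * info["sr"])
print(info["frames"], round(info["duration"] * info["sr"]))
```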
If you want to dig deeper into how sound is represented digitally, check out this blog.

Let's study the metadata to get more insights.
```python
df = pd.read_csv('../data/train_metadata.csv')
df.shape
```

```
(24459, 12)
```
```python
df.head()
```

| | primary_label | secondary_labels | type | latitude | longitude | scientific_name | common_name | author | license | rating | url | filename |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | asbfly | [] | ['call'] | 39.2297 | 118.1987 | Muscicapa dauurica | Asian Brown Flycatcher | Matt Slaymaker | Creative Commons Attribution-NonCommercial-ShareAlike 3.0 | 5.0 | https://www.xeno-canto.org/134896 | asbfly/XC134896.ogg |
| 1 | asbfly | [] | ['song'] | 51.4030 | 104.6401 | Muscicapa dauurica | Asian Brown Flycatcher | Magnus Hellström | Creative Commons Attribution-NonCommercial-ShareAlike 3.0 | 2.5 | https://www.xeno-canto.org/164848 | asbfly/XC164848.ogg |
| 2 | asbfly | [] | ['song'] | 36.3319 | 127.3555 | Muscicapa dauurica | Asian Brown Flycatcher | Stuart Fisher | Creative Commons Attribution-NonCommercial-ShareAlike 4.0 | 2.5 | https://www.xeno-canto.org/175797 | asbfly/XC175797.ogg |
| 3 | asbfly | [] | ['call'] | 21.1697 | 70.6005 | Muscicapa dauurica | Asian Brown Flycatcher | vir joshi | Creative Commons Attribution-NonCommercial-ShareAlike 4.0 | 4.0 | https://www.xeno-canto.org/207738 | asbfly/XC207738.ogg |
| 4 | asbfly | [] | ['call'] | 15.5442 | 73.7733 | Muscicapa dauurica | Asian Brown Flycatcher | Albert Lastukhin & Sergei Karpeev | Creative Commons Attribution-NonCommercial-ShareAlike 4.0 | 4.0 | https://www.xeno-canto.org/209218 | asbfly/XC209218.ogg |
```python
df.primary_label.nunique()
```

```
182
```

There are 182 unique bird species in the competition data.
```python
value_counts = df['primary_label'].value_counts()
```

Number of samples available for each bird:
```python
# Plotting only the top N values
top_n = value_counts.head(50)  # Adjust N as needed
top_n.plot(kind='bar', figsize=(20, 6))

plt.title('Top 50 species by number of samples')
plt.xlabel('Species (primary_label)')
plt.ylabel('Number of samples')
plt.xticks(rotation=45)
plt.show()
```
```python
# Plotting only the bottom N values
bottom_n = value_counts.tail(50)  # Adjust N as needed
bottom_n.plot(kind='bar', figsize=(20, 6))

plt.title('Bottom 50 species by number of samples')
plt.xlabel('Species (primary_label)')
plt.ylabel('Number of samples')
plt.xticks(rotation=45)
plt.show()
```
A few birds have around 500 samples, while some have only 5.
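To put concrete numbers on this imbalance, here is a small illustrative snippet (reusing `value_counts` from above; the threshold of 50 is an arbitrary choice):

```python
print(f"Most represented species:  {value_counts.idxmax()} ({value_counts.max()} samples)")
print(f"Least represented species: {value_counts.idxmin()} ({value_counts.min()} samples)")
print(f"Species with fewer than 50 samples: {(value_counts < 50).sum()}")
```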
Distribution of the birds on the global map:
```python
fig = px.scatter_mapbox(df, lat='latitude', lon='longitude', color='primary_label',
                        hover_name='primary_label', hover_data=['latitude', 'longitude'],
                        title='Geographical Distribution of Bird Species',
                        zoom=1, height=600)
fig.update_layout(mapbox_style="open-street-map")
fig.show()
```
The data contains recordings from all over the world, but there are two huge clusters, one over Asia and one over Europe.
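A rough way to quantify these clusters (an illustrative sketch; the latitude/longitude cut-offs below are hand-picked approximations, not official region boundaries):

```python
# Count recordings falling into coarse, hand-drawn regions
def rough_region(row):
    if row["longitude"] > 60:
        return "Asia (approx.)"
    if -10 <= row["longitude"] <= 60 and row["latitude"] > 35:
        return "Europe (approx.)"
    return "Other"

located = df.dropna(subset=["latitude", "longitude"])
print(located.apply(rough_region, axis=1).value_counts())
```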
Let's take a closer look at a few birds.
= "yebbab1"
name = df.loc[df['primary_label'] == name]
temp print(f"total number of bird in the dataset: {len(temp)}")
```python
# Download some images of the bird
fig = px.scatter_mapbox(temp, lat='latitude', lon='longitude', color='primary_label',
                        hover_name='primary_label', hover_data=['latitude', 'longitude'],
                        title='Geographical Distribution of Bird Species',
                        zoom=1, height=600)
fig.update_layout(mapbox_style="open-street-map")
fig.show()

# Assuming 'temp' is a DataFrame with bird data
idx = random.randint(0, len(temp) - 1)
entry = temp.iloc[idx]

filename = entry['filename']
scientific_name = entry['scientific_name']
common_name = entry['common_name']
urls = search_images_ddg(common_name, max_images=1)

# Display bird information
display(Markdown("### Bird Information"))
display(Markdown(f"**Scientific Name:** {scientific_name}"))
display(Markdown(f"**Common Name:** {common_name}"))
display(Image(url=urls[0], width=300, height=300))

# Audio information
audio_path = os.path.join(Config.audios_path, filename)
info = get_audio_info(audio_path)
display(Markdown("### Audio Information"))
print(f"Audio Info: {info} \n")

# Audio Spectrogram
display(Markdown("### Audio Spectrogram"))
audio, sr = sf.read(audio_path)
img = audio_to_image(audio)

plt.imshow(img)
plt.axis('off')  # Optional: Hide axis
plt.show()

# Play audio
y, sr = lb.load(audio_path)
display(Audio(y, rate=sr))
```
```
total number of bird in the dataset: 28
Audio Info: {'frames': 354048, 'sr': 32000, 'duration': 11.064}
```

**Bird Information:** Argya affinis (Yellow-billed Babbler). The downloaded image, the spectrogram, and the audio player are rendered in the notebook output.
Moipig1
= "moipig1"
name = df.loc[df['primary_label'] == name]
temp print(f"total number of bird in the dataset: {len(temp)}")
```python
# Download some images of the bird
fig = px.scatter_mapbox(temp, lat='latitude', lon='longitude', color='primary_label',
                        hover_name='primary_label', hover_data=['latitude', 'longitude'],
                        title='Geographical Distribution of Bird Species',
                        zoom=1, height=600)
fig.update_layout(mapbox_style="open-street-map")
fig.show()

# Assuming 'temp' is a DataFrame with bird data
idx = random.randint(0, len(temp) - 1)
entry = temp.iloc[idx]

filename = entry['filename']
scientific_name = entry['scientific_name']
common_name = entry['common_name']
urls = search_images_ddg(common_name, max_images=1)

# Display bird information
display(Markdown("### Bird Information"))
display(Markdown(f"**Scientific Name:** {scientific_name}"))
display(Markdown(f"**Common Name:** {common_name}"))
display(Image(url=urls[0], width=300, height=300))

# Audio information
audio_path = os.path.join(Config.audios_path, filename)
info = get_audio_info(audio_path)
display(Markdown("### Audio Information"))
print(f"Audio Info: {info} \n")

# Audio Spectrogram
display(Markdown("### Audio Spectrogram"))
audio, sr = sf.read(audio_path)
img = audio_to_image(audio)

plt.imshow(img)
plt.axis('off')  # Optional: Hide axis
plt.show()

# Play audio
y, sr = lb.load(audio_path)
display(Audio(y, rate=sr))
```
```
total number of bird in the dataset: 27
Audio Info: {'frames': 589322, 'sr': 32000, 'duration': 18.4163125}
```

**Bird Information:** Ducula badia (Mountain Imperial-Pigeon)
= "integr"
name = df.loc[df['primary_label'] == name]
temp print(f"total number of bird in the dataset: {len(temp)}")
```python
# Download some images of the bird
fig = px.scatter_mapbox(temp, lat='latitude', lon='longitude', color='primary_label',
                        hover_name='primary_label', hover_data=['latitude', 'longitude'],
                        title='Geographical Distribution of Bird Species',
                        zoom=1, height=600)
fig.update_layout(mapbox_style="open-street-map")
fig.show()

# Assuming 'temp' is a DataFrame with bird data
idx = random.randint(0, len(temp) - 1)
entry = temp.iloc[idx]

filename = entry['filename']
scientific_name = entry['scientific_name']
common_name = entry['common_name']
urls = search_images_ddg(common_name, max_images=1)

# Display bird information
display(Markdown("### Bird Information"))
display(Markdown(f"**Scientific Name:** {scientific_name}"))
display(Markdown(f"**Common Name:** {common_name}"))
display(Image(url=urls[0], width=300, height=300))

# Audio information
audio_path = os.path.join(Config.audios_path, filename)
info = get_audio_info(audio_path)
display(Markdown("### Audio Information"))
print(f"Audio Info: {info} \n")

# Audio Spectrogram
display(Markdown("### Audio Spectrogram"))
audio, sr = sf.read(audio_path)
img = audio_to_image(audio)

plt.imshow(img)
plt.axis('off')  # Optional: Hide axis
plt.show()

# Play audio
y, sr = lb.load(audio_path)
display(Audio(y, rate=sr))
```
```
total number of bird in the dataset: 5
Audio Info: {'frames': 150465, 'sr': 32000, 'duration': 4.70203125}
```

**Bird Information:** Ardea intermedia (Intermediate Egret)