https://huggingface.co/kojima-r/wav2vec2-base-birddb-small

import librosa
import torch
from transformers import Wav2Vec2ForPreTraining,Wav2Vec2Processor

# Load the recording resampled to 16 kHz. The second return value (the sample
# rate) is discarded because it is fixed by `sr`.
# NOTE(review): relies on librosa.load's default mono down-mix so that
# `sound_data` is 1-D -- confirm if that default is ever overridden.
sound_file = 'sample.wav'
sound_data, _ = librosa.load(sound_file, sr=16000)

# Fetch the pretrained checkpoint by its Hugging Face Hub identifier.
model_id = "kojima-lab/Cd_kahakuDB"
model = Wav2Vec2ForPreTraining.from_pretrained(model_id)

# Add a leading batch axis: (num_samples,) -> (1, num_samples).
# torch.from_numpy is used instead of torch.tensor([ndarray]) because building
# a tensor from a Python list of NumPy arrays is very slow and makes PyTorch
# emit a UserWarning.
input_values = torch.from_numpy(sound_data).unsqueeze(0)

# Inference only: disable autograd so no graph is recorded, which saves memory
# and lets `hidden_vecs` be converted with .numpy() without a .detach() call.
with torch.no_grad():
    result = model(input_values)
hidden_vecs = result.projected_states

PCA UMAP PCA UMAP

H: red, L: green

  1. 新潟県妙高高原 #ff0000, red
  2. 埼玉県比企丘陵 #ff8000, orange
  3. 伊豆諸島新島 #804000, brown
  4. 伊豆諸島三宅島 #ffff00, yellow
  5. 奄美諸島喜界島 #00ff00, green
  6. 小笠原諸島母島 #008000, dark green
  7. 茨城県つくば市 #00ffff, light blue
  8. 小笠原諸島父島 #0000ff, blue
  9. 伊豆諸島八丈島 #000080, dark blue
  10. 南大東島 #8000ff, purple
  11. 北海道網走周辺 #ff00ff, pink