Refresh card: AudioLDMCAASteeringController quickstart
Browse files
README.md
CHANGED
|
@@ -7,15 +7,15 @@ tags:
|
|
| 7 |
- audioldm2
|
| 8 |
- caa
|
| 9 |
- diffusion
|
| 10 |
-
- female-vocals
|
| 11 |
- interpretability
|
| 12 |
- music
|
| 13 |
- steering
|
|
|
|
| 14 |
---
|
| 15 |
|
| 16 |
-
# CAA — `female_vocals` (AudioLDM2)
|
| 17 |
|
| 18 |
-
Steering vectors for the **female_vocals** concept on AudioLDM2, computed via contrastive activation addition (CAA).
|
| 19 |
|
| 20 |
## Quickstart
|
| 21 |
|
|
@@ -23,7 +23,7 @@ Steering vectors for the **female_vocals** concept on AudioLDM2, computed via co
|
|
| 23 |
from src.steering import SteerableAudioLDMModel, AudioLDMCAASteeringController
|
| 24 |
|
| 25 |
model = SteerableAudioLDMModel(device="cuda")
|
| 26 |
-
ctrl = AudioLDMCAASteeringController.from_pretrained("lukasz-staniszewski/audioldm2-caa-female-vocals", alpha=1.0)
|
| 27 |
|
| 28 |
with model.steer(ctrl):
|
| 29 |
out = model.generate(
|
|
@@ -39,7 +39,7 @@ with model.steer(ctrl):
|
|
| 39 |
{
|
| 40 |
"method": "standard_caa_audioldm",
|
| 41 |
"model": "cvssp/audioldm2-large",
|
| 42 |
-
"concept": "female_vocals",
|
| 43 |
"num_inference_steps": 100,
|
| 44 |
"audio_length_in_s": 10.0,
|
| 45 |
"guidance_scale": 4.5,
|
|
|
|
| 7 |
- audioldm2
|
| 8 |
- caa
|
| 9 |
- diffusion
|
|
|
|
| 10 |
- interpretability
|
| 11 |
- music
|
| 12 |
- steering
|
| 13 |
+
- vocal-gender
|
| 14 |
---
|
| 15 |
|
| 16 |
+
# CAA — `vocal_gender` (AudioLDM2)
|
| 17 |
|
| 18 |
+
Steering vectors for the **vocal_gender** concept on AudioLDM2, computed via contrastive activation addition (CAA).
|
| 19 |
|
| 20 |
## Quickstart
|
| 21 |
|
|
|
|
| 23 |
from src.steering import SteerableAudioLDMModel, AudioLDMCAASteeringController
|
| 24 |
|
| 25 |
model = SteerableAudioLDMModel(device="cuda")
|
| 26 |
+
ctrl = AudioLDMCAASteeringController.from_pretrained("lukasz-staniszewski/audioldm2-caa-vocal-gender", alpha=1.0)
|
| 27 |
|
| 28 |
with model.steer(ctrl):
|
| 29 |
out = model.generate(
|
|
|
|
| 39 |
{
|
| 40 |
"method": "standard_caa_audioldm",
|
| 41 |
"model": "cvssp/audioldm2-large",
|
| 42 |
+
"concept": "vocal_gender",
|
| 43 |
"num_inference_steps": 100,
|
| 44 |
"audio_length_in_s": 10.0,
|
| 45 |
"guidance_scale": 4.5,
|