思路:要想做一個網絡電話,基本遵循以下步驟
1、一方實時的錄音,把模擬信號轉換成數字信號;
2、把聲音實時壓縮;
3、通過網絡協議把壓縮後的數據傳輸給接收方;
4、接收方解壓縮接收到的音頻數據;
5、實時的把接收到的數字信號轉換成模擬信號並播放出來。
下面我們來看看每一步面臨的挑戰及其解決方案。
1、第一步,實時錄音,DirectxSound有錄音方面的API,托管的類分別是Microsoft.DirectX.DirectSound.CaptureDevicesCollection,Microsoft.DirectX.DirectSound.Capture和Microsoft.DirectX.DirectSound.CaptureBuffer,CaptureDevicesCollection用來枚舉本機的可用的錄音設備,Capture則表示一個錄音設備,CaptureBuffer是用來存放錄音數據的緩沖區,我們開始錄音後,音頻數據會不斷的寫入到環形的流式緩沖區,然後我們定期從緩沖區中把錄音數據取出來返回給上層應用層就可以了。關於環形的流式緩沖區,可以看參考鏈接部分。
2、聲音的壓縮是一個很難抉擇的步驟,默認的DirectSound只能播放和錄制PCM格式(WAV)的音頻數據,但這種聲音格式特別大。常用的聲音壓縮格式有G.723.1,GSM,AMR,G.711等等,各種壓縮算法都有自己的碼率和適用范圍。因為我們做的是互聯網的語音電話,不考慮慢速網絡和無線連接下的情況,也不用考慮終端設備的CPU能不能支持我們選用的壓縮算法,我們做的語音電話雙方都是PC機,應該什麼解壓縮算法都不會引起什麼性能上的問題,所以只要網絡快一些,選擇哪個壓縮算法都無所謂了,網上有G.711的壓縮算法,我打算就采用這個,它的碼率是64Kbps,比PCM的1.544Mbps和2.048Mbps要小的多。然後我們進行了音頻數據壓縮後,還可以對字節流進行GZIP或者7ZIP壓縮,前者用SharpZip,後者7zip的官方有c#的使用代碼,大家可以測試一下這兩個算法的性能後做出適合自己的決定。關於各種壓縮格式的特性可以參考我做的PPT及提供的參考鏈接。
3、網絡電話注重實時性,而把聲音從網絡上傳輸就要走IP網絡,而IP網絡不是一個等時系統,所以我們就要盡量的去模擬實時的語音傳輸,提到實時,肯定UDP比TCP要實時,因為TCP要保證傳輸的可靠性,有序性等,而專門用於實時傳輸有一個應用層協議是RTP協議,這個協議一般就是建立在UDP基礎上的,它在每個包頭提供了一些序列號、時間戳等信息,但UDP本身並不會使用這些信息,這時候就有一個RTCP協議來用這些信息進行流量控制和擁塞控制,比如說RTCP檢測到網絡擁擠,會告訴發送方變換一種低碼率的語音壓縮算法來傳輸數據。這些大多都需要自己去實現,本文的源碼沒有去實現這些,關於RTP和RTCP可以參考相關資料或者我做的PPT。
4、每個壓縮算法都有相應的解壓縮算法,呵呵。
5、播放聲音肯定也需要用到DS,也需要用到StreamBuffer,大致流程如下
1)創建一個聲音設備Microsoft.DirectX.DirectSound.Device dev = new Microsoft.DirectX.DirectSound.Device();
2)設置協調級別dev.SetCooperativeLevel(this, Microsoft.DirectX.DirectSound.CooperativeLevel.Normal);
3)創建聲音格式、緩沖區描述、及輔助緩沖區;
4)給輔助緩沖區設定通知;
5)用聲音數據填滿緩沖區;
6)播放緩沖區的聲音數據,播放到一定的通知點,通知填充線程,填充新的聲音數據;
7)循環第6步,直到沒有新的聲音數據填充到緩沖區。
具體的過程參考PPT或者具體代碼。
版權聲明:
附件源代碼裡的CaptureSound,SoundPlayer和CircularBuffer類反編譯自隨意桌面的代碼(注釋是我加的),版權歸作者所有。
PPT裡的圖片和一些文字選自一個叫做ch11-DxSound&Input2.ppt的文件,源鏈接已丟失,還有一些選擇一個叫做“SIP之 穿越NAT.ppt”的文件,網上可以搜索到,版權均歸原作者所有,源作者要是再引用別人的東西,我就不知道了。
下面看一些具體的代碼
用戶創建聲音格式
public class DirectSoundManager
{
    // Builds a PCM WaveFormat for DirectSound from a sampling rate (Hz),
    // sample size in bits and channel count.
    public static WaveFormat CreateWaveFormat(int hz, short bits, short channels)
    {
        WaveFormat format = new WaveFormat();
        // DirectSound records and plays raw PCM, so the tag is always Pcm here.
        format.FormatTag = WaveFormatTag.Pcm;
        // Sampling rate in Hz; typical values are 11025, 22050 and 44100.
        format.SamplesPerSecond = hz;
        // Bits per sample: 8 or 16.
        format.BitsPerSample = bits;
        // 1 = mono, 2 = stereo.
        format.Channels = channels;
        // Size in bytes of one sample frame (all channels together).
        format.BlockAlign = (short)(format.Channels * (format.BitsPerSample / 8));
        // Average byte rate: frame size times frames per second.
        format.AverageBytesPerSecond = format.BlockAlign * format.SamplesPerSecond;
        return format;
    }

    #region Properties
    // Default capture/playback format: 8000 Hz, 8-bit, mono (telephone quality).
    public static WaveFormat DefaultFormat
    {
        get { return WaveFormat_8000_8_1; }
    }

    public static WaveFormat WaveFormat_11025_8_1
    {
        get { return CreateWaveFormat(11025, 8, 1); }
    }

    public static WaveFormat WaveFormat_22050_16_2
    {
        get { return CreateWaveFormat(22050, 16, 2); }
    }

    public static WaveFormat WaveFormat_44100_16_2
    {
        get { return CreateWaveFormat(44100, 16, 2); }
    }

    public static WaveFormat WaveFormat_8000_8_1
    {
        get { return CreateWaveFormat(8000, 8, 1); }
    }
    #endregion
}
用於播放流式聲音
// Plays a continuous PCM stream: callers push bytes via Write(), a background
// thread moves them from a staging ring buffer into a looping DirectSound
// secondary buffer each time a buffer segment finishes playing.
public class SoundPlayer : IDisposable
{
    #region Private members
    private const int MaxLatencyMs = 300;
    // The secondary buffer is split into this many equal segments; a
    // position notification fires as playback crosses each segment boundary.
    private const int NumberRecordNotifications = 4;
    private readonly CircularBuffer circularBuffer;
    private readonly int m_BufferBytes;
    private readonly bool m_OwnsDevice;
    private readonly int notifySize;
    private readonly BufferPositionNotify[] positionNotify;
    private bool isRunning;
    private SecondaryBuffer m_Buffer;
    private Device m_Device;
    private int nextWriteOffset;
    private AutoResetEvent notificationEvent;
    private Notify notify;
    private Thread notifyThread;
    #endregion

    #region Constructors
    public SoundPlayer(Control owner, WaveFormat format)
        : this(owner, null, format)
    {
    }

    // owner:  window used to set the DirectSound cooperative level.
    // device: existing playback device to share; when null a new one is
    //         created and owned (disposed) by this player.
    // format: PCM format of the data that will be passed to Write().
    public SoundPlayer(Control owner, Device device, WaveFormat format)
    {
        positionNotify = new BufferPositionNotify[NumberRecordNotifications + 1];
        notificationEvent = null;
        notify = null;
        notifyThread = null;
        notifySize = 0;
        m_Device = device;
        if (m_Device == null)
        {
            m_Device = new Device();
            m_Device.SetCooperativeLevel(owner, CooperativeLevel.Normal);
            m_OwnsDevice = true;
        }
        // One notification segment holds 1/8 second of audio (at least 1024
        // bytes), rounded down to a whole number of sample frames.
        notifySize = (1024 > (format.AverageBytesPerSecond / 8)) ? (1024) : ((format.AverageBytesPerSecond / 8));
        notifySize = (notifySize - (notifySize % format.BlockAlign));
        // Total playback buffer: one segment per notification point.
        m_BufferBytes = (notifySize * NumberRecordNotifications);
        BufferDescription desc = new BufferDescription(format);
        // Volume control capability.
        desc.ControlVolume = true;
        // Position notifications (required by the refill thread).
        desc.ControlPositionNotify = true;
        // Allow querying the current play cursor.
        desc.CanGetCurrentPosition = true;
        // Plain voice playback: no 3D audio needed.
        desc.Control3D = false;
        // No effects processing.
        desc.ControlEffects = false;
        // Frequency control capability.
        desc.ControlFrequency = true;
        // Pan (left/right balance) control capability.
        desc.ControlPan = true;
        // Keep playing even when the owner window loses focus.
        desc.GlobalFocus = true;
        desc.BufferBytes = m_BufferBytes;
        // Secondary buffer DirectSound actually plays from.
        m_Buffer = new SecondaryBuffer(desc, m_Device);
        // Staging ring buffer decoupling Write() callers from playback.
        circularBuffer = new CircularBuffer((m_BufferBytes * 10));
        InitNotifications();
        // Loop forever; Play() keeps overwriting segments that finished.
        m_Buffer.Play(0, BufferPlayFlags.Looping);
    }

    public SoundPlayer(Control owner, int sr, short bps, short ch)
        : this(owner, null, DirectSoundManager.CreateWaveFormat(sr, bps, ch))
    {
    }

    public SoundPlayer(Control owner, Device device, int sr, short bps, short ch)
        : this(owner, device, DirectSoundManager.CreateWaveFormat(sr, bps, ch))
    {
    }
    #endregion

    #region Public properties
    public int BitsPerSample
    {
        get { return m_Buffer.Format.BitsPerSample; }
    }

    public int Channels
    {
        get { return m_Buffer.Format.Channels; }
    }

    public Device Device
    {
        get { return m_Device; }
    }

    public int SamplingRate
    {
        get { return m_Buffer.Format.SamplesPerSecond; }
    }
    #endregion

    #region IDisposable Members
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    // disposing == true: invoked from Dispose(), managed members may be used.
    // disposing == false: invoked from the finalizer, where other managed
    // objects may already be finalized and must not be touched.
    // BUGFIX: the original finalizer called Dispose() directly and accessed
    // managed members (m_Buffer, m_Device) from the finalizer thread.
    protected virtual void Dispose(bool disposing)
    {
        isRunning = false; // lets the notify thread exit its loop
        if (!disposing)
        {
            return;
        }
        Stop();
        if (m_Buffer != null)
        {
            m_Buffer.Dispose();
            m_Buffer = null;
        }
        if (m_OwnsDevice && (m_Device != null))
        {
            m_Device.Dispose();
            m_Device = null;
        }
    }

    ~SoundPlayer()
    {
        Dispose(false);
    }
    #endregion

    #region Private methods
    private void InitNotifications()
    {
        notifyThread = new Thread(NotifyThreadHandler);
        isRunning = true;
        notifyThread.IsBackground = true;
        notifyThread.Start();
        notificationEvent = new AutoResetEvent(false);
        notify = new Notify(m_Buffer);
        // Put one notification at the last byte of each buffer segment so
        // the refill thread is signalled whenever a segment finishes playing.
        // BUGFIX: the original wrote "positionNotify.Offset" without an
        // index, which does not compile; each element needs [i].
        for (int i = 0; i < NumberRecordNotifications; i++)
        {
            positionNotify[i].Offset = (((notifySize * i) + notifySize) - 1);
            positionNotify[i].EventNotifyHandle = notificationEvent.SafeWaitHandle.DangerousGetHandle();
        }
        notify.SetNotificationPositions(positionNotify, NumberRecordNotifications);
        nextWriteOffset = 0;
    }

    // Waits for segment-played notifications and refills the buffer until
    // Stop()/Dispose() clears isRunning.
    private void NotifyThreadHandler()
    {
        while (isRunning)
        {
            try
            {
                notificationEvent.WaitOne(-1, true);
                Play();
            }
            catch (Exception)
            {
                // Best effort: a failed wait/refill is skipped; the loop
                // keeps servicing later notifications.
            }
        }
    }

    // Copies the next chunk of queued audio into the segment(s) of the
    // DirectSound buffer that just finished playing.
    private void Play()
    {
        try
        {
            int currentPlayPosition;
            int currentWritePosition;
            m_Buffer.GetCurrentPosition(out currentPlayPosition, out currentWritePosition);
            // Bytes between our last write offset and the safe write cursor:
            // the region that has been played and may now be refilled.
            int lockSize = (currentWritePosition - nextWriteOffset);
            // Negative means the cursor wrapped around the ring buffer.
            if (lockSize < 0)
            {
                lockSize = (lockSize + m_BufferBytes);
            }
            // Refill whole notification segments only.
            lockSize = (lockSize - (lockSize % notifySize));
            if (0 != lockSize)
            {
                byte[] data = new byte[lockSize];
                if (circularBuffer.Read(data) > 0)
                {
                    m_Buffer.Write(nextWriteOffset, data, LockFlag.None);
                    nextWriteOffset = (nextWriteOffset + lockSize);
                    // Wrap the write pointer: the buffer plays in Looping
                    // mode (see the constructor), so playback automatically
                    // restarts at offset 0 after the last byte.
                    nextWriteOffset = (nextWriteOffset % m_BufferBytes);
                }
            }
        }
        catch (Exception)
        {
            // Best effort: skip this refill; the next notification retries.
        }
    }
    #endregion

    #region Public methods
    // Stops playback and lets the notify thread terminate.
    public void Stop()
    {
        isRunning = false;
        if (m_Buffer != null)
        {
            m_Buffer.Stop();
        }
    }

    // Queues PCM bytes for playback; the notify thread drains them into the
    // DirectSound buffer as segments become free.
    public void Write(byte[] data)
    {
        try
        {
            Console.WriteLine("播放聲音:{0}", data.Length);
            circularBuffer.Write(data);
        }
        catch (Exception)
        {
            // Best effort: dropping audio beats crashing the caller.
        }
    }
    #endregion
}
用戶錄制聲音
// Records audio from a DirectSound capture device. Captured data is raised
// through the BufferData event and, when FileName is set, also written to a
// WAV file whose size fields are patched when recording stops.
public class CaptureSound
{
    #region Private members
    // Number of equal notification segments in the capture buffer.
    private const int NumberRecordNotifications = 4;
    private readonly BufferPositionNotify[] positionNotify;
    private Capture selectedDevice;
    private CaptureBuffer buffer;
    private int captureBufferSize;
    private string fileName;
    private bool isRecording;
    private int nextCaptureOffset;
    private AutoResetEvent notificationEvent;
    private Notify notify;
    private int notifySize;
    private Thread notifyThread;
    private long sampleCount;
    private WaveFormat selectedFormat;
    private FileStream waveFile;
    private BinaryWriter writer;
    #endregion

    #region Constructors
    // First available capture device, default wave format.
    public CaptureSound()
    {
        positionNotify = new BufferPositionNotify[NumberRecordNotifications + 1];
        ResetState();
        InitializeDeviceSelector();
        InitializeWaveFormatSelector();
        Initialize();
    }

    // Caller-supplied device, default wave format.
    public CaptureSound(Capture device)
    {
        positionNotify = new BufferPositionNotify[NumberRecordNotifications + 1];
        ResetState();
        selectedDevice = device;
        InitializeWaveFormatSelector();
        Initialize();
    }

    // First available capture device, caller-supplied wave format.
    public CaptureSound(WaveFormat waveFormat)
    {
        positionNotify = new BufferPositionNotify[NumberRecordNotifications + 1];
        ResetState();
        selectedFormat = waveFormat;
        InitializeDeviceSelector();
        Initialize();
    }

    // Caller-supplied device and wave format.
    public CaptureSound(Capture device, WaveFormat waveFormat)
    {
        positionNotify = new BufferPositionNotify[NumberRecordNotifications + 1];
        ResetState();
        selectedDevice = device;
        selectedFormat = waveFormat;
        Initialize();
    }
    #endregion

    #region Public properties
    public int BufferBytes
    {
        get { return captureBufferSize; }
    }

    // Setting a file name immediately creates the file and writes the
    // placeholder WAV header.
    public string FileName
    {
        get { return fileName; }
        set
        {
            fileName = value;
            CreateRIFF();
        }
    }

    public long SampleCount
    {
        get { return sampleCount; }
    }

    public WaveFormat SelectedFormat
    {
        get { return selectedFormat; }
    }
    #endregion

    #region Public events
    // Raised from the capture thread with each chunk of recorded bytes.
    public event DirectSoundBufferDataEventHandler BufferData;
    #endregion

    #region Private methods
    // Field initialization shared by all constructors (was duplicated
    // verbatim in each of the four constructors).
    private void ResetState()
    {
        isRecording = false;
        notificationEvent = null;
        buffer = null;
        fileName = string.Empty;
        notify = null;
        notifyThread = null;
        waveFile = null;
        writer = null;
        captureBufferSize = 0;
        nextCaptureOffset = 0;
        sampleCount = 0L;
        notifySize = 0;
    }

    // (Re)creates the capture buffer sized to hold one notification segment
    // per notification point, then wires up the notifications.
    private void CreateCaptureBuffer()
    {
        CaptureBufferDescription desc = new CaptureBufferDescription();
        if (null != notify)
        {
            notify.Dispose();
            notify = null;
        }
        if (null != buffer)
        {
            buffer.Dispose();
            buffer = null;
        }
        if (0 != selectedFormat.Channels)
        {
            // One segment holds 1/8 second of audio (at least 1024 bytes),
            // rounded down to a whole number of sample frames.
            notifySize = (1024 > (selectedFormat.AverageBytesPerSecond / 8))
                ? (1024)
                : ((selectedFormat.AverageBytesPerSecond / 8));
            notifySize = (notifySize - (notifySize % selectedFormat.BlockAlign));
            captureBufferSize = (notifySize * NumberRecordNotifications);
            desc.BufferBytes = captureBufferSize;
            // Capture raw PCM.
            selectedFormat.FormatTag = WaveFormatTag.Pcm;
            desc.Format = selectedFormat;
            buffer = new CaptureBuffer(desc, selectedDevice);
            nextCaptureOffset = 0;
            InitNotifications();
        }
    }

    // Writes a canonical 44-byte WAV (RIFF) header. The RIFF chunk size and
    // data chunk size are written as placeholders and patched by
    // StartOrStopRecord() when recording stops.
    private void CreateRIFF()
    {
        waveFile = new FileStream(FileName, FileMode.Create);
        writer = new BinaryWriter(waveFile);
        char[] riffTag = new char[] { 'R', 'I', 'F', 'F' };
        char[] waveTag = new char[] { 'W', 'A', 'V', 'E' };
        char[] fmtTag = new char[] { 'f', 'm', 't', ' ' };
        char[] dataTag = new char[] { 'd', 'a', 't', 'a' };
        short formatTag = 1; // WAVE_FORMAT_PCM
        int fmtChunkSize = 0x10; // PCM "fmt " chunk is a fixed 16 bytes
        int riffSizePlaceholder = 0;
        // Bytes per sample frame, derived from bit depth and channel count.
        short blockAlign = 0;
        if ((8 == selectedFormat.BitsPerSample) && (1 == selectedFormat.Channels))
        {
            blockAlign = 1;
        }
        else if (((8 == selectedFormat.BitsPerSample) && (2 == selectedFormat.Channels)) ||
                 ((0x10 == selectedFormat.BitsPerSample) && (1 == selectedFormat.Channels)))
        {
            blockAlign = 2;
        }
        else if ((0x10 == selectedFormat.BitsPerSample) && (2 == selectedFormat.Channels))
        {
            blockAlign = 4;
        }
        writer.Write(riffTag);
        writer.Write(riffSizePlaceholder);
        writer.Write(waveTag);
        writer.Write(fmtTag);
        writer.Write(fmtChunkSize);
        writer.Write(formatTag);
        writer.Write(selectedFormat.Channels);
        writer.Write(selectedFormat.SamplesPerSecond);
        writer.Write(selectedFormat.AverageBytesPerSecond);
        writer.Write(blockAlign);
        writer.Write(selectedFormat.BitsPerSample);
        writer.Write(dataTag);
        writer.Write(0); // data chunk size placeholder
    }

    private void Initialize()
    {
        CreateCaptureBuffer();
    }

    // Picks the first capture device on the machine.
    private void InitializeDeviceSelector()
    {
        CaptureDevicesCollection devices = new CaptureDevicesCollection(); // enumerate capture devices
        if (devices.Count > 0)
            selectedDevice = new Capture(devices[0].DriverGuid);
        else
            throw new ArgumentException("無法初始化聲音設備");
    }

    private void InitializeWaveFormatSelector()
    {
        if (selectedDevice == null)
        {
            throw new ArgumentException("尚未設定音訊裝置,無法選擇輸出格式。");
        }
        selectedFormat = DirectSoundManager.DefaultFormat;
    }

    // Starts the wait thread (once) and places one notification at the last
    // byte of each buffer segment.
    private void InitNotifications()
    {
        if (null == buffer)
        {
            throw new NullReferenceException();
        }
        if (null == notifyThread)
        {
            isRecording = true;
            notifyThread = new Thread(WaitThread);
            notifyThread.IsBackground = true;
            notifyThread.Start();
            notificationEvent = new AutoResetEvent(false);
        }
        // BUGFIX: the original wrote "positionNotify.Offset" without an
        // index, which does not compile; each element needs [i].
        for (int i = 0; i < NumberRecordNotifications; i++)
        {
            positionNotify[i].Offset = (((notifySize * i) + notifySize) - 1);
            positionNotify[i].EventNotifyHandle = notificationEvent.SafeWaitHandle.DangerousGetHandle();
        }
        notify = new Notify(buffer);
        notify.SetNotificationPositions(positionNotify, NumberRecordNotifications);
    }

    private void OnBufferData(object sender, DirectSoundBufferDataEventArgs e)
    {
        if (BufferData != null)
        {
            BufferData(sender, e);
        }
    }

    // Reads the bytes captured since the last call, raises BufferData and
    // appends them to the WAV file when one is open.
    private void RecordCapturedData()
    {
        try
        {
            int currentPlayPosition;
            int currentWritePosition;
            buffer.GetCurrentPosition(out currentWritePosition, out currentPlayPosition);
            // Bytes between our last read offset and the safe read cursor.
            int lockSize = (currentPlayPosition - nextCaptureOffset);
            // Negative means the capture cursor wrapped around the buffer.
            if (lockSize < 0)
            {
                lockSize = (lockSize + captureBufferSize);
            }
            // Consume whole notification segments only.
            lockSize = (lockSize - (lockSize % notifySize));
            if (0 != lockSize)
            {
                byte[] data = (byte[])buffer.Read(nextCaptureOffset, typeof(byte), LockFlag.None, new int[] { lockSize });
                OnBufferData(this, new DirectSoundBufferDataEventArgs(data));
                if (writer != null)
                {
                    writer.Write(data, 0, data.Length);
                }
                sampleCount = (sampleCount + data.Length);
                nextCaptureOffset = (nextCaptureOffset + data.Length);
                nextCaptureOffset = (nextCaptureOffset % captureBufferSize);
            }
        }
        catch (Exception)
        {
            // Best effort: a failed read is skipped; the next notification
            // will pick up from the same offset.
        }
    }

    // Waits for segment-full notifications until recording stops.
    private void WaitThread()
    {
        while (isRecording)
        {
            try
            {
                notificationEvent.WaitOne(-1, true);
                RecordCapturedData();
            }
            catch (Exception)
            {
                // Best effort: keep servicing later notifications.
            }
        }
    }

    private void StartOrStopRecord(bool startRecording)
    {
        if (startRecording)
        {
            isRecording = true;
            CreateCaptureBuffer();
            buffer.Start(true);
        }
        else
        {
            isRecording = false;
            buffer.Stop();
            // Flush whatever was captured since the last notification.
            RecordCapturedData();
            if (writer != null)
            {
                // Patch the RIFF chunk size: 36 header bytes + data bytes.
                writer.Seek(4, SeekOrigin.Begin);
                writer.Write(((int)(sampleCount + 0x24L)));
                // Patch the data chunk size at offset 40.
                // BUGFIX: the original wrote the long directly, emitting
                // 8 bytes into a 4-byte field and corrupting the WAV header.
                writer.Seek(40, SeekOrigin.Begin);
                writer.Write((int)sampleCount);
                writer.Close();
                writer = null;
                waveFile = null;
            }
        }
    }
    #endregion

    #region Public methods
    // Pauses capture without flushing or finalizing the WAV file.
    public void Pause()
    {
        buffer.Stop();
    }

    public void Resume()
    {
        buffer.Start(true);
    }

    public void Start()
    {
        StartOrStopRecord(true);
    }

    // Stops capture, finalizes the WAV header and resets counters so the
    // instance can record again.
    public void Stop()
    {
        StartOrStopRecord(false);
        notifyThread = null;
        nextCaptureOffset = 0;
        sampleCount = 0L;
    }
    #endregion
}
本文配套源碼