Here is some sample code for 16i16o using 4b ports. There are many disclaimers...
- This is NOT thoroughly tested. It seems to work OK in simulation at 96kHz with 25MHz MCLK (a bit higher than 24.576MHz) on loopback with 100, 83.33, 80, 75MHz core performance. It fails at 71.4MHz. You can call set_core_high_priority_on() before audio(c_audio, null, null, i_gpio); in main.xc to get 1/5 of the core speed.
- The core needs to be overclocked to 800MHz to get 192kHz to work, so there is no real chance at 500MHz. Maybe with hand-optimised ASM, but that's a BIG project.
- It has not been tested long term for stability
- DFU has not been tested
- Did I say it hadn't been properly tested? Sorry to labour the point but please satisfy yourself that it will meet your needs
- It is optimised for the same number of channels in/out
- It was quite a complex piece of SW to write - you have to be aware of the I2S spec, xmos port buffer operation and ISA at a deep level. I will not be able to offer detailed support on this due to my day job keeping me very busy!
- Having said that, please do give feedback on how you get on.
- I still think 2 x audio tasks is not such a bad option, but you will need to put many days aside to do this (things like DFU get a bit tricky), plus a bit of variable renaming. You could use one audio task to forward to another (although one will need to be slave). I have done this in the past and it works fine, but it was not trivial.
- You will need to set I2S_WIRES to 2 and declare the ports with something like:
Code: Select all
<Port Location="XS1_PORT_4A" Name="PORT_I2S_DAC0"/>
<Port Location="XS1_PORT_4B" Name="PORT_I2S_DAC1"/>
<Port Location="XS1_PORT_4C" Name="PORT_I2S_ADC0"/>
<Port Location="XS1_PORT_4D" Name="PORT_I2S_ADC1"/>
Here it is, replace the relevant bits in audio.xc
Code: Select all
/*
 * Begin a sample exchange by sending one word on c_out.
 *
 * This is kept separate from DoSampleTransfer() so that the audio loop can
 * send the word early and hide the interrupt latency at the other end of
 * the channel.
 *
 *   c_out         - channel end the word is sent on
 *   underflowWord - the word that is sent
 */
static inline void StartSampleTransfer(chanend c_out, const unsigned underflowWord)
{
    outuint(c_out, underflowWord);
}
/*
 * Finish a sample exchange that was begun with StartSampleTransfer().
 *
 * The initial outuint() is deliberately NOT performed here: the caller has
 * already issued it via StartSampleTransfer() earlier in the audio loop so
 * that the interrupt latency is hidden.
 *
 * If a control token is waiting on c_out it is consumed as a command (sample
 * frequency change or other command): the clock ports are driven low and the
 * command value is returned. Otherwise the output samples are read into
 * samplesOut[] and the input samples are sent back on c_out.
 *
 *   c_out         - channel end used for the exchange
 *   readBuffNo    - non-zero: send the I2S inputs from samplesIn_1[],
 *                   zero: send them from samplesIn_0[]
 *   underflowWord - not used by this function (the word is sent by
 *                   StartSampleTransfer()); kept so the signature is unchanged
 *
 * Returns the command read from the control token, or 0 after a normal
 * sample exchange.
 */
#pragma unsafe arrays
static inline unsigned DoSampleTransfer(chanend c_out, const int readBuffNo, const unsigned underflowWord)
{
    /* A waiting control token means a command, not sample data */
    if (testct(c_out))
    {
        unsigned command = inct(c_out);

#ifndef CODEC_MASTER
        if (dsdMode != DSD_MODE_OFF)
        {
#if(DSD_CHANS_DAC != 0)
            /* DSD Clock might not be shared with lrclk or bclk... */
            p_dsd_clk <: 0;
#endif
        }
        else
        {
            /* Set clocks low */
            p_lrclk <: 0;
            p_bclk <: 0;
        }
#endif

#if (DSD_CHANS_DAC > 0)
        /* Drop out of DoP mode when a command arrives */
        if (dsdMode == DSD_MODE_DOP)
        {
            dsdMode = DSD_MODE_OFF;
        }
#endif

#pragma xta endpoint "received_command"
        return command;
    }

    /* No command pending: exchange one set of samples */
#if NUM_USB_CHAN_OUT > 0
#pragma loop unroll
    for (int ch = 0; ch < NUM_USB_CHAN_OUT; ch++)
    {
        samplesOut[ch] = inuint(c_out);
    }
#else
    /* No output channels: read the single word that is sent and discard it */
    inuint(c_out);
#endif

#if NUM_USB_CHAN_IN > 0
    /* I2S inputs come from whichever half of the double buffer readBuffNo selects */
#pragma loop unroll
#if NUM_USB_CHAN_IN < I2S_CHANS_ADC
    for (int ch = 0; ch < NUM_USB_CHAN_IN; ch++)
#else
    for (int ch = 0; ch < I2S_CHANS_ADC; ch++)
#endif
    {
        outuint(c_out, readBuffNo ? samplesIn_1[ch] : samplesIn_0[ch]);
    }

    /* Send over the digi channels - no odd buffering required */
#pragma loop unroll
    for (int ch = I2S_CHANS_ADC; ch < NUM_USB_CHAN_IN; ch++)
    {
        outuint(c_out, samplesIn_0[ch]);
    }
#endif

    return 0;
}
/*
 * Clear the I2S port buffers and schedule the first LRCLK output, the first
 * DAC outputs and the first ADC inputs at fixed offsets from a common port
 * time, so the ports are aligned when the main I2S loop starts.
 *
 *   divide - not used by this function
 */
static inline void InitPorts_4b_port(unsigned divide)
{
    unsigned now;   /* port time captured from the LRCLK output below */

    /* Clear I2S port buffers */
    clearbuf(p_lrclk);
    for (int wire = 0; wire < I2S_WIRES_DAC; wire++)
    {
        clearbuf(p_i2s_dac[wire]);
    }
    for (int wire = 0; wire < I2S_WIRES_ADC; wire++)
    {
        clearbuf(p_i2s_adc[wire]);
    }

    /* Output 0 on LRCLK and record the port time of that output */
    p_lrclk <: 0 @ now;

#define DELAY_START 100//Enough cycles to get to I2S inner loop
    /* LRCLK starts DELAY_START port clocks from now */
    const unsigned lrclk_start = now + DELAY_START;
    /* +24 because 4b port empties 4x quicker than 1b port so delay 3/4 of lrclock cycle */
    const unsigned dac_start = lrclk_start + 24;
    /* -1 because we setup read for before clock. +8 because we want to lag
       reading by one word (and main loop blocks on in) */
    const unsigned adc_start = dac_start - 1 + 8;

    /* Since BCLK is free-running, setup outputs/inputs at a known point in the future */
#pragma loop unroll
    for (int wire = 0; wire < I2S_WIRES_DAC; wire++)
    {
        p_i2s_dac[wire] @ dac_start <: 0;
    }

    p_lrclk @ lrclk_start <: 0x7FFFFFFF;

    /* Set the port time for the first input on each ADC port */
#pragma loop unroll
    for (int wire = 0; wire < I2S_WIRES_ADC; wire++)
    {
        asm("setpt res[%0], %1"::"r"(p_i2s_adc[wire]),"r"(adc_start));
    }
}
#define NUM_IO_PER_I2S_FRAME 8 //The number of input/outputs to 4b port over one I2S frame
//---------------------------------------------------------------------------------------------------
// Description: Helper function to build a full frame of input words from two partial frames
// Returns:     none
// Inputs:      raw_rx_words - double buffered array of 32b read input port values
//              writeBuffNo  - index (0 or 1) of the half of raw_rx_words being filled in the
//                             current I2S loop; the other half holds the previous loop's reads
// Outputs:     zipped_ins   - the 8 port words of one frame, in order
//
// Because reading is buffered, data is read after it is sampled. When using 4b ports there are
// a total of 8 reads/writes per I2S cycle. The first read of a loop returns the last word of the
// frame whose other seven words were read during the previous loop, hence the double buffered
// raw_rx_words. A frame is therefore assembled from words 1..7 of the previous buffer followed by
// word 0 of the current buffer. Naturally, this adds a one sample latency.
//---------------------------------------------------------------------------------------------------
#pragma unsafe arrays
static inline void build_raw_input_buffer(unsigned raw_rx_words[2][NUM_IO_PER_I2S_FRAME],
        unsigned zipped_ins[NUM_IO_PER_I2S_FRAME], unsigned writeBuffNo)
{
    const unsigned prevBuffNo = !writeBuffNo;   // the half written during the last I2S loop

    // Words 1..7 read in the last I2S loop become words 0..6 of the frame
#pragma loop unroll
    for (int word = 0; word < NUM_IO_PER_I2S_FRAME - 1; word++)
    {
        zipped_ins[word] = raw_rx_words[prevBuffNo][word + 1];
    }

    // The final word of the frame is the first one read in this I2S loop
    zipped_ins[NUM_IO_PER_I2S_FRAME - 1] = raw_rx_words[writeBuffNo][0];
}
//-----------------------------------------------------------------------------
// Description: Helper function to unzip read port values to PCM samples.
//              Consumes zipped_ins[0..3] and produces the even-indexed
//              entries (0, 2, 4, 6) of the selected samplesIn buffer.
// Returns:     none
// Inputs:      zipped_ins  - array of input read 32b port values
//              writeBuffNo - non-zero writes samplesIn_1, zero writes samplesIn_0
// Outputs:     samplesIn_0/1 - array of PCM values
//
// Two unzip stages are used: a 2-bit-pack unzip of each 64-bit half, a
// regrouping of the results, then a 1-bit-pack unzip that yields two samples
// per call.
//-----------------------------------------------------------------------------
#pragma unsafe arrays
static inline void unzip_input_buffer_to_samplesIn_left(unsigned zipped_ins[NUM_IO_PER_I2S_FRAME],
        unsigned samplesIn_0[], unsigned samplesIn_1[], unsigned writeBuffNo)
{
    unsigned first_hi, second_hi;   // from port words 0 and 1
    unsigned first_lo, second_lo;   // from port words 2 and 3

    // Stage 1: split the 4-bit packed port data into 2-bit packed words
    {first_hi, second_hi} = unzip(((unsigned long long)zipped_ins[0] << 32) | zipped_ins[1], 1);
    {first_lo, second_lo} = unzip(((unsigned long long)zipped_ins[2] << 32) | zipped_ins[3], 1);

    // Regroup so each 64-bit value carries one stream across all four port words
    const unsigned long long first_pair  = ((unsigned long long)first_hi  << 32) | first_lo;
    const unsigned long long second_pair = ((unsigned long long)second_hi << 32) | second_lo;

    // Stage 2: split each 2-bit packed stream into two 1-bit packed (PCM) words
    if (writeBuffNo)
    {
        {samplesIn_1[0], samplesIn_1[2]} = unzip(first_pair, 0);
        {samplesIn_1[4], samplesIn_1[6]} = unzip(second_pair, 0);
    }
    else
    {
        {samplesIn_0[0], samplesIn_0[2]} = unzip(first_pair, 0);
        {samplesIn_0[4], samplesIn_0[6]} = unzip(second_pair, 0);
    }
}
//-----------------------------------------------------------------------------
// Description: Helper function to unzip read port values to PCM samples.
//              Consumes zipped_ins[4..7] and produces the odd-indexed
//              entries (1, 3, 5, 7) of the selected samplesIn buffer.
// Returns:     none
// Inputs:      zipped_ins  - array of input read 32b port values
//              writeBuffNo - non-zero writes samplesIn_1, zero writes samplesIn_0
// Outputs:     samplesIn_0/1 - array of PCM values
//
// Two unzip stages are used: a 2-bit-pack unzip of each 64-bit half, a
// regrouping of the results, then a 1-bit-pack unzip that yields two samples
// per call.
//-----------------------------------------------------------------------------
#pragma unsafe arrays
static inline void unzip_input_buffer_to_samplesIn_right(unsigned zipped_ins[NUM_IO_PER_I2S_FRAME],
        unsigned samplesIn_0[], unsigned samplesIn_1[], unsigned writeBuffNo)
{
    unsigned first_hi, second_hi;   // from port words 4 and 5
    unsigned first_lo, second_lo;   // from port words 6 and 7

    // Stage 1: split the 4-bit packed port data into 2-bit packed words
    {first_hi, second_hi} = unzip(((unsigned long long)zipped_ins[4] << 32) | zipped_ins[5], 1);
    {first_lo, second_lo} = unzip(((unsigned long long)zipped_ins[6] << 32) | zipped_ins[7], 1);

    // Regroup so each 64-bit value carries one stream across all four port words
    const unsigned long long first_pair  = ((unsigned long long)first_hi  << 32) | first_lo;
    const unsigned long long second_pair = ((unsigned long long)second_hi << 32) | second_lo;

    // Stage 2: split each 2-bit packed stream into two 1-bit packed (PCM) words
    if (writeBuffNo)
    {
        {samplesIn_1[1], samplesIn_1[3]} = unzip(first_pair, 0);
        {samplesIn_1[5], samplesIn_1[7]} = unzip(second_pair, 0);
    }
    else
    {
        {samplesIn_0[1], samplesIn_0[3]} = unzip(first_pair, 0);
        {samplesIn_0[5], samplesIn_0[7]} = unzip(second_pair, 0);
    }
}
//-----------------------------------------------------------------------------
// Description: Helper function to zip up output samples to 4b buffered ports.
//              Reads samplesOut[0], [2], [4], [6] and writes zipped_outs[0..3].
// Returns:     none
// Inputs:      samplesOut  - array of input PCM samples
// Outputs:     zipped_outs - array of 32b values to be output to buffered port
//-----------------------------------------------------------------------------
#pragma unsafe arrays
static inline void zip_samplesOut_to_output_buffer_left(unsigned samplesOut[], unsigned zipped_outs[NUM_IO_PER_I2S_FRAME])
{
    /* Packs of 1 bit */
    const unsigned long long l1_l2 = zip(samplesOut[0], samplesOut[2], 0); // Interleave L1 & L2
    const unsigned long long l3_l4 = zip(samplesOut[4], samplesOut[6], 0); // Interleave L3 & L4

    /* Packs of 2 bits */
    const unsigned long long msbs = zip((unsigned int)(l1_l2 >> 32), (unsigned int)(l3_l4 >> 32), 1); // Interleave MSB's of [L1,L2] with [L3,L4]
    const unsigned long long lsbs = zip((unsigned int)(l1_l2 >> 0),  (unsigned int)(l3_l4 >> 0),  1); // Interleave LSB's of [L1,L2] with [L3,L4]

    /* Split into the four 32b words written to the port, most significant first */
    zipped_outs[0] = (unsigned int)(msbs >> 32);
    zipped_outs[1] = (unsigned int)(msbs);
    zipped_outs[2] = (unsigned int)(lsbs >> 32);
    zipped_outs[3] = (unsigned int)(lsbs);
}
//-----------------------------------------------------------------------------
// Description: Helper function to zip up output samples to 4b buffered ports.
//              Reads samplesOut[1], [3], [5], [7] and writes zipped_outs[4..7].
// Returns:     none
// Inputs:      samplesOut  - array of input PCM samples
// Outputs:     zipped_outs - array of 32b values to be output to buffered port
//
// Only zipped_outs[4..7] are written. Intermediate values are held in locals
// so that the left-channel words in zipped_outs[0..3] are left untouched and
// the result does not depend on the order in which the helpers are called.
//-----------------------------------------------------------------------------
#pragma unsafe arrays
static inline void zip_samplesOut_to_output_buffer_right(unsigned samplesOut[], unsigned zipped_outs[NUM_IO_PER_I2S_FRAME])
{
    /* Packs of 1 bit */
    const unsigned long long r1_r2 = zip(samplesOut[1], samplesOut[3], 0); // Interleave R1 & R2
    const unsigned long long r3_r4 = zip(samplesOut[5], samplesOut[7], 0); // Interleave R3 & R4

    /* Packs of 2 bits */
    const unsigned long long msbs = zip((unsigned int)(r1_r2 >> 32), (unsigned int)(r3_r4 >> 32), 1); // Interleave MSB's of [R1,R2] with [R3,R4]
    const unsigned long long lsbs = zip((unsigned int)(r1_r2 >> 0),  (unsigned int)(r3_r4 >> 0),  1); // Interleave LSB's of [R1,R2] with [R3,R4]

    /* Split into the four 32b words written to the port, most significant first */
    zipped_outs[4] = (unsigned int)(msbs >> 32); // MSBs of [R1,R2,R3,R4] into 32bits
    zipped_outs[5] = (unsigned int)(msbs);       // MSBs of [R1,R2,R3,R4] into 32bits
    zipped_outs[6] = (unsigned int)(lsbs >> 32); // LSBs of [R1,R2,R3,R4] into 32bits
    zipped_outs[7] = (unsigned int)(lsbs);       // LSBs of [R1,R2,R3,R4] into 32bits
}
// The 4b-port delivery code uses a single wire count for both directions,
// so refuse to build if the DAC and ADC counts differ.
#if (I2S_WIRES_DAC != I2S_WIRES_ADC)
#error "I2S_WIRES must be same for DAC and ADC"
#endif

// Shared wire count, and the number of PCM channels carried by each wire
#define I2S_WIRES I2S_WIRES_DAC
#define NUM_CHANS_PER_WIRE (I2S_CHANS_DAC / I2S_WIRES_DAC)
// I2S delivery thread - Enhanced to support 4b ports for I2S datalines
//
// Runs the I2S loop forever: each pass outputs 8 x 32b words per data port
// (4 while LRCLK carries the LEFT pattern, 4 for RIGHT), reads 8 x 32b words
// per data port, and exchanges one set of samples over c_out. Compute
// (zipping outputs, unzipping inputs) is placed between the port operations.
//
// Inputs:  c_out      - channel used for the sample exchange
//                       (StartSampleTransfer / DoSampleTransfer)
//          divide     - passed on to InitPorts_4b_port()
//          c_spd_out, curSamFreq, c_adc - not used by this function
// Returns: the non-zero command reported by DoSampleTransfer(). The loop has
//          no other exit, so the trailing "return 0" is never reached.
#pragma unsafe arrays
unsigned static deliver_4b_port(chanend c_out, chanend ?c_spd_out, unsigned divide, unsigned curSamFreq, chanend ?c_adc)
{
    // ------------------------------------------------------------------------
    // Local Variables
    // ------------------------------------------------------------------------
    unsigned command;
    unsigned readBuffNo = 0;    // samplesIn_X half handed to DoSampleTransfer()
    unsigned writeBuffNo = 1;   // samplesIn_X / raw_rx_words half being filled

    // These are the double buffered raw input words acquired from the ports
    unsigned raw_rx_words[I2S_WIRES][2][NUM_IO_PER_I2S_FRAME] = {{{0}}};

    // Initial sample transfer
    StartSampleTransfer(c_out, 0);
    command = DoSampleTransfer(c_out, readBuffNo, 0); // Underflow word forced to 0
    if(command) return command;

    //configure the IO ports. This sets up the port timers so that all will be in sync for main loop
    InitPorts_4b_port(divide);

    while(1)
    {
        // Local Variables
        unsigned int zipped_ins[I2S_WIRES][NUM_IO_PER_I2S_FRAME];   // assembled input frame per wire
        unsigned int zipped_outs[I2S_WIRES][NUM_IO_PER_I2S_FRAME];  // port words to output per wire
        unsigned tmp_in[I2S_WIRES];                                 // raw port reads, before bitrev

        for (int i = 0; i < I2S_WIRES; i++){
            zip_samplesOut_to_output_buffer_left(&samplesOut[i * NUM_CHANS_PER_WIRE], zipped_outs[i]); // Turn our samples into port vals
        }

        // --------------------------------------------------------------------
        // Change state of LRCLK to Low (LEFT)
        // LR clock delayed by one clock, This is so MSB is output on the falling edge of BCLK
        // after the falling edge on which LRCLK was toggled. (see I2S spec)
        // --------------------------------------------------------------------
        p_lrclk <: 0x80000000;

        // -----------------------------------------------------------------------------
        // Input and output 4 x 32b words
        // Note loop unrolled manually so we can schedule compute in the gaps between IO
        // -----------------------------------------------------------------------------

        // ---- LEFT word 0 ----
        for (int i = 0; i < I2S_WIRES; i++){
            p_i2s_dac[i] <: bitrev(zipped_outs[i][0]); // Push 32 bits of data to the buffered out port
        }
        for (int i = 0; i < I2S_WIRES; i++){
            p_i2s_adc[i] :> tmp_in[i]; // Read in the ADC data now that clocks have run
        }
        for (int i = 0; i < I2S_WIRES; i++){
            raw_rx_words[i][writeBuffNo][0] = bitrev(tmp_in[i]);
        }

        // ---- LEFT word 1 ----
        for (int i = 0; i < I2S_WIRES; i++){
            p_i2s_dac[i] <: bitrev(zipped_outs[i][1]); // Push 32 bits of data to the buffered out port
        }
        // Word 0 of this loop has just been stored, so a full input frame
        // (previous loop's words 1..7 plus this word 0) can now be assembled
        for (int i = 0; i < I2S_WIRES; i++){
            build_raw_input_buffer(raw_rx_words[i], zipped_ins[i], writeBuffNo);
        }
        for (int i = 0; i < I2S_WIRES; i++){
            p_i2s_adc[i] :> tmp_in[i]; // Read in the ADC data now that clocks have run
        }
        for (int i = 0; i < I2S_WIRES; i++){
            raw_rx_words[i][writeBuffNo][1] = bitrev(tmp_in[i]);
        }

        // ---- LEFT word 2 ----
        for (int i = 0; i < I2S_WIRES; i++){
            p_i2s_dac[i] <: bitrev(zipped_outs[i][2]); // Push 32 bits of data to the buffered out port
        }
        // Unpack zipped_ins[0..3] into the samplesIn_X[] half selected by writeBuffNo
        for (int i = 0; i < I2S_WIRES; i++){
            unzip_input_buffer_to_samplesIn_left(zipped_ins[i], &samplesIn_0[i * NUM_CHANS_PER_WIRE], &samplesIn_1[i * NUM_CHANS_PER_WIRE], writeBuffNo); // Turn our port vals into samples
        }
        for (int i = 0; i < I2S_WIRES; i++){
            p_i2s_adc[i] :> tmp_in[i]; // Read in the ADC data now that clocks have run
        }
        for (int i = 0; i < I2S_WIRES; i++){
            raw_rx_words[i][writeBuffNo][2] = bitrev(tmp_in[i]);
        }

        // ---- LEFT word 3 ----
        for (int i = 0; i < I2S_WIRES; i++){
            p_i2s_dac[i] <: bitrev(zipped_outs[i][3]); // Push 32 bits of data to the buffered out port
        }
        // Must stay after the last LEFT word has been pushed: the helper is
        // handed the same zipped_outs[] array that held the LEFT words
        for (int i = 0; i < I2S_WIRES; i++){
            zip_samplesOut_to_output_buffer_right(&samplesOut[i * NUM_CHANS_PER_WIRE], zipped_outs[i]); // Turn our samples into port vals
        }
        for (int i = 0; i < I2S_WIRES; i++){
            p_i2s_adc[i] :> tmp_in[i]; // Read in the ADC data now that clocks have run
        }
        for (int i = 0; i < I2S_WIRES; i++){
            raw_rx_words[i][writeBuffNo][3] = bitrev(tmp_in[i]);
        }

        // ------------------------------------------------------------------------
        // At this point we have just transmitted the latest LEFT data.
        // Now switch LRCLK for the RIGHT half of the frame.
        // ------------------------------------------------------------------------
        p_lrclk <: 0x7FFFFFFF;

        // -----------------------------------------------------------------------------
        // Input and output 4 x 32b words
        // Note loop unrolled manually so we can schedule compute in the gaps between IO
        // -----------------------------------------------------------------------------

        // ---- RIGHT word 0 ----
        for (int i = 0; i < I2S_WIRES; i++){
            p_i2s_dac[i] <: bitrev(zipped_outs[i][0 + 4]); // Push 32 bits of data to the buffered out port
        }
        for (int i = 0; i < I2S_WIRES; i++){
            p_i2s_adc[i] :> tmp_in[i]; // Read in the ADC data now that clocks have run
        }
        for (int i = 0; i < I2S_WIRES; i++){
            raw_rx_words[i][writeBuffNo][0 + 4] = bitrev(tmp_in[i]);
        }

        // ---- RIGHT word 1 ----
        for (int i = 0; i < I2S_WIRES; i++){
            p_i2s_dac[i] <: bitrev(zipped_outs[i][1 + 4]); // Push 32 bits of data to the buffered out port
        }
        // Unpack zipped_ins[4..7] into the samplesIn_X[] half selected by writeBuffNo
        for (int i = 0; i < I2S_WIRES; i++){
            unzip_input_buffer_to_samplesIn_right(zipped_ins[i], &samplesIn_0[i * NUM_CHANS_PER_WIRE], &samplesIn_1[i * NUM_CHANS_PER_WIRE], writeBuffNo); // Turn our port vals into samples
        }
        for (int i = 0; i < I2S_WIRES; i++){
            p_i2s_adc[i] :> tmp_in[i]; // Read in the ADC data now that clocks have run
        }
        for (int i = 0; i < I2S_WIRES; i++){
            raw_rx_words[i][writeBuffNo][1 + 4] = bitrev(tmp_in[i]);
        }

        // ---- RIGHT word 2 ----
        for (int i = 0; i < I2S_WIRES; i++){
            p_i2s_dac[i] <: bitrev(zipped_outs[i][2 + 4]); // Push 32 bits of data to the buffered out port
            if (i == 0) StartSampleTransfer(c_out, 0); // Trigger interrupt in decouple.xc. Hides the 400ns ISR latency
        }
        for (int i = 0; i < I2S_WIRES; i++){
            p_i2s_adc[i] :> tmp_in[i]; // Read in the ADC data now that clocks have run
        }
        for (int i = 0; i < I2S_WIRES; i++){
            raw_rx_words[i][writeBuffNo][2 + 4] = bitrev(tmp_in[i]);
        }

        // ---- RIGHT word 3 ----
        for (int i = 0; i < I2S_WIRES; i++){
            p_i2s_dac[i] <: bitrev(zipped_outs[i][3 + 4]); // Push 32 bits of data to the buffered out port
            if (i == I2S_WIRES - 1) {
                // The buffer number is passed as a literal in each branch
                // NOTE(review): presumably so the inlined call can be specialised - confirm
                if(readBuffNo) command = DoSampleTransfer(c_out, 1, 0); // Do sample transfer early to make use of IO gaps
                else command = DoSampleTransfer(c_out, 0, 0); // Underflow fixed at 0
                if(command) return command;
            }
        }
        for (int i = 0; i < I2S_WIRES; i++){
            p_i2s_adc[i] :> tmp_in[i]; // Read in the ADC data now that clocks have run
        }
        for (int i = 0; i < I2S_WIRES; i++){
            raw_rx_words[i][writeBuffNo][3 + 4] = bitrev(tmp_in[i]);
        }

        // Would normally do sample transfer here, but we do it early in the previous gap. This is OK
        // because all values are buffered anyhow. Inputs used in next cycle, outputs already calculated from last cycle
        readBuffNo = !readBuffNo; // Flip the ADC buffer
        writeBuffNo = !writeBuffNo;
    }
    return 0;
}