Revision 4c244c78

b/host/lib/convert/CMakeLists.txt
71 71

  
72 72
IF(HAVE_EMMINTRIN_H)
73 73
    SET(convert_with_sse2_sources
74
        ${CMAKE_CURRENT_SOURCE_DIR}/convert_fc32_with_sse2.cpp
75
        ${CMAKE_CURRENT_SOURCE_DIR}/convert_fc64_with_sse2.cpp
76
        ${CMAKE_CURRENT_SOURCE_DIR}/convert_fc32_to_sc8_with_sse2.cpp
77
        ${CMAKE_CURRENT_SOURCE_DIR}/convert_fc64_to_sc8_with_sse2.cpp
74
        ${CMAKE_CURRENT_SOURCE_DIR}/sse2_sc16_to_fc64.cpp
75
        ${CMAKE_CURRENT_SOURCE_DIR}/sse2_sc16_to_fc32.cpp
76
        ${CMAKE_CURRENT_SOURCE_DIR}/sse2_sc8_to_fc64.cpp
77
        ${CMAKE_CURRENT_SOURCE_DIR}/sse2_sc8_to_fc32.cpp
78
        ${CMAKE_CURRENT_SOURCE_DIR}/sse2_fc64_to_sc16.cpp
79
        ${CMAKE_CURRENT_SOURCE_DIR}/sse2_fc32_to_sc16.cpp
80
        ${CMAKE_CURRENT_SOURCE_DIR}/sse2_fc64_to_sc8.cpp
81
        ${CMAKE_CURRENT_SOURCE_DIR}/sse2_fc32_to_sc8.cpp
78 82
    )
79 83
    SET_SOURCE_FILES_PROPERTIES(
80 84
        ${convert_with_sse2_sources}
......
117 121
LIBUHD_APPEND_SOURCES(
118 122
    ${CMAKE_CURRENT_SOURCE_DIR}/convert_with_tables.cpp
119 123
    ${CMAKE_CURRENT_SOURCE_DIR}/convert_impl.cpp
124
    ${CMAKE_CURRENT_SOURCE_DIR}/convert_item32.cpp
120 125
)
b/host/lib/convert/convert_common.hpp
53 53

  
54 54
#ifdef __ARM_NEON__
55 55
static const int PRIORITY_LIBORC = 3;
56
static const int PRIORITY_SIMD = 1; //neon conversions could be implemented better, orc wins
57
static const int PRIORITY_TABLE = 2; //tables require large cache, so they are slower on arm
56
static const int PRIORITY_SIMD = 2; //neon conversions could be implemented better, orc wins
57
static const int PRIORITY_TABLE = 1; //tables require large cache, so they are slower on arm
58 58
#else
59
static const int PRIORITY_LIBORC = 1;
59
static const int PRIORITY_LIBORC = 2;
60 60
static const int PRIORITY_SIMD = 3;
61
static const int PRIORITY_TABLE = 2;
61
static const int PRIORITY_TABLE = 1;
62 62
#endif
63 63

  
64 64
/***********************************************************************
......
77 77

  
78 78
typedef boost::uint32_t              item32_t;
79 79

  
80
/***********************************************************************
81
 * Convert complex short buffer to items32 sc16
82
 **********************************************************************/
83
/*!
 * Pack one complex 16-bit sample into a 32-bit item.
 * The real part lands in the upper 16 bits, the imaginary part in the
 * lower 16 bits. The unnamed double argument is ignored.
 */
static UHD_INLINE item32_t sc16_to_item32_sc16(sc16_t num, double){
    //go through uint16 so a negative value does not sign-extend into the other half
    const boost::uint16_t re_bits = num.real();
    const boost::uint16_t im_bits = num.imag();
    const item32_t upper = item32_t(re_bits) << 16;
    const item32_t lower = item32_t(im_bits);
    return upper | lower;
}
88

  
89
/***********************************************************************
90
 * Convert items32 sc16 buffer to complex short
91
 **********************************************************************/
92
/*!
 * Unpack a 32-bit item into one complex 16-bit sample.
 * The upper 16 bits become the real part, the lower 16 bits the
 * imaginary part. The unnamed double argument is ignored.
 */
static UHD_INLINE sc16_t item32_sc16_to_sc16(item32_t item, double){
    const boost::int16_t re = boost::int16_t(item >> 16);
    const boost::int16_t im = boost::int16_t(item);
    return sc16_t(re, im);
}
80
typedef item32_t (*xtox_t)(item32_t);
98 81

  
99 82
/***********************************************************************
100
 * Convert complex float buffer to items32 sc16
83
 * Convert xx to items32 sc16 buffer
101 84
 **********************************************************************/
102
static UHD_INLINE item32_t fc32_to_item32_sc16(fc32_t num, double scale_factor){
85
/*!
 * Scale one complex sample and pack it into a 32-bit sc16 item.
 * The real part lands in the upper 16 bits, the imaginary part in the
 * lower 16 bits.
 *
 * The scale factor is applied in the sample's own precision T, so that
 * double samples are not scaled with a factor rounded to float.
 * NOTE(review): assumes T is a floating point type; the sc16 case is
 * expected to be covered by an explicit specialization - confirm.
 *
 * \param num the complex sample to convert
 * \param scale_factor multiplier applied to both components before truncation
 * \return the packed item in host order
 */
template <typename T> UHD_INLINE item32_t xx_to_item32_sc16_x1(
    const std::complex<T> &num, const double scale_factor
){
    const T scalar = T(scale_factor);
    //convert to signed first, then reinterpret as unsigned so that
    //negative values do not sign-extend into the neighbouring field
    boost::uint16_t real = boost::int16_t(num.real()*scalar);
    boost::uint16_t imag = boost::int16_t(num.imag()*scalar);
    return (item32_t(real) << 16) | (item32_t(imag) << 0);
}
107 92

  
108
/***********************************************************************
109
 * Convert items32 sc16 buffer to complex float
110
 **********************************************************************/
111
/*!
 * Unpack a 32-bit sc16 item into one complex float sample.
 * The upper 16 bits are the real part and the lower 16 bits the
 * imaginary part; both are multiplied by the scale factor as floats.
 */
static UHD_INLINE fc32_t item32_sc16_to_fc32(item32_t item, double scale_factor){
    const float scalar = float(scale_factor);
    const boost::int16_t re = boost::int16_t(item >> 16);
    const boost::int16_t im = boost::int16_t(item);
    return fc32_t(float(re*scalar), float(im*scalar));
}
117

  
118
/***********************************************************************
119
 * Convert complex double buffer to items32 sc16
120
 **********************************************************************/
121
static UHD_INLINE item32_t fc64_to_item32_sc16(fc64_t num, double scale_factor){
122
    boost::uint16_t real = boost::int16_t(num.real()*scale_factor);
123
    boost::uint16_t imag = boost::int16_t(num.imag()*scale_factor);
93
/*!
 * Specialization for complex 16-bit integer samples: no scaling is
 * applied (the scale factor argument is unnamed and ignored), the
 * components are only packed, real part in the upper 16 bits.
 */
template <> UHD_INLINE item32_t xx_to_item32_sc16_x1(
    const sc16_t &num, const double
){
    const boost::uint16_t re_bits = boost::int16_t(num.real());
    const boost::uint16_t im_bits = boost::int16_t(num.imag());
    const item32_t packed = (item32_t(re_bits) << 16) | item32_t(im_bits);
    return packed;
}
126 100

  
127
/***********************************************************************
128
 * Convert items32 sc16 buffer to complex double
129
 **********************************************************************/
130
static UHD_INLINE fc64_t item32_sc16_to_fc64(item32_t item, double scale_factor){
131
    return fc64_t(
132
        float(boost::int16_t(item >> 16)*scale_factor),
133
        float(boost::int16_t(item >> 0)*scale_factor)
134
    );
101
template <xtox_t to_wire, typename T>
102
UHD_INLINE void xx_to_item32_sc16(
103
    const std::complex<T> *input,
104
    item32_t *output,
105
    const size_t nsamps,
106
    const double scale_factor
107
){
108
    for (size_t i = 0; i < nsamps; i++){
109
        const item32_t item = xx_to_item32_sc16_x1(input[i], scale_factor);
110
        output[i] = to_wire(item);
111
    }
135 112
}
136 113

  
137 114
/***********************************************************************
138
 * Convert items32 sc8 buffer to complex char
115
 * Convert items32 sc16 buffer to xx
139 116
 **********************************************************************/
140
static UHD_INLINE void item32_sc8_to_sc8(item32_t item, sc8_t &out0, sc8_t &out1, double){
141
    out0 = sc8_t(
142
        boost::int8_t(item >> 8),
143
        boost::int8_t(item >> 0)
144
    );
145
    out1 = sc8_t(
146
        boost::int8_t(item >> 24),
147
        boost::int8_t(item >> 16)
117
/*!
 * Unpack a 32-bit sc16 item into one complex sample of type T.
 * The upper 16 bits are the real part and the lower 16 bits the
 * imaginary part; each is multiplied by the scale factor (as a float)
 * and then converted to T.
 */
template <typename T> UHD_INLINE std::complex<T> item32_sc16_x1_to_xx(
    const item32_t item, const double scale_factor
){
    const float scalar = float(scale_factor);
    const boost::int16_t re = boost::int16_t(item >> 16);
    const boost::int16_t im = boost::int16_t(item);
    return std::complex<T>(T(re*scalar), T(im*scalar));
}
150 125

  
151
/***********************************************************************
152
 * Convert items32 sc8 buffer to complex short
153
 **********************************************************************/
154
/*!
 * Unpack a 32-bit sc8 item into two complex 16-bit samples.
 * Bits 15:8 and 7:0 are the real and imaginary parts of out0;
 * bits 31:24 and 23:16 are the real and imaginary parts of out1.
 * The unnamed double argument is ignored.
 */
static UHD_INLINE void item32_sc8_to_sc16(item32_t item, sc16_t &out0, sc16_t &out1, double){
    const boost::int8_t re0 = boost::int8_t(item >> 8);
    const boost::int8_t im0 = boost::int8_t(item);
    const boost::int8_t re1 = boost::int8_t(item >> 24);
    const boost::int8_t im1 = boost::int8_t(item >> 16);
    out0 = sc16_t(re0, im0);
    out1 = sc16_t(re1, im1);
}
164

  
165
/***********************************************************************
166
 * Convert items32 sc8 buffer to complex float
167
 **********************************************************************/
168
static UHD_INLINE void item32_sc8_to_fc32(item32_t item, fc32_t &out0, fc32_t &out1, double scale_factor){
169
    out0 = fc32_t(
170
        float(boost::int8_t(item >> 8)*float(scale_factor)),
171
        float(boost::int8_t(item >> 0)*float(scale_factor))
172
    );
173
    out1 = fc32_t(
174
        float(boost::int8_t(item >> 24)*float(scale_factor)),
175
        float(boost::int8_t(item >> 16)*float(scale_factor))
126
/*!
 * Specialization for complex 16-bit integer samples: the two halves of
 * the item are returned unscaled (the scale factor argument is unnamed
 * and ignored), upper half as the real part.
 */
template <> UHD_INLINE sc16_t item32_sc16_x1_to_xx(
    const item32_t item, const double
){
    const boost::int16_t re = boost::int16_t(item >> 16);
    const boost::int16_t im = boost::int16_t(item);
    return sc16_t(re, im);
}
178 133

  
179
/***********************************************************************
180
 * Convert items32 sc8 buffer to complex double
181
 **********************************************************************/
182
static UHD_INLINE void item32_sc8_to_fc64(item32_t item, fc64_t &out0, fc64_t &out1, double scale_factor){
183
    out0 = fc64_t(
184
        float(boost::int8_t(item >> 8)*scale_factor),
185
        float(boost::int8_t(item >> 0)*scale_factor)
186
    );
187
    out1 = fc64_t(
188
        float(boost::int8_t(item >> 24)*scale_factor),
189
        float(boost::int8_t(item >> 16)*scale_factor)
190
    );
134
template <xtox_t to_host, typename T>
135
UHD_INLINE void item32_sc16_to_xx(
136
    const item32_t *input,
137
    std::complex<T> *output,
138
    const size_t nsamps,
139
    const double scale_factor
140
){
141
    for (size_t i = 0; i < nsamps; i++){
142
        const item32_t item_i = to_host(input[i]);
143
        output[i] = item32_sc16_x1_to_xx<T>(item_i, scale_factor);
144
    }
191 145
}
192 146

  
193 147
/***********************************************************************
194
 * Convert complex char to items32 sc8 buffer
148
 * Convert xx to items32 sc8 buffer
195 149
 **********************************************************************/
196
static UHD_INLINE item32_t sc8_to_item32_sc8(sc8_t in0, sc8_t in1, double){
197
    boost::uint8_t real0 = boost::int8_t(in0.real());
198
    boost::uint8_t imag0 = boost::int8_t(in0.imag());
199
    boost::uint8_t real1 = boost::int8_t(in1.real());
200
    boost::uint8_t imag1 = boost::int8_t(in1.imag());
150
/*!
 * Scale two complex samples and pack them into one 32-bit sc8 item.
 * in0 occupies bits 15:8 (real) and 7:0 (imag);
 * in1 occupies bits 31:24 (real) and 23:16 (imag).
 *
 * Each scaled component is converted to a signed 8-bit value first and
 * only then reinterpreted as unsigned: converting a negative floating
 * point value directly to an unsigned type is undefined behavior.
 * The scale factor is applied in the sample's own precision T.
 * NOTE(review): assumes T is a floating point type; the sc16 case is
 * expected to be covered by an explicit specialization - confirm.
 *
 * \param in0 the first sample of the pair
 * \param in1 the second sample of the pair
 * \param scale_factor multiplier applied to all components before truncation
 * \return the packed item in host order
 */
template <typename T> UHD_INLINE item32_t xx_to_item32_sc8_x1(
    const std::complex<T> &in0, const std::complex<T> &in1, const double scale_factor
){
    const T scalar = T(scale_factor);
    const boost::uint8_t real0 = boost::int8_t(in0.real()*scalar);
    const boost::uint8_t imag0 = boost::int8_t(in0.imag()*scalar);
    const boost::uint8_t real1 = boost::int8_t(in1.real()*scalar);
    const boost::uint8_t imag1 = boost::int8_t(in1.imag()*scalar);
    return
        (item32_t(real0) << 8) | (item32_t(imag0) << 0) |
        (item32_t(real1) << 24) | (item32_t(imag1) << 16)
    ;
}
206 160

  
207
/***********************************************************************
208
 * Convert complex short to items32 sc8 buffer
209
 **********************************************************************/
210
static UHD_INLINE item32_t sc16_to_item32_sc8(sc16_t in0, sc16_t in1, double){
211
    boost::uint8_t real0 = boost::int8_t(in0.real());
212
    boost::uint8_t imag0 = boost::int8_t(in0.imag());
213
    boost::uint8_t real1 = boost::int8_t(in1.real());
214
    boost::uint8_t imag1 = boost::int8_t(in1.imag());
161
/*!
 * Specialization for complex 16-bit integer samples: each component is
 * reduced to its low 8 bits without scaling (the scale factor argument
 * is unnamed and ignored) and the two samples are packed into one item.
 * in0 occupies bits 15:8 (real) and 7:0 (imag);
 * in1 occupies bits 31:24 (real) and 23:16 (imag).
 */
template <> UHD_INLINE item32_t xx_to_item32_sc8_x1(
    const sc16_t &in0, const sc16_t &in1, const double
){
    const item32_t re0 = boost::uint8_t(in0.real());
    const item32_t im0 = boost::uint8_t(in0.imag());
    const item32_t re1 = boost::uint8_t(in1.real());
    const item32_t im1 = boost::uint8_t(in1.imag());
    return (re1 << 24) | (im1 << 16) | (re0 << 8) | im0;
}
220 171

  
221
/***********************************************************************
222
 * Convert complex float to items32 sc8 buffer
223
 **********************************************************************/
224
static UHD_INLINE item32_t fc32_to_item32_sc8(fc32_t in0, fc32_t in1, double scale_factor){
225
    boost::uint8_t real0 = boost::int8_t(in0.real()*float(scale_factor));
226
    boost::uint8_t imag0 = boost::int8_t(in0.imag()*float(scale_factor));
227
    boost::uint8_t real1 = boost::int8_t(in1.real()*float(scale_factor));
228
    boost::uint8_t imag1 = boost::int8_t(in1.imag()*float(scale_factor));
229
    return
230
        (item32_t(real0) << 8) | (item32_t(imag0) << 0) |
231
        (item32_t(real1) << 24) | (item32_t(imag1) << 16)
232
    ;
172
template <xtox_t to_wire, typename T>
173
UHD_INLINE void xx_to_item32_sc8(
174
    const std::complex<T> *input,
175
    item32_t *output,
176
    const size_t nsamps,
177
    const double scale_factor
178
){
179
    const size_t num_pairs = nsamps/2;
180
    for (size_t i = 0, j = 0; i < num_pairs; i++, j+=2){
181
        const item32_t item = xx_to_item32_sc8_x1(input[j], input[j+1], scale_factor);
182
        output[i] = to_wire(item);
183
    }
184

  
185
    if (nsamps != num_pairs*2){
186
        const item32_t item = xx_to_item32_sc8_x1(input[nsamps-1], std::complex<T>(0), scale_factor);
187
        output[num_pairs] = to_wire(item);
188
    }
233 189
}
234 190

  
235 191
/***********************************************************************
236
 * Convert complex double to items32 sc8 buffer
192
 * Convert items32 sc8 buffer to xx
237 193
 **********************************************************************/
238
static UHD_INLINE item32_t fc64_to_item32_sc8(fc64_t in0, fc64_t in1, double scale_factor){
239
    boost::uint8_t real0 = boost::int8_t(in0.real()*(scale_factor));
240
    boost::uint8_t imag0 = boost::int8_t(in0.imag()*(scale_factor));
241
    boost::uint8_t real1 = boost::int8_t(in1.real()*(scale_factor));
242
    boost::uint8_t imag1 = boost::int8_t(in1.imag()*(scale_factor));
243
    return
244
        (item32_t(real0) << 8) | (item32_t(imag0) << 0) |
245
        (item32_t(real1) << 24) | (item32_t(imag1) << 16)
246
    ;
194
/*!
 * Unpack a 32-bit sc8 item into two complex samples of type T.
 * Bits 15:8 and 7:0 are the real and imaginary parts of out0;
 * bits 31:24 and 23:16 are the real and imaginary parts of out1.
 * Each signed 8-bit component is multiplied by the scale factor
 * (as a float) and then converted to T.
 */
template <typename T> UHD_INLINE void item32_sc8_x1_to_xx(
    const item32_t item, std::complex<T> &out0, std::complex<T> &out1, const double scale_factor
){
    const float scalar = float(scale_factor);
    const boost::int8_t re0 = boost::int8_t(item >> 8);
    const boost::int8_t im0 = boost::int8_t(item);
    const boost::int8_t re1 = boost::int8_t(item >> 24);
    const boost::int8_t im1 = boost::int8_t(item >> 16);
    out0 = std::complex<T>(T(re0*scalar), T(im0*scalar));
    out1 = std::complex<T>(T(re1*scalar), T(im1*scalar));
}
206

  
207
/*!
 * Specialization for complex 16-bit integer samples: the four signed
 * 8-bit fields of the item are widened to 16 bits without scaling
 * (the scale factor argument is unnamed and ignored).
 * Bits 15:8 and 7:0 go to out0; bits 31:24 and 23:16 go to out1.
 */
template <> UHD_INLINE void item32_sc8_x1_to_xx(
    const item32_t item, sc16_t &out0, sc16_t &out1, const double
){
    const boost::int16_t re0 = boost::int8_t(item >> 8);
    const boost::int16_t im0 = boost::int8_t(item);
    const boost::int16_t re1 = boost::int8_t(item >> 24);
    const boost::int16_t im1 = boost::int8_t(item >> 16);
    out0 = sc16_t(re0, im0);
    out1 = sc16_t(re1, im1);
}
219

  
220
template <xtox_t to_host, typename T>
221
UHD_INLINE void item32_sc8_to_xx(
222
    const item32_t *input,
223
    std::complex<T> *output,
224
    const size_t nsamps,
225
    const double scale_factor
226
){
227
    input = reinterpret_cast<const item32_t *>(size_t(input) & ~0x3);
228
    std::complex<T> dummy;
229
    size_t num_samps = nsamps;
230

  
231
    if ((size_t(input) & 0x3) != 0){
232
        const item32_t item0 = to_host(*input++);
233
        item32_sc8_x1_to_xx(item0, dummy, *output++, scale_factor);
234
        num_samps--;
235
    }
236

  
237
    const size_t num_pairs = num_samps/2;
238
    for (size_t i = 0, j = 0; i < num_pairs; i++, j+=2){
239
        const item32_t item_i = to_host(input[i]);
240
        item32_sc8_x1_to_xx(item_i, output[j], output[j+1], scale_factor);
241
    }
242

  
243
    if (num_samps != num_pairs*2){
244
        const item32_t item_n = to_host(input[num_pairs]);
245
        item32_sc8_x1_to_xx(item_n, output[num_samps-1], dummy, scale_factor);
246
    }
247 247
}
248 248

  
249 249
#endif /* INCLUDED_LIBUHD_CONVERT_COMMON_HPP */
/dev/null
1
//
2
// Copyright 2012 Ettus Research LLC
3
//
4
// This program is free software: you can redistribute it and/or modify
5
// it under the terms of the GNU General Public License as published by
6
// the Free Software Foundation, either version 3 of the License, or
7
// (at your option) any later version.
8
//
9
// This program is distributed in the hope that it will be useful,
10
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
// GNU General Public License for more details.
13
//
14
// You should have received a copy of the GNU General Public License
15
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
//
17

  
18
#include "convert_common.hpp"
19
#include <uhd/utils/byteswap.hpp>
20
#include <emmintrin.h>
21

  
22
using namespace uhd::convert;
23

  
24
/*!
 * Scale four float vectors, convert them to 32-bit integers, exchange
 * the two 64-bit halves of each vector, and pack all sixteen values
 * down to one vector of 8-bit integers using the SSE2 pack intrinsics.
 * Values from in0 end up in the lowest bytes, values from in3 in the highest.
 */
UHD_INLINE __m128i pack_sc32_4x_be(
    const __m128 &in0, const __m128 &in1,
    const __m128 &in2, const __m128 &in3,
    const __m128 &scalar
){
    //element order after the shuffle is (2, 3, 0, 1)
    const __m128i swapped0 = _mm_shuffle_epi32(
        _mm_cvtps_epi32(_mm_mul_ps(in0, scalar)), _MM_SHUFFLE(1, 0, 3, 2));
    const __m128i swapped1 = _mm_shuffle_epi32(
        _mm_cvtps_epi32(_mm_mul_ps(in1, scalar)), _MM_SHUFFLE(1, 0, 3, 2));
    const __m128i swapped2 = _mm_shuffle_epi32(
        _mm_cvtps_epi32(_mm_mul_ps(in2, scalar)), _MM_SHUFFLE(1, 0, 3, 2));
    const __m128i swapped3 = _mm_shuffle_epi32(
        _mm_cvtps_epi32(_mm_mul_ps(in3, scalar)), _MM_SHUFFLE(1, 0, 3, 2));

    //32 -> 16 -> 8 bits
    const __m128i lower_half = _mm_packs_epi32(swapped0, swapped1);
    const __m128i upper_half = _mm_packs_epi32(swapped2, swapped3);
    return _mm_packs_epi16(lower_half, upper_half);
}
43

  
44
/*!
 * Scale four float vectors, convert them to 32-bit integers, exchange
 * the neighbouring 32-bit elements of each vector, and pack all sixteen
 * values down to one vector of 8-bit integers using the SSE2 pack intrinsics.
 * Values from in0 end up in the lowest bytes, values from in3 in the highest.
 */
UHD_INLINE __m128i pack_sc32_4x_le(
    const __m128 &in0, const __m128 &in1,
    const __m128 &in2, const __m128 &in3,
    const __m128 &scalar
){
    //element order after the shuffle is (1, 0, 3, 2)
    const __m128i swapped0 = _mm_shuffle_epi32(
        _mm_cvtps_epi32(_mm_mul_ps(in0, scalar)), _MM_SHUFFLE(2, 3, 0, 1));
    const __m128i swapped1 = _mm_shuffle_epi32(
        _mm_cvtps_epi32(_mm_mul_ps(in1, scalar)), _MM_SHUFFLE(2, 3, 0, 1));
    const __m128i swapped2 = _mm_shuffle_epi32(
        _mm_cvtps_epi32(_mm_mul_ps(in2, scalar)), _MM_SHUFFLE(2, 3, 0, 1));
    const __m128i swapped3 = _mm_shuffle_epi32(
        _mm_cvtps_epi32(_mm_mul_ps(in3, scalar)), _MM_SHUFFLE(2, 3, 0, 1));

    //32 -> 16 -> 8 bits
    const __m128i lower_half = _mm_packs_epi32(swapped0, swapped1);
    const __m128i upper_half = _mm_packs_epi32(swapped2, swapped3);
    return _mm_packs_epi16(lower_half, upper_half);
}
63

  
64
//SSE2 conversion of complex float samples into byte-swapped sc8 items.
//Eight samples (four items) are handled per vector iteration; what is
//left over goes through the scalar per-pair conversion.
//NOTE(review): inputs, outputs, nsamps and scale_factor are presumably
//supplied by DECLARE_CONVERTER - confirm against its definition.
DECLARE_CONVERTER(fc32, 1, sc8_item32_be, 1, PRIORITY_SIMD){
    const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);
    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);

    const __m128 scalar = _mm_set_ps1(float(scale_factor));

    //_al_ selects the aligned (_) or unaligned (u_) load intrinsic
    #define convert_fc32_1_to_sc8_item32_1_bswap_guts(_al_)             \
    for (size_t j = 0; i+7 < nsamps; i+=8, j+=4){                       \
        /* two complex samples (four floats) per load */                \
        const float *src = reinterpret_cast<const float *>(input+i);    \
        const __m128i packed = pack_sc32_4x_be(                         \
            _mm_load ## _al_ ## ps(src+0),                              \
            _mm_load ## _al_ ## ps(src+4),                              \
            _mm_load ## _al_ ## ps(src+8),                              \
            _mm_load ## _al_ ## ps(src+12),                             \
            scalar                                                      \
        );                                                              \
        /* the output is always stored unaligned */                     \
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+j), packed); \
    }

    size_t i = 0;

    //pick the load flavour from the alignment of the input
    const bool input_is_aligned = (size_t(input) & 0xf) == 0;
    if (!input_is_aligned){
        convert_fc32_1_to_sc8_item32_1_bswap_guts(u_)
    }
    else{
        convert_fc32_1_to_sc8_item32_1_bswap_guts(_)
    }

    //remaining whole pairs
    const size_t num_pairs = nsamps/2;
    size_t pair = i/2;
    while (pair < num_pairs){
        output[pair] = uhd::byteswap(fc32_to_item32_sc8(input[i], input[i+1], scale_factor));
        pair++;
        i += 2;
    }

    //odd sample out, paired with zero
    if (nsamps != num_pairs*2){
        output[num_pairs] = uhd::byteswap(fc32_to_item32_sc8(input[nsamps-1], 0, scale_factor));
    }
}
107

  
108
//SSE2 conversion of complex float samples into sc8 items (no byte swap).
//Eight samples (four items) are handled per vector iteration; what is
//left over goes through the scalar per-pair conversion.
//NOTE(review): inputs, outputs, nsamps and scale_factor are presumably
//supplied by DECLARE_CONVERTER - confirm against its definition.
DECLARE_CONVERTER(fc32, 1, sc8_item32_le, 1, PRIORITY_SIMD){
    const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);
    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);

    const __m128 scalar = _mm_set_ps1(float(scale_factor));

    //_al_ selects the aligned (_) or unaligned (u_) load intrinsic
    #define convert_fc32_1_to_sc8_item32_1_nswap_guts(_al_)             \
    for (size_t j = 0; i+7 < nsamps; i+=8, j+=4){                       \
        /* two complex samples (four floats) per load */                \
        const float *src = reinterpret_cast<const float *>(input+i);    \
        const __m128i packed = pack_sc32_4x_le(                         \
            _mm_load ## _al_ ## ps(src+0),                              \
            _mm_load ## _al_ ## ps(src+4),                              \
            _mm_load ## _al_ ## ps(src+8),                              \
            _mm_load ## _al_ ## ps(src+12),                             \
            scalar                                                      \
        );                                                              \
        /* the output is always stored unaligned */                     \
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+j), packed); \
    }

    size_t i = 0;

    //pick the load flavour from the alignment of the input
    const bool input_is_aligned = (size_t(input) & 0xf) == 0;
    if (!input_is_aligned){
        convert_fc32_1_to_sc8_item32_1_nswap_guts(u_)
    }
    else{
        convert_fc32_1_to_sc8_item32_1_nswap_guts(_)
    }

    //remaining whole pairs
    const size_t num_pairs = nsamps/2;
    size_t pair = i/2;
    while (pair < num_pairs){
        output[pair] = fc32_to_item32_sc8(input[i], input[i+1], scale_factor);
        pair++;
        i += 2;
    }

    //odd sample out, paired with zero
    if (nsamps != num_pairs*2){
        output[num_pairs] = fc32_to_item32_sc8(input[nsamps-1], 0, scale_factor);
    }
}
/dev/null
1
//
2
// Copyright 2011 Ettus Research LLC
3
//
4
// This program is free software: you can redistribute it and/or modify
5
// it under the terms of the GNU General Public License as published by
6
// the Free Software Foundation, either version 3 of the License, or
7
// (at your option) any later version.
8
//
9
// This program is distributed in the hope that it will be useful,
10
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
// GNU General Public License for more details.
13
//
14
// You should have received a copy of the GNU General Public License
15
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
//
17

  
18
#include "convert_common.hpp"
19
#include <uhd/utils/byteswap.hpp>
20
#include <emmintrin.h>
21

  
22
using namespace uhd::convert;
23

  
24
//SSE2 conversion of complex float samples into sc16 items (no byte swap).
//Four samples are handled per vector iteration; the head sample needed
//to reach 16-byte alignment and the tail go through the scalar conversion.
//NOTE(review): inputs, outputs, nsamps and scale_factor are presumably
//supplied by DECLARE_CONVERTER - confirm against its definition.
DECLARE_CONVERTER(fc32, 1, sc16_item32_le, 1, PRIORITY_SIMD){
    const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);
    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);

    const __m128 scalar = _mm_set_ps1(float(scale_factor));

    //_al_ selects the aligned (_) or unaligned (u_) load intrinsic
    #define convert_fc32_1_to_item32_1_nswap_guts(_al_)                 \
    for (; i+3 < nsamps; i+=4){                                         \
        /* load from input */                                           \
        __m128 tmplo = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+0)); \
        __m128 tmphi = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+2)); \
                                                                        \
        /* convert and scale */                                         \
        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar));    \
        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar));    \
                                                                        \
        /* pack + swap 16-bit pairs */                                  \
        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi);                 \
        tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));      \
        tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));      \
                                                                        \
        /* store to output */                                           \
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi);  \
    }

    size_t i = 0;

    //dispatch according to alignment
    const size_t misalignment = size_t(input) & 0xf;

    //one sample (8 bytes) off: convert a single head sample to reach
    //16-byte alignment, but never touch the buffers of an empty request
    if (misalignment == 0x8 && nsamps != 0){
        output[i] = fc32_to_item32_sc16(input[i], float(scale_factor)); i++;
    }

    if (misalignment == 0x0 || misalignment == 0x8){
        convert_fc32_1_to_item32_1_nswap_guts(_)
    }
    else{
        convert_fc32_1_to_item32_1_nswap_guts(u_)
    }

    //convert remainder
    for (; i < nsamps; i++){
        output[i] = fc32_to_item32_sc16(input[i], float(scale_factor));
    }

    //the helper macro is private to this converter
    #undef convert_fc32_1_to_item32_1_nswap_guts
}
66

  
67
//SSE2 conversion of complex float samples into byte-swapped sc16 items.
//Four samples are handled per vector iteration; the head sample needed
//to reach 16-byte alignment and the tail go through the scalar conversion.
//NOTE(review): inputs, outputs, nsamps and scale_factor are presumably
//supplied by DECLARE_CONVERTER - confirm against its definition.
DECLARE_CONVERTER(fc32, 1, sc16_item32_be, 1, PRIORITY_SIMD){
    const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);
    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);

    const __m128 scalar = _mm_set_ps1(float(scale_factor));

    //_al_ selects the aligned (_) or unaligned (u_) load intrinsic
    #define convert_fc32_1_to_item32_1_bswap_guts(_al_)                 \
    for (; i+3 < nsamps; i+=4){                                         \
        /* load from input */                                           \
        __m128 tmplo = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+0)); \
        __m128 tmphi = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+2)); \
                                                                        \
        /* convert and scale */                                         \
        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar));    \
        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar));    \
                                                                        \
        /* pack + byteswap -> byteswap 16 bit words */                  \
        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi);                 \
        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \
                                                                        \
        /* store to output */                                           \
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi);  \
    }

    size_t i = 0;

    //dispatch according to alignment
    const size_t misalignment = size_t(input) & 0xf;

    //one sample (8 bytes) off: convert a single head sample to reach
    //16-byte alignment, but never touch the buffers of an empty request
    if (misalignment == 0x8 && nsamps != 0){
        output[i] = uhd::byteswap(fc32_to_item32_sc16(input[i], float(scale_factor))); i++;
    }

    if (misalignment == 0x0 || misalignment == 0x8){
        convert_fc32_1_to_item32_1_bswap_guts(_)
    }
    else{
        convert_fc32_1_to_item32_1_bswap_guts(u_)
    }

    //convert remainder
    for (; i < nsamps; i++){
        output[i] = uhd::byteswap(fc32_to_item32_sc16(input[i], float(scale_factor)));
    }

    //the helper macro is private to this converter
    #undef convert_fc32_1_to_item32_1_bswap_guts
}
108

  
109
//SSE2 conversion of sc16 items (no byte swap) into complex float samples.
//Four samples are handled per vector iteration; the head sample needed
//to bring the output to 16-byte alignment and the tail go through the
//scalar conversion.
//NOTE(review): inputs, outputs, nsamps and scale_factor are presumably
//supplied by DECLARE_CONVERTER - confirm against its definition.
DECLARE_CONVERTER(sc16_item32_le, 1, fc32, 1, PRIORITY_SIMD){
    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
    fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]);

    //the vector path places each value in the upper 16 bits of a 32-bit
    //lane, so the scalar is divided by 2^16 to compensate
    const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16));
    const __m128i zeroi = _mm_setzero_si128();

    //_al_ selects the aligned (_) or unaligned (u_) store intrinsic
    #define convert_item32_1_to_fc32_1_nswap_guts(_al_)                 \
    for (; i+3 < nsamps; i+=4){                                         \
        /* load from input */                                           \
        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \
                                                                        \
        /* unpack + swap 16-bit pairs */                                \
        tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));      \
        tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));      \
        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \
        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi);               \
                                                                        \
        /* convert and scale */                                         \
        __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar);     \
        __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar);     \
                                                                        \
        /* store to output */                                           \
        _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+i+0), tmplo); \
        _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+i+2), tmphi); \
    }

    size_t i = 0;

    //dispatch according to alignment
    const size_t misalignment = size_t(output) & 0xf;

    //one sample (8 bytes) off: convert a single head sample to reach
    //16-byte alignment, but never touch the buffers of an empty request
    if (misalignment == 0x8 && nsamps != 0){
        output[i] = item32_sc16_to_fc32(input[i], float(scale_factor)); i++;
    }

    if (misalignment == 0x0 || misalignment == 0x8){
        convert_item32_1_to_fc32_1_nswap_guts(_)
    }
    else{
        convert_item32_1_to_fc32_1_nswap_guts(u_)
    }

    //convert remainder
    for (; i < nsamps; i++){
        output[i] = item32_sc16_to_fc32(input[i], float(scale_factor));
    }

    //the helper macro is private to this converter
    #undef convert_item32_1_to_fc32_1_nswap_guts
}
153

  
154
//SSE2 conversion of byte-swapped sc16 items into complex float samples.
//Four samples are handled per vector iteration; the head sample needed
//to bring the output to 16-byte alignment and the tail go through the
//scalar conversion.
//NOTE(review): inputs, outputs, nsamps and scale_factor are presumably
//supplied by DECLARE_CONVERTER - confirm against its definition.
DECLARE_CONVERTER(sc16_item32_be, 1, fc32, 1, PRIORITY_SIMD){
    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
    fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]);

    //the vector path places each value in the upper 16 bits of a 32-bit
    //lane, so the scalar is divided by 2^16 to compensate
    const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16));
    const __m128i zeroi = _mm_setzero_si128();

    //_al_ selects the aligned (_) or unaligned (u_) store intrinsic
    #define convert_item32_1_to_fc32_1_bswap_guts(_al_)                 \
    for (; i+3 < nsamps; i+=4){                                         \
        /* load from input */                                           \
        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \
                                                                        \
        /* byteswap + unpack -> byteswap 16 bit words */                \
        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \
        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \
        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi);               \
                                                                        \
        /* convert and scale */                                         \
        __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar);     \
        __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar);     \
                                                                        \
        /* store to output */                                           \
        _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+i+0), tmplo); \
        _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+i+2), tmphi); \
    }

    size_t i = 0;

    //dispatch according to alignment
    const size_t misalignment = size_t(output) & 0xf;

    //one sample (8 bytes) off: convert a single head sample to reach
    //16-byte alignment, but never touch the buffers of an empty request
    if (misalignment == 0x8 && nsamps != 0){
        output[i] = item32_sc16_to_fc32(uhd::byteswap(input[i]), float(scale_factor)); i++;
    }

    if (misalignment == 0x0 || misalignment == 0x8){
        convert_item32_1_to_fc32_1_bswap_guts(_)
    }
    else{
        convert_item32_1_to_fc32_1_bswap_guts(u_)
    }

    //convert remainder
    for (; i < nsamps; i++){
        output[i] = item32_sc16_to_fc32(uhd::byteswap(input[i]), float(scale_factor));
    }

    //the helper macro is private to this converter
    #undef convert_item32_1_to_fc32_1_bswap_guts
}
/dev/null
1
//
2
// Copyright 2012 Ettus Research LLC
3
//
4
// This program is free software: you can redistribute it and/or modify
5
// it under the terms of the GNU General Public License as published by
6
// the Free Software Foundation, either version 3 of the License, or
7
// (at your option) any later version.
8
//
9
// This program is distributed in the hope that it will be useful,
10
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
// GNU General Public License for more details.
13
//
14
// You should have received a copy of the GNU General Public License
15
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
//
17

  
18
#include "convert_common.hpp"
19
#include <uhd/utils/byteswap.hpp>
20
#include <emmintrin.h>
21

  
22
using namespace uhd::convert;
23

  
24
//Narrow four vectors of 32-bit integers (16 values total) into a single
//vector of 16 signed bytes. Both narrowing steps use the saturating pack
//instructions, and the value order in0, in1, in2, in3 is kept.
UHD_INLINE __m128i pack_sc8_item32_4x(
    const __m128i &in0, const __m128i &in1,
    const __m128i &in2, const __m128i &in3
){
    //32-bit -> 16-bit for each half, then 16-bit -> 8-bit for the whole
    return _mm_packs_epi16(
        _mm_packs_epi32(in0, in1),
        _mm_packs_epi32(in2, in3)
    );
}
32

  
33
//Scale two complex doubles and truncate them to four 32-bit integers.
//The result holds the converted "hi" sample in its lower 64 bits and the
//converted "lo" sample in its upper 64 bits (the sample order is swapped).
UHD_INLINE __m128i pack_sc32_4x_be(
    const __m128d &lo, const __m128d &hi,
    const __m128d &scalar
){
    const __m128i first = _mm_cvttpd_epi32(_mm_mul_pd(lo, scalar));
    const __m128i second = _mm_cvttpd_epi32(_mm_mul_pd(hi, scalar));

    //each conversion leaves its two integers in the lower 64 bits
    return _mm_unpacklo_epi64(second, first);
}
41

  
42
//Scale two complex doubles and truncate them to four 32-bit integers.
//The samples stay in order ("lo" then "hi"), but the two integers that
//make up each sample are exchanged by the final shuffle.
UHD_INLINE __m128i pack_sc32_4x_le(
    const __m128d &lo, const __m128d &hi,
    const __m128d &scalar
){
    const __m128i first = _mm_cvttpd_epi32(_mm_mul_pd(lo, scalar));
    const __m128i second = _mm_cvttpd_epi32(_mm_mul_pd(hi, scalar));

    //each conversion leaves its two integers in the lower 64 bits
    return _mm_shuffle_epi32(
        _mm_unpacklo_epi64(first, second),
        _MM_SHUFFLE(2, 3, 0, 1)
    );
}
51

  
52
//SSE2 converter: fc64 samples -> sc8_item32_be words.
//Every output item32 carries two input samples; the scalar tail applies
//uhd::byteswap to each word it produces.
DECLARE_CONVERTER(fc64, 1, sc8_item32_be, 1, PRIORITY_SIMD){
    const fc64_t *input = reinterpret_cast<const fc64_t *>(inputs[0]);
    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);

    const __m128d scalar = _mm_set1_pd(scale_factor);

    //vector loop: 8 samples in, 4 item32 words out per iteration;
    //_al_ picks the load flavor: "_" for aligned, "u_" for unaligned
    #define convert_fc64_1_to_sc8_item32_1_bswap_guts(_al_)             \
    for (size_t j = 0; i+7 < nsamps; i+=8, j+=4){                       \
        /* one 128-bit load per input sample */                         \
        const fc64_t *src = input+i;                                    \
        const __m128d smp0 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(src+0)); \
        const __m128d smp1 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(src+1)); \
        const __m128d smp2 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(src+2)); \
        const __m128d smp3 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(src+3)); \
        const __m128d smp4 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(src+4)); \
        const __m128d smp5 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(src+5)); \
        const __m128d smp6 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(src+6)); \
        const __m128d smp7 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(src+7)); \
                                                                        \
        /* scale + truncate sample pairs, then narrow to bytes */       \
        const __m128i words = pack_sc8_item32_4x(                       \
            pack_sc32_4x_be(smp0, smp1, scalar),                        \
            pack_sc32_4x_be(smp2, smp3, scalar),                        \
            pack_sc32_4x_be(smp4, smp5, scalar),                        \
            pack_sc32_4x_be(smp6, smp7, scalar)                         \
        );                                                              \
                                                                        \
        /* the output store is always unaligned */                      \
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+j), words); \
    }

    size_t i = 0;

    //only the input alignment decides which load flavor is used
    if ((size_t(input) & 0xf) != 0){
        convert_fc64_1_to_sc8_item32_1_bswap_guts(u_)
    }
    else{
        convert_fc64_1_to_sc8_item32_1_bswap_guts(_)
    }

    //scalar tail: finish the remaining whole pairs
    //(i is a multiple of 8 here, so word w holds samples 2w and 2w+1)
    const size_t num_words = nsamps/2;
    for (size_t w = i/2; w < num_words; w++){
        const item32_t item = fc64_to_item32_sc8(input[2*w], input[2*w+1], scale_factor);
        output[w] = uhd::byteswap(item);
    }

    //an odd sample count leaves one sample, paired with a zero
    if (nsamps % 2 != 0){
        const item32_t item = fc64_to_item32_sc8(input[nsamps-1], 0, scale_factor);
        output[num_words] = uhd::byteswap(item);
    }
}
104

  
105
//SSE2 converter: fc64 samples -> sc8_item32_le words.
//Every output item32 carries two input samples; the scalar tail stores
//each word exactly as fc64_to_item32_sc8 returns it.
DECLARE_CONVERTER(fc64, 1, sc8_item32_le, 1, PRIORITY_SIMD){
    const fc64_t *input = reinterpret_cast<const fc64_t *>(inputs[0]);
    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);

    const __m128d scalar = _mm_set1_pd(scale_factor);

    //vector loop: 8 samples in, 4 item32 words out per iteration;
    //_al_ picks the load flavor: "_" for aligned, "u_" for unaligned
    #define convert_fc64_1_to_sc8_item32_1_nswap_guts(_al_)             \
    for (size_t j = 0; i+7 < nsamps; i+=8, j+=4){                       \
        /* one 128-bit load per input sample */                         \
        const fc64_t *src = input+i;                                    \
        const __m128d smp0 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(src+0)); \
        const __m128d smp1 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(src+1)); \
        const __m128d smp2 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(src+2)); \
        const __m128d smp3 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(src+3)); \
        const __m128d smp4 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(src+4)); \
        const __m128d smp5 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(src+5)); \
        const __m128d smp6 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(src+6)); \
        const __m128d smp7 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(src+7)); \
                                                                        \
        /* scale + truncate sample pairs, then narrow to bytes */       \
        const __m128i words = pack_sc8_item32_4x(                       \
            pack_sc32_4x_le(smp0, smp1, scalar),                        \
            pack_sc32_4x_le(smp2, smp3, scalar),                        \
            pack_sc32_4x_le(smp4, smp5, scalar),                        \
            pack_sc32_4x_le(smp6, smp7, scalar)                         \
        );                                                              \
                                                                        \
        /* the output store is always unaligned */                      \
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+j), words); \
    }

    size_t i = 0;

    //only the input alignment decides which load flavor is used
    if ((size_t(input) & 0xf) != 0){
        convert_fc64_1_to_sc8_item32_1_nswap_guts(u_)
    }
    else{
        convert_fc64_1_to_sc8_item32_1_nswap_guts(_)
    }

    //scalar tail: finish the remaining whole pairs
    //(i is a multiple of 8 here, so word w holds samples 2w and 2w+1)
    const size_t num_words = nsamps/2;
    for (size_t w = i/2; w < num_words; w++){
        output[w] = fc64_to_item32_sc8(input[2*w], input[2*w+1], scale_factor);
    }

    //an odd sample count leaves one sample, paired with a zero
    if (nsamps % 2 != 0){
        output[num_words] = fc64_to_item32_sc8(input[nsamps-1], 0, scale_factor);
    }
}
/dev/null
1
//
2
// Copyright 2011 Ettus Research LLC
3
//
4
// This program is free software: you can redistribute it and/or modify
5
// it under the terms of the GNU General Public License as published by
6
// the Free Software Foundation, either version 3 of the License, or
7
// (at your option) any later version.
8
//
9
// This program is distributed in the hope that it will be useful,
10
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
// GNU General Public License for more details.
13
//
14
// You should have received a copy of the GNU General Public License
15
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
//
17

  
18
#include "convert_common.hpp"
19
#include <uhd/utils/byteswap.hpp>
20
#include <emmintrin.h>
21

  
22
using namespace uhd::convert;
23

  
24
//SSE2 converter: fc64 samples -> sc16_item32_le words, one sample per word.
DECLARE_CONVERTER(fc64, 1, sc16_item32_le, 1, PRIORITY_SIMD){
    const fc64_t *input = reinterpret_cast<const fc64_t *>(inputs[0]);
    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);

    const __m128d scalar = _mm_set1_pd(scale_factor);

    //vector loop: 4 samples in, 4 item32 words out per iteration;
    //_al_ picks the load flavor: "_" for aligned, "u_" for unaligned
    #define convert_fc64_1_to_item32_1_nswap_guts(_al_)                 \
    for (; i+3 < nsamps; i+=4){                                         \
        const fc64_t *src = input+i;                                    \
                                                                        \
        /* scale each sample and truncate it to a pair of int32 */      \
        const __m128i q0 = _mm_cvttpd_epi32(_mm_mul_pd(                 \
            _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(src+0)), scalar)); \
        const __m128i q1 = _mm_cvttpd_epi32(_mm_mul_pd(                 \
            _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(src+1)), scalar)); \
        const __m128i q2 = _mm_cvttpd_epi32(_mm_mul_pd(                 \
            _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(src+2)), scalar)); \
        const __m128i q3 = _mm_cvttpd_epi32(_mm_mul_pd(                 \
            _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(src+3)), scalar)); \
                                                                        \
        /* gather the four samples and saturate them to int16 */        \
        __m128i packed = _mm_packs_epi32(                               \
            _mm_unpacklo_epi64(q0, q1), _mm_unpacklo_epi64(q2, q3));    \
                                                                        \
        /* exchange the two 16-bit halves of every 32-bit word */       \
        packed = _mm_shufflelo_epi16(packed, _MM_SHUFFLE(2, 3, 0, 1));  \
        packed = _mm_shufflehi_epi16(packed, _MM_SHUFFLE(2, 3, 0, 1));  \
                                                                        \
        /* the output store is always unaligned */                      \
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), packed); \
    }

    size_t i = 0;

    //only the input alignment decides which load flavor is used
    if ((size_t(input) & 0xf) != 0){
        convert_fc64_1_to_item32_1_nswap_guts(u_)
    }
    else{
        convert_fc64_1_to_item32_1_nswap_guts(_)
    }

    //scalar tail for the samples the vector loop did not reach
    while (i < nsamps){
        output[i] = fc64_to_item32_sc16(input[i], scale_factor);
        i++;
    }
}
70

  
71
//SSE2 converter: fc64 samples -> sc16_item32_be words, one sample per word.
DECLARE_CONVERTER(fc64, 1, sc16_item32_be, 1, PRIORITY_SIMD){
    const fc64_t *input = reinterpret_cast<const fc64_t *>(inputs[0]);
    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);

    const __m128d scalar = _mm_set1_pd(scale_factor);

    //vector loop: 4 samples in, 4 item32 words out per iteration;
    //_al_ picks the load flavor: "_" for aligned, "u_" for unaligned
    #define convert_fc64_1_to_item32_1_bswap_guts(_al_)                 \
    for (; i+3 < nsamps; i+=4){                                         \
        const fc64_t *src = input+i;                                    \
                                                                        \
        /* scale each sample and truncate it to a pair of int32 */      \
        const __m128i q0 = _mm_cvttpd_epi32(_mm_mul_pd(                 \
            _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(src+0)), scalar)); \
        const __m128i q1 = _mm_cvttpd_epi32(_mm_mul_pd(                 \
            _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(src+1)), scalar)); \
        const __m128i q2 = _mm_cvttpd_epi32(_mm_mul_pd(                 \
            _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(src+2)), scalar)); \
        const __m128i q3 = _mm_cvttpd_epi32(_mm_mul_pd(                 \
            _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(src+3)), scalar)); \
                                                                        \
        /* gather the four samples and saturate them to int16 */        \
        const __m128i packed = _mm_packs_epi32(                         \
            _mm_unpacklo_epi64(q0, q1), _mm_unpacklo_epi64(q2, q3));    \
                                                                        \
        /* exchange the two bytes inside every 16-bit lane */           \
        const __m128i swapped = _mm_or_si128(                           \
            _mm_srli_epi16(packed, 8), _mm_slli_epi16(packed, 8));      \
                                                                        \
        /* the output store is always unaligned */                      \
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), swapped); \
    }

    size_t i = 0;

    //only the input alignment decides which load flavor is used
    if ((size_t(input) & 0xf) != 0){
        convert_fc64_1_to_item32_1_bswap_guts(u_)
    }
    else{
        convert_fc64_1_to_item32_1_bswap_guts(_)
    }

    //scalar tail for the samples the vector loop did not reach
    while (i < nsamps){
        output[i] = uhd::byteswap(fc64_to_item32_sc16(input[i], scale_factor));
        i++;
    }
}
116

  
117
//SSE2 converter: sc16_item32_le words -> fc64 samples, one sample per word.
DECLARE_CONVERTER(sc16_item32_le, 1, fc64, 1, PRIORITY_SIMD){
    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
    fc64_t *output = reinterpret_cast<fc64_t *>(outputs[0]);

    //the unpack below leaves each 16-bit value in the upper half of a
    //32-bit integer, so the extra factor of 2^16 is divided out here
    const __m128d scalar = _mm_set1_pd(scale_factor/(1 << 16));
    const __m128i zeroi = _mm_setzero_si128();

    //vector loop: 4 item32 words in, 4 samples out per iteration;
    //_al_ picks the store flavor: "_" for aligned, "u_" for unaligned
    #define convert_item32_1_to_fc64_1_nswap_guts(_al_)                 \
    for (; i+3 < nsamps; i+=4){                                         \
        /* the input load is always unaligned */                        \
        __m128i raw = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \
                                                                        \
        /* exchange the two 16-bit halves of every 32-bit word */       \
        raw = _mm_shufflelo_epi16(raw, _MM_SHUFFLE(2, 3, 0, 1));        \
        raw = _mm_shufflehi_epi16(raw, _MM_SHUFFLE(2, 3, 0, 1));        \
                                                                        \
        /* widen to 32 bits with the value in the upper 16 bits */      \
        const __m128i pair01 = _mm_unpacklo_epi16(zeroi, raw);          \
        const __m128i pair23 = _mm_unpackhi_epi16(zeroi, raw);          \
                                                                        \
        /* convert two integers at a time, scale, store one sample */   \
        fc64_t *dst = output+i;                                         \
        _mm_store ## _al_ ## pd(reinterpret_cast<double *>(dst+0),      \
            _mm_mul_pd(_mm_cvtepi32_pd(pair01), scalar));               \
        _mm_store ## _al_ ## pd(reinterpret_cast<double *>(dst+1),      \
            _mm_mul_pd(_mm_cvtepi32_pd(_mm_unpackhi_epi64(pair01, zeroi)), scalar)); \
        _mm_store ## _al_ ## pd(reinterpret_cast<double *>(dst+2),      \
            _mm_mul_pd(_mm_cvtepi32_pd(pair23), scalar));               \
        _mm_store ## _al_ ## pd(reinterpret_cast<double *>(dst+3),      \
            _mm_mul_pd(_mm_cvtepi32_pd(_mm_unpackhi_epi64(pair23, zeroi)), scalar)); \
    }

    size_t i = 0;

    //only the output alignment decides which store flavor is used
    if ((size_t(output) & 0xf) != 0){
        convert_item32_1_to_fc64_1_nswap_guts(u_)
    }
    else{
        convert_item32_1_to_fc64_1_nswap_guts(_)
    }

    //scalar tail for the words the vector loop did not reach
    while (i < nsamps){
        output[i] = item32_sc16_to_fc64(input[i], scale_factor);
        i++;
    }
}
165

  
166
//SSE2 converter: sc16_item32_be words -> fc64 samples, one sample per word.
DECLARE_CONVERTER(sc16_item32_be, 1, fc64, 1, PRIORITY_SIMD){
    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
    fc64_t *output = reinterpret_cast<fc64_t *>(outputs[0]);

    //the unpack below leaves each 16-bit value in the upper half of a
    //32-bit integer, so the extra factor of 2^16 is divided out here
    const __m128d scalar = _mm_set1_pd(scale_factor/(1 << 16));
    const __m128i zeroi = _mm_setzero_si128();

    //vector loop: 4 item32 words in, 4 samples out per iteration;
    //_al_ picks the store flavor: "_" for aligned, "u_" for unaligned
    #define convert_item32_1_to_fc64_1_bswap_guts(_al_)                 \
    for (; i+3 < nsamps; i+=4){                                         \
        /* the input load is always unaligned */                        \
        const __m128i raw = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \
                                                                        \
        /* exchange the two bytes inside every 16-bit lane */           \
        const __m128i swapped = _mm_or_si128(                           \
            _mm_srli_epi16(raw, 8), _mm_slli_epi16(raw, 8));            \
                                                                        \
        /* widen to 32 bits with the value in the upper 16 bits */      \
        const __m128i pair01 = _mm_unpacklo_epi16(zeroi, swapped);      \
        const __m128i pair23 = _mm_unpackhi_epi16(zeroi, swapped);      \
                                                                        \
        /* convert two integers at a time, scale, store one sample */   \
        fc64_t *dst = output+i;                                         \
        _mm_store ## _al_ ## pd(reinterpret_cast<double *>(dst+0),      \
            _mm_mul_pd(_mm_cvtepi32_pd(pair01), scalar));               \
        _mm_store ## _al_ ## pd(reinterpret_cast<double *>(dst+1),      \
            _mm_mul_pd(_mm_cvtepi32_pd(_mm_unpackhi_epi64(pair01, zeroi)), scalar)); \
        _mm_store ## _al_ ## pd(reinterpret_cast<double *>(dst+2),      \
            _mm_mul_pd(_mm_cvtepi32_pd(pair23), scalar));               \
        _mm_store ## _al_ ## pd(reinterpret_cast<double *>(dst+3),      \
            _mm_mul_pd(_mm_cvtepi32_pd(_mm_unpackhi_epi64(pair23, zeroi)), scalar)); \
    }

    size_t i = 0;

    //only the output alignment decides which store flavor is used
    if ((size_t(output) & 0xf) != 0){
        convert_item32_1_to_fc64_1_bswap_guts(u_)
    }
    else{
        convert_item32_1_to_fc64_1_bswap_guts(_)
    }

    //scalar tail for the words the vector loop did not reach
    while (i < nsamps){
        output[i] = item32_sc16_to_fc64(uhd::byteswap(input[i]), scale_factor);
        i++;
    }
}
b/host/lib/convert/convert_item32.cpp
1
//
2
// Copyright 2012 Ettus Research LLC
3
//
4
// This program is free software: you can redistribute it and/or modify
5
// it under the terms of the GNU General Public License as published by
6
// the Free Software Foundation, either version 3 of the License, or
7
// (at your option) any later version.
8
//
9
// This program is distributed in the hope that it will be useful,
10
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
// GNU General Public License for more details.
13
//
14
// You should have received a copy of the GNU General Public License
15
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
//
17

  
18
#include "convert_common.hpp"
19
#include <uhd/utils/byteswap.hpp>
20

  
21
//Declare a PRIORITY_GENERAL converter pair between one host sample type
//and one item32 wire format with a fixed byte order:
//  host -> wire calls xx_to_item32_<wire_fmt><to_wire_fn>
//  wire -> host calls item32_<wire_fmt>_to_xx<to_host_fn>
//"endian" is the suffix (be or le) appended to the wire format name.
#define __DECLARE_ITEM32_CONVERTER(host_fmt, wire_fmt, endian, to_wire_fn, to_host_fn) \
    DECLARE_CONVERTER(host_fmt, 1, wire_fmt ## _item32_ ## endian, 1, PRIORITY_GENERAL){ \
        const host_fmt ## _t *src = reinterpret_cast<const host_fmt ## _t *>(inputs[0]); \
        item32_t *dst = reinterpret_cast<item32_t *>(outputs[0]); \
        xx_to_item32_ ## wire_fmt<to_wire_fn>(src, dst, nsamps, scale_factor); \
    } \
    DECLARE_CONVERTER(wire_fmt ## _item32_ ## endian, 1, host_fmt, 1, PRIORITY_GENERAL){ \
        const item32_t *src = reinterpret_cast<const item32_t *>(inputs[0]); \
        host_fmt ## _t *dst = reinterpret_cast<host_fmt ## _t *>(outputs[0]); \
        item32_ ## wire_fmt ## _to_xx<to_host_fn>(src, dst, nsamps, scale_factor); \
    }
32

  
33
//For one host type and one wire format, declare the converter pairs for
//both byte orders: "be" uses htonx/ntohx and "le" uses htowx/wtohx.
#define _DECLARE_ITEM32_CONVERTER(host_fmt, wire_fmt) \
    __DECLARE_ITEM32_CONVERTER(host_fmt, wire_fmt, be, uhd::htonx, uhd::ntohx) \
    __DECLARE_ITEM32_CONVERTER(host_fmt, wire_fmt, le, uhd::htowx, uhd::wtohx)

//For one host type, declare the converters for every item32 wire format
//handled in this file (sc8 and sc16).
#define DECLARE_ITEM32_CONVERTER(host_fmt) \
    _DECLARE_ITEM32_CONVERTER(host_fmt, sc8) \
    _DECLARE_ITEM32_CONVERTER(host_fmt, sc16)

//instantiate the general converters for each supported host sample type
DECLARE_ITEM32_CONVERTER(sc16)
DECLARE_ITEM32_CONVERTER(fc32)
DECLARE_ITEM32_CONVERTER(fc64)
b/host/lib/convert/convert_with_neon.cpp
1 1
//
2
// Copyright 2011-2011 Ettus Research LLC
2
// Copyright 2011-2012 Ettus Research LLC
3 3
//
4 4
// This program is free software: you can redistribute it and/or modify
5 5
// it under the terms of the GNU General Public License as published by
......
36 36
        vst1_s16((reinterpret_cast<int16_t *>(&output[i])), D9);
37 37
    }
38 38

  
39
    for (; i < nsamps; i++)
40
        output[i] = fc32_to_item32_sc16(input[i], scale_factor);
39
    xx_to_item32_sc16<uhd::htowx>(input+i, output+i, nsamps-i, scale_factor);
41 40
}
42 41

  
43 42
DECLARE_CONVERTER(sc16_item32_le, 1, fc32, 1, PRIORITY_SIMD){
......
56 55
        vst1q_f32((reinterpret_cast<float *>(&output[i])), Q4);
57 56
    }
58 57

  
59
    for (; i < nsamps; i++)
60
        output[i] = item32_sc16_to_fc32(input[i], scale_factor);
58
    item32_sc16_to_xx<uhd::htowx>(input+i, output+i, nsamps-i, scale_factor);
61 59
}
b/host/lib/convert/gen_convert_general.py
48 48
}
49 49
"""
50 50

  
51
TMPL_CONV_GEN2_SC16 = """
52
DECLARE_CONVERTER($(cpu_type), 1, sc16_item32_$(end), 1, PRIORITY_GENERAL){
53
    const $(cpu_type)_t *input = reinterpret_cast<const $(cpu_type)_t *>(inputs[0]);
54
    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
55

  
56
    for (size_t i = 0; i < nsamps; i++){
57
        output[i] = $(to_wire)($(cpu_type)_to_item32_sc16(input[i], scale_factor));
58
    }
59
}
60

  
61
DECLARE_CONVERTER(sc16_item32_$(end), 1, $(cpu_type), 1, PRIORITY_GENERAL){
62
    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
63
    $(cpu_type)_t *output = reinterpret_cast<$(cpu_type)_t *>(outputs[0]);
64

  
65
    for (size_t i = 0; i < nsamps; i++){
66
        output[i] = item32_sc16_to_$(cpu_type)($(to_host)(input[i]), scale_factor);
67
    }
68
}
69
"""
70

  
71
TMPL_CONV_GEN2_SC8 = """
72
DECLARE_CONVERTER(sc8_item32_$(end), 1, $(cpu_type), 1, PRIORITY_GENERAL){
73
    const item32_t *input = reinterpret_cast<const item32_t *>(size_t(inputs[0]) & ~0x3);
74
    $(cpu_type)_t *output = reinterpret_cast<$(cpu_type)_t *>(outputs[0]);
75
    $(cpu_type)_t dummy;
76
    size_t num_samps = nsamps;
77

  
78
    if ((size_t(inputs[0]) & 0x3) != 0){
79
        const item32_t item0 = $(to_host)(*input++);
80
        item32_sc8_to_$(cpu_type)(item0, dummy, *output++, scale_factor);
81
        num_samps--;
82
    }
83

  
84
    const size_t num_pairs = num_samps/2;
85
    for (size_t i = 0, j = 0; i < num_pairs; i++, j+=2){
86
        const item32_t item_i = $(to_host)(input[i]);
87
        item32_sc8_to_$(cpu_type)(item_i, output[j], output[j+1], scale_factor);
88
    }
89

  
90
    if (num_samps != num_pairs*2){
91
        const item32_t item_n = $(to_host)(input[num_pairs]);
92
        item32_sc8_to_$(cpu_type)(item_n, output[num_samps-1], dummy, scale_factor);
93
    }
94
}
95

  
96
DECLARE_CONVERTER($(cpu_type), 1, sc8_item32_$(end), 1, PRIORITY_GENERAL){
97
    const $(cpu_type)_t *input = reinterpret_cast<const $(cpu_type)_t *>(inputs[0]);
98
    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
99

  
100
    const size_t num_pairs = nsamps/2;
101
    for (size_t i = 0, j = 0; i < num_pairs; i++, j+=2){
102
        const item32_t item = $(cpu_type)_to_item32_sc8(input[j], input[j+1], scale_factor);
103
        output[i] = $(to_wire)(item);
104
    }
105

  
106
    if (nsamps != num_pairs*2){
107
        const item32_t item = $(cpu_type)_to_item32_sc8(input[nsamps-1], 0, scale_factor);
108
        output[num_pairs] = $(to_wire)(item);
109
    }
110
}
111
"""
112

  
113 51
TMPL_CONV_USRP1_COMPLEX = """
114 52
DECLARE_CONVERTER($(cpu_type), $(width), sc16_item16_usrp1, 1, PRIORITY_GENERAL){
115 53
    #for $w in range($width)
......
176 114
        ('be', 'uhd::ntohx', 'uhd::htonx'),
177 115
        ('le', 'uhd::wtohx', 'uhd::htowx'),
178 116
    ):
179
        for cpu_type in 'fc64', 'fc32', 'sc16':
180
            output += parse_tmpl(
181
                TMPL_CONV_GEN2_SC16,
182
                end=end, to_host=to_host, to_wire=to_wire, cpu_type=cpu_type
183
            )
184
        for cpu_type in 'fc64', 'fc32', 'sc16', 'sc8':
185
            output += parse_tmpl(
186
                TMPL_CONV_GEN2_SC8,
187
                end=end, to_host=to_host, to_wire=to_wire, cpu_type=cpu_type
188
            )
189 117
        output += parse_tmpl(
190 118
                TMPL_CONV_GEN2_ITEM32,
191 119
                end=end, to_host=to_host, to_wire=to_wire
b/host/lib/convert/sse2_fc32_to_sc16.cpp
1
//
2
// Copyright 2011-2012 Ettus Research LLC
3
//
4
// This program is free software: you can redistribute it and/or modify
5
// it under the terms of the GNU General Public License as published by
6
// the Free Software Foundation, either version 3 of the License, or
7
// (at your option) any later version.
8
//
9
// This program is distributed in the hope that it will be useful,
10
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
// GNU General Public License for more details.
13
//
14
// You should have received a copy of the GNU General Public License
15
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
//
17

  
18
#include "convert_common.hpp"
19
#include <uhd/utils/byteswap.hpp>
20
#include <emmintrin.h>
21

  
22
using namespace uhd::convert;
23

  
24
//SSE2 converter: fc32 samples -> sc16_item32_le words, one sample per word.
//The vector loop handles four samples at a time; a leading sample (to reach
//16-byte input alignment) and any trailing samples go through the scalar
//helper xx_to_item32_sc16<uhd::htowx>.
DECLARE_CONVERTER(fc32, 1, sc16_item32_le, 1, PRIORITY_SIMD){
    const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);
    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);

    const __m128 scalar = _mm_set_ps1(float(scale_factor));

    //vector loop: 4 samples in, 4 item32 words out per iteration;
    //_al_ picks the load flavor: "_" for aligned, "u_" for unaligned
    #define convert_fc32_1_to_item32_1_nswap_guts(_al_)                 \
    for (; i+3 < nsamps; i+=4){                                         \
        /* load from input */                                           \
        __m128 tmplo = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+0)); \
        __m128 tmphi = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+2)); \
                                                                        \
        /* convert and scale */                                         \
        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar));    \
        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar));    \
                                                                        \
        /* pack + swap 16-bit pairs */                                  \
        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi);                 \
        tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));      \
        tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));      \
                                                                        \
        /* store to output */                                           \
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi);  \
    }

    size_t i = 0;

    //dispatch according to alignment
    switch (size_t(input) & 0xf){
    case 0x8:
        //convert one sample so that input+i becomes 16-byte aligned;
        //only when a sample exists: with nsamps == 0 the peel would touch
        //input[0]/output[0] and make nsamps-i below wrap around
        if (nsamps != 0){
            xx_to_item32_sc16<uhd::htowx>(input, output, 1, scale_factor); i++;
        }
        //fall through (with nsamps == 0 the loop below does not execute)
    case 0x0:
        convert_fc32_1_to_item32_1_nswap_guts(_)
        break;
    default: convert_fc32_1_to_item32_1_nswap_guts(u_)
    }

    //convert remainder (i never exceeds nsamps here)
    xx_to_item32_sc16<uhd::htowx>(input+i, output+i, nsamps-i, scale_factor);
}
64

  
65
//SSE2 converter: fc32 samples -> sc16_item32_be words, one sample per word.
//The vector loop handles four samples at a time; a leading sample (to reach
//16-byte input alignment) and any trailing samples go through the scalar
//helper xx_to_item32_sc16<uhd::htonx>.
DECLARE_CONVERTER(fc32, 1, sc16_item32_be, 1, PRIORITY_SIMD){
    const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);
    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);

    const __m128 scalar = _mm_set_ps1(float(scale_factor));

    //vector loop: 4 samples in, 4 item32 words out per iteration;
    //_al_ picks the load flavor: "_" for aligned, "u_" for unaligned
    #define convert_fc32_1_to_item32_1_bswap_guts(_al_)                 \
    for (; i+3 < nsamps; i+=4){                                         \
        /* load from input */                                           \
        __m128 tmplo = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+0)); \
        __m128 tmphi = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+2)); \
                                                                        \
        /* convert and scale */                                         \
        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar));    \
        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar));    \
                                                                        \
        /* pack + byteswap -> byteswap 16 bit words */                  \
        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi);                 \
        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \
                                                                        \
        /* store to output */                                           \
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi);  \
    }

    size_t i = 0;

    //dispatch according to alignment
    switch (size_t(input) & 0xf){
    case 0x8:
        //convert one sample so that input+i becomes 16-byte aligned;
        //only when a sample exists: with nsamps == 0 the peel would touch
        //input[0]/output[0] and make nsamps-i below wrap around
        if (nsamps != 0){
            xx_to_item32_sc16<uhd::htonx>(input, output, 1, scale_factor); i++;
        }
        //fall through (with nsamps == 0 the loop below does not execute)
    case 0x0:
        convert_fc32_1_to_item32_1_bswap_guts(_)
        break;
    default: convert_fc32_1_to_item32_1_bswap_guts(u_)
    }

    //convert remainder (i never exceeds nsamps here)
    xx_to_item32_sc16<uhd::htonx>(input+i, output+i, nsamps-i, scale_factor);
}
b/host/lib/convert/sse2_fc32_to_sc8.cpp
1
//
2
// Copyright 2012 Ettus Research LLC
3
//
4
// This program is free software: you can redistribute it and/or modify
5
// it under the terms of the GNU General Public License as published by
6
// the Free Software Foundation, either version 3 of the License, or
7
// (at your option) any later version.
8
//
9
// This program is distributed in the hope that it will be useful,
10
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
// GNU General Public License for more details.
13
//
14
// You should have received a copy of the GNU General Public License
15
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
//
17

  
18
#include "convert_common.hpp"
19
#include <uhd/utils/byteswap.hpp>
20
#include <emmintrin.h>
21

  
22
using namespace uhd::convert;
23

  
24
// Permute the four 32-bit lanes of vec with an _MM_SHUFFLE-style selector:
// output lane k receives input lane ((shuf >> 2*k) & 3).
//
// _mm_shuffle_epi32 encodes its selector as an instruction immediate, so the
// selector must be a compile-time constant at the point of the intrinsic call.
// Passing a function parameter straight through only builds when the compiler
// happens to inline and constant-fold it. The selectors used by the converters
// in this file are therefore dispatched to literal immediates, and any other
// value takes a lane-by-lane path with the same mapping.
UHD_INLINE __m128i shuffle_epi32_by_selector(const __m128i &vec, const int shuf){
    switch (shuf & 0xff){
    case _MM_SHUFFLE(1, 0, 3, 2):
        return _mm_shuffle_epi32(vec, _MM_SHUFFLE(1, 0, 3, 2));
    case _MM_SHUFFLE(2, 3, 0, 1):
        return _mm_shuffle_epi32(vec, _MM_SHUFFLE(2, 3, 0, 1));
    default:
        break;
    }

    //generic path: spill the lanes, permute them, reload
    int lanes_in[4];
    int lanes_out[4];
    _mm_storeu_si128(reinterpret_cast<__m128i *>(lanes_in), vec);
    for (int k = 0; k < 4; k++){
        lanes_out[k] = lanes_in[(shuf >> (2*k)) & 0x3];
    }
    return _mm_loadu_si128(reinterpret_cast<const __m128i *>(lanes_out));
}

// Scale four vectors of 4 floats each and narrow them to 16 signed bytes.
//
// Each input vector is multiplied by scalar, converted to 32-bit integers
// with _mm_cvtps_epi32, and has its four lanes permuted according to shuf
// (an _MM_SHUFFLE-style selector). The results are then narrowed with the
// saturating packs: 32 -> 16 bit, then 16 -> 8 bit.
//
// in0..in3: input float vectors, in output order (in0 fills bytes 0-3,
//           in1 bytes 4-7, in2 bytes 8-11, in3 bytes 12-15)
// scalar:   per-lane multiplier applied before the integer conversion
// shuf:     lane permutation applied to every converted vector
// returns:  the 16 packed signed 8-bit values
UHD_INLINE __m128i pack_sc32_4x(
    const __m128 &in0, const __m128 &in1,
    const __m128 &in2, const __m128 &in3,
    const __m128 &scalar, const int shuf
){
    //lower eight 16-bit values come from in0 and in1
    const __m128i tmpi0 = shuffle_epi32_by_selector(_mm_cvtps_epi32(_mm_mul_ps(in0, scalar)), shuf);
    const __m128i tmpi1 = shuffle_epi32_by_selector(_mm_cvtps_epi32(_mm_mul_ps(in1, scalar)), shuf);
    const __m128i lo = _mm_packs_epi32(tmpi0, tmpi1);

    //upper eight 16-bit values come from in2 and in3
    const __m128i tmpi2 = shuffle_epi32_by_selector(_mm_cvtps_epi32(_mm_mul_ps(in2, scalar)), shuf);
    const __m128i tmpi3 = shuffle_epi32_by_selector(_mm_cvtps_epi32(_mm_mul_ps(in3, scalar)), shuf);
    const __m128i hi = _mm_packs_epi32(tmpi2, tmpi3);

    //final saturating narrow from 16-bit to 8-bit
    return _mm_packs_epi16(lo, hi);
}
43

  
44
// SSE2 converter: one fc32 input stream -> sc8 samples packed into item32
// words (the "be" variant; the scalar remainder uses uhd::htonx).
// Samples are processed 8 at a time with SSE2; whatever is left over is
// handed to the generic xx_to_item32_sc8 routine.
DECLARE_CONVERTER(fc32, 1, sc8_item32_be, 1, PRIORITY_SIMD){
    const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);
    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);

    //the same scale factor in every float lane
    const __m128 scalar = _mm_set1_ps(float(scale_factor));

    //count of input samples consumed so far
    size_t i = 0;

    //vector loop body, parameterized on the load flavour:
    //"_" pastes to _mm_load_ps (aligned), "u_" to _mm_loadu_ps (unaligned).
    //Every pass reads 16 floats starting at input+i and writes 16 bytes
    //at output+(i/2); stores are always unaligned.
    #define convert_fc32_1_to_sc8_item32_1_bswap_guts(_ld_)             \
    for (; nsamps - i >= 8; i += 8){                                    \
        const float *src = reinterpret_cast<const float *>(input+i);    \
                                                                        \
        /* four loads of four floats each */                            \
        const __m128 v0 = _mm_load ## _ld_ ## ps(src+0);                \
        const __m128 v1 = _mm_load ## _ld_ ## ps(src+4);                \
        const __m128 v2 = _mm_load ## _ld_ ## ps(src+8);                \
        const __m128 v3 = _mm_load ## _ld_ ## ps(src+12);               \
                                                                        \
        /* scale, convert, reorder lanes and narrow to bytes */         \
        const __m128i packed = pack_sc32_4x(v0, v1, v2, v3, scalar, _MM_SHUFFLE(1, 0, 3, 2)); \
                                                                        \
        /* write the packed bytes out */                                \
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+(i/2)), packed); \
    }

    //aligned loads are only legal when the input sits on a 16-byte boundary
    const bool input_misaligned = (size_t(input) & 0xf) != 0;
    if (input_misaligned){
        convert_fc32_1_to_sc8_item32_1_bswap_guts(u_)
    }
    else{
        convert_fc32_1_to_sc8_item32_1_bswap_guts(_)
    }

    //fewer than 8 samples remain: finish with the scalar converter
    xx_to_item32_sc8<uhd::htonx>(input+i, output+(i/2), nsamps-i, scale_factor);
}
78

  
79
// SSE2 converter: one fc32 input stream -> sc8 samples packed into item32
// words (the "le" variant; the scalar remainder uses uhd::htowx).
// Samples are processed 8 at a time with SSE2; whatever is left over is
// handed to the generic xx_to_item32_sc8 routine.
DECLARE_CONVERTER(fc32, 1, sc8_item32_le, 1, PRIORITY_SIMD){
    const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);
    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);

    //the same scale factor in every float lane
    const __m128 scalar = _mm_set1_ps(float(scale_factor));

    //count of input samples consumed so far
    size_t i = 0;

    //vector loop body, parameterized on the load flavour:
    //"_" pastes to _mm_load_ps (aligned), "u_" to _mm_loadu_ps (unaligned).
    //Every pass reads 16 floats starting at input+i and writes 16 bytes
    //at output+(i/2); stores are always unaligned.
    #define convert_fc32_1_to_sc8_item32_1_nswap_guts(_ld_)             \
    for (; nsamps - i >= 8; i += 8){                                    \
        const float *src = reinterpret_cast<const float *>(input+i);    \
                                                                        \
        /* four loads of four floats each */                            \
        const __m128 v0 = _mm_load ## _ld_ ## ps(src+0);                \
        const __m128 v1 = _mm_load ## _ld_ ## ps(src+4);                \
        const __m128 v2 = _mm_load ## _ld_ ## ps(src+8);                \
        const __m128 v3 = _mm_load ## _ld_ ## ps(src+12);               \
                                                                        \
        /* scale, convert, reorder lanes and narrow to bytes */         \
        const __m128i packed = pack_sc32_4x(v0, v1, v2, v3, scalar, _MM_SHUFFLE(2, 3, 0, 1)); \
                                                                        \
        /* write the packed bytes out */                                \
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+(i/2)), packed); \
    }

    //aligned loads are only legal when the input sits on a 16-byte boundary
    const bool input_misaligned = (size_t(input) & 0xf) != 0;
    if (input_misaligned){
        convert_fc32_1_to_sc8_item32_1_nswap_guts(u_)
    }
    else{
        convert_fc32_1_to_sc8_item32_1_nswap_guts(_)
    }

    //fewer than 8 samples remain: finish with the scalar converter
    xx_to_item32_sc8<uhd::htowx>(input+i, output+(i/2), nsamps-i, scale_factor);
}
b/host/lib/convert/sse2_fc64_to_sc16.cpp
1
//
2
// Copyright 2011-2012 Ettus Research LLC
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff