Revision 4c244c78
| b/host/lib/convert/CMakeLists.txt | ||
|---|---|---|
| 71 | 71 |
|
| 72 | 72 |
IF(HAVE_EMMINTRIN_H) |
| 73 | 73 |
SET(convert_with_sse2_sources |
| 74 |
${CMAKE_CURRENT_SOURCE_DIR}/convert_fc32_with_sse2.cpp
|
|
| 75 |
${CMAKE_CURRENT_SOURCE_DIR}/convert_fc64_with_sse2.cpp
|
|
| 76 |
${CMAKE_CURRENT_SOURCE_DIR}/convert_fc32_to_sc8_with_sse2.cpp
|
|
| 77 |
${CMAKE_CURRENT_SOURCE_DIR}/convert_fc64_to_sc8_with_sse2.cpp
|
|
| 74 |
${CMAKE_CURRENT_SOURCE_DIR}/sse2_sc16_to_fc64.cpp
|
|
| 75 |
${CMAKE_CURRENT_SOURCE_DIR}/sse2_sc16_to_fc32.cpp
|
|
| 76 |
${CMAKE_CURRENT_SOURCE_DIR}/sse2_sc8_to_fc64.cpp
|
|
| 77 |
${CMAKE_CURRENT_SOURCE_DIR}/sse2_sc8_to_fc32.cpp
|
|
| 78 |
${CMAKE_CURRENT_SOURCE_DIR}/sse2_fc64_to_sc16.cpp
|
|
| 79 |
${CMAKE_CURRENT_SOURCE_DIR}/sse2_fc32_to_sc16.cpp
|
|
| 80 |
${CMAKE_CURRENT_SOURCE_DIR}/sse2_fc64_to_sc8.cpp
|
|
| 81 |
${CMAKE_CURRENT_SOURCE_DIR}/sse2_fc32_to_sc8.cpp
|
|
| 78 | 82 |
) |
| 79 | 83 |
SET_SOURCE_FILES_PROPERTIES( |
| 80 | 84 |
${convert_with_sse2_sources}
|
| ... | ... | |
| 117 | 121 |
LIBUHD_APPEND_SOURCES( |
| 118 | 122 |
${CMAKE_CURRENT_SOURCE_DIR}/convert_with_tables.cpp
|
| 119 | 123 |
${CMAKE_CURRENT_SOURCE_DIR}/convert_impl.cpp
|
| 124 |
${CMAKE_CURRENT_SOURCE_DIR}/convert_item32.cpp
|
|
| 120 | 125 |
) |
| b/host/lib/convert/convert_common.hpp | ||
|---|---|---|
| 53 | 53 |
|
| 54 | 54 |
#ifdef __ARM_NEON__ |
| 55 | 55 |
static const int PRIORITY_LIBORC = 3; |
| 56 |
static const int PRIORITY_SIMD = 1; //neon conversions could be implemented better, orc wins
|
|
| 57 |
static const int PRIORITY_TABLE = 2; //tables require large cache, so they are slower on arm
|
|
| 56 |
static const int PRIORITY_SIMD = 2; //neon conversions could be implemented better, orc wins
|
|
| 57 |
static const int PRIORITY_TABLE = 1; //tables require large cache, so they are slower on arm
|
|
| 58 | 58 |
#else |
| 59 |
static const int PRIORITY_LIBORC = 1;
|
|
| 59 |
static const int PRIORITY_LIBORC = 2;
|
|
| 60 | 60 |
static const int PRIORITY_SIMD = 3; |
| 61 |
static const int PRIORITY_TABLE = 2;
|
|
| 61 |
static const int PRIORITY_TABLE = 1;
|
|
| 62 | 62 |
#endif |
| 63 | 63 |
|
| 64 | 64 |
/*********************************************************************** |
| ... | ... | |
| 77 | 77 |
|
| 78 | 78 |
typedef boost::uint32_t item32_t; |
| 79 | 79 |
|
| 80 |
/*********************************************************************** |
|
| 81 |
* Convert complex short buffer to items32 sc16 |
|
| 82 |
**********************************************************************/ |
|
| 83 |
static UHD_INLINE item32_t sc16_to_item32_sc16(sc16_t num, double){
|
|
| 84 |
boost::uint16_t real = num.real(); |
|
| 85 |
boost::uint16_t imag = num.imag(); |
|
| 86 |
return (item32_t(real) << 16) | (item32_t(imag) << 0); |
|
| 87 |
} |
|
| 88 |
|
|
| 89 |
/*********************************************************************** |
|
| 90 |
* Convert items32 sc16 buffer to complex short |
|
| 91 |
**********************************************************************/ |
|
| 92 |
static UHD_INLINE sc16_t item32_sc16_to_sc16(item32_t item, double){
|
|
| 93 |
return sc16_t( |
|
| 94 |
boost::int16_t(item >> 16), |
|
| 95 |
boost::int16_t(item >> 0) |
|
| 96 |
); |
|
| 97 |
} |
|
| 80 |
typedef item32_t (*xtox_t)(item32_t); |
|
| 98 | 81 |
|
| 99 | 82 |
/*********************************************************************** |
| 100 |
* Convert complex float buffer to items32 sc16
|
|
| 83 |
* Convert xx to items32 sc16 buffer
|
|
| 101 | 84 |
**********************************************************************/ |
| 102 |
static UHD_INLINE item32_t fc32_to_item32_sc16(fc32_t num, double scale_factor){
|
|
| 85 |
//! Pack one complex sample into an item32: real part in bits 31:16,
//! imaginary part in bits 15:0. Each component is multiplied by the
//! single-precision scale factor and cast to a signed 16-bit integer.
template <typename T> UHD_INLINE item32_t xx_to_item32_sc16_x1(
    const std::complex<T> &num, const double scale_factor
){
    const float gain = float(scale_factor);
    const boost::uint16_t hi_half = boost::int16_t(num.real()*gain);
    const boost::uint16_t lo_half = boost::int16_t(num.imag()*gain);
    return (item32_t(hi_half) << 16) | item32_t(lo_half);
}
| 107 | 92 |
|
| 108 |
/*********************************************************************** |
|
| 109 |
* Convert items32 sc16 buffer to complex float |
|
| 110 |
**********************************************************************/ |
|
| 111 |
static UHD_INLINE fc32_t item32_sc16_to_fc32(item32_t item, double scale_factor){
|
|
| 112 |
return fc32_t( |
|
| 113 |
float(boost::int16_t(item >> 16)*float(scale_factor)), |
|
| 114 |
float(boost::int16_t(item >> 0)*float(scale_factor)) |
|
| 115 |
); |
|
| 116 |
} |
|
| 117 |
|
|
| 118 |
/*********************************************************************** |
|
| 119 |
* Convert complex double buffer to items32 sc16 |
|
| 120 |
**********************************************************************/ |
|
| 121 |
static UHD_INLINE item32_t fc64_to_item32_sc16(fc64_t num, double scale_factor){
|
|
| 122 |
boost::uint16_t real = boost::int16_t(num.real()*scale_factor); |
|
| 123 |
boost::uint16_t imag = boost::int16_t(num.imag()*scale_factor); |
|
| 93 |
//! sc16 specialization: no scaling is applied, the two 16-bit components
//! are placed into the item32 as-is (real in bits 31:16, imag in bits 15:0).
template <> UHD_INLINE item32_t xx_to_item32_sc16_x1(
    const sc16_t &num, const double
){
    const boost::uint16_t hi_half = boost::int16_t(num.real());
    const boost::uint16_t lo_half = boost::int16_t(num.imag());
    return (item32_t(hi_half) << 16) | item32_t(lo_half);
}
| 126 | 100 |
|
| 127 |
/*********************************************************************** |
|
| 128 |
* Convert items32 sc16 buffer to complex double |
|
| 129 |
**********************************************************************/ |
|
| 130 |
static UHD_INLINE fc64_t item32_sc16_to_fc64(item32_t item, double scale_factor){
|
|
| 131 |
return fc64_t( |
|
| 132 |
float(boost::int16_t(item >> 16)*scale_factor), |
|
| 133 |
float(boost::int16_t(item >> 0)*scale_factor) |
|
| 134 |
); |
|
| 101 |
template <xtox_t to_wire, typename T> |
|
| 102 |
UHD_INLINE void xx_to_item32_sc16( |
|
| 103 |
const std::complex<T> *input, |
|
| 104 |
item32_t *output, |
|
| 105 |
const size_t nsamps, |
|
| 106 |
const double scale_factor |
|
| 107 |
){
|
|
| 108 |
for (size_t i = 0; i < nsamps; i++){
|
|
| 109 |
const item32_t item = xx_to_item32_sc16_x1(input[i], scale_factor); |
|
| 110 |
output[i] = to_wire(item); |
|
| 111 |
} |
|
| 135 | 112 |
} |
| 136 | 113 |
|
| 137 | 114 |
/*********************************************************************** |
| 138 |
* Convert items32 sc8 buffer to complex char
|
|
| 115 |
* Convert items32 sc16 buffer to xx
|
|
| 139 | 116 |
**********************************************************************/ |
| 140 |
static UHD_INLINE void item32_sc8_to_sc8(item32_t item, sc8_t &out0, sc8_t &out1, double){
|
|
| 141 |
out0 = sc8_t( |
|
| 142 |
boost::int8_t(item >> 8), |
|
| 143 |
boost::int8_t(item >> 0) |
|
| 144 |
); |
|
| 145 |
out1 = sc8_t( |
|
| 146 |
boost::int8_t(item >> 24), |
|
| 147 |
boost::int8_t(item >> 16) |
|
| 117 |
//! Unpack one item32 sc16 word into a complex sample: bits 31:16 become
//! the real part and bits 15:0 the imaginary part, each read as a signed
//! 16-bit value and multiplied by the single-precision scale factor.
template <typename T> UHD_INLINE std::complex<T> item32_sc16_x1_to_xx(
    const item32_t item, const double scale_factor
){
    const T re = T(boost::int16_t(item >> 16)*float(scale_factor));
    const T im = T(boost::int16_t(item >> 0)*float(scale_factor));
    return std::complex<T>(re, im);
}
| 150 | 125 |
|
| 151 |
/*********************************************************************** |
|
| 152 |
* Convert items32 sc8 buffer to complex short |
|
| 153 |
**********************************************************************/ |
|
| 154 |
static UHD_INLINE void item32_sc8_to_sc16(item32_t item, sc16_t &out0, sc16_t &out1, double){
|
|
| 155 |
out0 = sc16_t( |
|
| 156 |
boost::int8_t(item >> 8), |
|
| 157 |
boost::int8_t(item >> 0) |
|
| 158 |
); |
|
| 159 |
out1 = sc16_t( |
|
| 160 |
boost::int8_t(item >> 24), |
|
| 161 |
boost::int8_t(item >> 16) |
|
| 162 |
); |
|
| 163 |
} |
|
| 164 |
|
|
| 165 |
/*********************************************************************** |
|
| 166 |
* Convert items32 sc8 buffer to complex float |
|
| 167 |
**********************************************************************/ |
|
| 168 |
static UHD_INLINE void item32_sc8_to_fc32(item32_t item, fc32_t &out0, fc32_t &out1, double scale_factor){
|
|
| 169 |
out0 = fc32_t( |
|
| 170 |
float(boost::int8_t(item >> 8)*float(scale_factor)), |
|
| 171 |
float(boost::int8_t(item >> 0)*float(scale_factor)) |
|
| 172 |
); |
|
| 173 |
out1 = fc32_t( |
|
| 174 |
float(boost::int8_t(item >> 24)*float(scale_factor)), |
|
| 175 |
float(boost::int8_t(item >> 16)*float(scale_factor)) |
|
| 126 |
//! sc16 specialization: split the word into its two 16-bit halves without
//! scaling (upper half -> real, lower half -> imaginary).
template <> UHD_INLINE sc16_t item32_sc16_x1_to_xx(
    const item32_t item, const double
){
    const boost::int16_t re = boost::int16_t(item >> 16);
    const boost::int16_t im = boost::int16_t(item >> 0);
    return sc16_t(re, im);
}
| 178 | 133 |
|
| 179 |
/*********************************************************************** |
|
| 180 |
* Convert items32 sc8 buffer to complex double |
|
| 181 |
**********************************************************************/ |
|
| 182 |
static UHD_INLINE void item32_sc8_to_fc64(item32_t item, fc64_t &out0, fc64_t &out1, double scale_factor){
|
|
| 183 |
out0 = fc64_t( |
|
| 184 |
float(boost::int8_t(item >> 8)*scale_factor), |
|
| 185 |
float(boost::int8_t(item >> 0)*scale_factor) |
|
| 186 |
); |
|
| 187 |
out1 = fc64_t( |
|
| 188 |
float(boost::int8_t(item >> 24)*scale_factor), |
|
| 189 |
float(boost::int8_t(item >> 16)*scale_factor) |
|
| 190 |
); |
|
| 134 |
template <xtox_t to_host, typename T> |
|
| 135 |
UHD_INLINE void item32_sc16_to_xx( |
|
| 136 |
const item32_t *input, |
|
| 137 |
std::complex<T> *output, |
|
| 138 |
const size_t nsamps, |
|
| 139 |
const double scale_factor |
|
| 140 |
){
|
|
| 141 |
for (size_t i = 0; i < nsamps; i++){
|
|
| 142 |
const item32_t item_i = to_host(input[i]); |
|
| 143 |
output[i] = item32_sc16_x1_to_xx<T>(item_i, scale_factor); |
|
| 144 |
} |
|
| 191 | 145 |
} |
| 192 | 146 |
|
| 193 | 147 |
/*********************************************************************** |
| 194 |
* Convert complex char to items32 sc8 buffer
|
|
| 148 |
* Convert xx to items32 sc8 buffer
|
|
| 195 | 149 |
**********************************************************************/ |
| 196 |
static UHD_INLINE item32_t sc8_to_item32_sc8(sc8_t in0, sc8_t in1, double){
|
|
| 197 |
boost::uint8_t real0 = boost::int8_t(in0.real()); |
|
| 198 |
boost::uint8_t imag0 = boost::int8_t(in0.imag()); |
|
| 199 |
boost::uint8_t real1 = boost::int8_t(in1.real()); |
|
| 200 |
boost::uint8_t imag1 = boost::int8_t(in1.imag()); |
|
| 150 |
//! Pack two complex samples into one item32 holding two sc8 samples.
//! Byte lanes of the result: in0.real -> bits 15:8, in0.imag -> bits 7:0,
//! in1.real -> bits 31:24, in1.imag -> bits 23:16.
//! Each component is multiplied by the single-precision scale factor and
//! cast to a signed 8-bit integer before being placed in its lane.
template <typename T> UHD_INLINE item32_t xx_to_item32_sc8_x1(
    const std::complex<T> &in0, const std::complex<T> &in1, const double scale_factor
){
    const float gain = float(scale_factor);
    //Convert through int8_t first: casting a negative floating point value
    //directly to an unsigned type is undefined behaviour, while
    //int8_t -> uint8_t is a well defined modulo-256 conversion that keeps
    //the two's complement bit pattern.
    const boost::uint8_t real0 = boost::int8_t(in0.real()*gain);
    const boost::uint8_t imag0 = boost::int8_t(in0.imag()*gain);
    const boost::uint8_t real1 = boost::int8_t(in1.real()*gain);
    const boost::uint8_t imag1 = boost::int8_t(in1.imag()*gain);
    return
        (item32_t(real0) << 8) | (item32_t(imag0) << 0) |
        (item32_t(real1) << 24) | (item32_t(imag1) << 16)
    ;
}
| 206 | 160 |
|
| 207 |
/*********************************************************************** |
|
| 208 |
* Convert complex short to items32 sc8 buffer |
|
| 209 |
**********************************************************************/ |
|
| 210 |
static UHD_INLINE item32_t sc16_to_item32_sc8(sc16_t in0, sc16_t in1, double){
|
|
| 211 |
boost::uint8_t real0 = boost::int8_t(in0.real()); |
|
| 212 |
boost::uint8_t imag0 = boost::int8_t(in0.imag()); |
|
| 213 |
boost::uint8_t real1 = boost::int8_t(in1.real()); |
|
| 214 |
boost::uint8_t imag1 = boost::int8_t(in1.imag()); |
|
| 161 |
//! sc16 specialization: no scaling; each component is reduced to its low
//! 8 bits and placed in the same byte lanes as the generic version
//! (in0 -> bits 15:0, in1 -> bits 31:16, real above imaginary).
template <> UHD_INLINE item32_t xx_to_item32_sc8_x1(
    const sc16_t &in0, const sc16_t &in1, const double
){
    const boost::uint8_t re0 = boost::uint8_t(in0.real());
    const boost::uint8_t im0 = boost::uint8_t(in0.imag());
    const boost::uint8_t re1 = boost::uint8_t(in1.real());
    const boost::uint8_t im1 = boost::uint8_t(in1.imag());
    const item32_t lower = (item32_t(re0) << 8) | (item32_t(im0) << 0);
    const item32_t upper = (item32_t(re1) << 24) | (item32_t(im1) << 16);
    return lower | upper;
}
| 220 | 171 |
|
| 221 |
/*********************************************************************** |
|
| 222 |
* Convert complex float to items32 sc8 buffer |
|
| 223 |
**********************************************************************/ |
|
| 224 |
static UHD_INLINE item32_t fc32_to_item32_sc8(fc32_t in0, fc32_t in1, double scale_factor){
|
|
| 225 |
boost::uint8_t real0 = boost::int8_t(in0.real()*float(scale_factor)); |
|
| 226 |
boost::uint8_t imag0 = boost::int8_t(in0.imag()*float(scale_factor)); |
|
| 227 |
boost::uint8_t real1 = boost::int8_t(in1.real()*float(scale_factor)); |
|
| 228 |
boost::uint8_t imag1 = boost::int8_t(in1.imag()*float(scale_factor)); |
|
| 229 |
return |
|
| 230 |
(item32_t(real0) << 8) | (item32_t(imag0) << 0) | |
|
| 231 |
(item32_t(real1) << 24) | (item32_t(imag1) << 16) |
|
| 232 |
; |
|
| 172 |
template <xtox_t to_wire, typename T> |
|
| 173 |
UHD_INLINE void xx_to_item32_sc8( |
|
| 174 |
const std::complex<T> *input, |
|
| 175 |
item32_t *output, |
|
| 176 |
const size_t nsamps, |
|
| 177 |
const double scale_factor |
|
| 178 |
){
|
|
| 179 |
const size_t num_pairs = nsamps/2; |
|
| 180 |
for (size_t i = 0, j = 0; i < num_pairs; i++, j+=2){
|
|
| 181 |
const item32_t item = xx_to_item32_sc8_x1(input[j], input[j+1], scale_factor); |
|
| 182 |
output[i] = to_wire(item); |
|
| 183 |
} |
|
| 184 |
|
|
| 185 |
if (nsamps != num_pairs*2){
|
|
| 186 |
const item32_t item = xx_to_item32_sc8_x1(input[nsamps-1], std::complex<T>(0), scale_factor); |
|
| 187 |
output[num_pairs] = to_wire(item); |
|
| 188 |
} |
|
| 233 | 189 |
} |
| 234 | 190 |
|
| 235 | 191 |
/*********************************************************************** |
| 236 |
* Convert complex double to items32 sc8 buffer
|
|
| 192 |
* Convert items32 sc8 buffer to xx
|
|
| 237 | 193 |
**********************************************************************/ |
| 238 |
static UHD_INLINE item32_t fc64_to_item32_sc8(fc64_t in0, fc64_t in1, double scale_factor){
|
|
| 239 |
boost::uint8_t real0 = boost::int8_t(in0.real()*(scale_factor)); |
|
| 240 |
boost::uint8_t imag0 = boost::int8_t(in0.imag()*(scale_factor)); |
|
| 241 |
boost::uint8_t real1 = boost::int8_t(in1.real()*(scale_factor)); |
|
| 242 |
boost::uint8_t imag1 = boost::int8_t(in1.imag()*(scale_factor)); |
|
| 243 |
return |
|
| 244 |
(item32_t(real0) << 8) | (item32_t(imag0) << 0) | |
|
| 245 |
(item32_t(real1) << 24) | (item32_t(imag1) << 16) |
|
| 246 |
; |
|
| 194 |
//! Unpack one item32 sc8 word into two complex samples.
//! out0 comes from bits 15:8 (real) and 7:0 (imag); out1 comes from
//! bits 31:24 (real) and 23:16 (imag). Each byte is read as a signed 8-bit
//! value and multiplied by the single-precision scale factor.
template <typename T> UHD_INLINE void item32_sc8_x1_to_xx(
    const item32_t item, std::complex<T> &out0, std::complex<T> &out1, const double scale_factor
){
    const T re0 = T(boost::int8_t(item >> 8)*float(scale_factor));
    const T im0 = T(boost::int8_t(item >> 0)*float(scale_factor));
    out0 = std::complex<T>(re0, im0);

    const T re1 = T(boost::int8_t(item >> 24)*float(scale_factor));
    const T im1 = T(boost::int8_t(item >> 16)*float(scale_factor));
    out1 = std::complex<T>(re1, im1);
}
|
| 206 |
|
|
| 207 |
//! sc16 specialization: no scaling; each signed byte is widened to
//! 16 bits. Byte lanes match the generic version (out0 from bits 15:0,
//! out1 from bits 31:16, real above imaginary).
template <> UHD_INLINE void item32_sc8_x1_to_xx(
    const item32_t item, sc16_t &out0, sc16_t &out1, const double
){
    const boost::int16_t re0 = boost::int16_t(boost::int8_t(item >> 8));
    const boost::int16_t im0 = boost::int16_t(boost::int8_t(item >> 0));
    out0 = sc16_t(re0, im0);

    const boost::int16_t re1 = boost::int16_t(boost::int8_t(item >> 24));
    const boost::int16_t im1 = boost::int16_t(boost::int8_t(item >> 16));
    out1 = sc16_t(re1, im1);
}
|
| 219 |
|
|
| 220 |
template <xtox_t to_host, typename T> |
|
| 221 |
UHD_INLINE void item32_sc8_to_xx( |
|
| 222 |
const item32_t *input, |
|
| 223 |
std::complex<T> *output, |
|
| 224 |
const size_t nsamps, |
|
| 225 |
const double scale_factor |
|
| 226 |
){
|
|
| 227 |
input = reinterpret_cast<const item32_t *>(size_t(input) & ~0x3); |
|
| 228 |
std::complex<T> dummy; |
|
| 229 |
size_t num_samps = nsamps; |
|
| 230 |
|
|
| 231 |
if ((size_t(input) & 0x3) != 0){
|
|
| 232 |
const item32_t item0 = to_host(*input++); |
|
| 233 |
item32_sc8_x1_to_xx(item0, dummy, *output++, scale_factor); |
|
| 234 |
num_samps--; |
|
| 235 |
} |
|
| 236 |
|
|
| 237 |
const size_t num_pairs = num_samps/2; |
|
| 238 |
for (size_t i = 0, j = 0; i < num_pairs; i++, j+=2){
|
|
| 239 |
const item32_t item_i = to_host(input[i]); |
|
| 240 |
item32_sc8_x1_to_xx(item_i, output[j], output[j+1], scale_factor); |
|
| 241 |
} |
|
| 242 |
|
|
| 243 |
if (num_samps != num_pairs*2){
|
|
| 244 |
const item32_t item_n = to_host(input[num_pairs]); |
|
| 245 |
item32_sc8_x1_to_xx(item_n, output[num_samps-1], dummy, scale_factor); |
|
| 246 |
} |
|
| 247 | 247 |
} |
| 248 | 248 |
|
| 249 | 249 |
#endif /* INCLUDED_LIBUHD_CONVERT_COMMON_HPP */ |
| /dev/null | ||
|---|---|---|
| 1 |
// |
|
| 2 |
// Copyright 2012 Ettus Research LLC |
|
| 3 |
// |
|
| 4 |
// This program is free software: you can redistribute it and/or modify |
|
| 5 |
// it under the terms of the GNU General Public License as published by |
|
| 6 |
// the Free Software Foundation, either version 3 of the License, or |
|
| 7 |
// (at your option) any later version. |
|
| 8 |
// |
|
| 9 |
// This program is distributed in the hope that it will be useful, |
|
| 10 |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 11 |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
| 12 |
// GNU General Public License for more details. |
|
| 13 |
// |
|
| 14 |
// You should have received a copy of the GNU General Public License |
|
| 15 |
// along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
| 16 |
// |
|
| 17 |
|
|
| 18 |
#include "convert_common.hpp" |
|
| 19 |
#include <uhd/utils/byteswap.hpp> |
|
| 20 |
#include <emmintrin.h> |
|
| 21 |
|
|
| 22 |
using namespace uhd::convert; |
|
| 23 |
|
|
| 24 |
//! Scale four float vectors, convert them to 32-bit integers, swap the two
//! 64-bit halves of each vector (_MM_SHUFFLE(1, 0, 3, 2)), then pack with
//! signed saturation down to 16 and finally 8 bits per element.
UHD_INLINE __m128i pack_sc32_4x_be(
    const __m128 &in0, const __m128 &in1,
    const __m128 &in2, const __m128 &in3,
    const __m128 &scalar
){
    const __m128i q0 = _mm_shuffle_epi32(
        _mm_cvtps_epi32(_mm_mul_ps(in0, scalar)), _MM_SHUFFLE(1, 0, 3, 2));
    const __m128i q1 = _mm_shuffle_epi32(
        _mm_cvtps_epi32(_mm_mul_ps(in1, scalar)), _MM_SHUFFLE(1, 0, 3, 2));
    const __m128i q2 = _mm_shuffle_epi32(
        _mm_cvtps_epi32(_mm_mul_ps(in2, scalar)), _MM_SHUFFLE(1, 0, 3, 2));
    const __m128i q3 = _mm_shuffle_epi32(
        _mm_cvtps_epi32(_mm_mul_ps(in3, scalar)), _MM_SHUFFLE(1, 0, 3, 2));

    const __m128i words_lo = _mm_packs_epi32(q0, q1);
    const __m128i words_hi = _mm_packs_epi32(q2, q3);
    return _mm_packs_epi16(words_lo, words_hi);
}
|
| 43 |
|
|
| 44 |
//! Scale four float vectors, convert them to 32-bit integers, swap the
//! elements within each adjacent pair (_MM_SHUFFLE(2, 3, 0, 1)), then pack
//! with signed saturation down to 16 and finally 8 bits per element.
UHD_INLINE __m128i pack_sc32_4x_le(
    const __m128 &in0, const __m128 &in1,
    const __m128 &in2, const __m128 &in3,
    const __m128 &scalar
){
    const __m128i q0 = _mm_shuffle_epi32(
        _mm_cvtps_epi32(_mm_mul_ps(in0, scalar)), _MM_SHUFFLE(2, 3, 0, 1));
    const __m128i q1 = _mm_shuffle_epi32(
        _mm_cvtps_epi32(_mm_mul_ps(in1, scalar)), _MM_SHUFFLE(2, 3, 0, 1));
    const __m128i q2 = _mm_shuffle_epi32(
        _mm_cvtps_epi32(_mm_mul_ps(in2, scalar)), _MM_SHUFFLE(2, 3, 0, 1));
    const __m128i q3 = _mm_shuffle_epi32(
        _mm_cvtps_epi32(_mm_mul_ps(in3, scalar)), _MM_SHUFFLE(2, 3, 0, 1));

    const __m128i words_lo = _mm_packs_epi32(q0, q1);
    const __m128i words_hi = _mm_packs_epi32(q2, q3);
    return _mm_packs_epi16(words_lo, words_hi);
}
|
| 63 |
|
|
| 64 |
/***********************************************************************
 * SSE2 converter: fc32 samples -> sc8_item32_be words (two samples each).
 * Eight samples are converted per vector iteration via pack_sc32_4x_be;
 * the leftover samples go through the scalar fc32_to_item32_sc8 helper
 * followed by uhd::byteswap. An odd trailing sample is paired with zero.
 **********************************************************************/
DECLARE_CONVERTER(fc32, 1, sc8_item32_be, 1, PRIORITY_SIMD){
    const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);
    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);

    const __m128 scalar = _mm_set_ps1(float(scale_factor));

    //vector body; _al_ selects the aligned ("_") or unaligned ("u_") load
    #define convert_fc32_1_to_sc8_item32_1_bswap_guts(_al_) \
    for (size_t j = 0; i+7 < nsamps; i+=8, j+=4){ \
        /* load from input */ \
        __m128 tmp0 = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+0)); \
        __m128 tmp1 = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+2)); \
        __m128 tmp2 = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+4)); \
        __m128 tmp3 = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+6)); \
        \
        /* convert */ \
        const __m128i tmpi = pack_sc32_4x_be(tmp0, tmp1, tmp2, tmp3, scalar); \
        \
        /* store to output */ \
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+j), tmpi); \
    }

    size_t i = 0;

    //dispatch according to alignment
    if ((size_t(input) & 0xf) == 0){
        convert_fc32_1_to_sc8_item32_1_bswap_guts(_)
    }
    else{
        convert_fc32_1_to_sc8_item32_1_bswap_guts(u_)
    }

    //the helper macro is private to this converter, do not leak it
    #undef convert_fc32_1_to_sc8_item32_1_bswap_guts

    //convert remainder
    const size_t num_pairs = nsamps/2;
    for (size_t j = i/2; j < num_pairs; j++, i+=2){
        const item32_t item = fc32_to_item32_sc8(input[i], input[i+1], scale_factor);
        output[j] = uhd::byteswap(item);
    }

    //odd sample count: pair the last sample with a zero sample
    if (nsamps != num_pairs*2){
        const item32_t item = fc32_to_item32_sc8(input[nsamps-1], 0, scale_factor);
        output[num_pairs] = uhd::byteswap(item);
    }
}
|
| 107 |
|
|
| 108 |
/***********************************************************************
 * SSE2 converter: fc32 samples -> sc8_item32_le words (two samples each).
 * Eight samples are converted per vector iteration via pack_sc32_4x_le;
 * the leftover samples go through the scalar fc32_to_item32_sc8 helper
 * and are stored without a byteswap. An odd trailing sample is paired
 * with zero.
 **********************************************************************/
DECLARE_CONVERTER(fc32, 1, sc8_item32_le, 1, PRIORITY_SIMD){
    const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);
    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);

    const __m128 scalar = _mm_set_ps1(float(scale_factor));

    //vector body; _al_ selects the aligned ("_") or unaligned ("u_") load
    #define convert_fc32_1_to_sc8_item32_1_nswap_guts(_al_) \
    for (size_t j = 0; i+7 < nsamps; i+=8, j+=4){ \
        /* load from input */ \
        __m128 tmp0 = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+0)); \
        __m128 tmp1 = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+2)); \
        __m128 tmp2 = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+4)); \
        __m128 tmp3 = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+6)); \
        \
        /* convert */ \
        const __m128i tmpi = pack_sc32_4x_le(tmp0, tmp1, tmp2, tmp3, scalar); \
        \
        /* store to output */ \
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+j), tmpi); \
    }

    size_t i = 0;

    //dispatch according to alignment
    if ((size_t(input) & 0xf) == 0){
        convert_fc32_1_to_sc8_item32_1_nswap_guts(_)
    }
    else{
        convert_fc32_1_to_sc8_item32_1_nswap_guts(u_)
    }

    //the helper macro is private to this converter, do not leak it
    #undef convert_fc32_1_to_sc8_item32_1_nswap_guts

    //convert remainder
    const size_t num_pairs = nsamps/2;
    for (size_t j = i/2; j < num_pairs; j++, i+=2){
        const item32_t item = fc32_to_item32_sc8(input[i], input[i+1], scale_factor);
        output[j] = (item);
    }

    //odd sample count: pair the last sample with a zero sample
    if (nsamps != num_pairs*2){
        const item32_t item = fc32_to_item32_sc8(input[nsamps-1], 0, scale_factor);
        output[num_pairs] = (item);
    }
}
|
| /dev/null | ||
|---|---|---|
| 1 |
// |
|
| 2 |
// Copyright 2011 Ettus Research LLC |
|
| 3 |
// |
|
| 4 |
// This program is free software: you can redistribute it and/or modify |
|
| 5 |
// it under the terms of the GNU General Public License as published by |
|
| 6 |
// the Free Software Foundation, either version 3 of the License, or |
|
| 7 |
// (at your option) any later version. |
|
| 8 |
// |
|
| 9 |
// This program is distributed in the hope that it will be useful, |
|
| 10 |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 11 |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
| 12 |
// GNU General Public License for more details. |
|
| 13 |
// |
|
| 14 |
// You should have received a copy of the GNU General Public License |
|
| 15 |
// along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
| 16 |
// |
|
| 17 |
|
|
| 18 |
#include "convert_common.hpp" |
|
| 19 |
#include <uhd/utils/byteswap.hpp> |
|
| 20 |
#include <emmintrin.h> |
|
| 21 |
|
|
| 22 |
using namespace uhd::convert; |
|
| 23 |
|
|
| 24 |
/***********************************************************************
 * SSE2 converter: fc32 samples -> sc16_item32_le words (one per sample).
 * Four samples are converted per vector iteration; samples before and
 * after the vector loop go through the scalar fc32_to_item32_sc16 helper.
 * Note the scalar helper casts to int16 while the vector path uses
 * _mm_cvtps_epi32 with a saturating pack.
 **********************************************************************/
DECLARE_CONVERTER(fc32, 1, sc16_item32_le, 1, PRIORITY_SIMD){
    const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);
    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);

    const __m128 scalar = _mm_set_ps1(float(scale_factor));

    //vector body; _al_ selects the aligned ("_") or unaligned ("u_") load
    #define convert_fc32_1_to_item32_1_nswap_guts(_al_) \
    for (; i+3 < nsamps; i+=4){ \
        /* load from input */ \
        __m128 tmplo = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+0)); \
        __m128 tmphi = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+2)); \
        \
        /* convert and scale */ \
        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); \
        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); \
        \
        /* pack + swap 16-bit pairs */ \
        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); \
        tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \
        tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \
        \
        /* store to output */ \
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); \
    }

    size_t i = 0;

    //dispatch according to alignment
    switch (size_t(input) & 0xf){
    case 0x8:
        //one scalar sample moves the input onto a 16-byte boundary
        //(presumably sizeof(fc32_t) == 8); an empty buffer must not be touched
        if (nsamps == 0) break;
        output[i] = fc32_to_item32_sc16(input[i], float(scale_factor)); i++;
        //intentional fall through into the aligned loop
    case 0x0:
        convert_fc32_1_to_item32_1_nswap_guts(_)
        break;
    default: convert_fc32_1_to_item32_1_nswap_guts(u_)
    }

    //the helper macro is private to this converter, do not leak it
    #undef convert_fc32_1_to_item32_1_nswap_guts

    //convert remainder
    for (; i < nsamps; i++){
        output[i] = fc32_to_item32_sc16(input[i], float(scale_factor));
    }
}
|
| 66 |
|
|
| 67 |
/***********************************************************************
 * SSE2 converter: fc32 samples -> sc16_item32_be words (one per sample).
 * Four samples are converted per vector iteration; samples before and
 * after the vector loop go through the scalar fc32_to_item32_sc16 helper
 * followed by uhd::byteswap.
 * Note the scalar helper casts to int16 while the vector path uses
 * _mm_cvtps_epi32 with a saturating pack.
 **********************************************************************/
DECLARE_CONVERTER(fc32, 1, sc16_item32_be, 1, PRIORITY_SIMD){
    const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);
    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);

    const __m128 scalar = _mm_set_ps1(float(scale_factor));

    //vector body; _al_ selects the aligned ("_") or unaligned ("u_") load
    #define convert_fc32_1_to_item32_1_bswap_guts(_al_) \
    for (; i+3 < nsamps; i+=4){ \
        /* load from input */ \
        __m128 tmplo = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+0)); \
        __m128 tmphi = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+2)); \
        \
        /* convert and scale */ \
        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); \
        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); \
        \
        /* pack + byteswap -> byteswap 16 bit words */ \
        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); \
        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \
        \
        /* store to output */ \
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); \
    }

    size_t i = 0;

    //dispatch according to alignment
    switch (size_t(input) & 0xf){
    case 0x8:
        //one scalar sample moves the input onto a 16-byte boundary
        //(presumably sizeof(fc32_t) == 8); an empty buffer must not be touched
        if (nsamps == 0) break;
        output[i] = uhd::byteswap(fc32_to_item32_sc16(input[i], float(scale_factor))); i++;
        //intentional fall through into the aligned loop
    case 0x0:
        convert_fc32_1_to_item32_1_bswap_guts(_)
        break;
    default: convert_fc32_1_to_item32_1_bswap_guts(u_)
    }

    //the helper macro is private to this converter, do not leak it
    #undef convert_fc32_1_to_item32_1_bswap_guts

    //convert remainder
    for (; i < nsamps; i++){
        output[i] = uhd::byteswap(fc32_to_item32_sc16(input[i], float(scale_factor)));
    }
}
|
| 108 |
|
|
| 109 |
/***********************************************************************
 * SSE2 converter: sc16_item32_le words -> fc32 samples (one per word).
 * Four words are converted per vector iteration; samples before and
 * after the vector loop go through the scalar item32_sc16_to_fc32 helper.
 * The vector path places each 16-bit value in the upper half of a 32-bit
 * lane, hence the extra 1/(1 << 16) folded into the vector scale factor.
 **********************************************************************/
DECLARE_CONVERTER(sc16_item32_le, 1, fc32, 1, PRIORITY_SIMD){
    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
    fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]);

    const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16));
    const __m128i zeroi = _mm_setzero_si128();

    //vector body; _al_ selects the aligned ("_") or unaligned ("u_") store
    #define convert_item32_1_to_fc32_1_nswap_guts(_al_) \
    for (; i+3 < nsamps; i+=4){ \
        /* load from input */ \
        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \
        \
        /* unpack + swap 16-bit pairs */ \
        tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \
        tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \
        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \
        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); \
        \
        /* convert and scale */ \
        __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); \
        __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); \
        \
        /* store to output */ \
        _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+i+0), tmplo); \
        _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+i+2), tmphi); \
    }

    size_t i = 0;

    //dispatch according to alignment
    switch (size_t(output) & 0xf){
    case 0x8:
        //one scalar sample moves the output onto a 16-byte boundary
        //(presumably sizeof(fc32_t) == 8); an empty buffer must not be touched
        if (nsamps == 0) break;
        output[i] = item32_sc16_to_fc32(input[i], float(scale_factor)); i++;
        //intentional fall through into the aligned loop
    case 0x0:
        convert_item32_1_to_fc32_1_nswap_guts(_)
        break;
    default: convert_item32_1_to_fc32_1_nswap_guts(u_)
    }

    //the helper macro is private to this converter, do not leak it
    #undef convert_item32_1_to_fc32_1_nswap_guts

    //convert remainder
    for (; i < nsamps; i++){
        output[i] = item32_sc16_to_fc32(input[i], float(scale_factor));
    }
}
|
| 153 |
|
|
| 154 |
/***********************************************************************
 * SSE2 converter: sc16_item32_be words -> fc32 samples (one per word).
 * Four words are converted per vector iteration; samples before and
 * after the vector loop are byteswapped with uhd::byteswap and passed to
 * the scalar item32_sc16_to_fc32 helper.
 * The vector path places each 16-bit value in the upper half of a 32-bit
 * lane, hence the extra 1/(1 << 16) folded into the vector scale factor.
 **********************************************************************/
DECLARE_CONVERTER(sc16_item32_be, 1, fc32, 1, PRIORITY_SIMD){
    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
    fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]);

    const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16));
    const __m128i zeroi = _mm_setzero_si128();

    //vector body; _al_ selects the aligned ("_") or unaligned ("u_") store
    #define convert_item32_1_to_fc32_1_bswap_guts(_al_) \
    for (; i+3 < nsamps; i+=4){ \
        /* load from input */ \
        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \
        \
        /* byteswap + unpack -> byteswap 16 bit words */ \
        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \
        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \
        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); \
        \
        /* convert and scale */ \
        __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); \
        __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); \
        \
        /* store to output */ \
        _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+i+0), tmplo); \
        _mm_store ## _al_ ## ps(reinterpret_cast<float *>(output+i+2), tmphi); \
    }

    size_t i = 0;

    //dispatch according to alignment
    switch (size_t(output) & 0xf){
    case 0x8:
        //one scalar sample moves the output onto a 16-byte boundary
        //(presumably sizeof(fc32_t) == 8); an empty buffer must not be touched
        if (nsamps == 0) break;
        output[i] = item32_sc16_to_fc32(uhd::byteswap(input[i]), float(scale_factor)); i++;
        //intentional fall through into the aligned loop
    case 0x0:
        convert_item32_1_to_fc32_1_bswap_guts(_)
        break;
    default: convert_item32_1_to_fc32_1_bswap_guts(u_)
    }

    //the helper macro is private to this converter, do not leak it
    #undef convert_item32_1_to_fc32_1_bswap_guts

    //convert remainder
    for (; i < nsamps; i++){
        output[i] = item32_sc16_to_fc32(uhd::byteswap(input[i]), float(scale_factor));
    }
}
|
| /dev/null | ||
|---|---|---|
| 1 |
// |
|
| 2 |
// Copyright 2012 Ettus Research LLC |
|
| 3 |
// |
|
| 4 |
// This program is free software: you can redistribute it and/or modify |
|
| 5 |
// it under the terms of the GNU General Public License as published by |
|
| 6 |
// the Free Software Foundation, either version 3 of the License, or |
|
| 7 |
// (at your option) any later version. |
|
| 8 |
// |
|
| 9 |
// This program is distributed in the hope that it will be useful, |
|
| 10 |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 11 |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
| 12 |
// GNU General Public License for more details. |
|
| 13 |
// |
|
| 14 |
// You should have received a copy of the GNU General Public License |
|
| 15 |
// along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
| 16 |
// |
|
| 17 |
|
|
| 18 |
#include "convert_common.hpp" |
|
| 19 |
#include <uhd/utils/byteswap.hpp> |
|
| 20 |
#include <emmintrin.h> |
|
| 21 |
|
|
| 22 |
using namespace uhd::convert; |
|
| 23 |
|
|
| 24 |
/*
 * Narrow four registers of 32-bit integers into one register of 8-bit integers.
 * Both steps use the signed-saturating pack intrinsics: 32 -> 16 bits, then 16 -> 8 bits.
 * Lane order follows the argument order (in0 lowest, in3 highest).
 */
UHD_INLINE __m128i pack_sc8_item32_4x(
    const __m128i &in0, const __m128i &in1,
    const __m128i &in2, const __m128i &in3
){
    return _mm_packs_epi16(
        _mm_packs_epi32(in0, in1),
        _mm_packs_epi32(in2, in3)
    );
}
|
| 32 |
|
|
| 33 |
/*
 * Scale two registers of doubles and truncate them to 32-bit integers,
 * returning all four integers in one register.
 * The converted "hi" pair occupies the low 64 bits of the result and the
 * converted "lo" pair the high 64 bits.
 */
UHD_INLINE __m128i pack_sc32_4x_be(
    const __m128d &lo, const __m128d &hi,
    const __m128d &scalar
){
    const __m128i from_hi = _mm_cvttpd_epi32(_mm_mul_pd(hi, scalar));
    const __m128i from_lo = _mm_cvttpd_epi32(_mm_mul_pd(lo, scalar));
    return _mm_unpacklo_epi64(from_hi, from_lo);
}
|
| 41 |
|
|
| 42 |
/*
 * Scale two registers of doubles and truncate them to 32-bit integers,
 * returning all four integers in one register.
 * The "lo" pair is placed in the low 64 bits and the "hi" pair in the high
 * 64 bits, after which the two 32-bit lanes inside each 64-bit half are exchanged.
 */
UHD_INLINE __m128i pack_sc32_4x_le(
    const __m128d &lo, const __m128d &hi,
    const __m128d &scalar
){
    const __m128i from_lo = _mm_cvttpd_epi32(_mm_mul_pd(lo, scalar));
    const __m128i from_hi = _mm_cvttpd_epi32(_mm_mul_pd(hi, scalar));
    return _mm_shuffle_epi32(
        _mm_unpacklo_epi64(from_lo, from_hi),
        _MM_SHUFFLE(2, 3, 0, 1)
    );
}
|
| 51 |
|
|
| 52 |
/*
 * SSE2 converter: fc64 samples -> big-endian sc8 item32 words (two samples per word).
 * The vector loop consumes eight samples and emits four words per pass;
 * leftover pairs, and a final odd sample paired with zero, use
 * fc64_to_item32_sc8 followed by a byteswap.
 */
DECLARE_CONVERTER(fc64, 1, sc8_item32_be, 1, PRIORITY_SIMD){
    const fc64_t *input = reinterpret_cast<const fc64_t *>(inputs[0]);
    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);

    const __m128d scalar = _mm_set1_pd(scale_factor);

    #define convert_fc64_1_to_sc8_item32_1_bswap_guts(_al_) \
    for (size_t word = 0; samp+7 < nsamps; samp+=8, word+=4){ \
        /* one complex double per 128-bit load */ \
        const __m128d s0 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+samp+0)); \
        const __m128d s1 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+samp+1)); \
        const __m128d s2 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+samp+2)); \
        const __m128d s3 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+samp+3)); \
        const __m128d s4 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+samp+4)); \
        const __m128d s5 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+samp+5)); \
        const __m128d s6 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+samp+6)); \
        const __m128d s7 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+samp+7)); \
        \
        /* scale and convert each pair of samples to four 32-bit integers */ \
        const __m128i quad0 = pack_sc32_4x_be(s0, s1, scalar); \
        const __m128i quad1 = pack_sc32_4x_be(s2, s3, scalar); \
        const __m128i quad2 = pack_sc32_4x_be(s4, s5, scalar); \
        const __m128i quad3 = pack_sc32_4x_be(s6, s7, scalar); \
        \
        /* narrow to 8 bits and write four items (unaligned store) */ \
        const __m128i packed = pack_sc8_item32_4x(quad0, quad1, quad2, quad3); \
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+word), packed); \
    }

    size_t samp = 0;

    //aligned loads are only used when the input sits on a 16-byte boundary
    if ((size_t(input) & 0xf) != 0){
        convert_fc64_1_to_sc8_item32_1_bswap_guts(u_)
    }
    else{
        convert_fc64_1_to_sc8_item32_1_bswap_guts(_)
    }

    //leftover sample pairs, one item32 each
    const size_t num_pairs = nsamps/2;
    size_t word = samp/2;
    while (word < num_pairs){
        const item32_t item = fc64_to_item32_sc8(input[samp], input[samp+1], scale_factor);
        output[word] = uhd::byteswap(item);
        word++;
        samp+=2;
    }

    //odd sample count: the last item pairs the final sample with zero
    if ((nsamps & 1) != 0){
        const item32_t item = fc64_to_item32_sc8(input[nsamps-1], 0, scale_factor);
        output[num_pairs] = uhd::byteswap(item);
    }
}
|
| 104 |
|
|
| 105 |
/*
 * SSE2 converter: fc64 samples -> little-endian sc8 item32 words (two samples per word).
 * The vector loop consumes eight samples and emits four words per pass;
 * leftover pairs, and a final odd sample paired with zero, use
 * fc64_to_item32_sc8 with no byteswap.
 */
DECLARE_CONVERTER(fc64, 1, sc8_item32_le, 1, PRIORITY_SIMD){
    const fc64_t *input = reinterpret_cast<const fc64_t *>(inputs[0]);
    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);

    const __m128d scalar = _mm_set1_pd(scale_factor);

    #define convert_fc64_1_to_sc8_item32_1_nswap_guts(_al_) \
    for (size_t word = 0; samp+7 < nsamps; samp+=8, word+=4){ \
        /* one complex double per 128-bit load */ \
        const __m128d s0 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+samp+0)); \
        const __m128d s1 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+samp+1)); \
        const __m128d s2 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+samp+2)); \
        const __m128d s3 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+samp+3)); \
        const __m128d s4 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+samp+4)); \
        const __m128d s5 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+samp+5)); \
        const __m128d s6 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+samp+6)); \
        const __m128d s7 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+samp+7)); \
        \
        /* scale and convert each pair of samples to four 32-bit integers */ \
        const __m128i quad0 = pack_sc32_4x_le(s0, s1, scalar); \
        const __m128i quad1 = pack_sc32_4x_le(s2, s3, scalar); \
        const __m128i quad2 = pack_sc32_4x_le(s4, s5, scalar); \
        const __m128i quad3 = pack_sc32_4x_le(s6, s7, scalar); \
        \
        /* narrow to 8 bits and write four items (unaligned store) */ \
        const __m128i packed = pack_sc8_item32_4x(quad0, quad1, quad2, quad3); \
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+word), packed); \
    }

    size_t samp = 0;

    //aligned loads are only used when the input sits on a 16-byte boundary
    if ((size_t(input) & 0xf) != 0){
        convert_fc64_1_to_sc8_item32_1_nswap_guts(u_)
    }
    else{
        convert_fc64_1_to_sc8_item32_1_nswap_guts(_)
    }

    //leftover sample pairs, one item32 each
    const size_t num_pairs = nsamps/2;
    size_t word = samp/2;
    while (word < num_pairs){
        const item32_t item = fc64_to_item32_sc8(input[samp], input[samp+1], scale_factor);
        output[word] = (item);
        word++;
        samp+=2;
    }

    //odd sample count: the last item pairs the final sample with zero
    if ((nsamps & 1) != 0){
        const item32_t item = fc64_to_item32_sc8(input[nsamps-1], 0, scale_factor);
        output[num_pairs] = (item);
    }
}
|
| /dev/null | ||
|---|---|---|
| 1 |
// |
|
| 2 |
// Copyright 2011 Ettus Research LLC |
|
| 3 |
// |
|
| 4 |
// This program is free software: you can redistribute it and/or modify |
|
| 5 |
// it under the terms of the GNU General Public License as published by |
|
| 6 |
// the Free Software Foundation, either version 3 of the License, or |
|
| 7 |
// (at your option) any later version. |
|
| 8 |
// |
|
| 9 |
// This program is distributed in the hope that it will be useful, |
|
| 10 |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 11 |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
| 12 |
// GNU General Public License for more details. |
|
| 13 |
// |
|
| 14 |
// You should have received a copy of the GNU General Public License |
|
| 15 |
// along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
| 16 |
// |
|
| 17 |
|
|
| 18 |
#include "convert_common.hpp" |
|
| 19 |
#include <uhd/utils/byteswap.hpp> |
|
| 20 |
#include <emmintrin.h> |
|
| 21 |
|
|
| 22 |
using namespace uhd::convert; |
|
| 23 |
|
|
| 24 |
/*
 * SSE2 converter: fc64 samples -> little-endian sc16 item32 words.
 * Four samples are scaled, truncated and packed per vector pass;
 * the tail is converted one sample at a time with fc64_to_item32_sc16.
 */
DECLARE_CONVERTER(fc64, 1, sc16_item32_le, 1, PRIORITY_SIMD){
    const fc64_t *input = reinterpret_cast<const fc64_t *>(inputs[0]);
    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);

    const __m128d scalar = _mm_set1_pd(scale_factor);

    #define convert_fc64_1_to_item32_1_nswap_guts(_al_) \
    for (; samp+3 < nsamps; samp+=4){ \
        /* one complex double per 128-bit load */ \
        const __m128d s0 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+samp+0)); \
        const __m128d s1 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+samp+1)); \
        const __m128d s2 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+samp+2)); \
        const __m128d s3 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+samp+3)); \
        \
        /* scale, truncate to 32-bit ints, gather two samples per register */ \
        const __m128i lo = _mm_unpacklo_epi64( \
            _mm_cvttpd_epi32(_mm_mul_pd(s0, scalar)), \
            _mm_cvttpd_epi32(_mm_mul_pd(s1, scalar))); \
        const __m128i hi = _mm_unpacklo_epi64( \
            _mm_cvttpd_epi32(_mm_mul_pd(s2, scalar)), \
            _mm_cvttpd_epi32(_mm_mul_pd(s3, scalar))); \
        \
        /* narrow to 16 bits, then exchange the two halves of every 32-bit item */ \
        __m128i items = _mm_packs_epi32(lo, hi); \
        items = _mm_shufflelo_epi16(items, _MM_SHUFFLE(2, 3, 0, 1)); \
        items = _mm_shufflehi_epi16(items, _MM_SHUFFLE(2, 3, 0, 1)); \
        \
        /* unaligned store of four items */ \
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+samp), items); \
    }

    size_t samp = 0;

    //aligned loads are only used when the input sits on a 16-byte boundary
    if ((size_t(input) & 0xf) != 0){
        convert_fc64_1_to_item32_1_nswap_guts(u_)
    }
    else{
        convert_fc64_1_to_item32_1_nswap_guts(_)
    }

    //scalar path for whatever the vector loop left behind
    while (samp < nsamps){
        output[samp] = fc64_to_item32_sc16(input[samp], scale_factor);
        samp++;
    }
}
|
| 70 |
|
|
| 71 |
/*
 * SSE2 converter: fc64 samples -> big-endian sc16 item32 words.
 * Four samples are scaled, truncated, packed and byteswapped per vector pass;
 * the tail is converted one sample at a time with fc64_to_item32_sc16 + byteswap.
 */
DECLARE_CONVERTER(fc64, 1, sc16_item32_be, 1, PRIORITY_SIMD){
    const fc64_t *input = reinterpret_cast<const fc64_t *>(inputs[0]);
    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);

    const __m128d scalar = _mm_set1_pd(scale_factor);

    #define convert_fc64_1_to_item32_1_bswap_guts(_al_) \
    for (; samp+3 < nsamps; samp+=4){ \
        /* one complex double per 128-bit load */ \
        const __m128d s0 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+samp+0)); \
        const __m128d s1 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+samp+1)); \
        const __m128d s2 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+samp+2)); \
        const __m128d s3 = _mm_load ## _al_ ## pd(reinterpret_cast<const double *>(input+samp+3)); \
        \
        /* scale, truncate to 32-bit ints, gather two samples per register */ \
        const __m128i lo = _mm_unpacklo_epi64( \
            _mm_cvttpd_epi32(_mm_mul_pd(s0, scalar)), \
            _mm_cvttpd_epi32(_mm_mul_pd(s1, scalar))); \
        const __m128i hi = _mm_unpacklo_epi64( \
            _mm_cvttpd_epi32(_mm_mul_pd(s2, scalar)), \
            _mm_cvttpd_epi32(_mm_mul_pd(s3, scalar))); \
        \
        /* narrow to 16 bits, then swap the two bytes of every 16-bit word */ \
        const __m128i words = _mm_packs_epi32(lo, hi); \
        const __m128i items = _mm_or_si128(_mm_srli_epi16(words, 8), _mm_slli_epi16(words, 8)); \
        \
        /* unaligned store of four items */ \
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+samp), items); \
    }

    size_t samp = 0;

    //aligned loads are only used when the input sits on a 16-byte boundary
    if ((size_t(input) & 0xf) != 0){
        convert_fc64_1_to_item32_1_bswap_guts(u_)
    }
    else{
        convert_fc64_1_to_item32_1_bswap_guts(_)
    }

    //scalar path for whatever the vector loop left behind
    while (samp < nsamps){
        output[samp] = uhd::byteswap(fc64_to_item32_sc16(input[samp], scale_factor));
        samp++;
    }
}
|
| 116 |
|
|
| 117 |
/*
 * SSE2 converter: little-endian sc16 item32 words -> fc64 samples.
 * Four items are widened and scaled per vector pass;
 * the tail is converted one item at a time with item32_sc16_to_fc64.
 */
DECLARE_CONVERTER(sc16_item32_le, 1, fc64, 1, PRIORITY_SIMD){
    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
    fc64_t *output = reinterpret_cast<fc64_t *>(outputs[0]);

    //each 16-bit value ends up in the upper half of a 32-bit lane (see the unpack),
    //so the factor of 2^16 is divided out of the scale up front
    const __m128d scalar = _mm_set1_pd(scale_factor/(1 << 16));
    const __m128i zeroi = _mm_setzero_si128();

    #define convert_item32_1_to_fc64_1_nswap_guts(_al_) \
    for (; samp+3 < nsamps; samp+=4){ \
        /* unaligned load of four items */ \
        __m128i items = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+samp)); \
        \
        /* exchange the two halves of every item, then widen with zeros below */ \
        items = _mm_shufflelo_epi16(items, _MM_SHUFFLE(2, 3, 0, 1)); \
        items = _mm_shufflehi_epi16(items, _MM_SHUFFLE(2, 3, 0, 1)); \
        const __m128i lo = _mm_unpacklo_epi16(zeroi, items); /* value in upper 16 bits */ \
        const __m128i hi = _mm_unpackhi_epi16(zeroi, items); \
        \
        /* bring the upper 64 bits down so they can be converted as well */ \
        const __m128i lo_upper = _mm_unpackhi_epi64(lo, zeroi); \
        const __m128i hi_upper = _mm_unpackhi_epi64(hi, zeroi); \
        \
        /* convert and scale, one complex double per register */ \
        const __m128d out0 = _mm_mul_pd(_mm_cvtepi32_pd(lo), scalar); \
        const __m128d out1 = _mm_mul_pd(_mm_cvtepi32_pd(lo_upper), scalar); \
        const __m128d out2 = _mm_mul_pd(_mm_cvtepi32_pd(hi), scalar); \
        const __m128d out3 = _mm_mul_pd(_mm_cvtepi32_pd(hi_upper), scalar); \
        \
        /* store to output */ \
        _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+samp+0), out0); \
        _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+samp+1), out1); \
        _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+samp+2), out2); \
        _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+samp+3), out3); \
    }

    size_t samp = 0;

    //aligned stores are only used when the output sits on a 16-byte boundary
    if ((size_t(output) & 0xf) != 0){
        convert_item32_1_to_fc64_1_nswap_guts(u_)
    }
    else{
        convert_item32_1_to_fc64_1_nswap_guts(_)
    }

    //scalar path for whatever the vector loop left behind
    while (samp < nsamps){
        output[samp] = item32_sc16_to_fc64(input[samp], scale_factor);
        samp++;
    }
}
|
| 165 |
|
|
| 166 |
/*
 * SSE2 converter: big-endian sc16 item32 words -> fc64 samples.
 * Four items are byteswapped, widened and scaled per vector pass;
 * the tail is converted one item at a time with byteswap + item32_sc16_to_fc64.
 */
DECLARE_CONVERTER(sc16_item32_be, 1, fc64, 1, PRIORITY_SIMD){
    const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
    fc64_t *output = reinterpret_cast<fc64_t *>(outputs[0]);

    //each 16-bit value ends up in the upper half of a 32-bit lane (see the unpack),
    //so the factor of 2^16 is divided out of the scale up front
    const __m128d scalar = _mm_set1_pd(scale_factor/(1 << 16));
    const __m128i zeroi = _mm_setzero_si128();

    #define convert_item32_1_to_fc64_1_bswap_guts(_al_) \
    for (; samp+3 < nsamps; samp+=4){ \
        /* unaligned load of four items */ \
        const __m128i raw = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+samp)); \
        \
        /* swap the two bytes of every 16-bit word, then widen with zeros below */ \
        const __m128i items = _mm_or_si128(_mm_srli_epi16(raw, 8), _mm_slli_epi16(raw, 8)); \
        const __m128i lo = _mm_unpacklo_epi16(zeroi, items); /* value in upper 16 bits */ \
        const __m128i hi = _mm_unpackhi_epi16(zeroi, items); \
        \
        /* bring the upper 64 bits down so they can be converted as well */ \
        const __m128i lo_upper = _mm_unpackhi_epi64(lo, zeroi); \
        const __m128i hi_upper = _mm_unpackhi_epi64(hi, zeroi); \
        \
        /* convert and scale, one complex double per register */ \
        const __m128d out0 = _mm_mul_pd(_mm_cvtepi32_pd(lo), scalar); \
        const __m128d out1 = _mm_mul_pd(_mm_cvtepi32_pd(lo_upper), scalar); \
        const __m128d out2 = _mm_mul_pd(_mm_cvtepi32_pd(hi), scalar); \
        const __m128d out3 = _mm_mul_pd(_mm_cvtepi32_pd(hi_upper), scalar); \
        \
        /* store to output */ \
        _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+samp+0), out0); \
        _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+samp+1), out1); \
        _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+samp+2), out2); \
        _mm_store ## _al_ ## pd(reinterpret_cast<double *>(output+samp+3), out3); \
    }

    size_t samp = 0;

    //aligned stores are only used when the output sits on a 16-byte boundary
    if ((size_t(output) & 0xf) != 0){
        convert_item32_1_to_fc64_1_bswap_guts(u_)
    }
    else{
        convert_item32_1_to_fc64_1_bswap_guts(_)
    }

    //scalar path for whatever the vector loop left behind
    while (samp < nsamps){
        output[samp] = item32_sc16_to_fc64(uhd::byteswap(input[samp]), scale_factor);
        samp++;
    }
}
|
| b/host/lib/convert/convert_item32.cpp | ||
|---|---|---|
| 1 |
// |
|
| 2 |
// Copyright 2012 Ettus Research LLC |
|
| 3 |
// |
|
| 4 |
// This program is free software: you can redistribute it and/or modify |
|
| 5 |
// it under the terms of the GNU General Public License as published by |
|
| 6 |
// the Free Software Foundation, either version 3 of the License, or |
|
| 7 |
// (at your option) any later version. |
|
| 8 |
// |
|
| 9 |
// This program is distributed in the hope that it will be useful, |
|
| 10 |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 11 |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
| 12 |
// GNU General Public License for more details. |
|
| 13 |
// |
|
| 14 |
// You should have received a copy of the GNU General Public License |
|
| 15 |
// along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
| 16 |
// |
|
| 17 |
|
|
| 18 |
#include "convert_common.hpp" |
|
| 19 |
#include <uhd/utils/byteswap.hpp> |
|
| 20 |
|
|
| 21 |
#define __DECLARE_ITEM32_CONVERTER(cpu_type, wire_type, xe, htoxx, xxtoh) \ |
|
| 22 |
DECLARE_CONVERTER(cpu_type, 1, wire_type ## _item32_ ## xe, 1, PRIORITY_GENERAL){ \
|
|
| 23 |
const cpu_type ## _t *input = reinterpret_cast<const cpu_type ## _t *>(inputs[0]); \ |
|
| 24 |
item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); \ |
|
| 25 |
xx_to_item32_ ## wire_type<htoxx>(input, output, nsamps, scale_factor); \ |
|
| 26 |
} \ |
|
| 27 |
DECLARE_CONVERTER(wire_type ## _item32_ ## xe, 1, cpu_type, 1, PRIORITY_GENERAL){ \
|
|
| 28 |
const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); \ |
|
| 29 |
cpu_type ## _t *output = reinterpret_cast<cpu_type ## _t *>(outputs[0]); \ |
|
| 30 |
item32_ ## wire_type ## _to_xx<xxtoh>(input, output, nsamps, scale_factor); \ |
|
| 31 |
} |
|
| 32 |
|
|
| 33 |
#define _DECLARE_ITEM32_CONVERTER(cpu_type, wire_type) \ |
|
| 34 |
__DECLARE_ITEM32_CONVERTER(cpu_type, wire_type, be, uhd::htonx, uhd::ntohx) \ |
|
| 35 |
__DECLARE_ITEM32_CONVERTER(cpu_type, wire_type, le, uhd::htowx, uhd::wtohx) |
|
| 36 |
|
|
| 37 |
#define DECLARE_ITEM32_CONVERTER(cpu_type) \ |
|
| 38 |
_DECLARE_ITEM32_CONVERTER(cpu_type, sc8) \ |
|
| 39 |
_DECLARE_ITEM32_CONVERTER(cpu_type, sc16) |
|
| 40 |
|
|
| 41 |
DECLARE_ITEM32_CONVERTER(sc16) |
|
| 42 |
DECLARE_ITEM32_CONVERTER(fc32) |
|
| 43 |
DECLARE_ITEM32_CONVERTER(fc64) |
|
| b/host/lib/convert/convert_with_neon.cpp | ||
|---|---|---|
| 1 | 1 |
// |
| 2 |
// Copyright 2011-2011 Ettus Research LLC
|
|
| 2 |
// Copyright 2011-2012 Ettus Research LLC
|
|
| 3 | 3 |
// |
| 4 | 4 |
// This program is free software: you can redistribute it and/or modify |
| 5 | 5 |
// it under the terms of the GNU General Public License as published by |
| ... | ... | |
| 36 | 36 |
vst1_s16((reinterpret_cast<int16_t *>(&output[i])), D9); |
| 37 | 37 |
} |
| 38 | 38 |
|
| 39 |
for (; i < nsamps; i++) |
|
| 40 |
output[i] = fc32_to_item32_sc16(input[i], scale_factor); |
|
| 39 |
xx_to_item32_sc16<uhd::htowx>(input+i, output+i, nsamps-i, scale_factor); |
|
| 41 | 40 |
} |
| 42 | 41 |
|
| 43 | 42 |
DECLARE_CONVERTER(sc16_item32_le, 1, fc32, 1, PRIORITY_SIMD){
|
| ... | ... | |
| 56 | 55 |
vst1q_f32((reinterpret_cast<float *>(&output[i])), Q4); |
| 57 | 56 |
} |
| 58 | 57 |
|
| 59 |
for (; i < nsamps; i++) |
|
| 60 |
output[i] = item32_sc16_to_fc32(input[i], scale_factor); |
|
| 58 |
item32_sc16_to_xx<uhd::htowx>(input+i, output+i, nsamps-i, scale_factor); |
|
| 61 | 59 |
} |
| b/host/lib/convert/gen_convert_general.py | ||
|---|---|---|
| 48 | 48 |
} |
| 49 | 49 |
""" |
| 50 | 50 |
|
| 51 |
TMPL_CONV_GEN2_SC16 = """ |
|
| 52 |
DECLARE_CONVERTER($(cpu_type), 1, sc16_item32_$(end), 1, PRIORITY_GENERAL){
|
|
| 53 |
const $(cpu_type)_t *input = reinterpret_cast<const $(cpu_type)_t *>(inputs[0]); |
|
| 54 |
item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); |
|
| 55 |
|
|
| 56 |
for (size_t i = 0; i < nsamps; i++){
|
|
| 57 |
output[i] = $(to_wire)($(cpu_type)_to_item32_sc16(input[i], scale_factor)); |
|
| 58 |
} |
|
| 59 |
} |
|
| 60 |
|
|
| 61 |
DECLARE_CONVERTER(sc16_item32_$(end), 1, $(cpu_type), 1, PRIORITY_GENERAL){
|
|
| 62 |
const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); |
|
| 63 |
$(cpu_type)_t *output = reinterpret_cast<$(cpu_type)_t *>(outputs[0]); |
|
| 64 |
|
|
| 65 |
for (size_t i = 0; i < nsamps; i++){
|
|
| 66 |
output[i] = item32_sc16_to_$(cpu_type)($(to_host)(input[i]), scale_factor); |
|
| 67 |
} |
|
| 68 |
} |
|
| 69 |
""" |
|
| 70 |
|
|
| 71 |
TMPL_CONV_GEN2_SC8 = """ |
|
| 72 |
DECLARE_CONVERTER(sc8_item32_$(end), 1, $(cpu_type), 1, PRIORITY_GENERAL){
|
|
| 73 |
const item32_t *input = reinterpret_cast<const item32_t *>(size_t(inputs[0]) & ~0x3); |
|
| 74 |
$(cpu_type)_t *output = reinterpret_cast<$(cpu_type)_t *>(outputs[0]); |
|
| 75 |
$(cpu_type)_t dummy; |
|
| 76 |
size_t num_samps = nsamps; |
|
| 77 |
|
|
| 78 |
if ((size_t(inputs[0]) & 0x3) != 0){
|
|
| 79 |
const item32_t item0 = $(to_host)(*input++); |
|
| 80 |
item32_sc8_to_$(cpu_type)(item0, dummy, *output++, scale_factor); |
|
| 81 |
num_samps--; |
|
| 82 |
} |
|
| 83 |
|
|
| 84 |
const size_t num_pairs = num_samps/2; |
|
| 85 |
for (size_t i = 0, j = 0; i < num_pairs; i++, j+=2){
|
|
| 86 |
const item32_t item_i = $(to_host)(input[i]); |
|
| 87 |
item32_sc8_to_$(cpu_type)(item_i, output[j], output[j+1], scale_factor); |
|
| 88 |
} |
|
| 89 |
|
|
| 90 |
if (num_samps != num_pairs*2){
|
|
| 91 |
const item32_t item_n = $(to_host)(input[num_pairs]); |
|
| 92 |
item32_sc8_to_$(cpu_type)(item_n, output[num_samps-1], dummy, scale_factor); |
|
| 93 |
} |
|
| 94 |
} |
|
| 95 |
|
|
| 96 |
DECLARE_CONVERTER($(cpu_type), 1, sc8_item32_$(end), 1, PRIORITY_GENERAL){
|
|
| 97 |
const $(cpu_type)_t *input = reinterpret_cast<const $(cpu_type)_t *>(inputs[0]); |
|
| 98 |
item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); |
|
| 99 |
|
|
| 100 |
const size_t num_pairs = nsamps/2; |
|
| 101 |
for (size_t i = 0, j = 0; i < num_pairs; i++, j+=2){
|
|
| 102 |
const item32_t item = $(cpu_type)_to_item32_sc8(input[j], input[j+1], scale_factor); |
|
| 103 |
output[i] = $(to_wire)(item); |
|
| 104 |
} |
|
| 105 |
|
|
| 106 |
if (nsamps != num_pairs*2){
|
|
| 107 |
const item32_t item = $(cpu_type)_to_item32_sc8(input[nsamps-1], 0, scale_factor); |
|
| 108 |
output[num_pairs] = $(to_wire)(item); |
|
| 109 |
} |
|
| 110 |
} |
|
| 111 |
""" |
|
| 112 |
|
|
| 113 | 51 |
TMPL_CONV_USRP1_COMPLEX = """ |
| 114 | 52 |
DECLARE_CONVERTER($(cpu_type), $(width), sc16_item16_usrp1, 1, PRIORITY_GENERAL){
|
| 115 | 53 |
#for $w in range($width) |
| ... | ... | |
| 176 | 114 |
('be', 'uhd::ntohx', 'uhd::htonx'),
|
| 177 | 115 |
('le', 'uhd::wtohx', 'uhd::htowx'),
|
| 178 | 116 |
): |
| 179 |
for cpu_type in 'fc64', 'fc32', 'sc16': |
|
| 180 |
output += parse_tmpl( |
|
| 181 |
TMPL_CONV_GEN2_SC16, |
|
| 182 |
end=end, to_host=to_host, to_wire=to_wire, cpu_type=cpu_type |
|
| 183 |
) |
|
| 184 |
for cpu_type in 'fc64', 'fc32', 'sc16', 'sc8': |
|
| 185 |
output += parse_tmpl( |
|
| 186 |
TMPL_CONV_GEN2_SC8, |
|
| 187 |
end=end, to_host=to_host, to_wire=to_wire, cpu_type=cpu_type |
|
| 188 |
) |
|
| 189 | 117 |
output += parse_tmpl( |
| 190 | 118 |
TMPL_CONV_GEN2_ITEM32, |
| 191 | 119 |
end=end, to_host=to_host, to_wire=to_wire |
| b/host/lib/convert/sse2_fc32_to_sc16.cpp | ||
|---|---|---|
| 1 |
// |
|
| 2 |
// Copyright 2011-2012 Ettus Research LLC |
|
| 3 |
// |
|
| 4 |
// This program is free software: you can redistribute it and/or modify |
|
| 5 |
// it under the terms of the GNU General Public License as published by |
|
| 6 |
// the Free Software Foundation, either version 3 of the License, or |
|
| 7 |
// (at your option) any later version. |
|
| 8 |
// |
|
| 9 |
// This program is distributed in the hope that it will be useful, |
|
| 10 |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 11 |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
| 12 |
// GNU General Public License for more details. |
|
| 13 |
// |
|
| 14 |
// You should have received a copy of the GNU General Public License |
|
| 15 |
// along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
| 16 |
// |
|
| 17 |
|
|
| 18 |
#include "convert_common.hpp" |
|
| 19 |
#include <uhd/utils/byteswap.hpp> |
|
| 20 |
#include <emmintrin.h> |
|
| 21 |
|
|
| 22 |
using namespace uhd::convert; |
|
| 23 |
|
|
| 24 |
/*
 * SSE2 converter: fc32 samples -> little-endian sc16 item32 words.
 * Four samples are scaled, rounded and packed per vector pass; the tail is
 * handed to xx_to_item32_sc16<uhd::htowx>.
 * A zero-length request converts nothing, whatever the input alignment.
 */
DECLARE_CONVERTER(fc32, 1, sc16_item32_le, 1, PRIORITY_SIMD){
    const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);
    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);

    const __m128 scalar = _mm_set_ps1(float(scale_factor));

    #define convert_fc32_1_to_item32_1_nswap_guts(_al_) \
    for (; i+3 < nsamps; i+=4){ \
        /* load from input */ \
        __m128 tmplo = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+0)); \
        __m128 tmphi = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+2)); \
        \
        /* convert and scale */ \
        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); \
        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); \
        \
        /* pack + swap 16-bit pairs */ \
        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); \
        tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \
        tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \
        \
        /* store to output */ \
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); \
    }

    size_t i = 0;

    //dispatch according to alignment
    switch (size_t(input) & 0xf){
    case 0x8:
        //peel a single sample so that input+i lands on a 16-byte boundary;
        //guarded because with nsamps == 0 the peel would convert a sample that
        //does not exist and make the remainder count (nsamps-i) wrap around
        if (nsamps != 0){
            xx_to_item32_sc16<uhd::htowx>(input, output, 1, scale_factor); i++;
        }
        //intentional fall through into the aligned loop
    case 0x0:
        convert_fc32_1_to_item32_1_nswap_guts(_)
        break;
    default: convert_fc32_1_to_item32_1_nswap_guts(u_)
    }

    //convert remainder (i never exceeds nsamps here)
    xx_to_item32_sc16<uhd::htowx>(input+i, output+i, nsamps-i, scale_factor);
}
|
| 64 |
|
|
| 65 |
/*
 * SSE2 converter: fc32 samples -> big-endian sc16 item32 words.
 * Four samples are scaled, rounded, packed and byteswapped per vector pass;
 * the tail is handed to xx_to_item32_sc16<uhd::htonx>.
 * A zero-length request converts nothing, whatever the input alignment.
 */
DECLARE_CONVERTER(fc32, 1, sc16_item32_be, 1, PRIORITY_SIMD){
    const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);
    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);

    const __m128 scalar = _mm_set_ps1(float(scale_factor));

    #define convert_fc32_1_to_item32_1_bswap_guts(_al_) \
    for (; i+3 < nsamps; i+=4){ \
        /* load from input */ \
        __m128 tmplo = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+0)); \
        __m128 tmphi = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+2)); \
        \
        /* convert and scale */ \
        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); \
        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); \
        \
        /* pack + byteswap -> byteswap 16 bit words */ \
        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); \
        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \
        \
        /* store to output */ \
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); \
    }

    size_t i = 0;

    //dispatch according to alignment
    switch (size_t(input) & 0xf){
    case 0x8:
        //peel a single sample so that input+i lands on a 16-byte boundary;
        //guarded because with nsamps == 0 the peel would convert a sample that
        //does not exist and make the remainder count (nsamps-i) wrap around
        if (nsamps != 0){
            xx_to_item32_sc16<uhd::htonx>(input, output, 1, scale_factor); i++;
        }
        //intentional fall through into the aligned loop
    case 0x0:
        convert_fc32_1_to_item32_1_bswap_guts(_)
        break;
    default: convert_fc32_1_to_item32_1_bswap_guts(u_)
    }

    //convert remainder (i never exceeds nsamps here)
    xx_to_item32_sc16<uhd::htonx>(input+i, output+i, nsamps-i, scale_factor);
}
|
| b/host/lib/convert/sse2_fc32_to_sc8.cpp | ||
|---|---|---|
| 1 |
// |
|
| 2 |
// Copyright 2012 Ettus Research LLC |
|
| 3 |
// |
|
| 4 |
// This program is free software: you can redistribute it and/or modify |
|
| 5 |
// it under the terms of the GNU General Public License as published by |
|
| 6 |
// the Free Software Foundation, either version 3 of the License, or |
|
| 7 |
// (at your option) any later version. |
|
| 8 |
// |
|
| 9 |
// This program is distributed in the hope that it will be useful, |
|
| 10 |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 11 |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
| 12 |
// GNU General Public License for more details. |
|
| 13 |
// |
|
| 14 |
// You should have received a copy of the GNU General Public License |
|
| 15 |
// along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
| 16 |
// |
|
| 17 |
|
|
| 18 |
#include "convert_common.hpp" |
|
| 19 |
#include <uhd/utils/byteswap.hpp> |
|
| 20 |
#include <emmintrin.h> |
|
| 21 |
|
|
| 22 |
using namespace uhd::convert; |
|
| 23 |
|
|
| 24 |
/*
 * Scale four registers of floats, convert them to 32-bit integers, reorder
 * the lanes of each register according to shuf, and narrow everything
 * (32 -> 16 -> 8 bits, signed-saturating packs) into a single register.
 *
 * NOTE(review): _mm_shuffle_epi32 is documented as taking an immediate
 * operand, while shuf arrives here as a function parameter; this presumably
 * relies on inlining and constant propagation - confirm unoptimized builds.
 */
UHD_INLINE __m128i pack_sc32_4x(
    const __m128 &in0, const __m128 &in1,
    const __m128 &in2, const __m128 &in3,
    const __m128 &scalar, const int shuf
){
    //lower half of the result: in0 and in1
    const __m128i conv0 = _mm_cvtps_epi32(_mm_mul_ps(in0, scalar));
    const __m128i conv1 = _mm_cvtps_epi32(_mm_mul_ps(in1, scalar));
    const __m128i lo = _mm_packs_epi32(
        _mm_shuffle_epi32(conv0, shuf),
        _mm_shuffle_epi32(conv1, shuf)
    );

    //upper half of the result: in2 and in3
    const __m128i conv2 = _mm_cvtps_epi32(_mm_mul_ps(in2, scalar));
    const __m128i conv3 = _mm_cvtps_epi32(_mm_mul_ps(in3, scalar));
    const __m128i hi = _mm_packs_epi32(
        _mm_shuffle_epi32(conv2, shuf),
        _mm_shuffle_epi32(conv3, shuf)
    );

    return _mm_packs_epi16(lo, hi);
}
|
| 43 |
|
|
| 44 |
/*
 * SSE2 converter: fc32 samples -> big-endian sc8 item32 words (two samples per word).
 * The vector loop consumes eight samples and emits four words per pass;
 * everything left over is handed to xx_to_item32_sc8<uhd::htonx>.
 */
DECLARE_CONVERTER(fc32, 1, sc8_item32_be, 1, PRIORITY_SIMD){
    const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);
    item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);

    const __m128 scalar = _mm_set_ps1(float(scale_factor));

    #define convert_fc32_1_to_sc8_item32_1_bswap_guts(_al_) \
    for (size_t word = 0; samp+7 < nsamps; samp+=8, word+=4){ \
        /* two complex floats per 128-bit load */ \
        const __m128 pair0 = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+samp+0)); \
        const __m128 pair1 = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+samp+2)); \
        const __m128 pair2 = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+samp+4)); \
        const __m128 pair3 = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+samp+6)); \
        \
        /* scale, convert, reorder lanes and narrow to 8 bits */ \
        const __m128i packed = pack_sc32_4x(pair0, pair1, pair2, pair3, scalar, _MM_SHUFFLE(1, 0, 3, 2)); \
        \
        /* unaligned store of four items */ \
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+word), packed); \
    }

    size_t samp = 0;

    //aligned loads are only used when the input sits on a 16-byte boundary
    if ((size_t(input) & 0xf) != 0){
        convert_fc32_1_to_sc8_item32_1_bswap_guts(u_)
    }
    else{
        convert_fc32_1_to_sc8_item32_1_bswap_guts(_)
    }

    //whatever the vector loop left behind; two samples share one output word
    xx_to_item32_sc8<uhd::htonx>(input+samp, output+(samp/2), nsamps-samp, scale_factor);
}
|
| 78 |
|
|
| 79 |
DECLARE_CONVERTER(fc32, 1, sc8_item32_le, 1, PRIORITY_SIMD){
|
|
| 80 |
const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]); |
|
| 81 |
item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); |
|
| 82 |
|
|
| 83 |
const __m128 scalar = _mm_set_ps1(float(scale_factor)); |
|
| 84 |
|
|
| 85 |
#define convert_fc32_1_to_sc8_item32_1_nswap_guts(_al_) \ |
|
| 86 |
for (size_t j = 0; i+7 < nsamps; i+=8, j+=4){ \
|
|
| 87 |
/* load from input */ \ |
|
| 88 |
__m128 tmp0 = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+0)); \ |
|
| 89 |
__m128 tmp1 = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+2)); \ |
|
| 90 |
__m128 tmp2 = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+4)); \ |
|
| 91 |
__m128 tmp3 = _mm_load ## _al_ ## ps(reinterpret_cast<const float *>(input+i+6)); \ |
|
| 92 |
\ |
|
| 93 |
/* convert */ \ |
|
| 94 |
const __m128i tmpi = pack_sc32_4x(tmp0, tmp1, tmp2, tmp3, scalar, _MM_SHUFFLE(2, 3, 0, 1)); \ |
|
| 95 |
\ |
|
| 96 |
/* store to output */ \ |
|
| 97 |
_mm_storeu_si128(reinterpret_cast<__m128i *>(output+j), tmpi); \ |
|
| 98 |
} \ |
|
| 99 |
|
|
| 100 |
size_t i = 0; |
|
| 101 |
|
|
| 102 |
//dispatch according to alignment |
|
| 103 |
if ((size_t(input) & 0xf) == 0){
|
|
| 104 |
convert_fc32_1_to_sc8_item32_1_nswap_guts(_) |
|
| 105 |
} |
|
| 106 |
else{
|
|
| 107 |
convert_fc32_1_to_sc8_item32_1_nswap_guts(u_) |
|
| 108 |
} |
|
| 109 |
|
|
| 110 |
//convert remainder |
|
| 111 |
xx_to_item32_sc8<uhd::htowx>(input+i, output+(i/2), nsamps-i, scale_factor); |
|
| 112 |
} |
|
| b/host/lib/convert/sse2_fc64_to_sc16.cpp | ||
|---|---|---|
| 1 |
// |
|
| 2 |
// Copyright 2011-2012 Ettus Research LLC |
|
Also available in: Unified diff