1 #ifndef INCLUDED_volk_8i_s32f_convert_32f_u_H
2 #define INCLUDED_volk_8i_s32f_convert_32f_u_H
/*
 * Convert signed 8-bit samples to floats, scaling each by 1/scalar
 * (SSE4.1 implementation, unaligned loads and stores).
 *
 *   outputVector  destination; receives num_points floats
 *   inputVector   source; num_points signed 8-bit samples
 *   scalar        divisor; each output is (float)input * (1.0 / scalar)
 *   num_points    number of samples to convert
 *
 * Samples are processed 16 at a time with SIMD; any remaining
 * (num_points % 16) samples are handled by a scalar tail loop.
 */
static inline void volk_8i_s32f_convert_32f_u_sse4_1(
    float* outputVector,
    const int8_t* inputVector,
    const float scalar,
    unsigned int num_points){
  unsigned int number = 0;
  const unsigned int sixteenthPoints = num_points / 16;

  float* outputVectorPtr = outputVector;
  const float iScalar = 1.0 / scalar;
  const __m128 invScalar = _mm_set_ps1(iScalar);
  const int8_t* inputVectorPtr = inputVector;
  __m128 ret;
  __m128i inputVal;
  __m128i interimVal;

  for(; number < sixteenthPoints; number++){
    /* One unaligned load brings in 16 samples. */
    inputVal = _mm_loadu_si128((const __m128i*)inputVectorPtr);

    /* Samples 0..3: widen the low four bytes to int32, convert, scale. */
    interimVal = _mm_cvtepi8_epi32(inputVal);
    ret = _mm_cvtepi32_ps(interimVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_storeu_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    /* Samples 4..7: shift the next four bytes down into the low lanes. */
    inputVal = _mm_srli_si128(inputVal, 4);
    interimVal = _mm_cvtepi8_epi32(inputVal);
    ret = _mm_cvtepi32_ps(interimVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_storeu_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    /* Samples 8..11. */
    inputVal = _mm_srli_si128(inputVal, 4);
    interimVal = _mm_cvtepi8_epi32(inputVal);
    ret = _mm_cvtepi32_ps(interimVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_storeu_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    /* Samples 12..15. */
    inputVal = _mm_srli_si128(inputVal, 4);
    interimVal = _mm_cvtepi8_epi32(inputVal);
    ret = _mm_cvtepi32_ps(interimVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_storeu_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    /* Advance to the next group of 16 input samples. */
    inputVectorPtr += 16;
  }

  /* Scalar tail for the samples that do not fill a whole group of 16. */
  number = sixteenthPoints * 16;
  for(; number < num_points; number++){
    outputVector[number] = (float)(inputVector[number]) * iScalar;
  }
}
70 #ifdef LV_HAVE_GENERIC
79 static inline void volk_8i_s32f_convert_32f_u_generic(
float* outputVector,
const int8_t* inputVector,
const float scalar,
unsigned int num_points){
80 float* outputVectorPtr = outputVector;
81 const int8_t* inputVectorPtr = inputVector;
82 unsigned int number = 0;
83 const float iScalar = 1.0 / scalar;
85 for(number = 0; number < num_points; number++){
86 *outputVectorPtr++ = ((float)(*inputVectorPtr++)) * iScalar;