fast_math.hpp 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261
  1. /*M///////////////////////////////////////////////////////////////////////////////////////
  2. //
  3. // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
  4. //
  5. // By downloading, copying, installing or using the software you agree to this license.
  6. // If you do not agree to this license, do not download, install,
  7. // copy or use the software.
  8. //
  9. //
  10. // License Agreement
  11. // For Open Source Computer Vision Library
  12. //
  13. // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14. // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
  15. // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
  16. // Copyright (C) 2015, Itseez Inc., all rights reserved.
  17. // Third party copyrights are property of their respective owners.
  18. //
  19. // Redistribution and use in source and binary forms, with or without modification,
  20. // are permitted provided that the following conditions are met:
  21. //
  22. // * Redistribution's of source code must retain the above copyright notice,
  23. // this list of conditions and the following disclaimer.
  24. //
  25. // * Redistribution's in binary form must reproduce the above copyright notice,
  26. // this list of conditions and the following disclaimer in the documentation
  27. // and/or other materials provided with the distribution.
  28. //
  29. // * The name of the copyright holders may not be used to endorse or promote products
  30. // derived from this software without specific prior written permission.
  31. //
  32. // This software is provided by the copyright holders and contributors "as is" and
  33. // any express or implied warranties, including, but not limited to, the implied
  34. // warranties of merchantability and fitness for a particular purpose are disclaimed.
  35. // In no event shall the Intel Corporation or contributors be liable for any direct,
  36. // indirect, incidental, special, exemplary, or consequential damages
  37. // (including, but not limited to, procurement of substitute goods or services;
  38. // loss of use, data, or profits; or business interruption) however caused
  39. // and on any theory of liability, whether in contract, strict liability,
  40. // or tort (including negligence or otherwise) arising in any way out of
  41. // the use of this software, even if advised of the possibility of such damage.
  42. //
  43. //M*/
  44. #ifndef OPENCV_CORE_FAST_MATH_HPP
  45. #define OPENCV_CORE_FAST_MATH_HPP
  46. #include "opencv2/core/cvdef.h"
  47. #if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
  48. && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
  49. #include <emmintrin.h>
  50. #endif
  51. //! @addtogroup core_utils
  52. //! @{
  53. /****************************************************************************************\
  54. * fast math *
  55. \****************************************************************************************/
  56. #ifdef __cplusplus
  57. # include <cmath>
  58. #else
  59. # ifdef __BORLANDC__
  60. # include <fastmath.h>
  61. # else
  62. # include <math.h>
  63. # endif
  64. #endif
  #if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__ && !defined(__CUDACC__)
  /* Inline-assembly rounding helpers, defined only for GCC-compatible compilers
     targeting 32-bit ARM with a hardware floating-point unit (and not under CUDA).
     Each macro expands to local declarations followed by `return res`, so it is
     meant to be used as the whole body of a function returning int. */

  // Generic form: executes the instruction sequence `insn_` on `arg_`.
  // `temp` is a floating-point register operand ("=w") that receives the converted
  // value; `res` is a general register operand ("=r") that is returned.
  #define ARM_ROUND(arg_, insn_) \
      int res; \
      float temp; \
      CV_UNUSED(temp); \
      __asm__(insn_ : [res] "=r" (res), [temp] "=w" (temp) : [value] "w" (arg_)); \
      return res

  // Double-precision variant. Non-clang compilers are given the %P operand
  // modifier for the input operand; clang is given the plain operand.
  #ifdef __clang__
  #define ARM_ROUND_DBL(x) ARM_ROUND(x, "vcvtr.s32.f64 %[temp], %[value] \n vmov %[res], %[temp]")
  #else
  #define ARM_ROUND_DBL(x) ARM_ROUND(x, "vcvtr.s32.f64 %[temp], %P[value] \n vmov %[res], %[temp]")
  #endif

  // Single-precision variant.
  #define ARM_ROUND_FLT(x) ARM_ROUND(x, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]")
  #endif
  82. /** @brief Rounds floating-point number to the nearest integer
  83. @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
  84. result is not defined.
  85. */
  86. CV_INLINE int
  87. cvRound( double value )
  88. {
  89. #if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
  90. && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__)
  91. __m128d t = _mm_set_sd( value );
  92. return _mm_cvtsd_si32(t);
  93. #elif defined _MSC_VER && defined _M_IX86
  94. int t;
  95. __asm
  96. {
  97. fld value;
  98. fistp t;
  99. }
  100. return t;
  101. #elif defined CV_ICC || defined __GNUC__
  102. # if defined ARM_ROUND_DBL
  103. ARM_ROUND_DBL(value);
  104. # else
  105. return (int)lrint(value);
  106. # endif
  107. #else
  108. /* it's ok if round does not comply with IEEE754 standard;
  109. the tests should allow +/-1 difference when the tested functions use round */
  110. return (int)(value + (value >= 0 ? 0.5 : -0.5));
  111. #endif
  112. }
  113. /** @brief Rounds floating-point number to the nearest integer not larger than the original.
  114. The function computes an integer i such that:
  115. \f[i \le \texttt{value} < i+1\f]
  116. @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
  117. result is not defined.
  118. */
  119. CV_INLINE int cvFloor( double value )
  120. {
  121. int i = (int)value;
  122. return i - (i > value);
  123. }
  124. /** @brief Rounds floating-point number to the nearest integer not smaller than the original.
  125. The function computes an integer i such that:
  126. \f[i \le \texttt{value} < i+1\f]
  127. @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
  128. result is not defined.
  129. */
  130. CV_INLINE int cvCeil( double value )
  131. {
  132. int i = (int)value;
  133. return i + (i < value);
  134. }
  135. /** @brief Determines if the argument is Not A Number.
  136. @param value The input floating-point value
  137. The function returns 1 if the argument is Not A Number (as defined by IEEE754 standard), 0
  138. otherwise. */
  139. CV_INLINE int cvIsNaN( double value )
  140. {
  141. Cv64suf ieee754;
  142. ieee754.f = value;
  143. return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) +
  144. ((unsigned)ieee754.u != 0) > 0x7ff00000;
  145. }
  146. /** @brief Determines if the argument is Infinity.
  147. @param value The input floating-point value
  148. The function returns 1 if the argument is a plus or minus infinity (as defined by IEEE754 standard)
  149. and 0 otherwise. */
  150. CV_INLINE int cvIsInf( double value )
  151. {
  152. Cv64suf ieee754;
  153. ieee754.f = value;
  154. return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) == 0x7ff00000 &&
  155. (unsigned)ieee754.u == 0;
  156. }
  157. #ifdef __cplusplus
  158. /** @overload */
  159. CV_INLINE int cvRound(float value)
  160. {
  161. #if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
  162. && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__)
  163. __m128 t = _mm_set_ss( value );
  164. return _mm_cvtss_si32(t);
  165. #elif defined _MSC_VER && defined _M_IX86
  166. int t;
  167. __asm
  168. {
  169. fld value;
  170. fistp t;
  171. }
  172. return t;
  173. #elif defined CV_ICC || defined __GNUC__
  174. # if defined ARM_ROUND_FLT
  175. ARM_ROUND_FLT(value);
  176. # else
  177. return (int)lrintf(value);
  178. # endif
  179. #else
  180. /* it's ok if round does not comply with IEEE754 standard;
  181. the tests should allow +/-1 difference when the tested functions use round */
  182. return (int)(value + (value >= 0 ? 0.5f : -0.5f));
  183. #endif
  184. }
  185. /** @overload */
  186. CV_INLINE int cvRound( int value )
  187. {
  188. return value;
  189. }
  190. /** @overload */
  191. CV_INLINE int cvFloor( float value )
  192. {
  193. int i = (int)value;
  194. return i - (i > value);
  195. }
  196. /** @overload */
  197. CV_INLINE int cvFloor( int value )
  198. {
  199. return value;
  200. }
  201. /** @overload */
  202. CV_INLINE int cvCeil( float value )
  203. {
  204. int i = (int)value;
  205. return i + (i < value);
  206. }
  207. /** @overload */
  208. CV_INLINE int cvCeil( int value )
  209. {
  210. return value;
  211. }
  212. /** @overload */
  213. CV_INLINE int cvIsNaN( float value )
  214. {
  215. Cv32suf ieee754;
  216. ieee754.f = value;
  217. return (ieee754.u & 0x7fffffff) > 0x7f800000;
  218. }
  219. /** @overload */
  220. CV_INLINE int cvIsInf( float value )
  221. {
  222. Cv32suf ieee754;
  223. ieee754.f = value;
  224. return (ieee754.u & 0x7fffffff) == 0x7f800000;
  225. }
  226. #endif // __cplusplus
  227. //! @} core_utils
  228. #endif