GDAL
rasterio_ssse3.h
1/******************************************************************************
2 *
3 * Project: GDAL Core
4 * Purpose: SSSE3 specializations
5 * Author: Even Rouault <even dot rouault at spatialys dot com>
6 *
7 ******************************************************************************
8 * Copyright (c) 2016, Even Rouault <even dot rouault at spatialys dot com>
9 *
10 * SPDX-License-Identifier: MIT
11 ****************************************************************************/
12
13#ifndef RASTERIO_SSSE3_H_INCLUDED
14#define RASTERIO_SSSE3_H_INCLUDED
15
16#include "cpl_port.h"
17
18#if defined(HAVE_SSSE3_AT_COMPILE_TIME) && \
19 (defined(__x86_64) || defined(_M_X64))
20
/**
 * Declares the SSSE3 specialization of the unrolled GByte copy routine.
 * Only the prototype is given here; the definition is not in this header.
 * The declaration is visible only when HAVE_SSSE3_AT_COMPILE_TIME is defined
 * and the target is x86_64 (see the enclosing #if).
 *
 * NOTE(review): the "_3_1" suffix presumably means "source stride 3,
 * destination stride 1" (keep one byte out of every three) - confirm against
 * the implementation.
 *
 * @param pDest  Output byte buffer. Declared CPL_RESTRICT, so it must not
 *               alias pSrc.
 * @param pSrc   Input byte buffer, read-only. Declared CPL_RESTRICT.
 * @param nIters Iteration count. Typed GPtrDiff_t (a pointer-difference
 *               integer), unlike the size_t used by the deinterleave
 *               declarations in this header. Presumably the number of bytes
 *               written to pDest - TODO confirm.
 */
21void GDALUnrolledCopy_GByte_3_1_SSSE3(GByte *CPL_RESTRICT pDest,
22 const GByte *CPL_RESTRICT pSrc,
23 GPtrDiff_t nIters);
24
/**
 * Declares the SSSE3 specialization for deinterleaving 3-component byte data.
 * Only the prototype is given here; the definition is not in this header.
 *
 * NOTE(review): presumably splits interleaved triplets read from pabySrc
 * (c0 c1 c2 c0 c1 c2 ...) into three separate output arrays - confirm against
 * the implementation.
 *
 * All pointers are declared CPL_RESTRICT, so none of the buffers may alias.
 *
 * @param pabySrc   Interleaved input bytes, read-only.
 * @param pabyDest0 Output buffer, presumably for the first component.
 * @param pabyDest1 Output buffer, presumably for the second component.
 * @param pabyDest2 Output buffer, presumably for the third component.
 * @param nIters    Iteration count; presumably the number of triplets to
 *                  process - TODO confirm.
 */
25void GDALDeinterleave3Byte_SSSE3(const GByte *CPL_RESTRICT pabySrc,
26 GByte *CPL_RESTRICT pabyDest0,
27 GByte *CPL_RESTRICT pabyDest1,
28 GByte *CPL_RESTRICT pabyDest2, size_t nIters);
29
30#if !defined(__GNUC__) || defined(__clang__)
31// GCC excluded because the auto-vectorized SSE2 code is good enough
/**
 * Declares the SSSE3 specialization for deinterleaving 4-component byte data.
 * Only the prototype is given here; the definition is not in this header.
 * The enclosing #if hides this declaration when building with GCC proper
 * (__GNUC__ defined and __clang__ not defined).
 *
 * NOTE(review): presumably splits interleaved quadruplets read from pabySrc
 * into four separate output arrays - confirm against the implementation.
 *
 * All pointers are declared CPL_RESTRICT, so none of the buffers may alias.
 *
 * @param pabySrc   Interleaved input bytes, read-only.
 * @param pabyDest0 Output buffer, presumably for the first component.
 * @param pabyDest1 Output buffer, presumably for the second component.
 * @param pabyDest2 Output buffer, presumably for the third component.
 * @param pabyDest3 Output buffer, presumably for the fourth component.
 * @param nIters    Iteration count; presumably the number of quadruplets to
 *                  process - TODO confirm.
 */
32void GDALDeinterleave4Byte_SSSE3(const GByte *CPL_RESTRICT pabySrc,
33 GByte *CPL_RESTRICT pabyDest0,
34 GByte *CPL_RESTRICT pabyDest1,
35 GByte *CPL_RESTRICT pabyDest2,
36 GByte *CPL_RESTRICT pabyDest3, size_t nIters);
37#endif
38
39#if (defined(__GNUC__) && !defined(__clang__)) || \
40 defined(__INTEL_CLANG_COMPILER)
41// Restricted to GCC and ICC: these are the only compilers verified to
42// auto-vectorize this code properly
/**
 * Declares the SSSE3 specialization for deinterleaving 3-component GUInt16
 * (unsigned 16-bit) data. Only the prototype is given here; the definition
 * is not in this header. The enclosing #if exposes this declaration only for
 * GCC proper (not clang) or when __INTEL_CLANG_COMPILER is defined.
 *
 * NOTE(review): presumably splits interleaved triplets read from panSrc into
 * three separate output arrays - confirm against the implementation.
 *
 * All pointers are declared CPL_RESTRICT, so none of the buffers may alias.
 *
 * @param panSrc   Interleaved input values, read-only.
 * @param panDest0 Output buffer, presumably for the first component.
 * @param panDest1 Output buffer, presumably for the second component.
 * @param panDest2 Output buffer, presumably for the third component.
 * @param nIters   Iteration count; presumably the number of triplets (not
 *                 bytes) to process - TODO confirm.
 */
43void GDALDeinterleave3UInt16_SSSE3(const GUInt16 *CPL_RESTRICT panSrc,
44 GUInt16 *CPL_RESTRICT panDest0,
45 GUInt16 *CPL_RESTRICT panDest1,
46 GUInt16 *CPL_RESTRICT panDest2,
47 size_t nIters);
48
/**
 * Declares the SSSE3 specialization for deinterleaving 4-component GUInt16
 * (unsigned 16-bit) data. Only the prototype is given here; the definition
 * is not in this header. The enclosing #if exposes this declaration only for
 * GCC proper (not clang) or when __INTEL_CLANG_COMPILER is defined.
 *
 * NOTE(review): presumably splits interleaved quadruplets read from panSrc
 * into four separate output arrays - confirm against the implementation.
 *
 * All pointers are declared CPL_RESTRICT, so none of the buffers may alias.
 *
 * @param panSrc   Interleaved input values, read-only.
 * @param panDest0 Output buffer, presumably for the first component.
 * @param panDest1 Output buffer, presumably for the second component.
 * @param panDest2 Output buffer, presumably for the third component.
 * @param panDest3 Output buffer, presumably for the fourth component.
 * @param nIters   Iteration count; presumably the number of quadruplets (not
 *                 bytes) to process - TODO confirm.
 */
49void GDALDeinterleave4UInt16_SSSE3(const GUInt16 *CPL_RESTRICT panSrc,
50 GUInt16 *CPL_RESTRICT panDest0,
51 GUInt16 *CPL_RESTRICT panDest1,
52 GUInt16 *CPL_RESTRICT panDest2,
53 GUInt16 *CPL_RESTRICT panDest3,
54 size_t nIters);
55#endif
56
57#endif
58
59#endif /* RASTERIO_SSSE3_H_INCLUDED */
Core portability definitions for CPL.
int GPtrDiff_t
Integer type large enough to hold the difference between 2 addresses.
Definition: cpl_port.h:240
#define CPL_RESTRICT
restrict keyword to declare that pointers do not alias
Definition: cpl_port.h:1010
unsigned short GUInt16
Unsigned int16 type.
Definition: cpl_port.h:167
unsigned char GByte
Unsigned byte type.
Definition: cpl_port.h:169