GDAL
ograrrowarrayhelper.h
1/******************************************************************************
2 *
3 * Project: OpenGIS Simple Features Reference Implementation
4 * Purpose: Helper to fill ArrowArray
5 * Author: Even Rouault <even dot rouault at spatialys.com>
6 *
7 ******************************************************************************
8 * Copyright (c) 2022, Even Rouault <even dot rouault at spatialys.com>
9 *
10 * SPDX-License-Identifier: MIT
11 ****************************************************************************/
12
13#pragma once
14
16
17#include <algorithm>
18#include <limits>
19
20#include "cpl_time.h"
21
22#include "ogrsf_frmts.h"
23#include "ogr_recordbatch.h"
24
25class CPL_DLL OGRArrowArrayHelper
26{
27 OGRArrowArrayHelper(const OGRArrowArrayHelper &) = delete;
28 OGRArrowArrayHelper &operator=(const OGRArrowArrayHelper &) = delete;
29
30 public:
31 bool m_bIncludeFID = false;
32 int m_nMaxBatchSize = 0;
33 int m_nChildren = 0;
34 const int m_nFieldCount = 0;
35 const int m_nGeomFieldCount = 0;
36 std::vector<int> m_mapOGRFieldToArrowField{};
37 std::vector<int> m_mapOGRGeomFieldToArrowField{};
38 std::vector<bool> m_abNullableFields{};
39 std::vector<uint32_t> m_anArrowFieldMaxAlloc{};
40 std::vector<int> m_anTZFlags{};
41 int64_t *m_panFIDValues = nullptr;
42 struct ArrowArray *m_out_array = nullptr;
43
44 static uint32_t GetMemLimit();
45
46 static int
47 GetMaxFeaturesInBatch(const CPLStringList &aosArrowArrayStreamOptions);
48
49 OGRArrowArrayHelper(GDALDataset *poDS, OGRFeatureDefn *poFeatureDefn,
50 const CPLStringList &aosArrowArrayStreamOptions,
51 struct ArrowArray *out_array);
52
53 bool SetNull(int iArrowField, int iFeat)
54 {
55 auto psArray = m_out_array->children[iArrowField];
56 ++psArray->null_count;
57 uint8_t *pabyNull =
58 static_cast<uint8_t *>(const_cast<void *>(psArray->buffers[0]));
59 if (psArray->buffers[0] == nullptr)
60 {
61 pabyNull = static_cast<uint8_t *>(
62 VSI_MALLOC_ALIGNED_AUTO_VERBOSE((m_nMaxBatchSize + 7) / 8));
63 if (pabyNull == nullptr)
64 {
65 return false;
66 }
67 memset(pabyNull, 0xFF, (m_nMaxBatchSize + 7) / 8);
68 psArray->buffers[0] = pabyNull;
69 }
70 pabyNull[iFeat / 8] &= static_cast<uint8_t>(~(1 << (iFeat % 8)));
71
72 if (psArray->n_buffers == 3)
73 {
74 auto panOffsets =
75 static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]));
76 panOffsets[iFeat + 1] = panOffsets[iFeat];
77 }
78 return true;
79 }
80
81 inline static void SetBoolOn(struct ArrowArray *psArray, int iFeat)
82 {
83 static_cast<uint8_t *>(
84 const_cast<void *>(psArray->buffers[1]))[iFeat / 8] |=
85 static_cast<uint8_t>(1 << (iFeat % 8));
86 }
87
88 inline static void SetInt8(struct ArrowArray *psArray, int iFeat,
89 int8_t nVal)
90 {
91 static_cast<int8_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
92 nVal;
93 }
94
95 inline static void SetUInt8(struct ArrowArray *psArray, int iFeat,
96 uint8_t nVal)
97 {
98 static_cast<uint8_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
99 nVal;
100 }
101
102 inline static void SetInt16(struct ArrowArray *psArray, int iFeat,
103 int16_t nVal)
104 {
105 static_cast<int16_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
106 nVal;
107 }
108
109 inline static void SetUInt16(struct ArrowArray *psArray, int iFeat,
110 uint16_t nVal)
111 {
112 static_cast<uint16_t *>(
113 const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal;
114 }
115
116 inline static void SetInt32(struct ArrowArray *psArray, int iFeat,
117 int32_t nVal)
118 {
119 static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
120 nVal;
121 }
122
123 inline static void SetUInt32(struct ArrowArray *psArray, int iFeat,
124 uint32_t nVal)
125 {
126 static_cast<uint32_t *>(
127 const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal;
128 }
129
130 inline static void SetInt64(struct ArrowArray *psArray, int iFeat,
131 int64_t nVal)
132 {
133 static_cast<int64_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
134 nVal;
135 }
136
137 inline static void SetUInt64(struct ArrowArray *psArray, int iFeat,
138 uint64_t nVal)
139 {
140 static_cast<uint64_t *>(
141 const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal;
142 }
143
144 inline static void SetFloat(struct ArrowArray *psArray, int iFeat,
145 float fVal)
146 {
147 static_cast<float *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
148 fVal;
149 }
150
151 inline static void SetDouble(struct ArrowArray *psArray, int iFeat,
152 double dfVal)
153 {
154 static_cast<double *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
155 dfVal;
156 }
157
158 static void SetDate(struct ArrowArray *psArray, int iFeat,
159 struct tm &brokenDown, const OGRField &ogrField)
160 {
161 brokenDown.tm_year = ogrField.Date.Year - 1900;
162 brokenDown.tm_mon = ogrField.Date.Month - 1;
163 brokenDown.tm_mday = ogrField.Date.Day;
164 brokenDown.tm_hour = 0;
165 brokenDown.tm_min = 0;
166 brokenDown.tm_sec = 0;
167 static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
168 static_cast<int>(CPLYMDHMSToUnixTime(&brokenDown) / 86400);
169 }
170
171 static void SetDateTime(struct ArrowArray *psArray, int iFeat,
172 struct tm &brokenDown, int nFieldTZFlag,
173 const OGRField &ogrField)
174 {
175 brokenDown.tm_year = ogrField.Date.Year - 1900;
176 brokenDown.tm_mon = ogrField.Date.Month - 1;
177 brokenDown.tm_mday = ogrField.Date.Day;
178 brokenDown.tm_hour = ogrField.Date.Hour;
179 brokenDown.tm_min = ogrField.Date.Minute;
180 brokenDown.tm_sec = static_cast<int>(ogrField.Date.Second);
181 auto nVal =
182 CPLYMDHMSToUnixTime(&brokenDown) * 1000 +
183 (static_cast<int>(ogrField.Date.Second * 1000 + 0.5) % 1000);
184 if (nFieldTZFlag >= OGR_TZFLAG_MIXED_TZ &&
185 ogrField.Date.TZFlag > OGR_TZFLAG_MIXED_TZ)
186 {
187 // Convert for ogrField.Date.TZFlag to UTC
188 const int TZOffset = (ogrField.Date.TZFlag - OGR_TZFLAG_UTC) * 15;
189 const int TZOffsetMS = TZOffset * 60 * 1000;
190 nVal -= TZOffsetMS;
191 }
192 static_cast<int64_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
193 nVal;
194 }
195
196 GByte *GetPtrForStringOrBinary(int iArrowField, int iFeat, size_t nLen)
197 {
198 auto psArray = m_out_array->children[iArrowField];
199 auto panOffsets =
200 static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]));
201 const uint32_t nCurLength = static_cast<uint32_t>(panOffsets[iFeat]);
202 if (nLen > m_anArrowFieldMaxAlloc[iArrowField] - nCurLength)
203 {
204 if (nLen >
205 static_cast<uint32_t>(std::numeric_limits<int32_t>::max()) -
206 nCurLength)
207 {
208 CPLError(CE_Failure, CPLE_AppDefined,
209 "Too large string or binary content");
210 return nullptr;
211 }
212 uint32_t nNewSize = nCurLength + static_cast<uint32_t>(nLen);
213 if ((m_anArrowFieldMaxAlloc[iArrowField] >> 31) == 0)
214 {
215 const uint32_t nDoubleSize =
216 2U * m_anArrowFieldMaxAlloc[iArrowField];
217 if (nNewSize < nDoubleSize)
218 nNewSize = nDoubleSize;
219 }
220 void *newBuffer = VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nNewSize);
221 if (newBuffer == nullptr)
222 return nullptr;
223 m_anArrowFieldMaxAlloc[iArrowField] = nNewSize;
224 memcpy(newBuffer, psArray->buffers[2], nCurLength);
225 VSIFreeAligned(const_cast<void *>(psArray->buffers[2]));
226 psArray->buffers[2] = newBuffer;
227 }
228 GByte *paby =
229 static_cast<GByte *>(const_cast<void *>(psArray->buffers[2])) +
230 nCurLength;
231 panOffsets[iFeat + 1] = panOffsets[iFeat] + static_cast<int32_t>(nLen);
232 return paby;
233 }
234
235 static void SetEmptyStringOrBinary(struct ArrowArray *psArray, int iFeat)
236 {
237 auto panOffsets =
238 static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]));
239 panOffsets[iFeat + 1] = panOffsets[iFeat];
240 }
241
242 void Shrink(int nFeatures)
243 {
244 if (nFeatures < m_nMaxBatchSize)
245 {
246 m_out_array->length = nFeatures;
247 for (int i = 0; i < m_nChildren; i++)
248 {
249 m_out_array->children[i]->length = nFeatures;
250 }
251 }
252 }
253
254 void ClearArray()
255 {
256 if (m_out_array->release)
257 m_out_array->release(m_out_array);
258 memset(m_out_array, 0, sizeof(*m_out_array));
259 }
260
261 static bool FillDict(struct ArrowArray *psChild,
262 const OGRCodedFieldDomain *poCodedDomain);
263};
264
String list class designed around our use of C "char**" string lists.
Definition: cpl_string.h:436
A set of associated raster bands, usually from one file.
Definition: gdal_priv.h:495
Definition of a coded / enumerated field domain.
Definition: ogr_feature.h:1742
Definition of a feature class or feature layer.
Definition: ogr_feature.h:501
#define CPLE_AppDefined
Application defined error.
Definition: cpl_error.h:84
void CPLError(CPLErr eErrClass, CPLErrorNum err_no, const char *fmt,...)
Report an error.
Definition: cpl_error.cpp:314
unsigned char GByte
Unsigned byte type.
Definition: cpl_port.h:169
#define VSI_MALLOC_ALIGNED_AUTO_VERBOSE(size)
VSIMallocAlignedAutoVerbose() with FILE and LINE reporting.
Definition: cpl_vsi.h:305
void VSIFreeAligned(void *ptr)
Free a buffer allocated with VSIMallocAligned().
Definition: cpl_vsisimple.cpp:979
#define OGR_TZFLAG_UTC
Time zone flag indicating UTC.
Definition: ogr_core.h:894
#define OGR_TZFLAG_MIXED_TZ
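Time zone flag only returned by OGRFieldDefn::GetTZFlag() to indicate that all values in the field have a known time zone (i.e. different from OGR_TZFLAG_UNKNOWN and OGR_TZFLAG_LOCALTIME), but it may be different among features.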
Definition: ogr_core.h:886
Classes related to registration of format support, and opening datasets.
OGRFeature field attribute value union.
Definition: ogr_core.h:905