Alexandria  2.25.0
SDC-CH common library for the Euclid project
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
NpyCommon.h
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2012-2022 Euclid Science Ground Segment
3  *
4  * This library is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU Lesser General Public License as published by the Free
6  * Software Foundation; either version 3.0 of the License, or (at your option)
7  * any later version.
8  *
9  * This library is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU Lesser General Public License
15  * along with this library; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #ifndef ALEXANDRIA_NDARRAY_IMPL_NPYCOMMON_H
20 #define ALEXANDRIA_NDARRAY_IMPL_NPYCOMMON_H
21 
23 #include <boost/endian/arithmetic.hpp>
24 #include <boost/filesystem/operations.hpp>
25 #include <boost/iostreams/device/mapped_file.hpp>
26 #include <numeric>
27 
28 namespace Euclid {
29 namespace NdArray {
30 
31 using boost::endian::little_uint16_t;
32 using boost::endian::little_uint32_t;
33 
/// Six-byte file signature: '\x93' followed by the letters N, U, M, P, Y.
/// It is a plain char array without a terminating NUL, so sizeof(NPY_MAGIC) is 6;
/// the header writer in this file emits it first, using that sizeof as the length.
37 constexpr const char NPY_MAGIC[] = {'\x93', 'N', 'U', 'M', 'P', 'Y'};
38 
/// Byte-order prefix that the header writer places in front of the type code inside the
/// 'descr' entry: "<" when built little-endian, ">" when built big-endian. Any other
/// byte order stops the build with the #error below.
// NOTE(review): BYTE_ORDER, LITTLE_ENDIAN and BIG_ENDIAN are not defined in this file and
// must come from an included header. If none of them is defined, the preprocessor
// evaluates all three as 0 and the first branch is selected silently — confirm they are
// available on every supported platform.
42 #if BYTE_ORDER == LITTLE_ENDIAN
43 constexpr const char* ENDIAN_MARKER = "<";
44 #elif BYTE_ORDER == BIG_ENDIAN
45 constexpr const char* ENDIAN_MARKER = ">";
46 #else
47 #error "PDP_ENDIAN not supported"
48 #endif
49 
/// Compile-time mapping from a C++ arithmetic type to the type code written in the
/// 'descr' entry of the header (the code is prefixed with ENDIAN_MARKER by the writer).
/// The primary template is intentionally empty: NpyDtype<T>::str exists only for the
/// types specialized below, so using any other T is a compile error at the point where
/// ::str is read.
53 template <typename T>
54 struct NpyDtype {};
55 
// Signed integers: single byte is "b", wider types are "i" followed by the byte width.
56 template <>
57 struct NpyDtype<int8_t> {
58  static constexpr const char* str = "b";
59 };
60 
61 template <>
62 struct NpyDtype<int16_t> {
63  static constexpr const char* str = "i2";
64 };
65 
66 template <>
67 struct NpyDtype<int32_t> {
68  static constexpr const char* str = "i4";
69 };
70 
71 template <>
72 struct NpyDtype<int64_t> {
73  static constexpr const char* str = "i8";
74 };
75 
// Unsigned integers: single byte is "B", wider types are "u" followed by the byte width.
76 template <>
77 struct NpyDtype<uint8_t> {
78  static constexpr const char* str = "B";
79 };
80 
81 template <>
82 struct NpyDtype<uint16_t> {
83  static constexpr const char* str = "u2";
84 };
85 
86 template <>
87 struct NpyDtype<uint32_t> {
88  static constexpr const char* str = "u4";
89 };
90 
91 template <>
92 struct NpyDtype<uint64_t> {
93  static constexpr const char* str = "u8";
94 };
95 
// Floating point: "f" followed by the byte width.
96 template <>
97 struct NpyDtype<float> {
98  static constexpr const char* str = "f4";
99 };
100 
101 template <>
102 struct NpyDtype<double> {
103  static constexpr const char* str = "f8";
104 };
105 
/// Declaration only; the body is not part of this header.
/// `descr` is taken by const reference; `big_endian` and `dtype` are non-const
/// references (presumably outputs — confirm against the definition).
// NOTE(review): the name suggests this handles a 'descr' made of one type string rather
// than a list of named fields — confirm.
109 void parseSingleValue(const std::string& descr, bool& big_endian, std::string& dtype);
110 
/// Declaration only; the body is not part of this header.
/// `descr` is taken by const reference; `big_endian`, `attrs` and `dtype` are non-const
/// references (presumably outputs — confirm against the definition).
// NOTE(review): the name and the extra `attrs` vector suggest this handles a 'descr' that
// lists named fields — confirm.
119 void parseFieldValues(const std::string& descr, bool& big_endian, std::vector<std::string>& attrs, std::string& dtype);
120 
/// Declaration only; the body is not part of this header.
/// `header` is taken by const reference; every other parameter is a non-const reference
/// (presumably filled in from the header text — confirm against the definition).
// NOTE(review): the parameter names mirror the keys the header writer in this file emits
// ('descr', 'fortran_order', 'shape'); the exact parsing rules are not visible here.
138 void parseNpyDict(const std::string& header, bool& fortran_order, bool& big_endian, std::string& dtype,
139  std::vector<size_t>& shape, std::vector<std::string>& attrs, size_t& n_elements);
140 
156  size_t& n_elements);
157 
/// Two version bytes, 2 then 0, which the header writer in this file emits immediately
/// after NPY_MAGIC and before the header length (stored there as a little_uint32_t).
161 constexpr const uint8_t NPY_VERSION[] = {'\x02', '\x00'};
162 
// Render `shape` as Python tuple text: "(", then every axis length followed by ',', then ")".
// The comma is emitted after each element including the last, so {3, 4} gives "(3,4,)",
// {5} gives "(5,)" and an empty shape gives "()".
169  std::stringstream shape_stream;
170  shape_stream << "(";
171  for (auto s : shape) {
172  shape_stream << s << ',';
173  }
174  shape_stream << ")";
175  return shape_stream.str();
176 }
177 
// Build the text used as the value of the 'descr' entry.
//  - `attrs` empty: one single-quoted string made of ENDIAN_MARKER followed by `type`.
//  - otherwise: a bracketed list holding one ('name', 'type') pair per attribute name; every
//    attribute gets the same ENDIAN_MARKER + `type`, and each pair (the last one included)
//    is followed by ", ".
179  std::stringstream dtype;
180  if (attrs.empty()) {
181  dtype << '\'' << ENDIAN_MARKER << type << '\'';
182  } else {
183  dtype << '[';
184  for (auto& attr : attrs) {
185  dtype << "('" << attr << "', '" << ENDIAN_MARKER << type << "'), ";
186  }
187  dtype << ']';
188  }
189  return dtype.str();
190 }
191 
/// Write the NPY preamble to `out`: NPY_MAGIC, NPY_VERSION, the header length as a
/// little-endian 32-bit value, and then the header dictionary itself.
///
/// The dictionary has the keys 'descr', 'fortran_order' (always False) and 'shape'. It is
/// padded with spaces and terminated by '\n' so that magic + version + length + header is
/// a multiple of 64 bytes.
///
/// T selects the type code through NpyDtype<T>::str. When `attrs` is non-empty, the last
/// axis of `shape` is the field axis: it must be equal to attrs.size() and it is removed
/// from the shape that gets written.
///
/// Throws std::out_of_range if `attrs` is non-empty and `shape` is empty or its last axis
/// differs from attrs.size().
195 template <typename T>
197  if (!attrs.empty()) {
// An empty shape has no last axis; check it first because back() on an empty vector is
// undefined behaviour. It is reported with the same exception as a size mismatch.
198  if (shape.empty() || attrs.size() != shape.back()) {
199  throw std::out_of_range("Last axis does not match number of attribute names");
200  }
201  shape.pop_back();
202  }
203  // Serialize header as a Python dict
204  std::stringstream header;
205  header << "{"
206  << "'descr': " << typeDescription(NpyDtype<T>::str, attrs)
207  << ", 'fortran_order': False, 'shape': " << npyShape(shape) << "}";
208  auto header_str = header.str();
209  little_uint32_t header_len = header_str.size();
210 
211  // Pad header with spaces so the header block is 64 bytes aligned
212  size_t total_length = sizeof(NPY_MAGIC) + sizeof(NPY_VERSION) + sizeof(header_len) + header_len + 1; // Keep 1 for \n
213  if (total_length % 64 > 0) {
214  size_t padding = 64 - total_length % 64;
215  header << std::string(padding, '\x20');
216  }
217  header << '\n';
// Re-read the stream: the stored length must cover the padding and the final newline.
218  header_str = header.str();
219  header_len = header_str.size();
220 
221  // Magic and version
222  out.write(NPY_MAGIC, sizeof(NPY_MAGIC));
223  out.write(reinterpret_cast<const char*>(&NPY_VERSION), sizeof(NPY_VERSION));
224 
225  // HEADER_LEN
226  out.write(reinterpret_cast<char*>(&header_len), sizeof(header_len));
227 
228  // HEADER
229  out.write(header_str.data(), header_str.size());
230 }
231 
238 template <typename T>
240 public:
// Wrap an existing memory mapping of an NPY file.
// path, data_offset, n_elements, max_size and attr_names are stored as given; `input` is
// moved into m_mapped, and m_data is set to data_offset bytes past the start of that mapping.
// The m_data initializer reads m_mapped, which is declared before m_data in the class, so
// m_mapped is already constructed at that point.
// NOTE(review): the pointer comes from const_data() with constness cast away; writing
// through data() presumably requires the mapping to have been opened writable — confirm.
241  MappedContainer(const boost::filesystem::path& path, size_t data_offset, size_t n_elements,
242  const std::vector<std::string>& attr_names, boost::iostreams::mapped_file&& input, size_t max_size)
243  : m_path(path)
244  , m_data_offset(data_offset)
245  , m_n_elements(n_elements)
246  , m_max_size(max_size)
247  , m_attr_names(attr_names)
248  , m_mapped(std::move(input))
249  , m_data(reinterpret_cast<T*>(const_cast<char*>(m_mapped.const_data()) + data_offset)) {}
250 
// Current element count: the value given to the constructor, or the product of the shape
// passed to the latest resize() call.
251  size_t size() const {
252  return m_n_elements;
253  }
254 
// Pointer to the first element, i.e. the address data_offset bytes into the mapping that
// was computed once in the constructor. resize() does not recompute it.
255  T* data() {
256  return m_data;
257  }
258 
// Give the mapped file a new shape: regenerate the header for `shape`, resize the file on
// disk to header + sizeof(T) * (product of shape), and overwrite the header bytes at the
// start of the mapping.
// Throws Elements::Exception when the regenerated header is not exactly m_data_offset
// bytes long, or when the new file size would exceed m_max_size.
// NOTE(review): m_n_elements is assigned before the m_max_size check, so size() reports the
// rejected element count if that check throws — confirm whether this is intended.
259  void resize(const std::vector<size_t>& shape) {
260  // Generate header
261  std::stringstream header;
262  writeNpyHeader<T>(header, shape, m_attr_names);
263  auto header_str = header.str();
264  auto header_size = header_str.size();
265  // Make sure we are in place
266  if (header_size != m_data_offset) {
267  throw Elements::Exception() << "Can not resize memory mapped NPY file. "
268  "The new header length must match the allocated space.";
269  }
270 
// The seed must be a size_t: std::accumulate keeps its running result in the type of the
// seed argument, so an unsigned int seed (1u) would wrap the product at unsigned int width
// before it is stored in m_n_elements.
271  m_n_elements = std::accumulate(shape.begin(), shape.end(), size_t{1}, std::multiplies<size_t>());
272  size_t new_size = header_size + sizeof(T) * m_n_elements;
273  if (new_size > m_max_size) {
274  throw Elements::Exception() << "resize request bigger than maximum allocated size: " << new_size << " > "
275  << m_max_size;
276  }
277  boost::filesystem::resize_file(m_path, new_size);
278  std::copy(header_str.begin(), header_str.end(), m_mapped.data());
279  }
280 
281 private:
282  boost::filesystem::path m_path;
285  boost::iostreams::mapped_file m_mapped;
286  T* m_data;
287 };
288 
289 } // end of namespace NdArray
290 } // end of namespace Euclid
291 
292 #endif // ALEXANDRIA_NDARRAY_IMPL_NPYCOMMON_H
constexpr const char NPY_MAGIC[]
Definition: NpyCommon.h:37
void readNpyHeader(std::istream &input, std::string &dtype, std::vector< size_t > &shape, std::vector< std::string > &attrs, size_t &n_elements)
Definition: NpyCommon.cpp:81
T empty(T...args)
T copy(T...args)
std::string typeDescription(const std::string &type, const std::vector< std::string > &attrs)
Definition: NpyCommon.h:178
std::vector< std::string > m_attr_names
Definition: NpyCommon.h:284
void parseFieldValues(const std::string &descr, bool &big_endian, std::vector< std::string > &attrs, std::string &dtype)
Definition: NpyCommon.cpp:30
STL class.
void resize(const std::vector< size_t > &shape)
Definition: NpyCommon.h:259
STL class.
constexpr const char * ENDIAN_MARKER
Definition: NpyCommon.h:43
constexpr double s
constexpr const uint8_t NPY_VERSION[]
Definition: NpyCommon.h:161
void parseNpyDict(const std::string &header, bool &fortran_order, bool &big_endian, std::string &dtype, std::vector< size_t > &shape, std::vector< std::string > &attrs, size_t &n_elements)
Definition: NpyCommon.cpp:55
void parseSingleValue(const std::string &descr, bool &big_endian, std::string &dtype)
Definition: NpyCommon.cpp:25
T pop_back(T...args)
T str(T...args)
boost::filesystem::path m_path
Definition: NpyCommon.h:282
void writeNpyHeader(std::ostream &out, std::vector< size_t > shape, const std::vector< std::string > &attrs)
Definition: NpyCommon.h:196
boost::iostreams::mapped_file m_mapped
Definition: NpyCommon.h:285
T size(T...args)
T write(T...args)
T back(T...args)
MappedContainer(const boost::filesystem::path &path, size_t data_offset, size_t n_elements, const std::vector< std::string > &attr_names, boost::iostreams::mapped_file &&input, size_t max_size)
Definition: NpyCommon.h:241
std::string npyShape(std::vector< size_t > shape)
Definition: NpyCommon.h:168
T accumulate(T...args)
STL class.
Path::Item path