Jazz 1.25.+
Loading...
Searching...
No Matches
block.h
1/* Jazz (c) 2018-2025 kaalam.ai (The Authors of Jazz), using (under the same license):
2
3 1. Biomodelling - The AATBlockQueue class (c) Jacques Basaldúa, 2009-2012 licensed
4 exclusively for the use in the Jazz server software.
5
6 Copyright 2009-2012 Jacques Basaldúa
7
8 2. BBVA - Jazz: A lightweight analytical web server for data-driven applications.
9
10 Copyright 2016-2017 Banco Bilbao Vizcaya Argentaria, S.A.
11
12 This product includes software developed at
13
14 BBVA (https://www.bbva.com/)
15
16 3. LMDB, Copyright 2011-2017 Howard Chu, Symas Corp. All rights reserved.
17
18 Licensed under http://www.OpenLDAP.org/license.html
19
20
21 Licensed under the Apache License, Version 2.0 (the "License");
22 you may not use this file except in compliance with the License.
23 You may obtain a copy of the License at
24
25 http://www.apache.org/licenses/LICENSE-2.0
26
27 Unless required by applicable law or agreed to in writing, software
28 distributed under the License is distributed on an "AS IS" BASIS,
29 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
30 See the License for the specific language governing permissions and
31 limitations under the License.
32*/
33
34
35#include <limits.h>
36#include <map>
37#include <string.h>
38#include <iostream>
39
40
41#include "src/jazz_elements/utils.h"
42
43#ifdef CATCH_TEST
44#ifndef INCLUDED_JAZZ_CATCH2
45#define INCLUDED_JAZZ_CATCH2
46
47#include "src/catch2/catch.hpp"
48
49#endif
50#endif
51
52
53#ifndef INCLUDED_JAZZ_ELEMENTS_BLOCK
54#define INCLUDED_JAZZ_ELEMENTS_BLOCK
55
56
57namespace jazz_elements
58{
59
60// Forward declarations
61
// A standard-library map (attribute id -> C string) used to pass all the attributes of a Block at once
// to and from Block::set_attributes() / Block::get_attributes().
63typedef std::map<int, const char *> AttributeMap;
64
// A (forward defined) pointer to a Block.
66typedef class Block *pBlock;
67
68
99class Block: public StaticBlockHeader {
100
101 public:
102
103 // Methods on indices.
104
113 inline void set_dimensions(int *p_dim) {
114 rank = MAX_TENSOR_RANK;
115 int j = 1;
116 for (int i = MAX_TENSOR_RANK - 1; i > 0; i--)
117 if (p_dim[i] > 0) { range.dim[i] = j; j *= p_dim[i]; } else { j = 1; range.dim[i] = 0; rank = i; }
118 range.dim[0] = j;
119 j *= p_dim[0];
120 size = j;
121 }
122
130 inline void get_dimensions(int *p_dim) {
131 int j = size;
132 for (int i = 0; i < MAX_TENSOR_RANK; i++)
133 if (i < rank) { p_dim[i] = j/range.dim[i]; j = range.dim[i]; } else p_dim[i] = 0;
134 }
135
142 inline bool validate_index(int *p_idx) {
143 int j = size;
144 for (int i = 0; i < rank; i++) {
145 if (p_idx[i] < 0 || p_idx[i]*range.dim[i] >= j) return false;
146 j = range.dim[i];
147 }
148 return true;
149 }
150
157 inline int validate_offset(int offset) { return offset >=0 & offset < size; }
158
165 inline int get_offset(int *p_idx) {
166 int j = 0;
167 for (int i = 0; i < rank; i++) j += p_idx[i]*range.dim[i];
168 return j;
169 }
170
176 inline void get_index(int offset, int *p_idx) {
177 for (int i = 0; i < rank; i++) { p_idx[i] = offset/range.dim[i]; offset -= p_idx[i]*range.dim[i]; }
178 }
179
180 // Methods on strings.
181
190 inline char *get_string(int *p_idx) {
191 return reinterpret_cast<char *>(&p_string_buffer()->buffer[tensor.cell_int[get_offset(p_idx)]]);
192 }
193
202 inline char *get_string(int offset) {
203 return reinterpret_cast<char *>(&p_string_buffer()->buffer[tensor.cell_int[offset]]);
204 }
205
217 inline void set_string(int *p_idx, const char *p_str) {
219 tensor.cell_int[get_offset(p_idx)] = get_string_offset(psb, p_str);
220 }
221
233 inline void set_string(int offset, const char *p_str) {
235 tensor.cell_int[offset] = get_string_offset(psb, p_str);
236 }
237
238 // Methods on attributes.
239
252 inline char *get_attribute(int attribute_id) {
253 int *ptk = p_attribute_keys();
254 for (int i = 0; i < num_attributes; i++)
255 if (ptk[i] == attribute_id)
256 return reinterpret_cast<char *>(&p_string_buffer()->buffer[ptk[i + num_attributes]]);
257 return nullptr;
258 }
259
268 inline void set_attributes(AttributeMap *all_att) {
269 if (num_attributes)
270 return;
271
272 if (all_att == nullptr) {
274 return;
275 }
276 num_attributes = all_att->size();
278
279 int i = 0;
280 int *ptk = p_attribute_keys();
282 for (AttributeMap::iterator it = all_att->begin(); it != all_att->end(); ++it) {
283 if (i < num_attributes) {
284 ptk[i] = it->first;
285 ptk[i + num_attributes] = get_string_offset(psb, it->second);
286 }
287 i++;
288 }
289 }
290
298 inline void get_attributes(AttributeMap *all_att) {
299 if (!num_attributes)
300 return;
301 int *ptk = p_attribute_keys();
303 for (int i = 0; i < num_attributes; i++)
304 (*all_att)[ptk[i]] = reinterpret_cast<char *>(&psb->buffer[ptk[i + num_attributes]]);
305 }
306
312 inline void init_string_buffer() {
314
315 int buff_size = total_bytes - ((uintptr_t) psb - (uintptr_t) &cell_type) - sizeof(StringBuffer);
316 if (buff_size < 4) {
317 psb->alloc_failed = true;
318 return;
319 }
320 memset(psb, 0, sizeof(StringBuffer) + 4); // This also clears psb->buffer[0..3]
321 // psb->buffer[0] = 0; // STRING_NA
322 // psb->buffer[1] = 0; // STRING_EMPTY
323 // psb->buffer[2] = 0; // The end of string for searching (will change once a string is inserted)
324 psb->buffer_size = buff_size;
325 psb->last_idx = 2; // Where the first string will be inserted
326 }
327
// Declared here, defined in block.cpp. Presumably reports whether any cell of the tensor holds an NA value;
// NOTE(review): the NA criteria per cell_type are not visible from this header - confirm in block.cpp.
328 bool find_NAs_in_tensor();
329
339 inline int *align64bit(uintptr_t ipt) {
340 uintptr_t gap = (uintptr_t) this & 0x7;
341 return reinterpret_cast<int *>(((ipt - gap + 0x7) & 0xffffFFFFffffFFF8) + gap);
342 }
343
352 inline int *p_attribute_keys() {
353 return align64bit((uintptr_t) &tensor + (cell_type & 0xff)*size);
354 }
355
364 return reinterpret_cast<pStringBuffer>((uintptr_t) p_attribute_keys() + 2*num_attributes*sizeof(int));
365 }
366
// Declared here, defined in block.cpp. Callers in this header store the returned int in tensor.cell_int[] or in
// the attribute offsets and later use it as an index into psb->buffer[].
// NOTE(review): on lack of space it presumably sets psb->alloc_failed - confirm the returned value in block.cpp.
367 int get_string_offset(pStringBuffer psb, const char *p_str);
368
369 // Methods for filtering (selecting).
370
// Check (in depth) the validity of a filter. Declared here, defined in block.cpp.
371 bool is_a_filter();
372
381 inline bool can_filter(pBlock p_block) {
382 int rows = p_block->range.dim[0];
383
384 if (p_block->rank < 1 || rows <= 0 || rank != 1)
385 return false;
386
387 rows = p_block->size/rows;
388
389 switch (cell_type) {
390 case CELL_TYPE_BYTE_BOOLEAN:
391 return size == rows;
392
393 case CELL_TYPE_INTEGER:
394 return size <= rows && (size == 0 || (tensor.cell_int[0] >= 0 && tensor.cell_int[size - 1] < rows));
395 }
396 return false;
397 }
398
406 inline void close_block(int set_has_NA = SET_HAS_NA_FALSE,
407 bool set_hash = true,
408 bool set_time = true) {
409 switch (set_has_NA) {
410 case SET_HAS_NA_FALSE:
411 has_NA = false;
412 break;
413 case SET_HAS_NA_TRUE:
414 has_NA = cell_type != CELL_TYPE_BYTE; // CELL_TYPE_BYTE must always be has_NA == FALSE
415 break;
416 default:
418 }
419
420#ifdef DEBUG // Initialize the RAM between the end of the string buffer and last allocated byte for Valgrind.
421
422 void *p_start;
423 if (cell_type != CELL_TYPE_TUPLE_ITEM) {
425 p_start = &psb->buffer[psb->last_idx];
426 } else {
427 pBlock p_blk = (pBlock) ((uintptr_t) &tensor + tensor.cell_item[size - 1].data_start);
428 p_start = (void *) ((uintptr_t) p_blk + p_blk->total_bytes);
429 }
430 uintptr_t void_size = total_bytes + (uintptr_t) &cell_type - (uintptr_t) p_start;
431
432 if (void_size > 0)
433 memset(p_start, 0, void_size);
434#endif
435 if (set_hash)
437
438 if (set_time)
439 created = std::chrono::steady_clock::now();
440 }
441
446 inline bool check_hash() {
447 int siz = total_bytes - sizeof(BlockHeader);
448 return siz > 0 && hash64 == MurmurHash64A(&tensor, siz);
449 }
450};
451
452} // namespace jazz_elements
453
454#endif // ifndef INCLUDED_JAZZ_ELEMENTS_BLOCK
A block is a moveable BlockHeader followed by a Tensor and a StringBuffer.
Definition block.h:99
void init_string_buffer()
Definition block.h:312
void get_attributes(AttributeMap *all_att)
Definition block.h:298
bool is_a_filter()
Check (in depth) the validity of a filter.
Definition block.cpp:191
int validate_offset(int offset)
Definition block.h:157
void set_string(int offset, const char *p_str)
Definition block.h:233
void get_dimensions(int *p_dim)
Definition block.h:130
int get_string_offset(pStringBuffer psb, const char *p_str)
Definition block.cpp:128
char * get_attribute(int attribute_id)
Definition block.h:252
char * get_string(int offset)
Definition block.h:202
void get_index(int offset, int *p_idx)
Definition block.h:176
int * align64bit(uintptr_t ipt)
Align a pointer (as uintptr_t) to the next 8 byte boundary assuming the block is aligned.
Definition block.h:339
void set_dimensions(int *p_dim)
Definition block.h:113
bool can_filter(pBlock p_block)
Definition block.h:381
void set_string(int *p_idx, const char *p_str)
Definition block.h:217
pStringBuffer p_string_buffer()
Definition block.h:363
int get_offset(int *p_idx)
Definition block.h:165
bool check_hash()
Definition block.h:446
char * get_string(int *p_idx)
Definition block.h:190
int * p_attribute_keys()
Definition block.h:352
bool find_NAs_in_tensor()
Definition block.cpp:49
void close_block(int set_has_NA=SET_HAS_NA_FALSE, bool set_hash=true, bool set_time=true)
Definition block.h:406
void set_attributes(AttributeMap *all_att)
Definition block.h:268
bool validate_index(int *p_idx)
Definition block.h:142
The namespace for Jazz Utils, Blocks, Kinds, Tuples, Containers, etc.
Definition block.cpp:39
uint64_t MurmurHash64A(const void *key, int len)
MurmurHash2, 64-bit versions, by Austin Appleby.
Definition utils.cpp:250
class Block * pBlock
A (forward defined) pointer to a Block.
Definition block.h:66
std::map< int, const char * > AttributeMap
A stdlib map that stores all the attributes of a Block at once; used by some Block methods.
Definition block.h:63
Header for a Movable Block (Tensor, Kind or Tuple) or a Dynamic Block (Index)
Definition types.h:242
int data_start
The data start of this tensor as an offset of &BlockHeader.tensor. (If it is a Tuple....
Definition types.h:204
A Binary Compatible BlockHeader without Index (and therefore constructors/destructors)
Definition types.h:265
int size
The total number of cells in the tensor.
Definition types.h:267
TimePoint created
Timestamp when the block was created.
Definition types.h:268
int cell_type
The type for the cells in the tensor. See CELL_TYPE_*.
Definition types.h:266
uint64_t hash64
Hash of everything but the header.
Definition types.h:274
Tensor tensor
A tensor for type cell_type and dimensions set by Block.set_dimensions()
Definition types.h:276
int total_bytes
Total size of the block everything included.
Definition types.h:272
int num_attributes
Number of elements in the JazzAttributesMap.
Definition types.h:271
bool has_NA
If true, at least one value is a NA and block requires NA-aware arithmetic.
Definition types.h:273
int rank
The number of dimensions.
Definition types.h:269
TensorDim range
The dimensions of the tensor in terms of ranges (Max. size is 2 Gb.)
Definition types.h:270
Structure at the end of a Block, initially created with init_string_buffer()
Definition types.h:282
int last_idx
The index to the first free space after the last stored string.
Definition types.h:285
int buffer_size
The size in bytes of buffer[].
Definition types.h:286
char buffer[]
The buffer where strings are stored starting with two zeroes for STRING_NA & STRING_EMPTY.
Definition types.h:287
bool alloc_failed
A previous call to get_string_offset() failed to alloc space for a string.
Definition types.h:284
int dim[MAX_TENSOR_RANK]
Dimensions for the Tensor. The product of all * (cell_type & 0xff) < 2 Gb.
Definition types.h:193
ItemHeader cell_item[0]
.. An array of BlockHeader used by Kinds and Tuples
Definition types.h:231
int cell_int[0]
.. CELL_TYPE_INTEGER, CELL_TYPE_FACTOR, CELL_TYPE_GRADE, CELL_TYPE_BOOLEAN and CELL_TYPE_STRING
Definition types.h:223