Reduct  v4.0.5-1-g4851deb
A functional and immutable language.
Loading...
Searching...
No Matches
atom.h
Go to the documentation of this file.
1#ifndef REDUCT_ATOM_H
2#define REDUCT_ATOM_H 1
3
4#include <reduct/arena.h>
5#include <reduct/defs.h>
6#include <reduct/native.h>
7#include <reduct/schema.h>
8#include <reduct/sync.h>
9
10#include <assert.h>
11#include <stdbool.h>
12#include <stdint.h>
13#include <string.h>
14
15struct reduct;
16
17/**
18 * @file atom.h
19 * @brief Atom representation and operations.
20 * @defgroup atom Atoms
21 *
22 * Atoms represent all strings within a Reduct expression, as such it also represents anything that a string can be,
23 * including numbers and natives.
24 *
25 * ## Interning
26 *
27 * Some atoms, primarily atoms loaded during initial parsing, are "interned", meaning they are stored in a global map to
28 * ensure that only one instance of a given string exists at any time. This improves memory usage but primarily it
29 * allows us to cache if an atom represents a number or a native function, avoiding repeated parsing or lookups during
30 * execution.
31 *
32 * @see [Wikipedia String interning](https://en.wikipedia.org/wiki/String_interning)
33 *
34 * ## Small and Large Strings
35 *
36 * For small strings, of a length less than `REDUCT_ATOM_SMALL_MAX`, the string data is stored directly within the atom
37 * structure.
38 *
39 * For larger strings, the data is allocated from a dedicated atom stack, with the atom referencing this stack to
40 * prevent the garbage collector from collecting it.
41 *
42 * ### Substrings and Superstrings
43 *
44 * The stack system also allows multiple atoms to share the same buffer within a stack.
45 *
46 * For example, if we wish to create an atom containing a substring of a large atom, we can simply point to the middle
47 * of the existing buffer and reference the same stack.
48 *
49 * Or, if we wish to create an atom that uses another atom as a prefix, and that other atom happens to be at the end of
50 * its stack, we can simply extend the allocation in place and return a new atom pointing to the same buffer.
51 *
52 * @{
53 */
54
55#define REDUCT_ATOM_MAP_INITIAL 64 ///< The initial size of the atom map.
56#define REDUCT_ATOM_MAP_GROWTH 2 ///< The growth factor of the atom map.
57#define REDUCT_ATOM_SMALL_MAX 16 ///< The maximum length of a small atom.
58
59#define REDUCT_ATOM_TOMBSTONE ((reduct_atom_t*)(uintptr_t)1) ///< Tombstone value for the atom map.
60
61#define REDUCT_ATOM_INDEX_NONE ((uint32_t)-1) ///< The value of an unindexed atom.
62
63/**
64 * @brief Atom lookup flags.
65 */
66typedef enum
67{
68 REDUCT_ATOM_LOOKUP_NONE = 0, ///< No flags.
69 REDUCT_ATOM_LOOKUP_QUOTED = 1 << 0 ///< Atom should be explicitly quoted.
71
72typedef uint8_t reduct_atom_flags_t;
73#define REDUCT_ATOM_FLAG_NONE 0 ///< No flags.
74#define REDUCT_ATOM_FLAG_NUMBER (1 << 0) ///< Atom is known to be number shaped.
75#define REDUCT_ATOM_FLAG_INTRINSIC (1 << 1) ///< Atom is known to represent an intrinsic.
76#define REDUCT_ATOM_FLAG_NATIVE (1 << 2) ///< Atom is known to represent a native function.
77#define REDUCT_ATOM_FLAG_NUMBER_CHECKED (1 << 3) ///< Atom has been checked for number shaping.
78#define REDUCT_ATOM_FLAG_NATIVE_CHECKED (1 << 4) ///< Atom has been checked for a native function.
79#define REDUCT_ATOM_FLAG_LARGE (1 << 5) ///< Atom has an allocated buffer within a stack.
80#define REDUCT_ATOM_FLAG_SCHEMA (1 << 6) ///< Atom is a schema field.
81#define REDUCT_ATOM_FLAG_QUOTED (1 << 7) ///< Atom is quoted.
82
83/**
84 * @brief Atom structure.
85 * @struct reduct_atom_t
86 */
87typedef struct reduct_atom
88{
89 uint32_t length; ///< The length of the string (must be first, check the `reduct_item_t` structure).
90 uint32_t hash; ///< The hash of the string.
91 uint32_t index; ///< The index within the atom map.
92 reduct_atom_flags_t flags; ///< Atom flags.
93 uint8_t _padding[3];
94 char* string; ///< Pointer to the data.
95 union {
96 char smallString[REDUCT_ATOM_SMALL_MAX]; ///< Small string data, atom must not have `REDUCT_ATOM_FLAG_LARGE`.
98 arena; ///< The arena that this atoms string was allocated from, atom must have `REDUCT_ATOM_FLAG_LARGE`.
99 };
100 union {
101 struct
102 {
103 /**
104 * An array of indexes to which this atom is a key.
105 *
106 * The array is indexed by the schema id and stores the index of the field within the schema.
107 */
109 uint32_t schemaCount;
110 };
111 double numberValue; ///< Pre-computed number value, atom must have `REDUCT_ATOM_FLAG_NUMBER`.
112 struct
113 {
114 reduct_native_fn native; ///< Cached native function, atom must have `REDUCT_ATOM_FLAG_NATIVE`.
116 intrinsic; ///< Cached intrinsic function, atom must have `REDUCT_ATOM_FLAG_NATIVE`.
117 };
118 };
120
121#define REDUCT_FNV_PRIME 16777619U ///< FNV-1a 32-bit prime.
122#define REDUCT_FNV_OFFSET 2166136261U ///< FNV-1a 32-bit offset basis.
123
124/**
125 * @brief Global atom-related environment structure.
126 * @struct reduct_atom_global_t
127 */
128typedef struct
129{
130 struct reduct_atom** map;
131 uint32_t size;
132 uint32_t capacity;
133 uint32_t mask;
134 uint32_t tombstones;
137
138/**
139 * @brief Initialize a global atom state.
140 *
141 * @param global Pointer to the global atom state to initialize.
142 */
144
145/**
146 * @brief Deinitialize a global atom state.
147 *
148 * @param global Pointer to the global atom state to deinitialize.
149 */
151
152/**
153 * @brief Check if an atom is equal to a string.
154 *
155 * @param atom Pointer to the atom.
156 * @param str The string to compare.
157 * @param len The length of the string.
158 * @return `true` if the atom is equal to the string, `false` otherwise.
159 */
160REDUCT_API bool reduct_atom_is_equal(reduct_atom_t* atom, const char* str, size_t len);
161
162/**
163 * @brief Create an atom with a reserved size.
164 *
165 * @param reduct Pointer to the Reduct structure.
166 * @param data The raw buffer to create the atom from.
167 * @param len The length of the buffer.
168 * @return A pointer to the atom.
169 */
170REDUCT_API reduct_atom_t* reduct_atom_new(struct reduct* reduct, size_t len);
171
172/**
173 * @brief Create an atom from a null-terminated string.
174 *
175 * @param reduct Pointer to the Reduct structure.
176 * @param str The null-terminated string.
177 * @return A pointer to the atom.
178 */
179REDUCT_API reduct_atom_t* reduct_atom_new_string(struct reduct* reduct, const char* str);
180
181/**
182 * @brief Create an atom from a number value.
183 *
184 * @param reduct Pointer to the Reduct structure.
185 * @param value The number value.
186 * @return A pointer to the atom.
187 */
188REDUCT_API reduct_atom_t* reduct_atom_new_number(struct reduct* reduct, double value);
189
190/**
191 * @brief Create an atom for a anonymous native function.
192 *
193 * @param reduct Pointer to the Reduct structure.
194 * @param native The native function pointer.
195 * @return A pointer to the atom.
196 */
198
199/**
200 * @brief Intern an existing atom into the Reduct structure.
201 *
202 * @param reduct Pointer to the Reduct structure.
203 * @param atom Pointer to the atom to intern.
204 * @return `true` if the atom is already interned or it was successfully added, `false` if an identical atom is already
205 * interned.
206 */
207REDUCT_API bool reduct_atom_intern(struct reduct* reduct, reduct_atom_t* atom);
208
209/**
210 * @brief Lookup an interned atom in the Reduct structure.
211 *
212 * Will create and intern a new atom if it does not exist.
213 *
214 * @param reduct Pointer to the Reduct structure.
215 * @param str The string to lookup.
216 * @param len The length of the string.
217 * @param flags Lookup flags to alter the interning behavior.
218 * @return A pointer to the atom.
219 */
220REDUCT_API reduct_atom_t* reduct_atom_lookup(struct reduct* reduct, const char* str, size_t len,
222
223/**
224 * @brief Retrieve the lookup flags required to lookup this specific atom.
225 *
226 * Really just used to check if an atom is quoted or not.
227 *
228 * @param atom The atom to check.
229 * @return The lookup flags.
230 */
232
233/**
234 * @brief Ensure an atom is interned.
235 *
236 * If the atom is already interned, it returns the existing atom.
237 * If an identical atom is already interned, the already interned atom is returned.
238 * Otherwise, it interns the atom and returns it.
239 *
240 * @param reduct Pointer to the Reduct structure.
241 * @param atom Pointer to the atom to ensure is interned.
242 * @return A pointer to the interned atom.
243 */
245 reduct_atom_t* atom)
246{
248 {
249 return atom;
250 }
251
252 if (REDUCT_LIKELY(reduct_atom_intern(reduct, atom)))
253 {
254 return atom;
255 }
256
257 return reduct_atom_lookup(reduct, atom->string, atom->length, reduct_atom_get_lookup_flags(atom));
258}
259
260/**
261 * @brief Cache if an atom is a number.
262 *
263 * @param atom Pointer to the atom.
264 */
266
267/**
268 * @brief Cache if an atom is a native function.
269 *
270 * @param reduct Pointer to the Reduct structure.
271 * @param atom Pointer to the atom.
272 */
273REDUCT_API void reduct_atom_check_native(struct reduct* reduct, reduct_atom_t* atom);
274
275/**
276 * @brief Retain an atom, preventing it from being collected by the garbage collector.
277 *
278 * @param reduct Pointer to the Reduct structure.
279 * @param atom Pointer to the atom.
280 */
281REDUCT_API void reduct_atom_retain(struct reduct* reduct, reduct_atom_t* atom);
282
283/**
284 * @brief Release an atom, potentially allowing the garbage collector to collect it.
285 *
286 * @param reduct Pointer to the Reduct structure.
287 * @param atom Pointer to the atom.
288 */
289REDUCT_API void reduct_atom_release(struct reduct* reduct, reduct_atom_t* atom);
290
291/**
292 * @brief Check if an atom is an intrinsic.
293 *
294 * @param reduct Pointer to the Reduct structure.
295 * @param atom Pointer to the atom.
296 * @return `true` if the atom is an intrinsic, `false` otherwise.
297 */
298static inline REDUCT_ALWAYS_INLINE bool reduct_atom_is_intrinsic(struct reduct* reduct, reduct_atom_t* atom)
299{
301 {
302 reduct_atom_check_native(reduct, atom);
303 }
304 return (atom->flags & REDUCT_ATOM_FLAG_INTRINSIC) != 0;
305}
306
307/**
308 * @brief Check if an atom is a native function.
309 *
310 * @param reduct Pointer to the Reduct structure.
311 * @param atom Pointer to the atom.
312 * @return `true` if the atom is a native function, `false` otherwise.
313 */
314static inline bool reduct_atom_is_native(struct reduct* reduct, reduct_atom_t* atom)
315{
317 {
318 reduct_atom_check_native(reduct, atom);
319 }
320 return (atom->flags & REDUCT_ATOM_FLAG_NATIVE) != 0;
321}
322
323/**
324 * @brief Check if an atom is number-shaped.
325 *
326 * @param atom Pointer to the atom.
327 * @return `true` if the atom is a number, `false` otherwise.
328 */
330{
332 {
334 }
335 return (atom->flags & REDUCT_ATOM_FLAG_NUMBER) != 0;
336}
337
338/**
339 * @brief Get the number value of an atom.
340 *
341 * @param atom Pointer to the atom.
342 * @return The number value.
343 */
345{
346 assert(reduct_atom_is_number(atom));
347 return atom->numberValue;
348}
349
350/**
351 * @brief Create a substring of an existing atom.
352 *
353 * @param reduct Pointer to the Reduct structure.
354 * @param atom Pointer to the source atom.
355 * @param start The starting index.
356 * @param len The length of the substring.
357 * @return A pointer to the new atom.
358 */
359REDUCT_API reduct_atom_t* reduct_atom_substr(struct reduct* reduct, reduct_atom_t* atom, size_t start, size_t len);
360
361/**
362 * @brief Create a superstring of an existing atom.
363 *
364 * If the atom is at the end of its stack and there is enough capacity, it will extend the existing allocation.
365 * Otherwise, it will allocate a new atom and copy the existing data.
366 *
367 * @param reduct Pointer to the Reduct structure.
368 * @param atom Pointer to the source atom.
369 * @param len The new total length.
370 * @return A pointer to the new atom.
371 */
372REDUCT_API reduct_atom_t* reduct_atom_superstr(struct reduct* reduct, reduct_atom_t* atom, size_t len);
373
374/**
375 * @brief Create a new atom by copying data directly into it.
376 *
377 * The atom is NOT interned and its hash is set to 0, avoiding
378 * the overhead of hash computation and map lookup.
379 *
380 * @param reduct Pointer to the Reduct structure.
381 * @param data The data to copy.
382 * @param len The length of the data.
383 * @return A pointer to the new atom.
384 */
385static inline REDUCT_ALWAYS_INLINE reduct_atom_t* reduct_atom_new_copy(struct reduct* reduct, const char* data,
386 size_t len)
387{
388 reduct_atom_t* atom = reduct_atom_new(reduct, len);
389 memcpy(atom->string, data, len);
390 return atom;
391}
392
393/**
394 * @brief Retrieve an integer value from an atom, regardless of if it is quoted or not.
395 *
396 * @param reduct Pointer to the Reduct structure.
397 * @param atom Pointer to the atom.
398 * @return The integer value.
399 */
400REDUCT_API int64_t reduct_atom_as_int(struct reduct* reduct, reduct_atom_t* atom);
401
402/**
403 * @brief Retrieve a number value from an atom, regardless of if it is quoted or not.
404 *
405 * @param reduct Pointer to the Reduct structure.
406 * @param atom Pointer to the atom.
407 * @return The number value.
408 */
409REDUCT_API double reduct_atom_as_number(struct reduct* reduct, reduct_atom_t* atom);
410
411/** @} */
412
413#endif
Arena allocation.
#define REDUCT_LIKELY(_x)
Definition defs.h:58
#define REDUCT_UNLIKELY(_x)
Definition defs.h:59
#define REDUCT_ALWAYS_INLINE
Definition defs.h:61
reduct_handle_t(* reduct_native_fn)(struct reduct *reduct, size_t argc, reduct_handle_t *argv)
Native function pointer type.
Definition defs.h:126
struct reduct_rvsdg_origin *(* reduct_native_intrinsic_fn)(struct reduct_builder *builder, struct reduct_list *expr)
Intrinsic handler function type.
Definition defs.h:132
#define REDUCT_API
Definition defs.h:24
static bool reduct_atom_is_native(struct reduct *reduct, reduct_atom_t *atom)
Check if an atom is a native function.
Definition atom.h:314
#define REDUCT_ATOM_FLAG_NATIVE
Atom is known to represent a native function.
Definition atom.h:76
REDUCT_API reduct_atom_lookup_flags_t reduct_atom_get_lookup_flags(reduct_atom_t *atom)
Retrieve the lookup flags required to lookup this specific atom.
REDUCT_API int64_t reduct_atom_as_int(struct reduct *reduct, reduct_atom_t *atom)
Retrieve an integer value from an atom, regardless of if it is quoted or not.
REDUCT_API reduct_atom_t * reduct_atom_superstr(struct reduct *reduct, reduct_atom_t *atom, size_t len)
Create a superstring of an existing atom.
static REDUCT_ALWAYS_INLINE reduct_atom_t * reduct_atom_ensure_interned(struct reduct *reduct, reduct_atom_t *atom)
Ensure an atom is interned.
Definition atom.h:244
static REDUCT_ALWAYS_INLINE double reduct_atom_get_number(reduct_atom_t *atom)
Get the number value of an atom.
Definition atom.h:344
REDUCT_API void reduct_atom_global_init(reduct_atom_global_t *global)
Initialize a global atom state.
reduct_atom_lookup_flags_t
Atom lookup flags.
Definition atom.h:67
REDUCT_API void reduct_atom_release(struct reduct *reduct, reduct_atom_t *atom)
Release an atom, potentially allowing the garbage collector to collect it.
REDUCT_API bool reduct_atom_intern(struct reduct *reduct, reduct_atom_t *atom)
Intern an existing atom into the Reduct structure.
#define REDUCT_ATOM_FLAG_NATIVE_CHECKED
Atom has been checked for a native function.
Definition atom.h:78
REDUCT_API reduct_atom_t * reduct_atom_new_number(struct reduct *reduct, double value)
Create an atom from a number value.
static REDUCT_ALWAYS_INLINE bool reduct_atom_is_number(reduct_atom_t *atom)
Check if an atom is number-shaped.
Definition atom.h:329
REDUCT_API void reduct_atom_retain(struct reduct *reduct, reduct_atom_t *atom)
Retain an atom, preventing it from being collected by the garbage collector.
#define REDUCT_ATOM_INDEX_NONE
The value of an unindexed atom.
Definition atom.h:61
REDUCT_API void reduct_atom_global_deinit(reduct_atom_global_t *global)
Deinitialize a global atom state.
static REDUCT_ALWAYS_INLINE bool reduct_atom_is_intrinsic(struct reduct *reduct, reduct_atom_t *atom)
Check if an atom is an intrinsic.
Definition atom.h:298
#define REDUCT_ATOM_SMALL_MAX
The maximum length of a small atom.
Definition atom.h:57
REDUCT_API reduct_atom_t * reduct_atom_new_string(struct reduct *reduct, const char *str)
Create an atom from a null-terminated string.
#define REDUCT_ATOM_FLAG_NUMBER_CHECKED
Atom has been checked for number shaping.
Definition atom.h:77
REDUCT_API reduct_atom_t * reduct_atom_new(struct reduct *reduct, size_t len)
Create an atom with a reserved size.
REDUCT_API bool reduct_atom_is_equal(reduct_atom_t *atom, const char *str, size_t len)
Check if an atom is equal to a string.
REDUCT_API reduct_atom_t * reduct_atom_substr(struct reduct *reduct, reduct_atom_t *atom, size_t start, size_t len)
Create a substring of an existing atom.
uint8_t reduct_atom_flags_t
Definition atom.h:72
REDUCT_API reduct_atom_t * reduct_atom_lookup(struct reduct *reduct, const char *str, size_t len, reduct_atom_lookup_flags_t flags)
Lookup an interned atom in the Reduct structure.
REDUCT_API void reduct_atom_check_number(reduct_atom_t *atom)
Cache if an atom is a number.
REDUCT_API void reduct_atom_check_native(struct reduct *reduct, reduct_atom_t *atom)
Cache if an atom is a native function.
#define REDUCT_ATOM_FLAG_NUMBER
Atom is known to be number shaped.
Definition atom.h:74
static REDUCT_ALWAYS_INLINE reduct_atom_t * reduct_atom_new_copy(struct reduct *reduct, const char *data, size_t len)
Create a new atom by copying data directly into it.
Definition atom.h:385
REDUCT_API reduct_atom_t * reduct_atom_new_native(struct reduct *reduct, reduct_native_fn native)
Create an atom for a anonymous native function.
REDUCT_API double reduct_atom_as_number(struct reduct *reduct, reduct_atom_t *atom)
Retrieve a number value from an atom, regardless of if it is quoted or not.
#define REDUCT_ATOM_FLAG_INTRINSIC
Atom is known to represent an intrinsic.
Definition atom.h:75
@ REDUCT_ATOM_LOOKUP_NONE
No flags.
Definition atom.h:68
@ REDUCT_ATOM_LOOKUP_QUOTED
Atom should be explicitly quoted.
Definition atom.h:69
uint32_t reduct_schema_index_t
Schema index type.
Definition schema.h:63
Native function and intrinsic registration.
Schema transformation.
Arena structure.
Definition arena.h:26
Global atom-related environment structure.
Definition atom.h:129
uint32_t tombstones
Definition atom.h:134
uint32_t capacity
Definition atom.h:132
reduct_rwmutex_t mutex
Definition atom.h:135
uint32_t mask
Definition atom.h:133
struct reduct_atom ** map
Definition atom.h:130
uint32_t size
Definition atom.h:131
Atom structure.
Definition atom.h:88
reduct_native_fn native
Cached native function, atom must have REDUCT_ATOM_FLAG_NATIVE.
Definition atom.h:114
uint32_t length
The length of the string (must be first, check the reduct_item_t structure).
Definition atom.h:89
double numberValue
Pre-computed number value, atom must have REDUCT_ATOM_FLAG_NUMBER.
Definition atom.h:111
reduct_native_intrinsic_fn intrinsic
Cached intrinsic function, atom must have REDUCT_ATOM_FLAG_NATIVE.
Definition atom.h:116
uint32_t hash
The hash of the string.
Definition atom.h:90
reduct_schema_index_t * schema
Definition atom.h:108
char * string
Pointer to the data.
Definition atom.h:94
reduct_arena_t * arena
The arena that this atoms string was allocated from, atom must have REDUCT_ATOM_FLAG_LARGE.
Definition atom.h:98
uint32_t index
The index within the atom map.
Definition atom.h:91
reduct_atom_flags_t flags
Atom flags.
Definition atom.h:92
uint32_t schemaCount
Definition atom.h:109
Read-Write Mutex structure.
Definition sync.h:23
Syncronization primitives.