PatchworkOS
Loading...
Searching...
No Matches
scan.c
Go to the documentation of this file.
1#include "scan.h"
2
3#include <ctype.h>
4#include <limits.h>
5#include <stdarg.h>
6#include <stddef.h>
7#include <stdint.h>
8#include <stdlib.h>
9#include <string.h>
10
11#include "common/digits.h"
12
13#ifndef __KERNEL__
14#include "user/common/file.h"
15#endif
16
17static int _scan_get(_format_ctx_t* ctx)
18{
19 int rc = EOF;
20
21#ifndef __KERNEL__
22 if (ctx->stream != NULL)
23 {
24 if (_FILE_CHECK_AVAIL(ctx->stream) != ERR)
25 {
26 rc = _FILE_GETC(ctx->stream);
27 }
28 }
29 else
30 {
31 rc = (*ctx->buffer == '\0') ? EOF : (unsigned char)*((ctx->buffer)++);
32 }
33#else
34 if (ctx->buffer != NULL)
35 {
36 rc = (*ctx->buffer == '\0') ? EOF : (unsigned char)*((ctx->buffer)++);
37 }
38#endif
39
40 if (rc != EOF)
41 {
42 ++(ctx->totalChars);
43 ++(ctx->currentChars);
44 }
45
46 return rc;
47}
48
49/* Helper function to put a read character back into the string or stream,
50 whatever is used for input.
51*/
52static void _scan_unget(int c, _format_ctx_t* ctx)
53{
54#ifndef __KERNEL__
55 if (ctx->stream != NULL)
56 {
57 ungetc(c, ctx->stream); /* TODO: Error? */
58 }
59 else
60 {
61 --(ctx->buffer);
62 }
63#else
64 (void)c;
65 if (ctx->buffer != NULL)
66 {
67 --(ctx->buffer);
68 }
69#endif
70
71 --(ctx->totalChars);
72 --(ctx->currentChars);
73}
74
/*
 * Check whether a character is a member of a scanset.
 *
 * The scanset is the character sequence [scanlist, end_scanlist). Each
 * character stands for itself. A '-' that follows a verbatim character and
 * is itself followed by another character describes an inclusive range
 * between the two ("a-z"). A '-' with no usable predecessor is matched
 * verbatim, and a '-' that is the last character of the list matches only a
 * literal '-'.
 *
 * scanlist      first character of the scanset
 * end_scanlist  one past the last character of the scanset
 * rc            character to look up, as an unsigned char value
 *
 * Returns 1 if rc is in the scanset, 0 otherwise.
 */
static int _scan_in_scanset(const char* scanlist, const char* end_scanlist, int rc)
{
    /* Last verbatim character seen, i.e. the candidate lower bound of a
       range; -1 when there is none. */
    int previous = -1;

    while (scanlist != end_scanlist)
    {
        if ((*scanlist == '-') && (previous != -1))
        {
            /* possible scangroup ("a-z") */
            if (++scanlist == end_scanlist)
            {
                /* '-' at end of scanlist does not describe a scangroup */
                return rc == '-';
            }

            /* The lower bound itself was already compared verbatim on the
               previous iteration, so only (previous, upper] is left to test.
               A direct bounds comparison replaces walking the range one
               character at a time. */
            if ((rc > previous) && (rc <= (unsigned char)*scanlist))
            {
                return 1;
            }

            /* scanlist stays on the upper bound; the next iteration treats
               it as an ordinary verbatim character. */
            previous = -1;
        }
        else
        {
            /* not a scangroup, check verbatim */
            if (rc == (unsigned char)*scanlist)
            {
                return 1;
            }

            previous = (unsigned char)(*scanlist++);
        }
    }

    return 0;
}
116
/*
 * Process one conversion specification of a scanf-style format string.
 *
 * spec is presumably expected to point at the '%' introducing the
 * specification: the function steps past that character before looking at
 * anything else. It then parses, in order, an optional '*' (suppress
 * assignment), an optional decimal field width, an optional length modifier
 * (hh, h, l, ll, j, z, t, L) and the conversion character, reads input
 * through _scan_get()/_scan_unget(), and - unless suppressed - stores the
 * result through the next pointer taken from ctx->arg.
 *
 * Handled conversions: %% c s [ d i o u x p n. The floating point
 * conversions are recognised but not implemented and end in the final
 * "return NULL".
 *
 * ctx->flags, ctx->base, ctx->currentChars, ctx->width and ctx->precision
 * are reset for every specification. ctx->maxChars is incremented each time
 * a value is stored, and set to -1 when input runs out before anything was
 * read while it is still 0.
 *
 * Returns:
 *  - a pointer just past the specification when the conversion succeeded,
 *  - NULL when input ran out or did not match, for float conversions, and
 *    when the flag combination matches no case of the final switch,
 *  - the original spec pointer when the conversion character is unknown.
 */
117const char* _scan(const char* spec, _format_ctx_t* ctx)
118{
119 /* generic input character */
120 int rc;
121 const char* prev_spec;
122 const char* orig_spec = spec;
123 int value_parsed;
124
125 if (*(++spec) == '%')
126 {
127 /* %% -> match single '%' */
128 rc = _scan_get(ctx);
129
130 switch (rc)
131 {
132 case EOF:
133
134 /* input error */
135 if (ctx->maxChars == 0)
136 {
137 ctx->maxChars = -1;
138 }
139
140 return NULL;
141
142 case '%':
143 return ++spec;
144
145 default:
/* Input is not a '%': the character is pushed back and parsing continues
   below with spec still on the second '%', which ends up in the "bad
   conversion" default of the conversion switch. */
146 _scan_unget(rc, ctx);
147 break;
148 }
149 }
150
151 /* Initializing ctx structure */
152 ctx->flags = 0;
153 ctx->base = -1;
154 ctx->currentChars = 0;
155 ctx->width = 0;
156 ctx->precision = 0;
157
158 /* '*' suppresses assigning parsed value to variable */
159 if (*spec == '*')
160 {
161 ctx->flags |= FORMAT_SUPPRESSED;
162 ++spec;
163 }
164
165 /* If a width is given, strtol() will return its value. If not given,
166 strtol() will return zero. In both cases, endptr will point to the
167 rest of the conversion specifier - just what we need.
168 */
169 prev_spec = spec;
170 ctx->width = (int)strtol(spec, (char**)&spec, 10);
171
/* strtol() consumed nothing, so no width was given: SIZE_MAX makes the
   "currentChars < width" loop conditions below effectively unlimited. */
172 if (spec == prev_spec)
173 {
174 ctx->width = SIZE_MAX;
175 }
176
177 /* Optional length modifier
178 We step one character ahead in any case, and step back only if we find
179 there has been no length modifier (or step ahead another character if it
180 has been "hh" or "ll").
181 */
182 switch (*(spec++))
183 {
184 case 'h':
185 if (*spec == 'h')
186 {
187 /* hh -> char */
188 ctx->flags |= FORMAT_CHAR;
189 ++spec;
190 }
191 else
192 {
193 /* h -> short */
194 ctx->flags |= FORMAT_SHORT;
195 }
196
197 break;
198
199 case 'l':
200 if (*spec == 'l')
201 {
202 /* ll -> long long */
203 ctx->flags |= FORMAT_LLONG;
204 ++spec;
205 }
206 else
207 {
208 /* l -> long */
209 ctx->flags |= FORMAT_LONG;
210 }
211
212 break;
213
214 case 'j':
215 /* j -> intmax_t, which might or might not be long long */
216 ctx->flags |= FORMAT_INTMAX;
217 break;
218
219 case 'z':
220 /* z -> size_t, which might or might not be unsigned int */
221 ctx->flags |= FORMAT_SIZE;
222 break;
223
224 case 't':
225 /* t -> ptrdiff_t, which might or might not be long */
226 ctx->flags |= FORMAT_PTRDIFF;
227 break;
228
229 case 'L':
230 /* L -> long double */
231 ctx->flags |= FORMAT_LDOUBLE;
232 break;
233
234 default:
235 --spec;
236 break;
237 }
238
239 /* Conversion specifier */
240
241 /* whether valid input had been parsed */
242 value_parsed = 0;
243
/* Integer conversions only select base/flags here and break out to the
   shared integer parser after this switch; c, s, [ and n are handled
   completely inside their case and return from there. */
244 switch (*spec)
245 {
246 case 'd':
247 ctx->base = 10;
248 break;
249
250 case 'i':
/* base 0: decided later from the input prefix (0x -> 16, 0 -> 8, else 10) */
251 ctx->base = 0;
252 break;
253
254 case 'o':
255 ctx->base = 8;
256 ctx->flags |= FORMAT_UNSIGNED;
257 break;
258
259 case 'u':
260 ctx->base = 10;
261 ctx->flags |= FORMAT_UNSIGNED;
262 break;
263
264 case 'x':
265 ctx->base = 16;
266 ctx->flags |= FORMAT_UNSIGNED;
267 break;
268
/* Float conversions: base stays -1, so the integer parser is skipped and
   control reaches the "TODO: Floats" return at the end of the function. */
269 case 'f':
270 case 'F':
271 case 'e':
272 case 'E':
273 case 'g':
274 case 'G':
275 case 'a':
276 case 'A':
277 break;
278
279 case 'c':
280 {
/* destination; stays NULL when assignment is suppressed */
281 char* c = NULL;
282
283 if (!(ctx->flags & FORMAT_SUPPRESSED))
284 {
285 c = va_arg(ctx->arg, char*);
286 }
287
288 /* for %c, default width is one */
289 if (ctx->width == SIZE_MAX)
290 {
291 ctx->width = 1;
292 }
293
294 /* reading until width reached or input exhausted */
295 while ((ctx->currentChars < ctx->width) && ((rc = _scan_get(ctx)) != EOF))
296 {
297 if (c != NULL)
298 {
299 *(c++) = rc;
300 }
301
302 value_parsed = 1;
303 }
304
305 /* width or input exhausted */
/* note: no terminating NUL is written for %c */
306 if (value_parsed)
307 {
308 if (c != NULL)
309 {
310 ++ctx->maxChars;
311 }
312
313 return ++spec;
314 }
315 else
316 {
317 /* input error, no character read */
318 if (ctx->maxChars == 0)
319 {
320 ctx->maxChars = -1;
321 }
322
323 return NULL;
324 }
325 }
326
327 case 's':
328 {
/* destination; stays NULL when assignment is suppressed. The field width
   is the only bound on how much is written through it. */
329 char* c = NULL;
330
331 if (!(ctx->flags & FORMAT_SUPPRESSED))
332 {
333 c = va_arg(ctx->arg, char*);
334 }
335
336 while ((ctx->currentChars < ctx->width) && ((rc = _scan_get(ctx)) != EOF))
337 {
338 if (isspace((unsigned char)rc))
339 {
/* whitespace is never part of the item; leave it in the input */
340 _scan_unget(rc, ctx);
341
342 if (value_parsed)
343 {
344 /* matching sequence terminated by whitespace */
345 if (c != NULL)
346 {
347 *c = '\0';
348 ++ctx->maxChars;
349 }
350
351 return ++spec;
352 }
353 else
354 {
355 /* matching error */
356 return NULL;
357 }
358 }
359 else
360 {
361 /* match */
362 if (c != NULL)
363 {
364 *(c++) = rc;
365 }
366
367 value_parsed = 1;
368 }
369 }
370
371 /* width or input exhausted */
372 if (value_parsed)
373 {
374 if (c != NULL)
375 {
376 *c = '\0';
377 ++ctx->maxChars;
378 }
379
380 return ++spec;
381 }
382 else
383 {
384 /* input error, no character read */
385 if (ctx->maxChars == 0)
386 {
387 ctx->maxChars = -1;
388 }
389
390 return NULL;
391 }
392 }
393
394 case '[':
395 {
/* endspec is used to locate the closing ']'; spec is moved to the first
   scanlist character so that [spec, endspec) is the scanlist. */
396 const char* endspec = spec;
397 int negative_scanlist = 0;
398 char* c = NULL;
399
400 if (!(ctx->flags & FORMAT_SUPPRESSED))
401 {
402 c = va_arg(ctx->arg, char*);
403 }
404
405 if (*(++endspec) == '^')
406 {
407 negative_scanlist = 1;
408 ++endspec;
409 }
410
411 spec = endspec;
412
/* The increment happens before the first test, so a ']' directly after
   '[' or "[^" is taken as a scanlist member rather than the terminator. */
413 do
414 {
415 /* TODO: This can run beyond a malformed format string */
416 ++endspec;
417 } while (*endspec != ']');
418
419 /* read according to scanlist, equiv. to %s above */
420 while ((ctx->currentChars < ctx->width) && ((rc = _scan_get(ctx)) != EOF))
421 {
422 if (negative_scanlist)
423 {
/* "[^...]": stop at the first character that IS in the list */
424 if (_scan_in_scanset(spec, endspec, rc))
425 {
426 _scan_unget(rc, ctx);
427 break;
428 }
429 }
430 else
431 {
/* "[...]": stop at the first character that is NOT in the list */
432 if (!_scan_in_scanset(spec, endspec, rc))
433 {
434 _scan_unget(rc, ctx);
435 break;
436 }
437 }
438
439 if (c != NULL)
440 {
441 *(c++) = rc;
442 }
443
444 value_parsed = 1;
445 }
446
447 /* width or input exhausted */
448 if (value_parsed)
449 {
450 if (c != NULL)
451 {
452 *c = '\0';
453 ++ctx->maxChars;
454 }
455
/* continue after the closing ']' */
456 return ++endspec;
457 }
458 else
459 {
460 if (ctx->maxChars == 0)
461 {
462 ctx->maxChars = -1;
463 }
464
465 return NULL;
466 }
467 }
468
469 case 'p':
470 ctx->base = 16;
471 ctx->flags |= FORMAT_POINTER;
472 break;
473
474 case 'n':
475 {
/* %n consumes no input: it stores the running character count and does
   not change ctx->maxChars. */
476 if (!(ctx->flags & FORMAT_SUPPRESSED))
477 {
478 int* val = va_arg(ctx->arg, int*);
479 *val = ctx->totalChars;
480 }
481
482 return ++spec;
483 }
484
485 default:
486 /* No conversion specifier. Bad conversion. */
487 return orig_spec;
488 }
489
490 if (ctx->base != -1)
491 {
492 /* integer conversion */
493 uintmax_t value = 0; /* absolute value read */
494 int prefix_parsed = 0;
/* sign doubles as parser state: 0 until the first non-whitespace character
   has been seen, afterwards +1 or -1 */
495 int sign = 0;
496
497 while ((ctx->currentChars < ctx->width) && ((rc = _scan_get(ctx)) != EOF))
498 {
499 if (isspace((unsigned char)rc))
500 {
501 if (sign)
502 {
503 /* matching sequence terminated by whitespace */
504 _scan_unget(rc, ctx);
505 break;
506 }
507 else
508 {
509 /* leading whitespace not counted against width */
510 ctx->currentChars--;
511 }
512 }
513 else
514 {
515 if (!sign)
516 {
517 /* no sign parsed yet */
518 switch (rc)
519 {
520 case '-':
521 sign = -1;
522 break;
523
524 case '+':
525 sign = 1;
526 break;
527
528 default:
529 /* not a sign; put back character */
530 sign = 1;
531 _scan_unget(rc, ctx);
532 break;
533 }
534 }
535 else
536 {
537 if (!prefix_parsed)
538 {
539 /* no prefix (0x... for hex, 0... for octal) parsed yet */
540 prefix_parsed = 1;
541
542 if (rc != '0')
543 {
544 /* not a prefix; if base not yet set, set to decimal */
545 if (ctx->base == 0)
546 {
547 ctx->base = 10;
548 }
549
550 _scan_unget(rc, ctx);
551 }
552 else
553 {
554 /* starts with zero, so it might be a prefix. */
555 /* check what follows next (might be 0x...) */
556 if ((ctx->currentChars < ctx->width) && ((rc = _scan_get(ctx)) != EOF))
557 {
558 if (tolower((unsigned char)rc) == 'x')
559 {
560 /* 0x... would be prefix for hex base... */
/* value_parsed is not set on this path: "0x" only counts once a hex digit
   follows */
561 if ((ctx->base == 0) || (ctx->base == 16))
562 {
563 ctx->base = 16;
564 }
565 else
566 {
567 /* ...unless already set to other value */
568 _scan_unget(rc, ctx);
569 value_parsed = 1;
570 }
571 }
572 else
573 {
574 /* 0... but not 0x.... would be octal prefix */
575 _scan_unget(rc, ctx);
576
577 if (ctx->base == 0)
578 {
579 ctx->base = 8;
580 }
581
582 /* in any case we have read a zero */
583 value_parsed = 1;
584 }
585 }
586 else
587 {
588 /* failed to read beyond the initial zero */
589 value_parsed = 1;
590 break;
591 }
592 }
593 }
594 else
595 {
/* Look the lower-cased character up in the first ctx->base entries of
   _digits; its index in that table is the digit's value. */
596 char* digitptr = (char*)memchr(_digits, tolower((unsigned char)rc), ctx->base);
597
598 if (digitptr == NULL)
599 {
600 /* end of input item */
601 _scan_unget(rc, ctx);
602 break;
603 }
604
/* accumulated in uintmax_t with no overflow check */
605 value *= ctx->base;
606 value += digitptr - _digits;
607 value_parsed = 1;
608 }
609 }
610 }
611 }
612
613 /* width or input exhausted, or non-matching character */
614 if (!value_parsed)
615 {
616 /* out of input before anything could be parsed - input error */
617 /* FIXME: if first character does not match, value_parsed is not set - but it is NOT an input error */
/* NOTE(review): rc has no initializer; if ctx->width is 0 the loop above
   never calls _scan_get() and rc appears to be read unassigned here -
   confirm and initialize if so. */
618 if ((ctx->maxChars == 0) && (rc == EOF))
619 {
620 ctx->maxChars = -1;
621 }
622
623 return NULL;
624 }
625
626 /* convert value to target type and assign to parameter */
/* value is unsigned, so "value * sign" is computed in uintmax_t; a sign of
   -1 therefore yields the modular negation, which each cast then narrows to
   the target type. */
/* NOTE(review): in this listing the rest of the switch expression and
   several case labels (original lines 630-631, 637, 645, 661, 669, 677)
   are not visible - check against the real source file. */
627 if (!(ctx->flags & FORMAT_SUPPRESSED))
628 {
629 switch (ctx->flags &
632 {
633 case FORMAT_CHAR:
634 *(va_arg(ctx->arg, char*)) = (char)(value * sign);
635 break;
636
638 *(va_arg(ctx->arg, unsigned char*)) = (unsigned char)(value * sign);
639 break;
640
641 case FORMAT_SHORT:
642 *(va_arg(ctx->arg, short*)) = (short)(value * sign);
643 break;
644
646 *(va_arg(ctx->arg, unsigned short*)) = (unsigned short)(value * sign);
647 break;
648
649 case 0:
650 *(va_arg(ctx->arg, int*)) = (int)(value * sign);
651 break;
652
653 case FORMAT_UNSIGNED:
654 *(va_arg(ctx->arg, unsigned int*)) = (unsigned int)(value * sign);
655 break;
656
657 case FORMAT_LONG:
658 *(va_arg(ctx->arg, long*)) = (long)(value * sign);
659 break;
660
662 *(va_arg(ctx->arg, unsigned long*)) = (unsigned long)(value * sign);
663 break;
664
665 case FORMAT_LLONG:
666 *(va_arg(ctx->arg, long long*)) = (long long)(value * sign);
667 break;
668
670 *(va_arg(ctx->arg, unsigned long long*)) = (unsigned long long)(value * sign);
671 break;
672
673 case FORMAT_INTMAX:
674 *(va_arg(ctx->arg, intmax_t*)) = (intmax_t)(value * sign);
675 break;
676
678 *(va_arg(ctx->arg, uintmax_t*)) = (uintmax_t)(value * sign);
679 break;
680
681 case FORMAT_SIZE:
682 /* FORMAT_SIZE always implies unsigned */
683 *(va_arg(ctx->arg, size_t*)) = (size_t)(value * sign);
684 break;
685
686 case FORMAT_PTRDIFF:
687 /* FORMAT_PTRDIFF always implies signed */
688 *(va_arg(ctx->arg, ptrdiff_t*)) = (ptrdiff_t)(value * sign);
689 break;
690
691 case FORMAT_POINTER:
692 /* FORMAT_POINTER always implies unsigned */
693 *(uintptr_t*)(va_arg(ctx->arg, void*)) = (uintptr_t)(value * sign);
694 break;
695
696 default:
697 return NULL; /* behaviour unspecified */
698 }
699
/* one more value assigned */
700 ++(ctx->maxChars);
701 }
702
703 return ++spec;
704 }
705
706 /* TODO: Floats. */
707 return NULL;
708}
_PUBLIC int tolower(int c)
Definition tolower.c:5
_PUBLIC int isspace(int c)
Definition isspace.c:5
const char _digits[]
Definition digits.c:3
@ FORMAT_INTMAX
Definition format.h:20
@ FORMAT_SIZE
Definition format.h:21
@ FORMAT_LDOUBLE
Definition format.h:25
@ FORMAT_CHAR
Definition format.h:16
@ FORMAT_UNSIGNED
Definition format.h:27
@ FORMAT_POINTER
Definition format.h:23
@ FORMAT_SHORT
Definition format.h:17
@ FORMAT_LONG
Definition format.h:18
@ FORMAT_SUPPRESSED
Definition format.h:15
@ FORMAT_LLONG
Definition format.h:19
@ FORMAT_PTRDIFF
Definition format.h:22
#define NULL
Pointer error value.
Definition NULL.h:23
#define ERR
Integer error value.
Definition ERR.h:17
__UINTMAX_TYPE__ uintmax_t
Definition inttypes.h:12
__INTMAX_TYPE__ intmax_t
Definition inttypes.h:11
__PTRDIFF_TYPE__ ptrdiff_t
Definition ptrdiff_t.h:4
static void _scan_unget(int c, _format_ctx_t *ctx)
Definition scan.c:52
static int _scan_in_scanset(const char *scanlist, const char *end_scanlist, int rc)
Definition scan.c:76
const char * _scan(const char *spec, _format_ctx_t *ctx)
Definition scan.c:117
static int _scan_get(_format_ctx_t *ctx)
Definition scan.c:17
#define _FILE_GETC(stream)
Definition file.h:49
#define _FILE_CHECK_AVAIL(fh)
Definition file.h:53
#define va_arg(ap, type)
Definition stdarg.h:11
#define SIZE_MAX
Definition stdint.h:127
__UINTPTR_TYPE__ uintptr_t
Definition stdint.h:43
#define EOF
Definition stdio.h:25
_PUBLIC int ungetc(int c, FILE *stream)
Definition ungetc.c:5
#define strtol(nptr, endptr, base)
Definition stdlib.h:27
_PUBLIC void * memchr(const void *s, int c, size_t n)
Definition memchr.c:3
va_list arg
Definition format.h:45
int64_t precision
Definition format.h:43
FILE * stream
Definition format.h:44
uint64_t maxChars
Definition format.h:38
uint64_t width
Definition format.h:42
_format_flags_t flags
Definition format.h:37
int32_t base
Definition format.h:36
uint64_t currentChars
Definition format.h:40
char * buffer
Definition format.h:41
uint64_t totalChars
Definition format.h:39