// Copyright 2014 BitPay Inc.
// Distributed under the MIT software license, see the accompanying
// file COPYING or https://opensource.org/licenses/mit-license.php.
4

            
5
#include <string.h>
6
#include <vector>
7
#include <stdio.h>
8
#include "univalue.h"
9
#include "univalue_utffilter.h"
10

            
11
/*
 * Upper bound on container nesting accepted while parsing.
 *
 * According to stackexchange, the original JSON test suite wanted to cap
 * depth at 22, whereas widely-deployed PHP bails at depth 512.  We follow
 * PHP's lead, which should be more than sufficient (further stackexchange
 * comments indicate depth > 32 rarely occurs).
 */
static constexpr size_t MAX_JSON_DEPTH = 512;
18

            
19
// Returns true when ch is one of the ASCII decimal digits '0'..'9'.
static bool json_isdigit(int ch)
{
    if (ch < '0')
        return false;
    return ch <= '9';
}
23

            
24
// Convert a run of hexadecimal characters to an unsigned integer.
//
// Scans [first, last) and stops at the first character that is not a hex
// digit (0-9, a-f, A-F).  `out` receives the value accumulated up to that
// point (0 when nothing was consumed).  The return value points at the first
// unconsumed character, which equals `last` when the whole range was hex.
static const char *hatoui(const char *first, const char *last,
                          unsigned int& out)
{
    unsigned int value = 0;
    const char *cur = first;

    while (cur != last) {
        const char c = *cur;
        int nibble;

        if (c >= '0' && c <= '9')
            nibble = c - '0';
        else if (c >= 'a' && c <= 'f')
            nibble = c - 'a' + 10;
        else if (c >= 'A' && c <= 'F')
            nibble = c - 'A' + 10;
        else
            break;                            // not a hex digit: stop here

        value = 16 * value + nibble;
        ++cur;
    }

    out = value;
    return cur;
}
50

            
51
enum jtokentype getJsonToken(std::string& tokenVal, unsigned int& consumed,
52
                            const char *raw, const char *end)
53
{
54
    tokenVal.clear();
55
    consumed = 0;
56

            
57
    const char *rawStart = raw;
58

            
59
    while (raw < end && (json_isspace(*raw)))          // skip whitespace
60
        raw++;
61

            
62
    if (raw >= end)
63
        return JTOK_NONE;
64

            
65
    switch (*raw) {
66

            
67
    case '{':
68
        raw++;
69
        consumed = (raw - rawStart);
70
        return JTOK_OBJ_OPEN;
71
    case '}':
72
        raw++;
73
        consumed = (raw - rawStart);
74
        return JTOK_OBJ_CLOSE;
75
    case '[':
76
        raw++;
77
        consumed = (raw - rawStart);
78
        return JTOK_ARR_OPEN;
79
    case ']':
80
        raw++;
81
        consumed = (raw - rawStart);
82
        return JTOK_ARR_CLOSE;
83

            
84
    case ':':
85
        raw++;
86
        consumed = (raw - rawStart);
87
        return JTOK_COLON;
88
    case ',':
89
        raw++;
90
        consumed = (raw - rawStart);
91
        return JTOK_COMMA;
92

            
93
    case 'n':
94
    case 't':
95
    case 'f':
96
        if (!strncmp(raw, "null", 4)) {
97
            raw += 4;
98
            consumed = (raw - rawStart);
99
            return JTOK_KW_NULL;
100
        } else if (!strncmp(raw, "true", 4)) {
101
            raw += 4;
102
            consumed = (raw - rawStart);
103
            return JTOK_KW_TRUE;
104
        } else if (!strncmp(raw, "false", 5)) {
105
            raw += 5;
106
            consumed = (raw - rawStart);
107
            return JTOK_KW_FALSE;
108
        } else
109
            return JTOK_ERR;
110

            
111
    case '-':
112
    case '0':
113
    case '1':
114
    case '2':
115
    case '3':
116
    case '4':
117
    case '5':
118
    case '6':
119
    case '7':
120
    case '8':
121
    case '9': {
122
        // part 1: int
123
        std::string numStr;
124

            
125
        const char *first = raw;
126

            
127
        const char *firstDigit = first;
128
        if (!json_isdigit(*firstDigit))
129
            firstDigit++;
130
        if ((*firstDigit == '0') && json_isdigit(firstDigit[1]))
131
            return JTOK_ERR;
132

            
133
        numStr += *raw;                       // copy first char
134
        raw++;
135

            
136
        if ((*first == '-') && (raw < end) && (!json_isdigit(*raw)))
137
            return JTOK_ERR;
138

            
139
        while (raw < end && json_isdigit(*raw)) {  // copy digits
140
            numStr += *raw;
141
            raw++;
142
        }
143

            
144
        // part 2: frac
145
        if (raw < end && *raw == '.') {
146
            numStr += *raw;                   // copy .
147
            raw++;
148

            
149
            if (raw >= end || !json_isdigit(*raw))
150
                return JTOK_ERR;
151
            while (raw < end && json_isdigit(*raw)) { // copy digits
152
                numStr += *raw;
153
                raw++;
154
            }
155
        }
156

            
157
        // part 3: exp
158
        if (raw < end && (*raw == 'e' || *raw == 'E')) {
159
            numStr += *raw;                   // copy E
160
            raw++;
161

            
162
            if (raw < end && (*raw == '-' || *raw == '+')) { // copy +/-
163
                numStr += *raw;
164
                raw++;
165
            }
166

            
167
            if (raw >= end || !json_isdigit(*raw))
168
                return JTOK_ERR;
169
            while (raw < end && json_isdigit(*raw)) { // copy digits
170
                numStr += *raw;
171
                raw++;
172
            }
173
        }
174

            
175
        tokenVal = numStr;
176
        consumed = (raw - rawStart);
177
        return JTOK_NUMBER;
178
        }
179

            
180
    case '"': {
181
        raw++;                                // skip "
182

            
183
        std::string valStr;
184
        JSONUTF8StringFilter writer(valStr);
185

            
186
        while (true) {
187
            if (raw >= end || (unsigned char)*raw < 0x20)
188
                return JTOK_ERR;
189

            
190
            else if (*raw == '\\') {
191
                raw++;                        // skip backslash
192

            
193
                if (raw >= end)
194
                    return JTOK_ERR;
195

            
196
                switch (*raw) {
197
                case '"':  writer.push_back('\"'); break;
198
                case '\\': writer.push_back('\\'); break;
199
                case '/':  writer.push_back('/'); break;
200
                case 'b':  writer.push_back('\b'); break;
201
                case 'f':  writer.push_back('\f'); break;
202
                case 'n':  writer.push_back('\n'); break;
203
                case 'r':  writer.push_back('\r'); break;
204
                case 't':  writer.push_back('\t'); break;
205

            
206
                case 'u': {
207
                    unsigned int codepoint;
208
                    if (raw + 1 + 4 >= end ||
209
                        hatoui(raw + 1, raw + 1 + 4, codepoint) !=
210
                               raw + 1 + 4)
211
                        return JTOK_ERR;
212
                    writer.push_back_u(codepoint);
213
                    raw += 4;
214
                    break;
215
                    }
216
                default:
217
                    return JTOK_ERR;
218

            
219
                }
220

            
221
                raw++;                        // skip esc'd char
222
            }
223

            
224
            else if (*raw == '"') {
225
                raw++;                        // skip "
226
                break;                        // stop scanning
227
            }
228

            
229
            else {
230
                writer.push_back(static_cast<unsigned char>(*raw));
231
                raw++;
232
            }
233
        }
234

            
235
        if (!writer.finalize())
236
            return JTOK_ERR;
237
        tokenVal = valStr;
238
        consumed = (raw - rawStart);
239
        return JTOK_STRING;
240
        }
241

            
242
    default:
243
        return JTOK_ERR;
244
    }
245
}
246

            
247
// Bit flags recording which kinds of token the parser will accept next.
enum expect_bits : unsigned {
    EXP_OBJ_NAME  = 0x01U,
    EXP_COLON     = 0x02U,
    EXP_ARR_VALUE = 0x04U,
    EXP_VALUE     = 0x08U,
    EXP_NOT_VALUE = 0x10U,
};

// Test / set / clear one EXP_* flag.  These operate on a variable named
// `expectMask` that must be in scope at the point of use.
#define expect(bit)      (expectMask & (EXP_##bit))
#define setExpect(bit)   (expectMask |= (EXP_##bit))
#define clearExpect(bit) (expectMask &= ~(EXP_##bit))
258

            
259
bool UniValue::read(const char *raw, size_t size)
260
{
261
    clear();
262

            
263
    uint32_t expectMask = 0;
264
    std::vector<UniValue*> stack;
265

            
266
    std::string tokenVal;
267
    unsigned int consumed;
268
    enum jtokentype tok = JTOK_NONE;
269
    enum jtokentype last_tok = JTOK_NONE;
270
    const char* end = raw + size;
271
    do {
272
        last_tok = tok;
273

            
274
        tok = getJsonToken(tokenVal, consumed, raw, end);
275
        if (tok == JTOK_NONE || tok == JTOK_ERR)
276
            return false;
277
        raw += consumed;
278

            
279
        bool isValueOpen = jsonTokenIsValue(tok) ||
280
            tok == JTOK_OBJ_OPEN || tok == JTOK_ARR_OPEN;
281

            
282
        if (expect(VALUE)) {
283
            if (!isValueOpen)
284
                return false;
285
            clearExpect(VALUE);
286

            
287
        } else if (expect(ARR_VALUE)) {
288
            bool isArrValue = isValueOpen || (tok == JTOK_ARR_CLOSE);
289
            if (!isArrValue)
290
                return false;
291

            
292
            clearExpect(ARR_VALUE);
293

            
294
        } else if (expect(OBJ_NAME)) {
295
            bool isObjName = (tok == JTOK_OBJ_CLOSE || tok == JTOK_STRING);
296
            if (!isObjName)
297
                return false;
298

            
299
        } else if (expect(COLON)) {
300
            if (tok != JTOK_COLON)
301
                return false;
302
            clearExpect(COLON);
303

            
304
        } else if (!expect(COLON) && (tok == JTOK_COLON)) {
305
            return false;
306
        }
307

            
308
        if (expect(NOT_VALUE)) {
309
            if (isValueOpen)
310
                return false;
311
            clearExpect(NOT_VALUE);
312
        }
313

            
314
        switch (tok) {
315

            
316
        case JTOK_OBJ_OPEN:
317
        case JTOK_ARR_OPEN: {
318
            VType utyp = (tok == JTOK_OBJ_OPEN ? VOBJ : VARR);
319
            if (!stack.size()) {
320
                if (utyp == VOBJ)
321
                    setObject();
322
                else
323
                    setArray();
324
                stack.push_back(this);
325
            } else {
326
                UniValue tmpVal(utyp);
327
                UniValue *top = stack.back();
328
                top->values.push_back(tmpVal);
329

            
330
                UniValue *newTop = &(top->values.back());
331
                stack.push_back(newTop);
332
            }
333

            
334
            if (stack.size() > MAX_JSON_DEPTH)
335
                return false;
336

            
337
            if (utyp == VOBJ)
338
                setExpect(OBJ_NAME);
339
            else
340
                setExpect(ARR_VALUE);
341
            break;
342
            }
343

            
344
        case JTOK_OBJ_CLOSE:
345
        case JTOK_ARR_CLOSE: {
346
            if (!stack.size() || (last_tok == JTOK_COMMA))
347
                return false;
348

            
349
            VType utyp = (tok == JTOK_OBJ_CLOSE ? VOBJ : VARR);
350
            UniValue *top = stack.back();
351
            if (utyp != top->getType())
352
                return false;
353

            
354
            stack.pop_back();
355
            clearExpect(OBJ_NAME);
356
            setExpect(NOT_VALUE);
357
            break;
358
            }
359

            
360
        case JTOK_COLON: {
361
            if (!stack.size())
362
                return false;
363

            
364
            UniValue *top = stack.back();
365
            if (top->getType() != VOBJ)
366
                return false;
367

            
368
            setExpect(VALUE);
369
            break;
370
            }
371

            
372
        case JTOK_COMMA: {
373
            if (!stack.size() ||
374
                (last_tok == JTOK_COMMA) || (last_tok == JTOK_ARR_OPEN))
375
                return false;
376

            
377
            UniValue *top = stack.back();
378
            if (top->getType() == VOBJ)
379
                setExpect(OBJ_NAME);
380
            else
381
                setExpect(ARR_VALUE);
382
            break;
383
            }
384

            
385
        case JTOK_KW_NULL:
386
        case JTOK_KW_TRUE:
387
        case JTOK_KW_FALSE: {
388
            UniValue tmpVal;
389
            switch (tok) {
390
            case JTOK_KW_NULL:
391
                // do nothing more
392
                break;
393
            case JTOK_KW_TRUE:
394
                tmpVal.setBool(true);
395
                break;
396
            case JTOK_KW_FALSE:
397
                tmpVal.setBool(false);
398
                break;
399
            default: /* impossible */ break;
400
            }
401

            
402
            if (!stack.size()) {
403
                *this = tmpVal;
404
                break;
405
            }
406

            
407
            UniValue *top = stack.back();
408
            top->values.push_back(tmpVal);
409

            
410
            setExpect(NOT_VALUE);
411
            break;
412
            }
413

            
414
        case JTOK_NUMBER: {
415
            UniValue tmpVal(VNUM, tokenVal);
416
            if (!stack.size()) {
417
                *this = tmpVal;
418
                break;
419
            }
420

            
421
            UniValue *top = stack.back();
422
            top->values.push_back(tmpVal);
423

            
424
            setExpect(NOT_VALUE);
425
            break;
426
            }
427

            
428
        case JTOK_STRING: {
429
            if (expect(OBJ_NAME)) {
430
                UniValue *top = stack.back();
431
                top->keys.push_back(tokenVal);
432
                clearExpect(OBJ_NAME);
433
                setExpect(COLON);
434
            } else {
435
                UniValue tmpVal(VSTR, tokenVal);
436
                if (!stack.size()) {
437
                    *this = tmpVal;
438
                    break;
439
                }
440
                UniValue *top = stack.back();
441
                top->values.push_back(tmpVal);
442
            }
443

            
444
            setExpect(NOT_VALUE);
445
            break;
446
            }
447

            
448
        default:
449
            return false;
450
        }
451
    } while (!stack.empty ());
452

            
453
    /* Check that nothing follows the initial construct (parsed above).  */
454
    tok = getJsonToken(tokenVal, consumed, raw, end);
455
    if (tok != JTOK_NONE)
456
        return false;
457

            
458
    return true;
459
}
460