diff --git a/tutorial01/leptjson.c b/tutorial01/leptjson.c index 5299fe1d..f4f2b17b 100644 --- a/tutorial01/leptjson.c +++ b/tutorial01/leptjson.c @@ -15,6 +15,24 @@ static void lept_parse_whitespace(lept_context* c) { c->json = p; } +static int lept_parse_true(lept_context* c, lept_value* v) { + EXPECT(c, 't'); + if (c->json[0] != 'r' || c->json[1] != 'u' || c->json[2] != 'e') + return LEPT_PARSE_INVALID_VALUE; + c->json += 3; + v->type = LEPT_TRUE; + return LEPT_PARSE_OK; +} + +static int lept_parse_false(lept_context* c, lept_value* v) { + EXPECT(c, 'f'); + if (c->json[0] != 'a' || c->json[1] != 'l' || c->json[2] != 's' || c->json[3] != 'e') + return LEPT_PARSE_INVALID_VALUE; + c->json += 4; + v->type = LEPT_FALSE; + return LEPT_PARSE_OK; +} + static int lept_parse_null(lept_context* c, lept_value* v) { EXPECT(c, 'n'); if (c->json[0] != 'u' || c->json[1] != 'l' || c->json[2] != 'l') @@ -26,6 +44,8 @@ static int lept_parse_null(lept_context* c, lept_value* v) { static int lept_parse_value(lept_context* c, lept_value* v) { switch (*c->json) { + case 't': return lept_parse_true(c, v); + case 'f': return lept_parse_false(c, v); case 'n': return lept_parse_null(c, v); case '\0': return LEPT_PARSE_EXPECT_VALUE; default: return LEPT_PARSE_INVALID_VALUE; @@ -34,11 +54,17 @@ static int lept_parse_value(lept_context* c, lept_value* v) { int lept_parse(lept_value* v, const char* json) { lept_context c; + int ret; assert(v != NULL); c.json = json; v->type = LEPT_NULL; lept_parse_whitespace(&c); - return lept_parse_value(&c, v); + if ((ret = lept_parse_value(&c, v)) == LEPT_PARSE_OK) { + lept_parse_whitespace(&c); + if (*c.json != '\0') + ret = LEPT_PARSE_ROOT_NOT_SINGULAR; + } + return ret; } lept_type lept_get_type(const lept_value* v) { diff --git a/tutorial01/test.c b/tutorial01/test.c index e7672181..a601f157 100644 --- a/tutorial01/test.c +++ b/tutorial01/test.c @@ -27,6 +27,20 @@ static void test_parse_null() { EXPECT_EQ_INT(LEPT_NULL, lept_get_type(&v)); } +static void test_parse_true() { + lept_value v; + v.type = LEPT_FALSE; + EXPECT_EQ_INT(LEPT_PARSE_OK, lept_parse(&v, "true")); + EXPECT_EQ_INT(LEPT_TRUE, lept_get_type(&v)); +} + +static void test_parse_false() { + lept_value v; + v.type = LEPT_TRUE; + EXPECT_EQ_INT(LEPT_PARSE_OK, lept_parse(&v, "false")); + EXPECT_EQ_INT(LEPT_FALSE, lept_get_type(&v)); +} + static void test_parse_expect_value() { lept_value v; @@ -59,6 +73,8 @@ static void test_parse_root_not_singular() { static void test_parse() { test_parse_null(); + test_parse_true(); + test_parse_false(); test_parse_expect_value(); test_parse_invalid_value(); test_parse_root_not_singular(); diff --git a/tutorial02/leptjson.c b/tutorial02/leptjson.c index 7693e43b..0a2ca875 100644 --- a/tutorial02/leptjson.c +++ b/tutorial02/leptjson.c @@ -1,8 +1,12 @@ #include "leptjson.h" #include /* assert() */ +#include /* errno, ERANGE */ +#include /* HUGE_VAL */ #include /* NULL, strtod() */ #define EXPECT(c, ch) do { assert(*c->json == (ch)); c->json++; } while(0) +#define ISDIGIT(ch) ((ch) >= '0' && (ch) <= '9') +#define ISDIGIT1TO9(ch) ((ch) >= '1' && (ch) <= '9') typedef struct { const char* json; @@ -15,49 +19,50 @@ static void lept_parse_whitespace(lept_context* c) { c->json = p; } -static int lept_parse_true(lept_context* c, lept_value* v) { - EXPECT(c, 't'); - if (c->json[0] != 'r' || c->json[1] != 'u' || c->json[2] != 'e') - return LEPT_PARSE_INVALID_VALUE; - c->json += 3; - v->type = LEPT_TRUE; - return LEPT_PARSE_OK; -} - -static int lept_parse_false(lept_context* c, lept_value* v) { - EXPECT(c, 'f'); - if (c->json[0] != 'a' || c->json[1] != 'l' || c->json[2] != 's' || c->json[3] != 'e') - return LEPT_PARSE_INVALID_VALUE; - c->json += 4; - v->type = LEPT_FALSE; - return LEPT_PARSE_OK; -} - -static int lept_parse_null(lept_context* c, lept_value* v) { - EXPECT(c, 'n'); - if (c->json[0] != 'u' || c->json[1] != 'l' || c->json[2] != 'l') - return LEPT_PARSE_INVALID_VALUE; - c->json += 3; - v->type = LEPT_NULL; +static int lept_parse_literal(lept_context* c, lept_value* v, const char* literal, lept_type type) { + size_t i; + EXPECT(c, literal[0]); + for (i = 0; literal[i + 1]; i++) + if (c->json[i] != literal[i + 1]) + return LEPT_PARSE_INVALID_VALUE; + c->json += i; + v->type = type; return LEPT_PARSE_OK; } static int lept_parse_number(lept_context* c, lept_value* v) { - char* end; - /* \TODO validate number */ - v->n = strtod(c->json, &end); - if (c->json == end) - return LEPT_PARSE_INVALID_VALUE; - c->json = end; + const char* p = c->json; + if (*p == '-') p++; + if (*p == '0') p++; + else { + if (!ISDIGIT1TO9(*p)) return LEPT_PARSE_INVALID_VALUE; + for (p++; ISDIGIT(*p); p++); + } + if (*p == '.') { + p++; + if (!ISDIGIT(*p)) return LEPT_PARSE_INVALID_VALUE; + for (p++; ISDIGIT(*p); p++); + } + if (*p == 'e' || *p == 'E') { + p++; + if (*p == '+' || *p == '-') p++; + if (!ISDIGIT(*p)) return LEPT_PARSE_INVALID_VALUE; + for (p++; ISDIGIT(*p); p++); + } + errno = 0; + v->n = strtod(c->json, NULL); + if (errno == ERANGE && (v->n == HUGE_VAL || v->n == -HUGE_VAL)) + return LEPT_PARSE_NUMBER_TOO_BIG; v->type = LEPT_NUMBER; + c->json = p; return LEPT_PARSE_OK; } static int lept_parse_value(lept_context* c, lept_value* v) { switch (*c->json) { - case 't': return lept_parse_true(c, v); - case 'f': return lept_parse_false(c, v); - case 'n': return lept_parse_null(c, v); + case 't': return lept_parse_literal(c, v, "true", LEPT_TRUE); + case 'f': return lept_parse_literal(c, v, "false", LEPT_FALSE); + case 'n': return lept_parse_literal(c, v, "null", LEPT_NULL); default: return lept_parse_number(c, v); case '\0': return LEPT_PARSE_EXPECT_VALUE; } diff --git a/tutorial02/test.c b/tutorial02/test.c index 6e3ebed2..3a0ddceb 100644 --- a/tutorial02/test.c +++ b/tutorial02/test.c @@ -70,6 +70,15 @@ static void test_parse_number() { TEST_NUMBER(1.234E+10, "1.234E+10"); TEST_NUMBER(1.234E-10, "1.234E-10"); TEST_NUMBER(0.0, "1e-10000"); /* must underflow */ + TEST_NUMBER(1.0000000000000002, "1.0000000000000002"); /* the smallest number > 1 */ + TEST_NUMBER( 4.9406564584124654e-324, "4.9406564584124654e-324"); /* minimum denormal */ + TEST_NUMBER(-4.9406564584124654e-324, "-4.9406564584124654e-324"); + TEST_NUMBER( 2.2250738585072009e-308, "2.2250738585072009e-308"); /* Max subnormal double */ + TEST_NUMBER(-2.2250738585072009e-308, "-2.2250738585072009e-308"); + TEST_NUMBER( 2.2250738585072014e-308, "2.2250738585072014e-308"); /* Min normal positive double */ + TEST_NUMBER(-2.2250738585072014e-308, "-2.2250738585072014e-308"); + TEST_NUMBER( 1.7976931348623157e+308, "1.7976931348623157e+308"); /* Max double */ + TEST_NUMBER(-1.7976931348623157e+308, "-1.7976931348623157e+308"); } #define TEST_ERROR(error, json)\ @@ -89,7 +98,6 @@ static void test_parse_invalid_value() { TEST_ERROR(LEPT_PARSE_INVALID_VALUE, "nul"); TEST_ERROR(LEPT_PARSE_INVALID_VALUE, "?"); -#if 0 /* invalid number */ TEST_ERROR(LEPT_PARSE_INVALID_VALUE, "+0"); TEST_ERROR(LEPT_PARSE_INVALID_VALUE, "+1"); @@ -99,25 +107,20 @@ static void test_parse_invalid_value() { TEST_ERROR(LEPT_PARSE_INVALID_VALUE, "inf"); TEST_ERROR(LEPT_PARSE_INVALID_VALUE, "NAN"); TEST_ERROR(LEPT_PARSE_INVALID_VALUE, "nan"); -#endif } static void test_parse_root_not_singular() { TEST_ERROR(LEPT_PARSE_ROOT_NOT_SINGULAR, "null x"); -#if 0 /* invalid number */ TEST_ERROR(LEPT_PARSE_ROOT_NOT_SINGULAR, "0123"); /* after zero should be '.' or nothing */ TEST_ERROR(LEPT_PARSE_ROOT_NOT_SINGULAR, "0x0"); TEST_ERROR(LEPT_PARSE_ROOT_NOT_SINGULAR, "0x123"); -#endif } static void test_parse_number_too_big() { -#if 0 TEST_ERROR(LEPT_PARSE_NUMBER_TOO_BIG, "1e309"); TEST_ERROR(LEPT_PARSE_NUMBER_TOO_BIG, "-1e309"); -#endif } static void test_parse() { diff --git a/tutorial03/leptjson.c b/tutorial03/leptjson.c index 07f7e2c7..5572d88c 100644 --- a/tutorial03/leptjson.c +++ b/tutorial03/leptjson.c @@ -99,10 +99,29 @@ static int lept_parse_string(lept_context* c, lept_value* v) { lept_set_string(v, (const char*)lept_context_pop(c, len), len); c->json = p; return LEPT_PARSE_OK; + case '\\': + switch (*p++) { + case '\"': PUTC(c, '\"'); break; + case '\\': PUTC(c, '\\'); break; + case '/': PUTC(c, '/' ); break; + case 'b': PUTC(c, '\b'); break; + case 'f': PUTC(c, '\f'); break; + case 'n': PUTC(c, '\n'); break; + case 'r': PUTC(c, '\r'); break; + case 't': PUTC(c, '\t'); break; + default: + c->top = head; + return LEPT_PARSE_INVALID_STRING_ESCAPE; + } + break; case '\0': c->top = head; return LEPT_PARSE_MISS_QUOTATION_MARK; default: + if ((unsigned char)ch < 0x20) { + c->top = head; + return LEPT_PARSE_INVALID_STRING_CHAR; + } PUTC(c, ch); } } @@ -153,12 +172,13 @@ lept_type lept_get_type(const lept_value* v) { } int lept_get_boolean(const lept_value* v) { - /* \TODO */ - return 0; + assert(v != NULL && (v->type == LEPT_TRUE || v->type == LEPT_FALSE)); + return v->type == LEPT_TRUE; } void lept_set_boolean(lept_value* v, int b) { - /* \TODO */ + lept_free(v); + v->type = b ? LEPT_TRUE : LEPT_FALSE; } double lept_get_number(const lept_value* v) { @@ -167,7 +187,9 @@ double lept_get_number(const lept_value* v) { } void lept_set_number(lept_value* v, double n) { - /* \TODO */ + lept_free(v); + v->u.n = n; + v->type = LEPT_NUMBER; } const char* lept_get_string(const lept_value* v) { diff --git a/tutorial03/test.c b/tutorial03/test.c index ac788aca..a66a3ba4 100644 --- a/tutorial03/test.c +++ b/tutorial03/test.c @@ -107,10 +107,8 @@ static void test_parse_number() { static void test_parse_string() { TEST_STRING("", "\"\""); TEST_STRING("Hello", "\"Hello\""); -#if 0 TEST_STRING("Hello\nWorld", "\"Hello\\nWorld\""); TEST_STRING("\" \\ / \b \f \n \r \t", "\"\\\" \\\\ \\/ \\b \\f \\n \\r \\t\""); -#endif } #define TEST_ERROR(error, json)\ @@ -163,19 +161,15 @@ static void test_parse_missing_quotation_mark() { } static void test_parse_invalid_string_escape() { -#if 0 TEST_ERROR(LEPT_PARSE_INVALID_STRING_ESCAPE, "\"\\v\""); TEST_ERROR(LEPT_PARSE_INVALID_STRING_ESCAPE, "\"\\'\""); TEST_ERROR(LEPT_PARSE_INVALID_STRING_ESCAPE, "\"\\0\""); TEST_ERROR(LEPT_PARSE_INVALID_STRING_ESCAPE, "\"\\x12\""); -#endif } static void test_parse_invalid_string_char() { -#if 0 TEST_ERROR(LEPT_PARSE_INVALID_STRING_CHAR, "\"\x01\""); TEST_ERROR(LEPT_PARSE_INVALID_STRING_CHAR, "\"\x1F\""); -#endif } static void test_access_null() { @@ -188,12 +182,23 @@ static void test_access_null() { } static void test_access_boolean() { - /* \TODO */ - /* Use EXPECT_TRUE() and EXPECT_FALSE() */ + lept_value v; + lept_init(&v); + lept_set_string(&v, "a", 1); + lept_set_boolean(&v, 1); + EXPECT_TRUE(lept_get_boolean(&v)); + lept_set_boolean(&v, 0); + EXPECT_FALSE(lept_get_boolean(&v)); + lept_free(&v); } static void test_access_number() { - /* \TODO */ + lept_value v; + lept_init(&v); + lept_set_string(&v, "a", 1); + lept_set_number(&v, 1234.5); + EXPECT_EQ_DOUBLE(1234.5, lept_get_number(&v)); + lept_free(&v); } static void test_access_string() { @@ -227,6 +232,9 @@ static void test_parse() { } int main() { +#ifdef _WINDOWS + _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); +#endif test_parse(); printf("%d/%d (%3.2f%%) passed\n", test_pass, test_count, test_pass * 100.0 / test_count); return main_ret; diff --git a/tutorial03_answer/leptjson.c b/tutorial03_answer/leptjson.c index 89117e15..7cc0cdfb 100644 --- a/tutorial03_answer/leptjson.c +++ b/tutorial03_answer/leptjson.c @@ -122,7 +122,7 @@ static int lept_parse_string(lept_context* c, lept_value* v) { c->top = head; return LEPT_PARSE_MISS_QUOTATION_MARK; default: - if ((unsigned char)ch < 0x20) { + if ((unsigned char)ch < 0x20) { c->top = head; return LEPT_PARSE_INVALID_STRING_CHAR; } diff --git a/tutorial04/leptjson.c b/tutorial04/leptjson.c index 0a123bf2..8fdc824c 100644 --- a/tutorial04/leptjson.c +++ b/tutorial04/leptjson.c @@ -91,19 +91,45 @@ static int lept_parse_number(lept_context* c, lept_value* v) { } static const char* lept_parse_hex4(const char* p, unsigned* u) { - /* \TODO */ + int i; + *u = 0; + for (i = 0; i < 4; i++) { + char ch = *p++; + *u <<= 4; + if (ch >= '0' && ch <= '9') *u |= ch - '0'; + else if (ch >= 'A' && ch <= 'F') *u |= ch - ('A' - 10); + else if (ch >= 'a' && ch <= 'f') *u |= ch - ('a' - 10); + else return NULL; + } return p; } static void lept_encode_utf8(lept_context* c, unsigned u) { - /* \TODO */ + if (u <= 0x7F) + PUTC(c, u & 0xFF); + else if (u <= 0x7FF) { + PUTC(c, 0xC0 | ((u >> 6) & 0xFF)); + PUTC(c, 0x80 | ( u & 0x3F)); + } + else if (u <= 0xFFFF) { + PUTC(c, 0xE0 | ((u >> 12) & 0xFF)); + PUTC(c, 0x80 | ((u >> 6) & 0x3F)); + PUTC(c, 0x80 | ( u & 0x3F)); + } + else { + assert(u <= 0x10FFFF); + PUTC(c, 0xF0 | ((u >> 18) & 0xFF)); + PUTC(c, 0x80 | ((u >> 12) & 0x3F)); + PUTC(c, 0x80 | ((u >> 6) & 0x3F)); + PUTC(c, 0x80 | ( u & 0x3F)); + } } #define STRING_ERROR(ret) do { c->top = head; return ret; } while(0) static int lept_parse_string(lept_context* c, lept_value* v) { size_t head = c->top, len; - unsigned u; + unsigned u, u2; const char* p; EXPECT(c, '\"'); p = c->json; @@ -128,7 +154,17 @@ static int lept_parse_string(lept_context* c, lept_value* v) { case 'u': if (!(p = lept_parse_hex4(p, &u))) STRING_ERROR(LEPT_PARSE_INVALID_UNICODE_HEX); - /* \TODO surrogate handling */ + if (u >= 0xD800 && u <= 0xDBFF) { /* surrogate pair */ + if (*p++ != '\\') + STRING_ERROR(LEPT_PARSE_INVALID_UNICODE_SURROGATE); + if (*p++ != 'u') + STRING_ERROR(LEPT_PARSE_INVALID_UNICODE_SURROGATE); + if (!(p = lept_parse_hex4(p, &u2))) + STRING_ERROR(LEPT_PARSE_INVALID_UNICODE_HEX); + if (u2 < 0xDC00 || u2 > 0xDFFF) + STRING_ERROR(LEPT_PARSE_INVALID_UNICODE_SURROGATE); + u = (((u - 0xD800) << 10) | (u2 - 0xDC00)) + 0x10000; + } lept_encode_utf8(c, u); break; default: diff --git a/tutorial04_answer/leptjson.c b/tutorial04_answer/leptjson.c index 590d1220..8fdc824c 100644 --- a/tutorial04_answer/leptjson.c +++ b/tutorial04_answer/leptjson.c @@ -105,7 +105,7 @@ static const char* lept_parse_hex4(const char* p, unsigned* u) { } static void lept_encode_utf8(lept_context* c, unsigned u) { - if (u <= 0x7F) + if (u <= 0x7F) PUTC(c, u & 0xFF); else if (u <= 0x7FF) { PUTC(c, 0xC0 | ((u >> 6) & 0xFF)); diff --git a/tutorial05/leptjson.c b/tutorial05/leptjson.c index d2c83f34..5e963bee 100644 --- a/tutorial05/leptjson.c +++ b/tutorial05/leptjson.c @@ -105,7 +105,7 @@ static const char* lept_parse_hex4(const char* p, unsigned* u) { } static void lept_encode_utf8(lept_context* c, unsigned u) { - if (u <= 0x7F) + if (u <= 0x7F) PUTC(c, u & 0xFF); else if (u <= 0x7FF) { PUTC(c, 0xC0 | ((u >> 6) & 0xFF)); @@ -184,9 +184,10 @@ static int lept_parse_string(lept_context* c, lept_value* v) { static int lept_parse_value(lept_context* c, lept_value* v); static int lept_parse_array(lept_context* c, lept_value* v) { - size_t size = 0; + size_t i, size = 0; int ret; EXPECT(c, '['); + lept_parse_whitespace(c); if (*c->json == ']') { c->json++; v->type = LEPT_ARRAY; @@ -198,11 +199,14 @@ static int lept_parse_array(lept_context* c, lept_value* v) { lept_value e; lept_init(&e); if ((ret = lept_parse_value(c, &e)) != LEPT_PARSE_OK) - return ret; + break; memcpy(lept_context_push(c, sizeof(lept_value)), &e, sizeof(lept_value)); size++; - if (*c->json == ',') + lept_parse_whitespace(c); + if (*c->json == ',') { c->json++; + lept_parse_whitespace(c); + } else if (*c->json == ']') { c->json++; v->type = LEPT_ARRAY; @@ -211,9 +215,15 @@ static int lept_parse_array(lept_context* c, lept_value* v) { memcpy(v->u.a.e = (lept_value*)malloc(size), lept_context_pop(c, size), size); return LEPT_PARSE_OK; } - else - return LEPT_PARSE_MISS_COMMA_OR_SQUARE_BRACKET; + else { + ret = LEPT_PARSE_MISS_COMMA_OR_SQUARE_BRACKET; + break; + } } + /* Pop and free values on the stack */ + for (i = 0; i < size; i++) + lept_free((lept_value*)lept_context_pop(c, sizeof(lept_value))); + return ret; } static int lept_parse_value(lept_context* c, lept_value* v) { @@ -250,9 +260,19 @@ int lept_parse(lept_value* v, const char* json) { } void lept_free(lept_value* v) { + size_t i; assert(v != NULL); - if (v->type == LEPT_STRING) - free(v->u.s.s); + switch (v->type) { + case LEPT_STRING: + free(v->u.s.s); + break; + case LEPT_ARRAY: + for (i = 0; i < v->u.a.size; i++) + lept_free(&v->u.a.e[i]); + free(v->u.a.e); + break; + default: break; + } v->type = LEPT_NULL; } diff --git a/tutorial05/test.c b/tutorial05/test.c index 2d4dd21e..1a17a2a7 100644 --- a/tutorial05/test.c +++ b/tutorial05/test.c @@ -128,6 +128,7 @@ static void test_parse_string() { } static void test_parse_array() { + size_t i, j; lept_value v; lept_init(&v); @@ -135,6 +136,35 @@ static void test_parse_array() { EXPECT_EQ_INT(LEPT_ARRAY, lept_get_type(&v)); EXPECT_EQ_SIZE_T(0, lept_get_array_size(&v)); lept_free(&v); + + lept_init(&v); + EXPECT_EQ_INT(LEPT_PARSE_OK, lept_parse(&v, "[ null , false , true , 123 , \"abc\" ]")); + EXPECT_EQ_INT(LEPT_ARRAY, lept_get_type(&v)); + EXPECT_EQ_SIZE_T(5, lept_get_array_size(&v)); + EXPECT_EQ_INT(LEPT_NULL, lept_get_type(lept_get_array_element(&v, 0))); + EXPECT_EQ_INT(LEPT_FALSE, lept_get_type(lept_get_array_element(&v, 1))); + EXPECT_EQ_INT(LEPT_TRUE, lept_get_type(lept_get_array_element(&v, 2))); + EXPECT_EQ_INT(LEPT_NUMBER, lept_get_type(lept_get_array_element(&v, 3))); + EXPECT_EQ_INT(LEPT_STRING, lept_get_type(lept_get_array_element(&v, 4))); + EXPECT_EQ_DOUBLE(123.0, lept_get_number(lept_get_array_element(&v, 3))); + EXPECT_EQ_STRING("abc", lept_get_string(lept_get_array_element(&v, 4)), lept_get_string_length(lept_get_array_element(&v, 4))); + lept_free(&v); + + lept_init(&v); + EXPECT_EQ_INT(LEPT_PARSE_OK, lept_parse(&v, "[ [ ] , [ 0 ] , [ 0 , 1 ] , [ 0 , 1 , 2 ] ]")); + EXPECT_EQ_INT(LEPT_ARRAY, lept_get_type(&v)); + EXPECT_EQ_SIZE_T(4, lept_get_array_size(&v)); + for (i = 0; i < 4; i++) { + lept_value* a = lept_get_array_element(&v, i); + EXPECT_EQ_INT(LEPT_ARRAY, lept_get_type(a)); + EXPECT_EQ_SIZE_T(i, lept_get_array_size(a)); + for (j = 0; j < i; j++) { + lept_value* e = lept_get_array_element(a, j); + EXPECT_EQ_INT(LEPT_NUMBER, lept_get_type(e)); + EXPECT_EQ_DOUBLE((double)j, lept_get_number(e)); + } + } + lept_free(&v); } #define TEST_ERROR(error, json)\ @@ -167,10 +197,8 @@ static void test_parse_invalid_value() { TEST_ERROR(LEPT_PARSE_INVALID_VALUE, "nan"); /* invalid value in array */ -#if 0 TEST_ERROR(LEPT_PARSE_INVALID_VALUE, "[1,]"); TEST_ERROR(LEPT_PARSE_INVALID_VALUE, "[\"a\", nul]"); -#endif } static void test_parse_root_not_singular() { @@ -229,12 +257,10 @@ static void test_parse_invalid_unicode_surrogate() { } static void test_parse_miss_comma_or_square_bracket() { -#if 0 TEST_ERROR(LEPT_PARSE_MISS_COMMA_OR_SQUARE_BRACKET, "[1"); TEST_ERROR(LEPT_PARSE_MISS_COMMA_OR_SQUARE_BRACKET, "[1}"); TEST_ERROR(LEPT_PARSE_MISS_COMMA_OR_SQUARE_BRACKET, "[1 2"); TEST_ERROR(LEPT_PARSE_MISS_COMMA_OR_SQUARE_BRACKET, "[[]"); -#endif } static void test_parse() { diff --git a/tutorial05_answer/leptjson.c b/tutorial05_answer/leptjson.c index f05f6ff0..5e963bee 100644 --- a/tutorial05_answer/leptjson.c +++ b/tutorial05_answer/leptjson.c @@ -105,7 +105,7 @@ static const char* lept_parse_hex4(const char* p, unsigned* u) { } static void lept_encode_utf8(lept_context* c, unsigned u) { - if (u <= 0x7F) + if (u <= 0x7F) PUTC(c, u & 0xFF); else if (u <= 0x7FF) { PUTC(c, 0xC0 | ((u >> 6) & 0xFF)); diff --git a/tutorial06/leptjson.c b/tutorial06/leptjson.c index 64e3bd0d..13edf96e 100644 --- a/tutorial06/leptjson.c +++ b/tutorial06/leptjson.c @@ -105,7 +105,7 @@ static const char* lept_parse_hex4(const char* p, unsigned* u) { } static void lept_encode_utf8(lept_context* c, unsigned u) { - if (u <= 0x7F) + if (u <= 0x7F) PUTC(c, u & 0xFF); else if (u <= 0x7FF) { PUTC(c, 0xC0 | ((u >> 6) & 0xFF)); @@ -127,8 +127,8 @@ static void lept_encode_utf8(lept_context* c, unsigned u) { #define STRING_ERROR(ret) do { c->top = head; return ret; } while(0) -static int lept_parse_string(lept_context* c, lept_value* v) { - size_t head = c->top, len; +static int lept_parse_string_raw(lept_context* c, char** str, size_t* len) { + size_t head = c->top; unsigned u, u2; const char* p; EXPECT(c, '\"'); @@ -137,8 +137,8 @@ static int lept_parse_string(lept_context* c, lept_value* v) { char ch = *p++; switch (ch) { case '\"': - len = c->top - head; - lept_set_string(v, (const char*)lept_context_pop(c, len), len); + *len = c->top - head; + *str = (char*)lept_context_pop(c, *len); c->json = p; return LEPT_PARSE_OK; case '\\': @@ -181,6 +181,15 @@ static int lept_parse_string(lept_context* c, lept_value* v) { } } +static int lept_parse_string(lept_context* c, lept_value* v) { + int ret; + char* s; + size_t len; + if ((ret = lept_parse_string_raw(c, &s, &len)) == LEPT_PARSE_OK) + lept_set_string(v, s, len); + return ret; +} + static int lept_parse_value(lept_context* c, lept_value* v); static int lept_parse_array(lept_context* c, lept_value* v) { @@ -227,7 +236,7 @@ static int lept_parse_array(lept_context* c, lept_value* v) { } static int lept_parse_object(lept_context* c, lept_value* v) { - size_t size; + size_t i, size; lept_member m; int ret; EXPECT(c, '{'); @@ -242,18 +251,58 @@ static int lept_parse_object(lept_context* c, lept_value* v) { m.k = NULL; size = 0; for (;;) { + char* str; lept_init(&m.v); - /* \todo parse key to m.k, m.klen */ - /* \todo parse ws colon ws */ + /* parse key */ + if (*c->json != '"') { + ret = LEPT_PARSE_MISS_KEY; + break; + } + if ((ret = lept_parse_string_raw(c, &str, &m.klen)) != LEPT_PARSE_OK) + break; + memcpy(m.k = (char*)malloc(m.klen + 1), str, m.klen); + m.k[m.klen] = '\0'; + /* parse ws colon ws */ + lept_parse_whitespace(c); + if (*c->json != ':') { + ret = LEPT_PARSE_MISS_COLON; + break; + } + c->json++; + lept_parse_whitespace(c); /* parse value */ if ((ret = lept_parse_value(c, &m.v)) != LEPT_PARSE_OK) break; memcpy(lept_context_push(c, sizeof(lept_member)), &m, sizeof(lept_member)); size++; m.k = NULL; /* ownership is transferred to member on stack */ - /* \todo parse ws [comma | right-curly-brace] ws */ + /* parse ws [comma | right-curly-brace] ws */ + lept_parse_whitespace(c); + if (*c->json == ',') { + c->json++; + lept_parse_whitespace(c); + } + else if (*c->json == '}') { + size_t s = sizeof(lept_member) * size; + c->json++; + v->type = LEPT_OBJECT; + v->u.o.size = size; + memcpy(v->u.o.m = (lept_member*)malloc(s), lept_context_pop(c, s), s); + return LEPT_PARSE_OK; + } + else { + ret = LEPT_PARSE_MISS_COMMA_OR_CURLY_BRACKET; + break; + } } - /* \todo Pop and free members on the stack */ + /* Pop and free members on the stack */ + free(m.k); + for (i = 0; i < size; i++) { + lept_member* m = (lept_member*)lept_context_pop(c, sizeof(lept_member)); + free(m->k); + lept_free(&m->v); + } + v->type = LEPT_NULL; return ret; } @@ -303,6 +352,13 @@ void lept_free(lept_value* v) { lept_free(&v->u.a.e[i]); free(v->u.a.e); break; + case LEPT_OBJECT: + for (i = 0; i < v->u.o.size; i++) { + free(v->u.o.m[i].k); + lept_free(&v->u.o.m[i].v); + } + free(v->u.o.m); + break; default: break; } v->type = LEPT_NULL; diff --git a/tutorial06/test.c b/tutorial06/test.c index 8d332e45..74e5f763 100644 --- a/tutorial06/test.c +++ b/tutorial06/test.c @@ -352,9 +352,7 @@ static void test_parse() { test_parse_number(); test_parse_string(); test_parse_array(); -#if 0 test_parse_object(); -#endif test_parse_expect_value(); test_parse_invalid_value(); @@ -366,11 +364,9 @@ static void test_parse() { test_parse_invalid_unicode_hex(); test_parse_invalid_unicode_surrogate(); test_parse_miss_comma_or_square_bracket(); -#if 0 test_parse_miss_key(); test_parse_miss_colon(); test_parse_miss_comma_or_curly_bracket(); -#endif } static void test_access_null() { diff --git a/tutorial06_answer/leptjson.c b/tutorial06_answer/leptjson.c index 307917c2..4848a656 100644 --- a/tutorial06_answer/leptjson.c +++ b/tutorial06_answer/leptjson.c @@ -105,7 +105,7 @@ static const char* lept_parse_hex4(const char* p, unsigned* u) { } static void lept_encode_utf8(lept_context* c, unsigned u) { - if (u <= 0x7F) + if (u <= 0x7F) PUTC(c, u & 0xFF); else if (u <= 0x7FF) { PUTC(c, 0xC0 | ((u >> 6) & 0xFF)); diff --git a/tutorial07/leptjson.c b/tutorial07/leptjson.c index 5307b892..d719a57e 100644 --- a/tutorial07/leptjson.c +++ b/tutorial07/leptjson.c @@ -111,7 +111,7 @@ static const char* lept_parse_hex4(const char* p, unsigned* u) { } static void lept_encode_utf8(lept_context* c, unsigned u) { - if (u <= 0x7F) + if (u <= 0x7F) PUTC(c, u & 0xFF); else if (u <= 0x7FF) { PUTC(c, 0xC0 | ((u >> 6) & 0xFF)); @@ -346,11 +346,105 @@ int lept_parse(lept_value* v, const char* json) { return ret; } +#if 0 +// Unoptimized static void lept_stringify_string(lept_context* c, const char* s, size_t len) { - /* ... */ + size_t i; + assert(s != NULL); + PUTC(c, '"'); + for (i = 0; i < len; i++) { + unsigned char ch = (unsigned char)s[i]; + switch (ch) { + case '\"': PUTS(c, "\\\"", 2); break; + case '\\': PUTS(c, "\\\\", 2); break; + case '\b': PUTS(c, "\\b", 2); break; + case '\f': PUTS(c, "\\f", 2); break; + case '\n': PUTS(c, "\\n", 2); break; + case '\r': PUTS(c, "\\r", 2); break; + case '\t': PUTS(c, "\\t", 2); break; + default: + if (ch < 0x20) { + char buffer[7]; + sprintf(buffer, "\\u%04X", ch); + PUTS(c, buffer, 6); + } + else + PUTC(c, s[i]); + } + } + PUTC(c, '"'); } +#else +static void lept_stringify_string(lept_context* c, const char* s, size_t len) { + static const char hex_digits[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; + size_t i, size; + char* head, *p; + assert(s != NULL); + p = head = lept_context_push(c, size = len * 6 + 2); /* "\u00xx..." */ + *p++ = '"'; + for (i = 0; i < len; i++) { + unsigned char ch = (unsigned char)s[i]; + switch (ch) { + case '\"': *p++ = '\\'; *p++ = '\"'; break; + case '\\': *p++ = '\\'; *p++ = '\\'; break; + case '\b': *p++ = '\\'; *p++ = 'b'; break; + case '\f': *p++ = '\\'; *p++ = 'f'; break; + case '\n': *p++ = '\\'; *p++ = 'n'; break; + case '\r': *p++ = '\\'; *p++ = 'r'; break; + case '\t': *p++ = '\\'; *p++ = 't'; break; + default: + if (ch >= 0x20 && ch <= 0x7F) + *p++ = ch; + else { + unsigned u, u2 = 0; + if (ch < 0x20) + u = (unsigned) ch; + else if (ch <= 0xDF) { + u = (unsigned)(ch & 0x1F) << 6; + ch = s[++i]; + u = u | ((unsigned)(ch & 0x3F)); + } + else if (ch <= 0xEF) { + u = (unsigned)(ch & 0x0F) << 12; + ch = s[++i]; + u = u | ((unsigned)(ch & 0x3F) << 6); + ch = s[++i]; + u = u | ((unsigned)(ch & 0x3F)); + } + else { + u = (unsigned)(ch & 0x07) << 18; + ch = s[++i]; + u = u | ((unsigned)(ch & 0x3F) << 12); + ch = s[++i]; + u = u | ((unsigned)(ch & 0x3F) << 6); + ch = s[++i]; + u = u | ((unsigned)(ch & 0x3F)); + u -= 0x10000; + u2 = (u & 0x3FF) + 0xDC00; + u = (u >> 10) + 0xD800; + } + *p++ = '\\'; *p++ = 'u'; + *p++ = hex_digits[u >> 12]; + *p++ = hex_digits[(u >> 8) & 0xF]; + *p++ = hex_digits[(u >> 4) & 0xF]; + *p++ = hex_digits[u & 0xF]; + if (u2) { + *p++ = '\\'; *p++ = 'u'; + *p++ = hex_digits[u2 >> 12]; + *p++ = hex_digits[(u2 >> 8) & 0xF]; + *p++ = hex_digits[(u2 >> 4) & 0xF]; + *p++ = hex_digits[u2 & 0xF]; + } + } + } + } + *p++ = '"'; + c->top -= size - (p - head); +} +#endif static void lept_stringify_value(lept_context* c, const lept_value* v) { + size_t i; switch (v->type) { case LEPT_NULL: PUTS(c, "null", 4); break; case LEPT_FALSE: PUTS(c, "false", 5); break; @@ -358,10 +452,24 @@ static void lept_stringify_value(lept_context* c, const lept_value* v) { case LEPT_NUMBER: c->top -= 32 - sprintf(lept_context_push(c, 32), "%.17g", v->u.n); break; case LEPT_STRING: lept_stringify_string(c, v->u.s.s, v->u.s.len); break; case LEPT_ARRAY: - /* ... */ + PUTC(c, '['); + for (i = 0; i < v->u.a.size; i++) { + if (i > 0) + PUTC(c, ','); + lept_stringify_value(c, &v->u.a.e[i]); + } + PUTC(c, ']'); break; case LEPT_OBJECT: - /* ... */ + PUTC(c, '{'); + for (i = 0; i < v->u.o.size; i++) { + if (i > 0) + PUTC(c, ','); + lept_stringify_string(c, v->u.o.m[i].k, v->u.o.m[i].klen); + PUTC(c, ':'); + lept_stringify_value(c, &v->u.o.m[i].v); + } + PUTC(c, '}'); break; default: assert(0 && "invalid type"); } diff --git a/tutorial07/test.c b/tutorial07/test.c index 7e34cbb7..fe613e3b 100644 --- a/tutorial07/test.c +++ b/tutorial07/test.c @@ -411,6 +411,8 @@ static void test_stringify_string() { TEST_ROUNDTRIP("\"Hello\\nWorld\""); TEST_ROUNDTRIP("\"\\\" \\\\ / \\b \\f \\n \\r \\t\""); TEST_ROUNDTRIP("\"Hello\\u0000World\""); + TEST_ROUNDTRIP("\"\\uD834\\uDD1E\""); + TEST_ROUNDTRIP("\"\\u1234\""); } static void test_stringify_array() { diff --git a/tutorial07_answer/leptjson.c b/tutorial07_answer/leptjson.c index b6f64834..9607a372 100644 --- a/tutorial07_answer/leptjson.c +++ b/tutorial07_answer/leptjson.c @@ -111,7 +111,7 @@ static const char* lept_parse_hex4(const char* p, unsigned* u) { } static void lept_encode_utf8(lept_context* c, unsigned u) { - if (u <= 0x7F) + if (u <= 0x7F) PUTC(c, u & 0xFF); else if (u <= 0x7FF) { PUTC(c, 0xC0 | ((u >> 6) & 0xFF));