2022-06-25 11:52:00 +08:00

1457 lines
41 KiB
C

/* Lua CJSON - JSON support for Lua
*
* Copyright (c) 2010-2012 Mark Pulford <mark@kyne.com.au>
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/* Caveats:
* - JSON "null" values are represented as lightuserdata since Lua
* tables cannot contain "nil". Compare with cjson.null.
* - Invalid UTF-8 characters are not detected and will be passed
* untouched. If required, UTF-8 error checking should be done
* outside this library.
* - Javascript comments are not part of the JSON spec, and are not
* currently supported.
*
* Note: Decoding is slower than encoding. Lua spends significant
* time (30%) managing tables when parsing JSON since it is
* difficult to know object/array sizes ahead of time.
*/
#include <assert.h>
#include <string.h>
#include <math.h>
#include <limits.h>
#include "lua.h"
#include "lauxlib.h"
#include "strbuf.h"
#include "fpconv.h"
#ifndef CJSON_MODNAME
#define CJSON_MODNAME "cjson"
#endif
#ifndef CJSON_VERSION
#define CJSON_VERSION "2.1devel"
#endif
#ifdef _WIN32
#define __strncasecmp strnicmp
#define isnan(x) ((x) != (x))
#define isinf(x) (!isnan(x) && isnan(x - x))
#else
#define __strncasecmp strncasecmp
#endif
#define DEFAULT_SPARSE_CONVERT 0
#define DEFAULT_SPARSE_RATIO 2
#define DEFAULT_SPARSE_SAFE 10
#define DEFAULT_ENCODE_MAX_DEPTH 1000
#define DEFAULT_DECODE_MAX_DEPTH 1000
#define DEFAULT_ENCODE_INVALID_NUMBERS 0
#define DEFAULT_DECODE_INVALID_NUMBERS 1
#define DEFAULT_ENCODE_KEEP_BUFFER 1
#define DEFAULT_ENCODE_NUMBER_PRECISION 14
#define DEFAULT_DECODE_LUA_NIL 1
#ifdef DISABLE_INVALID_NUMBERS
#undef DEFAULT_DECODE_INVALID_NUMBERS
#define DEFAULT_DECODE_INVALID_NUMBERS 0
#endif
typedef enum {
T_OBJ_BEGIN,
T_OBJ_END,
T_ARR_BEGIN,
T_ARR_END,
T_STRING,
T_NUMBER,
T_BOOLEAN,
T_NULL,
T_COLON,
T_COMMA,
T_END,
T_WHITESPACE,
T_ERROR,
T_UNKNOWN
} json_token_type_t;
static const char *json_token_type_name[] = {
"T_OBJ_BEGIN",
"T_OBJ_END",
"T_ARR_BEGIN",
"T_ARR_END",
"T_STRING",
"T_NUMBER",
"T_BOOLEAN",
"T_NULL",
"T_COLON",
"T_COMMA",
"T_END",
"T_WHITESPACE",
"T_ERROR",
"T_UNKNOWN",
NULL
};
typedef struct {
json_token_type_t ch2token[256];
char escape2char[256]; /* Decoding */
/* encode_buf is only allocated and used when
* encode_keep_buffer is set */
strbuf_t encode_buf;
int encode_sparse_convert;
int encode_sparse_ratio;
int encode_sparse_safe;
int encode_max_depth;
int encode_invalid_numbers; /* 2 => Encode as "null" */
int encode_number_precision;
int encode_keep_buffer;
int decode_invalid_numbers;
int decode_max_depth;
int decode_lua_nil; /* 1 => use Lua nil for NULL */
} json_config_t;
typedef struct {
const char *data;
const char *ptr;
strbuf_t *tmp; /* Temporary storage for strings */
json_config_t *cfg;
int current_depth;
} json_parse_t;
typedef struct {
json_token_type_t type;
int index;
union {
const char *string;
double number;
int boolean;
} value;
int string_len;
} json_token_t;
static const char *char2escape[256] = {
"\\u0000", "\\u0001", "\\u0002", "\\u0003",
"\\u0004", "\\u0005", "\\u0006", "\\u0007",
"\\b", "\\t", "\\n", "\\u000b",
"\\f", "\\r", "\\u000e", "\\u000f",
"\\u0010", "\\u0011", "\\u0012", "\\u0013",
"\\u0014", "\\u0015", "\\u0016", "\\u0017",
"\\u0018", "\\u0019", "\\u001a", "\\u001b",
"\\u001c", "\\u001d", "\\u001e", "\\u001f",
NULL, NULL, "\\\"", NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, "\\/",
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, "\\\\", NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, "\\u007f",
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
};
/* ===== CONFIGURATION ===== */
static json_config_t *json_fetch_config(lua_State *l)
{
json_config_t *cfg;
cfg = lua_touserdata(l, lua_upvalueindex(1));
if (!cfg)
luaL_error(l, "BUG: Unable to fetch CJSON configuration");
return cfg;
}
/* Ensure the correct number of arguments have been provided.
* Pad with nil to allow other functions to simply check arg[i]
* to find whether an argument was provided */
static json_config_t *json_arg_init(lua_State *l, int args)
{
luaL_argcheck(l, lua_gettop(l) <= args, args + 1,
"found too many arguments");
while (lua_gettop(l) < args)
lua_pushnil(l);
return json_fetch_config(l);
}
/* Process integer options for configuration functions */
static int json_integer_option(lua_State *l, int optindex, int *setting,
int min, int max)
{
char errmsg[64];
int value;
if (!lua_isnil(l, optindex)) {
value = (int)luaL_checkinteger(l, optindex);
snprintf(errmsg, sizeof(errmsg), "expected integer between %d and %d", min, max);
luaL_argcheck(l, min <= value && value <= max, 1, errmsg);
*setting = value;
}
lua_pushinteger(l, *setting);
return 1;
}
/* Process enumerated arguments for a configuration function */
static int json_enum_option(lua_State *l, int optindex, int *setting,
const char **options, int bool_true)
{
static const char *bool_options[] = { "off", "on", NULL };
if (!options) {
options = bool_options;
bool_true = 1;
}
if (!lua_isnil(l, optindex)) {
if (bool_true && lua_isboolean(l, optindex))
*setting = lua_toboolean(l, optindex) * bool_true;
else
*setting = luaL_checkoption(l, optindex, NULL, options);
}
if (bool_true && (*setting == 0 || *setting == bool_true))
lua_pushboolean(l, *setting);
else
lua_pushstring(l, options[*setting]);
return 1;
}
/* Configures handling of extremely sparse arrays:
* convert: Convert extremely sparse arrays into objects? Otherwise error.
* ratio: 0: always allow sparse; 1: never allow sparse; >1: use ratio
* safe: Always use an array when the max index <= safe */
static int json_cfg_encode_sparse_array(lua_State *l)
{
json_config_t *cfg = json_arg_init(l, 3);
json_enum_option(l, 1, &cfg->encode_sparse_convert, NULL, 1);
json_integer_option(l, 2, &cfg->encode_sparse_ratio, 0, INT_MAX);
json_integer_option(l, 3, &cfg->encode_sparse_safe, 0, INT_MAX);
return 3;
}
/* Configures the maximum number of nested arrays/objects allowed when
* encoding */
static int json_cfg_encode_max_depth(lua_State *l)
{
json_config_t *cfg = json_arg_init(l, 1);
return json_integer_option(l, 1, &cfg->encode_max_depth, 1, INT_MAX);
}
/* Configures the maximum number of nested arrays/objects allowed when
* encoding */
static int json_cfg_decode_max_depth(lua_State *l)
{
json_config_t *cfg = json_arg_init(l, 1);
return json_integer_option(l, 1, &cfg->decode_max_depth, 1, INT_MAX);
}
/* Configures number precision when converting doubles to text */
static int json_cfg_encode_number_precision(lua_State *l)
{
json_config_t *cfg = json_arg_init(l, 1);
return json_integer_option(l, 1, &cfg->encode_number_precision, 1, 14);
}
/* Configures JSON encoding buffer persistence */
static int json_cfg_encode_keep_buffer(lua_State *l)
{
json_config_t *cfg = json_arg_init(l, 1);
int old_value;
old_value = cfg->encode_keep_buffer;
json_enum_option(l, 1, &cfg->encode_keep_buffer, NULL, 1);
/* Init / free the buffer if the setting has changed */
if (old_value ^ cfg->encode_keep_buffer) {
if (cfg->encode_keep_buffer)
strbuf_init(&cfg->encode_buf, 0);
else
strbuf_free(&cfg->encode_buf);
}
return 1;
}
#if defined(DISABLE_INVALID_NUMBERS) && !defined(USE_INTERNAL_FPCONV)
void json_verify_invalid_number_setting(lua_State *l, int *setting)
{
if (*setting == 1) {
*setting = 0;
luaL_error(l, "Infinity, NaN, and/or hexadecimal numbers are not supported.");
}
}
#else
#define json_verify_invalid_number_setting(l, s) do { } while(0)
#endif
static int json_cfg_encode_invalid_numbers(lua_State *l)
{
static const char *options[] = { "off", "on", "null", NULL };
json_config_t *cfg = json_arg_init(l, 1);
json_enum_option(l, 1, &cfg->encode_invalid_numbers, options, 1);
json_verify_invalid_number_setting(l, &cfg->encode_invalid_numbers);
return 1;
}
static int json_cfg_decode_invalid_numbers(lua_State *l)
{
json_config_t *cfg = json_arg_init(l, 1);
json_enum_option(l, 1, &cfg->decode_invalid_numbers, NULL, 1);
json_verify_invalid_number_setting(l, &cfg->encode_invalid_numbers);
return 1;
}
static int json_cfg_decode_lua_nil(lua_State *l)
{
json_config_t *cfg = json_arg_init(l, 1);
json_enum_option(l, 1, &cfg->decode_lua_nil, NULL, 1);
return 1;
}
static int json_destroy_config(lua_State *l)
{
json_config_t *cfg;
cfg = lua_touserdata(l, 1);
if (cfg)
strbuf_free(&cfg->encode_buf);
cfg = NULL;
return 0;
}
static void json_create_config(lua_State *l)
{
json_config_t *cfg;
int i;
cfg = lua_newuserdata(l, sizeof(*cfg));
/* Create GC method to clean up strbuf */
lua_newtable(l);
lua_pushcfunction(l, json_destroy_config);
lua_setfield(l, -2, "__gc");
lua_setmetatable(l, -2);
cfg->encode_sparse_convert = DEFAULT_SPARSE_CONVERT;
cfg->encode_sparse_ratio = DEFAULT_SPARSE_RATIO;
cfg->encode_sparse_safe = DEFAULT_SPARSE_SAFE;
cfg->encode_max_depth = DEFAULT_ENCODE_MAX_DEPTH;
cfg->decode_max_depth = DEFAULT_DECODE_MAX_DEPTH;
cfg->encode_invalid_numbers = DEFAULT_ENCODE_INVALID_NUMBERS;
cfg->decode_invalid_numbers = DEFAULT_DECODE_INVALID_NUMBERS;
cfg->encode_keep_buffer = DEFAULT_ENCODE_KEEP_BUFFER;
cfg->encode_number_precision = DEFAULT_ENCODE_NUMBER_PRECISION;
cfg->decode_lua_nil = DEFAULT_DECODE_LUA_NIL;
#if DEFAULT_ENCODE_KEEP_BUFFER > 0
strbuf_init(&cfg->encode_buf, 0);
#endif
/* Decoding init */
/* Tag all characters as an error */
for (i = 0; i < 256; i++)
cfg->ch2token[i] = T_ERROR;
/* Set tokens that require no further processing */
cfg->ch2token['{'] = T_OBJ_BEGIN;
cfg->ch2token['}'] = T_OBJ_END;
cfg->ch2token['['] = T_ARR_BEGIN;
cfg->ch2token[']'] = T_ARR_END;
cfg->ch2token[','] = T_COMMA;
cfg->ch2token[':'] = T_COLON;
cfg->ch2token['\0'] = T_END;
cfg->ch2token[' '] = T_WHITESPACE;
cfg->ch2token['\t'] = T_WHITESPACE;
cfg->ch2token['\n'] = T_WHITESPACE;
cfg->ch2token['\r'] = T_WHITESPACE;
/* Update characters that require further processing */
cfg->ch2token['f'] = T_UNKNOWN; /* false? */
cfg->ch2token['i'] = T_UNKNOWN; /* inf, ininity? */
cfg->ch2token['I'] = T_UNKNOWN;
cfg->ch2token['n'] = T_UNKNOWN; /* null, nan? */
cfg->ch2token['N'] = T_UNKNOWN;
cfg->ch2token['t'] = T_UNKNOWN; /* true? */
cfg->ch2token['"'] = T_UNKNOWN; /* string? */
cfg->ch2token['+'] = T_UNKNOWN; /* number? */
cfg->ch2token['-'] = T_UNKNOWN;
for (i = 0; i < 10; i++)
cfg->ch2token['0' + i] = T_UNKNOWN;
/* Lookup table for parsing escape characters */
for (i = 0; i < 256; i++)
cfg->escape2char[i] = 0; /* String error */
cfg->escape2char['"'] = '"';
cfg->escape2char['\\'] = '\\';
cfg->escape2char['/'] = '/';
cfg->escape2char['b'] = '\b';
cfg->escape2char['t'] = '\t';
cfg->escape2char['n'] = '\n';
cfg->escape2char['f'] = '\f';
cfg->escape2char['r'] = '\r';
cfg->escape2char['u'] = 'u'; /* Unicode parsing required */
}
/* ===== ENCODING ===== */
static void json_encode_exception(lua_State *l, json_config_t *cfg, strbuf_t *json, int lindex,
const char *reason)
{
if (!cfg->encode_keep_buffer)
strbuf_free(json);
luaL_error(l, "Cannot serialise %s: %s",
lua_typename(l, lua_type(l, lindex)), reason);
}
/* json_append_string args:
* - lua_State
* - JSON strbuf
* - String (Lua stack index)
*
* Returns nothing. Doesn't remove string from Lua stack */
static void json_append_string(lua_State *l, strbuf_t *json, int lindex)
{
const char *escstr;
int i;
const char *str;
size_t len;
str = lua_tolstring(l, lindex, &len);
/* Worst case is len * 6 (all unicode escapes).
* This buffer is reused constantly for small strings
* If there are any excess pages, they won't be hit anyway.
* This gains ~5% speedup. */
strbuf_ensure_empty_length(json, (int)len * 6 + 2);
strbuf_append_char_unsafe(json, '\"');
for (i = 0; i < len; i++) {
escstr = char2escape[(unsigned char)str[i]];
if (escstr)
strbuf_append_string(json, escstr);
else
strbuf_append_char_unsafe(json, str[i]);
}
strbuf_append_char_unsafe(json, '\"');
}
/* Find the size of the array on the top of the Lua stack
* -1 object (not a pure array)
* >=0 elements in array
*/
static int lua_array_length(lua_State *l, json_config_t *cfg, strbuf_t *json)
{
double k;
int max;
int items;
max = 0;
items = 0;
lua_pushnil(l);
/* table, startkey */
while (lua_next(l, -2) != 0) {
/* table, key, value */
if (lua_type(l, -2) == LUA_TNUMBER &&
(k = lua_tonumber(l, -2))) {
/* Integer >= 1 ? */
if (floor(k) == k && k >= 1) {
if (k > max)
max = k;
items++;
lua_pop(l, 1);
continue;
}
}
/* Must not be an array (non integer key) */
lua_pop(l, 2);
return -1;
}
/* Encode excessively sparse arrays as objects (if enabled) */
if (cfg->encode_sparse_ratio > 0 &&
max > items * cfg->encode_sparse_ratio &&
max > cfg->encode_sparse_safe) {
if (!cfg->encode_sparse_convert)
json_encode_exception(l, cfg, json, -1, "excessively sparse array");
return -1;
}
return max;
}
static void json_check_encode_depth(lua_State *l, json_config_t *cfg,
int current_depth, strbuf_t *json)
{
/* Ensure there are enough slots free to traverse a table (key,
* value) and push a string for a potential error message.
*
* Unlike "decode", the key and value are still on the stack when
* lua_checkstack() is called. Hence an extra slot for luaL_error()
* below is required just in case the next check to lua_checkstack()
* fails.
*
* While this won't cause a crash due to the EXTRA_STACK reserve
* slots, it would still be an improper use of the API. */
if (current_depth <= cfg->encode_max_depth && lua_checkstack(l, 3))
return;
if (!cfg->encode_keep_buffer)
strbuf_free(json);
luaL_error(l, "Cannot serialise, excessive nesting (%d)",
current_depth);
}
static void json_append_data(lua_State *l, json_config_t *cfg,
int current_depth, strbuf_t *json);
/* json_append_array args:
* - lua_State
* - JSON strbuf
* - Size of passwd Lua array (top of stack) */
static void json_append_array(lua_State *l, json_config_t *cfg, int current_depth,
strbuf_t *json, int array_length)
{
int comma, i;
strbuf_append_char(json, '[');
comma = 0;
for (i = 1; i <= array_length; i++) {
if (comma)
strbuf_append_char(json, ',');
else
comma = 1;
lua_rawgeti(l, -1, i);
json_append_data(l, cfg, current_depth, json);
lua_pop(l, 1);
}
strbuf_append_char(json, ']');
}
static void json_append_number(lua_State *l, json_config_t *cfg,
strbuf_t *json, int lindex)
{
double num = lua_tonumber(l, lindex);
int len;
if (cfg->encode_invalid_numbers == 0) {
/* Prevent encoding invalid numbers */
if (isinf(num) || isnan(num))
json_encode_exception(l, cfg, json, lindex,
"must not be NaN or Infinity");
} else if (cfg->encode_invalid_numbers == 1) {
/* Encode NaN/Infinity separately to ensure Javascript compatible
* values are used. */
if (isnan(num)) {
strbuf_append_mem(json, "NaN", 3);
return;
}
if (isinf(num)) {
if (num < 0)
strbuf_append_mem(json, "-Infinity", 9);
else
strbuf_append_mem(json, "Infinity", 8);
return;
}
} else {
/* Encode invalid numbers as "null" */
if (isinf(num) || isnan(num)) {
strbuf_append_mem(json, "null", 4);
return;
}
}
strbuf_ensure_empty_length(json, FPCONV_G_FMT_BUFSIZE);
len = fpconv_g_fmt(strbuf_empty_ptr(json), num, cfg->encode_number_precision);
strbuf_extend_length(json, len);
}
static void json_append_object(lua_State *l, json_config_t *cfg,
int current_depth, strbuf_t *json)
{
int comma, keytype;
/* Object */
strbuf_append_char(json, '{');
lua_pushnil(l);
/* table, startkey */
comma = 0;
while (lua_next(l, -2) != 0) {
if (comma)
strbuf_append_char(json, ',');
else
comma = 1;
/* table, key, value */
keytype = lua_type(l, -2);
if (keytype == LUA_TNUMBER) {
strbuf_append_char(json, '"');
json_append_number(l, cfg, json, -2);
strbuf_append_mem(json, "\":", 2);
} else if (keytype == LUA_TSTRING) {
json_append_string(l, json, -2);
strbuf_append_char(json, ':');
} else {
json_encode_exception(l, cfg, json, -2,
"table key must be a number or string");
/* never returns */
}
/* table, key, value */
json_append_data(l, cfg, current_depth, json);
lua_pop(l, 1);
/* table, key */
}
strbuf_append_char(json, '}');
}
/* Serialise Lua data into JSON string. */
static void json_append_data(lua_State *l, json_config_t *cfg,
int current_depth, strbuf_t *json)
{
int len;
switch (lua_type(l, -1)) {
case LUA_TSTRING:
json_append_string(l, json, -1);
break;
case LUA_TNUMBER:
json_append_number(l, cfg, json, -1);
break;
case LUA_TBOOLEAN:
if (lua_toboolean(l, -1))
strbuf_append_mem(json, "true", 4);
else
strbuf_append_mem(json, "false", 5);
break;
case LUA_TTABLE:
current_depth++;
json_check_encode_depth(l, cfg, current_depth, json);
len = lua_array_length(l, cfg, json);
if (len > 0)
json_append_array(l, cfg, current_depth, json, len);
else
json_append_object(l, cfg, current_depth, json);
break;
case LUA_TNIL:
strbuf_append_mem(json, "null", 4);
break;
case LUA_TLIGHTUSERDATA:
if (lua_touserdata(l, -1) == NULL) {
strbuf_append_mem(json, "null", 4);
break;
}
default:
/* Remaining types (LUA_TFUNCTION, LUA_TUSERDATA, LUA_TTHREAD,
* and LUA_TLIGHTUSERDATA) cannot be serialised */
json_encode_exception(l, cfg, json, -1, "type not supported");
/* never returns */
}
}
static int json_encode(lua_State *l)
{
json_config_t *cfg = json_fetch_config(l);
strbuf_t local_encode_buf;
strbuf_t *encode_buf;
char *json;
int len;
luaL_argcheck(l, lua_gettop(l) == 1, 1, "expected 1 argument");
if (!cfg->encode_keep_buffer) {
/* Use private buffer */
encode_buf = &local_encode_buf;
strbuf_init(encode_buf, 0);
} else {
/* Reuse existing buffer */
encode_buf = &cfg->encode_buf;
strbuf_reset(encode_buf);
}
json_append_data(l, cfg, 0, encode_buf);
json = strbuf_string(encode_buf, &len);
lua_pushlstring(l, json, len);
if (!cfg->encode_keep_buffer)
strbuf_free(encode_buf);
return 1;
}
/* ===== DECODING ===== */
static void json_process_value(lua_State *l, json_parse_t *json,
json_token_t *token);
static int hexdigit2int(char hex)
{
if ('0' <= hex && hex <= '9')
return hex - '0';
/* Force lowercase */
hex |= 0x20;
if ('a' <= hex && hex <= 'f')
return 10 + hex - 'a';
return -1;
}
static int decode_hex4(const char *hex)
{
int digit[4];
int i;
/* Convert ASCII hex digit to numeric digit
* Note: this returns an error for invalid hex digits, including
* NULL */
for (i = 0; i < 4; i++) {
digit[i] = hexdigit2int(hex[i]);
if (digit[i] < 0) {
return -1;
}
}
return (digit[0] << 12) +
(digit[1] << 8) +
(digit[2] << 4) +
digit[3];
}
/* Converts a Unicode codepoint to UTF-8.
* Returns UTF-8 string length, and up to 4 bytes in *utf8 */
static int codepoint_to_utf8(char *utf8, int codepoint)
{
/* 0xxxxxxx */
if (codepoint <= 0x7F) {
utf8[0] = codepoint;
return 1;
}
/* 110xxxxx 10xxxxxx */
if (codepoint <= 0x7FF) {
utf8[0] = (codepoint >> 6) | 0xC0;
utf8[1] = (codepoint & 0x3F) | 0x80;
return 2;
}
/* 1110xxxx 10xxxxxx 10xxxxxx */
if (codepoint <= 0xFFFF) {
utf8[0] = (codepoint >> 12) | 0xE0;
utf8[1] = ((codepoint >> 6) & 0x3F) | 0x80;
utf8[2] = (codepoint & 0x3F) | 0x80;
return 3;
}
/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
if (codepoint <= 0x1FFFFF) {
utf8[0] = (codepoint >> 18) | 0xF0;
utf8[1] = ((codepoint >> 12) & 0x3F) | 0x80;
utf8[2] = ((codepoint >> 6) & 0x3F) | 0x80;
utf8[3] = (codepoint & 0x3F) | 0x80;
return 4;
}
return 0;
}
/* Called when index pointing to beginning of UTF-16 code escape: \uXXXX
* \u is guaranteed to exist, but the remaining hex characters may be
* missing.
* Translate to UTF-8 and append to temporary token string.
* Must advance index to the next character to be processed.
* Returns: 0 success
* -1 error
*/
static int json_append_unicode_escape(json_parse_t *json)
{
char utf8[4]; /* Surrogate pairs require 4 UTF-8 bytes */
int codepoint;
int surrogate_low;
int len;
int escape_len = 6;
/* Fetch UTF-16 code unit */
codepoint = decode_hex4(json->ptr + 2);
if (codepoint < 0)
return -1;
/* UTF-16 surrogate pairs take the following 2 byte form:
* 11011 x yyyyyyyyyy
* When x = 0: y is the high 10 bits of the codepoint
* x = 1: y is the low 10 bits of the codepoint
*
* Check for a surrogate pair (high or low) */
if ((codepoint & 0xF800) == 0xD800) {
/* Error if the 1st surrogate is not high */
if (codepoint & 0x400)
return -1;
/* Ensure the next code is a unicode escape */
if (*(json->ptr + escape_len) != '\\' ||
*(json->ptr + escape_len + 1) != 'u') {
return -1;
}
/* Fetch the next codepoint */
surrogate_low = decode_hex4(json->ptr + 2 + escape_len);
if (surrogate_low < 0)
return -1;
/* Error if the 2nd code is not a low surrogate */
if ((surrogate_low & 0xFC00) != 0xDC00)
return -1;
/* Calculate Unicode codepoint */
codepoint = (codepoint & 0x3FF) << 10;
surrogate_low &= 0x3FF;
codepoint = (codepoint | surrogate_low) + 0x10000;
escape_len = 12;
}
/* Convert codepoint to UTF-8 */
len = codepoint_to_utf8(utf8, codepoint);
if (!len)
return -1;
/* Append bytes and advance parse index */
strbuf_append_mem_unsafe(json->tmp, utf8, len);
json->ptr += escape_len;
return 0;
}
static void json_set_token_error(json_token_t *token, json_parse_t *json,
const char *errtype)
{
token->type = T_ERROR;
token->index = (int)(json->ptr - json->data);
token->value.string = errtype;
}
static void json_next_string_token(json_parse_t *json, json_token_t *token)
{
char *escape2char = json->cfg->escape2char;
char ch;
/* Caller must ensure a string is next */
assert(*json->ptr == '"');
/* Skip " */
json->ptr++;
/* json->tmp is the temporary strbuf used to accumulate the
* decoded string value.
* json->tmp is sized to handle JSON containing only a string value.
*/
strbuf_reset(json->tmp);
while ((ch = *json->ptr) != '"') {
if (!ch) {
/* Premature end of the string */
json_set_token_error(token, json, "unexpected end of string");
return;
}
/* Handle escapes */
if (ch == '\\') {
/* Fetch escape character */
ch = *(json->ptr + 1);
/* Translate escape code and append to tmp string */
ch = escape2char[(unsigned char)ch];
if (ch == 'u') {
if (json_append_unicode_escape(json) == 0)
continue;
json_set_token_error(token, json,
"invalid unicode escape code");
return;
}
if (!ch) {
json_set_token_error(token, json, "invalid escape code");
return;
}
/* Skip '\' */
json->ptr++;
}
/* Append normal character or translated single character
* Unicode escapes are handled above */
strbuf_append_char_unsafe(json->tmp, ch);
json->ptr++;
}
json->ptr++; /* Eat final quote (") */
strbuf_ensure_null(json->tmp);
token->type = T_STRING;
token->value.string = strbuf_string(json->tmp, &token->string_len);
}
/* JSON numbers should take the following form:
* -?(0|[1-9]|[1-9][0-9]+)(.[0-9]+)?([eE][-+]?[0-9]+)?
*
* json_next_number_token() uses strtod() which allows other forms:
* - numbers starting with '+'
* - NaN, -NaN, infinity, -infinity
* - hexadecimal numbers
* - numbers with leading zeros
*
* json_is_invalid_number() detects "numbers" which may pass strtod()'s
* error checking, but should not be allowed with strict JSON.
*
* json_is_invalid_number() may pass numbers which cause strtod()
* to generate an error.
*/
static int json_is_invalid_number(json_parse_t *json)
{
const char *p = json->ptr;
/* Reject numbers starting with + */
if (*p == '+')
return 1;
/* Skip minus sign if it exists */
if (*p == '-')
p++;
/* Reject numbers starting with 0x, or leading zeros */
if (*p == '0') {
int ch2 = *(p + 1);
if ((ch2 | 0x20) == 'x' || /* Hex */
('0' <= ch2 && ch2 <= '9')) /* Leading zero */
return 1;
return 0;
} else if (*p <= '9') {
return 0; /* Ordinary number */
}
/* Reject inf/nan */
if (!__strncasecmp(p, "inf", 3))
return 1;
if (!__strncasecmp(p, "nan", 3))
return 1;
/* Pass all other numbers which may still be invalid, but
* strtod() will catch them. */
return 0;
}
static void json_next_number_token(json_parse_t *json, json_token_t *token)
{
char *endptr;
token->type = T_NUMBER;
token->value.number = fpconv_strtod(json->ptr, &endptr);
if (json->ptr == endptr)
json_set_token_error(token, json, "invalid number");
else
json->ptr = endptr; /* Skip the processed number */
return;
}
/* Fills in the token struct.
* T_STRING will return a pointer to the json_parse_t temporary string
* T_ERROR will leave the json->ptr pointer at the error.
*/
static void json_next_token(json_parse_t *json, json_token_t *token)
{
const json_token_type_t *ch2token = json->cfg->ch2token;
int ch;
/* Eat whitespace. */
while (1) {
ch = (unsigned char)*(json->ptr);
token->type = ch2token[ch];
if (token->type != T_WHITESPACE)
break;
json->ptr++;
}
/* Store location of new token. Required when throwing errors
* for unexpected tokens (syntax errors). */
token->index = (int)(json->ptr - json->data);
/* Don't advance the pointer for an error or the end */
if (token->type == T_ERROR) {
json_set_token_error(token, json, "invalid token");
return;
}
if (token->type == T_END) {
return;
}
/* Found a known single character token, advance index and return */
if (token->type != T_UNKNOWN) {
json->ptr++;
return;
}
/* Process characters which triggered T_UNKNOWN
*
* Must use strncmp() to match the front of the JSON string.
* JSON identifier must be lowercase.
* When strict_numbers if disabled, either case is allowed for
* Infinity/NaN (since we are no longer following the spec..) */
if (ch == '"') {
json_next_string_token(json, token);
return;
} else if (ch == '-' || ('0' <= ch && ch <= '9')) {
if (!json->cfg->decode_invalid_numbers && json_is_invalid_number(json)) {
json_set_token_error(token, json, "invalid number");
return;
}
json_next_number_token(json, token);
return;
} else if (!strncmp(json->ptr, "true", 4)) {
token->type = T_BOOLEAN;
token->value.boolean = 1;
json->ptr += 4;
return;
} else if (!strncmp(json->ptr, "false", 5)) {
token->type = T_BOOLEAN;
token->value.boolean = 0;
json->ptr += 5;
return;
} else if (!strncmp(json->ptr, "null", 4)) {
token->type = T_NULL;
json->ptr += 4;
return;
} else if (json->cfg->decode_invalid_numbers &&
json_is_invalid_number(json)) {
/* When decode_invalid_numbers is enabled, only attempt to process
* numbers we know are invalid JSON (Inf, NaN, hex)
* This is required to generate an appropriate token error,
* otherwise all bad tokens will register as "invalid number"
*/
json_next_number_token(json, token);
return;
}
/* Token starts with t/f/n but isn't recognised above. */
json_set_token_error(token, json, "invalid token");
}
/* This function does not return.
* DO NOT CALL WITH DYNAMIC MEMORY ALLOCATED.
* The only supported exception is the temporary parser string
* json->tmp struct.
* json and token should exist on the stack somewhere.
* luaL_error() will long_jmp and release the stack */
static void json_throw_parse_error(lua_State *l, json_parse_t *json,
const char *exp, json_token_t *token)
{
const char *found;
strbuf_free(json->tmp);
if (token->type == T_ERROR)
found = token->value.string;
else
found = json_token_type_name[token->type];
/* Note: token->index is 0 based, display starting from 1 */
luaL_error(l, "Expected %s but found %s at character %d",
exp, found, token->index + 1);
}
static inline void json_decode_ascend(json_parse_t *json)
{
json->current_depth--;
}
static void json_decode_descend(lua_State *l, json_parse_t *json, int slots)
{
json->current_depth++;
if (json->current_depth <= json->cfg->decode_max_depth &&
lua_checkstack(l, slots)) {
return;
}
strbuf_free(json->tmp);
luaL_error(l, "Found too many nested data structures (%d) at character %d",
json->current_depth, json->ptr - json->data);
}
static void json_parse_object_context(lua_State *l, json_parse_t *json)
{
json_token_t token;
/* 3 slots required:
* .., table, key, value */
json_decode_descend(l, json, 3);
lua_newtable(l);
json_next_token(json, &token);
/* Handle empty objects */
if (token.type == T_OBJ_END) {
json_decode_ascend(json);
return;
}
while (1) {
if (token.type != T_STRING)
json_throw_parse_error(l, json, "object key string", &token);
/* Push key */
lua_pushlstring(l, token.value.string, token.string_len);
json_next_token(json, &token);
if (token.type != T_COLON)
json_throw_parse_error(l, json, "colon", &token);
/* Fetch value */
json_next_token(json, &token);
json_process_value(l, json, &token);
/* Set key = value */
lua_rawset(l, -3);
json_next_token(json, &token);
if (token.type == T_OBJ_END) {
json_decode_ascend(json);
return;
}
if (token.type != T_COMMA)
json_throw_parse_error(l, json, "comma or object end", &token);
json_next_token(json, &token);
}
}
/* Handle the array context */
static void json_parse_array_context(lua_State *l, json_parse_t *json)
{
json_token_t token;
int i;
/* 2 slots required:
* .., table, value */
json_decode_descend(l, json, 2);
lua_newtable(l);
json_next_token(json, &token);
/* Handle empty arrays */
if (token.type == T_ARR_END) {
json_decode_ascend(json);
return;
}
for (i = 1; ; i++) {
json_process_value(l, json, &token);
lua_rawseti(l, -2, i); /* arr[i] = value */
json_next_token(json, &token);
if (token.type == T_ARR_END) {
json_decode_ascend(json);
return;
}
if (token.type != T_COMMA)
json_throw_parse_error(l, json, "comma or array end", &token);
json_next_token(json, &token);
}
}
/* Handle the "value" context */
static void json_process_value(lua_State *l, json_parse_t *json,
json_token_t *token)
{
switch (token->type) {
case T_STRING:
lua_pushlstring(l, token->value.string, token->string_len);
break;;
case T_NUMBER:
lua_pushnumber(l, token->value.number);
break;;
case T_BOOLEAN:
lua_pushboolean(l, token->value.boolean);
break;;
case T_OBJ_BEGIN:
json_parse_object_context(l, json);
break;;
case T_ARR_BEGIN:
json_parse_array_context(l, json);
break;;
case T_NULL:
if (json->cfg->decode_lua_nil)
{
lua_pushnil(l);
}
else
{
/* In Lua, setting "t[k] = nil" will delete k from the table.
* Hence a NULL pointer lightuserdata object is used instead */
lua_pushlightuserdata(l, NULL);
}
break;;
default:
json_throw_parse_error(l, json, "value", token);
}
}
static int json_decode(lua_State *l)
{
json_parse_t json;
json_token_t token;
size_t json_len;
luaL_argcheck(l, lua_gettop(l) == 1, 1, "expected 1 argument");
json.cfg = json_fetch_config(l);
json.data = luaL_checklstring(l, 1, &json_len);
json.current_depth = 0;
json.ptr = json.data;
/* Detect Unicode other than UTF-8 (see RFC 4627, Sec 3)
*
* CJSON can support any simple data type, hence only the first
* character is guaranteed to be ASCII (at worst: '"'). This is
* still enough to detect whether the wrong encoding is in use. */
if (json_len >= 2 && (!json.data[0] || !json.data[1]))
luaL_error(l, "JSON parser does not support UTF-16 or UTF-32");
/* Ensure the temporary buffer can hold the entire string.
* This means we no longer need to do length checks since the decoded
* string must be smaller than the entire json string */
json.tmp = strbuf_new((int)json_len);
json_next_token(&json, &token);
json_process_value(l, &json, &token);
/* Ensure there is no more input left */
json_next_token(&json, &token);
if (token.type != T_END)
json_throw_parse_error(l, &json, "the end", &token);
strbuf_free(json.tmp);
return 1;
}
/* ===== INITIALISATION ===== */
#if !defined(LUA_VERSION_NUM) || LUA_VERSION_NUM < 502
/* Compatibility for Lua 5.1.
*
* luaL_setfuncs() is used to create a module table where the functions have
* json_config_t as their first upvalue. Code borrowed from Lua 5.2 source. */
static void luaL_setfuncs (lua_State *l, const luaL_Reg *reg, int nup)
{
int i;
luaL_checkstack(l, nup, "too many upvalues");
for (; reg->name != NULL; reg++) { /* fill the table with given functions */
for (i = 0; i < nup; i++) /* copy upvalues to the top */
lua_pushvalue(l, -nup);
lua_pushcclosure(l, reg->func, nup); /* closure with those upvalues */
lua_setfield(l, -(nup + 2), reg->name);
}
lua_pop(l, nup); /* remove upvalues */
}
#endif
/* Call target function in protected mode with all supplied args.
* Assumes target function only returns a single non-nil value.
* Convert and return thrown errors as: nil, "error message" */
static int json_protect_conversion(lua_State *l)
{
int err;
/* Deliberately throw an error for invalid arguments */
luaL_argcheck(l, lua_gettop(l) == 1, 1, "expected 1 argument");
/* pcall() the function stored as upvalue(1) */
lua_pushvalue(l, lua_upvalueindex(1));
lua_insert(l, 1);
err = lua_pcall(l, 1, 1, 0);
if (!err)
return 1;
if (err == LUA_ERRRUN) {
lua_pushnil(l);
lua_insert(l, -2);
return 2;
}
/* Since we are not using a custom error handler, the only remaining
* errors are memory related */
return luaL_error(l, "Memory allocation error in CJSON protected call");
}
/* Return cjson module table */
static int lua_cjson_new(lua_State *l)
{
luaL_Reg reg[] = {
{ "encode", json_encode },
{ "decode", json_decode },
{ "encode_sparse_array", json_cfg_encode_sparse_array },
{ "encode_max_depth", json_cfg_encode_max_depth },
{ "decode_max_depth", json_cfg_decode_max_depth },
{ "encode_number_precision", json_cfg_encode_number_precision },
{ "encode_keep_buffer", json_cfg_encode_keep_buffer },
{ "encode_invalid_numbers", json_cfg_encode_invalid_numbers },
{ "decode_invalid_numbers", json_cfg_decode_invalid_numbers },
{ "decode_lua_nil", json_cfg_decode_lua_nil },
{ "new", lua_cjson_new },
{ NULL, NULL }
};
/* Initialise number conversions */
fpconv_init();
/* cjson module table */
lua_newtable(l);
/* Register functions with config data as upvalue */
json_create_config(l);
luaL_setfuncs(l, reg, 1);
/* Set cjson.null */
lua_pushlightuserdata(l, NULL);
lua_setfield(l, -2, "null");
/* Set module name / version fields */
lua_pushliteral(l, CJSON_MODNAME);
lua_setfield(l, -2, "_NAME");
lua_pushliteral(l, CJSON_VERSION);
lua_setfield(l, -2, "_VERSION");
return 1;
}
/* Return cjson.safe module table */
static int lua_cjson_safe_new(lua_State *l)
{
const char *func[] = { "decode", "encode", NULL };
int i;
lua_cjson_new(l);
/* Fix new() method */
lua_pushcfunction(l, lua_cjson_safe_new);
lua_setfield(l, -2, "new");
for (i = 0; func[i]; i++) {
lua_getfield(l, -1, func[i]);
lua_pushcclosure(l, json_protect_conversion, 1);
lua_setfield(l, -2, func[i]);
}
return 1;
}
int luaopen_cjson(lua_State *l)
{
lua_cjson_new(l);
#ifdef ENABLE_CJSON_GLOBAL
/* Register a global "cjson" table. */
lua_pushvalue(l, -1);
lua_setglobal(l, CJSON_MODNAME);
#endif
/* Return cjson table */
return 1;
}
int luaopen_cjson_safe(lua_State *l)
{
lua_cjson_safe_new(l);
/* Return cjson.safe table */
return 1;
}
/* vi:ai et sw=4 ts=4:
*/