Skip to content

Commit 36fefff

Browse files
authored
Merge pull request #841 from byroot/generate-dup-keys
Fail or warn on duplicated key during generation
2 parents 2d63648 + 8454149 commit 36fefff

File tree

10 files changed

+289
-9
lines changed

10 files changed

+289
-9
lines changed

CHANGES.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,17 @@
22

33
### Unreleased
44

5+
* Add new `allow_duplicate_key` generator options. By default a warning is now emitted when a duplicated key is encountered.
6+
In `json 3.0` an error will be raised.
7+
```ruby
8+
>> Warning[:deprecated] = true
9+
>> puts JSON.generate({ foo: 1, "foo" => 2 })
10+
(irb):2: warning: detected duplicate key "foo" in {foo: 1, "foo" => 2}.
11+
This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`
12+
{"foo":1,"foo":2}
13+
>> JSON.generate({ foo: 1, "foo" => 2 }, allow_duplicate_key: false)
14+
detected duplicate key "foo" in {foo: 1, "foo" => 2} (JSON::GeneratorError)
15+
```
516
* Fix `JSON.generate` `strict: true` mode to also restrict hash keys.
617
* Fix `JSON::Coder` to also invoke block for hash keys that aren't strings nor symbols.
718

ext/json/ext/fbuffer/fbuffer.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,14 @@ typedef unsigned char _Bool;
2424
#endif
2525
#endif
2626

27+
#ifndef NOINLINE
28+
#if defined(__has_attribute) && __has_attribute(noinline)
29+
#define NOINLINE() __attribute__((noinline))
30+
#else
31+
#define NOINLINE()
32+
#endif
33+
#endif
34+
2735
#ifndef RB_UNLIKELY
2836
#define RB_UNLIKELY(expr) expr
2937
#endif

ext/json/ext/generator/generator.c

Lines changed: 67 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@
99

1010
/* ruby api and some helpers */
1111

12+
enum duplicate_key_action {
13+
JSON_DEPRECATED = 0,
14+
JSON_IGNORE,
15+
JSON_RAISE,
16+
};
17+
1218
typedef struct JSON_Generator_StateStruct {
1319
VALUE indent;
1420
VALUE space;
@@ -21,6 +27,8 @@ typedef struct JSON_Generator_StateStruct {
2127
long depth;
2228
long buffer_initial_length;
2329

30+
enum duplicate_key_action on_duplicate_key;
31+
2432
bool allow_nan;
2533
bool ascii_only;
2634
bool script_safe;
@@ -34,7 +42,7 @@ typedef struct JSON_Generator_StateStruct {
3442
static VALUE mJSON, cState, cFragment, eGeneratorError, eNestingError, Encoding_UTF_8;
3543

3644
static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode;
37-
static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan,
45+
static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, sym_allow_duplicate_key,
3846
sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json;
3947

4048

@@ -987,8 +995,11 @@ static inline VALUE vstate_get(struct generate_json_data *data)
987995
}
988996

989997
struct hash_foreach_arg {
998+
VALUE hash;
990999
struct generate_json_data *data;
991-
int iter;
1000+
int first_key_type;
1001+
bool first;
1002+
bool mixed_keys_encountered;
9921003
};
9931004

9941005
static VALUE
@@ -1006,6 +1017,22 @@ convert_string_subclass(VALUE key)
10061017
return key_to_s;
10071018
}
10081019

1020+
NOINLINE()
1021+
static void
1022+
json_inspect_hash_with_mixed_keys(struct hash_foreach_arg *arg)
1023+
{
1024+
if (arg->mixed_keys_encountered) {
1025+
return;
1026+
}
1027+
arg->mixed_keys_encountered = true;
1028+
1029+
JSON_Generator_State *state = arg->data->state;
1030+
if (state->on_duplicate_key != JSON_IGNORE) {
1031+
VALUE do_raise = state->on_duplicate_key == JSON_RAISE ? Qtrue : Qfalse;
1032+
rb_funcall(mJSON, rb_intern("on_mixed_keys_hash"), 2, arg->hash, do_raise);
1033+
}
1034+
}
1035+
10091036
static int
10101037
json_object_i(VALUE key, VALUE val, VALUE _arg)
10111038
{
@@ -1016,8 +1043,16 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
10161043
JSON_Generator_State *state = data->state;
10171044

10181045
long depth = state->depth;
1046+
int key_type = rb_type(key);
1047+
1048+
if (arg->first) {
1049+
arg->first = false;
1050+
arg->first_key_type = key_type;
1051+
}
1052+
else {
1053+
fbuffer_append_char(buffer, ',');
1054+
}
10191055

1020-
if (arg->iter > 0) fbuffer_append_char(buffer, ',');
10211056
if (RB_UNLIKELY(data->state->object_nl)) {
10221057
fbuffer_append_str(buffer, data->state->object_nl);
10231058
}
@@ -1029,21 +1064,30 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
10291064
bool as_json_called = false;
10301065

10311066
start:
1032-
switch (rb_type(key)) {
1067+
switch (key_type) {
10331068
case T_STRING:
1069+
if (RB_UNLIKELY(arg->first_key_type != T_STRING)) {
1070+
json_inspect_hash_with_mixed_keys(arg);
1071+
}
1072+
10341073
if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) {
10351074
key_to_s = key;
10361075
} else {
10371076
key_to_s = convert_string_subclass(key);
10381077
}
10391078
break;
10401079
case T_SYMBOL:
1080+
if (RB_UNLIKELY(arg->first_key_type != T_SYMBOL)) {
1081+
json_inspect_hash_with_mixed_keys(arg);
1082+
}
1083+
10411084
key_to_s = rb_sym2str(key);
10421085
break;
10431086
default:
10441087
if (data->state->strict) {
10451088
if (RTEST(data->state->as_json) && !as_json_called) {
10461089
key = rb_proc_call_with_block(data->state->as_json, 1, &key, Qnil);
1090+
key_type = rb_type(key);
10471091
as_json_called = true;
10481092
goto start;
10491093
} else {
@@ -1064,7 +1108,6 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
10641108
if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space);
10651109
generate_json(buffer, data, val);
10661110

1067-
arg->iter++;
10681111
return ST_CONTINUE;
10691112
}
10701113

@@ -1091,8 +1134,9 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat
10911134
fbuffer_append_char(buffer, '{');
10921135

10931136
struct hash_foreach_arg arg = {
1137+
.hash = obj,
10941138
.data = data,
1095-
.iter = 0,
1139+
.first = true,
10961140
};
10971141
rb_hash_foreach(obj, json_object_i, (VALUE)&arg);
10981142

@@ -1794,6 +1838,19 @@ static VALUE cState_ascii_only_set(VALUE self, VALUE enable)
17941838
return Qnil;
17951839
}
17961840

1841+
static VALUE cState_allow_duplicate_key_p(VALUE self)
1842+
{
1843+
GET_STATE(self);
1844+
switch (state->on_duplicate_key) {
1845+
case JSON_IGNORE:
1846+
return Qtrue;
1847+
case JSON_DEPRECATED:
1848+
return Qnil;
1849+
case JSON_RAISE:
1850+
return Qfalse;
1851+
}
1852+
}
1853+
17971854
/*
17981855
* call-seq: depth
17991856
*
@@ -1883,6 +1940,7 @@ static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
18831940
else if (key == sym_script_safe) { state->script_safe = RTEST(val); }
18841941
else if (key == sym_escape_slash) { state->script_safe = RTEST(val); }
18851942
else if (key == sym_strict) { state->strict = RTEST(val); }
1943+
else if (key == sym_allow_duplicate_key) { state->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
18861944
else if (key == sym_as_json) {
18871945
VALUE proc = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse;
18881946
state_write_value(data, &state->as_json, proc);
@@ -2008,6 +2066,8 @@ void Init_generator(void)
20082066
rb_define_method(cState, "generate", cState_generate, -1);
20092067
rb_define_alias(cState, "generate_new", "generate"); // :nodoc:
20102068

2069+
rb_define_private_method(cState, "allow_duplicate_key?", cState_allow_duplicate_key_p, 0);
2070+
20112071
rb_define_singleton_method(cState, "generate", cState_m_generate, 3);
20122072

20132073
VALUE mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods");
@@ -2072,6 +2132,7 @@ void Init_generator(void)
20722132
sym_escape_slash = ID2SYM(rb_intern("escape_slash"));
20732133
sym_strict = ID2SYM(rb_intern("strict"));
20742134
sym_as_json = ID2SYM(rb_intern("as_json"));
2135+
sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
20752136

20762137
usascii_encindex = rb_usascii_encindex();
20772138
utf8_encindex = rb_utf8_encindex();

ext/json/ext/parser/parser.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1314,7 +1314,7 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
13141314
else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
13151315
else if (key == sym_freeze) { config->freeze = RTEST(val); }
13161316
else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
1317-
else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
1317+
else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
13181318
else if (key == sym_decimal_class) {
13191319
if (RTEST(val)) {
13201320
if (rb_respond_to(val, i_try_convert)) {

java/src/json/ext/Generator.java

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,66 @@ void generate(ThreadContext context, final Session session, RubyHash object, fin
488488
}
489489
}
490490

491+
private static class HashKeyTracker {
492+
enum KeyType {
493+
UNKNOWN,
494+
STRING,
495+
SYMBOL,
496+
};
497+
498+
private KeyType keyType;
499+
private boolean done;
500+
private RubyHash hash;
501+
502+
private HashKeyTracker(RubyHash hash) {
503+
this.hash = hash;
504+
this.done = false;
505+
this.keyType = KeyType.UNKNOWN;
506+
}
507+
508+
public void trackFirst(ThreadContext context, Session session, IRubyObject key) {
509+
if (key instanceof RubyString) {
510+
this.keyType = KeyType.STRING;
511+
} else if (key.getType() == context.runtime.getSymbol()) {
512+
this.keyType = KeyType.SYMBOL;
513+
} else {
514+
this.done = true;
515+
report(context, session);
516+
}
517+
}
518+
519+
public void track(ThreadContext context, Session session, IRubyObject key) {
520+
if (!done) {
521+
if (keyType == KeyType.STRING) {
522+
if (!(key instanceof RubyString)) {
523+
this.report(context, session);
524+
}
525+
} else {
526+
if (!(key.getType() == context.runtime.getSymbol())) {
527+
this.report(context, session);
528+
}
529+
}
530+
}
531+
}
532+
533+
private void report(ThreadContext context, Session session) {
534+
this.done = true;
535+
536+
final RuntimeInfo info = session.getInfo(context);
537+
final GeneratorState state = session.getState(context);
538+
539+
if (!state.getAllowDuplicateKey()) {
540+
if (state.getDeprecateDuplicateKey()) {
541+
IRubyObject args[] = new IRubyObject[]{hash, context.getRuntime().getFalse()};
542+
info.jsonModule.get().callMethod(context, "on_mixed_keys_hash", args);
543+
} else {
544+
IRubyObject args[] = new IRubyObject[]{hash, context.getRuntime().getTrue()};
545+
info.jsonModule.get().callMethod(context, "on_mixed_keys_hash", args);
546+
}
547+
}
548+
}
549+
}
550+
491551
static void generateHash(ThreadContext context, Session session, RubyHash object, OutputStream buffer) throws IOException {
492552
final GeneratorState state = session.getState(context);
493553
final int depth = state.increaseDepth(context);
@@ -508,7 +568,13 @@ static void generateHash(ThreadContext context, Session session, RubyHash object
508568
buffer.write(objectNLBytes);
509569

510570
boolean firstPair = true;
571+
HashKeyTracker tracker = new HashKeyTracker(object);
511572
for (RubyHash.RubyHashEntry entry : (Set<RubyHash.RubyHashEntry>) object.directEntrySet()) {
573+
if (firstPair) {
574+
tracker.trackFirst(context, session, (IRubyObject)entry.getKey());
575+
} else {
576+
tracker.track(context, session, (IRubyObject)entry.getKey());
577+
}
512578
processEntry(context, session, buffer, entry, firstPair, objectNl, indent, spaceBefore, space);
513579
firstPair = false;
514580
}

java/src/json/ext/GeneratorState.java

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@
3434
* @author mernen
3535
*/
3636
public class GeneratorState extends RubyObject {
37+
private boolean allowDuplicateKey = false;
38+
private boolean deprecateDuplicateKey = true;
39+
3740
/**
3841
* The indenting unit string. Will be repeated several times for larger
3942
* indenting levels.
@@ -216,6 +219,10 @@ public IRubyObject initialize_copy(ThreadContext context, IRubyObject vOrig) {
216219
this.strict = orig.strict;
217220
this.bufferInitialLength = orig.bufferInitialLength;
218221
this.depth = orig.depth;
222+
223+
this.allowDuplicateKey = orig.allowDuplicateKey;
224+
this.deprecateDuplicateKey = orig.deprecateDuplicateKey;
225+
219226
return this;
220227
}
221228

@@ -479,6 +486,24 @@ private ByteList prepareByteList(ThreadContext context, IRubyObject value) {
479486
return str.getByteList().dup();
480487
}
481488

489+
@JRubyMethod(name="allow_duplicate_key?", visibility=Visibility.PRIVATE)
490+
public IRubyObject allow_duplicate_key_p(ThreadContext context) {
491+
if (allowDuplicateKey) {
492+
return context.runtime.getTrue();
493+
} else if (deprecateDuplicateKey) {
494+
return context.runtime.getNil();
495+
}
496+
return context.runtime.getFalse();
497+
}
498+
499+
public boolean getAllowDuplicateKey() {
500+
return allowDuplicateKey;
501+
}
502+
503+
public boolean getDeprecateDuplicateKey() {
504+
return deprecateDuplicateKey;
505+
}
506+
482507
/**
483508
* <code>State#configure(opts)</code>
484509
*
@@ -520,6 +545,10 @@ public IRubyObject _configure(ThreadContext context, IRubyObject vOpts) {
520545

521546
depth = opts.getInt("depth", 0);
522547

548+
if (opts.hasKey("allow_duplicate_key")) {
549+
this.allowDuplicateKey = opts.getBool("allow_duplicate_key", false);
550+
this.deprecateDuplicateKey = false;
551+
}
523552
return this;
524553
}
525554

@@ -548,6 +577,16 @@ public RubyHash to_h(ThreadContext context) {
548577
result.op_aset(context, runtime.newSymbol("strict"), strict_get(context));
549578
result.op_aset(context, runtime.newSymbol("depth"), depth_get(context));
550579
result.op_aset(context, runtime.newSymbol("buffer_initial_length"), buffer_initial_length_get(context));
580+
581+
if (this.allowDuplicateKey) {
582+
if (!this.deprecateDuplicateKey) {
583+
result.op_aset(context, runtime.newSymbol("allow_duplicate_key"), runtime.getTrue());
584+
}
585+
}
586+
else {
587+
result.op_aset(context, runtime.newSymbol("allow_duplicate_key"), runtime.getFalse());
588+
}
589+
551590
for (String name: getInstanceVariableNameList()) {
552591
result.op_aset(context, runtime.newSymbol(name.substring(1)), getInstanceVariables().getInstanceVariable(name));
553592
}

0 commit comments

Comments
 (0)