Skip to content

Commit 6a0ea13

Browse files
authored
Merge pull request #14 from b41sh/fix-escaped-ws
2 parents 43d60a4 + ad1792e commit 6a0ea13

File tree

5 files changed

+97
-80
lines changed

5 files changed

+97
-80
lines changed

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
## [v0.2.2] - 2023-05-06
2+
3+
### Fixed
4+
5+
- Fix: Allow parsing escaped whitespace. (#14)
6+
17
## [v0.2.1] - 2023-05-05
28

39
### Fixed
@@ -24,6 +30,7 @@
2430
- Implemented a number of `JSONB` functions.
2531

2632

33+
[v0.2.2]: https://github.com/datafuselabs/jsonb/compare/v0.2.1...v0.2.2
2734
[v0.2.1]: https://github.com/datafuselabs/jsonb/compare/v0.2.0...v0.2.1
2835
[v0.2.0]: https://github.com/datafuselabs/jsonb/compare/v0.1.1...v0.2.0
2936
[v0.1.1]: https://github.com/datafuselabs/jsonb/compare/v0.1.0...v0.1.1

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ keywords = ["json", "jsonb", "jsonpath"]
2222
license = "Apache-2.0"
2323
name = "jsonb"
2424
repository = "https://github.com/datafuselabs/jsonb"
25-
version = "0.2.1"
25+
version = "0.2.2"
2626
rust-version = "1.68"
2727

2828
[dependencies]

src/functions.rs

Lines changed: 53 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use super::constants::*;
2121
use super::error::*;
2222
use super::jentry::JEntry;
2323
use super::number::Number;
24-
use super::parser::decode_value;
24+
use super::parser::parse_value;
2525
use super::value::Value;
2626
use crate::jsonpath::ArrayIndex;
2727
use crate::jsonpath::Index;
@@ -125,8 +125,10 @@ pub fn build_object<'a, K: AsRef<str>>(
125125
/// Get the length of `JSONB` array.
126126
pub fn array_length(value: &[u8]) -> Option<usize> {
127127
if !is_jsonb(value) {
128-
let json_value = decode_value(value).unwrap();
129-
return json_value.array_length();
128+
return match parse_value(value) {
129+
Ok(val) => val.array_length(),
130+
Err(_) => None,
131+
};
130132
}
131133
let header = read_u32(value, 0).unwrap();
132134
match header & CONTAINER_HEADER_TYPE_MASK {
@@ -143,9 +145,13 @@ pub fn array_length(value: &[u8]) -> Option<usize> {
143145
pub fn get_by_path<'a>(value: &'a [u8], json_path: JsonPath<'a>) -> Vec<Vec<u8>> {
144146
let selector = Selector::new(json_path);
145147
if !is_jsonb(value) {
146-
let json_value = decode_value(value).unwrap();
147-
let value = json_value.to_vec();
148-
selector.select(value.as_slice())
148+
match parse_value(value) {
149+
Ok(val) => {
150+
let value = val.to_vec();
151+
selector.select(value.as_slice())
152+
}
153+
Err(_) => vec![],
154+
}
149155
} else {
150156
selector.select(value)
151157
}
@@ -192,8 +198,10 @@ pub fn get_by_name(value: &[u8], name: &str) -> Option<Vec<u8>> {
192198
/// Get the inner element of `JSONB` Object by key name ignoring case.
193199
pub fn get_by_name_ignore_case(value: &[u8], name: &str) -> Option<Vec<u8>> {
194200
if !is_jsonb(value) {
195-
let json_value = decode_value(value).unwrap();
196-
return json_value.get_by_name_ignore_case(name).map(Value::to_vec);
201+
return match parse_value(value) {
202+
Ok(val) => val.get_by_name_ignore_case(name).map(Value::to_vec),
203+
Err(_) => None,
204+
};
197205
}
198206

199207
let header = read_u32(value, 0).unwrap();
@@ -261,8 +269,10 @@ pub fn get_by_name_ignore_case(value: &[u8], name: &str) -> Option<Vec<u8>> {
261269
/// Get the keys of a `JSONB` object.
262270
pub fn object_keys(value: &[u8]) -> Option<Vec<u8>> {
263271
if !is_jsonb(value) {
264-
let json_value = decode_value(value).unwrap();
265-
return json_value.object_keys().map(|val| val.to_vec());
272+
return match parse_value(value) {
273+
Ok(val) => val.object_keys().map(|val| val.to_vec()),
274+
Err(_) => None,
275+
};
266276
}
267277

268278
let header = read_u32(value, 0).unwrap();
@@ -304,11 +314,11 @@ pub fn object_keys(value: &[u8]) -> Option<Vec<u8>> {
304314
/// Scalar Null > Array > Object > Other Scalars(String > Number > Boolean).
305315
pub fn compare(left: &[u8], right: &[u8]) -> Result<Ordering, Error> {
306316
if !is_jsonb(left) {
307-
let lval = decode_value(left).unwrap();
317+
let lval = parse_value(left)?;
308318
let lbuf = lval.to_vec();
309319
return compare(&lbuf, right);
310320
} else if !is_jsonb(right) {
311-
let rval = decode_value(right).unwrap();
321+
let rval = parse_value(right)?;
312322
let rbuf = rval.to_vec();
313323
return compare(left, &rbuf);
314324
}
@@ -561,15 +571,10 @@ pub fn is_null(value: &[u8]) -> bool {
561571
/// If the `JSONB` is a Null, returns (). Returns None otherwise.
562572
pub fn as_null(value: &[u8]) -> Option<()> {
563573
if !is_jsonb(value) {
564-
if value.is_empty() {
565-
return Some(());
566-
}
567-
let v = value.first().unwrap();
568-
if *v == b'n' {
569-
return Some(());
570-
} else {
571-
return None;
572-
}
574+
return match parse_value(value) {
575+
Ok(val) => val.as_null(),
576+
Err(_) => None,
577+
};
573578
}
574579
let header = read_u32(value, 0).unwrap();
575580
match header & CONTAINER_HEADER_TYPE_MASK {
@@ -592,14 +597,10 @@ pub fn is_boolean(value: &[u8]) -> bool {
592597
/// If the `JSONB` is a Boolean, returns the associated bool. Returns None otherwise.
593598
pub fn as_bool(value: &[u8]) -> Option<bool> {
594599
if !is_jsonb(value) {
595-
let v = value.first().unwrap();
596-
if *v == b't' {
597-
return Some(true);
598-
} else if *v == b'f' {
599-
return Some(false);
600-
} else {
601-
return None;
602-
}
600+
return match parse_value(value) {
601+
Ok(val) => val.as_bool(),
602+
Err(_) => None,
603+
};
603604
}
604605
let header = read_u32(value, 0).unwrap();
605606
match header & CONTAINER_HEADER_TYPE_MASK {
@@ -637,8 +638,10 @@ pub fn is_number(value: &[u8]) -> bool {
637638
/// If the `JSONB` is a Number, returns the Number. Returns None otherwise.
638639
pub fn as_number(value: &[u8]) -> Option<Number> {
639640
if !is_jsonb(value) {
640-
let json_value = decode_value(value).unwrap();
641-
return json_value.as_number().cloned();
641+
return match parse_value(value) {
642+
Ok(val) => val.as_number().cloned(),
643+
Err(_) => None,
644+
};
642645
}
643646
let header = read_u32(value, 0).unwrap();
644647
match header & CONTAINER_HEADER_TYPE_MASK {
@@ -759,13 +762,13 @@ pub fn is_string(value: &[u8]) -> bool {
759762
/// If the `JSONB` is a String, returns the String. Returns None otherwise.
760763
pub fn as_str(value: &[u8]) -> Option<Cow<'_, str>> {
761764
if !is_jsonb(value) {
762-
let v = value.first().unwrap();
763-
if *v == b'"' {
764-
let s = unsafe { std::str::from_utf8_unchecked(&value[1..value.len() - 1]) };
765-
return Some(Cow::Borrowed(s));
766-
} else {
767-
return None;
768-
}
765+
return match parse_value(value) {
766+
Ok(val) => match val {
767+
Value::String(s) => Some(s.clone()),
768+
_ => None,
769+
},
770+
Err(_) => None,
771+
};
769772
}
770773
let header = read_u32(value, 0).unwrap();
771774
match header & CONTAINER_HEADER_TYPE_MASK {
@@ -787,19 +790,21 @@ pub fn as_str(value: &[u8]) -> Option<Cow<'_, str>> {
787790

788791
/// Cast `JSONB` value to String
789792
pub fn to_str(value: &[u8]) -> Result<String, Error> {
790-
if is_null(value) {
791-
return Err(Error::InvalidCast);
792-
} else if let Some(v) = as_str(value) {
793+
if let Some(v) = as_str(value) {
793794
return Ok(v.to_string());
795+
} else if is_null(value) {
796+
return Err(Error::InvalidCast);
794797
}
795798
Ok(to_string(value))
796799
}
797800

798801
/// Returns true if the `JSONB` is an Array. Returns false otherwise.
799802
pub fn is_array(value: &[u8]) -> bool {
800803
if !is_jsonb(value) {
801-
let v = value.first().unwrap();
802-
return *v == b'[';
804+
return match parse_value(value) {
805+
Ok(val) => val.is_array(),
806+
Err(_) => false,
807+
};
803808
}
804809
let header = read_u32(value, 0).unwrap();
805810
matches!(header & CONTAINER_HEADER_TYPE_MASK, ARRAY_CONTAINER_TAG)
@@ -808,8 +813,10 @@ pub fn is_array(value: &[u8]) -> bool {
808813
/// Returns true if the `JSONB` is an Object. Returns false otherwise.
809814
pub fn is_object(value: &[u8]) -> bool {
810815
if !is_jsonb(value) {
811-
let v = value.first().unwrap();
812-
return *v == b'{';
816+
return match parse_value(value) {
817+
Ok(val) => val.is_object(),
818+
Err(_) => false,
819+
};
813820
}
814821
let header = read_u32(value, 0).unwrap();
815822
matches!(header & CONTAINER_HEADER_TYPE_MASK, OBJECT_CONTAINER_TAG)
@@ -921,7 +928,7 @@ fn scalar_to_string(
921928
// for compatibility with previous `JSON` string.
922929
fn is_jsonb(value: &[u8]) -> bool {
923930
if let Some(v) = value.first() {
924-
if *v == ARRAY_PREFIX || *v == OBJECT_PREFIX || *v == SCALAR_PREFIX {
931+
if matches!(*v, ARRAY_PREFIX | OBJECT_PREFIX | SCALAR_PREFIX) {
925932
return true;
926933
}
927934
}

src/parser.rs

Lines changed: 24 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -30,34 +30,14 @@ pub fn parse_value(buf: &[u8]) -> Result<Value<'_>, Error> {
3030
parser.parse()
3131
}
3232

33-
// used to parse value from storage.
34-
// as value has be parsed, string don't need extra escape.
35-
pub fn decode_value(buf: &[u8]) -> Result<Value<'_>, Error> {
36-
let mut parser = Parser::new_with_escaped(buf);
37-
parser.parse()
38-
}
39-
4033
struct Parser<'a> {
4134
buf: &'a [u8],
4235
idx: usize,
43-
escaped: bool,
4436
}
4537

4638
impl<'a> Parser<'a> {
4739
fn new(buf: &'a [u8]) -> Parser<'a> {
48-
Self {
49-
buf,
50-
idx: 0,
51-
escaped: false,
52-
}
53-
}
54-
55-
fn new_with_escaped(buf: &'a [u8]) -> Parser<'a> {
56-
Self {
57-
buf,
58-
idx: 0,
59-
escaped: true,
60-
}
40+
Self { buf, idx: 0 }
6141
}
6242

6343
fn parse(&mut self) -> Result<Value<'a>, Error> {
@@ -170,13 +150,32 @@ impl<'a> Parser<'a> {
170150
Error::Syntax(code, pos)
171151
}
172152

153+
#[inline]
173154
fn skip_unused(&mut self) {
174155
while self.idx < self.buf.len() {
175156
let c = self.buf.get(self.idx).unwrap();
176-
if !matches!(c, b'\n' | b' ' | b'\r' | b'\t') {
177-
break;
157+
if c.is_ascii_whitespace() {
158+
self.step();
159+
continue;
178160
}
179-
self.step();
161+
// Also skip escaped whitespace sequences (`\n`, `\r`, `\t`, `\x0C`).
162+
if *c == b'\\' {
163+
if self.idx + 1 < self.buf.len()
164+
&& matches!(self.buf[self.idx + 1], b'n' | b'r' | b't')
165+
{
166+
self.step_by(2);
167+
continue;
168+
}
169+
if self.idx + 3 < self.buf.len()
170+
&& self.buf[self.idx + 1] == b'x'
171+
&& self.buf[self.idx + 2] == b'0'
172+
&& self.buf[self.idx + 3] == b'C'
173+
{
174+
self.step_by(4);
175+
continue;
176+
}
177+
}
178+
break;
180179
}
181180
}
182181

@@ -299,7 +298,7 @@ impl<'a> Parser<'a> {
299298
}
300299

301300
let mut data = &self.buf[start_idx..self.idx - 1];
302-
let val = if !self.escaped && escapes > 0 {
301+
let val = if escapes > 0 {
303302
let len = self.idx - 1 - start_idx - escapes;
304303
let mut idx = start_idx + 1;
305304
let mut str_buf = String::with_capacity(len);

tests/it/parser.rs

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -402,15 +402,19 @@ fn test_parse_object() {
402402
obj3.insert("a".to_string(), Value::Object(obj3val));
403403
let mut obj4 = Object::new();
404404
obj4.insert("c".to_string(), Value::Null);
405+
let mut obj5 = Object::new();
406+
obj5.insert("d".to_string(), Value::Number(Number::UInt64(5)));
405407

406408
test_parse_ok(vec![
407-
("{}", Value::Object(Object::new())),
408-
("{ }", Value::Object(Object::new())),
409-
("{\"a\":3}", Value::Object(obj1.clone())),
410-
("{ \"a\" : 3 }", Value::Object(obj1)),
411-
("{\"a\":3,\"b\":4}", Value::Object(obj2.clone())),
412-
(" { \"a\" : 3 , \"b\" : 4 } ", Value::Object(obj2)),
413-
("{\"a\": {\"b\": 3, \"c\": 4}}", Value::Object(obj3)),
414-
("{\"c\":null}", Value::Object(obj4)),
409+
(r#"{}"#, Value::Object(Object::new())),
410+
(r#"{ }"#, Value::Object(Object::new())),
411+
(r#"{"a":3}"#, Value::Object(obj1.clone())),
412+
(r#"{ "a" : 3 }"#, Value::Object(obj1)),
413+
(r#"{"a":3,"b":4}"#, Value::Object(obj2.clone())),
414+
(r#" { "a" : 3 , "b" : 4 } "#, Value::Object(obj2)),
415+
(r#"{"a": {"b": 3, "c": 4}}"#, Value::Object(obj3)),
416+
(r#"{"c":null}"#, Value::Object(obj4)),
417+
(r#"{\t\n\r "d": 5}"#, Value::Object(obj5.clone())),
418+
(r#"{ \x0C "d": 5}"#, Value::Object(obj5)),
415419
]);
416420
}

0 commit comments

Comments
 (0)