import re
from typing import Any, Dict, List, Optional, Tuple, Union
# ASCII control characters, one assignment per line.  The trailing comment
# gives the standard ASCII name and, where the parser in this file uses the
# character, the role it plays.

# Framing
SOH = '\x01'  # Start of Heading: opens a header that runs up to the next STX
STX = '\x02'  # Start of Text: opens a data block
ETX = '\x03'  # End of Text: closes a data block
EOT = '\x04'  # End of Transmission (not referenced by the parser below)
ENQ = '\x05'  # Enquiry (not referenced by the parser below)
ACK = '\x06'  # Acknowledge (not referenced by the parser below)

# Nesting and binary escape
SO = '\x0e'   # Shift Out: opens a nested object
SI = '\x0f'   # Shift In: closes a nested object
DLE = '\x10'  # Data Link Escape: DLE STX / DLE ETX frame binary, DLE DLE = DLE

# Flow control and status (not referenced by the parser below)
XON = '\x11'   # Device Control 1
XOFF = '\x13'  # Device Control 3
NAK = '\x15'   # Negative Acknowledge
SYN = '\x16'   # Synchronous Idle
CAN = '\x18'   # Cancel

# Separators
FS = '\x1c'  # File Separator: between records in a data block
GS = '\x1d'  # Group Separator: between array items
RS = '\x1e'  # Record Separator: between properties of an object
US = '\x1f'  # Unit Separator: between a key and its value

# A parsed value is a string, a list of values, or a nested object.
HsvValue = Union[str, List['HsvValue'], Dict[str, 'HsvValue']]
# An object maps property names to parsed values.
HsvObject = Dict[str, HsvValue]
# Result of parse(): a dict with the keys 'header' and 'records'.
HsvDocument = Dict[str, Any]
def _unescape_binary(data: str) -> str:
    """Collapse each doubled DLE in *data* into a single DLE.

    Pairs are consumed left to right without overlapping, so a run of three
    DLEs becomes two.  A DLE that is not followed by another DLE (including
    one at the very end of *data*) is kept unchanged.

    Args:
        data: Text in which a literal DLE is written as DLE DLE.

    Returns:
        The text with every DLE DLE pair replaced by one DLE.
    """
    # str.replace scans left to right and never rescans what it has replaced,
    # which is exactly the pairwise scan this helper needs.
    return data.replace(DLE + DLE, DLE)
def _extract_binary_sections(text: str) -> Tuple[str, Dict[str, str]]:
    """Replace every DLE STX ... DLE ETX section of *text* with a placeholder.

    Inside a section, DLE DLE stands for one literal DLE and any other
    character (including STX, ETX and the separators) is payload.  The payload
    is unescaped exactly once while it is scanned; it is not passed through a
    second unescaping step, so payloads containing adjacent DLE characters
    survive intact.

    A DLE STX that is never closed by DLE ETX is not treated as a section:
    the opening DLE is copied through and scanning resumes right after it.

    Args:
        text: Raw input text.

    Returns:
        A tuple ``(stripped, binaries)`` where *stripped* is *text* with each
        section replaced by a ``'\\x00BINARY<n>\\x00'`` placeholder and
        *binaries* maps each placeholder to its unescaped payload.
    """
    out: List[str] = []
    binaries: Dict[str, str] = {}
    length = len(text)
    i = 0
    while i < length:
        if text[i] == DLE and i + 1 < length and text[i + 1] == STX:
            payload: List[str] = []
            j = i + 2
            terminated = False
            while j < length:
                if text[j] == DLE and j + 1 < length:
                    follower = text[j + 1]
                    if follower == ETX:
                        terminated = True
                        break
                    if follower == DLE:
                        # Escaped DLE: emit one and skip the pair.
                        payload.append(DLE)
                        j += 2
                        continue
                payload.append(text[j])
                j += 1
            if terminated:
                # Keys are unique, so the dict size doubles as the counter.
                placeholder = f'\x00BINARY{len(binaries)}\x00'
                binaries[placeholder] = ''.join(payload)
                out.append(placeholder)
                i = j + 2  # continue after the closing DLE ETX
                continue
        # Ordinary character, or the DLE of an unterminated section.
        out.append(text[i])
        i += 1
    return ''.join(out), binaries
def _restore_binaries(value: str, binaries: Dict[str, str]) -> str:
for placeholder, data in binaries.items():
value = value.replace(placeholder, data)
return value
def _split_respecting_nesting(text: str, sep: str) -> List[str]:
    """Split *text* on *sep*, ignoring separators inside SO ... SI groups.

    SO raises the nesting level and SI lowers it; *sep* only splits while the
    level is exactly zero.  The level is not clamped, so an unmatched SI makes
    it negative and disables splitting until a later SO restores it.

    Empty parts in the middle or at the start are kept, but an empty final
    part is dropped (so ``''`` yields ``[]`` and ``'a' + sep`` yields
    ``['a']``).

    Args:
        text: Text to split.
        sep: Separator, compared against one character of *text* at a time.

    Returns:
        The parts of *text* between top-level separators, SO/SI included.
    """
    pieces: List[str] = []
    level = 0
    start = 0  # index where the part currently being scanned begins
    for index, char in enumerate(text):
        if char == SO:
            level += 1
        elif char == SI:
            level -= 1
        elif char == sep and level == 0:
            pieces.append(text[start:index])
            start = index + 1
    tail = text[start:]
    if tail:
        pieces.append(tail)
    return pieces
def _parse_value(value: str, binaries: Dict[str, str]) -> HsvValue:
    """Turn one raw value into a list, a nested object, or a plain string.

    The structure is decided while binary sections are still placeholders;
    payloads are restored only for plain-string leaves.  A payload that
    contains GS, SO, SI, RS or US is therefore returned verbatim instead of
    being parsed as structure.

    Order of checks:
      1. A GS at nesting level zero makes the value an array.  Testing this
         first lets ``SO..SI GS SO..SI`` parse as a list of objects.
      2. A value that starts with SO and ends with SI is a nested object.
      3. Anything else is a string.  This includes a value whose only GS is
         nested inside SO ... SI without the value itself being wrapped;
         trying to split such a value again would recurse forever.

    Args:
        value: Raw value text, possibly containing binary placeholders.
        binaries: Mapping of placeholder to binary payload.

    Returns:
        The parsed value.
    """
    if GS in value:
        items = _split_respecting_nesting(value, GS)
        # The split equals [value] exactly when no GS sits at level zero.
        if items != [value]:
            return [_parse_value(item, binaries) for item in items]
    if value.startswith(SO) and value.endswith(SI):
        return _parse_object(value[1:-1], binaries)
    return _restore_binaries(value, binaries)
def _parse_object(content: str, binaries: Dict[str, str]) -> HsvObject:
    """Parse RS-separated ``key US value`` properties into a dict.

    Properties are split at top-level RS, then each one at top-level US.  The
    first part is the key; the remaining parts are re-joined with US and
    parsed as the value.  A property that does not split into at least two
    parts is skipped, and a repeated key keeps its last value.

    Args:
        content: Object body without any surrounding SO/SI.
        binaries: Mapping of placeholder to binary payload, passed on to the
            value parser.

    Returns:
        The parsed object; empty when no property qualified.
    """
    parsed: HsvObject = {}
    for entry in _split_respecting_nesting(content, RS):
        fields = _split_respecting_nesting(entry, US)
        if len(fields) < 2:
            continue
        key, *value_fields = fields
        raw_value = US.join(value_fields)
        parsed[key] = _parse_value(raw_value, binaries)
    return parsed
def parse(text: str) -> HsvDocument:
    """Parse HSV *text* into a header and a list of records.

    Binary sections are swapped for placeholders first.  The text is then
    scanned for blocks:

    * ``SOH header STX data ETX``: the header is parsed as an object and the
      data as FS-separated records.  The header is stored as soon as the STX
      is found, even if no ETX follows; a later header replaces an earlier
      one.
    * ``STX data ETX``: data only.

    Characters outside such blocks are skipped.  Records that parse to an
    empty object are not added.

    Args:
        text: Raw HSV input.

    Returns:
        A dict with ``'header'`` (an object, or None if no header was found)
        and ``'records'`` (a list of objects).
    """
    text, binaries = _extract_binary_sections(text)
    header: Optional[HsvObject] = None
    records: List[HsvObject] = []

    pos = 0
    end = len(text)
    while pos < end:
        marker = text[pos]
        if marker == SOH:
            stx_pos = text.find(STX, pos + 1)
            if stx_pos == -1:
                # Header without a data block: step over the SOH.
                pos += 1
                continue
            header = _parse_object(text[pos + 1:stx_pos], binaries)
            data_start = stx_pos + 1
        elif marker == STX:
            data_start = pos + 1
        else:
            pos += 1
            continue

        etx_pos = text.find(ETX, data_start)
        if etx_pos == -1:
            # Unterminated block: resume just after its STX.
            pos = data_start
            continue

        for chunk in _split_respecting_nesting(text[data_start:etx_pos], FS):
            record = _parse_object(chunk, binaries)
            if record:
                records.append(record)
        pos = etx_pos + 1

    return {'header': header, 'records': records}
def parse_records(text: str) -> List[HsvObject]:
    """Parse *text* and return only the ``'records'`` list of the result."""
    document = parse(text)
    return document['records']
def parse_header(text: str) -> Optional[HsvObject]:
    """Parse *text* and return only its ``'header'`` entry.

    The result is None when the parsed document has no header.
    """
    document = parse(text)
    return document['header']
if __name__ == '__main__':
    # Self-test script: each check parses a small document, asserts on the
    # result and prints a one-line confirmation.
    import sys  # imported by the original script; not used by the checks

    def test_basic():
        """One record with two plain properties."""
        payload = f"{STX}name{US}Alice{RS}age{US}30{ETX}"
        doc = parse(payload)
        assert doc['records'] == [{'name': 'Alice', 'age': '30'}]
        print("✓ Basic parsing")

    def test_multiple_records():
        """FS separates records inside one block."""
        payload = f"{STX}name{US}Alice{FS}name{US}Bob{ETX}"
        doc = parse(payload)
        assert doc['records'] == [{'name': 'Alice'}, {'name': 'Bob'}]
        print("✓ Multiple records")

    def test_arrays():
        """GS separates array items."""
        payload = f"{STX}tags{US}a{GS}b{GS}c{ETX}"
        doc = parse(payload)
        assert doc['records'] == [{'tags': ['a', 'b', 'c']}]
        print("✓ Array values")

    def test_header():
        """SOH ... STX carries a header object."""
        payload = f"{SOH}hsv{US}1.0{RS}type{US}users{STX}name{US}Alice{ETX}"
        doc = parse(payload)
        assert doc['header'] == {'hsv': '1.0', 'type': 'users'}
        assert doc['records'] == [{'name': 'Alice'}]
        print("✓ SOH header")

    def test_nesting():
        """SO ... SI wraps a nested object."""
        payload = f"{STX}user{US}{SO}name{US}Alice{RS}email{US}a@b.com{SI}{ETX}"
        doc = parse(payload)
        assert doc['records'] == [{'user': {'name': 'Alice', 'email': 'a@b.com'}}]
        print("✓ SO/SI nesting")

    def test_deep_nesting():
        """Nested objects may contain nested objects."""
        payload = f"{STX}data{US}{SO}level1{US}{SO}level2{US}deep{SI}{SI}{ETX}"
        doc = parse(payload)
        assert doc['records'] == [{'data': {'level1': {'level2': 'deep'}}}]
        print("✓ Deep nesting")

    def test_binary_mode():
        """STX/ETX inside a DLE-framed section are payload, not framing."""
        binary_data = f"raw{STX}data{ETX}here"
        payload = f"{STX}type{US}image{RS}data{US}{DLE}{STX}{binary_data}{DLE}{ETX}{ETX}"
        first = parse(payload)['records'][0]
        assert first['type'] == 'image'
        assert first['data'] == binary_data
        print("✓ DLE binary mode")

    def test_binary_with_dle():
        """A DLE in the payload is written as DLE DLE."""
        binary_data = f"has{DLE}dle"
        escaped = binary_data.replace(DLE, DLE + DLE)
        payload = f"{STX}data{US}{DLE}{STX}{escaped}{DLE}{ETX}{ETX}"
        doc = parse(payload)
        assert doc['records'][0]['data'] == binary_data
        print("✓ DLE escaping")

    def test_newlines():
        """Newlines are ordinary value characters."""
        payload = f"{STX}text{US}line1\nline2\nline3{ETX}"
        doc = parse(payload)
        assert doc['records'] == [{'text': 'line1\nline2\nline3'}]
        print("✓ Newlines in values")

    def test_quotes():
        """Quotes are ordinary value characters."""
        payload = f'{STX}msg{US}He said "hello"{ETX}'
        doc = parse(payload)
        assert doc['records'] == [{'msg': 'He said "hello"'}]
        print("✓ Quotes (no escaping)")

    def test_mixed_content():
        """Text outside STX ... ETX is skipped."""
        payload = f"ignored{STX}name{US}Alice{ETX}also ignored"
        doc = parse(payload)
        assert doc['records'] == [{'name': 'Alice'}]
        print("✓ Mixed content (ignores outside STX...ETX)")

    def test_multiple_blocks():
        """Records from several blocks are collected in order."""
        payload = f"{STX}a{US}1{ETX}junk{STX}b{US}2{ETX}"
        doc = parse(payload)
        assert doc['records'] == [{'a': '1'}, {'b': '2'}]
        print("✓ Multiple blocks")

    def test_nested_array():
        """An array may appear inside a nested object."""
        payload = f"{STX}user{US}{SO}name{US}Alice{RS}tags{US}admin{GS}user{SI}{ETX}"
        doc = parse(payload)
        assert doc['records'] == [{'user': {'name': 'Alice', 'tags': ['admin', 'user']}}]
        print("✓ Nested structure with array")

    def test_complex():
        """Header, two records, nesting and arrays together."""
        payload = (
            f"{SOH}hsv{US}1.0{RS}type{US}complex{STX}"
            f"user{US}{SO}name{US}Alice{RS}tags{US}admin{GS}active{SI}{FS}"
            f"user{US}{SO}name{US}Bob{RS}tags{US}user{SI}"
            f"{ETX}"
        )
        doc = parse(payload)
        assert doc['header'] == {'hsv': '1.0', 'type': 'complex'}
        records = doc['records']
        assert len(records) == 2
        assert records[0]['user']['name'] == 'Alice'
        assert records[0]['user']['tags'] == ['admin', 'active']
        assert records[1]['user']['name'] == 'Bob'
        print("✓ Complex combination")

    # Checks run in this fixed order; the first failing assert aborts the run.
    checks = (
        test_basic,
        test_multiple_records,
        test_arrays,
        test_header,
        test_nesting,
        test_deep_nesting,
        test_binary_mode,
        test_binary_with_dle,
        test_newlines,
        test_quotes,
        test_mixed_content,
        test_multiple_blocks,
        test_nested_array,
        test_complex,
    )

    rule = "=" * 50
    print(rule)
    print("HSV Parser Tests (Python)")
    print(rule)
    for check in checks:
        check()
    print(rule)
    print("All tests passed!")
    print(rule)