「‍」 Lingenic

hsv

(⤓.js ◇.js); γ ≜ [2026-01-27T073049.034, 2026-01-27T073049.034] ∧ |γ| = 1

// Copyright 2026 Danslav Slavenskoj, Lingenic LLC
// License: CC0 1.0 - Public Domain
// https://creativecommons.org/publicdomain/zero/1.0/
// You may use this code for any purpose without attribution.
//
// Spec: https://hsvfile.com
// Repo: https://github.com/LingenicLLC/HSV

/**
 * HSV - Hierarchical Separated Values
 *
 * A text-based file format and streaming protocol using ASCII control characters.
 * Unlimited nesting (like JSON). No escaping required. Binary data supported.
 */

// ASCII control characters used as HSV delimiters.
// Each comment gives the ASCII name followed by the role it plays in this parser.
const SOH = String.fromCharCode(0x01); // Start of Heading: opens the header section
const STX = String.fromCharCode(0x02); // Start of Text: opens a data block
const ETX = String.fromCharCode(0x03); // End of Text: closes a data block
const EOT = String.fromCharCode(0x04); // End of Transmission (exported; not consulted by parse)
const SO = String.fromCharCode(0x0e);  // Shift Out: opens a nested structure
const SI = String.fromCharCode(0x0f);  // Shift In: closes a nested structure
const DLE = String.fromCharCode(0x10); // Data Link Escape: introduces binary mode
const FS = String.fromCharCode(0x1c);  // File Separator: separates records
const GS = String.fromCharCode(0x1d);  // Group Separator: separates array elements
const RS = String.fromCharCode(0x1e);  // Record Separator: separates properties
const US = String.fromCharCode(0x1f);  // Unit Separator: separates a key from its value

/**
 * Extract DLE+STX ... DLE+ETX binary sections and replace them with placeholders.
 *
 * Inside a section a doubled DLE stands for one literal DLE, so the search for
 * the DLE+ETX terminator skips over DLE+DLE pairs. The raw payload is then
 * unescaped exactly once; unescaping while scanning AND again afterwards would
 * collapse two consecutive literal DLEs into one.
 *
 * A DLE+STX that is never terminated is not treated as a binary section: its
 * DLE is copied through as ordinary text and scanning resumes right after it.
 *
 * @param {string} text - Raw HSV text.
 * @returns {[string, Map<string, string>]} The text with every terminated
 *   binary section replaced by a `\0BINARY<n>\0` placeholder, and a map from
 *   each placeholder to its unescaped payload.
 */
function extractBinarySections(text) {
  const binaries = new Map();
  let result = '';
  let i = 0;

  while (i < text.length) {
    if (text[i] === DLE && text[i + 1] === STX) {
      const payloadStart = i + 2;
      let j = payloadStart;
      let terminated = false;

      // A terminator needs two characters, so stop one short of the end.
      while (j + 1 < text.length) {
        if (text[j] === DLE) {
          if (text[j + 1] === ETX) {
            terminated = true;
            break;
          }
          if (text[j + 1] === DLE) {
            // Escaped DLE: skip the pair so its second half cannot be
            // mistaken for the start of a terminator.
            j += 2;
            continue;
          }
        }
        j++;
      }

      if (terminated) {
        // One placeholder per section; the map size is the running counter.
        const placeholder = `\0BINARY${binaries.size}\0`;
        binaries.set(placeholder, unescapeBinary(text.slice(payloadStart, j)));
        result += placeholder;
        i = j + 2;
        continue;
      }
      // Unterminated section: fall through and copy the DLE as plain text.
    }

    result += text[i];
    i++;
  }

  return [result, binaries];
}

/**
 * Undo DLE escaping in a binary payload.
 *
 * Every DLE+DLE pair becomes a single DLE. Pairs are matched left to right
 * without overlapping, so an odd DLE at the end of a run is kept as-is.
 *
 * @param {string} data - Payload with doubled DLEs.
 * @returns {string} Payload with each doubled DLE reduced to one.
 */
function unescapeBinary(data) {
  const escapedPair = DLE + DLE;
  return data.split(escapedPair).join(DLE);
}

/**
 * Replace binary placeholders in a string with their original payloads.
 *
 * The value is scanned once, left to right, and inserted payloads are never
 * re-scanned. Replacing placeholders one after another over the whole string
 * would corrupt any payload that happens to contain the text of another
 * placeholder.
 *
 * @param {string} value - Text that may contain `\0BINARY<n>\0` placeholders.
 * @param {Map<string, string>} binaries - Placeholder -> payload map.
 * @returns {string} The value with every known placeholder substituted;
 *   placeholder-like text that is not a key of `binaries` is left unchanged.
 */
function restoreBinaries(value, binaries) {
  const marker = '\0BINARY';
  if (binaries.size === 0 || !value.includes(marker)) {
    return value;
  }

  let restored = '';
  let pos = 0;

  while (pos < value.length) {
    const start = value.indexOf(marker, pos);
    if (start === -1) break;
    const end = value.indexOf('\0', start + 1);
    if (end === -1) break;

    const candidate = value.slice(start, end + 1);
    if (binaries.has(candidate)) {
      restored += value.slice(pos, start) + binaries.get(candidate);
      pos = end + 1;
    } else {
      // Not one of ours: keep the leading NUL and rescan from the next
      // character, since the closing NUL may open a real placeholder.
      restored += value.slice(pos, start + 1);
      pos = start + 1;
    }
  }

  return restored + value.slice(pos);
}

/**
 * Split text on a separator, ignoring separators inside SO ... SI nesting.
 *
 * Depth is clamped at zero: a stray SI with no matching SO is kept in the
 * output but does not push the depth negative, which would otherwise stop
 * every later top-level separator from splitting.
 *
 * @param {string} text - Text to split.
 * @param {string} sep - Single-character separator.
 * @returns {string[]} The parts in order. Empty text yields an empty array;
 *   otherwise the result has one more element than the number of top-level
 *   separators (empty strings included).
 */
function splitRespectingNesting(text, sep) {
  const parts = [];
  let current = '';
  let depth = 0;

  for (const c of text) {
    if (c === SO) {
      depth++;
    } else if (c === SI) {
      // Ignore unbalanced closers instead of going below zero.
      if (depth > 0) {
        depth--;
      }
    } else if (c === sep && depth === 0) {
      parts.push(current);
      current = '';
      continue;
    }
    current += c;
  }

  // Emit the trailing part unless the input was empty altogether.
  if (current || parts.length > 0) {
    parts.push(current);
  }

  return parts;
}

/**
 * Parse one property value into an array, a nested object, or a string.
 *
 * Structure is detected on the placeholder form of the value, and binary
 * payloads are restored only once a plain string leaf is reached. Binary data
 * is therefore returned verbatim even when it contains GS, SO or SI.
 *
 * Order of checks:
 *  1. Array: the value has at least one GS at nesting depth 0. This is checked
 *     first so that `SO..SI GS SO..SI` is an array of objects rather than one
 *     object that merely starts with SO and ends with SI.
 *  2. Nested object: the value starts with SO and ends with SI.
 *  3. Otherwise a string. This includes values whose only GS characters sit
 *     inside nesting, which would otherwise be re-split forever.
 *
 * @param {string} value - Raw value text (binary sections still as placeholders).
 * @param {Map<string, string>} binaries - Placeholder -> payload map.
 * @returns {string|Object|Array} The parsed value.
 */
function parseValue(value, binaries) {
  if (value.includes(GS)) {
    const elements = splitRespectingNesting(value, GS);
    // A single part means no top-level GS, so this is not an array here.
    if (elements.length > 1) {
      return elements.map((element) => parseValue(element, binaries));
    }
  }

  if (value.startsWith(SO) && value.endsWith(SI)) {
    return parseObject(value.slice(1, -1), binaries);
  }

  return restoreBinaries(value, binaries);
}

/**
 * Parse RS-separated `key US value` properties into a plain object.
 *
 * Properties without a top-level US are skipped. Any further top-level US
 * characters belong to the value. When a key repeats, the last value wins.
 *
 * Keys are stored with Object.defineProperty rather than plain assignment so
 * that a key named `__proto__` becomes an ordinary own property instead of
 * replacing the object's prototype. Binary placeholders in keys are restored
 * so the internal placeholder text never appears as a property name.
 *
 * @param {string} content - Property list (binary sections still as placeholders).
 * @param {Map<string, string>} binaries - Placeholder -> payload map.
 * @returns {Object} Object holding the parsed properties.
 */
function parseObject(content, binaries) {
  const obj = {};

  for (const prop of splitRespectingNesting(content, RS)) {
    const parts = splitRespectingNesting(prop, US);
    if (parts.length < 2) {
      continue;
    }

    const key = restoreBinaries(parts[0], binaries);
    // Re-join so that extra top-level US characters stay part of the value.
    const rawValue = parts.slice(1).join(US);

    Object.defineProperty(obj, key, {
      value: parseValue(rawValue, binaries),
      writable: true,
      enumerable: true,
      configurable: true,
    });
  }

  return obj;
}

/**
 * Parse HSV text into a document.
 *
 * Text outside SOH/STX ... ETX framing is ignored. An SOH followed by an STX
 * sets the header (a later header replaces an earlier one); every
 * STX ... ETX block contributes its FS-separated records. Records that parse
 * to an empty object are dropped. A block with no closing ETX yields no
 * records.
 *
 * @param {string} text - HSV encoded text
 * @returns {{header: Object|null, records: Object[]}}
 */
function parse(text) {
  const [processedText, binaries] = extractBinarySections(text);
  const doc = { header: null, records: [] };

  let cursor = 0;
  while (cursor < processedText.length) {
    const ch = processedText[cursor];
    let blockStart;

    if (ch === SOH) {
      // Header runs from just after SOH up to the next STX.
      blockStart = processedText.indexOf(STX, cursor + 1);
      if (blockStart === -1) {
        cursor += 1;
        continue;
      }
      doc.header = parseObject(processedText.slice(cursor + 1, blockStart), binaries);
    } else if (ch === STX) {
      blockStart = cursor;
    } else {
      cursor += 1;
      continue;
    }

    // From here on both cases are the same: a data block opened at blockStart.
    const blockEnd = processedText.indexOf(ETX, blockStart + 1);
    if (blockEnd === -1) {
      cursor = blockStart + 1;
      continue;
    }

    const body = processedText.slice(blockStart + 1, blockEnd);
    for (const rawRecord of splitRespectingNesting(body, FS)) {
      const record = parseObject(rawRecord, binaries);
      if (Object.keys(record).length > 0) {
        doc.records.push(record);
      }
    }

    cursor = blockEnd + 1;
  }

  return doc;
}

// Export for different module systems
// CommonJS: when a `module.exports` object exists, publish the parser and the
// control-character constants on it.
if (typeof module !== 'undefined' && module.exports) {
  module.exports = { parse, SOH, STX, ETX, EOT, SO, SI, DLE, FS, GS, RS, US };
}
// Browser: when a global `window` exists, expose the same members as `window.HSV`.
if (typeof window !== 'undefined') {
  window.HSV = { parse, SOH, STX, ETX, EOT, SO, SI, DLE, FS, GS, RS, US };
}

// Self-test when run directly
// Runs only when this file is the entry script (`require.main === module`),
// never when it is loaded via require(). Prints one line per test, then a
// summary, and exits with status 1 if any test failed, 0 otherwise.
if (typeof require !== 'undefined' && require.main === module) {
  console.log('='.repeat(50));
  console.log('HSV Parser Tests (JavaScript)');
  console.log('='.repeat(50));

  // Running totals reported in the summary and used for the exit status.
  let passed = 0;
  let failed = 0;

  // Run one named test. A test passes when `fn` returns without throwing;
  // on failure the thrown error's message is printed after the test name.
  function test(name, fn) {
    try {
      fn();
      console.log(`${name}`);
      passed++;
    } catch (e) {
      console.log(`${name}: ${e.message}`);
      failed++;
    }
  }

  // Throw when `condition` is falsy. Not called by any test below.
  function assert(condition, msg) {
    if (!condition) throw new Error(msg || 'Assertion failed');
  }

  // Compare actual `a` with expected `b` by their JSON.stringify output, so
  // arrays and plain objects are compared by content rather than identity.
  function assertEquals(a, b, msg) {
    if (JSON.stringify(a) !== JSON.stringify(b)) {
      throw new Error(msg || `Expected ${JSON.stringify(b)}, got ${JSON.stringify(a)}`);
    }
  }

  // One STX...ETX block with two RS-separated properties.
  test('Basic parsing', () => {
    const result = parse(`${STX}name${US}Alice${RS}age${US}30${ETX}`);
    assertEquals(result.records.length, 1);
    assertEquals(result.records[0].name, 'Alice');
    assertEquals(result.records[0].age, '30');
  });

  // FS separates records inside a block.
  test('Multiple records', () => {
    const result = parse(`${STX}name${US}Alice${FS}name${US}Bob${ETX}`);
    assertEquals(result.records.length, 2);
  });

  // GS separates array elements inside a value.
  test('Array values', () => {
    const result = parse(`${STX}tags${US}a${GS}b${GS}c${ETX}`);
    assertEquals(result.records[0].tags, ['a', 'b', 'c']);
  });

  // Properties between SOH and STX form the document header.
  test('SOH header', () => {
    const result = parse(`${SOH}hsv${US}1.0${RS}type${US}users${STX}name${US}Alice${ETX}`);
    assertEquals(result.header.hsv, '1.0');
    assertEquals(result.header.type, 'users');
    assertEquals(result.records.length, 1);
  });

  // A value wrapped in SO...SI is parsed as a nested object.
  test('SO/SI nesting', () => {
    const result = parse(`${STX}user${US}${SO}name${US}Alice${RS}email${US}a@b.com${SI}${ETX}`);
    assertEquals(result.records[0].user.name, 'Alice');
    assertEquals(result.records[0].user.email, 'a@b.com');
  });

  // Nesting inside nesting.
  test('Deep nesting', () => {
    const result = parse(`${STX}data${US}${SO}level1${US}${SO}level2${US}deep${SI}${SI}${ETX}`);
    assertEquals(result.records[0].data.level1.level2, 'deep');
  });

  // A DLE+STX ... DLE+ETX section may contain bare STX/ETX characters.
  test('DLE binary mode', () => {
    const binaryData = `raw${STX}data${ETX}here`;
    const result = parse(`${STX}type${US}image${RS}data${US}${DLE}${STX}${binaryData}${DLE}${ETX}${ETX}`);
    assertEquals(result.records[0].type, 'image');
    assertEquals(result.records[0].data, binaryData);
  });

  // Newlines are ordinary value characters.
  test('Newlines in values', () => {
    const result = parse(`${STX}text${US}line1\nline2\nline3${ETX}`);
    assertEquals(result.records[0].text, 'line1\nline2\nline3');
  });

  // Double quotes are ordinary value characters.
  test('Quotes (no escaping)', () => {
    const result = parse(`${STX}msg${US}He said "hello"${ETX}`);
    assertEquals(result.records[0].msg, 'He said "hello"');
  });

  // Text outside STX...ETX framing is skipped.
  test('Mixed content', () => {
    const result = parse(`ignored${STX}name${US}Alice${ETX}also ignored`);
    assertEquals(result.records.length, 1);
    assertEquals(result.records[0].name, 'Alice');
  });

  // Records from several blocks are collected into one list.
  test('Multiple blocks', () => {
    const result = parse(`${STX}a${US}1${ETX}junk${STX}b${US}2${ETX}`);
    assertEquals(result.records.length, 2);
  });

  // An array value inside a nested object.
  test('Nested structure with array', () => {
    const result = parse(`${STX}user${US}${SO}name${US}Alice${RS}tags${US}admin${GS}user${SI}${ETX}`);
    assertEquals(result.records[0].user.name, 'Alice');
    assertEquals(result.records[0].user.tags, ['admin', 'user']);
  });

  console.log('='.repeat(50));
  console.log(`${passed} passed, ${failed} failed`);
  console.log('='.repeat(50));

  // Non-zero exit status signals failure to the calling shell or CI job.
  process.exit(failed > 0 ? 1 : 0);
}