Source: chain.js

/**
 * Immutable tuple.
 * @typedef {external:tuple} Tuple
 */
const { tuple } = require('immutable-tuple')

const { internal, randomElement } = require('./util')

/**
 * Token to tag a complex JSON value.
 * @constant
 * @type {Symbol}
 */
const OBJECT = Symbol('@@OBJECT')

/**
 * Token to represent the start of runs.
 * @constant
 * @type {Symbol}
 */
const BEGIN = Symbol('@@BEGIN')

/**
 * Token to represent the end of runs.
 * @constant
 * @type {Symbol}
 */
const END = Symbol('@@END')

/**
 * A time-homogeneous Markov chain with optional memory.
 */
class Chain {
  /**
   * @param {object} [options={}] Options object
   * @param {Array<Array<any>>} [options.corpus=[]] Sample runs of the process
   * @param {number} [options.order=0] Size of the chain's memory
   * @param {boolean} [options.useTokenMap=false] Whether to map token to states
   * @param {Map<Tuple<any>,any>} [options.model] Prebuilt state space
   */
  constructor ({ corpus = [], order = 0, useTokenMap = false, model } = {}) {
    if (model) {
      internal(this).model = model
      internal(this).order = (model.keys().next()).value.length - 1
    } else {
      internal(this).order = order
      internal(this).model = buildModel(corpus, this)
    }

    if (useTokenMap && this.order > 0) {
      internal(this).tokenMap = buildTokenMap(this.model)
    }
  }

  /**
   * Order of chain.
   * @readonly
   * @type {number}
   */
  get order () {
    return internal(this).order
  }

  /**
   * Map of chain states.
   * @readonly
   * @type {Map<Tuple<any>,any>}
   */
  get model () {
    return internal(this).model
  }

  /**
   * Initial state with BEGIN tokens.
   * @readonly
   * @type {Tuple<any>}
   */
  get initialState () {
    if (!internal(this).initialState) {
      internal(this).initialState = getInitialState(this.order)
    }
    return internal(this).initialState
  }

  /**
   * Map of token to state.
   * @readonly
   * @type {Map<any,Tuple<any>>}
   */
  get tokenMap () {
    return internal(this).tokenMap
  }

  /**
   * Updates a model from a single run.
   *
   * @static
   * @param {Array<any>} run Array of tokens
   * @param {object} [chain={}] Chain object
   * @param {Map<Tuple<any>,any>} chain.model Model to update
   * @param {Tuple<any>} chain.initialState Starting tuple
   * @param {number} chain.order Order of chain
   * @param {Map<any,Tuple<any>>} [chain.tokenMap] Map of token to states
   */
  static seed (run, { model, tokenMap, initialState, order } = {}) {
    const items = [...initialState, ...run, END]

    for (let i = 0; i < run.length + 1; ++i) {
      const state = tuple(...items.slice(i, i + 1 + order))
      const next = items[i + 1 + order]
      const prev = items[i - 1] || BEGIN

      if (!model.has(state)) {
        model.set(state, [weightMap(), weightMap()])
      }

      const stateMaps = model.get(state)
      const nextCount = stateMaps[0].get(next) || 0
      const prevCount = stateMaps[1].get(prev) || 0

      stateMaps[0].set(next, nextCount + 1)
      stateMaps[1].set(prev, prevCount + 1)

      if (tokenMap) {
        for (const token of state) {
          if (!tokenMap.has(token)) {
            tokenMap.set(token, new Set())
          }
          const entry = tokenMap.get(token)
          entry.add(state)
        }
      }
    }
  }

  /**
   * Randomly chooses a new step from a given state.
   *
   * @private
   * @param {Tuple<any>} fromState The state to move from
   * @param {boolean} [forward] Movement direction
   * @returns {any} A possible next step of the chain
   */
  _step (fromState, forward = true) {
    const index = forward ? 0 : 1
    const failToken = forward ? END : BEGIN
    const stateArr = this.model.get(fromState)

    if (!stateArr) {
      return failToken
    }

    const choices = [...stateArr[index].keys()]
    const weights = [...stateArr[index].values()]

    return randomElement(choices, weights)
  }

  /**
   * Generates successive states until it finds a stop token.
   *
   * @private
   * @param {Tuple<any>} [fromState] Initial state
   * @param {boolean} [forward=true] Movement direction
   * @yield {any} Next step on the chain
   */
  * _walk (fromState, forward = true) {
    const stopToken = forward ? END : BEGIN
    let state = fromState || this.initialState

    while (true) {
      let step = this._step(state, forward)

      if (step === stopToken) {
        break
      }

      if (isObjectToken(step)) {
        try {
          step = JSON.parse(step[1])
        } catch (_) {}
      }

      yield step

      if (forward) {
        state = tuple(...state.slice(1), step)
      } else {
        state = tuple(step, ...state.slice(0, state.length - 1))
      }
    }
  }

  /**
   * Generates successive states until the chain reaches an END.
   *
   * @param {Tuple<any>} [fromState] Begin state of the chain walk
   * @yield {any} Next succeding step of the chain
   */
  * walkForward (fromState) {
    yield * this._walk(fromState)
  }

  /**
   * Generates successive states until the chain reaches a BEGIN.
   *
   * @param {Tuple<any>} [fromState] Starting state of the chain walk
   * @yield {any} Next preceeding step of the chain
   */
  * walkBackward (fromState) {
    yield * this._walk(fromState, false)
  }

  /**
   * Generates a state tuple from an array of tokens.
   *
   * @private
   * @param {Array<any>} [tokens=[]] Input tokens
   * @param {boolean} [useTokenMap=false] Whether to use token map
   * @returns {Tuple<any>} State tuple
   */
  _genStateFrom (tokens = [], useTokenMap = false) {
    const { order, initialState, model } = this
    const run = [...tokens]
    const tuples = []
    const items = [...initialState, ...run, END]

    for (let i = 0; i < run.length + 1; ++i) {
      tuples.push(tuple(...items.slice(i, i + 1 + order)))
    }

    const starts = tuples.slice(1)
      .filter((t) => model.has(t))

    let result = randomElement(starts)

    if (!result && useTokenMap && tokens.length > 0 && this.tokenMap) {
      const choices = tokens.filter((t) => this.tokenMap.has(t))
      if (choices.length > 0) {
        const token = randomElement(choices)
        const possibleStates = [...this.tokenMap.get(token)]
        result = randomElement(possibleStates)
      }
    }

    return result || initialState
  }

  /**
   * Walks the Markov chain and returns all steps.
   *
   * The returned step array will look like:
   * ```javascript
   * [ [backward_steps], [starting_tokens], [forward_steps] ]
   * ```
   *
   * The starting tokens are only returned when forward or backward steps were
   * actually generated from a subset of the {options.tokens} parameter.
   *
   * @param {object} [options] Options object
   * @param {Array<any>} [options.tokens=[]] Starting state tokens
   * @param {boolean} [options.backSearch=true] Should walk back
   * @param {boolean} [options.useTokenMap=true] Whether to use token map
   * @param {boolean} [options.runMissingTokens=true] Whether to answer when tokens are not in model
   * @returns {Array<Array<any>>} Array with back root and forward steps
   */
  run ({ tokens = [], backSearch = true, useTokenMap = true, runMissingTokens = true } = {}) {
    const startState = this._genStateFrom(tokens, useTokenMap)

    let hasSteps = startState !== this.initialState
    if (!runMissingTokens && tokens.length > 0 && !hasSteps) {
      return [[], [], []]
    }

    const forwardSteps = [...this.walkForward(startState)]
    hasSteps = hasSteps || forwardSteps.length > 0

    let backSteps = []
    if (backSearch) {
      backSteps = [...this.walkBackward(startState)].reverse()
      hasSteps = hasSteps || backSteps.length > 0
    }

    return [
      backSteps,
      hasSteps ? [...startState].filter((t) => t !== BEGIN) : [],
      forwardSteps
    ]
  }

  /**
   * Serialises the chain into a JSONable array.
   *
   * The returned array will look like:
   * ```javascript
   * [ [ [state], [ [next, count], ...], [ [prev, count], ...] ], ...]
   * ```
   *
   * @returns {Array<any>} JSON array
   * @see {@link https://mdn.io/stringify#toJSON()_behavior}
   */
  toJSON () {
    const serialised = []

    for (const [state, [next, prev]] of this.model) {
      const stateArr = [...state].map(stringifyToken)

      const nextArr = [...next].map(([value, count]) => {
        return [stringifyToken(value), count]
      })

      const prevArr = [...prev].map(([value, count]) => {
        return [stringifyToken(value), count]
      })

      serialised.push([stateArr, [nextArr, prevArr]])
    }

    return serialised
  }

  /**
   * Creates a Chain from a JSON string.
   *
   * @static
   * @param {string} jsonChain A chain serialised with `Chain.toJSON`
   * @param {object} options Additional options to Chain constructor
   * @returns {Chain} A new chain instance
   */
  static fromJSON (jsonChain, options) {
    let order

    const states = JSON.parse(jsonChain).map(
      ([state, [nextList, prevList]], index) => {
        const curStateSize = state.length

        if (index === 0) {
          order = curStateSize - 1
        } else if (curStateSize !== order + 1) {
          throw new Error('Inconsistent Markov chain order. ' +
            `Expected ${order} but got ${curStateSize - 1} (${state}).`)
        }

        const stateTuple = state.map(parseTokenString)

        const nextMap = weightMap()
        for (const [key, count] of nextList) {
          nextMap.set(parseTokenString(key), count)
        }

        const prevMap = weightMap()
        for (const [key, count] of prevList) {
          prevMap.set(parseTokenString(key), count)
        }

        return [tuple(...stateTuple), [nextMap, prevMap]]
      })

    const model = stateSpace()

    for (const [state, follow] of states) {
      model.set(state, follow)
    }

    return new Chain({ ...options, model })
  }
}

/**
 * Generates a initial state for a chain of the given order.
 * @param {number} order Order of chain
 * @returns {Tuple<any>} Initial state
 */
function getInitialState (order) {
  return tuple(...Array(1 + order).fill(BEGIN))
}

/**
 * Tests if token is complex object.
 * @param {any} token
 * @returns {boolean}
 */
function isObjectToken (token) {
  return tuple.isTuple(token) && token[0] === OBJECT
}

const symbolRegex = /^Symbol\((@@BEGIN|@@END)\)$/
const objectRegex = /^Object\((.*)\)$/

/**
 * Transforms token type to proper token string.
 * @param {any} token
 * @returns {string} String token
 */
function stringifyToken (token) {
  if (token === BEGIN) return BEGIN.toString()
  if (token === END) return END.toString()
  if (isObjectToken(token)) return `Object(${token[1]})`
  if (typeof token === 'string' && (symbolRegex.test(token) || objectRegex.test(token))) {
    return `"${token}"`
  }
  return token
}

/**
 * Transforms token string to proper token type.
 * @param {string} string
 * @returns {any} Chain token
 */
function parseTokenString (string) {
  if (string === BEGIN.toString()) return BEGIN
  if (string === END.toString()) return END
  let res
  if ((res = objectRegex.exec(string))) return JSON.parse(res[1])
  return string
}

/**
 * Builds a Markov chain model.
 * @param {Array<Array<any>>} corpus Corpus to build the model from
 * @param {object} [options={}] Options object
 * @param {number} [options.order] Order of chain
 * @param {Tuple<any>} [options.initialState] Initial state of the chain
 * @returns {Map<Tuple<any>,any>} Markov chain model
 */
function buildModel (corpus, { order, initialState } = {}) {
  const model = stateSpace()

  if (order < 0) {
    throw new Error('Invalid Markov chain order. ' +
    `Expected \`order >= 0\` but got ${order}.`)
  }

  if (!initialState || initialState.length - 1 !== order) {
    initialState = getInitialState(order)
  }

  for (const run of corpus) {
    Chain.seed(run, { model, initialState, order })
  }

  return model
}

/**
 * Builds a Map of token to states.
 * @param {Map<Tuple<any>,any>} model Markov chain model
 * @returns {Map<any,Tuple<any>>} Token map
 */
function buildTokenMap (model) {
  const tokenMap = new Map()

  for (const state of model.keys()) {
    for (const token of state) {
      if (!tokenMap.has(token)) {
        tokenMap.set(token, new Set())
      }
      const entry = tokenMap.get(token)
      entry.add(state)
    }
  }

  return tokenMap
}

const util = require('util')
const inspect = Symbol.for('nodejs.util.inspect.custom')

/**
 * Creates a proxy map handler.
 * @param {Function} keyFn Function to make keys
 * @returns {object} Proxy handler
 */
function mapHandler (keyFn) {
  const ops = ['set', 'get', 'has']

  const handler = {
    get (target, propertyKey) {
      let property = Reflect.get(target, propertyKey)
      if (typeof property === 'function') {
        if (ops.includes(propertyKey)) {
          property = trap.bind(target, property)
        } else {
          property = property.bind(target)
        }
      } else if (propertyKey === inspect) {
        property = inspectUtil.bind(target)
      }
      return property
    }
  }

  /**
   * Trap for map get, set and has.
   */
  function trap (operation, ...args) {
    args[0] = keyFn(args[0])
    return Reflect.apply(operation, this, args)
  }
  /**
   * Node utility.
   */
  function inspectUtil () {
    return util.inspect(this, false, null)
  }

  return handler
}

/**
 * Proxy for a state space Map.
 * @param {Map<Tuple<any>,any>} [map=new Map()] Regular Map
 * @returns {Proxy}
 */
function stateSpace (map = new Map()) {
  return new Proxy(map, mapHandler(function (key) {
    if (!tuple.isTuple(key)) {
      throw new Error('Invalid Markov chain state. ' +
      `Expected tuple but got ${typeof key} (${key || key.toString()}).`)
    }
    return key.map((v) => {
      if (typeof v === 'object') v = makeObjectToken(v)
      return v
    })
  }))
}

/**
 * Proxy for a weight Map.
 * @param {Map<any,Number>} [map=new Map()] Regular Map
 * @returns {Proxy}
 */
function weightMap (map = new Map()) {
  return new Proxy(map, mapHandler(function (key) {
    if (typeof key === 'object') {
      return makeObjectToken(key)
    }
    return key
  }))
}

/**
 * Creates a tuple to represent complex JSON data.
 * @param {any} data
 * @returns {Tuple<Symbol,any>} Complex data token
 */
function makeObjectToken (data) {
  const value = typeof data === 'string' ? data : JSON.stringify(data)
  return tuple(OBJECT, value)
}

module.exports = Chain