Parser.fan

//
// Copyright (c) 2024, Brian Frank and Andy Frank
// Licensed under the Academic Free License version 3.0
//
// History:
//   08 Oct 2024  Matthew Giannini  Creation
//

**
** Parser converts input text into a tree of nodes.
**
** A parser is const, and therefore thread-safe: one instance may be
** shared freely between multiple actors.
**
@Js
const class Parser
{

//////////////////////////////////////////////////////////////////////////
// Constructor
//////////////////////////////////////////////////////////////////////////

  ** Get a parser that uses the default configuration
  static new make() { builder().build() }

  ** Get a builder that can be used to configure a parser
  static ParserBuilder builder() { ParserBuilder() }

  ** Capture the configuration held by the given builder
  internal new makeBuilder(ParserBuilder builder)
  {
    this.blockParserFactories = DocumentParser.calculateBlockParserFactories(
      builder.blockParserFactories, builder.enabledBlockTypes)
    this.inlineContentParserFactories = builder.inlineContentParserFactories
    this.includeSourceSpans           = builder.includeSourceSpans
    this.delimiterProcessors          = builder.delimiterProcessors
    this.linkProcessors               = builder.linkProcessors
    this.linkMarkers                  = builder.linkMarkers
    this.postProcessorFactories       = builder.postProcessorFactories

    // Eagerly construct an inline parser (the instance itself is discarded).
    // A bad configuration may raise an error here, and we would rather
    // surface that at build time than on the first parse.
    ctx   := InlineParserContext(this, Definitions())
    probe := DefaultInlineParser(ctx)
  }

  ** Block parser factories as computed by `DocumentParser` from the builder
  internal const BlockParserFactory[] blockParserFactories

  ** Inline content parser factories configured on the builder
  internal const InlineContentParserFactory[] inlineContentParserFactories

  ** Source span setting configured on the builder
  internal const IncludeSourceSpans includeSourceSpans

  ** Delimiter processors configured on the builder
  internal const DelimiterProcessor[] delimiterProcessors

  ** Link processors configured on the builder
  internal const LinkProcessor[] linkProcessors

  ** Link marker characters configured on the builder
  internal const Int[] linkMarkers

  ** Factories invoked after every parse to obtain the post processors
  internal const |->PostProcessor|[] postProcessorFactories

//////////////////////////////////////////////////////////////////////////
// Parse
//////////////////////////////////////////////////////////////////////////

  ** Parse the given text; shortcut for 'parseStream(text.in)'
  Node parse(Str text)
  {
    return parseStream(text.in)
  }

  ** Read the input stream and parse its contents into a tree of nodes.
  ** Every configured post processor is applied to the tree before it
  ** is returned.
  **
  ** pre>
  ** doc := Parser().parse("Hello *Markdown*!")
  ** <pre
  **
  Node parseStream(InStream in)
  {
    // a new document parser is created for every parse
    parsed := DocumentParser(this).parse(in)
    return postProcess(parsed)
  }

  ** Run the document through a new post processor obtained from each
  ** factory, in registration order, feeding each result into the next.
  private Node postProcess(Node doc)
  {
    result := doc
    postProcessorFactories.each |makeProcessor|
    {
      result = makeProcessor().process(result)
    }
    return result
  }
}

**************************************************************************
** ParserBuilder
**************************************************************************

**
** Builder for customizing the behavior of the common mark parser
**
@Js
final class ParserBuilder
{
  internal new make() { }

  ** Get the configured `Parser`
  Parser build() { Parser(this) }

  internal IncludeSourceSpans includeSourceSpans := IncludeSourceSpans.none
  internal BlockParserFactory[] blockParserFactories := [,]
  internal InlineContentParserFactory[] inlineContentParserFactories := [,]
  internal Type[] enabledBlockTypes := DocumentParser.core_block_types
  internal DelimiterProcessor[] delimiterProcessors := [,]
  internal LinkProcessor[] linkProcessors := [,]
  internal Int[] linkMarkers := [,]
  internal |->PostProcessor|[] postProcessorFactories := [,]

  ** Describe the list of markdown features the parser will recognize and parse.
  **
  ** By default, we will recognize and parse the following set of "block" elements:
  **
  ** - `Heading` ('#')
  ** - `HtmlBlock` ('<html></html>')
  ** - `ThematicBreak` (Horizontal Rule) ('---')
  ** - `FencedCode` ('```')
  ** - `IndentedCode`
  ** - `BlockQuote` ('>')
  ** - `ListBlock` (Ordered/Unordered List) ('1. / *')
  **
  ** To parse only a subset of the features listed above, pass a list of each feature's
  ** associated `Block` type.
  **
  ** Example: to parse only headings and lists:
  **
  **   Parser.builder.withEnabledBlockTypes([Heading#, ListBlock#])
  **
  ** The given list is copied, so changes made to it after this call have no
  ** effect on the builder.
  This withEnabledBlockTypes(Type[] enabledBlockTypes)
  {
    // validate and store a private copy so the caller cannot change the
    // list after it has been checked
    types := enabledBlockTypes.dup
    DocumentParser.checkEnabledBlockTypes(types)
    this.enabledBlockTypes = types
    return this
  }

  ** Add a custom block parser factory.
  **
  ** Note that custom factories are applied *before* the built-in factories. This is
  ** so that extensions can change how some syntax is parsed that would otherwise be
  ** handled by built-in factories.
  This customBlockParserFactory(BlockParserFactory factory)
  {
    blockParserFactories.add(factory)
    return this
  }

  ** Add a factory for a custom inline content parser, for extending inline parsing
  ** or overriding built-in parsing.
  **
  ** Note that parsers are triggered based on a special character as specified by
  ** `InlineContentParserFactory.triggerChars`. It is possible to register multiple
  ** parsers for the same character, or even for some built-in special character
  ** such as '`'. The custom parsers are tried first in the order in which they are
  ** registered, and then the built-in ones.
  This customInlineContentParserFactory(InlineContentParserFactory factory)
  {
    inlineContentParserFactories.add(factory)
    return this
  }

  ** Add a factory for a `PostProcessor`.
  **
  ** After a document is parsed, the parser invokes each factory to obtain a
  ** post processor and passes the document through it. Post processors run in
  ** the order in which their factories were added, each one receiving the
  ** result of the previous one.
  This postProcessorFactory(|->PostProcessor| factory)
  {
    postProcessorFactories.add(factory)
    return this
  }

  ** TODO: this feature not fully implemented yet
  @NoDoc This withIncludeSourceSpans(IncludeSourceSpans val)
  {
    this.includeSourceSpans = val
    return this
  }

  ** Add a custom delimiter processor. The processor is appended to the
  ** list of delimiter processors that is handed to the `Parser` when it is built.
  This customDelimiterProcessor(DelimiterProcessor delimiterProcessor)
  {
    delimiterProcessors.add(delimiterProcessor)
    return this
  }

  ** Add a custom link/image processor for inline parsing.
  **
  ** Multiple link processors can be added, and will be tried in the order in which they
  ** were added. If no processor applies, the normal behavior applies. That means these
  ** can override built-in link parsing.
  This linkProcessor(LinkProcessor linkProcessor)
  {
    linkProcessors.add(linkProcessor)
    return this
  }

  ** Add a custom link marker for link processing. A link marker is a character like
  ** '!' which, if it appears before the '[' of a link, changes the meaning of the link.
  **
  ** If a link marker followed by a valid link is parsed, the `LinkInfo` that is passed
  ** to the `LinkProcessor` will have its `LinkInfo.marker` set. A link processor
  ** should check the `Text.literal` and then do any processing, and will probably
  ** want to use `LinkResult.includeMarker`.
  **
  ** Adding the same marker more than once has no additional effect.
  This linkMarker(Int marker)
  {
    // unique drops the duplicate if this marker was already registered
    linkMarkers = linkMarkers.add(marker).unique
    return this
  }

  ** Configure the given extensions on this parser.
  ** Each extension is given this builder, in list order, via
  ** 'extendParser' so that it can register its own customizations.
  This extensions(MarkdownExt[] exts)
  {
    exts.each |ext| { ext.extendParser(this) }
    return this
  }
}