Module sbx.ui.mdlexer

Custom markdown lexer

Expand source code
"""
Custom markdown lexer
"""
import re

from pygments.lexer import (
    RegexLexer,
    bygroups,
    do_insertions,
    include,
    this,
    using,
)
from pygments.token import Generic, Keyword, Name, Number, String, Text
from pygments.util import ClassNotFound, get_bool_opt

# This lexer is based on Pygments source code, we have done some changes here

BoldText = Number
HeadingText = Name.Label


class CustomMarkdownLexer(RegexLexer):
    """
    Customized markdown lexer for SBX
    This is based on Pygments source
    """

    name = "markdown"
    aliases = ["md"]
    filenames = ["*.md"]
    mimetypes = ["text/x-markdown"]
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        """
        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)
        yield match.start(3), Text, match.group(3)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(2).strip())
            except ClassNotFound:
                pass
        code = match.group(4)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(4), String, code
        else:
            for item in do_insertions([], lexer.get_tokens_unprocessed(code)):
                yield item

        yield match.start(5), String, match.group(5)

    tokens = {
        "root": [
            # heading with pound prefix
            (r"^(#)([^#].+\n)", bygroups(Generic.Heading, HeadingText)),
            (r"^(#{2,6})(.+\n)", bygroups(Generic.Subheading, HeadingText)),
            # task list
            (
                r"^(\s*)([*-] )(\[[ xX]\])( .+\n)",
                bygroups(Text, Keyword, Keyword, using(this, state="inline")),
            ),
            # bulleted lists
            (
                r"^(\s*)([*-])(\s)(.+\n)",
                bygroups(Text, Keyword, Text, using(this, state="inline")),
            ),
            # numbered lists
            (
                r"^(\s*)([0-9]+\.)( .+\n)",
                bygroups(Text, Number, using(this, state="inline")),
            ),
            # quote
            (r"^(\s*>\s)(.+\n)", bygroups(Keyword, Generic.Emph)),
            # text block
            (r"^(```\n)([\w\W]*?)(^```$)", bygroups(String, Text, String)),
            # code block with language
            (r"^(```)(\w+)(\n)([\w\W]*?)(^```$)", _handle_codeblock),
            include("inline"),
        ],
        "inline": [
            # escape
            (r"\\.", Text),
            # italics
            (
                r"(\s)([*_][^*_]+[*_])(\W|\n)",
                bygroups(Text, Generic.Emph, Text),
            ),
            # bold
            # warning: the following rule eats internal tags.
            #    eg. **foo _bar_ baz** bar is not italics
            (
                r"(\s)((\*\*|__).*\3)((?=\W|\n))",
                bygroups(Text, BoldText, None, Text),
            ),
            # "proper way" (r'(\s)([*_]{2}[^*_]+[*_]{2})((?=\W|\n))',
            #    bygroups(Text, BoldText, Text)),
            # strikethrough
            (
                r"(\s)(~~[^~]+~~)((?=\W|\n))",
                bygroups(Text, Generic.Error, Text),
            ),
            # inline code
            (r"`[^`]+`", String.Backtick),
            # mentions and topics (twitter and github stuff)
            (r"[@#][\w/:]+", Name.Entity),
            # (image?) links eg: ![name](https://example.com/example.png)
            (
                r"(!?\[)([^]]+)(\])(\()([^)]+)(\))",
                bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text),
            ),
            # reference-style links, e.g.:
            #   [an example][id]
            #   [id]: http://example.com/
            (
                r"(\[)([^]]+)(\])(\[)([^]]*)(\])",
                bygroups(Text, Name.Tag, Text, Text, Name.Label, Text),
            ),
            (
                r"^(\s*\[)([^]]*)(\]:\s*)(.+)",
                bygroups(Text, Name.Label, Text, Name.Attribute),
            ),
            # general text, must come last!
            (r"[^\\\s]+", Text),
            (r".", Text),
        ],
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, "handlecodeblocks", True)
        RegexLexer.__init__(self, **options)

Classes

class CustomMarkdownLexer (**options)

Customized markdown lexer for SBX This is based on Pygments source

Expand source code
class CustomMarkdownLexer(RegexLexer):
    """
    Customized markdown lexer for SBX
    This is based on Pygments source
    """

    name = "markdown"
    aliases = ["md"]
    filenames = ["*.md"]
    mimetypes = ["text/x-markdown"]
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        """
        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)
        yield match.start(3), Text, match.group(3)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(2).strip())
            except ClassNotFound:
                pass
        code = match.group(4)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(4), String, code
        else:
            for item in do_insertions([], lexer.get_tokens_unprocessed(code)):
                yield item

        yield match.start(5), String, match.group(5)

    tokens = {
        "root": [
            # heading with pound prefix
            (r"^(#)([^#].+\n)", bygroups(Generic.Heading, HeadingText)),
            (r"^(#{2,6})(.+\n)", bygroups(Generic.Subheading, HeadingText)),
            # task list
            (
                r"^(\s*)([*-] )(\[[ xX]\])( .+\n)",
                bygroups(Text, Keyword, Keyword, using(this, state="inline")),
            ),
            # bulleted lists
            (
                r"^(\s*)([*-])(\s)(.+\n)",
                bygroups(Text, Keyword, Text, using(this, state="inline")),
            ),
            # numbered lists
            (
                r"^(\s*)([0-9]+\.)( .+\n)",
                bygroups(Text, Number, using(this, state="inline")),
            ),
            # quote
            (r"^(\s*>\s)(.+\n)", bygroups(Keyword, Generic.Emph)),
            # text block
            (r"^(```\n)([\w\W]*?)(^```$)", bygroups(String, Text, String)),
            # code block with language
            (r"^(```)(\w+)(\n)([\w\W]*?)(^```$)", _handle_codeblock),
            include("inline"),
        ],
        "inline": [
            # escape
            (r"\\.", Text),
            # italics
            (
                r"(\s)([*_][^*_]+[*_])(\W|\n)",
                bygroups(Text, Generic.Emph, Text),
            ),
            # bold
            # warning: the following rule eats internal tags.
            #    eg. **foo _bar_ baz** bar is not italics
            (
                r"(\s)((\*\*|__).*\3)((?=\W|\n))",
                bygroups(Text, BoldText, None, Text),
            ),
            # "proper way" (r'(\s)([*_]{2}[^*_]+[*_]{2})((?=\W|\n))',
            #    bygroups(Text, BoldText, Text)),
            # strikethrough
            (
                r"(\s)(~~[^~]+~~)((?=\W|\n))",
                bygroups(Text, Generic.Error, Text),
            ),
            # inline code
            (r"`[^`]+`", String.Backtick),
            # mentions and topics (twitter and github stuff)
            (r"[@#][\w/:]+", Name.Entity),
            # (image?) links eg: ![name](https://example.com/example.png)
            (
                r"(!?\[)([^]]+)(\])(\()([^)]+)(\))",
                bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text),
            ),
            # reference-style links, e.g.:
            #   [an example][id]
            #   [id]: http://example.com/
            (
                r"(\[)([^]]+)(\])(\[)([^]]*)(\])",
                bygroups(Text, Name.Tag, Text, Text, Name.Label, Text),
            ),
            (
                r"^(\s*\[)([^]]*)(\]:\s*)(.+)",
                bygroups(Text, Name.Label, Text, Name.Attribute),
            ),
            # general text, must come last!
            (r"[^\\\s]+", Text),
            (r".", Text),
        ],
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, "handlecodeblocks", True)
        RegexLexer.__init__(self, **options)

Ancestors

  • pygments.lexer.RegexLexer
  • pygments.lexer.Lexer

Class variables

var aliases
var filenames
var flags
var mimetypes
var name
var tokens