Syntax highlighter for C#
April 07, 2010
This is small class for color coding, syntax highlighting, pretty printing, prettifying (any of the above) C# source code. It produces HTML with span tags and a css class. It’s not perfect and undoubtedly has bugs in it, but on the whole works. It was written in a lunchhour and given some spit and polish at night - any suggestions or bugs you find are welcomed.
The online demo is no longer available, however there is an even better version at Manoli.net
Here’s an overview:
- It’s not a beautifully engineered state machine and does not work for any language or geared for re-use. It has some re-useability through limited inheritence but is mostly aimed at one feature: producing HTML source code of prettified C#.
- If you want a BNF/state machine based syntax highlighter for ASP.NET the free Actipro CodeHighlighter is a good choice, and uses ANTLR for its parsing.
- Uses 4 CSS classes which you can configure through properties: keyword, comment, type, quote
- Has an option to insert the default Visual Studio colours as a stylesheet. This is true by default.
- Has the ability to wrap your source in pre tags
- Has the ability to add keywords via a string List. Comes with a set of default C# ones (and the list is probably missing some keywords)
- It could quite easily be modified for Java, C and C++.
Bare in mind it’s not perfect, and can occasionally get confused. On the whole though I’ve found it highlights fairly reliably.
This is how it works. It’s fairly simple: 2 methods, the HighlightSource does the hard work. It does the following:
- Removes all /**/ comments, inserts a token placeholder, appended with a number. This is done via a regex from website
- Removes all quotes and places token placeholders in their place. This isn’t done with a regex, it probably can be but I found it easier with simple looping through a char array and some basic state flags
- Removes all // comments (including XML documentation ones) and puts a token placeholder in their place.
- Highlights types. This is done in 4 regex passes, gradually replacing small variations. The basic rule is space, capital, alphanumeric,space with variations for generics and new instances.
- Replaces keywords with some basic string.Replace() calls.
- Replaces the token placeholders in 1-3 back, with their CSS.
| /// <summary> | |
| /// A basic implementation of a pretty-printer or syntax highlighter for C# soure code. | |
| /// </summary> | |
| public class SourceColorer | |
| { | |
| private string _commentCssClass; | |
| private string _keywordCssClass; | |
| private string _quotesCssClass; | |
| private string _typeCssClass; | |
| private bool _addStyleDefinition; | |
| private HashSet<string> _keywords; | |
| private bool _addPreTags; | |
| /// <summary> | |
| /// Gets the list of reserved words/keywords. | |
| /// </summary> | |
| public HashSet<string> Keywords | |
| { | |
| get { return _keywords; } | |
| } | |
| /// <summary> | |
| /// Gets or sets the CSS class used for comments. The default is 'comment'. | |
| /// </summary> | |
| public string CommentCssClass | |
| { | |
| get { return _commentCssClass; } | |
| set { _commentCssClass = value; } | |
| } | |
| /// <summary> | |
| /// Gets or sets the CSS class used for keywords. The default is 'keyword'. | |
| /// </summary> | |
| public string KeywordCssClass | |
| { | |
| get { return _keywordCssClass; } | |
| set { _keywordCssClass = value; } | |
| } | |
| /// <summary> | |
| /// Gets or sets the CSS class used for string quotes. The default is 'quotes'. | |
| /// </summary> | |
| public string QuotesCssClass | |
| { | |
| get { return _quotesCssClass; } | |
| set { _quotesCssClass = value; } | |
| } | |
| /// <summary> | |
| /// Gets or sets the CSS class used for types. The default is 'type'. | |
| /// </summary> | |
| public string TypeCssClass | |
| { | |
| get { return _typeCssClass; } | |
| set { _typeCssClass = value; } | |
| } | |
| /// <summary> | |
| /// Whether to add the CSS style definition to the top of the highlighted code. | |
| /// </summary> | |
| public bool AddStyleDefinition | |
| { | |
| get { return _addStyleDefinition; } | |
| set { _addStyleDefinition = value; } | |
| } | |
| /// <summary> | |
| /// Whether to insert opening and closing pre tags around the highlighted code. | |
| /// </summary> | |
| public bool AddPreTags | |
| { | |
| get { return _addPreTags; } | |
| set { _addPreTags = value; } | |
| } | |
| /// <summary> | |
| /// Initializes a new instance of the <see cref="SourceColorer"/> class. | |
| /// </summary> | |
| public SourceColorer() | |
| { | |
| _addStyleDefinition = true; | |
| _commentCssClass = "comment"; | |
| _keywordCssClass = "keyword"; | |
| _quotesCssClass = "quotes"; | |
| _typeCssClass = "type"; | |
| _keywords = new HashSet<string>() | |
| { | |
| "static", "using", "true", "false","new", | |
| "namespace", "void", "private", "public", | |
| "bool", "string", "return", "class","internal", | |
| "const", "readonly", "int", "double","lock", | |
| "float", "if", "else", "foreach", "for","var", | |
| "get","set","byte\\[\\]","char\\[\\]","int\\[\\]","string\\[\\]" // dumb array matching. Escaped as [] is regex syntax | |
| }; | |
| } | |
| /// <summary> | |
| /// Highlights the specified source code and returns it as stylised HTML. | |
| /// </summary> | |
| /// <param name="source">The source code.</param> | |
| /// <returns></returns> | |
| public string Highlight(string source) | |
| { | |
| StringBuilder builder = new StringBuilder(); | |
| if (AddStyleDefinition) | |
| { | |
| builder.Append("<style>"); | |
| builder.AppendFormat(".{0} {{ color: #0000FF }} ", KeywordCssClass); | |
| builder.AppendFormat(".{0} {{ color: #2B91AF }} ", TypeCssClass); | |
| builder.AppendFormat(".{0} {{ color: green }} ", CommentCssClass); | |
| builder.AppendFormat(".{0} {{ color: maroon }} ", QuotesCssClass); | |
| builder.Append("</style>"); | |
| } | |
| if (AddPreTags) | |
| builder.Append("<pre>"); | |
| builder.Append(HighlightSource(source)); | |
| if (AddPreTags) | |
| builder.Append("</pre>"); | |
| return builder.ToString(); | |
| } | |
| /// <summary> | |
| /// Occurs when the source code is highlighted, after any style (CSS) definitions are added. | |
| /// </summary> | |
| /// <param name="content">The source code content.</param> | |
| /// <returns>The highlighted source code.</returns> | |
| protected virtual string HighlightSource(string content) | |
| { | |
| if (string.IsNullOrEmpty(CommentCssClass)) | |
| throw new InvalidOperationException("The CommentCssClass should not be null or empty"); | |
| if (string.IsNullOrEmpty(KeywordCssClass)) | |
| throw new InvalidOperationException("The KeywordCssClass should not be null or empty"); | |
| if (string.IsNullOrEmpty(QuotesCssClass)) | |
| throw new InvalidOperationException("The CommentCssClass should not be null or empty"); | |
| if (string.IsNullOrEmpty(TypeCssClass)) | |
| throw new InvalidOperationException("The TypeCssClass should not be null or empty"); | |
| // Some fairly secure token placeholders | |
| const string COMMENTS_TOKEN = "`````"; | |
| const string MULTILINECOMMENTS_TOKEN = "~~~~~"; | |
| const string QUOTES_TOKEN = "¬¬¬¬¬"; | |
| // Remove /* */ quotes, taken from ostermiller.org | |
| Regex regex = new Regex(@"/\*([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+/", RegexOptions.Singleline); | |
| List<string> multiLineComments = new List<string>(); | |
| if (regex.IsMatch(content)) | |
| { | |
| foreach (Match item in regex.Matches(content)) | |
| { | |
| if (!multiLineComments.Contains(item.Value)) | |
| multiLineComments.Add(item.Value); | |
| } | |
| } | |
| for (int i = 0; i < multiLineComments.Count; i++) | |
| { | |
| content = content.ReplaceToken(multiLineComments[i], MULTILINECOMMENTS_TOKEN, i); | |
| } | |
| // Remove the quotes first, so they don't get highlighted | |
| List<string> quotes = new List<string>(); | |
| bool onEscape = false; | |
| bool onComment1 = false; | |
| bool onComment2 = false; | |
| bool inQuotes = false; | |
| int start = -1; | |
| for (int i = 0; i < content.Length; i++) | |
| { | |
| if (content[i] == '/' && !inQuotes && !onComment1) | |
| onComment1 = true; | |
| else if (content[i] == '/' && !inQuotes && onComment1) | |
| onComment2 = true; | |
| else if (content[i] == '"' && !onEscape && !onComment2) | |
| { | |
| inQuotes = true; // stops cases of: var s = "// I'm a comment"; | |
| if (start > -1) | |
| { | |
| string quote = content.Substring(start, i - start + 1); | |
| if (!quotes.Contains(quote)) | |
| quotes.Add(quote); | |
| start = -1; | |
| inQuotes = false; | |
| } | |
| else | |
| { | |
| start = i; | |
| } | |
| } | |
| else if (content[i] == '\\' || content[i] == '\'') | |
| onEscape = true; | |
| else if (content[i] == '\n') | |
| { | |
| onComment1 = false; | |
| onComment2 = false; | |
| } | |
| else | |
| { | |
| onEscape = false; | |
| } | |
| } | |
| for (int i = 0; i < quotes.Count; i++) | |
| { | |
| content = content.ReplaceToken(quotes[i], QUOTES_TOKEN, i); | |
| } | |
| // Remove the comments next, so they don't get highlighted | |
| regex = new Regex("(/{2,3}.+)\n", RegexOptions.Multiline); | |
| List<string> comments = new List<string>(); | |
| if (regex.IsMatch(content)) | |
| { | |
| foreach (Match item in regex.Matches(content)) | |
| { | |
| if (!comments.Contains(item.Value + "\n")) | |
| comments.Add(item.Value); | |
| } | |
| } | |
| for (int i = 0; i < comments.Count; i++) | |
| { | |
| content = content.ReplaceToken(comments[i], COMMENTS_TOKEN, i); | |
| } | |
| // Highlight single quotes | |
| content = Regex.Replace(content, "('.{1,2}')", "<span class=\"quote\">$1</span>", RegexOptions.Singleline); | |
| // Highlight class names based on the logic: {space OR start of line OR >}{1 capital){alphanumeric}{space} | |
| regex = new Regex(@"((?:\s|^)[A-Z]\w+(?:\s))", RegexOptions.Singleline); | |
| List<string> highlightedClasses = new List<string>(); | |
| if (regex.IsMatch(content)) | |
| { | |
| foreach (Match item in regex.Matches(content)) | |
| { | |
| string val = item.Groups[1].Value; | |
| if (!highlightedClasses.Contains(val)) | |
| highlightedClasses.Add(val); | |
| } | |
| } | |
| for (int i = 0; i < highlightedClasses.Count; i++) | |
| { | |
| content = content.ReplaceWithCss(highlightedClasses[i], TypeCssClass); | |
| } | |
| // Pass 2. Doing it in N passes due to my inferior regex knowledge of back/forwardtracking. | |
| // This does {space or [}{1 capital){alphanumeric}{]} | |
| regex = new Regex(@"(?:\s|\[)([A-Z]\w+)(?:\])", RegexOptions.Singleline); | |
| highlightedClasses = new List<string>(); | |
| if (regex.IsMatch(content)) | |
| { | |
| foreach (Match item in regex.Matches(content)) | |
| { | |
| string val = item.Groups[1].Value; | |
| if (!highlightedClasses.Contains(val)) | |
| highlightedClasses.Add(val); | |
| } | |
| } | |
| for (int i = 0; i < highlightedClasses.Count; i++) | |
| { | |
| content = content.ReplaceWithCss(highlightedClasses[i], TypeCssClass); | |
| } | |
| // Pass 3. Generics | |
| regex = new Regex(@"(?:\s|\[|\()([A-Z]\w+(?:<|<))", RegexOptions.Singleline); | |
| highlightedClasses = new List<string>(); | |
| if (regex.IsMatch(content)) | |
| { | |
| foreach (Match item in regex.Matches(content)) | |
| { | |
| string val = item.Groups[1].Value; | |
| if (!highlightedClasses.Contains(val)) | |
| highlightedClasses.Add(val); | |
| } | |
| } | |
| for (int i = 0; i < highlightedClasses.Count; i++) | |
| { | |
| string val = highlightedClasses[i]; | |
| val = val.Replace("<", "").Replace("<", ""); | |
| content = content.ReplaceWithCss(highlightedClasses[i], val, "<", TypeCssClass); | |
| } | |
| // Pass 4. new keyword with a type | |
| regex = new Regex(@"new\s+([A-Z]\w+)(?:\()", RegexOptions.Singleline); | |
| highlightedClasses = new List<string>(); | |
| if (regex.IsMatch(content)) | |
| { | |
| foreach (Match item in regex.Matches(content)) | |
| { | |
| string val = item.Groups[1].Value; | |
| if (!highlightedClasses.Contains(val)) | |
| highlightedClasses.Add(val); | |
| } | |
| } | |
| // Replace the highlighted classes | |
| for (int i = 0; i < highlightedClasses.Count; i++) | |
| { | |
| content = content.ReplaceWithCss(highlightedClasses[i], TypeCssClass); | |
| } | |
| // Highlight keywords | |
| foreach (string keyword in _keywords) | |
| { | |
| Regex regexKeyword = new Regex("(" + keyword + @")(>|>|\s|\n|;|<)", RegexOptions.Singleline); | |
| content = regexKeyword.Replace(content, "<span class=\"keyword\">$1</span>$2"); | |
| } | |
| // Shove the multiline comments back in | |
| for (int i = 0; i < multiLineComments.Count; i++) | |
| { | |
| content = content.ReplaceTokenWithCss(multiLineComments[i], MULTILINECOMMENTS_TOKEN, i, CommentCssClass); | |
| } | |
| // Shove the quotes back in | |
| for (int i = 0; i < quotes.Count; i++) | |
| { | |
| content = content.ReplaceTokenWithCss(quotes[i], QUOTES_TOKEN, i, QuotesCssClass); | |
| } | |
| // Shove the single line comments back in | |
| for (int i = 0; i < comments.Count; i++) | |
| { | |
| string comment = comments[i]; | |
| // Add quotes back in | |
| for (int n = 0; n < quotes.Count; n++) | |
| { | |
| comment = comment.Replace(string.Format("{0}{1}{0}", QUOTES_TOKEN, n), quotes[n]); | |
| } | |
| content = content.ReplaceTokenWithCss(comment, COMMENTS_TOKEN, i, CommentCssClass); | |
| } | |
| return content; | |
| } | |
| } | |
| public static class MoreExtensions | |
| { | |
| public static string ReplaceWithCss(this string content, string source, string cssClass) | |
| { | |
| return content.Replace(source, string.Format("<span class=\"{0}\">{1}</span>", cssClass, source)); | |
| } | |
| public static string ReplaceWithCss(this string content, string source, string replacement, string cssClass) | |
| { | |
| return content.Replace(source, string.Format("<span class=\"{0}\">{1}</span>", cssClass, replacement)); | |
| } | |
| public static string ReplaceWithCss(this string content, string source, string replacement, string suffix, string cssClass) | |
| { | |
| return content.Replace(source, string.Format("<span class=\"{0}\">{1}</span>{2}", cssClass, replacement, suffix)); | |
| } | |
| public static string ReplaceTokenWithCss(this string content, string source, string token, int counter, string cssClass) | |
| { | |
| string formattedToken = String.Format("{0}{1}{0}", token, counter); | |
| return content.Replace(formattedToken, string.Format("<span class=\"{0}\">{1}</span>", cssClass, source)); | |
| } | |
| public static string ReplaceToken(this string content, string source, string token, int counter) | |
| { | |
| string formattedToken = String.Format("{0}{1}{0}", token, counter); | |
| return content.Replace(source, formattedToken); | |
| } | |
| } |
I'm Chris Small, a software engineer working in London. This is my tech blog. Find out more about me via Github, Stackoverflow, Resume