I've just completed a D to HTML converter (written in D) as an educational exercise. It's an executable (run from the command line) that will traverse your directory structure and convert all ".d" files into ".d.html" files. Along with the html files comes an "index.html" file to navigate through all of your code in a browser.
Downloads: (version 0.2)
DToHtml.zip - Executable Only
DToHtmlWithSrc.zip - Executable and Source
Instructions:
- Place the d2html.exe file in the base directory that contains your D code.
- Run d2html.exe
- Open up the index.html file that is created in the same directory as the d2html executable.
Command Line Options:
----------------------------------------------------------------
Command line usage:
----------------------------------------------------------------
File or Recursive conversion Modes:
-file <filename>
-dir <directory> (default - current directory)
D Code or Program Output Modes:
-code (default - d code formatting)
-outfile (raw console program output)
Replacement Options: (Escape sequences \r, \n, and \t work here)
-space <space string> (default " ")
-tab <tab string> (default "	")
-newline <newline string> (default "<br>")
Other Options:
-copyto <directory> (copy html files to target directory)
-? (console help - duh)
----------------------------------------------------------------
Examples:
----------------------------------------------------------------
REM This recursively converts all d programs to html in the current directory
d2html.exe
REM This recursively converts all d programs to html in the passed in directory
d2html.exe -dir "c:\pub\src\dprojects"
REM This converts the passed in file to html
d2html.exe -file "c:\pub\src\dprojects\sampleprogram.d"
REM This converts the passed in program output file to html
d2html.exe -outfile -file "c:\pub\src\dprojects\sampleprogram.output"
REM This recursively converts all d programs to html in the current directory, using the following character replacements for space, tab, and return chars.
d2html.exe -code -space " " -tab " " -newline "\r\n<br>"
REM This recursively converts all d programs to html in the passed in directory, then copies all of the html file tree to another directory
d2html.exe -dir "c:\pub\src\dprojects" -copyto "c:\pub\src\dprojects\HTML"
View Sample Code (Click here)
module d2html.parser;
import std.stdio;
import std.string;
import std.file;
import d2html.constants;
import std.stream;
/**
 * Category attached to every token that Tokenize reports to its callback.
 * Stored as a byte; members are numbered consecutively starting at 0.
 */
enum TokenType : byte
{
    Unknown = 0,        /// characters that do not start any recognized token
    NewLine,            /// a run of carriage-return / line-feed characters
    WhiteSpace,         /// a run of characters accepted by std.string.iswhite
    Operator,           /// text matched against D_OPERATORS
    StringLiteral,      /// "...", `...`, r"..." or x"..." literal
    CharacterLiteral,   /// '...' literal
    NumericLiteral,     /// token that starts with a decimal digit
    Comment,            /// line comment, /* */ comment or /+ +/ comment
    Keyword,            /// word found in D_KEYWORDS, D_NAMED_CHARS or D_SPECIAL_TOKENS
    KeywordType,        /// word found in D_TYPE_KEYWORDS
    KeywordValue,       /// word found in D_VALUE_KEYWORDS
    Identifier          /// any other word made of letters, digits and underscores
}
/**
 * States of the character-level state machine that drives Tokenize.
 * Stored as a byte; members are numbered consecutively starting at 0.
 */
enum ParseMode : byte
{
    None = 0,               /// between tokens; the next character selects a state
    Operator,               /// collecting operator characters (also the entry point for comments)
    StringLiteral,          /// inside a "..." literal with backslash escapes
    StringLiteralBackTick,  /// inside a `...` literal
    StringLiteralR,         /// inside an r"..." literal
    StringLiteralX,         /// inside an x"..." literal
    StringLiteralEnd,       /// just past a closing quote; looks for a one-letter postfix
    CharLiteral,            /// inside a '...' literal
    LineComment,            /// inside a // comment, up to the end of the line
    BlockComment,           /// inside a /* ... */ comment
    BlockEmbedComment,      /// inside a /+ ... +/ comment
    HexString,              /// declared but never entered by Tokenize in this file
    NumericLiteral,         /// collecting a number that started with a decimal digit
    HexNumericLiteral,      /// declared but never entered by Tokenize in this file
    BinNumericLiteral,      /// declared but never entered by Tokenize in this file
    Identifier,             /// collecting a word that started with a letter or underscore
    WhiteSpace,             /// collecting a run of whitespace
    NewLine                 /// collecting a line terminator
}
/**
 * Splits a D source file into tokens and reports each one through a callback.
 *
 * The file is read one character at a time and driven through the ParseMode
 * state machine. Whenever a token is complete, processToken is called with the
 * token text and its TokenType. Tokens are reported in the order they occur.
 *
 * Params:
 *     filename     = path of the file to read (opened with FileMode.In)
 *     processToken = callback invoked once per non-empty token
 *
 * The file is closed when the function returns or when an exception
 * propagates out of it (for example from processToken).
 */
public void Tokenize(string filename, void function(char[], TokenType) processToken)
{
    auto mode = ParseMode.None;
    auto tokenType = TokenType.Unknown;
    char c;
    char[] token = null;
    auto getNextChar = true;   // false => run the loop again on the current character
    int nestDepth = 0;         // number of /+ levels currently open
    size_t pairFrom = 0;       // first index in token that may still begin a "/+" or "+/" pair

    Stream file = new BufferedFile(filename, FileMode.In);
    // Release the file even if reading or the callback throws.
    scope(exit)
    {
        file.close();
        delete file;
    }
    auto f = new EndianStream(file);
    f.readBOM(); // consume a byte-order mark if there is one; its value is not used

    // Reports the pending token (if any) with the current tokenType, then clears it.
    void emit()
    {
        if (token.length > 0) processToken(token, tokenType);
        token.length = 0;
    }

    // Flushes whatever is pending and starts a new token with c in state `next`.
    void begin(ParseMode next)
    {
        emit();
        token ~= c;
        mode = next;
    }

    // Reports the pending token as `type` and hands c back to the None state.
    void finishAndReprocess(TokenType type)
    {
        tokenType = type;
        emit();
        getNextChar = false;
        mode = ParseMode.None;
    }

    while (!f.eof || !getNextChar)
    {
        if (getNextChar) c = f.getc();
        getNextChar = true;

        if (mode == ParseMode.None)
        {
            if (c == '`') begin(ParseMode.StringLiteralBackTick);
            else if (c == '"') begin(ParseMode.StringLiteral);
            else if (c == '\'') begin(ParseMode.CharLiteral);
            else if (inPattern(c, "a-zA-Z_")) begin(ParseMode.Identifier);
            else if (inPattern(c, "0-9")) begin(ParseMode.NumericLiteral);
            else if (IsPossibleOperator(c))
            {
                // The pending token is flushed with its own type before the
                // type is switched to Operator for the new token.
                begin(ParseMode.Operator);
                tokenType = TokenType.Operator;
            }
            else if (c == '\r' || c == '\n') begin(ParseMode.NewLine);
            else if (std.string.iswhite(c)) begin(ParseMode.WhiteSpace);
            else
            {
                // Unrecognized character: accumulate it into an Unknown token.
                tokenType = TokenType.Unknown;
                token ~= c;
            }
        }
        else if (mode == ParseMode.NumericLiteral)
        {
            char b4 = token[token.length-1];
            bool isHex = (token.length >= 2) && (token[0] == '0') &&
                         (token[1] == 'x' || token[1] == 'X');
            // In a hex literal 'e'/'E' is a digit, so only 'p'/'P' introduces an exponent there.
            bool afterExponent = (b4 == 'P') || (b4 == 'p') ||
                                 (!isHex && ((b4 == 'E') || (b4 == 'e')));

            if ((token.length == 1) && (b4 == '0') &&
                (c == 'X' || c == 'x' || c == 'B' || c == 'b'))
            {
                token ~= c; // radix prefix 0x / 0b
            }
            else if ((c == '.') && (b4 == '.'))
            {
                // "1..2": the two dots are the range operator, not part of the
                // number. Give the first dot back and continue as an operator.
                token.length = token.length - 1;
                tokenType = TokenType.NumericLiteral;
                emit();
                token ~= '.';
                tokenType = TokenType.Operator;
                mode = ParseMode.Operator;
                getNextChar = false;
            }
            else if (inPattern(c, "A-Fa-f0-9") || c == 'L' || c == 'l' || c == 'U' || c == 'u' ||
                     c == 'F' || c == 'f' || c == 'i' ||
                     c == 'E' || c == 'e' || c == 'P' || c == 'p' ||
                     c == '_' || c == '.')
            {
                token ~= c;
            }
            else if ((c == '-' || c == '+') && afterExponent)
            {
                token ~= c; // sign of the exponent
            }
            else
            {
                finishAndReprocess(TokenType.NumericLiteral);
            }
        }
        else if (mode == ParseMode.NewLine)
        {
            // Once the token holds a '\n' the line terminator is complete, so a
            // following "\n" starts a new NewLine token instead of being merged.
            bool lineDone = (token[token.length-1] == '\n');
            if (!lineDone && c == '\r')
            {
                token ~= c;
            }
            else if (!lineDone && c == '\n')
            {
                token ~= c;
                tokenType = TokenType.NewLine;
                emit();
                mode = ParseMode.None;
            }
            else
            {
                finishAndReprocess(TokenType.NewLine);
            }
        }
        else if (mode == ParseMode.WhiteSpace)
        {
            // Line terminators end the run so they are reported as NewLine,
            // matching the priority used in the None state.
            if (std.string.iswhite(cast(dchar)c) && c != '\r' && c != '\n')
            {
                token ~= c;
            }
            else
            {
                finishAndReprocess(TokenType.WhiteSpace);
            }
        }
        else if (mode == ParseMode.LineComment)
        {
            if ((c == '\r') || (c == '\n'))
            {
                finishAndReprocess(TokenType.Comment);
            }
            else
            {
                token ~= c;
            }
        }
        else if (mode == ParseMode.BlockComment)
        {
            // token.length > 2 keeps the '*' of the opening "/*" from being
            // reused as the '*' of a closing "*/" (i.e. "/*/" is still open).
            bool closes = (c == '/') && (token.length > 2) && (token[token.length-1] == '*');
            token ~= c;
            if (closes)
            {
                tokenType = TokenType.Comment;
                emit();
                mode = ParseMode.None;
            }
        }
        else if (mode == ParseMode.BlockEmbedComment)
        {
            // A character already used by a delimiter pair cannot be part of
            // another pair; pairFrom marks where unused characters begin.
            bool canPair = (token.length > pairFrom);
            char prev = token[token.length-1];
            token ~= c;
            if (canPair && prev == '+' && c == '/')
            {
                pairFrom = token.length;
                nestDepth--;
                if (nestDepth == 0)
                {
                    tokenType = TokenType.Comment;
                    emit();
                    mode = ParseMode.None;
                }
            }
            else if (canPair && prev == '/' && c == '+')
            {
                // /+ +/ comments nest: a new opener needs its own closer.
                pairFrom = token.length;
                nestDepth++;
            }
        }
        else if (mode == ParseMode.CharLiteral || mode == ParseMode.StringLiteral)
        {
            bool isChar = (mode == ParseMode.CharLiteral);
            char quote = isChar ? '\'' : '"';
            // The quote ends the literal unless an odd number of backslashes precedes it.
            bool closes = (c == quote) && !endsWithOddBackslashRun(token);
            token ~= c;
            if (closes)
            {
                tokenType = isChar ? TokenType.CharacterLiteral : TokenType.StringLiteral;
                mode = ParseMode.StringLiteralEnd;
            }
        }
        else if (mode == ParseMode.StringLiteralBackTick ||
                 mode == ParseMode.StringLiteralR ||
                 mode == ParseMode.StringLiteralX)
        {
            // No escapes in these forms: the first matching delimiter ends the literal.
            char closer = (mode == ParseMode.StringLiteralBackTick) ? '`' : '"';
            token ~= c;
            if (c == closer)
            {
                tokenType = TokenType.StringLiteral;
                mode = ParseMode.StringLiteralEnd;
            }
        }
        else if (mode == ParseMode.StringLiteralEnd)
        {
            if ((c == 'c') || (c == 'w') || (c == 'd')) // string postfix character
            {
                token ~= c;
                emit();
            }
            else
            {
                emit();
                getNextChar = false;
            }
            mode = ParseMode.None;
        }
        else if (mode == ParseMode.Operator)
        {
            // Only a lone '/' can open a comment; "/=" followed by '*', '+' or '/' must not.
            if ((token.length == 1) && (token[0] == '/'))
            {
                switch (c)
                {
                    case '+':
                        token ~= c;
                        nestDepth = 1;
                        pairFrom = token.length;
                        mode = ParseMode.BlockEmbedComment;
                        continue;
                    case '*':
                        token ~= c;
                        mode = ParseMode.BlockComment;
                        continue;
                    case '/':
                        token ~= c;
                        mode = ParseMode.LineComment;
                        continue;
                    default:
                        break;
                }
            }
            if (IsPossibleOperator(token ~ c))
            {
                tokenType = TokenType.Operator;
                token ~= c;
            }
            else if (IsExactOperator(token))
            {
                finishAndReprocess(TokenType.Operator);
            }
            else
            {
                // Not a known operator: keep the text pending and let the None
                // state decide what to do with the current character.
                getNextChar = false;
                mode = ParseMode.None;
            }
        }
        else if (mode == ParseMode.Identifier)
        {
            if (token.length == 1 && c == '"' && (token[0] == 'r' || token[0] == 'x'))
            {
                // r"..." and x"..." are string literals, not identifiers.
                mode = (token[0] == 'r') ? ParseMode.StringLiteralR : ParseMode.StringLiteralX;
                token ~= c;
            }
            else if (inPattern(c, "a-zA-Z0-9_"))
            {
                token ~= c;
            }
            else
            {
                finishAndReprocess(classifyWord(token));
            }
        }
        else
        {
            token ~= c;
        }
    }

    if (token.length > 0)
    {
        // Input ended in the middle of a token. Several states assign the type
        // only when the token ends, so derive it from the state here.
        if (mode == ParseMode.Identifier) tokenType = classifyWord(token);
        else if (mode == ParseMode.NumericLiteral) tokenType = TokenType.NumericLiteral;
        else if (mode == ParseMode.NewLine) tokenType = TokenType.NewLine;
        else if (mode == ParseMode.WhiteSpace) tokenType = TokenType.WhiteSpace;
        else if (mode == ParseMode.CharLiteral) tokenType = TokenType.CharacterLiteral;
        else if (mode == ParseMode.LineComment ||
                 mode == ParseMode.BlockComment ||
                 mode == ParseMode.BlockEmbedComment) tokenType = TokenType.Comment;
        else if (mode == ParseMode.StringLiteral ||
                 mode == ParseMode.StringLiteralBackTick ||
                 mode == ParseMode.StringLiteralR ||
                 mode == ParseMode.StringLiteralX) tokenType = TokenType.StringLiteral;
        // None, Operator and StringLiteralEnd already carry the right type.
        processToken(token, tokenType);
    }
}

/// Picks the TokenType for a finished word by looking it up in the keyword tables.
private TokenType classifyWord(char[] word)
{
    if (itemInArray(word, D_VALUE_KEYWORDS)) return TokenType.KeywordValue;
    if (itemInArray(word, D_TYPE_KEYWORDS)) return TokenType.KeywordType;
    if (itemInArray(word, D_KEYWORDS)) return TokenType.Keyword;
    if (itemInArray(word, D_NAMED_CHARS)) return TokenType.Keyword;
    if (itemInArray(word, D_SPECIAL_TOKENS)) return TokenType.Keyword;
    return TokenType.Identifier;
}

/// True when text ends in an odd number of backslashes, i.e. the last one escapes what follows.
private bool endsWithOddBackslashRun(char[] text)
{
    size_t run = 0;
    while (run < text.length && text[text.length - 1 - run] == '\\') run++;
    return (run & 1) == 1;
}
/**
 * Reports whether arr contains an entry equal to item.
 *
 * Params:
 *     item = text to look for
 *     arr  = table of candidate strings
 *
 * Returns: true if some entry has the same length and the same characters as
 *          item, false otherwise (including when arr is empty).
 */
bool itemInArray(char[] item, invariant char[][] arr)
{
    for (size_t idx = 0; idx < arr.length; idx++)
    {
        auto candidate = arr[idx];
        // Skip entries of a different size before comparing contents.
        if (candidate.length != item.length) continue;
        if (candidate == item) return true;
    }
    return false;
}
/**
 * Reports whether pop is an operator from D_OPERATORS or the beginning of one.
 *
 * Params:
 *     pop = operator text collected so far
 *
 * Returns: true if some entry of D_OPERATORS equals pop or starts with pop;
 *          false otherwise, and false for empty input.
 */
bool IsPossibleOperator(char[] pop)
{
    // Nothing collected yet: there is nothing to match, and no slice to take.
    if (pop.length == 0) return false;

    foreach (invariant char[] op; D_OPERATORS)
    {
        // Compare pop against the first pop.length characters of op. When the
        // lengths are equal this is the exact-match case. (The slice used to
        // stop one character short, so a proper prefix could never match.)
        if (op.length >= pop.length && op[0 .. pop.length] == pop) return true;
    }
    return false;
}
/**
 * Reports whether c is the first character of any operator in D_OPERATORS.
 *
 * Params:
 *     c = character to test
 *
 * Returns: true if at least one entry of D_OPERATORS begins with c.
 */
bool IsPossibleOperator(char c)
{
    bool startsOperator = false;
    foreach (invariant char[] candidate; D_OPERATORS)
    {
        if (candidate[0] == c)
        {
            startsOperator = true;
            break; // the first hit settles the answer
        }
    }
    return startsOperator;
}
/**
 * Reports whether pop is exactly one of the operators listed in D_OPERATORS.
 *
 * Params:
 *     pop = candidate operator text
 *
 * Returns: the result of looking pop up in D_OPERATORS with itemInArray.
 */
bool IsExactOperator(char[] pop)
{
    bool listed = itemInArray(pop, D_OPERATORS);
    return listed;
}