regex - Parse custom HTML list tags in C# or Java -
I have text like this:
this simple line [olist] [#]this line 1 [#]this line 2 [olist] [#]this line 2.1 [#]this line 2.2 [#]this line 2.3 , continues here [/olist] [#]this line 3 [/olist] line
How can I parse it in C# into the HTML below?
this simple line <ol> <li>this line 1</li> <li>this line 2 <ol> <li>this line 2.1</li> <li>this line 2.2</li> <li>this line 2.3 , continues here</li> </ol> </li> <li>this line 3</li> </ol> line
I tried splitting and concatenating, but sub-lists are not being handled properly.
Update: sample code
This is what I am doing.
// Usage: rewrite every [olist]...[/olist] section of customHtml as an <ol> list.
var html = ReplaceList(customHtml, "olist", "ol");

/// <summary>
/// Repeatedly replaces the first "[key]...[/key]" section of <paramref name="text"/>
/// with an HTML list: each "[#]"-separated, non-blank entry becomes a trimmed
/// <c>&lt;li&gt;</c> item wrapped in <c>&lt;tag&gt;...&lt;/tag&gt;</c>.
/// </summary>
/// <param name="text">Input containing the custom list markup.</param>
/// <param name="key">Custom tag name without brackets, e.g. "olist".</param>
/// <param name="tag">HTML tag to emit for the list, e.g. "ol".</param>
/// <returns>The text once no further "[key]...[/key]" section is found.</returns>
/// <remarks>
/// Known limitation: GetListEntry pairs the first opening tag with the first closing
/// tag that follows it, so for nested lists the extracted section still contains the
/// inner opening tag and the nesting is not reproduced in the output.
/// </remarks>
private static string ReplaceList(string text, string key, string tag)
{
    var itemTmpl = GetListEntry(text, key);
    while (itemTmpl != null)
    {
        var buf = new StringBuilder();
        var arr = itemTmpl.Split(new[] { "[#]" }, StringSplitOptions.RemoveEmptyEntries);
        foreach (var str in arr)
        {
            // Skip whitespace-only fragments (e.g. the text before the first "[#]").
            if (!string.IsNullOrWhiteSpace(str))
                buf.AppendFormat("<li>{0}</li>", str.Trim());
        }

        var content = string.Format("<{0}>{1}</{0}>", tag, buf);

        // NOTE(review): SubstringBefore/SubstringAfter are extension methods not shown here;
        // presumably they return the text before/after the first occurrence - confirm.
        text = text.SubstringBefore("[" + key + "]") + content + text.SubstringAfter("[/" + key + "]");
        itemTmpl = GetListEntry(text, key);
    }
    return text;
}

/// <summary>
/// Returns the text between the first "[key]" and the first "[/key]" located at or
/// after it, or <c>null</c> when no such pair exists.
/// </summary>
/// <param name="text">Text to search.</param>
/// <param name="key">Custom tag name without brackets.</param>
private static string GetListEntry(string text, string key)
{
    var tag1 = string.Format("[{0}]", key);
    var tag2 = string.Format("[/{0}]", key);
    var start = text.IndexOf(tag1, StringComparison.Ordinal);
    var end = (start > -1) ? text.IndexOf(tag2, start, StringComparison.Ordinal) : -1;
    if (start < 0 || end <= start)
        return null;

    var result = text.Substring(start + tag1.Length, end - start - tag1.Length);
    return result;
}
Note that list items can span multiple lines and may include line breaks.
You have to parse the text into an abstraction tree first, then compose the result from that abstraction tree, i.e.:
/// <summary>A node of the list tree: it can receive children and knows its parent.</summary>
public interface IElement
{
    /// <summary>Adds a child node to this element.</summary>
    void AddElement(IElement element);

    /// <summary>The enclosing node, or <c>null</c> when none was supplied.</summary>
    IElement Parent { get; }
}

/// <summary>An ordered list; its children are <see cref="LiElement"/> items.</summary>
class OlElement : IElement
{
    public IList<LiElement> Elements { get; set; }
    public IElement Parent { get; set; }

    public OlElement(IElement parent)
    {
        // Assign the property from the parameter; C# is case-sensitive, so the two
        // names must differ in case or this becomes a no-op self-assignment.
        Parent = parent;
        Elements = new List<LiElement>();
    }

    /// <summary>Adds a list item. The cast throws if the element is not a <see cref="LiElement"/>.</summary>
    public void AddElement(IElement element)
    {
        Elements.Add((LiElement)element);
    }

    /// <summary>Renders "&lt;ol&gt;", each child on its own line, then "&lt;/ol&gt;".</summary>
    public override string ToString()
    {
        var builder = new StringBuilder();
        builder.AppendLine("<ol>");
        foreach (var child in Elements)
        {
            builder.AppendLine(child.ToString());
        }
        builder.AppendLine("</ol>");
        return builder.ToString();
    }
}

/// <summary>A list item: its own text followed by any nested <see cref="OlElement"/> lists.</summary>
class LiElement : IElement
{
    public string Text { get; set; }
    public IElement Parent { get; set; }
    public IList<OlElement> Elements { get; set; }

    public LiElement(IElement parent, string text)
    {
        Parent = parent;
        Text = text;
        Elements = new List<OlElement>();
    }

    /// <summary>Adds a nested list. The cast throws if the element is not an <see cref="OlElement"/>.</summary>
    public void AddElement(IElement element)
    {
        Elements.Add((OlElement)element);
    }

    /// <summary>Renders "&lt;li&gt;", the text, every nested list, then "&lt;/li&gt;".</summary>
    public override string ToString()
    {
        var builder = new StringBuilder();
        builder.Append("<li>");
        builder.Append(Text);
        foreach (var child in Elements)
        {
            builder.AppendLine(child.ToString());
        }
        builder.AppendLine("</li>");
        return builder.ToString();
    }
}
Getting the result:
// Sample input. The parser below works line by line, so every tag must start its own line.
const string text = @"[olist]
[#]this line 1
[#]this line 2
[olist]
[#]this line 2.1
[#]this line 2.2
[#]this line 2.3
[/olist]
[#]this line 3
[/olist]";

// Matches a line that begins (after optional whitespace) with "[tag]" and captures
// the tag name and the remainder of the line.
var regex = new Regex(@"^\s*\[(?<tag>[^\]]+)\](?<text>.*)$");

var root = new OlElement(null);
var currentElement = (IElement)root;

using (var reader = new StringReader(text))
{
    string line;
    while ((line = reader.ReadLine()) != null)
    {
        var match = regex.Match(line);
        if (match.Success)
        {
            switch (match.Groups["tag"].Value)
            {
                case "#":
                    if (currentElement is OlElement)
                    {
                        // First item of a list: attach it directly to the list.
                        var child = new LiElement(currentElement, match.Groups["text"].Value);
                        currentElement.AddElement(child);
                        currentElement = child;
                        break;
                    }
                    if (currentElement is LiElement)
                    {
                        // Subsequent item: attach it as a sibling under the same list.
                        var child = new LiElement(currentElement.Parent, match.Groups["text"].Value);
                        currentElement.Parent.AddElement(child);
                        currentElement = child;
                    }
                    break;

                case "olist":
                    if (currentElement == root)
                    {
                        // The outermost [olist] is represented by the pre-created root.
                        break;
                    }
                    if (currentElement is LiElement)
                    {
                        // A nested list belongs to the item currently being read.
                        var child = new OlElement(currentElement);
                        currentElement.AddElement(child);
                        currentElement = child;
                    }
                    break;

                case "/olist":
                    if (currentElement is LiElement)
                    {
                        // Leave both the current item and the list that contains it.
                        currentElement = currentElement.Parent.Parent;
                        break;
                    }
                    if (currentElement is OlElement)
                    {
                        currentElement = currentElement.Parent;
                    }
                    break;

                default:
                    break;
            }
        }
    }
}

var result = root.ToString();
Comments
Post a Comment