regex - Parse custom HTML list tags in C# or Java -


I have text like this:

this simple line
[olist]
    [#]this line 1
    [#]this line 2
        [olist]
            [#]this line 2.1
            [#]this line 2.2
            [#]this line 2.3
    , continues here
        [/olist]
    [#]this line 3
[/olist]
line

How can I parse this in C# into the HTML below?

this simple line
<ol>
    <li>this line 1</li>
    <li>this line 2
        <ol>
            <li>this line 2.1</li>
            <li>this line 2.2</li>
            <li>this line 2.3
    , continues here</li>
        </ol>
    </li>
    <li>this line 3</li>
</ol>
line

I am splitting and concatenating, but sub-lists are not being handled properly.

Update: sample code

This is what I am doing:

// Usage: rewrite every [olist]...[/olist] region of customHtml as an <ol> element.
var html = ReplaceList(customHtml, "olist", "ol");

/// <summary>
/// Repeatedly takes the first [key]...[/key] region found by GetListEntry,
/// wraps each non-blank "[#]"-separated entry in a list-item element, wraps the
/// items in the given tag, and splices the result back into the text.
/// </summary>
/// <param name="text">Text containing the custom list markup.</param>
/// <param name="key">Custom tag name without brackets, e.g. "olist".</param>
/// <param name="tag">HTML tag name to emit, e.g. "ol".</param>
/// <returns>The text once GetListEntry no longer finds a region.</returns>
/// <remarks>
/// Known limitation (this is the problem the question asks about): GetListEntry
/// pairs the first opening tag with the first closing tag that follows it, so for
/// nested lists the outer opening tag is paired with the inner closing tag.
/// </remarks>
private static string ReplaceList(string text, string key, string tag)
{
    var itemTmpl = GetListEntry(text, key);
    while (itemTmpl != null)
    {
        var buf = new StringBuilder();
        var arr = itemTmpl.Split(new[] { "[#]" }, StringSplitOptions.RemoveEmptyEntries);
        foreach (var str in arr)
        {
            // Skip whitespace-only fragments (e.g. the indentation before the first "[#]").
            if (!string.IsNullOrWhiteSpace(str))
                buf.AppendFormat("<li>{0}</li>", str.Trim());
        }

        var content = string.Format("<{0}>{1}</{0}>", tag, buf);

        // NOTE(review): SubstringBefore/SubstringAfter are not framework methods;
        // presumably they are the asker's own string extension methods - confirm.
        text = text.SubstringBefore("[" + key + "]") + content +
               text.SubstringAfter("[/" + key + "]");

        itemTmpl = GetListEntry(text, key);
    }

    return text;
}

/// <summary>
/// Returns the text between the first "[key]" and the first "[/key]" that occurs
/// after it, or null when either tag is missing.
/// </summary>
/// <param name="text">Text to search.</param>
/// <param name="key">Custom tag name without brackets.</param>
private static string GetListEntry(string text, string key)
{
    var tag1 = string.Format("[{0}]", key);
    var tag2 = string.Format("[/{0}]", key);

    var start = text.IndexOf(tag1, StringComparison.Ordinal);
    // Only look for the closing tag when an opening tag exists, and only after it.
    var end = (start > -1) ? text.IndexOf(tag2, start, StringComparison.Ordinal) : -1;

    if (start < 0 || end <= start)
        return null;

    var result = text.Substring(start + tag1.Length, end - start - tag1.Length);

    return result;
}

Note that list items can span multiple lines and may include line breaks.

You have to parse the text into an abstract tree first, and then compose the result from that tree, e.g.:

/// <summary>
/// A node of the parsed list tree: it can receive child nodes and exposes its parent.
/// </summary>
public interface IElement
{
    void AddElement(IElement element);
    IElement Parent { get; }
}

/// <summary>
/// An ordered list node; its children are list items.
/// </summary>
class OlElement : IElement
{
    public IList<LiElement> Elements { get; set; }
    public IElement Parent { get; set; }

    /// <param name="parent">Owning node; the driver code passes null for the root list.</param>
    public OlElement(IElement parent)
    {
        // Assign the property, not the parameter: "parent = parent" would leave Parent unset.
        Parent = parent;
        Elements = new List<LiElement>();
    }

    /// <summary>Adds a child; the argument must be a LiElement or the cast throws.</summary>
    public void AddElement(IElement element)
    {
        Elements.Add((LiElement)element);
    }

    /// <summary>Renders the list as an ol element with one rendered child per line.</summary>
    public override string ToString()
    {
        var builder = new StringBuilder();
        builder.AppendLine("<ol>");
        foreach (var child in Elements)
        {
            builder.AppendLine(child.ToString());
        }
        builder.AppendLine("</ol>");
        return builder.ToString();
    }
}

/// <summary>
/// A list item node: its own text plus any nested ordered lists.
/// </summary>
class LiElement : IElement
{
    public string Text { get; set; }
    public IElement Parent { get; set; }
    public IList<OlElement> Elements { get; set; }

    /// <param name="parent">The list that owns this item.</param>
    /// <param name="text">The item's text content.</param>
    public LiElement(IElement parent, string text)
    {
        // Assign the properties, not the parameters (self-assignment would drop both values).
        Parent = parent;
        Text = text;
        Elements = new List<OlElement>();
    }

    /// <summary>Adds a nested list; the argument must be an OlElement or the cast throws.</summary>
    public void AddElement(IElement element)
    {
        Elements.Add((OlElement)element);
    }

    /// <summary>Renders the item as an li element: text first, then each nested list.</summary>
    public override string ToString()
    {
        var builder = new StringBuilder();
        builder.Append("<li>");
        builder.Append(Text);
        foreach (var child in Elements)
        {
            builder.AppendLine(child.ToString());
        }
        builder.AppendLine("</li>");
        return builder.ToString();
    }
}

Getting the result:

// Sample input. The parser below reads it line by line, so each tag must start
// its own line (leading whitespace is allowed by the regex).
const string text = @"[olist]
[#]this line 1
[#]this line 2
    [olist]
        [#]this line 2.1
        [#]this line 2.2
        [#]this line 2.3
    [/olist]
[#]this line 3
[/olist]";

// One tag per line: optional indentation, "[tag]", then the rest of the line as text.
var regex = new Regex(@"^\s*\[(?<tag>[^\]]+)\](?<text>.*)$");
var root = new OlElement(null);
var currentElement = (IElement)root;
using (var reader = new StringReader(text))
{
    string line;
    while ((line = reader.ReadLine()) != null)
    {
        var match = regex.Match(line);
        // Lines that do not start with a tag are ignored.
        if (match.Success)
        {
            switch (match.Groups["tag"].Value)
            {
                case "#":
                    if (currentElement is OlElement)
                    {
                        // First item of the current list.
                        var child = new LiElement(currentElement, match.Groups["text"].Value);
                        currentElement.AddElement(child);
                        currentElement = child;
                        break;
                    }
                    if (currentElement is LiElement)
                    {
                        // Sibling item: attach to the list that owns the current item.
                        var child = new LiElement(currentElement.Parent, match.Groups["text"].Value);
                        currentElement.Parent.AddElement(child);
                        currentElement = child;
                    }
                    break;
                case "olist":
                    if (currentElement == root)
                    {
                        // The outermost [olist] is represented by root itself.
                        break;
                    }
                    if (currentElement is LiElement)
                    {
                        // Nested list: becomes a child of the current item.
                        var child = new OlElement(currentElement);
                        currentElement.AddElement(child);
                        currentElement = child;
                    }
                    break;
                case "/olist":
                    if (currentElement is LiElement)
                    {
                        // Item -> its list -> the node that owns that list.
                        currentElement = currentElement.Parent.Parent;
                        break;
                    }
                    if (currentElement is OlElement)
                    {
                        currentElement = currentElement.Parent;
                    }
                    break;
                default:
                    break;
            }
        }
    }
}
var result = root.ToString();

Comments

Popular posts from this blog

java - Plugin org.apache.maven.plugins:maven-install-plugin:2.4 or one of its dependencies could not be resolved -

Round ImageView Android -

How can I utilize Yahoo Weather API in android -