/////////////////////////////////////////////////
// Graph Utilities
void TGUtil::GetCdf(const TIntPrV& PdfV, TIntPrV& CdfV) {
CdfV = PdfV;
for (int i = 1; i < CdfV.Len(); i++) {
CdfV[i].Val2 = CdfV[i-1].Val2 + CdfV[i].Val2; }
}
void TGUtil::GetCdf(const TFltPrV& PdfV, TFltPrV& CdfV) {
CdfV = PdfV;
for (int i = 1; i < CdfV.Len(); i++) {
CdfV[i].Val2 = CdfV[i-1].Val2 + CdfV[i].Val2; }
}
void TGUtil::GetCdf(const TIntFltKdV& PdfV, TIntFltKdV& CdfV) {
CdfV = PdfV;
for (int i = 1; i < CdfV.Len(); i++) {
CdfV[i].Dat = CdfV[i-1].Dat + CdfV[i].Dat; }
}
TIntPrV TGUtil::GetCdf(const TIntPrV& PdfV) {
TIntPrV CdfV;
GetCdf(PdfV, CdfV);
return CdfV;
}
TFltPrV TGUtil::GetCdf(const TFltPrV& PdfV) {
TFltPrV CdfV;
GetCdf(PdfV, CdfV);
return CdfV;
}
void TGUtil::GetCCdf(const TIntPrV& PdfV, TIntPrV& CCdfV) {
CCdfV = PdfV;
for (int i = CCdfV.Len()-2; i >= 0; i--) {
CCdfV[i].Val2 = CCdfV[i+1].Val2 + CCdfV[i].Val2; }
}
void TGUtil::GetCCdf(const TFltPrV& PdfV, TFltPrV& CCdfV) {
CCdfV = PdfV;
for (int i = CCdfV.Len()-2; i >= 0; i--) {
CCdfV[i].Val2 = CCdfV[i+1].Val2 + CCdfV[i].Val2; }
}
void TGUtil::GetCCdf(const TIntFltKdV& PdfV, TIntFltKdV& CCdfV) {
CCdfV = PdfV;
for (int i = CCdfV.Len()-2; i >= 0; i--) {
CCdfV[i].Dat = CCdfV[i+1].Dat + CCdfV[i].Dat; }
}
TIntPrV TGUtil::GetCCdf(const TIntPrV& PdfV) {
TIntPrV CCdfV;
GetCCdf(PdfV, CCdfV);
return CCdfV;
}
TFltPrV TGUtil::GetCCdf(const TFltPrV& PdfV) {
TFltPrV CCdfV;
GetCCdf(PdfV, CCdfV);
return CCdfV;
}
void TGUtil::GetPdf(const TIntPrV& CdfV, TIntPrV& PdfV) {
PdfV = CdfV;
for (int i = PdfV.Len()-1; i > 0; i--) {
PdfV[i].Val2 = PdfV[i].Val2 - PdfV[i-1].Val2; }
}
void TGUtil::GetPdf(const TFltPrV& CdfV, TFltPrV& PdfV) {
PdfV = CdfV;
for (int i = PdfV.Len()-1; i > 0; i--) {
PdfV[i].Val2 = PdfV[i].Val2 - PdfV[i-1].Val2; }
}
void TGUtil::GetPdf(const TIntFltKdV& CdfV, TIntFltKdV& PdfV) {
PdfV = CdfV;
for (int i = PdfV.Len()-1; i > 0; i--) {
PdfV[i].Dat = PdfV[i].Dat - PdfV[i-1].Dat; }
}
void TGUtil::Normalize(TFltPrV& PdfV) {
double Sum = 0.0;
for (int i = 0; i < PdfV.Len(); i++) {
Sum += PdfV[i].Val2; }
if (Sum <= 0.0) { return; }
for (int i = 0; i < PdfV.Len(); i++) {
PdfV[i].Val2 /= Sum; }
}
void TGUtil::Normalize(TIntFltKdV& PdfV) {
double Sum = 0.0;
for (int i = 0; i < PdfV.Len(); i++) {
Sum += PdfV[i].Dat; }
if (Sum <= 0.0) { return; }
for (int i = 0; i < PdfV.Len(); i++) {
PdfV[i].Dat /= Sum; }
}
void TGUtil::MakeExpBins(const TFltPrV& XYValV, TFltPrV& ExpXYValV, const double& BinFactor, const double& MinYVal) {
TGnuPlot::MakeExpBins(XYValV, ExpXYValV, BinFactor, MinYVal);
}
void TGUtil::MakeExpBins(const TFltKdV& XYValV, TFltKdV& ExpXYValV, const double& BinFactor, const double& MinYVal) {
TGnuPlot::MakeExpBins(XYValV, ExpXYValV, BinFactor, MinYVal);
}
void TGUtil::MakeExpBins(const TFltV& YValV, TFltV& ExpYValV, const double& BinFactor) {
ExpYValV.Clr(true);
int prevI=0;
for (int i = 0; i < YValV.Len(); ) {
ExpYValV.Add(YValV[i]);
i = int(i*BinFactor);
if (i==prevI) { i++; }
prevI = i;
}
}
void TGUtil::MakeExpBins(const TIntV& YValV, TIntV& ExpYValV, const double& BinFactor) {
ExpYValV.Clr(true);
int prevI=0;
for (int i = 0; i < YValV.Len(); ) {
ExpYValV.Add(YValV[i]);
i = int(i*BinFactor);
if (i==prevI) { i++; }
prevI = i;
}
}
/////////////////////////////////////////////////
// String helper functions and utilities
// get TagVal
TChA& TStrUtil::GetXmlTagVal(TXmlLx& XmlLx, const TChA& TagNm) {
static TChA TagVal;
EAssertR(XmlLx.GetSym() == xsySTag, TagNm);
EAssertR(TagNm == XmlLx.TagNm.CStr(), TagNm);
const TXmlLxSym NextSym = XmlLx.GetSym();
TagVal = XmlLx.TxtChA;
if (NextSym == xsyStr) {
EAssertR(XmlLx.GetSym() == xsyETag, TagNm);
} else {
EAssertR(NextSym == xsyETag, TagNm); // empty tag
//printf(" token: %s empty! %s\n", XmlLx.TagNm.CStr(), XmlLx.GetFPosStr().CStr());
}
EAssertR(XmlLx.TagNm == TagNm, TagNm);
return TagVal;
}
// get TagVal
void TStrUtil::GetXmlTagNmVal(TXmlLx& XmlLx, TChA& TagNm, TChA& TagVal) {
EAssertR(XmlLx.GetSym() == xsySTag, TagNm);
TagNm = XmlLx.TagNm;
const TXmlLxSym NextSym = XmlLx.GetSym();
TagVal = XmlLx.TxtChA;
if (NextSym == xsyStr) {
EAssertR(XmlLx.GetSym() == xsyETag, TagNm);
} else {
EAssertR(NextSym == xsyETag, TagNm); // empty tag
//printf(" token: %s empty! %s\n", XmlLx.TagNm.CStr(), XmlLx.GetFPosStr().CStr());
}
}
// get * (can be many tags inbetween
bool TStrUtil::GetXmlTagNmVal2(TXmlLx& XmlLx, TChA& TagNm, TChA& TagVal, const bool& TakeTagNms) {
if (XmlLx.GetSym() != xsySTag) {
return false; }
TagVal.Clr();
TagNm = XmlLx.TagNm;
//const TXmlLxSym NextSym = XmlLx.GetSym();
while (XmlLx.Sym != xsyETag || XmlLx.TagNm != TagNm.CStr()) {
if (TakeTagNms) {
TagVal += XmlLx.TxtChA; }
else if (XmlLx.Sym == xsyStr) {
TagVal += XmlLx.TxtChA; }
XmlLx.GetSym();
}
return true;
//if (NextSym == xsyStr) {
// EAssertR(XmlLx.GetSym() == xsyETag, TagNm);
//} else {
// EAssertR(NextSym == xsyETag, TagNm); // empty tag
//printf(" token: %s empty! %s\n", XmlLx.TagNm.CStr(), XmlLx.GetFPosStr().CStr());
//}
}
// http://www.ijs.si/fdfd/blah.html --> www.ijs.si
TChA TStrUtil::GetDomNm(const TChA& UrlChA) {
int EndSlash = UrlChA.SearchCh('/', 7)-1; // skip starting http://
if (EndSlash > 0) {
const int BegSlash = UrlChA.SearchChBack('/', EndSlash);
if (BegSlash > 0) { return UrlChA.GetSubStr(BegSlash+1, EndSlash).ToLc(); }
else { return UrlChA.GetSubStr(0, UrlChA.SearchCh('/', 0)-1).ToLc(); }
} else {
if (UrlChA.IsPrefix("http://")) { return UrlChA.GetSubStr(7, UrlChA.Len()-1).ToLc(); }
EndSlash = UrlChA.SearchCh('/', 0);
if (EndSlash > 0) { return UrlChA.GetSubStr(0, EndSlash-1).ToLc(); }
else { return TChA(UrlChA).ToLc(); }
}
}
// get domain name and also strip starting www.
TChA TStrUtil::GetDomNm2(const TChA& UrlChA) {
TChA Dom = GetDomNm(UrlChA);
if (Dom.IsPrefix("www.")) { return Dom.GetSubStr(4, TInt::Mx); }
else { return Dom; }
}
int GetNthOccurence(const TChA& Url, const int& Count, const char Ch='/') {
const char *c = Url.CStr();
int cnt = 0;
while (*c && cnt != Count) {
if (*c == Ch) { cnt++; }
c++;
}
return int(c-Url.CStr()-1);
}
// get website (GetDomNm2 or blog url)
TChA TStrUtil::GetWebsiteNm(const TChA& PostUrlStr) {
TChA DomNm = TStrUtil::GetDomNm2(PostUrlStr);
// http://blog.myspace.com/index.cfm?fuseaction=blog.view&friendid=141560&blogid=420009539
if (DomNm == "blog.myspace.com") {
return PostUrlStr.GetSubStr(7, GetNthOccurence(PostUrlStr, 2, '&')-1);
}
// http://blogs.msdn.com/squasta/archive/2008/08/11/annonces-microsoft-au-black-hat-2008.aspx
// http://ameblo.jp/baptism/entry-10126216277.html
// http://xfruits.com/fcuignet/?id=8793&clic=249862689&url=http%3a%2f%2fnews.google.com%2fnews%2furl%3fsa%3dt%26ct%3dfr%2f9-0%26fd%3dr%26url%3dhttp%3a%2f%2fwww.investir-en-tunisie.net%2fnews%2farticle.php%253fid%253d5026%26cid%3d1241943065%26ei%3doy6gslh9jzycxahkjfxucw%26usg%3dafqjcnen_bczqldodsyga6zps2axphxl3q
// http://scienceblogs.com/grrlscientist/2008/08/reader_comments.php
// http://blogs.sun.com/geertjan/entry/wicket_in_action_undoubtedly_the
// http://blog.wired.com/gadgets/2008/08/apple-sells-60.html
// http://weblogs.asp.net/mehfuzh/archive/2008/08/11/linqextender-1-4-enhanced-object-tracking.aspx
// http://blogs.technet.com/plitpromicrosoftcom/archive/2008/08/11/nowa-karta-sim.aspx
// http://blogs.guardian.co.uk/greenslade/2008/08/murdoch_aims_to_boost_subscrib.html
// http://blogs.clarin.com/quimeykiltru/2008/8/11/mentira-mentira-creo
// http://blogs.sun.com/geertjan/entry/wicket_in_action_undoubtedly_the
// http://blog.wired.com/gadgets/2008/08/apple-sells-60.html
// http://weblogs.asp.net/mehfuzh/archive/2008/08/11/linqextender-1-4-enhanced-object-tracking.aspx
// http://blogs.technet.com/plitpromicrosoftcom/archive/2008/08/11/nowa-karta-sim.aspx
// http://blogs.guardian.co.uk/greenslade/2008/08/murdoch_aims_to_boost_subscrib.html
// http://blogs.clarin.com/quimeykiltru/2008/8/11/mentira-mentira-creo
// http://blogs.zdnet.com/hardware/?p=2391
// http://blogs.citypages.com/sports/2008/08/ufc_87_seek_and.php
// http://voices.washingtonpost.com/achenblog/2008/08/no_medal_for_bush.html
// http://blog.tv2.dk/ole.mork/entry254689.html
// http://blogs.menomoneefallsnow.com/in_the_race/archive/2008/08/11/sometimes-it-s-about-how-you-play-the-game.asp
// http://weblogs.baltimoresun.com/entertainment/midnight_sun/blog/2008/08/heidis_bad_break_with_dubai_pa.html
// http://eonline.com/uberblog/b23076_youtubular_from_rickrolled_barackrolled.html?sid=rss_topstories&utm_source=eo
if (DomNm=="blogs.msdn.com" || DomNm=="ameblo.jp" || DomNm=="xfruits.com" || DomNm=="scienceblogs.com" || DomNm=="blogs.sun.com"
|| DomNm=="blog.wired.com" || DomNm=="weblogs.asp.net" || DomNm=="blogs.technet.com" || DomNm=="blogs.guardian.co"
|| DomNm=="blogs.clarin.com" || DomNm=="blogs.sun.com" || DomNm=="blog.wired.com" || DomNm=="weblogs.asp.net"
|| DomNm=="blogs.technet.com" || DomNm=="blogs.guardian.com" || DomNm=="blogs.clarin.com" || DomNm=="blogs.zdnet.com"
|| DomNm=="blogs.citypages.com" || DomNm=="voices.washingtonpost.com" || DomNm=="blog.tv2.dk"
|| DomNm=="blogs.menomoneefallsnow.com" || DomNm=="weblogs.baltimoresun.com" || DomNm=="eonline.com") {
return PostUrlStr.GetSubStr(7, GetNthOccurence(PostUrlStr, 4)-1);
}
// http://digg.com/submit?phase=2&url=http://socialitelife.celebuzz.com/archive/2008/07/31/and_then_a_hero_came_along.php&title=and
// http://digg.com/general_sciences/mental_images_are_like_pictures_slide_show
if (DomNm == "digg.com") {
if (PostUrlStr.IsPrefix("http://digg.com/submit?")) {
const int Url = PostUrlStr.SearchStr(";url=");
if (Url != -1) {
return GetWebsiteNm(PostUrlStr.GetSubStr(Url+5, PostUrlStr.SearchCh('&', Url+5))); }
} else {
return PostUrlStr.GetSubStr(7, GetNthOccurence(PostUrlStr, 4)-1); }
}
// http://bbc.co.uk/blogs/thereporters/markdevenport/2008/08/back_to_porridge.html
// http://nydailynews.com/blogs/subwaysquawkers/2008/08/anaheim-is-no-magic-kingdom-fo.html
// http://newsbusters.org/blogs/p-j-gladnick/2008/08/11/sf-chronicle-writer-predicts-global-warming-shellfish-invas
// http://nydailynews.com/blogs/subwaysquawkers/2008/08/anaheim-is-no-magic-kingdom-fo.html
if (PostUrlStr.IsPrefix("http://nydailynews.com/blogs/") || PostUrlStr.IsPrefix("http://bbc.co.uk/blogs/")
|| PostUrlStr.IsPrefix("http://nydailynews.com/blogs/") || PostUrlStr.IsPrefix("http://newsbusters.org/blogs/")) {
return PostUrlStr.GetSubStr(7, GetNthOccurence(PostUrlStr, 5)-1);
}
// http://feeds.feedburner.com/~r/adesblog/ ~3/361711640
if (DomNm=="feeds.feedburner.com") {
return PostUrlStr.GetSubStr(7, GetNthOccurence(PostUrlStr, 5)-1);
}
// http://groups.google.com/group/news.admin.net-abuse.sightings/browse_thread/thread/8452c47949453216/f07daa509b90295c?show_docid=f07daa509b90295c
if (DomNm=="groups.google.com") {
return PostUrlStr.GetSubStr(7, GetNthOccurence(PostUrlStr, 5)-1);
}
// http://news.google.com/news/url?sa=t&ct=us/20-0&fd=r&url=http://www.theobserver.ca/articledisplay.aspx%3fe%3d1151495&cid=0&ei=yswgsjpndpbi8atc9knacw&usg=afqjcnhrbg-nc9z6ymtqfkear3_npwqqxa
if (DomNm=="news.google.com") { // redirect
const int UrlPos = PostUrlStr.SearchStr("&url=");
if (UrlPos != -1) {
return GetWebsiteNm(PostUrlStr.GetSubStr(UrlPos+5, PostUrlStr.SearchCh('&', UrlPos+5))); }
}
// http://bloggrevyen.no/go/110340/http://blog.christergulbrandsen.com/2008/08/11/is-nationalism-the-only-way-to-de
if (DomNm == "bloggrevyen.no") { // redirect
const int Http2 = PostUrlStr.SearchStr("/http://");
if (Http2!=-1) {
return GetWebsiteNm(PostUrlStr.GetSubStr(Http2+1, PostUrlStr.Len()-1)); }
}
//http://us.rd.yahoo.com/dailynews/rss/search/urgent+care/sig=11phgb4tu/*http%3a//www.newswise.com/articles/view/543340/?sc=rsmn
//http://ca.rd.yahoo.com/dailynews/rss/topstories/*http://ca.news.yahoo.com/s/reuters/080801/n_top_news/news_afgha
if (DomNm.IsSuffix(".rd.yahoo.com")) {
const int Http2 = PostUrlStr.SearchStr("/*");
if (Http2!=-1) {
return GetWebsiteNm(PostUrlStr.GetSubStr(Http2+9, PostUrlStr.Len()-1)); }
}
return DomNm;
}
// remove ending /, /index.html, etc. and strip starting www.
bool TStrUtil::GetNormalizedUrl(const TChA& UrlIn, const TChA& BaseUrl, TChA& UrlOut) {
UrlOut = UrlIn;
if (StripEnd(UrlIn, "/", UrlOut)) {}
else if (StripEnd(UrlIn, "/index.html", UrlOut)) {}
else if (StripEnd(UrlIn, "/index.htm", UrlOut)) {}
else if (StripEnd(UrlIn, "/index.php", UrlOut)) {}
if (! (UrlOut.IsPrefix("http://") || UrlOut.IsPrefix("ftp://"))) {
// if UrlIn is relative url, try combine it with BaseUrl
if (UrlIn.Empty() || ! (BaseUrl.IsPrefix("http://") || BaseUrl.IsPrefix("ftp://"))) {
//printf("** Bad URL: base:'%s' url:'%s'\n", BaseUrl.CStr(), UrlIn.CStr());
return false; }
TChA Out;
if (! GetNormalizedUrl(BaseUrl, TChA(), Out)) { return false; }
if (UrlIn[0] != '/') { Out.AddCh('/'); }
Out += UrlOut;
UrlOut = Out;
}
// http://www. --> http://
if (UrlOut.IsPrefix("http://www.")) {
UrlOut = "http://"+UrlOut.GetSubStr(11, TInt::Mx);
}
UrlOut.ToLc();
return true;
}
bool TStrUtil::StripEnd(const TChA& Str, const TChA& SearchStr, TChA& NewStr) {
const int StrLen = Str.Len();
const int SearchStrLen = SearchStr.Len();
if (StrLen < SearchStrLen) { return false; }
for (int i = 0; i < SearchStrLen; i++) {
if (Str[StrLen-i-1] != SearchStr[SearchStrLen-i-1]) { return false; }
}
NewStr = Str.GetSubStr(0, StrLen-SearchStrLen-1);
return true;
}
TChA TStrUtil::GetShorStr(const TChA& LongStr, const int MaxLen) {
if (LongStr.Len() < MaxLen) { return LongStr; }
TChA Str = LongStr.GetSubStr(0, MaxLen-1);
Str += "...";
return Str;
}
// space separated sequence of words, remove all punctuations, etc.
TChA TStrUtil::GetCleanWrdStr(const TChA& ChA) {
char *b = (char *) ChA.CStr();
while (*b && ! TCh::IsAlNum(*b)) { b++; }
if (*b == 0) { return TChA(); }
TChA OutChA(ChA.Len());
char *e = b, tmp;
while (*e) {
b = e;
while (*e && (TCh::IsAlNum(*e) || ((*e=='\'' || *e=='-') && TCh::IsAlNum(*(e+1))))) { e++; }
if (b < e) {
tmp = *e; *e=0;
OutChA += b; OutChA.AddCh(' ');
*e = tmp;
}
while (*e && ! TCh::IsAlNum(*e)) { e++; }
if (! *e) { break; }
}
OutChA.DelLastCh(); OutChA.ToLc();
return OutChA;
}
// space seprated sequence of words (includes all non-blank characters, i.e., punctuations)
TChA TStrUtil::GetCleanStr(const TChA& ChA) {
char *b = (char *) ChA.CStr();
while (*b && ! TCh::IsAlNum(*b)) { b++; }
if (*b == 0) { return TChA(); }
TChA OutChA(ChA.Len());
char *e = b;
bool ws=false;
while (*e) {
while (*e && TCh::IsWs(*e)) { e++; ws=true; }
if (! *e) { break; }
if (ws) { OutChA.AddCh(' '); ws=false; }
OutChA.AddCh(*e);
e++;
}
//OutChA.ToLc();
return OutChA;
}
int TStrUtil::CountWords(const TChA& ChA) {
return CountWords(ChA.CStr());
}
int TStrUtil::CountWords(const char* CStr) {
int WrdCnt = 1;
for (const char *c = CStr; *c; c++) {
if (TCh::IsWs(*c)) { WrdCnt++; }
}
return WrdCnt;
}
int TStrUtil::CountWords(const TChA& ChA, const TStrHash& StopWordH) {
TChA Tmp;
TVec WrdV;
SplitWords(Tmp, WrdV);
int SWordCnt = 0;
for (int w = 0; w < WrdV.Len(); w++) {
if (StopWordH.IsKey(WrdV[w])) { SWordCnt++; }
}
return WrdV.Len() - SWordCnt;
}
int TStrUtil::SplitWords(TChA& ChA, TVec& WrdV, const bool& SplitOnWs) {
WrdV.Clr(false);
WrdV.Add(ChA.CStr());
for (char *c = (char *) ChA.CStr(); *c; c++) {
if ((SplitOnWs && *c == ' ') || (! SplitOnWs && ! TCh::IsAlNum(*c))) {
*c = 0;
if (! WrdV.Empty() && strlen(WrdV.Last()) == 0) { WrdV.DelLast(); }
WrdV.Add(c+1);
}
}
return WrdV.Len();
}
int TStrUtil::SplitOnCh(TChA& ChA, TVec& WrdV, const char& Ch, const bool& SkipEmpty) {
WrdV.Clr(false);
WrdV.Add(ChA.CStr());
for (char *c = (char *) ChA.CStr(); *c; c++) {
if (*c == Ch) {
*c = 0;
if (SkipEmpty && ! WrdV.Empty() && strlen(WrdV.Last()) == 0) { WrdV.DelLast(); }
WrdV.Add(c+1);
}
}
if (SkipEmpty && ! WrdV.Empty() && strlen(WrdV.Last()) == 0) { WrdV.DelLast(); }
return WrdV.Len();
}
int TStrUtil::SplitLines(TChA& ChA, TVec& LineV, const bool& SkipEmpty) {
LineV.Clr(false);
LineV.Add(ChA.CStr());
bool IsChs=false;
for (char *c = (char *) ChA.CStr(); *c; c++) {
if (*c == '\n') {
if (c > ChA.CStr() && *(c-1)=='\r') { *(c-1)=0; } // \r\n
*c=0;
if (SkipEmpty) {
if (IsChs) { LineV.Add(c+1); }
} else {
LineV.Add(c+1);
}
IsChs=false;
} else {
IsChs=true;
}
}
return LineV.Len();
}
int TStrUtil::SplitSentences(TChA& ChA, TVec& SentenceV) {
SentenceV.Clr();
const char *B = ChA.CStr();
const char *E = B+ChA.Len();
char *c = (char *) B;
while (*c && TCh::IsWs(*c)) { c++; }
if (*c) { SentenceV.Add(c); } else { return 0; }
for (; c < E; c++) {
if (c blah"
if (c>=E) { continue; }
*c=0; c++;
char *e = c-1;
while (e>B && *e!='"' && ! TCh::IsAlNum(*e)) { *e=0; e--; } // skip trailing non-alpha-num chars
while (c
//PageHtmlStr = " jure";
/*if (UseFullHtml) {
StrB = PageHtmlStr.CStr();
StrE = StrB+PageHtmlStr.Len();
char * NewB = strstr(StrB, "");
if (NewB != NULL) { StrB = NewB+6; }
char * NewE = strstr(StrB, "body>");
if (NewE != NULL) {
while (true) {
char *E=strstr(NewE+4, "body>");
if (E == NULL) { break; } NewE = E; }
StrE = NewE;
}
} else { // only extracted post html*/
StrB = (char *) HtmlStr.CStr();
StrE = (char *) StrB+HtmlStr.Len(); //}
for (char *e = StrB; e < StrE; ) {
char* b = e;
while (e= StrE) { return; }
// if start of a comment: skip
if (e[1]=='!' && e[2]=='-' && e[3]=='-') { // comment
e += 3;
while(e')) { e++; }
e++; continue;
}
// if "