///////////////////////////////////////////////// // Community detection algorithms namespace TSnap { // GIRVAN-NEWMAN algorithm // 1. The betweenness of all existing edges in the network is calculated first. // 2. The edge with the highest betweenness is removed. // 3. The betweenness of all edges affected by the removal is recalculated. // 4. Steps 2 and 3 are repeated until no edges remain. // Girvan M. and Newman M. E. J., Community structure in social and biological networks, Proc. Natl. Acad. Sci. USA 99, 7821–7826 (2002) // Keep removing edges from Graph until one of the connected components of Graph splits into two. void CmtyGirvanNewmanStep(PUNGraph& Graph, TIntV& Cmty1, TIntV& Cmty2) { TIntPrFltH BtwEH; TBreathFS BFS(Graph); Cmty1.Clr(false); Cmty2.Clr(false); while (true) { TSnap::GetBetweennessCentr(Graph, BtwEH); BtwEH.SortByDat(false); if (BtwEH.Empty()) { return; } const int NId1 = BtwEH.GetKey(0).Val1; const int NId2 = BtwEH.GetKey(0).Val2; Graph->DelEdge(NId1, NId2); BFS.DoBfs(NId1, true, false, NId2, TInt::Mx); if (BFS.GetHops(NId1, NId2) == -1) { // two components TSnap::GetNodeWcc(Graph, NId1, Cmty1); TSnap::GetNodeWcc(Graph, NId2, Cmty2); return; } } } // Connected components of a graph define clusters // OutDegH and OrigEdges stores node degrees and number of edges in the original graph double _GirvanNewmanGetModularity(const PUNGraph& G, const TIntH& OutDegH, const int& OrigEdges, TCnComV& CnComV) { TSnap::GetWccs(G, CnComV); // get communities double Mod = 0; for (int c = 0; c < CnComV.Len(); c++) { const TIntV& NIdV = CnComV[c](); double EIn=0, EEIn=0; for (int i = 0; i < NIdV.Len(); i++) { TUNGraph::TNodeI NI = G->GetNI(NIdV[i]); EIn += NI.GetOutDeg(); EEIn += OutDegH.GetDat(NIdV[i]); } Mod += (EIn-EEIn*EEIn/(2.0*OrigEdges)); } if (Mod == 0) { return 0; } else { return Mod/(2.0*OrigEdges); } } // Maximum modularity clustering by Girvan-Newman algorithm (slow) // Girvan M. and Newman M. E. J., Community structure in social and biological networks, Proc. Natl. Acad. Sci. USA 99, 7821–7826 (2002) double CommunityGirvanNewman(PUNGraph& Graph, TCnComV& CmtyV) { TIntH OutDegH; const int NEdges = Graph->GetEdges(); for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { OutDegH.AddDat(NI.GetId(), NI.GetOutDeg()); } double BestQ = -1; // modularity TCnComV CurCmtyV; CmtyV.Clr(); TIntV Cmty1, Cmty2; while (true) { CmtyGirvanNewmanStep(Graph, Cmty1, Cmty2); const double Q = _GirvanNewmanGetModularity(Graph, OutDegH, NEdges, CurCmtyV); //printf("current modularity: %f\n", Q); if (Q > BestQ) { BestQ = Q; CmtyV.Swap(CurCmtyV); } if (Cmty1.Len()==0 || Cmty2.Len() == 0) { break; } } return BestQ; } // At every step two communities that contribute maximum positive value to global modularity are merged. // see: Finding community structure in very large networks, A. Clauset, M.E.J. Newman, C. Moore, 2004 class TCNMQMatrix { private: struct TCmtyDat { double DegFrac; TIntFltH NIdQH; int MxQId; TCmtyDat() : MxQId(-1) { } TCmtyDat(const double& NodeDegFrac, const int& OutDeg) : DegFrac(NodeDegFrac), NIdQH(OutDeg), MxQId(-1) { } void AddQ(const int& NId, const double& Q) { NIdQH.AddDat(NId, Q); if (MxQId==-1 || NIdQH[MxQId] CmtyQH; TVec MxQHeap; TUnionFind CmtyIdUF; double Q; public: TCNMQMatrix(const PUNGraph& Graph) : CmtyQH(Graph->GetNodes()), MxQHeap(Graph->GetNodes(), 0), CmtyIdUF(Graph->GetNodes()) { Init(Graph); } void Init(const PUNGraph& Graph) { const double M = 0.5/Graph->GetEdges(); // 1/2m Q = 0.0; for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { CmtyIdUF.Add(NI.GetId()); const int OutDeg = NI.GetOutDeg(); if (OutDeg == 0) { continue; } TCmtyDat& Dat = CmtyQH.AddDat(NI.GetId(), TCmtyDat(M * OutDeg, OutDeg)); for (int e = 0; e < NI.GetOutDeg(); e++) { const int DstNId = NI.GetOutNId(e); const double DstMod = 2 * M * (1.0 - OutDeg * Graph->GetNI(DstNId).GetOutDeg() * M); Dat.AddQ(DstNId, DstMod); } Q += -1.0*TMath::Sqr(OutDeg*M); if (NI.GetId() < Dat.GetMxQNId()) { MxQHeap.Add(TFltIntIntTr(Dat.GetMxQ(), NI.GetId(), Dat.GetMxQNId())); } } MxQHeap.MakeHeap(); } TFltIntIntTr FindMxQEdge() { while (true) { if (MxQHeap.Empty()) { break; } const TFltIntIntTr TopQ = MxQHeap.PopHeap(); if (! CmtyQH.IsKey(TopQ.Val2) || ! CmtyQH.IsKey(TopQ.Val3)) { continue; } if (TopQ.Val1!=CmtyQH.GetDat(TopQ.Val2).GetMxQ() && TopQ.Val1!=CmtyQH.GetDat(TopQ.Val3).GetMxQ()) { continue; } return TopQ; } return TFltIntIntTr(-1, -1, -1); } bool MergeBestQ() { const TFltIntIntTr TopQ = FindMxQEdge(); if (TopQ.Val1 <= 0.0) { return false; } // joint communities const int I = TopQ.Val3; const int J = TopQ.Val2; CmtyIdUF.Union(I, J); // join Q += TopQ.Val1; TCmtyDat& DatJ = CmtyQH.GetDat(J); { TCmtyDat& DatI = CmtyQH.GetDat(I); DatI.DelLink(J); DatJ.DelLink(I); for (int i = -1; DatJ.NIdQH.FNextKeyId(i); ) { const int K = DatJ.NIdQH.GetKey(i); TCmtyDat& DatK = CmtyQH.GetDat(K); double NewQ = DatJ.NIdQH[i]; if (DatI.NIdQH.IsKey(K)) { NewQ = NewQ+DatI.NIdQH.GetDat(K); DatK.DelLink(I); } // K connected to I and J else { NewQ = NewQ-2*DatI.DegFrac*DatK.DegFrac; } // K connected to J not I DatJ.AddQ(K, NewQ); DatK.AddQ(J, NewQ); MxQHeap.PushHeap(TFltIntIntTr(NewQ, TMath::Mn(J,K), TMath::Mx(J,K))); } for (int i = -1; DatI.NIdQH.FNextKeyId(i); ) { const int K = DatI.NIdQH.GetKey(i); if (! DatJ.NIdQH.IsKey(K)) { // K connected to I not J TCmtyDat& DatK = CmtyQH.GetDat(K); const double NewQ = DatI.NIdQH[i]-2*DatJ.DegFrac*DatK.DegFrac; DatJ.AddQ(K, NewQ); DatK.DelLink(I); DatK.AddQ(J, NewQ); MxQHeap.PushHeap(TFltIntIntTr(NewQ, TMath::Mn(J,K), TMath::Mx(J,K))); } } DatJ.DegFrac += DatI.DegFrac; } if (DatJ.NIdQH.Empty()) { CmtyQH.DelKey(J); } // isolated community (done) CmtyQH.DelKey(I); return true; } static double CmtyCMN(const PUNGraph& Graph, TCnComV& CmtyV) { TCNMQMatrix QMatrix(Graph); // maximize modularity while (QMatrix.MergeBestQ()) { } // reconstruct communities THash IdCmtyH; for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { IdCmtyH.AddDat(QMatrix.CmtyIdUF.Find(NI.GetId())).Add(NI.GetId()); } CmtyV.Gen(IdCmtyH.Len()); for (int j = 0; j < IdCmtyH.Len(); j++) { CmtyV[j].NIdV.Swap(IdCmtyH[j]); } return QMatrix.Q; } }; double CommunityCNM(const PUNGraph& Graph, TCnComV& CmtyV) { return TCNMQMatrix::CmtyCMN(Graph, CmtyV); } }; //namespace TSnap