赛题:金融风控的资金流水分析。
题目内容:通过金融风控的资金流水分析,可有效识别循环转账,辅助公安挖掘洗钱组织,帮助银行预防信用卡诈骗。基于给定的资金流水,检测并输出指定约束条件的所有循环转账,结果准确,用时最短者胜。
题目大意:通过给定的转账数据,建立有向图,求出有向图中所有长度在3~7之间的环。
输入的数据:格式为[output,intput,money]的边表,前面两个都是32位无符号整数(当然题目说明了,小于2^31,所以int32就好),边最多28W条,不重复,结点平均度数小于10。环中同一个ID不可以重复出现(若大环包括小环,则需要分开统计),环的个数不大于300W。转账金额初赛不涉及,复赛才使用到。
输出的环:第一行为环的总个数,从第二行开始,按照环的长度和环的字典序输出所有的环。即先输出长度最短的环,环的字典序不可以有重复。
解题思路:
1、要避免搜索出的路径重复,需要制定一个环的标准型,以题目为例,就是最小的ID在环表示的首位。
2、题目要求最小的结点在每个路径的第一位,故对ID排序后,重映射所有结点序号为[0,n),在搜索时,如果直接对某个结点的出边从小到大搜索,最后得到的答案自然是排好序的。
3、用拓扑排序删去一部分点(将入度为0的结点给删去)。
4、采用领域剪枝,建立图的逆邻接表,反向标记可到达的点,因为最大的环的结点数为7,所以反向标记3或4层都可以。
5、搜索,从每个点从发只搜索以该点为起点和终点的环,对小于该点值得结点不做搜索,搜索达到3层后,就开始判断点是否可以到达该结点,加快搜索速度。
代码:
#include
#include
#include
#include
#include
#include
#include
#include
// 使用mmap 包含的一些头文件
#include
#include
#include
#include
#include
typedef unsigned int uint;
using namespace std;
#define MAXLINE 280000 // 文件包含的最大边数
//#define DEBUG
#ifdef DEBUG
// #define PATH_IN"./data/test_data_10000_280000.txt"
#define PATH_IN"./data/test_data_10000_40000.txt"
// #define PATH_IN"./data/test_data.txt"
#define PATH_OUT"./data/result.txt"
#else
#define PATH_IN"/data/test_data.txt"
#define PATH_OUT"/projects/student/result.txt"
#endif
int circle3[3*500000],circle4[4*500000], circle5[5*1000000], circle6[6*2000000], circle7[7*3000000];
int *circle[]={circle3,circle4, circle5, circle6, circle7};
int Graph[280000][50]; // 邻接表
int GraphInv[280000][255]; //逆邻接表
int *G[280000];
int *GInv[280000];
class ListGraph {
public:
std::vector dataIn; //保存文件的输入数据
std::vector tmp;
std::unordered_map idMap; //进行映射, 将点排序后映射到0~n
std::vector inDegree; //统计每个顶点入度个数
int cnt; // 纪记录总共有多少点
int nodeNum; // 顶点数
// int *reachable; // 记录逆邻接矩阵,能否到达
vector reachable;
bool *visited; // 记录是否访问过
int *loop; // 遍历的时候保存环,
int *loop3, *loop4, *loop5, *loop6, *loop7; // 指针指向一片内存
int cntNum[5]; //记录每种环的数量
int totalCntNum; // 记录总共有多少环
public:
ListGraph(){ //构造函数
loadData();
}
~ListGraph() { // 析构函数
}
void loadData(){
int fd = open(PATH_IN, O_RDONLY);
if (fd == -1)
{
std::cout << "fail to open file"<< std::endl;
exit(0);
}
int len = lseek(fd, 0, SEEK_END);
char *mbuf = (char *)mmap(NULL, len, PROT_READ,MAP_PRIVATE, fd, 0);
uint transferID = 0, receiverID = 0;
dataIn.resize(MAXLINE*2); //提前分配内存,用数组的方式操作
cnt = 0;
char *txtEnd = mbuf + len;
while (mbuf < txtEnd)
{
transferID = 0;
receiverID = 0;
do
{
transferID = transferID * 10 + *mbuf - '0';
} while (*++mbuf != ',');
++mbuf;
do
{
receiverID = receiverID * 10 + *mbuf - '0';
} while (*++mbuf != ',');
while (*++mbuf != '\n');
++mbuf;
dataIn[cnt++] = transferID;
dataIn[cnt++] = receiverID;
}
dataIn.erase(dataIn.begin()+cnt,dataIn.end());
}
void constructGraph() { // 构造图
tmp = dataIn;
// 排序, 去重重复的点
sort(tmp.begin(),tmp.end());
tmp.erase(unique(tmp.begin(),tmp.end()),tmp.end());
nodeNum = tmp.size();
#ifdef DEBUG
printf("%u Nodes in total\n",nodeNum);
#endif
int i = 0;
for(;i
G[i] = Graph[i];
GInv[i] = GraphInv[i];
idMap[tmp[i]]= i; //将点值进行映射
}
// 建立图
inDegree = vector(nodeNum,0); //将每个点的入度置为0
int u,v;
for(i = 0;i
u = idMap[dataIn[i]];
v = idMap[dataIn[i+1]];
if(u > 50000)
break;
*G[u]++ = v;
*GInv[v]++ = u;
++inDegree[v];
}
std::queue que;
int count=0; //统计删除的点数
for(i = 0;i
if(inDegree[i] == 0){
for(int *p = Graph[i];p
if(--inDegree[*p] == 0 && *p < i)
que.push(*p);
}
G[i] = Graph[i];
count++;
}
}
while(!que.empty()){
u = que.front();
que.pop();
for(int *p = Graph[u];p
if(--inDegree[*p] == 0)
que.push(*p);
}
G[u] = Graph[u];
count++;
}
for( i = 0;i < nodeNum;i++){
std::sort(Graph[i],G[i]);
}
#ifdef DEBUG
cout << count << " poits romoved" <
#endif
}
void checkCircle() {
visited = new bool[nodeNum]();
reachable = vector(nodeNum,-1);
loop = new int[7]; //遍历的途中用来保存环
totalCntNum=cntNum[0]=cntNum[1]=cntNum[2]=cntNum[3]=cntNum[4]=cntNum[5]=0;
loop3 = circle3,loop4 = circle4,loop5 = circle5,loop6 =circle6,loop7 = circle7;
for(int i=0;i
#ifdef DEBUG
if(i%1000 == 0)
cout << i << "/" <
#endif
if (Graph[i] < G[i])
{
// 3邻域减枝
for (int *p1=GraphInv[i]; p1
{
if (*p1 < i || visited[*p1])
continue;
visited[*p1] = true;
for (int *p2=GraphInv[*p1]; p2
{
if (*p2 < i || visited[*p2])
continue;
reachable[*p2] = i;
visited[*p2] = true;
for (int *p3=GraphInv[*p2];p3
{
if (*p3 < i || visited[*p3])
continue;
reachable[*p3] = i;
}
visited[*p2] = false;
}
visited[*p1] = false;
}
for (int *p=GraphInv[i]; p
reachable[*p] = -2;
dfs(i);
for (int *p=GraphInv[i]; p
reachable[*p] = i;
}
}
totalCntNum = cntNum[0]+cntNum[1]+cntNum[2]+cntNum[3]+cntNum[4];
#ifdef DEBUG
cout << "find circle : " <
#endif
}
void dfs(int &head) {
int *p1, *p2, *p3, *p4, *p5, *p6;
visited[head] = true;
*loop++ = head;
for (p1=Graph[head]; p1
if (*p1 > head)
break;
for (;p1
{
int &v1 = *p1;
visited[v1] = true;
*loop++ = v1;
for (p2=Graph[v1]; p2
if (*p2 > head)
break;
for (; p2
{
int &v2 = *p2;
if (visited[v2])
continue;
if (reachable[v2] == -2)
{
*loop = v2;
for (int k = -2; k <= 0; k++)
*loop3++= tmp[loop[k]];
cntNum[0]++;
}
visited[v2] = true;
*loop++ = v2;
for (p3=Graph[v2]; p3
if (*p3 > head)
break;
for (; p3
{
int &v3 = *p3;
if (visited[v3])
continue;
if (reachable[v3] == -2)
{
*loop = v3;
for (int k = -3; k <=0; k++)
*loop4++= tmp[loop[k]];
cntNum[1]++;
}
visited[v3] = true;
*loop++ = v3;
for (p4=Graph[v3]; p4
if (*p4 > head)
break;
for (; p4
{
int &v4 = *p4;
if (visited[v4])
continue;
if (reachable[v4] == -2)
{
*loop = v4;
for (int k = -4; k <= 0; k++)
*loop5++ = tmp[loop[k]];
cntNum[2]++;
}
else if (reachable[v4] != head)
continue;
visited[v4] = true;
*loop++ = v4;
for (p5=Graph[v4]; p5
if (*p5 > head)
break;
for (; p5
{
int &v5 = *p5;
if (visited[v5])
continue;
if (reachable[v5] == -2)
{
*loop = v5;
for (int k = -5; k <= 0;k++)
*loop6++ = tmp[loop[k]];
cntNum[3]++;
}
else if (reachable[v5] != head)
continue;
visited[v5] = true;
*loop++ = v5;
for (p6=Graph[v5]; p6
if (*p6 > head)
break;
for(; p6
{
int &v6 = *p6;
if (reachable[v6] == -2&& !visited[v6])
{
*loop = v6;
for (int k = -6; k <=0; k++)
*loop7++= tmp[loop[k]];
cntNum[4]++;
}
}
visited[v5] = false;
--loop;
}
visited[v4] = false;
--loop;
}
visited[v3] = false;
--loop;
}
visited[v2] = false;
--loop;
}
visited[v1] = false;
--loop;
}
visited[head] = false;
--loop;
}
void saveResult() {
int i, j, num = totalCntNum;
off_t fileSize = 0;
uint node;
// 先计算保存文件的大小
do{
++fileSize;
num /= 10;
}while (num != 0);
++fileSize; // '\n'
for (i = 0; i < 5; i++)
{
cntNum[i] *= i+3;
fileSize += cntNum[i]; //逗号和'\n'
for (j = 0; j < cntNum[i]; j++)
{
node = circle[i][j];
do
{
++fileSize;
node/=10;
}while(node != 0);
}
}
//保存文件
int fd = open(PATH_OUT, O_RDWR|O_CREAT, 0666);
int ret = fallocate(fd, 0, 0, fileSize);
if (ret != 0) {
printf("fallocateerr %d\n", ret);
}
char *mbuf = (char *)mmap(NULL, fileSize, PROT_WRITE,MAP_SHARED, fd, 0);
char ch, *p = mbuf, *p2;
num = totalCntNum;
// 写结果个数
do{
*mbuf++ = (num % 10) + '0';
num /= 10;
}while(num != 0);
p2 = mbuf - 1;
while (p < p2) { ch = *p; *p++ = *p2; *p2-- = ch; }
*mbuf++ = '\n';
// 写结果内容
for (i = 0; i < 5; i++)
{
for (j = 0; j
{
uint x = circle[i][j];
p = mbuf;
do
{
*p++ = (x % 10) + '0';
x /= 10;
}while(x != 0);
p2 = p--;
while (mbuf < p) {ch = *p; *p-- = *mbuf;*mbuf++ = ch;}
mbuf = p2;
if (j != 0 && (j+1) % (i+3) == 0)
*mbuf++= '\n';
else
*mbuf++ = ',';
}
}
exit(0);
}
};
int main(){
#ifdef DEBUG
clock_t startTime,endTime;
startTime = clock();
#endif
ListGraph My;
My.constructGraph();
My.checkCircle();
My.saveResult();
#ifdef DEBUG
endTime = clock(); // 计时结束
cout << "The run time is : "
<< (double)(endTime - startTime) / CLOCKS_PER_SEC<< "s" << std::endl;
#endif
return 0;
}
最后系统判分:0.6268
王帅:重庆大学无线通信技术硕士研究生,主研方向为AI算法及其应用。