# 前言
闲置许久,一个完整的项目基本落地做完。近期时间稍浅,得以停下来抬头望月。过去的一段时间,有份收获,有份努力。
简单介绍代码背景:
使用Java代码来计算两组数据的相关性,使用double类型,要求返回一个二维表,表中的格子展示即为所计算出的对应值。属于源自于Clickhouse数据库,大概量级在几十万一组,计算依赖于一下坐标。返回数据需包含X轴信息、Y轴信息及坐标数值信息。
<dependency>
<groupId>net.sf.jsci</groupId>
<artifactId>jsci</artifactId>
<version>1.2</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-math3</artifactId>
<version>3.6.1</version>
</dependency>
1
2
3
4
5
6
7
8
9
10
2
3
4
5
6
7
8
9
10
# 第一版实现代码:
利用三个集合分别来维护X轴、Y轴、以及坐标数据。由于相关性计算的特性,可以一次计算在对角上进行赋值处理,映射到相反坐标下的另一组数据。其实本次代码已经能够实现功能并完成需求,但基于开发之初,认为有很多地方值得优化,处理的效率还可以提高
/**
* 相关性计算
*
* @param maps 按列变量数据
* @param calcMethod 计算方法
* @return dto
*/
private AnalysisHotChartDTO calcAnalysisData(Map<String, double[]> maps, Integer calcMethod) throws ParseException {
AnalysisHotChartDTO dto = new AnalysisHotChartDTO();
// 存放坐标数值
Map<String, LocationDTO> dataMap = Maps.newLinkedHashMap();
// x轴
List<String> xNameList = Lists.newArrayList(maps.keySet());
// y轴(保持插入顺序)
Set<String> yNameList = Sets.newLinkedHashSet();
// 坐标数据
List<JSONArray> locationList = Lists.newArrayList();
// 设置格式
DecimalFormat df = new DecimalFormat("0.0000" );
Integer xLocation = 0;
for (String xKey : maps.keySet()) {
Integer yLocation = 0;
for (String yKey : maps.keySet()) {
yNameList.add(yKey);
// 坐标一致时相关性为1,无需计算
if (xKey.equals(yKey)) {
LocationDTO locationDTO = new LocationDTO();
locationDTO.setXLocation(xLocation);
locationDTO.setYLocation(yLocation);
locationDTO.setValue(1);
dataMap.put(xKey + "_" + xKey, locationDTO);
yLocation++;
continue;
}
// 判断相关性是否已经计算,若存在则无需计算
if (!dataMap.containsKey(xKey + "_" + yKey) || !dataMap.containsKey(yKey + "_" + xKey)) {
LocationDTO locationDTO = new LocationDTO();
locationDTO.setXLocation(xLocation);
locationDTO.setYLocation(yLocation);
double correlation = RelevanceAnalysisUtil.correlation(maps.get(xKey), maps.get(yKey), calcMethod);
String format = df.format(correlation);
// 计算一次填入两个坐标数据
locationDTO.setValue(Double.parseDouble(format));
dataMap.put(xKey + "_" + yKey, locationDTO);
locationDTO = new LocationDTO();
locationDTO.setXLocation(yLocation);
locationDTO.setYLocation(xLocation);
locationDTO.setValue(Double.parseDouble(format));
dataMap.put(yKey + "_" + xKey, locationDTO);
}
yLocation++;
}
xLocation++;
}
dto.setYaxis(new ArrayList<>(yNameList));
dto.setXaxis(xNameList);
dataMap.forEach((k, v) -> {
JSONArray array = new JSONArray();
array.add(v.getXLocation());
array.add(v.getYLocation());
array.add(v.getValue());
locationList.add(array);
}
);
dto.setData(locationList);
return dto;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# 第二版实现代码:
因为已经知晓相关性计算坐标图表信息,故可以利用类似于冒泡的思想进行处理,无需借助坐标。并且在第二版的处理中,由于每组的数据量非常大,所以减少了很多对象的创建,同时也减少了对于集合的循环遍历操作,实际计算效率提升50%。
/**
* 计算相关性
*
* @param calcData 计算数据
* @param calcMethod 计算方法
* @return dto
*/
public static AnalysisHotChartDTO calcRelevanceAnalysis(Map<String, double[]> calcData, Integer calcMethod) {
AnalysisHotChartDTO dto = new AnalysisHotChartDTO();
List<JSONArray> locationJsonList = Lists.newArrayList();
// x轴
List<String> xNameList = Lists.newArrayList(calcData.keySet());
// y轴(实际与x轴保持一致)
List<String> yNameList = Lists.newArrayList(calcData.keySet());
// 设置格式
DecimalFormat df = new DecimalFormat("0.0000" );
// 初始化对角线为1
for (int i = 0; i < xNameList.size(); i++) {
JSONArray locationInfo = new JSONArray();
locationInfo.add(xNameList.get(i));
locationInfo.add(xNameList.get(i));
locationInfo.add(1);
locationJsonList.add(locationInfo);
}
// 计算相关性
for (int xIndex = 0; xIndex < xNameList.size(); xIndex++) {
String xKey = xNameList.get(xIndex);
for (int yIndex = xIndex + 1; yIndex < xNameList.size(); yIndex++) {
String yKey = xNameList.get(yIndex);
// 只计算上三角矩阵中的相关性
double correlation;
try {
correlation = RelevanceAnalysisUtil.correlation(calcData.get(xKey), calcData.get(yKey), calcMethod);
} catch (Exception e) {
log.error("calcRelevanceAnalysis exist error,error input var is:{} , {} , errorMessage: {}", xKey, yKey, e.getMessage());
correlation = 0.0;
}
if (Double.isNaN(correlation)) {
correlation = 0.0;
}
String format = df.format(correlation);
JSONArray locationInfo = new JSONArray();
locationInfo.add(xNameList.get(xIndex));
locationInfo.add(xNameList.get(yIndex));
locationInfo.add(Double.parseDouble(format));
locationJsonList.add(locationInfo);
locationInfo = new JSONArray();
locationInfo.add(xNameList.get(yIndex));
locationInfo.add(xNameList.get(xIndex));
locationInfo.add(Double.parseDouble(format));
locationJsonList.add(locationInfo);
}
}
dto.setXaxis(xNameList);
dto.setYaxis(yNameList);
dto.setData(locationJsonList);
return dto;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57