Created
November 25, 2018 10:03
-
-
Save badbye/bfeddae6a8504c7e04a0da331ec1d6a6 to your computer and use it in GitHub Desktop.
solve OLS in linear regression using map reduce
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# referenece: https://ieeexplore.ieee.org/document/7345473/ | |
# 验证该论文的分布式计算结果 | |
#' 准备数据 | |
X = matrix(rnorm(400), 20, 20) | |
Y = rnorm(20) | |
#' 计算分隔数据的 index | |
splitByRowIndex = function(nrows, size){ | |
block.len = nrows / size | |
res = list() | |
for (i in 1:size){ | |
res[[i]] = c((i-1) * blocksize + 1, i * blocksize) | |
} | |
res | |
} | |
#' 把 `mat 矩阵`按行等分成 `size` 份 | |
splitMatByRow = function(mat, size){ | |
row.index = splitByRowIndex(nrow(mat), size) | |
lapply(row.index, function(val) mat[val[1]:val[2], ]) | |
} | |
#' Algorithm 1 Main function | |
block.number = 4 # 数据分成 4 份 | |
x.block = splitMatByRow(X, block.number) | |
y.block = splitMatByRow(matrix(Y, ncol = 1), block.number) | |
# Algorithm 2 Mapper function in step1 | |
x.block.qr = lapply(x.block, function(val){ | |
qrresult = qr(val) | |
list(Q=qr.Q(qrresult), R=qr.R(qrresult)) | |
}) | |
# Algorithm 3 Reducer function in step1 | |
Rtemp = do.call(rbind, lapply(x.block.qr, function (l) l$R)) | |
qrresult = qr(Rtemp) | |
Rtemp.qr = list(Q=qr.Q(qrresult), R=qr.R(qrresult)) | |
R.final = Rtemp.qr$R | |
Rtemp.Q.divide = splitMatByRow(Rtemp.qr$Q, block.number) | |
Q.result = list() | |
for (i in 1:block.number){ | |
Q.result[[i]] = t(x.block.qr[[i]]$Q %*% Rtemp.Q.divide[[i]]) # Multiply | |
} | |
# Algorithm 4 Reduce function in step2 | |
V = list() | |
for (i in 1:block.number){ | |
V[[i]] = Q.result[[i]] %*% matrix(y.block[[i]], ncol = 1) # Multiply | |
} | |
# Algorithm 5 Reduce function in step3 | |
V.sum = Reduce('+', V) | |
beta = as.numeric(solve(R.final) %*% V.sum) | |
# real answer | |
ans = lm(Y ~ X - 1) | |
coefficients = ans$coefficients | |
# comare | |
sum((beta - coefficients) ** 2) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment