Created
June 26, 2018 07:40
-
-
Save pfmiles/3bc3a66793a5ea5bb0ebbe4a4b6699d8 to your computer and use it in GitHub Desktop.
可配置的随机字符串产生器,可满足任意大窗口区间内生成值绝不重复的要求
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package test; | |
import java.util.ArrayList; | |
import java.util.Iterator; | |
import java.util.LinkedHashSet; | |
import java.util.List; | |
import java.util.Random; | |
import java.util.Set; | |
/** | |
* 可自定义的随机字符串产生器; | |
* 通过指定组成最终随机值的各“段(segment)”的定义,来自动产生出"uniqness()"个数窗口内循环不重复的随机值; | |
* 若从randSeq被创建、执行后产生的第一个值算起,往后数uniqness()个值,能保证100%不重复; | |
* 若从randSeq被创建、执行后产生的第任意一个值算起,往前、或往后数uniqness()个值,能保证99.7%的概率不重复; | |
* 可通过控制一些参数来控制最终产生出的随机值看起来的“随机程度” | |
* 注意:各segment最好不要有重叠值,否则破坏结果的唯一性 | |
* | |
* 目前该工具主要用于数据安全脱敏“假名替换”场景 | |
* | |
* @author pf-miles Feb 28, 2018 4:27:22 PM | |
*/ | |
public class RandSeq implements Iterator<String> { | |
/* | |
* 决定了产生的值序列看上去的“随机程度”,也因为该值决定了内存会缓存多少待输出数据,因此该值也直接影响运行时内存占用量 | |
* 运行时内存占用量约为:randness的segRanges.length次方个待输出String片段 | |
*/ | |
private int randness; | |
private SegRange[] segRanges; | |
private transient RuntimeSeq rs = null; | |
private Random rand = new Random(); | |
/** | |
* 取下一个随机值,该随机值保证与前后"uniqness()"个连续的随机取值不重复的概率大于99.7% | |
* @return 下一个随机值 | |
*/ | |
@Override | |
public synchronized String next() { | |
// 1.若还没有运行时状态或状态已用完,则先准备运行时状态 | |
if (rs == null || !rs.hasNext()) { | |
rs = new RuntimeSeq(); | |
} | |
try { | |
// 2.直接取下一个随机值 | |
return rs.next(); | |
} catch (RuntimeSeqRefreshException e) { | |
// 重建runtimeSeq,全新轮回 | |
rs = new RuntimeSeq(); | |
return rs.next(); | |
} | |
} | |
private static final class RuntimeSeqRefreshException extends RuntimeException { | |
private static final long serialVersionUID = 179475654213413913L; | |
@Override | |
public synchronized Throwable fillInStackTrace() { | |
return this; | |
} | |
} | |
// 运行时seq,封装了randness随机逻辑 | |
private class RuntimeSeq implements Iterator<String> { | |
private List<CloneableIterator<String>> loopIters;// 各层候选值模板, 包括顶层候选值 | |
private SegNode root; // 当前动态输出值, root的cur值是null, 第二层值才是顶层值 | |
public RuntimeSeq() { | |
loopIters = new ArrayList<CloneableIterator<String>>(segRanges.length); | |
for (SegRange sr : segRanges) | |
loopIters.add(sr.iterator()); | |
} | |
@Override | |
public boolean hasNext() { | |
return loopIters.get(0).hasNext() || (root != null && !root.isFinished()); | |
} | |
@Override | |
public String next() { | |
// 1.如果root未初始化,则初始化 | |
if (root == null) | |
root = buildRandnessTree(null, 0); | |
// 2.递归下降取随机段,组合成最终值 | |
List<String> retList = new ArrayList<String>(); | |
try { | |
constructOne(retList, root, 0); | |
} catch (ChildDelException e) { | |
// root代表的整颗树已被耗尽,重新生成 | |
if (this.hasNext()) { | |
root = buildRandnessTree(null, 0); | |
try { | |
constructOne(retList, root, 0); | |
} catch (ChildDelException ex) { | |
throw new RuntimeException("Impossible.");// 这不可能发生,除非程序写错 | |
} | |
} else { | |
// 整个runtimeSeq需要被重建, 新的轮回开始 | |
throw new RuntimeSeqRefreshException(); | |
} | |
} | |
StringBuilder ret = new StringBuilder(); | |
for (String s : retList) | |
ret.append(s); | |
return ret.toString(); | |
} | |
/* | |
* 在sb中构造一条完整的随机值 | |
* | |
* @param retList 构造好的结果列表 | |
* @param thisNode 当前层级node节点 | |
* @param curLevel 当前层级,0级为root节点,1级才是顶层值 | |
* | |
* @throws ChildDelException 当children动态节点输出耗尽时抛出,表示应该删除当前children动态输出节点 | |
*/ | |
private void constructOne(List<String> retList, SegNode thisNode, | |
int curLevel) throws ChildDelException { | |
// 1.加入本层值, 若不是root层的话(root层本层值为null) | |
if (curLevel != 0) | |
retList.add(thisNode.cur); | |
// 2.若chilren耗尽则抛出childDelException,若还没耗尽,则向下(children)递归 | |
while (true) {// :child | |
if (thisNode.children != null && !thisNode.children.isEmpty()) { | |
// 2.1.动态输出节点children还有,直接取 | |
if (_fetchNextChild(retList, thisNode, curLevel)) | |
continue; | |
return; | |
} else if (thisNode.childIter != null && thisNode.childIter.hasNext()) { | |
// 2.2.动态输出节点children没有,从候选值iter补充后取 | |
List<SegNode> children = new ArrayList<SegNode>(); | |
for (String val : drawVals(thisNode.childIter)) | |
children.add(buildRandnessTree(val, curLevel + 1)); | |
thisNode.children.addAll(children); | |
// 然后从children里取 | |
if (_fetchNextChild(retList, thisNode, curLevel)) | |
continue; | |
return; | |
} else { | |
// 2.3.children和iter都没有(可能是叶子节点,也可能是分支节点children耗尽),通知上层删除本节点 | |
// 是否由于碰到叶子节点而抛出本exception | |
if (curLevel == loopIters.size()) { | |
// 遇到叶子节点,是一次成功的执行 | |
throw new ChildDelException(true); | |
} else { | |
// 由于分支节点耗尽而抛出本exception,需要将已经加入的本层值吐出来 | |
if (curLevel != 0) | |
retList.remove(retList.size() - 1); | |
throw new ChildDelException(false); | |
} | |
} | |
} | |
} | |
// 取下一个children输出节点,返回true/false指示是否需要continue | |
private boolean _fetchNextChild(List<String> retList, SegNode thisNode, int curLevel) { | |
int randIdx = (int) (rand.nextDouble() * thisNode.children.size()); | |
try { | |
constructOne(retList, thisNode.children.get(randIdx), curLevel + 1); | |
return false; | |
} catch (ChildDelException e) { | |
thisNode.children.remove(randIdx); | |
if (e.leaf) { | |
// 碰到叶子节点,说明成功构造了retList,返回 | |
return false; | |
} else { | |
// 分支节点耗尽,继续循环子节点 | |
return true;// 指示外层循环continue, goto :child | |
} | |
} | |
} | |
private SegNode buildRandnessTree(String cur, int childIterIdx) { | |
SegNode ret = new SegNode(); | |
ret.cur = cur; | |
// build children iter & children | |
if (childIterIdx < loopIters.size()) { | |
ret.childIter = childIterIdx == 0 ? loopIters.get(childIterIdx) // 顶层iter用本体,其它层用clone | |
: loopIters.get(childIterIdx).clone(); | |
List<SegNode> children = new ArrayList<SegNode>(); | |
for (String val : drawVals(ret.childIter)) | |
children.add(buildRandnessTree(val, childIterIdx + 1)); | |
ret.children = children; | |
} | |
return ret; | |
} | |
// 从iter中连续获取randness个元素,以list形式返回,如果iter中元素不够,则返回的元素个数可以少于randness个;若iter中完全没有任何元素,则整体返回null | |
private List<String> drawVals(Iterator<String> iter) { | |
if (iter == null || !iter.hasNext()) | |
return null; | |
List<String> ret = new ArrayList<String>(); | |
for (int i = 0; i < randness && iter.hasNext(); i++) | |
ret.add(iter.next()); | |
return ret; | |
} | |
} | |
// 指示上层节点应当删除该下层节点的纯控制流类型exception | |
private static final class ChildDelException extends Exception { | |
private static final long serialVersionUID = -1698558066692055092L; | |
private boolean leaf; // 指示是否由于碰到叶子节点而导致抛出本exception而不是分支节点children耗尽 | |
public ChildDelException(boolean leaf) { | |
this.leaf = leaf; | |
} | |
@Override | |
public synchronized Throwable fillInStackTrace() { | |
return this; | |
} | |
} | |
// 构造树状随机结构的节点 | |
private static class SegNode { | |
private String cur; // 当前节点值,rootNode的该值为null,rootNode的children才是顶层值 | |
private List<SegNode> children; // 下一层已选动态输出节点 | |
private Iterator<String> childIter;// 下一层可选值iter | |
public boolean isFinished() { | |
return children.isEmpty(); | |
} | |
} | |
public RandSeq(int randness, SegRange... segRanges) { | |
if (randness < 1 || segRanges == null || segRanges.length == 0) | |
throw new RuntimeException("Illegal constructor arguments!"); | |
this.randness = randness; | |
this.segRanges = segRanges; | |
} | |
/** | |
* 返回本randSeq产生的随机序列的“唯一性程度”,即该seq能保证连续产生多少个随机值不重复(或大概率不重复,>99.7%) | |
* @return uniqness | |
*/ | |
public long uniqness() { | |
long ret = 0l; | |
for (SegRange sr : this.segRanges) | |
if (ret == 0l) | |
ret = sr.size(); | |
else | |
ret *= sr.size(); | |
return ret; | |
} | |
public interface CloneableIterator<T> extends Iterator<T>, Cloneable { | |
CloneableIterator<T> clone(); | |
} | |
/** | |
* 段定义 | |
* | |
* @author pf-miles Feb 28, 2018 4:58:14 PM | |
*/ | |
public static interface SegRange extends Iterable<String> { | |
/** | |
* 获取本range所含段值个数 | |
* @return | |
*/ | |
long size(); | |
@Override | |
CloneableIterator<String> iterator(); | |
} | |
/** | |
* 固定枚举可选值列表组成的随机段 | |
* | |
* @author pf-miles Feb 28, 2018 5:41:19 PM | |
*/ | |
public static class EnumRange implements SegRange { | |
private Set<String> enums = new LinkedHashSet<String>(); | |
public EnumRange(String... enums) { | |
if (enums == null || enums.length == 0) | |
throw new RuntimeException("Illegal constructor arguments."); | |
for (String e : enums) | |
this.enums.add(e); | |
} | |
private final class EnumRangeIter implements CloneableIterator<String> { | |
private Set<String> enums; | |
private Iterator<String> iter; | |
public EnumRangeIter(Set<String> enums) { | |
this.enums = enums; | |
this.iter = this.enums.iterator(); | |
} | |
@Override | |
public boolean hasNext() { | |
return this.iter.hasNext(); | |
} | |
@Override | |
public String next() { | |
return this.iter.next(); | |
} | |
@Override | |
public CloneableIterator<String> clone() { | |
return new EnumRangeIter(this.enums); | |
} | |
} | |
@Override | |
public CloneableIterator<String> iterator() { | |
return new EnumRangeIter(enums); | |
} | |
@Override | |
public long size() { | |
return this.enums.size(); | |
} | |
} | |
/** | |
* 连续数字组成的随机段 | |
* | |
* @author pf-miles Feb 28, 2018 5:41:56 PM | |
*/ | |
public static class IntRange implements SegRange { | |
private long start; | |
private long end; | |
/** | |
* 范围起止,从start(inclusive)到end(inclusive)的所有数字作为范围 | |
* @param start inclusive | |
* @param end inclusive | |
*/ | |
public IntRange(long start, long end) { | |
if (!(start < end)) | |
throw new RuntimeException("Illegal constructor arguments."); | |
this.start = start; | |
this.end = end; | |
} | |
private final class IntRangeIter implements CloneableIterator<String> { | |
private long cur; | |
private long end; | |
@Override | |
public boolean hasNext() { | |
return cur <= end; | |
} | |
@Override | |
public String next() { | |
String ret = String.valueOf(cur); | |
cur++; | |
return ret; | |
} | |
@Override | |
public CloneableIterator<String> clone() { | |
IntRangeIter ret = new IntRangeIter(); | |
ret.cur = this.cur; | |
ret.end = this.end; | |
return ret; | |
} | |
} | |
@Override | |
public CloneableIterator<String> iterator() { | |
IntRangeIter ret = new IntRangeIter(); | |
ret.cur = start; | |
ret.end = IntRange.this.end; | |
return ret; | |
} | |
@Override | |
public long size() { | |
return end - start + 1; | |
} | |
} | |
@Override | |
public boolean hasNext() { | |
return true;// 循环往复,永远有下一个值 | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package test; | |
import java.util.ArrayList; | |
import java.util.HashSet; | |
import java.util.List; | |
import org.junit.Assert; | |
import org.junit.Test; | |
import test.RandSeq.EnumRange; | |
import test.RandSeq.IntRange; | |
/** | |
* @author pf-miles Mar 1, 2018 9:05:36 PM | |
*/ | |
public class RandSeqTest { | |
@Test | |
public void test() { | |
// randness等于range | |
System.out.println("randness equals to range length: "); | |
RandSeq seq = new RandSeq(2, new EnumRange("apple", "pear"), new IntRange(1, 3)); | |
Assert.assertTrue(seq.uniqness() == 6l); | |
for (int i = 0; i < seq.uniqness() + 1; i++) | |
System.out.println(seq.next()); | |
} | |
// 测试只有1个候选值的情况 | |
@Test | |
public void testOneCandidate() { | |
RandSeq seq = new RandSeq(1, new EnumRange("test")); | |
Assert.assertTrue(seq.uniqness() == 1l); | |
for (int i = 0; i < seq.uniqness() + 1; i++) | |
System.out.println(seq.next()); | |
} | |
// 测试randness值各种边界情况 | |
@Test | |
public void test32() { | |
// randness小于range | |
System.out.println("randness less than range length: "); | |
RandSeq seq = new RandSeq(1, new EnumRange("test1", "test2"), new IntRange(1, 2), | |
new IntRange(3, 4)); | |
Assert.assertTrue(seq.uniqness() == 8l); | |
for (int i = 0; i < seq.uniqness() + 1; i++) | |
System.out.println(seq.next()); | |
// randness大于range | |
System.out.println("randness bigger than range length: "); | |
seq = new RandSeq(3, new EnumRange("test1", "test2"), new IntRange(1, 2), | |
new IntRange(3, 4)); | |
Assert.assertTrue(seq.uniqness() == 8l); | |
for (int i = 0; i < seq.uniqness() + 1; i++) | |
System.out.println(seq.next()); | |
} | |
// 测试产生序列严格在任意位置开始、往后数uniqness个值里有至少99.7%的值不重复 | |
@Test | |
public void testUniqness() { | |
RandSeq seq = new RandSeq(20, new IntRange(1, 100), new IntRange(101, 200), | |
new IntRange(201, 300)); | |
Assert.assertTrue(seq.uniqness() == 1000000l); | |
List<String> vals = new ArrayList<String>(1000000); | |
// 从第50多w个值开始 | |
for (int i = 0; i < 500000; i++) | |
seq.next(); | |
// 累积uniqness个值 | |
for (int i = 0; i < seq.uniqness(); i++) | |
vals.add(seq.next()); | |
System.out.println(vals.size()); | |
System.out.println(new HashSet<String>(vals).size()); | |
Assert.assertTrue((new HashSet<String>(vals).size() / (double) seq.uniqness()) > 0.997); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment