Last active
December 16, 2015 16:49
-
-
Save ellispritchard/5466339 to your computer and use it in GitHub Desktop.
MarkLogic XQuery UUID version 1 implementation: generates version 1 UUIDs, but using xdmp:host() instead of MAC address (as MAC address of node is not accessible to XQuery). Posted here for comment, inspiration etc.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
xquery version '1.0-ml'; | |
module namespace uuid = "http://mobi1.co.uk/lib/uuid/v1"; | |
(: Node field of the UUID: the MarkLogic host id from xdmp:host() stands in for a MAC address. :)
declare private variable $g_node as xs:unsignedLong := xdmp:host(); | |
(: UUID version number; uuid:uuid() emits it as the leading hex digit of the third group. :)
declare private variable $g_uuidVersionBit as xs:unsignedLong := 1; | |
(: 128 = binary 10000000; uuid:uuid() adds it to the high clock-sequence byte after reducing that byte mod 64. :)
declare private variable $g_uuidReservedBit as xs:unsignedLong := 128; | |
(: Added to the value returned by uuid:time() to re-base it on the UUID epoch (Oct 15, 1582), in 100ns units.
   NOTE(review): presumably xdmp:wallclock-to-timestamp counts from 1970-01-01 - confirm. :)
declare private variable $g_gregorianTimeOffset as xs:unsignedLong := xdmp:hex-to-integer('1B21DD213814000'); | |
(: | |
Calculates a type 1 UUID. | |
http://www.ietf.org/rfc/rfc4122.txt specifies the layout of a UUID is as follows: | |
 0               1               2               3
 0 1 2 3 4 5 6 7 8 9 a b c d e f 0 1 2 3 4 5 6 7 8 9 a b c d e f
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|                           time_low                            |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|           time_mid            |        time_hi        |version|
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|clk_seq_hi |res|  clk_seq_low  |          node (0-1)           |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|                          node (2-5)                           |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 0 1 2 3 4 5 6 7 8 9 a b c d e f 0 1 2 3 4 5 6 7 8 9 a b c d e f
This implements version 1 of the UUID specification. | |
The timestamp is a 60-bit value, 100-nanoseconds since the UUID epoch (Oct 15, 1582). | |
The clock sequence is a 14 bit value. | |
The node is a 48-bit value, based on MarkLogic's xdmp:host(), rather than the MAC address. | |
Clean-room XQuery Implementation based on method in: http://www.itu.int/rec/T-REC-X.667-200409-S/en | |
[email protected] | |
:) | |
declare function uuid:uuid() as xs:string
{
  (: Read the clock first, then look up the clock sequence for that instant. :)
  let $now := uuid:time()
  let $seq := uuid:clock-seq($now)

  (: Fixed-width hex renderings of the three raw inputs. :)
  let $stampHex := uuid:pad0(xdmp:integer-to-hex($now + $g_gregorianTimeOffset), 16) (: 16 digits, low 15 (60 bits) used :)
  let $seqHex := uuid:pad0(xdmp:integer-to-hex($seq), 4)                              (: 4 digits, low 14 bits used :)
  let $nodeHex := fn:substring(uuid:pad0(xdmp:integer-to-hex($g_node), 16), 5, 12)    (: low 48 bits of the host id :)

  (: High clock-sequence byte: keep its 6 low bits, then add the reserved-bit value. :)
  let $seqHighByte := xdmp:hex-to-integer(fn:substring($seqHex, 1, 2))

  (: The five dash-separated groups, in output order: 8-4-4-4-12 hex digits. :)
  let $groups := (
    fn:substring($stampHex, 9, 8),
    fn:substring($stampHex, 5, 4),
    fn:concat(xs:string($g_uuidVersionBit), fn:substring($stampHex, 2, 3)),
    fn:concat(
      xdmp:integer-to-hex($seqHighByte mod 64 + $g_uuidReservedBit),
      fn:substring($seqHex, 3, 2)
    ),
    (: node digits 9-12 are emitted before digits 1-8 :)
    fn:concat(fn:substring($nodeHex, 9, 4), fn:substring($nodeHex, 1, 8))
  )
  return fn:string-join($groups, '-')
};
(:
  Gets the 64-bit system time (100ns resolution) and adjusts it so that repeated
  calls never hand out a value this module instance has already issued.

  State:
    $g_lastTimestamp - the last raw clock reading that started a new tick
    $g_uuidsThisTick - how many extra values have been issued on top of that reading

  The values issued since the last reset are $g_lastTimestamp .. $g_lastTimestamp + $g_uuidsThisTick.
  If the raw reading falls anywhere inside that range (not only exactly on $g_lastTimestamp)
  the next unused value is returned instead; otherwise the raw reading would duplicate a
  value that was produced earlier by bumping.

  A raw reading below $g_lastTimestamp (clock moved backwards) is returned unchanged so that
  uuid:clock-seq can still see the backwards step.

  NB fn:current-dateTime() is evaluated through xdmp:eval() because it always returns the
  same value within a single transaction.
:)
declare private variable $g_lastTimestamp := 0;
declare private variable $g_uuidsThisTick := 0;
declare private function uuid:time() as xs:unsignedLong {
  (: get current time in a different transaction, manually write-locking a host-specific URI; the original author's intent is a unique reading per host :)
  let $rawTimestamp := xdmp:wallclock-to-timestamp(xdmp:eval("xdmp:lock-for-update(fn:concat('/uuid/',xdmp:host())),fn:current-dateTime()"))
  (: highest value issued so far; computed before any xdmp:set below changes the counters :)
  let $lastIssued := $g_lastTimestamp + $g_uuidsThisTick
  return
    if (($rawTimestamp ge $g_lastTimestamp) and ($rawTimestamp le $lastIssued)) then (
      (: same tick, or the clock has only caught up with values already issued: take the next free one :)
      xdmp:set($g_uuidsThisTick, $g_uuidsThisTick + 1),
      $lastIssued + 1
    ) else (
      (: new tick (or backwards clock step): restart the per-tick counter from the raw reading :)
      xdmp:set($g_uuidsThisTick, 0),
      xdmp:set($g_lastTimestamp, $rawTimestamp),
      $rawTimestamp
    )
};
(:
  Left-pads $value with '0' characters up to a total length of $len.
  A value that is already $len characters or longer is returned unchanged.
:)
declare private function uuid:pad0($value as xs:string, $len as xs:int) as xs:string {
  let $missing := $len - fn:string-length($value)
  (: (1 to $missing) is empty when $missing is below 1, which yields an empty prefix :)
  let $zeros := fn:string-join((for $i in (1 to $missing) return '0'), '')
  return fn:concat($zeros, $value)
};
(:
  Returns the ClockSeq value of the UUID configuration element as an xs:integer.
  The configuration element is kept in $g_clock-seq-cache and is re-fetched through
  uuid:update-clock-seq when any of these holds:
    - nothing has been cached yet,
    - the cached LastUTC is later than $time,
    - $time is more than $g_clock-seq-update-period past the cached LastUTC.
  $time is the timestamp produced by uuid:time() (100ns units).
:)
(: calls uuid:update-clock-seq in a different transaction to avoid holding write-lock (and thus blocking UUID generation) during rest of possibly lengthy transaction :) | |
declare private variable $g_clock-seq-cache := (); | |
declare private variable $g_clock-seq-update-period := 10 * 10 * 1000 * 1000; (: 10 seconds :) | |
declare private function uuid:clock-seq($time as xs:unsignedLong) as xs:integer { | |
(: empty sequence while the cache is still empty :)
let $lastUTC := xs:unsignedLong($g_clock-seq-cache/LastUTC) | |
let $clock-seq := | |
if(fn:empty($lastUTC) or ($lastUTC gt $time) or ($time > $lastUTC + $g_clock-seq-update-period)) then ( | |
(: not read config already, or time adjusted backwards, or should persist anyway :) | |
(: the evaluated program imports this module from '/lib/uuid-v1.xqy' and passes $time as an external variable :)
xdmp:eval("xquery version '1.0-ml'; | |
import module namespace uuid = 'http://mobi1.co.uk/lib/uuid/v1' at '/lib/uuid-v1.xqy'; | |
declare variable $time as xs:unsignedLong external; | |
uuid:update-clock-seq($time)", | |
(xs:QName('time'),$time) | |
) | |
) | |
(: cache is still fresh: reuse it without touching the database :)
else $g_clock-seq-cache | |
(: remember whichever configuration element was chosen above for the next call :)
let $_ := xdmp:set($g_clock-seq-cache, $clock-seq) | |
return xs:integer($clock-seq/ClockSeq) | |
}; | |
(:
  Reads, updates and re-stores the host-specific UUID configuration document
  ('/uuid/<host id>/UUID-v1.xml') and returns the newly stored element.

  The element holds two fields:
    LastUTC  - the $time passed to this call
    ClockSeq - the 'clock sequence'; kept as stored, unless the stored LastUTC is later
               than $time, in which case it is incremented (mod 2^14) and the event is logged.
               When no document exists yet a random value is generated.

  Persisting the clock sequence prevents the risk of generating identical UUIDs
  if the clock is adjusted backwards.
:)
declare private variable $g_2_pow_14 := 16384; (: 2^14 - clock seq is limited to 14 bits :)
declare function uuid:update-clock-seq($time as xs:unsignedLong) as element() {
  let $configUri := fn:concat('/uuid/',$g_node,'/UUID-v1.xml')
  let $stored := fn:doc($configUri)/node()
  let $nextSeq :=
    if (fn:empty($stored)) then
      (: no configuration yet: start from a random clock sequence :)
      xdmp:random(xs:integer($g_2_pow_14 - 1))
    else
      let $storedUTC := xs:unsignedLong($stored/LastUTC)
      let $storedSeq := xs:integer($stored/ClockSeq)
      return
        if ($storedUTC gt $time) then (
          (: clock went backwards: move to the next clock sequence so UUIDs stay unique :)
          xdmp:log(fn:concat('UUID: Detected backwards clock adjustment; was ',$storedUTC,' now ',$time)),
          (($storedSeq + 1) mod $g_2_pow_14)
        )
        else
          $storedSeq
  let $updated := <UUIDConfig><LastUTC>{ $time }</LastUTC><ClockSeq>{ $nextSeq }</ClockSeq></UUIDConfig>
  return (
    xdmp:document-insert($configUri, $updated),
    $updated
  )
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment