Skip to content

Instantly share code, notes, and snippets.

@ellispritchard
Last active December 16, 2015 16:49
Show Gist options
  • Save ellispritchard/5466339 to your computer and use it in GitHub Desktop.
Save ellispritchard/5466339 to your computer and use it in GitHub Desktop.
MarkLogic XQuery UUID version 1 implementation: generates version 1 UUIDs, but using xdmp:host() instead of MAC address (as MAC address of node is not accessible to XQuery). Posted here for comment, inspiration etc.
xquery version '1.0-ml';
(: Library module: generates version 1 (time-based) UUID strings via uuid:uuid(). :)
module namespace uuid = "http://mobi1.co.uk/lib/uuid/v1";
(: Host identifier from xdmp:host(); used as the UUID node field and as part of the per-host config URI. :)
declare private variable $g_node as xs:unsignedLong := xdmp:host();
(: UUID version digit; its string form is prefixed to the 12-bit time_hi hex field. :)
declare private variable $g_uuidVersionBit as xs:unsignedLong := 1;
(: 128 = binary 10000000; added to the 6 high clock-sequence bits to form the clk_seq_hi/res byte. :)
declare private variable $g_uuidReservedBit as xs:unsignedLong := 128;
(: Added to the value returned by uuid:time() to rebase it onto the UUID epoch (Oct 15, 1582).
   NOTE(review): presumably the count of 100ns ticks between that epoch and the epoch used by
   xdmp:wallclock-to-timestamp - confirm against the MarkLogic documentation. :)
declare private variable $g_gregorianTimeOffset as xs:unsignedLong := xdmp:hex-to-integer('1B21DD213814000');
(:
Calculates a type 1 UUID.
http://www.ietf.org/rfc/rfc4122.txt specifies the layout of a UUID is as follows:
 0                   1                   2                   3
 0 1 2 3 4 5 6 7 8 9 a b c d e f 0 1 2 3 4 5 6 7 8 9 a b c d e f
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|                           time_low                            |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|           time_mid            |        time_hi        |version|
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|clk_seq_hi |res|  clk_seq_low  |          node (0-1)           |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|                          node (2-5)                           |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 0 1 2 3 4 5 6 7 8 9 a b c d e f 0 1 2 3 4 5 6 7 8 9 a b c d e f
This implements version 1 of the UUID specification.
The timestamp is a 60-bit value, 100-nanoseconds since the UUID epoch (Oct 15, 1582).
The clock sequence is a 14 bit value.
The node is a 48-bit value, based on MarkLogic's xdmp:host(), rather than the MAC address.
Clean-room XQuery Implementation based on method in: http://www.itu.int/rec/T-REC-X.667-200409-S/en
[email protected]
:)
declare function uuid:uuid() as xs:string
{
  (: Obtain the timestamp first; the clock-sequence lookup is keyed on it. :)
  let $ticks := uuid:time()
  let $seq := uuid:clock-seq($ticks)

  (: Fixed-width, zero-padded hex renderings of the raw fields. :)
  let $timeHex := uuid:pad0(xdmp:integer-to-hex($ticks + $g_gregorianTimeOffset), 16) (: 64 bits, low 60 used :)
  let $seqHex := uuid:pad0(xdmp:integer-to-hex($seq), 4) (: 16 bits, low 14 used :)
  (: Drop the 4 leading hex digits of the 16-digit host id, keeping the low 48 bits. :)
  let $nodeHex := fn:substring(uuid:pad0(xdmp:integer-to-hex($g_node), 16), 5, 12)

  (: High clock-sequence byte: keep its low 6 bits and add the reserved-bit constant. :)
  let $seqHighByte := xdmp:hex-to-integer(fn:substring($seqHex, 1, 2))
  let $reservedAndSeqHigh := xdmp:integer-to-hex($seqHighByte mod 64 + $g_uuidReservedBit)

  (:
   The five dash-separated groups, in output order:
     time_low (32 bits) - time_mid (16 bits) - version digit + time_hi (12 bits)
     - reserved bits + clock sequence - node.
   The node group emits the last 4 hex digits of the 48-bit node value followed by
   its first 8 hex digits.
   NOTE(review): RFC 4122 section 4.5 asks for the multicast bit to be set when the
   node is not an IEEE 802 address; this function does not set it - confirm whether
   that matters for consumers before changing the emitted value.
  :)
  return fn:string-join((
      fn:substring($timeHex, 9, 8),
      fn:substring($timeHex, 5, 4),
      fn:concat(xs:string($g_uuidVersionBit), fn:substring($timeHex, 2, 3)),
      fn:concat($reservedAndSeqHigh, fn:substring($seqHex, 3, 2)),
      fn:concat(fn:substring($nodeHex, 9, 4), fn:substring($nodeHex, 1, 8))
    ), '-')
};
(:
 Gets the 64-bit system time (100ns resolution) and adjusts it for 'same-tick'
 collisions (1), so that successive calls in one evaluation do not hand out a
 value that has already been returned.

 $g_lastTimestamp holds the most recent raw clock reading that was returned
 unadjusted; $g_uuidsThisTick counts how many adjusted values (last + 1,
 last + 2, ...) have been handed out since then.

 NB we eval() fn:current-dateTime() since it always returns the same value
 within the same transaction!!

 Returns: the (possibly adjusted) timestamp as xs:unsignedLong.
:)
declare private variable $g_lastTimestamp := 0;
declare private variable $g_uuidsThisTick := 0;
declare private function uuid:time() as xs:unsignedLong {
  (: get current time in a different transaction, manually write-locking a host-specific URI to absolutely ensure a unique result (per host)! :)
  let $rawTimestamp := xdmp:wallclock-to-timestamp(xdmp:eval("xdmp:lock-for-update(fn:concat('/uuid/',xdmp:host())),fn:current-dateTime()"))
  return
    (:
     (1) The raw reading collides when it falls anywhere in the range already
     issued, i.e. [last, last + uuidsThisTick]. Comparing only for equality with
     $g_lastTimestamp would let a reading of e.g. last + 1 be returned again
     after last + 1 had been issued as a same-tick adjustment.
     A reading below $g_lastTimestamp (clock moved backwards) is deliberately
     not treated as a collision: it resets the state, and uuid:clock-seq is
     responsible for detecting the backwards adjustment.
    :)
    if (($rawTimestamp ge $g_lastTimestamp)
        and ($rawTimestamp le $g_lastTimestamp + $g_uuidsThisTick)) then (
      xdmp:set($g_uuidsThisTick, $g_uuidsThisTick + 1),
      (: unsignedLong + integer yields xs:integer, so cast to match the declared return type :)
      xs:unsignedLong($g_lastTimestamp + $g_uuidsThisTick)
    ) else (
      xdmp:set($g_uuidsThisTick, 0),
      xdmp:set($g_lastTimestamp, $rawTimestamp),
      $rawTimestamp
    )
};
(:
 Left-pads $value with '0' characters until it is $len characters long.
 A $value that is already $len characters or longer is returned unchanged
 (it is never truncated).
:)
declare private function uuid:pad0($value as xs:string, $len as xs:int) as xs:string {
  (: number of zeros still required; zero or negative means no padding :)
  let $missing := $len - fn:string-length($value)
  return
    if ($missing le 0) then $value
    else fn:concat(fn:string-join(for $i in 1 to $missing return '0', ''), $value)
};
(:
 Returns the clock sequence to use for a UUID carrying timestamp $time.

 The configuration element is cached in $g_clock-seq-cache. It is re-read (and
 persisted) through uuid:update-clock-seq when nothing is cached yet, when the
 cached LastUTC is later than $time (time adjusted backwards), or when more than
 $g_clock-seq-update-period has elapsed since the cached LastUTC.

 uuid:update-clock-seq is invoked via xdmp:eval, i.e. in a different transaction,
 to avoid holding its write-lock (and thus blocking UUID generation) during the
 rest of a possibly lengthy calling transaction.
:)
declare private variable $g_clock-seq-cache := ();
declare private variable $g_clock-seq-update-period := 10 * 10 * 1000 * 1000; (: 10 seconds :)
declare private function uuid:clock-seq($time as xs:unsignedLong) as xs:integer {
  let $cachedUTC := xs:unsignedLong($g_clock-seq-cache/LastUTC)
  (: true when the cached configuration is missing, from the future, or too old :)
  let $needsRefresh :=
    fn:empty($cachedUTC)
    or ($cachedUTC gt $time)
    or ($time > $cachedUTC + $g_clock-seq-update-period)
  let $config :=
    if ($needsRefresh) then
      xdmp:eval("xquery version '1.0-ml';
import module namespace uuid = 'http://mobi1.co.uk/lib/uuid/v1' at '/lib/uuid-v1.xqy';
declare variable $time as xs:unsignedLong external;
uuid:update-clock-seq($time)",
        (xs:QName('time'),$time)
      )
    else
      $g_clock-seq-cache
  (: remember whichever configuration was used, fresh or cached :)
  let $_ := xdmp:set($g_clock-seq-cache, $config)
  return xs:integer($config/ClockSeq)
};
(:
 Reads, updates and stores the host-specific configuration document that holds
 the 'clock sequence' field, and returns the newly stored element.

 - No stored configuration: a random clock sequence is generated.
 - Stored LastUTC later than $time (clock adjusted backwards): the adjustment is
   logged and the stored clock sequence is incremented, wrapping at 2^14.
 - Otherwise the stored clock sequence is kept.

 In every case the document is rewritten with LastUTC set to $time.
 Persistence prevents the risk of generating identical UUIDs if the clock is
 adjusted backwards.
:)
declare private variable $g_2_pow_14 := 16384; (: 2^14 - clock seq is limited to 14 bits :)
declare function uuid:update-clock-seq($time as xs:unsignedLong) as element() {
  let $configUri := fn:concat('/uuid/',$g_node,'/UUID-v1.xml')
  let $stored := fn:doc($configUri)/node()
  let $nextSeq :=
    if (fn:empty($stored)) then
      (: nothing persisted yet: start from a random clock sequence :)
      xdmp:random(xs:integer($g_2_pow_14 - 1))
    else
      let $previousUTC := xs:unsignedLong($stored/LastUTC)
      let $storedSeq := xs:integer($stored/ClockSeq)
      return
        if ($previousUTC gt $time) then (
          (: time went backwards: move to the next clock sequence so UUIDs stay unique :)
          xdmp:log(fn:concat('UUID: Detected backwards clock adjustment; was ',$previousUTC,' now ',$time)),
          (($storedSeq + 1) mod $g_2_pow_14)
        )
        else
          $storedSeq
  let $updated :=
    element UUIDConfig {
      element LastUTC { $time },
      element ClockSeq { $nextSeq }
    }
  return (
    xdmp:document-insert($configUri, $updated),
    $updated
  )
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment