Skip to content

Instantly share code, notes, and snippets.

@danlentz
Created February 18, 2014 05:19

Revisions

  1. danlentz created this gist Feb 18, 2014.
    90 changes: 90 additions & 0 deletions mmap.clj
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,90 @@
    ; Copyright (c) Chris Houser, April 2008. All rights reserved.
    ; The use and distribution terms for this software are covered by the
    ; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
    ; which can be found in the file epl-v10.html at the root of this distribution.
    ; By using this software in any fashion, you are agreeing to be bound by
    ; the terms of this license.
    ; You must not remove this notice, or any other, from this software.

    ; Functions for memory-mapping files, plus some functions that use a
    ; mmapped file for "normal" activities -- slurp, load-file, etc.

    (ns
    #^{:author "Chris Houser",
    :doc "Functions for memory-mapping files, plus some functions that use a
    mmaped file for \"normal\" activies -- slurp, load-file, etc."}
    clojure.contrib.mmap
    (:refer-clojure :exclude (slurp load-file))
    (:import (java.nio ByteBuffer CharBuffer)
    (java.io PushbackReader InputStream InputStreamReader
    FileInputStream)))

    ;(set! *warn-on-reflection* true)

    ; Map mode used for every mapping created by this namespace.
    ; The :private metadata has to sit on the symbol being def'd; when it is
    ; attached to the init form instead, it is discarded at evaluation time and
    ; the var ends up public.
    (def #^{:private true} READ_ONLY
      java.nio.channels.FileChannel$MapMode/READ_ONLY)

    (defn mmap
      "Memory-map the whole file named f in read-only mode. Returns the
      resulting ByteBuffer.

      The stream (and with it the channel) used to create the mapping is
      closed before returning, so no file descriptor is leaked; a mapping,
      once established, does not depend on the channel that created it."
      [f]
      (with-open [in (FileInputStream. f)]
        (let [channel (.getChannel in)]
          ; Map from offset 0 through the current size of the file.
          (.map channel READ_ONLY 0 (.size channel)))))

    (defn slurp
      "Memory-maps the file named by f and decodes its bytes as UTF-8.
      Returns the decoded text as a java.nio.CharBuffer, which is a
      CharSequence (call str on it if an actual String is required)."
      [#^String f]
      (let [utf8    (java.nio.charset.Charset/forName "UTF-8")
            decoder (.newDecoder utf8)]
        (.decode decoder (mmap f))))

    (defn buffer-stream
      "Returns an InputStream for a ByteBuffer, such as returned by mmap.

      Reading consumes the buffer from its current position. The stream
      follows the java.io.InputStream contract:
        - (read) returns the next byte as an int in 0..255, or -1 at the end;
        - (read dst) and (read dst offset len) return the number of bytes
          copied, 0 when zero bytes were requested, or -1 at the end."
      [#^ByteBuffer buf]
      (let [read-bytes (fn [#^bytes dst offset len]
                         (cond
                           ; A zero-length request returns 0 even at the end.
                           (zero? len) 0
                           (not (.hasRemaining buf)) -1
                           :else (let [n (min (.remaining buf) len)]
                                   (.get buf dst (int offset) (int n))
                                   n)))]
        (proxy [InputStream] []
          (available [] (.remaining buf))
          ; A proxy method fn overrides every overload of read, so each
          ; arity InputStream declares must be handled here.
          (read
            ([]
             (if (.hasRemaining buf)
               ; Mask to an unsigned value: a raw signed byte would make 0xFF
               ; look like the -1 end-of-stream marker.
               (bit-and (.get buf) 0xff)
               -1))
            ([dst]
             (read-bytes dst 0 (alength #^bytes dst)))
            ([dst offset len]
             (read-bytes dst offset len))))))

    (defn load-file
      "Like clojure.core/load-file, but uses mmap internally: the file named
      by f is memory-mapped, decoded as UTF-8 (the same encoding slurp in this
      namespace uses) and evaluated form by form with load-reader. Returns
      whatever load-reader returns."
      [f]
      ; The docstring must come before the parameter vector; placed after it,
      ; the string is just a discarded expression in the function body.
      (with-open [rdr (PushbackReader.
                        (InputStreamReader.
                          #^InputStream (buffer-stream (mmap f))
                          "UTF-8"))]
        (load-reader rdr)))


    ; Ad-hoc benchmarks comparing the clojure.core versions of load-file and
    ; slurp with the mmap-based versions defined above. The whole form is
    ; wrapped in (comment ...), so nothing here is evaluated when the file is
    ; loaded; the forms are meant to be run by hand. The file paths are
    ; specific to the original author's machine, and the trailing numbers are
    ; the timings (msecs) that were recorded there.
    (comment

    ; Short aliases so the two implementations can be called side by side.
    (alias 'mmap 'clojure.contrib.mmap)
    (alias 'core 'clojure.core)

    ;---
    ; zip_filter.clj is 95KB
    ; NOTE(review): the size above names zip_filter.clj, but tf below points at
    ; core.clj -- confirm which file the recorded timings belong to.
    (def tf "/home/chouser/build/clojure/src/clj/clojure/core.clj")
    (println "\nload-file" tf)
    (time (dotimes [_ 5] (core/load-file tf))) ; 5420.177813 msecs
    (time (dotimes [_ 5] (mmap/load-file tf))) ; 7946.854434 msecs -- not so good

    ;---
    ; kern.log.0 is 961KB
    (def tf "/var/log/kern.log.0")
    (println "\nslurp" tf)
    (time (dotimes [_ 10] (.length (core/slurp tf)))) ; 435.767226 msecs
    (time (dotimes [_ 10] (.length (mmap/slurp tf)))) ; 93.176858 msecs

    ;---
    ; kern.log.0 is 961KB
    ; Regex search over the contents of a large file.
    (def tf "/var/log/kern.log.0")
    (println "\nregex slurp large" tf)
    (time (dotimes [_ 10] (count (re-seq #"EXT3.*" (core/slurp tf))))) ; 416
    (time (dotimes [_ 10] (count (re-seq #"EXT3.*" (mmap/slurp tf))))) ; 101

    ;---
    ; mmap.clj is about 3.1KB
    ; Regex search over the contents of a small file, repeated 1000 times.
    (def tf "/home/chouser/proj/clojure-contrib/src/clojure/contrib/mmap.clj")
    (println "\nregex slurp small" tf)

    (time (dotimes [_ 1000] (count (re-seq #"defn \S*" (core/slurp tf))))) ; 308
    (time (dotimes [_ 1000] (count (re-seq #"defn \S*" (mmap/slurp tf))))) ; 198

    )