Last active
August 29, 2015 14:05
-
-
Save ian-ross/ed3a21f6238719d4aaeb to your computer and use it in GitHub Desktop.
Haskell data analysis: atmospheric non-diffusive flow data pre-processing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Main where | |
import Prelude hiding (length, maximum, minimum, sum, map) | |
import Control.Applicative ((<$>)) | |
import Control.Monad | |
import Data.List (foldl') | |
import qualified Data.Map as M | |
import Foreign.C | |
import qualified Data.Vector.Generic as GV | |
import qualified Data.Vector.Unboxed as VU | |
import Data.Array.Repa hiding ((++)) | |
import Data.Array.Repa.Eval | |
import Data.Array.Repa.Repr.ForeignPtr (F) | |
import Data.Array.Repa.Repr.Unboxed (U) | |
import qualified Data.Vector.Storable as SV | |
import System.FilePath | |
import Data.IORef | |
import Data.NetCDF | |
import qualified Data.NetCDF.Vector as V | |
import qualified Data.NetCDF.Repa as R | |
import Debug.Trace | |
type SVRet a = IO (Either NcError (SV.Vector a)) | |
type FArray3 a = Array F DIM3 a | |
type FArray2 a = Array F DIM2 a | |
type Array2 a = Array U DIM2 a | |
type RepaRet3 a = IO (Either NcError (FArray3 a)) | |
type RepaRet2 a = IO (Either NcError (FArray2 a)) | |
type RepaRet1 a = IO (Either NcError (Array F DIM1 a)) | |
workdir :: FilePath | |
workdir = "/big/project-storage/haskell-data-analysis" </> | |
"atmos-non-diffusive/pre-processing" | |
main :: IO () | |
main = do | |
-- Open subset data file for input. | |
Right innc <- openFile $ workdir </> "z500-subset.nc" | |
let Just nlat = ncDimLength <$> ncDim innc "lat" | |
Just nlon = ncDimLength <$> ncDim innc "lon" | |
Just ntime = ncDimLength <$> ncDim innc "time" | |
let (Just lonvar) = ncVar innc "lon" | |
(Just latvar) = ncVar innc "lat" | |
(Just timevar) = ncVar innc "time" | |
(Just z500var) = ncVar innc "z500" | |
Right lon <- get innc lonvar :: SVRet CFloat | |
Right lat <- get innc latvar :: SVRet CFloat | |
Right time <- get innc timevar :: SVRet CDouble | |
-- Each year has 181 days, so 72 x 15 x 181 = 195480 data values. | |
let ndays = 181 | |
nyears = ntime `div` ndays | |
-- Use an Int array to accumulate values for calculating mean annual | |
-- cycle. Since the data is stored as short integer values, this is | |
-- enough to prevent overflow as we accumulate the 2014 - 1948 = 66 | |
-- years of data. | |
let sh = Z :. ndays :. nlat :. nlon | |
slicecount = [ndays, nlat, nlon] | |
zs = take (product slicecount) (repeat 0) | |
init = fromList sh zs :: FArray3 Int | |
-- Computation to accumulate a single year's data. | |
let doone current y = do | |
-- Read one year's data. | |
let start = [(y - 1948) * ndays, 0, 0] | |
Right slice <- getA innc z500var start slicecount :: RepaRet3 CShort | |
return $! computeS $ current +^ (map fromIntegral slice) | |
-- Accumulate all data. | |
total <- foldM doone init [1948..2013] | |
-- Calculate the final mean annual cycle. | |
let mean = computeS $ | |
map (fromIntegral . (`div` nyears)) total :: FArray3 CShort | |
-- Calculate 31-day running mean of mean annual cycle. | |
let delta = 15 | |
smoothdays = ndays - 2 * delta | |
smoothmean = runmean mean delta | |
-- Set up output file. | |
let outlatdim = NcDim "lat" nlat False | |
outlatvar = NcVar "lat" NcFloat [outlatdim] (ncVarAttrs latvar) | |
outlondim = NcDim "lon" nlon False | |
outlonvar = NcVar "lon" NcFloat [outlondim] (ncVarAttrs lonvar) | |
outtimedim = NcDim "time" 0 True | |
outtimevar = NcVar "time" NcDouble [outtimedim] (ncVarAttrs timevar) | |
outdaydim = NcDim "day" smoothdays False | |
outdayvar = NcVar "day" NcInt [outdaydim] M.empty | |
outz500attrs = foldl' (flip M.delete) (ncVarAttrs z500var) | |
["scale_factor", "add_offset"] | |
outz500var = NcVar "z500" NcShort | |
[outtimedim, outlatdim, outlondim] outz500attrs | |
outmeanvar = NcVar "mean" NcShort [outdaydim, outlatdim, outlondim] | |
(ncVarAttrs z500var) | |
outncinfo = | |
emptyNcInfo (workdir </> "z500-anomalies.nc") # | |
addNcDim outlatdim # addNcDim outlondim # | |
addNcDim outtimedim # addNcDim outdaydim # | |
addNcVar outlatvar # addNcVar outlonvar # | |
addNcVar outtimevar # addNcVar outdayvar # | |
addNcVar outz500var # addNcVar outmeanvar | |
flip (withCreateFile outncinfo) (putStrLn . ("ERROR: " ++) . show) $ | |
\outnc -> do | |
-- Write fixed variable values. | |
put outnc outlatvar lat | |
put outnc outlonvar lon | |
put outnc outdayvar (SV.enumFromN 1 smoothdays :: SV.Vector CInt) | |
put outnc outmeanvar smoothmean | |
-- Process one input year at a time, accumulating all relevant | |
-- time-steps into a single output file. | |
itoutref <- newIORef 0 :: IO (IORef Int) | |
let count = [1, nlat, nlon] | |
forM_ [0..ntime-1] $ \it -> do | |
-- If "it" is within the 151 day winter block... | |
when (it `mod` 181 >= 15 && it `mod` 181 <= 165) $ do | |
-- Read time slice. | |
Right sli <- getA innc z500var [it, 0, 0] count :: RepaRet2 CShort | |
-- Calculate anomalies and write out. | |
let isl = it `mod` 181 - 15 | |
sl = Z :. isl :. All :. All | |
anom = computeS $ sli -^ (slice smoothmean sl) :: FArray2 CShort | |
itout <- readIORef itoutref | |
putStrLn $ show it ++ " -> " ++ show itout | |
modifyIORef itoutref (+1) | |
putA outnc outtimevar [itout] [1] (SV.slice it 1 time) | |
putA outnc outz500var [itout, 0, 0] count anom | |
return () | |
-- Calculate running mean of 3-dimensional array stored in (time, y, | |
-- x) coordinate order, from time positions (i - d) to (i + d) for | |
-- each valid time index i. | |
runmean :: FArray3 CShort -> Int -> FArray3 CShort | |
runmean x d = computeS $ traverse x (const smshape) doone | |
where (Z :. nz :. ny :. nx) = extent x | |
-- ^ Extent of input array. | |
smshape = Z :. nz - 2 * d :. ny :. nx | |
-- ^ Extent of smoothed output array: spatial dimensions are | |
-- the same as the input, and just the time dimension is | |
-- reduced by the averaging. | |
sz = Z :. (2 * d + 1) :. 1 :. 1 | |
-- ^ Slice extent for averaging. | |
len = fromIntegral (2 * d + 1) :: Double | |
-- ^ Averaging length. | |
dem x = fromIntegral (truncate x :: Int) | |
-- ^ Type demotion from Double to CShort for output. | |
pro :: Array U DIM3 Double | |
pro = computeS $ map (\x -> fromIntegral (fromIntegral x :: Int)) x | |
-- ^ Type promotion of input from CShort to Double. | |
doone _ i = dem $ (sumAllS (extract i sz pro)) / len | |
-- ^ Calculate a single averaged value. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Main where | |
import Prelude hiding (length, maximum, minimum, sum) | |
import Control.Applicative ((<$>)) | |
import Control.Monad | |
import Data.List (foldl') | |
import qualified Data.Map as M | |
import Foreign.C | |
import Data.Vector.Generic hiding ((++), map, forM_, foldl') | |
import qualified Data.Array.Repa as Repa | |
import Data.Array.Repa.Repr.ForeignPtr (F) | |
import Data.Array.Repa.Repr.Unboxed (U) | |
import qualified Data.Vector.Storable as SV | |
import System.FilePath | |
import Data.IORef | |
import Data.NetCDF | |
import qualified Data.NetCDF.Vector as V | |
import qualified Data.NetCDF.Repa as R | |
type SVRet a = IO (Either NcError (SV.Vector a)) | |
type FArray2 a = Repa.Array F Repa.DIM2 a | |
type Array2 a = Repa.Array U Repa.DIM2 a | |
type RepaRet2 a = IO (Either NcError (FArray2 a)) | |
type RepaRet1 a = IO (Either NcError (Repa.Array F Repa.DIM1 a)) | |
indir :: FilePath | |
indir = "/big/data/reanalysis/NCEP/pressure/hgt" | |
outdir :: FilePath | |
outdir = "/big/project-storage/haskell-data-analysis" </> | |
"atmos-non-diffusive/pre-processing" | |
main :: IO () | |
main = do | |
-- Open first annual file to get metadata necessary for setting up | |
-- output file and for subsetting index calculations. | |
Right refnc <- openFile $ indir </> "hgt.1948.nc" | |
let Just nlat = ncDimLength <$> ncDim refnc "lat" | |
Just nlon = ncDimLength <$> ncDim refnc "lon" | |
Just nlev = ncDimLength <$> ncDim refnc "level" | |
let (Just lonvar) = ncVar refnc "lon" | |
(Just latvar) = ncVar refnc "lat" | |
(Just levvar) = ncVar refnc "level" | |
(Just timevar) = ncVar refnc "time" | |
(Just zvar) = ncVar refnc "hgt" | |
Right lon <- get refnc lonvar :: SVRet CFloat | |
Right lat <- get refnc latvar :: SVRet CFloat | |
Right lev <- get refnc levvar :: SVRet CFloat | |
-- Determine spatial subsetting index bounds. | |
let late = vectorIndex LT FromEnd lat 17.5 | |
lats = vectorIndex GT FromStart lat 87.5 | |
levi = vectorIndex GT FromStart lev 500.0 | |
-- Determine appropriate time indexes for winter days: "winter" in | |
-- this context is 181 days long and starts on 1 November each year. | |
-- This means that we get the first 120 days of each year plus the | |
-- days from 1 November to the end of the year. | |
let inov1non = 305 - 1 | |
-- ^ Index of 1 November in non-leap years. | |
wintertsnon = [0..119] ++ [inov1non..365-1] | |
-- ^ Indexes of all winter days for non-leap years. | |
inov1leap = 305 + 1 - 1 | |
-- ^ Index of 1 November in leap years. | |
wintertsleap = [0..119] ++ [inov1leap..366-1] | |
-- ^ Indexes of all winter days for leap years. | |
winterts1948 = [inov1leap..366-1] | |
winterts2014 = [0..119] | |
-- ^ Indexes for winters in start and end years. | |
-- Set up output file. | |
let outlat = SV.fromList $ map (lat SV.!) [lats,lats+2..late] | |
outlon = SV.fromList $ map (lon SV.!) [0,2..nlon-1] | |
noutlat = SV.length outlat | |
noutlon = SV.length outlon | |
outlatdim = NcDim "lat" noutlat False | |
outlatvar = NcVar "lat" NcFloat [outlatdim] (ncVarAttrs latvar) | |
outlondim = NcDim "lon" noutlon False | |
outlonvar = NcVar "lon" NcFloat [outlondim] (ncVarAttrs lonvar) | |
outtimedim = NcDim "time" 0 True | |
outtimeattrs = foldl' (flip M.delete) (ncVarAttrs timevar) | |
["actual_range"] | |
outtimevar = NcVar "time" NcDouble [outtimedim] outtimeattrs | |
outz500attrs = foldl' (flip M.delete) (ncVarAttrs zvar) | |
["actual_range", "level_desc", "valid_range"] | |
outz500var = NcVar "z500" NcShort | |
[outtimedim, outlatdim, outlondim] outz500attrs | |
outncinfo = | |
emptyNcInfo (outdir </> "z500-subset.nc") # | |
addNcDim outlatdim # addNcDim outlondim # addNcDim outtimedim # | |
addNcVar outlatvar # addNcVar outlonvar # addNcVar outtimevar # | |
addNcVar outz500var | |
flip (withCreateFile outncinfo) (putStrLn . ("ERROR: " ++) . show) $ | |
\outnc -> do | |
-- Write coordinate variable values. | |
put outnc outlatvar outlat | |
put outnc outlonvar outlon | |
-- Set up output time step index. | |
outidxref <- newIORef 0 | |
-- Process one input year at a time, accumulating all relevant | |
-- time-steps into a single output file. | |
forM_ [1948..2014] $ \y -> do | |
-- Open annual file. | |
syncFile outnc | |
putStrLn $ "Processing: " ++ show y | |
Right nc <- openFile $ indir </> ("hgt." ++ show y) <.> "nc" | |
-- Read metadata. | |
let Just ntime = ncDimLength <$> ncDim nc "time" | |
let (Just timevar) = ncVar nc "time" | |
(Just zvar) = ncVar nc "hgt" | |
Right time <- get nc timevar :: SVRet CDouble | |
-- Determine appropriate time indexes for winter days: | |
-- "winter" in this context is 181 days long and starts on 1 | |
-- November each year. This means that we get the first 120 | |
-- days of each year plus the days from 1 November to the end | |
-- of the year. | |
let leap = y `mod` 400 == 0 || (y `mod` 100 /= 0 && y `mod` 4 == 0) | |
-- ^ Is this a leap year? | |
winterts = case y of | |
1948 -> winterts1948 | |
2014 -> winterts2014 | |
_ -> if leap then wintertsleap else wintertsnon | |
-- ^ Indexes of all winter days. | |
-- Process one time-step at a time. | |
forM_ winterts $ \it -> do | |
-- Read a slice of a single time-step of data: Northern | |
-- Hemisphere (17.5-87.5 degrees), 5 degree resolution, 500 | |
-- mb level only. | |
let start = [it, levi, lats, 0] | |
count = [1, 1, (late - lats) `div` 2 + 1, nlon `div` 2] | |
stride = [1, 1, 2, 2] | |
Right slice <- getS nc zvar start count stride :: RepaRet2 CShort | |
-- Write the time and Z500 data out. | |
outidx <- readIORef outidxref | |
modifyIORef outidxref (+1) | |
putA outnc outtimevar [outidx] [1] (SV.slice it 1 time) | |
putA outnc outz500var [outidx, 0, 0] [1, noutlat, noutlon] slice | |
data IndexStart = FromStart | FromEnd | |
vectorIndex :: (SV.Storable a, Ord a) | |
=> Ordering -> IndexStart -> SV.Vector a -> a -> Int | |
vectorIndex o s v val = case (go o, s) of | |
(Nothing, _) -> (-1) | |
(Just i, FromStart) -> i | |
(Just i, FromEnd) -> SV.length v - 1 - i | |
where go LT = SV.findIndex (>= val) vord | |
go GT = SV.findIndex (<= val) vord | |
vord = case s of | |
FromStart -> v | |
FromEnd -> SV.reverse v |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment