Skip to content

Commit

Permalink
Initial data drop
Browse files Browse the repository at this point in the history
Downloads a series of files and merges them into a single NetCDF file.
  • Loading branch information
dgrechka authored Jun 16, 2016
1 parent 04d63f2 commit c570eb0
Show file tree
Hide file tree
Showing 5 changed files with 249 additions and 0 deletions.
6 changes: 6 additions & 0 deletions App.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="utf-8" ?>
<configuration>
<startup>
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5.2" />
</startup>
</configuration>
41 changes: 41 additions & 0 deletions AssemblyInfo.fs
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Assembly-level metadata for this project. The file only declares attributes
// and contains no program logic.
namespace MergeReanalysis.AssemblyInfo

open System.Reflection
open System.Runtime.CompilerServices
open System.Runtime.InteropServices

// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
[<assembly: AssemblyTitle("BulkReanalysisDownload")>]
[<assembly: AssemblyDescription("")>]
[<assembly: AssemblyConfiguration("")>]
[<assembly: AssemblyCompany("")>]
[<assembly: AssemblyProduct("BulkReanalysisDownload")>]
[<assembly: AssemblyCopyright("Copyright © 2016")>]
[<assembly: AssemblyTrademark("")>]
[<assembly: AssemblyCulture("")>]

// Setting ComVisible to false makes the types in this assembly not visible
// to COM components. If you need to access a type in this assembly from
// COM, set the ComVisible attribute to true on that type.
[<assembly: ComVisible(false)>]

// The following GUID is for the ID of the typelib if this project is exposed to COM
// (same value as <ProjectGuid> in BulkReanalysisDownload.fsproj).
[<assembly: Guid("420c7da4-b854-4476-a9af-8fb4e192b543")>]

// Version information for an assembly consists of the following four values:
//
// Major Version
// Minor Version
// Build Number
// Revision
//
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [<assembly: AssemblyVersion("1.0.*")>]
[<assembly: AssemblyVersion("1.0.0.0")>]
[<assembly: AssemblyFileVersion("1.0.0.0")>]

// Attributes must be attached to a declaration; the empty `do ()` below is the
// anchor for the assembly-level attributes above.
do
()
78 changes: 78 additions & 0 deletions BulkReanalysisDownload.fsproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<SchemaVersion>2.0</SchemaVersion>
<ProjectGuid>420c7da4-b854-4476-a9af-8fb4e192b543</ProjectGuid>
<OutputType>Exe</OutputType>
<RootNamespace>MergeReanalysis</RootNamespace>
<AssemblyName>MergeReanalysis</AssemblyName>
<TargetFrameworkVersion>v4.5.2</TargetFrameworkVersion>
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
<TargetFSharpCoreVersion>4.4.0.0</TargetFSharpCoreVersion>
<Name>BulkReanalysisDownload</Name>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<Tailcalls>false</Tailcalls>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<WarningLevel>3</WarningLevel>
<PlatformTarget>AnyCPU</PlatformTarget>
<DocumentationFile>bin\Debug\MergeReanalysis.XML</DocumentationFile>
<Prefer32Bit>true</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<Tailcalls>true</Tailcalls>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<WarningLevel>3</WarningLevel>
<PlatformTarget>AnyCPU</PlatformTarget>
<DocumentationFile>bin\Release\MergeReanalysis.XML</DocumentationFile>
<Prefer32Bit>true</Prefer32Bit>
</PropertyGroup>
<ItemGroup>
<Reference Include="Microsoft.Research.Science.Data, Version=1.3.14517.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35" />
<Reference Include="mscorlib" />
<Reference Include="FSharp.Core, Version=$(TargetFSharpCoreVersion), Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a">
<Private>True</Private>
</Reference>
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Numerics" />
</ItemGroup>
<ItemGroup>
<Compile Include="AssemblyInfo.fs" />
<Compile Include="Program.fs" />
<None Include="App.config" />
</ItemGroup>
<PropertyGroup>
<MinimumVisualStudioVersion Condition="'$(MinimumVisualStudioVersion)' == ''">11</MinimumVisualStudioVersion>
</PropertyGroup>
<Choose>
<When Condition="'$(VisualStudioVersion)' == '11.0'">
<PropertyGroup Condition="Exists('$(MSBuildExtensionsPath32)\..\Microsoft SDKs\F#\3.0\Framework\v4.0\Microsoft.FSharp.Targets')">
<FSharpTargetsPath>$(MSBuildExtensionsPath32)\..\Microsoft SDKs\F#\3.0\Framework\v4.0\Microsoft.FSharp.Targets</FSharpTargetsPath>
</PropertyGroup>
</When>
<Otherwise>
<PropertyGroup Condition="Exists('$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)\FSharp\Microsoft.FSharp.Targets')">
<FSharpTargetsPath>$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)\FSharp\Microsoft.FSharp.Targets</FSharpTargetsPath>
</PropertyGroup>
</Otherwise>
</Choose>
<Import Project="$(FSharpTargetsPath)" />
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets.
<Target Name="BeforeBuild">
</Target>
<Target Name="AfterBuild">
</Target>
-->
</Project>
22 changes: 22 additions & 0 deletions BulkReanalysisDownload.sln
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 14
VisualStudioVersion = 14.0.25123.0
MinimumVisualStudioVersion = 10.0.40219.1
Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "BulkReanalysisDownload", "BulkReanalysisDownload.fsproj", "{420C7DA4-B854-4476-A9AF-8FB4E192B543}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{420C7DA4-B854-4476-A9AF-8FB4E192B543}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{420C7DA4-B854-4476-A9AF-8FB4E192B543}.Debug|Any CPU.Build.0 = Debug|Any CPU
{420C7DA4-B854-4476-A9AF-8FB4E192B543}.Release|Any CPU.ActiveCfg = Release|Any CPU
{420C7DA4-B854-4476-A9AF-8FB4E192B543}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal
102 changes: 102 additions & 0 deletions Program.fs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
open System.IO
open System.Net
open System.Text

// Short aliases for the Scientific DataSet types used in this file.
type DataSet = Microsoft.Research.Science.Data.DataSet
type Variable = Microsoft.Research.Science.Data.Variable

// Credentials attached to every FTP request (user name "anonymous").
// NOTE(review): the password literal looks like an obfuscated e-mail
// placeholder - confirm the intended value.
let creds = new NetworkCredential("anonymous", "[email protected]")

// Size in bytes (256 MiB) of the buffer used when copying a download to disk.
let bufferSize = 256 * 1024 * 1024

/// Downloads one yearly NCEP reanalysis surface file from the NOAA FTP server
/// into a randomly named file in the current working directory.
///
/// varName: file-name stem on the server (the caller passes e.g. "air.sig995").
/// year:    year substituted into the remote file name "<varName>.<year>.nc".
///
/// Returns `Some localFileName` when the download completed, or `None` when the
/// request raised a `WebException`. In the `None` case any partially written
/// local file is removed. Exceptions other than `WebException` propagate.
let getSurfaceVariableFile varName year =
    let addr = sprintf "ftp://ftp.cdc.noaa.gov/Datasets/ncep.reanalysis/surface/%s.%d.nc" varName year
    // Chosen before the try block so the failure path can clean up the file.
    let filename = Path.GetRandomFileName()
    try
        let request = WebRequest.Create(addr) :?> FtpWebRequest
        request.Method <- WebRequestMethods.Ftp.DownloadFile
        request.Credentials <- creds :> ICredentials
        //request.UseBinary <- true;
        request.UsePassive <- true
        request.KeepAlive <- true
        printf "Downloading %s..." addr
        // `use` disposes the response and both streams (in reverse order) even
        // when the copy fails, so the FTP connection and file handle are released.
        use response = request.GetResponse() :?> FtpWebResponse
        use rs = response.GetResponseStream()
        use ws = new FileStream(filename, FileMode.Create)
        rs.CopyTo(ws, bufferSize)
        printfn "Done (%s)" response.StatusDescription
        Some filename
    with
    | :? WebException as ex ->
        // Only FTP status 550 means the file is absent; report anything else
        // as a failure so a network error is not mistaken for a missing file.
        match ex.Response with
        | :? FtpWebResponse as r when r.StatusCode = FtpStatusCode.ActionNotTakenFileUnavailable ->
            printfn "Not found"
        | _ ->
            printfn "Failed: %s" ex.Message
        // The streams are already closed here; drop any partial download.
        if File.Exists filename then File.Delete filename
        None

/// Program entry point.
///
/// Downloads the yearly files of one surface variable, starting at `startYear`
/// and stopping at the first year whose download returns `None`, and appends
/// each file along the "time" dimension to a single NetCDF dataset named
/// "<var>.<layer>.nc". Each downloaded temporary file is deleted after it has
/// been processed, whether or not processing succeeded.
///
/// argv: command-line arguments (currently unused).
/// Returns 0 as the process exit code.
[<EntryPoint>]
let main argv =
    let startYear = 2015
    let varName = "air"
    let layerName = "sig995"

    // File-name stem shared by the remote yearly files and the merged output.
    let varFileName = sprintf "%s.%s" varName layerName

    let datasetURL = sprintf "msds:nc?file=%s.nc&openMode=create" varFileName
    use dataSet = DataSet.Open(datasetURL)

    // Variables copied in full from the first file only.
    let variablesToBulkCopy = ["lat"; "lon"]

    // Appends the contents of one downloaded file to the merged dataset.
    let appendFile (file: string) =
        try
            // Scoped `use` closes the source dataset even if a step below
            // throws; it is disposed before the `finally` deletes the file.
            use sourceDs = DataSet.Open(sprintf "msds:nc?file=%s&openMode=readOnly" file)
            let sourceVar, sourceTimeVar = sourceDs.Variables.[varName], sourceDs.Variables.["time"]
            let sourceData, timeData = sourceVar.GetData(), sourceTimeVar.GetData()
            let targetVar, targetTimeVar =
                if dataSet.Variables.Contains(varName) then
                    dataSet.Variables.[varName], dataSet.Variables.["time"]
                else
                    // first file in a series
                    for bulkCopyVar in variablesToBulkCopy do
                        let sourceV = sourceDs.Variables.[bulkCopyVar]
                        let v = dataSet.AddVariable<System.Single>(bulkCopyVar, sourceV.GetData(), sourceV.Dimensions.AsNamesArray())
                        // metadata for 1D variables
                        for key in sourceV.Metadata.AsDictionary().Keys do
                            v.Metadata.[key] <- sourceV.Metadata.[key]

                    // global metadata
                    for key in sourceDs.Metadata.AsDictionary().Keys do
                        dataSet.Metadata.[key] <- sourceDs.Metadata.[key]

                    // placeholders for incremental updates
                    let targetV = dataSet.AddVariable<System.Single>(varName, sourceVar.Dimensions.AsNamesArray()) :> Variable
                    let targetTimeV = dataSet.AddVariable<float>("time", [|"time"|]) :> Variable

                    // target var metadata
                    for key in sourceVar.Metadata.AsDictionary().Keys do
                        targetV.Metadata.[key] <- sourceVar.Metadata.[key]
                    // time var metadata
                    for key in sourceTimeVar.Metadata.AsDictionary().Keys do
                        targetTimeV.Metadata.[key] <- sourceTimeVar.Metadata.[key]
                    targetV, targetTimeV
            targetVar.Append(sourceData, "time")
            targetTimeVar.Append(timeData)
            dataSet.Commit()
        finally
            // Always remove the temporary download, including on failure.
            File.Delete file

    // The sequence is lazy, so each file is downloaded, merged and deleted
    // before the next year is requested.
    Seq.initInfinite (fun i -> startYear + i)
    |> Seq.map (getSurfaceVariableFile varFileName)
    |> Seq.takeWhile Option.isSome
    |> Seq.choose id
    |> Seq.iter appendFile
    0 // return an integer exit code

0 comments on commit c570eb0

Please sign in to comment.