NetCDF Java Library - dump to SQLite
Objective:
NetCDF is a format for storing data in multi-dimensional arrays. It is highly compressed and suitable for many use cases. Unfortunately, the data in its tables cannot be easily searched and a query API is not generally available, so I wrote a small Java program that inserts the data into a file-based database to solve the problem.
Requirement:
Download and import the NetCDF Library from Unidata. Version 4.1 is used in this example.
Download and import the SqliteJDBC Library from http://www.zentus.com/sqlitejdbc/. v056 is used in this example.
Usage:
Compile as a JAR, then execute "java -jar JavaApplication3.jar _file1_ _file2_ ... _fileN_"
(Multiple files should have the same table definition, including the base time)
Potential problem:
If the table is too large but the heap size (or RAM size) is too small, "Array dataArray = variables.get(i).read();" will fail with an out-of-memory error.
The SQLite file is a lot larger than the data files as there is a lot of duplicated data. (It is unavoidable, so please use a small dataset...)
package javaapplication3; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; import ucar.ma2.Array; import ucar.nc2.Dimension; import ucar.nc2.Variable; import ucar.nc2.dataset.NetcdfDataset; import java.sql.*; public class Main { public static void main(String[] args) { try { System.out.println("Starting, please wait..."); // Import sqlite JDBC engine try { Class.forName("org.sqlite.JDBC"); } catch (ClassNotFoundException ex) { Logger.getLogger(Main.class.getName()).log(Level.SEVERE, null, ex); } // Connect to data.sqlite in the same folder Connection conn = DriverManager.getConnection("jdbc:sqlite:data.sqlite"); // Don't commit (for permance) conn.setAutoCommit(false); Statement stat = conn.createStatement(); // Clear all data and create tables stat.execute("drop table if exists data;"); stat.execute("drop table if exists tableInfo;"); // value table // These columns are chosen according to http://www.unidata.ucar.edu/software/netcdf-java/tutorial/CoordinateAttributes.html stat.execute("create table data (typeId, Time, Lat, Lon, Height, Pressure, GeoX, GeoY, GeoZ, RadialAzimuth, RadialElevation, RadialDistance, Runtime, Ensemble, value);"); // meta-data table stat.execute("create table tableInfo (typeId INTEGER PRIMARY KEY AUTOINCREMENT, name, unit, dimensions);"); // loop through the arguments for files for (int fileNumber = 0; fileNumber < args.length; fileNumber++) { System.out.println("Reading " + args[fileNumber]); // open netcdf/grib/grib2 file from argument NetcdfDataset gid = NetcdfDataset.openDataset(args[fileNumber]); // get all grid tables in the file Listvariables = gid.getReferencedFile().getVariables(); for (int i = 0; i < variables.size(); i++) { // prevent meta or coordinate variable to be processed if ((!variables.get(i).isMetadata()) && (!variables.get(i).isCoordinateVariable()) && (!variables.get(i).getRanges().isEmpty())) { //the dimensions of the grid 
table List dimensions = variables.get(i).getDimensions(); String dimensionsString = ""; for (int j = 0; j < dimensions.size(); j++) { if (j != 0) { dimensionsString += ";"; } // construct the string dimensionsString += "[" + dimensions.get(j).getName() + "," + gid.getReferencedFile().findVariable(dimensions.get(j).getName()).findAttribute("_CoordinateAxisType").getStringValue() + "," + gid.getReferencedFile().findVariable(dimensions.get(j).getName()).getUnitsString() + "]"; } // check whether the name is already exist ResultSet result = stat.executeQuery("SELECT COUNT(*) FROM tableInfo WHERE name='" + variables.get(i).getName() + "'"); if (result.getInt(1) == 0) { // if not, insert the data stat.execute("INSERT INTO tableInfo (name, unit, dimensions) VALUES ('" + variables.get(i).getName() + "','" + variables.get(i).getUnitsString() + "','" + dimensionsString + "')"); } // get the id of that variable result = stat.executeQuery("SELECT typeId FROM tableInfo WHERE name='" + variables.get(i).getName() + "'"); int typeId = result.getInt(1); // read the data of the variable in the grid table Array dataArray = variables.get(i).read(); // calculate the total number of dimension combination int dimensionTotal = 1; ArrayList dimensionsData = new ArrayList (); for (int k = 0; k < dimensions.size(); k++) { dimensionTotal *= dimensions.get(k).getLength(); //preload the dimension value dimensionsData.add(gid.getReferencedFile().findVariable(dimensions.get(k).getName()).read()); } for (int j = 0; j < variables.get(i).getSize(); j++) { String dataName = variables.get(i).getName(); // for merging the dimension value int previousDimensionTotal = dimensionTotal; String sqlColumn = ""; String sqlValue = ""; for (int k = 0; k < dimensions.size(); k++) { previousDimensionTotal /= dimensions.get(k).getLength(); int arrayNumber = (j / previousDimensionTotal) % (dimensions.get(k).getLength()); Variable dimension = gid.getReferencedFile().findVariable(dimensions.get(k).getName()); 
sqlColumn += dimension.findAttribute("_CoordinateAxisType").getStringValue() + ","; sqlValue += dimensionsData.get(k).getFloat(arrayNumber) + ","; } // print the value Float value = dataArray.getFloat(j); if (!value.isNaN()) { stat.execute("INSERT INTO data (typeId," + sqlColumn + " value) VALUES (" + typeId + ", " + sqlValue + value + ")"); } else { stat.execute("INSERT INTO data (typeId," + sqlColumn + " value) VALUES (" + typeId + ", " + sqlValue + "null)"); } } // just to show the progress System.out.print("."); } } //close the file gid.close(); System.out.println(); } System.out.println("Commiting data"); //write the data conn.commit(); conn.close(); System.out.println("Finished!"); } catch (SQLException ex) { Logger.getLogger(Main.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(Main.class.getName()).log(Level.SEVERE, null, ex); } } } /* * Example of queries: * SELECT name, data.* FROM data JOIN tableInfo ON data.typeId = tableInfo.typeId * SELECT name, data.* , (data.value-273.15) FROM data JOIN tableInfo ON data.typeId = tableInfo.typeId WHERE name="Temperature" AND Pressure = 100000 AND Time = 48 ORDER BY Lat, Lon */
Sample Output of reading GFS GRIB2 data from NOAA:
data (with > 675K rows!):
tableInfo:
本文連結