NetCDF Java Library - dump to SQLite

NetCDF Java Library - load GRIB/GRIB2/NetCDF data and dump to SQLite

Objective:
NetCDF is a format for storing data in multi-dimension array. It is highly compressed and suitable for many cases. Unfortunately, data in the tables cannot be easily searched and the API is not generally available, so I try to write a small Java program to insert the data to a file-based database to solve the problem.

Requirement:
Download and import the NetCDF Library from Unidata. Version 4.1 is used in this example.
Download and import the SqliteJDBC Library from http://www.zentus.com/sqlitejdbc/. v056 is used in this example.

Usage:
Complie as JAR, execute "java -jar JavaApplication3.jar _file1_ _file2_ ... _fileN_"
(Multiple files should have the same table definition, including the base time)

Potential problem:
If the table is too large, but the heap size (or ram size) is too small, "Array dataArray = variables.get(i).read();" will return error.
The SQLite file is a lot larger than the data files as there is a lot of duplicated data. (It is unavoidable, so please use a small dataset...)

 
package javaapplication3;
 
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import ucar.ma2.Array;
import ucar.nc2.Dimension;
import ucar.nc2.Variable;
import ucar.nc2.dataset.NetcdfDataset;
import java.sql.*;
 
public class Main {
 
    public static void main(String[] args) {
        try {
            System.out.println("Starting, please wait...");
            // Import sqlite JDBC engine
            try {
                Class.forName("org.sqlite.JDBC");
            } catch (ClassNotFoundException ex) {
                Logger.getLogger(Main.class.getName()).log(Level.SEVERE, null, ex);
            }
            // Connect to data.sqlite in the same folder
            Connection conn = DriverManager.getConnection("jdbc:sqlite:data.sqlite");
            // Don't commit (for permance)
            conn.setAutoCommit(false);
            Statement stat = conn.createStatement();
            // Clear all data and create tables
            stat.execute("drop table if exists data;");
            stat.execute("drop table if exists tableInfo;");
            // value table
            // These columns are chosen according to http://www.unidata.ucar.edu/software/netcdf-java/tutorial/CoordinateAttributes.html
            stat.execute("create table data (typeId, Time, Lat, Lon, Height, Pressure, GeoX, GeoY, GeoZ, RadialAzimuth, RadialElevation, RadialDistance, Runtime, Ensemble, value);");
            // meta-data table
            stat.execute("create table tableInfo (typeId INTEGER PRIMARY KEY AUTOINCREMENT, name, unit, dimensions);");
            // loop through the arguments for files
            for (int fileNumber = 0; fileNumber < args.length; fileNumber++) {
                System.out.println("Reading " + args[fileNumber]);
                // open netcdf/grib/grib2 file from argument
                NetcdfDataset gid = NetcdfDataset.openDataset(args[fileNumber]);
                // get all grid tables in the file
                List variables = gid.getReferencedFile().getVariables();
                for (int i = 0; i < variables.size(); i++) {
                    // prevent meta or coordinate variable to be processed
                    if ((!variables.get(i).isMetadata()) && (!variables.get(i).isCoordinateVariable()) && (!variables.get(i).getRanges().isEmpty())) {
                        //the dimensions of the grid table
                        List dimensions = variables.get(i).getDimensions();
                        String dimensionsString = "";
                        for (int j = 0; j < dimensions.size(); j++) {
                            if (j != 0) {
                                dimensionsString += ";";
                            }
                            // construct the string
                            dimensionsString += "[" + dimensions.get(j).getName() + "," +
                                    gid.getReferencedFile().findVariable(dimensions.get(j).getName()).findAttribute("_CoordinateAxisType").getStringValue() + "," +
                                    gid.getReferencedFile().findVariable(dimensions.get(j).getName()).getUnitsString() + "]";
                        }
                        // check whether the name is already exist
                        ResultSet result = stat.executeQuery("SELECT COUNT(*) FROM tableInfo WHERE name='" + variables.get(i).getName() + "'");
                        if (result.getInt(1) == 0) {
                            // if not, insert the data
                            stat.execute("INSERT INTO tableInfo (name, unit, dimensions) VALUES ('" +
                                    variables.get(i).getName() + "','" +
                                    variables.get(i).getUnitsString() + "','" +
                                    dimensionsString + "')");
                        }
                        // get the id of that variable
                        result = stat.executeQuery("SELECT typeId FROM tableInfo WHERE name='" + variables.get(i).getName() + "'");
                        int typeId = result.getInt(1);
                        // read the data of the variable in the grid table
                        Array dataArray = variables.get(i).read();
                        // calculate the total number of dimension combination
                        int dimensionTotal = 1;
                        ArrayList dimensionsData = new ArrayList();
                        for (int k = 0; k < dimensions.size(); k++) {
                            dimensionTotal *= dimensions.get(k).getLength();
                            //preload the dimension value
                            dimensionsData.add(gid.getReferencedFile().findVariable(dimensions.get(k).getName()).read());
                        }
                        for (int j = 0; j < variables.get(i).getSize(); j++) {
                            String dataName = variables.get(i).getName();
 
                            // for merging the dimension value
                            int previousDimensionTotal = dimensionTotal;
                            String sqlColumn = "";
                            String sqlValue = "";
                            for (int k = 0; k < dimensions.size(); k++) {
                                previousDimensionTotal /= dimensions.get(k).getLength();
                                int arrayNumber = (j / previousDimensionTotal) % (dimensions.get(k).getLength());
                                Variable dimension = gid.getReferencedFile().findVariable(dimensions.get(k).getName());
                                sqlColumn += dimension.findAttribute("_CoordinateAxisType").getStringValue() + ",";
                                sqlValue += dimensionsData.get(k).getFloat(arrayNumber) + ",";
                            }
                            // print the value
                            Float value = dataArray.getFloat(j);
                            if (!value.isNaN()) {
                                stat.execute("INSERT INTO data (typeId," + sqlColumn + " value) VALUES (" + typeId + ", " + sqlValue + value + ")");
                            } else {
                                stat.execute("INSERT INTO data (typeId," + sqlColumn + " value) VALUES (" + typeId + ", " + sqlValue + "null)");
                            }
 
                        }
                        // just to show the progress
                        System.out.print(".");
                    }
                }
                //close the file
                gid.close();
                System.out.println();
            }
            System.out.println("Commiting data");
            //write the data
            conn.commit();
            conn.close();
            System.out.println("Finished!");
        } catch (SQLException ex) {
            Logger.getLogger(Main.class.getName()).log(Level.SEVERE, null, ex);
        } catch (IOException ex) {
            Logger.getLogger(Main.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
}
/*
 * Example of queries:
 * SELECT name, data.* FROM data JOIN tableInfo ON data.typeId = tableInfo.typeId
 * SELECT name, data.* , (data.value-273.15) FROM data JOIN tableInfo ON data.typeId = tableInfo.typeId WHERE name="Temperature" AND Pressure = 100000 AND Time = 48 ORDER BY Lat, Lon
 */

Sample Output of reading GFS GRIB2 data from NOAA:
data (with > 675K rows!):
data
tableInfo:
tableInfo


本文連結