nidas  v1.2-1520
Public Member Functions | Static Public Member Functions | Protected Member Functions | Private Member Functions | Static Private Member Functions | Private Attributes | Static Private Attributes | List of all members
nidas::core::CalFile Class Reference

A class for reading ASCII files containing a time series of calibration data. More...

#include <CalFile.h>

Inheritance diagram for nidas::core::CalFile:
Inheritance graph
[legend]

Public Member Functions

 CalFile ()
 
 CalFile (const CalFile &)
 Copy constructor. More...
 
CalFile & operator= (const CalFile &)
 Assignment operator, like the copy constructor. More...
 
 ~CalFile ()
 Closes the file if necessary. More...
 
const std::string & getFile () const
 
void setFile (const std::string &val)
 Set the base name of the file to be opened. More...
 
const std::string & getPath () const
 Return the search path used to find the file, and any included files: one or more directory paths separated by colons ':'. More...
 
void setPath (const std::string &val)
 Set the search path to find the file, and any included files: one or more directory paths separated by colons ':'. More...
 
const std::string & getCurrentFileName () const
 Return the full file path of the current file. More...
 
void setName (const std::string &val)
 An instance of CalFile can have a name. More...
 
const std::string & getName () const
 
int getLineNumber () const
 
void open () throw (nidas::util::IOException)
 Open the file. More...
 
void close () throw ()
 Close file. More...
 
bool eof () const
 Have we reached eof. More...
 
const std::string & getTimeZone () const
 
void setTimeZone (const std::string &val)
 Set the timezone for the dates & times read from the file. More...
 
const std::string & getDateTimeFormat () const
 
void setDateTimeFormat (const std::string &val)
 Set the format for reading the date & time from the file. More...
 
nidas::util::UTime search (const nidas::util::UTime &tsearch) throw (nidas::util::IOException,nidas::util::ParseException)
 Search forward in a file, returning the time of the last record in the file with a time less than or equal to tsearch. More...
 
int readCF (nidas::util::UTime &time, float *data, int ndata, std::vector< std::string > *fields=0) throw (nidas::util::IOException,nidas::util::ParseException)
 Read the time and data from the current record, and return the number of values read. More...
 
const std::vector< std::string > & getCurrentFields (nidas::util::UTime *time=0)
 Return the time and fields of the current record, the one last read with readCF(). More...
 
nidas::util::UTime getCurrentTime ()
 
float getFloatField (int column, const std::vector< std::string > *fields=0)
 Convert the field at index column in the fields vector to a number, and return the number. More...
 
int getFields (int begin, int end, float *data, const std::vector< std::string > *fields=0)
 Parse a range of columns from the fields vector as numbers and store them in the array data. More...
 
nidas::util::UTime nextTime () throw ()
 
void setDSMSensor (const DSMSensor *val)
 Set the DSMSensor associated with this CalFile. More...
 
const DSMSensor * getDSMSensor () const
 
void fromDOMElement (const xercesc::DOMElement *node) throw (nidas::util::InvalidParameterException)
 Initialize myself from a xercesc::DOMElement. More...
 
virtual xercesc::DOMElement * toDOMParent (xercesc::DOMElement *parent, bool complete) const throw (xercesc::DOMException)
 Create a DOMElement and append it to the parent. More...
 
virtual xercesc::DOMElement * toDOMElement (xercesc::DOMElement *node, bool complete) const throw (xercesc::DOMException)
 Add my content into a DOMElement. More...
 

Static Public Member Functions

static std::vector< std::string > getAllPaths ()
 Return all the paths that have been set in all CalFile instances, in the order they were seen. More...
 
static const XMLCh * getNamespaceURI ()
 

Protected Member Functions

nidas::util::UTime parseTime () throw (nidas::util::ParseException)
 
void readLine () throw (nidas::util::IOException,nidas::util::ParseException)
 Read forward to next non-comment line in CalFile. More...
 
bool parseTimeComments ()
 Check the current line for special comments with timezone and datetime format settings. More...
 
int parseInclude ()
 If the current calfile record line is an include directive, parse the include filename, open it, and return 1. More...
 
void openInclude (const std::string &name) throw (nidas::util::IOException,nidas::util::ParseException)
 
int readCFInclude (nidas::util::UTime &time, float *data, int ndata, std::vector< std::string > *fields_out)
 Internal version of readCF() which reads records from the current include file, if any. More...
 

Private Member Functions

nidas::util::UTime readTime () throw (nidas::util::IOException,nidas::util::ParseException)
 Read lines, parsing special comment lines and skipping other comments or blank lines, until a record line is found. More...
 
int readCFNoLock (nidas::util::UTime &time, float *data, int ndata, std::vector< std::string > *fields) throw (nidas::util::IOException,nidas::util::ParseException)
 

Static Private Member Functions

static void freeREs ()
 
static void compileREs () throw (nidas::util::ParseException)
 

Private Attributes

std::string _name
 
std::string _fileName
 
std::string _path
 
std::string _currentFileName
 
std::string _timeZone
 
bool _utcZone
 
std::string _dateTimeFormat
 
std::ifstream _fin
 
int _curlineLength
 
char * _curline
 
int _curpos
 
bool _eofState
 
int _nline
 
nidas::util::UTime _nextTime
 
nidas::util::UTime _currentTime
 
std::vector< std::string > _currentFields
 
nidas::util::UTime _includeTime
 Time stamp of include "file" record. More...
 
nidas::util::UTime _timeAfterInclude
 Time stamp of record after include "file". More...
 
nidas::util::UTime _timeFromInclude
 Time stamp of last record in include file with time <= _includeTime. More...
 
CalFile * _include
 
const DSMSensor * _sensor
 
nidas::util::Mutex _mutex
 

Static Private Attributes

static const int INITIAL_CURLINE_LENGTH = 128
 
static nidas::util::Mutex _staticMutex
 
static int _reUsers = 0
 
static bool _reCompiled = false
 
static regex_t _dateFormatPreg
 
static regex_t _timeZonePreg
 
static regex_t _includePreg
 
static std::vector< std::string > _allPaths
 

Detailed Description

A class for reading ASCII files containing a time series of calibration data.

CalFile supports reading files like the following:

 # example cal file
 # dateFormat = "%Y %b %d %H:%M:%S"
 # timeZone = "US/Mountain"
 #
 2006 Sep 23 00:00:00    0.0 1.0
 # Offset of 1.0 after Sep 29 01:13:00
 2006 Sep 29 01:13:00    1.0 1.0
 # use calibrations for ACME sensor SN#99 after Oct 1
 2006 Oct 01 00:00:00    include "acme_sn99.dat"

As shown, comment lines begin with a '#'. There are two special comment lines, of the form 'dateFormat="blahblah"' and 'timeZone="blahblah"'. These specify the format of time in the calibration file. The dateFormat should contain date and time format descriptors as supported by the UNIX strftime function or as supported by the java.text.SimpleDateFormat class. Typically the dateFormat comment is found once in the file, before any data records.

If the dateFormat comment is not found in a file, and the dateTimeFormat attribute is not set on the instance of CalFile, then times must be in a format supported by nidas::util::UTime::parse.

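| field                 | example | UNIX                  | java |
|-----------------------|---------|-----------------------|------|
| year                  | 2006    | %Y                    | YYYY |
| month abbrev          | Sep     | %b                    | MMM  |
| numeric month         | 9       | %m                    | MM   |
| day of month          | 14      | %d                    | dd   |
| day of year (1-366)   | 257     | %j                    | DDD  |
| hour in day (00-23)   | 13      | %H                    | HH   |
| minute (00-59)        | 24      | %M                    | mm   |
| second (00-59)        | 47      | %S                    | ss   |
| millisecond (000-999) | 447     | %3f (UTime extension) | SSS  |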

Following the time fields in each record should be either numeric values or an "include" directive.

The numeric values should be space or tab separated values compatible with the usual floating point syntax, or the strings "na" or "nan" in either upper or lower case, representing not-a-number, or NaN. Since math operations using NaN result in a NaN, a calibration record containing a NaN value is a way to generate output values of NaN, indicating non-recoverable data.

An "include" directive causes another calibration file to be opened for input. The included file will be sequentially searched to set the input position to the last record with a time less than or equal to the time value of the "include" directive. What this means is that the next readCF() will return data from the included file which is valid for the time of the include directive.

An included file can also contain "include" directives.

The include directive is useful when sensors are swapped during a data acquisition period. One can keep the sensor specific calibrations in separate files, and then create a CalFile which includes the sensor calibrations for the periods that a sensor was deployed. Example:

 # initially krypton hygrometer 1101 at this site
 2006 Sep 23 00:00:00    include "krypton1101"
 # Replaced 1101 with 1393 on Oct 3.
 2006 Oct  3 01:13:00    include "krypton1393"

A typical usage of a CalFile is as follows:

 CalFile calfile;
 calfile.setFile("acme_sn1.dat");
 calfile.setPath("$ROOT/projects/$PROJECT/cal_files:$ROOT/cal_files");
 ...
 while (tsample > calfile.nextTime()) {
     try {
         dsm_time_t calTime;
         float caldata[5];
         int n = calfile.readCF(calTime, caldata, 5);
         for (int i = 0; i < n; i++) coefs[i] = caldata[i];
     }
     catch(const nidas::util::IOException& e) {
         log(e.what());
         break;
     }
     catch(const nidas::util::ParseException& e) {
         log(e.what());
         break;
     }
 }
 // use coefs[] to calibrate sample.

Constructor & Destructor Documentation

CalFile::CalFile ( )
CalFile::CalFile ( const CalFile &  x)

Copy constructor.

Copies the values of the getFile() and getPath() attributes. The CalFile will not be opened in the new copy.

References _curline, _reUsers, _staticMutex, getTimeZone(), and setTimeZone().

CalFile::~CalFile ( )

Closes the file if necessary.

References _curline, _include, _reCompiled, _reUsers, _staticMutex, close(), and freeREs().

Member Function Documentation

void CalFile::close ( )
throw (
)

Close file.

An opened CalFile is closed in the destructor, so it is not necessary to call close.

References _fin, _include, _nline, and close().

Referenced by close(), operator=(), readCFInclude(), readTime(), PConfig::resolveCalFile(), and ~CalFile().

void CalFile::compileREs ( )
throw (nidas::util::ParseException
)
staticprivate

Referenced by parseInclude(), and parseTimeComments().

bool nidas::core::CalFile::eof ( ) const
inline

Have we reached eof.

References _eofState, and _include.

Referenced by main(), readLine(), and readTime().

void CalFile::freeREs ( )
staticprivate

Referenced by ~CalFile().

void CalFile::fromDOMElement ( const xercesc::DOMElement *  )
throw (nidas::util::InvalidParameterException
)
virtual
static std::vector<std::string> nidas::core::CalFile::getAllPaths ( )
inlinestatic

Return all the paths that have been set in all CalFile instances, in the order they were seen.

These have been separated at the colons.

References _allPaths, _staticMutex, nidas::util::Mutex::lock(), and nidas::util::Mutex::unlock().

const std::vector< std::string > & CalFile::getCurrentFields ( nidas::util::UTime *  time = 0)

Return the time and fields of the current record, the one last read with readCF().

If there is no current record, then the return vector will be empty and the time will be LONG_LONG_MIN. If an include file is being read, then this returns the current fields of the included file.

References _currentFields, _currentTime, _include, and getCurrentFields().

Referenced by getCurrentFields(), nidas::dynld::isff::NCAR_TRH::handleRawRH(), and nidas::dynld::isff::NCAR_TRH::handleRawT().

const std::string& nidas::core::CalFile::getCurrentFileName ( ) const
inline
nidas::util::UTime nidas::core::CalFile::getCurrentTime ( )
inline
const std::string& nidas::core::CalFile::getDateTimeFormat ( ) const
inline

References _dateTimeFormat.

const DSMSensor * CalFile::getDSMSensor ( ) const

References _sensor.

int CalFile::getFields ( int  begin,
int  end,
float *  data,
const std::vector< std::string > *  fields = 0 
)

Parse a range of columns from the fields vector as numbers and store them in the array data.

begin is the index of the first field to parse, and end is one greater than the index of the last field to parse. The first field is index 0. If there are fewer fields than numbers, the remaining numbers are filled with nan. So the data array must point to memory for at least (end - begin) numbers. The return value is the number of fields that were parsed, so it may be less than the number of data values filled in.

Like getFloatField(), throws nidas::util::ParseException if a field cannot be converted to a number.

If fields is null, then use the same fields as getCurrentFields() would return.

References _curline, _curpos, _currentFields, nidas::core::floatNAN, getCurrentFileName(), and getLineNumber().

Referenced by getFloatField(), nidas::dynld::isff::NCAR_TRH::handleRawRH(), nidas::dynld::isff::NCAR_TRH::handleRawT(), nidas::dynld::isff::CS_Krypton::parseFields(), nidas::core::Linear::parseFields(), and nidas::core::Polynomial::parseFields().

const string & CalFile::getFile ( ) const
float CalFile::getFloatField ( int  column,
const std::vector< std::string > *  fields = 0 
)

Convert the field at index column in the fields vector to a number, and return the number.

Throws nidas::util::ParseException if the field cannot be converted to a number, and the message indicates which column caused the error. Column is a 0-based index into fields. If fields is null, then use the same fields as getCurrentFields() would return.

References getFields().

int nidas::core::CalFile::getLineNumber ( ) const
inline
const std::string& nidas::core::CalFile::getName ( ) const
inline

References _name.

Referenced by nidas::core::DSMSensor::addCalFile().

static const XMLCh* nidas::core::DOMable::getNamespaceURI ( )
inlinestaticinherited
const std::string & CalFile::getPath ( ) const

Return the search path used to find the file, and any included files: one or more directory paths separated by colons ':'.

References _path.

Referenced by nidas::dynld::raf::SyncRecordSource::createHeader(), and open().

const std::string& nidas::core::CalFile::getTimeZone ( ) const
inline

References _timeZone.

Referenced by CalFile(), and operator=().

nidas::util::UTime nidas::core::CalFile::nextTime ( )
throw (
)
inline
void CalFile::open ( )
throw (nidas::util::IOException
)

Open the file.

It is not necessary to call open(). If the user has not done an open() it will be done in the first readCF(), or search().

References _curline, _curpos, _currentFileName, _eofState, _fin, _nextTime, _path, _sensor, nidas::core::DSMSensor::expandString(), getFile(), getPath(), and ILOG.

Referenced by readLine(), and PConfig::resolveCalFile().

void CalFile::openInclude ( const std::string &  name)
throw (nidas::util::IOException,
nidas::util::ParseException
)
protected

Referenced by parseInclude().

CalFile & CalFile::operator= ( const CalFile &  rhs)

Assignment operator, like the copy constructor.

If a file is currently open it will be closed before the assignment.

References _curpos, _dateTimeFormat, _eofState, _fileName, _include, _name, _nline, _path, _sensor, close(), getTimeZone(), and setTimeZone().

int CalFile::parseInclude ( )
protected

If the current calfile record line is an include directive, parse the include filename, open it, and return 1.

Otherwise return 0. It is up to the caller to recurse into the include file to read the next cal record.

References _curline, _curpos, _includePreg, _reCompiled, _staticMutex, compileREs(), and openInclude().

n_u::UTime CalFile::parseTime ( )
throw (nidas::util::ParseException
)
protected
bool CalFile::parseTimeComments ( )
protected

Check the current line for special comments with timezone and datetime format settings.

Return true if such a comment was found and handled. Throw ParseException if there is an error with the regular expression matching.

References _curline, _curpos, _dateFormatPreg, _reCompiled, _staticMutex, _timeZonePreg, compileREs(), setDateTimeFormat(), and setTimeZone().

Referenced by readLine().

int CalFile::readCF ( nidas::util::UTime &  time,
float *  data,
int  ndata,
std::vector< std::string > *  fields = 0 
)
throw (nidas::util::IOException,
nidas::util::ParseException
)

Read the time and data from the current record, and return the number of values read.

The return value may be less than ndata, in which case values in data after n will be filled with NANs. As part of this call, the next time in the file is also read, and its result is available with nextTime(). This method uses a mutex so that multi-threaded calls should not result in crashes or unparseable data. However two threads reading the same CalFile will "steal" each other's data, meaning each thread won't read a full copy of the CalFile.

If fields is not null, then it points to a string vector to which all the fields in the calfile record will be assigned. So all the numeric fields parsed and stored in data will also be included in fields, followed by any fields past the last parsed numeric field.

For example, given this calfile line:

2016 may 1 00:00:00 0.00 0.00 0.00 0.00 0 16.70 0.0 1.0 flipped

Then 9 strings will be added to fields: "0.00", ..., "1.0", "flipped", but the returned value will still be 8, same as if fields had been null. If a caller only wants string fields, then it can retrieve them like so:

std::vector<std::string> fields;
readCF(time, 0, 0, &fields);

Cal files can have non-numeric columns interspersed with the numeric columns, in which case all the columns can be read into the fields vector, and then individual fields can be converted to numbers using getFloatField() and getFields().

After successfully reading a record with readCF(), the fields of the current record are also stashed in this CalFile and can be retrieved with getCurrentFields(). The fields are not valid except after calling readCF().

Referenced by main(), and readCFInclude().

int CalFile::readCFInclude ( nidas::util::UTime &  time,
float *  data,
int  ndata,
std::vector< std::string > *  fields_out 
)
protected

Internal version of readCF() which reads records from the current include file, if any.

Returns the result from readCF(), or else -1 if the include file has been exhausted of records and closed.

References _fileName, _include, _includeTime, _nextTime, _timeAfterInclude, nidas::util::LogContext::active(), close(), nidas::util::UTime::format(), nidas::util::LogContext::log(), LOG_VERBOSE, nextTime(), readCF(), nidas::util::UTime::toUsecs(), and VLOG.

int CalFile::readCFNoLock ( nidas::util::UTime &  time,
float *  data,
int  ndata,
std::vector< std::string > *  fields 
)
throw (nidas::util::IOException,
nidas::util::ParseException
)
private
void CalFile::readLine ( )
throw (nidas::util::IOException,
nidas::util::ParseException
)
protected

Read forward to next non-comment line in CalFile.

Place result in _curline, and index of first non-space character in _curpos. Set _eofState=true if the end of the file is reached. Also parses special comment lines like below, using parseTimeComments():

dateFormat = "xxxxx"

timeZone = "xxx"

References _curline, _curlineLength, _curpos, _eofState, _fin, _nline, eof(), getCurrentFileName(), open(), parseTimeComments(), and VLOG.

Referenced by readTime().

n_u::UTime CalFile::readTime ( )
throw (nidas::util::IOException,
nidas::util::ParseException
)
private

Read lines, parsing special comment lines and skipping other comments or blank lines, until a record line is found.

Then parse and return the time from that record. On EOF, the returned time will be a huge value, far off in the mega-distant future. Does not throw an EOFException on EOF. After parsing the time from a record, _curline contains that record, and _curpos points to the character after the datetime field. Also sets _nextTime to the returned time.

References _nextTime, close(), eof(), parseTime(), and readLine().

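nidas::util::UTime CalFile::search ( const nidas::util::UTime &  tsearch)
throw (nidas::util::IOException,
nidas::util::ParseException
)

Search forward in a file, returning the time of the last record in the file with a time less than or equal to tsearch.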

The time is available by calling nextTime(). The next call to readCF() will return that record.

void CalFile::setDateTimeFormat ( const std::string &  val)

Set the format for reading the date & time from the file.

If a "dateFormat" comment is found at the beginning of the file, this attribute will be set to that value.

References _dateTimeFormat, _fileName, nidas::util::replaceCharsIn(), and VLOG.

Referenced by parseTimeComments().

void CalFile::setDSMSensor ( const DSMSensor *  val)

Set the DSMSensor associated with this CalFile.

CalFile may need this in order to substitute for tokens like $DSM and $HEIGHT in the file or path names. Otherwise it is not necessary to setDSMSensor.

References _sensor.

Referenced by nidas::core::VariableConverter::fromDOMElement(), nidas::core::SampleTag::fromDOMElement(), and nidas::core::DSMSensor::fromDOMElement().

void CalFile::setFile ( const std::string &  val)

Set the base name of the file to be opened.

References _fileName.

Referenced by main(), and nidas::dynld::raf::SyncRecordReader::scanHeader().

void nidas::core::CalFile::setName ( const std::string &  val)
inline

An instance of CalFile can have a name.

Then more than one CalFile can be associated with an object, such as a DSMSensor, and it can differentiate them by name.

References _name.

void CalFile::setPath ( const std::string &  val)

Set the search path to find the file, and any included files: one or more directory paths separated by colons ':'.

References _allPaths, _path, _staticMutex, nidas::util::Mutex::lock(), and nidas::util::Mutex::unlock().

Referenced by main(), and nidas::dynld::raf::SyncRecordReader::scanHeader().

void CalFile::setTimeZone ( const std::string &  val)

Set the timezone for the dates & times read from the file.

If a "timeZone" comment is found at the beginning of the file, this attribute will be set to that value.

References _fileName, _timeZone, _utcZone, and VLOG.

Referenced by CalFile(), operator=(), and parseTimeComments().

xercesc::DOMElement * DOMable::toDOMElement ( xercesc::DOMElement *  node,
bool  complete 
) const
throw (xercesc::DOMException
)
virtualinherited
xercesc::DOMElement * DOMable::toDOMParent ( xercesc::DOMElement *  parent,
bool  complete 
) const
throw (xercesc::DOMException
)
virtualinherited

Member Data Documentation

vector< string > CalFile::_allPaths
staticprivate

Referenced by getAllPaths(), and setPath().

char* nidas::core::CalFile::_curline
private
int nidas::core::CalFile::_curlineLength
private

Referenced by readLine().

int nidas::core::CalFile::_curpos
private
std::vector<std::string> nidas::core::CalFile::_currentFields
private

Referenced by getCurrentFields(), and getFields().

std::string nidas::core::CalFile::_currentFileName
private

Referenced by getCurrentFileName(), and open().

nidas::util::UTime nidas::core::CalFile::_currentTime
private

Referenced by getCurrentFields(), and getCurrentTime().

regex_t CalFile::_dateFormatPreg
staticprivate

Referenced by parseTimeComments().

std::string nidas::core::CalFile::_dateTimeFormat
private
bool nidas::core::CalFile::_eofState
private

Referenced by eof(), open(), operator=(), and readLine().

std::string nidas::core::CalFile::_fileName
private
std::ifstream nidas::core::CalFile::_fin
private

Referenced by close(), open(), and readLine().

CalFile* nidas::core::CalFile::_include
private
regex_t CalFile::_includePreg
staticprivate

Referenced by parseInclude().

nidas::util::UTime nidas::core::CalFile::_includeTime
private

Time stamp of include "file" record.

Referenced by readCFInclude().

nidas::util::Mutex nidas::core::CalFile::_mutex
private
std::string nidas::core::CalFile::_name
private

Referenced by getName(), operator=(), and setName().

nidas::util::UTime nidas::core::CalFile::_nextTime
private
int nidas::core::CalFile::_nline
private
std::string nidas::core::CalFile::_path
private

Referenced by getPath(), open(), operator=(), and setPath().

bool CalFile::_reCompiled = false
staticprivate
int CalFile::_reUsers = 0
staticprivate

Referenced by CalFile(), and ~CalFile().

const DSMSensor* nidas::core::CalFile::_sensor
private
n_u::Mutex CalFile::_staticMutex
staticprivate
nidas::util::UTime nidas::core::CalFile::_timeAfterInclude
private

Time stamp of record after include "file".

Referenced by readCFInclude().

nidas::util::UTime nidas::core::CalFile::_timeFromInclude
private

Time stamp of last record in include file with time <= _includeTime.

std::string nidas::core::CalFile::_timeZone
private

Referenced by getTimeZone(), parseTime(), and setTimeZone().

regex_t CalFile::_timeZonePreg
staticprivate

Referenced by parseTimeComments().

bool nidas::core::CalFile::_utcZone
private

Referenced by parseTime(), and setTimeZone().

const int nidas::core::CalFile::INITIAL_CURLINE_LENGTH = 128
staticprivate

The documentation for this class was generated from the following files: