Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
else: # linux, unix, cygwin
args = {
"include_dirs": [numpy.get_include(), "include/", '/usr/include/EyeLink/'],
"library_dirs": ["lib/"],
"library_dirs": ["lib/", "/usr/lib/x86_64-linux-gnu"],
"libraries": ["edfapi"],
"extra_compile_args": ["-fopenmp"],
"extra_link_args": ["-fopenmp"],
Expand Down
972 changes: 486 additions & 486 deletions src/pyedfread/data/SUB001.asc

Large diffs are not rendered by default.

280 changes: 225 additions & 55 deletions src/pyedfread/edf_read.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ cimport numpy as np
import numpy as np
import string

from libc.stdint cimport int16_t, uint16_t, uint32_t, int64_t
from libc.stdint cimport int16_t, uint16_t, uint32_t, int64_t, uint64_t
from libc.stdlib cimport malloc, free

from libc.stdio cimport printf
Expand Down Expand Up @@ -236,15 +236,59 @@ def parse_message(data, trial, message_accumulator, message_filter, trial_marker
return trial


def parse_edf(

## REV: numpy type should match system float type used by eyelink... currently usually float32, but python defaults to
## float64 so may as well use that. Even better would be to "check"
def create_sample_array_memory(num_elements, flttype=None):
#First, just initialize all to float to save time.
# Note, num_elements is longer than the maximum that we will
# actually hold, so we must be sure to truncate at very end
# of parse.

if( None==flttype ):
fltbytes=sizeof(float);
if(4==sizeof(float)):
flttype=np.float32;
#print("\n\n\n\nFLOATS ARE 32\n\n\n\n");
pass;
elif(8==sizeof(float)):
flttype=np.float64;
#print("\n\n\n\nFLOATS ARE 64\n\n\n\n");
pass;
else:
raise Exception("Unsupported float type on this processor? Bytes in native C float = {}".format(fltbytes));
pass;


mydict = { colname : np.ndarray(num_elements, dtype=flttype) for colname in sample_columns };
#Then, reallocate only those which are not float64s...
mydict['time'] = np.ndarray(num_elements, dtype=np.uint32);
mydict['flags'] = np.ndarray(num_elements, dtype=np.uint16);
mydict['input'] = np.ndarray(num_elements, dtype=np.uint16);
mydict['buttons'] = np.ndarray(num_elements, dtype=np.uint16);
mydict['htype'] = np.ndarray(num_elements, dtype=np.int16);
mydict['errors'] = np.ndarray(num_elements, dtype=np.uint16);
# mydict['hdata'] is not included but may be added in future
# (it is reserved for future use).
# REV: We should check that htype is always 0. If not, this must
# be fixed to address new API/capabilities.
return mydict;



def parse_edf(
filename, ignore_samples=False, message_filter=None, trial_marker='TRIALID'
):
"""Read samples, events, and messages from an EDF file."""
"""Read samples, events, and messages from an EDF file. REV: version which uses numpy ndarrays in case bitlength of
mapping from cdef cython struct matters (probably not)

Samples type is dict with keys colnames, values are 1-d NDarrays containing (appropriately typed uint32, float64, etc. values).
"""
cdef int errval = 1
cdef char * buf = < char * > malloc(1024 * sizeof(char))
cdef EDFFILE * ef
cdef int sample_type, cnt, trial

# open the file
ef = edf_open_file(filename.encode('utf-8'), 0, 1, 1, & errval)
if errval < 0:
Expand All @@ -255,9 +299,10 @@ def parse_edf(
num_elements = edf_get_element_count(ef)
if ignore_samples:
num_elements = 0
cdef np.ndarray npsamples = np.ndarray((num_elements, 40), dtype=np.float64)
cdef np.float64_t[:, :] samples = npsamples
pass;

samples = create_sample_array_memory(num_elements);

# parse samples and events
trial = -1
cnt = 0
Expand All @@ -272,55 +317,65 @@ def parse_edf(
break

if not ignore_samples and (sample_type == SAMPLE_TYPE):
# fd is a ALLF_DATA type, a union including multiple "views" including .fs view
# which means we will access memory offsets according to FSAMPLE struct.
fd = edf_get_float_data(ef)
# Map fields explicitly into memory view
samples[cnt, 0] = float(fd.fs.time)
samples[cnt, 1] = float(fd.fs.px[0])
samples[cnt, 2] = float(fd.fs.px[1])
samples[cnt, 3] = float(fd.fs.py[0])
samples[cnt, 4] = float(fd.fs.py[1])
samples[cnt, 5] = float(fd.fs.hx[0])
samples[cnt, 6] = float(fd.fs.hx[1])
samples[cnt, 7] = float(fd.fs.hy[0])
samples[cnt, 8] = float(fd.fs.hy[1])
samples[cnt, 9] = float(fd.fs.pa[0])
samples[cnt, 10] = fd.fs.pa[1]
samples[cnt, 11] = fd.fs.gx[0]
samples[cnt, 12] = fd.fs.gx[1]
samples[cnt, 13] = fd.fs.gy[0]
samples[cnt, 14] = fd.fs.gy[1]
samples[cnt, 15] = fd.fs.rx
samples[cnt, 16] = fd.fs.ry

samples[cnt, 17] = fd.fs.gxvel[0]
samples[cnt, 18] = fd.fs.gxvel[1]
samples[cnt, 19] = fd.fs.gyvel[0]
samples[cnt, 20] = fd.fs.gyvel[1]
samples[cnt, 21] = fd.fs.hxvel[0]
samples[cnt, 22] = fd.fs.hxvel[1]
samples[cnt, 23] = fd.fs.hyvel[0]
samples[cnt, 24] = fd.fs.hyvel[1]
samples[cnt, 25] = fd.fs.rxvel[0]
samples[cnt, 26] = fd.fs.rxvel[1]
samples[cnt, 27] = fd.fs.ryvel[0]
samples[cnt, 28] = fd.fs.ryvel[1]

samples[cnt, 29] = fd.fs.fgxvel[0]
samples[cnt, 30] = fd.fs.fgyvel[0]
samples[cnt, 31] = fd.fs.fhxvel[0]
samples[cnt, 32] = fd.fs.fhyvel[0]
samples[cnt, 33] = fd.fs.frxvel[0]
samples[cnt, 34] = fd.fs.fryvel[0]

# samples[cnt, 39:48] = <float>fd.fs.hdata # head-tracker data
# (not prescaled)
samples[cnt, 35] = fd.fs.flags # flags to indicate contents
samples[cnt, 36] = fd.fs.input # extra (input word)
samples[cnt, 37] = fd.fs.buttons # button state & changes
samples[cnt, 38] = fd.fs.htype # head-tracker data type
samples[cnt, 39] = fd.fs.errors
cnt += 1


samples['time'][cnt] = fd.fs.time; #0
samples['px_left'][cnt] = fd.fs.px[0]; #1
samples['px_right'][cnt] = fd.fs.px[1]; #2
samples['py_left'][cnt] = fd.fs.py[0]; #3
samples['py_right'][cnt] = fd.fs.py[1]; #4
samples['hx_left'][cnt] = fd.fs.hx[0]; #5
samples['hx_right'][cnt] = fd.fs.hx[1]; #6
samples['hy_left'][cnt] = fd.fs.hy[0]; #7
samples['hy_right'][cnt] = fd.fs.hy[1]; #8
samples['pa_left'][cnt] = fd.fs.pa[0]; #9
samples['pa_right'][cnt] = fd.fs.pa[1]; #10
samples['gx_left'][cnt] = fd.fs.gx[0]; #11
samples['gx_right'][cnt] = fd.fs.gx[1]; #12
samples['gy_left'][cnt] = fd.fs.gy[0]; #13
samples['gy_right'][cnt] = fd.fs.gy[1]; #14
samples['rx'][cnt] = fd.fs.rx; #15
samples['ry'][cnt] = fd.fs.ry; #16
samples['gxvel_left'][cnt] = fd.fs.gxvel[0]; #17
samples['gxvel_right'][cnt] = fd.fs.gxvel[1]; #18
samples['gyvel_left'][cnt] = fd.fs.gyvel[0]; #19
samples['gyvel_right'][cnt] = fd.fs.gyvel[1]; #20
samples['hxvel_left'][cnt] = fd.fs.hxvel[0]; #21
samples['hxvel_right'][cnt] = fd.fs.hxvel[1]; #22
samples['hyvel_left'][cnt] = fd.fs.hyvel[0]; #23
samples['hyvel_right'][cnt] = fd.fs.hyvel[1]; #24
samples['rxvel_left'][cnt] = fd.fs.rxvel[0]; #25
samples['rxvel_right'][cnt] = fd.fs.rxvel[1]; #26
samples['ryvel_left'][cnt] = fd.fs.ryvel[0]; #27
samples['ryvel_right'][cnt] = fd.fs.ryvel[1]; #28

## REV: These are all accessing 0th element for some reason
samples['fgxvel'][cnt] = fd.fs.fgxvel[0]; #29
samples['fgyvel'][cnt] = fd.fs.fgyvel[0]; #30
samples['fhxvel'][cnt] = fd.fs.fhxvel[0]; #31
samples['fhyvel'][cnt] = fd.fs.fhyvel[0]; #32
samples['frxvel'][cnt] = fd.fs.frxvel[0]; #33
samples['fryvel'][cnt] = fd.fs.fryvel[0]; #34

samples['flags'][cnt] = fd.fs.flags; #35
samples['input'][cnt] = fd.fs.input; #36 # extra (input word)
samples['buttons'][cnt] = fd.fs.buttons #37 # button state & changes
samples['htype'][cnt] = fd.fs.htype; #38 # head-tracker data type
samples['errors'][cnt] = fd.fs.errors; #39


'''
# REV: hdata Don't use for now (reserved for future...)
for i in range(8):
samples['hdata_{}'][cnt][i] = fd.fs.hdata[i];
pass;
'''

cnt += 1;
pass;

elif sample_type == MESSAGEEVENT:
data = data2dict(sample_type, ef)
trial = parse_message(
Expand All @@ -335,5 +390,120 @@ def parse_edf(
free(buf)
# num_elements contained number of combined samples, messages, and events. This truncates
# to only number of samples read (cnt).
samples = samples[:cnt, :];


for key in samples:
#print("Reducing column [{}] from num_elements [={}] to cnt [={}]".format(key, num_elements, cnt));
samples[key] = samples[key][:cnt];
pass;

return samples, event_accumulator, message_accumulator



#parse_edf = parse_edf_fltint_pydict;

#REV: converts samples 'time' column from default uint32 to float64, with 0.5 msec offsets.
def samples_to_ftime(samples):
"""
Converts time column from uint32 to float64, and adds 0.5 for each with flags column
SAMPLE_ADD_OFFSET bit set true.

Input: pandas dataframe samples.

Output: Returns modified dataframe samples with time converted to float64, and with appropriate OFFSET
added.
"""
bitmask = SAMPLE_ADD_OFFSET; #bitmask = 0x0002;
HALFMS=0.5; #unit is already MSEC for edf's "time".
if( samples['flags'].dtype != np.uint16 ):
raise Exception("ERROR: flags should be of unsigned integer type (expect uint16) for bitwise-AND to work properly. Found type: {}".format(samples['flags'].dtype));
samples['_halfms'] = (0 != (bitmask & samples['flags']));
samples['time'] = samples['time'].astype(np.float64);
samples.loc[ (True==samples['_halfms']), 'time' ] += HALFMS;

# Drop temporary _halfms column
samples = samples[ [a for a in samples.columns if a is not '_halfms' ] ];

return samples;



## REV: It *says* that all non-included values will be set to MISSING_DATA (-32000ish), but I do not see that, I see random noise.
def sanitize_samples_by_flags(s):
s = s.copy();
s = s.reset_index(drop=True);

s['flags'] = s['flags'].astype(np.uint16);


if( 'eye' in s.columns ):
raise Exception("WTF eye already in samples");
s['eye'] = -1;
s['eye'] = s.eye.astype(np.int8);

f = s['flags'];

#REV: "truth statements" dont work.
s.loc[ (SAMPLE_LEFT & f) != 0, 'eye' ] = 0;
s.loc[ (SAMPLE_RIGHT & f) != 0, 'eye' ] = 1;
s.loc[ ((SAMPLE_LEFT & f) & (SAMPLE_RIGHT & f)) != 0 , 'eye' ] = 2; #2 for binoc i guess.


#REV: oh wait, it shares row...
lcols=[ c for c in s.columns if '_left' in c ];
rcols=[ c for c in s.columns if '_right' in c ];


## Delete right eye data if left eye recorded only
s.loc[ (s.eye==0), rcols ] = np.nan;

## Delete left eye data if right eye recorded only
s.loc[ (s.eye==1), lcols ] = np.nan;

haspupilsize = 0 == (f & SAMPLE_PUPILSIZE);
haspupilxy = 0 == (f & SAMPLE_PUPILXY);
hasgazexy = 0 == (f & SAMPLE_GAZEXY);
hashrefxy = 0 == (f & SAMPLE_HREFXY);
hasgazeres = 0 == (f & SAMPLE_GAZERES);


#hasstatus = 0 == (f & SAMPLE_STATUS);
hasinput = 0 == (f & SAMPLE_INPUTS);
hasbuttons = 0 == (f & SAMPLE_BUTTONS);


s.loc[ (haspupilsize), ['pa_left', 'pa_right'] ] = np.nan;

s.loc[ (haspupilxy), ['px_left', 'px_right', #REV: assuming that "r" (raw) is same as "p" (pupil)...
'py_left', 'py_right',
'rxvel_left', 'rxvel_right',
'ryvel_left', 'ryvel_right',
'frxvel', 'fryvel'] ] = np.nan;

s.loc[ (hasgazexy), ['gx_left', 'gx_right',
'gy_left', 'gy_right',
'gxvel_left', 'gxvel_right',
'gyvel_left', 'gyvel_right',
'fgxvel', 'fgyvel'] ] = np.nan;

s.loc[ (hashrefxy), ['hx_left', 'hx_right',
'hy_left', 'hy_right',
'hxvel_left', 'hxvel_right',
'hyvel_left', 'hyvel_right',
'fhxvel', 'fhyvel'] ] = np.nan;

s.loc[ (hasgazeres), ['rx', 'ry'] ] = np.nan;

#s.loc[ (hasstatus), 'errors' ] = np.nan; #if FLT?
s.loc[ (hasinput), 'input' ] = np.nan; #if FLT?
s.loc[ (hasbuttons), 'buttons' ] = np.nan; #if FLT?

for c in s.columns:
if c == 'time' or c == 'eye' or c == 'flags':
continue;
s.loc[ ((s[c]<=MISSING_DATA) | (s[c]>=1e8)), c ] = np.nan;
#s[c] = vec_float_or_nan(s[c]);
pass;

return s;
31 changes: 30 additions & 1 deletion src/pyedfread/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ def read_edf(
ignore_samples=False,
message_filter=None,
trial_marker="TRIALID",
ftime=True,
exclude_vel_cols=False,
):
"""
Parse an EDF file into a pandas.DataFrame.
Expand Down Expand Up @@ -39,6 +41,16 @@ def read_edf(
Messages that start with this string will be assumed to
indicate the start of a trial.

ftime : bool, optional (default True)
Should return time column "time" as a floating point number with appropriate adjustments made (+0.5 msec) for
samples with the appropriate flag set (for 2000 Hz sampling rate).

exclude_vel_cols : bool, optional (default True)
(Hopefully temporary) fix to exclude *vel* columns (actually sets values to NAN). fgxvel, gxvel_left, fgxvel_left, etc. all
columns containing "vel" in their string are set to NaN to avoid a seeming bug in the
(linux version eyelink-edfapi/stable,now 4.2.1197.0 amd64) library which leads to memory corruption in values returend by
edf.h edf_get_float_data()

Returns
-------
samples : pandas.DataFrame
Expand All @@ -58,7 +70,24 @@ def read_edf(
)
events = pd.DataFrame(events)
messages = pd.DataFrame(messages)
samples = pd.DataFrame(np.asarray(samples), columns=edf_read.sample_columns)
samples = pd.DataFrame(samples)

# if ftime option is set, convert time column to float and add 0.5 msec offsets if required
# by each row's "flag" column's bits.
if( True == ftime ):
samples = edf_read.samples_to_ftime(samples);
pass;

if( True == exclude_vel_cols ):
velcols=[ c for c in samples.columns if 'vel' in c ];
samples.loc[:, velcols] = np.nan;
pass;

## Reorder samples column to be in same order as previously (based on FSAMPLE struct)
samples = samples[ list(edf_read.sample_columns) ];

samples = edf_read.sanitize_samples_by_flags(samples);

return samples, events, messages


Expand Down
8 changes: 8 additions & 0 deletions tests/R_sanity/edf2csv.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@

library(eyelinkReader)
args <- commandArgs(trailingOnly = TRUE)
fn = args[1];
tag = args[2];
gaze <- read_edf(fn, import_samples=TRUE);
outfn = paste('df', string(tag), '.csv', sep='');
write.csv(gaze$samples, outfn);
Loading