s-ccs · flyingfalling · Apr 20, 2025 · Apr 21, 2025 · Apr 21, 2025 · Apr 21, 2025
diff --git a/setup.py b/setup.py
@@ -44,7 +44,7 @@
 else:  # linux, unix, cygwin
     args = {
         "include_dirs": [numpy.get_include(), "include/", '/usr/include/EyeLink/'],
-        "library_dirs": ["lib/"],
+        "library_dirs": ["lib/", "/usr/lib/x86_64-linux-gnu"],
         "libraries": ["edfapi"],
         "extra_compile_args": ["-fopenmp"],
         "extra_link_args": ["-fopenmp"],

diff --git a/src/pyedfread/data/SUB001.asc b/src/pyedfread/data/SUB001.asc
diff --git a/src/pyedfread/edf_read.pyx b/src/pyedfread/edf_read.pyx
@@ -6,7 +6,7 @@ cimport numpy as np
 import numpy as np
 import string
 
-from libc.stdint cimport int16_t, uint16_t, uint32_t, int64_t
+from libc.stdint cimport int16_t, uint16_t, uint32_t, int64_t, uint64_t
 from libc.stdlib cimport malloc, free
 
 from libc.stdio cimport printf
@@ -236,15 +236,59 @@ def parse_message(data, trial, message_accumulator, message_filter, trial_marker
     return trial
 
 
-def parse_edf(
+
+## REV: numpy type should match system float type used by eyelink... currently usually float32, but python defaults to
+## float64 so may as well use that. Even better would be to "check"
+def create_sample_array_memory(num_elements, flttype=None):
+    #First, just initialize all to float to save time.
+    # Note, num_elements is longer than the maximum that we will
+    # actually hold, so we must be sure to truncate at very end
+    # of parse.
+
+    if( None==flttype ):
+        fltbytes=sizeof(float);
+        if(4==sizeof(float)):
+            flttype=np.float32;
+            #print("\n\n\n\nFLOATS ARE 32\n\n\n\n");
+            pass;
+        elif(8==sizeof(float)):
+            flttype=np.float64;
+            #print("\n\n\n\nFLOATS ARE 64\n\n\n\n");
+            pass;
+        else:
+            raise Exception("Unsupported float type on this processor? Bytes in native C float = {}".format(fltbytes));
+        pass;
+
+
+    mydict = {  colname : np.ndarray(num_elements, dtype=flttype)  for colname in sample_columns };
+    #Then, reallocate only those which are not float64s...
+    mydict['time'] = np.ndarray(num_elements, dtype=np.uint32);
+    mydict['flags'] = np.ndarray(num_elements, dtype=np.uint16);
+    mydict['input'] = np.ndarray(num_elements, dtype=np.uint16);
+    mydict['buttons'] = np.ndarray(num_elements, dtype=np.uint16);
+    mydict['htype'] = np.ndarray(num_elements, dtype=np.int16);
+    mydict['errors'] = np.ndarray(num_elements, dtype=np.uint16);
+    # mydict['hdata'] is not included but may be added in future
+    # (it is reserved for future use).
+    # REV: We should check that htype is always 0. If not, this must
+    # be fixed to address new API/capabilities.
+    return mydict;
+
+
+
+def parse_edf( 
     filename, ignore_samples=False, message_filter=None, trial_marker='TRIALID'
 ):
-    """Read samples, events, and messages from an EDF file."""
+    """Read samples, events, and messages from an EDF file. REV: version which uses numpy ndarrays in case bitlength of
+    mapping from cdef cython struct matters (probably not)
+
+    Samples type is dict with keys colnames, values are 1-d NDarrays containing (appropriately typed uint32, float64, etc. values).
+    """
     cdef int errval = 1
     cdef char * buf = < char * > malloc(1024 * sizeof(char))
     cdef EDFFILE * ef
     cdef int sample_type, cnt, trial
-
+    
     # open the file
     ef = edf_open_file(filename.encode('utf-8'), 0, 1, 1, & errval)
     if errval < 0:
@@ -255,9 +299,10 @@ def parse_edf(
     num_elements = edf_get_element_count(ef)
     if ignore_samples:
         num_elements = 0
-    cdef np.ndarray npsamples = np.ndarray((num_elements, 40), dtype=np.float64)
-    cdef np.float64_t[:, :] samples = npsamples
+        pass;
 
+    samples = create_sample_array_memory(num_elements);
+
     # parse samples and events
     trial = -1
     cnt = 0
@@ -272,55 +317,65 @@ def parse_edf(
             break
 
         if not ignore_samples and (sample_type == SAMPLE_TYPE):
+            # fd is a ALLF_DATA type, a union including multiple "views" including .fs view
+            # which means we will access memory offsets according to FSAMPLE struct.
             fd = edf_get_float_data(ef)
-            # Map fields explicitly into memory view
-            samples[cnt, 0] = float(fd.fs.time)
-            samples[cnt, 1] = float(fd.fs.px[0])
-            samples[cnt, 2] = float(fd.fs.px[1])
-            samples[cnt, 3] = float(fd.fs.py[0])
-            samples[cnt, 4] = float(fd.fs.py[1])
-            samples[cnt, 5] = float(fd.fs.hx[0])
-            samples[cnt, 6] = float(fd.fs.hx[1])
-            samples[cnt, 7] = float(fd.fs.hy[0])
-            samples[cnt, 8] = float(fd.fs.hy[1])
-            samples[cnt, 9] = float(fd.fs.pa[0])
-            samples[cnt, 10] = fd.fs.pa[1]
-            samples[cnt, 11] = fd.fs.gx[0]
-            samples[cnt, 12] = fd.fs.gx[1]
-            samples[cnt, 13] = fd.fs.gy[0]
-            samples[cnt, 14] = fd.fs.gy[1]
-            samples[cnt, 15] = fd.fs.rx
-            samples[cnt, 16] = fd.fs.ry
-
-            samples[cnt, 17] = fd.fs.gxvel[0]
-            samples[cnt, 18] = fd.fs.gxvel[1]
-            samples[cnt, 19] = fd.fs.gyvel[0]
-            samples[cnt, 20] = fd.fs.gyvel[1]
-            samples[cnt, 21] = fd.fs.hxvel[0]
-            samples[cnt, 22] = fd.fs.hxvel[1]
-            samples[cnt, 23] = fd.fs.hyvel[0]
-            samples[cnt, 24] = fd.fs.hyvel[1]
-            samples[cnt, 25] = fd.fs.rxvel[0]
-            samples[cnt, 26] = fd.fs.rxvel[1]
-            samples[cnt, 27] = fd.fs.ryvel[0]
-            samples[cnt, 28] = fd.fs.ryvel[1]
-
-            samples[cnt, 29] = fd.fs.fgxvel[0]
-            samples[cnt, 30] = fd.fs.fgyvel[0]
-            samples[cnt, 31] = fd.fs.fhxvel[0]
-            samples[cnt, 32] = fd.fs.fhyvel[0]
-            samples[cnt, 33] = fd.fs.frxvel[0]
-            samples[cnt, 34] = fd.fs.fryvel[0]
-
-            # samples[cnt, 39:48] =  <float>fd.fs.hdata # head-tracker data
-            # (not prescaled)
-            samples[cnt, 35] = fd.fs.flags  # flags to indicate contents
-            samples[cnt, 36] = fd.fs.input  # extra (input word)
-            samples[cnt, 37] = fd.fs.buttons  # button state & changes
-            samples[cnt, 38] = fd.fs.htype  # head-tracker data type
-            samples[cnt, 39] = fd.fs.errors
-            cnt += 1
-
+
+            samples['time'][cnt] = fd.fs.time; #0
+            samples['px_left'][cnt] = fd.fs.px[0]; #1
+            samples['px_right'][cnt] = fd.fs.px[1]; #2
+            samples['py_left'][cnt] = fd.fs.py[0]; #3
+            samples['py_right'][cnt] = fd.fs.py[1]; #4
+            samples['hx_left'][cnt] = fd.fs.hx[0]; #5
+            samples['hx_right'][cnt] = fd.fs.hx[1]; #6
+            samples['hy_left'][cnt] = fd.fs.hy[0]; #7
+            samples['hy_right'][cnt] = fd.fs.hy[1]; #8
+            samples['pa_left'][cnt] = fd.fs.pa[0]; #9
+            samples['pa_right'][cnt] = fd.fs.pa[1]; #10
+            samples['gx_left'][cnt] = fd.fs.gx[0]; #11
+            samples['gx_right'][cnt] = fd.fs.gx[1]; #12
+            samples['gy_left'][cnt] = fd.fs.gy[0]; #13
+            samples['gy_right'][cnt] = fd.fs.gy[1]; #14
+            samples['rx'][cnt] = fd.fs.rx; #15
+            samples['ry'][cnt] = fd.fs.ry; #16
+            samples['gxvel_left'][cnt] = fd.fs.gxvel[0]; #17
+            samples['gxvel_right'][cnt] = fd.fs.gxvel[1]; #18
+            samples['gyvel_left'][cnt] = fd.fs.gyvel[0]; #19
+            samples['gyvel_right'][cnt] = fd.fs.gyvel[1]; #20
+            samples['hxvel_left'][cnt] = fd.fs.hxvel[0]; #21
+            samples['hxvel_right'][cnt] = fd.fs.hxvel[1]; #22
+            samples['hyvel_left'][cnt] = fd.fs.hyvel[0]; #23
+            samples['hyvel_right'][cnt] = fd.fs.hyvel[1]; #24
+            samples['rxvel_left'][cnt] = fd.fs.rxvel[0]; #25
+            samples['rxvel_right'][cnt] = fd.fs.rxvel[1]; #26
+            samples['ryvel_left'][cnt] = fd.fs.ryvel[0]; #27
+            samples['ryvel_right'][cnt] = fd.fs.ryvel[1]; #28
+
+            ## REV: These are all accessing 0th element for some reason
+            samples['fgxvel'][cnt] = fd.fs.fgxvel[0]; #29
+            samples['fgyvel'][cnt] = fd.fs.fgyvel[0]; #30
+            samples['fhxvel'][cnt] = fd.fs.fhxvel[0]; #31
+            samples['fhyvel'][cnt] = fd.fs.fhyvel[0]; #32
+            samples['frxvel'][cnt] = fd.fs.frxvel[0]; #33
+            samples['fryvel'][cnt] = fd.fs.fryvel[0]; #34
+
+            samples['flags'][cnt] = fd.fs.flags; #35
+            samples['input'][cnt] = fd.fs.input; #36 # extra (input word)
+            samples['buttons'][cnt] = fd.fs.buttons  #37 # button state & changes
+            samples['htype'][cnt] = fd.fs.htype; #38  # head-tracker data type
+            samples['errors'][cnt] = fd.fs.errors; #39
+
+
+            '''
+            # REV: hdata Don't use for now (reserved for future...)
+            for i in range(8):
+                samples['hdata_{}'][cnt][i] = fd.fs.hdata[i];
+                pass;
+            '''
+
+            cnt += 1;
+            pass;
+
         elif sample_type == MESSAGEEVENT:
             data = data2dict(sample_type, ef)
             trial = parse_message(
@@ -335,5 +390,120 @@ def parse_edf(
     free(buf)
     # num_elements contained number of combined samples, messages, and events. This truncates
     # to only number of samples read (cnt).
-    samples = samples[:cnt, :];
+
+
+    for key in samples:
+        #print("Reducing column [{}] from num_elements [={}] to cnt [={}]".format(key, num_elements, cnt));
+        samples[key] = samples[key][:cnt];
+        pass;
+
     return samples, event_accumulator, message_accumulator
+
+
+
+#parse_edf = parse_edf_fltint_pydict;
+
+#REV: converts samples 'time' column from default uint32 to float64, with 0.5 msec offsets.
+def samples_to_ftime(samples):
+    """
+    Converts time column from uint32 to float64, and adds 0.5 for each with flags column
+    SAMPLE_ADD_OFFSET bit set true.
+
+    Input: pandas dataframe samples.
+
+    Output: Returns modified dataframe samples with time converted to float64, and with appropriate OFFSET
+    added.
+    """
+    bitmask = SAMPLE_ADD_OFFSET; #bitmask = 0x0002;
+    HALFMS=0.5; #unit is already MSEC for edf's "time".
+    if( samples['flags'].dtype != np.uint16 ):
+        raise Exception("ERROR: flags should be of unsigned integer type (expect uint16) for bitwise-AND to work properly. Found type: {}".format(samples['flags'].dtype));
+    samples['_halfms'] = (0 != (bitmask & samples['flags']));
+    samples['time'] = samples['time'].astype(np.float64);
+    samples.loc[ (True==samples['_halfms']), 'time' ] += HALFMS;
+
+    # Drop temporary _halfms column
+    samples = samples[ [a for a in samples.columns if a is not '_halfms' ] ];
+
+    return samples;
+
+
+
+## REV: It *says* that all non-included values will be set to MISSING_DATA (-32000ish), but I do not see that, I see random noise.
+def sanitize_samples_by_flags(s):
+    s = s.copy();
+    s = s.reset_index(drop=True);
+
+    s['flags'] = s['flags'].astype(np.uint16);
+
+
+    if( 'eye' in s.columns ):
+        raise Exception("WTF eye already in samples");
+    s['eye'] = -1;
+    s['eye'] = s.eye.astype(np.int8);
+
+    f = s['flags'];
+
+    #REV: "truth statements" dont work.
+    s.loc[ (SAMPLE_LEFT & f) != 0, 'eye' ] = 0;
+    s.loc[ (SAMPLE_RIGHT & f) != 0, 'eye' ] = 1;
+    s.loc[ ((SAMPLE_LEFT & f) & (SAMPLE_RIGHT & f)) != 0 , 'eye' ] = 2; #2 for binoc i guess.
+
+
+    #REV: oh wait, it shares row...
+    lcols=[ c for c in s.columns if '_left' in c ];
+    rcols=[ c for c in s.columns if '_right' in c ];
+
+
+    ## Delete right eye data if left eye recorded only
+    s.loc[ (s.eye==0), rcols ] = np.nan;
+
+    ## Delete left eye data if right eye recorded only
+    s.loc[ (s.eye==1), lcols ] = np.nan;
+
+    haspupilsize = 0 == (f & SAMPLE_PUPILSIZE);
+    haspupilxy = 0 == (f & SAMPLE_PUPILXY);
+    hasgazexy = 0 == (f & SAMPLE_GAZEXY);
+    hashrefxy = 0 == (f & SAMPLE_HREFXY);
+    hasgazeres = 0 == (f & SAMPLE_GAZERES);
+
+
+    #hasstatus = 0 == (f & SAMPLE_STATUS);
+    hasinput = 0 == (f & SAMPLE_INPUTS);
+    hasbuttons = 0 == (f & SAMPLE_BUTTONS);
+
+
+    s.loc[ (haspupilsize), ['pa_left', 'pa_right'] ] = np.nan;
+
+    s.loc[ (haspupilxy), ['px_left', 'px_right',  #REV: assuming that "r" (raw) is same as "p" (pupil)...
+                           'py_left', 'py_right',
+                           'rxvel_left', 'rxvel_right',
+                           'ryvel_left', 'ryvel_right',
+                           'frxvel', 'fryvel'] ] = np.nan;
+
+    s.loc[ (hasgazexy), ['gx_left', 'gx_right',
+                           'gy_left', 'gy_right',
+                           'gxvel_left', 'gxvel_right',
+                           'gyvel_left', 'gyvel_right',
+                           'fgxvel', 'fgyvel'] ] = np.nan;
+
+    s.loc[ (hashrefxy), ['hx_left', 'hx_right',
+                           'hy_left', 'hy_right',
+                           'hxvel_left', 'hxvel_right',
+                           'hyvel_left', 'hyvel_right',
+                           'fhxvel', 'fhyvel'] ] = np.nan;
+
+    s.loc[ (hasgazeres), ['rx', 'ry'] ] = np.nan;
+
+    #s.loc[ (hasstatus), 'errors' ] = np.nan; #if FLT?
+    s.loc[ (hasinput), 'input' ] = np.nan; #if FLT?
+    s.loc[ (hasbuttons), 'buttons' ] = np.nan; #if FLT?
+
+    for c in s.columns:
+        if c == 'time' or c == 'eye' or c == 'flags':
+            continue;
+        s.loc[ ((s[c]<=MISSING_DATA) | (s[c]>=1e8)), c ] = np.nan;
+        #s[c] = vec_float_or_nan(s[c]);
+        pass;
+
+    return s;
diff --git a/src/pyedfread/parse.py b/src/pyedfread/parse.py
@@ -11,6 +11,8 @@ def read_edf(
     ignore_samples=False,
     message_filter=None,
     trial_marker="TRIALID",
+    ftime=True,
+    exclude_vel_cols=False,
 ):
     """
     Parse an EDF file into a pandas.DataFrame.
@@ -39,6 +41,16 @@ def read_edf(
         Messages that start with this string will be assumed to
         indicate the start of a trial.
 
+    ftime : bool, optional (default True)
+        Should return time column "time" as a floating point number with appropriate adjustments made (+0.5 msec) for
+        samples with the appropriate flag set (for 2000 Hz sampling rate).
+
+    exclude_vel_cols : bool, optional (default True)
+        (Hopefully temporary) fix to exclude *vel* columns (actually sets values to NAN). fgxvel, gxvel_left, fgxvel_left, etc. all
+        columns containing "vel" in their string are set to NaN to avoid a seeming bug in the
+        (linux version eyelink-edfapi/stable,now 4.2.1197.0 amd64) library which leads to memory corruption in values returend by
+        edf.h edf_get_float_data()
+
     Returns
     -------
     samples : pandas.DataFrame
@@ -58,7 +70,24 @@ def read_edf(
     )
     events = pd.DataFrame(events)
     messages = pd.DataFrame(messages)
-    samples = pd.DataFrame(np.asarray(samples), columns=edf_read.sample_columns)
+    samples = pd.DataFrame(samples)
+
+    # if ftime option is set, convert time column to float and add 0.5 msec offsets if required
+    # by each row's "flag" column's bits.
+    if( True == ftime ):
+        samples = edf_read.samples_to_ftime(samples);
+        pass;
+
+    if( True == exclude_vel_cols ):
+        velcols=[ c for c in samples.columns if 'vel' in c ];
+        samples.loc[:, velcols] = np.nan;
+        pass;
+
+    ## Reorder samples column to be in same order as previously (based on FSAMPLE struct)
+    samples = samples[ list(edf_read.sample_columns) ];
+
+    samples = edf_read.sanitize_samples_by_flags(samples);
+
     return samples, events, messages
 
 

diff --git a/tests/R_sanity/edf2csv.R b/tests/R_sanity/edf2csv.R
@@ -0,0 +1,8 @@
+
+library(eyelinkReader)
+args <- commandArgs(trailingOnly = TRUE)
+fn = args[1];
+tag = args[2];
+gaze <- read_edf(fn, import_samples=TRUE);
+outfn = paste('df', string(tag), '.csv', sep='');
+write.csv(gaze$samples, outfn);