pandas to_feather “Unsupported numpy type 5” and force df.eval() to explicit dtype





.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty,.everyoneloves__bot-mid-leaderboard:empty{ height:90px;width:728px;box-sizing:border-box;
}







0















I've got a 500GB datafile that I'm modifying with df.eval() and then saving in feather format. Here is an example program showing the feather error:



import pandas as pd

df = pd.DataFrame([[1.0, 0.678, 'hello'], [2.0, 0.779, 'foo'], [3.0, 0.218, 'bar']], dtype='float32', columns=['x','y','txt'])
print(df)
print(df.dtypes)

df.eval('z=(0 + 1*(y>0.7 and y<=0.85) + 2*(y>0.85))', inplace=True)
df.eval('x=(0 + 1*(txt=="hello" or txt=="foo"))', inplace=True)
print(df.dtypes)

df.to_feather('c:/temp/test.feather')


With the following result:



     x      y    txt
0  1.0  0.678  hello
1  2.0  0.779    foo
2  3.0  0.218    bar

x float32
y float32
txt object
dtype: object

x int32
y float32
txt object
z float64
dtype: object

---------------------------------------------------------------------------
ArrowNotImplementedError Traceback (most recent call last)
<ipython-input-35-0843a56bb3a8> in <module>()
----> 1 df.to_feather('c:/temp/test.feather')

~\Anaconda3\lib\site-packages\pandas\core\frame.py in to_feather(self, fname)
1887 """
1888 from pandas.io.feather_format import to_feather
-> 1889 to_feather(self, fname)
1890
1891 def to_parquet(self, fname, engine='auto', compression='snappy',

~\Anaconda3\lib\site-packages\pandas\io\feather_format.py in to_feather(df, path)
81 raise ValueError("feather must have string column names")
82
---> 83 feather.write_dataframe(df, path)
84
85

~\Anaconda3\lib\site-packages\pyarrow\feather.py in write_feather(df, dest)
98 writer = FeatherWriter(dest)
99 try:
--> 100 writer.write(df)
101 except Exception:
102 # Try to make sure the resource is closed

~\Anaconda3\lib\site-packages\pyarrow\feather.py in write(self, df)
78 # TODO(wesm): Remove this length check, see ARROW-1732
79 if len(df.columns) > 0:
---> 80 batch = RecordBatch.from_pandas(df, preserve_index=False)
81 for i, name in enumerate(batch.schema.names):
82 col = batch[i]

table.pxi in pyarrow.lib.RecordBatch.from_pandas()

~\Anaconda3\lib\site-packages\pyarrow\pandas_compat.py in dataframe_to_arrays(df, schema, preserve_index, nthreads)
369 arrays = [convert_column(c, t)
370 for c, t in zip(columns_to_convert,
--> 371 convert_types)]
372 else:
373 from concurrent import futures

~\Anaconda3\lib\site-packages\pyarrow\pandas_compat.py in <listcomp>(.0)
368 if nthreads == 1:
369 arrays = [convert_column(c, t)
--> 370 for c, t in zip(columns_to_convert,
371 convert_types)]
372 else:

~\Anaconda3\lib\site-packages\pyarrow\pandas_compat.py in convert_column(col, ty)
364
365 def convert_column(col, ty):
--> 366 return pa.array(col, from_pandas=True, type=ty)
367
368 if nthreads == 1:

array.pxi in pyarrow.lib.array()

array.pxi in pyarrow.lib._ndarray_to_array()

error.pxi in pyarrow.lib.check_status()

ArrowNotImplementedError: Unsupported numpy type 5


This table has 2500 columns, so it took me a while to find the offending column, especially because int32 is listed as a valid feather type: https://github.com/wesm/feather



The following code fixed the problem:



df['x'] = df['x'].astype('float32')
df.to_feather('c:/temp/test.feather')


So I have two questions:



1) What am I doing that is causing problems for to_feather()?



2) Is there a way to explicitly cast the result of an assignment? (I'm trying to avoid the extra memory alloc/dealloc required by using astype() since this is slow and provides no benefit.)



I tried this:



df.eval('x=float(0 + 1*(txt=="hello" or txt=="foo"))', inplace=True)


but it results in:



ValueError: "float" is not a supported function









share|improve this question





























    0















    I've got a 500GB datafile that I'm modifying with df.eval() and then saving in feather format. Here is an example program showing the feather error:



    import pandas as pd

    df = pd.DataFrame([[1.0, 0.678, 'hello'], [2.0, 0.779, 'foo'], [3.0, 0.218, 'bar']], dtype='float32', columns=['x','y','txt'])
    print(df)
    print(df.dtypes)

    df.eval('z=(0 + 1*(y>0.7 and y<=0.85) + 2*(y>0.85))', inplace=True)
    df.eval('x=(0 + 1*(txt=="hello" or txt=="foo"))', inplace=True)
    print(df.dtypes)

    df.to_feather('c:/temp/test.feather')


    With the following result:



         x      y    txt
    0 1.0 0.678 hello
    1 2.0 0.779 foo
    2 3.0 0.218 bar

    x float32
    y float32
    txt object
    dtype: object

    x int32
    y float32
    txt object
    z float64
    dtype: object

    ---------------------------------------------------------------------------
    ArrowNotImplementedError Traceback (most recent call last)
    <ipython-input-35-0843a56bb3a8> in <module>()
    ----> 1 df.to_feather('c:/temp/test.feather')

    ~Anaconda3libsite-packagespandascoreframe.py in to_feather(self, fname)
    1887 """
    1888 from pandas.io.feather_format import to_feather
    -> 1889 to_feather(self, fname)
    1890
    1891 def to_parquet(self, fname, engine='auto', compression='snappy',

    ~Anaconda3libsite-packagespandasiofeather_format.py in to_feather(df, path)
    81 raise ValueError("feather must have string column names")
    82
    ---> 83 feather.write_dataframe(df, path)
    84
    85

    ~Anaconda3libsite-packagespyarrowfeather.py in write_feather(df, dest)
    98 writer = FeatherWriter(dest)
    99 try:
    --> 100 writer.write(df)
    101 except Exception:
    102 # Try to make sure the resource is closed

    ~Anaconda3libsite-packagespyarrowfeather.py in write(self, df)
    78 # TODO(wesm): Remove this length check, see ARROW-1732
    79 if len(df.columns) > 0:
    ---> 80 batch = RecordBatch.from_pandas(df, preserve_index=False)
    81 for i, name in enumerate(batch.schema.names):
    82 col = batch[i]

    table.pxi in pyarrow.lib.RecordBatch.from_pandas()

    ~Anaconda3libsite-packagespyarrowpandas_compat.py in dataframe_to_arrays(df, schema, preserve_index, nthreads)
    369 arrays = [convert_column(c, t)
    370 for c, t in zip(columns_to_convert,
    --> 371 convert_types)]
    372 else:
    373 from concurrent import futures

    ~Anaconda3libsite-packagespyarrowpandas_compat.py in <listcomp>(.0)
    368 if nthreads == 1:
    369 arrays = [convert_column(c, t)
    --> 370 for c, t in zip(columns_to_convert,
    371 convert_types)]
    372 else:

    ~Anaconda3libsite-packagespyarrowpandas_compat.py in convert_column(col, ty)
    364
    365 def convert_column(col, ty):
    --> 366 return pa.array(col, from_pandas=True, type=ty)
    367
    368 if nthreads == 1:

    array.pxi in pyarrow.lib.array()

    array.pxi in pyarrow.lib._ndarray_to_array()

    error.pxi in pyarrow.lib.check_status()

    ArrowNotImplementedError: Unsupported numpy type 5


    This table has 2500 columns, so it took me a while to find the offending column, especially because int32 is listed as a valid feather type: https://github.com/wesm/feather



    The following code fixed the problem:



    df['x'] = df['x'].astype('float32')
    df.to_feather('c:/temp/test.feather')


    So I have two questions:



    1) What am I doing that is causing problems for to_feather()?



    2) Is there a way to explicitly cast the result of an assignment? (I'm trying to avoid the extra memory alloc/dealloc required by using astype() since this is slow and provides no benefit.)



    I tried this:



    df.eval('x=float(0 + 1*(txt=="hello" or txt=="foo"))', inplace=True)


    but it results in:



    ValueError: "float" is not a supported function









    share|improve this question

























      0












      0








      0








      I've got a 500GB datafile that I'm modifying with df.eval() and then saving in feather format. Here is an example program showing the feather error:



      import pandas as pd

      df = pd.DataFrame([[1.0, 0.678, 'hello'], [2.0, 0.779, 'foo'], [3.0, 0.218, 'bar']], dtype='float32', columns=['x','y','txt'])
      print(df)
      print(df.dtypes)

      df.eval('z=(0 + 1*(y>0.7 and y<=0.85) + 2*(y>0.85))', inplace=True)
      df.eval('x=(0 + 1*(txt=="hello" or txt=="foo"))', inplace=True)
      print(df.dtypes)

      df.to_feather('c:/temp/test.feather')


      With the following result:



           x      y    txt
      0 1.0 0.678 hello
      1 2.0 0.779 foo
      2 3.0 0.218 bar

      x float32
      y float32
      txt object
      dtype: object

      x int32
      y float32
      txt object
      z float64
      dtype: object

      ---------------------------------------------------------------------------
      ArrowNotImplementedError Traceback (most recent call last)
      <ipython-input-35-0843a56bb3a8> in <module>()
      ----> 1 df.to_feather('c:/temp/test.feather')

      ~Anaconda3libsite-packagespandascoreframe.py in to_feather(self, fname)
      1887 """
      1888 from pandas.io.feather_format import to_feather
      -> 1889 to_feather(self, fname)
      1890
      1891 def to_parquet(self, fname, engine='auto', compression='snappy',

      ~Anaconda3libsite-packagespandasiofeather_format.py in to_feather(df, path)
      81 raise ValueError("feather must have string column names")
      82
      ---> 83 feather.write_dataframe(df, path)
      84
      85

      ~Anaconda3libsite-packagespyarrowfeather.py in write_feather(df, dest)
      98 writer = FeatherWriter(dest)
      99 try:
      --> 100 writer.write(df)
      101 except Exception:
      102 # Try to make sure the resource is closed

      ~Anaconda3libsite-packagespyarrowfeather.py in write(self, df)
      78 # TODO(wesm): Remove this length check, see ARROW-1732
      79 if len(df.columns) > 0:
      ---> 80 batch = RecordBatch.from_pandas(df, preserve_index=False)
      81 for i, name in enumerate(batch.schema.names):
      82 col = batch[i]

      table.pxi in pyarrow.lib.RecordBatch.from_pandas()

      ~Anaconda3libsite-packagespyarrowpandas_compat.py in dataframe_to_arrays(df, schema, preserve_index, nthreads)
      369 arrays = [convert_column(c, t)
      370 for c, t in zip(columns_to_convert,
      --> 371 convert_types)]
      372 else:
      373 from concurrent import futures

      ~Anaconda3libsite-packagespyarrowpandas_compat.py in <listcomp>(.0)
      368 if nthreads == 1:
      369 arrays = [convert_column(c, t)
      --> 370 for c, t in zip(columns_to_convert,
      371 convert_types)]
      372 else:

      ~Anaconda3libsite-packagespyarrowpandas_compat.py in convert_column(col, ty)
      364
      365 def convert_column(col, ty):
      --> 366 return pa.array(col, from_pandas=True, type=ty)
      367
      368 if nthreads == 1:

      array.pxi in pyarrow.lib.array()

      array.pxi in pyarrow.lib._ndarray_to_array()

      error.pxi in pyarrow.lib.check_status()

      ArrowNotImplementedError: Unsupported numpy type 5


      This table has 2500 columns, so it took me a while to find the offending column, especially because int32 is listed as a valid feather type: https://github.com/wesm/feather



      The following code fixed the problem:



      df['x'] = df['x'].astype('float32')
      df.to_feather('c:/temp/test.feather')


      So I have two questions:



      1) What am I doing that is causing problems for to_feather()?



      2) Is there a way to explicitly cast the result of an assignment? (I'm trying to avoid the extra memory alloc/dealloc required by using astype() since this is slow and provides no benefit.)



      I tried this:



      df.eval('x=float(0 + 1*(txt=="hello" or txt=="foo"))', inplace=True)


      but it results in:



      ValueError: "float" is not a supported function









      share|improve this question














      I've got a 500GB datafile that I'm modifying with df.eval() and then saving in feather format. Here is an example program showing the feather error:



      import pandas as pd

      df = pd.DataFrame([[1.0, 0.678, 'hello'], [2.0, 0.779, 'foo'], [3.0, 0.218, 'bar']], dtype='float32', columns=['x','y','txt'])
      print(df)
      print(df.dtypes)

      df.eval('z=(0 + 1*(y>0.7 and y<=0.85) + 2*(y>0.85))', inplace=True)
      df.eval('x=(0 + 1*(txt=="hello" or txt=="foo"))', inplace=True)
      print(df.dtypes)

      df.to_feather('c:/temp/test.feather')


      With the following result:



           x      y    txt
      0 1.0 0.678 hello
      1 2.0 0.779 foo
      2 3.0 0.218 bar

      x float32
      y float32
      txt object
      dtype: object

      x int32
      y float32
      txt object
      z float64
      dtype: object

      ---------------------------------------------------------------------------
      ArrowNotImplementedError Traceback (most recent call last)
      <ipython-input-35-0843a56bb3a8> in <module>()
      ----> 1 df.to_feather('c:/temp/test.feather')

      ~Anaconda3libsite-packagespandascoreframe.py in to_feather(self, fname)
      1887 """
      1888 from pandas.io.feather_format import to_feather
      -> 1889 to_feather(self, fname)
      1890
      1891 def to_parquet(self, fname, engine='auto', compression='snappy',

      ~Anaconda3libsite-packagespandasiofeather_format.py in to_feather(df, path)
      81 raise ValueError("feather must have string column names")
      82
      ---> 83 feather.write_dataframe(df, path)
      84
      85

      ~Anaconda3libsite-packagespyarrowfeather.py in write_feather(df, dest)
      98 writer = FeatherWriter(dest)
      99 try:
      --> 100 writer.write(df)
      101 except Exception:
      102 # Try to make sure the resource is closed

      ~Anaconda3libsite-packagespyarrowfeather.py in write(self, df)
      78 # TODO(wesm): Remove this length check, see ARROW-1732
      79 if len(df.columns) > 0:
      ---> 80 batch = RecordBatch.from_pandas(df, preserve_index=False)
      81 for i, name in enumerate(batch.schema.names):
      82 col = batch[i]

      table.pxi in pyarrow.lib.RecordBatch.from_pandas()

      ~Anaconda3libsite-packagespyarrowpandas_compat.py in dataframe_to_arrays(df, schema, preserve_index, nthreads)
      369 arrays = [convert_column(c, t)
      370 for c, t in zip(columns_to_convert,
      --> 371 convert_types)]
      372 else:
      373 from concurrent import futures

      ~Anaconda3libsite-packagespyarrowpandas_compat.py in <listcomp>(.0)
      368 if nthreads == 1:
      369 arrays = [convert_column(c, t)
      --> 370 for c, t in zip(columns_to_convert,
      371 convert_types)]
      372 else:

      ~Anaconda3libsite-packagespyarrowpandas_compat.py in convert_column(col, ty)
      364
      365 def convert_column(col, ty):
      --> 366 return pa.array(col, from_pandas=True, type=ty)
      367
      368 if nthreads == 1:

      array.pxi in pyarrow.lib.array()

      array.pxi in pyarrow.lib._ndarray_to_array()

      error.pxi in pyarrow.lib.check_status()

      ArrowNotImplementedError: Unsupported numpy type 5


      This table has 2500 columns, so it took me a while to find the offending column, especially because int32 is listed as a valid feather type: https://github.com/wesm/feather



      The following code fixed the problem:



      df['x'] = df['x'].astype('float32')
      df.to_feather('c:/temp/test.feather')


      So I have two questions:



      1) What am I doing that is causing problems for to_feather()?



      2) Is there a way to explicitly cast the result of an assignment? (I'm trying to avoid the extra memory alloc/dealloc required by using astype() since this is slow and provides no benefit.)



      I tried this:



      df.eval('x=float(0 + 1*(txt=="hello" or txt=="foo"))', inplace=True)


      but it results in:



      ValueError: "float" is not a supported function






      python pandas eval feather






      share|improve this question













      share|improve this question











      share|improve this question




      share|improve this question










      asked Nov 27 '18 at 0:58









      Scott Wilson

      84




      84
























          0






          active

          oldest

          votes












          Your Answer






          StackExchange.ifUsing("editor", function () {
          StackExchange.using("externalEditor", function () {
          StackExchange.using("snippets", function () {
          StackExchange.snippets.init();
          });
          });
          }, "code-snippets");

          StackExchange.ready(function() {
          var channelOptions = {
          tags: "".split(" "),
          id: "1"
          };
          initTagRenderer("".split(" "), "".split(" "), channelOptions);

          StackExchange.using("externalEditor", function() {
          // Have to fire editor after snippets, if snippets enabled
          if (StackExchange.settings.snippets.snippetsEnabled) {
          StackExchange.using("snippets", function() {
          createEditor();
          });
          }
          else {
          createEditor();
          }
          });

          function createEditor() {
          StackExchange.prepareEditor({
          heartbeatType: 'answer',
          autoActivateHeartbeat: false,
          convertImagesToLinks: true,
          noModals: true,
          showLowRepImageUploadWarning: true,
          reputationToPostImages: 10,
          bindNavPrevention: true,
          postfix: "",
          imageUploader: {
          brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
          contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
          allowUrls: true
          },
          onDemand: true,
          discardSelector: ".discard-answer"
          ,immediatelyShowMarkdownHelp:true
          });


          }
          });














          draft saved

          draft discarded


















          StackExchange.ready(
          function () {
          StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53491301%2fpandas-to-feather-unsupported-numpy-type-5-and-force-df-eval-to-explicit-dty%23new-answer', 'question_page');
          }
          );

          Post as a guest















          Required, but never shown

























          0






          active

          oldest

          votes








          0






          active

          oldest

          votes









          active

          oldest

          votes






          active

          oldest

          votes
















          draft saved

          draft discarded




















































          Thanks for contributing an answer to Stack Overflow!


          • Please be sure to answer the question. Provide details and share your research!

          But avoid



          • Asking for help, clarification, or responding to other answers.

          • Making statements based on opinion; back them up with references or personal experience.


          To learn more, see our tips on writing great answers.




          draft saved


          draft discarded














          StackExchange.ready(
          function () {
          StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53491301%2fpandas-to-feather-unsupported-numpy-type-5-and-force-df-eval-to-explicit-dty%23new-answer', 'question_page');
          }
          );

          Post as a guest















          Required, but never shown





















































          Required, but never shown














          Required, but never shown












          Required, but never shown







          Required, but never shown

































          Required, but never shown














          Required, but never shown












          Required, but never shown







          Required, but never shown







          Popular posts from this blog

          Wiesbaden

          Marschland

          Dieringhausen