pandas to_feather “Unsupported numpy type 5” and force df.eval() to explicit dtype

.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty,.everyoneloves__bot-mid-leaderboard:empty{ height:90px;width:728px;box-sizing:border-box;
}

I've got a 500GB datafile that I'm modifying with df.eval() and then saving in feather format. Here is an example program showing the feather error:

import pandas as pd



df = pd.DataFrame([[1.0, 0.678, 'hello'], [2.0, 0.779, 'foo'], [3.0, 0.218, 'bar']], dtype='float32', columns=['x','y','txt'])

print(df)

print(df.dtypes)



df.eval('z=(0 + 1*(y>0.7 and y<=0.85) + 2*(y>0.85))', inplace=True)

df.eval('x=(0 + 1*(txt=="hello" or txt=="foo"))', inplace=True)

print(df.dtypes)



df.to_feather('c:/temp/test.feather')

With the following result:

     x      y    txt

0  1.0  0.678  hello

1  2.0  0.779    foo

2  3.0  0.218    bar



x      float32

y      float32

txt     object

dtype: object



x        int32

y      float32

txt     object

z      float64

dtype: object



---------------------------------------------------------------------------

ArrowNotImplementedError                  Traceback (most recent call last)

<ipython-input-35-0843a56bb3a8> in <module>()

----> 1 df.to_feather('c:/temp/test.feather')



~\Anaconda3\lib\site-packages\pandas\core\frame.py in to_feather(self, fname)

   1887         """

   1888         from pandas.io.feather_format import to_feather

-> 1889         to_feather(self, fname)

   1890 

   1891     def to_parquet(self, fname, engine='auto', compression='snappy',



~\Anaconda3\lib\site-packages\pandas\io\feather_format.py in to_feather(df, path)

     81         raise ValueError("feather must have string column names")

     82 

---> 83     feather.write_dataframe(df, path)

     84 

     85 



~\Anaconda3\lib\site-packages\pyarrow\feather.py in write_feather(df, dest)

     98     writer = FeatherWriter(dest)

     99     try:

--> 100         writer.write(df)

    101     except Exception:

    102         # Try to make sure the resource is closed



~\Anaconda3\lib\site-packages\pyarrow\feather.py in write(self, df)

     78         # TODO(wesm): Remove this length check, see ARROW-1732

     79         if len(df.columns) > 0:

---> 80             batch = RecordBatch.from_pandas(df, preserve_index=False)

     81             for i, name in enumerate(batch.schema.names):

     82                 col = batch[i]



table.pxi in pyarrow.lib.RecordBatch.from_pandas()



~\Anaconda3\lib\site-packages\pyarrow\pandas_compat.py in dataframe_to_arrays(df, schema, preserve_index, nthreads)

    369         arrays = [convert_column(c, t)

    370                   for c, t in zip(columns_to_convert,

--> 371                                   convert_types)]

    372     else:

    373         from concurrent import futures



~\Anaconda3\lib\site-packages\pyarrow\pandas_compat.py in <listcomp>(.0)

    368     if nthreads == 1:

    369         arrays = [convert_column(c, t)

--> 370                   for c, t in zip(columns_to_convert,

    371                                   convert_types)]

    372     else:



~\Anaconda3\lib\site-packages\pyarrow\pandas_compat.py in convert_column(col, ty)

    364 

    365     def convert_column(col, ty):

--> 366         return pa.array(col, from_pandas=True, type=ty)

    367 

    368     if nthreads == 1:



array.pxi in pyarrow.lib.array()



array.pxi in pyarrow.lib._ndarray_to_array()



error.pxi in pyarrow.lib.check_status()



ArrowNotImplementedError: Unsupported numpy type 5

This table has 2500 columns, so it took me a while to find the offending column, especially because int32 is listed as a valid feather type: https://github.com/wesm/feather

The following code fixed the problem:

df['x'] = df['x'].astype('float32')

df.to_feather('c:/temp/test.feather')

So I have two questions:

1) What am I doing that is causing problems for to_feather()?

2) Is there a way to explicitly cast the result of an assignment? (I'm trying to avoid the extra memory alloc/dealloc required by using astype() since this is slow and provides no benefit.)

I tried this:

df.eval('x=float(0 + 1*(txt=="hello" or txt=="foo"))', inplace=True)

but it results in:

ValueError: "float" is not a supported function

asked Nov 27 '18 at 0:58

Scott Wilson

add a comment |

I've got a 500GB datafile that I'm modifying with df.eval() and then saving in feather format. Here is an example program showing the feather error:

import pandas as pd



df = pd.DataFrame([[1.0, 0.678, 'hello'], [2.0, 0.779, 'foo'], [3.0, 0.218, 'bar']], dtype='float32', columns=['x','y','txt'])

print(df)

print(df.dtypes)



df.eval('z=(0 + 1*(y>0.7 and y<=0.85) + 2*(y>0.85))', inplace=True)

df.eval('x=(0 + 1*(txt=="hello" or txt=="foo"))', inplace=True)

print(df.dtypes)



df.to_feather('c:/temp/test.feather')

With the following result:

     x      y    txt

0  1.0  0.678  hello

1  2.0  0.779    foo

2  3.0  0.218    bar



x      float32

y      float32

txt     object

dtype: object



x        int32

y      float32

txt     object

z      float64

dtype: object



---------------------------------------------------------------------------

ArrowNotImplementedError                  Traceback (most recent call last)

<ipython-input-35-0843a56bb3a8> in <module>()

----> 1 df.to_feather('c:/temp/test.feather')



~\Anaconda3\lib\site-packages\pandas\core\frame.py in to_feather(self, fname)

   1887         """

   1888         from pandas.io.feather_format import to_feather

-> 1889         to_feather(self, fname)

   1890 

   1891     def to_parquet(self, fname, engine='auto', compression='snappy',



~\Anaconda3\lib\site-packages\pandas\io\feather_format.py in to_feather(df, path)

     81         raise ValueError("feather must have string column names")

     82 

---> 83     feather.write_dataframe(df, path)

     84 

     85 



~\Anaconda3\lib\site-packages\pyarrow\feather.py in write_feather(df, dest)

     98     writer = FeatherWriter(dest)

     99     try:

--> 100         writer.write(df)

    101     except Exception:

    102         # Try to make sure the resource is closed



~\Anaconda3\lib\site-packages\pyarrow\feather.py in write(self, df)

     78         # TODO(wesm): Remove this length check, see ARROW-1732

     79         if len(df.columns) > 0:

---> 80             batch = RecordBatch.from_pandas(df, preserve_index=False)

     81             for i, name in enumerate(batch.schema.names):

     82                 col = batch[i]



table.pxi in pyarrow.lib.RecordBatch.from_pandas()



~\Anaconda3\lib\site-packages\pyarrow\pandas_compat.py in dataframe_to_arrays(df, schema, preserve_index, nthreads)

    369         arrays = [convert_column(c, t)

    370                   for c, t in zip(columns_to_convert,

--> 371                                   convert_types)]

    372     else:

    373         from concurrent import futures



~\Anaconda3\lib\site-packages\pyarrow\pandas_compat.py in <listcomp>(.0)

    368     if nthreads == 1:

    369         arrays = [convert_column(c, t)

--> 370                   for c, t in zip(columns_to_convert,

    371                                   convert_types)]

    372     else:



~\Anaconda3\lib\site-packages\pyarrow\pandas_compat.py in convert_column(col, ty)

    364 

    365     def convert_column(col, ty):

--> 366         return pa.array(col, from_pandas=True, type=ty)

    367 

    368     if nthreads == 1:



array.pxi in pyarrow.lib.array()



array.pxi in pyarrow.lib._ndarray_to_array()



error.pxi in pyarrow.lib.check_status()



ArrowNotImplementedError: Unsupported numpy type 5

This table has 2500 columns, so it took me a while to find the offending column, especially because int32 is listed as a valid feather type: https://github.com/wesm/feather

The following code fixed the problem:

df['x'] = df['x'].astype('float32')

df.to_feather('c:/temp/test.feather')

So I have two questions:

1) What am I doing that is causing problems for to_feather()?

2) Is there a way to explicitly cast the result of an assignment? (I'm trying to avoid the extra memory alloc/dealloc required by using astype() since this is slow and provides no benefit.)

I tried this:

df.eval('x=float(0 + 1*(txt=="hello" or txt=="foo"))', inplace=True)

but it results in:

ValueError: "float" is not a supported function

asked Nov 27 '18 at 0:58

Scott Wilson

add a comment |

I've got a 500GB datafile that I'm modifying with df.eval() and then saving in feather format. Here is an example program showing the feather error:

import pandas as pd



df = pd.DataFrame([[1.0, 0.678, 'hello'], [2.0, 0.779, 'foo'], [3.0, 0.218, 'bar']], dtype='float32', columns=['x','y','txt'])

print(df)

print(df.dtypes)



df.eval('z=(0 + 1*(y>0.7 and y<=0.85) + 2*(y>0.85))', inplace=True)

df.eval('x=(0 + 1*(txt=="hello" or txt=="foo"))', inplace=True)

print(df.dtypes)



df.to_feather('c:/temp/test.feather')

With the following result:

     x      y    txt

0  1.0  0.678  hello

1  2.0  0.779    foo

2  3.0  0.218    bar



x      float32

y      float32

txt     object

dtype: object



x        int32

y      float32

txt     object

z      float64

dtype: object



---------------------------------------------------------------------------

ArrowNotImplementedError                  Traceback (most recent call last)

<ipython-input-35-0843a56bb3a8> in <module>()

----> 1 df.to_feather('c:/temp/test.feather')



~\Anaconda3\lib\site-packages\pandas\core\frame.py in to_feather(self, fname)

   1887         """

   1888         from pandas.io.feather_format import to_feather

-> 1889         to_feather(self, fname)

   1890 

   1891     def to_parquet(self, fname, engine='auto', compression='snappy',



~\Anaconda3\lib\site-packages\pandas\io\feather_format.py in to_feather(df, path)

     81         raise ValueError("feather must have string column names")

     82 

---> 83     feather.write_dataframe(df, path)

     84 

     85 



~\Anaconda3\lib\site-packages\pyarrow\feather.py in write_feather(df, dest)

     98     writer = FeatherWriter(dest)

     99     try:

--> 100         writer.write(df)

    101     except Exception:

    102         # Try to make sure the resource is closed



~\Anaconda3\lib\site-packages\pyarrow\feather.py in write(self, df)

     78         # TODO(wesm): Remove this length check, see ARROW-1732

     79         if len(df.columns) > 0:

---> 80             batch = RecordBatch.from_pandas(df, preserve_index=False)

     81             for i, name in enumerate(batch.schema.names):

     82                 col = batch[i]



table.pxi in pyarrow.lib.RecordBatch.from_pandas()



~\Anaconda3\lib\site-packages\pyarrow\pandas_compat.py in dataframe_to_arrays(df, schema, preserve_index, nthreads)

    369         arrays = [convert_column(c, t)

    370                   for c, t in zip(columns_to_convert,

--> 371                                   convert_types)]

    372     else:

    373         from concurrent import futures



~\Anaconda3\lib\site-packages\pyarrow\pandas_compat.py in <listcomp>(.0)

    368     if nthreads == 1:

    369         arrays = [convert_column(c, t)

--> 370                   for c, t in zip(columns_to_convert,

    371                                   convert_types)]

    372     else:



~\Anaconda3\lib\site-packages\pyarrow\pandas_compat.py in convert_column(col, ty)

    364 

    365     def convert_column(col, ty):

--> 366         return pa.array(col, from_pandas=True, type=ty)

    367 

    368     if nthreads == 1:



array.pxi in pyarrow.lib.array()



array.pxi in pyarrow.lib._ndarray_to_array()



error.pxi in pyarrow.lib.check_status()



ArrowNotImplementedError: Unsupported numpy type 5

This table has 2500 columns, so it took me a while to find the offending column, especially because int32 is listed as a valid feather type: https://github.com/wesm/feather

The following code fixed the problem:

df['x'] = df['x'].astype('float32')

df.to_feather('c:/temp/test.feather')

So I have two questions:

1) What am I doing that is causing problems for to_feather()?

2) Is there a way to explicitly cast the result of an assignment? (I'm trying to avoid the extra memory alloc/dealloc required by using astype() since this is slow and provides no benefit.)

I tried this:

df.eval('x=float(0 + 1*(txt=="hello" or txt=="foo"))', inplace=True)

but it results in:

ValueError: "float" is not a supported function

asked Nov 27 '18 at 0:58

Scott Wilson

I've got a 500GB datafile that I'm modifying with df.eval() and then saving in feather format. Here is an example program showing the feather error:

import pandas as pd



df = pd.DataFrame([[1.0, 0.678, 'hello'], [2.0, 0.779, 'foo'], [3.0, 0.218, 'bar']], dtype='float32', columns=['x','y','txt'])

print(df)

print(df.dtypes)



df.eval('z=(0 + 1*(y>0.7 and y<=0.85) + 2*(y>0.85))', inplace=True)

df.eval('x=(0 + 1*(txt=="hello" or txt=="foo"))', inplace=True)

print(df.dtypes)



df.to_feather('c:/temp/test.feather')

With the following result:

     x      y    txt

0  1.0  0.678  hello

1  2.0  0.779    foo

2  3.0  0.218    bar



x      float32

y      float32

txt     object

dtype: object



x        int32

y      float32

txt     object

z      float64

dtype: object



---------------------------------------------------------------------------

ArrowNotImplementedError                  Traceback (most recent call last)

<ipython-input-35-0843a56bb3a8> in <module>()

----> 1 df.to_feather('c:/temp/test.feather')



~\Anaconda3\lib\site-packages\pandas\core\frame.py in to_feather(self, fname)

   1887         """

   1888         from pandas.io.feather_format import to_feather

-> 1889         to_feather(self, fname)

   1890 

   1891     def to_parquet(self, fname, engine='auto', compression='snappy',



~\Anaconda3\lib\site-packages\pandas\io\feather_format.py in to_feather(df, path)

     81         raise ValueError("feather must have string column names")

     82 

---> 83     feather.write_dataframe(df, path)

     84 

     85 



~\Anaconda3\lib\site-packages\pyarrow\feather.py in write_feather(df, dest)

     98     writer = FeatherWriter(dest)

     99     try:

--> 100         writer.write(df)

    101     except Exception:

    102         # Try to make sure the resource is closed



~\Anaconda3\lib\site-packages\pyarrow\feather.py in write(self, df)

     78         # TODO(wesm): Remove this length check, see ARROW-1732

     79         if len(df.columns) > 0:

---> 80             batch = RecordBatch.from_pandas(df, preserve_index=False)

     81             for i, name in enumerate(batch.schema.names):

     82                 col = batch[i]



table.pxi in pyarrow.lib.RecordBatch.from_pandas()



~\Anaconda3\lib\site-packages\pyarrow\pandas_compat.py in dataframe_to_arrays(df, schema, preserve_index, nthreads)

    369         arrays = [convert_column(c, t)

    370                   for c, t in zip(columns_to_convert,

--> 371                                   convert_types)]

    372     else:

    373         from concurrent import futures



~\Anaconda3\lib\site-packages\pyarrow\pandas_compat.py in <listcomp>(.0)

    368     if nthreads == 1:

    369         arrays = [convert_column(c, t)

--> 370                   for c, t in zip(columns_to_convert,

    371                                   convert_types)]

    372     else:



~\Anaconda3\lib\site-packages\pyarrow\pandas_compat.py in convert_column(col, ty)

    364 

    365     def convert_column(col, ty):

--> 366         return pa.array(col, from_pandas=True, type=ty)

    367 

    368     if nthreads == 1:



array.pxi in pyarrow.lib.array()



array.pxi in pyarrow.lib._ndarray_to_array()



error.pxi in pyarrow.lib.check_status()



ArrowNotImplementedError: Unsupported numpy type 5

This table has 2500 columns, so it took me a while to find the offending column, especially because int32 is listed as a valid feather type: https://github.com/wesm/feather

The following code fixed the problem:

df['x'] = df['x'].astype('float32')

df.to_feather('c:/temp/test.feather')

So I have two questions:

1) What am I doing that is causing problems for to_feather()?

2) Is there a way to explicitly cast the result of an assignment? (I'm trying to avoid the extra memory alloc/dealloc required by using astype() since this is slow and provides no benefit.)

I tried this:

df.eval('x=float(0 + 1*(txt=="hello" or txt=="foo"))', inplace=True)

but it results in:

ValueError: "float" is not a supported function

python pandas eval feather

asked Nov 27 '18 at 0:58

Scott Wilson

asked Nov 27 '18 at 0:58

Scott Wilson

asked Nov 27 '18 at 0:58

Scott Wilson

asked Nov 27 '18 at 0:58

Scott Wilson

asked Nov 27 '18 at 0:58

Scott Wilson

add a comment |

0

active

oldest

votes

Your Answer

StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");

StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "1"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});

function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: true,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: 10,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});

}
});

draft saved

draft discarded

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53491301%2fpandas-to-feather-unsupported-numpy-type-5-and-force-df-eval-to-explicit-dty%23new-answer', 'question_page');
}
);

Post as a guest

Name

Required, but never shown

0

active

oldest

votes

0

active

oldest

votes

draft saved

draft discarded

Thanks for contributing an answer to Stack Overflow!

Please be sure to answer the question. Provide details and share your research!

But avoid …

Asking for help, clarification, or responding to other answers.

Making statements based on opinion; back them up with references or personal experience.

To learn more, see our tips on writing great answers.

draft saved

draft discarded

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Name

Required, but never shown

Name

Required, but never shown

This page is for reference only. If you need detailed information, please check here.

搜尋此網誌

Ytukyg