pandas to_feather “Unsupported numpy type 5” and force df.eval() to explicit dtype
.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty,.everyoneloves__bot-mid-leaderboard:empty{ height:90px;width:728px;box-sizing:border-box;
}
I've got a 500GB datafile that I'm modifying with df.eval() and then saving in feather format. Here is an example program showing the feather error:
import pandas as pd
df = pd.DataFrame([[1.0, 0.678, 'hello'], [2.0, 0.779, 'foo'], [3.0, 0.218, 'bar']], dtype='float32', columns=['x','y','txt'])
print(df)
print(df.dtypes)
df.eval('z=(0 + 1*(y>0.7 and y<=0.85) + 2*(y>0.85))', inplace=True)
df.eval('x=(0 + 1*(txt=="hello" or txt=="foo"))', inplace=True)
print(df.dtypes)
df.to_feather('c:/temp/test.feather')
With the following result:
x y txt
0 1.0 0.678 hello
1 2.0 0.779 foo
2 3.0 0.218 bar
x float32
y float32
txt object
dtype: object
x int32
y float32
txt object
z float64
dtype: object
---------------------------------------------------------------------------
ArrowNotImplementedError Traceback (most recent call last)
<ipython-input-35-0843a56bb3a8> in <module>()
----> 1 df.to_feather('c:/temp/test.feather')
~\Anaconda3\lib\site-packages\pandas\core\frame.py in to_feather(self, fname)
1887 """
1888 from pandas.io.feather_format import to_feather
-> 1889 to_feather(self, fname)
1890
1891 def to_parquet(self, fname, engine='auto', compression='snappy',
~\Anaconda3\lib\site-packages\pandas\io\feather_format.py in to_feather(df, path)
81 raise ValueError("feather must have string column names")
82
---> 83 feather.write_dataframe(df, path)
84
85
~\Anaconda3\lib\site-packages\pyarrow\feather.py in write_feather(df, dest)
98 writer = FeatherWriter(dest)
99 try:
--> 100 writer.write(df)
101 except Exception:
102 # Try to make sure the resource is closed
~\Anaconda3\lib\site-packages\pyarrow\feather.py in write(self, df)
78 # TODO(wesm): Remove this length check, see ARROW-1732
79 if len(df.columns) > 0:
---> 80 batch = RecordBatch.from_pandas(df, preserve_index=False)
81 for i, name in enumerate(batch.schema.names):
82 col = batch[i]
table.pxi in pyarrow.lib.RecordBatch.from_pandas()
~\Anaconda3\lib\site-packages\pyarrow\pandas_compat.py in dataframe_to_arrays(df, schema, preserve_index, nthreads)
369 arrays = [convert_column(c, t)
370 for c, t in zip(columns_to_convert,
--> 371 convert_types)]
372 else:
373 from concurrent import futures
~\Anaconda3\lib\site-packages\pyarrow\pandas_compat.py in <listcomp>(.0)
368 if nthreads == 1:
369 arrays = [convert_column(c, t)
--> 370 for c, t in zip(columns_to_convert,
371 convert_types)]
372 else:
~\Anaconda3\lib\site-packages\pyarrow\pandas_compat.py in convert_column(col, ty)
364
365 def convert_column(col, ty):
--> 366 return pa.array(col, from_pandas=True, type=ty)
367
368 if nthreads == 1:
array.pxi in pyarrow.lib.array()
array.pxi in pyarrow.lib._ndarray_to_array()
error.pxi in pyarrow.lib.check_status()
ArrowNotImplementedError: Unsupported numpy type 5
This table has 2500 columns, so it took me a while to find the offending column, especially because int32 is listed as a valid feather type: https://github.com/wesm/feather
The following code fixed the problem:
df['x'] = df['x'].astype('float32')
df.to_feather('c:/temp/test.feather')
So I have two questions:
1) What am I doing that is causing problems for to_feather()?
2) Is there a way to explicitly cast the result of an assignment? (I'm trying to avoid the extra memory alloc/dealloc required by using astype() since this is slow and provides no benefit.)
I tried this:
df.eval('x=float(0 + 1*(txt=="hello" or txt=="foo"))', inplace=True)
but it results in:
ValueError: "float" is not a supported function
python pandas eval feather
add a comment |
I've got a 500GB datafile that I'm modifying with df.eval() and then saving in feather format. Here is an example program showing the feather error:
import pandas as pd
df = pd.DataFrame([[1.0, 0.678, 'hello'], [2.0, 0.779, 'foo'], [3.0, 0.218, 'bar']], dtype='float32', columns=['x','y','txt'])
print(df)
print(df.dtypes)
df.eval('z=(0 + 1*(y>0.7 and y<=0.85) + 2*(y>0.85))', inplace=True)
df.eval('x=(0 + 1*(txt=="hello" or txt=="foo"))', inplace=True)
print(df.dtypes)
df.to_feather('c:/temp/test.feather')
With the following result:
x y txt
0 1.0 0.678 hello
1 2.0 0.779 foo
2 3.0 0.218 bar
x float32
y float32
txt object
dtype: object
x int32
y float32
txt object
z float64
dtype: object
---------------------------------------------------------------------------
ArrowNotImplementedError Traceback (most recent call last)
<ipython-input-35-0843a56bb3a8> in <module>()
----> 1 df.to_feather('c:/temp/test.feather')
~\Anaconda3\lib\site-packages\pandas\core\frame.py in to_feather(self, fname)
1887 """
1888 from pandas.io.feather_format import to_feather
-> 1889 to_feather(self, fname)
1890
1891 def to_parquet(self, fname, engine='auto', compression='snappy',
~\Anaconda3\lib\site-packages\pandas\io\feather_format.py in to_feather(df, path)
81 raise ValueError("feather must have string column names")
82
---> 83 feather.write_dataframe(df, path)
84
85
~\Anaconda3\lib\site-packages\pyarrow\feather.py in write_feather(df, dest)
98 writer = FeatherWriter(dest)
99 try:
--> 100 writer.write(df)
101 except Exception:
102 # Try to make sure the resource is closed
~\Anaconda3\lib\site-packages\pyarrow\feather.py in write(self, df)
78 # TODO(wesm): Remove this length check, see ARROW-1732
79 if len(df.columns) > 0:
---> 80 batch = RecordBatch.from_pandas(df, preserve_index=False)
81 for i, name in enumerate(batch.schema.names):
82 col = batch[i]
table.pxi in pyarrow.lib.RecordBatch.from_pandas()
~\Anaconda3\lib\site-packages\pyarrow\pandas_compat.py in dataframe_to_arrays(df, schema, preserve_index, nthreads)
369 arrays = [convert_column(c, t)
370 for c, t in zip(columns_to_convert,
--> 371 convert_types)]
372 else:
373 from concurrent import futures
~\Anaconda3\lib\site-packages\pyarrow\pandas_compat.py in <listcomp>(.0)
368 if nthreads == 1:
369 arrays = [convert_column(c, t)
--> 370 for c, t in zip(columns_to_convert,
371 convert_types)]
372 else:
~\Anaconda3\lib\site-packages\pyarrow\pandas_compat.py in convert_column(col, ty)
364
365 def convert_column(col, ty):
--> 366 return pa.array(col, from_pandas=True, type=ty)
367
368 if nthreads == 1:
array.pxi in pyarrow.lib.array()
array.pxi in pyarrow.lib._ndarray_to_array()
error.pxi in pyarrow.lib.check_status()
ArrowNotImplementedError: Unsupported numpy type 5
This table has 2500 columns, so it took me a while to find the offending column, especially because int32 is listed as a valid feather type: https://github.com/wesm/feather
The following code fixed the problem:
df['x'] = df['x'].astype('float32')
df.to_feather('c:/temp/test.feather')
So I have two questions:
1) What am I doing that is causing problems for to_feather()?
2) Is there a way to explicitly cast the result of an assignment? (I'm trying to avoid the extra memory alloc/dealloc required by using astype() since this is slow and provides no benefit.)
I tried this:
df.eval('x=float(0 + 1*(txt=="hello" or txt=="foo"))', inplace=True)
but it results in:
ValueError: "float" is not a supported function
python pandas eval feather
add a comment |
I've got a 500GB datafile that I'm modifying with df.eval() and then saving in feather format. Here is an example program showing the feather error:
import pandas as pd
df = pd.DataFrame([[1.0, 0.678, 'hello'], [2.0, 0.779, 'foo'], [3.0, 0.218, 'bar']], dtype='float32', columns=['x','y','txt'])
print(df)
print(df.dtypes)
df.eval('z=(0 + 1*(y>0.7 and y<=0.85) + 2*(y>0.85))', inplace=True)
df.eval('x=(0 + 1*(txt=="hello" or txt=="foo"))', inplace=True)
print(df.dtypes)
df.to_feather('c:/temp/test.feather')
With the following result:
x y txt
0 1.0 0.678 hello
1 2.0 0.779 foo
2 3.0 0.218 bar
x float32
y float32
txt object
dtype: object
x int32
y float32
txt object
z float64
dtype: object
---------------------------------------------------------------------------
ArrowNotImplementedError Traceback (most recent call last)
<ipython-input-35-0843a56bb3a8> in <module>()
----> 1 df.to_feather('c:/temp/test.feather')
~\Anaconda3\lib\site-packages\pandas\core\frame.py in to_feather(self, fname)
1887 """
1888 from pandas.io.feather_format import to_feather
-> 1889 to_feather(self, fname)
1890
1891 def to_parquet(self, fname, engine='auto', compression='snappy',
~\Anaconda3\lib\site-packages\pandas\io\feather_format.py in to_feather(df, path)
81 raise ValueError("feather must have string column names")
82
---> 83 feather.write_dataframe(df, path)
84
85
~\Anaconda3\lib\site-packages\pyarrow\feather.py in write_feather(df, dest)
98 writer = FeatherWriter(dest)
99 try:
--> 100 writer.write(df)
101 except Exception:
102 # Try to make sure the resource is closed
~\Anaconda3\lib\site-packages\pyarrow\feather.py in write(self, df)
78 # TODO(wesm): Remove this length check, see ARROW-1732
79 if len(df.columns) > 0:
---> 80 batch = RecordBatch.from_pandas(df, preserve_index=False)
81 for i, name in enumerate(batch.schema.names):
82 col = batch[i]
table.pxi in pyarrow.lib.RecordBatch.from_pandas()
~\Anaconda3\lib\site-packages\pyarrow\pandas_compat.py in dataframe_to_arrays(df, schema, preserve_index, nthreads)
369 arrays = [convert_column(c, t)
370 for c, t in zip(columns_to_convert,
--> 371 convert_types)]
372 else:
373 from concurrent import futures
~\Anaconda3\lib\site-packages\pyarrow\pandas_compat.py in <listcomp>(.0)
368 if nthreads == 1:
369 arrays = [convert_column(c, t)
--> 370 for c, t in zip(columns_to_convert,
371 convert_types)]
372 else:
~\Anaconda3\lib\site-packages\pyarrow\pandas_compat.py in convert_column(col, ty)
364
365 def convert_column(col, ty):
--> 366 return pa.array(col, from_pandas=True, type=ty)
367
368 if nthreads == 1:
array.pxi in pyarrow.lib.array()
array.pxi in pyarrow.lib._ndarray_to_array()
error.pxi in pyarrow.lib.check_status()
ArrowNotImplementedError: Unsupported numpy type 5
This table has 2500 columns, so it took me a while to find the offending column, especially because int32 is listed as a valid feather type: https://github.com/wesm/feather
The following code fixed the problem:
df['x'] = df['x'].astype('float32')
df.to_feather('c:/temp/test.feather')
So I have two questions:
1) What am I doing that is causing problems for to_feather()?
2) Is there a way to explicitly cast the result of an assignment? (I'm trying to avoid the extra memory alloc/dealloc required by using astype() since this is slow and provides no benefit.)
I tried this:
df.eval('x=float(0 + 1*(txt=="hello" or txt=="foo"))', inplace=True)
but it results in:
ValueError: "float" is not a supported function
python pandas eval feather
I've got a 500GB datafile that I'm modifying with df.eval() and then saving in feather format. Here is an example program showing the feather error:
import pandas as pd
df = pd.DataFrame([[1.0, 0.678, 'hello'], [2.0, 0.779, 'foo'], [3.0, 0.218, 'bar']], dtype='float32', columns=['x','y','txt'])
print(df)
print(df.dtypes)
df.eval('z=(0 + 1*(y>0.7 and y<=0.85) + 2*(y>0.85))', inplace=True)
df.eval('x=(0 + 1*(txt=="hello" or txt=="foo"))', inplace=True)
print(df.dtypes)
df.to_feather('c:/temp/test.feather')
With the following result:
x y txt
0 1.0 0.678 hello
1 2.0 0.779 foo
2 3.0 0.218 bar
x float32
y float32
txt object
dtype: object
x int32
y float32
txt object
z float64
dtype: object
---------------------------------------------------------------------------
ArrowNotImplementedError Traceback (most recent call last)
<ipython-input-35-0843a56bb3a8> in <module>()
----> 1 df.to_feather('c:/temp/test.feather')
~\Anaconda3\lib\site-packages\pandas\core\frame.py in to_feather(self, fname)
1887 """
1888 from pandas.io.feather_format import to_feather
-> 1889 to_feather(self, fname)
1890
1891 def to_parquet(self, fname, engine='auto', compression='snappy',
~\Anaconda3\lib\site-packages\pandas\io\feather_format.py in to_feather(df, path)
81 raise ValueError("feather must have string column names")
82
---> 83 feather.write_dataframe(df, path)
84
85
~\Anaconda3\lib\site-packages\pyarrow\feather.py in write_feather(df, dest)
98 writer = FeatherWriter(dest)
99 try:
--> 100 writer.write(df)
101 except Exception:
102 # Try to make sure the resource is closed
~\Anaconda3\lib\site-packages\pyarrow\feather.py in write(self, df)
78 # TODO(wesm): Remove this length check, see ARROW-1732
79 if len(df.columns) > 0:
---> 80 batch = RecordBatch.from_pandas(df, preserve_index=False)
81 for i, name in enumerate(batch.schema.names):
82 col = batch[i]
table.pxi in pyarrow.lib.RecordBatch.from_pandas()
~\Anaconda3\lib\site-packages\pyarrow\pandas_compat.py in dataframe_to_arrays(df, schema, preserve_index, nthreads)
369 arrays = [convert_column(c, t)
370 for c, t in zip(columns_to_convert,
--> 371 convert_types)]
372 else:
373 from concurrent import futures
~\Anaconda3\lib\site-packages\pyarrow\pandas_compat.py in <listcomp>(.0)
368 if nthreads == 1:
369 arrays = [convert_column(c, t)
--> 370 for c, t in zip(columns_to_convert,
371 convert_types)]
372 else:
~\Anaconda3\lib\site-packages\pyarrow\pandas_compat.py in convert_column(col, ty)
364
365 def convert_column(col, ty):
--> 366 return pa.array(col, from_pandas=True, type=ty)
367
368 if nthreads == 1:
array.pxi in pyarrow.lib.array()
array.pxi in pyarrow.lib._ndarray_to_array()
error.pxi in pyarrow.lib.check_status()
ArrowNotImplementedError: Unsupported numpy type 5
This table has 2500 columns, so it took me a while to find the offending column, especially because int32 is listed as a valid feather type: https://github.com/wesm/feather
The following code fixed the problem:
df['x'] = df['x'].astype('float32')
df.to_feather('c:/temp/test.feather')
So I have two questions:
1) What am I doing that is causing problems for to_feather()?
2) Is there a way to explicitly cast the result of an assignment? (I'm trying to avoid the extra memory alloc/dealloc required by using astype() since this is slow and provides no benefit.)
I tried this:
df.eval('x=float(0 + 1*(txt=="hello" or txt=="foo"))', inplace=True)
but it results in:
ValueError: "float" is not a supported function
python pandas eval feather
python pandas eval feather
asked Nov 27 '18 at 0:58
Scott WilsonScott Wilson
84
84
add a comment |
add a comment |
0
active
oldest
votes
Your Answer
StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");
StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "1"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);
StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});
function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: true,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: 10,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});
}
});
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53491301%2fpandas-to-feather-unsupported-numpy-type-5-and-force-df-eval-to-explicit-dty%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
0
active
oldest
votes
0
active
oldest
votes
active
oldest
votes
active
oldest
votes
Thanks for contributing an answer to Stack Overflow!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53491301%2fpandas-to-feather-unsupported-numpy-type-5-and-force-df-eval-to-explicit-dty%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown