Skip to content

Commit 4672c74

Browse files
committed
Transfer unique values from backend in binary
1 parent 18e095d commit 4672c74

File tree

4 files changed

+144
-122
lines changed

4 files changed

+144
-122
lines changed

ipydatagrid/datagrid.py

+26-21
Original file line numberDiff line numberDiff line change
@@ -1072,6 +1072,7 @@ def _apply_frontend_transforms(self, frontend_transforms, dataframe):
10721072
elif operator == "notempty":
10731073
dataframe = dataframe[dataframe[column].notna()]
10741074
elif operator == "in":
1075+
value = pd.Series(value, dtype=dataframe[column].dtype)
10751076
dataframe = dataframe[dataframe[column].isin(value)]
10761077
elif operator == "between":
10771078
dataframe = dataframe[
@@ -1121,22 +1122,7 @@ def _handle_comm_msg(self, _, content, _buffs):
11211122

11221123
value = value.iloc[r1 : r2 + 1, c1 : c2 + 1]
11231124

1124-
# Primary key used
1125-
index_key = self.get_dataframe_index(value)
1126-
1127-
serialized = _data_serialization_impl(
1128-
self.generate_data_object(value, "ipydguuid", index_key), None
1129-
)
1130-
1131-
# Extract all buffers
1132-
buffers = []
1133-
for column in serialized["data"].keys():
1134-
if (
1135-
not isinstance(serialized["data"][column], list)
1136-
and not serialized["data"][column]["type"] == "raw"
1137-
):
1138-
buffers.append(serialized["data"][column]["value"])
1139-
serialized["data"][column]["value"] = len(buffers) - 1
1125+
serialized, buffers = self._serialize_helper(value)
11401126

11411127
answer = {
11421128
"event_type": "data-reply",
@@ -1151,14 +1137,15 @@ def _handle_comm_msg(self, _, content, _buffs):
11511137

11521138
elif event_type == "unique-values-request":
11531139
column = content.get("column")
1154-
unique = (
1155-
self.__dataframe_reference[column].drop_duplicates().to_numpy()
1156-
)
1140+
original = self.__dataframe_reference[column].drop_duplicates()
1141+
serialized, buffers = self._serialize_helper(pd.DataFrame(original))
1142+
11571143
answer = {
11581144
"event_type": "unique-values-reply",
1159-
"values": unique,
1145+
"column": column,
1146+
"value": serialized,
11601147
}
1161-
self.send(answer)
1148+
self.send(answer, buffers)
11621149

11631150
@observe("_transforms")
11641151
def _on_transforms_changed(self, change):
@@ -1178,3 +1165,21 @@ def _on_transforms_changed(self, change):
11781165

11791166
# Should only request a tick if the transforms have changed.
11801167
self.tick()
1168+
1169+
def _serialize_helper(self, dataframe):
    """Serialize ``dataframe`` for transfer to the frontend.

    Returns a ``(serialized, buffers)`` pair where ``serialized`` is the
    JSON-safe payload and ``buffers`` holds the binary column payloads,
    each column's ``"value"`` having been replaced by its buffer index.
    """
    # The primary key identifies rows in the serialized payload.
    index_key = self.get_dataframe_index(dataframe)

    data_object = self.generate_data_object(dataframe, "ipydguuid", index_key)
    serialized = _data_serialization_impl(data_object, None)

    # Pull binary payloads out into a side-channel list so they can be
    # sent as comm buffers instead of being JSON-encoded.
    buffers = []
    for col in serialized["data"].values():
        if isinstance(col, list) or col["type"] == "raw":
            continue
        buffers.append(col["value"])
        col["value"] = len(buffers) - 1

    return serialized, buffers

js/core/deserialize.ts

+96
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
import { Dict } from '@jupyter-widgets/base';
2+
import { array_or_json_serializer } from 'bqplot';
3+
import { DataSource } from '../datasource';
4+
5+
export function unpack_raw_data(
6+
value: any | Dict<unknown> | string | (Dict<unknown> | string)[],
7+
): any {
8+
if (Array.isArray(value)) {
9+
const unpacked: any[] = [];
10+
value.forEach((sub_value, key) => {
11+
unpacked.push(unpack_raw_data(sub_value));
12+
});
13+
return unpacked;
14+
} else if (value instanceof Object && typeof value !== 'string') {
15+
const unpacked: { [key: string]: any } = {};
16+
Object.keys(value).forEach((key) => {
17+
unpacked[key] = unpack_raw_data(value[key]);
18+
});
19+
return unpacked;
20+
} else if (value === '$NaN$') {
21+
return Number.NaN;
22+
} else if (value === '$Infinity$') {
23+
return Number.POSITIVE_INFINITY;
24+
} else if (value === '$NegInfinity$') {
25+
return Number.NEGATIVE_INFINITY;
26+
} else if (value === '$NaT$') {
27+
return new Date('INVALID');
28+
} else {
29+
return value;
30+
}
31+
}
32+
33+
export function deserialize_data_simple(data: any, manager: any): any {
34+
const deserialized_data: any = {};
35+
36+
// Backward compatibility for when data.data was an array of rows
37+
// (should be removed in ipydatagrid 2.x?)
38+
if (Array.isArray(data.data)) {
39+
if (data.data.length === 0) {
40+
return deserialized_data;
41+
}
42+
43+
const unpacked = unpack_raw_data(data.data);
44+
// Turn array of rows (old approach) into a dictionary of columns as arrays (new approach)
45+
for (const column of Object.keys(unpacked[0])) {
46+
const columnData = new Array(unpacked.length);
47+
let rowIdx = 0;
48+
49+
for (const row of unpacked) {
50+
columnData[rowIdx++] = row[column];
51+
}
52+
53+
deserialized_data[column] = columnData;
54+
}
55+
56+
return deserialized_data;
57+
}
58+
59+
for (const column of Object.keys(data.data)) {
60+
deserialized_data[column] = [];
61+
62+
if (Array.isArray(data.data[column])) {
63+
deserialized_data[column] = data.data[column];
64+
continue;
65+
}
66+
67+
if (data.data[column].type == 'raw') {
68+
deserialized_data[column] = unpack_raw_data(data.data[column].value);
69+
} else {
70+
if (data.data[column].value.length !== 0) {
71+
let deserialized_array = array_or_json_serializer.deserialize(
72+
data.data[column],
73+
manager,
74+
);
75+
76+
// Turning back float32 dates into isoformat
77+
if (deserialized_array.type === 'date') {
78+
const float32Array = deserialized_array;
79+
deserialized_array = [];
80+
81+
for (let i = 0; i < float32Array.length; i++) {
82+
deserialized_array[i] = new Date(float32Array[i]).toISOString();
83+
}
84+
}
85+
86+
deserialized_data[column] = deserialized_array;
87+
}
88+
}
89+
}
90+
return deserialized_data
91+
}
92+
93+
export function deserialize_data(data: any, manager: any): DataSource {
94+
const deserialized = deserialize_data_simple(data, manager);
95+
return new DataSource(deserialized, data.fields, data.schema, true);
96+
}

js/core/streamingviewbasedjsonmodel.ts

+21-13
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
import { PromiseDelegate } from '@lumino/coreutils';
21
import { DataModel } from '@lumino/datagrid';
2+
import { deserialize_data_simple } from './deserialize';
33
import { StreamingView } from './streamingview';
44
import { TransformStateManager } from './transformStateManager';
55
import { ViewBasedJSONModel } from './viewbasedjsonmodel';
@@ -64,22 +64,30 @@ export class StreamingViewBasedJSONModel extends ViewBasedJSONModel {
6464
return Promise.resolve(this._unique.values);
6565
}
6666

67-
const promiseDelegate = new PromiseDelegate<any[]>();
68-
this._dataModel.on('msg:custom', (content) => {
69-
// when message received, want to drop this handler...
70-
// Or keep it going but need a way of identifying where to put the received data??????
71-
if (content.event_type === 'unique-values-reply') {
72-
this._unique = { region, column, values: content.values };
73-
promiseDelegate.resolve(this._unique.values);
74-
}
75-
76-
// Do I need to cancel this callback?????????
67+
const promise = new Promise<any>(resolve => {
68+
this._dataModel.once('msg:custom', (content, buffers) => {
69+
if (content.event_type === 'unique-values-reply') {
70+
const { value } = content;
71+
72+
// Bring back buffers at their original position in the data structure
73+
for (const col of Object.keys(value.data)) {
74+
if (value.data[col].type !== 'raw') {
75+
value.data[col].value = buffers[value.data[col].value];
76+
}
77+
}
78+
79+
const deserialized = deserialize_data_simple(value, null);
80+
const values = deserialized[content.column];
81+
82+
this._unique = { region, column: content.column, values };
83+
resolve(this._unique.values);
84+
}
85+
});
7786
});
7887

7988
const msg = { type: 'unique-values-request', column: column };
8089
this._dataModel.send(msg);
81-
82-
return promiseDelegate.promise;
90+
return promise;
8391
}
8492

8593
updateDataset(options: StreamingViewBasedJSONModel.IOptions): void {

js/datagrid.ts

+1-88
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import {
3131

3232
import { array_or_json_serializer } from 'bqplot';
3333

34+
import { deserialize_data } from './core/deserialize';
3435
import { ViewBasedJSONModel } from './core/viewbasedjsonmodel';
3536
import { StreamingViewBasedJSONModel } from './core/streamingviewbasedjsonmodel';
3637

@@ -45,34 +46,6 @@ import { DataSource } from './datasource';
4546
// Import CSS
4647
import '../style/jupyter-widget.css';
4748

48-
function unpack_raw_data(
49-
value: any | Dict<unknown> | string | (Dict<unknown> | string)[],
50-
): any {
51-
if (Array.isArray(value)) {
52-
const unpacked: any[] = [];
53-
value.forEach((sub_value, key) => {
54-
unpacked.push(unpack_raw_data(sub_value));
55-
});
56-
return unpacked;
57-
} else if (value instanceof Object && typeof value !== 'string') {
58-
const unpacked: { [key: string]: any } = {};
59-
Object.keys(value).forEach((key) => {
60-
unpacked[key] = unpack_raw_data(value[key]);
61-
});
62-
return unpacked;
63-
} else if (value === '$NaN$') {
64-
return Number.NaN;
65-
} else if (value === '$Infinity$') {
66-
return Number.POSITIVE_INFINITY;
67-
} else if (value === '$NegInfinity$') {
68-
return Number.NEGATIVE_INFINITY;
69-
} else if (value === '$NaT$') {
70-
return new Date('INVALID');
71-
} else {
72-
return value;
73-
}
74-
}
75-
7649
function serialize_data(data: DataSource, manager: any): any {
7750
const serialized_data: any = {};
7851
for (const column of Object.keys(data.data)) {
@@ -84,66 +57,6 @@ function serialize_data(data: DataSource, manager: any): any {
8457
return { data: serialized_data, fields: data.fields, schema: data.schema };
8558
}
8659

87-
function deserialize_data(data: any, manager: any): DataSource {
88-
const deserialized_data: any = {};
89-
90-
// Backward compatibility for when data.data was an array of rows
91-
// (should be removed in ipydatagrid 2.x?)
92-
if (Array.isArray(data.data)) {
93-
if (data.data.length === 0) {
94-
return new DataSource(deserialized_data, data.fields, data.schema, true);
95-
}
96-
97-
const unpacked = unpack_raw_data(data.data);
98-
// Turn array of rows (old approach) into a dictionary of columns as arrays (new approach)
99-
for (const column of Object.keys(unpacked[0])) {
100-
const columnData = new Array(unpacked.length);
101-
let rowIdx = 0;
102-
103-
for (const row of unpacked) {
104-
columnData[rowIdx++] = row[column];
105-
}
106-
107-
deserialized_data[column] = columnData;
108-
}
109-
110-
return new DataSource(deserialized_data, data.fields, data.schema, true);
111-
}
112-
113-
for (const column of Object.keys(data.data)) {
114-
deserialized_data[column] = [];
115-
116-
if (Array.isArray(data.data[column])) {
117-
deserialized_data[column] = data.data[column];
118-
continue;
119-
}
120-
121-
if (data.data[column].type == 'raw') {
122-
deserialized_data[column] = unpack_raw_data(data.data[column].value);
123-
} else {
124-
if (data.data[column].value.length !== 0) {
125-
let deserialized_array = array_or_json_serializer.deserialize(
126-
data.data[column],
127-
manager,
128-
);
129-
130-
// Turning back float32 dates into isoformat
131-
if (deserialized_array.type === 'date') {
132-
const float32Array = deserialized_array;
133-
deserialized_array = [];
134-
135-
for (let i = 0; i < float32Array.length; i++) {
136-
deserialized_array[i] = new Date(float32Array[i]).toISOString();
137-
}
138-
}
139-
140-
deserialized_data[column] = deserialized_array;
141-
}
142-
}
143-
}
144-
return new DataSource(deserialized_data, data.fields, data.schema, true);
145-
}
146-
14760
export class DataGridModel extends DOMWidgetModel {
14861
defaults() {
14962
return {

0 commit comments

Comments
 (0)