Skip to content

scalars: multiplex data fetches within a tag #4050

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Aug 20, 2020
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 16 additions & 15 deletions tensorboard/components_polymer3/tf_backend/requestManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,15 @@ export class RequestOptions {
}
}

/**
 * Form data for a POST request as a convenient multidict interface,
 * since the built-in `FormData` type doesn't have a value constructor.
 *
 * A raw string value is equivalent to a singleton array, and thus an
 * empty array value is equivalent to omitting the key entirely.
 *
 * Example: `{tag: 'loss', runs: ['train', 'test']}` describes one `tag`
 * field and two `runs` fields.
 */
export interface PostData {
[key: string]: string | string[];
}

export class RequestManager {
private _queue: ResolveReject[];
private _maxRetries: number;
Expand All @@ -108,12 +117,7 @@ export class RequestManager {
* postData is provided, this request will use POST, not GET. This is an
* object mapping POST keys to a string value or an array of string values.
*/
public request(
url: string,
postData?: {
[key: string]: string;
}
): Promise<any> {
public request(url: string, postData?: PostData): Promise<any> {
const requestOptions = requestOptionsFromPostData(postData);
return this.requestWithOptions(url, requestOptions);
}
Expand Down Expand Up @@ -272,9 +276,7 @@ function buildXMLHttpRequest(
return req;
}

function requestOptionsFromPostData(postData?: {
[key: string]: string;
}): RequestOptions {
function requestOptionsFromPostData(postData?: PostData): RequestOptions {
const result = new RequestOptions();
if (!postData) {
result.methodType = HttpMethodType.GET;
Expand All @@ -285,13 +287,12 @@ function requestOptionsFromPostData(postData?: {
return result;
}

function formDataFromDictionary(postData: {[key: string]: string}) {
function formDataFromDictionary(postData: PostData) {
const formData = new FormData();
for (let postKey in postData) {
if (postKey) {
// The linter requires 'for in' loops to be filtered by an if
// condition.
formData.append(postKey, postData[postKey]);
for (const [key, maybeValues] of Object.entries(postData)) {
const values = Array.isArray(maybeValues) ? maybeValues : [maybeValues];
for (const value of values) {
formData.append(key, value);
}
}
return formData;
Expand Down
22 changes: 22 additions & 0 deletions tensorboard/plugins/scalar/http_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,25 @@ instead be in CSV format:
1443856985.705543,1448,0.7461960315704346
1443857105.704628,3438,0.5427092909812927
1443857225.705133,5417,0.5457325577735901

## `/data/plugin/scalars/scalars_multirun` (POST)

Accepts form-encoded POST data with a (required) singleton key `tag` and a
repeated key `runs`. A request that omits `tag` is rejected with a 400 response.
Returns a JSON object mapping run names to arrays of the
Comment on lines +67 to +68
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the contract if I violate this constraint? (0 or 2+ tag)? Unlike the zero length runs case, it looks like we are throwing 400.

Also, do you think it is a good idea to perhaps write an example of the request?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Zero-length runs is fine; it just returns an empty object. Missing
tag is a 400. Duplicate tags will silently pick one of them
(probably last?). I can make that an error, though; probably a good
idea. Thanks.

Also, do you think it is a good idea to perhaps write an example of
the request?

Yep. Done.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I expected to see FormData but this works too :)

form returned by `/data/plugin/scalars/scalars`. A run will only be present in
the output if there actually exists data for that run-tag combination. If there
is no data for some or all of the run-tag combinations, no error is raised, but
the response may lack runs requested in the input or be an empty object
entirely.

Example:

{
"train": [
[1443856985.705543, 1448, 0.7461960315704346],
[1443857105.704628, 3438, 0.5427092909812927]
],
"test": [
[1443857225.705133, 5417, 0.5457325577735901]
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -225,14 +225,18 @@ export class TfScalarCard extends PolymerElement {

// This function is called when data is received from the backend.
@property({type: Object})
_loadDataCallback: object = (scalarChart, datum, data) => {
const formattedData = data.map((datum) => ({
_loadDataCallback: object = (scalarChart, item, maybeData) => {
if (maybeData == null) {
console.error('Failed to load data for:', item);
return;
}
const formattedData = maybeData.map((datum) => ({
wall_time: new Date(datum[0] * 1000),
step: datum[1],
scalar: datum[2],
}));
const name = this._getSeriesNameFromDatum(datum);
scalarChart.setSeriesMetadata(name, datum);
const name = this._getSeriesNameFromDatum(item);
scalarChart.setSeriesMetadata(name, item);
scalarChart.setSeriesData(name, formattedData);
scalarChart.commitChanges();
};
Expand All @@ -257,19 +261,56 @@ export class TfScalarCard extends PolymerElement {
// this.requestManager.request(
// this.getDataLoadUrl({tag, run, experiment})
@property({type: Object})
requestData: RequestDataCallback<RunTagItem, ScalarDatum[]> = (
requestData: RequestDataCallback<RunTagItem, ScalarDatum[] | null> = (
items,
onLoad,
onFinish
) => {
const router = getRouter();
const baseUrl = router.pluginRoute('scalars', '/scalars');
const url = router.pluginRoute('scalars', '/scalars_multirun');
const runsByTag = new Map<string, string[]>();
for (const {tag, run} of items) {
let runs = runsByTag.get(tag);
if (runs == null) {
runsByTag.set(tag, (runs = []));
}
runs.push(run);
}

// Request at most this many runs at once.
//
// Back-of-the-envelope math: each scalar datum JSON value contains
// two floats and a small-ish integer. Floats are about 18 bytes,
// since f64s have -log_10(2^-53) ~= 16 digits of precision plus
// decimal point and leading zero. Small-ish integers (steps) are
// about 5 bytes. Add JSON overhead `[,,],` and you're looking at
// about 48 bytes per datum. With standard downsampling of
// 1000 points per time series, expect ~50 KB of response payload
// per requested time series.
//
// Requesting 64 time series warrants a ~3 MB response, which seems
// reasonable.
const BATCH_SIZE = 64;

const requestGroups = [];
for (const [tag, runs] of runsByTag) {
for (let i = 0; i < runs.length; i += BATCH_SIZE) {
requestGroups.push({tag, runs: runs.slice(i, i + BATCH_SIZE)});
}
}

Promise.all(
items.map((item) => {
const url = addParams(baseUrl, {tag: item.tag, run: item.run});
return this.requestManager
.request(url)
.then((data) => void onLoad({item, data}));
requestGroups.map(({tag, runs}) => {
return this.requestManager.request(url, {tag, runs}).then((allData) => {
for (const run of runs) {
const item = {tag, run};
if (Object.prototype.hasOwnProperty.call(allData, run)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason why we have to use Object.prototype.hasOwnProperty? (i.e., why not allData.hasOwnProperty?)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you have a run called hasOwnProperty, then that would be a type
error.

> ({train: 1}).hasOwnProperty("train")
true
> ({train: 1, hasOwnProperty: 2}).hasOwnProperty("train")
Thrown:
TypeError: {(intermediate value)(intermediate value)}.hasOwnProperty is not a function
> Object.prototype.hasOwnProperty.call({train: 1, hasOwnProperty: 2}, "train")
true

Yeah.

I prefer to write this kind of code defensively—even if it seems
unlikely, it sometimes happens (e.g.: #1283).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One day https://github.com/hasOwnProperty will show up in a JSON
object mapping GitHub login names to database IDs, and someone will have
a fun time…

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it. Thanks.

onLoad({item, data: allData[run]});
} else {
onLoad({item, data: null});
}
}
});
})
).finally(() => void onFinish());
};
Expand Down
34 changes: 34 additions & 0 deletions tensorboard/plugins/scalar/scalars_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

import six
from six import StringIO
import werkzeug.exceptions
from werkzeug import wrappers

from tensorboard import errors
Expand Down Expand Up @@ -64,6 +65,7 @@ def __init__(self, context):
def get_plugin_apps(self):
return {
"/scalars": self.scalars_route,
"/scalars_multirun": self.scalars_multirun_route,
"/tags": self.tags_route,
}

Expand Down Expand Up @@ -115,6 +117,21 @@ def scalars_impl(self, ctx, tag, run, experiment, output_format):
else:
return (values, "application/json")

def scalars_multirun_impl(self, ctx, tag, runs, experiment):
    """Read the scalar series of one tag for several runs at once.

    Returns a pair `(body, mime_type)`. `body` maps each run name
    returned by the data provider to a list of
    `(wall_time, step, value)` tuples for `tag`; `mime_type` is always
    `"application/json"`.
    """
    scalars_by_run = self._data_provider.read_scalars(
        ctx,
        experiment_id=experiment,
        plugin_name=metadata.PLUGIN_NAME,
        downsample=self._downsample_to,
        run_tag_filter=provider.RunTagFilter(runs=runs, tags=[tag]),
    )
    body = {}
    for (run_name, series_by_tag) in scalars_by_run.items():
        # Flatten each datum into the positional triple used on the wire.
        body[run_name] = [
            (datum.wall_time, datum.step, datum.value)
            for datum in series_by_tag[tag]
        ]
    return (body, "application/json")

@wrappers.Request.application
def tags_route(self, request):
ctx = plugin_util.context(request.environ)
Expand All @@ -140,3 +157,20 @@ def scalars_route(self, request):
ctx, tag, run, experiment, output_format
)
return http_util.Respond(request, body, mime_type)

@wrappers.Request.application
def scalars_multirun_route(self, request):
    """Given a tag and list of runs, return dict of ScalarEvent arrays.

    Expects a form-encoded POST body with exactly one `tag` value and
    any number (including zero) of `runs` values.

    Raises:
      werkzeug.exceptions.MethodNotAllowed: If the request method is
        not POST.
      errors.InvalidArgumentError: If `tag` is missing or is given more
        than once.
    """
    if request.method != "POST":
        raise werkzeug.exceptions.MethodNotAllowed(["POST"])
    # Read every `tag` value rather than `form.get("tag")`, which would
    # silently pick a single value when the key is repeated.
    tags = request.form.getlist("tag")
    runs = request.form.getlist("runs")
    if len(tags) != 1:
        raise errors.InvalidArgumentError(
            "tag must be specified exactly once; got %d tags" % len(tags)
        )
    tag = tags[0]

    ctx = plugin_util.context(request.environ)
    experiment = plugin_util.experiment_id(request.environ)
    (body, mime_type) = self.scalars_multirun_impl(
        ctx, tag, runs, experiment
    )
    return http_util.Respond(request, body, mime_type)
95 changes: 95 additions & 0 deletions tensorboard/plugins/scalar/scalars_plugin_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ class ScalarsPluginTest(tf.test.TestCase):

_RUN_WITH_LEGACY_SCALARS = "_RUN_WITH_LEGACY_SCALARS"
_RUN_WITH_SCALARS = "_RUN_WITH_SCALARS"
_RUN_WITH_SCALARS_2 = "_RUN_WITH_SCALARS_2"
_RUN_WITH_SCALARS_3 = "_RUN_WITH_SCALARS_3"
_RUN_WITH_HISTOGRAM = "_RUN_WITH_HISTOGRAM"

def load_plugin(self, run_names):
Expand Down Expand Up @@ -99,6 +101,20 @@ def generate_run(self, logdir, run_name):
display_name=self._DISPLAY_NAME,
description=self._DESCRIPTION,
).numpy()
elif run_name == self._RUN_WITH_SCALARS_2:
summ = summary.op(
self._SCALAR_TAG,
2 * tf.reduce_sum(data),
display_name=self._DISPLAY_NAME,
description=self._DESCRIPTION,
).numpy()
elif run_name == self._RUN_WITH_SCALARS_3:
summ = summary.op(
self._SCALAR_TAG,
3 * tf.reduce_sum(data),
display_name=self._DISPLAY_NAME,
description=self._DESCRIPTION,
).numpy()
elif run_name == self._RUN_WITH_HISTOGRAM:
summ = tf.compat.v1.summary.histogram(
self._HISTOGRAM_TAG, data
Expand Down Expand Up @@ -191,6 +207,85 @@ def test_scalars_with_histogram(self):
)
self.assertEqual(404, response.status_code)

def test_scalars_multirun(self):
    """Only requested runs that have data for the tag are returned."""
    loaded_runs = [
        self._RUN_WITH_SCALARS,
        self._RUN_WITH_SCALARS_2,
        self._RUN_WITH_SCALARS_3,
    ]
    server = self.load_server(loaded_runs)
    form = {
        "tag": "%s/scalar_summary" % self._SCALAR_TAG,
        "runs": [
            self._RUN_WITH_SCALARS,
            # skip _RUN_WITH_SCALARS_2
            self._RUN_WITH_SCALARS_3,
            self._RUN_WITH_HISTOGRAM,  # no data for this tag; okay
            "nonexistent_run",  # no data at all; okay
        ],
    }
    response = server.post(
        "/data/plugin/scalars/scalars_multirun", data=form
    )

    self.assertEqual(200, response.status_code)
    self.assertEqual("application/json", response.headers["Content-Type"])
    payload = json.loads(response.get_data())
    self.assertCountEqual(
        [self._RUN_WITH_SCALARS, self._RUN_WITH_SCALARS_3], payload
    )
    first_series = payload[self._RUN_WITH_SCALARS]
    third_series = payload[self._RUN_WITH_SCALARS_3]
    self.assertLen(first_series, self._STEPS)
    self.assertLen(third_series, self._STEPS)
    # The two runs must not have been served the same series.
    self.assertNotEqual(first_series[0][2], third_series[0][2])

def test_scalars_multirun_single_run(self):
    """A request naming a single run returns just that run's series."""
    # Checks for any problems with singleton arrays.
    loaded_runs = [
        self._RUN_WITH_SCALARS,
        self._RUN_WITH_SCALARS_2,
        self._RUN_WITH_SCALARS_3,
    ]
    server = self.load_server(loaded_runs)
    form = {
        "tag": "%s/scalar_summary" % self._SCALAR_TAG,
        "runs": [self._RUN_WITH_SCALARS],
    }
    response = server.post(
        "/data/plugin/scalars/scalars_multirun", data=form
    )

    self.assertEqual(200, response.status_code)
    self.assertEqual("application/json", response.headers["Content-Type"])
    payload = json.loads(response.get_data())
    self.assertCountEqual([self._RUN_WITH_SCALARS], payload)
    self.assertLen(payload[self._RUN_WITH_SCALARS], self._STEPS)

def test_scalars_multirun_no_tag(self):
    """Omitting the `tag` form key yields a 400 with an explanation."""
    server = self.load_server([self._RUN_WITH_SCALARS])
    form = {"runs": [self._RUN_WITH_SCALARS, self._RUN_WITH_SCALARS_2]}
    response = server.post(
        "/data/plugin/scalars/scalars_multirun", data=form
    )

    self.assertEqual(400, response.status_code)
    response_text = response.get_data().decode("utf-8")
    self.assertIn("tag must be specified", response_text)

def test_scalars_multirun_bad_method(self):
    """A GET request is answered with 405 and an `Allow: POST` header."""
    server = self.load_server([self._RUN_WITH_SCALARS])
    query = {
        "tag": "%s/scalar_summary" % self._SCALAR_TAG,
        "runs": [self._RUN_WITH_SCALARS, self._RUN_WITH_SCALARS_3],
    }
    response = server.get(
        "/data/plugin/scalars/scalars_multirun", query_string=query
    )

    self.assertEqual(405, response.status_code)
    self.assertEqual(response.headers["Allow"], "POST")

def test_active_with_legacy_scalars(self):
plugin = self.load_plugin([self._RUN_WITH_LEGACY_SCALARS])
self.assertFalse(plugin.is_active())
Expand Down