Skip to content
Draft
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions benchmark/index.sql
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
BEGIN;
CREATE TEMP TABLE _benchmark (
dbname TEXT,
filename TEXT,
query TEXT,
avg FLOAT,
Expand Down Expand Up @@ -44,6 +45,7 @@ BEGIN

INSERT INTO _benchmark
SELECT
current_database() AS dbname,
_filename,
_query,
round(avg(elapsed)::numeric, 0),
Expand Down
20 changes: 20 additions & 0 deletions benchmark/init.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,26 @@ ls -lh ../vbb-2022-07-01.gtfs

env | grep '^PG' || true

psql -c 'CREATE DATABASE benchmark_raw'
export PGDATABASE=benchmark_raw

../cli.js -d \
--stops-location-index --stats-by-route-date=view \
../vbb-2022-07-01.gtfs/*.csv | sponge | psql -b

./run.sh

# The VBB 2022-07-01 GTFS feed doesn't contain any frequencies rows. In order to benchmark the frequencies implementations, we use `--minimize-stoptimes` to generate an equivalent feed that includes frequencies.
gtfstidy --show-warnings \
--minimize-stoptimes \
-o ../vbb-2022-07-01.tidied.gtfs ../vbb-2022-07-01.gtfs
ls -lh ../vbb-2022-07-01.tidied.gtfs

psql -c 'CREATE DATABASE benchmark_tidied'
export PGDATABASE=benchmark_tidied

../cli.js -d \
--stops-location-index --stats-by-route-date=view \
../vbb-2022-07-01.tidied.gtfs/*.txt | sponge | psql -b

./run.sh
195 changes: 103 additions & 92 deletions lib/stop_times.js
Original file line number Diff line number Diff line change
Expand Up @@ -262,46 +262,13 @@ WITH stop_times_based AS NOT MATERIALIZED (
)
-- stop_times-based arrivals/departures
SELECT
(
encode(trip_id::bytea, 'base64')
|| ':' || encode((
extract(ISOYEAR FROM "date")
|| '-' || lpad(extract(MONTH FROM "date")::text, 2, '0')
|| '-' || lpad(extract(DAY FROM "date")::text, 2, '0')
)::bytea, 'base64')
|| ':' || encode((stop_sequence::text)::bytea, 'base64')
-- frequencies_row
|| ':' || encode('-1'::bytea, 'base64')
-- frequencies_it
|| ':' || encode('-1'::bytea, 'base64')
) as arrival_departure_id,

stop_times_based.*,
-- todo: expose local arrival/departure "wall clock time"?

-1 AS frequencies_row,
-1 AS frequencies_it
-1 AS frequencies_row
FROM stop_times_based
UNION ALL
-- frequencies-based arrivals/departures
SELECT
(
encode(trip_id::bytea, 'base64')
|| ':' || encode((
extract(ISOYEAR FROM "date")
|| '-' || lpad(extract(MONTH FROM "date")::text, 2, '0')
|| '-' || lpad(extract(DAY FROM "date")::text, 2, '0')
)::bytea, 'base64')
|| ':' || encode((stop_sequence::text)::bytea, 'base64')
|| ':' || encode((frequencies_row::text)::bytea, 'base64')
|| ':' || encode((frequencies_it::text)::bytea, 'base64')
) as arrival_departure_id,
*
FROM (
SELECT
*,
row_number() OVER (PARTITION BY trip_id, "date", stop_sequence)::integer AS frequencies_it
FROM (
SELECT
-- stop_times_based.* except t_arrival & t_departure, duh
-- todo: find a way to use all columns without explicitly enumerating them here
Expand Down Expand Up @@ -341,15 +308,52 @@ FROM (
FROM stop_times_based
JOIN "${opt.schema}".frequencies ON frequencies.trip_id = stop_times_based.trip_id
WHERE frequencies.exact_times = 'schedule_based' -- todo: is this correct?
) t
) t
) frequencies_based;

${opt.postgraphile ? `\
-- todo: currently named arrivalsDeparture, should be arrivalDeparture (but allArrivalsDeparturesList!)
COMMENT ON COLUMN "${opt.schema}".arrivals_departures.route_short_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures.route_long_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures.route_type IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures.direction_id IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures.trip_headsign IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures.stop_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures.station_name IS E'@omit';
-- > If you want to rename just one field or type, your best bet is to use a [@name] smart comment […].
-- > NOTE: this still uses the inflectors, but it pretends that the tables name is different, so the input to the inflectors differs.
-- https://www.graphile.org/postgraphile/inflection/#overriding-naming---one-off
COMMENT ON VIEW "${opt.schema}".arrivals_departures IS E'@name arrival_departures\\n@primaryKey trip_id,date,stop_sequence,frequencies_row,frequencies_it\\n@foreignKey (route_id) references routes|@fieldName route\\n@foreignKey (trip_id) references trips|@fieldName trip\\n@foreignKey (stop_id) references stops|@fieldName stop\\n@foreignKey (station_id) references stops|@fieldName station';
` : ''}

CREATE OR REPLACE VIEW "${opt.schema}".arrivals_departures_with_ids AS
SELECT
*,
(
encode(trip_id::bytea, 'base64')
|| ':' || encode((
extract(ISOYEAR FROM "date")
|| '-' || lpad(extract(MONTH FROM "date")::text, 2, '0')
|| '-' || lpad(extract(DAY FROM "date")::text, 2, '0')
)::bytea, 'base64')
|| ':' || encode((stop_sequence::text)::bytea, 'base64')
|| ':' || encode((frequencies_row::text)::bytea, 'base64')
|| ':' || encode((frequencies_it::text)::bytea, 'base64')
) as arrival_departure_id
FROM (
SELECT
*,
(CASE WHEN frequencies_row = -1
THEN -1
ELSE row_number() OVER (PARTITION BY trip_id, "date", stop_sequence)::integer
END) AS frequencies_it
FROM "${opt.schema}".arrivals_departures
) t;

CREATE OR REPLACE FUNCTION "${opt.schema}".arrival_departure_by_arrival_departure_id(id TEXT)
RETURNS "${opt.schema}".arrivals_departures
RETURNS "${opt.schema}".arrivals_departures_with_ids
AS $$
SELECT *
FROM "${opt.schema}".arrivals_departures
FROM "${opt.schema}".arrivals_departures_with_ids
WHERE trip_id = convert_from(decode(split_part(id, ':', 1), 'base64'), 'UTF-8')::text
AND "date" = (convert_from(decode(split_part(id, ':', 2), 'base64'), 'UTF-8')::text)::timestamp
AND stop_sequence = (convert_from(decode(split_part(id, ':', 3), 'base64'), 'UTF-8')::text)::integer
Expand All @@ -360,18 +364,19 @@ AS $$
$$ LANGUAGE SQL STABLE STRICT;

${opt.postgraphile ? `\
-- todo: currently named arrivalsDeparture, should be arrivalDeparture (but allArrivalsDeparturesList!)
COMMENT ON COLUMN "${opt.schema}".arrivals_departures.route_short_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures.route_long_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures.route_type IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures.direction_id IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures.trip_headsign IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures.stop_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures.station_name IS E'@omit';
-- todo: DRY with comments on arrivals_departures
-- todo: what is the graphql field name? postgraphile singularifies most names
COMMENT ON COLUMN "${opt.schema}".arrivals_departures_with_ids.route_short_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures_with_ids.route_long_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures_with_ids.route_type IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures_with_ids.direction_id IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures_with_ids.trip_headsign IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures_with_ids.stop_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".arrivals_departures_with_ids.station_name IS E'@omit';
-- > If you want to rename just one field or type, your best bet is to use a [@name] smart comment […].
-- > NOTE: this still uses the inflectors, but it pretends that the tables name is different, so the input to the inflectors differs.
-- https://www.graphile.org/postgraphile/inflection/#overriding-naming---one-off
COMMENT ON VIEW "${opt.schema}".arrivals_departures IS E'@name arrival_departures\\n@primaryKey trip_id,date,stop_sequence,frequencies_row,frequencies_it\\n@foreignKey (route_id) references routes|@fieldName route\\n@foreignKey (trip_id) references trips|@fieldName trip\\n@foreignKey (stop_id) references stops|@fieldName stop\\n@foreignKey (station_id) references stops|@fieldName station';
COMMENT ON VIEW "${opt.schema}".arrivals_departures_with_ids IS E'@name arrivals_departures_with_ids\\n@primaryKey trip_id,date,stop_sequence,frequencies_row,frequencies_it\\n@foreignKey (route_id) references routes|@fieldName route\\n@foreignKey (trip_id) references trips|@fieldName trip\\n@foreignKey (stop_id) references stops|@fieldName stop\\n@foreignKey (station_id) references stops|@fieldName station';
` : ''}

CREATE OR REPLACE VIEW "${opt.schema}".connections AS
Expand Down Expand Up @@ -503,40 +508,13 @@ WITH stop_times_based AS NOT MATERIALIZED (
)
-- stop_times-based connections
SELECT
(
encode(trip_id::bytea, 'base64')
|| ':' || encode((
extract(ISOYEAR FROM "date")
|| '-' || lpad(extract(MONTH FROM "date")::text, 2, '0')
|| '-' || lpad(extract(DAY FROM "date")::text, 2, '0')
)::bytea, 'base64')
|| ':' || encode((from_stop_sequence::text)::bytea, 'base64')
-- frequencies_row
|| ':' || encode('-1'::bytea, 'base64')
-- frequencies_it
|| ':' || encode('-1'::bytea, 'base64')
) as connection_id,

stop_times_based.*,

-1 AS frequencies_row,
-1 AS frequencies_it
-1 AS frequencies_row
FROM stop_times_based
UNION ALL
-- frequencies-based connections
SELECT
(
encode(trip_id::bytea, 'base64')
|| ':' || encode((
extract(ISOYEAR FROM "date")
|| '-' || lpad(extract(MONTH FROM "date")::text, 2, '0')
|| '-' || lpad(extract(DAY FROM "date")::text, 2, '0')
)::bytea, 'base64')
|| ':' || encode((from_stop_sequence::text)::bytea, 'base64')
|| ':' || encode((frequencies_row::text)::bytea, 'base64')
|| ':' || encode((frequencies_it::text)::bytea, 'base64')
) as connection_id,

-- stop_times_based.* except t_arrival & t_departure, duh
-- todo: find a way to use all columns without explicitly enumerating them here
route_id, route_short_name, route_long_name, route_type,
Expand Down Expand Up @@ -586,26 +564,60 @@ SELECT
to_station_name,
to_wheelchair_boarding,

frequencies_row,
frequencies_it
frequencies_row
FROM (
SELECT
stop_times_based.*,
frequencies.start_time,
frequencies.end_time,
frequencies.headway_secs,
frequencies_row,
(row_number() OVER (PARTITION BY stop_times_based.trip_id, "date", frequencies_row ORDER BY from_stop_sequence))::integer as frequencies_it
frequencies_row
FROM stop_times_based
JOIN "${opt.schema}".frequencies ON frequencies.trip_id = stop_times_based.trip_id
WHERE frequencies.exact_times = 'schedule_based' -- todo: is this correct?
) frequencies_based;

${opt.postgraphile ? `\
-- todo: allow filtering based on stop and/or route and/or trip and/or time frame
-- https://www.graphile.org/postgraphile/functions/#setof-functions---connections
COMMENT ON COLUMN "${opt.schema}".connections.route_short_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.route_long_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.route_type IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.direction_id IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.trip_headsign IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.from_stop_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.from_station_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.to_stop_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.to_station_name IS E'@omit';
COMMENT ON VIEW "${opt.schema}".connections IS E'@primaryKey trip_id,date,from_stop_sequence,frequencies_row,frequencies_it\\n@foreignKey (route_id) references routes|@fieldName route\\n@foreignKey (trip_id) references trips|@fieldName trip\\n@foreignKey (from_stop_id) references stops|@fieldName fromStop\\n@foreignKey (from_station_id) references stops|@fieldName fromStation\\n@foreignKey (to_stop_id) references stops|@fieldName toStop\\n@foreignKey (to_station_id) references stops|@fieldName toStation';
` : ''}

CREATE VIEW "${opt.schema}".connections_with_ids AS
SELECT
(
encode(trip_id::bytea, 'base64')
|| ':' || encode((
extract(ISOYEAR FROM "date")
|| '-' || lpad(extract(MONTH FROM "date")::text, 2, '0')
|| '-' || lpad(extract(DAY FROM "date")::text, 2, '0')
)::bytea, 'base64')
|| ':' || encode((from_stop_sequence::text)::bytea, 'base64')
|| ':' || encode((frequencies_row::text)::bytea, 'base64')
|| ':' || encode((frequencies_it::text)::bytea, 'base64')
) as connection_id,
*
FROM (
SELECT
*,
(row_number() OVER (PARTITION BY trip_id, "date", frequencies_row ORDER BY from_stop_sequence))::integer as frequencies_it
FROM "${opt.schema}".connections
) t;

CREATE OR REPLACE FUNCTION "${opt.schema}".connection_by_connection_id(id TEXT)
RETURNS "${opt.schema}".connections
RETURNS "${opt.schema}".connections_with_ids
AS $$
SELECT *
FROM "${opt.schema}".connections
FROM "${opt.schema}".connections_with_ids
WHERE trip_id = convert_from(decode(split_part(id, ':', 1), 'base64'), 'UTF-8')::text
AND "date" = (convert_from(decode(split_part(id, ':', 2), 'base64'), 'UTF-8')::text)::timestamp
AND from_stop_sequence = (convert_from(decode(split_part(id, ':', 3), 'base64'), 'UTF-8')::text)::integer
Expand All @@ -616,19 +628,18 @@ AS $$
$$ LANGUAGE SQL STABLE STRICT;

${opt.postgraphile ? `\
-- todo: currently named arrivalsDeparture, should be arrivalDeparture (but allArrivalsDeparturesList!)
-- todo: allow filtering based on stop and/or route and/or trip and/or time frame
-- https://www.graphile.org/postgraphile/functions/#setof-functions---connections
COMMENT ON COLUMN "${opt.schema}".connections.route_short_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.route_long_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.route_type IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.direction_id IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.trip_headsign IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.from_stop_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.from_station_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.to_stop_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections.to_station_name IS E'@omit';
COMMENT ON VIEW "${opt.schema}".connections IS E'@primaryKey trip_id,date,from_stop_sequence,frequencies_row,frequencies_it\\n@foreignKey (route_id) references routes|@fieldName route\\n@foreignKey (trip_id) references trips|@fieldName trip\\n@foreignKey (from_stop_id) references stops|@fieldName fromStop\\n@foreignKey (from_station_id) references stops|@fieldName fromStation\\n@foreignKey (to_stop_id) references stops|@fieldName toStop\\n@foreignKey (to_station_id) references stops|@fieldName toStation';
COMMENT ON COLUMN "${opt.schema}".connections_with_ids.route_short_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections_with_ids.route_long_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections_with_ids.route_type IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections_with_ids.direction_id IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections_with_ids.trip_headsign IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections_with_ids.from_stop_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections_with_ids.from_station_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections_with_ids.to_stop_name IS E'@omit';
COMMENT ON COLUMN "${opt.schema}".connections_with_ids.to_station_name IS E'@omit';
COMMENT ON VIEW "${opt.schema}".connections_with_ids IS E'@primaryKey trip_id,date,from_stop_sequence,frequencies_row,frequencies_it\\n@foreignKey (route_id) references routes|@fieldName route\\n@foreignKey (trip_id) references trips|@fieldName trip\\n@foreignKey (from_stop_id) references stops|@fieldName fromStop\\n@foreignKey (from_station_id) references stops|@fieldName fromStation\\n@foreignKey (to_stop_id) references stops|@fieldName toStop\\n@foreignKey (to_station_id) references stops|@fieldName toStation';
` : ''}
`

Expand Down
Loading