Skip to content

Commit a0de79d

Browse files
committed
DuckDB rewrite: remove non-default schema support πŸ’₯πŸ“βœ…
1 parent f0a724d commit a0de79d

28 files changed

+271
-443
lines changed

cli.js

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,6 @@ const {
5656
'stats-active-trips-by-hour': {
5757
type: 'string',
5858
},
59-
'schema': {
60-
type: 'string',
61-
},
6259
'import-metadata': {
6360
type: 'boolean',
6461
}
@@ -109,12 +106,6 @@ Options:
109106
currently running trips over time, by hour.
110107
Like --stats-by-route-date, this flag accepts
111108
none, view & materialized-view.
112-
--schema The schema to use for the database. Default: public
113-
Even when importing into a schema other than \`public\`,
114-
a function \`public.gtfs_via_postgres_import_version()\`
115-
gets created, to ensure that multiple imports into the
116-
same database are all made using the same version. See
117-
also multiple-datasets.md in the docs.
118109
--import-metadata Create functions returning import metadata:
119110
- gtfs_data_imported_at (timestamp with time zone)
120111
- gtfs_via_postgres_version (text)
@@ -155,7 +146,6 @@ const opt = {
155146
statsByRouteIdAndDate: flags['stats-by-route-date'] || 'none',
156147
statsByAgencyIdAndRouteIdAndStopAndHour: flags['stats-by-agency-route-stop-hour'] || 'none',
157148
statsActiveTripsByHour: flags['stats-active-trips-by-hour'] || 'none',
158-
schema: flags['schema'] || 'public',
159149
importMetadata: !!flags['import-metadata'],
160150
}
161151
if ('stops-without-level-id' in flags) {

docs/import-metadata.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ SELECT gtfs_via_postgres_version()
1212
-- 4.5.3
1313

1414
SELECT gtfs_via_postgres_options()
15-
-- {"schema": "public", "silent": false, "importStart": 1681417454781, "importMetadata": true, … }
15+
-- {"silent": false, "importStart": 1681417454781, "importMetadata": true, … }
1616
SELECT (gtfs_via_postgres_options())['tripsWithoutShapeId']
1717
-- true
1818
```

docs/multiple-datasets.md

Lines changed: 1 addition & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,3 @@
11
# working with multiple datasets
22

3-
Using `gtfs-via-postgres`, you can import more than one dataset into a single PostgreSQL database by importing them into separate [schemas](https://www.postgresql.org/docs/14/ddl-schemas.html). You can then run queries combine or compare data from them.
4-
5-
As an example, let's compare two datasets from [Paris](https://en.wikipedia.org/wiki/Île-de-France_Mobilités) and [Berlin](https://en.wikipedia.org/wiki/Verkehrsverbund_Berlin-Brandenburg).
6-
7-
```shell
8-
wget -U 'gtfs-via-duckdb demo' -O paris.gtfs.zip 'https://eu.ftp.opendatasoft.com/stif/GTFS/IDFM-gtfs.zip'
9-
unzip -d paris.gtfs paris.gtfs.zip
10-
gtfs-to-sql --require-dependencies \
11-
--schema paris -- paris.gtfs/*.txt \
12-
| sponge | psql -b
13-
14-
wget -U 'gtfs-via-duckdb demo' -O berlin.gtfs.zip 'https://www.vbb.de/vbbgtfs'
15-
unzip -d berlin.gtfs berlin.gtfs.zip
16-
gtfs-to-sql --require-dependencies \
17-
--schema berlin -- berlin.gtfs/*.txt \
18-
| sponge | psql -b
19-
```
20-
21-
We can now do queries across both datasets, for example finding the geographically furthest 2 stops:
22-
23-
```sql
24-
-- warning: takes a long time to compute!
25-
SELECT
26-
paris.stop_id AS paris_stop_id,
27-
berlin.stop_id AS berlin_stop_id
28-
FROM
29-
paris.stops paris,
30-
berlin.stops berlin
31-
ORDER BY paris.stop_loc <-> berlin.stop_loc DESC
32-
LIMIT 100
33-
```
34-
35-
*Note:* During an import, a function `public.gtfs_via_postgres_import_version()` gets created that returns `gtfs-via-postgres`'s version. If that function already exists (because it has been created by a previous import), its return value is compared to `gtfs-via-postgres`'s version, and if these two versions are not equal, the second import will fail. This ensures that multiple imports into the same database can only be made using the exact same `gtfs-via-postgres` version.
3+
Using `gtfs-via-postgres`, it is currently *not possible* to import more than one dataset into a single PostgreSQL database.

index.js

Lines changed: 1 addition & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ const convertGtfsToSql = async function* (files, opt = {}) {
2525
statsByRouteIdAndDate: 'none',
2626
statsByAgencyIdAndRouteIdAndStopAndHour: 'none',
2727
statsActiveTripsByHour: 'none',
28-
schema: 'public',
2928
importMetadata: false,
3029
...opt,
3130
}
@@ -142,52 +141,9 @@ ${inspect(opt, {compact: false}).split('\n').map(line => '-- ' + line).join('\n'
142141
143142
\\set ON_ERROR_STOP on
144143
CREATE EXTENSION IF NOT EXISTS postgis;
145-
${opt.schema !== 'public' ? `CREATE SCHEMA IF NOT EXISTS "${opt.schema}";` : ''}
146144
BEGIN;
147145
148-
-- gtfs-via-postgres supports importing >1 GTFS datasets into 1 DB, each dataset within its own schema. See https://github.com/public-transport/gtfs-via-postgres/issues/51 for more information.
149-
-- Because almost all helper utilities (enums, functions, etc.) are schema-specific, they get imported more than once. In order to prevent subtle bugs due to incompatibilities among two schemas imported by different gtfs-via-postgres versions, we mock a "mutex" here by checking for public.gtfs_via_postgres_import_version()'s return value.
150-
151-
-- todo: this can be done more elegantly: just a "DO" block, "ASSERT" that the version matches, create gtfs_via_postgres_import_version() in the "EXCEPTION" block
152-
CREATE FUNCTION pg_temp.get_gtfs_via_postgres_import_version()
153-
RETURNS TEXT
154-
AS $$
155-
DECLARE
156-
res TEXT;
157-
BEGIN
158-
SELECT public.gtfs_via_postgres_import_version() INTO res;
159-
RETURN res;
160-
EXCEPTION
161-
WHEN undefined_function THEN
162-
-- do nothing, silence error
163-
RETURN NULL;
164-
END;
165-
$$
166-
LANGUAGE plpgsql;
167-
168-
DO $$
169-
BEGIN
170-
IF EXISTS (
171-
SELECT version
172-
FROM (
173-
SELECT pg_temp.get_gtfs_via_postgres_import_version() AS version
174-
) t
175-
WHERE version != '${pkg.version}'
176-
) THEN
177-
RAISE EXCEPTION 'existing GTFS data imported with an incompatible version of gtfs-via-postgres';
178-
END IF;
179-
END
180-
$$
181-
LANGUAGE plpgsql;
182-
183-
CREATE OR REPLACE FUNCTION public.gtfs_via_postgres_import_version()
184-
RETURNS TEXT
185-
AS $$
186-
SELECT '${pkg.version}'
187-
$$
188-
LANGUAGE sql;
189-
190-
\n`
146+
`
191147

192148
const csv = new Stringifier({quoted: true})
193149
const nrOfRowsByName = new Map()

lib/agency.js

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,19 @@
22

33
// https://gtfs.org/documentation/schedule/reference/#agencytxt
44
const beforeAll = (opt) => `\
5-
CREATE TABLE "${opt.schema}".agency (
5+
CREATE TABLE agency (
66
agency_id TEXT PRIMARY KEY,
77
agency_name TEXT NOT NULL,
88
agency_url TEXT NOT NULL,
99
agency_timezone TEXT NOT NULL
10-
CONSTRAINT valid_timezone CHECK ("${opt.schema}".is_timezone(agency_timezone)),
10+
CONSTRAINT valid_timezone CHECK (is_timezone(agency_timezone)),
1111
agency_lang TEXT, -- todo: validate?
1212
agency_phone TEXT,
1313
agency_fare_url TEXT,
1414
agency_email TEXT
1515
);
1616
17-
COPY "${opt.schema}".agency (
17+
COPY agency (
1818
agency_id,
1919
agency_name,
2020
agency_url,

lib/calendar.js

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,26 +2,26 @@
22

33
// https://gtfs.org/documentation/schedule/reference/#calendartxt
44
const beforeAll = (opt) => `\
5-
CREATE TYPE "${opt.schema}".availability AS ENUM (
5+
CREATE TYPE availability AS ENUM (
66
'not_available' -- 0 – Service is not available for Mondays in the date range.
77
, 'available' -- 1 – Service is available for all Mondays in the date range.
88
);
9-
CREATE CAST ("${opt.schema}".availability AS text) WITH INOUT AS IMPLICIT;
9+
CREATE CAST (availability AS text) WITH INOUT AS IMPLICIT;
1010
11-
CREATE TABLE "${opt.schema}".calendar (
11+
CREATE TABLE calendar (
1212
service_id TEXT PRIMARY KEY,
13-
monday "${opt.schema}".availability NOT NULL,
14-
tuesday "${opt.schema}".availability NOT NULL,
15-
wednesday "${opt.schema}".availability NOT NULL,
16-
thursday "${opt.schema}".availability NOT NULL,
17-
friday "${opt.schema}".availability NOT NULL,
18-
saturday "${opt.schema}".availability NOT NULL,
19-
sunday "${opt.schema}".availability NOT NULL,
13+
monday availability NOT NULL,
14+
tuesday availability NOT NULL,
15+
wednesday availability NOT NULL,
16+
thursday availability NOT NULL,
17+
friday availability NOT NULL,
18+
saturday availability NOT NULL,
19+
sunday availability NOT NULL,
2020
start_date DATE NOT NULL,
2121
end_date DATE NOT NULL
2222
);
2323
24-
COPY "${opt.schema}".calendar (
24+
COPY calendar (
2525
service_id,
2626
monday,
2727
tuesday,

lib/calendar_dates.js

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,20 @@
22

33
// https://gtfs.org/documentation/schedule/reference/#calendar_datestxt
44
const beforeAll = (opt) => `\
5-
CREATE TYPE "${opt.schema}".exception_type_v AS ENUM (
5+
CREATE TYPE exception_type_v AS ENUM (
66
'added' -- 1 – Service has been added for the specified date.
77
, 'removed' -- 2 – Service has been removed for the specified date.
88
);
9-
CREATE CAST ("${opt.schema}".exception_type_v AS text) WITH INOUT AS IMPLICIT;
9+
CREATE CAST (exception_type_v AS text) WITH INOUT AS IMPLICIT;
1010
11-
CREATE TABLE "${opt.schema}".calendar_dates (
11+
CREATE TABLE calendar_dates (
1212
service_id TEXT NOT NULL,
1313
"date" DATE NOT NULL,
1414
PRIMARY KEY (service_id, "date"),
15-
exception_type "${opt.schema}".exception_type_v NOT NULL
15+
exception_type exception_type_v NOT NULL
1616
);
1717
18-
COPY "${opt.schema}".calendar_dates (
18+
COPY calendar_dates (
1919
service_id,
2020
date,
2121
exception_type
@@ -39,8 +39,8 @@ const formatCalendarDatesRow = (e) => {
3939
const afterAll = (opt) => `\
4040
\\.
4141
42-
CREATE INDEX ON "${opt.schema}".calendar_dates (service_id);
43-
CREATE INDEX ON "${opt.schema}".calendar_dates (exception_type);
42+
CREATE INDEX ON calendar_dates (service_id);
43+
CREATE INDEX ON calendar_dates (exception_type);
4444
`
4545

4646
module.exports = {

lib/feed_info.js

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,16 @@ const beforeAll = (opt) => `\
66
-- https://github.com/MobilityData/gtfs-validator/blob/31ff374800f7d7883fd9de91b71049c2a4de4e45/main/src/main/java/org/mobilitydata/gtfsvalidator/validator/MatchingFeedAndAgencyLangValidator.java#L82
77
-- https://docs.oracle.com/javase/7/docs/api/java/util/Locale.html
88
-- related: https://github.com/google/transit/pull/98
9-
CREATE TABLE "${opt.schema}".feed_info (
9+
CREATE TABLE feed_info (
1010
feed_publisher_name TEXT PRIMARY KEY,
1111
feed_publisher_url TEXT NOT NULL,
1212
feed_lang TEXT NOT NULL
1313
CONSTRAINT valid_feed_lang CHECK (
14-
"${opt.schema}".is_valid_lang_code(feed_lang)
14+
is_valid_lang_code(feed_lang)
1515
),
1616
default_lang TEXT
1717
CONSTRAINT valid_default_lang CHECK (
18-
default_lang IS NULL OR "${opt.schema}".is_valid_lang_code(default_lang)
18+
default_lang IS NULL OR is_valid_lang_code(default_lang)
1919
),
2020
feed_start_date DATE,
2121
feed_end_date DATE,
@@ -24,7 +24,7 @@ CREATE TABLE "${opt.schema}".feed_info (
2424
feed_contact_url TEXT
2525
);
2626
27-
COPY "${opt.schema}".feed_info (
27+
COPY feed_info (
2828
feed_publisher_name,
2929
feed_publisher_url,
3030
feed_lang,

lib/frequencies.js

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,21 @@ const {formatTime} = require('./util')
44

55
// https://gtfs.org/documentation/schedule/reference/#frequenciestxt
66
const beforeAll = (opt) => `\
7-
CREATE TYPE "${opt.schema}".exact_times_v AS ENUM (
7+
CREATE TYPE exact_times_v AS ENUM (
88
'frequency_based' -- 0 or empty - Frequency-based trips.
99
, 'schedule_based' -- 1 – Schedule-based trips with the exact same headway throughout the day. In this case the end_time value must be greater than the last desired trip start_time but less than the last desired trip start_time + headway_secs.
1010
);
11-
CREATE CAST ("${opt.schema}".exact_times_v AS text) WITH INOUT AS IMPLICIT;
11+
CREATE CAST (exact_times_v AS text) WITH INOUT AS IMPLICIT;
1212
13-
CREATE TABLE "${opt.schema}".frequencies (
13+
CREATE TABLE frequencies (
1414
-- Used to implement arrivals_departures & connections. Filled after COPY-ing, see below.
1515
frequencies_row INTEGER,
1616
trip_id TEXT NOT NULL,
17-
FOREIGN KEY (trip_id) REFERENCES "${opt.schema}".trips,
17+
FOREIGN KEY (trip_id) REFERENCES trips,
1818
start_time INTERVAL NOT NULL,
1919
end_time INTERVAL NOT NULL,
2020
headway_secs INT NOT NULL,
21-
exact_times "${opt.schema}".exact_times_v,
21+
exact_times exact_times_v,
2222
-- frequencies' primary key currently is just (trip_id, start_time)
2323
-- see also https://github.com/google/transit/issues/514
2424
-- todo: add primary key?
@@ -31,7 +31,7 @@ CREATE TABLE "${opt.schema}".frequencies (
3131
)
3232
);
3333
34-
COPY "${opt.schema}".frequencies (
34+
COPY frequencies (
3535
trip_id,
3636
start_time,
3737
end_time,
@@ -67,7 +67,7 @@ const afterAll = (opt) => `\
6767
\\.
6868
6969
-- frequencies_row is used to implement arrivals_departures & connections.
70-
UPDATE "${opt.schema}".frequencies
70+
UPDATE frequencies
7171
-- This is ugly, but AFAICT there is no cleaner way.
7272
-- see also https://stackoverflow.com/a/4359354/1072129
7373
SET frequencies_row = t.frequencies_row
@@ -76,7 +76,7 @@ FROM (
7676
-- order by all columns so that we don't implicitly depend on the file's order
7777
(row_number() OVER (PARTITION BY trip_id, start_time ORDER BY end_time, headway_secs, exact_times))::integer AS frequencies_row,
7878
trip_id, start_time
79-
FROM "${opt.schema}".frequencies
79+
FROM frequencies
8080
) AS t
8181
-- self-join
8282
-- frequencies' primary is just (trip_id, start_time)
@@ -87,8 +87,8 @@ FROM (
8787
WHERE frequencies.trip_id = t.trip_id
8888
AND frequencies.start_time = t.start_time;
8989
90-
CREATE INDEX ON "${opt.schema}".frequencies (trip_id);
91-
CREATE INDEX ON "${opt.schema}".frequencies (exact_times);
90+
CREATE INDEX ON frequencies (trip_id);
91+
CREATE INDEX ON frequencies (exact_times);
9292
`
9393

9494
module.exports = {

lib/import_metadata.js

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,19 @@ const afterAll = (opt) => {
88

99
// todo: escape properly
1010
return `\
11-
CREATE OR REPLACE FUNCTION "${opt.schema}".gtfs_data_imported_at ()
11+
CREATE OR REPLACE FUNCTION gtfs_data_imported_at ()
1212
RETURNS TIMESTAMP WITH TIME ZONE
1313
AS $$
1414
SELECT '${new Date(opt.importStart).toISOString()}'::timestamp with time zone;
1515
$$ LANGUAGE SQL IMMUTABLE;
1616
17-
CREATE OR REPLACE FUNCTION "${opt.schema}".gtfs_via_postgres_version ()
17+
CREATE OR REPLACE FUNCTION gtfs_via_postgres_version ()
1818
RETURNS TEXT
1919
AS $$
2020
SELECT '${pkg.version}';
2121
$$ LANGUAGE SQL IMMUTABLE;
2222
23-
CREATE OR REPLACE FUNCTION "${opt.schema}".gtfs_via_postgres_options ()
23+
CREATE OR REPLACE FUNCTION gtfs_via_postgres_options ()
2424
RETURNS jsonb
2525
AS $$
2626
SELECT '${JSON.stringify(opt).replace(/'/g, `''`)}'::jsonb;

0 commit comments

Comments
 (0)