Skip to content

Commit 01a45be

Browse files
refactor(app): Standardize subdomain detection logic (#9751)
* refactor(app): Standardize subdomain detection logic * Update app/Config/Hostnames.php Co-authored-by: Pooya Parsa <pooya_parsa_dadashi@yahoo.com> * Update tests/system/Helpers/URLHelper/MiscUrlTest.php Co-authored-by: Pooya Parsa <pooya_parsa_dadashi@yahoo.com> * addressing review comments * cs fix * cs fix * cs fix * remove typo in docs ci-skip --------- Co-authored-by: Pooya Parsa <pooya_parsa_dadashi@yahoo.com>
1 parent 37ff9fa commit 01a45be

File tree

7 files changed

+157
-114
lines changed

7 files changed

+157
-114
lines changed

app/Config/Hostnames.php

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
<?php
2+
3+
namespace Config;
4+
5+
/**
 * Host name related configuration.
 *
 * Currently this only carries the list of two-label public suffixes that the
 * `parse_subdomain()` URL helper consults when it has to decide how many
 * trailing labels of a host belong to the registrable domain.
 */
class Hostnames
{
    /**
     * Known two-part TLDs (e.g. "co.uk") used for subdomain extraction.
     *
     * Entries are lower-case, have no leading dot, and are grouped by
     * country-code TLD purely for readability.
     *
     * @var list<string>
     */
    public const TWO_PART_TLDS = [
        // .uk
        'co.uk', 'org.uk', 'gov.uk', 'ac.uk', 'sch.uk', 'ltd.uk', 'plc.uk',
        // .au
        'com.au', 'net.au', 'org.au', 'edu.au', 'gov.au', 'asn.au', 'id.au',
        // .jp
        'co.jp', 'ac.jp', 'go.jp', 'or.jp', 'ne.jp', 'gr.jp',
        // .nz
        'co.nz', 'org.nz', 'govt.nz', 'ac.nz', 'net.nz', 'geek.nz', 'maori.nz', 'school.nz',
        // .in
        'co.in', 'net.in', 'org.in', 'ind.in', 'ac.in', 'gov.in', 'res.in',
        // .cn
        'com.cn', 'net.cn', 'org.cn', 'gov.cn', 'edu.cn',
        // .sg
        'com.sg', 'net.sg', 'org.sg', 'gov.sg', 'edu.sg', 'per.sg',
        // .za
        'co.za', 'org.za', 'gov.za', 'ac.za', 'net.za',
        // .kr
        'co.kr', 'or.kr', 'go.kr', 'ac.kr', 'ne.kr', 'pe.kr',
        // .th
        'co.th', 'or.th', 'go.th', 'ac.th', 'net.th', 'in.th',
        // .my
        'com.my', 'net.my', 'org.my', 'edu.my', 'gov.my', 'mil.my', 'name.my',
        // .mx
        'com.mx', 'org.mx', 'net.mx', 'edu.mx', 'gob.mx',
        // .br
        'com.br', 'net.br', 'org.br', 'gov.br', 'edu.br', 'art.br', 'eng.br',
        // .il
        'co.il', 'org.il', 'ac.il', 'gov.il', 'net.il', 'muni.il',
        // .id
        'co.id', 'or.id', 'ac.id', 'go.id', 'net.id', 'web.id', 'my.id',
        // .hk
        'com.hk', 'edu.hk', 'gov.hk', 'idv.hk', 'net.hk', 'org.hk',
        // .tw
        'com.tw', 'net.tw', 'org.tw', 'edu.tw', 'gov.tw', 'idv.tw',
        // .sa
        'com.sa', 'net.sa', 'org.sa', 'gov.sa', 'edu.sa', 'sch.sa', 'med.sa',
        // .ae
        'co.ae', 'net.ae', 'org.ae', 'gov.ae', 'ac.ae', 'sch.ae',
        // .tr
        'com.tr', 'net.tr', 'org.tr', 'gov.tr', 'edu.tr', 'av.tr', 'gen.tr',
        // .ke
        'co.ke', 'or.ke', 'go.ke', 'ac.ke', 'sc.ke', 'me.ke', 'mobi.ke', 'info.ke',
        // .ng
        'com.ng', 'org.ng', 'gov.ng', 'edu.ng', 'net.ng', 'sch.ng', 'name.ng',
        // .pk
        'com.pk', 'net.pk', 'org.pk', 'gov.pk', 'edu.pk', 'fam.pk',
        // .eg
        'com.eg', 'edu.eg', 'gov.eg', 'org.eg', 'net.eg',
        // .cy
        'com.cy', 'net.cy', 'org.cy', 'gov.cy', 'ac.cy',
        // .lk
        'com.lk', 'org.lk', 'edu.lk', 'gov.lk', 'net.lk', 'int.lk',
        // .bd
        'com.bd', 'net.bd', 'org.bd', 'ac.bd', 'gov.bd', 'mil.bd',
        // .ar
        'com.ar', 'net.ar', 'org.ar', 'gov.ar', 'edu.ar', 'mil.ar',
        // .cl and .pl
        'gob.cl', 'com.pl', 'net.pl', 'org.pl', 'gov.pl', 'edu.pl',
        // .ir
        'co.ir', 'ac.ir', 'org.ir', 'id.ir', 'gov.ir', 'sch.ir', 'net.ir',
    ];
}

system/Helpers/url_helper.php

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
use CodeIgniter\HTTP\URI;
1818
use CodeIgniter\Router\Exceptions\RouterException;
1919
use Config\App;
20+
use Config\Hostnames;
2021

2122
// CodeIgniter URL Helpers
2223

@@ -534,3 +535,52 @@ function url_is(string $path): bool
534535
return (bool) preg_match("|^{$path}$|", $currentPath, $matches);
535536
}
536537
}
538+
539+
if (! function_exists('parse_subdomain')) {
    /**
     * Parses the subdomain from a host name.
     *
     * The host is normalised before it is split into labels:
     *  - it is lower-cased, because host names are case-insensitive and the
     *    entries in Hostnames::TWO_PART_TLDS are lower-case;
     *  - a trailing ":port" is removed, because callers may pass a raw
     *    HTTP_HOST value such as "api.example.com:8080";
     *  - a trailing root dot ("api.example.com.") is removed.
     *
     * "localhost", IPv4/IPv6 literals (bare or bracketed) and hosts that
     * consist of the registrable domain only have no subdomain.
     *
     * @param string|null $host The hostname to parse. If null, uses the current request's host.
     *
     * @return string The lower-cased subdomain (possibly multi-level, e.g. "admin.api"),
     *                or an empty string if none exists.
     */
    function parse_subdomain(?string $host = null): string
    {
        $host ??= service('request')->getUri()->getHost();

        $host = strtolower(trim($host));

        // A bracketed host is an IPv6 literal (optionally followed by a port).
        if ($host === '' || str_starts_with($host, '[')) {
            return '';
        }

        // Bare IP literals must be recognised BEFORE port stripping, otherwise
        // the tail of an IPv6 address such as "::1" would be taken for a port.
        if (filter_var($host, FILTER_VALIDATE_IP) !== false) {
            return '';
        }

        // Drop an optional ":port" suffix and the optional trailing root dot.
        $host = rtrim(preg_replace('/:\d+$/', '', $host) ?? $host, '.');

        // Handle localhost and IP addresses - they don't have subdomains
        if ($host === 'localhost' || filter_var($host, FILTER_VALIDATE_IP) !== false) {
            return '';
        }

        $parts     = explode('.', $host);
        $partCount = count($parts);

        // Need at least 3 parts for a subdomain (subdomain.domain.tld)
        // e.g., api.example.com
        if ($partCount < 3) {
            return '';
        }

        // With a two-part TLD (e.g., co.uk, com.au) the registrable domain
        // spans the last 3 labels, otherwise the last 2.
        $lastTwoParts = $parts[$partCount - 2] . '.' . $parts[$partCount - 1];
        $domainLabels = in_array($lastTwoParts, Hostnames::TWO_PART_TLDS, true) ? 3 : 2;

        // Everything in front of the registrable domain is the subdomain:
        //   admin.api.example.com   -> admin.api
        //   admin.api.example.co.uk -> admin.api
        //   example.co.uk           -> '' (empty slice)
        return implode('.', array_slice($parts, 0, $partCount - $domainLabels));
    }
}

system/Router/Attributes/Restrict.php

Lines changed: 1 addition & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -42,38 +42,6 @@
4242
#[Attribute(Attribute::TARGET_CLASS | Attribute::TARGET_METHOD | Attribute::IS_REPEATABLE)]
4343
class Restrict implements RouteAttributeInterface
4444
{
45-
private const TWO_PART_TLDS = [
46-
'co.uk', 'org.uk', 'gov.uk', 'ac.uk', 'sch.uk', 'ltd.uk', 'plc.uk',
47-
'com.au', 'net.au', 'org.au', 'edu.au', 'gov.au', 'asn.au', 'id.au',
48-
'co.jp', 'ac.jp', 'go.jp', 'or.jp', 'ne.jp', 'gr.jp',
49-
'co.nz', 'org.nz', 'govt.nz', 'ac.nz', 'net.nz', 'geek.nz', 'maori.nz', 'school.nz',
50-
'co.in', 'net.in', 'org.in', 'ind.in', 'ac.in', 'gov.in', 'res.in',
51-
'com.cn', 'net.cn', 'org.cn', 'gov.cn', 'edu.cn',
52-
'com.sg', 'net.sg', 'org.sg', 'gov.sg', 'edu.sg', 'per.sg',
53-
'co.za', 'org.za', 'gov.za', 'ac.za', 'net.za',
54-
'co.kr', 'or.kr', 'go.kr', 'ac.kr', 'ne.kr', 'pe.kr',
55-
'co.th', 'or.th', 'go.th', 'ac.th', 'net.th', 'in.th',
56-
'com.my', 'net.my', 'org.my', 'edu.my', 'gov.my', 'mil.my', 'name.my',
57-
'com.mx', 'org.mx', 'net.mx', 'edu.mx', 'gob.mx',
58-
'com.br', 'net.br', 'org.br', 'gov.br', 'edu.br', 'art.br', 'eng.br',
59-
'co.il', 'org.il', 'ac.il', 'gov.il', 'net.il', 'muni.il',
60-
'co.id', 'or.id', 'ac.id', 'go.id', 'net.id', 'web.id', 'my.id',
61-
'com.hk', 'edu.hk', 'gov.hk', 'idv.hk', 'net.hk', 'org.hk',
62-
'com.tw', 'net.tw', 'org.tw', 'edu.tw', 'gov.tw', 'idv.tw',
63-
'com.sa', 'net.sa', 'org.sa', 'gov.sa', 'edu.sa', 'sch.sa', 'med.sa',
64-
'co.ae', 'net.ae', 'org.ae', 'gov.ae', 'ac.ae', 'sch.ae',
65-
'com.tr', 'net.tr', 'org.tr', 'gov.tr', 'edu.tr', 'av.tr', 'gen.tr',
66-
'co.ke', 'or.ke', 'go.ke', 'ac.ke', 'sc.ke', 'me.ke', 'mobi.ke', 'info.ke',
67-
'com.ng', 'org.ng', 'gov.ng', 'edu.ng', 'net.ng', 'sch.ng', 'name.ng',
68-
'com.pk', 'net.pk', 'org.pk', 'gov.pk', 'edu.pk', 'fam.pk',
69-
'com.eg', 'edu.eg', 'gov.eg', 'org.eg', 'net.eg',
70-
'com.cy', 'net.cy', 'org.cy', 'gov.cy', 'ac.cy',
71-
'com.lk', 'org.lk', 'edu.lk', 'gov.lk', 'net.lk', 'int.lk',
72-
'com.bd', 'net.bd', 'org.bd', 'ac.bd', 'gov.bd', 'mil.bd',
73-
'com.ar', 'net.ar', 'org.ar', 'gov.ar', 'edu.ar', 'mil.ar',
74-
'gob.cl',
75-
];
76-
7745
public function __construct(
7846
public array|string|null $environment = null,
7947
public array|string|null $hostname = null,
@@ -145,7 +113,7 @@ private function checkSubdomain(RequestInterface $request): void
145113
return;
146114
}
147115

148-
$currentSubdomain = $this->getSubdomain($request);
116+
$currentSubdomain = parse_subdomain($request->getUri()->getHost());
149117
$allowedSubdomains = array_map('strtolower', (array) $this->subdomain);
150118

151119
// If no subdomain exists but one is required
@@ -158,40 +126,4 @@ private function checkSubdomain(RequestInterface $request): void
158126
throw new PageNotFoundException('Access denied: subdomain is blocked.');
159127
}
160128
}
161-
162-
private function getSubdomain(RequestInterface $request): string
163-
{
164-
$host = strtolower($request->getUri()->getHost());
165-
166-
// Handle localhost and IP addresses - they don't have subdomains
167-
if ($host === 'localhost' || filter_var($host, FILTER_VALIDATE_IP)) {
168-
return '';
169-
}
170-
171-
$parts = explode('.', $host);
172-
$partCount = count($parts);
173-
174-
// Need at least 3 parts for a subdomain (subdomain.domain.tld)
175-
// e.g., api.example.com
176-
if ($partCount < 3) {
177-
return '';
178-
}
179-
// Check if we have a two-part TLD (e.g., co.uk, com.au)
180-
$lastTwoParts = $parts[$partCount - 2] . '.' . $parts[$partCount - 1];
181-
if (in_array($lastTwoParts, self::TWO_PART_TLDS, true)) {
182-
// For two-part TLD, need at least 4 parts for subdomain
183-
// e.g., api.example.co.uk (4 parts)
184-
if ($partCount < 4) {
185-
return ''; // No subdomain, just domain.co.uk
186-
}
187-
188-
// Remove the two-part TLD and domain name (last 3 parts)
189-
// e.g., admin.api.example.co.uk -> admin.api
190-
return implode('.', array_slice($parts, 0, $partCount - 3));
191-
}
192-
193-
// Standard TLD: Remove TLD and domain (last 2 parts)
194-
// e.g., admin.api.example.com -> admin.api
195-
return implode('.', array_slice($parts, 0, $partCount - 2));
196-
}
197129
}

system/Router/RouteCollection.php

Lines changed: 1 addition & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1637,7 +1637,7 @@ private function checkSubdomains($subdomains): bool
16371637
}
16381638

16391639
if ($this->currentSubdomain === null) {
1640-
$this->currentSubdomain = $this->determineCurrentSubdomain();
1640+
$this->currentSubdomain = parse_subdomain($this->httpHost);
16411641
}
16421642

16431643
if (! is_array($subdomains)) {
@@ -1653,50 +1653,6 @@ private function checkSubdomains($subdomains): bool
16531653
return in_array($this->currentSubdomain, $subdomains, true);
16541654
}
16551655

1656-
/**
1657-
* Examines the HTTP_HOST to get the best match for the subdomain. It
1658-
* won't be perfect, but should work for our needs.
1659-
*
1660-
* It's especially not perfect since it's possible to register a domain
1661-
* with a period (.) as part of the domain name.
1662-
*
1663-
* @return false|string the subdomain
1664-
*/
1665-
private function determineCurrentSubdomain()
1666-
{
1667-
// We have to ensure that a scheme exists
1668-
// on the URL else parse_url will mis-interpret
1669-
// 'host' as the 'path'.
1670-
$url = $this->httpHost;
1671-
if (! str_starts_with($url, 'http')) {
1672-
$url = 'http://' . $url;
1673-
}
1674-
1675-
$parsedUrl = parse_url($url);
1676-
1677-
$host = explode('.', $parsedUrl['host']);
1678-
1679-
if ($host[0] === 'www') {
1680-
unset($host[0]);
1681-
}
1682-
1683-
// Get rid of any domains, which will be the last
1684-
unset($host[count($host) - 1]);
1685-
1686-
// Account for .co.uk, .co.nz, etc. domains
1687-
if (end($host) === 'co') {
1688-
$host = array_slice($host, 0, -1);
1689-
}
1690-
1691-
// If we only have 1 part left, then we don't have a sub-domain.
1692-
if (count($host) === 1) {
1693-
// Set it to false so we don't make it back here again.
1694-
return false;
1695-
}
1696-
1697-
return array_shift($host);
1698-
}
1699-
17001656
/**
17011657
* Reset the routes, so that a test case can provide the
17021658
* explicit ones needed for it.

tests/system/Helpers/URLHelper/MiscUrlTest.php

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -963,4 +963,40 @@ public function testUrlToMissingArgument(): void
963963

964964
url_to('loginURL');
965965
}
966+
967+
#[DataProvider('provideParseSubdomain')]
public function testParseSubdomain(?string $host, string $expected, bool $useRequest = false): void
{
    // Plain case: the host under test is handed to the helper directly.
    if (! $useRequest) {
        $this->assertSame($expected, parse_subdomain($host));

        return;
    }

    // Request case: build a request so that parse_subdomain(null) has a
    // current host to fall back on.
    $requestUrl = 'http://sub.example.com/';

    $this->config->baseURL = $requestUrl;
    $this->createRequest($requestUrl);

    $this->assertSame($expected, parse_subdomain(null));
}
982+
983+
/**
 * Provides test cases for parsing subdomains.
 *
 * Each data set is [host, expected subdomain, use current request]. When the
 * third element is true the host is ignored and parse_subdomain(null) is
 * exercised against a request for "sub.example.com".
 *
 * @return iterable<string, array{0: string|null, 1: string, 2: bool}>
 */
public static function provideParseSubdomain(): iterable
{
    return [
        'standard subdomain'                      => ['api.example.com', 'api', false],
        'multi-level subdomain'                   => ['admin.api.example.com', 'admin.api', false],
        'no subdomain (domain only)'              => ['example.com', '', false],
        'single-label host'                       => ['intranet', '', false],
        'localhost'                               => ['localhost', '', false],
        'ipv4'                                    => ['127.0.0.1', '', false],
        'ipv6'                                    => ['::1', '', false],
        'two-part tld only'                       => ['co.uk', '', false],
        'two-part tld no subdomain'               => ['example.co.uk', '', false],
        'two-part tld with subdomain'             => ['api.example.co.uk', 'api', false],
        'two-part tld with multi-level subdomain' => ['admin.api.example.co.uk', 'admin.api', false],
        'null uses request host'                  => [null, 'sub', true],
    ];
}
9661002
}

user_guide_src/source/helpers/url_helper.rst

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,21 @@ The following functions are available:
361361
This function works the same as :php:func:`url_title()` but it converts all
362362
accented characters automatically.
363363

364+
.. php:function:: parse_subdomain([$host = null])
365+
366+
:param string|null $host: The hostname to parse. If null, uses the current request's host.
367+
:returns: The subdomain, or an empty string if none exists.
368+
:rtype: string
369+
370+
Parses the subdomain from the given host name.
371+
372+
Here are some examples:
373+
374+
.. literalinclude:: url_helper/027.php
375+
376+
You can customize the list of known two-part TLDs by adding them to the
377+
``Config\Hostnames::TWO_PART_TLDS`` array.
378+
364379
.. php:function:: prep_url([$str = ''[, $secure = false]])
365380
366381
:param string $str: URL string
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
<?php
2+
3+
// Outputs "blog"
4+
echo parse_subdomain('blog.example.com');
5+
6+
// Outputs an empty string
7+
echo parse_subdomain('example.com');
8+
echo parse_subdomain('example.co.uk');
9+
10+
// Outputs "shop" - correctly handles two-part TLDs
11+
echo parse_subdomain('shop.example.co.uk');
12+
13+
// Outputs "shop.old"
14+
echo parse_subdomain('shop.old.example.co.uk');

0 commit comments

Comments
 (0)