/**
URI/URL/URN parser.

URI scheme:
---
        userinfo            port                        |--query--|
        |      |            | |                         |         |
https://john.doe@www.ex.com:123/forum/question/;par=non?tag=n&ord=o#topmost
|   |   |        |        |   ||              | |     |             |     |
scheme  |        |--host--|   ||-----path-----| path params         |-tag-|
        |                     |
        |------authority------|

URI = scheme ":" ["//" authority] path ["?" query] ["#" fragment]
authority = [userinfo "@"] host [":" port]
---

URI can be either full URI:
---
scheme://userinfo@host:port/path?query#tag
---
Or portions of it:
---
scheme://userinfo@host?query
scheme://host/path
scheme:path
scheme://user:pass@host:244
---
*/
module sily.uri;

import std.conv: to;
import std.string: isNumeric;
import std.algorithm.searching: canFind;
import std.stdio;

import sily.uni;

/// URI representation
struct URI {
    /// Protocol/Scheme (https:// -> https)
    string protocol = "";
    /// Ditto
    alias scheme = protocol;
    /// User info (s://user.name:pass@host -> user.name:pass)
    string userinfo = "";
    /// Host/Domain (s://host.com/page -> host.com)
    string host = "";
    /// Port (s://host:25545 -> 25545), -1 when the URI has no port
    int port = -1;
    /// Path. Rooted paths keep their leading slash (s://host/path/file -> /path/file),
    /// rootless paths are stored verbatim (s:path -> path)
    string path = "";
    /// Path parameters (s://host/path;param=val -> [param: val])
    string[string] parameters;
    /// Query (s://host?q=4&s=5&e=1 -> [q: 4, s: 5, e: 1])
    string[string] query;
    /// Tag/Fragment (s://host#fragment -> fragment)
    string tag = "";
    /// Ditto
    alias fragment = tag;
    /// Set by `parseURI` when the host is a bracketed IP literal (s://[::1]/ -> true)
    bool isHostIP = false;

    /// Treats host and path as single filepath (i.e file:///home/user/ -> /home/user).
    /// A single trailing slash is stripped from the result.
    @property string filepath() {
        string _out;
        if (host.length) _out ~= host;
        if (path.length) _out ~= path;
        if (_out.length > 0 && _out[$-1] == '/') _out = _out[0..$-1];
        return _out;
    }

    /// Sets/Returns authority (userinfo@host:port)
    @property string authority() {
        string _out;
        if (userinfo.length) _out ~= userinfo ~ "@";
        if (host.length) _out ~= host;
        if (port >= 0) _out ~= ":" ~ port.to!string;
        return _out;
    }
    /**
    Ditto

    Only the components present in `uri` are overwritten: `userinfo` is
    assigned only when `uri` contains '@', `host` only when something follows
    the userinfo, and `port` only when the text after a ':' consists solely of
    digits up to the end of `uri`.

    Params:
        uri = authority string in `[userinfo@]host[:port]` form
    Throws: whatever `std.conv.to!int` throws when the port digits do not fit an `int`
    */
    @property void authority(string uri) {
        size_t start = 0;

        // Userinfo is everything in front of the first '@'
        foreach (i, c; uri) {
            if (c == '@') {
                userinfo = uri[0 .. i];
                start = i + 1;
                break;
            }
        }

        // Nothing after the userinfo: host and port stay as they were
        if (start >= uri.length) return;

        // The host ends at the first ':' that is followed by digits only.
        // The last character is never tested, so a trailing ':' (which has
        // no port after it) remains part of the host.
        size_t hostEnd = uri.length;
        for (size_t i = start; i + 1 < uri.length; ++i) {
            if (uri[i] != ':') continue;

            bool isPort = true;
            foreach (p; uri[i + 1 .. $]) {
                if (!isDigit(p)) {
                    isPort = false;
                    break;
                }
            }

            if (isPort) {
                hostEnd = i;
                break;
            }
        }

        host = uri[start .. hostEnd];
        if (hostEnd < uri.length) port = to!int(uri[hostEnd + 1 .. $]);
    }

    /// Set query with query format ("key=val&key2&key3=val2").
    /// Parsed pairs are added to the existing `query` entries.
    void setQuery(string _query) {
        setQueryParams(_query, '&', query);
    }

    /// Set path parameters with parameters format ("key=val;key2;key3=val2").
    /// Parsed pairs are added to the existing `parameters` entries.
    void setParameters(string _query) {
        setQueryParams(_query, ';', parameters);
    }

    /// Splits `_query` on `sep` into `key[=val]` pairs and stores them in `arr`.
    /// Pairs with an empty key are skipped, a key without '=' gets an empty value.
    private void setQueryParams(string _query, char sep, ref string[string] arr) {
        string key = "";
        string val = "";
        bool iskey = true;

        foreach (c; _query) {
            // Only the first '=' after a non-empty key separates key from value,
            // any later '=' belongs to the value
            if (c == '=' && iskey && key.length) {
                iskey = false;
                continue;
            }

            if (c == sep) {
                if (key.length) {
                    arr[key] = val;
                    iskey = true;
                }
                key = "";
                val = "";
                continue;
            }

            if (iskey) {
                key ~= c;
            } else {
                val ~= c;
            }
        }

        // Last pair has no trailing separator
        if (key.length) arr[key] = val;
    }
}

/**
Parses URI string.

Accepts both full URIs and the partial forms listed in the module
documentation. Input without a scheme is read as starting with an authority
("host:8080/path"), unless it starts with '?' or '#'. A ':' directly followed
by a digit is treated as a port separator, not as the end of a scheme.

Params:
    uri = string to parse, may be empty
Returns: URI with every recognised component filled in, other fields keep their defaults
Throws: whatever `std.conv.to!int` throws when the port digits do not fit an `int`
*/
URI parseURI(string uri) {
    URI u = URI();
    size_t pos = 0;

    // scheme = [a-z, 0-9, +, -, .]:
    foreach (i, c; uri) {
        if (c == ':') {
            // "host:8080" - this colon starts a port, there is no scheme
            if (i + 1 < uri.length && isDigit(uri[i + 1])) break;
            u.scheme = uri[0 .. i];
            pos = i + 1;
            break;
        }
        if (!c.validScheme) break;
    }

    // Both lookups are bounds checked: `pos` may already be at the end of
    // the input ("", "scheme:") or one character before it ("a", "?")
    immutable bool isQuery = pos < uri.length && (uri[pos] == '#' || uri[pos] == '?');
    immutable bool isAuthorityMarker = pos + 2 <= uri.length && uri[pos .. pos + 2] == "//";

    // authority = //user:passwrd@host:port
    // Expected after "//", or at the very start when there is no scheme
    bool valid = false;
    if (isAuthorityMarker || (u.scheme == "" && !isQuery)) {
        if (isAuthorityMarker) pos += 2;

        size_t end = pos;
        while (end < uri.length && uri[end] != '/' && uri[end] != '?' && uri[end] != '#') ++end;

        if (pos < uri.length) {
            valid = true;
            string auth = uri[pos .. end];
            // Bracketed IP literal, i.e. "[::1]" or "[::1]:8080". The closing
            // bracket is searched inside of authority only.
            u.isHostIP = auth.length > 0 && auth[0] == '[' && auth.canFind(']');
            // Colons inside of brackets are always followed by a non-digit
            // (at least by ']'), so the setter splits the port off correctly
            u.authority = auth;
            pos = end;
        }
    }

    if (!valid && u.scheme == "") pos = 0;

    // Rootless path (scheme:path, mailto:user@host, urn:isbn:123)
    if (u.scheme != "" && !isAuthorityMarker && pos < uri.length) {
        immutable char first = uri[pos];
        if (first != '/' && first != ';' && first != '?' && first != '#') {
            size_t end = pos;
            while (end < uri.length && uri[end] != ';' && uri[end] != '?' && uri[end] != '#') ++end;
            u.path = uri[pos .. end];
            pos = end;
        }
    }

    string temp = findUntil(uri, pos, '/', [';', '?', '#']);
    if (temp.length) u.path = '/' ~ temp;

    temp = findUntil(uri, pos, ';', ['?', '#']);
    if (temp.length) u.setParameters(temp);

    temp = findUntil(uri, pos, '?', ['#']);
    if (temp.length) u.setQuery(temp);

    temp = findUntil(uri, pos, '#', []);
    if (temp.length) u.fragment = temp;

    return u;
}

/**
Encodes URI struct into string.

Components are written in `scheme:[//]authority/path;params?query#fragment`
order and empty components are skipped. "//" is written only when both scheme
and authority are present, and "/" is appended after an authority that is not
followed by a path. Order of parameters and query entries follows the
iteration order of the associative arrays.

Params:
    uri = URI to encode
Returns: string form of `uri`, no escaping is performed
*/
string encodeURI(URI uri) {
    string _out;

    immutable string auth = uri.authority;
    immutable bool hasScheme = uri.scheme.length != 0;
    immutable bool hasPath = uri.path.length != 0;

    if (hasScheme) _out ~= uri.scheme ~ ":";

    if (auth.length != 0) {
        if (hasScheme) _out ~= "//";
        _out ~= auth;
        if (!hasPath) _out ~= "/";
    }

    if (hasPath) _out ~= uri.path;

    if (uri.parameters.length != 0) {
        _out ~= ";";
        _out ~= joinQuery(uri.parameters, ';');
    }

    if (uri.query.length != 0) {
        _out ~= "?";
        _out ~= joinQuery(uri.query, '&');
    }

    if (uri.fragment.length != 0) _out ~= "#" ~ uri.fragment;

    return _out;
}

/// Joins `arr` into "key=val" pairs separated by `sep`, entries with empty value are written as bare "key"
private string joinQuery(ref string[string] arr, char sep) {
    string _out;
    foreach (i, key; arr.keys) {
        if (i != 0) _out ~= sep;
        _out ~= key;
        string val = arr[key];
        if (val.length) _out ~= "=" ~ val;
    }
    return _out;
}

/**
Extracts the URI segment that starts at `pos`.

The segment is recognised only when `uri[pos]` is `_init`. It then spans up to
(not including) the first character found in `_until`, or to the end of `uri`.

Params:
    uri = string being parsed
    pos = index where the segment is expected, moved to the end of the segment when it is found
    _init = character that introduces the segment, not included in the result
    _until = characters that terminate the segment
Returns: segment without `_init`, or empty string when there is no segment
    at `pos` or when it consists of a lone `_init` character
*/
private string findUntil(string uri, ref size_t pos, char _init, char[] _until) {
    if (pos >= uri.length || uri[pos] != _init) return "";

    size_t end = pos + 1;
    while (end < uri.length && !_until.canFind(uri[end])) ++end;

    string found = uri[pos + 1 .. end];
    pos = end;

    // Doubled marker ("//", "??") carries no content
    if (found.length == 1 && found[0] == _init) return "";
    return found;
}

// TODO: encode " " as %20

/// Is `c` allowed inside of URI scheme (alphanumeric, '+', '-' or '.')
private bool validScheme(char c) {
    return isAlphaNumeric(c) || c == '+' || c == '-' || c == '.';
}