1 /**
2 URI/URL/URN parser.
3 
4 URI scheme:
5 ---
6         userinfo            port                       |--query---|
7         |      |            | |                        |          |
8 https://john.doe@www.ex.com:123/forum/question/;par=non?tag=n&ord=o#topmost
9 |   |   |        |        |   ||              ||      |            |     |
10 scheme  |        |--host--|   ||-----path-----||path params        |-tag-|
11         |                     |
12         |------authority------|
13 
14 URI = scheme ":" ["//" authority] path ["?" query] ["#" fragment]
15 authority = [userinfo "@"] host [":" port]
16 ---
17 
18 URI can be either full URI:
19 ---
20 scheme://userinfo@host:port/path?query#tag
21 ---
22 Or portions of it:
23 ---
24 scheme://userinfo@host?query
25 scheme://host/path
26 scheme:path
27 scheme://user:pass@host:244
28 ---
29 */
30 module sily.uri;
31 
32 import std.conv: to;
33 import std.string: isNumeric;
34 import std.algorithm.searching: canFind;
35 
36 import sily.uni;
37 
/**
URI representation.

Holds the individual components of a URI. Fields that were not present
keep their defaults (empty strings / empty maps, `port == -1`).
*/
struct URI {
    /// Protocol/Scheme (https:// -> https)
    string protocol = "";
    /// Ditto
    alias scheme = protocol;
    /// User info (s://user.name:pass@host -> user.name:pass)
    string userinfo = "";
    /// Host/Domain (s://host.com/page -> host.com)
    string host = "";
    /// Port (s://host:25545 -> 25545), -1 when no port is set
    int port = -1;
    /// Path, including the leading slash (s://host/path/file -> /path/file)
    string path = "";
    /// Path parameters (s://host/path;param=val;flag -> [param: val, flag: ""])
    string[string] parameters;
    /// Query (s://host?q=4&s=5&e -> [q: 4, s: 5, e: ""])
    string[string] query;
    /// Tag/Fragment (s://host#fragment -> fragment)
    string tag = "";
    /// Ditto
    alias fragment = tag;
    /// Is host an IP address
    bool isHostIP = false;

    /**
    Treats host and path as single filepath (i.e file:///home/user/ -> /home/user).

    Returns: `host ~ path` with one trailing '/' removed, if there is one.
    */
    @property string filepath() const {
        string _out;
        if (host.length) _out ~= host;
        if (path.length) _out ~= path;
        if (_out.length > 0 && _out[$-1] == '/') _out = _out[0..$-1];
        return _out;
    }

    /**
    Sets/Returns authority (userinfo@host:port).

    The getter emits `userinfo@` only when userinfo is non-empty and
    `:port` only when `port >= 0`.
    */
    @property string authority() const {
        string _out;
        if (userinfo.length) _out ~= userinfo ~ "@";
        if (host.length) _out ~= host;
        if (port >= 0) _out ~= ":" ~ port.to!string;
        return _out;
    }

    /**
    Ditto

    The setter splits `uri` into its parts:
    $(UL
        $(LI everything before the first '@' becomes `userinfo`;)
        $(LI a ':' followed only by digits up to the end separates `host`
             from `port`; any other ':' stays part of the host;)
        $(LI a trailing ':' (empty port, allowed by RFC 3986) is dropped
             from the host and leaves `port` unchanged.)
    )
    Components that are absent from `uri` keep their previous value.

    Throws: `ConvException` when the port digits do not fit into `int`.
    */
    @property void authority(string uri) {
        import std.string : indexOf;

        // Userinfo: present only when there is an '@' separator.
        size_t start = 0;
        immutable at = uri.indexOf('@');
        if (at >= 0) {
            userinfo = uri[0 .. at];
            start = at + 1;
        }

        string rest = uri[start .. $];
        // Nothing after the userinfo: leave host and port as they are.
        if (rest.length == 0) return;

        // The port separator is the ':' whose tail consists only of digits.
        // An empty tail counts too, so "host:" yields host "host".
        foreach (i, c; rest) {
            if (c != ':') continue;
            string tail = rest[i + 1 .. $];
            if (!isPortDigits(tail)) continue;
            host = rest[0 .. i];
            if (tail.length) port = tail.to!int;
            return;
        }

        // No port separator found: the whole remainder is the host.
        host = rest;
    }

    /// True when `text` contains nothing but ASCII digits (also for empty text)
    private static bool isPortDigits(string text) {
        foreach (c; text) {
            if (c < '0' || c > '9') return false;
        }
        return true;
    }

    /**
    Set query with query format ("key=val&key2&key3=val2").

    Parsed entries are added to `query`; existing keys are overwritten,
    other existing entries are kept.
    */
    void setQuery(string _query) {
        setQueryParams(_query, '&', query);
    }

    /**
    Set path parameters with parameters format ("key=val;key2;key3=val2").

    Parsed entries are added to `parameters`; existing keys are overwritten,
    other existing entries are kept.
    */
    void setParameters(string _query) {
        setQueryParams(_query, ';', parameters);
    }

    /**
    Splits `_query` on `sep` into key/value pairs and stores them in `arr`.

    The first '=' after a non-empty key starts the value; later '='
    characters belong to the value. Keys without '=' get an empty value.
    Entries with an empty key are skipped.
    */
    private void setQueryParams(string _query, char sep, ref string[string] arr) {
        string key = "";
        string val = "";
        bool iskey = true;
        foreach (c; _query) {
            // Switch from key to value on the first '=' of an entry
            if (c == '=' && iskey && key.length) {
                iskey = false;
                continue;
            }
            // Separator closes the current entry
            if (c == sep) {
                if (key.length) {
                    arr[key] = val;
                    iskey = true;
                }
                key = "";
                val = "";
                continue;
            }
            if (iskey) {
                key ~= c;
            } else {
                val ~= c;
            }
        }

        // Last entry has no closing separator
        if (key.length) arr[key] = val;

    }
}
178 import std.stdio;
/**
Parses URI string.

Recognised forms (any trailing component may be missing):
---
scheme://userinfo@host:port/path;params?query#tag
scheme:path
host:port/path
---
Without a scheme the leading part of the string is treated as authority.
A host starting with '[' and closed by ']' is treated as an IP literal:
`isHostIP` is set and an optional `:port` after the bracket is stored in
`port`.

Params:
    uri = string to parse, may be empty
Returns: URI with every component that could be found; missing
    components keep their defaults.
Throws: `ConvException` when a port does not fit into `int`.
*/
URI parseURI(string uri) {
    URI u = URI();

    string temp = "";
    bool valid = false;
    int pos = 0;

    // scheme = [a-z, +, -, .]:
    for (int i = pos; i < uri.length; ++i) {
        char c = uri[i];
        if (c == ':') {
            // "host:8080" - a digit right after ':' means port, not scheme
            if (uri.length > i + 1 && isDigit(uri[i + 1])) break;
            valid = true;
            pos = i + 1;
            break;
        }
        if (!c.validScheme) break;
        temp ~= c;
    }

    if (valid) u.scheme = temp;
    temp = "";
    valid = false;

    // authority = //user:passwrd@host:port
    // Both lookups are bounds-checked: the input may end right after the
    // scheme ("mailto:") or be shorter than the "//" marker ("a:b").
    immutable bool hasRest = pos < uri.length;
    bool isQuery = hasRest && (uri[pos] == '#' || uri[pos] == '?');
    bool isAuthorityMarker = pos + 2 <= uri.length && uri[pos .. pos + 2] == "//";
    if (isAuthorityMarker || (u.scheme == "" && !isQuery)) {
        if (isAuthorityMarker) pos += 2;
        // Bracketed host ("[::1]") is an IP literal
        if (pos < uri.length && uri[pos] == '[') {
            for (int i = pos + 1; i < uri.length; ++i) {
                if (uri[i] == ']') {
                    u.isHostIP = true;
                    break;
                }
            }
        }
        // Authority runs until the first '/', '?', '#' or end of input
        for (int i = pos; i < uri.length; ++i) {
            char c = uri[i];
            if (c == '/' || c == '?' || c == '#' || i + 1 == uri.length) {
                if (i + 1 == uri.length && c != '/' && c != '?' && c != '#') temp ~= c;
                valid = true;
                pos = i;
                break;
            }
            temp ~= c;
        }
    }

    if (valid) {
        if (u.isHostIP) {
            u.host = temp;
            // Split "[literal]:port" so the port does not end up in host
            size_t close = 0;
            while (close < temp.length && temp[close] != ']') ++close;
            if (close + 1 < temp.length && temp[close + 1] == ':') {
                string portText = temp[close + 2 .. $];
                bool digitsOnly = true;
                foreach (c; portText) {
                    if (!isDigit(c)) {
                        digitsOnly = false;
                        break;
                    }
                }
                if (digitsOnly) {
                    u.host = temp[0 .. close + 1];
                    if (portText.length) u.port = portText.to!int;
                }
            }
        } else {
            u.authority = temp;
        }
    }

    // scheme:path - path without authority and without leading slash
    if (u.scheme != "" && !isAuthorityMarker && pos < uri.length) {
        immutable char first = uri[pos];
        if (first != '/' && first != ';' && first != '?' && first != '#') {
            int end = pos;
            while (end < uri.length && uri[end] != ';' && uri[end] != '?' && uri[end] != '#') ++end;
            u.path = uri[pos .. end];
            pos = end;
        }
    }

    if (!valid && u.scheme == "") pos = 0;

    // Remaining components, each introduced by its own marker character
    temp = findUntil(uri, pos, '/', [';', '?', '#']);
    if (temp.length) u.path = '/' ~ temp;

    temp = findUntil(uri, pos, ';', ['?', '#']);
    if (temp.length) u.setParameters(temp);

    temp = findUntil(uri, pos, '?', ['#']);
    if (temp.length) u.setQuery(temp);

    temp = findUntil(uri, pos, '#', []);
    if (temp.length) u.fragment = temp;

    return u;
}
250 
/**
Encodes URI struct into string.

Components are written in the order scheme, authority, path, path
parameters, query, fragment; empty components are skipped. "//" is only
written when both scheme and authority are present, and a lone "/" is
appended after the authority when there is no path.
*/
string encodeURI(URI uri) {
    string encoded;

    string auth = uri.authority;
    immutable bool withScheme = uri.scheme.length > 0;

    if (withScheme) {
        encoded ~= uri.scheme ~ ":";
    }

    if (auth.length > 0) {
        if (withScheme) encoded ~= "//";
        encoded ~= auth;
        // Authority without path still gets a root slash
        if (uri.path.length == 0) encoded ~= "/";
    }

    if (uri.path.length > 0) {
        encoded ~= uri.path;
    }

    if (uri.parameters.length > 0) {
        encoded ~= ";";
        encoded ~= joinQuery(uri.parameters, ';');
    }

    if (uri.query.length > 0) {
        encoded ~= "?";
        encoded ~= joinQuery(uri.query, '&');
    }

    if (uri.fragment.length > 0) {
        encoded ~= "#" ~ uri.fragment;
    }

    return encoded;
}
286 
/// Joins map into "key=val<sep>key2<sep>key3=val3", "=val" is omitted for empty values.
/// Entries come in the order of `arr.keys`.
private string joinQuery(ref string[string] arr, char sep) {
    string joined;
    auto names = arr.keys;
    foreach (idx, name; names) {
        // Separator goes between entries, never before the first one
        if (idx > 0) joined ~= sep;
        joined ~= name;
        string value = arr[name];
        if (value.length) joined ~= "=" ~ value;
    }
    return joined;
}
298 
/// Reads the component that starts with `_init` at `uri[pos]` and runs up to
/// the first character from `_until` or the end of `uri`.
/// Returns text after `_init` (empty if `uri[pos]` is not `_init`) and moves
/// `pos` to the stopping character or to the last consumed character.
private string findUntil(string uri, ref int pos, char _init, char[] _until) {
    string segment;

    // Nothing left to read
    if (pos >= uri.length) return segment;
    // Component must open with its marker
    if (uri[pos] != _init) return "";

    int idx = pos + 1;
    while (idx < uri.length) {
        immutable char ch = uri[idx];
        if (_until.canFind(ch)) {
            // Stop on the terminator, it is not part of the component
            pos = idx;
            break;
        }
        segment ~= ch;
        if (idx + 1 == uri.length) {
            // Ran into the end of input
            pos = idx;
            break;
        }
        ++idx;
    }

    // A component made of a single repeated marker counts as empty
    return (segment.length == 1 && segment[0] == _init) ? "" : segment;
}
318 
319 // TODO: encode " " as %20
320 
/// Is `c` allowed inside URI scheme (alphanumeric, '+', '-' or '.')
private bool validScheme(char c) {
    switch (c) {
        case '+', '-', '.':
            return true;
        default:
            return isAlphaNumeric(c);
    }
}