dolibarr  13.0.2
geturl.lib.php
Go to the documentation of this file.
1 <?php
2 /* Copyright (C) 2008-2020 Laurent Destailleur <eldy@users.sourceforge.net>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <https://www.gnu.org/licenses/>.
16  * or see https://www.gnu.org/
17  */
18 
/**
 * Get the content of a URL using cURL (through a proxy if one is configured in $conf->global->MAIN_PROXY_*).
 *
 * Redirections are never delegated to cURL: they are followed manually (5 hops at most) so that
 * the host of every hop can be checked against the $localurl policy before the request is sent.
 *
 * @param  string       $url            URL to call
 * @param  string       $postorget      'GET' (default, also used for any unknown value), 'POST', 'POSTALREADYFORMATED', 'PUT', 'PUTALREADYFORMATED', 'HEAD' or 'DELETE'
 * @param  string|array $param          Parameters of the request (x=a&y=z), or raw body for the '...ALREADYFORMATED' modes. An array is tolerated for 'PUT' (a warning is logged).
 * @param  int          $followlocation 0=Do not follow redirections, 1=Follow 301, 302, 303 and 307 redirections
 * @param  string[]     $addheaders     Array of HTTP header lines to add to the request
 * @param  string[]     $allowedschemes Allowed schemes. Only 'http' and 'https' are recognised; if not an array, no protocol restriction is set.
 * @param  int          $localurl       0=Only external IP allowed, 1=Only private/reserved IP allowed, any other value=No check on IP.
 *                                      The check is only done when the host of the URL is a literal IP, 'localhost' or 'localhost.domain' (host names are not resolved).
 * @return array                        On cURL error: array with keys 'content', 'curl_error_no', 'curl_error_msg'.
 *                                      Otherwise: all keys of curl_getinfo() (among them 'http_code') plus 'content', 'curl_error_no' (''), 'curl_error_msg' ('').
 *                                      When the URL is refused by the $localurl policy, 'http_code' is 400 and 'content' is the reason of the refusal.
 */
function getURLContent($url, $postorget = 'GET', $param = '', $followlocation = 1, $addheaders = array(), $allowedschemes = array('http', 'https'), $localurl = 0)
{
    global $conf;

    // Proxy setup read from global configuration (0 when not defined)
    $USE_PROXY = empty($conf->global->MAIN_PROXY_USE) ? 0 : $conf->global->MAIN_PROXY_USE;
    $PROXY_HOST = empty($conf->global->MAIN_PROXY_HOST) ? 0 : $conf->global->MAIN_PROXY_HOST;
    $PROXY_PORT = empty($conf->global->MAIN_PROXY_PORT) ? 0 : $conf->global->MAIN_PROXY_PORT;
    $PROXY_USER = empty($conf->global->MAIN_PROXY_USER) ? 0 : $conf->global->MAIN_PROXY_USER;
    $PROXY_PASS = empty($conf->global->MAIN_PROXY_PASS) ? 0 : $conf->global->MAIN_PROXY_PASS;

    dol_syslog("getURLContent postorget=".$postorget." URL=".$url." param=".$param);

    // Setting the curl parameters
    $ch = curl_init();

    curl_setopt($ch, CURLOPT_VERBOSE, 1);
    curl_setopt($ch, CURLOPT_USERAGENT, 'Dolibarr geturl function');

    // We use @ here because this may return a warning if safe mode or open_basedir is on (following location is forbidden in that case).
    // We force the value to false so we manage redirections ourself later.
    @curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);

    if (is_array($addheaders) && count($addheaders)) curl_setopt($ch, CURLOPT_HTTPHEADER, $addheaders);
    curl_setopt($ch, CURLINFO_HEADER_OUT, true); // To be able to retrieve the request header and log it

    // By default use the TLS version decided by PHP.
    // You can force, if supported, a version like TLSv1 or TLSv1.2 (6 = TLS 1.2)
    if (!empty($conf->global->MAIN_CURL_SSLVERSION)) curl_setopt($ch, CURLOPT_SSLVERSION, $conf->global->MAIN_CURL_SSLVERSION);

    // Turning off the server and peer verification.
    // NOTE(review): this disables any protection against man-in-the-middle on https URLs. Kept as is because callers may rely on it.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);

    // Restrict use to some protocols only
    $protocols = 0;
    if (is_array($allowedschemes)) {
        foreach ($allowedschemes as $allowedscheme) {
            if ($allowedscheme == 'http') $protocols |= CURLPROTO_HTTP;
            if ($allowedscheme == 'https') $protocols |= CURLPROTO_HTTPS;
        }
        curl_setopt($ch, CURLOPT_PROTOCOLS, $protocols);
        curl_setopt($ch, CURLOPT_REDIR_PROTOCOLS, $protocols);
    }

    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, empty($conf->global->MAIN_USE_CONNECT_TIMEOUT) ? 5 : $conf->global->MAIN_USE_CONNECT_TIMEOUT);
    curl_setopt($ch, CURLOPT_TIMEOUT, empty($conf->global->MAIN_USE_RESPONSE_TIMEOUT) ? 30 : $conf->global->MAIN_USE_RESPONSE_TIMEOUT);

    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // We want the response returned by curl_exec
    if ($postorget == 'POST') {
        curl_setopt($ch, CURLOPT_POST, 1); // POST
        curl_setopt($ch, CURLOPT_POSTFIELDS, $param); // Setting param x=a&y=z as POST fields
    } elseif ($postorget == 'POSTALREADYFORMATED') {
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'POST'); // HTTP request is 'POST' but param string is taken as it is
        curl_setopt($ch, CURLOPT_POSTFIELDS, $param); // param = content of post, like a xml string
    } elseif ($postorget == 'PUT') {
        $array_param = null;
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'PUT'); // HTTP request is 'PUT'
        if (!is_array($param)) parse_str($param, $array_param);
        else {
            dol_syslog("parameter param must be a string", LOG_WARNING);
            $array_param = $param;
        }
        curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($array_param)); // Setting param x=a&y=z as PUT fields
    } elseif ($postorget == 'PUTALREADYFORMATED') {
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'PUT'); // HTTP request is 'PUT'
        curl_setopt($ch, CURLOPT_POSTFIELDS, $param); // param = content of put, like a xml string
    } elseif ($postorget == 'HEAD') {
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'HEAD'); // HTTP request is 'HEAD'
        curl_setopt($ch, CURLOPT_NOBODY, true);
    } elseif ($postorget == 'DELETE') {
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'DELETE'); // HTTP request is 'DELETE'
    } else {
        curl_setopt($ch, CURLOPT_POST, 0); // GET
    }

    if ($USE_PROXY) {
        // The proxy password is masked: it must never be written in clear text into the log
        dol_syslog("getURLContent set proxy to ".$PROXY_HOST.":".$PROXY_PORT." - ".$PROXY_USER.":".($PROXY_PASS ? '*****' : ''));
        curl_setopt($ch, CURLOPT_PROXY, $PROXY_HOST.":".$PROXY_PORT);
        if ($PROXY_USER) curl_setopt($ch, CURLOPT_PROXYUSERPWD, $PROXY_USER.":".$PROXY_PASS);
    }

    $newUrl = $url;
    $maxRedirection = 5;
    $info = array();
    $response = '';
    $blockedreason = ''; // Not empty once a hop has been refused by the $localurl policy

    do {
        if ($maxRedirection < 1) break;

        curl_setopt($ch, CURLOPT_URL, $newUrl);

        // Extract the host of $newUrl. parse_url() may return false or an array without 'host' (malformed or relative URL).
        $newUrlArray = parse_url($newUrl);
        $hosttocheck = (is_array($newUrlArray) && isset($newUrlArray['host'])) ? $newUrlArray['host'] : '';
        $hosttocheck = str_replace(array('[', ']'), '', $hosttocheck); // Remove brackets of IPv6

        if (in_array($hosttocheck, array('localhost', 'localhost.domain'))) {
            $iptocheck = '127.0.0.1';
        } else {
            // TODO Resolve $hosttocheck to get an IP and set CURLOPT_CONNECT_TO to use this ip
            $iptocheck = $hosttocheck;
        }

        if (!filter_var($iptocheck, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4 | FILTER_FLAG_IPV6)) { // This is not an IP
            $iptocheck = 0;
        }

        if ($iptocheck) {
            if ($localurl == 0) { // Only external url allowed (dangerous, may allow to get malware)
                if (!filter_var($iptocheck, FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE)) {
                    $blockedreason = 'Error bad hostname IP (private or reserved range). Must be an external URL.';
                } elseif (in_array($iptocheck, array('100.100.100.200'))) {
                    $blockedreason = 'Error bad hostname IP (Used by Alibaba metadata). Must be an external URL.';
                }
            }
            if ($localurl == 1) { // Only local url allowed (dangerous, may allow to get metadata on server or make internal port scanning)
                if (filter_var($iptocheck, FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE)) {
                    $blockedreason = 'Error bad hostname. Must be a local URL.';
                }
            }
            if ($blockedreason !== '') {
                // Request is not sent. Report a 400 with the reason of the refusal.
                $info['http_code'] = 400;
                $info['content'] = $blockedreason;
                break;
            }
        }

        // Getting response from server
        $response = curl_exec($ch);

        $info = curl_getinfo($ch); // Reading of request must be done after sending request
        $http_code = $info['http_code'];
        if ($followlocation && ($http_code == 301 || $http_code == 302 || $http_code == 303 || $http_code == 307)) {
            $newUrl = $info['redirect_url'];
            $maxRedirection--;
            // TODO Use $info['local_ip'] and $info['primary_ip'] ?
            continue; // Jumps to the while condition: $http_code is a redirect code, so we loop
        } else {
            $http_code = 0; // Ends the loop
        }
    }
    while ($http_code);

    $request = curl_getinfo($ch, CURLINFO_HEADER_OUT); // Reading of request must be done after sending request

    dol_syslog("getURLContent request=".$request);
    dol_syslog("getURLContent response size=".strlen($response)); // Response may contain binary data, so we only log its size

    $rep = array();
    if (curl_errno($ch)) {
        $rep['content'] = $response;

        // Return the cURL error so the caller can display it
        $rep['curl_error_no'] = curl_errno($ch);
        $rep['curl_error_msg'] = curl_error($ch);

        dol_syslog("getURLContent response array is ".join(',', $rep));
    } else {
        // Start from the cURL info (http_code, header_size, ...)
        $rep = $info;
        dol_syslog("getURLContent http_code=".$rep['http_code']);

        // Add more keys to $rep.
        // When the URL was refused, keep the reason as content instead of overwriting it with the
        // (empty or stale) response of a request that was never sent.
        $rep['content'] = ($blockedreason !== '') ? $blockedreason : $response;
        $rep['curl_error_no'] = '';
        $rep['curl_error_msg'] = '';
    }

    curl_close($ch);

    return $rep;
}
227 
228 
/**
 * Extract a domain name from a URL.
 * For 'https://www.abc.mydomain.com/dir/page.html':
 *  - mode 0 returns 'mydomain'
 *  - mode 1 returns 'mydomain.com'
 *  - mode 2 returns 'abc.mydomain.com'
 *
 * @param  string $url  Full URL
 * @param  int    $mode 0=Name of the domain without the top level domain, 1=Last two labels of the host, 2=Last three labels of the host
 * @return string       Domain name extracted from the URL
 */
function getDomainFromURL($url, $mode = 0)
{
    // Keep only the host part: drop the http(s):// prefix, then everything from the first slash
    $host = preg_replace('/\/.*$/i', '', preg_replace('/^https?:\/\//i', '', $url));

    // Choose how many trailing labels of the host are kept
    if ($mode == 2) {
        // Remove part 'www.' before 'abc.mydomain.com'
        $pattern = '/^.*\.([^\.]+)\.([^\.]+)\.([^\.]+)$/';
        $replacement = '\1.\2.\3';
    } else {
        // Remove part 'www.abc.' before 'mydomain.com'
        $pattern = '/^.*\.([^\.]+)\.([^\.]+)$/';
        $replacement = '\1.\2';
    }
    $host = preg_replace($pattern, $replacement, $host);

    if (!empty($mode)) {
        return $host;
    }

    // Mode 0: also remove the top level domain (.com, .net, ...)
    return preg_replace('/\.[^\.]+$/', '', $host);
}
252 
/**
 * Return the root of a URL: the http(s):// prefix (if any) followed by the host, without any path.
 * For example: 'https://www.abc.mydomain.com/dir/page.html' returns 'https://www.abc.mydomain.com'
 * For example: 'http://www.abc.mydomain.com/' returns 'http://www.abc.mydomain.com'
 *
 * @param  string $url Full URL
 * @return string      Root URL (the prefix is returned as it was written in $url)
 */
function getRootURLFromURL($url)
{
    // Remember the scheme prefix exactly as given, if there is one
    $matches = null;
    $scheme = preg_match('/^(https?:\/\/)/i', $url, $matches) ? $matches[1] : '';

    // Drop the http(s):// prefix, then cut everything from the first slash
    $withoutscheme = preg_replace('/^https?:\/\//i', '', $url);
    $hostpart = preg_replace('/\/.*$/i', '', $withoutscheme);

    return $scheme.$hostpart;
}
272 
/**
 * Remove the HTML comments (<!-- ... -->) from a content.
 *
 * @param  string $content Content to clean
 * @return string          Content without HTML comments
 */
function removeHtmlComment($content)
{
    // Lazy match with the "s" (dot matches newline) modifier: each comment ends at its own first "-->",
    // whatever it contains (hyphens, line breaks) and even when it is empty.
    $content = preg_replace('/<!--.*?-->/s', '', $content);
    return $content;
}
removeHtmlComment($content)
Function to remove comments from HTML content.
Definition: geturl.lib.php:279
getRootURLFromURL($url)
Function to get the root URL from a long URL. For example: https://www.abc.mydomain.com/dir/page.html returns 'https://www.abc.mydomain.com'. For example: http://www.abc.mydomain.com/ returns 'http://www.abc.mydomain.com'.
Definition: geturl.lib.php:261
getURLContent($url, $postorget= 'GET', $param= '', $followlocation=1, $addheaders=array(), $allowedschemes=array('http', 'https'), $localurl=0)
Function to get a content from an URL (use proxy if proxy defined).
Definition: geturl.lib.php:38
dol_syslog($message, $level=LOG_INFO, $ident=0, $suffixinfilename= '', $restricttologhandler= '', $logcontext=null)
Write log message into outputs.
getDomainFromURL($url, $mode=0)
Function to get the second level domain name.
Definition: geturl.lib.php:237