[lp_include_url]

Description

Link: [lp_include_url]
Author: Bil Corry
Category: Utility
Version: 8.5.x
License: Public Domain
Posted: Aug. 17, 2007
Updated: Jan. 01, 0001
More by this author...
Charset-aware include_url, returns string instead of bytes.  Use it the same as you would [include_url], with all the same parameters.  It will sniff the headers and meta on the page to try to discover the charset, then translate it to the outbound charset.  If no charset is discovered, it will default to UTF-8.

Optionally, you can specify the charset of the page you're trying to include.

Parameters

-charset: string, optional. When supplied, the tag skips charset detection and decodes the included page using this charset.

Sample Usage

This example will serve a UTF-8 HTML document, but includes a SHIFT_JIS HTML document as its source.

content_type:'text/html; charset=utf-8';
(lp_include_url:'http://www.r-agent.co.jp/');
						

Source Code

Click the "Download" button below to retrieve a copy of this tag, including the complete documentation and sample usage shown on this page. Place the downloaded ".inc" file in your LassoStartup folder, restart Lasso, and you can begin using this tag immediately.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
define_tag:'lp_include_url',
    -description='Charset-aware include_url, returns string instead of bytes.',
    -priority='replace';

    // Fetches a URL through [include_url] and returns the body as a string
    // decoded with the page's charset.
    //
    // Parameters: every parameter accepted by [include_url] is forwarded
    // unchanged, plus:
    //   -charset (string, optional)  decode with this charset and skip detection.
    //
    // Charset resolution order:
    //   1. the -charset parameter, when given as a name/value pair
    //   2. the first "charset=" found in the retrieved MIME headers
    //   3. the first "charset=" found in the first 1k of the body (meta tag)
    //   4. utf-8
    //
    // Returns: string.

    local:'lp_params' = params;

    // The headers are needed for charset sniffing. If the caller already asked
    // for them, reuse the caller's variable name; otherwise request them into
    // a variable of our own.
    local:'lp_headers' = '_lp_include_url_headers';
    if: #lp_params->(find:'-RetrieveMIMEHeaders')->size;
        #lp_headers = #lp_params->(find:'-RetrieveMIMEHeaders')->(get:1)->value;
    else;
        #lp_params->(insert:'-RetrieveMIMEHeaders'=#lp_headers);
    /if;

    // -charset is our own parameter: capture it and strip it so it is not
    // forwarded to [include_url].
    local:'lp_charset' = string;
    if: #lp_params->(find:'-charset')->size && #lp_params->(find:'-charset')->(get:1)->type == 'pair';
        #lp_charset = #lp_params->(find:'-charset')->(get:1)->value;
        #lp_params->(removeall:'-charset');
    /if;

    local:'lp_bytes' = @\include_url->(run: -name='include_url', -owner='include_url', -params=#lp_params);

    if: #lp_charset->size == 0;
        // \x22 and \x27 are the double and single quote. Allowing one before
        // the name lets the pattern match the quoted form <meta charset="...">
        // as well as the unquoted "charset=name" used in Content-Type values.
        local:'lp_find' = '(?i)charset\\s*=\\s*[\\x22\\x27]?([\\w\\-]+)';

        // string_findregexp yields the full match followed by its capture
        // group for each occurrence, so element 2 is the first charset name.
        // Testing for "at least 2" keeps detection working when charset is
        // declared more than once.
        local:'lp_hits' = (string_findregexp: (var: #lp_headers), -find=#lp_find);
        if: #lp_hits->size >= 2;
            #lp_charset = #lp_hits->(get:2);
        else;  // charset not found in headers, try meta on page
            // only look in first 1k of page
            // NOTE(review): behaviour of getrange on bodies shorter than 1024
            // bytes is unchanged from the original - confirm it truncates.
            local:'lp_page_top' = (string: #lp_bytes->(getrange: 1, 1024));
            #lp_hits = (string_findregexp: #lp_page_top, -find=#lp_find);
            if: #lp_hits->size >= 2;
                #lp_charset = #lp_hits->(get:2);
            else;
                #lp_charset = 'utf-8';  // default is utf-8 if all else fails
            /if;
        /if;
    /if;

    if: #lp_charset == 'utf-8';
        return: (string: #lp_bytes);
    else;
        return: #lp_bytes->(exportstring:#lp_charset);
    /if;

/define_tag;

 

Comments

none

Email:


Password:



Newest

Most Popular