49 lines
		
	
	
		
			1.1 KiB
		
	
	
	
		
			Perl
		
	
	
	
	
	
		
		
			
		
	
	
			49 lines
		
	
	
		
			1.1 KiB
		
	
	
	
		
			Perl
		
	
	
	
	
	
| 
								 | 
							
								#!/usr/bin/perl -w
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Convert unicode mappings to nginx configuration file format.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# You may find useful mappings in various places, including
							 | 
						||
| 
								 | 
							
								# unicode.org official site:
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								# http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT
							 | 
						||
| 
								 | 
							
								# http://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Needs perl 5.6 or later.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Written by Maxim Dounin, mdounin@mdounin.ru
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								###############################################################################
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								require 5.006;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Read mapping lines from the files named on the command line (or
								# from STDIN when none are given) and print one converted line to
								# STDOUT for every mapping found.  Lines that are neither comments,
								# blank, nor valid mappings are reported with warn().

								while (<>) {
									# Skip comments and empty lines

									next if /^#/;
									next if /^\s*$/;
									chomp;

									# Convert mappings.  Both codes must consist of hexadecimal
									# digits only, so that a malformed line is reported below
									# instead of being handed to hex() and turned into a bogus
									# entry.

									if (/^\s*0x([0-9A-Fa-f]{2})\s*0x([0-9A-Fa-f]{4})\s*(#.*)/) {
										# Mapping <from-code> <unicode-code> "#" <unicode-name>
										my ($cs_code, $un_code, $un_name) = ($1, $2, $3);

										# Produce UTF-8 sequence from character code: pack("U")
										# builds the character, unpack("U0C*") returns the bytes
										# of its UTF-8 encoding, and each byte is then printed
										# as two uppercase hex digits.

										my $un_utf8 = join('',
											map { sprintf("%02X", $_) }
											unpack("U0C*", pack("U", hex($un_code)))
										);

										# $un_name still carries its leading "#"; it is emitted
										# after the ";" that terminates the entry.
										print "    $cs_code  $un_utf8 ; $un_name\n";

									} else {
										# No trailing newline in the message, so perl appends
										# the script line and the input line number.
										warn "Unrecognized line: '$_'";
									}
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								###############################################################################
							 |