Mercurial > pidgin
annotate po/check_po.pl @ 22217:ad357ca94de9
We will probably eventually use purple_object_[get|set]_ui_data. Until then, this will do.
| author | Sadrul Habib Chowdhury <imadil@gmail.com> |
|---|---|
| date | Sat, 26 Jan 2008 22:33:08 +0000 |
| parents | 72f90ea7ae34 |
| children | a675137fc598 |
| rev | line source |
|---|---|
|
22015
72f90ea7ae34
Don't assume perl is at /usr/bin/perl, use /usr/bin/env instead. This is not
Richard Laager <rlaager@wiktel.com>
parents:
6238
diff
changeset
|
1 #!/usr/bin/env perl -w |
| 6238 | 2 # |
| 3 # check_po.pl - check po file translations for likely errors | |
| 4 # | |
| 5 # Written by David W. Pfitzner dwp@mso.anu.edu.au | |
| 6 # This script is hereby placed in the Public Domain. | |
| 7 # | |
| 8 # Various checks on po file translations: | |
| 9 # - printf-style format strings; | |
| 10 # - differences in trailing newlines; | |
| 11 # - empty (non-fuzzy) msgid; | |
| 12 # - likely whitespace errors on joining multi-line entries | |
| 13 # Ignores all fuzzy entries. | |
| 14 # | |
| 15 # Options: | |
| 16 # -x Don't do standard checks above (eg, just check one of below). | |
| 17 # -n Check newlines within strings; ie, that have equal numbers | |
| 18 # of newlines in msgstr and msgid. (Optional because this may | |
| 19 # happen legitimately.) | |
| 20 # -w Check leading whitespace. Sometimes whitespace is simply | |
| 21 # spacing (eg, for widget labels etc), or punctuation differences, | |
| 22 # so this may be ok. | |
| 23 # -W Check trailing whitespace. See -w above. | |
| 24 # -p Check trailing punctuation. | |
| 25 # -c Check capitalization of first non-whitespace character | |
| 26 # (only if [a-zA-Z]). | |
| 27 # -e Check on empty (c.q. new) msgstr | |
| 28 # | |
| 29 # Reads stdin (or filename args, via <>), writes any problems to stdout. | |
| 30 # | |
| 31 # Modified by Davide Pagnin nightmare@freeciv.it to support plural forms | |
| 32 # | |
| 33 # Version: 0.41 (2002-06-06) | |
| 34 | |
| 35 use strict; | |
| 36 use vars qw($opt_c $opt_n $opt_p $opt_w $opt_W $opt_x $opt_e); | |
| 37 use Getopt::Std; | |
| 38 | |
| 39 getopts('cnpwWxe'); | |
| 40 | |
| 41 # Globals, for current po entry: | |
| 42 # | |
| 43 # Note that msgid and msgstr have newlines represented by the | |
| 44 # two characters '\' and 'n' (and similarly for other escapes). | |
| 45 | |
| 46 my @amsgid; # lines exactly as in input | |
| 47 my @amsgstr; | |
| 48 my $entryline; # lineno where entry starts | |
| 49 my $msgid; # lines joined by "" | |
| 50 my $msgstr; | |
| 51 my $is_fuzzy; | |
| 52 my $is_cformat; | |
| 53 my $state; # From constant values below. | |
| 54 my $did_print; # Whether we have printed this entry, to | |
| 55 # print only once for multiple problems. | |
| 56 | |
| 57 use constant S_LOOKING_START => 0; # looking for start of entry | |
| 58 use constant S_DOING_MSGID => 1; # doing msgid part | |
| 59 use constant S_DOING_MSGSTR => 2; # doing msgstr part | |
| 60 | |
| 61 # Initialize or reinitalize globals to prepare for new entry: | |
| 62 sub new_entry { | |
| 63 @amsgid = (); | |
| 64 @amsgstr = (); | |
| 65 $msgid = undef; | |
| 66 $msgstr = undef; | |
| 67 $entryline = 0; | |
| 68 $is_fuzzy = 0; | |
| 69 $is_cformat = 0; | |
| 70 $did_print = 0; | |
| 71 $state = S_LOOKING_START; | |
| 72 } | |
| 73 | |
| 74 # Nicely print either a "msgid" or "msgstr" (name is one of these) | |
| 75 # with given array of data. | |
| 76 sub print_one { | |
| 77 my $name = shift; | |
| 78 print " $name \"", join("\"\n \"", @_), "\"\n"; | |
| 79 } | |
| 80 | |
| 81 # Print a problem (args like print()), preceeded by entry unless | |
| 82 # we have already printed that: label, and msgid and msgstr. | |
| 83 # | |
| 84 sub print_problem { | |
| 85 unless ($did_print) { | |
| 86 print "ENTRY:", ($ARGV eq "-" ? "" : " ($ARGV, line $entryline)"), "\n"; | |
| 87 print_one("msgid", @amsgid); | |
| 88 print_one("msgstr", @amsgstr); | |
| 89 $did_print = 1; | |
| 90 } | |
| 91 print "*** ", @_; | |
| 92 } | |
| 93 | |
| 94 # Check final newline: probably, translations should end in a newline | |
| 95 # if and only if the original string does. | |
| 96 # (See also check_trailing_whitespace and check_num_newlines below.) | |
| 97 # | |
| 98 sub check_trailing_newlines { | |
| 99 if ($opt_x) { return; } | |
| 100 | |
| 101 my ($ichar, $schar); | |
| 102 | |
| 103 $ichar = (length($msgid)>=2) ? substr($msgid, -2, 2) : ""; | |
| 104 $schar = (length($msgstr)>=2) ? substr($msgstr, -2, 2) : ""; | |
| 105 | |
| 106 if ($ichar eq "\\n" && $schar ne "\\n") { | |
| 107 print_problem "Missing trailing newline\n"; | |
| 108 } | |
| 109 if ($ichar ne "\\n" && $schar eq "\\n") { | |
| 110 print_problem "Extra trailing newline\n"; | |
| 111 } | |
| 112 | |
| 113 } | |
| 114 | |
| 115 # Check leading whitespace. In general, any leading whitespace should | |
| 116 # be the same in msgstr and msgid -- but not always. | |
| 117 # | |
| 118 sub check_leading_whitespace { | |
| 119 unless ($opt_w) { return; } | |
| 120 | |
| 121 my ($id, $str); | |
| 122 | |
| 123 if ($msgid =~ m/^(\s+)/) { | |
| 124 $id = $1; | |
| 125 } else { | |
| 126 $id = ""; | |
| 127 } | |
| 128 if ($msgstr =~ m/^(\s+)/) { | |
| 129 $str = $1; | |
| 130 } else { | |
| 131 $str = ""; | |
| 132 } | |
| 133 if ($id ne $str) { | |
| 134 print_problem "Different leading whitespace\n"; | |
| 135 } | |
| 136 } | |
| 137 | |
| 138 # Check trailing whitespace. In general, any trailing whitespace should | |
| 139 # be the same in msgstr and msgid -- but not always. | |
| 140 # | |
| 141 sub check_trailing_whitespace { | |
| 142 unless ($opt_W) { return; } | |
| 143 | |
| 144 my ($id, $str); | |
| 145 | |
| 146 if ($msgid =~ m/((?:\s|\\n)+)$/) { | |
| 147 $id = $1; | |
| 148 } else { | |
| 149 $id = ""; | |
| 150 } | |
| 151 if ($msgstr =~ m/((?:\s|\\n)+)$/) { | |
| 152 $str = $1; | |
| 153 } else { | |
| 154 $str = ""; | |
| 155 } | |
| 156 if ($id ne $str) { | |
| 157 print_problem "Different trailing whitespace\n"; | |
| 158 } | |
| 159 } | |
| 160 | |
| 161 # Check equal numbers of newlines. In general ... etc. | |
| 162 # | |
| 163 sub check_num_newlines { | |
| 164 unless ($opt_n) { return; } | |
| 165 | |
| 166 my $num_i = ($msgid =~ m(\\n)g); | |
| 167 my $num_s = ($msgstr =~ m(\\n)g); | |
| 168 | |
| 169 if ($num_i != $num_s) { | |
| 170 print_problem "Mismatch in newline count\n"; | |
| 171 } | |
| 172 | |
| 173 } | |
| 174 | |
| 175 # Check capitalization of first non-whitespace character (for [a-zA-Z] | |
| 176 # only). In general ... etc. | |
| 177 # | |
| 178 sub check_leading_capitalization { | |
| 179 unless ($opt_c) { return; } | |
| 180 | |
| 181 my ($id, $str); | |
| 182 | |
| 183 if ($msgid =~ m/^\s*([a-zA-Z])/) { | |
| 184 $id = $1; | |
| 185 } | |
| 186 if ($msgstr =~ m/^\s*([a-zA-Z])/) { | |
| 187 $str = $1; | |
| 188 } | |
| 189 if (defined($id) && defined($str)) { | |
| 190 if (($id =~ /^[a-z]$/ && $str =~ /^[A-Z]$/) || | |
| 191 ($id =~ /^[A-Z]$/ && $str =~ /^[a-z]$/)) { | |
| 192 print_problem "Different leading capitalization\n"; | |
| 193 } | |
| 194 } | |
| 195 } | |
| 196 | |
| 197 # Check trailing 'punctuation' characters (ignoring trailing whitespace). | |
| 198 # In general .. etc. | |
| 199 # | |
| 200 sub check_trailing_punctuation { | |
| 201 unless ($opt_p) { return; } | |
| 202 | |
| 203 my ($id, $str); | |
| 204 | |
| 205 # Might want more characters: | |
| 206 if ($msgid =~ m/([\\\.\/\,\!\?\"\'\:\;])+(?:\s|\\n)*$/) { | |
| 207 $id = $1; | |
| 208 } else { | |
| 209 $id = ""; | |
| 210 } | |
| 211 if ($msgstr =~ m/([\\\.\/\,\!\?\"\'\:\;])+(?:\s|\\n)*$/) { | |
| 212 $str = $1; | |
| 213 } else { | |
| 214 $str = ""; | |
| 215 } | |
| 216 ##print "$id $str\n"; | |
| 217 if ($id ne $str) { | |
| 218 print_problem "Different trailing punctuation\n"; | |
| 219 } | |
| 220 } | |
| 221 | |
| 222 # Check that multiline strings have whitespace separation, since | |
| 223 # otherwise, eg: | |
| 224 # msgstr "this is a multiline" | |
| 225 # "string" | |
| 226 # expands to: | |
| 227 # "this is a multilinestring" | |
| 228 # | |
| 229 sub check_whitespace_joins { | |
| 230 if ($opt_x) { return; } | |
| 231 | |
| 232 my $ok = 1; | |
| 233 my $i = 0; | |
| 234 | |
| 235 foreach my $aref (\@amsgid, \@amsgstr) { | |
| 236 my $prev = undef; | |
| 237 LINE: | |
| 238 foreach my $line (@$aref) { | |
| 239 if (defined($prev) | |
| 240 && length($prev) | |
| 241 && $prev !~ /\s$/ | |
| 242 && $prev !~ /\\n$/ | |
| 243 && $line !~ /^\s/ | |
| 244 && $line !~ /^\\n/) | |
| 245 { | |
| 246 $ok = 0; | |
| 247 last LINE; | |
| 248 } | |
| 249 $prev = $line; | |
| 250 } | |
| 251 if (!$ok) { | |
| 252 print_problem("Possible non-whitespace line-join problem in ", | |
| 253 ($i==0 ? "msgid" : "msgstr"), " \n"); | |
| 254 } | |
| 255 $i++; | |
| 256 } | |
| 257 } | |
| 258 | |
| 259 # Check printf-style format entries. | |
| 260 # Non-trivial, because translation strings may use format specifiers | |
| 261 # out of order, or skip some specifiers etc. Also gettext marks | |
| 262 # anything with '%' as cformat, though not all are. | |
| 263 # | |
| 264 sub check_cformat { | |
| 265 unless ($is_cformat) { return; } | |
| 266 if ($opt_x) { return; } | |
| 267 | |
| 268 my (@iform, @sform); | |
| 269 @iform = ($msgid =~ m/\%[0-9\.\$]*[a-z]/g); | |
| 270 @sform = ($msgstr =~ m/\%[0-9\.\$]*[a-z]/g); | |
| 271 | |
| 272 ##print join("::", @iform), "\n"; | |
| 273 ##print join("::", @sform), "\n"; | |
| 274 | |
| 275 my $js; # index in sform | |
| 276 my $j; # index into iform | |
| 277 SFORM: | |
| 278 for ($js=0; $js < @sform; $js++) { | |
| 279 my $sf = $sform[$js]; | |
| 280 my $sf_orig = $sf; | |
| 281 if ($sf =~ s/^\%([0-9]+)\$(.*[a-z])$/\%$2/) { | |
| 282 $j = $1-1; | |
| 283 } else { | |
| 284 $j = $js; | |
| 285 } | |
| 286 if ($j > $#iform) { | |
| 287 print_problem("Format number mismatch for $sf_orig [msgstr:", | |
| 288 ($js+1), "]\n"); | |
| 289 next SFORM; | |
| 290 } | |
| 291 my $if = $iform[$j]; | |
| 292 if ($sf ne $if) { | |
| 293 print_problem("Format mismatch: $sf_orig [msgstr:", ($js+1), "]", | |
| 294 " vs $if [msgid:", ($j+1), "]\n"); | |
| 295 } | |
| 296 } | |
| 297 } | |
| 298 | |
| 299 # Run all individual checks on current entry, reporting any problems. | |
| 300 sub check_entry { | |
| 301 if ($is_fuzzy) { | |
| 302 return; | |
| 303 } | |
| 304 $msgid = join("", @amsgid); | |
| 305 $msgstr = join("", @amsgstr); | |
| 306 | |
| 307 unless ($opt_x) { | |
| 308 if (length($msgid)==0) { | |
| 309 print_problem "Zero length msgid\n"; | |
| 310 } | |
| 311 } | |
| 312 if (length($msgstr)==0) { | |
| 313 unless ($opt_e) { return; } | |
| 314 print_problem "Untranslated msgid\n"; | |
| 315 } | |
| 316 check_cformat; | |
| 317 check_whitespace_joins; | |
| 318 check_num_newlines; | |
| 319 check_leading_whitespace; | |
| 320 check_trailing_newlines; | |
| 321 check_trailing_whitespace; | |
| 322 check_leading_capitalization; | |
| 323 check_trailing_punctuation; | |
| 324 } | |
| 325 | |
| 326 new_entry; | |
| 327 | |
| 328 LINE: | |
| 329 while(<>) { | |
| 330 if ( m(^\s*$) ) { | |
| 331 if ($state==S_DOING_MSGSTR) { | |
| 332 check_entry; | |
| 333 new_entry; | |
| 334 } | |
| 335 next LINE; | |
| 336 } | |
| 337 if ( m(^\#, fuzzy) ) { | |
| 338 $is_fuzzy = 1; | |
| 339 } | |
| 340 if ( m(^\#, .*c-format) ) { | |
| 341 # .* is because can have fuzzy, c-format | |
| 342 $is_cformat = 1; | |
| 343 } | |
| 344 if ( m(^\#) ) { | |
| 345 next LINE; | |
| 346 } | |
| 347 if ( m(^msgid \"(.*)\"$) ) { | |
| 348 $entryline = $.; | |
| 349 @amsgid = ($1); | |
| 350 $state = S_DOING_MSGID; | |
| 351 next LINE; | |
| 352 } | |
| 353 if ( m(^msgid_plural \"(.*)\"$) ) { | |
| 354 $entryline = $.; | |
| 355 @amsgid = ($1); | |
| 356 $state = S_DOING_MSGID; | |
| 357 next LINE; | |
| 358 } | |
| 359 if ( m(^msgstr \"(.*)\"$) ) { | |
| 360 @amsgstr = ($1); | |
| 361 $state = S_DOING_MSGSTR; | |
| 362 next LINE; | |
| 363 } | |
| 364 if ( m(^msgstr\[[0-2]\] \"(.*)\"$) ) { | |
| 365 @amsgstr = ($1); | |
| 366 $state = S_DOING_MSGSTR; | |
| 367 next LINE; | |
| 368 } | |
| 369 if ( m(^\"(.*)\"$) ) { | |
| 370 if ($state==S_DOING_MSGID) { | |
| 371 push @amsgid, $1; | |
| 372 } elsif($state==S_DOING_MSGSTR) { | |
| 373 push @amsgstr, $1; | |
| 374 } else { | |
| 375 die "Looking at string $_ in bad state $state,"; | |
| 376 } | |
| 377 next LINE; | |
| 378 } | |
| 379 die "Unexpected at $.: ", $_; | |
| 380 } |
