Main Page | Class List | Directories | File List | Class Members | File Members

rcc-recode.c File Reference

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <locale.h>
#include <errno.h>
#include "config.h"
#include <librcc.h>

Defines

#define RCC_OPTION_TRANSLATE_SKIP_PARENT   RCC_OPTION_TRANSLATE_SKIP_PARRENT

Enumerations

enum  Modes { MODE_STDIN = 0x1000, MODE_DIRECTORY, MODE_FILE, MODE_FILELIST }
enum  Options {
  OPT_CONFIG = 'c', OPT_ENCODING_IN = 'e', OPT_FROM = 'f', OPT_HELP = 'h',
  OPT_LANGUAGE_IN = 'l', OPT_TO = 't', OPT_YES = 'y', OPT_ENCODING_OUT,
  OPT_LANGUAGE_OUT, OPT_TRANSLATION, OPT_CACHING, OPT_CACHE,
  OPT_AUTODETECT, OPT_OFFLINE, OPT_TIMEOUT, OPT_SUBDIRS
}

Functions

void Usage (int argc, char *argv[])
rcc_class_id GetClass (const char *name)
char * Translate (const char *source)
int Stdin (const char *arg)
int Directory (const char *arg)
int main (int argc, char *argv[])
char * Fullname (const char *path, const char *name)

Variables

int mode = MODE_STDIN

Define Documentation

#define RCC_OPTION_TRANSLATE_SKIP_PARENT   RCC_OPTION_TRANSLATE_SKIP_PARRENT
 


Enumeration Type Documentation

enum Modes
 

Enumeration values:
MODE_STDIN 
MODE_DIRECTORY 
MODE_FILE 
MODE_FILELIST 
00034              {
00035     MODE_STDIN = 0x1000,
00036     MODE_DIRECTORY,
00037     MODE_FILE,
00038     MODE_FILELIST
00039 } Modes;

enum Options
 

Enumeration values:
OPT_CONFIG 
OPT_ENCODING_IN 
OPT_FROM 
OPT_HELP 
OPT_LANGUAGE_IN 
OPT_TO 
OPT_YES 
OPT_ENCODING_OUT 
OPT_LANGUAGE_OUT 
OPT_TRANSLATION 
OPT_CACHING 
OPT_CACHE 
OPT_AUTODETECT 
OPT_OFFLINE 
OPT_TIMEOUT 
OPT_SUBDIRS 
00043              {
00044     OPT_CONFIG = 'c',
00045     OPT_ENCODING_IN = 'e',
00046     OPT_FROM = 'f',
00047     OPT_HELP = 'h',
00048     OPT_LANGUAGE_IN = 'l',
00049     OPT_TO = 't',
00050     OPT_YES = 'y',
00051     OPT_ENCODING_OUT,
00052     OPT_LANGUAGE_OUT,
00053     OPT_TRANSLATION,
00054     OPT_CACHING,
00055     OPT_CACHE,
00056     OPT_AUTODETECT,
00057     OPT_OFFLINE,
00058     OPT_TIMEOUT,
00059     OPT_SUBDIRS,
00060 } Options;


Function Documentation

int Directory const char *  arg  ) 
 

00537                                {
00538     int err;
00539     struct stat st;
00540     
00541     DIR *dir;
00542     struct dirent *entry;
00543     char *res;
00544     char answer;
00545     
00546     char stmp[255];
00547     char *fn, *nfn;
00548     
00549     if (!arg) arg = ".";
00550     
00551     printf("Processing directory: %s\n", arg);
00552     
00553     dir = opendir(arg);
00554     if (!dir) {
00555         fprintf(stderr, "*** Failed to process directory: %s\n", arg);
00556         return -1;
00557     }
00558     
00559     entry = readdir(dir);
00560     while (entry) {
00561         if (entry->d_name[0] == '.') {
00562             entry = readdir(dir);
00563             continue;
00564         }
00565         
00566         res = Translate(entry->d_name);
00567         if (res) {
00568             if (strcmp(res, entry->d_name)) {
00569                 if (ask) {
00570                     printf("Rename \"%s\" to \"%s\" (y/[n]) ", entry->d_name, res);
00571                     scanf("%c", &answer);
00572                     if (answer != '\n') fgets(stmp, 255, stdin);
00573                     answer = ((answer=='y')||(answer=='Y'))?1:0;
00574                 } else {
00575                     answer = 1;
00576                 }
00577                 
00578                 if (answer) {
00579                     fn = Fullname(arg, entry->d_name);
00580                     nfn = Fullname(arg, res);
00581                     if ((fn)&&(nfn)) {
00582                         if (!lstat(nfn, &st)) {
00583                             if (!ask) {
00584                                 printf("Trying rename \"%s\" to \"%s\"\n", entry->d_name, res);
00585                             }
00586                             
00587                             if (S_ISDIR(st.st_mode)) {
00588                                 printf("*** Directory with that name exists, skipping\n");
00589                                 answer = 0;
00590                             } else {
00591                                 printf("*** File exists, overwrite (y/[n]) ");
00592                                 scanf("%c", &answer);
00593                                 if (answer != '\n') fgets(stmp, 255, stdin);
00594                                 answer = ((answer=='y')||(answer=='Y'))?1:0;
00595                             }
00596                         }
00597                         if (answer) {
00598                             err = rename(fn, nfn);
00599                         }
00600                     } else err = ENOMEM;
00601                     
00602                     if (fn) free(fn);
00603                     if (nfn) free(nfn);
00604                     
00605                     if (err) {
00606                         printf("*** Renaming \"%s\" to \"%s\" is failed (errno: %u)\n", entry->d_name, res, errno);
00607                     } else if (!ask) {
00608                         printf("Rename completed: \"%s\" to \"%s\"\n", entry->d_name, res);
00609                     }
00610                 }
00611             }
00612             free(res);
00613         }
00614         entry = readdir(dir);
00615     }
00616     closedir(dir);
00617     
00618     if (process_subdirs) {
00619         dir = opendir(arg);
00620         if (!dir) return 0;
00621         
00622         entry = readdir(dir);
00623         while (entry) {
00624             if (entry->d_name[0] == '.') {
00625                 entry = readdir(dir);
00626                 continue;
00627             }
00628 
00629             fn = Fullname(arg, entry->d_name);
00630             if (fn) {
00631                 if ((!lstat(fn, &st))&&((S_ISDIR(st.st_mode)))) {
00632                     Directory(fn);
00633                 }
00634                 free(fn);
00635             }
00636             entry = readdir(dir);
00637         }
00638         closedir(dir);
00639     }
00640     
00641     
00642     return 0;
00643 }

/*
 * Join a directory path and an entry name into a newly allocated string.
 *
 * path - directory part; a single '/' separator is inserted unless path
 *        already ends with '/' or is empty (an empty path yields just name).
 * name - entry name appended after the separator.
 *
 * Returns a malloc()ed string which the caller must free(), or NULL if
 * either argument is NULL or the allocation fails.
 */
char *Fullname(const char *path, const char *name) {
    size_t path_len;
    char *res;

    if ((!path)||(!name)) return NULL;

    path_len = strlen(path);

    /* Room for both parts, an optional '/' and the terminating NUL. */
    res = (char*)malloc(path_len + strlen(name) + 2);
    if (!res) return NULL;

    /* Check the length first: indexing path[path_len - 1] on an empty
       string would read before the start of the buffer. */
    if ((path_len == 0)||(path[path_len - 1] == '/'))
        sprintf(res, "%s%s", path, name);
    else
        sprintf(res, "%s/%s", path, name);

    return res;
}

rcc_class_id GetClass const char *  name  ) 
 

00176                                         {
00177     int i;
00178     
00179     for (i = 1; classes[i].name; i++) {
00180         if ((!strcasecmp(name, classes[i].name))||(!strcasecmp(name, classes[i].fullname)))
00181             return i;
00182     }
00183     return (rcc_class_id)-1;
00184 }

int main int  argc,
char *  argv[]
 

00199                                  {
00200     rcc_language_id language_id, current_language_id, english_language_id;
00201     
00202     unsigned char c;
00203     
00204     char *arg = NULL;
00205     
00206     char *config_name = NULL;
00207     char *cache_name = NULL;
00208     
00209     char *from = "in";
00210     char *to = "out";
00211     
00212     unsigned char from_forced = 0;
00213     unsigned char to_forced = 0;
00214     
00215     char *lfrom = NULL;
00216     char *lto = NULL;
00217     
00218     int cache = RCC_OPTION_LEARNING_FLAG_USE;
00219     
00220     int ldetect = 0;
00221     int ldetect_all = 0;
00222     int ldetect_force = 0;
00223 
00224     unsigned long timeout = 0;
00225     char offline = 0;
00226     
00227     int option_index = 0;
00228     while ((c = getopt_long(argc, argv, "yhe:f:l:t:", long_options, &option_index)) != (unsigned char)-1) {
00229         switch (c) {
00230             case 0:
00231             break;
00232             case OPT_HELP:
00233                 Usage(argc, argv);
00234                 exit(0);                
00235             break;
00236             case OPT_CONFIG:
00237                 config_name = optarg;
00238             break;
00239             case OPT_CACHE:
00240                 cache_name = optarg;
00241             case OPT_FROM:
00242                 from_forced = 1;
00243                 from = optarg;
00244             break;
00245             case OPT_TO:
00246                 to_forced = 1;
00247                 to = optarg;
00248             break;
00249             case OPT_ENCODING_IN:
00250                 efrom = optarg;
00251             break;
00252             case OPT_ENCODING_OUT:
00253                 eto = optarg;
00254             break;
00255             case OPT_LANGUAGE_IN:
00256                 lfrom = optarg;
00257 /*
00258                 Selects main language, but for translation we can switch on
00259                 autodetection. Should do it manualy.
00260 */              
00261                 if (!ldetect_force) {
00262                     ldetect = 0;
00263                     ldetect_force = 1;
00264                 }
00265 
00266             break;
00267             case OPT_LANGUAGE_OUT:
00268                 lto = optarg;
00269             break;
00270             case OPT_TRANSLATION:
00271                 if (!optarg)
00272                     translate = RCC_OPTION_TRANSLATE_TO_ENGLISH;
00273                 else if (!strcasecmp(optarg, "full"))
00274                     translate = RCC_OPTION_TRANSLATE_FULL;
00275                 else if (!strcasecmp(optarg, "skip_parent"))
00276                     translate = RCC_OPTION_TRANSLATE_SKIP_PARENT;
00277                 else if (!strcasecmp(optarg, "skip_related"))
00278                     translate = RCC_OPTION_TRANSLATE_SKIP_RELATED;
00279                 else if (!strcasecmp(optarg, "english"))
00280                     translate = RCC_OPTION_TRANSLATE_TO_ENGLISH;
00281                 else if (!strcasecmp(optarg, "transliterate"))
00282                     translate = RCC_OPTION_TRANSLATE_TRANSLITERATE;
00283                 else if (!strcasecmp(optarg, "off"))
00284                     translate = RCC_OPTION_TRANSLATE_OFF;
00285                 else {
00286                     fprintf(stderr, "*** Unknown translation mode: %s\n\n", optarg);
00287                     Usage(argc, argv);
00288                     exit(0);
00289                 }
00290                 
00291                 if (!ldetect_force) {
00292                     if (!strcasecmp(optarg, "off"))
00293                         ldetect = 0;
00294                     else 
00295                         ldetect = 1;
00296                 }
00297             break;
00298             case OPT_CACHING:
00299                 if (!optarg)
00300                     cache = RCC_OPTION_LEARNING_FLAG_USE;
00301                 else if (!strcasecmp(optarg, "off"))
00302                     cache = 0;
00303                 else if (!strcasecmp(optarg, "use"))
00304                     cache = RCC_OPTION_LEARNING_FLAG_USE;
00305                 else if (!strcasecmp(optarg, "add"))
00306                     cache = RCC_OPTION_LEARNING_FLAG_USE|RCC_OPTION_LEARNING_FLAG_LEARN;
00307                 else if (!strcasecmp(optarg, "replace"))
00308                     cache = RCC_OPTION_LEARNING_FLAG_LEARN;
00309                 else {
00310                     fprintf(stderr, "*** Unknown caching mode: %s\n\n", optarg);
00311                     Usage(argc, argv);
00312                     exit(0);
00313                 }
00314             break;
00315             case OPT_AUTODETECT:
00316                 ldetect_force = 1;
00317 
00318                 if (!optarg) ldetect = 1;
00319                 else if (!strcasecmp(optarg, "off")) {
00320                     ldetect = 0;
00321                     ldetect_force = 1;
00322                 } else if (!strcasecmp(optarg, "on")) {
00323                     ldetect = 1;
00324                     ldetect_all = 0;
00325                     ldetect_force = 1;
00326                 } else if (!strcasecmp(optarg, "all")) {
00327                     ldetect = 1;
00328                     ldetect_all = 1;
00329                     ldetect_force = 1;
00330                 }
00331             break;
00332             case OPT_TIMEOUT:
00333                 timeout = atoi(optarg);
00334             break;
00335             case OPT_OFFLINE:
00336                 offline = 1;
00337             break;
00338             case OPT_SUBDIRS:
00339                 process_subdirs = 0;
00340             break;
00341             case OPT_YES:
00342                 ask = 0;
00343             break;
00344             default:
00345                 Usage(argc, argv);
00346                 exit(0);
00347         }
00348     }
00349     
00350     if (optind < argc) {
00351         if ((optind + 1) < argc) {
00352             fprintf(stderr, "*** Invalid non-option arguments:\n");
00353             for (;optind < argc;optind++) {
00354                 puts(argv[optind]);
00355             }
00356             fprintf(stderr, "\n\n");
00357             Usage(argc,argv);
00358             exit(0);
00359         }
00360         arg = argv[optind];
00361     }
00362 
00363     switch (mode) {
00364         case MODE_DIRECTORY:
00365             if (!from_forced) from = "fs";
00366             if (!to_forced) to = "fs";
00367         break;
00368         default:
00369             ;
00370     }
00371         
00372     setlocale(LC_ALL, "");
00373     
00374 
00375 
00376     rccInit();
00377     rccInitDefaultContext(NULL, 0, 0, classes, 0);
00378     rccInitDb4(NULL, cache_name, 0);
00379 
00380     if (timeout) rccSetOption(NULL, RCC_OPTION_TIMEOUT, timeout);
00381 
00382     if (config_name) rccLoad(NULL, config_name);
00383 
00384 
00385     rccSetOption(NULL, RCC_OPTION_LEARNING_MODE, cache);
00386 
00387     if (translate != RCC_OPTION_TRANSLATE_OFF) 
00388         rccSetOption(NULL, RCC_OPTION_TRANSLATE, translate);
00389 
00390     if (ldetect) {
00391         rccSetOption(NULL, RCC_OPTION_AUTODETECT_LANGUAGE, 1);
00392         if (ldetect_all) {
00393             rccSetOption(NULL, RCC_OPTION_CONFIGURED_LANGUAGES_ONLY, 0);
00394         }
00395     }
00396     
00397         // DS: More checks, sometimes we can skip that.
00398     if ((lfrom)||(lto)) {
00399 //      if (lfrom) rccSetOption(NULL, RCC_OPTION_AUTODETECT_LANGUAGE, 1);
00400         rccSetOption(NULL, RCC_OPTION_CONFIGURED_LANGUAGES_ONLY, 0);
00401     }
00402 
00403 #ifdef RCC_OPTION_OFFLINE
00404     if (offline)
00405         rccSetOption(NULL, RCC_OPTION_OFFLINE, 1);
00406 #endif /* RCC_OPTION_OFFLINE */
00407 
00408     if (from) {
00409         source_class_id = GetClass(from);
00410         if (source_class_id == (rcc_class_id)-1) {
00411             rccFree();
00412             fprintf(stderr, "*** Invalid source class (%s) specified\n", from);
00413             exit(1);
00414         }
00415     } 
00416     if (to) {
00417         target_class_id = GetClass(to);
00418         if (target_class_id == (rcc_class_id)-1) {
00419             rccFree();
00420             fprintf(stderr, "*** Invalid target class (%s) specified\n", to);
00421             exit(1);
00422         }
00423     } 
00424     
00425     current_language_id = rccGetCurrentLanguage(NULL);
00426     english_language_id = rccGetLanguageByName(NULL, "en");
00427 
00428     if (lfrom) {
00429         source_language_id = rccGetLanguageByName(NULL, lfrom);
00430         if (source_language_id == (rcc_language_id)-1) {
00431             rccFree();
00432             fprintf(stderr, "*** Invalid source language (%s) specified\n", lfrom);
00433             exit(1);
00434         }
00435     } else source_language_id = current_language_id;
00436     
00437     if (lto) {
00438         target_language_id = rccGetLanguageByName(NULL, lto);
00439         if (target_language_id == (rcc_language_id)-1) {
00440             rccFree();
00441             fprintf(stderr, "*** Invalid target language (%s) specified\n", lto);
00442             exit(1);
00443         }
00444     } else target_language_id = current_language_id;
00445     
00446     if (source_language_id == target_language_id) {
00447         language_id = source_language_id;
00448         
00449         if (language_id != current_language_id) {
00450             if ((rccSetLanguage(NULL, language_id))||(!rccGetCurrentLanguageName(NULL))) {
00451                 rccFree();
00452                 fprintf(stderr, "*** Unable to set the specified language (%s)\n", rccGetLanguageName(NULL, language_id));
00453                 exit(1);
00454             }
00455         } else {
00456             // Automatic
00457             if (!rccGetCurrentLanguageName(NULL)) {
00458                 if (current_language_id != english_language_id) {
00459                     language_id = english_language_id;
00460                     rccSetLanguage(NULL, english_language_id);
00461                 }
00462                 
00463                 if (!rccGetCurrentLanguageName(NULL)) {
00464                     rccFree();
00465                     fprintf(stderr, "*** Default language (%s) is not configured\n", rccGetLanguageName(NULL, current_language_id));
00466                     exit(1);
00467                 }
00468             }
00469         }
00470         
00471     } else {
00472         language_id = (rcc_language_id)-1;
00473         
00474             // Checking if languages are selectable
00475         if ((rccSetLanguage(NULL, source_language_id))||(!rccGetCurrentLanguageName(NULL))) {
00476             rccFree();
00477             fprintf(stderr, "*** Unable to set source language (%s)\n", rccGetLanguageName(NULL, source_language_id));
00478             exit(1);
00479         }
00480         if ((rccSetLanguage(NULL, target_language_id))||(!rccGetCurrentLanguageName(NULL))) {
00481             rccFree();
00482             fprintf(stderr, "*** Unable to set target language (%s)\n", rccGetLanguageName(NULL, target_language_id));
00483             exit(1);
00484         }
00485     }
00486     
00487     switch (mode) {
00488         case MODE_STDIN:
00489             Stdin(arg);
00490         break;
00491         case MODE_DIRECTORY:
00492             Directory(arg);
00493         break;
00494         case MODE_FILE:
00495             fprintf(stderr, "*** Mode (FILE) is not supported in current version\n");
00496         break;
00497         case MODE_FILELIST:
00498             fprintf(stderr, "*** Mode (FILELIST) is not supported in current version\n");
00499         break;
00500     }
00501 
00502     
00503     rccFree();
00504 
00505     return 0;
00506 }

/*
 * Filter mode: copy stdin to stdout, passing each line through Translate().
 *
 * arg - unused; present so that all mode handlers share one signature.
 *
 * Input is read with fgets() into a 16384-byte buffer, so longer lines are
 * processed in several pieces. When Translate() returns NULL the piece is
 * written unchanged.
 *
 * Always returns 0.
 */
int Stdin(const char *arg) {
    char *res;
    char buf[16384];

    (void)arg;

    while (fgets(buf, sizeof(buf), stdin)) {
        res = Translate(buf);
        /* The text is data, not a format string: a '%' in the input must be
           written out literally, never interpreted by printf. */
        fputs(res?res:buf, stdout);
        free(res);
    }

    return 0;
}

char * Translate const char *  source  ) 
 

00645                                     {
00646     rcc_string rccstring;
00647     char *recoded, *stmp;
00648 
00649     if (strlen(source)<2) return NULL;
00650 
00651     if (source_language_id != target_language_id) {
00652         rccSetLanguage(NULL, source_language_id);
00653     }
00654 
00655     if (efrom) rccstring = rccFromCharset(NULL, efrom, source);
00656     else rccstring = rccFrom(NULL, source_class_id, source);
00657     
00658     if (!rccstring) return NULL;
00659 
00660     if (source_language_id != target_language_id)
00661         rccSetLanguage(NULL, target_language_id);
00662 
00663     if (eto) {
00664         if (translate = RCC_OPTION_TRANSLATE_OFF) {
00665             stmp = rccTo(NULL, target_class_id, rccstring);
00666             if (stmp) {
00667                 recoded = rccRecodeCharsets(NULL, "UTF-8", eto, stmp);
00668                 if (recoded)  free(stmp);
00669                 else recoded = stmp;
00670             } else recoded = NULL;
00671             
00672         } else {
00673             recoded = rccToCharset(NULL, eto, rccstring);
00674         }
00675     } else recoded = rccTo(NULL, target_class_id, rccstring);
00676     
00677     free(rccstring);
00678     return recoded;        
00679 }

/*
 * Print the command line help to stdout.
 *
 * argc - unused.
 * argv - argv[0] is printed as the program name in the synopsis line.
 */
void Usage(int argc, char *argv[]) {
    (void)argc;

    printf(
"Usage:\n"
" %s [options] [mode] [file|directory]\n"
"  Modes:\n"
"       --stdin         - Convert stdin to stdout\n"
"       --directory     - Convert file names in specified directory\n"
"       --file          - Convert specified file\n"
"       --filelist      - Convert all files writed on stdin\n"
"       --help          - Help message\n"
"\n"
"  Options:\n"
"       -c <config>     - Specify configuration name\n"
"       -f <class>      - Source class ('in' is default)\n"
"       -t <class>      - Output class ('out' is default)\n"
"       -e <enc>        - Force specified source encoding (autodetection)\n"
"       -l <lang>       - Force specified source language (from LC_CTYPE)\n"
"       --force-target-encoding=<enc>\n"
"                       - Convert to the specified encoding\n"
"       --force-target-language=<lang>\n"
"                       - Translate to the specified language\n"
"       --caching=[mode]\n"
"                       - Use recodings cache. Following modes are supported\n"
"                       off             - Turn off\n"
"                       use             - Use cached values (default)\n"
"                       add             - Add new recodings to cache\n"
"                       replace         - Replace encodings in cache\n"
"       --cache=<name>\n"
"                       - Use specified cache database instead of default one\n"
"       --translation=[mode]\n"
"                       - Enable translation. Following modes are supported:\n"
"                       full            - Full\n"
"                       skip_parent     - Skip translation to parent lang\n"
"                       skip_related    - Skip translation between related langs\n"
"                       english         - Translate to english (default)\n"
"                       transliterate   - Transliterate\n"
"       --language-detection=[mode]\n"
"                       - Lanuage autodetection. Following modes are supported:\n"
"                       off             - Current language is considered\n"
"                       on              - Use only configured langs (default)\n"
"                       all             - Try everything (slow)\n"
"       --timeout=<us>\n"
"                       - Specify recoding timeout in microseconds (1s default)\n"
"\n"
"       -y              - Do not ask any question\n"
"       --disable-subdirs\n"
"                       - Do not descend into the sub directories\n"
"\n"
" Language Relations:\n"
"  To prevent unneccesary translations the concept of related/parent languages is\n"
"  introduced. For each language you can specify a parent language.\n"
"  skip_parent   translation option will turn off translation to parent language\n"
"  skip_related  translation option will additionaly turn off translation from\n"
"  parent language.\n"
"\n"
"  For example, in the default configuration Russian is parent of Ukrainian, and\n"
/* The mode name must match what the option parser accepts: "skip_parent". */
"  English is parent of all other languages. With \"skip_parent\" option the\n"
"  translation from Russian to Ukrainian would be turned off, but translation\n"
"  from Ukrainian to Russian would operate. With \"skip_related\" option the\n"
"  translation in both directions would be disabled\n"
"\n\n"
" Language Detection:\n"
"  Current version uses aspell dictionaries to autodetect language. Therefore,\n"
"  only languages with aspell available in the system aspell dictionaries are\n"
"  autodected. Beware, if your system contains a lot of installed languages,\n"
"  the autodection may take considerable amount of time.\n"
"\n\n",
argv[0]);
}


Variable Documentation

int mode = MODE_STDIN
 


Generated on Mon Apr 16 02:15:07 2007 for LibRCC by  doxygen 1.4.2